From 6b1c8a7fe4b6d128fbfbafa780b3ffe9a7948629 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 12 Aug 2022 12:50:36 -0700 Subject: [PATCH 01/95] Python array API conformity workflow added --- .github/workflows/array-api-conformity.yml | 79 ++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 .github/workflows/array-api-conformity.yml diff --git a/.github/workflows/array-api-conformity.yml b/.github/workflows/array-api-conformity.yml new file mode 100644 index 0000000000..8135948802 --- /dev/null +++ b/.github/workflows/array-api-conformity.yml @@ -0,0 +1,79 @@ +name: Check array api conformity +on: + pull_request: + push: + branches: [master] + +jobs: + array-api-test: + name: Test array API standard conformity + runs-on: ubuntu-20.04 + + steps: + - name: Add Intel repository + run: | + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + sudo apt-get update + + - name: Install Intel OneAPI + run: | + sudo apt-get install intel-oneapi-compiler-dpcpp-cpp + sudo apt-get install intel-oneapi-tbb + + - name: Install CMake and Ninja + shell: bash -l {0} + run: | + sudo apt-get install cmake ninja-build + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.9' + architecture: x64 + + - name: Cache array API tests + id: cache-array-api-tests + uses: actions/cache@v3 + with: + path: | + /home/runner/work/array-api-tests/ + key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('/home/runner/work/array-api-tests/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-build-${{ env.cache-name }}- + ${{ runner.os }}-build- + ${{ runner.os }}- + + - name: Get array API tests repo + if: steps.cache-array-api-tests.outputs.cache-hit != 'true' + shell: bash -l {0} + run: | + cd /home/runner/work 
+ git clone --recurse-submodules https://github.com/data-apis/array-api-tests -b 2022.05.18 + + - name: Install array API test dependencies + shell: bash -l {0} + run: | + cd /home/runner/work/array-api-tests + pip install -r requirements.txt + + - name: Checkout repo + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Install dpctl dependencies + shell: bash -l {0} + run: | + pip install numpy cython setuptools pytest scikit-build + + - name: Build dpctl+run conformity test + shell: bash -l {0} + run: | + source /opt/intel/oneapi/setvars.sh + python setup.py develop -- -G Ninja -DCMAKE_C_COMPILER:PATH=$(which icx) -DCMAKE_CXX_COMPILER:PATH=$(which icpx) + cd /home/runner/work/array-api-tests + export ARRAY_API_TESTS_MODULE=dpctl.tensor + pytest array_api_tests/ --ci From d1ffb5fbb6fecffcfbb2c487c405e11157ad6c26 Mon Sep 17 00:00:00 2001 From: ndgrigorian <46709016+ndgrigorian@users.noreply.github.com> Date: Mon, 15 Aug 2022 18:36:30 -0700 Subject: [PATCH 02/95] Fixed whitespace --- .github/workflows/array-api-conformity.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/array-api-conformity.yml b/.github/workflows/array-api-conformity.yml index 8135948802..838826f170 100644 --- a/.github/workflows/array-api-conformity.yml +++ b/.github/workflows/array-api-conformity.yml @@ -33,7 +33,7 @@ jobs: with: python-version: '3.9' architecture: x64 - + - name: Cache array API tests id: cache-array-api-tests uses: actions/cache@v3 From ae4c5b3d6ae1c5571730631ce32e838f7cf79d4a Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 12 Aug 2022 12:50:36 -0700 Subject: [PATCH 03/95] Python array API conformity workflow added --- .github/workflows/array-api-conformity.yml | 79 ++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 .github/workflows/array-api-conformity.yml diff --git a/.github/workflows/array-api-conformity.yml b/.github/workflows/array-api-conformity.yml new file mode 100644 index 
0000000000..8135948802 --- /dev/null +++ b/.github/workflows/array-api-conformity.yml @@ -0,0 +1,79 @@ +name: Check array api conformity +on: + pull_request: + push: + branches: [master] + +jobs: + array-api-test: + name: Test array API standard conformity + runs-on: ubuntu-20.04 + + steps: + - name: Add Intel repository + run: | + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB + sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" + sudo apt-get update + + - name: Install Intel OneAPI + run: | + sudo apt-get install intel-oneapi-compiler-dpcpp-cpp + sudo apt-get install intel-oneapi-tbb + + - name: Install CMake and Ninja + shell: bash -l {0} + run: | + sudo apt-get install cmake ninja-build + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.9' + architecture: x64 + + - name: Cache array API tests + id: cache-array-api-tests + uses: actions/cache@v3 + with: + path: | + /home/runner/work/array-api-tests/ + key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('/home/runner/work/array-api-tests/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-build-${{ env.cache-name }}- + ${{ runner.os }}-build- + ${{ runner.os }}- + + - name: Get array API tests repo + if: steps.cache-array-api-tests.outputs.cache-hit != 'true' + shell: bash -l {0} + run: | + cd /home/runner/work + git clone --recurse-submodules https://github.com/data-apis/array-api-tests -b 2022.05.18 + + - name: Install array API test dependencies + shell: bash -l {0} + run: | + cd /home/runner/work/array-api-tests + pip install -r requirements.txt + + - name: Checkout repo + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Install dpctl dependencies + shell: bash -l {0} + run: | + pip install numpy cython setuptools pytest scikit-build + + - name: Build dpctl+run conformity test 
+ shell: bash -l {0} + run: | + source /opt/intel/oneapi/setvars.sh + python setup.py develop -- -G Ninja -DCMAKE_C_COMPILER:PATH=$(which icx) -DCMAKE_CXX_COMPILER:PATH=$(which icpx) + cd /home/runner/work/array-api-tests + export ARRAY_API_TESTS_MODULE=dpctl.tensor + pytest array_api_tests/ --ci From 60f43f5013f44dffe3989cb09e0f559a4363ca68 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 07:12:50 -0500 Subject: [PATCH 04/95] dpctl.tensor.asarray must check numpy array data-type ```python import numpy as np, dpctl.tensor as dpt dpt.asarray(np.array([1,2,3], dtype=object)) # now raises TypeError ``` --- dpctl/tensor/_ctors.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index b895ad2341..3bcedcbf63 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -44,7 +44,7 @@ def _get_dtype(dtype, sycl_obj, ref_type=None): dtype = ti.default_device_complex_type(sycl_obj) return np.dtype(dtype) else: - raise ValueError(f"Reference type {ref_type} not recognized.") + raise TypeError(f"Reference type {ref_type} not recognized.") else: return np.dtype(dtype) @@ -199,6 +199,11 @@ def _asarray_from_numpy_ndarray( if usm_type is None: usm_type = "device" copy_q = normalize_queue_device(sycl_queue=None, device=sycl_queue) + if ary.dtype.char not in "?bBhHiIlLqQefdFD": + raise TypeError( + f"Numpy array of data type {ary.dtype} is not supported. " + "Please convert the input to an array with numeric data type." + ) if dtype is None: ary_dtype = ary.dtype dtype = _get_dtype(dtype, copy_q, ref_type=ary_dtype) From c1b7625e31582946f2e0b75df25208510fc6e270 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 07:23:13 -0500 Subject: [PATCH 05/95] Altered the logic of finding the most recent tag to download from The tag must contain sycl-nightly substring in it. 
--- .github/workflows/os-llvm-sycl-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/os-llvm-sycl-build.yml b/.github/workflows/os-llvm-sycl-build.yml index 841143e890..e7140e6ab8 100644 --- a/.github/workflows/os-llvm-sycl-build.yml +++ b/.github/workflows/os-llvm-sycl-build.yml @@ -41,7 +41,7 @@ jobs: cd /home/runner/work mkdir -p sycl_bundle cd sycl_bundle - export LATEST_LLVM_TAG=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | tail --lines=1) + export LATEST_LLVM_TAG=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | grep sycl-nightly | tail --lines=1) export LATEST_LLVM_TAG_SHA=$(echo ${LATEST_LLVM_TAG} | awk '{print $1}') export NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \ $(echo ${LATEST_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}')) From e112521dc2ad57ec55069a9d677b8df47ec551db Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 07:48:53 -0500 Subject: [PATCH 06/95] Added a test to check validation for supported dtype --- dpctl/tests/test_tensor_asarray.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dpctl/tests/test_tensor_asarray.py b/dpctl/tests/test_tensor_asarray.py index df7331213e..a6b83caaf9 100644 --- a/dpctl/tests/test_tensor_asarray.py +++ b/dpctl/tests/test_tensor_asarray.py @@ -220,3 +220,13 @@ def test_asarray_copy_false(): assert Y6 is Xf with pytest.raises(ValueError): dpt.asarray(Xf, copy=False, order="C") + + +def test_asarray_invalid_dtype(): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Could not create a queue") + Xnp = np.array([1, 2, 3], dtype=object) + with pytest.raises(TypeError): + dpt.asarray(Xnp, sycl_queue=q) From cf5eb68bcb4ef929e24531a92ade8af99f95ae89 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 
08:27:59 -0500 Subject: [PATCH 07/95] Fixed test to pass on Iris Xe --- dpctl/tests/test_tensor_asarray.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dpctl/tests/test_tensor_asarray.py b/dpctl/tests/test_tensor_asarray.py index a6b83caaf9..3d9ba3db4b 100644 --- a/dpctl/tests/test_tensor_asarray.py +++ b/dpctl/tests/test_tensor_asarray.py @@ -177,11 +177,17 @@ def test_asarray_scalars(): Y = dpt.asarray(5) assert Y.dtype == np.dtype(int) Y = dpt.asarray(5.2) - assert Y.dtype == np.dtype(float) + if Y.sycl_device.has_aspect_fp64: + assert Y.dtype == np.dtype(float) + else: + assert Y.dtype == np.dtype(np.float32) Y = dpt.asarray(np.float32(2.3)) assert Y.dtype == np.dtype(np.float32) Y = dpt.asarray(1.0j) - assert Y.dtype == np.dtype(complex) + if Y.sycl_device.has_aspect_fp64: + assert Y.dtype == np.dtype(complex) + else: + assert Y.dtype == np.dtype(np.complex64) Y = dpt.asarray(ctypes.c_int(8)) assert Y.dtype == np.dtype(ctypes.c_int) From 87a82cda825ca065c28325d361a4191782fd80da Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 12:12:23 -0500 Subject: [PATCH 08/95] Removing stray print --- dpctl/tests/test_usm_ndarray_manipulation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dpctl/tests/test_usm_ndarray_manipulation.py b/dpctl/tests/test_usm_ndarray_manipulation.py index 038ac007c8..037b5ac6ef 100644 --- a/dpctl/tests/test_usm_ndarray_manipulation.py +++ b/dpctl/tests/test_usm_ndarray_manipulation.py @@ -965,7 +965,6 @@ def test_stack_2arrays(data): Y = dpt.asarray(Ynp, sycl_queue=q) Znp = np.stack([Xnp, Ynp], axis=axis) - print(Znp.shape) Z = dpt.stack([X, Y], axis=axis) assert_array_equal(Znp, dpt.asnumpy(Z)) From d887e461247c3bcc5d4e17e6e9870f79982e0990 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 12:12:59 -0500 Subject: [PATCH 09/95] Correcting exception text --- dpctl/tensor/_usmarray.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 329794ebd6..42621a38a1 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -937,7 +937,7 @@ cdef class usm_ndarray: except Exception: raise ValueError( f"Input of type {type(val)} could not be " - "converted to numpy.ndarray" + "converted to usm_ndarray" ) def __sub__(first, other): From 78c3854d062a4f8128bea237ac669b956b4b142d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 12:15:55 -0500 Subject: [PATCH 10/95] Wrap SyclQueue constructor in try/catch --- dpctl/tests/test_usm_ndarray_ctor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 5d52694ca4..c1574e8570 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -588,7 +588,10 @@ def test_pyx_capi_check_constants(): ) @pytest.mark.parametrize("usm_type", ["device", "shared", "host"]) def test_tofrom_numpy(shape, dtype, usm_type): - q = dpctl.SyclQueue() + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Could nto create default SyclQueue") Xnp = np.zeros(shape, dtype=dtype) Xusm = dpt.from_numpy(Xnp, usm_type=usm_type, sycl_queue=q) Ynp = np.ones(shape, dtype=dtype) From 352dfa68ed78d413369f895e34b82df50fc902f9 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 12:17:01 -0500 Subject: [PATCH 11/95] dpctl.tensor.from_numpy should not try creating USM ndarray for 64-bit fp on HW that has no such support --- dpctl/tensor/_copy_utils.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index dd6f068596..24aedf667a 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -59,18 +59,19 @@ def _copy_to_numpy(ary): def _copy_from_numpy(np_ary, usm_type="device", sycl_queue=None): "Copies numpy array 
`np_ary` into a new usm_ndarray" # This may peform a copy to meet stated requirements - Xnp = np.require(np_ary, requirements=["A", "O", "C", "E"]) - if sycl_queue: - ctor_kwargs = {"queue": sycl_queue} + Xnp = np.require(np_ary, requirements=["A", "E"]) + alloc_q = normalize_queue_device(sycl_queue=sycl_queue, device=None) + dt = Xnp.dtype + if dt.char in "dD" and alloc_q.sycl_device.has_aspect_fp64 is False: + Xusm_dtype = ( + np.dtype("float32") if dt.char == "d" else np.dtype("complex64") + ) else: - ctor_kwargs = dict() - Xusm = dpt.usm_ndarray( - Xnp.shape, - dtype=Xnp.dtype, - buffer=usm_type, - buffer_ctor_kwargs=ctor_kwargs, + Xusm_dtype = dt + Xusm = dpt.empty( + Xnp.shape, dtype=Xusm_dtype, usm_type=usm_type, sycl_queue=sycl_queue ) - Xusm.usm_data.copy_from_host(Xnp.reshape((-1)).view("u1")) + _copy_from_numpy_into(Xusm, Xnp) return Xusm From eae1dda25a930cb13b024dae05eeae850fae1f5c Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 12:59:35 -0500 Subject: [PATCH 12/95] Replaced constructor with dtype 'd' with call to ones to dynamically figure out the data type appropriate for the device --- dpctl/tests/test_usm_ndarray_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tests/test_usm_ndarray_operators.py b/dpctl/tests/test_usm_ndarray_operators.py index abe094d2c2..f52ab3c3e4 100644 --- a/dpctl/tests/test_usm_ndarray_operators.py +++ b/dpctl/tests/test_usm_ndarray_operators.py @@ -49,7 +49,7 @@ def multiply(a, b): @pytest.mark.parametrize("namespace", [None, Dummy()]) def test_fp_ops(namespace): - X = dpt.usm_ndarray(1, "d") + X = dpt.ones(1) X._set_namespace(namespace) assert X.__array_namespace__() is namespace X[0] = -2.5 From 4953e107496150130463e5ecbc7a688bf3ffe960 Mon Sep 17 00:00:00 2001 From: ndgrigorian <46709016+ndgrigorian@users.noreply.github.com> Date: Mon, 15 Aug 2022 16:48:57 -0700 Subject: [PATCH 13/95] Update array-api-conformity.yml Update array-api-conformity.yml Moved array API 
conformity test to conda workflow Fixed conflicting job names Fixed indentation Added PR commenting to array API conformity test Removed redundant installation Fixed yaml syntax More syntax fixes Allow-repeats for array conformity comment=false Testing pull_request_target Removed pull_request flag Reverted to pull request Formatting fix jq install fixed Moved array API dependency installation Continue-on-error added to array test step Adjusted PR message Testing if the passed env var is being set Testing more comment functionality Corrected environment variable names Removed separate file for array API workflow Raise array API test version (json report) Readded conformity yml to delete later Reset cache of array API test repo Commit with caching Adds version to test directory (for cache) Removed array API yml Allow repeats false for pr comment --- .github/workflows/array-api-conformity.yml | 79 -------------- .github/workflows/conda-package.yml | 114 +++++++++++++++++++++ 2 files changed, 114 insertions(+), 79 deletions(-) delete mode 100644 .github/workflows/array-api-conformity.yml diff --git a/.github/workflows/array-api-conformity.yml b/.github/workflows/array-api-conformity.yml deleted file mode 100644 index 8135948802..0000000000 --- a/.github/workflows/array-api-conformity.yml +++ /dev/null @@ -1,79 +0,0 @@ -name: Check array api conformity -on: - pull_request: - push: - branches: [master] - -jobs: - array-api-test: - name: Test array API standard conformity - runs-on: ubuntu-20.04 - - steps: - - name: Add Intel repository - run: | - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - sudo apt-get update - - - name: Install Intel OneAPI - run: | - sudo apt-get install intel-oneapi-compiler-dpcpp-cpp - sudo apt-get install intel-oneapi-tbb - - - 
name: Install CMake and Ninja - shell: bash -l {0} - run: | - sudo apt-get install cmake ninja-build - - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: '3.9' - architecture: x64 - - - name: Cache array API tests - id: cache-array-api-tests - uses: actions/cache@v3 - with: - path: | - /home/runner/work/array-api-tests/ - key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('/home/runner/work/array-api-tests/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-build-${{ env.cache-name }}- - ${{ runner.os }}-build- - ${{ runner.os }}- - - - name: Get array API tests repo - if: steps.cache-array-api-tests.outputs.cache-hit != 'true' - shell: bash -l {0} - run: | - cd /home/runner/work - git clone --recurse-submodules https://github.com/data-apis/array-api-tests -b 2022.05.18 - - - name: Install array API test dependencies - shell: bash -l {0} - run: | - cd /home/runner/work/array-api-tests - pip install -r requirements.txt - - - name: Checkout repo - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Install dpctl dependencies - shell: bash -l {0} - run: | - pip install numpy cython setuptools pytest scikit-build - - - name: Build dpctl+run conformity test - shell: bash -l {0} - run: | - source /opt/intel/oneapi/setvars.sh - python setup.py develop -- -G Ninja -DCMAKE_C_COMPILER:PATH=$(which icx) -DCMAKE_CXX_COMPILER:PATH=$(which icpx) - cd /home/runner/work/array-api-tests - export ARRAY_API_TESTS_MODULE=dpctl.tensor - pytest array_api_tests/ --ci diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index c50ef77fe7..5c9d3eea08 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -470,3 +470,117 @@ jobs: echo "Executing ${script}" python ${script} || exit 1 done + + array-api-conformity: + needs: test_linux + runs-on: ${{ matrix.runner }} + + strategy: + matrix: + python: ['3.10'] + experimental: [false] + runner: [ubuntu-latest] 
+ continue-on-error: ${{ matrix.experimental }} + env: + CHANNELS: -c intel -c defaults --override-channels + steps: + - name: Cache array API tests + id: cache-array-api-tests + uses: actions/cache@v3 + with: + path: | + /home/runner/work/array-api-tests-08-19-22/ + key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('/home/runner/work/array-api-tests-08-19-22/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-build-${{ env.cache-name }}- + ${{ runner.os }}-build- + ${{ runner.os }}- + - name: Clone array API tests repo + if: steps.cache-array-api-tests.outputs.cache-hit != 'true' + shell: bash -l {0} + run: | + cd /home/runner/work + git clone --recurse-submodules https://github.com/data-apis/array-api-tests array-api-tests-08-19-22 + cd array-api-tests-08-19-22 + git checkout 66ab89c097d98f876e3c62f72e854ddb7d327f2e + - name: Download artifact + uses: actions/download-artifact@v2 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + - name: Add conda to system path + run: echo $CONDA/bin >> $GITHUB_PATH + - name: Install conda-build + run: conda install conda-build + - name: Create conda channel + run: | + mkdir -p $GITHUB_WORKSPACE/channel/linux-64 + conda index $GITHUB_WORKSPACE/channel || exit 1 + mv ${PACKAGE_NAME}-*.tar.bz2 $GITHUB_WORKSPACE/channel/linux-64 || exit 1 + conda index $GITHUB_WORKSPACE/channel || exit 1 + # Test channel + conda search $PACKAGE_NAME -c $GITHUB_WORKSPACE/channel --override-channels --info --json > $GITHUB_WORKSPACE/ver.json + cat ver.json + - name: Collect dependencies + run: | + CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" + export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} ${VER_SCRIPT2}") + conda install $PACKAGE_NAME=${PACKAGE_VERSION} python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile + cat lockfile + - name: Set pkgs_dirs + run: | + echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc + - name: Cache conda packages + uses: 
actions/cache@v3 + env: + CACHE_NUMBER: 1 # Increase to reset cache + with: + path: ~/.conda/pkgs + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + - name: Install dpctl + run: | + CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" + export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} ${VER_SCRIPT2}") + conda install $PACKAGE_NAME=${PACKAGE_VERSION} pytest python=${{ matrix.python }} $CHANNELS + # Test installed packages + conda list + - name: Install array API test dependencies + shell: bash -l {0} + run: | + cd /home/runner/work/array-api-tests-08-19-22 + pip install -r requirements.txt + - name: Install jq + shell: bash -l {0} + run: | + sudo apt-get install jq + - name: Array API conformity tests + shell: bash -l {0} + run: | + export SYCL_ENABLE_HOST_DEVICE=1 + python -c "import dpctl; dpctl.lsplatform()" + export ARRAY_API_TESTS_MODULE=dpctl.tensor + cd /home/runner/work/array-api-tests-08-19-22 + pytest --ci --json-report array_api_tests/ + - name: Set Github environment variables + shell: bash -l {0} + run: | + PASSED_TESTS=$(jq '.summary | .passed // 0' ~/array-api-tests-08-19-22/.report.json) + echo "PASSED=$PASSED_TESTS" >> $ + FAILED_TESTS=$(jq '.summary | .failed // 0' ~/array-api-tests-08-19-22/.report.json) + echo "FAILED=$FAILED_TESTS" >> $GITHUB_ENV + SKIPPED_TESTS=$(jq '.summary | .skipped // 0' ~/array-api-tests-08-19-22/.report.json) + echo "SKIPPED=$SKIPPED_TESTS" >> $GITHUB_ENV + - name: Post result to PR + uses: mshick/add-pr-comment@v1 + with: + message: | + Array API standard conformance test ran successfully. 
+ Passed: ${{ env.PASSED }} + Failed: ${{ env.FAILED }} + Skipped: ${{ env.SKIPPED }} + allow-repeats: false + repo-token: ${{ secrets.GITHUB_TOKEN }} + repo-token-user-login: 'github-actions[bot]' From 70484cb9b9f7225866fe9b619597152a09e654c8 Mon Sep 17 00:00:00 2001 From: ndgrigorian <46709016+ndgrigorian@users.noreply.github.com> Date: Mon, 22 Aug 2022 17:34:42 -0700 Subject: [PATCH 14/95] Delete array-api-conformity.yml --- .github/workflows/array-api-conformity.yml | 79 ---------------------- 1 file changed, 79 deletions(-) delete mode 100644 .github/workflows/array-api-conformity.yml diff --git a/.github/workflows/array-api-conformity.yml b/.github/workflows/array-api-conformity.yml deleted file mode 100644 index 838826f170..0000000000 --- a/.github/workflows/array-api-conformity.yml +++ /dev/null @@ -1,79 +0,0 @@ -name: Check array api conformity -on: - pull_request: - push: - branches: [master] - -jobs: - array-api-test: - name: Test array API standard conformity - runs-on: ubuntu-20.04 - - steps: - - name: Add Intel repository - run: | - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - sudo apt-get update - - - name: Install Intel OneAPI - run: | - sudo apt-get install intel-oneapi-compiler-dpcpp-cpp - sudo apt-get install intel-oneapi-tbb - - - name: Install CMake and Ninja - shell: bash -l {0} - run: | - sudo apt-get install cmake ninja-build - - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: '3.9' - architecture: x64 - - - name: Cache array API tests - id: cache-array-api-tests - uses: actions/cache@v3 - with: - path: | - /home/runner/work/array-api-tests/ - key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('/home/runner/work/array-api-tests/requirements.txt') }} - restore-keys: | - ${{ 
runner.os }}-build-${{ env.cache-name }}- - ${{ runner.os }}-build- - ${{ runner.os }}- - - - name: Get array API tests repo - if: steps.cache-array-api-tests.outputs.cache-hit != 'true' - shell: bash -l {0} - run: | - cd /home/runner/work - git clone --recurse-submodules https://github.com/data-apis/array-api-tests -b 2022.05.18 - - - name: Install array API test dependencies - shell: bash -l {0} - run: | - cd /home/runner/work/array-api-tests - pip install -r requirements.txt - - - name: Checkout repo - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - - name: Install dpctl dependencies - shell: bash -l {0} - run: | - pip install numpy cython setuptools pytest scikit-build - - - name: Build dpctl+run conformity test - shell: bash -l {0} - run: | - source /opt/intel/oneapi/setvars.sh - python setup.py develop -- -G Ninja -DCMAKE_C_COMPILER:PATH=$(which icx) -DCMAKE_CXX_COMPILER:PATH=$(which icpx) - cd /home/runner/work/array-api-tests - export ARRAY_API_TESTS_MODULE=dpctl.tensor - pytest array_api_tests/ --ci From aee3f59ac810c8bb2149b89f420d83718b907f72 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 5 Sep 2022 20:59:55 -0700 Subject: [PATCH 15/95] Array API tests reworked --- .github/workflows/conda-package.yml | 66 ++++++++++++++++++----------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index ab5b71442f..f6e2f18af9 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -493,10 +493,12 @@ jobs: - name: Cache array API tests id: cache-array-api-tests uses: actions/cache@v3 + env: + ARRAY_CACHE: 3 with: path: | - /home/runner/work/array-api-tests-08-19-22/ - key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('/home/runner/work/array-api-tests-08-19-22/requirements.txt') }} + /home/runner/work/array-api-tests/ + key: ${{ runner.os }}-array-api-${{ env.cache-name }}-{{ env.ARRAY_CACHE }}-${{ 
hashFiles('/home/runner/work/array-api-tests/requirements.txt') }} restore-keys: | ${{ runner.os }}-build-${{ env.cache-name }}- ${{ runner.os }}-build- @@ -506,9 +508,8 @@ jobs: shell: bash -l {0} run: | cd /home/runner/work - git clone --recurse-submodules https://github.com/data-apis/array-api-tests array-api-tests-08-19-22 - cd array-api-tests-08-19-22 - git checkout 66ab89c097d98f876e3c62f72e854ddb7d327f2e + git clone --recurse-submodules https://github.com/data-apis/array-api-tests array-api-tests + cd array-api-tests - name: Download artifact uses: actions/download-artifact@v2 with: @@ -516,6 +517,7 @@ jobs: - name: Add conda to system path run: echo $CONDA/bin >> $GITHUB_PATH - name: Install conda-build + # Needed to be able to run conda index run: conda install conda-build - name: Create conda channel run: | @@ -530,7 +532,7 @@ jobs: run: | CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} ${VER_SCRIPT2}") - conda install $PACKAGE_NAME=${PACKAGE_VERSION} python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile + conda create -n test_dpctl $PACKAGE_NAME=${PACKAGE_VERSION} python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile cat lockfile - name: Set pkgs_dirs run: | @@ -538,7 +540,7 @@ jobs: - name: Cache conda packages uses: actions/cache@v3 env: - CACHE_NUMBER: 1 # Increase to reset cache + CACHE_NUMBER: 3 # Increase to reset cache with: path: ~/.conda/pkgs key: @@ -550,43 +552,59 @@ jobs: run: | CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} ${VER_SCRIPT2}") - conda install $PACKAGE_NAME=${PACKAGE_VERSION} pytest python=${{ matrix.python }} $CHANNELS + conda create -n test_dpctl $PACKAGE_NAME=${PACKAGE_VERSION} pytest python=${{ matrix.python }} $CHANNELS # Test installed packages conda list - name: Install array API test dependencies shell: bash -l {0} run: | - cd 
/home/runner/work/array-api-tests-08-19-22 + . $CONDA/etc/profile.d/conda.sh + conda activate test_dpctl + cd /home/runner/work/array-api-tests pip install -r requirements.txt + pip install numpy==1.22.1 - name: Install jq shell: bash -l {0} run: | sudo apt-get install jq - - name: Array API conformity tests + - name: Run array API conformance tests + id: run-array-api-tests shell: bash -l {0} run: | + FILE=/home/runner/work/array-api-tests/.report.json + . $CONDA/etc/profile.d/conda.sh + conda activate test_dpctl + # echo "libintelocl.so" | tee /etc/OpenCL/vendors/intel-cpu.icd + export OCL_ICD_FILENAMES=libintelocl.so export SYCL_ENABLE_HOST_DEVICE=1 python -c "import dpctl; dpctl.lsplatform()" - export ARRAY_API_TESTS_MODULE=dpctl.tensor - cd /home/runner/work/array-api-tests-08-19-22 - pytest --ci --json-report array_api_tests/ + export ARRAY_API_TESTS_MODULE=numpy.array_api + cd /home/runner/work/array-api-tests + pytest --ci --json-report --json-report-file=$FILE array_api_tests/ || true - name: Set Github environment variables shell: bash -l {0} run: | - PASSED_TESTS=$(jq '.summary | .passed // 0' ~/array-api-tests-08-19-22/.report.json) - echo "PASSED=$PASSED_TESTS" >> $ - FAILED_TESTS=$(jq '.summary | .failed // 0' ~/array-api-tests-08-19-22/.report.json) - echo "FAILED=$FAILED_TESTS" >> $GITHUB_ENV - SKIPPED_TESTS=$(jq '.summary | .skipped // 0' ~/array-api-tests-08-19-22/.report.json) - echo "SKIPPED=$SKIPPED_TESTS" >> $GITHUB_ENV + FILE=/home/runner/work/array-api-tests/.report.json + if test -f "$FILE"; then + PASSED_TESTS=$(jq '.summary | .passed // 0' $FILE) + FAILED_TESTS=$(jq '.summary | .failed // 0' $FILE) + SKIPPED_TESTS=$(jq '.summary | .skipped // 0' $FILE) + MESSAGE="Array API standard conformance tests ran successfully. 
+ Passed: $PASSED_TESTS + Failed: $FAILED_TESTS + Skipped: $SKIPPED_TESTS" + echo "MESSAGE<> $GITHUB_ENV + echo "$MESSAGE" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + else + MESSAGE=$'Array API standard conformance tests failed to run.' + echo "MESSAGE=$MESSAGE" >> $GITHUB_ENV + fi - name: Post result to PR uses: mshick/add-pr-comment@v1 with: message: | - Array API standard conformance test ran successfully. - Passed: ${{ env.PASSED }} - Failed: ${{ env.FAILED }} - Skipped: ${{ env.SKIPPED }} - allow-repeats: false + ${{ env.MESSAGE }} + allow-repeats: true repo-token: ${{ secrets.GITHUB_TOKEN }} repo-token-user-login: 'github-actions[bot]' From 7045e0373727338d9cbe1639bd029406b10ba3ea Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 5 Sep 2022 21:42:05 -0700 Subject: [PATCH 16/95] Changed array API back to dpctl.tensor --- .github/workflows/conda-package.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index f6e2f18af9..6540939af3 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -562,7 +562,6 @@ jobs: conda activate test_dpctl cd /home/runner/work/array-api-tests pip install -r requirements.txt - pip install numpy==1.22.1 - name: Install jq shell: bash -l {0} run: | @@ -578,7 +577,7 @@ jobs: export OCL_ICD_FILENAMES=libintelocl.so export SYCL_ENABLE_HOST_DEVICE=1 python -c "import dpctl; dpctl.lsplatform()" - export ARRAY_API_TESTS_MODULE=numpy.array_api + export ARRAY_API_TESTS_MODULE=dpctl.tensor cd /home/runner/work/array-api-tests pytest --ci --json-report --json-report-file=$FILE array_api_tests/ || true - name: Set Github environment variables From d72f6a98e8177ed356f74be1e15731dbd08d6e63 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 5 Sep 2022 22:30:42 -0700 Subject: [PATCH 17/95] Changed where .report.json is saved --- .github/workflows/conda-package.yml | 7 ++++--- 1 file changed, 4 
insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 6540939af3..5ee3a9bff6 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -562,6 +562,7 @@ jobs: conda activate test_dpctl cd /home/runner/work/array-api-tests pip install -r requirements.txt + pip install numpy==1.22.1 - name: Install jq shell: bash -l {0} run: | @@ -570,20 +571,20 @@ jobs: id: run-array-api-tests shell: bash -l {0} run: | - FILE=/home/runner/work/array-api-tests/.report.json + FILE=/home/runner/work/.report.json . $CONDA/etc/profile.d/conda.sh conda activate test_dpctl # echo "libintelocl.so" | tee /etc/OpenCL/vendors/intel-cpu.icd export OCL_ICD_FILENAMES=libintelocl.so export SYCL_ENABLE_HOST_DEVICE=1 python -c "import dpctl; dpctl.lsplatform()" - export ARRAY_API_TESTS_MODULE=dpctl.tensor + export ARRAY_API_TESTS_MODULE=numpy.array_api cd /home/runner/work/array-api-tests pytest --ci --json-report --json-report-file=$FILE array_api_tests/ || true - name: Set Github environment variables shell: bash -l {0} run: | - FILE=/home/runner/work/array-api-tests/.report.json + FILE=/home/runner/work/.report.json if test -f "$FILE"; then PASSED_TESTS=$(jq '.summary | .passed // 0' $FILE) FAILED_TESTS=$(jq '.summary | .failed // 0' $FILE) From 8a2989bc3d06e2f1348c87bc3ed3938b12f2bcc2 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 5 Sep 2022 23:03:49 -0700 Subject: [PATCH 18/95] Changed tested package to dpctl.tensor --- .github/workflows/conda-package.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 5ee3a9bff6..3c9bf4a5ba 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -562,7 +562,6 @@ jobs: conda activate test_dpctl cd /home/runner/work/array-api-tests pip install -r requirements.txt - pip install numpy==1.22.1 - name: Install jq 
shell: bash -l {0} run: | @@ -578,7 +577,7 @@ jobs: export OCL_ICD_FILENAMES=libintelocl.so export SYCL_ENABLE_HOST_DEVICE=1 python -c "import dpctl; dpctl.lsplatform()" - export ARRAY_API_TESTS_MODULE=numpy.array_api + export ARRAY_API_TESTS_MODULE=dpctl.tensor cd /home/runner/work/array-api-tests pytest --ci --json-report --json-report-file=$FILE array_api_tests/ || true - name: Set Github environment variables From de94428079c992b324d4f461691dec1c22df8ef5 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 21 Jul 2022 21:06:57 -0500 Subject: [PATCH 19/95] Adjustments to account for changed include location in main trunk of DPC++ --- .../source/dpctl_sycl_kernel_bundle_interface.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp index 47cda39092..a91325b4f3 100644 --- a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp @@ -32,7 +32,11 @@ #include "dpctl_error_handlers.h" #include /* OpenCL headers */ #include /* Sycl headers */ +#if __has_include() +#include +#else #include +#endif #include #ifdef DPCTL_ENABLE_L0_PROGRAM_CREATION @@ -40,7 +44,11 @@ // not reorder the includes. 
// clang-format off #include "ze_api.h" /* Level Zero headers */ -#include "sycl/ext/oneapi/backend/level_zero.hpp" +#if __has_include() +#include +#else +#include +#endif // clang-format on #endif From a624cc626671ddf635cd3f39872145d6767338ef Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 21 Jul 2022 15:24:40 -0500 Subject: [PATCH 20/95] Make sure to catch SYCL exception around event::wait calls --- .../source/dpctl_sycl_event_interface.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/libsyclinterface/source/dpctl_sycl_event_interface.cpp b/libsyclinterface/source/dpctl_sycl_event_interface.cpp index 406a873258..70bf037371 100644 --- a/libsyclinterface/source/dpctl_sycl_event_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_event_interface.cpp @@ -60,8 +60,12 @@ void DPCTLEvent_Wait(__dpctl_keep DPCTLSyclEventRef ERef) { if (ERef) { auto SyclEvent = unwrap(ERef); - if (SyclEvent) - SyclEvent->wait(); + try { + if (SyclEvent) + SyclEvent->wait(); + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } } else { error_handler("Cannot wait for the event. DPCTLSyclEventRef as " @@ -74,8 +78,12 @@ void DPCTLEvent_WaitAndThrow(__dpctl_keep DPCTLSyclEventRef ERef) { if (ERef) { auto SyclEvent = unwrap(ERef); - if (SyclEvent) - SyclEvent->wait_and_throw(); + try { + if (SyclEvent) + SyclEvent->wait_and_throw(); + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } } else { error_handler("Cannot wait_and_throw for the event. 
DPCTLSyclEventRef " From 7b0d4a57b418f32911d42fc5d6d523f26b5103e6 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 21 Jul 2022 15:26:00 -0500 Subject: [PATCH 21/95] Do not submit kernels if HW does not support double precision --- dpctl/tests/test_sycl_kernel_submit.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index de24c02cd7..c4b0285699 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++ b/dpctl/tests/test_sycl_kernel_submit.py @@ -45,6 +45,10 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor): q = dpctl.SyclQueue("opencl", property="enable_profiling") except dpctl.SyclQueueCreationError: pytest.skip("OpenCL queue could not be created") + if dtype == np.dtype("f8") and q.sycl_device.has_aspect_fp64 is False: + pytest.skip( + "Device does not support double precision floating point type" + ) # OpenCL conventions for indexing global_id is opposite to # that of SYCL (and DPCTL) oclSrc = ( From 7826c1839556b054cdc576825f98c857db8fa358 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 21 Jul 2022 15:26:40 -0500 Subject: [PATCH 22/95] Be explicit about allocation queue in tests --- dpctl/tests/test_sycl_queue_memcpy.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dpctl/tests/test_sycl_queue_memcpy.py b/dpctl/tests/test_sycl_queue_memcpy.py index db1831b78c..97f35a331c 100644 --- a/dpctl/tests/test_sycl_queue_memcpy.py +++ b/dpctl/tests/test_sycl_queue_memcpy.py @@ -24,9 +24,9 @@ import dpctl.memory -def _create_memory(): +def _create_memory(q): nbytes = 1024 - mobj = dpctl.memory.MemoryUSMShared(nbytes) + mobj = dpctl.memory.MemoryUSMShared(nbytes, queue=q) return mobj @@ -35,9 +35,9 @@ def _create_memory(): reason="No SYCL devices except the default host device.", ) def test_memcpy_copy_usm_to_usm(): - mobj1 = _create_memory() - mobj2 = _create_memory() q = dpctl.SyclQueue() + mobj1 = _create_memory(q) 
+ mobj2 = _create_memory(q) mv1 = memoryview(mobj1) mv2 = memoryview(mobj2) @@ -54,8 +54,8 @@ def test_memcpy_copy_usm_to_usm(): # reason="No SYCL devices except the default host device." # ) def test_memcpy_type_error(): - mobj = _create_memory() - q = mobj._queue + q = dpctl.SyclQueue() + mobj = _create_memory(q) with pytest.raises(TypeError) as cm: q.memcpy(None, mobj, 3) From b5898821538544bdb5d6a9991296da8b496a6fc2 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 21 Jul 2022 15:35:14 -0500 Subject: [PATCH 23/95] Using quotes for dpctl/CMakeLists.txt per Anton's feedback --- dpctl/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/CMakeLists.txt b/dpctl/CMakeLists.txt index 205d4b40cf..2a4e829a83 100644 --- a/dpctl/CMakeLists.txt +++ b/dpctl/CMakeLists.txt @@ -2,7 +2,7 @@ find_package(PythonExtensions REQUIRED) find_package(NumPy REQUIRED) -set(CYTHON_FLAGS "-t -w ${CMAKE_SOURCE_DIR}") +set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"") find_package(Cython REQUIRED) if(WIN32) From fe0ad880aea30b862b831dcd2811397049d7e322 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 22 Jul 2022 14:51:31 -0500 Subject: [PATCH 24/95] Fixed stray use of deprecated SYCL-2020 quantifier in dpctl_c_api tests --- libsyclinterface/tests/test_sycl_event_interface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsyclinterface/tests/test_sycl_event_interface.cpp b/libsyclinterface/tests/test_sycl_event_interface.cpp index 1b317a3e4d..1954744fff 100644 --- a/libsyclinterface/tests/test_sycl_event_interface.cpp +++ b/libsyclinterface/tests/test_sycl_event_interface.cpp @@ -40,7 +40,7 @@ sycl::event produce_event(sycl::queue &Q, sycl::buffer &data) int N = data.get_range()[0]; auto e1 = Q.submit([&](sycl::handler &h) { - sycl::accessor a{data, h, sycl::write_only, sycl::noinit}; + sycl::accessor a{data, h, sycl::write_only, sycl::no_init}; h.parallel_for(N, [=](sycl::id<1> i) { a[i] = 1; }); }); From 
4096ee0e6f5c533e82ae2e197747090d45800c3a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 22 Jul 2022 12:16:26 -0500 Subject: [PATCH 25/95] Changed test from copying doubles to copying int64 to run on Iris Xe --- dpctl/tests/test_tensor_asarray.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dpctl/tests/test_tensor_asarray.py b/dpctl/tests/test_tensor_asarray.py index f4e3d77bfb..df7331213e 100644 --- a/dpctl/tests/test_tensor_asarray.py +++ b/dpctl/tests/test_tensor_asarray.py @@ -191,7 +191,9 @@ def test_asarray_copy_false(): q = dpctl.SyclQueue() except dpctl.SyclQueueCreationError: pytest.skip("Could not create a queue") - X = dpt.from_numpy(np.random.randn(10, 4), usm_type="device", sycl_queue=q) + rng = np.random.default_rng() + Xnp = rng.integers(low=-255, high=255, size=(10, 4), dtype=np.int64) + X = dpt.from_numpy(Xnp, usm_type="device", sycl_queue=q) Y1 = dpt.asarray(X, copy=False, order="K") assert Y1 is X Y1c = dpt.asarray(X, copy=True, order="K") From d0e951e0eebe86d7194a01391b28dd8190c0ca32 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 22 Jul 2022 15:16:41 -0500 Subject: [PATCH 26/95] Added testing logic to skip test cases resulting in JIT-ting kernel for double precision type is the HW does not support it --- dpctl/tests/test_usm_ndarray_ctor.py | 70 ++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 9 deletions(-) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 87a66a7af2..28180c4714 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -601,6 +601,14 @@ def test_tofrom_numpy(shape, dtype, usm_type): @pytest.mark.parametrize("src_usm_type", ["device", "shared", "host"]) @pytest.mark.parametrize("dst_usm_type", ["device", "shared", "host"]) def test_setitem_same_dtype(dtype, src_usm_type, dst_usm_type): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Could not create 
default SyclQueue.") + if q.sycl_device.has_aspect_fp64 is False and dtype in ["f8", "c16"]: + pytest.skip( + "Device does not support double precision floating point types." + ) Xnp = ( np.random.randint(-10, 10, size=2 * 3 * 4) .astype(dtype) @@ -649,6 +657,12 @@ def test_setitem_same_dtype(dtype, src_usm_type, dst_usm_type): ) @pytest.mark.parametrize("usm_type", ["device", "shared", "host"]) def test_setitem_scalar(dtype, usm_type): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Could not create default SyclQueue") + if q.sycl_device.has_aspect_fp64 is False and dtype in ["f8", "c16"]: + pytest.skip("Device does not support double precision floating type") X = dpt.usm_ndarray((6, 6), dtype=dtype, buffer=usm_type) for i in range(X.size): X[np.unravel_index(i, X.shape)] = np.asarray(i, dtype=dtype) @@ -673,13 +687,22 @@ def test_setitem_errors(): X[:] = Y[None, :, 0] -def test_setitem_different_dtypes(): - X = dpt.from_numpy(np.ones(10, "f4")) - Y = dpt.from_numpy(np.zeros(10, "f4")) - Z = dpt.usm_ndarray((20,), "d") +@pytest.mark.parametrize("src_dt,dst_dt", [("i4", "i8"), ("f4", "f8")]) +def test_setitem_different_dtypes(src_dt, dst_dt): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Default queue could not be created") + if dst_dt == "f8" and q.sycl_device.has_aspect_fp64 is False: + pytest.skip( + "Device does not support double precision floating point type" + ) + X = dpt.from_numpy(np.ones(10, src_dt), sycl_queue=q) + Y = dpt.from_numpy(np.zeros(10, src_dt), sycl_queue=q) + Z = dpt.empty((20,), dtype=dst_dt, sycl_queue=q) Z[::2] = X Z[1::2] = Y - assert np.allclose(dpt.asnumpy(Z), np.tile(np.array([1, 0], "d"), 10)) + assert np.allclose(dpt.asnumpy(Z), np.tile(np.array([1, 0], Z.dtype), 10)) def test_shape_setter(): @@ -804,8 +827,8 @@ def test_to_device_migration(): def test_astype(): X = dpt.empty((5, 5), dtype="i4") X[:] = np.full((5, 5), 7, dtype="i4") - Y = dpt.astype(X, "c16", 
order="C") - assert np.allclose(dpt.to_numpy(Y), np.full((5, 5), 7, dtype="c16")) + Y = dpt.astype(X, "c8", order="C") + assert np.allclose(dpt.to_numpy(Y), np.full((5, 5), 7, dtype="c8")) Y = dpt.astype(X[::2, ::-1], "f2", order="K") assert np.allclose(dpt.to_numpy(Y), np.full(Y.shape, 7, dtype="f2")) Y = dpt.astype(X[::2, ::-1], "i4", order="K", copy=False) @@ -946,7 +969,15 @@ def test_zeros(dtype): _all_dtypes, ) def test_ones(dtype): - X = dpt.ones(10, dtype=dtype) + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Could not created default queue") + if dtype in ["f8", "c16"] and q.sycl_device.has_aspect_fp64 is False: + pytest.skip( + "Device does not support double precision floating point type" + ) + X = dpt.ones(10, dtype=dtype, sycl_queue=q) assert np.array_equal(dpt.asnumpy(X), np.ones(10, dtype=dtype)) @@ -955,7 +986,15 @@ def test_ones(dtype): _all_dtypes, ) def test_full(dtype): - X = dpt.full(10, 4, dtype=dtype) + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Could not created default queue") + if dtype in ["f8", "c16"] and q.sycl_device.has_aspect_fp64 is False: + pytest.skip( + "Device does not support double precision floating point type" + ) + X = dpt.full(10, 4, dtype=dtype, sycl_queue=q) assert np.array_equal(dpt.asnumpy(X), np.full(10, 4, dtype=dtype)) @@ -976,6 +1015,10 @@ def test_arange(dt): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") + if dt in ["f8", "c16"] and q.sycl_device.has_aspect_fp64 is False: + pytest.skip( + "Device does not support double precision floating point type" + ) X = dpt.arange(0, 123, dtype=dt, sycl_queue=q) dt = np.dtype(dt) if np.issubdtype(dt, np.integer): @@ -1093,6 +1136,10 @@ def test_ones_like(dt, usm_kind): q = dpctl.SyclQueue() except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") + if dt in ["f8", "c16"] and q.sycl_device.has_aspect_fp64 is False: + pytest.skip( + "Device does 
not support double precision floating point type" + ) X = dpt.empty((4, 5), dtype=dt, usm_type=usm_kind, sycl_queue=q) Y = dpt.ones_like(X) @@ -1129,6 +1176,11 @@ def test_full_like(dt, usm_kind): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") + if dt in ["f8", "c16"] and q.sycl_device.has_aspect_fp64 is False: + pytest.skip( + "Device does not support double precision floating point type" + ) + fill_v = np.dtype(dt).type(1) X = dpt.empty((4, 5), dtype=dt, usm_type=usm_kind, sycl_queue=q) Y = dpt.full_like(X, fill_v) From 26ba94f3564d9dd704b0b7e9ade48d99bbcb7a61 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 22 Jul 2022 09:31:30 -0500 Subject: [PATCH 27/95] Getter functions for usm_ndarray are marked const per gh-852 --- dpctl/apis/include/dpctl4pybind11.hpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dpctl/apis/include/dpctl4pybind11.hpp b/dpctl/apis/include/dpctl4pybind11.hpp index ee43dbcbbf..e9b8d84524 100644 --- a/dpctl/apis/include/dpctl4pybind11.hpp +++ b/dpctl/apis/include/dpctl4pybind11.hpp @@ -465,7 +465,7 @@ class usm_ndarray : public py::object throw py::error_already_set(); } - char *get_data() + char *get_data() const { PyObject *raw_o = this->ptr(); PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); @@ -473,12 +473,12 @@ class usm_ndarray : public py::object return UsmNDArray_GetData(raw_ar); } - template T *get_data() + template T *get_data() const { return reinterpret_cast(get_data()); } - int get_ndim() + int get_ndim() const { PyObject *raw_o = this->ptr(); PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); @@ -486,7 +486,7 @@ class usm_ndarray : public py::object return UsmNDArray_GetNDim(raw_ar); } - const py::ssize_t *get_shape_raw() + const py::ssize_t *get_shape_raw() const { PyObject *raw_o = this->ptr(); PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); @@ -494,13 +494,13 @@ class usm_ndarray : public py::object return 
UsmNDArray_GetShape(raw_ar); } - py::ssize_t get_shape(int i) + py::ssize_t get_shape(int i) const { auto shape_ptr = get_shape_raw(); return shape_ptr[i]; } - const py::ssize_t *get_strides_raw() + const py::ssize_t *get_strides_raw() const { PyObject *raw_o = this->ptr(); PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); @@ -508,7 +508,7 @@ class usm_ndarray : public py::object return UsmNDArray_GetStrides(raw_ar); } - py::ssize_t get_size() + py::ssize_t get_size() const { PyObject *raw_o = this->ptr(); PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); @@ -525,7 +525,7 @@ class usm_ndarray : public py::object return nelems; } - std::pair get_minmax_offsets() + std::pair get_minmax_offsets() const { PyObject *raw_o = this->ptr(); PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); @@ -559,7 +559,7 @@ class usm_ndarray : public py::object return std::make_pair(offset_min, offset_max); } - sycl::queue get_queue() + sycl::queue get_queue() const { PyObject *raw_o = this->ptr(); PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); @@ -568,7 +568,7 @@ class usm_ndarray : public py::object return *(reinterpret_cast(QRef)); } - int get_typenum() + int get_typenum() const { PyObject *raw_o = this->ptr(); PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); @@ -576,7 +576,7 @@ class usm_ndarray : public py::object return UsmNDArray_GetTypenum(raw_ar); } - int get_flags() + int get_flags() const { PyObject *raw_o = this->ptr(); PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); @@ -584,7 +584,7 @@ class usm_ndarray : public py::object return UsmNDArray_GetFlags(raw_ar); } - int get_elemsize() + int get_elemsize() const { PyObject *raw_o = this->ptr(); PyUSMArrayObject *raw_ar = reinterpret_cast(raw_o); From cc0d7e069c623d9a506d3d1854b9c08f47fef079 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 24 Jul 2022 15:22:17 -0500 Subject: [PATCH 28/95] Fixed typos in docstrings found by Doxygen --- libsyclinterface/include/dpctl_service.h | 2 +- 
libsyclinterface/include/dpctl_sycl_kernel_bundle_interface.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libsyclinterface/include/dpctl_service.h b/libsyclinterface/include/dpctl_service.h index d7f9cfb552..b6aa7cbda0 100644 --- a/libsyclinterface/include/dpctl_service.h +++ b/libsyclinterface/include/dpctl_service.h @@ -47,7 +47,7 @@ __dpctl_give const char *DPCTLService_GetDPCPPVersion(void); * @brief Initialize logger if compiled to use logger, no-op otherwise. * * @param app_name C-string for application name reflected in the log. - * @paral log_dir C-string for directory where log files are placed. + * @param log_dir C-string for directory where log files are placed. * @ingroup Service */ DPCTL_API diff --git a/libsyclinterface/include/dpctl_sycl_kernel_bundle_interface.h b/libsyclinterface/include/dpctl_sycl_kernel_bundle_interface.h index 8dacfbf581..6b38fb9e25 100644 --- a/libsyclinterface/include/dpctl_sycl_kernel_bundle_interface.h +++ b/libsyclinterface/include/dpctl_sycl_kernel_bundle_interface.h @@ -111,7 +111,7 @@ bool DPCTLKernelBundle_HasKernel(__dpctl_keep DPCTLSyclKernelBundleRef KBRef, /*! * @brief Frees the DPCTLSyclKernelBundleRef pointer. * - * @param PRef Opaque pointer to a sycl::kernel_bundle + * @param KBRef Opaque pointer to a sycl::kernel_bundle * @ingroup KernelBundleInterface */ DPCTL_API From 17cabc0f576e14e3052420dba0ee87c019752396 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 25 Jul 2022 07:39:04 -0500 Subject: [PATCH 29/95] Fix issues tripping documentation build Due to sphinx-doc/sphinx#10701 documentation build of dpctl started failing with release of Sphinx 5.1.0. The exception raised is triggered by missing docstrings for property class attributes. This change adds such docstrings and now local documentation build goes through as expected. 
--- dpctl/_sycl_device.pyx | 152 ++++++++++++++++++++++++------------- dpctl/_sycl_queue.pyx | 3 + dpctl/_sycl_timer.py | 4 + dpctl/tensor/_usmarray.pyx | 12 +++ 4 files changed, 120 insertions(+), 51 deletions(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 4fe6ffdfbe..ecbc249337 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -386,101 +386,154 @@ cdef class SyclDevice(_SyclDevice): @property def has_aspect_host(self): + "Returns True if this device is a host device, False otherwise" cdef _aspect_type AT = _aspect_type._host return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_cpu(self): + "Returns True if this device is a CPU device, False otherwise" cdef _aspect_type AT = _aspect_type._cpu return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_gpu(self): + "Returns True if this device is a GPU device, False otherwise" cdef _aspect_type AT = _aspect_type._gpu return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_accelerator(self): + "Returns True if this device is an accelerator device, False otherwise" cdef _aspect_type AT = _aspect_type._accelerator return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_custom(self): + "Returns True if this device is a custom device, False otherwise" cdef _aspect_type AT = _aspect_type._custom return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_fp16(self): + """ Returns True if kernels submitted to this device + may use 16-bit floating point types, False otherwise + """ cdef _aspect_type AT = _aspect_type._fp16 return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_fp64(self): + """ Returns True if kernels submitted to this device + may use 64-bit floating point types, False otherwise + """ cdef _aspect_type AT = _aspect_type._fp64 return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_atomic64(self): + """ Returns True if kernels 
submitted to this device + may perform 64-bit atomic operations, False otherwise + """ cdef _aspect_type AT = _aspect_type._atomic64 return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_image(self): + """ Returns True if this device supports images, False otherwise + """ cdef _aspect_type AT = _aspect_type._image return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_online_compiler(self): + """ Returns True if this device supports online compilation of + device code, False otherwise + """ cdef _aspect_type AT = _aspect_type._online_compiler return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_online_linker(self): + """ Returns True if this device supports online linking of + device code, False otherwise + """ cdef _aspect_type AT = _aspect_type._online_linker return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_queue_profiling(self): + """ Returns True if this device supports queue profiling, + False otherwise + """ cdef _aspect_type AT = _aspect_type._queue_profiling return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_usm_device_allocations(self): + """ Returns True if this device supports explicit USM allocations, + False otherwise + """ cdef _aspect_type AT = _aspect_type._usm_device_allocations return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_usm_host_allocations(self): + """ Returns True if this device can access USM-host memory, + False otherwise + """ cdef _aspect_type AT = _aspect_type._usm_host_allocations return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_usm_shared_allocations(self): + """ Returns True if this device supports USM-shared memory + allocated on the same device, False otherwise + """ cdef _aspect_type AT = _aspect_type._usm_shared_allocations return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_usm_restricted_shared_allocations(self): + """ Deprecated 
property, do not use. + """ cdef _aspect_type AT = _aspect_type._usm_restricted_shared_allocations return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_usm_system_allocations(self): + """ Returns True if system allocator may be used instead of SYCL USM + allocation mechanism for USM-shared allocations on this device, + False otherwise + """ cdef _aspect_type AT = _aspect_type._usm_system_allocations return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_usm_atomic_host_allocations(self): + """ Returns True if this device supports USM-host allocations and + the host and this device may concurrently access and atomically + modify host allocations, False otherwise + """ cdef _aspect_type AT = _aspect_type._usm_atomic_host_allocations return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_usm_atomic_shared_allocations(self): + """ Returns True if this device supports USM-shared allocations and + the host and other devices in the same context as this device may + concurrently access and atomically modify shared allocations, + False otherwise + """ cdef _aspect_type AT = _aspect_type._usm_atomic_shared_allocations return DPCTLDevice_HasAspect(self._device_ref, AT) @property def has_aspect_host_debuggable(self): + """ Returns True if kernels running on this device can be debugged + using standard debuggers that are normally available on the host + system, False otherwise + """ cdef _aspect_type AT = _aspect_type._host_debuggable return DPCTLDevice_HasAspect(self._device_ref, AT) @@ -521,6 +574,10 @@ cdef class SyclDevice(_SyclDevice): @property def default_selector_score(self): + """ Integer score assigned to this device by DPC++ runtime's default + scoring function. Score of -1 denotes that this device was rejected + and may not be properly programmed by the DPC++ runtime. 
+ """ cdef DPCTLSyclDeviceSelectorRef DSRef = DPCTLDefaultSelector_Create() cdef int score = -1 if (DSRef): @@ -531,16 +588,16 @@ cdef class SyclDevice(_SyclDevice): @property def max_read_image_args(self): """ Returns the maximum number of simultaneous image objects that - can be read from by a kernel. The minimum value is 128 if the - SYCL device has aspect::image. + can be read from by a kernel. The minimum value is 128 if the + SYCL device has aspect::image. """ return DPCTLDevice_GetMaxReadImageArgs(self._device_ref) @property def max_write_image_args(self): """ Returns the maximum number of simultaneous image objects that - can be written to by a kernel. The minimum value is 8 if the SYCL - device has aspect::image. + can be written to by a kernel. The minimum value is 8 if the SYCL + device has aspect::image. """ return DPCTLDevice_GetMaxWriteImageArgs(self._device_ref) @@ -585,10 +642,7 @@ cdef class SyclDevice(_SyclDevice): @property def max_work_item_dims(self): """ Returns the maximum dimensions that specify the global and local - work-item IDs used by the data parallel execution model. - - The cb value is 3 if this SYCL device is not of device - type ``info::device_type::custom``. + work-item IDs used by the data parallel execution model. """ cdef uint32_t max_work_item_dims = 0 max_work_item_dims = DPCTLDevice_GetMaxWorkItemDims(self._device_ref) @@ -597,9 +651,9 @@ cdef class SyclDevice(_SyclDevice): @property def max_work_item_sizes(self): """ Returns the maximum number of work-items that are permitted in each - dimension of the work-group of the nd_range. The minimum value is - `(1; 1; 1)` for devices that are not of device type - ``info::device_type::custom``. + dimension of the work-group of the nd_range. The minimum value is + `(1; 1; 1)` for devices that are not of device type + ``info::device_type::custom``. 
""" return ( self._max_work_item_sizes[0], @@ -610,7 +664,7 @@ cdef class SyclDevice(_SyclDevice): @property def max_compute_units(self): """ Returns the number of parallel compute units available to the - device. The minimum value is 1. + device. The minimum value is 1. """ cdef uint32_t max_compute_units = 0 max_compute_units = DPCTLDevice_GetMaxComputeUnits(self._device_ref) @@ -619,9 +673,9 @@ cdef class SyclDevice(_SyclDevice): @property def max_work_group_size(self): """ Returns the maximum number of work-items - that are permitted in a work-group executing a - kernel on a single compute unit. The minimum - value is 1. + that are permitted in a work-group executing a + kernel on a single compute unit. The minimum + value is 1. """ cdef uint32_t max_work_group_size = 0 max_work_group_size = DPCTLDevice_GetMaxWorkGroupSize(self._device_ref) @@ -630,12 +684,12 @@ cdef class SyclDevice(_SyclDevice): @property def max_num_sub_groups(self): """ Returns the maximum number of sub-groups - in a work-group for any kernel executed on the - device. The minimum value is 1. + in a work-group for any kernel executed on the + device. The minimum value is 1. - Returns: - int: The maximum number of sub-groups support per work-group by - the device. + Returns: + int: The maximum number of sub-groups support per work-group by + the device. """ cdef uint32_t max_num_sub_groups = 0 if (not self.is_host): @@ -647,7 +701,7 @@ cdef class SyclDevice(_SyclDevice): @property def sub_group_independent_forward_progress(self): """ Returns true if the device supports independent forward progress of - sub-groups with respect to other sub-groups in the same work-group. + sub-groups with respect to other sub-groups in the same work-group. """ return DPCTLDevice_GetSubGroupIndependentForwardProgress( self._device_ref @@ -659,7 +713,7 @@ cdef class SyclDevice(_SyclDevice): Returns: :class:`dpctl.SyclPlatform`: The platform associated with this - device. + device. 
""" cdef DPCTLSyclPlatformRef PRef = ( DPCTLDevice_GetPlatform(self._device_ref) @@ -672,49 +726,49 @@ cdef class SyclDevice(_SyclDevice): @property def preferred_vector_width_char(self): """ Returns the preferred native vector width size for built-in scalar - types that can be put into vectors. + types that can be put into vectors. """ return DPCTLDevice_GetPreferredVectorWidthChar(self._device_ref) @property def preferred_vector_width_short(self): """ Returns the preferred native vector width size for built-in scalar - types that can be put into vectors. + types that can be put into vectors. """ return DPCTLDevice_GetPreferredVectorWidthShort(self._device_ref) @property def preferred_vector_width_int(self): """ Returns the preferred native vector width size for built-in scalar - types that can be put into vectors. + types that can be put into vectors. """ return DPCTLDevice_GetPreferredVectorWidthInt(self._device_ref) @property def preferred_vector_width_long(self): """ Returns the preferred native vector width size for built-in scalar - types that can be put into vectors. + types that can be put into vectors. """ return DPCTLDevice_GetPreferredVectorWidthLong(self._device_ref) @property def preferred_vector_width_float(self): """ Returns the preferred native vector width size for built-in scalar - types that can be put into vectors. + types that can be put into vectors. """ return DPCTLDevice_GetPreferredVectorWidthFloat(self._device_ref) @property def preferred_vector_width_double(self): """ Returns the preferred native vector width size for built-in scalar - types that can be put into vectors. + types that can be put into vectors. """ return DPCTLDevice_GetPreferredVectorWidthDouble(self._device_ref) @property def preferred_vector_width_half(self): """ Returns the preferred native vector width size for built-in scalar - types that can be put into vectors. + types that can be put into vectors. 
""" return DPCTLDevice_GetPreferredVectorWidthHalf(self._device_ref) @@ -763,6 +817,7 @@ cdef class SyclDevice(_SyclDevice): @property def __name__(self): + "Name of the class `dpctl.SyclDevice`" return "SyclDevice" def __repr__(self): @@ -788,13 +843,13 @@ cdef class SyclDevice(_SyclDevice): cdef list create_sub_devices_equally(self, size_t count): """ Returns a list of sub-devices partitioned from this SYCL device - based on the ``count`` parameter. + based on the ``count`` parameter. - The returned list contains as many sub-devices as can be created - such that each sub-device contains count compute units. If the - device’s total number of compute units is not evenly divided by - count, then the remaining compute units are not included in any of - the sub-devices. + The returned list contains as many sub-devices as can be created + such that each sub-device contains count compute units. If the + device’s total number of compute units is not evenly divided by + count, then the remaining compute units are not included in any of + the sub-devices. """ cdef DPCTLDeviceVectorRef DVRef = NULL if count > 0: @@ -809,10 +864,10 @@ cdef class SyclDevice(_SyclDevice): cdef list create_sub_devices_by_counts(self, object counts): """ Returns a list of sub-devices partitioned from this SYCL device - based on the ``counts`` parameter. + based on the ``counts`` parameter. - For each non-zero value ``M`` in the counts vector, a sub-device - with ``M`` compute units is created. + For each non-zero value ``M`` in the counts vector, a sub-device + with ``M`` compute units is created. """ cdef int ncounts = len(counts) cdef size_t *counts_buff = NULL @@ -850,7 +905,7 @@ cdef class SyclDevice(_SyclDevice): self, _partition_affinity_domain_type domain ): """ Returns a list of sub-devices partitioned from this SYCL device by - affinity domain based on the ``domain`` parameter. + affinity domain based on the ``domain`` parameter. 
""" cdef DPCTLDeviceVectorRef DVRef = NULL DVRef = DPCTLDevice_CreateSubDevicesByAffinity(self._device_ref, domain) @@ -1007,6 +1062,7 @@ cdef class SyclDevice(_SyclDevice): return DPCTLDevice_AreEq(self._device_ref, other.get_device_ref()) def __eq__(self, other): + "Returns True if two devices are the same" if isinstance(other, SyclDevice): return self.equals( other) else: @@ -1014,8 +1070,7 @@ cdef class SyclDevice(_SyclDevice): @property def filter_string(self): - """ - For a parent device, returns a fully specified filter selector + """ For a parent device, returns a fully specified filter selector string``backend:device_type:relative_id`` selecting the device. Returns: @@ -1056,8 +1111,7 @@ cdef class SyclDevice(_SyclDevice): raise TypeError("This SyclDevice is not a root device") cdef int get_backend_and_device_type_ordinal(self): - """ - If this device is a root ``sycl::device``, returns the ordinal + """ If this device is a root ``sycl::device``, returns the ordinal position of this device in the vector ``sycl::device::get_devices(device_type_of_this_device)`` filtered to contain only devices with the same backend as this @@ -1070,8 +1124,7 @@ cdef class SyclDevice(_SyclDevice): return relId cdef int get_device_type_ordinal(self): - """ - If this device is a root ``sycl::device``, returns the ordinal + """ If this device is a root ``sycl::device``, returns the ordinal position of this device in the vector ``sycl::device::get_devices(device_type_of_this_device)`` @@ -1086,8 +1139,7 @@ cdef class SyclDevice(_SyclDevice): return relId cdef int get_backend_ordinal(self): - """ - If this device is a root ``sycl::device``, returns the ordinal + """ If this device is a root ``sycl::device``, returns the ordinal position of this device in the vector ``sycl::device::get_devices()`` filtered to contain only devices with the same backend as this device. 
@@ -1103,8 +1155,7 @@ cdef class SyclDevice(_SyclDevice): return relId cdef int get_overall_ordinal(self): - """ - If this device is a root ``sycl::device``, returns the ordinal + """ If this device is a root ``sycl::device``, returns the ordinal position of this device in the vector ``sycl::device::get_devices()`` filtered to contain only devices with the same backend as this device. @@ -1121,8 +1172,7 @@ cdef class SyclDevice(_SyclDevice): return relId def get_filter_string(self, include_backend=True, include_device_type=True): - """ - get_filter_string(include_backend=True, include_device_type=True) + """ get_filter_string(include_backend=True, include_device_type=True) For a parent device, returns a filter selector string that includes backend or device type based on the value diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index ccd5a1dec0..f6b433db60 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -682,10 +682,12 @@ cdef class SyclQueue(_SyclQueue): @property def sycl_context(self): + "Returns :class:`.SyclContext` underlying this queue" return self._context @property def sycl_device(self): + "Returns :class:`.SyclDevice` underlying this queue" return self._device cpdef SyclContext get_sycl_context(self): @@ -926,6 +928,7 @@ cdef class SyclQueue(_SyclQueue): @property def __name__(self): + "The name of :class:`dpctl.SyclQueue` object" return "SyclQueue" def __repr__(self): diff --git a/dpctl/_sycl_timer.py b/dpctl/_sycl_timer.py index 05098bd837..c685d7ede5 100644 --- a/dpctl/_sycl_timer.py +++ b/dpctl/_sycl_timer.py @@ -93,6 +93,10 @@ def __exit__(self, *args): @property def dt(self): + """Returns a tuple of elapsed times where first + element is the duration as measured by the host timer, + while the second element is the duration as measured by + the device timer and encoded in profiling events""" self.event_start.wait() self.event_finish.wait() return ( diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 
efe3d8d816..5ad7dfe6fa 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -262,6 +262,7 @@ cdef class usm_ndarray: @property def _pointer(self): + "Returns USM pointer for data allocation encoded as integer" return self.get_data() cdef Py_ssize_t get_offset(self) except *: @@ -577,6 +578,9 @@ cdef class usm_ndarray: @property def T(self): + """ Returns tranposed array for 2D array, raises `ValueError` + otherwise. + """ if self.nd_ == 2: return _transpose(self) else: @@ -588,6 +592,8 @@ cdef class usm_ndarray: @property def mT(self): + """ Returns array where the last two dimensions are transposed. + """ if self.nd_ < 2: raise ValueError( "array.mT requires array to have at least 2-dimensons." @@ -596,6 +602,9 @@ cdef class usm_ndarray: @property def real(self): + """ Returns real component for arrays with complex data-types + and returns itself for all other data-types. + """ if (self.typenum_ < UAR_CFLOAT): # elements are real return self @@ -604,6 +613,9 @@ cdef class usm_ndarray: @property def imag(self): + """ Returns imaginary component for arrays with complex data-types + and returns zero array for all other data-types. + """ if (self.typenum_ < UAR_CFLOAT): # elements are real return _zero_like(self) From e2b69e65c2f995bebbc09c6fc5b1b3aad9dfacd9 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 25 Jul 2022 16:35:36 -0500 Subject: [PATCH 30/95] expanded changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fc2557adc..7ab890f77c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Added `dpctl.SyclDevice.platform` and `dpctl.SyclPlatform.default_context` properties [#827](https://github.com/IntelPython/dpctl/pull/827). 
* Provided pybind11 example for functions working on `dpctl.tensor.usm_ndarray` container applying oneMKL functions [#780](https://github.com/IntelPython/dpctl/pull/780), [#793](https://github.com/IntelPython/dpctl/pull/793), [#819](https://github.com/IntelPython/dpctl/pull/819). The example was expanded to demonstrate implementing iterative linear solvers (Chebyshev solver, and Conjugate-Gradient solver) by asynchronously submitting individual SYCL kernels from Python [#821](https://github.com/IntelPython/dpctl/pull/821), [#833](https://github.com/IntelPython/dpctl/pull/833), [#838](https://github.com/IntelPython/dpctl/pull/838). * Wrote manual page about working with `dpctl.SyclQueue` [#829](https://github.com/IntelPython/dpctl/pull/829). +* Added cmake scripts to dpctl package layout and a way to query the location [#853](https://github.com/IntelPython/dpctl/pull/853). + ### Changed From 939aff3ce1f81496a1125ace1b68fa6ef2013ea2 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 25 Jul 2022 16:19:48 -0500 Subject: [PATCH 31/95] Added license text to IntelDPCPPConfig.cmake to align it with what's is oneAPI DPC++ --- cmake/IntelDPCPPConfig.cmake | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cmake/IntelDPCPPConfig.cmake b/cmake/IntelDPCPPConfig.cmake index 74714a4191..59d4b15ad8 100644 --- a/cmake/IntelDPCPPConfig.cmake +++ b/cmake/IntelDPCPPConfig.cmake @@ -1,3 +1,16 @@ +# +# Modifications, Copyright (C) 2021 Intel Corporation +# +# This software and the related documents are Intel copyrighted materials, and +# your use of them is governed by the express license under which they were +# provided to you ("License"). Unless the License provides otherwise, you may not +# use, modify, copy, publish, distribute, disclose or transmit this software or +# the related documents without Intel's prior written permission. 
+# +# This software and the related documents are provided as is, with no express +# or implied warranties, other than those that are expressly stated in the +# License. +# # Distributed under the OSI-approved BSD 3-Clause License. See accompanying # file Copyright.txt or https://cmake.org/licensing for details. From f847cb4670ca32f35762b3961599459b4295a829 Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Tue, 26 Jul 2022 13:04:40 -0700 Subject: [PATCH 32/95] Adding dpctl.tensor.concat feature and tests. concat() function concatenates several arrays along one of axis. https://data-apis.org/array-api/latest/API_specification/generated/signatures.manipulation_functions.concat.html --- dpctl/tensor/__init__.py | 2 + dpctl/tensor/_manipulation_functions.py | 86 +++++++++- dpctl/tests/test_usm_ndarray_manipulation.py | 160 +++++++++++++++++++ 3 files changed, 247 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 1026774947..de5532c5ed 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -39,6 +39,7 @@ from dpctl.tensor._manipulation_functions import ( broadcast_arrays, broadcast_to, + concat, expand_dims, flip, permute_dims, @@ -66,6 +67,7 @@ "flip", "reshape", "roll", + "concat", "broadcast_arrays", "broadcast_to", "expand_dims", diff --git a/dpctl/tensor/_manipulation_functions.py b/dpctl/tensor/_manipulation_functions.py index 5beefab5ec..fa1409ca17 100644 --- a/dpctl/tensor/_manipulation_functions.py +++ b/dpctl/tensor/_manipulation_functions.py @@ -18,11 +18,12 @@ from itertools import chain, product, repeat import numpy as np -from numpy.core.numeric import normalize_axis_tuple +from numpy.core.numeric import normalize_axis_index, normalize_axis_tuple import dpctl import dpctl.tensor as dpt import dpctl.tensor._tensor_impl as ti +import dpctl.utils as dputils def _broadcast_strides(X_shape, X_strides, res_ndim): @@ -285,3 +286,86 @@ def roll(X, shift, axes=None): 
dpctl.SyclEvent.wait_for(hev_list) return res + + +def concat(arrays, axis=0): + """ + concat(arrays: tuple or list of usm_ndarrays, axis: int) -> usm_ndarray + + Joins a sequence of arrays along an existing axis. + """ + n = len(arrays) + if n == 0: + raise TypeError("Missing 1 required positional argument: 'arrays'") + + if not isinstance(arrays, list) and not isinstance(arrays, tuple): + raise TypeError(f"Expected tuple or list type, got {type(arrays)}.") + + for X in arrays: + if not isinstance(X, dpt.usm_ndarray): + raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") + + exec_q = dputils.get_execution_queue([X.sycl_queue for X in arrays]) + if exec_q is None: + raise ValueError("All the input arrays must have same sycl queue") + + res_usm_type = dputils.get_coerced_usm_type([X.usm_type for X in arrays]) + if res_usm_type is None: + raise ValueError("All the input arrays must have usm_type") + + X0 = arrays[0] + if any(X0.dtype != arrays[i].dtype for i in range(1, n)): + raise ValueError("All the input arrays must have same dtype") + + for i in range(1, n): + if X0.ndim != arrays[i].ndim: + raise ValueError( + "All the input arrays must have same number of " + "dimensions, but the array at index 0 has " + f"{X0.ndim} dimension(s) and the array at index " + f"{i} has {arrays[i].ndim} dimension(s)" + ) + + axis = normalize_axis_index(axis, X0.ndim) + X0_shape = X0.shape + for i in range(1, n): + Xi_shape = arrays[i].shape + for j in range(X0.ndim): + if X0_shape[j] != Xi_shape[j] and j != axis: + raise ValueError( + "All the input array dimensions for the " + "concatenation axis must match exactly, but " + f"along dimension {j}, the array at index 0 " + f"has size {X0_shape[j]} and the array at " + f"index {i} has size {Xi_shape[j]}" + ) + + res_shape_axis = 0 + for X in arrays: + res_shape_axis = res_shape_axis + X.shape[axis] + + res_shape = tuple( + X0_shape[i] if i != axis else res_shape_axis for i in range(X0.ndim) + ) + + res = dpt.empty( + 
res_shape, dtype=X0.dtype, usm_type=res_usm_type, sycl_queue=exec_q + ) + + hev_list = [] + fill_start = 0 + for i in range(n): + fill_end = fill_start + arrays[i].shape[axis] + c_shapes_copy = tuple( + np.s_[fill_start:fill_end] if j == axis else np.s_[:] + for j in range(X0.ndim) + ) + hev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=arrays[i], dst=res[c_shapes_copy], sycl_queue=exec_q + ) + fill_start = fill_end + hev_list.append(hev) + + dpctl.SyclEvent.wait_for(hev_list) + + return res diff --git a/dpctl/tests/test_usm_ndarray_manipulation.py b/dpctl/tests/test_usm_ndarray_manipulation.py index 1b3716846b..1eb34d9e7d 100644 --- a/dpctl/tests/test_usm_ndarray_manipulation.py +++ b/dpctl/tests/test_usm_ndarray_manipulation.py @@ -725,3 +725,163 @@ def test_roll_2d(data): Y = dpt.roll(X, sh, ax) Ynp = np.roll(Xnp, sh, ax) assert_array_equal(Ynp, dpt.asnumpy(Y)) + + +def test_concat_incorrect_type(): + Xnp = np.ones((2, 2)) + pytest.raises(TypeError, dpt.concat) + pytest.raises(TypeError, dpt.concat, []) + pytest.raises(TypeError, dpt.concat, Xnp) + pytest.raises(TypeError, dpt.concat, [Xnp, Xnp]) + + +def test_concat_incorrect_queue(): + try: + q1 = dpctl.SyclQueue() + q2 = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + X = dpt.ones((2, 2), sycl_queue=q1) + Y = dpt.ones((2, 2), sycl_queue=q2) + + pytest.raises(ValueError, dpt.concat, [X, Y]) + + +def test_concat_incorrect_dtype(): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + X = dpt.ones((2, 2), dtype=np.int64, sycl_queue=q) + Y = dpt.ones((2, 2), dtype=np.uint64, sycl_queue=q) + + pytest.raises(ValueError, dpt.concat, [X, Y]) + + +def test_concat_incorrect_ndim(): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + X = dpt.ones((2, 2), sycl_queue=q) + Y = dpt.ones((2, 2, 2), sycl_queue=q) + + 
pytest.raises(ValueError, dpt.concat, [X, Y]) + + +@pytest.mark.parametrize( + "data", + [ + [(2, 2), (3, 3), 0], + [(2, 2), (3, 3), 1], + [(3, 2), (3, 3), 0], + [(2, 3), (3, 3), 1], + ], +) +def test_concat_incorrect_shape(data): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + Xshape, Yshape, axis = data + + X = dpt.ones(Xshape, sycl_queue=q) + Y = dpt.ones(Yshape, sycl_queue=q) + + pytest.raises(ValueError, dpt.concat, [X, Y], axis) + + +@pytest.mark.parametrize( + "data", + [ + [(6,), 0], + [(2, 3), 1], + [(3, 2), -1], + [(1, 6), 0], + [(2, 1, 3), 2], + ], +) +def test_concat_1array(data): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + Xshape, axis = data + + Xnp = np.arange(6).reshape(Xshape) + X = dpt.asarray(Xnp, sycl_queue=q) + + Ynp = np.concatenate([Xnp], axis=axis) + Y = dpt.concat([X], axis=axis) + + assert_array_equal(Ynp, dpt.asnumpy(Y)) + + Ynp = np.concatenate((Xnp,), axis=axis) + Y = dpt.concat((X,), axis=axis) + + assert_array_equal(Ynp, dpt.asnumpy(Y)) + + +@pytest.mark.parametrize( + "data", + [ + [(1,), (1,), 0], + [(0, 2), (2, 2), 0], + [(2, 1), (2, 2), -1], + [(2, 2, 2), (2, 1, 2), 1], + ], +) +def test_concat_2arrays(data): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + Xshape, Yshape, axis = data + + Xnp = np.ones(Xshape) + X = dpt.asarray(Xnp, sycl_queue=q) + + Ynp = np.zeros(Yshape) + Y = dpt.asarray(Ynp, sycl_queue=q) + + Znp = np.concatenate([Xnp, Ynp], axis=axis) + Z = dpt.concat([X, Y], axis=axis) + + assert_array_equal(Znp, dpt.asnumpy(Z)) + + +@pytest.mark.parametrize( + "data", + [ + [(1,), (1,), (1,), 0], + [(0, 2), (2, 2), (1, 2), 0], + [(2, 1, 2), (2, 2, 2), (2, 4, 2), 1], + ], +) +def test_concat_3arrays(data): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be 
created") + + Xshape, Yshape, Zshape, axis = data + + Xnp = np.ones(Xshape) + X = dpt.asarray(Xnp, sycl_queue=q) + + Ynp = np.zeros(Yshape) + Y = dpt.asarray(Ynp, sycl_queue=q) + + Znp = np.full(Zshape, 2.0) + Z = dpt.asarray(Znp, sycl_queue=q) + + Rnp = np.concatenate([Xnp, Ynp, Znp], axis=axis) + R = dpt.concat([X, Y, Z], axis=axis) + + assert_array_equal(Rnp, dpt.asnumpy(R)) From 6a084108c558d17a52f9f113c0dcf291c7cae227 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 27 Jul 2022 10:25:48 -0500 Subject: [PATCH 33/95] Use numpy.promote_types to allow concatenation of different dtypes --- dpctl/tensor/_manipulation_functions.py | 12 ++++++++---- dpctl/tests/test_usm_ndarray_manipulation.py | 10 +++++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/dpctl/tensor/_manipulation_functions.py b/dpctl/tensor/_manipulation_functions.py index fa1409ca17..2e36b26dc1 100644 --- a/dpctl/tensor/_manipulation_functions.py +++ b/dpctl/tensor/_manipulation_functions.py @@ -298,7 +298,7 @@ def concat(arrays, axis=0): if n == 0: raise TypeError("Missing 1 required positional argument: 'arrays'") - if not isinstance(arrays, list) and not isinstance(arrays, tuple): + if not isinstance(arrays, (list, tuple)): raise TypeError(f"Expected tuple or list type, got {type(arrays)}.") for X in arrays: @@ -314,8 +314,12 @@ def concat(arrays, axis=0): raise ValueError("All the input arrays must have usm_type") X0 = arrays[0] - if any(X0.dtype != arrays[i].dtype for i in range(1, n)): - raise ValueError("All the input arrays must have same dtype") + if not all(Xi.dtype.char in "?bBhHiIlLqQefdFD" for Xi in arrays): + raise ValueError("Unsupported dtype encountered.") + + res_dtype = X0.dtype + for i in range(1, n): + res_dtype = np.promote_types(res_dtype, arrays[i]) for i in range(1, n): if X0.ndim != arrays[i].ndim: @@ -349,7 +353,7 @@ def concat(arrays, axis=0): ) res = dpt.empty( - res_shape, dtype=X0.dtype, usm_type=res_usm_type, sycl_queue=exec_q + res_shape, 
dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q ) hev_list = [] diff --git a/dpctl/tests/test_usm_ndarray_manipulation.py b/dpctl/tests/test_usm_ndarray_manipulation.py index 1eb34d9e7d..9e99372639 100644 --- a/dpctl/tests/test_usm_ndarray_manipulation.py +++ b/dpctl/tests/test_usm_ndarray_manipulation.py @@ -748,16 +748,20 @@ def test_concat_incorrect_queue(): pytest.raises(ValueError, dpt.concat, [X, Y]) -def test_concat_incorrect_dtype(): +def test_concat_different_dtype(): try: q = dpctl.SyclQueue() except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") X = dpt.ones((2, 2), dtype=np.int64, sycl_queue=q) - Y = dpt.ones((2, 2), dtype=np.uint64, sycl_queue=q) + Y = dpt.ones((3, 2), dtype=np.uint32, sycl_queue=q) - pytest.raises(ValueError, dpt.concat, [X, Y]) + XY = dpt.concat([X, Y]) + + assert XY.dtype is X.dtype + assert XY.shape == (5, 2) + assert XY.sycl_queue == q def test_concat_incorrect_ndim(): From 7d1a0124e11334700a061f5e3ec3d2ab5d54b07a Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Wed, 27 Jul 2022 11:36:00 -0500 Subject: [PATCH 34/95] expanded changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ab890f77c..fc4cf6dc32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Provided pybind11 example for functions working on `dpctl.tensor.usm_ndarray` container applying oneMKL functions [#780](https://github.com/IntelPython/dpctl/pull/780), [#793](https://github.com/IntelPython/dpctl/pull/793), [#819](https://github.com/IntelPython/dpctl/pull/819). 
The example was expanded to demonstrate implementing iterative linear solvers (Chebyshev solver, and Conjugate-Gradient solver) by asynchronously submitting individual SYCL kernels from Python [#821](https://github.com/IntelPython/dpctl/pull/821), [#833](https://github.com/IntelPython/dpctl/pull/833), [#838](https://github.com/IntelPython/dpctl/pull/838). * Wrote manual page about working with `dpctl.SyclQueue` [#829](https://github.com/IntelPython/dpctl/pull/829). * Added cmake scripts to dpctl package layout and a way to query the location [#853](https://github.com/IntelPython/dpctl/pull/853). +* Implemented `dpctl.tensor.concat` function from array-API [#867](https://github.com/IntelPython/dpctl/867). ### Changed From e321aa56f803d8c9e31ae3c069f2be895bcab151 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 26 Jul 2022 10:51:14 -0500 Subject: [PATCH 35/95] Fixed filter-selector-string output by lspltform and print_device_info For example, now: ``` Platform 3 :: Name Intel(R) Level-Zero Version 1.3 Vendor Intel(R) Corporation Backend ext_oneapi_level_zero Num Devices 1 # 0 Name Intel(R) UHD Graphics [0x9bca] Version 1.3.23750 Filter string level_zero:gpu:0 ``` While previously the filter string was "ext_oneapi_level_zero:gpu:0". Similar change for print_device_info: ``` $ SYCL_FILTER_SELECTOR=level python -c "import dpctl; dpctl.SyclDevice().print_device_info()" Name Intel(R) UHD Graphics [0x9bca] Driver version 1.3.23750 Vendor Intel(R) Corporation Profile FULL_PROFILE Filter string level_zero:gpu:0 ``` Previously the filter string used to be "ext_oneapi_level_zero:gpu:0". 
--- .../helper/include/dpctl_utils_helper.h | 12 +++++++ .../helper/source/dpctl_utils_helper.cpp | 32 +++++++++++++++++++ .../source/dpctl_sycl_device_manager.cpp | 19 ++++++----- .../source/dpctl_sycl_platform_manager.cpp | 29 +++++++---------- 4 files changed, 65 insertions(+), 27 deletions(-) diff --git a/libsyclinterface/helper/include/dpctl_utils_helper.h b/libsyclinterface/helper/include/dpctl_utils_helper.h index 57a308f5f0..bf458dbb2b 100644 --- a/libsyclinterface/helper/include/dpctl_utils_helper.h +++ b/libsyclinterface/helper/include/dpctl_utils_helper.h @@ -197,6 +197,18 @@ DPCTLPartitionAffinityDomainType DPCTL_SyclPartitionAffinityDomainToDPCTLType( DPCTL_API int64_t DPCTL_GetRelativeDeviceId(const sycl::device &Device); +/*! + * @brief Gives the filter string which would select given root device if + * used as argument to ``sycl::ext::oneapi::filter_selector``. Throws exception + * if filter string can not be constructed. + * + * @param Device A ``sycl::device`` object whose filter selector + * needs to be computed. + * @return Filter selector for the device. + */ +DPCTL_API +std::string DPCTL_GetDeviceFilterString(const sycl::device &Device); + /*! * @brief Converts a ``sycl::info::event_command_status`` enum value to * corresponding DPCTLSyclEventStatusType enum value. 
diff --git a/libsyclinterface/helper/source/dpctl_utils_helper.cpp b/libsyclinterface/helper/source/dpctl_utils_helper.cpp index 7a10f5f894..905170f61a 100644 --- a/libsyclinterface/helper/source/dpctl_utils_helper.cpp +++ b/libsyclinterface/helper/source/dpctl_utils_helper.cpp @@ -470,6 +470,38 @@ int64_t DPCTL_GetRelativeDeviceId(const device &Device) return relid; } +std::string DPCTL_GetDeviceFilterString(const device &Device) +{ + std::stringstream ss; + static constexpr const char *filter_string_separator = ":"; + + auto be = Device.get_platform().get_backend(); + + switch (be) { + case backend::ext_oneapi_level_zero: + ss << "level_zero"; + break; + case backend::ext_oneapi_cuda: + ss << "cuda"; + break; + case backend::opencl: + ss << "opencl"; + break; + case backend::host: + ss << "host"; + break; + default: + ss << "unknown"; + }; + + ss << filter_string_separator; + ss << DPCTL_DeviceTypeToStr(Device.get_info()); + ss << filter_string_separator; + ss << DPCTL_GetRelativeDeviceId(Device); + + return ss.str(); +} + DPCTLSyclEventStatusType DPCTL_SyclEventStatusToDPCTLEventStatusType(info::event_command_status E) { diff --git a/libsyclinterface/source/dpctl_sycl_device_manager.cpp b/libsyclinterface/source/dpctl_sycl_device_manager.cpp index 7a81b828b2..40bf50ebad 100644 --- a/libsyclinterface/source/dpctl_sycl_device_manager.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_manager.cpp @@ -50,19 +50,18 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(context, DPCTLSyclContextRef) std::string get_device_info_str(const device &Device) { std::stringstream ss; + static constexpr const char *_endl = "\n"; ss << std::setw(4) << " " << std::left << std::setw(16) << "Name" - << Device.get_info() << '\n' - << std::setw(4) << " " << std::left << std::setw(16) << "Driver version" - << Device.get_info() << '\n' + << Device.get_info() << _endl << std::setw(4) << " " + << std::left << std::setw(16) << "Driver version" + << Device.get_info() << _endl << std::setw(4) << " " << 
std::left << std::setw(16) << "Vendor" - << Device.get_info() << '\n' - << std::setw(4) << " " << std::left << std::setw(16) << "Profile" - << Device.get_info() << '\n' - << std::setw(4) << " " << std::left << std::setw(16) << "Filter string" - << Device.get_platform().get_backend() << ":" - << DPCTL_DeviceTypeToStr(Device.get_info()) - << ":" << DPCTL_GetRelativeDeviceId(Device) << '\n'; + << Device.get_info() << _endl << std::setw(4) + << " " << std::left << std::setw(16) << "Profile" + << Device.get_info() << _endl << std::setw(4) + << " " << std::left << std::setw(16) << "Filter string" + << DPCTL_GetDeviceFilterString(Device) << _endl; return ss.str(); } diff --git a/libsyclinterface/source/dpctl_sycl_platform_manager.cpp b/libsyclinterface/source/dpctl_sycl_platform_manager.cpp index b1778259ed..ba78fcfa97 100644 --- a/libsyclinterface/source/dpctl_sycl_platform_manager.cpp +++ b/libsyclinterface/source/dpctl_sycl_platform_manager.cpp @@ -45,6 +45,7 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(platform, DPCTLSyclPlatformRef); std::string platform_print_info_impl(const platform &p, size_t verbosity) { std::stringstream ss; + static constexpr const char *_endl = "\n"; if (verbosity > 2) { error_handler("Illegal verbosity level. Accepted values are 0, 1, or 2." 
@@ -55,7 +56,7 @@ std::string platform_print_info_impl(const platform &p, size_t verbosity) if (verbosity == 0) ss << p.get_info() << " " - << p.get_info() << '\n'; + << p.get_info() << _endl; if (verbosity > 0) { auto vendor = p.get_info(); @@ -63,37 +64,31 @@ std::string platform_print_info_impl(const platform &p, size_t verbosity) vendor = "unknown"; ss << std::setw(4) << " " << std::left << std::setw(12) << "Name" - << p.get_info() << '\n' - << std::setw(4) << " " << std::left << std::setw(12) << "Version" - << p.get_info() << '\n' - << std::setw(4) << " " << std::left << std::setw(12) << "Vendor" - << vendor << '\n' + << p.get_info() << _endl << std::setw(4) << " " + << std::left << std::setw(12) << "Version" + << p.get_info() << _endl << std::setw(4) + << " " << std::left << std::setw(12) << "Vendor" << vendor << _endl << std::setw(4) << " " << std::left << std::setw(12) << "Backend"; p.is_host() ? (ss << "unknown") : (ss << p.get_backend()); - ss << '\n'; + ss << _endl; // Get number of devices on the platform auto devices = p.get_devices(); ss << std::setw(4) << " " << std::left << std::setw(12) << "Num Devices" - << devices.size() << '\n'; + << devices.size() << _endl; if (verbosity == 2) // Print some of the device information for (auto dn = 0ul; dn < devices.size(); ++dn) { - ss << std::setw(6) << " " << std::left << "# " << dn << '\n' + ss << std::setw(6) << " " << std::left << "# " << dn << _endl << std::setw(8) << " " << std::left << std::setw(20) << "Name" << devices[dn].get_info() - << '\n' - << std::setw(8) << " " << std::left << std::setw(20) + << _endl << std::setw(8) << " " << std::left << std::setw(20) << "Version" << devices[dn].get_info() - << '\n' - << std::setw(8) << " " << std::left << std::setw(20) + << _endl << std::setw(8) << " " << std::left << std::setw(20) << "Filter string" - << devices[dn].get_platform().get_backend() << ":" - << DPCTL_DeviceTypeToStr( - devices[dn].get_info()) - << ":" << DPCTL_GetRelativeDeviceId(devices[dn]) << 
'\n'; + << DPCTL_GetDeviceFilterString(devices[dn]) << _endl; } } From b49a85472193afce5d2e61c4b3666e6d1d4de3ab Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 28 Jul 2022 14:49:35 -0500 Subject: [PATCH 36/95] Improved exceptions msg --- dpctl/tensor/_usmarray.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 5ad7dfe6fa..329794ebd6 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -234,10 +234,10 @@ cdef class usm_ndarray: else: self._cleanup() raise ValueError( - "buffer='{}' is not understood. " + ("buffer='{}' is not understood. " "Recognized values are 'device', 'shared', 'host', " "an instance of `MemoryUSM*` object, or a usm_ndarray" - "".format(buffer)) + "").format(buffer)) elif isinstance(buffer, usm_ndarray): _buffer = buffer.usm_data else: @@ -246,8 +246,8 @@ cdef class usm_ndarray: if (_offset + ary_min_displacement < 0 or (_offset + ary_max_displacement + 1) * itemsize > _buffer.nbytes): self._cleanup() - raise ValueError("buffer='{}' can not accomodate the requested " - "array.".format(buffer)) + raise ValueError(("buffer='{}' can not accomodate " + "the requested array.").format(buffer)) self.base_ = _buffer self.data_ = ( ( _buffer._pointer)) + itemsize * _offset self.shape_ = shape_ptr From 940edd9c9998b2ba53e36756ed62b4ea2c0cf350 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 28 Jul 2022 14:49:55 -0500 Subject: [PATCH 37/95] Closes gh-870 Offset should not be incremented if the array is found empty. 
``` import dpctl.tensor as dpt X = dpt.empty((0,4), dtype='u1') X[:, 1] # no longer raises ValueError X[:, 1:3] # no longer raises ValueError ``` --- dpctl/tensor/_slicing.pxi | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dpctl/tensor/_slicing.pxi b/dpctl/tensor/_slicing.pxi index 9fd62e4944..b94fd60b8f 100755 --- a/dpctl/tensor/_slicing.pxi +++ b/dpctl/tensor/_slicing.pxi @@ -110,6 +110,7 @@ cdef object _basic_slice_meta(object ind, tuple shape, new_strides = list() k = 0 new_offset = offset + is_empty = False for i in range(len(ind)): ind_i = ind[i] if (ind_i is Ellipsis): @@ -127,23 +128,27 @@ cdef object _basic_slice_meta(object ind, tuple shape, str_i = (1 if sh_i == 0 else sl_step) * strides[k] new_shape.append(sh_i) new_strides.append(str_i) - if sh_i > 0: + if sh_i > 0 and not is_empty: new_offset = new_offset + sl_start * strides[k] + if sh_i == 0: + is_empty = True k = k_new elif is_integral(ind_i): ind_i = ind_i.__index__() if 0 <= ind_i < shape[k]: k_new = k + 1 - new_offset = new_offset + ind_i * strides[k] + if not is_empty: + new_offset = new_offset + ind_i * strides[k] k = k_new elif -shape[k] <= ind_i < 0: k_new = k + 1 - new_offset = new_offset + (shape[k] + ind_i) * strides[k] + if not is_empty: + new_offset = new_offset + (shape[k] + ind_i) * strides[k] k = k_new else: raise IndexError( - "Index {0} is out of range for " - "axes {1} with size {2}".format(ind_i, k, shape[k])) + ("Index {0} is out of range for " + "axes {1} with size {2}").format(ind_i, k, shape[k])) new_shape.extend(shape[k:]) new_strides.extend(strides[k:]) return (tuple(new_shape), tuple(new_strides), new_offset) From ca3426329df91b70035d96a14b2031bd9692d481 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 28 Jul 2022 14:52:17 -0500 Subject: [PATCH 38/95] Adds tests for gh-870 --- dpctl/tests/test_usm_ndarray_ctor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py 
b/dpctl/tests/test_usm_ndarray_ctor.py index 28180c4714..fffcf711d1 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -219,6 +219,9 @@ def test_empty_slice(): assert Y.shape == X.shape Z = X[::2] assert Z.shape == X.shape + X = dpt.empty((0, 4), dtype="u1") + assert X[:, 1].shape == (0,) + assert X[:, 1:3].shape == (0, 2) def test_slice_constructor_1d(): From b93353ed97eeb281424756b3f2da668db235dfdf Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 28 Jul 2022 15:12:45 -0500 Subject: [PATCH 39/95] Noted the change in CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc4cf6dc32..b7fec72cad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Fixed `dpctl.lsplatform()` to work correctly when used from within Jupyter notebook [#800](https://github.com/IntelPython/dpctl/pull/800). * Fixed script to drive debug build [#835](https://github.com/IntelPython/dpctl/pull/835) and fixed code to compile in debug mode [#836](https://github.com/IntelPython/dpctl/pull/836). +* Fixed issue with slicing reported in gh-870 in [#871](https://github.com/IntelPython/dpctl/pull/871). 
## [0.12.0] - 03/01/2022 From cb67e6ecd0f38481c4e920acd926478e581e3ef8 Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Thu, 28 Jul 2022 15:42:45 -0500 Subject: [PATCH 40/95] Adds dpctl.tensor.concat test for gh-870 --- dpctl/tests/test_usm_ndarray_manipulation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dpctl/tests/test_usm_ndarray_manipulation.py b/dpctl/tests/test_usm_ndarray_manipulation.py index 9e99372639..e020ae84e8 100644 --- a/dpctl/tests/test_usm_ndarray_manipulation.py +++ b/dpctl/tests/test_usm_ndarray_manipulation.py @@ -835,6 +835,7 @@ def test_concat_1array(data): "data", [ [(1,), (1,), 0], + [(0, 2), (0, 2), 1], [(0, 2), (2, 2), 0], [(2, 1), (2, 2), -1], [(2, 2, 2), (2, 1, 2), 1], From 0f24836085b2fc3df69dace19333a09b7081b512 Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Thu, 28 Jul 2022 14:57:40 -0500 Subject: [PATCH 41/95] Added dpctl.tensor.stack feature and tests stack() function joins a sequence of arrays along a new axis and follows array API spec. 
https://data-apis.org/array-api/latest/API_specification/generated/signatures.manipulation_functions.stack.html#signatures.manipulation_functions.stack --- dpctl/tensor/__init__.py | 2 + dpctl/tensor/_manipulation_functions.py | 78 ++++++++++--- dpctl/tests/test_usm_ndarray_manipulation.py | 110 +++++++++++++++++++ 3 files changed, 175 insertions(+), 15 deletions(-) diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index de5532c5ed..44e77aa6b3 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -45,6 +45,7 @@ permute_dims, roll, squeeze, + stack, ) from dpctl.tensor._reshape import reshape from dpctl.tensor._usmarray import usm_ndarray @@ -68,6 +69,7 @@ "reshape", "roll", "concat", + "stack", "broadcast_arrays", "broadcast_to", "expand_dims", diff --git a/dpctl/tensor/_manipulation_functions.py b/dpctl/tensor/_manipulation_functions.py index 2e36b26dc1..365aa91f5d 100644 --- a/dpctl/tensor/_manipulation_functions.py +++ b/dpctl/tensor/_manipulation_functions.py @@ -288,12 +288,7 @@ def roll(X, shift, axes=None): return res -def concat(arrays, axis=0): - """ - concat(arrays: tuple or list of usm_ndarrays, axis: int) -> usm_ndarray - - Joins a sequence of arrays along an existing axis. 
- """ +def arrays_validation(arrays): n = len(arrays) if n == 0: raise TypeError("Missing 1 required positional argument: 'arrays'") @@ -324,11 +319,23 @@ def concat(arrays, axis=0): for i in range(1, n): if X0.ndim != arrays[i].ndim: raise ValueError( - "All the input arrays must have same number of " - "dimensions, but the array at index 0 has " - f"{X0.ndim} dimension(s) and the array at index " - f"{i} has {arrays[i].ndim} dimension(s)" + "All the input arrays must have same number of dimensions, " + f"but the array at index 0 has {X0.ndim} dimension(s) and the " + f"array at index {i} has {arrays[i].ndim} dimension(s)" ) + return res_dtype, res_usm_type, exec_q + + +def concat(arrays, axis=0): + """ + concat(arrays: tuple or list of usm_ndarrays, axis: int) -> usm_ndarray + + Joins a sequence of arrays along an existing axis. + """ + res_dtype, res_usm_type, exec_q = arrays_validation(arrays) + + n = len(arrays) + X0 = arrays[0] axis = normalize_axis_index(axis, X0.ndim) X0_shape = X0.shape @@ -337,11 +344,10 @@ def concat(arrays, axis=0): for j in range(X0.ndim): if X0_shape[j] != Xi_shape[j] and j != axis: raise ValueError( - "All the input array dimensions for the " - "concatenation axis must match exactly, but " - f"along dimension {j}, the array at index 0 " - f"has size {X0_shape[j]} and the array at " - f"index {i} has size {Xi_shape[j]}" + "All the input array dimensions for the concatenation " + f"axis must match exactly, but along dimension {j}, the " + f"array at index 0 has size {X0_shape[j]} and the array " + f"at index {i} has size {Xi_shape[j]}" ) res_shape_axis = 0 @@ -373,3 +379,45 @@ def concat(arrays, axis=0): dpctl.SyclEvent.wait_for(hev_list) return res + + +def stack(arrays, axis=0): + """ + stack(arrays: tuple or list of usm_ndarrays, axis: int) -> usm_ndarray + + Joins a sequence of arrays along a new axis. 
+ """ + res_dtype, res_usm_type, exec_q = arrays_validation(arrays) + + n = len(arrays) + X0 = arrays[0] + res_ndim = X0.ndim + 1 + axis = normalize_axis_index(axis, res_ndim) + X0_shape = X0.shape + + for i in range(1, n): + if X0_shape != arrays[i].shape: + raise ValueError("All input arrays must have the same shape") + + res_shape = tuple( + X0_shape[i - 1 * (i >= axis)] if i != axis else n + for i in range(res_ndim) + ) + + res = dpt.empty( + res_shape, dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q + ) + + hev_list = [] + for i in range(n): + c_shapes_copy = tuple( + i if j == axis else np.s_[:] for j in range(res_ndim) + ) + hev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=arrays[i], dst=res[c_shapes_copy], sycl_queue=exec_q + ) + hev_list.append(hev) + + dpctl.SyclEvent.wait_for(hev_list) + + return res diff --git a/dpctl/tests/test_usm_ndarray_manipulation.py b/dpctl/tests/test_usm_ndarray_manipulation.py index e020ae84e8..0dd4ccc9d7 100644 --- a/dpctl/tests/test_usm_ndarray_manipulation.py +++ b/dpctl/tests/test_usm_ndarray_manipulation.py @@ -890,3 +890,113 @@ def test_concat_3arrays(data): R = dpt.concat([X, Y, Z], axis=axis) assert_array_equal(Rnp, dpt.asnumpy(R)) + + +def test_stack_incorrect_shape(): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + X = dpt.ones((1,), sycl_queue=q) + Y = dpt.ones((2,), sycl_queue=q) + + pytest.raises(ValueError, dpt.stack, [X, Y], 0) + + +@pytest.mark.parametrize( + "data", + [ + [(6,), 0], + [(2, 3), 1], + [(3, 2), -1], + [(1, 6), 2], + [(2, 1, 3), 2], + ], +) +def test_stack_1array(data): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + shape, axis = data + + Xnp = np.arange(6).reshape(shape) + X = dpt.asarray(Xnp, sycl_queue=q) + + Ynp = np.stack([Xnp], axis=axis) + Y = dpt.stack([X], axis=axis) + + assert_array_equal(Ynp, dpt.asnumpy(Y)) + + Ynp = np.stack((Xnp,), 
axis=axis) + Y = dpt.stack((X,), axis=axis) + + assert_array_equal(Ynp, dpt.asnumpy(Y)) + + +@pytest.mark.parametrize( + "data", + [ + [(1,), 0], + [(0, 2), 0], + [(2, 0), 0], + [(2, 3), 0], + [(2, 3), 1], + [(2, 3), 2], + [(2, 3), -1], + [(2, 3), -2], + [(2, 2, 2), 1], + ], +) +def test_stack_2arrays(data): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + shape, axis = data + + Xnp = np.ones(shape) + X = dpt.asarray(Xnp, sycl_queue=q) + + Ynp = np.zeros(shape) + Y = dpt.asarray(Ynp, sycl_queue=q) + + Znp = np.stack([Xnp, Ynp], axis=axis) + print(Znp.shape) + Z = dpt.stack([X, Y], axis=axis) + + assert_array_equal(Znp, dpt.asnumpy(Z)) + + +@pytest.mark.parametrize( + "data", + [ + [(1,), 0], + [(0, 2), 0], + [(2, 1, 2), 1], + ], +) +def test_stack_3arrays(data): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + shape, axis = data + + Xnp = np.ones(shape) + X = dpt.asarray(Xnp, sycl_queue=q) + + Ynp = np.zeros(shape) + Y = dpt.asarray(Ynp, sycl_queue=q) + + Znp = np.full(shape, 2.0) + Z = dpt.asarray(Znp, sycl_queue=q) + + Rnp = np.stack([Xnp, Ynp, Znp], axis=axis) + R = dpt.stack([X, Y, Z], axis=axis) + + assert_array_equal(Rnp, dpt.asnumpy(R)) From 06875de679b3b5a79f872f4afb72e71da402df1a Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Thu, 28 Jul 2022 15:47:37 -0500 Subject: [PATCH 42/95] Renames an internal function --- dpctl/tensor/_manipulation_functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/_manipulation_functions.py b/dpctl/tensor/_manipulation_functions.py index 365aa91f5d..90c5cc895c 100644 --- a/dpctl/tensor/_manipulation_functions.py +++ b/dpctl/tensor/_manipulation_functions.py @@ -288,7 +288,7 @@ def roll(X, shift, axes=None): return res -def arrays_validation(arrays): +def _arrays_validation(arrays): n = len(arrays) if n == 0: raise TypeError("Missing 1 required 
positional argument: 'arrays'") @@ -332,7 +332,7 @@ def concat(arrays, axis=0): Joins a sequence of arrays along an existing axis. """ - res_dtype, res_usm_type, exec_q = arrays_validation(arrays) + res_dtype, res_usm_type, exec_q = _arrays_validation(arrays) n = len(arrays) X0 = arrays[0] @@ -387,7 +387,7 @@ def stack(arrays, axis=0): Joins a sequence of arrays along a new axis. """ - res_dtype, res_usm_type, exec_q = arrays_validation(arrays) + res_dtype, res_usm_type, exec_q = _arrays_validation(arrays) n = len(arrays) X0 = arrays[0] From 642d808a2800d4d49fe0af763f1de14cd9599b60 Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Thu, 28 Jul 2022 15:49:32 -0500 Subject: [PATCH 43/95] Noted the change in CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7fec72cad..15198b32dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Wrote manual page about working with `dpctl.SyclQueue` [#829](https://github.com/IntelPython/dpctl/pull/829). * Added cmake scripts to dpctl package layout and a way to query the location [#853](https://github.com/IntelPython/dpctl/pull/853). * Implemented `dpctl.tensor.concat` function from array-API [#867](https://github.com/IntelPython/dpctl/867). +* Implemented `dpctl.tensor.stack` function from array-API [#872](https://github.com/IntelPython/dpctl/872). ### Changed From bb7fc11e01efed57cacc80c741d159d7147789a5 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 28 Jul 2022 11:15:52 -0500 Subject: [PATCH 44/95] Updated changelog in preparation for release of 0.13.0 --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15198b32dd..c0eb335d3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. 
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [dev] +## [0.13.0] - 07/28/2022 ### Added @@ -51,6 +51,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Fixed `dpctl.lsplatform()` to work correctly when used from within Jupyter notebook [#800](https://github.com/IntelPython/dpctl/pull/800). * Fixed script to drive debug build [#835](https://github.com/IntelPython/dpctl/pull/835) and fixed code to compile in debug mode [#836](https://github.com/IntelPython/dpctl/pull/836). +* Fixed filter selector string produced in outputs of `dpctl.lsplatform(verbosity=2)` and `dpctl.SyclDevice.print_device_info` [#866](https://github.com/IntelPython/dpctl/pull/866). * Fixed issue with slicing reported in gh-870 in [#871](https://github.com/IntelPython/dpctl/pull/871). ## [0.12.0] - 03/01/2022 From 0a8acdd9e5248e5b9236dbfd2bef9321d97b804a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 2 Aug 2022 12:14:43 -0500 Subject: [PATCH 45/95] Added test to check that assigned of different types of the same size work without errors --- dpctl/tests/test_usm_ndarray_ctor.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index fffcf711d1..b84c391f59 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -708,6 +708,23 @@ def test_setitem_different_dtypes(src_dt, dst_dt): assert np.allclose(dpt.asnumpy(Z), np.tile(np.array([1, 0], Z.dtype), 10)) +def test_setitem_wingaps(): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Default queue could not be created") + if np.dtype("intc").itemsize == np.dtype("int32").itemsize: + dpt_dst = dpt.empty(4, dtype="int32", sycl_queue=q) + np_src = np.arange(4, dtype="intc") + dpt_dst[:] = np_src # 
should not raise exceptions + assert np.array_equal(dpt.asnumpy(dpt_dst), np_src) + if np.dtype("long").itemsize == np.dtype("longlong").itemsize: + dpt_dst = dpt.empty(4, dtype="longlong", sycl_queue=q) + np_src = np.arange(4, dtype="long") + dpt_dst[:] = np_src # should not raise exceptions + assert np.array_equal(dpt.asnumpy(dpt_dst), np_src) + + def test_shape_setter(): def cc_strides(sh): return np.empty(sh, dtype="u1").strides From cd0e13972ec17e4521d9a8f279bb1986d99afe4b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 2 Aug 2022 12:16:01 -0500 Subject: [PATCH 46/95] typenum_to_src_id now recognized shadowed types, i.e. intc on Windows --- .../libtensor/include/utils/type_dispatch.hpp | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp index 785a2157bb..888a225560 100644 --- a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp +++ b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp @@ -228,9 +228,22 @@ struct usm_ndarray_types else if (typenum == UAR_HALF_) { return static_cast(typenum_t::HALF); } + else if (typenum == UAR_INT || typenum == UAR_UINT) { + switch (sizeof(int)) { + case sizeof(std::int32_t): + return ((typenum == UAR_INT) + ? static_cast(typenum_t::INT32) + : static_cast(typenum_t::UINT32)); + case sizeof(std::int64_t): + return ((typenum == UAR_INT) + ? 
static_cast(typenum_t::INT64) + : static_cast(typenum_t::UINT64)); + default: + throw_unrecognized_typenum_error(typenum); + } + } else { - throw std::runtime_error("Unrecogized typenum " + - std::to_string(typenum) + " encountered."); + throw_unrecognized_typenum_error(typenum); } } @@ -286,6 +299,12 @@ struct usm_ndarray_types return types; } + + void throw_unrecognized_typenum_error(int typenum) + { + throw std::runtime_error("Unrecogized typenum " + + std::to_string(typenum) + " encountered."); + } }; } // namespace detail From d8ec62fefb17b2ac767316e80d1361ab1340e216 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 2 Aug 2022 16:30:36 -0500 Subject: [PATCH 47/95] Fixed compiler warning --- dpctl/tensor/libtensor/include/utils/type_dispatch.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp index 888a225560..76e9b2c218 100644 --- a/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp +++ b/dpctl/tensor/libtensor/include/utils/type_dispatch.hpp @@ -245,6 +245,9 @@ struct usm_ndarray_types else { throw_unrecognized_typenum_error(typenum); } + // return code signalling error, should never be reached + assert(false); + return -1; } private: From f623f583a7affdee4043d0c4444c92013fd836d6 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 28 Jul 2022 10:51:43 -0500 Subject: [PATCH 48/95] Fixed arange(3.) 
to not produce float64 array on HW with fp64 support --- dpctl/tensor/_ctors.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 90ca52c6e4..5121a84a70 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -453,14 +453,12 @@ def empty( return res -def _coerce_and_infer_dt(*args, dt): +def _coerce_and_infer_dt(*args, dt, sycl_queue): "Deduce arange type from sequence spec" nd, seq_dt, d = _array_info_sequence(args) if d != _host_set or nd != (len(args),): raise ValueError("start, stop and step must be Python scalars") - if dt is None: - dt = seq_dt - dt = np.dtype(dt) + dt = _get_dtype(dt, sycl_queue, ref_type=seq_dt) if np.issubdtype(dt, np.integer): return tuple(int(v) for v in args), dt elif np.issubdtype(dt, np.floating): @@ -526,11 +524,15 @@ def arange( if stop is None: stop = start start = 0 + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) ( start, stop, step, - ), dt = _coerce_and_infer_dt(start, stop, step, dt=dtype) + ), dt = _coerce_and_infer_dt( + start, stop, step, dt=dtype, sycl_queue=sycl_queue + ) try: tmp = _get_arange_length(start, stop, step) sh = int(tmp) @@ -538,8 +540,6 @@ def arange( sh = 0 except TypeError: sh = 0 - dpctl.utils.validate_usm_type(usm_type, allow_none=False) - sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) res = dpt.usm_ndarray( (sh,), dtype=dt, From b12f492da1e4922b7c37697400a972ee56663517 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 1 Aug 2022 08:17:43 -0500 Subject: [PATCH 49/95] asarray on numpy arrays should take device capabilities into account in decision in data-type of the USM array --- dpctl/tensor/_ctors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 5121a84a70..6db5b90df6 100644 --- a/dpctl/tensor/_ctors.py +++ 
b/dpctl/tensor/_ctors.py @@ -197,7 +197,7 @@ def _asarray_from_numpy_ndarray( if usm_type is None: usm_type = "device" if dtype is None: - dtype = ary.dtype + dtype = _get_dtype(dtype, sycl_queue, ref_type=ary.dtype) copy_q = normalize_queue_device(sycl_queue=None, device=sycl_queue) f_contig = ary.flags["F"] c_contig = ary.flags["C"] From eea2a7434b3e1727d6e30b20a3a4fe4eaaa941b2 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 4 Aug 2022 17:21:45 -0500 Subject: [PATCH 50/95] Added array-API dpctl.tensor.linspace Added tests --- dpctl/tensor/__init__.py | 2 + dpctl/tensor/_ctors.py | 122 +++++++++++++++++++++++---- dpctl/tests/test_usm_ndarray_ctor.py | 28 ++++++ 3 files changed, 135 insertions(+), 17 deletions(-) diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 44e77aa6b3..049a4d686b 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -29,6 +29,7 @@ empty_like, full, full_like, + linspace, ones, ones_like, zeros, @@ -61,6 +62,7 @@ "zeros", "ones", "full", + "linspace", "empty_like", "zeros_like", "ones_like", diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 6db5b90df6..b895ad2341 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import operator + import numpy as np import dpctl @@ -196,9 +198,12 @@ def _asarray_from_numpy_ndarray( raise TypeError(f"Expected numpy.ndarray, got {type(ary)}") if usm_type is None: usm_type = "device" - if dtype is None: - dtype = _get_dtype(dtype, sycl_queue, ref_type=ary.dtype) copy_q = normalize_queue_device(sycl_queue=None, device=sycl_queue) + if dtype is None: + ary_dtype = ary.dtype + dtype = _get_dtype(dtype, copy_q, ref_type=ary_dtype) + if dtype.itemsize > ary_dtype.itemsize: + dtype = ary_dtype f_contig = ary.flags["F"] c_contig = ary.flags["C"] fc_contig = f_contig or c_contig @@ -292,7 +297,7 @@ def asarray( for output array allocation and copying. `sycl_queue` and `device` are exclusive keywords, i.e. use one or another. If both are specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both a `None`, the + underlying SYCL queue to be used. If both are `None`, the `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. """ @@ -430,7 +435,7 @@ def empty( for output array allocation and copying. `sycl_queue` and `device` are exclusive keywords, i.e. use one or another. If both are specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both a `None`, the + underlying SYCL queue to be used. If both are `None`, the `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. 
""" @@ -453,11 +458,11 @@ def empty( return res -def _coerce_and_infer_dt(*args, dt, sycl_queue): +def _coerce_and_infer_dt(*args, dt, sycl_queue, err_msg, allow_bool=False): "Deduce arange type from sequence spec" nd, seq_dt, d = _array_info_sequence(args) if d != _host_set or nd != (len(args),): - raise ValueError("start, stop and step must be Python scalars") + raise ValueError(err_msg) dt = _get_dtype(dt, sycl_queue, ref_type=seq_dt) if np.issubdtype(dt, np.integer): return tuple(int(v) for v in args), dt @@ -465,6 +470,8 @@ def _coerce_and_infer_dt(*args, dt, sycl_queue): return tuple(float(v) for v in args), dt elif np.issubdtype(dt, np.complexfloating): return tuple(complex(v) for v in args), dt + elif allow_bool and dt.char == "?": + return tuple(bool(v) for v in args), dt else: raise ValueError(f"Data type {dt} is not supported") @@ -517,7 +524,7 @@ def arange( for output array allocation and copying. `sycl_queue` and `device` are exclusive keywords, i.e. use one or another. If both are specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both a `None`, the + underlying SYCL queue to be used. If both are `None`, the `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. """ @@ -526,12 +533,14 @@ def arange( start = 0 dpctl.utils.validate_usm_type(usm_type, allow_none=False) sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) - ( + (start, stop, step,), dt = _coerce_and_infer_dt( start, stop, step, - ), dt = _coerce_and_infer_dt( - start, stop, step, dt=dtype, sycl_queue=sycl_queue + dt=dtype, + sycl_queue=sycl_queue, + err_msg="start, stop, and step must be Python scalars", + allow_bool=False, ) try: tmp = _get_arange_length(start, stop, step) @@ -579,7 +588,7 @@ def zeros( for output array allocation and copying. `sycl_queue` and `device` are exclusive keywords, i.e. use one or another. 
If both are specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both a `None`, the + underlying SYCL queue to be used. If both are `None`, the `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. """ @@ -627,7 +636,7 @@ def ones( for output array allocation and copying. `sycl_queue` and `device` are exclusive keywords, i.e. use one or another. If both are specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both a `None`, the + underlying SYCL queue to be used. If both are `None`, the `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. """ @@ -683,7 +692,7 @@ def full( for output array allocation and copying. `sycl_queue` and `device` are exclusive keywords, i.e. use one or another. If both are specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both a `None`, the + underlying SYCL queue to be used. If both are `None`, the `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. """ @@ -733,7 +742,7 @@ def empty_like( for output array allocation and copying. `sycl_queue` and `device` are exclusive keywords, i.e. use one or another. If both are specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both a `None`, the + underlying SYCL queue to be used. If both are `None`, the `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. """ @@ -790,7 +799,7 @@ def zeros_like( for output array allocation and copying. `sycl_queue` and `device` are exclusive keywords, i.e. use one or another. If both are specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both a `None`, the + underlying SYCL queue to be used. If both are `None`, the `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. 
""" @@ -847,7 +856,7 @@ def ones_like( for output array allocation and copying. `sycl_queue` and `device` are exclusive keywords, i.e. use one or another. If both are specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both a `None`, the + underlying SYCL queue to be used. If both are `None`, the `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. """ @@ -911,7 +920,7 @@ def full_like( for output array allocation and copying. `sycl_queue` and `device` are exclusive keywords, i.e. use one or another. If both are specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both a `None`, the + underlying SYCL queue to be used. If both are `None`, the `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. """ @@ -942,3 +951,82 @@ def full_like( usm_type=usm_type, sycl_queue=sycl_queue, ) + + +def linspace( + start, + stop, + /, + num, + *, + dtype=None, + device=None, + endpoint=True, + sycl_queue=None, + usm_type="device", +): + """ + linspace(start, stop, num, dtype=None, device=None, endpoint=True, + sycl_queue=None, usm_type=None): usm_ndarray + + Returns evenly spaced numbers of specified interval. + + Args: + start: the start of the interval. + stop: the end of the interval. If the `endpoint` is `False`, the + function must generate `num+1` evenly spaced points starting + with `start` and ending with `stop` and exclude the `stop` + from the returned array such that the returned array consists + of evenly spaced numbers over the half-open interval + `[start, stop)`. If `endpoint` is `True`, the output + array must consist of evenly spaced numbers over the closed + interval `[start, stop]`. Default: `True`. + num: number of samples. Must be a non-negative integer; otherwise, + the function must raise an exception. + dtype: output array data type. Should be a floating data type. 
+ If `dtype` is `None`, the output array must be the default + floating point data type. Default: `None`. + device (optional): array API concept of device where the output array + is created. `device` can be `None`, a oneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a + non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a `Device` object returnedby + `dpctl.tensor.usm_array.device`. Default: `None`. + usm_type ("device"|"shared"|"host", optional): The type of SYCL USM + allocation for the output array. Default: `"device"`. + sycl_queue (:class:`dpctl.SyclQueue`, optional): The SYCL queue to use + for output array allocation and copying. `sycl_queue` and `device` + are exclusive keywords, i.e. use one or another. If both are + specified, a `TypeError` is raised unless both imply the same + underlying SYCL queue to be used. If both are `None`, the + `dpctl.SyclQueue()` is used for allocation and copying. + Default: `None`. + endpoint: boolean indicating whether to include `stop` in the + interval. Default: `True`. 
+ """ + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + if endpoint not in [True, False]: + raise TypeError("endpoint keyword argument must be of boolean type") + num = operator.index(num) + if num < 0: + raise ValueError("Number of points must be non-negative") + ((start, stop,), dt) = _coerce_and_infer_dt( + start, + stop, + dt=dtype, + sycl_queue=sycl_queue, + err_msg="start and stop must be Python scalars.", + allow_bool=True, + ) + if dtype is None and np.issubdtype(dt, np.integer): + dt = ti.default_device_fp_type(sycl_queue) + dt = np.dtype(dt) + start = float(start) + stop = float(stop) + res = dpt.empty(num, dtype=dt, sycl_queue=sycl_queue) + hev, _ = ti._linspace_affine( + start, stop, dst=res, include_endpoint=endpoint, sycl_queue=sycl_queue + ) + hev.wait() + return res diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index b84c391f59..9b4759ae73 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1071,6 +1071,34 @@ def test_arange_fp(): assert dpt.arange(0, 1, 0.25, dtype="f4", device=q).shape == (4,) +@pytest.mark.parametrize( + "dt", + _all_dtypes, +) +def test_linspace(dt): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Default queue could not be created") + X = dpt.linspace(0, 1, num=2, dtype=dt, sycl_queue=q) + assert np.allclose(dpt.asnumpy(X), np.linspace(0, 1, num=2, dtype=dt)) + + +def test_linspace_fp(): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Default queue could not be created") + n = 16 + X = dpt.linspace(0, n - 1, num=n, sycl_queue=q) + if q.sycl_device.has_aspect_fp64: + assert X.dtype == np.dtype("float64") + else: + assert X.dtype == np.dtype("float32") + assert X.shape == (n,) + assert X.strides == (1,) + + @pytest.mark.parametrize( "dt", _all_dtypes, From 
bc5055c7e7e78603d11361949c001fef04a762dc Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 6 Aug 2022 15:07:49 -0500 Subject: [PATCH 51/95] Added changelog entry fo #875 --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0eb335d3c..2a0eaccd4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [dev] + +### Added + +* Implemented `dpctl.tensor.linspace` function from array-API [#875](https://github.com/IntelPython/dpctl/pull/875). + +### Changed +### Fixed + ## [0.13.0] - 07/28/2022 ### Added From a84b84a012bf028a52788800ab50a2d23a2e5d9f Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 2 Aug 2022 16:50:24 -0700 Subject: [PATCH 52/95] pybind11 v2.9.2 -> v2.10.0 --- CMakeLists.txt | 4 ++-- examples/pybind11/external_usm_allocation/CMakeLists.txt | 4 ++-- examples/pybind11/onemkl_gemv/CMakeLists.txt | 4 ++-- examples/pybind11/use_dpctl_syclqueue/CMakeLists.txt | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 23ae1dd632..704b10d58d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,8 +31,8 @@ include(FetchContent) FetchContent_Declare( pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.tar.gz - URL_HASH SHA256=6bd528c4dbe2276635dc787b6b1f2e5316cf6b49ee3e150264e455a0d68d19c1 + URL https://github.com/pybind/pybind11/archive/refs/tags/v2.10.0.tar.gz + URL_HASH SHA256=eacf582fa8f696227988d08cfc46121770823839fe9e301a20fbce67e7cd70ec ) FetchContent_MakeAvailable(pybind11) diff --git a/examples/pybind11/external_usm_allocation/CMakeLists.txt b/examples/pybind11/external_usm_allocation/CMakeLists.txt index d3ba8f4dd9..43af2d5e0d 100644 --- 
a/examples/pybind11/external_usm_allocation/CMakeLists.txt +++ b/examples/pybind11/external_usm_allocation/CMakeLists.txt @@ -13,8 +13,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED True) include(FetchContent) FetchContent_Declare( pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.tar.gz - URL_HASH SHA256=6bd528c4dbe2276635dc787b6b1f2e5316cf6b49ee3e150264e455a0d68d19c1 + URL https://github.com/pybind/pybind11/archive/refs/tags/v2.10.0.tar.gz + URL_HASH SHA256=eacf582fa8f696227988d08cfc46121770823839fe9e301a20fbce67e7cd70ec ) FetchContent_MakeAvailable(pybind11) diff --git a/examples/pybind11/onemkl_gemv/CMakeLists.txt b/examples/pybind11/onemkl_gemv/CMakeLists.txt index b9c4b087ed..f8d133b9aa 100644 --- a/examples/pybind11/onemkl_gemv/CMakeLists.txt +++ b/examples/pybind11/onemkl_gemv/CMakeLists.txt @@ -17,8 +17,8 @@ include(GNUInstallDirs) include(FetchContent) FetchContent_Declare( pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.tar.gz - URL_HASH SHA256=6bd528c4dbe2276635dc787b6b1f2e5316cf6b49ee3e150264e455a0d68d19c1 + URL https://github.com/pybind/pybind11/archive/refs/tags/v2.10.0.tar.gz + URL_HASH SHA256=eacf582fa8f696227988d08cfc46121770823839fe9e301a20fbce67e7cd70ec ) FetchContent_MakeAvailable(pybind11) diff --git a/examples/pybind11/use_dpctl_syclqueue/CMakeLists.txt b/examples/pybind11/use_dpctl_syclqueue/CMakeLists.txt index 0d4e262d1e..b424f6a327 100644 --- a/examples/pybind11/use_dpctl_syclqueue/CMakeLists.txt +++ b/examples/pybind11/use_dpctl_syclqueue/CMakeLists.txt @@ -13,8 +13,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED True) include(FetchContent) FetchContent_Declare( pybind11 - URL https://github.com/pybind/pybind11/archive/refs/tags/v2.9.2.tar.gz - URL_HASH SHA256=6bd528c4dbe2276635dc787b6b1f2e5316cf6b49ee3e150264e455a0d68d19c1 + URL https://github.com/pybind/pybind11/archive/refs/tags/v2.10.0.tar.gz + URL_HASH SHA256=eacf582fa8f696227988d08cfc46121770823839fe9e301a20fbce67e7cd70ec ) 
FetchContent_MakeAvailable(pybind11) From 1bb7e5061440e717af9722e8bd993c3fe769fe99 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 6 Aug 2022 15:25:19 -0500 Subject: [PATCH 53/95] Simplified impl of tensor::usm_ndarray and memory::usm_memory classes Used fix contributed to pybind11 that is part of pybind11 2.10 release --- dpctl/apis/include/dpctl4pybind11.hpp | 103 +++----------------------- 1 file changed, 10 insertions(+), 93 deletions(-) diff --git a/dpctl/apis/include/dpctl4pybind11.hpp b/dpctl/apis/include/dpctl4pybind11.hpp index e9b8d84524..bdf9f256e5 100644 --- a/dpctl/apis/include/dpctl4pybind11.hpp +++ b/dpctl/apis/include/dpctl4pybind11.hpp @@ -305,55 +305,13 @@ namespace memory class usm_memory : public py::object { public: - // Use macro once Pybind11 2.9.3 is released instead of code bewteen - // START_TOKEN and END_TOKEN - /* - PYBIND11_OBJECT_CVT( - usm_memory, - py::object, - [](PyObject *o) -> bool{ return PyObject_TypeCheck(o, &Py_MemoryType) - != 0;}, - [](PyObject *o) -> PyObject* { return as_usm_memory(o); } - ) - */ - // START_TOKEN - - // these constructors do not validate, but since borrowed_t and stolen_t are - // protected struct members of the object, they can only be called - // internally. - usm_memory(py::handle h, borrowed_t) : py::object(h, borrowed_t{}) {} - usm_memory(py::handle h, stolen_t) : py::object(h, stolen_t{}) {} - - static bool check_(py::handle h) - { - return h.ptr() != nullptr && - PyObject_TypeCheck(h.ptr(), &Py_MemoryType); - } - - template - /* NOLINTNEXTLINE(google-explicit-constructor) */ - usm_memory(const py::detail::accessor &a) - : usm_memory(py::object(a)) - { - } - - usm_memory(const py::object &o) - : py::object(check_(o) ? o.inc_ref().ptr() : as_usm_memory(o.ptr()), - stolen_t{}) - { - if (!m_ptr) - throw py::error_already_set(); - } - - /* NOLINTNEXTLINE(google-explicit-constructor) */ - usm_memory(py::object &&o) - : py::object(check_(o) ? 
o.release().ptr() : as_usm_memory(o.ptr()), - stolen_t{}) - { - if (!m_ptr) - throw py::error_already_set(); - } - // END_TOKEN + PYBIND11_OBJECT_CVT( + usm_memory, + py::object, + [](PyObject *o) -> bool { + return PyObject_TypeCheck(o, &Py_MemoryType) != 0; + }, + [](PyObject *o) -> PyObject * { return as_usm_memory(o); }) usm_memory() : py::object(::dpctl::detail::dpctl_api::get().default_usm_memory_(), @@ -412,50 +370,9 @@ namespace tensor class usm_ndarray : public py::object { public: - // In Pybind11 2.9.3 replace code between START_TOKEN and END_TOKEN with - // macro - /* - PYBIND11_OBJECT( - usm_ndarray, - py::object, - [](PyObject *o) -> bool {return PyObject_TypeCheck(o, &PyUSMArrayType) - != 0;} - ) - */ - - // START_TOKEN - static bool check_(py::handle h) - { - return h.ptr() != nullptr && - PyObject_TypeCheck(h.ptr(), &PyUSMArrayType); - } - - // these constructors do not validate, but since borrowed_t and stolen_t are - // protected struct members of the object, they can only be called - // internally. 
- usm_ndarray(py::handle h, borrowed_t) : py::object(h, borrowed_t{}) {} - usm_ndarray(py::handle h, stolen_t) : py::object(h, stolen_t{}) {} - - template - /* NOLINTNEXTLINE(google-explicit-constructor) */ - usm_ndarray(const py::detail::accessor &a) - : usm_ndarray(py::object(a)) - { - } - - usm_ndarray(const py::object &o) : py::object(o) - { - if (m_ptr && !check_(m_ptr)) - throw PYBIND11_OBJECT_CHECK_FAILED(usm_ndarray, m_ptr); - } - - /* NOLINTNEXTLINE(google-explicit-constructor) */ - usm_ndarray(py::object &&o) : py::object(std::move(o)) - { - if (m_ptr && !check_(m_ptr)) - throw PYBIND11_OBJECT_CHECK_FAILED(usm_ndarray, m_ptr); - } - // END_TOKEN + PYBIND11_OBJECT(usm_ndarray, py::object, [](PyObject *o) -> bool { + return PyObject_TypeCheck(o, &PyUSMArrayType) != 0; + }) usm_ndarray() : py::object(::dpctl::detail::dpctl_api::get().default_usm_ndarray_(), From c00aece513d04463669aaac96a9eb28dbfac25dc Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 6 Aug 2022 15:53:48 -0500 Subject: [PATCH 54/95] Fix build break with open-source intel/llvm DPC++ --- libsyclinterface/source/dpctl_sycl_device_interface.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index 214e3ad0d1..dcaaaf4380 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -232,7 +232,11 @@ DPCTLDevice_GetMaxWorkItemSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef) auto D = unwrap(DRef); if (D) { try { +#if __SYCL_COMPILER_VERSION >= 20220805 + auto id_sizes = D->get_info>(); +#else auto id_sizes = D->get_info(); +#endif sizes = new size_t[3]; for (auto i = 0ul; i < 3; ++i) { sizes[i] = id_sizes[i]; From 9a3a2d0010784adb4f179349ba37f258ab62f430 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 6 Aug 2022 23:20:02 -0500 Subject: [PATCH 55/95] Added max_work_item_sizes1d, 2d, 3d 
properties dpctl.SyclDevice.max_work_iterm_sizes is deprecated. --- dpctl/_backend.pxd | 4 +- dpctl/_sycl_device.pyx | 57 ++++++++++++++++- dpctl/tests/test_sycl_device.py | 20 +++++- dpctl/tests/test_sycl_queue.py | 20 +++++- .../include/dpctl_sycl_device_interface.h | 39 +++++++++++- .../source/dpctl_sycl_device_interface.cpp | 62 +++++++++++++------ 6 files changed, 172 insertions(+), 30 deletions(-) diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index 7d46ad0b77..c07df5097d 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -157,7 +157,9 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h": cdef uint32_t DPCTLDevice_GetMaxNumSubGroups(const DPCTLSyclDeviceRef DRef) cdef size_t DPCTLDevice_GetMaxWorkGroupSize(const DPCTLSyclDeviceRef DRef) cdef uint32_t DPCTLDevice_GetMaxWorkItemDims(const DPCTLSyclDeviceRef DRef) - cdef size_t *DPCTLDevice_GetMaxWorkItemSizes(const DPCTLSyclDeviceRef DRef) + cdef size_t *DPCTLDevice_GetMaxWorkItemSizes1d(const DPCTLSyclDeviceRef DRef) + cdef size_t *DPCTLDevice_GetMaxWorkItemSizes2d(const DPCTLSyclDeviceRef DRef) + cdef size_t *DPCTLDevice_GetMaxWorkItemSizes3d(const DPCTLSyclDeviceRef DRef) cdef const char *DPCTLDevice_GetName(const DPCTLSyclDeviceRef DRef) cdef DPCTLSyclPlatformRef DPCTLDevice_GetPlatform( const DPCTLSyclDeviceRef DRef) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index ecbc249337..e2906792e8 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -46,7 +46,9 @@ from ._backend cimport ( # noqa: E211 DPCTLDevice_GetMaxReadImageArgs, DPCTLDevice_GetMaxWorkGroupSize, DPCTLDevice_GetMaxWorkItemDims, - DPCTLDevice_GetMaxWorkItemSizes, + DPCTLDevice_GetMaxWorkItemSizes1d, + DPCTLDevice_GetMaxWorkItemSizes2d, + DPCTLDevice_GetMaxWorkItemSizes3d, DPCTLDevice_GetMaxWriteImageArgs, DPCTLDevice_GetName, DPCTLDevice_GetParentDevice, @@ -185,7 +187,7 @@ cdef void _init_helper(_SyclDevice device, DPCTLSyclDeviceRef DRef): device._name = DPCTLDevice_GetName(DRef) 
device._driver_version = DPCTLDevice_GetDriverVersion(DRef) device._vendor = DPCTLDevice_GetVendor(DRef) - device._max_work_item_sizes = DPCTLDevice_GetMaxWorkItemSizes(DRef) + device._max_work_item_sizes = DPCTLDevice_GetMaxWorkItemSizes3d(DRef) cdef class SyclDevice(_SyclDevice): @@ -263,7 +265,7 @@ cdef class SyclDevice(_SyclDevice): self._name = DPCTLDevice_GetName(self._device_ref) self._driver_version = DPCTLDevice_GetDriverVersion(self._device_ref) self._max_work_item_sizes = ( - DPCTLDevice_GetMaxWorkItemSizes(self._device_ref) + DPCTLDevice_GetMaxWorkItemSizes3d(self._device_ref) ) self._vendor = DPCTLDevice_GetVendor(self._device_ref) return 0 @@ -648,6 +650,49 @@ cdef class SyclDevice(_SyclDevice): max_work_item_dims = DPCTLDevice_GetMaxWorkItemDims(self._device_ref) return max_work_item_dims + @property + def max_work_item_sizes1d(self): + """ Returns the maximum number of work-items that are permitted in each + dimension of the work-group of the nd_range<1>. The minimum value is + `(1 )` for devices that are not of device type + ``info::device_type::custom``. + """ + cdef size_t *max_work_item_sizes1d = NULL + max_work_item_sizes1d = DPCTLDevice_GetMaxWorkItemSizes1d( + self._device_ref + ) + res = (max_work_item_sizes1d[0], ) + DPCTLSize_t_Array_Delete(max_work_item_sizes1d) + return res + + @property + def max_work_item_sizes2d(self): + """ Returns the maximum number of work-items that are permitted in each + dimension of the work-group of the nd_range<2>. The minimum value is + `(1; 1)` for devices that are not of device type + ``info::device_type::custom``. 
+ """ + cdef size_t *max_work_item_sizes2d = NULL + max_work_item_sizes2d = DPCTLDevice_GetMaxWorkItemSizes2d( + self._device_ref + ) + res = (max_work_item_sizes2d[0], max_work_item_sizes2d[1],) + DPCTLSize_t_Array_Delete(max_work_item_sizes2d) + return res + + @property + def max_work_item_sizes3d(self): + """ Returns the maximum number of work-items that are permitted in each + dimension of the work-group of the nd_range<3>. The minimum value is + `(1; 1; 1)` for devices that are not of device type + ``info::device_type::custom``. + """ + return ( + self._max_work_item_sizes[0], + self._max_work_item_sizes[1], + self._max_work_item_sizes[2], + ) + @property def max_work_item_sizes(self): """ Returns the maximum number of work-items that are permitted in each @@ -655,6 +700,12 @@ cdef class SyclDevice(_SyclDevice): `(1; 1; 1)` for devices that are not of device type ``info::device_type::custom``. """ + import warnings + warnings.warn( + "Use dpctl.SyclDevice.max_work_item_sizes3d", + DeprecationWarning, + stacklevel=2 + ) return ( self._max_work_item_sizes[0], self._max_work_item_sizes[1], diff --git a/dpctl/tests/test_sycl_device.py b/dpctl/tests/test_sycl_device.py index c85124870d..b047f6f16f 100644 --- a/dpctl/tests/test_sycl_device.py +++ b/dpctl/tests/test_sycl_device.py @@ -76,8 +76,20 @@ def check_get_max_work_item_dims(device): assert max_work_item_dims > 0 -def check_get_max_work_item_sizes(device): - max_work_item_sizes = device.max_work_item_sizes +def check_get_max_work_item_sizes1d(device): + max_work_item_sizes = device.max_work_item_sizes1d + for size in max_work_item_sizes: + assert size is not None + + +def check_get_max_work_item_sizes2d(device): + max_work_item_sizes = device.max_work_item_sizes2d + for size in max_work_item_sizes: + assert size is not None + + +def check_get_max_work_item_sizes3d(device): + max_work_item_sizes = device.max_work_item_sizes3d for size in max_work_item_sizes: assert size is not None @@ -518,7 +530,9 @@ def 
check_platform(device): list_of_checks = [ check_get_max_compute_units, check_get_max_work_item_dims, - check_get_max_work_item_sizes, + check_get_max_work_item_sizes1d, + check_get_max_work_item_sizes2d, + check_get_max_work_item_sizes3d, check_get_max_work_group_size, check_get_max_num_sub_groups, check_is_accelerator, diff --git a/dpctl/tests/test_sycl_queue.py b/dpctl/tests/test_sycl_queue.py index 6b53f68608..8db836cd1a 100644 --- a/dpctl/tests/test_sycl_queue.py +++ b/dpctl/tests/test_sycl_queue.py @@ -68,8 +68,20 @@ def check_get_max_work_item_dims(device): assert max_work_item_dims > 0 -def check_get_max_work_item_sizes(device): - max_work_item_sizes = device.max_work_item_sizes +def check_get_max_work_item_sizes1d(device): + max_work_item_sizes = device.max_work_item_sizes1d + for size in max_work_item_sizes: + assert size is not None + + +def check_get_max_work_item_sizes2d(device): + max_work_item_sizes = device.max_work_item_sizes2d + for size in max_work_item_sizes: + assert size is not None + + +def check_get_max_work_item_sizes3d(device): + max_work_item_sizes = device.max_work_item_sizes3d for size in max_work_item_sizes: assert size is not None @@ -263,7 +275,9 @@ def check_is_host(device): list_of_checks = [ check_get_max_compute_units, check_get_max_work_item_dims, - check_get_max_work_item_sizes, + check_get_max_work_item_sizes1d, + check_get_max_work_item_sizes2d, + check_get_max_work_item_sizes3d, check_get_max_work_group_size, check_get_max_num_sub_groups, check_is_accelerator, diff --git a/libsyclinterface/include/dpctl_sycl_device_interface.h b/libsyclinterface/include/dpctl_sycl_device_interface.h index 676ab4c367..f9b1c0f009 100644 --- a/libsyclinterface/include/dpctl_sycl_device_interface.h +++ b/libsyclinterface/include/dpctl_sycl_device_interface.h @@ -211,7 +211,7 @@ uint32_t DPCTLDevice_GetMaxWorkItemDims(__dpctl_keep const DPCTLSyclDeviceRef DRef); /*! - * @brief Wrapper for get_info(). + * @brief Wrapper for get_info>(). 
* * @param DRef Opaque pointer to a ``sycl::device`` * @return Returns the valid result if device exists else returns NULL. @@ -219,7 +219,42 @@ DPCTLDevice_GetMaxWorkItemDims(__dpctl_keep const DPCTLSyclDeviceRef DRef); */ DPCTL_API __dpctl_keep size_t * -DPCTLDevice_GetMaxWorkItemSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef); +DPCTLDevice_GetMaxWorkItemSizes1d(__dpctl_keep const DPCTLSyclDeviceRef DRef); + +/*! + * @brief Wrapper for get_info>(). + * + * @param DRef Opaque pointer to a ``sycl::device`` + * @return Returns the valid result if device exists else returns NULL. + * @ingroup DeviceInterface + */ +DPCTL_API +__dpctl_keep size_t * +DPCTLDevice_GetMaxWorkItemSizes2d(__dpctl_keep const DPCTLSyclDeviceRef DRef); + +/*! + * @brief Wrapper for get_info>(). + * + * @param DRef Opaque pointer to a ``sycl::device`` + * @return Returns the valid result if device exists else returns NULL. + * @ingroup DeviceInterface + */ +DPCTL_API +__dpctl_keep size_t * +DPCTLDevice_GetMaxWorkItemSizes3d(__dpctl_keep const DPCTLSyclDeviceRef DRef); + +/*! + * @brief Wrapper for deprecated get_info(). + * + * @param DRef Opaque pointer to a ``sycl::device`` + * @return Returns the valid result if device exists else returns NULL. + * @ingroup DeviceInterface + */ +DPCTL_API +__dpctl_keep size_t * +DPCTLDevice_GetMaxWorkItemSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef) + __attribute__((deprecated("DPCTLDevice_GetMaxWorkItemSizes is deprecated ", + "Use DPCTLDevice_WorkItemSizes3d instead"))); /*! * @brief Wrapper for get_info(). 
diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index dcaaaf4380..c65f9ac38b 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -46,6 +46,31 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(platform, DPCTLSyclPlatformRef) DEFINE_SIMPLE_CONVERSION_FUNCTIONS(std::vector, DPCTLDeviceVectorRef) +template +__dpctl_keep size_t * +DPCTLDevice__GetMaxWorkItemSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef) +{ + size_t *sizes = nullptr; + auto D = unwrap(DRef); + if (D) { + try { +#if __SYCL_COMPILER_VERSION >= 20220805 + auto id_sizes = + D->get_info>(); +#else + auto id_sizes = D->get_info(); +#endif + sizes = new size_t[dim]; + for (auto i = 0ul; i < dim; ++i) { + sizes[i] = id_sizes[i]; + } + } catch (std::exception const &e) { + error_handler(e, __FILE__, __func__, __LINE__); + } + } + return sizes; +} + } /* end of anonymous namespace */ __dpctl_give DPCTLSyclDeviceRef @@ -225,27 +250,28 @@ DPCTLDevice_GetMaxWorkItemDims(__dpctl_keep const DPCTLSyclDeviceRef DRef) return maxWorkItemDims; } +__dpctl_keep size_t * +DPCTLDevice_GetMaxWorkItemSizes1d(__dpctl_keep const DPCTLSyclDeviceRef DRef) +{ + return DPCTLDevice__GetMaxWorkItemSizes<1>(DRef); +} + +__dpctl_keep size_t * +DPCTLDevice_GetMaxWorkItemSizes2d(__dpctl_keep const DPCTLSyclDeviceRef DRef) +{ + return DPCTLDevice__GetMaxWorkItemSizes<2>(DRef); +} + +__dpctl_keep size_t * +DPCTLDevice_GetMaxWorkItemSizes3d(__dpctl_keep const DPCTLSyclDeviceRef DRef) +{ + return DPCTLDevice__GetMaxWorkItemSizes<3>(DRef); +} + __dpctl_keep size_t * DPCTLDevice_GetMaxWorkItemSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef) { - size_t *sizes = nullptr; - auto D = unwrap(DRef); - if (D) { - try { -#if __SYCL_COMPILER_VERSION >= 20220805 - auto id_sizes = D->get_info>(); -#else - auto id_sizes = D->get_info(); -#endif - sizes = new size_t[3]; - for (auto i = 0ul; i < 3; ++i) { - 
sizes[i] = id_sizes[i]; - } - } catch (std::exception const &e) { - error_handler(e, __FILE__, __func__, __LINE__); - } - } - return sizes; + return DPCTLDevice__GetMaxWorkItemSizes<3>(DRef); } size_t From 7cda3304c4155e1bf1b0c8845fb89e82f576c16b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 7 Aug 2022 10:08:52 -0500 Subject: [PATCH 56/95] Added tests for new device properties Reflactor common duplicated fixtures for device properties checks out into a common file made available to both `test_sycl_device.py` and `test_sycl_queue.py` via `conftest.py` per https://stackoverflow.com/questions/13641973/how-and-where-does-py-test-find-fixtures MaxWorkItemSizes1d, MaxWorkItemSizes2d and MaxWorkItemSizes3d properties are added. --- dpctl/tests/_device_attributes_checks.py | 592 ++++++++++++++++++ dpctl/tests/conftest.py | 10 + dpctl/tests/test_sycl_device.py | 591 +---------------- dpctl/tests/test_sycl_queue.py | 323 +--------- .../tests/test_sycl_device_interface.cpp | 24 + 5 files changed, 648 insertions(+), 892 deletions(-) create mode 100644 dpctl/tests/_device_attributes_checks.py diff --git a/dpctl/tests/_device_attributes_checks.py b/dpctl/tests/_device_attributes_checks.py new file mode 100644 index 0000000000..38c9e15341 --- /dev/null +++ b/dpctl/tests/_device_attributes_checks.py @@ -0,0 +1,592 @@ +import pytest + +import dpctl + +list_of_standard_selectors = [ + dpctl.select_accelerator_device, + dpctl.select_cpu_device, + dpctl.select_default_device, + dpctl.select_gpu_device, + dpctl.select_host_device, +] + +list_of_valid_filter_selectors = [ + "opencl", + "opencl:gpu", + "opencl:cpu", + "opencl:gpu:0", + "gpu", + "cpu", + "level_zero", + "level_zero:gpu", + "opencl:cpu:0", + "level_zero:gpu:0", + "gpu:0", + "gpu:1", + "1", +] + +list_of_invalid_filter_selectors = [ + "-1", + "opencl:gpu:-1", + "cuda:cpu:0", + "abc", + 1, +] + + +# Unit test cases that will be run for every device +def check_get_max_compute_units(device): + max_compute_units = 
device.max_compute_units + assert max_compute_units > 0 + + +def check_get_global_mem_size(device): + global_mem_size = device.global_mem_size + assert global_mem_size > 0 + + +def check_get_local_mem_size(device): + local_mem_size = device.local_mem_size + assert local_mem_size > 0 + + +def check_get_max_work_item_dims(device): + max_work_item_dims = device.max_work_item_dims + assert max_work_item_dims > 0 + + +def check_get_max_work_item_sizes1d(device): + max_work_item_sizes = device.max_work_item_sizes1d + for size in max_work_item_sizes: + assert size is not None + + +def check_get_max_work_item_sizes2d(device): + max_work_item_sizes = device.max_work_item_sizes2d + for size in max_work_item_sizes: + assert size is not None + + +def check_get_max_work_item_sizes3d(device): + max_work_item_sizes = device.max_work_item_sizes3d + for size in max_work_item_sizes: + assert size is not None + + +def check_get_max_work_group_size(device): + max_work_group_size = device.max_work_group_size + # Special case for FPGA simulator + if device.is_accelerator: + assert max_work_group_size >= 0 + else: + assert max_work_group_size > 0 + + +def check_get_max_num_sub_groups(device): + max_num_sub_groups = device.max_num_sub_groups + # Special case for FPGA simulator + if device.is_accelerator or device.is_host: + assert max_num_sub_groups >= 0 + else: + assert max_num_sub_groups > 0 + + +def check_has_aspect_host(device): + try: + device.has_aspect_host + except Exception: + pytest.fail("has_aspect_host call failed") + + +def check_has_aspect_cpu(device): + try: + device.has_aspect_cpu + except Exception: + pytest.fail("has_aspect_cpu call failed") + + +def check_has_aspect_gpu(device): + try: + device.has_aspect_gpu + except Exception: + pytest.fail("has_aspect_gpu call failed") + + +def check_has_aspect_accelerator(device): + try: + device.has_aspect_accelerator + except Exception: + pytest.fail("has_aspect_accelerator call failed") + + +def check_has_aspect_custom(device): + 
try: + device.has_aspect_custom + except Exception: + pytest.fail("has_aspect_custom call failed") + + +def check_has_aspect_fp16(device): + try: + device.has_aspect_fp16 + except Exception: + pytest.fail("has_aspect_fp16 call failed") + + +def check_has_aspect_fp64(device): + try: + device.has_aspect_fp64 + except Exception: + pytest.fail("has_aspect_fp64 call failed") + + +def check_has_aspect_atomic64(device): + try: + device.has_aspect_atomic64 + except Exception: + pytest.fail("has_aspect_atomic64 call failed") + + +def check_has_aspect_image(device): + try: + device.has_aspect_image + except Exception: + pytest.fail("has_aspect_image call failed") + + +def check_has_aspect_online_compiler(device): + try: + device.has_aspect_online_compiler + except Exception: + pytest.fail("has_aspect_online_compiler call failed") + + +def check_has_aspect_online_linker(device): + try: + device.has_aspect_online_linker + except Exception: + pytest.fail("has_aspect_online_linker call failed") + + +def check_has_aspect_queue_profiling(device): + try: + device.has_aspect_queue_profiling + except Exception: + pytest.fail("has_aspect_queue_profiling call failed") + + +def check_has_aspect_usm_device_allocations(device): + try: + device.has_aspect_usm_device_allocations + except Exception: + pytest.fail("has_aspect_usm_device_allocations call failed") + + +def check_has_aspect_usm_host_allocations(device): + try: + device.has_aspect_usm_host_allocations + except Exception: + pytest.fail("has_aspect_usm_host_allocations call failed") + + +def check_has_aspect_usm_shared_allocations(device): + try: + device.has_aspect_usm_shared_allocations + except Exception: + pytest.fail("has_aspect_usm_shared_allocations call failed") + + +def check_has_aspect_usm_restricted_shared_allocations(device): + try: + device.has_aspect_usm_restricted_shared_allocations + except Exception: + pytest.fail("has_aspect_usm_restricted_shared_allocations call failed") + + +def 
check_has_aspect_usm_system_allocations(device): + try: + device.has_aspect_usm_system_allocations + except Exception: + pytest.fail("has_aspect_usm_system_allocations call failed") + + +def check_has_aspect_usm_atomic_host_allocations(device): + try: + device.has_aspect_usm_atomic_host_allocations + except Exception: + pytest.fail("has_aspect_usm_atomic_host_allocations call failed") + + +def check_has_aspect_usm_atomic_shared_allocations(device): + try: + device.has_aspect_usm_atomic_shared_allocations + except Exception: + pytest.fail("has_aspect_usm_atomic_shared_allocations call failed") + + +def check_has_aspect_host_debuggable(device): + try: + device.has_aspect_host_debuggable + except Exception: + pytest.fail("has_aspect_host_debuggable call failed") + + +def check_is_accelerator(device): + try: + device.is_accelerator + except Exception: + pytest.fail("is_accelerator call failed") + + +def check_is_cpu(device): + try: + device.is_cpu + except Exception: + pytest.fail("is_cpu call failed") + + +def check_is_gpu(device): + try: + device.is_gpu + except Exception: + pytest.fail("is_gpu call failed") + + +def check_is_host(device): + try: + device.is_host + except Exception: + pytest.fail("is_hostcall failed") + + +def check_get_max_read_image_args(device): + try: + device.max_read_image_args + except Exception: + pytest.fail("max_read_image_args call failed") + + +def check_get_max_write_image_args(device): + try: + device.max_write_image_args + except Exception: + pytest.fail("max_write_image_args call failed") + + +def check_get_image_2d_max_width(device): + try: + device.image_2d_max_width + except Exception: + pytest.fail("image_2d_max_width call failed") + + +def check_get_image_2d_max_height(device): + try: + device.image_2d_max_height + except Exception: + pytest.fail("image_2d_max_height call failed") + + +def check_get_image_3d_max_width(device): + try: + device.image_3d_max_width + except Exception: + pytest.fail("image_3d_max_width call failed") + 
+ +def check_get_image_3d_max_height(device): + try: + device.image_3d_max_height + except Exception: + pytest.fail("image_3d_max_height call failed") + + +def check_get_image_3d_max_depth(device): + try: + device.image_3d_max_depth + except Exception: + pytest.fail("image_3d_max_depth call failed") + + +def check_get_sub_group_independent_forward_progress(device): + try: + device.sub_group_independent_forward_progress + except Exception: + pytest.fail("sub_group_independent_forward_progress call failed") + + +def check_get_preferred_vector_width_char(device): + try: + device.preferred_vector_width_char + except Exception: + pytest.fail("preferred_vector_width_char call failed") + + +def check_get_preferred_vector_width_short(device): + try: + device.preferred_vector_width_short + except Exception: + pytest.fail("preferred_vector_width_short call failed") + + +def check_get_preferred_vector_width_int(device): + try: + device.preferred_vector_width_int + except Exception: + pytest.fail("preferred_vector_width_int call failed") + + +def check_get_preferred_vector_width_long(device): + try: + device.preferred_vector_width_long + except Exception: + pytest.fail("preferred_vector_width_long call failed") + + +def check_get_preferred_vector_width_float(device): + try: + device.preferred_vector_width_float + except Exception: + pytest.fail("preferred_vector_width_float call failed") + + +def check_get_preferred_vector_width_double(device): + try: + device.preferred_vector_width_double + except Exception: + pytest.fail("preferred_vector_width_double call failed") + + +def check_get_preferred_vector_width_half(device): + try: + device.preferred_vector_width_half + except Exception: + pytest.fail("preferred_vector_width_half call failed") + + +def check_create_sub_devices_equally(device): + try: + n = int(device.max_compute_units / 2) + device.create_sub_devices(partition=n) + except dpctl.SyclSubDeviceCreationError: + pytest.skip( + "create_sub_devices can't create 
sub-devices on this device" + ) + except Exception: + pytest.fail("create_sub_devices failed") + + +def check_create_sub_devices_equally_zeros(device): + try: + device.create_sub_devices(partition=0) + except TypeError: + pass + + +def check_create_sub_devices_by_counts(device): + try: + n = device.max_compute_units / 2 + device.create_sub_devices(partition=(n, n)) + except dpctl.SyclSubDeviceCreationError: + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) + except Exception: + pytest.fail("create_sub_devices failed") + + +def check_create_sub_devices_by_counts_zeros(device): + try: + device.create_sub_devices(partition=(0, 1)) + except TypeError: + pass + + +def check_create_sub_devices_by_affinity_not_applicable(device): + try: + device.create_sub_devices(partition="not_applicable") + except dpctl.SyclSubDeviceCreationError: + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) + except Exception: + pytest.fail("create_sub_devices failed") + + +def check_create_sub_devices_by_affinity_numa(device): + try: + device.create_sub_devices(partition="numa") + except dpctl.SyclSubDeviceCreationError: + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) + except Exception: + pytest.fail("create_sub_devices failed") + + +def check_create_sub_devices_by_affinity_L4_cache(device): + try: + device.create_sub_devices(partition="L4_cache") + except dpctl.SyclSubDeviceCreationError: + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) + except Exception: + pytest.fail("create_sub_devices failed") + + +def check_create_sub_devices_by_affinity_L3_cache(device): + try: + device.create_sub_devices(partition="L3_cache") + except dpctl.SyclSubDeviceCreationError: + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) + except Exception: + pytest.fail("create_sub_devices failed") + + +def check_create_sub_devices_by_affinity_L2_cache(device): 
+ try: + device.create_sub_devices(partition="L2_cache") + except dpctl.SyclSubDeviceCreationError: + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) + except Exception: + pytest.fail("create_sub_devices failed") + + +def check_create_sub_devices_by_affinity_L1_cache(device): + try: + device.create_sub_devices(partition="L1_cache") + except dpctl.SyclSubDeviceCreationError: + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) + except Exception: + pytest.fail("create_sub_devices failed") + + +def check_create_sub_devices_by_affinity_next_partitionable(device): + try: + device.create_sub_devices(partition="next_partitionable") + except dpctl.SyclSubDeviceCreationError: + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) + except Exception: + pytest.fail("create_sub_devices failed") + + +def check_print_device_info(device): + try: + device.print_device_info() + except Exception: + pytest.fail("Encountered an exception inside print_device_info().") + + +def check_repr(device): + assert type(repr(device)) is str + + +def check_profiling_timer_resolution(device): + try: + resol = device.profiling_timer_resolution + except Exception: + pytest.fail( + "Encountered an exception inside " + "profiling_timer_resolution property." 
+ ) + assert isinstance(resol, int) and resol > 0 + + +def check_platform(device): + p = device.sycl_platform + assert isinstance(p, dpctl.SyclPlatform) + + +list_of_checks = [ + check_get_max_compute_units, + check_get_max_work_item_dims, + check_get_max_work_item_sizes1d, + check_get_max_work_item_sizes2d, + check_get_max_work_item_sizes3d, + check_get_max_work_group_size, + check_get_max_num_sub_groups, + check_is_accelerator, + check_is_cpu, + check_is_gpu, + check_is_host, + check_get_sub_group_independent_forward_progress, + check_get_preferred_vector_width_char, + check_get_preferred_vector_width_short, + check_get_preferred_vector_width_int, + check_get_preferred_vector_width_long, + check_get_preferred_vector_width_float, + check_get_preferred_vector_width_double, + check_get_preferred_vector_width_half, + check_has_aspect_host, + check_has_aspect_cpu, + check_has_aspect_gpu, + check_has_aspect_accelerator, + check_has_aspect_custom, + check_has_aspect_fp16, + check_has_aspect_fp64, + check_has_aspect_atomic64, + check_has_aspect_image, + check_has_aspect_online_compiler, + check_has_aspect_online_linker, + check_has_aspect_queue_profiling, + check_has_aspect_usm_device_allocations, + check_has_aspect_usm_host_allocations, + check_has_aspect_usm_shared_allocations, + check_has_aspect_usm_restricted_shared_allocations, + check_has_aspect_usm_system_allocations, + check_has_aspect_usm_atomic_host_allocations, + check_has_aspect_usm_atomic_shared_allocations, + check_has_aspect_host_debuggable, + check_get_max_read_image_args, + check_get_max_write_image_args, + check_get_image_2d_max_width, + check_get_image_2d_max_height, + check_get_image_3d_max_width, + check_get_image_3d_max_height, + check_get_image_3d_max_depth, + check_create_sub_devices_equally, + check_create_sub_devices_by_counts, + check_create_sub_devices_by_affinity_not_applicable, + check_create_sub_devices_by_affinity_numa, + check_create_sub_devices_by_affinity_L4_cache, + 
check_create_sub_devices_by_affinity_L3_cache, + check_create_sub_devices_by_affinity_L2_cache, + check_create_sub_devices_by_affinity_L1_cache, + check_create_sub_devices_by_affinity_next_partitionable, + check_print_device_info, + check_repr, + check_get_global_mem_size, + check_get_local_mem_size, + check_profiling_timer_resolution, + check_platform, +] + + +@pytest.fixture(params=list_of_valid_filter_selectors) +def valid_filter(request): + return request.param + + +@pytest.fixture(params=list_of_invalid_filter_selectors) +def invalid_filter(request): + return request.param + + +@pytest.fixture(params=list_of_standard_selectors) +def device_selector(request): + return request.param + + +@pytest.fixture(params=list_of_checks) +def check(request): + return request.param diff --git a/dpctl/tests/conftest.py b/dpctl/tests/conftest.py index 6b85eea77c..367b7b8bd0 100644 --- a/dpctl/tests/conftest.py +++ b/dpctl/tests/conftest.py @@ -20,4 +20,14 @@ import os import sys +from _device_attributes_checks import ( + check, + device_selector, + invalid_filter, + valid_filter, +) + sys.path.append(os.path.join(os.path.dirname(__file__), "helper")) + +# common fixtures +__all__ = ["check", "device_selector", "invalid_filter", "valid_filter"] diff --git a/dpctl/tests/test_sycl_device.py b/dpctl/tests/test_sycl_device.py index b047f6f16f..c938aec466 100644 --- a/dpctl/tests/test_sycl_device.py +++ b/dpctl/tests/test_sycl_device.py @@ -20,596 +20,7 @@ import pytest import dpctl -from dpctl import SyclDeviceCreationError, SyclSubDeviceCreationError - -list_of_standard_selectors = [ - dpctl.select_accelerator_device, - dpctl.select_cpu_device, - dpctl.select_default_device, - dpctl.select_gpu_device, - dpctl.select_host_device, -] - -list_of_valid_filter_selectors = [ - "opencl", - "opencl:gpu", - "opencl:cpu", - "opencl:gpu:0", - "gpu", - "cpu", - "level_zero", - "level_zero:gpu", - "opencl:cpu:0", - "level_zero:gpu:0", - "gpu:0", - "gpu:1", - "1", -] - 
-list_of_invalid_filter_selectors = [ - "-1", - "opencl:gpu:-1", - "cuda:cpu:0", - "abc", - 1, -] - - -# Unit test cases that will be run for every device -def check_get_max_compute_units(device): - max_compute_units = device.max_compute_units - assert max_compute_units > 0 - - -def check_get_global_mem_size(device): - global_mem_size = device.global_mem_size - assert global_mem_size > 0 - - -def check_get_local_mem_size(device): - local_mem_size = device.local_mem_size - assert local_mem_size > 0 - - -def check_get_max_work_item_dims(device): - max_work_item_dims = device.max_work_item_dims - assert max_work_item_dims > 0 - - -def check_get_max_work_item_sizes1d(device): - max_work_item_sizes = device.max_work_item_sizes1d - for size in max_work_item_sizes: - assert size is not None - - -def check_get_max_work_item_sizes2d(device): - max_work_item_sizes = device.max_work_item_sizes2d - for size in max_work_item_sizes: - assert size is not None - - -def check_get_max_work_item_sizes3d(device): - max_work_item_sizes = device.max_work_item_sizes3d - for size in max_work_item_sizes: - assert size is not None - - -def check_get_max_work_group_size(device): - max_work_group_size = device.max_work_group_size - # Special case for FPGA simulator - if device.is_accelerator: - assert max_work_group_size >= 0 - else: - assert max_work_group_size > 0 - - -def check_get_max_num_sub_groups(device): - max_num_sub_groups = device.max_num_sub_groups - # Special case for FPGA simulator - if device.is_accelerator or device.is_host: - assert max_num_sub_groups >= 0 - else: - assert max_num_sub_groups > 0 - - -def check_has_aspect_host(device): - try: - device.has_aspect_host - except Exception: - pytest.fail("has_aspect_host call failed") - - -def check_has_aspect_cpu(device): - try: - device.has_aspect_cpu - except Exception: - pytest.fail("has_aspect_cpu call failed") - - -def check_has_aspect_gpu(device): - try: - device.has_aspect_gpu - except Exception: - 
pytest.fail("has_aspect_gpu call failed") - - -def check_has_aspect_accelerator(device): - try: - device.has_aspect_accelerator - except Exception: - pytest.fail("has_aspect_accelerator call failed") - - -def check_has_aspect_custom(device): - try: - device.has_aspect_custom - except Exception: - pytest.fail("has_aspect_custom call failed") - - -def check_has_aspect_fp16(device): - try: - device.has_aspect_fp16 - except Exception: - pytest.fail("has_aspect_fp16 call failed") - - -def check_has_aspect_fp64(device): - try: - device.has_aspect_fp64 - except Exception: - pytest.fail("has_aspect_fp64 call failed") - - -def check_has_aspect_atomic64(device): - try: - device.has_aspect_atomic64 - except Exception: - pytest.fail("has_aspect_atomic64 call failed") - - -def check_has_aspect_image(device): - try: - device.has_aspect_image - except Exception: - pytest.fail("has_aspect_image call failed") - - -def check_has_aspect_online_compiler(device): - try: - device.has_aspect_online_compiler - except Exception: - pytest.fail("has_aspect_online_compiler call failed") - - -def check_has_aspect_online_linker(device): - try: - device.has_aspect_online_linker - except Exception: - pytest.fail("has_aspect_online_linker call failed") - - -def check_has_aspect_queue_profiling(device): - try: - device.has_aspect_queue_profiling - except Exception: - pytest.fail("has_aspect_queue_profiling call failed") - - -def check_has_aspect_usm_device_allocations(device): - try: - device.has_aspect_usm_device_allocations - except Exception: - pytest.fail("has_aspect_usm_device_allocations call failed") - - -def check_has_aspect_usm_host_allocations(device): - try: - device.has_aspect_usm_host_allocations - except Exception: - pytest.fail("has_aspect_usm_host_allocations call failed") - - -def check_has_aspect_usm_shared_allocations(device): - try: - device.has_aspect_usm_shared_allocations - except Exception: - pytest.fail("has_aspect_usm_shared_allocations call failed") - - -def 
check_has_aspect_usm_restricted_shared_allocations(device): - try: - device.has_aspect_usm_restricted_shared_allocations - except Exception: - pytest.fail("has_aspect_usm_restricted_shared_allocations call failed") - - -def check_has_aspect_usm_system_allocations(device): - try: - device.has_aspect_usm_system_allocations - except Exception: - pytest.fail("has_aspect_usm_system_allocations call failed") - - -def check_has_aspect_usm_atomic_host_allocations(device): - try: - device.has_aspect_usm_atomic_host_allocations - except Exception: - pytest.fail("has_aspect_usm_atomic_host_allocations call failed") - - -def check_has_aspect_usm_atomic_shared_allocations(device): - try: - device.has_aspect_usm_atomic_shared_allocations - except Exception: - pytest.fail("has_aspect_usm_atomic_shared_allocations call failed") - - -def check_has_aspect_host_debuggable(device): - try: - device.has_aspect_host_debuggable - except Exception: - pytest.fail("has_aspect_host_debuggable call failed") - - -def check_is_accelerator(device): - try: - device.is_accelerator - except Exception: - pytest.fail("is_accelerator call failed") - - -def check_is_cpu(device): - try: - device.is_cpu - except Exception: - pytest.fail("is_cpu call failed") - - -def check_is_gpu(device): - try: - device.is_gpu - except Exception: - pytest.fail("is_gpu call failed") - - -def check_is_host(device): - try: - device.is_host - except Exception: - pytest.fail("is_hostcall failed") - - -def check_get_max_read_image_args(device): - try: - device.max_read_image_args - except Exception: - pytest.fail("max_read_image_args call failed") - - -def check_get_max_write_image_args(device): - try: - device.max_write_image_args - except Exception: - pytest.fail("max_write_image_args call failed") - - -def check_get_image_2d_max_width(device): - try: - device.image_2d_max_width - except Exception: - pytest.fail("image_2d_max_width call failed") - - -def check_get_image_2d_max_height(device): - try: - 
device.image_2d_max_height - except Exception: - pytest.fail("image_2d_max_height call failed") - - -def check_get_image_3d_max_width(device): - try: - device.image_3d_max_width - except Exception: - pytest.fail("image_3d_max_width call failed") - - -def check_get_image_3d_max_height(device): - try: - device.image_3d_max_height - except Exception: - pytest.fail("image_3d_max_height call failed") - - -def check_get_image_3d_max_depth(device): - try: - device.image_3d_max_depth - except Exception: - pytest.fail("image_3d_max_depth call failed") - - -def check_get_sub_group_independent_forward_progress(device): - try: - device.sub_group_independent_forward_progress - except Exception: - pytest.fail("sub_group_independent_forward_progress call failed") - - -def check_get_preferred_vector_width_char(device): - try: - device.preferred_vector_width_char - except Exception: - pytest.fail("preferred_vector_width_char call failed") - - -def check_get_preferred_vector_width_short(device): - try: - device.preferred_vector_width_short - except Exception: - pytest.fail("preferred_vector_width_short call failed") - - -def check_get_preferred_vector_width_int(device): - try: - device.preferred_vector_width_int - except Exception: - pytest.fail("preferred_vector_width_int call failed") - - -def check_get_preferred_vector_width_long(device): - try: - device.preferred_vector_width_long - except Exception: - pytest.fail("preferred_vector_width_long call failed") - - -def check_get_preferred_vector_width_float(device): - try: - device.preferred_vector_width_float - except Exception: - pytest.fail("preferred_vector_width_float call failed") - - -def check_get_preferred_vector_width_double(device): - try: - device.preferred_vector_width_double - except Exception: - pytest.fail("preferred_vector_width_double call failed") - - -def check_get_preferred_vector_width_half(device): - try: - device.preferred_vector_width_half - except Exception: - pytest.fail("preferred_vector_width_half call 
failed") - - -def check_create_sub_devices_equally(device): - try: - n = int(device.max_compute_units / 2) - device.create_sub_devices(partition=n) - except SyclSubDeviceCreationError: - pytest.skip( - "create_sub_devices can't create sub-devices on this device" - ) - except Exception: - pytest.fail("create_sub_devices failed") - - -def check_create_sub_devices_equally_zeros(device): - try: - device.create_sub_devices(partition=0) - except TypeError: - pass - - -def check_create_sub_devices_by_counts(device): - try: - n = device.max_compute_units / 2 - device.create_sub_devices(partition=(n, n)) - except SyclSubDeviceCreationError: - pytest.skip( - "create_sub_devices can't create sub-devices on this device" - ) - except Exception: - pytest.fail("create_sub_devices failed") - - -def check_create_sub_devices_by_counts_zeros(device): - try: - device.create_sub_devices(partition=(0, 1)) - except TypeError: - pass - - -def check_create_sub_devices_by_affinity_not_applicable(device): - try: - device.create_sub_devices(partition="not_applicable") - except SyclSubDeviceCreationError: - pytest.skip( - "create_sub_devices can't create sub-devices on this device" - ) - except Exception: - pytest.fail("create_sub_devices failed") - - -def check_create_sub_devices_by_affinity_numa(device): - try: - device.create_sub_devices(partition="numa") - except SyclSubDeviceCreationError: - pytest.skip( - "create_sub_devices can't create sub-devices on this device" - ) - except Exception: - pytest.fail("create_sub_devices failed") - - -def check_create_sub_devices_by_affinity_L4_cache(device): - try: - device.create_sub_devices(partition="L4_cache") - except SyclSubDeviceCreationError: - pytest.skip( - "create_sub_devices can't create sub-devices on this device" - ) - except Exception: - pytest.fail("create_sub_devices failed") - - -def check_create_sub_devices_by_affinity_L3_cache(device): - try: - device.create_sub_devices(partition="L3_cache") - except SyclSubDeviceCreationError: - 
pytest.skip( - "create_sub_devices can't create sub-devices on this device" - ) - except Exception: - pytest.fail("create_sub_devices failed") - - -def check_create_sub_devices_by_affinity_L2_cache(device): - try: - device.create_sub_devices(partition="L2_cache") - except SyclSubDeviceCreationError: - pytest.skip( - "create_sub_devices can't create sub-devices on this device" - ) - except Exception: - pytest.fail("create_sub_devices failed") - - -def check_create_sub_devices_by_affinity_L1_cache(device): - try: - device.create_sub_devices(partition="L1_cache") - except SyclSubDeviceCreationError: - pytest.skip( - "create_sub_devices can't create sub-devices on this device" - ) - except Exception: - pytest.fail("create_sub_devices failed") - - -def check_create_sub_devices_by_affinity_next_partitionable(device): - try: - device.create_sub_devices(partition="next_partitionable") - except SyclSubDeviceCreationError: - pytest.skip( - "create_sub_devices can't create sub-devices on this device" - ) - except Exception: - pytest.fail("create_sub_devices failed") - - -def check_print_device_info(device): - try: - device.print_device_info() - except Exception: - pytest.fail("Encountered an exception inside print_device_info().") - - -def check_repr(device): - assert type(repr(device)) is str - - -def check_profiling_timer_resolution(device): - try: - resol = device.profiling_timer_resolution - except Exception: - pytest.fail( - "Encountered an exception inside " - "profiling_timer_resolution property." 
- ) - assert isinstance(resol, int) and resol > 0 - - -def check_platform(device): - p = device.sycl_platform - assert isinstance(p, dpctl.SyclPlatform) - - -list_of_checks = [ - check_get_max_compute_units, - check_get_max_work_item_dims, - check_get_max_work_item_sizes1d, - check_get_max_work_item_sizes2d, - check_get_max_work_item_sizes3d, - check_get_max_work_group_size, - check_get_max_num_sub_groups, - check_is_accelerator, - check_is_cpu, - check_is_gpu, - check_is_host, - check_get_sub_group_independent_forward_progress, - check_get_preferred_vector_width_char, - check_get_preferred_vector_width_short, - check_get_preferred_vector_width_int, - check_get_preferred_vector_width_long, - check_get_preferred_vector_width_float, - check_get_preferred_vector_width_double, - check_get_preferred_vector_width_half, - check_has_aspect_host, - check_has_aspect_cpu, - check_has_aspect_gpu, - check_has_aspect_accelerator, - check_has_aspect_custom, - check_has_aspect_fp16, - check_has_aspect_fp64, - check_has_aspect_atomic64, - check_has_aspect_image, - check_has_aspect_online_compiler, - check_has_aspect_online_linker, - check_has_aspect_queue_profiling, - check_has_aspect_usm_device_allocations, - check_has_aspect_usm_host_allocations, - check_has_aspect_usm_shared_allocations, - check_has_aspect_usm_restricted_shared_allocations, - check_has_aspect_usm_system_allocations, - check_has_aspect_usm_atomic_host_allocations, - check_has_aspect_usm_atomic_shared_allocations, - check_has_aspect_host_debuggable, - check_get_max_read_image_args, - check_get_max_write_image_args, - check_get_image_2d_max_width, - check_get_image_2d_max_height, - check_get_image_3d_max_width, - check_get_image_3d_max_height, - check_get_image_3d_max_depth, - check_create_sub_devices_equally, - check_create_sub_devices_by_counts, - check_create_sub_devices_by_affinity_not_applicable, - check_create_sub_devices_by_affinity_numa, - check_create_sub_devices_by_affinity_L4_cache, - 
check_create_sub_devices_by_affinity_L3_cache, - check_create_sub_devices_by_affinity_L2_cache, - check_create_sub_devices_by_affinity_L1_cache, - check_create_sub_devices_by_affinity_next_partitionable, - check_print_device_info, - check_repr, - check_get_global_mem_size, - check_get_local_mem_size, - check_profiling_timer_resolution, - check_platform, -] - - -@pytest.fixture(params=list_of_valid_filter_selectors) -def valid_filter(request): - return request.param - - -@pytest.fixture(params=list_of_invalid_filter_selectors) -def invalid_filter(request): - return request.param - - -@pytest.fixture(params=list_of_standard_selectors) -def device_selector(request): - return request.param - - -@pytest.fixture(params=list_of_checks) -def check(request): - return request.param +from dpctl import SyclDeviceCreationError def test_standard_selectors(device_selector, check): diff --git a/dpctl/tests/test_sycl_queue.py b/dpctl/tests/test_sycl_queue.py index 8db836cd1a..5a29dc81ce 100644 --- a/dpctl/tests/test_sycl_queue.py +++ b/dpctl/tests/test_sycl_queue.py @@ -25,307 +25,6 @@ import dpctl -list_of_standard_selectors = [ - dpctl.select_accelerator_device, - dpctl.select_cpu_device, - dpctl.select_default_device, - dpctl.select_gpu_device, - dpctl.select_host_device, -] - -list_of_valid_filter_selectors = [ - "opencl", - "opencl:gpu", - "opencl:cpu", - "opencl:gpu:0", - "gpu", - "cpu", - "level_zero", - "level_zero:gpu", - "opencl:cpu:0", - "level_zero:gpu:0", - "gpu:0", - "gpu:1", - "1", -] - -list_of_invalid_filter_selectors = [ - "-1", - "opencl:gpu:-1", - "level_zero:cpu:0", - "abc", -] - - -# Unit test cases that will be run for every device -def check_get_max_compute_units(device): - max_compute_units = device.max_compute_units - assert max_compute_units > 0 - - -def check_get_max_work_item_dims(device): - max_work_item_dims = device.max_work_item_dims - assert max_work_item_dims > 0 - - -def check_get_max_work_item_sizes1d(device): - max_work_item_sizes = 
device.max_work_item_sizes1d - for size in max_work_item_sizes: - assert size is not None - - -def check_get_max_work_item_sizes2d(device): - max_work_item_sizes = device.max_work_item_sizes2d - for size in max_work_item_sizes: - assert size is not None - - -def check_get_max_work_item_sizes3d(device): - max_work_item_sizes = device.max_work_item_sizes3d - for size in max_work_item_sizes: - assert size is not None - - -def check_get_max_work_group_size(device): - max_work_group_size = device.max_work_group_size - # Special case for FPGA simulator - if device.is_accelerator: - assert max_work_group_size >= 0 - else: - assert max_work_group_size > 0 - - -def check_get_max_num_sub_groups(device): - max_num_sub_groups = device.max_num_sub_groups - # Special case for FPGA simulator - if device.is_accelerator or device.is_host: - assert max_num_sub_groups >= 0 - else: - assert max_num_sub_groups > 0 - - -def check_has_aspect_host(device): - try: - device.has_aspect_host - except Exception: - pytest.fail("has_aspect_host call failed") - - -def check_has_aspect_cpu(device): - try: - device.has_aspect_cpu - except Exception: - pytest.fail("has_aspect_cpu call failed") - - -def check_has_aspect_gpu(device): - try: - device.has_aspect_gpu - except Exception: - pytest.fail("has_aspect_gpu call failed") - - -def check_has_aspect_accelerator(device): - try: - device.has_aspect_accelerator - except Exception: - pytest.fail("has_aspect_accelerator call failed") - - -def check_has_aspect_custom(device): - try: - device.has_aspect_custom - except Exception: - pytest.fail("has_aspect_custom call failed") - - -def check_has_aspect_fp16(device): - try: - device.has_aspect_fp16 - except Exception: - pytest.fail("has_aspect_fp16 call failed") - - -def check_has_aspect_fp64(device): - try: - device.has_aspect_fp64 - except Exception: - pytest.fail("has_aspect_fp64 call failed") - - -def check_has_aspect_atomic64(device): - try: - device.has_aspect_atomic64 - except Exception: - 
pytest.fail("has_aspect_atomic64 call failed") - - -def check_has_aspect_image(device): - try: - device.has_aspect_image - except Exception: - pytest.fail("has_aspect_image call failed") - - -def check_has_aspect_online_compiler(device): - try: - device.has_aspect_online_compiler - except Exception: - pytest.fail("has_aspect_online_compiler call failed") - - -def check_has_aspect_online_linker(device): - try: - device.has_aspect_online_linker - except Exception: - pytest.fail("has_aspect_online_linker call failed") - - -def check_has_aspect_queue_profiling(device): - try: - device.has_aspect_queue_profiling - except Exception: - pytest.fail("has_aspect_queue_profiling call failed") - - -def check_has_aspect_usm_device_allocations(device): - try: - device.has_aspect_usm_device_allocations - except Exception: - pytest.fail("has_aspect_usm_device_allocations call failed") - - -def check_has_aspect_usm_host_allocations(device): - try: - device.has_aspect_usm_host_allocations - except Exception: - pytest.fail("has_aspect_usm_host_allocations call failed") - - -def check_has_aspect_usm_shared_allocations(device): - try: - device.has_aspect_usm_shared_allocations - except Exception: - pytest.fail("has_aspect_usm_shared_allocations call failed") - - -def check_has_aspect_usm_restricted_shared_allocations(device): - try: - device.has_aspect_usm_restricted_shared_allocations - except Exception: - pytest.fail("has_aspect_usm_restricted_shared_allocations call failed") - - -def check_has_aspect_usm_system_allocations(device): - try: - device.has_aspect_usm_system_allocations - except Exception: - pytest.fail("has_aspect_usm_system_allocations call failed") - - -def check_has_aspect_usm_atomic_host_allocations(device): - try: - device.has_aspect_usm_atomic_host_allocations - except Exception: - pytest.fail("has_aspect_usm_atomic_host_allocations call failed") - - -def check_has_aspect_usm_atomic_shared_allocations(device): - try: - 
device.has_aspect_usm_atomic_shared_allocations - except Exception: - pytest.fail("has_aspect_usm_atomic_shared_allocations call failed") - - -def check_has_aspect_host_debuggable(device): - try: - device.has_aspect_host_debuggable - except Exception: - pytest.fail("has_aspect_host_debuggable call failed") - - -def check_is_accelerator(device): - try: - device.is_accelerator - except Exception: - pytest.fail("is_accelerator call failed") - - -def check_is_cpu(device): - try: - device.is_cpu - except Exception: - pytest.fail("is_cpu call failed") - - -def check_is_gpu(device): - try: - device.is_gpu - except Exception: - pytest.fail("is_gpu call failed") - - -def check_is_host(device): - try: - device.is_host - except Exception: - pytest.fail("is_hostcall failed") - - -list_of_checks = [ - check_get_max_compute_units, - check_get_max_work_item_dims, - check_get_max_work_item_sizes1d, - check_get_max_work_item_sizes2d, - check_get_max_work_item_sizes3d, - check_get_max_work_group_size, - check_get_max_num_sub_groups, - check_is_accelerator, - check_is_cpu, - check_is_gpu, - check_is_host, - check_has_aspect_host, - check_has_aspect_cpu, - check_has_aspect_gpu, - check_has_aspect_accelerator, - check_has_aspect_custom, - check_has_aspect_fp16, - check_has_aspect_fp64, - check_has_aspect_atomic64, - check_has_aspect_image, - check_has_aspect_online_compiler, - check_has_aspect_online_linker, - check_has_aspect_queue_profiling, - check_has_aspect_usm_device_allocations, - check_has_aspect_usm_host_allocations, - check_has_aspect_usm_shared_allocations, - check_has_aspect_usm_restricted_shared_allocations, - check_has_aspect_usm_system_allocations, - check_has_aspect_usm_atomic_host_allocations, - check_has_aspect_usm_atomic_shared_allocations, - check_has_aspect_host_debuggable, -] - - -@pytest.fixture(params=list_of_valid_filter_selectors) -def valid_filter(request): - return request.param - - -@pytest.fixture(params=list_of_invalid_filter_selectors) -def 
invalid_filter(request): - return request.param - - -@pytest.fixture(params=list_of_standard_selectors) -def device_selector(request): - return request.param - - -@pytest.fixture(params=list_of_checks) -def check(request): - return request.param - def test_standard_selectors(device_selector, check): """ @@ -376,7 +75,12 @@ def test_invalid_filter_selectors(invalid_filter): An invalid filter string should always be caught and a SyclQueueCreationError raised. """ - with pytest.raises(dpctl.SyclQueueCreationError): + expected_exception = ( + dpctl.SyclQueueCreationError + if isinstance(invalid_filter, str) + else TypeError + ) + with pytest.raises(expected_exception): dpctl.SyclQueue(invalid_filter) @@ -499,6 +203,21 @@ def test_queue_capsule(): assert q2 != [] # compare with other types +def test_queue_ctor(): + # construct from device + try: + d = dpctl.SyclDevice() + except dpctl.SyclDeviceCreationError: + pytest.skip("Could not create default device") + q = dpctl.SyclQueue(d) + assert q.sycl_device == d + + ctx = dpctl.SyclContext(d) + q = dpctl.SyclQueue(ctx, d) + assert q.sycl_context == ctx + assert q.sycl_device == d + + def test_cpython_api_SyclQueue_GetQueueRef(): q = dpctl.SyclQueue() mod = sys.modules[q.__class__.__module__] diff --git a/libsyclinterface/tests/test_sycl_device_interface.cpp b/libsyclinterface/tests/test_sycl_device_interface.cpp index 17cc1f0343..71be76fe80 100644 --- a/libsyclinterface/tests/test_sycl_device_interface.cpp +++ b/libsyclinterface/tests/test_sycl_device_interface.cpp @@ -153,6 +153,30 @@ TEST_P(TestDPCTLSyclDeviceInterface, ChkGetMaxWorkItemDims) EXPECT_TRUE(n > 0); } +TEST_P(TestDPCTLSyclDeviceInterface, ChkGetMaxWorkItemSizes1d) +{ + size_t *sizes = nullptr; + EXPECT_NO_FATAL_FAILURE(sizes = DPCTLDevice_GetMaxWorkItemSizes1d(DRef)); + EXPECT_TRUE(sizes != nullptr); + EXPECT_NO_FATAL_FAILURE(DPCTLSize_t_Array_Delete(sizes)); +} + +TEST_P(TestDPCTLSyclDeviceInterface, ChkGetMaxWorkItemSizes2d) +{ + size_t *sizes = nullptr; + 
EXPECT_NO_FATAL_FAILURE(sizes = DPCTLDevice_GetMaxWorkItemSizes2d(DRef)); + EXPECT_TRUE(sizes != nullptr); + EXPECT_NO_FATAL_FAILURE(DPCTLSize_t_Array_Delete(sizes)); +} + +TEST_P(TestDPCTLSyclDeviceInterface, ChkGetMaxWorkItemSizes3d) +{ + size_t *sizes = nullptr; + EXPECT_NO_FATAL_FAILURE(sizes = DPCTLDevice_GetMaxWorkItemSizes3d(DRef)); + EXPECT_TRUE(sizes != nullptr); + EXPECT_NO_FATAL_FAILURE(DPCTLSize_t_Array_Delete(sizes)); +} + TEST_P(TestDPCTLSyclDeviceInterface, ChkGetMaxWorkItemSizes) { size_t *sizes = nullptr; From 1e3eceed11726f282d247bcaa72b18bb55a80bf3 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 7 Aug 2022 11:13:32 -0500 Subject: [PATCH 57/95] Added tests for deprecated property: --- dpctl/_sycl_device.pyx | 5 ++--- dpctl/tests/_device_attributes_checks.py | 8 ++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index e2906792e8..2da48ef333 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -700,11 +700,10 @@ cdef class SyclDevice(_SyclDevice): `(1; 1; 1)` for devices that are not of device type ``info::device_type::custom``. 
""" - import warnings warnings.warn( - "Use dpctl.SyclDevice.max_work_item_sizes3d", + "dpctl.SyclDevice.max_work_item_sizes is deprecated, " + "use dpctl.SyclDevice.max_work_item_sizes3d instead", DeprecationWarning, - stacklevel=2 ) return ( self._max_work_item_sizes[0], diff --git a/dpctl/tests/_device_attributes_checks.py b/dpctl/tests/_device_attributes_checks.py index 38c9e15341..59c458fedb 100644 --- a/dpctl/tests/_device_attributes_checks.py +++ b/dpctl/tests/_device_attributes_checks.py @@ -74,6 +74,13 @@ def check_get_max_work_item_sizes3d(device): assert size is not None +@pytest.mark.filterwarnings("DeprecationWarning:") +def check_get_max_work_item_sizes(device): + max_work_item_sizes = device.max_work_item_sizes + for size in max_work_item_sizes: + assert size is not None + + def check_get_max_work_group_size(device): max_work_group_size = device.max_work_group_size # Special case for FPGA simulator @@ -513,6 +520,7 @@ def check_platform(device): check_get_max_work_item_sizes1d, check_get_max_work_item_sizes2d, check_get_max_work_item_sizes3d, + check_get_max_work_item_sizes, check_get_max_work_group_size, check_get_max_num_sub_groups, check_is_accelerator, From 2bc9581439bf2d796262a0938d8d85c84745e4cb Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 8 Aug 2022 12:07:59 -0500 Subject: [PATCH 58/95] linspace_affine should not use double precision type in kernels is HW does not support it --- dpctl/tensor/libtensor/source/tensor_py.cpp | 28 ++++++++++++++------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 4ef68a30a0..b3df82f54b 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -43,7 +43,7 @@ template class copy_cast_from_host_kernel; template class copy_cast_spec_kernel; template class copy_for_reshape_generic_kernel; template class linear_sequence_step_kernel; -template 
class linear_sequence_affine_kernel; +template class linear_sequence_affine_kernel; static dpctl::tensor::detail::usm_ndarray_types array_types; @@ -1526,7 +1526,7 @@ typedef sycl::event (*lin_space_affine_fn_ptr_t)( static lin_space_affine_fn_ptr_t lin_space_affine_dispatch_vector[_ns::num_types]; -template class LinearSequenceAffineFunctor +template class LinearSequenceAffineFunctor { private: Ty *p = nullptr; @@ -1544,8 +1544,8 @@ template class LinearSequenceAffineFunctor void operator()(sycl::id<1> wiid) const { auto i = wiid.get(0); - double wc = double(i) / n; - double w = double(n - i) / n; + wTy wc = wTy(i) / n; + wTy w = wTy(n - i) / n; if constexpr (is_complex::value) { auto _w = static_cast(w); auto _wc = static_cast(wc); @@ -1578,13 +1578,23 @@ sycl::event lin_space_affine_impl(sycl::queue exec_q, throw; } + bool device_supports_doubles = exec_q.get_device().has(sycl::aspect::fp64); sycl::event lin_space_affine_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); - cgh.parallel_for>( - sycl::range<1>{nelems}, - LinearSequenceAffineFunctor(array_data, start_v, end_v, - (include_endpoint) ? nelems - 1 - : nelems)); + if (device_supports_doubles) { + cgh.parallel_for>( + sycl::range<1>{nelems}, + LinearSequenceAffineFunctor( + array_data, start_v, end_v, + (include_endpoint) ? nelems - 1 : nelems)); + } + else { + cgh.parallel_for>( + sycl::range<1>{nelems}, + LinearSequenceAffineFunctor( + array_data, start_v, end_v, + (include_endpoint) ? 
nelems - 1 : nelems)); + } }); return lin_space_affine_event; From 87d972b5b309666ba2550f9b11d02bd09d9da99d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 8 Aug 2022 12:08:26 -0500 Subject: [PATCH 59/95] test_linspace should not try double precision if HW does not support it --- dpctl/tests/test_usm_ndarray_ctor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 9b4759ae73..5d52694ca4 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1080,6 +1080,8 @@ def test_linspace(dt): q = dpctl.SyclQueue() except dpctl.SyclQueueCreationError: pytest.skip("Default queue could not be created") + if dt in ["f8", "c16"] and not q.sycl_device.has_aspect_fp64: + pytest.skip("Device does not support double precision") X = dpt.linspace(0, 1, num=2, dtype=dt, sycl_queue=q) assert np.allclose(dpt.asnumpy(X), np.linspace(0, 1, num=2, dtype=dt)) From 1266bdb0341764a54d57da1ffa2a4a02b39613a0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 8 Aug 2022 12:30:31 -0500 Subject: [PATCH 60/95] Fix for copy of double precision NumPy array to device w/o HW support for DP Routine to copy NumPy array to USM array casts double to single precision on host is sycl device does not support double precision --- dpctl/tensor/_copy_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index ec2c63d604..dd6f068596 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -81,8 +81,15 @@ def _copy_from_numpy_into(dst, np_ary): if not isinstance(dst, dpt.usm_ndarray): raise TypeError("Expected usm_ndarray, got {}".format(type(dst))) src_ary = np.broadcast_to(np_ary, dst.shape) + copy_q = dst.sycl_queue + if copy_q.sycl_device.has_aspect_fp64 is False: + src_ary_dt_c = src_ary.dtype.char + if src_ary_dt_c == "d": + src_ary = src_ary.astype(np.float32) + 
elif src_ary_dt_c == "D": + src_ary = src_ary.astype(np.complex64) ti._copy_numpy_ndarray_into_usm_ndarray( - src=src_ary, dst=dst, sycl_queue=dst.sycl_queue + src=src_ary, dst=dst, sycl_queue=copy_q ) From d790190d290216c7df08e21daecd13f07fa150e8 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 8 Aug 2022 12:48:07 -0500 Subject: [PATCH 61/95] Tests should not use double precision arrays where it is not essential for testing --- dpctl/tests/test_usm_ndarray_manipulation.py | 42 ++++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/dpctl/tests/test_usm_ndarray_manipulation.py b/dpctl/tests/test_usm_ndarray_manipulation.py index 0dd4ccc9d7..038ac007c8 100644 --- a/dpctl/tests/test_usm_ndarray_manipulation.py +++ b/dpctl/tests/test_usm_ndarray_manipulation.py @@ -152,7 +152,7 @@ def test_expand_dims_tuple(axes): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - Xnp = np.empty((3, 3, 3)) + Xnp = np.empty((3, 3, 3), dtype="u1") X = dpt.asarray(Xnp, sycl_queue=q) Y = dpt.expand_dims(X, axes) Ynp = np.expand_dims(Xnp, axes) @@ -234,7 +234,7 @@ def test_squeeze_without_axes(shapes): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - Xnp = np.empty(shapes) + Xnp = np.empty(shapes, dtype="u1") X = dpt.asarray(Xnp, sycl_queue=q) Y = dpt.squeeze(X) Ynp = Xnp.squeeze() @@ -248,7 +248,7 @@ def test_squeeze_axes_arg(axes): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - Xnp = np.array([[[1], [2], [3]]]) + Xnp = np.array([[[1], [2], [3]]], dtype="u1") X = dpt.asarray(Xnp, sycl_queue=q) Y = dpt.squeeze(X, axes) Ynp = Xnp.squeeze(axes) @@ -262,7 +262,7 @@ def test_squeeze_axes_arg_error(axes): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - Xnp = np.array([[[1], [2], [3]]]) + Xnp = np.array([[[1], [2], [3]]], dtype="u1") X = dpt.asarray(Xnp, sycl_queue=q) pytest.raises(ValueError, dpt.squeeze, X, axes) @@ -270,21 
+270,21 @@ def test_squeeze_axes_arg_error(axes): @pytest.mark.parametrize( "data", [ - [np.array(0), (0,)], - [np.array(0), (1,)], - [np.array(0), (3,)], - [np.ones(1), (1,)], - [np.ones(1), (2,)], - [np.ones(1), (1, 2, 3)], - [np.arange(3), (3,)], - [np.arange(3), (1, 3)], - [np.arange(3), (2, 3)], - [np.ones(0), 0], - [np.ones(1), 1], - [np.ones(1), 2], - [np.ones(1), (0,)], - [np.ones((1, 2)), (0, 2)], - [np.ones((2, 1)), (2, 0)], + [np.array(0, dtype="u1"), (0,)], + [np.array(0, dtype="u1"), (1,)], + [np.array(0, dtype="u1"), (3,)], + [np.ones(1, dtype="u1"), (1,)], + [np.ones(1, dtype="u1"), (2,)], + [np.ones(1, dtype="u1"), (1, 2, 3)], + [np.arange(3, dtype="u1"), (3,)], + [np.arange(3, dtype="u1"), (1, 3)], + [np.arange(3, dtype="u1"), (2, 3)], + [np.ones(0, dtype="u1"), 0], + [np.ones(1, dtype="u1"), 1], + [np.ones(1, dtype="u1"), 2], + [np.ones(1, dtype="u1"), (0,)], + [np.ones((1, 2), dtype="u1"), (0, 2)], + [np.ones((2, 1), dtype="u1"), (2, 0)], ], ) def test_broadcast_to_succeeds(data): @@ -323,7 +323,7 @@ def test_broadcast_to_raises(data): pytest.skip("Queue could not be created") orig_shape, target_shape = data - Xnp = np.zeros(orig_shape) + Xnp = np.zeros(orig_shape, dtype="i1") X = dpt.asarray(Xnp, sycl_queue=q) pytest.raises(ValueError, dpt.broadcast_to, X, target_shape) @@ -333,7 +333,7 @@ def assert_broadcast_correct(input_shapes): q = dpctl.SyclQueue() except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - np_arrays = [np.zeros(s) for s in input_shapes] + np_arrays = [np.zeros(s, dtype="i1") for s in input_shapes] out_np_arrays = np.broadcast_arrays(*np_arrays) usm_arrays = [dpt.asarray(Xnp, sycl_queue=q) for Xnp in np_arrays] out_usm_arrays = dpt.broadcast_arrays(*usm_arrays) From f251720e56733432709ecf1b5f85bf7e06c80bdb Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 10 Aug 2022 15:24:20 -0500 Subject: [PATCH 62/95] Added example of C extension using DPCTLSyclInterface --- 
examples/c/py_sycl_ls/py_sycl_ls/__init__.py | 21 +++++ examples/c/py_sycl_ls/py_sycl_ls/__main__.py | 20 +++++ examples/c/py_sycl_ls/setup.py | 61 ++++++++++++++ examples/c/py_sycl_ls/src/py_sycl-ls.c | 89 ++++++++++++++++++++ examples/c/py_sycl_ls/tests/test_sycl_ls.py | 26 ++++++ 5 files changed, 217 insertions(+) create mode 100644 examples/c/py_sycl_ls/py_sycl_ls/__init__.py create mode 100644 examples/c/py_sycl_ls/py_sycl_ls/__main__.py create mode 100644 examples/c/py_sycl_ls/setup.py create mode 100644 examples/c/py_sycl_ls/src/py_sycl-ls.c create mode 100644 examples/c/py_sycl_ls/tests/test_sycl_ls.py diff --git a/examples/c/py_sycl_ls/py_sycl_ls/__init__.py b/examples/c/py_sycl_ls/py_sycl_ls/__init__.py new file mode 100644 index 0000000000..d84c7c7278 --- /dev/null +++ b/examples/c/py_sycl_ls/py_sycl_ls/__init__.py @@ -0,0 +1,21 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ._py_sycl_ls import sycl_ls + +__all__ = [ + "sycl_ls", +] diff --git a/examples/c/py_sycl_ls/py_sycl_ls/__main__.py b/examples/c/py_sycl_ls/py_sycl_ls/__main__.py new file mode 100644 index 0000000000..3035dc444f --- /dev/null +++ b/examples/c/py_sycl_ls/py_sycl_ls/__main__.py @@ -0,0 +1,20 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2020-2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from py_sycl_ls import sycl_ls + +if __name__ == "__main__": + sycl_ls() diff --git a/examples/c/py_sycl_ls/setup.py b/examples/c/py_sycl_ls/setup.py new file mode 100644 index 0000000000..fc9183aec8 --- /dev/null +++ b/examples/c/py_sycl_ls/setup.py @@ -0,0 +1,61 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os.path +import sysconfig + +from setuptools import Extension, setup + +import dpctl + +setup( + name="py_sycl_ls", + version="0.0.1", + description="An example of C extension calling SYCLInterface routines", + long_description=""" + Example of using SYCLInterface. + + See README.md for more details. 
+ """, + license="Apache 2.0", + author="Intel Corporation", + url="https://github.com/IntelPython/dpctl", + ext_modules=[ + Extension( + name="py_sycl_ls._py_sycl_ls", + sources=[ + "src/py_sycl-ls.c", + ], + include_dirs=[ + dpctl.get_include(), + os.path.join(sysconfig.get_paths()["include"], ".."), + ], + library_dirs=[ + os.path.join(dpctl.get_include(), ".."), + ], + libraries=["DPCTLSyclInterface"], + runtime_library_dirs=[ + os.path.join(dpctl.get_include(), ".."), + ], + extra_compile_args=[ + "-Wall", + "-Wextra", + ], + extra_link_args=["-fPIC"], + language="c", + ) + ], +) diff --git a/examples/c/py_sycl_ls/src/py_sycl-ls.c b/examples/c/py_sycl_ls/src/py_sycl-ls.c new file mode 100644 index 0000000000..64a331c92f --- /dev/null +++ b/examples/c/py_sycl_ls/src/py_sycl-ls.c @@ -0,0 +1,89 @@ +//==- py_sycl-ls.c - Example of C extension working with -===// +// DPCTLSyclInterface C-interface library. +// +// Data Parallel Control (dpctl) +// +// Copyright 2022 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements C Python extension using DPCTLSyclInterface library. 
+/// +//===----------------------------------------------------------------------===// + +// clang-format off +#include "Python.h" +#include "dpctl_capi.h" +#include "syclinterface/dpctl_sycl_platform_interface.h" +#include "syclinterface/dpctl_sycl_platform_manager.h" +#include "syclinterface/dpctl_utils.h" +// clang-format on + +PyObject *sycl_ls(PyObject *self_unused, PyObject *args) +{ + DPCTLPlatformVectorRef PVRef = NULL; + size_t psz = 0; + + (void)(self_unused); // avoid unused arguments warning + (void)(args); + PVRef = DPCTLPlatform_GetPlatforms(); + + if (PVRef) { + psz = DPCTLPlatformVector_Size(PVRef); + + for (size_t i = 0; i < psz; ++i) { + DPCTLSyclPlatformRef PRef = DPCTLPlatformVector_GetAt(PVRef, i); + const char *pl_info = DPCTLPlatformMgr_GetInfo(PRef, 2); + + printf("Platform: %ld::\n%s\n", i, pl_info); + + DPCTLCString_Delete(pl_info); + DPCTLPlatform_Delete(PRef); + } + + DPCTLPlatformVector_Delete(PVRef); + } + + Py_RETURN_NONE; +} + +static PyMethodDef SyclLSMethods[] = { + {"sycl_ls", sycl_ls, METH_NOARGS, "Output information about SYCL platform"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + +static struct PyModuleDef syclls_module = { + PyModuleDef_HEAD_INIT, + "_py_sycl_ls", /* name of module */ + "", /* module documentation, may be NULL */ + -1, /* size of per-interpreter state of the module, + or -1 if the module keeps state in global variables. 
*/ + SyclLSMethods, + NULL, + NULL, + NULL, + NULL}; + +PyMODINIT_FUNC PyInit__py_sycl_ls(void) +{ + PyObject *m; + + import_dpctl(); + + m = PyModule_Create(&syclls_module); + + return m; +} diff --git a/examples/c/py_sycl_ls/tests/test_sycl_ls.py b/examples/c/py_sycl_ls/tests/test_sycl_ls.py new file mode 100644 index 0000000000..e399ef2af2 --- /dev/null +++ b/examples/c/py_sycl_ls/tests/test_sycl_ls.py @@ -0,0 +1,26 @@ +# Data Parallel Control (dpctl) +# +# Copyright 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import subprocess +import sys + + +def test_sycl_ls(): + r = subprocess.run( + [sys.executable, "-m", "py_sycl_ls"], capture_output=True, check=True + ) + assert r.stdout + assert not r.stderr From e7aa9511fe7910f27f8245103c9cd072d3b0f523 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 10 Aug 2022 15:24:49 -0500 Subject: [PATCH 63/95] Use [[deprecated]] attributed supported in both gcc/g++ and icx/icpx This fixed build break of C extension in numba-dpex --- libsyclinterface/include/dpctl_sycl_device_interface.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/libsyclinterface/include/dpctl_sycl_device_interface.h b/libsyclinterface/include/dpctl_sycl_device_interface.h index f9b1c0f009..0762f595e9 100644 --- a/libsyclinterface/include/dpctl_sycl_device_interface.h +++ b/libsyclinterface/include/dpctl_sycl_device_interface.h @@ -250,11 +250,9 @@ DPCTLDevice_GetMaxWorkItemSizes3d(__dpctl_keep const DPCTLSyclDeviceRef DRef); * @return Returns the valid result if device exists else returns NULL. * @ingroup DeviceInterface */ -DPCTL_API -__dpctl_keep size_t * -DPCTLDevice_GetMaxWorkItemSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef) - __attribute__((deprecated("DPCTLDevice_GetMaxWorkItemSizes is deprecated ", - "Use DPCTLDevice_WorkItemSizes3d instead"))); +[[deprecated("Use DPCTLDevice_WorkItemSizes3d instead")]] DPCTL_API + __dpctl_keep size_t * + DPCTLDevice_GetMaxWorkItemSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef); /*! * @brief Wrapper for get_info(). 
From 8effee6286a028add8482f04a3d75269ddbe13ec Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 10 Aug 2022 15:25:51 -0500 Subject: [PATCH 64/95] Replaced print with use of warnings.warn --- dpctl/_sycl_platform.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/_sycl_platform.pyx b/dpctl/_sycl_platform.pyx index 3aab462a0b..85752a22fb 100644 --- a/dpctl/_sycl_platform.pyx +++ b/dpctl/_sycl_platform.pyx @@ -349,7 +349,7 @@ def lsplatform(verbosity=0): cdef DPCTLSyclPlatformRef PRef = NULL if not isinstance(verbosity, int): - print( + warnings.warn( "Illegal verbosity level. Accepted values are 0, 1, or 2. " "Using the default verbosity level of 0." ) From 28a1f8d95d487553b029d537cddb677fb301c7d4 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 10 Aug 2022 15:34:11 -0500 Subject: [PATCH 65/95] Add building of native Python extension written in C to test_linux_examples This serves the purpose of verifying that dpctl headers can be compiled by C compiler. 
--- .github/workflows/conda-package.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 6207110acb..c50ef77fe7 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -393,7 +393,7 @@ jobs: conda install -n examples -y ninja $CHANNELS || exit 1 conda install -n examples -y pybind11 cython scikit-build $CHANNELS || exit 1 conda install -n examples -y mkl-dpcpp mkl-devel-dpcpp dpcpp_cpp_rt $CHANNELS || exit 1 - conda create -y -n build_env -c intel dpcpp_linux-64 + conda create -y -n build_env -c intel gcc_linux-64 gxx_linux-64 dpcpp_linux-64 - name: Install dpctl shell: bash -l {0} run: | @@ -424,7 +424,7 @@ jobs: -DMKL_INCLUDE_DIR=${MKLROOT}/include \ -DTBB_INCLUDE_DIR=${TBBROOT}/include || exit 1 else - CC=dpcpp CXX=dpcpp LD_SHARED="dpcpp -shared" \ + CC=dpcpp CXX=dpcpp LDSHARED="dpcpp -shared" \ python setup.py build_ext --inplace || exit 1 fi conda deactivate @@ -441,12 +441,22 @@ jobs: do pushd $d conda activate --stack build_env - CC=dpcpp CXX=dpcpp LD_SHARED="dpcpp -shared" \ + CC=dpcpp CXX=dpcpp LDSHARED="dpcpp -shared" \ python setup.py build_ext --inplace || exit 1 conda deactivate LD_LIBRARY_PATH=${CONDA_PREFIX}/lib python run.py || exit 1 popd done + cd ../c + for d in $(ls) + do + pushd $d + conda activate --stack build_env + python setup.py build_ext --inplace || exit 1 + conda deactivate + python -m pytest tests || exit 1 + popd + done - name: Run Python examples shell: bash -l {0} run: | From b93d2a66e891f308a62bdec3c46f4aec129d7c79 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 10 Aug 2022 16:17:09 -0500 Subject: [PATCH 66/95] Removed use of deprecated methods --- examples/cython/sycl_buffer/use_sycl_buffer.cpp | 10 ++++------ examples/cython/sycl_direct_linkage/sycl_function.cpp | 6 ++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git 
a/examples/cython/sycl_buffer/use_sycl_buffer.cpp b/examples/cython/sycl_buffer/use_sycl_buffer.cpp index ba85510d09..6bd39ab93c 100644 --- a/examples/cython/sycl_buffer/use_sycl_buffer.cpp +++ b/examples/cython/sycl_buffer/use_sycl_buffer.cpp @@ -60,8 +60,7 @@ int c_columnwise_total(DPCTLSyclQueueRef q_ref, ev.wait_and_throw(); } catch (sycl::exception const &e) { std::cout << "\t\tCaught synchronous SYCL exception during fill:\n" - << e.what() << std::endl - << "OpenCL status: " << e.get_cl_code() << std::endl; + << e.what() << std::endl; goto cleanup; } @@ -72,8 +71,7 @@ int c_columnwise_total(DPCTLSyclQueueRef q_ref, q.wait(); } catch (sycl::exception const &e) { std::cout << "\t\tCaught synchronous SYCL exception during GEMV:\n" - << e.what() << std::endl - << "OpenCL status: " << e.get_cl_code() << std::endl; + << e.what() << std::endl; goto cleanup; } } @@ -128,8 +126,8 @@ int c_columnwise_total_no_mkl(DPCTLSyclQueueRef q_ref, std::plus()); if (it.get_local_id(0) == 0) { sycl::ext::oneapi::atomic_ref< - double, sycl::ext::oneapi::memory_order::relaxed, - sycl::ext::oneapi::memory_scope::system, + double, sycl::memory_order::relaxed, + sycl::memory_scope::system, sycl::access::address_space::global_space>(ct_acc[j]) += group_sum; } diff --git a/examples/cython/sycl_direct_linkage/sycl_function.cpp b/examples/cython/sycl_direct_linkage/sycl_function.cpp index 3083b30872..c7e92b9085 100644 --- a/examples/cython/sycl_direct_linkage/sycl_function.cpp +++ b/examples/cython/sycl_direct_linkage/sycl_function.cpp @@ -56,8 +56,7 @@ int c_columnwise_total(cl::sycl::queue &q, ev.wait_and_throw(); } catch (sycl::exception const &e) { std::cout << "\t\tCaught synchronous SYCL exception during fill:\n" - << e.what() << std::endl - << "OpenCL status: " << e.get_cl_code() << std::endl; + << e.what() << std::endl; goto cleanup; } @@ -68,8 +67,7 @@ int c_columnwise_total(cl::sycl::queue &q, q.wait(); } catch (sycl::exception const &e) { std::cout << "\t\tCaught synchronous 
SYCL exception during GEMV:\n" - << e.what() << std::endl - << "OpenCL status: " << e.get_cl_code() << std::endl; + << e.what() << std::endl; goto cleanup; } } From d8bdc67f32a0af2920d23771c32c805eff95e2d2 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 10 Aug 2022 16:36:04 -0500 Subject: [PATCH 67/95] Removed deprecated symbols, set library_dirs for setuptools builds --- examples/cython/sycl_buffer/setup.py | 4 +++- examples/cython/sycl_buffer/use_sycl_buffer.cpp | 9 ++++----- examples/cython/sycl_direct_linkage/setup.py | 4 +++- examples/cython/usm_memory/setup.py | 4 +++- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/examples/cython/sycl_buffer/setup.py b/examples/cython/sycl_buffer/setup.py index 3e2d98390f..630682ff47 100644 --- a/examples/cython/sycl_buffer/setup.py +++ b/examples/cython/sycl_buffer/setup.py @@ -48,6 +48,9 @@ dpctl.get_include(), os.path.join(sysconfig.get_paths()["include"], ".."), ], + library_dirs=[ + os.path.join(sysconfig.get_paths()["stdlib"], ".."), + ], libraries=["sycl"] + [ "mkl_sycl", @@ -55,7 +58,6 @@ "mkl_tbb_thread", "mkl_core", "tbb", - "iomp5", ], runtime_library_dirs=[], extra_compile_args=[ diff --git a/examples/cython/sycl_buffer/use_sycl_buffer.cpp b/examples/cython/sycl_buffer/use_sycl_buffer.cpp index 6bd39ab93c..7aa082aba8 100644 --- a/examples/cython/sycl_buffer/use_sycl_buffer.cpp +++ b/examples/cython/sycl_buffer/use_sycl_buffer.cpp @@ -125,11 +125,10 @@ int c_columnwise_total_no_mkl(DPCTLSyclQueueRef q_ref, it.get_group(), (i < n) ? 
mat_acc[it.get_global_id()] : 0.0, std::plus()); if (it.get_local_id(0) == 0) { - sycl::ext::oneapi::atomic_ref< - double, sycl::memory_order::relaxed, - sycl::memory_scope::system, - sycl::access::address_space::global_space>(ct_acc[j]) += - group_sum; + sycl::atomic_ref( + ct_acc[j]) += group_sum; } }); }); diff --git a/examples/cython/sycl_direct_linkage/setup.py b/examples/cython/sycl_direct_linkage/setup.py index b60e358afa..c01b33785d 100644 --- a/examples/cython/sycl_direct_linkage/setup.py +++ b/examples/cython/sycl_direct_linkage/setup.py @@ -52,6 +52,9 @@ dpctl.get_include(), os.path.join(sysconfig.get_paths()["include"], ".."), ], + library_dirs=[ + os.path.join(sysconfig.get_paths()["stdlib"], ".."), + ], libraries=["sycl"] + [ "mkl_sycl", @@ -59,7 +62,6 @@ "mkl_tbb_thread", "mkl_core", "tbb", - "iomp5", ], runtime_library_dirs=[], extra_compile_args=[ diff --git a/examples/cython/usm_memory/setup.py b/examples/cython/usm_memory/setup.py index cf5e4ed122..c5ca84a3ca 100644 --- a/examples/cython/usm_memory/setup.py +++ b/examples/cython/usm_memory/setup.py @@ -47,6 +47,9 @@ dpctl.get_include(), os.path.join(sysconfig.get_paths()["include"], ".."), ], + library_dirs=[ + os.path.join(sysconfig.get_paths()["stdlib"], ".."), + ], libraries=["sycl"] + [ "mkl_sycl", @@ -54,7 +57,6 @@ "mkl_tbb_thread", "mkl_core", "tbb", - "iomp5", ], runtime_library_dirs=[], extra_compile_args=[ From 79c05d74bff6e5166417a79faef09dd25fe7ebaa Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 07:12:50 -0500 Subject: [PATCH 68/95] dpctl.tensor.asarray must check numpy array data-type ```python import numpy as np, dpctl.tensor as dpt dpt.asarray(np.array([1,2,3], dtype=object)) # now raises TypeError ``` --- dpctl/tensor/_ctors.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index b895ad2341..3bcedcbf63 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -44,7 +44,7 @@ 
def _get_dtype(dtype, sycl_obj, ref_type=None): dtype = ti.default_device_complex_type(sycl_obj) return np.dtype(dtype) else: - raise ValueError(f"Reference type {ref_type} not recognized.") + raise TypeError(f"Reference type {ref_type} not recognized.") else: return np.dtype(dtype) @@ -199,6 +199,11 @@ def _asarray_from_numpy_ndarray( if usm_type is None: usm_type = "device" copy_q = normalize_queue_device(sycl_queue=None, device=sycl_queue) + if ary.dtype.char not in "?bBhHiIlLqQefdFD": + raise TypeError( + f"Numpy array of data type {ary.dtype} is not supported. " + "Please convert the input to an array with numeric data type." + ) if dtype is None: ary_dtype = ary.dtype dtype = _get_dtype(dtype, copy_q, ref_type=ary_dtype) From 3af53d409fe40e03ca67a031aa6b824c676f9867 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 07:23:13 -0500 Subject: [PATCH 69/95] Altered the logic of finding the most recent tag to download from The tag must contain sycl-nightly substring in it. 
--- .github/workflows/os-llvm-sycl-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/os-llvm-sycl-build.yml b/.github/workflows/os-llvm-sycl-build.yml index 841143e890..e7140e6ab8 100644 --- a/.github/workflows/os-llvm-sycl-build.yml +++ b/.github/workflows/os-llvm-sycl-build.yml @@ -41,7 +41,7 @@ jobs: cd /home/runner/work mkdir -p sycl_bundle cd sycl_bundle - export LATEST_LLVM_TAG=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | tail --lines=1) + export LATEST_LLVM_TAG=$(git -c 'versionsort.suffix=-' ls-remote --tags --sort='v:refname' https://github.com/intel/llvm.git | grep sycl-nightly | tail --lines=1) export LATEST_LLVM_TAG_SHA=$(echo ${LATEST_LLVM_TAG} | awk '{print $1}') export NIGHTLY_TAG=$(python3 -c "import sys, urllib.parse as ul; print (ul.quote_plus(sys.argv[1]))" \ $(echo ${LATEST_LLVM_TAG} | awk '{gsub(/^refs\/tags\//, "", $2)} {print $2}')) From 39257cc38523d035c8bbde2c8c9d8dc3936745ac Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 07:48:53 -0500 Subject: [PATCH 70/95] Added a test to check validation for supported dtype --- dpctl/tests/test_tensor_asarray.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dpctl/tests/test_tensor_asarray.py b/dpctl/tests/test_tensor_asarray.py index df7331213e..a6b83caaf9 100644 --- a/dpctl/tests/test_tensor_asarray.py +++ b/dpctl/tests/test_tensor_asarray.py @@ -220,3 +220,13 @@ def test_asarray_copy_false(): assert Y6 is Xf with pytest.raises(ValueError): dpt.asarray(Xf, copy=False, order="C") + + +def test_asarray_invalid_dtype(): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Could not create a queue") + Xnp = np.array([1, 2, 3], dtype=object) + with pytest.raises(TypeError): + dpt.asarray(Xnp, sycl_queue=q) From 458cecaea77cd40fc06afd56c32360286483a4ee Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 
08:27:59 -0500 Subject: [PATCH 71/95] Fixed test to pass on Iris Xe --- dpctl/tests/test_tensor_asarray.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/dpctl/tests/test_tensor_asarray.py b/dpctl/tests/test_tensor_asarray.py index a6b83caaf9..3d9ba3db4b 100644 --- a/dpctl/tests/test_tensor_asarray.py +++ b/dpctl/tests/test_tensor_asarray.py @@ -177,11 +177,17 @@ def test_asarray_scalars(): Y = dpt.asarray(5) assert Y.dtype == np.dtype(int) Y = dpt.asarray(5.2) - assert Y.dtype == np.dtype(float) + if Y.sycl_device.has_aspect_fp64: + assert Y.dtype == np.dtype(float) + else: + assert Y.dtype == np.dtype(np.float32) Y = dpt.asarray(np.float32(2.3)) assert Y.dtype == np.dtype(np.float32) Y = dpt.asarray(1.0j) - assert Y.dtype == np.dtype(complex) + if Y.sycl_device.has_aspect_fp64: + assert Y.dtype == np.dtype(complex) + else: + assert Y.dtype == np.dtype(np.complex64) Y = dpt.asarray(ctypes.c_int(8)) assert Y.dtype == np.dtype(ctypes.c_int) From 59e94eb3f2d81fb7bfd1bd7df7d92aa240db9d8d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 12:12:23 -0500 Subject: [PATCH 72/95] Removing stray print --- dpctl/tests/test_usm_ndarray_manipulation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dpctl/tests/test_usm_ndarray_manipulation.py b/dpctl/tests/test_usm_ndarray_manipulation.py index 038ac007c8..037b5ac6ef 100644 --- a/dpctl/tests/test_usm_ndarray_manipulation.py +++ b/dpctl/tests/test_usm_ndarray_manipulation.py @@ -965,7 +965,6 @@ def test_stack_2arrays(data): Y = dpt.asarray(Ynp, sycl_queue=q) Znp = np.stack([Xnp, Ynp], axis=axis) - print(Znp.shape) Z = dpt.stack([X, Y], axis=axis) assert_array_equal(Znp, dpt.asnumpy(Z)) From 5bb39db2ac381a0269793b4d503bb222fc2745c6 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 12:12:59 -0500 Subject: [PATCH 73/95] Correcting exception text --- dpctl/tensor/_usmarray.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 329794ebd6..42621a38a1 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -937,7 +937,7 @@ cdef class usm_ndarray: except Exception: raise ValueError( f"Input of type {type(val)} could not be " - "converted to numpy.ndarray" + "converted to usm_ndarray" ) def __sub__(first, other): From 34959058fc41c4eeed92026545fac5ce5f5afbbd Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 12:15:55 -0500 Subject: [PATCH 74/95] Wrap SyclQueue constructor in try/catch --- dpctl/tests/test_usm_ndarray_ctor.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 5d52694ca4..c1574e8570 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -588,7 +588,10 @@ def test_pyx_capi_check_constants(): ) @pytest.mark.parametrize("usm_type", ["device", "shared", "host"]) def test_tofrom_numpy(shape, dtype, usm_type): - q = dpctl.SyclQueue() + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Could nto create default SyclQueue") Xnp = np.zeros(shape, dtype=dtype) Xusm = dpt.from_numpy(Xnp, usm_type=usm_type, sycl_queue=q) Ynp = np.ones(shape, dtype=dtype) From 0108c0cbee1ef61aa8465cd8b2b5482a403e1aa2 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 12:17:01 -0500 Subject: [PATCH 75/95] dpctl.tensor.from_numpy should not try creating USM ndarray for 64-bit fp on HW that has no such support --- dpctl/tensor/_copy_utils.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/dpctl/tensor/_copy_utils.py b/dpctl/tensor/_copy_utils.py index dd6f068596..24aedf667a 100644 --- a/dpctl/tensor/_copy_utils.py +++ b/dpctl/tensor/_copy_utils.py @@ -59,18 +59,19 @@ def _copy_to_numpy(ary): def _copy_from_numpy(np_ary, usm_type="device", sycl_queue=None): "Copies numpy array 
`np_ary` into a new usm_ndarray" # This may peform a copy to meet stated requirements - Xnp = np.require(np_ary, requirements=["A", "O", "C", "E"]) - if sycl_queue: - ctor_kwargs = {"queue": sycl_queue} + Xnp = np.require(np_ary, requirements=["A", "E"]) + alloc_q = normalize_queue_device(sycl_queue=sycl_queue, device=None) + dt = Xnp.dtype + if dt.char in "dD" and alloc_q.sycl_device.has_aspect_fp64 is False: + Xusm_dtype = ( + np.dtype("float32") if dt.char == "d" else np.dtype("complex64") + ) else: - ctor_kwargs = dict() - Xusm = dpt.usm_ndarray( - Xnp.shape, - dtype=Xnp.dtype, - buffer=usm_type, - buffer_ctor_kwargs=ctor_kwargs, + Xusm_dtype = dt + Xusm = dpt.empty( + Xnp.shape, dtype=Xusm_dtype, usm_type=usm_type, sycl_queue=sycl_queue ) - Xusm.usm_data.copy_from_host(Xnp.reshape((-1)).view("u1")) + _copy_from_numpy_into(Xusm, Xnp) return Xusm From 4ddb093e1ccb867aa866991cea22ba8303cab900 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 15 Aug 2022 12:59:35 -0500 Subject: [PATCH 76/95] Replaced constructor with dtype 'd' with call to ones to dynamically figure out the data type appropriate for the device --- dpctl/tests/test_usm_ndarray_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tests/test_usm_ndarray_operators.py b/dpctl/tests/test_usm_ndarray_operators.py index abe094d2c2..f52ab3c3e4 100644 --- a/dpctl/tests/test_usm_ndarray_operators.py +++ b/dpctl/tests/test_usm_ndarray_operators.py @@ -49,7 +49,7 @@ def multiply(a, b): @pytest.mark.parametrize("namespace", [None, Dummy()]) def test_fp_ops(namespace): - X = dpt.usm_ndarray(1, "d") + X = dpt.ones(1) X._set_namespace(namespace) assert X.__array_namespace__() is namespace X[0] = -2.5 From 5acaa507ba390b5f2fb4ca473ebf2df53112055a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 17 Aug 2022 06:15:46 -0500 Subject: [PATCH 77/95] Adjust script setting hint dir variable to work with sycl bundles using different clang versions --- conda-recipe/bld.bat | 
8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index c4071c86c6..f400b45712 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -5,7 +5,13 @@ set "INCLUDE=%BUILD_PREFIX%\include;%INCLUDE%" "%PYTHON%" setup.py clean --all set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx" -set "SYCL_INCLUDE_DIR_HINT=%BUILD_PREFIX%\Library\lib\clang\14.0.0" + +FOR %%V IN (14.0.0 15.0.0 16.0.0) DO @( + REM set DIR_HINT if directory exists + IF EXIST "%BUILD_PREFIX%\Library\lib\clang\%%V\" ( + SET "SYCL_INCLUDE_DIR_HINT=%BUILD_PREFIX%\Library\lib\clang\%%V" + ) +) set "PLATFORM_DIR=%PREFIX%\Library\share\cmake-3.22\Modules\Platform" set "FN=Windows-IntelLLVM.cmake" From 27334835ae5a60996848b94a8353e24c0f387757 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 18 Aug 2022 16:48:52 -0500 Subject: [PATCH 78/95] static_cast to void pointer from char pointer in calls to memcpy to appease klocwork scanner --- dpctl/tensor/libtensor/source/tensor_py.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index b3df82f54b..f63cc75d57 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -570,8 +570,10 @@ copy_usm_ndarray_into_usm_ndarray(dpctl::tensor::usm_ndarray src, if (both_c_contig || both_f_contig) { if (src_type_id == dst_type_id) { - sycl::event copy_ev = exec_q.memcpy( - dst_data, src_data, src_nelems * src_elem_size, depends); + sycl::event copy_ev = + exec_q.memcpy(static_cast(dst_data), + static_cast(src_data), + src_nelems * src_elem_size, depends); // make sure src and dst are not GC-ed before copy_ev is complete return std::make_pair( @@ -1285,8 +1287,10 @@ void copy_numpy_ndarray_into_usm_ndarray( if (src_type_id == dst_type_id) { int src_elem_size = npy_src.itemsize(); - sycl::event 
copy_ev = exec_q.memcpy( - dst_data, src_data, src_nelems * src_elem_size, depends); + sycl::event copy_ev = + exec_q.memcpy(static_cast(dst_data), + static_cast(src_data), + src_nelems * src_elem_size, depends); // wait for copy_ev to complete copy_ev.wait_and_throw(); From db2bc18b912a21c0437a0df8278344ae1d9ae7f2 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 27 Aug 2022 07:34:57 -0500 Subject: [PATCH 79/95] Use consistent order of channels during build and test phases Bump up cache number Attempt to triage conda-package workflow source conda shell definition before calling calling activate test_examples_list must make sure to install the exact version built by build_linux step Require dpnp>=0.10.1 --- .github/workflows/conda-package.yml | 40 +++++++++++++++++------------ 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index c50ef77fe7..9f1a2a9e44 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -30,7 +30,7 @@ jobs: - name: Cache conda packages uses: actions/cache@v3 env: - CACHE_NUMBER: 1 # Increase to reset cache + CACHE_NUMBER: 3 # Increase to reset cache with: path: ~/.conda/pkgs key: @@ -78,7 +78,7 @@ jobs: - name: Cache conda packages uses: actions/cache@v3 env: - CACHE_NUMBER: 1 # Increase to reset cache + CACHE_NUMBER: 3 # Increase to reset cache with: path: /home/runner/conda_pkgs_dir key: @@ -107,7 +107,7 @@ jobs: runner: [ubuntu-latest] continue-on-error: ${{ matrix.experimental }} env: - CHANNELS: -c intel -c defaults --override-channels + CHANNELS: -c defaults -c intel --override-channels steps: - name: Download artifact @@ -132,7 +132,7 @@ jobs: run: | CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} ${VER_SCRIPT2}") - conda install $PACKAGE_NAME=${PACKAGE_VERSION} python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile + conda 
create -n test_dpctl $PACKAGE_NAME=${PACKAGE_VERSION} python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile cat lockfile - name: Set pkgs_dirs run: | @@ -140,7 +140,7 @@ jobs: - name: Cache conda packages uses: actions/cache@v3 env: - CACHE_NUMBER: 1 # Increase to reset cache + CACHE_NUMBER: 3 # Increase to reset cache with: path: ~/.conda/pkgs key: @@ -152,16 +152,20 @@ jobs: run: | CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} ${VER_SCRIPT2}") - conda install $PACKAGE_NAME=${PACKAGE_VERSION} pytest python=${{ matrix.python }} $CHANNELS + conda create -n test_dpctl $PACKAGE_NAME=${PACKAGE_VERSION} pytest python=${{ matrix.python }} $CHANNELS # Test installed packages conda list - name: Smoke test run: | + . $CONDA/etc/profile.d/conda.sh + conda activate test_dpctl export OCL_ICD_FILENAMES=libintelocl.so export SYCL_ENABLE_HOST_DEVICE=1 python -c "import dpctl; dpctl.lsplatform()" - name: Run tests run: | + . 
$CONDA/etc/profile.d/conda.sh + conda activate test_dpctl # echo "libintelocl.so" | tee /etc/OpenCL/vendors/intel-cpu.icd export OCL_ICD_FILENAMES=libintelocl.so export SYCL_ENABLE_HOST_DEVICE=1 @@ -179,7 +183,7 @@ jobs: runner: [windows-latest] continue-on-error: ${{ matrix.experimental }} env: - CHANNELS: -c intel -c defaults --override-channels + CHANNELS: -c defaults -c intel --override-channels steps: - name: Download artifact @@ -215,7 +219,7 @@ jobs: - name: Cache conda packages uses: actions/cache@v3 env: - CACHE_NUMBER: 1 # Increase to reset cache + CACHE_NUMBER: 3 # Increase to reset cache with: path: /home/runner/conda_pkgs_dir key: @@ -342,7 +346,7 @@ jobs: runner: [ubuntu-latest] continue-on-error: ${{ matrix.experimental }} env: - CHANNELS: -c intel -c defaults --override-channels + CHANNELS: -c defaults -c intel --override-channels steps: - name: Install conda-build @@ -361,10 +365,12 @@ jobs: - name: Create conda channel run: | mkdir -p $GITHUB_WORKSPACE/channel/linux-64 - mv ${PACKAGE_NAME}-*.tar.bz2 $GITHUB_WORKSPACE/channel/linux-64 - conda index $GITHUB_WORKSPACE/channel + conda index $GITHUB_WORKSPACE/channel || exit 1 + mv ${PACKAGE_NAME}-*.tar.bz2 $GITHUB_WORKSPACE/channel/linux-64 || exit 1 + conda index $GITHUB_WORKSPACE/channel || exit 1 # Test channel - conda search $PACKAGE_NAME -c $GITHUB_WORKSPACE/channel --override-channels + conda search $PACKAGE_NAME -c $GITHUB_WORKSPACE/channel --override-channels --info --json > $GITHUB_WORKSPACE/ver.json + cat ver.json - name: Collect dependencies run: | CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" @@ -375,7 +381,7 @@ jobs: - name: Cache conda packages uses: actions/cache@v3 env: - CACHE_NUMBER: 1 # Increase to reset cache + CACHE_NUMBER: 3 # Increase to reset cache with: path: ~/.conda/pkgs key: @@ -387,7 +393,7 @@ jobs: shell: bash -l {0} run: | CHANNELS="${{ env.CHANNELS }}" - source $CONDA/etc/profile.d/conda.sh + . 
$CONDA/etc/profile.d/conda.sh conda create -n examples -y pytest python=${{ matrix.python }} $CHANNELS conda install -n examples -y cmake">=3.22" $CHANNELS || exit 1 conda install -n examples -y ninja $CHANNELS || exit 1 @@ -398,9 +404,9 @@ jobs: shell: bash -l {0} run: | source $CONDA/etc/profile.d/conda.sh - conda activate - CHANNELS="-c $GITHUB_WORKSPACE/channel -c dppy/label/dev -c intel --override-channels" - conda install -n examples -y $CHANNELS numpy dpctl dpnp || exit 1 + CHANNELS="-c $GITHUB_WORKSPACE/channel -c dppy/label/dev -c intel -c defaults --override-channels" + export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} ${VER_SCRIPT2}") + conda install -n examples -y ${CHANNELS} dpctl=${PACKAGE_VERSION} dpnp">=0.10.1" || exit 1 - name: Build and run examples with native extensions shell: bash -l {0} run: | From 58ff336a61902da4e1abd760d4bebe96b846878e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 28 Aug 2022 10:55:21 -0500 Subject: [PATCH 80/95] Import error_already_set into namespace Import pybind11::error_already_set into dpctl::memory namespace to work-around compilation error due to use of not-fully qualified identified in PYBIND11_OBJECT_CVT macro --- dpctl/apis/include/dpctl4pybind11.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dpctl/apis/include/dpctl4pybind11.hpp b/dpctl/apis/include/dpctl4pybind11.hpp index bdf9f256e5..e6c31a4e38 100644 --- a/dpctl/apis/include/dpctl4pybind11.hpp +++ b/dpctl/apis/include/dpctl4pybind11.hpp @@ -302,6 +302,10 @@ struct dpctl_api namespace memory { +// since PYBIND11_OBJECT_CVT uses error_already_set without namespace, +// this allows to avoid compilation error +using pybind11::error_already_set; + class usm_memory : public py::object { public: From 87fafe62be60fe4ad180700dde1252fab1848529 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 26 Aug 2022 09:17:44 -0500 Subject: [PATCH 81/95] Improved exception message when SyclDevice could not created from invalid filter string --- 
dpctl/_sycl_device.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 2da48ef333..3de4a82a0d 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -293,7 +293,8 @@ cdef class SyclDevice(_SyclDevice): ret = self._init_from_selector(DSRef) if ret == -1: raise SyclDeviceCreationError( - "Could not create a SyclDevice with the selector string" + "Could not create a SyclDevice with the selector string " + "'{selector_string}'".format(selector_string=arg) ) elif isinstance(arg, _SyclDevice): ret = self._init_from__SyclDevice(arg) From 96aa0fc766c0876ac87e7aaaef143be56fb986cb Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 3 Sep 2022 14:46:13 -0500 Subject: [PATCH 82/95] Use [[deprecated("msg")]] annotation only for C++ builds --- libsyclinterface/include/dpctl_sycl_device_interface.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/libsyclinterface/include/dpctl_sycl_device_interface.h b/libsyclinterface/include/dpctl_sycl_device_interface.h index 0762f595e9..b45055b945 100644 --- a/libsyclinterface/include/dpctl_sycl_device_interface.h +++ b/libsyclinterface/include/dpctl_sycl_device_interface.h @@ -250,9 +250,11 @@ DPCTLDevice_GetMaxWorkItemSizes3d(__dpctl_keep const DPCTLSyclDeviceRef DRef); * @return Returns the valid result if device exists else returns NULL. * @ingroup DeviceInterface */ -[[deprecated("Use DPCTLDevice_WorkItemSizes3d instead")]] DPCTL_API - __dpctl_keep size_t * - DPCTLDevice_GetMaxWorkItemSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef); +#if __cplusplus +[[deprecated("Use DPCTLDevice_WorkItemSizes3d instead")]] +#endif +DPCTL_API __dpctl_keep size_t * +DPCTLDevice_GetMaxWorkItemSizes(__dpctl_keep const DPCTLSyclDeviceRef DRef); /*! * @brief Wrapper for get_info(). 
From b15b22f4b9738d5aae6c0eb6cc12dd709c152b98 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 4 Sep 2022 07:45:39 -0500 Subject: [PATCH 83/95] [[deprecated]] is part of C23 standard, it is supported in gcc version 10 and older --- libsyclinterface/include/dpctl_sycl_device_interface.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsyclinterface/include/dpctl_sycl_device_interface.h b/libsyclinterface/include/dpctl_sycl_device_interface.h index b45055b945..438428c822 100644 --- a/libsyclinterface/include/dpctl_sycl_device_interface.h +++ b/libsyclinterface/include/dpctl_sycl_device_interface.h @@ -250,7 +250,7 @@ DPCTLDevice_GetMaxWorkItemSizes3d(__dpctl_keep const DPCTLSyclDeviceRef DRef); * @return Returns the valid result if device exists else returns NULL. * @ingroup DeviceInterface */ -#if __cplusplus +#if __cplusplus || (defined(__GNUC__) && __GNUC__ > 10) [[deprecated("Use DPCTLDevice_WorkItemSizes3d instead")]] #endif DPCTL_API __dpctl_keep size_t * From b3c1d887f8de56e1e6d60deeaf6952534f9c2d2d Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 26 Aug 2022 11:35:50 -0500 Subject: [PATCH 84/95] Added 3 new device attributes per gh-886 These are DPCTLDevice_GetGlobalMemCacheSize, DPCTLDevice_GlobalMemCacheLineSize, and DPCTLDevice_GetGlobalMemCacheType. To support the latter, introduced DPCTLGlobalMemCacheType enum in dpctl_sycl_enum_types.h Tests are added to test_capi target. 
--- .../include/dpctl_sycl_device_interface.h | 38 ++++++++++++-- .../include/dpctl_sycl_enum_types.h | 8 +++ .../source/dpctl_sycl_device_interface.cpp | 51 +++++++++++++++++++ .../tests/test_sycl_device_interface.cpp | 46 +++++++++++++++++ 4 files changed, 140 insertions(+), 3 deletions(-) diff --git a/libsyclinterface/include/dpctl_sycl_device_interface.h b/libsyclinterface/include/dpctl_sycl_device_interface.h index 438428c822..d11a4c6bb5 100644 --- a/libsyclinterface/include/dpctl_sycl_device_interface.h +++ b/libsyclinterface/include/dpctl_sycl_device_interface.h @@ -494,9 +494,6 @@ DPCTL_API __dpctl_give DPCTLDeviceVectorRef DPCTLDevice_CreateSubDevicesByAffinity( __dpctl_keep const DPCTLSyclDeviceRef DRef, DPCTLPartitionAffinityDomainType PartAffDomTy); - -DPCTL_C_EXTERN_C_END - /*! * @brief Wrapper over * device.get_info. @@ -633,3 +630,38 @@ size_t DPCTLDevice_Hash(__dpctl_keep const DPCTLSyclDeviceRef DRef); DPCTL_API size_t DPCTLDevice_GetProfilingTimerResolution( __dpctl_keep const DPCTLSyclDeviceRef DRef); + +/*! + * @brief Wrapper over + * device.get_info + * + * @param DRef Opaque pointer to a sycl::device + * @return Returns the size of global memory cache line in bytes as uint32_t. + */ +DPCTL_API +uint32_t DPCTLDevice_GetGlobalMemCacheLineSize( + __dpctl_keep const DPCTLSyclDeviceRef DRef); + +/*! + * @brief Wrapper over + * device.get_info + * + * @param DRef Opaque pointer to a sycl::device + * @return Returns the size of global memory cache in bytes as uint64_t. + */ +DPCTL_API +uint64_t +DPCTLDevice_GetGlobalMemCacheSize(__dpctl_keep const DPCTLSyclDeviceRef DRef); + +/*! + * @brief Wrapper over + * device.get_info + * + * @param DRef Opaque pointer to a sycl::device + * @return Returns the type of global memory cache supported. 
+ */ +DPCTL_API +DPCTLGlobalMemCacheType +DPCTLDevice_GetGlobalMemCacheType(__dpctl_keep const DPCTLSyclDeviceRef DRef); + +DPCTL_C_EXTERN_C_END diff --git a/libsyclinterface/include/dpctl_sycl_enum_types.h b/libsyclinterface/include/dpctl_sycl_enum_types.h index 6265850fbd..1ac169ce2c 100644 --- a/libsyclinterface/include/dpctl_sycl_enum_types.h +++ b/libsyclinterface/include/dpctl_sycl_enum_types.h @@ -161,4 +161,12 @@ typedef enum DPCTL_COMPLETE } DPCTLSyclEventStatusType; +typedef enum +{ + DPCTL_MEM_CACHE_TYPE_INDETERMINATE, + DPCTL_MEM_CACHE_TYPE_NONE, + DPCTL_MEM_CACHE_TYPE_READ_ONLY, + DPCTL_MEM_CACHE_TYPE_READ_WRITE +} DPCTLGlobalMemCacheType; + DPCTL_C_EXTERN_C_END diff --git a/libsyclinterface/source/dpctl_sycl_device_interface.cpp b/libsyclinterface/source/dpctl_sycl_device_interface.cpp index c65f9ac38b..7494367924 100644 --- a/libsyclinterface/source/dpctl_sycl_device_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_device_interface.cpp @@ -691,3 +691,54 @@ size_t DPCTLDevice_GetProfilingTimerResolution( return 0; } } + +uint32_t DPCTLDevice_GetGlobalMemCacheLineSize( + __dpctl_keep const DPCTLSyclDeviceRef DRef) +{ + if (DRef) { + auto D = unwrap(DRef); + return D->get_info(); + } + else { + error_handler("Argument DRef is null", __FILE__, __func__, __LINE__); + return 0; + } +} + +uint64_t +DPCTLDevice_GetGlobalMemCacheSize(__dpctl_keep const DPCTLSyclDeviceRef DRef) +{ + if (DRef) { + auto D = unwrap(DRef); + return D->get_info(); + } + else { + error_handler("Argument DRef is null", __FILE__, __func__, __LINE__); + return 0; + } +} + +DPCTLGlobalMemCacheType +DPCTLDevice_GetGlobalMemCacheType(__dpctl_keep const DPCTLSyclDeviceRef DRef) +{ + if (DRef) { + auto D = unwrap(DRef); + auto mem_type = D->get_info(); + switch (mem_type) { + case info::global_mem_cache_type::none: + return DPCTL_MEM_CACHE_TYPE_NONE; + case info::global_mem_cache_type::read_only: + return DPCTL_MEM_CACHE_TYPE_READ_ONLY; + case info::global_mem_cache_type::read_write: 
+ return DPCTL_MEM_CACHE_TYPE_READ_WRITE; + } + // If execution reaches here unrecognized mem_type was returned. Check + // values in the enumeration `info::global_mem_cache_type` in SYCL specs + assert(false); + return DPCTL_MEM_CACHE_TYPE_INDETERMINATE; + } + else { + error_handler("Argument DRef is null", __FILE__, __func__, __LINE__); + return DPCTL_MEM_CACHE_TYPE_INDETERMINATE; + } +} diff --git a/libsyclinterface/tests/test_sycl_device_interface.cpp b/libsyclinterface/tests/test_sycl_device_interface.cpp index 71be76fe80..ba3f7fb245 100644 --- a/libsyclinterface/tests/test_sycl_device_interface.cpp +++ b/libsyclinterface/tests/test_sycl_device_interface.cpp @@ -407,6 +407,30 @@ TEST_P(TestDPCTLSyclDeviceInterface, ChkGetProfilingTimerResolution) EXPECT_TRUE(res != 0); } +TEST_P(TestDPCTLSyclDeviceInterface, ChkGetGlobalMemCacheSize) +{ + uint64_t res = 0; + EXPECT_NO_FATAL_FAILURE(res = DPCTLDevice_GetGlobalMemCacheSize(DRef)); + EXPECT_TRUE(res != 0); +} + +TEST_P(TestDPCTLSyclDeviceInterface, ChkGetGlobalMemCacheLineSize) +{ + uint32_t res = 0; + EXPECT_NO_FATAL_FAILURE(res = DPCTLDevice_GetGlobalMemCacheLineSize(DRef)); + EXPECT_TRUE(res != 0); +} + +TEST_P(TestDPCTLSyclDeviceInterface, ChkGetGlobalMemCacheType) +{ + DPCTLGlobalMemCacheType res = DPCTL_MEM_CACHE_TYPE_INDETERMINATE; + EXPECT_NO_FATAL_FAILURE(res = DPCTLDevice_GetGlobalMemCacheType(DRef)); + EXPECT_TRUE(res != DPCTL_MEM_CACHE_TYPE_INDETERMINATE); + EXPECT_TRUE((res == DPCTL_MEM_CACHE_TYPE_NONE || + res == DPCTL_MEM_CACHE_TYPE_READ_ONLY || + res == DPCTL_MEM_CACHE_TYPE_READ_WRITE)); +} + INSTANTIATE_TEST_SUITE_P(DPCTLDeviceFns, TestDPCTLSyclDeviceInterface, ::testing::Values("opencl", @@ -713,3 +737,25 @@ TEST_F(TestDPCTLSyclDeviceNullArgs, ChkGetProfilingTimerResolution) res = DPCTLDevice_GetProfilingTimerResolution(Null_DRef)); ASSERT_TRUE(res == 0); } + +TEST_F(TestDPCTLSyclDeviceNullArgs, ChkGetGlobalMemCacheSize) +{ + uint64_t res = 1; + EXPECT_NO_FATAL_FAILURE(res = 
DPCTLDevice_GetGlobalMemCacheSize(Null_DRef)); + ASSERT_TRUE(res == 0); +} + +TEST_F(TestDPCTLSyclDeviceNullArgs, ChkGetGlobalMemCacheLineSize) +{ + uint32_t res = 1; + EXPECT_NO_FATAL_FAILURE( + res = DPCTLDevice_GetGlobalMemCacheLineSize(Null_DRef)); + ASSERT_TRUE(res == 0); +} + +TEST_F(TestDPCTLSyclDeviceNullArgs, ChkGetGlobalMemCacheType) +{ + DPCTLGlobalMemCacheType res = DPCTL_MEM_CACHE_TYPE_NONE; + EXPECT_NO_FATAL_FAILURE(res = DPCTLDevice_GetGlobalMemCacheType(Null_DRef)); + ASSERT_TRUE(res == DPCTL_MEM_CACHE_TYPE_INDETERMINATE); +} From 0c0e60e409fd4764c38ff5214edd38c246f83224 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 28 Aug 2022 13:27:01 -0500 Subject: [PATCH 85/95] Expanded checks for attributes of SyclDevice to include all properties --- dpctl/tests/_device_attributes_checks.py | 157 +++++++++++++++-------- 1 file changed, 107 insertions(+), 50 deletions(-) diff --git a/dpctl/tests/_device_attributes_checks.py b/dpctl/tests/_device_attributes_checks.py index 59c458fedb..4221cefe73 100644 --- a/dpctl/tests/_device_attributes_checks.py +++ b/dpctl/tests/_device_attributes_checks.py @@ -36,52 +36,52 @@ # Unit test cases that will be run for every device -def check_get_max_compute_units(device): +def check_max_compute_units(device): max_compute_units = device.max_compute_units assert max_compute_units > 0 -def check_get_global_mem_size(device): +def check_global_mem_size(device): global_mem_size = device.global_mem_size assert global_mem_size > 0 -def check_get_local_mem_size(device): +def check_local_mem_size(device): local_mem_size = device.local_mem_size assert local_mem_size > 0 -def check_get_max_work_item_dims(device): +def check_max_work_item_dims(device): max_work_item_dims = device.max_work_item_dims assert max_work_item_dims > 0 -def check_get_max_work_item_sizes1d(device): +def check_max_work_item_sizes1d(device): max_work_item_sizes = device.max_work_item_sizes1d for size in max_work_item_sizes: assert size is not None -def 
check_get_max_work_item_sizes2d(device): +def check_max_work_item_sizes2d(device): max_work_item_sizes = device.max_work_item_sizes2d for size in max_work_item_sizes: assert size is not None -def check_get_max_work_item_sizes3d(device): +def check_max_work_item_sizes3d(device): max_work_item_sizes = device.max_work_item_sizes3d for size in max_work_item_sizes: assert size is not None @pytest.mark.filterwarnings("DeprecationWarning:") -def check_get_max_work_item_sizes(device): +def check_max_work_item_sizes(device): max_work_item_sizes = device.max_work_item_sizes for size in max_work_item_sizes: assert size is not None -def check_get_max_work_group_size(device): +def check_max_work_group_size(device): max_work_group_size = device.max_work_group_size # Special case for FPGA simulator if device.is_accelerator: @@ -90,7 +90,7 @@ def check_get_max_work_group_size(device): assert max_work_group_size > 0 -def check_get_max_num_sub_groups(device): +def check_max_num_sub_groups(device): max_num_sub_groups = device.max_num_sub_groups # Special case for FPGA simulator if device.is_accelerator or device.is_host: @@ -267,105 +267,105 @@ def check_is_host(device): pytest.fail("is_hostcall failed") -def check_get_max_read_image_args(device): +def check_max_read_image_args(device): try: device.max_read_image_args except Exception: pytest.fail("max_read_image_args call failed") -def check_get_max_write_image_args(device): +def check_max_write_image_args(device): try: device.max_write_image_args except Exception: pytest.fail("max_write_image_args call failed") -def check_get_image_2d_max_width(device): +def check_image_2d_max_width(device): try: device.image_2d_max_width except Exception: pytest.fail("image_2d_max_width call failed") -def check_get_image_2d_max_height(device): +def check_image_2d_max_height(device): try: device.image_2d_max_height except Exception: pytest.fail("image_2d_max_height call failed") -def check_get_image_3d_max_width(device): +def 
check_image_3d_max_width(device): try: device.image_3d_max_width except Exception: pytest.fail("image_3d_max_width call failed") -def check_get_image_3d_max_height(device): +def check_image_3d_max_height(device): try: device.image_3d_max_height except Exception: pytest.fail("image_3d_max_height call failed") -def check_get_image_3d_max_depth(device): +def check_image_3d_max_depth(device): try: device.image_3d_max_depth except Exception: pytest.fail("image_3d_max_depth call failed") -def check_get_sub_group_independent_forward_progress(device): +def check_sub_group_independent_forward_progress(device): try: device.sub_group_independent_forward_progress except Exception: pytest.fail("sub_group_independent_forward_progress call failed") -def check_get_preferred_vector_width_char(device): +def check_preferred_vector_width_char(device): try: device.preferred_vector_width_char except Exception: pytest.fail("preferred_vector_width_char call failed") -def check_get_preferred_vector_width_short(device): +def check_preferred_vector_width_short(device): try: device.preferred_vector_width_short except Exception: pytest.fail("preferred_vector_width_short call failed") -def check_get_preferred_vector_width_int(device): +def check_preferred_vector_width_int(device): try: device.preferred_vector_width_int except Exception: pytest.fail("preferred_vector_width_int call failed") -def check_get_preferred_vector_width_long(device): +def check_preferred_vector_width_long(device): try: device.preferred_vector_width_long except Exception: pytest.fail("preferred_vector_width_long call failed") -def check_get_preferred_vector_width_float(device): +def check_preferred_vector_width_float(device): try: device.preferred_vector_width_float except Exception: pytest.fail("preferred_vector_width_float call failed") -def check_get_preferred_vector_width_double(device): +def check_preferred_vector_width_double(device): try: device.preferred_vector_width_double except Exception: 
pytest.fail("preferred_vector_width_double call failed") -def check_get_preferred_vector_width_half(device): +def check_preferred_vector_width_half(device): try: device.preferred_vector_width_half except Exception: @@ -514,27 +514,76 @@ def check_platform(device): assert isinstance(p, dpctl.SyclPlatform) +def check_parent_device(device): + pd = device.parent_device + assert pd is None or isinstance(pd, dpctl.SyclDevice) + + +def check_filter_string(device): + try: + fs = device.filter_string + assert type(fs) is str + dd = dpctl.SyclDevice(fs) + assert device == dd + except TypeError: + pass + + +def check_name(device): + dn = device.name + assert dn + assert type(dn) is str + + +def check_driver_version(device): + dv = device.driver_version + assert dv + assert type(dv) is str + + +def check_vendor(device): + ve = device.vendor + assert ve + assert type(ve) is str + + +def check_default_selector_score(device): + sc = device.default_selector_score + assert type(sc) is int + assert sc > 0 + + +def check_backend(device): + be = device.backend + assert type(be) is dpctl.backend_type + + +def check_device_type(device): + dt = device.device_type + assert type(dt) is dpctl.device_type + + list_of_checks = [ - check_get_max_compute_units, - check_get_max_work_item_dims, - check_get_max_work_item_sizes1d, - check_get_max_work_item_sizes2d, - check_get_max_work_item_sizes3d, - check_get_max_work_item_sizes, - check_get_max_work_group_size, - check_get_max_num_sub_groups, + check_max_compute_units, + check_max_work_item_dims, + check_max_work_item_sizes1d, + check_max_work_item_sizes2d, + check_max_work_item_sizes3d, + check_max_work_item_sizes, + check_max_work_group_size, + check_max_num_sub_groups, check_is_accelerator, check_is_cpu, check_is_gpu, check_is_host, - check_get_sub_group_independent_forward_progress, - check_get_preferred_vector_width_char, - check_get_preferred_vector_width_short, - check_get_preferred_vector_width_int, - 
check_get_preferred_vector_width_long, - check_get_preferred_vector_width_float, - check_get_preferred_vector_width_double, - check_get_preferred_vector_width_half, + check_sub_group_independent_forward_progress, + check_preferred_vector_width_char, + check_preferred_vector_width_short, + check_preferred_vector_width_int, + check_preferred_vector_width_long, + check_preferred_vector_width_float, + check_preferred_vector_width_double, + check_preferred_vector_width_half, check_has_aspect_host, check_has_aspect_cpu, check_has_aspect_gpu, @@ -555,13 +604,13 @@ def check_platform(device): check_has_aspect_usm_atomic_host_allocations, check_has_aspect_usm_atomic_shared_allocations, check_has_aspect_host_debuggable, - check_get_max_read_image_args, - check_get_max_write_image_args, - check_get_image_2d_max_width, - check_get_image_2d_max_height, - check_get_image_3d_max_width, - check_get_image_3d_max_height, - check_get_image_3d_max_depth, + check_max_read_image_args, + check_max_write_image_args, + check_image_2d_max_width, + check_image_2d_max_height, + check_image_3d_max_width, + check_image_3d_max_height, + check_image_3d_max_depth, check_create_sub_devices_equally, check_create_sub_devices_by_counts, check_create_sub_devices_by_affinity_not_applicable, @@ -573,10 +622,18 @@ def check_platform(device): check_create_sub_devices_by_affinity_next_partitionable, check_print_device_info, check_repr, - check_get_global_mem_size, - check_get_local_mem_size, + check_global_mem_size, + check_local_mem_size, check_profiling_timer_resolution, check_platform, + check_parent_device, + check_filter_string, + check_vendor, + check_driver_version, + check_name, + check_default_selector_score, + check_backend, + check_device_type, ] From b31480351bdf4d189e44b9ca20ef7caa6329c370 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 28 Aug 2022 16:29:34 -0500 Subject: [PATCH 86/95] Added 3 more SyclDevice properties * dpctl.SyclDevice.global_mem_cache_size * 
dpctl.SyclDevice.global_mem_cache_line_size * dpctl.SyclDevice.global_mem_cache_type The last property output is a new enum dpctl.global_mem_cache_type which can assume 3 values: none, read_only, and read_write --- dpctl/__init__.py | 8 +++- dpctl/_backend.pxd | 12 +++++- dpctl/_sycl_device.pyx | 52 +++++++++++++++++++++++- dpctl/enum_types.py | 19 +++++++++ dpctl/tests/_device_attributes_checks.py | 22 +++++++++- 5 files changed, 108 insertions(+), 5 deletions(-) diff --git a/dpctl/__init__.py b/dpctl/__init__.py index 23bddf1222..2e8a70f470 100644 --- a/dpctl/__init__.py +++ b/dpctl/__init__.py @@ -73,7 +73,12 @@ from ._device_selection import select_device_with_aspects from ._sycl_timer import SyclTimer from ._version import get_versions -from .enum_types import backend_type, device_type, event_status_type +from .enum_types import ( + backend_type, + device_type, + event_status_type, + global_mem_cache_type, +) __all__ = [ "SyclContext", @@ -127,6 +132,7 @@ "device_type", "backend_type", "event_status_type", + "global_mem_cache_type", ] __all__ += [ "get_include", diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index c07df5097d..ec6fec65fc 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -21,7 +21,7 @@ types defined by dpctl's C API. 
""" -from libc.stdint cimport int64_t, uint32_t +from libc.stdint cimport int64_t, uint32_t, uint64_t from libcpp cimport bool @@ -112,6 +112,12 @@ cdef extern from "syclinterface/dpctl_sycl_enum_types.h": _RUNNING 'DPCTL_RUNNING' _COMPLETE 'DPCTL_COMPLETE' + ctypedef enum _global_mem_cache_type 'DPCTLGlobalMemCacheType': + _MEM_CACHE_TYPE_INDETERMINATE 'DPCTL_MEM_CACHE_TYPE_INDETERMINATE' + _MEM_CACHE_TYPE_NONE 'DPCTL_MEM_CACHE_TYPE_NONE' + _MEM_CACHE_TYPE_READ_ONLY 'DPCTL_MEM_CACHE_TYPE_READ_ONLY' + _MEM_CACHE_TYPE_READ_WRITE 'DPCTL_MEM_CACHE_TYPE_READ_WRITE' + cdef extern from "syclinterface/dpctl_sycl_types.h": cdef struct DPCTLOpaqueSyclContext @@ -195,6 +201,10 @@ cdef extern from "syclinterface/dpctl_sycl_device_interface.h": _partition_affinity_domain_type PartitionAffinityDomainTy) cdef DPCTLSyclDeviceRef DPCTLDevice_GetParentDevice(const DPCTLSyclDeviceRef DRef) cdef size_t DPCTLDevice_GetProfilingTimerResolution(const DPCTLSyclDeviceRef DRef) + cdef uint32_t DPCTLDevice_GetGlobalMemCacheLineSize(const DPCTLSyclDeviceRef DRef) + cdef uint64_t DPCTLDevice_GetGlobalMemCacheSize(const DPCTLSyclDeviceRef DRef) + cdef _global_mem_cache_type DPCTLDevice_GetGlobalMemCacheType( + const DPCTLSyclDeviceRef DRef) cdef extern from "syclinterface/dpctl_sycl_device_manager.h": diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 3de4a82a0d..723201c64c 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -34,6 +34,9 @@ from ._backend cimport ( # noqa: E211 DPCTLDevice_GetBackend, DPCTLDevice_GetDeviceType, DPCTLDevice_GetDriverVersion, + DPCTLDevice_GetGlobalMemCacheLineSize, + DPCTLDevice_GetGlobalMemCacheSize, + DPCTLDevice_GetGlobalMemCacheType, DPCTLDevice_GetGlobalMemSize, DPCTLDevice_GetImage2dMaxHeight, DPCTLDevice_GetImage2dMaxWidth, @@ -87,12 +90,13 @@ from ._backend cimport ( # noqa: E211 _aspect_type, _backend_type, _device_type, + _global_mem_cache_type, _partition_affinity_domain_type, ) -from .enum_types import backend_type, 
device_type +from .enum_types import backend_type, device_type, global_mem_cache_type -from libc.stdint cimport int64_t, uint32_t +from libc.stdint cimport int64_t, uint32_t, uint64_t from libc.stdlib cimport free, malloc from ._sycl_platform cimport SyclPlatform @@ -1098,6 +1102,50 @@ cdef class SyclDevice(_SyclDevice): raise RuntimeError("Failed to get device timer resolution.") return timer_res + @property + def global_mem_cache_type(self): + """ Global device cache memory type. + + Returns: + global_mem_cache_type: type of cache memory + Raises: + A RuntimeError is raised if an unrecognized memory type + is reported by runtime. + """ + cdef _global_mem_cache_type gmcTy = ( + DPCTLDevice_GetGlobalMemCacheType(self._device_ref) + ) + if gmcTy == _global_mem_cache_type._MEM_CACHE_TYPE_READ_WRITE: + return global_mem_cache_type.read_write + elif gmcTy == _global_mem_cache_type._MEM_CACHE_TYPE_READ_ONLY: + return global_mem_cache_type.read_only + elif gmcTy == _global_mem_cache_type._MEM_CACHE_TYPE_NONE: + return global_mem_cache_type.none + elif gmcTy == _global_mem_cache_type._MEM_CACHE_TYPE_INDETERMINATE: + raise RuntimeError("Unrecognized global memory cache type reported") + + @property + def global_mem_cache_size(self): + """ Global device memory cache size. + + Returns: + int: Cache size in bytes + """ + cdef uint64_t cache_sz = DPCTLDevice_GetGlobalMemCacheSize( + self._device_ref) + return cache_sz + + @property + def global_mem_cache_line_size(self): + """ Global device memory cache line size. + + Returns: + int: Cache size in bytes + """ + cdef uint64_t cache_line_sz = DPCTLDevice_GetGlobalMemCacheLineSize( + self._device_ref) + return cache_line_sz + cdef cpp_bool equals(self, SyclDevice other): """ Returns ``True`` if the :class:`dpctl.SyclDevice` argument has the same _device_ref as this SyclDevice. 
diff --git a/dpctl/enum_types.py b/dpctl/enum_types.py index bdf95959c0..d83aac5f87 100644 --- a/dpctl/enum_types.py +++ b/dpctl/enum_types.py @@ -96,3 +96,22 @@ class event_status_type(Enum): submitted = auto() running = auto() complete = auto() + + +class global_mem_cache_type(Enum): + """ + An enumeration of global memory cache types for a device. + + :Example: + .. code-block:: python + + import dpctl + dev = dpctl.SyclDevice() + print(dev.global_mem_cache_type) + # Possible output: + """ + + indeterminate = auto() + none = auto() + read_only = auto() + read_write = auto() diff --git a/dpctl/tests/_device_attributes_checks.py b/dpctl/tests/_device_attributes_checks.py index 4221cefe73..14c0b973a5 100644 --- a/dpctl/tests/_device_attributes_checks.py +++ b/dpctl/tests/_device_attributes_checks.py @@ -543,7 +543,7 @@ def check_driver_version(device): def check_vendor(device): ve = device.vendor - assert ve + assert ve or device.is_host assert type(ve) is str @@ -563,6 +563,23 @@ def check_device_type(device): assert type(dt) is dpctl.device_type +def check_global_mem_cache_type(device): + gmc_ty = device.global_mem_cache_type + assert type(gmc_ty) is dpctl.global_mem_cache_type + + +def check_global_mem_cache_size(device): + gmc_sz = device.global_mem_cache_size + assert type(gmc_sz) is int + assert gmc_sz + + +def check_global_mem_cache_line_size(device): + gmc_sz = device.global_mem_cache_line_size + assert type(gmc_sz) is int + assert gmc_sz + + list_of_checks = [ check_max_compute_units, check_max_work_item_dims, @@ -634,6 +651,9 @@ def check_device_type(device): check_default_selector_score, check_backend, check_device_type, + check_global_mem_cache_type, + check_global_mem_cache_size, + check_global_mem_cache_line_size, ] From c3ae01ee3bb0db99f54a7c1f1a3ebf17f51ab8a0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 29 Aug 2022 09:39:46 -0500 Subject: [PATCH 87/95] Added all but one kernel_device_specific properties max_sub_group_size property is 
currently on hold due to an issue in DPC++ runtime --- .../include/dpctl_sycl_kernel_interface.h | 109 ++++++++++- .../source/dpctl_sycl_kernel_interface.cpp | 171 +++++++++++++++- .../tests/test_sycl_kernel_interface.cpp | 182 ++++++++++++++++-- 3 files changed, 434 insertions(+), 28 deletions(-) diff --git a/libsyclinterface/include/dpctl_sycl_kernel_interface.h b/libsyclinterface/include/dpctl_sycl_kernel_interface.h index 3bc0470bd7..926255ea06 100644 --- a/libsyclinterface/include/dpctl_sycl_kernel_interface.h +++ b/libsyclinterface/include/dpctl_sycl_kernel_interface.h @@ -39,11 +39,12 @@ DPCTL_C_EXTERN_C_BEGIN */ /*! - * @brief Returns the number of arguments for the OpenCL kernel. + * @brief Returns the number of arguments for the sycl + * interoperability kernel. * - * @param KRef DPCTLSyclKernelRef pointer to an OpenCL + * @param KRef DPCTLSyclKernelRef pointer to an SYCL * interoperability kernel. - * @return Returns the number of arguments for the OpenCL interoperability + * @return Returns the number of arguments for the interoperability * kernel. * @ingroup KernelInterface */ @@ -51,13 +52,111 @@ DPCTL_API size_t DPCTLKernel_GetNumArgs(__dpctl_keep const DPCTLSyclKernelRef KRef); /*! - * @brief Deletes the DPCTLSyclKernelRef after casting it to a sycl::kernel. + * @brief Deletes the DPCTLSyclKernelRef after casting it to a + * ``sycl::kernel``. * - * @param KRef DPCTLSyclKernelRef pointer to an OpenCL + * @param KRef DPCTLSyclKernelRef pointer to an SYCL * interoperability kernel. * @ingroup KernelInterface */ DPCTL_API void DPCTLKernel_Delete(__dpctl_take DPCTLSyclKernelRef KRef); +/*! + * !brief Wrapper around + * `kernel::get_info()`. + * + * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * interoperability kernel. + * @return Returns the maximum number of work-items in a work-group + * that can be used to execute a kernel on a specific device. 
+ * @ingroup KernelInterface + */ +DPCTL_API +size_t DPCTLKernel_GetWorkGroupSize(__dpctl_keep const DPCTLSyclKernelRef KRef); + +/*! + * !brief Wrapper around + * `kernel::get_info()`. + * + * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * interoperability kernel. + * @return Returns a value, of which work-group size is preferred to be a + * multiple, for executing a kernel on a specific device. + * @ingroup KernelInterface + */ +DPCTL_API +size_t DPCTLKernel_GetPreferredWorkGroupSizeMultiple( + __dpctl_keep const DPCTLSyclKernelRef KRef); + +/*! + * !brief Wrapper around + * `kernel::get_info()`. + * + * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * interoperability kernel. + * @return Returns the minimum amount of private memory, in bytes, + * used by each work-item in the kernel. + * @ingroup KernelInterface + */ +DPCTL_API +size_t +DPCTLKernel_GetPrivateMemSize(__dpctl_keep const DPCTLSyclKernelRef KRef); + +/*! + * !brief Wrapper around + * `kernel::get_info()`. + * + * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * interoperability kernel. + * @return Returns the maximum number of sub-groups for this kernel. + * @ingroup KernelInterface + */ +DPCTL_API +uint32_t +DPCTLKernel_GetMaxNumSubGroups(__dpctl_keep const DPCTLSyclKernelRef KRef); + +#if 0 +/*! + * !brief Wrapper around + * `kernel::get_info()`. + * + * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * interoperability kernel. + * @return Returns the maximum sub-group size for this kernel. + * @ingroup KernelInterface + */ +DPCTL_API +uint32_t +DPCTLKernel_GetMaxSubGroupSize(__dpctl_keep const DPCTLSyclKernelRef KRef); +#endif + +/*! + * !brief Wrapper around + * `kernel::get_info()`. + * + * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * interoperability kernel. + * @return Returns the number of sub-groups specified by the kernel, + * or 0 (if not specified). 
+ * @ingroup KernelInterface + */ +DPCTL_API +uint32_t +DPCTLKernel_GetCompileNumSubGroups(__dpctl_keep const DPCTLSyclKernelRef KRef); + +/*! + * !brief Wrapper around + * `kernel::get_info()`. + * + * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * interoperability kernel. + * @return Returns the required sub-group size specified by this kernel, + * or 0 (if not specified). + * @ingroup KernelInterface + */ +DPCTL_API +uint32_t +DPCTLKernel_GetCompileSubGroupSize(__dpctl_keep const DPCTLSyclKernelRef KRef); + DPCTL_C_EXTERN_C_END diff --git a/libsyclinterface/source/dpctl_sycl_kernel_interface.cpp b/libsyclinterface/source/dpctl_sycl_kernel_interface.cpp index e2fc31bba1..236546fb8d 100644 --- a/libsyclinterface/source/dpctl_sycl_kernel_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_kernel_interface.cpp @@ -29,6 +29,7 @@ #include "dpctl_error_handlers.h" #include "dpctl_string_utils.hpp" #include /* Sycl headers */ +#include using namespace cl::sycl; @@ -39,21 +40,177 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(kernel, DPCTLSyclKernelRef) } /* end of anonymous namespace */ -size_t DPCTLKernel_GetNumArgs(__dpctl_keep const DPCTLSyclKernelRef Kernel) +size_t DPCTLKernel_GetNumArgs(__dpctl_keep const DPCTLSyclKernelRef KRef) { - if (!Kernel) { + if (!KRef) { error_handler("Cannot get the number of arguments from " "DPCTLSyclKernelRef as input is a nullptr.", __FILE__, __func__, __LINE__); return -1; } - auto SyclKernel = unwrap(Kernel); - auto num_args = SyclKernel->get_info(); - return (size_t)num_args; + auto sycl_kernel = unwrap(KRef); + auto num_args = sycl_kernel->get_info(); + return static_cast(num_args); } -void DPCTLKernel_Delete(__dpctl_take DPCTLSyclKernelRef Kernel) +void DPCTLKernel_Delete(__dpctl_take DPCTLSyclKernelRef KRef) { - delete unwrap(Kernel); + delete unwrap(KRef); +} + +size_t DPCTLKernel_GetWorkGroupSize(__dpctl_keep const DPCTLSyclKernelRef KRef) +{ + if (!KRef) { + error_handler("Input DPCTKSyclKernelRef is nullptr.", __FILE__, + 
__func__, __LINE__); + return 0; + } + + auto sycl_kern = unwrap(KRef); + auto devs = sycl_kern->get_kernel_bundle().get_devices(); + if (devs.empty()) { + error_handler("Input DPCTKSyclKernelRef has no associated device.", + __FILE__, __func__, __LINE__); + return 0; + } + auto v = sycl_kern->get_info( + devs[0]); + return static_cast(v); +} + +size_t DPCTLKernel_GetPreferredWorkGroupSizeMultiple( + __dpctl_keep const DPCTLSyclKernelRef KRef) +{ + if (!KRef) { + error_handler("Input DPCTKSyclKernelRef is nullptr.", __FILE__, + __func__, __LINE__); + return 0; + } + + auto sycl_kern = unwrap(KRef); + auto devs = sycl_kern->get_kernel_bundle().get_devices(); + if (devs.empty()) { + error_handler("Input DPCTKSyclKernelRef has no associated device.", + __FILE__, __func__, __LINE__); + return 0; + } + auto v = sycl_kern->get_info< + info::kernel_device_specific::preferred_work_group_size_multiple>( + devs[0]); + return static_cast(v); +} + +size_t DPCTLKernel_GetPrivateMemSize(__dpctl_keep const DPCTLSyclKernelRef KRef) +{ + if (!KRef) { + error_handler("Input DPCTKSyclKernelRef is nullptr.", __FILE__, + __func__, __LINE__); + return 0; + } + + auto sycl_kern = unwrap(KRef); + auto devs = sycl_kern->get_kernel_bundle().get_devices(); + if (devs.empty()) { + error_handler("Input DPCTKSyclKernelRef has no associated device.", + __FILE__, __func__, __LINE__); + return 0; + } + auto v = + sycl_kern->get_info( + devs[0]); + return static_cast(v); +} + +uint32_t +DPCTLKernel_GetMaxNumSubGroups(__dpctl_keep const DPCTLSyclKernelRef KRef) +{ + if (!KRef) { + error_handler("Input DPCTKSyclKernelRef is nullptr.", __FILE__, + __func__, __LINE__); + return 0; + } + + auto sycl_kern = unwrap(KRef); + auto devs = sycl_kern->get_kernel_bundle().get_devices(); + if (devs.empty()) { + error_handler("Input DPCTKSyclKernelRef has no associated device.", + __FILE__, __func__, __LINE__); + return 0; + } + auto v = + sycl_kern->get_info( + devs[0]); + return static_cast(v); +} + +#if 0 +// 
commented out due to bug in DPC++ runtime, get_info for max_sub_group_size +// exported by libsycl has different, not SPEC-compliant signature +uint32_t +DPCTLKernel_GetMaxSubGroupSize(__dpctl_keep const DPCTLSyclKernelRef KRef) +{ + if (!KRef) { + error_handler("Input DPCTKSyclKernelRef is nullptr.", __FILE__, + __func__, __LINE__); + return 0; + } + + auto sycl_kern = unwrap(KRef); + auto devs = sycl_kern->get_kernel_bundle().get_devices(); + if (devs.empty()) { + error_handler("Input DPCTKSyclKernelRef has no associated device.", + __FILE__, __func__, __LINE__); + return 0; + } + auto v = sycl_kern + ->get_info(devs[0]); + return v; +} +#endif + +uint32_t +DPCTLKernel_GetCompileNumSubGroups(__dpctl_keep const DPCTLSyclKernelRef KRef) +{ + if (!KRef) { + error_handler("Input DPCTKSyclKernelRef is nullptr.", __FILE__, + __func__, __LINE__); + return 0; + } + + auto sycl_kern = unwrap(KRef); + auto devs = sycl_kern->get_kernel_bundle().get_devices(); + if (devs.empty()) { + error_handler("Input DPCTKSyclKernelRef has no associated device.", + __FILE__, __func__, __LINE__); + return 0; + } + auto v = + sycl_kern + ->get_info( + devs[0]); + return static_cast(v); +} + +uint32_t +DPCTLKernel_GetCompileSubGroupSize(__dpctl_keep const DPCTLSyclKernelRef KRef) +{ + if (!KRef) { + error_handler("Input DPCTKSyclKernelRef is nullptr.", __FILE__, + __func__, __LINE__); + return 0; + } + + auto sycl_kern = unwrap(KRef); + auto devs = sycl_kern->get_kernel_bundle().get_devices(); + if (devs.empty()) { + error_handler("Input DPCTKSyclKernelRef has no associated device.", + __FILE__, __func__, __LINE__); + return 0; + } + auto v = + sycl_kern + ->get_info( + devs[0]); + return static_cast(v); } diff --git a/libsyclinterface/tests/test_sycl_kernel_interface.cpp b/libsyclinterface/tests/test_sycl_kernel_interface.cpp index 89cc586aab..d16c917d88 100644 --- a/libsyclinterface/tests/test_sycl_kernel_interface.cpp +++ b/libsyclinterface/tests/test_sycl_kernel_interface.cpp @@ -57,17 
+57,33 @@ struct TestDPCTLSyclKernelInterface const char *CompileOpts = "-cl-fast-relaxed-math"; DPCTLSyclDeviceSelectorRef DSRef = nullptr; DPCTLSyclDeviceRef DRef = nullptr; + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclContextRef CtxRef = nullptr; + DPCTLSyclKernelBundleRef KBRef = nullptr; + DPCTLSyclKernelRef AddKRef = nullptr; + DPCTLSyclKernelRef AxpyKRef = nullptr; TestDPCTLSyclKernelInterface() { DSRef = DPCTLFilterSelector_Create(GetParam()); DRef = DPCTLDevice_CreateFromSelector(DSRef); + QRef = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); + CtxRef = DPCTLQueue_GetContext(QRef); + KBRef = DPCTLKernelBundle_CreateFromOCLSource( + CtxRef, DRef, CLProgramStr, CompileOpts); + AddKRef = DPCTLKernelBundle_GetKernel(KBRef, "add"); + AxpyKRef = DPCTLKernelBundle_GetKernel(KBRef, "axpy"); } ~TestDPCTLSyclKernelInterface() { DPCTLDeviceSelector_Delete(DSRef); DPCTLDevice_Delete(DRef); + DPCTLQueue_Delete(QRef); + DPCTLContext_Delete(CtxRef); + DPCTLKernelBundle_Delete(KBRef); + DPCTLKernel_Delete(AddKRef); + DPCTLKernel_Delete(AxpyKRef); } void SetUp() @@ -83,30 +99,164 @@ struct TestDPCTLSyclKernelInterface TEST_P(TestDPCTLSyclKernelInterface, CheckGetNumArgs) { - auto QueueRef = DPCTLQueue_CreateForDevice(DRef, nullptr, 0); - auto CtxRef = DPCTLQueue_GetContext(QueueRef); - auto KBRef = DPCTLKernelBundle_CreateFromOCLSource( - CtxRef, DRef, CLProgramStr, CompileOpts); - auto AddKernel = DPCTLKernelBundle_GetKernel(KBRef, "add"); - auto AxpyKernel = DPCTLKernelBundle_GetKernel(KBRef, "axpy"); - ASSERT_EQ(DPCTLKernel_GetNumArgs(AddKernel), 3ul); - ASSERT_EQ(DPCTLKernel_GetNumArgs(AxpyKernel), 4ul); + ASSERT_EQ(DPCTLKernel_GetNumArgs(AddKRef), 3ul); + ASSERT_EQ(DPCTLKernel_GetNumArgs(AxpyKRef), 4ul); +} + +TEST_P(TestDPCTLSyclKernelInterface, CheckGetWorkGroupSize) +{ + + size_t add_wgsz = 0, axpy_wgsz = 0; + EXPECT_NO_FATAL_FAILURE(add_wgsz = DPCTLKernel_GetWorkGroupSize(AddKRef)); + EXPECT_NO_FATAL_FAILURE(axpy_wgsz = DPCTLKernel_GetWorkGroupSize(AxpyKRef)); + 
+ ASSERT_TRUE(add_wgsz != 0); + ASSERT_TRUE(axpy_wgsz != 0); +} + +TEST_P(TestDPCTLSyclKernelInterface, CheckGetPreferredWorkGroupSizeMultiple) +{ + + size_t add_wgsz_m = 0, axpy_wgsz_m = 0; + EXPECT_NO_FATAL_FAILURE( + add_wgsz_m = DPCTLKernel_GetPreferredWorkGroupSizeMultiple(AddKRef)); + EXPECT_NO_FATAL_FAILURE( + axpy_wgsz_m = DPCTLKernel_GetPreferredWorkGroupSizeMultiple(AxpyKRef)); + + ASSERT_TRUE(add_wgsz_m != 0); + ASSERT_TRUE(axpy_wgsz_m != 0); +} + +TEST_P(TestDPCTLSyclKernelInterface, CheckGetPrivateMemSize) +{ + + size_t add_private_mem_sz = 0, axpy_private_mem_sz = 0; + EXPECT_NO_FATAL_FAILURE(add_private_mem_sz = + DPCTLKernel_GetPrivateMemSize(AddKRef)); + EXPECT_NO_FATAL_FAILURE(axpy_private_mem_sz = + DPCTLKernel_GetPrivateMemSize(AxpyKRef)); + + ASSERT_TRUE(add_private_mem_sz != 0); + ASSERT_TRUE(axpy_private_mem_sz != 0); +} - DPCTLQueue_Delete(QueueRef); - DPCTLContext_Delete(CtxRef); - DPCTLKernelBundle_Delete(KBRef); - DPCTLKernel_Delete(AddKernel); - DPCTLKernel_Delete(AxpyKernel); +TEST_P(TestDPCTLSyclKernelInterface, CheckGetMaxNumSubGroups) +{ + + uint32_t add_mnsg = 0, axpy_mnsg = 0; + EXPECT_NO_FATAL_FAILURE(add_mnsg = DPCTLKernel_GetMaxNumSubGroups(AddKRef)); + EXPECT_NO_FATAL_FAILURE(axpy_mnsg = + DPCTLKernel_GetMaxNumSubGroups(AxpyKRef)); + + ASSERT_TRUE(add_mnsg != 0); + ASSERT_TRUE(axpy_mnsg != 0); +} + +/* +TEST_P(TestDPCTLSyclKernelInterface, CheckGetMaxSubGroupSize) +{ + + uint32_t add_msg_sz = 0, axpy_msg_sz = 0; + EXPECT_NO_FATAL_FAILURE(add_msg_sz = + DPCTLKernel_GetMaxSubGroupSize(AddKRef)); + EXPECT_NO_FATAL_FAILURE(axpy_msg_sz = + DPCTLKernel_GetMaxSubGroupSize(AxpyKRef)); + + ASSERT_TRUE(add_msg_sz != 0); + ASSERT_TRUE(axpy_msg_sz != 0); } +*/ -TEST_P(TestDPCTLSyclKernelInterface, CheckNullPtrArg) +TEST_P(TestDPCTLSyclKernelInterface, CheckGetCompileNumSubGroups) { - DPCTLSyclKernelRef AddKernel = nullptr; - ASSERT_EQ(DPCTLKernel_GetNumArgs(AddKernel), -1); + uint32_t add_cnsg = 0, axpy_cnsg = 0; + 
EXPECT_NO_FATAL_FAILURE(add_cnsg = + DPCTLKernel_GetCompileNumSubGroups(AddKRef)); + EXPECT_NO_FATAL_FAILURE(axpy_cnsg = + DPCTLKernel_GetCompileNumSubGroups(AxpyKRef)); + + EXPECT_TRUE(add_cnsg >= 0); + EXPECT_TRUE(axpy_cnsg >= 0); +} + +TEST_P(TestDPCTLSyclKernelInterface, CheckGetCompileSubGroupSize) +{ + + uint32_t add_csg_sz = 0, axpy_csg_sz = 0; + EXPECT_NO_FATAL_FAILURE(add_csg_sz = + DPCTLKernel_GetCompileSubGroupSize(AddKRef)); + EXPECT_NO_FATAL_FAILURE(axpy_csg_sz = + DPCTLKernel_GetCompileSubGroupSize(AxpyKRef)); + EXPECT_TRUE(add_csg_sz >= 0); + EXPECT_TRUE(axpy_csg_sz >= 0); } INSTANTIATE_TEST_SUITE_P(TestKernelInterfaceFunctions, TestDPCTLSyclKernelInterface, ::testing::Values("opencl:gpu:0", "opencl:cpu:0")); + +struct TestDPCTLSyclKernelNullArgs : public ::testing::Test +{ + DPCTLSyclKernelRef Null_KRef; + TestDPCTLSyclKernelNullArgs() : Null_KRef(nullptr) {} + ~TestDPCTLSyclKernelNullArgs() {} +}; + +TEST_F(TestDPCTLSyclKernelNullArgs, CheckNumArgsNullKRef) +{ + ASSERT_EQ(DPCTLKernel_GetNumArgs(Null_KRef), -1); +} + +TEST_F(TestDPCTLSyclKernelNullArgs, CheckGetWorkGroupsSizeNullKRef) +{ + DPCTLSyclKernelRef NullKRef = nullptr; + + ASSERT_EQ(DPCTLKernel_GetWorkGroupSize(NullKRef), 0); +} + +TEST_F(TestDPCTLSyclKernelNullArgs, + CheckGetPreferredWorkGroupsSizeMultipleNullKRef) +{ + DPCTLSyclKernelRef NullKRef = nullptr; + + ASSERT_EQ(DPCTLKernel_GetPreferredWorkGroupSizeMultiple(NullKRef), 0); +} + +TEST_F(TestDPCTLSyclKernelNullArgs, CheckGetPrivateMemSizeNullKRef) +{ + DPCTLSyclKernelRef NullKRef = nullptr; + + ASSERT_EQ(DPCTLKernel_GetPrivateMemSize(NullKRef), 0); +} + +TEST_F(TestDPCTLSyclKernelNullArgs, CheckGetMaxNumSubGroupsNullKRef) +{ + DPCTLSyclKernelRef NullKRef = nullptr; + + ASSERT_EQ(DPCTLKernel_GetMaxNumSubGroups(NullKRef), 0); +} + +/* +TEST_F(TestDPCTLSyclKernelNullArgs, CheckGetMaxSubGroupSizeNullKRef) +{ + DPCTLSyclKernelRef NullKRef = nullptr; + + ASSERT_EQ(DPCTLKernel_GetMaxSubGroupSize(NullKRef), 0); +} +*/ + 
+TEST_F(TestDPCTLSyclKernelNullArgs, CheckGetCompileNumSubGroupsNullKRef) +{ + DPCTLSyclKernelRef NullKRef = nullptr; + + ASSERT_EQ(DPCTLKernel_GetCompileNumSubGroups(NullKRef), 0); +} + +TEST_F(TestDPCTLSyclKernelNullArgs, CheckGetCompileSubGroupSizeNullKRef) +{ + DPCTLSyclKernelRef NullKRef = nullptr; + + ASSERT_EQ(DPCTLKernel_GetCompileSubGroupSize(NullKRef), 0); +} From ba2ab2e68bbb67084b95060bb3c4ed73c452e678 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 29 Aug 2022 16:51:55 -0500 Subject: [PATCH 88/95] Private mem size may be 0 on CPU devices --- libsyclinterface/tests/test_sycl_kernel_interface.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libsyclinterface/tests/test_sycl_kernel_interface.cpp b/libsyclinterface/tests/test_sycl_kernel_interface.cpp index d16c917d88..016a3ccd3e 100644 --- a/libsyclinterface/tests/test_sycl_kernel_interface.cpp +++ b/libsyclinterface/tests/test_sycl_kernel_interface.cpp @@ -137,8 +137,14 @@ TEST_P(TestDPCTLSyclKernelInterface, CheckGetPrivateMemSize) EXPECT_NO_FATAL_FAILURE(axpy_private_mem_sz = DPCTLKernel_GetPrivateMemSize(AxpyKRef)); - ASSERT_TRUE(add_private_mem_sz != 0); - ASSERT_TRUE(axpy_private_mem_sz != 0); + if (DPCTLDevice_IsGPU(DRef)) { + ASSERT_TRUE(add_private_mem_sz != 0); + ASSERT_TRUE(axpy_private_mem_sz != 0); + } + else { + ASSERT_TRUE(add_private_mem_sz >= 0); + ASSERT_TRUE(axpy_private_mem_sz >= 0); + } } TEST_P(TestDPCTLSyclKernelInterface, CheckGetMaxNumSubGroups) From 1b85089f6e76d0a20888a289cd5458961904824b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 29 Aug 2022 16:52:33 -0500 Subject: [PATCH 89/95] Exported kernel device-specific properties --- dpctl/_backend.pxd | 8 +++++ dpctl/program/_program.pyx | 64 +++++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index ec6fec65fc..dba49a36e0 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -264,6 
+264,14 @@ cdef extern from "syclinterface/dpctl_sycl_event_interface.h": cdef extern from "syclinterface/dpctl_sycl_kernel_interface.h": cdef size_t DPCTLKernel_GetNumArgs(const DPCTLSyclKernelRef KRef) cdef void DPCTLKernel_Delete(DPCTLSyclKernelRef KRef) + cdef size_t DPCTLKernel_GetWorkGroupSize(const DPCTLSyclKernelRef KRef) + cdef size_t DPCTLKernel_GetPreferredWorkGroupSizeMultiple(const DPCTLSyclKernelRef KRef) + cdef size_t DPCTLKernel_GetPrivateMemSize(const DPCTLSyclKernelRef KRef) + cdef uint32_t DPCTLKernel_GetMaxNumSubGroups(const DPCTLSyclKernelRef KRef) +## Next line is commented out due to issue in DPC++ runtime +# cdef uint32_t DPCTLKernel_GetMaxSubGroupSize(const DPCTLSyclKernelRef KRef) + cdef uint32_t DPCTLKernel_GetCompileNumSubGroups(const DPCTLSyclKernelRef KRef) + cdef uint32_t DPCTLKernel_GetCompileSubGroupSize(const DPCTLSyclKernelRef KRef) cdef extern from "syclinterface/dpctl_sycl_platform_manager.h": diff --git a/dpctl/program/_program.pyx b/dpctl/program/_program.pyx index f638087793..7ed89af8e7 100644 --- a/dpctl/program/_program.pyx +++ b/dpctl/program/_program.pyx @@ -26,11 +26,18 @@ a OpenCL source string or a SPIR-V binary file. """ cimport cython.array +from libc.stdint cimport uint32_t -from dpctl._backend cimport ( # noqa: E211, E402 +from dpctl._backend cimport ( # noqa: E211, E402; DPCTLCString_Delete, DPCTLKernel_Delete, + DPCTLKernel_GetCompileNumSubGroups, + DPCTLKernel_GetCompileSubGroupSize, + DPCTLKernel_GetMaxNumSubGroups, DPCTLKernel_GetNumArgs, + DPCTLKernel_GetPreferredWorkGroupSizeMultiple, + DPCTLKernel_GetPrivateMemSize, + DPCTLKernel_GetWorkGroupSize, DPCTLKernelBundle_CreateFromOCLSource, DPCTLKernelBundle_CreateFromSpirv, DPCTLKernelBundle_Delete, @@ -95,6 +102,61 @@ cdef class SyclKernel: """ return int(self._kernel_ref) + @property + def work_group_size(self): + """ Returns the maximum number of work-items in a work-group that can + be used to execute the kernel on device it was built for. 
+ """ + cdef size_t v = DPCTLKernel_GetWorkGroupSize(self._kernel_ref) + return v + + @property + def preferred_work_group_size_multiple(self): + """ Returns a value, of which work-group size is preferred to be + a multiple, for executing the kernel on the device it was built for. + """ + cdef size_t v = DPCTLKernel_GetPreferredWorkGroupSizeMultiple( + self._kernel_ref) + return v + + @property + def private_mem_size(self): + """ Returns the minimum amount of private memory, in bytes, used by each + work-item in the kernel. + """ + cdef size_t v = DPCTLKernel_GetPrivateMemSize(self._kernel_ref) + return v + + @property + def max_num_sub_groups(self): + """ Returns the maximum number of sub-groups for this kernel. + """ + cdef uint32_t n = DPCTLKernel_GetMaxNumSubGroups(self._kernel_ref) + return n + + @property + def max_sub_group_size(self): + """ Returns the maximum sub-groups size for this kernel. + """ + cdef uint32_t sz = 0 + return NotImplemented + + @property + def compile_num_sub_groups(self): + """ Returns the number of sub-groups specified by this kernel, + or 0 (if not specified). + """ + cdef size_t n = DPCTLKernel_GetCompileNumSubGroups(self._kernel_ref) + return n + + @property + def compile_sub_group_size(self): + """ Returns the required sub-group size specified by this kernel, + or 0 (if not specified). 
+ """ + cdef size_t n = DPCTLKernel_GetCompileSubGroupSize(self._kernel_ref) + return n + cdef class SyclProgram: """ Wraps a ``sycl::kernel_bundle`` object From e1e28be5a7c933f1f745e9d083d3a9fa77307b74 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 30 Aug 2022 08:13:29 -0500 Subject: [PATCH 90/95] Improved docstring for DPCTLSyclKernelBundleRef --- libsyclinterface/include/dpctl_sycl_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libsyclinterface/include/dpctl_sycl_types.h b/libsyclinterface/include/dpctl_sycl_types.h index b00cab4702..2945fcef1b 100644 --- a/libsyclinterface/include/dpctl_sycl_types.h +++ b/libsyclinterface/include/dpctl_sycl_types.h @@ -60,7 +60,8 @@ typedef struct DPCTLOpaqueSyclEvent *DPCTLSyclEventRef; typedef struct DPCTLOpaqueSyclKernel *DPCTLSyclKernelRef; /*! - * @brief Opaque pointer to a ``sycl::kernel_bundle`` + * @brief Opaque pointer to a + * ``sycl::kernel_bundle`` * */ typedef struct DPCTLOpaqueSyclKernelBundle *DPCTLSyclKernelBundleRef; From eae46d25dbdab7e81a35a6cf9fea0516c505aa18 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 30 Aug 2022 08:23:18 -0500 Subject: [PATCH 91/95] Added tests for kernel_device_specific properties --- dpctl/tests/test_sycl_program.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/dpctl/tests/test_sycl_program.py b/dpctl/tests/test_sycl_program.py index bd55e2b4cf..ab238a3467 100644 --- a/dpctl/tests/test_sycl_program.py +++ b/dpctl/tests/test_sycl_program.py @@ -48,6 +48,24 @@ def _check_multi_kernel_program(prog): assert type(addKernel.addressof_ref()) is int assert type(axpyKernel.addressof_ref()) is int + for krn in [addKernel, axpyKernel]: + wgsz = krn.work_group_size + assert type(wgsz) is int + pwgszm = krn.preferred_work_group_size_multiple + assert type(pwgszm) is int + pmsz = krn.private_mem_size + assert type(pmsz) is int + vmnsg = krn.max_num_sub_groups + assert type(vmnsg) is int + v = krn.max_sub_group_size + 
assert ( + v == NotImplemented + ), "SyclKernel.max_sub_group_size acquired implementation, fix the test" + cmnsg = krn.compile_num_sub_groups + assert type(cmnsg) is int + cmsgsz = krn.compile_num_sub_groups + assert type(cmsgsz) is int + def test_create_program_from_source_ocl(): oclSrc = " \ From ff1cc3d730f41104b89e9c0900535d7b7d5ef6a7 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 30 Aug 2022 08:27:02 -0500 Subject: [PATCH 92/95] Added property dpctl.program.SyclKernel.num_args --- dpctl/program/_program.pyx | 6 ++++++ dpctl/tests/test_sycl_program.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/dpctl/program/_program.pyx b/dpctl/program/_program.pyx index 7ed89af8e7..5cc11922b5 100644 --- a/dpctl/program/_program.pyx +++ b/dpctl/program/_program.pyx @@ -102,6 +102,12 @@ cdef class SyclKernel: """ return int(self._kernel_ref) + @property + def num_args(self): + """ Property equivalent to method call `SyclKernel.get_num_args()` + """ + return self.get_num_args() + @property def work_group_size(self): """ Returns the maximum number of work-items in a work-group that can diff --git a/dpctl/tests/test_sycl_program.py b/dpctl/tests/test_sycl_program.py index ab238a3467..9c10b4a8ff 100644 --- a/dpctl/tests/test_sycl_program.py +++ b/dpctl/tests/test_sycl_program.py @@ -49,6 +49,8 @@ def _check_multi_kernel_program(prog): assert type(axpyKernel.addressof_ref()) is int for krn in [addKernel, axpyKernel]: + na = krn.num_args + assert na == krn.get_num_args() wgsz = krn.work_group_size assert type(wgsz) is int pwgszm = krn.preferred_work_group_size_multiple From 4db6d2003ac4ced93917af3fe378372c0251ba17 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Tue, 30 Aug 2022 09:56:38 -0500 Subject: [PATCH 93/95] Fixed test for sycl_kernel, added test for exercising equal testing for SyclDevice and object of another type --- dpctl/tests/test_sycl_device.py | 7 +++++++ dpctl/tests/test_sycl_program.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) 
diff --git a/dpctl/tests/test_sycl_device.py b/dpctl/tests/test_sycl_device.py index c938aec466..4a6e42f8a4 100644 --- a/dpctl/tests/test_sycl_device.py +++ b/dpctl/tests/test_sycl_device.py @@ -129,6 +129,13 @@ def test_hashing_of_device(): assert device_dict +def test_equal(): + d1 = dpctl.SyclDevice() + d2 = dpctl.SyclDevice() + assert d1 != Ellipsis + assert d1 == d2 + + list_of_supported_aspects = [ "cpu", "gpu", diff --git a/dpctl/tests/test_sycl_program.py b/dpctl/tests/test_sycl_program.py index 9c10b4a8ff..da435704f4 100644 --- a/dpctl/tests/test_sycl_program.py +++ b/dpctl/tests/test_sycl_program.py @@ -65,7 +65,7 @@ def _check_multi_kernel_program(prog): ), "SyclKernel.max_sub_group_size acquired implementation, fix the test" cmnsg = krn.compile_num_sub_groups assert type(cmnsg) is int - cmsgsz = krn.compile_num_sub_groups + cmsgsz = krn.compile_sub_group_size assert type(cmsgsz) is int From 9f8c7089184aa1fa112f293d3ddd4d75bc22783b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sun, 4 Sep 2022 07:50:25 -0500 Subject: [PATCH 94/95] Adrressed PR review feedback --- dpctl/_sycl_device.pyx | 6 ++++-- dpctl/program/_program.pyx | 3 ++- .../include/dpctl_sycl_kernel_interface.h | 15 ++++++++------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index 723201c64c..155e4f2ca8 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -1132,7 +1132,8 @@ cdef class SyclDevice(_SyclDevice): int: Cache size in bytes """ cdef uint64_t cache_sz = DPCTLDevice_GetGlobalMemCacheSize( - self._device_ref) + self._device_ref + ) return cache_sz @property @@ -1143,7 +1144,8 @@ cdef class SyclDevice(_SyclDevice): int: Cache size in bytes """ cdef uint64_t cache_line_sz = DPCTLDevice_GetGlobalMemCacheLineSize( - self._device_ref) + self._device_ref + ) return cache_line_sz cdef cpp_bool equals(self, SyclDevice other): diff --git a/dpctl/program/_program.pyx b/dpctl/program/_program.pyx index 
5cc11922b5..7c2341a883 100644 --- a/dpctl/program/_program.pyx +++ b/dpctl/program/_program.pyx @@ -122,7 +122,8 @@ cdef class SyclKernel: a multiple, for executing the kernel on the device it was built for. """ cdef size_t v = DPCTLKernel_GetPreferredWorkGroupSizeMultiple( - self._kernel_ref) + self._kernel_ref + ) return v @property diff --git a/libsyclinterface/include/dpctl_sycl_kernel_interface.h b/libsyclinterface/include/dpctl_sycl_kernel_interface.h index 926255ea06..6bc0ae83c8 100644 --- a/libsyclinterface/include/dpctl_sycl_kernel_interface.h +++ b/libsyclinterface/include/dpctl_sycl_kernel_interface.h @@ -42,7 +42,7 @@ DPCTL_C_EXTERN_C_BEGIN * @brief Returns the number of arguments for the sycl * interoperability kernel. * - * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * @param KRef DPCTLSyclKernelRef pointer to a SYCL * interoperability kernel. * @return Returns the number of arguments for the interoperability * kernel. @@ -55,7 +55,7 @@ size_t DPCTLKernel_GetNumArgs(__dpctl_keep const DPCTLSyclKernelRef KRef); * @brief Deletes the DPCTLSyclKernelRef after casting it to a * ``sycl::kernel``. * - * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * @param KRef DPCTLSyclKernelRef pointer to a SYCL * interoperability kernel. * @ingroup KernelInterface */ @@ -66,10 +66,11 @@ void DPCTLKernel_Delete(__dpctl_take DPCTLSyclKernelRef KRef); * !brief Wrapper around * `kernel::get_info()`. * - * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * @param KRef DPCTLSyclKernelRef pointer to a SYCL * interoperability kernel. * @return Returns the maximum number of work-items in a work-group - * that can be used to execute a kernel on a specific device. + * that can be used to execute a kernel on the device it was + * built for. * @ingroup KernelInterface */ DPCTL_API @@ -79,10 +80,10 @@ size_t DPCTLKernel_GetWorkGroupSize(__dpctl_keep const DPCTLSyclKernelRef KRef); * !brief Wrapper around * `kernel::get_info()`. 
 * - * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * @param KRef DPCTLSyclKernelRef pointer to a SYCL * interoperability kernel. * @return Returns a value, of which work-group size is preferred to be a - * multiple, for executing a kernel on a specific device. + * multiple, for executing a kernel on the device it was built for. * @ingroup KernelInterface */ DPCTL_API @@ -93,7 +94,7 @@ size_t DPCTLKernel_GetPreferredWorkGroupSizeMultiple( * !brief Wrapper around * `kernel::get_info()`. * - * @param KRef DPCTLSyclKernelRef pointer to an SYCL + * @param KRef DPCTLSyclKernelRef pointer to a SYCL * interoperability kernel. * @return Returns the minimum amount of private memory, in bytes, * used by each work-item in the kernel. From facea802b5f405a3b3348eaf95db6871151c5bf2 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 5 Sep 2022 20:59:55 -0700 Subject: [PATCH 95/95] Array API tests reworked --- .github/workflows/conda-package.yml | 131 ++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 9f1a2a9e44..3c9bf4a5ba 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -476,3 +476,134 @@ jobs: echo "Executing ${script}" python ${script} || exit 1 done + + array-api-conformity: + needs: test_linux + runs-on: ${{ matrix.runner }} + + strategy: + matrix: + python: ['3.10'] + experimental: [false] + runner: [ubuntu-latest] + continue-on-error: ${{ matrix.experimental }} + env: + CHANNELS: -c intel -c defaults --override-channels + steps: + - name: Cache array API tests + id: cache-array-api-tests + uses: actions/cache@v3 + env: + ARRAY_CACHE: 3 + with: + path: | + /home/runner/work/array-api-tests/ + key: ${{ runner.os }}-array-api-${{ env.cache-name }}-${{ env.ARRAY_CACHE }}-${{ hashFiles('/home/runner/work/array-api-tests/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-build-${{ env.cache-name }}- + ${{ 
runner.os }}-build- + ${{ runner.os }}- + - name: Clone array API tests repo + if: steps.cache-array-api-tests.outputs.cache-hit != 'true' + shell: bash -l {0} + run: | + cd /home/runner/work + git clone --recurse-submodules https://github.com/data-apis/array-api-tests array-api-tests + cd array-api-tests + - name: Download artifact + uses: actions/download-artifact@v2 + with: + name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} + - name: Add conda to system path + run: echo $CONDA/bin >> $GITHUB_PATH + - name: Install conda-build + # Needed to be able to run conda index + run: conda install conda-build + - name: Create conda channel + run: | + mkdir -p $GITHUB_WORKSPACE/channel/linux-64 + conda index $GITHUB_WORKSPACE/channel || exit 1 + mv ${PACKAGE_NAME}-*.tar.bz2 $GITHUB_WORKSPACE/channel/linux-64 || exit 1 + conda index $GITHUB_WORKSPACE/channel || exit 1 + # Test channel + conda search $PACKAGE_NAME -c $GITHUB_WORKSPACE/channel --override-channels --info --json > $GITHUB_WORKSPACE/ver.json + cat ver.json + - name: Collect dependencies + run: | + CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" + export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} ${VER_SCRIPT2}") + conda create -n test_dpctl $PACKAGE_NAME=${PACKAGE_VERSION} python=${{ matrix.python }} $CHANNELS --only-deps --dry-run > lockfile + cat lockfile + - name: Set pkgs_dirs + run: | + echo "pkgs_dirs: [~/.conda/pkgs]" >> ~/.condarc + - name: Cache conda packages + uses: actions/cache@v3 + env: + CACHE_NUMBER: 3 # Increase to reset cache + with: + path: ~/.conda/pkgs + key: + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}-${{hashFiles('lockfile') }} + restore-keys: | + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-python-${{ matrix.python }}- + ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}- + - name: Install dpctl + run: | + CHANNELS="-c $GITHUB_WORKSPACE/channel ${{ env.CHANNELS }}" + export PACKAGE_VERSION=$(python -c "${VER_SCRIPT1} 
${VER_SCRIPT2}") + conda create -n test_dpctl $PACKAGE_NAME=${PACKAGE_VERSION} pytest python=${{ matrix.python }} $CHANNELS + # Test installed packages + conda list + - name: Install array API test dependencies + shell: bash -l {0} + run: | + . $CONDA/etc/profile.d/conda.sh + conda activate test_dpctl + cd /home/runner/work/array-api-tests + pip install -r requirements.txt + - name: Install jq + shell: bash -l {0} + run: | + sudo apt-get install jq + - name: Run array API conformance tests + id: run-array-api-tests + shell: bash -l {0} + run: | + FILE=/home/runner/work/.report.json + . $CONDA/etc/profile.d/conda.sh + conda activate test_dpctl + # echo "libintelocl.so" | tee /etc/OpenCL/vendors/intel-cpu.icd + export OCL_ICD_FILENAMES=libintelocl.so + export SYCL_ENABLE_HOST_DEVICE=1 + python -c "import dpctl; dpctl.lsplatform()" + export ARRAY_API_TESTS_MODULE=dpctl.tensor + cd /home/runner/work/array-api-tests + pytest --ci --json-report --json-report-file=$FILE array_api_tests/ || true + - name: Set Github environment variables + shell: bash -l {0} + run: | + FILE=/home/runner/work/.report.json + if test -f "$FILE"; then + PASSED_TESTS=$(jq '.summary | .passed // 0' $FILE) + FAILED_TESTS=$(jq '.summary | .failed // 0' $FILE) + SKIPPED_TESTS=$(jq '.summary | .skipped // 0' $FILE) + MESSAGE="Array API standard conformance tests ran successfully. + Passed: $PASSED_TESTS + Failed: $FAILED_TESTS + Skipped: $SKIPPED_TESTS" + echo "MESSAGE<<EOF" >> $GITHUB_ENV + echo "$MESSAGE" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + else + MESSAGE=$'Array API standard conformance tests failed to run.' + echo "MESSAGE=$MESSAGE" >> $GITHUB_ENV + fi + - name: Post result to PR + uses: mshick/add-pr-comment@v1 + with: + message: | + ${{ env.MESSAGE }} + allow-repeats: true + repo-token: ${{ secrets.GITHUB_TOKEN }} + repo-token-user-login: 'github-actions[bot]'