From 8257ca9ff49e02c130a833c5cd6edadafd835978 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 7 Aug 2024 10:53:30 -0500 Subject: [PATCH 1/4] Run pytest with -s for testing with nightly sycl bundle Also remove --no-sycl-interface-test option, since DPCTLSyclInterface library is no longer so-versioned. --- .github/workflows/os-llvm-sycl-build.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/os-llvm-sycl-build.yml b/.github/workflows/os-llvm-sycl-build.yml index 19180b6e2a..7b56af17bb 100644 --- a/.github/workflows/os-llvm-sycl-build.yml +++ b/.github/workflows/os-llvm-sycl-build.yml @@ -159,6 +159,4 @@ jobs: SYCL_CACHE_PERSISTENT: 1 run: | source set_allvars.sh - # Skip the test that checks if there is only one hard - # copy of DPCTLSyclInterface library - python -m pytest -v dpctl/tests --no-sycl-interface-test + python -m pytest -sv dpctl/tests From 02ac7faef41f0c95b525b396840ffd3f586cc017 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 8 Aug 2024 08:18:36 -0500 Subject: [PATCH 2/4] Write usm_host_allocator that wraps call to free in try/catch Wrote dpctl::tensor::offset_utils::usm_host_allocator to allocate USM-host memory as storage to std::vector. Replaced uses of sycl::usm_memory. The new class derives from this, but overrides the deallocate method to wrap the call to base::deallocate in try/catch. The exception, if caught, is printed but otherwise ignored, consistent with how this is done in the USMDeleter class used in dpctl.memory. This is to work around sporadic crashes due to an unhandled exception thrown by the openCL::CPU driver, which appears to be benign. The issue was reported to the CPU driver team, with a native reproducer (compiler LLVM jira ticket 58387). 
--- .../libtensor/include/utils/offset_utils.hpp | 28 +++++++++++++++++-- .../source/integer_advanced_indexing.cpp | 5 ++-- dpctl/tensor/libtensor/source/triul_ctor.cpp | 3 +- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp index 1ad89c4fac..bbd384125d 100644 --- a/dpctl/tensor/libtensor/include/utils/offset_utils.hpp +++ b/dpctl/tensor/libtensor/include/utils/offset_utils.hpp @@ -27,6 +27,7 @@ #pragma once #include +#include #include #include #include @@ -81,6 +82,30 @@ std::vector concat(std::vector lhs, Vs &&...vs) } // namespace detail +template +class usm_host_allocator : public sycl::usm_allocator +{ +public: + using baseT = sycl::usm_allocator; + using baseT::baseT; + + template struct rebind + { + typedef usm_host_allocator other; + }; + + void deallocate(T *ptr, size_t n) + { + try { + baseT::deallocate(ptr, n); + } catch (const std::exception &e) { + std::cerr + << "Exception caught in `usm_host_allocator::deallocate`: " + << e.what() << std::endl; + } + } +}; + template std::tuple device_allocate_and_pack(sycl::queue &q, @@ -90,8 +115,7 @@ device_allocate_and_pack(sycl::queue &q, // memory transfer optimization, use USM-host for temporary speeds up // transfer to device, especially on dGPUs - using usm_host_allocatorT = - sycl::usm_allocator; + using usm_host_allocatorT = usm_host_allocator; using shT = std::vector; usm_host_allocatorT usm_host_allocator(q); diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp index 56db97eab7..77ec075ccf 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp @@ -35,6 +35,7 @@ #include "dpctl4pybind11.hpp" #include "kernels/integer_advanced_indexing.hpp" #include "utils/memory_overlap.hpp" +#include "utils/offset_utils.hpp" #include 
"utils/output_validation.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" @@ -91,7 +92,7 @@ _populate_kernel_params(sycl::queue &exec_q, { using usm_host_allocator_T = - sycl::usm_allocator; + dpctl::tensor::offset_utils::usm_host_allocator; using ptrT = std::vector; usm_host_allocator_T ptr_allocator(exec_q); @@ -99,7 +100,7 @@ _populate_kernel_params(sycl::queue &exec_q, std::make_shared(k, ptr_allocator); using usm_host_allocatorT = - sycl::usm_allocator; + dpctl::tensor::offset_utils::usm_host_allocator; using shT = std::vector; usm_host_allocatorT sz_allocator(exec_q); diff --git a/dpctl/tensor/libtensor/source/triul_ctor.cpp b/dpctl/tensor/libtensor/source/triul_ctor.cpp index 5c6f21c6f7..4f70e27e10 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.cpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.cpp @@ -32,6 +32,7 @@ #include "kernels/constructors.hpp" #include "simplify_iteration_space.hpp" #include "utils/memory_overlap.hpp" +#include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" #include "utils/type_dispatch.hpp" @@ -150,7 +151,7 @@ usm_ndarray_triul(sycl::queue &exec_q, nd += 2; using usm_host_allocatorT = - sycl::usm_allocator; + dpctl::tensor::offset_utils::usm_host_allocator; using usmshT = std::vector; usm_host_allocatorT allocator(exec_q); From 709b6bd7227921fc2e9d80f9d6fbf4fab6a9561e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Thu, 8 Aug 2024 08:25:33 -0500 Subject: [PATCH 3/4] Removed unnecessary print from the test --- dpctl/tests/test_usm_ndarray_print.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dpctl/tests/test_usm_ndarray_print.py b/dpctl/tests/test_usm_ndarray_print.py index 9e15fa3310..983cb75d98 100644 --- a/dpctl/tests/test_usm_ndarray_print.py +++ b/dpctl/tests/test_usm_ndarray_print.py @@ -283,7 +283,6 @@ def test_print_repr(self): x = dpt.arange(4, dtype="i4", sycl_queue=q) x.sycl_queue.wait() r = repr(x) - print(r) assert r == "usm_ndarray([0, 1, 2, 3], dtype=int32)" 
dpt.set_print_options(linewidth=1) From a49bd76e7061e2fcaec3f8df12f48fdaa2360a5b Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 9 Aug 2024 16:17:17 -0500 Subject: [PATCH 4/4] Update version of TBB and OCL CPU RT to use in this workflow These were from the previous year. Updated them to what DPC++ is using https://github.com/intel/llvm/blob/sycl/devops/dependencies.json#L27-L38 It might be nice to automate updating these through a cron-executed workflow. --- .github/workflows/os-llvm-sycl-build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/os-llvm-sycl-build.yml b/.github/workflows/os-llvm-sycl-build.yml index 7b56af17bb..b8870bddef 100644 --- a/.github/workflows/os-llvm-sycl-build.yml +++ b/.github/workflows/os-llvm-sycl-build.yml @@ -13,11 +13,11 @@ jobs: env: DOWNLOAD_URL_PREFIX: https://github.com/intel/llvm/releases/download - DRIVER_PATH: 2023-WW27 - OCLCPUEXP_FN: oclcpuexp-2023.16.6.0.28_rel.tar.gz - TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.9.0/ - TBB_INSTALL_DIR: oneapi-tbb-2021.9.0 - TBB_FN: oneapi-tbb-2021.9.0-lin.tgz + DRIVER_PATH: 2024-WW25 + OCLCPUEXP_FN: oclcpuexp-2024.18.6.0.02_rel.tar.gz + TBB_URL: https://github.com/oneapi-src/oneTBB/releases/download/v2021.12.0/ + TBB_INSTALL_DIR: oneapi-tbb-2021.12.0 + TBB_FN: oneapi-tbb-2021.12.0-lin.tgz steps: - name: Cancel Previous Runs