From c90e0ab4191679cfd435feb8eed201f2ee5761dc Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Thu, 29 Feb 2024 22:05:33 -0600 Subject: [PATCH 1/6] Update gtests to not use queue manager --- .../test_sycl_kernel_bundle_interface.cpp | 1 - .../tests/test_sycl_kernel_interface.cpp | 1 - .../tests/test_sycl_queue_interface.cpp | 1 - .../tests/test_sycl_usm_interface.cpp | 24 +++++++++---------- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/libsyclinterface/tests/test_sycl_kernel_bundle_interface.cpp b/libsyclinterface/tests/test_sycl_kernel_bundle_interface.cpp index c3fc43f442..ad6d6289ad 100644 --- a/libsyclinterface/tests/test_sycl_kernel_bundle_interface.cpp +++ b/libsyclinterface/tests/test_sycl_kernel_bundle_interface.cpp @@ -33,7 +33,6 @@ #include "dpctl_sycl_kernel_bundle_interface.h" #include "dpctl_sycl_kernel_interface.h" #include "dpctl_sycl_queue_interface.h" -#include "dpctl_sycl_queue_manager.h" #include #include #include diff --git a/libsyclinterface/tests/test_sycl_kernel_interface.cpp b/libsyclinterface/tests/test_sycl_kernel_interface.cpp index 4e69ee4ffd..0a87790b51 100644 --- a/libsyclinterface/tests/test_sycl_kernel_interface.cpp +++ b/libsyclinterface/tests/test_sycl_kernel_interface.cpp @@ -31,7 +31,6 @@ #include "dpctl_sycl_kernel_bundle_interface.h" #include "dpctl_sycl_kernel_interface.h" #include "dpctl_sycl_queue_interface.h" -#include "dpctl_sycl_queue_manager.h" #include "dpctl_utils.h" #include #include diff --git a/libsyclinterface/tests/test_sycl_queue_interface.cpp b/libsyclinterface/tests/test_sycl_queue_interface.cpp index 1c7fe55561..75e083e471 100644 --- a/libsyclinterface/tests/test_sycl_queue_interface.cpp +++ b/libsyclinterface/tests/test_sycl_queue_interface.cpp @@ -31,7 +31,6 @@ #include "dpctl_sycl_device_selector_interface.h" #include "dpctl_sycl_event_interface.h" #include "dpctl_sycl_queue_interface.h" -#include "dpctl_sycl_queue_manager.h" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_sycl_usm_interface.h" #include diff --git a/libsyclinterface/tests/test_sycl_usm_interface.cpp b/libsyclinterface/tests/test_sycl_usm_interface.cpp index 0af57a0b61..22c62d9065 100644 --- a/libsyclinterface/tests/test_sycl_usm_interface.cpp +++ b/libsyclinterface/tests/test_sycl_usm_interface.cpp @@ -29,7 +29,6 @@ #include "dpctl_sycl_device_selector_interface.h" #include "dpctl_sycl_event_interface.h" #include "dpctl_sycl_queue_interface.h" -#include "dpctl_sycl_queue_manager.h" #include "dpctl_sycl_type_casters.hpp" #include "dpctl_sycl_usm_interface.h" #include @@ -95,67 +94,68 @@ struct TestDPCTLSyclUSMInterface : public ::testing::Test TEST_F(TestDPCTLSyclUSMInterface, MallocShared) { - auto Q = DPCTLQueueMgr_GetCurrentQueue(); + sycl::queue q; + DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); ASSERT_TRUE(Q); const size_t nbytes = SIZE; auto Ptr = DPCTLmalloc_shared(nbytes, Q); EXPECT_TRUE(bool(Ptr)); common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_SHARED); DPCTLfree_with_queue(Ptr, Q); - DPCTLQueue_Delete(Q); } TEST_F(TestDPCTLSyclUSMInterface, MallocDevice) { - auto Q = DPCTLQueueMgr_GetCurrentQueue(); + sycl::queue q; + DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); ASSERT_TRUE(Q); const size_t nbytes = SIZE; auto Ptr = DPCTLmalloc_device(nbytes, Q); EXPECT_TRUE(bool(Ptr)); common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_DEVICE); DPCTLfree_with_queue(Ptr, Q); - DPCTLQueue_Delete(Q); } TEST_F(TestDPCTLSyclUSMInterface, MallocHost) { - auto Q = DPCTLQueueMgr_GetCurrentQueue(); + sycl::queue q; + DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); ASSERT_TRUE(Q); const size_t nbytes = SIZE; auto Ptr = DPCTLmalloc_host(nbytes, Q); EXPECT_TRUE(bool(Ptr)); common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_HOST); DPCTLfree_with_queue(Ptr, Q); - DPCTLQueue_Delete(Q); } TEST_F(TestDPCTLSyclUSMInterface, AlignedAllocShared) { - auto Q = DPCTLQueueMgr_GetCurrentQueue(); + sycl::queue q; + DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); ASSERT_TRUE(Q); const size_t nbytes = SIZE; auto Ptr = DPCTLaligned_alloc_shared(64, nbytes, Q); EXPECT_TRUE(bool(Ptr)); common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_SHARED); DPCTLfree_with_queue(Ptr, Q); - DPCTLQueue_Delete(Q); } TEST_F(TestDPCTLSyclUSMInterface, AlignedAllocDevice) { - auto Q = DPCTLQueueMgr_GetCurrentQueue(); + sycl::queue q; + DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); ASSERT_TRUE(Q); const size_t nbytes = SIZE; auto Ptr = DPCTLaligned_alloc_device(64, nbytes, Q); EXPECT_TRUE(bool(Ptr)); common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_DEVICE); DPCTLfree_with_queue(Ptr, Q); - DPCTLQueue_Delete(Q); } TEST_F(TestDPCTLSyclUSMInterface, AlignedAllocHost) { - auto Q = DPCTLQueueMgr_GetCurrentQueue(); + sycl::queue q; + DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); ASSERT_TRUE(Q); const size_t nbytes = SIZE; auto Ptr = DPCTLaligned_alloc_host(64, nbytes, Q); From d7e87d104a09decebc740e42aab682dc1b41c351 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Thu, 29 Feb 2024 23:26:33 -0600 Subject: [PATCH 2/6] Removes dpctl.device_context and related API --- dpctl/__init__.py | 31 +- dpctl/_backend.pxd | 10 - dpctl/_sycl_queue_manager.pxd | 4 - dpctl/_sycl_queue_manager.pyx | 266 ------------------ dpctl/apis/include/dpctl_sycl_interface.h | 1 - dpctl/tests/test_sycl_queue.py | 12 - dpctl/tests/test_sycl_queue_manager.py | 215 +------------- .../dpctl_sycl_queue_interface.h | 4 +- 8 files changed, 4 insertions(+), 539 deletions(-) diff --git a/dpctl/__init__.py b/dpctl/__init__.py index 062894d2cb..b7f85cea99 100644 --- a/dpctl/__init__.py +++ b/dpctl/__init__.py @@ -19,16 +19,7 @@ Dpctl implements a subset of SYCL's API providing wrappers for the SYCL runtime classes described in :sycl_runtime_classes:`Section 4.6 <>` of - the :sycl_spec_2020:`SYCL 2020 spec <>`. Note that the SYCL - ``device_selector`` class is not implemented, instead there are device - selection helper functions that can be used to simulate the same behavior. - Dpctl implements the ``ONEPI::filter_selector`` extension that is included - in Intel's DPC++ SYCL compiler. - - The module also includes a global SYCL queue manager. The queue manager - provides convenience functions to create a global instance of - a :class:`dpctl.SyclQueue`, to create a nested stack of queue objects, and - to create a queue object for use only within a specific scope. + the :sycl_spec_2020:`SYCL 2020 spec <>`. """ __author__ = "Intel Corp." @@ -61,17 +52,7 @@ SyclQueue, SyclQueueCreationError, ) -from ._sycl_queue_manager import ( - device_context, - get_current_backend, - get_current_device_type, - get_current_queue, - get_device_cached_queue, - get_num_activated_queues, - is_in_device_context, - nested_context_factories, - set_global_queue, -) +from ._sycl_queue_manager import get_device_cached_queue from ._sycl_timer import SyclTimer from ._version import get_versions from .enum_types import ( @@ -120,15 +101,7 @@ "SyclQueueCreationError", ] __all__ += [ - "device_context", - "get_current_backend", - "get_current_device_type", - "get_current_queue", "get_device_cached_queue", - "get_num_activated_queues", - "is_in_device_context", - "nested_context_factories", - "set_global_queue", ] __all__ += [ "device_type", diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index 6cbf1500ee..ae07e1de02 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -432,16 +432,6 @@ cdef extern from "syclinterface/dpctl_sycl_queue_interface.h": cdef bool DPCTLQueue_HasEnableProfiling(const DPCTLSyclQueueRef QRef) -cdef extern from "syclinterface/dpctl_sycl_queue_manager.h": - cdef DPCTLSyclQueueRef DPCTLQueueMgr_GetCurrentQueue() - cdef bool DPCTLQueueMgr_GlobalQueueIsCurrent() - cdef bool DPCTLQueueMgr_IsCurrentQueue(const DPCTLSyclQueueRef QRef) - cdef void DPCTLQueueMgr_PopQueue() - cdef void DPCTLQueueMgr_PushQueue(const DPCTLSyclQueueRef dRef) - cdef void DPCTLQueueMgr_SetGlobalQueue(const DPCTLSyclQueueRef dRef) - cdef size_t DPCTLQueueMgr_GetQueueStackSize() - - cdef extern from "syclinterface/dpctl_sycl_usm_interface.h": cdef DPCTLSyclUSMRef DPCTLmalloc_shared( size_t size, diff --git a/dpctl/_sycl_queue_manager.pxd b/dpctl/_sycl_queue_manager.pxd index 867ff383b7..7a49082ceb 100644 --- a/dpctl/_sycl_queue_manager.pxd +++ b/dpctl/_sycl_queue_manager.pxd @@ -21,8 +21,4 @@ from ._sycl_device cimport SyclDevice from ._sycl_queue cimport SyclQueue -cpdef SyclQueue get_current_queue() -cpdef get_current_device_type () -cpdef get_current_backend() - cpdef object get_device_cached_queue(object) diff --git a/dpctl/_sycl_queue_manager.pyx b/dpctl/_sycl_queue_manager.pyx index 76565f83ac..b64a7b0337 100644 --- a/dpctl/_sycl_queue_manager.pyx +++ b/dpctl/_sycl_queue_manager.pyx @@ -19,284 +19,18 @@ # cython: linetrace=True import logging -from contextlib import ExitStack, contextmanager from contextvars import ContextVar - -from .enum_types import backend_type, device_type - -from ._backend cimport ( # noqa: E211 - DPCTLQueueMgr_GetCurrentQueue, - DPCTLQueueMgr_GetQueueStackSize, - DPCTLQueueMgr_GlobalQueueIsCurrent, - DPCTLQueueMgr_PopQueue, - DPCTLQueueMgr_PushQueue, - DPCTLQueueMgr_SetGlobalQueue, - DPCTLSyclQueueRef, - _backend_type, - _device_type, -) from ._sycl_context cimport SyclContext from ._sycl_device cimport SyclDevice __all__ = [ "_global_device_queue_cache", - "device_context", - "get_current_backend", - "get_current_device_type", - "get_current_queue", "get_device_cached_queue", - "get_num_activated_queues", - "is_in_device_context", - "set_global_queue", ] _logger = logging.getLogger(__name__) -cdef class _SyclQueueManager: - """ Provides a SYCL queue manager interface for Python. - """ - - def _set_as_current_queue(self, arg): - cdef SyclQueue q - cdef DPCTLSyclQueueRef queue_ref = NULL - - if isinstance(arg, SyclQueue): - q_obj = arg - else: - q_obj = SyclQueue(arg) - - q = q_obj - queue_ref = q.get_queue_ref() - DPCTLQueueMgr_PushQueue(queue_ref) - - return q_obj - - def _remove_current_queue(self): - DPCTLQueueMgr_PopQueue() - - cpdef get_current_backend(self): - """ - Returns the backend for the current queue as a `backend_type` enum. - - Returns: - backend_type: The SYCL backend for the currently selected queue. - """ - return self.get_current_queue().backend - - cpdef get_current_device_type(self): - """ - Returns current device type as a `device_type` enum. - - Returns: - device_type: The SYCL device type for the currently selected queue. - Possible values can be gpu, cpu, accelerator, or host. - """ - return self.get_current_queue().sycl_device.device_type - - cpdef SyclQueue get_current_queue(self): - """ - Returns the currently activated SYCL queue as a new SyclQueue object. - - Returns: - SyclQueue: If there is a currently active SYCL queue that queue - is returned wrapped in a SyclQueue object. The SyclQueue object - owns a copy of the currently active SYCL queue as an opaque - `DPCTLSyclQueueRef` pointer. The pointer is freed when the SyclQueue - is garbage collected. - - Raises: - SyclQueueCreationError: If no currently active SYCL queue found. - """ - return SyclQueue._create(DPCTLQueueMgr_GetCurrentQueue()) - - def get_num_activated_queues(self): - """ - Returns the number of currently activated queues for this thread. - - Whenever a program's control enters a :func:`dpctl.device_context()` - scope, either a new SYCL queue is created or a previously created - queue is retrieved from a cache and yielded. The queue yielded by the - context manager is termed to be "activated". If a program creates - multiple nested :func:`dpctl.device_context()` scopes then multiple - queues can be activated at the same time, although only the latest - activated queue is usable directly via calling - :func:`dpctl.get_current_queue()`. This function returns the number of - currently activated queues. - - Returns: - int: The number of currently activated queues. - - """ - return DPCTLQueueMgr_GetQueueStackSize() - - def is_in_device_context(self): - """ - Checks if the control is inside a :func:`dpctl.device_context()` scope. - - Returns: - bool: True if the control is within a - :func:`dpctl.device_context()` scope, otherwise False. - """ - cdef int inCtx = DPCTLQueueMgr_GlobalQueueIsCurrent() - return not bool(inCtx) - - def set_global_queue(self, arg): - """ - Sets the global queue to the SYCL queue specified explicitly, - or created from given arguments. - - Args: - arg: An instance of :class:`dpctl.SyclQueue` or a filter selector - string to be used to construct a :class:`dpctl.SyclQueue`. The - queue is stored in the dpctl queue manager as the default queue. - Raises: - SyclQueueCreationError: If a SYCL queue could not be created. - """ - cdef SyclQueue q - cdef DPCTLSyclQueueRef queue_ref = NULL - - if type(arg) is SyclQueue: - q = arg - else: - q_obj = SyclQueue(arg) - q = q_obj - - queue_ref = q.get_queue_ref() - DPCTLQueueMgr_SetGlobalQueue(queue_ref) - - -# This private instance of the _SyclQueueManager should not be directly -# accessed outside the module. -_mgr = _SyclQueueManager() - -# Global bound functions -get_num_activated_queues = _mgr.get_num_activated_queues -set_global_queue = _mgr.set_global_queue -is_in_device_context = _mgr.is_in_device_context - - -cpdef SyclQueue get_current_queue(): - """ - Returns the currently activate SYCL queue as a new SyclQueue object. - - Returns: - SyclQueue: If there is a currently active SYCL queue that queue - is returned wrapped in a SyclQueue object. The SyclQueue object - owns a copy of the currently active SYCL queue as an opaque - `DPCTLSyclQueueRef` pointer. The pointer is freed when the SyclQueue - is garbage collected. - - Raises: - SyclQueueCreationError: If no currently active SYCL queue found. - """ - return _mgr.get_current_queue() - - -cpdef get_current_device_type(): - """ - Returns current device type as a `device_type` enum. - - Returns: - device_type: The SYCL device type for the currently selected queue. - Possible values can be gpu, cpu, accelerator, or host. - """ - return _mgr.get_current_device_type() - - -cpdef get_current_backend(): - """ - Returns the backend for the current queue as a `backend_type` enum. - - Returns: - backend_type: The SYCL backend for the currently selected queue. - """ - return _mgr.get_current_backend() - - -nested_context_factories = [] - - -def _get_nested_contexts(ctxt): - _help_numba_dppy() - return (factory(ctxt) for factory in nested_context_factories) - - -def _help_numba_dppy(): - """Import numba-dppy for registering nested contexts""" - try: - import numba_dppy - except Exception: - pass - - -@contextmanager -def device_context(arg): - """ - Yields a SYCL queue corresponding to the input queue object, device object, - or device filter selector string. - - This context manager "activates", *i.e.*, sets as the currently usable - queue, the SYCL queue defined by the argument `arg`. - The activated queue is yielded by the context manager and can also be - accessed by any subsequent call to :func:`dpctl.get_current_queue()` inside - the context manager's scope. The yielded queue is removed as the currently - usable queue on exiting the context manager. - - You can register context factory in the list of factories. - This context manager uses context factories to create and activate nested contexts. - - Args: - arg : A :class:`dpctl.SyclQueue` object, or a :class:`dpctl.SyclDevice` - object, or a filter selector string. - - Yields: - :class:`dpctl.SyclQueue`: A SYCL queue corresponding to the specified - input device, queue, or filter string. - - Raises: - SyclQueueCreationError: If the SYCL queue creation failed. - - :Example: - The following example sets current queue targeting specific device - indicated with filter selector string in the scope of `with` block: - - .. code-block:: python - - import dpctl - with dpctl.device_context("level0:gpu:0"): - do_something_on_gpu0() - - The following example registers nested context factory: - - .. code-block:: python - - import dctl - - def factory(sycl_queue): - ... - return context - - dpctl.nested_context_factories.append(factory) - - """ - ctxt = None - try: - ctxt = _mgr._set_as_current_queue(arg) - with ExitStack() as stack: - for nested_context in _get_nested_contexts(ctxt): - stack.enter_context(nested_context) - yield ctxt - finally: - # Code to release resource - if ctxt: - _logger.debug( - "Removing the queue from the stack of active queues") - _mgr._remove_current_queue() - else: - _logger.debug("No queue was created so nothing to do") - - cdef class _DeviceDefaultQueueCache: cdef dict __device_queue_map__ diff --git a/dpctl/apis/include/dpctl_sycl_interface.h b/dpctl/apis/include/dpctl_sycl_interface.h index 935f6af635..1b59c9a3df 100644 --- a/dpctl/apis/include/dpctl_sycl_interface.h +++ b/dpctl/apis/include/dpctl_sycl_interface.h @@ -39,7 +39,6 @@ #include "syclinterface/dpctl_sycl_usm_interface.h" #include "syclinterface/dpctl_sycl_device_manager.h" #include "syclinterface/dpctl_sycl_platform_manager.h" -#include "syclinterface/dpctl_sycl_queue_manager.h" #include "syclinterface/dpctl_sycl_kernel_bundle_interface.h" #include "syclinterface/dpctl_sycl_kernel_interface.h" // clang-format on diff --git a/dpctl/tests/test_sycl_queue.py b/dpctl/tests/test_sycl_queue.py index a400c0223a..b7f42ffb44 100644 --- a/dpctl/tests/test_sycl_queue.py +++ b/dpctl/tests/test_sycl_queue.py @@ -40,18 +40,6 @@ def test_standard_selectors(device_selector, check): pytest.skip() -def test_current_device(check): - """ - Test is the device for the current queue is valid. - """ - try: - q = dpctl.get_current_queue() - except Exception: - pytest.skip("Encountered an exception inside get_current_queue().") - device = q.get_sycl_device() - check(device) - - def test_valid_filter_selectors(valid_filter, check): """ Tests if we can create a SyclDevice using a supported filter selector diff --git a/dpctl/tests/test_sycl_queue_manager.py b/dpctl/tests/test_sycl_queue_manager.py index db4c42185c..b7c4c16208 100644 --- a/dpctl/tests/test_sycl_queue_manager.py +++ b/dpctl/tests/test_sycl_queue_manager.py @@ -14,226 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Defines unit test cases for the SyclQueueManager class. +"""Defines unit test cases for the dpctl._sycl_queue_manager module. """ -import contextlib -import sys - import pytest -from helper import has_cpu, has_gpu, has_sycl_platforms import dpctl -skip_no_platform = pytest.mark.skipif( - not has_sycl_platforms(), reason="No SYCL platforms available" -) -skip_no_gpu = pytest.mark.skipif( - not has_gpu(), reason="No OpenCL GPU queues available" -) -skip_no_cpu = pytest.mark.skipif( - not has_cpu(), reason="No OpenCL CPU queues available" -) - - -@skip_no_platform -def test_is_in_device_context_outside_device_ctxt(): - assert not dpctl.is_in_device_context() - - -@skip_no_gpu -def test_is_in_device_context_inside_device_ctxt_gpu(): - with dpctl.device_context("opencl:gpu:0"): - assert dpctl.is_in_device_context() - - -@skip_no_cpu -def test_is_in_device_context_inside_device_ctxt_cpu(): - with dpctl.device_context("opencl:cpu:0"): - assert dpctl.is_in_device_context() - - -@skip_no_gpu -@skip_no_cpu -def test_is_in_device_context_inside_nested_device_ctxt(): - with dpctl.device_context("opencl:cpu:0"): - with dpctl.device_context("opencl:gpu:0"): - assert dpctl.is_in_device_context() - assert dpctl.is_in_device_context() - assert not dpctl.is_in_device_context() - - -@skip_no_cpu -def test_is_in_device_context_inside_nested_device_ctxt_cpu(): - cpu = dpctl.SyclDevice("cpu") - n = cpu.max_compute_units - n_half = n // 2 - try: - d0, d1 = cpu.create_sub_devices(partition=[n_half, n - n_half]) - except dpctl.SyclSubDeviceCreationError: - pytest.skip("Could not create subdevices") - assert 0 == dpctl.get_num_activated_queues() - with dpctl.device_context(d0): - assert 1 == dpctl.get_num_activated_queues() - with dpctl.device_context(d1): - assert 2 == dpctl.get_num_activated_queues() - assert dpctl.is_in_device_context() - assert dpctl.is_in_device_context() - assert 1 == dpctl.get_num_activated_queues() - assert not dpctl.is_in_device_context() - assert 0 == dpctl.get_num_activated_queues() - - -@skip_no_platform -def test_get_current_device_type_outside_device_ctxt(): - assert dpctl.get_current_device_type() is not None - - -@skip_no_platform -@skip_no_gpu -def test_get_current_device_type_inside_device_ctxt(): - assert dpctl.get_current_device_type() is not None - - with dpctl.device_context("opencl:gpu:0"): - assert dpctl.get_current_device_type() == dpctl.device_type.gpu - - assert dpctl.get_current_device_type() is not None - - -@skip_no_cpu -@skip_no_gpu -def test_get_current_device_type_inside_nested_device_ctxt(): - assert dpctl.get_current_device_type() is not None - - with dpctl.device_context("opencl:cpu:0"): - assert dpctl.get_current_device_type() == dpctl.device_type.cpu - - with dpctl.device_context("opencl:gpu:0"): - assert dpctl.get_current_device_type() == dpctl.device_type.gpu - assert dpctl.get_current_device_type() == dpctl.device_type.cpu - - assert dpctl.get_current_device_type() is not None - - -@skip_no_platform -def test_num_current_queues_outside_with_clause(): - assert 0 == dpctl.get_num_activated_queues() - - -@skip_no_gpu -@skip_no_cpu -def test_num_current_queues_inside_with_clause(): - with dpctl.device_context("opencl:cpu:0"): - assert 1 == dpctl.get_num_activated_queues() - with dpctl.device_context("opencl:gpu:0"): - assert 2 == dpctl.get_num_activated_queues() - assert 0 == dpctl.get_num_activated_queues() - - -@skip_no_gpu -@skip_no_cpu -def test_num_current_queues_inside_threads(): - from threading import Thread - - def SessionThread(): - assert dpctl.get_num_activated_queues() == 0 - with dpctl.device_context("opencl:gpu:0"): - assert dpctl.get_num_activated_queues() == 1 - - Session1 = Thread(target=SessionThread()) - Session2 = Thread(target=SessionThread()) - with dpctl.device_context("opencl:cpu:0"): - assert dpctl.get_num_activated_queues() == 1 - Session1.start() - Session2.start() - - -@skip_no_platform -def test_get_current_backend(): - dpctl.get_current_backend() - dpctl.get_current_device_type() - q = dpctl.SyclQueue() - dpctl.set_global_queue(q) - if has_gpu(): - dpctl.set_global_queue("gpu") - elif has_cpu(): - dpctl.set_global_queue("cpu") - - -def test_nested_context_factory_is_list(): - assert isinstance(dpctl.nested_context_factories, list) - - -@contextlib.contextmanager -def _register_nested_context_factory(factory): - dpctl.nested_context_factories.append(factory) - try: - yield - finally: - dpctl.nested_context_factories.remove(factory) - - -def test_register_nested_context_factory_context(): - def factory(): - pass - - with _register_nested_context_factory(factory): - assert factory in dpctl.nested_context_factories - - assert isinstance(dpctl.nested_context_factories, list) - assert factory not in dpctl.nested_context_factories - - -@pytest.mark.skipif(not has_cpu(), reason="No OpenCL CPU queues available") -def test_device_context_activates_nested_context(): - in_context = False - factory_called = False - - @contextlib.contextmanager - def context(): - nonlocal in_context - old, in_context = in_context, True - yield - in_context = old - - def factory(_): - nonlocal factory_called - factory_called = True - return context() - - with _register_nested_context_factory(factory): - assert not factory_called - assert not in_context - - with dpctl.device_context("opencl:cpu:0"): - assert factory_called - assert in_context - - assert not in_context - - -@pytest.mark.skipif(not has_cpu(), reason="No OpenCL CPU queues available") -@pytest.mark.parametrize( - "factory, exception, match", - [ - (True, TypeError, "object is not callable"), - (lambda x: None, AttributeError, "no attribute '__exit__'") - if sys.version_info < (3, 11) - else ( - lambda x: None, - TypeError, - r".* object does not support the context manager protocol", - ), - ], -) -def test_nested_context_factory_exception_if_wrong_factory( - factory, exception, match -): - with pytest.raises(exception, match=match): - with _register_nested_context_factory(factory): - with dpctl.device_context("opencl:cpu:0"): - pass - def test__DeviceDefaultQueueCache(): import copy diff --git a/libsyclinterface/include/syclinterface/dpctl_sycl_queue_interface.h b/libsyclinterface/include/syclinterface/dpctl_sycl_queue_interface.h index 1763e1d2d5..2d876a97cf 100644 --- a/libsyclinterface/include/syclinterface/dpctl_sycl_queue_interface.h +++ b/libsyclinterface/include/syclinterface/dpctl_sycl_queue_interface.h @@ -19,9 +19,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This header declares a C interface to sycl::queue member functions. Note -/// that sycl::queue constructors are not exposed in this interface. Instead, -/// users should use the functions in dpctl_sycl_queue_manager.h. +/// This header declares a C interface to sycl::queue member functions. /// //===----------------------------------------------------------------------===// From 4641fbdf7882f80b611f284a392ae93b65e4467d Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Thu, 29 Feb 2024 23:40:18 -0600 Subject: [PATCH 3/6] Fix typo in docstring --- examples/python/sycl_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/python/sycl_queue.py b/examples/python/sycl_queue.py index 61cc6b8de2..fd18660ed8 100644 --- a/examples/python/sycl_queue.py +++ b/examples/python/sycl_queue.py @@ -28,7 +28,7 @@ def create_queue_from_filter_selector(): """Create queue for a GPU device or, if it is not available, for a CPU device. - Create in-order queue with profilign enabled. + Create in-order queue with profiling enabled. """ q = dpctl.SyclQueue("gpu,cpu", property=("in_order", "enable_profiling")) print("Queue {} is in order: {}".format(q, q.is_in_order)) From 8ded9bf5a692f85d0ac18187a0dbb3ac0104d038 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Thu, 29 Feb 2024 23:46:54 -0600 Subject: [PATCH 4/6] Remove dpctl_sycl_queue_manager --- dpctl/__init__.pxd | 1 - .../source/dpctl_sycl_queue_manager.cpp | 190 ---------- libsyclinterface/tests/CMakeLists.txt | 14 + .../tests/test_sycl_queue_manager.cpp | 355 ------------------ 4 files changed, 14 insertions(+), 546 deletions(-) delete mode 100644 libsyclinterface/source/dpctl_sycl_queue_manager.cpp delete mode 100644 libsyclinterface/tests/test_sycl_queue_manager.cpp diff --git a/dpctl/__init__.pxd b/dpctl/__init__.pxd index e97915f605..4108f6f169 100644 --- a/dpctl/__init__.pxd +++ b/dpctl/__init__.pxd @@ -28,4 +28,3 @@ from dpctl._sycl_device_factory cimport * from dpctl._sycl_event cimport * from dpctl._sycl_platform cimport * from dpctl._sycl_queue cimport * -from dpctl._sycl_queue_manager cimport * diff --git a/libsyclinterface/source/dpctl_sycl_queue_manager.cpp b/libsyclinterface/source/dpctl_sycl_queue_manager.cpp deleted file mode 100644 index 18704e6a3a..0000000000 --- a/libsyclinterface/source/dpctl_sycl_queue_manager.cpp +++ /dev/null @@ -1,190 +0,0 @@ -//===-------- dpctl_sycl_queue_manager.cpp - Implements a SYCL queue manager =// -// -// Data Parallel Control (dpctl) -// -// Copyright 2020-2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements the data types and functions declared in -/// dpctl_sycl_queue_manager.h. -/// -//===----------------------------------------------------------------------===// -#include "dpctl_sycl_queue_manager.h" -#include "Config/dpctl_config.h" -#include "dpctl_error_handlers.h" -#include "dpctl_sycl_device_manager.h" -#include "dpctl_sycl_type_casters.hpp" -#include /* SYCL headers */ -#include - -using namespace sycl; - -/*------------------------------- Private helpers ----------------------------*/ - -// Anonymous namespace for private helpers -namespace -{ -static_assert(__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_VERSION_REQUIRED, - "The compiler does not meet minimum version requirement"); - -using namespace dpctl::syclinterface; - -struct QueueManager -{ - using QueueStack = std::vector; - static QueueStack &getQueueStack() - { - thread_local static QueueStack *activeQueues = new QueueStack([] { - QueueStack qs; - auto DS = dpctl_default_selector(); - try { - auto DRef = wrap(new device(DS)); - auto CRef = DPCTLDeviceMgr_GetCachedContext(DRef); - if (CRef) { - qs.emplace_back(*unwrap(CRef), - *unwrap(DRef)); - } - else { - error_handler("Fatal Error: No cached context for default " - "device.", - __FILE__, __func__, __LINE__); - std::terminate(); - } - delete unwrap(DRef); - delete unwrap(CRef); - } catch (std::exception const &e) { - error_handler(e, __FILE__, __func__, __LINE__); - } - - return qs; - }()); - - return *activeQueues; - } -}; - -} /* end of anonymous namespace */ - -//----------------------------- Public API -----------------------------------// - -// If there are any queues in the QueueStack except the global queue return -// true, else return false. -bool DPCTLQueueMgr_GlobalQueueIsCurrent() -{ - auto &qs = QueueManager::getQueueStack(); - if (qs.empty()) { - error_handler("Error: No global queue found.", __FILE__, __func__, - __LINE__); - return false; - } - // The first entry of the QueueStack is always the global queue. If there - // are any more queues in the QueueStack, that indicates that the global - // queue is not the current queue. - return (qs.size() - 1) ? false : true; -} - -/*! - * Allocates a new copy of the current queue. The caller owns the pointer and is - * responsible for deallocating it. The helper function DPCTLQueue_Delete should - * be used for that purpose. - */ -DPCTLSyclQueueRef DPCTLQueueMgr_GetCurrentQueue() -{ - auto &qs = QueueManager::getQueueStack(); - if (qs.empty()) { - error_handler("No currently active queues.", __FILE__, __func__, - __LINE__); - return nullptr; - } - auto last = qs.size() - 1; - return wrap(new queue(qs[last])); -} - -// Relies on sycl::queue class' operator= to check for equivalent of queues. -bool DPCTLQueueMgr_IsCurrentQueue(__dpctl_keep const DPCTLSyclQueueRef QRef) -{ - if (!QRef) { - return false; - } - auto &qs = QueueManager::getQueueStack(); - if (qs.empty()) { - error_handler("No currently active queues.", __FILE__, __func__, - __LINE__); - return false; - } - auto last = qs.size() - 1; - auto currQ = qs[last]; - return (*unwrap(QRef) == currQ); -} - -// The function sets the global queue, i.e., the sycl::queue object at -// getQueueStack()[0] to the passed in sycl::queue. -void DPCTLQueueMgr_SetGlobalQueue(__dpctl_keep const DPCTLSyclQueueRef qRef) -{ - auto &qs = QueueManager::getQueueStack(); - if (qRef) { - qs[0] = *unwrap(qRef); - } - else { - error_handler("Error: Failed to set the global queue.", __FILE__, - __func__, __LINE__); - std::terminate(); - } -} - -// Push the passed in queue to the QueueStack -void DPCTLQueueMgr_PushQueue(__dpctl_keep const DPCTLSyclQueueRef qRef) -{ - auto &qs = QueueManager::getQueueStack(); - if (qRef) { - qs.emplace_back(*unwrap(qRef)); - } - else { - error_handler("Error: Failed to set the current queue.", __FILE__, - __func__, __LINE__); - std::terminate(); - } -} - -// Pop's a previously pushed queue from the QueueStack. Note that since the -// global queue is always stored at getQueueStack()[0] we check that the size of -// the QueueStack is >=1 before popping. -void DPCTLQueueMgr_PopQueue() -{ - auto &qs = QueueManager::getQueueStack(); - // The first entry in the QueueStack is the global queue, and should not be - // removed. - if (qs.size() <= 1) { - error_handler("No queue to pop.", __FILE__, __func__, __LINE__); - return; - } - qs.pop_back(); -} - -size_t DPCTLQueueMgr_GetQueueStackSize() -{ - auto &qs = QueueManager::getQueueStack(); - if (qs.empty()) { - error_handler("Error: No global queue found.", __FILE__, __func__, - __LINE__); - return -1; - } - // The first entry of the QueueStack is always the global queue. If there - // are any more queues in the QueueStack, that indicates that the global - // queue is not the current queue. - return (qs.size() - 1); -} diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index 771aec7fd1..8c62c4f777 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ b/libsyclinterface/tests/CMakeLists.txt @@ -84,6 +84,20 @@ target_include_directories(dpctl_c_api_tests ${LEVEL_ZERO_INCLUDE_DIR} ) +if(_dpctl_sycl_targets) + # make fat binary + target_compile_options( + dpctl_c_api_tests + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + target_link_options( + dpctl_c_api_tests + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) +endif() + if(DPCTL_GENERATE_COVERAGE) set(object_arg "-object;") add_custom_target(run-c-api-tests diff --git a/libsyclinterface/tests/test_sycl_queue_manager.cpp b/libsyclinterface/tests/test_sycl_queue_manager.cpp deleted file mode 100644 index 424f442869..0000000000 --- a/libsyclinterface/tests/test_sycl_queue_manager.cpp +++ /dev/null @@ -1,355 +0,0 @@ -//===------- test_sycl_queue_manager.cpp - Test cases for queue manager ===// -// -// Data Parallel Control (dpctl) -// -// Copyright 2020-2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file has unit test cases for functions defined in -/// dpctl_sycl_queue_interface.h and dpctl_sycl_queue_manager.h. -/// -//===----------------------------------------------------------------------===// -#include "dpctl_sycl_context_interface.h" -#include "dpctl_sycl_device_interface.h" -#include "dpctl_sycl_device_manager.h" -#include "dpctl_sycl_device_selector_interface.h" -#include "dpctl_sycl_queue_interface.h" -#include "dpctl_sycl_queue_manager.h" -#include "dpctl_sycl_type_casters.hpp" -#include -#include -#include - -using namespace std; -using namespace sycl; - -namespace -{ - -void foo(size_t &num) -{ - auto DS1 = DPCTLFilterSelector_Create("opencl:gpu"); - auto DS2 = DPCTLFilterSelector_Create("opencl:cpu"); - auto D1 = DPCTLDevice_CreateFromSelector(DS1); - auto D2 = DPCTLDevice_CreateFromSelector(DS2); - auto Q1 = DPCTLQueue_CreateForDevice(D1, nullptr, DPCTL_DEFAULT_PROPERTY); - auto Q2 = DPCTLQueue_CreateForDevice(D2, nullptr, DPCTL_DEFAULT_PROPERTY); - DPCTLQueueMgr_PushQueue(Q2); - DPCTLQueueMgr_PushQueue(Q1); - - // Capture the number of active queues in first - num = DPCTLQueueMgr_GetQueueStackSize(); - DPCTLQueueMgr_PopQueue(); - DPCTLQueueMgr_PopQueue(); - DPCTLQueue_Delete(Q1); - DPCTLQueue_Delete(Q2); - DPCTLDeviceSelector_Delete(DS1); - DPCTLDeviceSelector_Delete(DS2); - DPCTLDevice_Delete(D1); - DPCTLDevice_Delete(D2); -} - -void bar(size_t &num) -{ - auto DS1 = DPCTLFilterSelector_Create("opencl:gpu"); - auto D1 = DPCTLDevice_CreateFromSelector(DS1); - auto Q1 = DPCTLQueue_CreateForDevice(D1, nullptr, DPCTL_DEFAULT_PROPERTY); - DPCTLQueueMgr_PushQueue(Q1); - // Capture the number of active queues in second - num = DPCTLQueueMgr_GetQueueStackSize(); - DPCTLQueueMgr_PopQueue(); - DPCTLQueue_Delete(Q1); - DPCTLDeviceSelector_Delete(DS1); - DPCTLDevice_Delete(D1); -} - -} /* end of anonymous namespace */ - -struct TestDPCTLSyclQueueManager : public ::testing::TestWithParam -{ - DPCTLSyclDeviceSelectorRef DSRef = DPCTLFilterSelector_Create(GetParam()); - DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef); - - TestDPCTLSyclQueueManager() - { - DSRef = DPCTLFilterSelector_Create(GetParam()); - DRef = DPCTLDevice_CreateFromSelector(DSRef); - } - - void SetUp() - { - if (!DRef) { - auto message = "Skipping as no device of type " + - std::string(GetParam()) + "."; - GTEST_SKIP_(message.c_str()); - } - } - - ~TestDPCTLSyclQueueManager() - { - DPCTLDeviceSelector_Delete(DSRef); - DPCTLDevice_Delete(DRef); - } -}; - -TEST_P(TestDPCTLSyclQueueManager, CheckDPCTLGetCurrentQueue) -{ - DPCTLSyclQueueRef q = DPCTLQueueMgr_GetCurrentQueue(); - ASSERT_TRUE(q != nullptr); -} - -TEST_P(TestDPCTLSyclQueueManager, CheckIsCurrentQueue) -{ - auto Q0 = DPCTLQueueMgr_GetCurrentQueue(); - EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q0)); - auto Q1 = DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); - DPCTLQueueMgr_PushQueue(Q1); - EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q1)); - DPCTLQueue_Delete(Q1); - DPCTLQueueMgr_PopQueue(); - EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q0)); - DPCTLQueue_Delete(Q0); -} - -INSTANTIATE_TEST_SUITE_P(QueueMgrFunctions, - TestDPCTLSyclQueueManager, - ::testing::Values("opencl:gpu:0", - "opencl:cpu:0", - "level_zero:gpu:0")); - -struct TestDPCTLQueueMgrFeatures : public ::testing::Test -{ - TestDPCTLQueueMgrFeatures() {} - ~TestDPCTLQueueMgrFeatures() {} -}; - -TEST_F(TestDPCTLQueueMgrFeatures, CheckGetNumActivatedQueues) -{ - size_t num0, num1, num2, num4; - DPCTLSyclDeviceSelectorRef CPU_DSRef = nullptr, GPU_DSRef = nullptr; - DPCTLSyclDeviceRef CPU_DRef = nullptr, GPU_DRef = nullptr; - - GPU_DSRef = DPCTLFilterSelector_Create("opencl:gpu"); - GPU_DRef = DPCTLDevice_CreateFromSelector(GPU_DSRef); - CPU_DSRef = DPCTLFilterSelector_Create("opencl:cpu"); - CPU_DRef = DPCTLDevice_CreateFromSelector(CPU_DSRef); - - if (!(CPU_DRef && GPU_DRef)) { - DPCTLDeviceSelector_Delete(GPU_DSRef); - DPCTLDevice_Delete(GPU_DRef); - DPCTLDeviceSelector_Delete(CPU_DSRef); - DPCTLDevice_Delete(CPU_DRef); - GTEST_SKIP_( - "OpenCL GPU and CPU devices are needed, but were not found."); - } - else { - auto Q1 = DPCTLQueue_CreateForDevice(GPU_DRef, nullptr, - DPCTL_DEFAULT_PROPERTY); - DPCTLQueueMgr_PushQueue(Q1); - std::thread first(foo, std::ref(num1)); - std::thread second(bar, std::ref(num2)); - - // synchronize threads: - first.join(); - second.join(); - - // Capture the number of active queues in first - num0 = DPCTLQueueMgr_GetQueueStackSize(); - DPCTLQueueMgr_PopQueue(); - num4 = DPCTLQueueMgr_GetQueueStackSize(); - - // Verify what the expected number of activated queues each time a - // thread called getNumActivatedQueues. - EXPECT_EQ(num0, 1ul); - EXPECT_EQ(num1, 2ul); - EXPECT_EQ(num2, 1ul); - EXPECT_EQ(num4, 0ul); - - DPCTLQueue_Delete(Q1); - DPCTLDeviceSelector_Delete(GPU_DSRef); - DPCTLDevice_Delete(GPU_DRef); - DPCTLDeviceSelector_Delete(CPU_DSRef); - DPCTLDevice_Delete(CPU_DRef); - } -} - -TEST_F(TestDPCTLQueueMgrFeatures, CheckIsCurrentQueue2) -{ - DPCTLSyclDeviceSelectorRef DS1 = nullptr, DS2 = nullptr; - DPCTLSyclDeviceRef D1 = nullptr, D2 = nullptr; - - DS1 = DPCTLFilterSelector_Create("opencl:gpu"); - DS2 = DPCTLFilterSelector_Create("opencl:cpu"); - D1 = DPCTLDevice_CreateFromSelector(DS1); - D2 = DPCTLDevice_CreateFromSelector(DS2); - - if (!(D1 && D2)) { - DPCTLDeviceSelector_Delete(DS1); - DPCTLDeviceSelector_Delete(DS2); - DPCTLDevice_Delete(D1); - DPCTLDevice_Delete(D2); - GTEST_SKIP_( - "OpenCL GPU and CPU devices are needed, but were not found."); - } - - auto Q1 = DPCTLQueue_CreateForDevice(D1, nullptr, DPCTL_DEFAULT_PROPERTY); - DPCTLQueueMgr_PushQueue(Q1); - EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q1)); - auto Q2 = DPCTLQueue_CreateForDevice(D2, nullptr, DPCTL_DEFAULT_PROPERTY); - DPCTLQueueMgr_PushQueue(Q2); - EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q2)); - EXPECT_FALSE(DPCTLQueueMgr_IsCurrentQueue(Q1)); - DPCTLQueue_Delete(Q2); - DPCTLQueueMgr_PopQueue(); - EXPECT_TRUE(DPCTLQueueMgr_IsCurrentQueue(Q1)); - DPCTLQueue_Delete(Q1); - DPCTLQueueMgr_PopQueue(); - DPCTLDeviceSelector_Delete(DS1); - DPCTLDeviceSelector_Delete(DS2); - DPCTLDevice_Delete(D1); - DPCTLDevice_Delete(D2); -} - -TEST_F(TestDPCTLQueueMgrFeatures, CheckSetGlobalQueueForSubDevices) -{ - DPCTLSyclDeviceSelectorRef DS = nullptr; - DPCTLSyclDeviceRef RootCpu_DRef = nullptr; - size_t max_eu_count = 0; - DPCTLSyclDeviceRef SubDev0_DRef = nullptr; - DPCTLSyclDeviceRef SubDev1_DRef = nullptr; - DPCTLSyclQueueRef QRef = nullptr; - - EXPECT_NO_FATAL_FAILURE(DS = DPCTLFilterSelector_Create("opencl:cpu")); - EXPECT_NO_FATAL_FAILURE(RootCpu_DRef = DPCTLDevice_CreateFromSelector(DS)); - DPCTLDeviceSelector_Delete(DS); - EXPECT_TRUE(RootCpu_DRef); - EXPECT_NO_FATAL_FAILURE(max_eu_count = - DPCTLDevice_GetMaxComputeUnits(RootCpu_DRef)); - size_t n1 = max_eu_count / 2; - size_t n2 = max_eu_count - n1; - - if (n1 > 0 && n2 > 0) { - size_t counts[2] = {n1, n2}; - DPCTLDeviceVectorRef DVRef = nullptr; - EXPECT_NO_FATAL_FAILURE(DVRef = DPCTLDevice_CreateSubDevicesByCounts( - RootCpu_DRef, counts, 2)); - EXPECT_NO_FATAL_FAILURE(SubDev0_DRef = - DPCTLDeviceVector_GetAt(DVRef, 0)); - EXPECT_NO_FATAL_FAILURE(SubDev1_DRef = - DPCTLDeviceVector_GetAt(DVRef, 1)); - EXPECT_NO_FATAL_FAILURE( - QRef = DPCTLQueue_CreateForDevice(SubDev0_DRef, nullptr, - DPCTL_DEFAULT_PROPERTY)); - EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(SubDev1_DRef)); - EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(SubDev0_DRef)); - EXPECT_NO_FATAL_FAILURE(DPCTLDeviceVector_Delete(DVRef)); - EXPECT_NO_FATAL_FAILURE(DPCTLQueueMgr_SetGlobalQueue(QRef)); - EXPECT_NO_FATAL_FAILURE(DPCTLQueue_Delete(QRef)); - } - else { - DPCTLDevice_Delete(RootCpu_DRef); - GTEST_SKIP_("OpenCL CPU devices are needed, but were not found."); - } -} - -TEST_F(TestDPCTLQueueMgrFeatures, - CheckSetGlobalQueueForSubDevicesMultiDeviceContext) -{ - DPCTLSyclDeviceSelectorRef DS = nullptr; - DPCTLSyclDeviceRef RootCpu_DRef = nullptr; - size_t max_eu_count = 0; - DPCTLSyclDeviceRef SubDev0_DRef = nullptr; - DPCTLSyclDeviceRef SubDev1_DRef = nullptr; - DPCTLSyclQueueRef QRef = nullptr; - DPCTLSyclContextRef CRef = nullptr; - - EXPECT_NO_FATAL_FAILURE(DS = DPCTLFilterSelector_Create("opencl:cpu")); - EXPECT_NO_FATAL_FAILURE(RootCpu_DRef = DPCTLDevice_CreateFromSelector(DS)); - DPCTLDeviceSelector_Delete(DS); - EXPECT_TRUE(RootCpu_DRef); - EXPECT_NO_FATAL_FAILURE(max_eu_count = - DPCTLDevice_GetMaxComputeUnits(RootCpu_DRef)); - size_t n1 = max_eu_count / 2; - size_t n2 = max_eu_count - n1; - - if (n1 > 0 && n2 > 0) { - size_t counts[2] = {n1, n2}; - DPCTLDeviceVectorRef DVRef = nullptr; - EXPECT_NO_FATAL_FAILURE(DVRef = DPCTLDevice_CreateSubDevicesByCounts( - RootCpu_DRef, counts, 2)); - EXPECT_NO_FATAL_FAILURE(SubDev0_DRef = - DPCTLDeviceVector_GetAt(DVRef, 0)); - EXPECT_NO_FATAL_FAILURE(SubDev1_DRef = - DPCTLDeviceVector_GetAt(DVRef, 1)); - EXPECT_NO_FATAL_FAILURE(CRef = DPCTLContext_CreateFromDevices( - DVRef, nullptr, DPCTL_DEFAULT_PROPERTY)); - EXPECT_NO_FATAL_FAILURE( - QRef = DPCTLQueue_Create(CRef, SubDev0_DRef, nullptr, - DPCTL_DEFAULT_PROPERTY)); - EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(SubDev1_DRef)); - EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(SubDev0_DRef)); - EXPECT_NO_FATAL_FAILURE(DPCTLDeviceVector_Delete(DVRef)); - EXPECT_NO_FATAL_FAILURE(DPCTLQueueMgr_SetGlobalQueue(QRef)); - EXPECT_NO_FATAL_FAILURE(DPCTLQueue_Delete(QRef)); - EXPECT_NO_FATAL_FAILURE(DPCTLContext_Delete(CRef)); - } - else { - DPCTLDevice_Delete(RootCpu_DRef); - GTEST_SKIP_("OpenCL CPU devices are needed, but were not found."); - } -} - -struct TestDPCTLQueueMgrNullArgs : public ::testing::Test -{ - DPCTLSyclQueueRef Null_QRef = nullptr; - - TestDPCTLQueueMgrNullArgs() {} - ~TestDPCTLQueueMgrNullArgs() {} -}; - -TEST_F(TestDPCTLQueueMgrNullArgs, ChkGlobalQueueIsCurrent) -{ - bool res = true; - EXPECT_NO_FATAL_FAILURE(res = DPCTLQueueMgr_GlobalQueueIsCurrent()); - ASSERT_TRUE(res == true || res == false); -} - -TEST_F(TestDPCTLQueueMgrNullArgs, ChkIsCurrentQueue) -{ - bool res = true; - EXPECT_NO_FATAL_FAILURE(res = DPCTLQueueMgr_IsCurrentQueue(Null_QRef)); - ASSERT_FALSE(res); -} - -#if 0 -TEST_F(TestDPCTLQueueMgrNullArgs, ChkSetGlobalQueue) -{ - EXPECT_DEATH(DPCTLQueueMgr_SetGlobalQueue(Null_QRef), "*"); -} - -TEST_F(TestDPCTLQueueMgrNullArgs, ChkPushGlobalQueue) -{ - EXPECT_DEATH(DPCTLQueueMgr_SetGlobalQueue(Null_QRef), "*"); -} -#endif - -TEST_F(TestDPCTLQueueMgrNullArgs, ChkGetQueueStackSize) -{ - size_t n = 0; - EXPECT_NO_FATAL_FAILURE(n = DPCTLQueueMgr_GetQueueStackSize()); - ASSERT_TRUE(n < size_t(-1)); -} From ff104ad111d23245772132023dc07e4dfa2bc69b Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Fri, 1 Mar 2024 00:27:45 -0600 Subject: [PATCH 5/6] Update user guide steps to build libsyclinterface --- docs/docfiles/user_guides/QuickStart.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/docfiles/user_guides/QuickStart.rst b/docs/docfiles/user_guides/QuickStart.rst index e3fc55e88b..57c443e543 100644 --- a/docs/docfiles/user_guides/QuickStart.rst +++ b/docs/docfiles/user_guides/QuickStart.rst @@ -259,21 +259,24 @@ library. set +xe rm -rf build mkdir build - pushd build + pushd build || exit 1 - INSTALL_PREFIX=`pwd`/../install + INSTALL_PREFIX=$(pwd)/../install rm -rf ${INSTALL_PREFIX} export ONEAPI_ROOT=/opt/intel/oneapi DPCPP_ROOT=${ONEAPI_ROOT}/compiler/latest/linux cmake \ - -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_C_COMPILER=icx \ + -DCMAKE_CXX_COMPILER=dpcpp \ -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} \ - -DDPCPP_INSTALL_DIR=${DPCPP_ROOT} \ - -DCMAKE_C_COMPILER:PATH=${DPCPP_ROOT}/bin/icx \ - -DCMAKE_CXX_COMPILER:PATH=${DPCPP_ROOT}/bin/dpcpp \ + -DDPCTL_ENABLE_L0_PROGRAM_CREATION=ON \ -DDPCTL_BUILD_CAPI_TESTS=ON \ + -DDPCTL_GENERATE_COVERAGE=ON \ .. make V=1 -n -j 4 && make check && make install + + popd || exit 1 From 653efa8c90da9117f1b670bbebebd9441a61ec5a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 18 Mar 2024 09:14:09 -0500 Subject: [PATCH 6/6] Use DPCTLQueue_CreateForDevice instead of wrapping sycl::queue pointer --- .../tests/test_sycl_usm_interface.cpp | 123 +++++++++++++----- 1 file changed, 87 insertions(+), 36 deletions(-) diff --git a/libsyclinterface/tests/test_sycl_usm_interface.cpp b/libsyclinterface/tests/test_sycl_usm_interface.cpp index 22c62d9065..38616bab00 100644 --- a/libsyclinterface/tests/test_sycl_usm_interface.cpp +++ b/libsyclinterface/tests/test_sycl_usm_interface.cpp @@ -94,74 +94,125 @@ struct TestDPCTLSyclUSMInterface : public ::testing::Test TEST_F(TestDPCTLSyclUSMInterface, MallocShared) { - sycl::queue q; - DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); - ASSERT_TRUE(Q); + DPCTLSyclDeviceSelectorRef DSRef = DPCTLDefaultSelector_Create(); + ASSERT_TRUE(DSRef); + DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef); + DPCTLDeviceSelector_Delete(DSRef); + ASSERT_TRUE(DRef); + DPCTLSyclQueueRef QRef = + DPCTLQueue_CreateForDevice(DRef, NULL, DPCTL_DEFAULT_PROPERTY); + DPCTLDevice_Delete(DRef); + ASSERT_TRUE(QRef); + const size_t nbytes = SIZE; - auto Ptr = DPCTLmalloc_shared(nbytes, Q); + auto Ptr = DPCTLmalloc_shared(nbytes, QRef); EXPECT_TRUE(bool(Ptr)); - common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_SHARED); - DPCTLfree_with_queue(Ptr, Q); + common_test_body(nbytes, Ptr, QRef, DPCTLSyclUSMType::DPCTL_USM_SHARED); + DPCTLfree_with_queue(Ptr, QRef); + + DPCTLQueue_Delete(QRef); } TEST_F(TestDPCTLSyclUSMInterface, MallocDevice) { - sycl::queue q; - DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); - ASSERT_TRUE(Q); + DPCTLSyclDeviceSelectorRef DSRef = DPCTLDefaultSelector_Create(); + ASSERT_TRUE(DSRef); + DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef); + DPCTLDeviceSelector_Delete(DSRef); + ASSERT_TRUE(DRef); + DPCTLSyclQueueRef QRef = + DPCTLQueue_CreateForDevice(DRef, NULL, DPCTL_DEFAULT_PROPERTY); + DPCTLDevice_Delete(DRef); + ASSERT_TRUE(QRef); + const size_t nbytes = SIZE; - auto Ptr = DPCTLmalloc_device(nbytes, Q); + auto Ptr = DPCTLmalloc_device(nbytes, QRef); EXPECT_TRUE(bool(Ptr)); - common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_DEVICE); - DPCTLfree_with_queue(Ptr, Q); + common_test_body(nbytes, Ptr, QRef, DPCTLSyclUSMType::DPCTL_USM_DEVICE); + DPCTLfree_with_queue(Ptr, QRef); + + DPCTLQueue_Delete(QRef); } TEST_F(TestDPCTLSyclUSMInterface, MallocHost) { - sycl::queue q; - DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); - ASSERT_TRUE(Q); + DPCTLSyclDeviceSelectorRef DSRef = DPCTLDefaultSelector_Create(); + ASSERT_TRUE(DSRef); + DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef); + DPCTLDeviceSelector_Delete(DSRef); + ASSERT_TRUE(DRef); + DPCTLSyclQueueRef QRef = + DPCTLQueue_CreateForDevice(DRef, NULL, DPCTL_DEFAULT_PROPERTY); + DPCTLDevice_Delete(DRef); + ASSERT_TRUE(QRef); + const size_t nbytes = SIZE; - auto Ptr = DPCTLmalloc_host(nbytes, Q); + auto Ptr = DPCTLmalloc_host(nbytes, QRef); EXPECT_TRUE(bool(Ptr)); - common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_HOST); - DPCTLfree_with_queue(Ptr, Q); + common_test_body(nbytes, Ptr, QRef, DPCTLSyclUSMType::DPCTL_USM_HOST); + DPCTLfree_with_queue(Ptr, QRef); + DPCTLQueue_Delete(QRef); } TEST_F(TestDPCTLSyclUSMInterface, AlignedAllocShared) { - sycl::queue q; - DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); - ASSERT_TRUE(Q); + DPCTLSyclDeviceSelectorRef DSRef = DPCTLDefaultSelector_Create(); + ASSERT_TRUE(DSRef); + DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef); + DPCTLDeviceSelector_Delete(DSRef); + ASSERT_TRUE(DRef); + DPCTLSyclQueueRef QRef = + DPCTLQueue_CreateForDevice(DRef, NULL, DPCTL_DEFAULT_PROPERTY); + DPCTLDevice_Delete(DRef); + ASSERT_TRUE(QRef); + const size_t nbytes = SIZE; - auto Ptr = DPCTLaligned_alloc_shared(64, nbytes, Q); + auto Ptr = DPCTLaligned_alloc_shared(64, nbytes, QRef); EXPECT_TRUE(bool(Ptr)); - common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_SHARED); - DPCTLfree_with_queue(Ptr, Q); + common_test_body(nbytes, Ptr, QRef, DPCTLSyclUSMType::DPCTL_USM_SHARED); + DPCTLfree_with_queue(Ptr, QRef); + DPCTLQueue_Delete(QRef); } TEST_F(TestDPCTLSyclUSMInterface, AlignedAllocDevice) { - sycl::queue q; - DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); - ASSERT_TRUE(Q); + DPCTLSyclDeviceSelectorRef DSRef = DPCTLDefaultSelector_Create(); + ASSERT_TRUE(DSRef); + DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef); + DPCTLDeviceSelector_Delete(DSRef); + ASSERT_TRUE(DRef); + DPCTLSyclQueueRef QRef = + DPCTLQueue_CreateForDevice(DRef, NULL, DPCTL_DEFAULT_PROPERTY); + DPCTLDevice_Delete(DRef); + ASSERT_TRUE(QRef); + const size_t nbytes = SIZE; - auto Ptr = DPCTLaligned_alloc_device(64, nbytes, Q); + auto Ptr = DPCTLaligned_alloc_device(64, nbytes, QRef); EXPECT_TRUE(bool(Ptr)); - common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_DEVICE); - DPCTLfree_with_queue(Ptr, Q); + common_test_body(nbytes, Ptr, QRef, DPCTLSyclUSMType::DPCTL_USM_DEVICE); + DPCTLfree_with_queue(Ptr, QRef); + DPCTLQueue_Delete(QRef); } TEST_F(TestDPCTLSyclUSMInterface, AlignedAllocHost) { - sycl::queue q; - DPCTLSyclQueueRef Q = dpctl::syclinterface::wrap(&q); - ASSERT_TRUE(Q); + DPCTLSyclDeviceSelectorRef DSRef = DPCTLDefaultSelector_Create(); + ASSERT_TRUE(DSRef); + DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef); + DPCTLDeviceSelector_Delete(DSRef); + ASSERT_TRUE(DRef); + DPCTLSyclQueueRef QRef = + DPCTLQueue_CreateForDevice(DRef, NULL, DPCTL_DEFAULT_PROPERTY); + DPCTLDevice_Delete(DRef); + ASSERT_TRUE(QRef); + const size_t nbytes = SIZE; - auto Ptr = DPCTLaligned_alloc_host(64, nbytes, Q); + auto Ptr = DPCTLaligned_alloc_host(64, nbytes, QRef); EXPECT_TRUE(bool(Ptr)); - common_test_body(nbytes, Ptr, Q, DPCTLSyclUSMType::DPCTL_USM_HOST); - DPCTLfree_with_queue(Ptr, Q); + common_test_body(nbytes, Ptr, QRef, DPCTLSyclUSMType::DPCTL_USM_HOST); + DPCTLfree_with_queue(Ptr, QRef); + + DPCTLQueue_Delete(QRef); } struct TestDPCTLSyclUSMNullArgs : public ::testing::Test