From 40e5bc2a64f3cff65ea888252680dda9ebfaba26 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 9 Mar 2023 08:07:48 -0800 Subject: [PATCH 01/10] Implements atomic memory scope capabilities device queries for OpenCL and Level Zero backends. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/pi.h | 13 +++++ .../esimd_emulator/pi_esimd_emulator.cpp | 1 + sycl/plugins/opencl/pi_opencl.cpp | 55 ++++++++++++++++++- .../ur/adapters/level_zero/ur_level_zero.cpp | 14 ++++- 4 files changed, 80 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 56b8b33fae583..37836179a0a1f 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -561,6 +561,19 @@ constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP = 0x04; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE = 0x08; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM = 0x10; +// CL equivalents are only available for OpenCL version 3.0 +#define PI_DEVICE_ATOMIC_MEMORY_CAPABILITIES 0x1063 +using pi_device_atomic_capabilities = pi_bitfield; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_RELAXED = 0x01; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_ACQ_REL = 0x02; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_SEQ_CST = 0x04; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_ITEM = 0x08; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_GROUP = + 0x10; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_DEVICE = 0x20; +constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_ALL_DEVICES = + 0x40; + typedef enum { PI_PROFILING_INFO_COMMAND_QUEUED = 0x1280, PI_PROFILING_INFO_COMMAND_SUBMIT = 0x1281, diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 0fc2a5a10f4f9..5bb0ce881e79f 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -805,6 +805,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_IMAGE_SRGB) CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_64) CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES) + CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_GLOBAL_WORK_GROUPS) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_1D) CASE_PI_UNSUPPORTED(PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 2c44f0cfe9eb3..b34e0985f530c 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -283,8 +283,61 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, // sycl/doc/extensions/supported/sycl_ext_intel_device_info.md case PI_DEVICE_INFO_UUID: case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: - case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: return PI_ERROR_INVALID_VALUE; + case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { + // Initialize result to minimum mandated capabilities according to + // SYCL2020 4.6.3.2 + pi_memory_scope_capabilities result = + PI_MEMORY_SCOPE_SUB_GROUP | PI_MEMORY_SCOPE_WORK_GROUP; + + OCLV::OpenCLVersion devVer; + + cl_device_id deviceID = cast(device); + cl_int ret_err = getDeviceVersion(deviceID, devVer); + if (ret_err != CL_SUCCESS) + return static_cast(ret_err); + + pi_device_atomic_capabilities devCapabilities = 0; + if (devVer >= OCLV::V3_0) { + ret_err = clGetDeviceInfo(deviceID, PI_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(pi_device_atomic_capabilities), + &devCapabilities, nullptr); + if (ret_err != CL_SUCCESS) + return static_cast(ret_err); + assert(devCapabilities && PI_DEVICE_ATOMIC_SCOPE_WORK_GROUP && + "Violates minimum mandated guarantee"); + + // Because scopes are hierarchical, wider scopes support all narrower + // scopes (except work_item which is a special case). SUB_GROUP was + // already included in the initialization, since WORK_GROUP is mandated + // minimum capality. + + // Special case, only enable if it is explicitly enabled in the backend + if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_WORK_ITEM) + result |= PI_MEMORY_SCOPE_WORK_ITEM; + + if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_DEVICE) { + result |= PI_MEMORY_SCOPE_DEVICE; + } + + if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + result |= PI_MEMORY_SCOPE_SYSTEM; + } + + } else { + // This info is only available in OpenCL version >= 3.0 + // Just return minimum mandated capabilities for older versions. + // OpenCL 1.x minimum mandated capabilities are WORK_GROUP, we + // already initialized using it. + if (devVer >= OCLV::V2_0) { + // OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | + // ALL_DEVICES + result |= PI_MEMORY_SCOPE_DEVICE | PI_MEMORY_SCOPE_SYSTEM; + } + } + std::memcpy(paramValue, &result, sizeof(result)); + return PI_SUCCESS; + } case PI_DEVICE_INFO_ATOMIC_64: { cl_int ret_err = CL_SUCCESS; cl_bool result = CL_FALSE; diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 450a5aff1a4d8..8a75a0e3a4ce1 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -6,6 +6,8 @@ // //===-----------------------------------------------------------------===// +#include + #include #include #include @@ -1160,9 +1162,17 @@ ZER_APIEXPORT zer_result_t ZER_APICALL zerDeviceGetInfo( // bfloat16 math functions are not yet supported on Intel GPUs. return ReturnValue(bool{false}); } + case ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { + // There are no explicit restrictions in L0 programming guide, so assume all + // are supported + pi_memory_scope_capabilities result = + PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | + PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | + PI_MEMORY_SCOPE_SYSTEM; + + return ReturnValue(result); + } - // TODO: Implement. - case ZER_EXT_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: zePrint("Unsupported ParamName in piGetDeviceInfo\n"); zePrint("ParamName=%d(0x%x)\n", ParamName, ParamName); From db64e3c8093788d4761786a7a766e9a4aee4f91b Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 9 Mar 2023 08:08:18 -0800 Subject: [PATCH 02/10] Adds test for atomic memory scope capabilities device queries. Signed-off-by: Maronas, Marcos --- .../AtomicMemoryScopeCapabilities.cpp | 73 +++++++++++++++++++ sycl/unittests/SYCL2020/CMakeLists.txt | 1 + 2 files changed, 74 insertions(+) create mode 100644 sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp diff --git a/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp b/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp new file mode 100644 index 0000000000000..23be3b2c4869c --- /dev/null +++ b/sycl/unittests/SYCL2020/AtomicMemoryScopeCapabilities.cpp @@ -0,0 +1,73 @@ +//==-------- AtomicMemoryScopeCapabilities.cpp --- queue unit tests --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +using namespace sycl; + +namespace { + +thread_local bool deviceGetInfoCalled; + +pi_platform PiPlatform = nullptr; + +pi_result redefinedDeviceGetInfoAfter(pi_device device, + pi_device_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) { + if (param_name == PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES) { + deviceGetInfoCalled = true; + if (param_value) { + auto *Result = + reinterpret_cast(param_value); + *Result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | + PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | + PI_MEMORY_SCOPE_SYSTEM; + } + } + return PI_SUCCESS; +} + +TEST(AtomicMemoryScopeCapabilitiesCheck, CheckAtomicMemoryScopeCapabilities) { + sycl::unittest::PiMock Mock; + sycl::platform Plt = Mock.getPlatform(); + + PiPlatform = detail::getSyclObjImpl(Plt)->getHandleRef(); + context DefaultCtx = Plt.ext_oneapi_get_default_context(); + device Dev = DefaultCtx.get_devices()[0]; + + deviceGetInfoCalled = false; + + Mock.redefineAfter( + redefinedDeviceGetInfoAfter); + auto scope_capabilities = + Dev.get_info(); + EXPECT_TRUE(deviceGetInfoCalled); + size_t expectedSize = 5; + EXPECT_EQ(scope_capabilities.size(), expectedSize); + + auto res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::work_item); + EXPECT_FALSE(res == scope_capabilities.end()); + res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::sub_group); + EXPECT_FALSE(res == scope_capabilities.end()); + res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::work_group); + EXPECT_FALSE(res == scope_capabilities.end()); + res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::device); + EXPECT_FALSE(res == scope_capabilities.end()); + res = std::find(scope_capabilities.begin(), scope_capabilities.end(), + sycl::memory_scope::system); + EXPECT_FALSE(res == scope_capabilities.end()); +} +} // anonymous namespace diff --git a/sycl/unittests/SYCL2020/CMakeLists.txt b/sycl/unittests/SYCL2020/CMakeLists.txt index 9e22f73abfa00..6fd93b54b131c 100644 --- a/sycl/unittests/SYCL2020/CMakeLists.txt +++ b/sycl/unittests/SYCL2020/CMakeLists.txt @@ -9,5 +9,6 @@ add_sycl_unittest(SYCL2020Tests OBJECT IsCompatible.cpp DeviceGetInfoAspects.cpp DeviceAspectTraits.cpp + AtomicMemoryScopeCapabilities.cpp ) From 56badc0fd4830273492464d79c0beb6366469845 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 10 Mar 2023 06:09:41 -0800 Subject: [PATCH 03/10] SYCL should always return memory_scope::work_item. Signed-off-by: Maronas, Marcos --- sycl/plugins/opencl/pi_opencl.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index b34e0985f530c..2e20c9585bf1c 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -287,8 +287,9 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { // Initialize result to minimum mandated capabilities according to // SYCL2020 4.6.3.2 - pi_memory_scope_capabilities result = - PI_MEMORY_SCOPE_SUB_GROUP | PI_MEMORY_SCOPE_WORK_GROUP; + pi_memory_scope_capabilities result = PI_MEMORY_SCOPE_WORK_ITEM | + PI_MEMORY_SCOPE_SUB_GROUP | + PI_MEMORY_SCOPE_WORK_GROUP; OCLV::OpenCLVersion devVer; @@ -308,14 +309,8 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, "Violates minimum mandated guarantee"); // Because scopes are hierarchical, wider scopes support all narrower - // scopes (except work_item which is a special case). SUB_GROUP was - // already included in the initialization, since WORK_GROUP is mandated - // minimum capality. - - // Special case, only enable if it is explicitly enabled in the backend - if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_WORK_ITEM) - result |= PI_MEMORY_SCOPE_WORK_ITEM; - + // scopes. SUB_GROUP and WORK_ITEM were already included in the + // initialization, since WORK_GROUP is mandated minimum capality. if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_DEVICE) { result |= PI_MEMORY_SCOPE_DEVICE; } From 41b5cc116cc0cf3e64f51f5b1f0cb7ba17b2ff2a Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Mon, 13 Mar 2023 09:03:06 -0700 Subject: [PATCH 04/10] Addressing code review concerns. Signed-off-by: Maronas, Marcos --- opencl/CMakeLists.txt | 2 +- sycl/include/sycl/detail/cl.h | 2 +- sycl/include/sycl/detail/pi.h | 13 ------------- sycl/plugins/opencl/pi_opencl.cpp | 23 +++++++++++++++-------- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/opencl/CMakeLists.txt b/opencl/CMakeLists.txt index 1442a1ac43075..4ed26bd5e719d 100644 --- a/opencl/CMakeLists.txt +++ b/opencl/CMakeLists.txt @@ -44,7 +44,7 @@ FetchContent_GetProperties(ocl-headers) set(OpenCL_INCLUDE_DIR ${ocl-headers_SOURCE_DIR} CACHE PATH "Path to OpenCL Headers") -target_compile_definitions(Headers INTERFACE -DCL_TARGET_OPENCL_VERSION=220) +target_compile_definitions(Headers INTERFACE -DCL_TARGET_OPENCL_VERSION=300) add_library(OpenCL-Headers ALIAS Headers) # OpenCL Library (ICD Loader) diff --git a/sycl/include/sycl/detail/cl.h b/sycl/include/sycl/detail/cl.h index 7e90fe126e40d..aa160d360563a 100644 --- a/sycl/include/sycl/detail/cl.h +++ b/sycl/include/sycl/detail/cl.h @@ -11,7 +11,7 @@ // Suppress a compiler message about undefined CL_TARGET_OPENCL_VERSION // and define all symbols up to OpenCL 2.2 #ifndef CL_TARGET_OPENCL_VERSION -#define CL_TARGET_OPENCL_VERSION 220 +#define CL_TARGET_OPENCL_VERSION 300 #endif #include diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 37836179a0a1f..56b8b33fae583 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -561,19 +561,6 @@ constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_WORK_GROUP = 0x04; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_DEVICE = 0x08; constexpr pi_memory_scope_capabilities PI_MEMORY_SCOPE_SYSTEM = 0x10; -// CL equivalents are only available for OpenCL version 3.0 -#define PI_DEVICE_ATOMIC_MEMORY_CAPABILITIES 0x1063 -using pi_device_atomic_capabilities = pi_bitfield; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_RELAXED = 0x01; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_ACQ_REL = 0x02; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_ORDER_SEQ_CST = 0x04; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_ITEM = 0x08; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_WORK_GROUP = - 0x10; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_DEVICE = 0x20; -constexpr pi_device_atomic_capabilities PI_DEVICE_ATOMIC_SCOPE_ALL_DEVICES = - 0x40; - typedef enum { PI_PROFILING_INFO_COMMAND_QUEUED = 0x1280, PI_PROFILING_INFO_COMMAND_SUBMIT = 0x1281, diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 2e20c9585bf1c..0e67644549bec 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -298,24 +298,24 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, if (ret_err != CL_SUCCESS) return static_cast(ret_err); - pi_device_atomic_capabilities devCapabilities = 0; + cl_device_atomic_capabilities devCapabilities = 0; if (devVer >= OCLV::V3_0) { - ret_err = clGetDeviceInfo(deviceID, PI_DEVICE_ATOMIC_MEMORY_CAPABILITIES, - sizeof(pi_device_atomic_capabilities), + ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), &devCapabilities, nullptr); if (ret_err != CL_SUCCESS) return static_cast(ret_err); - assert(devCapabilities && PI_DEVICE_ATOMIC_SCOPE_WORK_GROUP && + assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && "Violates minimum mandated guarantee"); // Because scopes are hierarchical, wider scopes support all narrower // scopes. SUB_GROUP and WORK_ITEM were already included in the // initialization, since WORK_GROUP is mandated minimum capality. - if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_DEVICE) { + if (devCapabilities && CL_DEVICE_ATOMIC_SCOPE_DEVICE) { result |= PI_MEMORY_SCOPE_DEVICE; } - if (devCapabilities && PI_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + if (devCapabilities && CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { result |= PI_MEMORY_SCOPE_SYSTEM; } @@ -330,7 +330,14 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, result |= PI_MEMORY_SCOPE_DEVICE | PI_MEMORY_SCOPE_SYSTEM; } } - std::memcpy(paramValue, &result, sizeof(result)); + if (paramValue) { + if (paramValueSize < sizeof(cl_device_atomic_capabilities)) + return PI_ERROR_INVALID_VALUE; + + std::memcpy(paramValue, &result, sizeof(result)); + if (paramValueSizeRet) + *paramValueSizeRet = sizeof(result); + } return PI_SUCCESS; } case PI_DEVICE_INFO_ATOMIC_64: { @@ -1835,7 +1842,7 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel, // Windows: dynamically loaded plugins might have been unloaded already // when this is called. Sycl RT holds onto the PI plugin so it can be // called safely. But this is not transitive. If the PI plugin in turn -// dynamically loaded a different DLL, that may have been unloaded. +// dynamically loaded a different DLL, that may have been unloaded. // TODO: add a global variable lifetime management code here (see // pi_level_zero.cpp for reference) Currently this is just a NOOP. pi_result piTearDown(void *PluginParameter) { From 8392d5bf8e8e8227fd5428cd70a2ba8eb45e4ee2 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Mon, 13 Mar 2023 10:45:51 -0700 Subject: [PATCH 05/10] Fixing compilation error. Signed-off-by: Maronas, Marcos --- sycl/plugins/opencl/pi_opencl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 0e67644549bec..9ee9ffeb6f8b4 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -311,11 +311,11 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, // Because scopes are hierarchical, wider scopes support all narrower // scopes. SUB_GROUP and WORK_ITEM were already included in the // initialization, since WORK_GROUP is mandated minimum capality. - if (devCapabilities && CL_DEVICE_ATOMIC_SCOPE_DEVICE) { + if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { result |= PI_MEMORY_SCOPE_DEVICE; } - if (devCapabilities && CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { result |= PI_MEMORY_SCOPE_SYSTEM; } From 8bacef1d85c9fb8c5e34b762c7c58d6ee64c6ead Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 16 Mar 2023 09:00:05 -0700 Subject: [PATCH 06/10] Addresses code review comments. Signed-off-by: Maronas, Marcos --- sycl/include/sycl/detail/cl.h | 2 +- sycl/plugins/opencl/pi_opencl.cpp | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/sycl/include/sycl/detail/cl.h b/sycl/include/sycl/detail/cl.h index aa160d360563a..20d640bcff59f 100644 --- a/sycl/include/sycl/detail/cl.h +++ b/sycl/include/sycl/detail/cl.h @@ -9,7 +9,7 @@ #pragma once // Suppress a compiler message about undefined CL_TARGET_OPENCL_VERSION -// and define all symbols up to OpenCL 2.2 +// and define all symbols up to OpenCL 3.0 #ifndef CL_TARGET_OPENCL_VERSION #define CL_TARGET_OPENCL_VERSION 300 #endif diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 9ee9ffeb6f8b4..856e02041edfa 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -308,9 +308,14 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && "Violates minimum mandated guarantee"); + // According to OCL 3.0 spec: // Because scopes are hierarchical, wider scopes support all narrower - // scopes. SUB_GROUP and WORK_ITEM were already included in the - // initialization, since WORK_GROUP is mandated minimum capality. + // scopes. OCL 3.0 spec also mentions that WORK_ITEM is an exception + // for this rule (wider support all narrower), but this is defined + // differently for SYCL. In short, SUB_GROUP and WORK_ITEM were already + // included in the initialization, since WORK_GROUP is mandated minimum + // capality, and wider scopes support narrower scopes. + // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES) if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { result |= PI_MEMORY_SCOPE_DEVICE; } @@ -335,9 +340,9 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, return PI_ERROR_INVALID_VALUE; std::memcpy(paramValue, &result, sizeof(result)); - if (paramValueSizeRet) - *paramValueSizeRet = sizeof(result); } + if (paramValueSizeRet) + *paramValueSizeRet = sizeof(result); return PI_SUCCESS; } case PI_DEVICE_INFO_ATOMIC_64: { From 3c05a251c0efe63eef78f49e2898bef730dce203 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Thu, 16 Mar 2023 09:04:36 -0700 Subject: [PATCH 07/10] Reverting unrelated change. Signed-off-by: Maronas, Marcos --- sycl/plugins/opencl/pi_opencl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 856e02041edfa..4652484750905 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1847,7 +1847,7 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel, // Windows: dynamically loaded plugins might have been unloaded already // when this is called. Sycl RT holds onto the PI plugin so it can be // called safely. But this is not transitive. If the PI plugin in turn -// dynamically loaded a different DLL, that may have been unloaded. +// dynamically loaded a different DLL, that may have been unloaded. // TODO: add a global variable lifetime management code here (see // pi_level_zero.cpp for reference) Currently this is just a NOOP. pi_result piTearDown(void *PluginParameter) { From 69d901b578364dffce9dfa4849f5354579c13fc7 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 17 Mar 2023 04:58:12 -0700 Subject: [PATCH 08/10] Clarifies comment using new reference. Signed-off-by: Maronas, Marcos --- sycl/plugins/opencl/pi_opencl.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 4652484750905..c430534f4f904 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -287,6 +287,9 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { // Initialize result to minimum mandated capabilities according to // SYCL2020 4.6.3.2 + // Because scopes are hierarchical, wider scopes support all narrower + // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) pi_memory_scope_capabilities result = PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | PI_MEMORY_SCOPE_WORK_GROUP; @@ -308,14 +311,9 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, assert((devCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && "Violates minimum mandated guarantee"); - // According to OCL 3.0 spec: // Because scopes are hierarchical, wider scopes support all narrower - // scopes. OCL 3.0 spec also mentions that WORK_ITEM is an exception - // for this rule (wider support all narrower), but this is defined - // differently for SYCL. In short, SUB_GROUP and WORK_ITEM were already - // included in the initialization, since WORK_GROUP is mandated minimum - // capality, and wider scopes support narrower scopes. - // (https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES) + // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and + // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { result |= PI_MEMORY_SCOPE_DEVICE; } From 1005e808ecbd0d155aa554923fdcfa127276f956 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Fri, 17 Mar 2023 08:30:24 -0700 Subject: [PATCH 09/10] Extends comment. Signed-off-by: Maronas, Marcos --- sycl/plugins/opencl/pi_opencl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index c430534f4f904..0d4c6a8870dcd 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -314,6 +314,8 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, // Because scopes are hierarchical, wider scopes support all narrower // scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and // WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382) + // We already initialized to these minimum mandated capabilities. Just + // check wider scopes. if (devCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { result |= PI_MEMORY_SCOPE_DEVICE; } From f7aa8ee2f26a2a1cceae7dab4b89243e70bcd862 Mon Sep 17 00:00:00 2001 From: "Maronas, Marcos" Date: Wed, 22 Mar 2023 08:24:20 -0700 Subject: [PATCH 10/10] Addresses code review comments. Signed-off-by: Maronas, Marcos --- .../ur/adapters/level_zero/ur_level_zero.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp index 81182b9374a86..e060124901dd8 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero.cpp @@ -6,8 +6,6 @@ // //===-----------------------------------------------------------------===// -#include - #include #include #include @@ -1169,10 +1167,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { // There are no explicit restrictions in L0 programming guide, so assume all // are supported - pi_memory_scope_capabilities result = - PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | - PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | - PI_MEMORY_SCOPE_SYSTEM; + ur_memory_scope_capability_flags_t result = + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_ITEM | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SUB_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; return ReturnValue(result); }