From f3c884d8421332c015e0a532bc9875cb4631ae0b Mon Sep 17 00:00:00 2001 From: rehana begam Date: Thu, 1 Jul 2021 16:44:42 -0700 Subject: [PATCH 01/18] [DPC++] enable multi-context (c-slice) support. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 73 +++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index d1d69561a219..c3178223c133 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1504,6 +1504,53 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, return PI_SUCCESS; } +// sub-sub-device +// TODO: do we need to gather Ordinals or just the SubSubDevicesCount will do ? +pi_result getCmdQueueOrdinals(pi_device PiSubDevice, pi_uint32 &SubSubDevicesCount, std::vector& Ordinals, + std::vector& AllQueueProperties) { + uint32_t numQueueGroups = 0; + ZE_CALL(zeDeviceGetCommandQueueGroupProperties, + (PiSubDevice->ZeDevice, &numQueueGroups, nullptr)); + if (numQueueGroups == 0) { + return PI_ERROR_UNKNOWN; + } + std::vector QueueProperties( + numQueueGroups); + ZE_CALL(zeDeviceGetCommandQueueGroupProperties, + (PiSubDevice->ZeDevice, &numQueueGroups, QueueProperties.data())); + + SubSubDevicesCount = numQueueGroups; + AllQueueProperties = QueueProperties; + + bool noComputeEngineFlag = true; + for (uint32_t i = 0; i < numQueueGroups; i++) { + if (QueueProperties[i].flags & + ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + Ordinals.push_back(i); + noComputeEngineFlag = false; + } + } + + if (noComputeEngineFlag) { + return PI_ERROR_UNKNOWN; + } + + return PI_SUCCESS; +} + +pi_result initializeWithOrdinal(pi_device PiSubSubDevice, int Ordinal, std::vector& QueueProperties) { + PiSubSubDevice->ZeComputeQueueGroupIndex = Ordinal; + PiSubSubDevice->ZeComputeQueueGroupProperties = QueueProperties[Ordinal]; + + // Cache device properties + PiSubSubDevice->ZeDeviceProperties = {}; + ZE_CALL(zeDeviceGetProperties, (PiSubSubDevice->ZeDevice, &(PiSubSubDevice->ZeDeviceProperties))); + PiSubSubDevice->ZeDeviceComputeProperties = {}; + ZE_CALL(zeDeviceGetComputeProperties, (PiSubSubDevice->ZeDevice, &(PiSubSubDevice->ZeDeviceComputeProperties))); + + return PI_SUCCESS; +} + // Check the device cache and load it if necessary. pi_result _pi_platform::populateDeviceCacheIfNeeded() { std::lock_guard Lock(PiDevicesCacheMutex); @@ -1547,6 +1594,32 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { delete[] ZeSubdevices; return Result; } + + // sub-sub-device + // get all the ordinals for the sub-sub-devices + pi_uint32 SubSubDevicesCount = 0; + std::vector Ordinals; + std::vector AllQueueProperties; + Result = getCmdQueueOrdinals(PiSubDevice.get(), SubSubDevicesCount, Ordinals, AllQueueProperties); + if (Result != PI_SUCCESS) { + return Result; + } + + // Create PI sub-sub-devices with the sub-device for all the ordinals + for (uint32_t J = 0; J < SubSubDevicesCount; ++J) { + // TODO: check if a device can be it's own parent + std::unique_ptr<_pi_device> PiSubSubDevice( + new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); + pi_result Result = initializeWithOrdinal(PiSubSubDevice.get(), Ordinals[J], AllQueueProperties); + if (Result != PI_SUCCESS) { + return Result; + } + + // save pointers to sub-sub-devices for quick retrieval in the future. + PiSubDevice->SubDevices.push_back(PiSubSubDevice.get()); + PiDevicesCache.push_back(std::move(PiSubSubDevice)); + } + // save pointers to sub-devices for quick retrieval in the future. Device->SubDevices.push_back(PiSubDevice.get()); PiDevicesCache.push_back(std::move(PiSubDevice)); From d5f438734981e481c445683ec315fd5962083cc9 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Wed, 7 Jul 2021 17:47:43 -0700 Subject: [PATCH 02/18] fix sub sub device count. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index c3178223c133..a0b087888f03 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1505,8 +1505,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, } // sub-sub-device -// TODO: do we need to gather Ordinals or just the SubSubDevicesCount will do ? -pi_result getCmdQueueOrdinals(pi_device PiSubDevice, pi_uint32 &SubSubDevicesCount, std::vector& Ordinals, +pi_result getComputeCmdQueueOrdinals(pi_device PiSubDevice, pi_uint32 &SubSubDevicesCount, std::vector& Ordinals, std::vector& AllQueueProperties) { uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, @@ -1519,7 +1518,6 @@ pi_result getCmdQueueOrdinals(pi_device PiSubDevice, pi_uint32 &SubSubDevicesCou ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (PiSubDevice->ZeDevice, &numQueueGroups, QueueProperties.data())); - SubSubDevicesCount = numQueueGroups; AllQueueProperties = QueueProperties; bool noComputeEngineFlag = true; @@ -1531,6 +1529,8 @@ pi_result getCmdQueueOrdinals(pi_device PiSubDevice, pi_uint32 &SubSubDevicesCou } } + SubSubDevicesCount = Ordinals.size(); + if (noComputeEngineFlag) { return PI_ERROR_UNKNOWN; } @@ -1600,7 +1600,7 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { pi_uint32 SubSubDevicesCount = 0; std::vector Ordinals; std::vector AllQueueProperties; - Result = getCmdQueueOrdinals(PiSubDevice.get(), SubSubDevicesCount, Ordinals, AllQueueProperties); + Result = getComputeCmdQueueOrdinals(PiSubDevice.get(), SubSubDevicesCount, Ordinals, AllQueueProperties); if (Result != PI_SUCCESS) { return Result; } From 607746ed9827c17d89ccf5c376c50fd1c1fbd172 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Thu, 8 Jul 2021 13:44:43 -0700 Subject: [PATCH 03/18] remove SubSubDevicesCount. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index a0b087888f03..bd9ae04bc02b 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1505,7 +1505,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, } // sub-sub-device -pi_result getComputeCmdQueueOrdinals(pi_device PiSubDevice, pi_uint32 &SubSubDevicesCount, std::vector& Ordinals, +pi_result getComputeCmdQueueOrdinals(pi_device PiSubDevice, std::vector& Ordinals, std::vector& AllQueueProperties) { uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, @@ -1518,18 +1518,17 @@ pi_result getComputeCmdQueueOrdinals(pi_device PiSubDevice, pi_uint32 &SubSubDev ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (PiSubDevice->ZeDevice, &numQueueGroups, QueueProperties.data())); - AllQueueProperties = QueueProperties; bool noComputeEngineFlag = true; for (uint32_t i = 0; i < numQueueGroups; i++) { if (QueueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { Ordinals.push_back(i); + AllQueueProperties.push_back(QueueProperties[i]); noComputeEngineFlag = false; } } - SubSubDevicesCount = Ordinals.size(); if (noComputeEngineFlag) { return PI_ERROR_UNKNOWN; @@ -1597,16 +1596,15 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { // sub-sub-device // get all the ordinals for the sub-sub-devices - pi_uint32 SubSubDevicesCount = 0; std::vector Ordinals; std::vector AllQueueProperties; - Result = getComputeCmdQueueOrdinals(PiSubDevice.get(), SubSubDevicesCount, Ordinals, AllQueueProperties); + Result = getComputeCmdQueueOrdinals(PiSubDevice.get(), Ordinals, AllQueueProperties); if (Result != PI_SUCCESS) { return Result; } // Create PI sub-sub-devices with the sub-device for all the ordinals - for (uint32_t J = 0; J < SubSubDevicesCount; ++J) { + for (uint32_t J = 0; J < Ordinals.size(); ++J) { // TODO: check if a device can be it's own parent std::unique_ptr<_pi_device> PiSubSubDevice( new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); From beb8763042cd576b254904ce660928e4e1257bdd Mon Sep 17 00:00:00 2001 From: rehana begam Date: Thu, 8 Jul 2021 13:56:41 -0700 Subject: [PATCH 04/18] clang formatted. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 26 ++++++++++++++--------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index bd9ae04bc02b..066aab5cd817 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1505,8 +1505,9 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, } // sub-sub-device -pi_result getComputeCmdQueueOrdinals(pi_device PiSubDevice, std::vector& Ordinals, - std::vector& AllQueueProperties) { +pi_result getComputeCmdQueueOrdinals( + pi_device PiSubDevice, std::vector& Ordinals, + std::vector& AllQueueProperties) { uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (PiSubDevice->ZeDevice, &numQueueGroups, nullptr)); @@ -1518,7 +1519,6 @@ pi_result getComputeCmdQueueOrdinals(pi_device PiSubDevice, std::vector& Or ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (PiSubDevice->ZeDevice, &numQueueGroups, QueueProperties.data())); - bool noComputeEngineFlag = true; for (uint32_t i = 0; i < numQueueGroups; i++) { if (QueueProperties[i].flags & @@ -1529,7 +1529,6 @@ pi_result getComputeCmdQueueOrdinals(pi_device PiSubDevice, std::vector& Or } } - if (noComputeEngineFlag) { return PI_ERROR_UNKNOWN; } @@ -1537,15 +1536,20 @@ pi_result getComputeCmdQueueOrdinals(pi_device PiSubDevice, std::vector& Or return PI_SUCCESS; } -pi_result initializeWithOrdinal(pi_device PiSubSubDevice, int Ordinal, std::vector& QueueProperties) { +pi_result initializeWithOrdinal( + pi_device PiSubSubDevice, int Ordinal, + std::vector& QueueProperties) { PiSubSubDevice->ZeComputeQueueGroupIndex = Ordinal; PiSubSubDevice->ZeComputeQueueGroupProperties = QueueProperties[Ordinal]; // Cache device properties PiSubSubDevice->ZeDeviceProperties = {}; - ZE_CALL(zeDeviceGetProperties, (PiSubSubDevice->ZeDevice, &(PiSubSubDevice->ZeDeviceProperties))); + ZE_CALL(zeDeviceGetProperties, + (PiSubSubDevice->ZeDevice, &(PiSubSubDevice->ZeDeviceProperties))); PiSubSubDevice->ZeDeviceComputeProperties = {}; - ZE_CALL(zeDeviceGetComputeProperties, (PiSubSubDevice->ZeDevice, &(PiSubSubDevice->ZeDeviceComputeProperties))); + ZE_CALL( + zeDeviceGetComputeProperties, + (PiSubSubDevice->ZeDevice, &(PiSubSubDevice->ZeDeviceComputeProperties))); return PI_SUCCESS; } @@ -1598,7 +1602,8 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { // get all the ordinals for the sub-sub-devices std::vector Ordinals; std::vector AllQueueProperties; - Result = getComputeCmdQueueOrdinals(PiSubDevice.get(), Ordinals, AllQueueProperties); + Result = getComputeCmdQueueOrdinals(PiSubDevice.get(), Ordinals, + AllQueueProperties); if (Result != PI_SUCCESS) { return Result; } @@ -1607,8 +1612,9 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { for (uint32_t J = 0; J < Ordinals.size(); ++J) { // TODO: check if a device can be it's own parent std::unique_ptr<_pi_device> PiSubSubDevice( - new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); - pi_result Result = initializeWithOrdinal(PiSubSubDevice.get(), Ordinals[J], AllQueueProperties); + new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); + pi_result Result = initializeWithOrdinal( + PiSubSubDevice.get(), Ordinals[J], AllQueueProperties); if (Result != PI_SUCCESS) { return Result; } From e5b85fb532f073503c491be855ea9104c2671801 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Thu, 8 Jul 2021 14:04:37 -0700 Subject: [PATCH 05/18] clang format. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 066aab5cd817..2360d0fde258 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1506,8 +1506,8 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, // sub-sub-device pi_result getComputeCmdQueueOrdinals( - pi_device PiSubDevice, std::vector& Ordinals, - std::vector& AllQueueProperties) { + pi_device PiSubDevice, std::vector &Ordinals, + std::vector &AllQueueProperties) { uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (PiSubDevice->ZeDevice, &numQueueGroups, nullptr)); @@ -1538,7 +1538,7 @@ pi_result getComputeCmdQueueOrdinals( pi_result initializeWithOrdinal( pi_device PiSubSubDevice, int Ordinal, - std::vector& QueueProperties) { + std::vector &QueueProperties) { PiSubSubDevice->ZeComputeQueueGroupIndex = Ordinal; PiSubSubDevice->ZeComputeQueueGroupProperties = QueueProperties[Ordinal]; From 00aef90c1bb96a9d11ea18efdc6524cae07d46c4 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Thu, 8 Jul 2021 18:58:07 -0700 Subject: [PATCH 06/18] inlining. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 101 +++++++++------------- sycl/plugins/level_zero/pi_level_zero.hpp | 3 +- 2 files changed, 43 insertions(+), 61 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 2360d0fde258..f3da4deaab50 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -529,7 +529,22 @@ createEventAndAssociateQueue(pi_queue Queue, pi_event *Event, return PI_SUCCESS; } -pi_result _pi_device::initialize() { +pi_result _pi_device::initialize(int SubSubDeviceOrdinal, + ze_command_queue_group_properties_t SubSubDeviceQueueProperties) { + if (SubSubDeviceOrdinal >= 0) { + ZeComputeQueueGroupIndex = SubSubDeviceOrdinal; + ZeComputeQueueGroupProperties = SubSubDeviceQueueProperties; + + // Cache device properties + ZeDeviceProperties = {}; + ZE_CALL(zeDeviceGetProperties, (ZeDevice, &(ZeDeviceProperties))); + ZeDeviceComputeProperties = {}; + ZE_CALL(zeDeviceGetComputeProperties, + (ZeDevice, &(ZeDeviceComputeProperties))); + + return PI_SUCCESS; + } + uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (ZeDevice, &numQueueGroups, nullptr)); @@ -1504,56 +1519,6 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, return PI_SUCCESS; } -// sub-sub-device -pi_result getComputeCmdQueueOrdinals( - pi_device PiSubDevice, std::vector &Ordinals, - std::vector &AllQueueProperties) { - uint32_t numQueueGroups = 0; - ZE_CALL(zeDeviceGetCommandQueueGroupProperties, - (PiSubDevice->ZeDevice, &numQueueGroups, nullptr)); - if (numQueueGroups == 0) { - return PI_ERROR_UNKNOWN; - } - std::vector QueueProperties( - numQueueGroups); - ZE_CALL(zeDeviceGetCommandQueueGroupProperties, - (PiSubDevice->ZeDevice, &numQueueGroups, QueueProperties.data())); - - bool noComputeEngineFlag = true; - for (uint32_t i = 0; i < numQueueGroups; i++) { - if (QueueProperties[i].flags & - ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { - Ordinals.push_back(i); - AllQueueProperties.push_back(QueueProperties[i]); - noComputeEngineFlag = false; - } - } - - if (noComputeEngineFlag) { - return PI_ERROR_UNKNOWN; - } - - return PI_SUCCESS; -} - -pi_result initializeWithOrdinal( - pi_device PiSubSubDevice, int Ordinal, - std::vector &QueueProperties) { - PiSubSubDevice->ZeComputeQueueGroupIndex = Ordinal; - PiSubSubDevice->ZeComputeQueueGroupProperties = QueueProperties[Ordinal]; - - // Cache device properties - PiSubSubDevice->ZeDeviceProperties = {}; - ZE_CALL(zeDeviceGetProperties, - (PiSubSubDevice->ZeDevice, &(PiSubSubDevice->ZeDeviceProperties))); - PiSubSubDevice->ZeDeviceComputeProperties = {}; - ZE_CALL( - zeDeviceGetComputeProperties, - (PiSubSubDevice->ZeDevice, &(PiSubSubDevice->ZeDeviceComputeProperties))); - - return PI_SUCCESS; -} - // Check the device cache and load it if necessary. pi_result _pi_platform::populateDeviceCacheIfNeeded() { std::lock_guard Lock(PiDevicesCacheMutex); @@ -1598,14 +1563,31 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { return Result; } - // sub-sub-device - // get all the ordinals for the sub-sub-devices + // collect all the ordinals for the sub-sub-devices std::vector Ordinals; - std::vector AllQueueProperties; - Result = getComputeCmdQueueOrdinals(PiSubDevice.get(), Ordinals, - AllQueueProperties); - if (Result != PI_SUCCESS) { - return Result; + std::unordered_map AllQueueProperties; + + uint32_t numQueueGroups = 0; + ZE_CALL(zeDeviceGetCommandQueueGroupProperties, + (PiSubDevice->ZeDevice, &numQueueGroups, nullptr)); + if (numQueueGroups == 0) { + return PI_ERROR_UNKNOWN; + } + std::vector QueueProperties( + numQueueGroups); + ZE_CALL(zeDeviceGetCommandQueueGroupProperties, + (PiSubDevice->ZeDevice, &numQueueGroups, QueueProperties.data())); + + for (uint32_t i = 0; i < numQueueGroups; i++) { + if (QueueProperties[i].flags & + ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + Ordinals.push_back(i); + AllQueueProperties[i] = QueueProperties[i]; + } + } + + if (Ordinals.empty()) { + return PI_ERROR_UNKNOWN; } // Create PI sub-sub-devices with the sub-device for all the ordinals @@ -1613,8 +1595,7 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { // TODO: check if a device can be it's own parent std::unique_ptr<_pi_device> PiSubSubDevice( new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); - pi_result Result = initializeWithOrdinal( - PiSubSubDevice.get(), Ordinals[J], AllQueueProperties); + pi_result Result = PiSubSubDevice->initialize(Ordinals[J], AllQueueProperties[Ordinals[J]]); if (Result != PI_SUCCESS) { return Result; } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 318adf439773..3e56662126fa 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -299,7 +299,8 @@ struct _pi_device : _pi_object { bool hasCopyEngine() const { return ZeCopyQueueGroupIndex >= 0; } // Initialize the entire PI device. - pi_result initialize(); + pi_result initialize(int SubSubDeviceOrdinal = -1, + ze_command_queue_group_properties_t SubSubDeviceQueueProperties = {}); // Level Zero device handle. ze_device_handle_t ZeDevice; From 89fdf06f73f37c08bd82a0156ec1eea2d144e2e9 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Thu, 8 Jul 2021 19:16:56 -0700 Subject: [PATCH 07/18] clang formatted. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 19 ++++++++++++------- sycl/plugins/level_zero/pi_level_zero.hpp | 5 +++-- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index f3da4deaab50..24e3d46f3aee 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -529,13 +529,15 @@ createEventAndAssociateQueue(pi_queue Queue, pi_event *Event, return PI_SUCCESS; } -pi_result _pi_device::initialize(int SubSubDeviceOrdinal, - ze_command_queue_group_properties_t SubSubDeviceQueueProperties) { +pi_result _pi_device::initialize( + int SubSubDeviceOrdinal, + ze_command_queue_group_properties_t SubSubDeviceQueueProperties) { + // initialize a sub-sub-devices with it's own Ordinal if (SubSubDeviceOrdinal >= 0) { ZeComputeQueueGroupIndex = SubSubDeviceOrdinal; ZeComputeQueueGroupProperties = SubSubDeviceQueueProperties; - // Cache device properties + // Cache sub-sub-device properties ZeDeviceProperties = {}; ZE_CALL(zeDeviceGetProperties, (ZeDevice, &(ZeDeviceProperties))); ZeDeviceComputeProperties = {}; @@ -1565,7 +1567,8 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { // collect all the ordinals for the sub-sub-devices std::vector Ordinals; - std::unordered_map AllQueueProperties; + std::unordered_map + AllQueueProperties; uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, @@ -1575,8 +1578,9 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { } std::vector QueueProperties( numQueueGroups); - ZE_CALL(zeDeviceGetCommandQueueGroupProperties, - (PiSubDevice->ZeDevice, &numQueueGroups, QueueProperties.data())); + ZE_CALL( + zeDeviceGetCommandQueueGroupProperties, + (PiSubDevice->ZeDevice, &numQueueGroups, QueueProperties.data())); for (uint32_t i = 0; i < numQueueGroups; i++) { if (QueueProperties[i].flags & @@ -1595,7 +1599,8 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { // TODO: check if a device can be it's own parent std::unique_ptr<_pi_device> PiSubSubDevice( new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); - pi_result Result = PiSubSubDevice->initialize(Ordinals[J], AllQueueProperties[Ordinals[J]]); + pi_result Result = PiSubSubDevice->initialize( + Ordinals[J], AllQueueProperties[Ordinals[J]]); if (Result != PI_SUCCESS) { return Result; } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 3e56662126fa..4a925f4c5c34 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -299,8 +299,9 @@ struct _pi_device : _pi_object { bool hasCopyEngine() const { return ZeCopyQueueGroupIndex >= 0; } // Initialize the entire PI device. - pi_result initialize(int SubSubDeviceOrdinal = -1, - ze_command_queue_group_properties_t SubSubDeviceQueueProperties = {}); + pi_result initialize( + int SubSubDeviceOrdinal = -1, + ze_command_queue_group_properties_t SubSubDeviceQueueProperties = {}); // Level Zero device handle. ze_device_handle_t ZeDevice; From d388a8b261ff5c8073435e80599ece68dc3f5f27 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Thu, 8 Jul 2021 20:46:12 -0700 Subject: [PATCH 08/18] update pi_level_zero.cpp. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 95 ++++++++++------------- sycl/plugins/level_zero/pi_level_zero.hpp | 4 +- 2 files changed, 42 insertions(+), 57 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 24e3d46f3aee..9eabdf379762 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -529,24 +529,7 @@ createEventAndAssociateQueue(pi_queue Queue, pi_event *Event, return PI_SUCCESS; } -pi_result _pi_device::initialize( - int SubSubDeviceOrdinal, - ze_command_queue_group_properties_t SubSubDeviceQueueProperties) { - // initialize a sub-sub-devices with it's own Ordinal - if (SubSubDeviceOrdinal >= 0) { - ZeComputeQueueGroupIndex = SubSubDeviceOrdinal; - ZeComputeQueueGroupProperties = SubSubDeviceQueueProperties; - - // Cache sub-sub-device properties - ZeDeviceProperties = {}; - ZE_CALL(zeDeviceGetProperties, (ZeDevice, &(ZeDeviceProperties))); - ZeDeviceComputeProperties = {}; - ZE_CALL(zeDeviceGetComputeProperties, - (ZeDevice, &(ZeDeviceComputeProperties))); - - return PI_SUCCESS; - } - +pi_result _pi_device::initialize(int SubSubDeviceOrdinal) { uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (ZeDevice, &numQueueGroups, nullptr)); @@ -559,44 +542,52 @@ pi_result _pi_device::initialize( (ZeDevice, &numQueueGroups, QueueProperties.data())); int ComputeGroupIndex = -1; - for (uint32_t i = 0; i < numQueueGroups; i++) { - if (QueueProperties[i].flags & - ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { - ComputeGroupIndex = i; - break; - } - } - // How is it possible that there are no "compute" capabilities? - if (ComputeGroupIndex < 0) { - return PI_ERROR_UNKNOWN; - } - ZeComputeQueueGroupIndex = ComputeGroupIndex; - ZeComputeQueueGroupProperties = QueueProperties[ComputeGroupIndex]; - int CopyGroupIndex = -1; - const char *CopyEngine = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); - bool UseCopyEngine = (!CopyEngine || (std::stoi(CopyEngine) != 0)); - if (UseCopyEngine) { + // initialize a sub-sub-devices with it's own Ordinal + if (SubSubDeviceOrdinal >= 0) { + ComputeGroupIndex = SubSubDeviceOrdinal; + } else { for (uint32_t i = 0; i < numQueueGroups; i++) { - if (((QueueProperties[i].flags & - ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) && - (QueueProperties[i].flags & - ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY)) { - CopyGroupIndex = i; + if (QueueProperties[i].flags & + ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + ComputeGroupIndex = i; break; } } - if (CopyGroupIndex < 0) - zePrint("NOTE: blitter/copy engine is not available though it was " - "requested\n"); - else - zePrint("NOTE: blitter/copy engine is available\n"); - } - ZeCopyQueueGroupIndex = CopyGroupIndex; - if (CopyGroupIndex >= 0) { - ZeCopyQueueGroupProperties = QueueProperties[CopyGroupIndex]; + // How is it possible that there are no "compute" capabilities? + if (ComputeGroupIndex < 0) { + return PI_ERROR_UNKNOWN; + } + + int CopyGroupIndex = -1; + const char *CopyEngine = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); + bool UseCopyEngine = (!CopyEngine || (std::stoi(CopyEngine) != 0)); + if (UseCopyEngine) { + for (uint32_t i = 0; i < numQueueGroups; i++) { + if (((QueueProperties[i].flags & + ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) && + (QueueProperties[i].flags & + ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY)) { + CopyGroupIndex = i; + break; + } + } + if (CopyGroupIndex < 0) + zePrint("NOTE: blitter/copy engine is not available though it was " + "requested\n"); + else + zePrint("NOTE: blitter/copy engine is available\n"); + } + + ZeCopyQueueGroupIndex = CopyGroupIndex; + if (CopyGroupIndex >= 0) { + ZeCopyQueueGroupProperties = QueueProperties[CopyGroupIndex]; + } } + ZeComputeQueueGroupIndex = ComputeGroupIndex; + ZeComputeQueueGroupProperties = QueueProperties[ComputeGroupIndex]; + // Cache device properties ZeDeviceProperties = {}; ZE_CALL(zeDeviceGetProperties, (ZeDevice, &ZeDeviceProperties)); @@ -1567,8 +1558,6 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { // collect all the ordinals for the sub-sub-devices std::vector Ordinals; - std::unordered_map - AllQueueProperties; uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, @@ -1586,7 +1575,6 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { if (QueueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { Ordinals.push_back(i); - AllQueueProperties[i] = QueueProperties[i]; } } @@ -1599,8 +1587,7 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { // TODO: check if a device can be it's own parent std::unique_ptr<_pi_device> PiSubSubDevice( new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); - pi_result Result = PiSubSubDevice->initialize( - Ordinals[J], AllQueueProperties[Ordinals[J]]); + pi_result Result = PiSubSubDevice->initialize(Ordinals[J]); if (Result != PI_SUCCESS) { return Result; } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 4a925f4c5c34..03d0631ee664 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -299,9 +299,7 @@ struct _pi_device : _pi_object { bool hasCopyEngine() const { return ZeCopyQueueGroupIndex >= 0; } // Initialize the entire PI device. - pi_result initialize( - int SubSubDeviceOrdinal = -1, - ze_command_queue_group_properties_t SubSubDeviceQueueProperties = {}); + pi_result initialize(int SubSubDeviceOrdinal = -1); // Level Zero device handle. ze_device_handle_t ZeDevice; From 485ecaf89e398420bb40ebae5b7e2b9ccd86283f Mon Sep 17 00:00:00 2001 From: rehana begam Date: Thu, 8 Jul 2021 20:49:10 -0700 Subject: [PATCH 09/18] clang formatted. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 9eabdf379762..3647c2cd9efe 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -567,7 +567,7 @@ pi_result _pi_device::initialize(int SubSubDeviceOrdinal) { if (((QueueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) && (QueueProperties[i].flags & - ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY)) { + ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY)) { CopyGroupIndex = i; break; } From fa22bcd575b29ed915163f8cefb5b6058210b806 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Thu, 15 Jul 2021 13:30:40 -0700 Subject: [PATCH 10/18] update max sub-device count. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 18 ++++++++++++++---- sycl/source/detail/device_impl.cpp | 6 +++--- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 3647c2cd9efe..2dc4fa168aaf 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1828,6 +1828,14 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, return ReturnValue(Device->Platform->ZeDriverApiVersion.c_str()); case PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: { uint32_t ZeSubDeviceCount = 0; + if (Device->isSubDevice()) { + pi_result Res = Device->Platform->populateDeviceCacheIfNeeded(); + if (Res != PI_SUCCESS) { + return Res; + } + + return ReturnValue(pi_uint32{(unsigned int)(Device->SubDevices.size())}); + } ZE_CALL(zeDeviceGetSubDevices, (ZeDevice, &ZeSubDeviceCount, nullptr)); return ReturnValue(pi_uint32{ZeSubDeviceCount}); } @@ -1836,10 +1844,12 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, case PI_DEVICE_INFO_PARTITION_PROPERTIES: { // SYCL spec says: if this SYCL device cannot be partitioned into at least // two sub devices then the returned vector must be empty. - uint32_t ZeSubDeviceCount = 0; - ZE_CALL(zeDeviceGetSubDevices, (ZeDevice, &ZeSubDeviceCount, nullptr)); - if (ZeSubDeviceCount < 2) { - return ReturnValue(pi_device_partition_property{0}); + if (!Device->isSubDevice()) { + uint32_t ZeSubDeviceCount = 0; + ZE_CALL(zeDeviceGetSubDevices, (ZeDevice, &ZeSubDeviceCount, nullptr)); + if (ZeSubDeviceCount < 2) { + return ReturnValue(pi_device_partition_property{0}); + } } // It is debatable if SYCL sub-device and partitioning APIs sufficient to // expose Level Zero sub-devices? We start with support of diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 6d69747a55d6..8446dbe7127a 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -199,9 +199,9 @@ std::vector device_impl::create_sub_devices( !is_affinity_supported(AffinityDomain)) { throw cl::sycl::feature_not_supported(); } - const cl_device_partition_property Properties[3] = { - CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, - (cl_device_partition_property)AffinityDomain, 0}; + const pi_device_partition_property Properties[3] = { + PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, + (pi_device_partition_property)AffinityDomain, 0}; size_t SubDevicesCount = get_info(); return create_sub_devices(Properties, SubDevicesCount); } From 6ff74c227c15612b4dc0842f5b8061d1af3316f4 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Thu, 15 Jul 2021 15:30:20 -0700 Subject: [PATCH 11/18] add comments. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 7 ++++--- sycl/plugins/level_zero/pi_level_zero.hpp | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 2dc4fa168aaf..74b294c5f093 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -543,7 +543,7 @@ pi_result _pi_device::initialize(int SubSubDeviceOrdinal) { int ComputeGroupIndex = -1; - // initialize a sub-sub-devices with it's own Ordinal + // initialize a sub-sub-devices with its own Ordinal if (SubSubDeviceOrdinal >= 0) { ComputeGroupIndex = SubSubDeviceOrdinal; } else { @@ -1582,9 +1582,10 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { return PI_ERROR_UNKNOWN; } - // Create PI sub-sub-devices with the sub-device for all the ordinals + // Create PI sub-sub-devices with the sub-device for all the ordinals. + // A {sub-device, ordinal} points to a specific CCS which constructs + // a sub-sub-device at this point. for (uint32_t J = 0; J < Ordinals.size(); ++J) { - // TODO: check if a device can be it's own parent std::unique_ptr<_pi_device> PiSubSubDevice( new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); pi_result Result = PiSubSubDevice->initialize(Ordinals[J]); diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 03d0631ee664..d3574d5946bd 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -299,6 +299,8 @@ struct _pi_device : _pi_object { bool hasCopyEngine() const { return ZeCopyQueueGroupIndex >= 0; } // Initialize the entire PI device. + // Optional param `SubSubDeviceOrdinal` is the compute command queue index + // used to initialize sub-sub-devices. pi_result initialize(int SubSubDeviceOrdinal = -1); // Level Zero device handle. From 0d9dfa61f74050ef36ff743205cf2d0fab799dff Mon Sep 17 00:00:00 2001 From: rehana begam Date: Fri, 16 Jul 2021 19:24:34 -0700 Subject: [PATCH 12/18] add index. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 29 +++++++++++++---------- sycl/plugins/level_zero/pi_level_zero.hpp | 5 +++- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 74b294c5f093..b90fb53862ba 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -529,7 +529,7 @@ createEventAndAssociateQueue(pi_queue Queue, pi_event *Event, return PI_SUCCESS; } -pi_result _pi_device::initialize(int SubSubDeviceOrdinal) { +pi_result _pi_device::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (ZeDevice, &numQueueGroups, nullptr)); @@ -546,6 +546,7 @@ pi_result _pi_device::initialize(int SubSubDeviceOrdinal) { // initialize a sub-sub-devices with its own Ordinal if (SubSubDeviceOrdinal >= 0) { ComputeGroupIndex = SubSubDeviceOrdinal; + ZeComputeEngineIndex = SubSubDeviceIndex; } else { for (uint32_t i = 0; i < numQueueGroups; i++) { if (QueueProperties[i].flags & @@ -559,6 +560,8 @@ pi_result _pi_device::initialize(int SubSubDeviceOrdinal) { return PI_ERROR_UNKNOWN; } + ZeComputeEngineIndex = 0; + int CopyGroupIndex = -1; const char *CopyEngine = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); bool UseCopyEngine = (!CopyEngine || (std::stoi(CopyEngine) != 0)); @@ -606,7 +609,7 @@ pi_result _pi_context::initialize() { pi_device Device = SingleRootDevice ? SingleRootDevice : Devices[0]; ZeStruct ZeCommandQueueDesc; ZeCommandQueueDesc.ordinal = Device->ZeComputeQueueGroupIndex; - ZeCommandQueueDesc.index = 0; + ZeCommandQueueDesc.index = Device->ZeComputeEngineIndex; ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; ZE_CALL( zeCommandListCreateImmediate, @@ -1586,16 +1589,18 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { // A {sub-device, ordinal} points to a specific CCS which constructs // a sub-sub-device at this point. for (uint32_t J = 0; J < Ordinals.size(); ++J) { - std::unique_ptr<_pi_device> PiSubSubDevice( - new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); - pi_result Result = PiSubSubDevice->initialize(Ordinals[J]); - if (Result != PI_SUCCESS) { - return Result; + for (uint32_t K = 0; K < QueueProperties[Ordinals[J]].numQueues; ++K) { + std::unique_ptr<_pi_device> PiSubSubDevice( + new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); + pi_result Result = PiSubSubDevice->initialize(Ordinals[J], K); + if (Result != PI_SUCCESS) { + return Result; + } + + // save pointers to sub-sub-devices for quick retrieval in the future. + PiSubDevice->SubDevices.push_back(PiSubSubDevice.get()); + PiDevicesCache.push_back(std::move(PiSubSubDevice)); } - - // save pointers to sub-sub-devices for quick retrieval in the future. - PiSubDevice->SubDevices.push_back(PiSubSubDevice.get()); - PiDevicesCache.push_back(std::move(PiSubSubDevice)); } // save pointers to sub-devices for quick retrieval in the future. @@ -2463,7 +2468,7 @@ pi_result piQueueCreate(pi_context Context, pi_device Device, ZeDevice = Device->ZeDevice; ZeStruct ZeCommandQueueDesc; ZeCommandQueueDesc.ordinal = Device->ZeComputeQueueGroupIndex; - ZeCommandQueueDesc.index = 0; + ZeCommandQueueDesc.index = Device->ZeComputeEngineIndex; ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ZE_CALL(zeCommandQueueCreate, diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index d3574d5946bd..02dff7f4260e 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -291,6 +291,9 @@ struct _pi_device : _pi_object { int32_t ZeComputeQueueGroupIndex; int32_t ZeCopyQueueGroupIndex; + // Keep the index of the compute engine + int32_t ZeComputeEngineIndex = 0; + // Cache the properties of the compute/copy queue groups. ZeStruct ZeComputeQueueGroupProperties; ZeStruct ZeCopyQueueGroupProperties; @@ -301,7 +304,7 @@ struct _pi_device : _pi_object { // Initialize the entire PI device. // Optional param `SubSubDeviceOrdinal` is the compute command queue index // used to initialize sub-sub-devices. - pi_result initialize(int SubSubDeviceOrdinal = -1); + pi_result initialize(int SubSubDeviceOrdinal = -1, int SubSubDeviceIndex = -1); // Level Zero device handle. ze_device_handle_t ZeDevice; From a52ca46a313a7e39badab0514e70fc426e60f173 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Fri, 16 Jul 2021 21:33:50 -0700 Subject: [PATCH 13/18] update context initialization. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 30 ++++++++++------------- sycl/plugins/level_zero/pi_level_zero.hpp | 10 +++++++- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index b90fb53862ba..8583226dff78 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1576,7 +1576,7 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { for (uint32_t i = 0; i < numQueueGroups; i++) { if (QueueProperties[i].flags & - ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE && QueueProperties[i].numQueues > 1) { Ordinals.push_back(i); } } @@ -1833,29 +1833,25 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, case PI_DEVICE_INFO_VERSION: return ReturnValue(Device->Platform->ZeDriverApiVersion.c_str()); case PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: { - uint32_t ZeSubDeviceCount = 0; - if (Device->isSubDevice()) { - pi_result Res = Device->Platform->populateDeviceCacheIfNeeded(); - if (Res != PI_SUCCESS) { - return Res; - } - - return ReturnValue(pi_uint32{(unsigned int)(Device->SubDevices.size())}); + pi_result Res = Device->Platform->populateDeviceCacheIfNeeded(); + if (Res != PI_SUCCESS) { + return Res; } - ZE_CALL(zeDeviceGetSubDevices, (ZeDevice, &ZeSubDeviceCount, nullptr)); - return ReturnValue(pi_uint32{ZeSubDeviceCount}); + return ReturnValue(pi_uint32{(unsigned int)(Device->SubDevices.size())}); } case PI_DEVICE_INFO_REFERENCE_COUNT: return ReturnValue(pi_uint32{Device->RefCount}); case PI_DEVICE_INFO_PARTITION_PROPERTIES: { // SYCL spec says: if this SYCL device cannot be partitioned into at least // two sub devices then the returned vector must be empty. - if (!Device->isSubDevice()) { - uint32_t ZeSubDeviceCount = 0; - ZE_CALL(zeDeviceGetSubDevices, (ZeDevice, &ZeSubDeviceCount, nullptr)); - if (ZeSubDeviceCount < 2) { - return ReturnValue(pi_device_partition_property{0}); - } + pi_result Res = Device->Platform->populateDeviceCacheIfNeeded(); + if (Res != PI_SUCCESS) { + return Res; + } + + uint32_t ZeSubDeviceCount = Device->SubDevices.size(); + if (ZeSubDeviceCount < 2) { + return ReturnValue(pi_device_partition_property{0}); } // It is debatable if SYCL sub-device and partitioning APIs sufficient to // expose Level Zero sub-devices? We start with support of diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 02dff7f4260e..3893469d1920 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -291,7 +291,7 @@ struct _pi_device : _pi_object { int32_t ZeComputeQueueGroupIndex; int32_t ZeCopyQueueGroupIndex; - // Keep the index of the compute engine + // Keep the index of the compute engine int32_t ZeComputeEngineIndex = 0; // Cache the properties of the compute/copy queue groups. @@ -363,6 +363,14 @@ struct _pi_context : _pi_object { // include root device itself as well) SingleRootDevice = Devices[0]->RootDevice ? Devices[0]->RootDevice : Devices[0]; + + // For context with sub subdevices, the SingleRootDevice might still + // not be the root device. + // Check whether the SingleRootDevice is the subdevice or root device. + if (SingleRootDevice->isSubDevice()) { + SingleRootDevice = SingleRootDevice->RootDevice; + } + for (auto &Device : Devices) { if ((!Device->RootDevice && Device != SingleRootDevice) || (Device->RootDevice && Device->RootDevice != SingleRootDevice)) { From bf50ccd3e1155e8e0cc291f693ac0efaa595c32f Mon Sep 17 00:00:00 2001 From: rehana begam Date: Fri, 16 Jul 2021 22:13:22 -0700 Subject: [PATCH 14/18] clang formatted. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 12 ++++++++---- sycl/plugins/level_zero/pi_level_zero.hpp | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 8583226dff78..8c59b45dd507 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -529,7 +529,8 @@ createEventAndAssociateQueue(pi_queue Queue, pi_event *Event, return PI_SUCCESS; } -pi_result _pi_device::initialize(int SubSubDeviceOrdinal, int SubSubDeviceIndex) { +pi_result _pi_device::initialize(int SubSubDeviceOrdinal, + int SubSubDeviceIndex) { uint32_t numQueueGroups = 0; ZE_CALL(zeDeviceGetCommandQueueGroupProperties, (ZeDevice, &numQueueGroups, nullptr)); @@ -1576,7 +1577,8 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { for (uint32_t i = 0; i < numQueueGroups; i++) { if (QueueProperties[i].flags & - ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE && QueueProperties[i].numQueues > 1) { + ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE && + QueueProperties[i].numQueues > 1) { Ordinals.push_back(i); } } @@ -1589,7 +1591,8 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { // A {sub-device, ordinal} points to a specific CCS which constructs // a sub-sub-device at this point. for (uint32_t J = 0; J < Ordinals.size(); ++J) { - for (uint32_t K = 0; K < QueueProperties[Ordinals[J]].numQueues; ++K) { + for (uint32_t K = 0; K < QueueProperties[Ordinals[J]].numQueues; + ++K) { std::unique_ptr<_pi_device> PiSubSubDevice( new _pi_device(ZeSubdevices[I], this, PiSubDevice.get())); pi_result Result = PiSubSubDevice->initialize(Ordinals[J], K); @@ -1597,7 +1600,8 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { return Result; } - // save pointers to sub-sub-devices for quick retrieval in the future. + // save pointers to sub-sub-devices for quick retrieval in the + // future. PiSubDevice->SubDevices.push_back(PiSubSubDevice.get()); PiDevicesCache.push_back(std::move(PiSubSubDevice)); } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 3893469d1920..03b25ef49dcc 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -304,7 +304,8 @@ struct _pi_device : _pi_object { // Initialize the entire PI device. // Optional param `SubSubDeviceOrdinal` is the compute command queue index // used to initialize sub-sub-devices. - pi_result initialize(int SubSubDeviceOrdinal = -1, int SubSubDeviceIndex = -1); + pi_result initialize(int SubSubDeviceOrdinal = -1, + int SubSubDeviceIndex = -1); // Level Zero device handle. ze_device_handle_t ZeDevice; From 33c4449f3ce84d405754b1617ffab3c4c529c78d Mon Sep 17 00:00:00 2001 From: rehana begam Date: Fri, 16 Jul 2021 22:22:38 -0700 Subject: [PATCH 15/18] add comments. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 2 +- sycl/plugins/level_zero/pi_level_zero.hpp | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 8c59b45dd507..d060a123a1bd 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1588,7 +1588,7 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { } // Create PI sub-sub-devices with the sub-device for all the ordinals. - // A {sub-device, ordinal} points to a specific CCS which constructs + // Each {ordinal, index} points to a specific CCS which constructs // a sub-sub-device at this point. for (uint32_t J = 0; J < Ordinals.size(); ++J) { for (uint32_t K = 0; K < QueueProperties[Ordinals[J]].numQueues; diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 03b25ef49dcc..0533c86f61c2 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -302,8 +302,9 @@ struct _pi_device : _pi_object { bool hasCopyEngine() const { return ZeCopyQueueGroupIndex >= 0; } // Initialize the entire PI device. - // Optional param `SubSubDeviceOrdinal` is the compute command queue index - // used to initialize sub-sub-devices. + // Optional param `SubSubDeviceOrdinal` `SubSubDeviceIndex` are the compute + // command queue ordinal and index respectively, used to initialize + // sub-sub-devices. pi_result initialize(int SubSubDeviceOrdinal = -1, int SubSubDeviceIndex = -1); From 306cc9f47038b4a503e07addeb1c991433698258 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Mon, 19 Jul 2021 17:31:46 -0700 Subject: [PATCH 16/18] fix pre-commit fail. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index d060a123a1bd..2a7968cadd52 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -46,6 +46,12 @@ static const pi_uint32 ZeSerialize = [] { return SerializeModeValue; }(); +static const bool CopyEngineRequested = [] { + const char *CopyEngine = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); + bool UseCopyEngine = (!CopyEngine || (std::stoi(CopyEngine) != 0)); + return UseCopyEngine; +}(); + // This class encapsulates actions taken along with a call to Level Zero API. class ZeCall { private: @@ -564,9 +570,7 @@ pi_result _pi_device::initialize(int SubSubDeviceOrdinal, ZeComputeEngineIndex = 0; int CopyGroupIndex = -1; - const char *CopyEngine = std::getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE"); - bool UseCopyEngine = (!CopyEngine || (std::stoi(CopyEngine) != 0)); - if (UseCopyEngine) { + if (CopyEngineRequested) { for (uint32_t i = 0; i < numQueueGroups; i++) { if (((QueueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) == 0) && @@ -1583,10 +1587,6 @@ pi_result _pi_platform::populateDeviceCacheIfNeeded() { } } - if (Ordinals.empty()) { - return PI_ERROR_UNKNOWN; - } - // Create PI sub-sub-devices with the sub-device for all the ordinals. // Each {ordinal, index} points to a specific CCS which constructs // a sub-sub-device at this point. From 37b114e348163a8645b026b51f3d1f7d3d7f2fb3 Mon Sep 17 00:00:00 2001 From: rehana begam Date: Mon, 19 Jul 2021 17:50:46 -0700 Subject: [PATCH 17/18] update comments. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 2a7968cadd52..de23d74180c2 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -550,11 +550,11 @@ pi_result _pi_device::initialize(int SubSubDeviceOrdinal, int ComputeGroupIndex = -1; - // initialize a sub-sub-devices with its own Ordinal + // Initialize a sub-sub-device with its own ordinal and index if (SubSubDeviceOrdinal >= 0) { ComputeGroupIndex = SubSubDeviceOrdinal; ZeComputeEngineIndex = SubSubDeviceIndex; - } else { + } else { // This is a root or a sub-device for (uint32_t i = 0; i < numQueueGroups; i++) { if (QueueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { @@ -567,6 +567,7 @@ pi_result _pi_device::initialize(int SubSubDeviceOrdinal, return PI_ERROR_UNKNOWN; } + // The index for a root or a sub-device is always 0. ZeComputeEngineIndex = 0; int CopyGroupIndex = -1; From 35279825b66e9e55e68eaf9e609c2739c6dd77cd Mon Sep 17 00:00:00 2001 From: rehana begam Date: Mon, 19 Jul 2021 17:56:34 -0700 Subject: [PATCH 18/18] clang formatted. Signed-off-by: rehana begam --- sycl/plugins/level_zero/pi_level_zero.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index de23d74180c2..ea06b94c0aee 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -554,7 +554,7 @@ pi_result _pi_device::initialize(int SubSubDeviceOrdinal, if (SubSubDeviceOrdinal >= 0) { ComputeGroupIndex = SubSubDeviceOrdinal; ZeComputeEngineIndex = SubSubDeviceIndex; - } else { // This is a root or a sub-device + } else { // This is a root or a sub-device for (uint32_t i = 0; i < numQueueGroups; i++) { if (QueueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {