diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index 8cff5b2848b0f..318059da5eaf9 100755 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -4,7 +4,7 @@ if (NOT DEFINED UNIFIED_RUNTIME_LIBRARY OR NOT DEFINED UNIFIED_RUNTIME_INCLUDE_D include(FetchContent) set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - set(UNIFIED_RUNTIME_TAG 4a9e53b0d7b15d9b0239864d13999f32e6c73bac) + set(UNIFIED_RUNTIME_TAG 4136fbb19c37a8aa9d368559a738e2e7cc35033e) message(STATUS "Will fetch Unified Runtime from ${UNIFIED_RUNTIME_REPO}") FetchContent_Declare(unified-runtime diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 2c53321728b12..fbff734ecaf60 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -287,35 +287,34 @@ inline pi_result ur2piDeviceInfoValue(ur_device_info_t ParamName, return Value.convertBitSet(ConvertFunc); } else if (ParamName == UR_DEVICE_INFO_PARTITION_TYPE) { - auto ConvertFunc = [](ur_device_partition_property_t UrValue) { - switch (UrValue) { - case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: + auto ConvertFunc = [](ur_device_partition_t UrValue) { + if (UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN == UrValue) return PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; - case UR_DEVICE_PARTITION_BY_CSLICE: + else if (UR_DEVICE_PARTITION_BY_CSLICE == UrValue) return PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE; - case (ur_device_partition_property_t) - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE: + else if ((ur_device_partition_t) + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE == UrValue) return (pi_device_partition_property) PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; - default: - die("UR_DEVICE_INFO_PARTITION_TYPE: unhandled value"); - } + die("UR_DEVICE_INFO_PARTITION_TYPE: unhandled value"); }; - return Value.convertArray(ConvertFunc); - } else if (ParamName == UR_DEVICE_INFO_PARTITION_PROPERTIES) { - auto ConvertFunc = [](ur_device_partition_property_t UrValue) { + return Value + .convertArray( + ConvertFunc); + } else if (ParamName == UR_DEVICE_INFO_SUPPORTED_PARTITIONS) { + auto ConvertFunc = [](ur_device_partition_t UrValue) { switch (UrValue) { case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: return PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; case UR_DEVICE_PARTITION_BY_CSLICE: return PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE; default: - die("UR_DEVICE_INFO_PARTITION_PROPERTIES: unhandled value"); + die("UR_DEVICE_INFO_SUPPORTED_PARTITIONS: unhandled value"); } }; - return Value.convertArray(ConvertFunc); + return Value + .convertArray( + ConvertFunc); } else if (ParamName == UR_DEVICE_INFO_LOCAL_MEM_TYPE) { auto ConvertFunc = [](ur_device_local_mem_type_t UrValue) { switch (UrValue) { @@ -774,7 +773,7 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, InfoType = UR_DEVICE_INFO_REFERENCE_COUNT; break; case PI_DEVICE_INFO_PARTITION_PROPERTIES: - InfoType = UR_DEVICE_INFO_PARTITION_PROPERTIES; + InfoType = UR_DEVICE_INFO_SUPPORTED_PARTITIONS; break; case PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: InfoType = UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN; @@ -1081,7 +1080,7 @@ inline pi_result piDevicePartition( if (!Properties || !Properties[0]) return PI_ERROR_INVALID_VALUE; - ur_device_partition_property_t Property; + ur_device_partition_t Property; switch (Properties[0]) { case PI_DEVICE_PARTITION_EQUALLY: Property = UR_DEVICE_PARTITION_EQUALLY; @@ -1121,12 +1120,20 @@ inline pi_result piDevicePartition( // TODO: correctly terminate the UR properties, see: // https://github.com/oneapi-src/unified-runtime/issues/183 // - ur_device_partition_property_t UrProperties[] = { - ur_device_partition_property_t(Property), Value, 0}; + ur_device_partition_property_t UrProperty; + UrProperty.type = Property; + UrProperty.value.equally = Value; + + ur_device_partition_properties_t UrProperties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + &UrProperty, + 1, + }; auto UrDevice = reinterpret_cast(Device); auto UrSubDevices = reinterpret_cast(SubDevices); - HANDLE_ERRORS(urDevicePartition(UrDevice, UrProperties, NumEntries, + HANDLE_ERRORS(urDevicePartition(UrDevice, &UrProperties, NumEntries, UrSubDevices, NumSubDevices)); return PI_SUCCESS; } @@ -1911,6 +1918,8 @@ inline pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, if (ArgValue) UrMemory = reinterpret_cast(*ArgValue); + ur_kernel_arg_mem_obj_properties_t Properties{}; + // We don't yet know the device where this kernel will next be run on. // Thus we can't know the actual memory allocation that needs to be used. // Remember the memory object being used as an argument for this kernel @@ -1922,7 +1931,8 @@ inline pi_result piextKernelSetArgMemObj(pi_kernel Kernel, pi_uint32 ArgIndex, // ur_kernel_handle_t UrKernel = reinterpret_cast(Kernel); - HANDLE_ERRORS(urKernelSetArgMemObj(UrKernel, ArgIndex, UrMemory)); + HANDLE_ERRORS( + urKernelSetArgMemObj(UrKernel, ArgIndex, &Properties, UrMemory)); return PI_SUCCESS; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp index c364c6f384a49..016454c742bb9 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/device.cpp @@ -690,7 +690,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: { return ReturnValue(0u); } - case UR_DEVICE_INFO_PARTITION_PROPERTIES: { + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { return ReturnValue(static_cast(0u)); } case UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: { @@ -1019,7 +1019,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceRetain(ur_device_handle_t hDevice) { } UR_APIEXPORT ur_result_t UR_APICALL -urDevicePartition(ur_device_handle_t, const ur_device_partition_property_t *, +urDevicePartition(ur_device_handle_t, const ur_device_partition_properties_t *, uint32_t, ur_device_handle_t *, uint32_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp index e1d6f9f9a2cd3..f35d23a85e26f 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/kernel.cpp @@ -295,8 +295,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( - ur_kernel_handle_t hKernel, uint32_t argIndex, ur_mem_handle_t hArgValue) { +UR_APIEXPORT ur_result_t UR_APICALL +urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, + const ur_kernel_arg_mem_obj_properties_t *Properties, + ur_mem_handle_t hArgValue) { UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); diff --git a/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp b/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp index c7258ad241373..f8e806b0626a0 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/cuda/ur_interface_loader.cpp @@ -200,7 +200,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( if (UR_RESULT_SUCCESS != result) { return result; } - pDdiTable->pfnGetLastResult = urGetLastResult; pDdiTable->pfnInit = urInit; pDdiTable->pfnTearDown = urTearDown; return UR_RESULT_SUCCESS; diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_device.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_device.cpp index dc21c0f79399f..0be520e2928b6 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_device.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_device.cpp @@ -281,7 +281,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } case UR_DEVICE_INFO_REFERENCE_COUNT: return ReturnValue(uint32_t{Device->RefCount.load()}); - case UR_DEVICE_INFO_PARTITION_PROPERTIES: { + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { // SYCL spec says: if this SYCL device cannot be partitioned into at least // two sub devices then the returned vector must be empty. auto Res = Device->Platform->populateDeviceCacheIfNeeded(); @@ -291,15 +291,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( uint32_t ZeSubDeviceCount = Device->SubDevices.size(); if (ZeSubDeviceCount < 2) { - return ReturnValue((ur_device_partition_property_t)0); + return ReturnValue((ur_device_partition_t)0); } bool PartitionedByCSlice = Device->SubDevices[0]->isCCS(); auto ReturnHelper = [&](auto... Partitions) { struct { - ur_device_partition_property_t Arr[sizeof...(Partitions) + 1]; - } PartitionProperties = { - {Partitions..., ur_device_partition_property_t(0)}}; + ur_device_partition_t Arr[sizeof...(Partitions) + 1]; + } PartitionProperties = {{Partitions..., ur_device_partition_t(0)}}; return ReturnValue(PartitionProperties); }; @@ -324,13 +323,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( case UR_DEVICE_INFO_PARTITION_TYPE: { // For root-device there is no partitioning to report. if (!Device->isSubDevice()) - return ReturnValue(ur_device_partition_property_t(0)); + return ReturnValue(ur_device_partition_t(0)); if (Device->isCCS()) { struct { ur_device_partition_property_t Arr[2]; } PartitionProperties = { - {UR_DEVICE_PARTITION_BY_CSLICE, ur_device_partition_property_t(0)}}; + {UR_DEVICE_PARTITION_BY_CSLICE, ur_device_partition_t(0)}}; return ReturnValue(PartitionProperties); } @@ -338,9 +337,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( ur_device_partition_property_t Arr[3]; } PartitionProperties = { {UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, - (ur_device_partition_property_t) + (ur_device_partition_t) UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE, - ur_device_partition_property_t(0)}}; + ur_device_partition_t(0)}}; return ReturnValue(PartitionProperties); } @@ -1088,9 +1087,8 @@ void ZeUSMImportExtension::doZeUSMRelease(ze_driver_handle_t DriverHandle, UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t Device, ///< [in] handle of the device to partition. - const ur_device_partition_property_t - *Properties, ///< [in] null-terminated array of <$_device_partition_t - ///< enum, value> pairs. + const ur_device_partition_properties_t + *Properties, ///< [in] Device partition properties. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t *OutDevices, ///< [out][optional][range(0, NumDevices)] array of handle @@ -1102,13 +1100,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( ///< according to the partitioning property. ) { // Other partitioning ways are not supported by Level Zero - if (Properties[0] == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { - if ((Properties[1] != UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && - Properties[1] != UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { + if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) { + if ((Properties->pProperties->value.affinity_domain != + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE && + Properties->pProperties->value.affinity_domain != + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA)) { return UR_RESULT_ERROR_INVALID_VALUE; } - } else if (Properties[0] == UR_DEVICE_PARTITION_BY_CSLICE) { - if (Properties[1] != 0) { + } else if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { + if (Properties->pProperties->value.affinity_domain != 0) { return UR_RESULT_ERROR_INVALID_VALUE; } } else { @@ -1132,13 +1132,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( // UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING overrides that // still expose CSlices in partitioning by affinity domain for compatibility // reasons. - if (Properties[0] == UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && + if (Properties->pProperties->type == + UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && !ExposeCSliceInAffinityPartitioning) { if (Device->isSubDevice()) { return 0; } } - if (Properties[0] == UR_DEVICE_PARTITION_BY_CSLICE) { + if (Properties->pProperties->type == UR_DEVICE_PARTITION_BY_CSLICE) { // Not a CSlice-based partitioning. if (!Device->SubDevices[0]->isCCS()) { return 0; diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp index 73111abeb475a..110d40e8695b6 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_kernel.cpp @@ -669,9 +669,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( ur_kernel_handle_t Kernel, ///< [in] handle of the kernel object - uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] + uint32_t ArgIndex, ///< [in] argument index in range [0, num args - 1] + const ur_kernel_arg_mem_obj_properties_t + *Properties, ///< [in][optional] pointer to Memory object properties. ur_mem_handle_t ArgValue ///< [in][optional] handle of Memory object. ) { + std::ignore = Properties; + std::scoped_lock Guard(Kernel->Mutex); // The ArgValue may be a NULL pointer in which case a NULL value is used for // the kernel argument declared as a pointer to global or constant memory. diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_platform.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_platform.cpp index 8f9991d6f726e..1d9b3f6bc6fab 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_platform.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_level_zero_platform.cpp @@ -341,13 +341,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( return UR_RESULT_ERROR_INVALID_VALUE; } -UR_APIEXPORT ur_result_t UR_APICALL urGetLastResult( +UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetLastError( ur_platform_handle_t Platform, ///< [in] handle of the platform instance - const char **Message ///< [out] pointer to a string containing adapter - ///< specific result in string representation. + const char **Message, ///< [out] pointer to a C string where the adapter + ///< specific error message will be stored. + int32_t *Error ///< [out] pointer to an integer where the adapter specific + ///< error code will be stored. ) { std::ignore = Platform; std::ignore = Message; + std::ignore = Error; urPrint("[UR][L0] %s function not implemented!\n", __FUNCTION__); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_loader_interface.cpp b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_loader_interface.cpp index 0e2c5bc85bf71..280c9d025d702 100644 --- a/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_loader_interface.cpp +++ b/sycl/plugins/unified_runtime/ur/adapters/level_zero/ur_loader_interface.cpp @@ -32,7 +32,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( } pDdiTable->pfnInit = urInit; - pDdiTable->pfnGetLastResult = urGetLastResult; pDdiTable->pfnTearDown = urTearDown; return retVal; @@ -183,6 +182,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( pDdiTable->pfnCreateWithNativeHandle = urPlatformCreateWithNativeHandle; pDdiTable->pfnGetApiVersion = urPlatformGetApiVersion; pDdiTable->pfnGetBackendOption = urPlatformGetBackendOption; + pDdiTable->pfnGetLastError = urPlatformGetLastError; return retVal; }