diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 8e5c59ef0bdea..33368f2b9c197 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -28,6 +28,9 @@ set(UR_BUILD_TESTS "${SYCL_UR_BUILD_TESTS}" CACHE BOOL "" FORCE) # UR tests require the examples to be built set(UR_BUILD_EXAMPLES "${SYCL_UR_BUILD_TESTS}" CACHE BOOL "" FORCE) +option(SYCL_UR_FORMAT_CPP_STYLE "Format code style of UR C++ sources" OFF) +set(UR_FORMAT_CPP_STYLE "${SYCL_UR_FORMAT_CPP_STYLE}" CACHE BOOL "" FORCE) + # Here we override the defaults to unified-runtime set(UR_BUILD_XPTI_LIBS OFF CACHE BOOL "") set(UR_ENABLE_SYMBOLIZER ON CACHE BOOL "Enable symbolizer for sanitizer layer.") diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 4be6ee9ac60f8..150f0d3ee90cc 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -365,22 +365,26 @@ void GetCapabilitiesIntersectionSet(const std::vector &Devices, // We're under sycl/source and these won't be exported but it's way more // convenient to be able to reference them without extra `detail::`. -inline auto get_ur_handles(sycl::detail::context_impl &Ctx) { +inline auto get_ur_handles(const detail::context_impl &Ctx) { ur_context_handle_t urCtx = Ctx.getHandleRef(); return std::tuple{urCtx, &Ctx.getAdapter()}; } -inline auto get_ur_handles(const sycl::context &syclContext) { - return get_ur_handles(*sycl::detail::getSyclObjImpl(syclContext)); +inline auto get_ur_handles(const context &syclContext) { + return get_ur_handles(*detail::getSyclObjImpl(syclContext)); } -inline auto get_ur_handles(const sycl::device &syclDevice, - const sycl::context &syclContext) { +inline auto get_ur_handles(const detail::device_impl &syclDevice, + const detail::context_impl &syclContext) { auto [urCtx, Adapter] = get_ur_handles(syclContext); - ur_device_handle_t urDevice = - sycl::detail::getSyclObjImpl(syclDevice)->getHandleRef(); + ur_device_handle_t urDevice = syclDevice.getHandleRef(); return std::tuple{urDevice, urCtx, Adapter}; } -inline auto get_ur_handles(const sycl::device &syclDevice) { - auto &implDevice = *sycl::detail::getSyclObjImpl(syclDevice); +inline auto get_ur_handles(const device &syclDevice, + const context &syclContext) { + return get_ur_handles(*detail::getSyclObjImpl(syclDevice), + *detail::getSyclObjImpl(syclContext)); +} +inline auto get_ur_handles(const device &syclDevice) { + auto &implDevice = *detail::getSyclObjImpl(syclDevice); ur_device_handle_t urDevice = implDevice.getHandleRef(); return std::tuple{urDevice, &implDevice.getAdapter()}; } diff --git a/sycl/source/detail/graph/memory_pool.cpp b/sycl/source/detail/graph/memory_pool.cpp index 555072b9a4aa7..998d1ef1d7424 100644 --- a/sycl/source/detail/graph/memory_pool.cpp +++ b/sycl/source/detail/graph/memory_pool.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "memory_pool.hpp" +#include "detail/virtual_mem.hpp" #include "graph_impl.hpp" #include @@ -39,10 +40,12 @@ void *graph_mem_pool::malloc(size_t Size, usm::alloc AllocType, switch (AllocType) { case usm::alloc::device: { - context_impl &CtxImpl = *getSyclObjImpl(MContext); - adapter_impl &Adapter = CtxImpl.getAdapter(); + const context_impl &CtxImpl = *getSyclObjImpl(MContext); + const adapter_impl &Adapter = CtxImpl.getAdapter(); + const device_impl &DeviceImpl = *getSyclObjImpl(MDevice); - size_t Granularity = get_mem_granularity(MDevice, MContext); + const size_t Granularity = get_mem_granularity_for_allocation_size( + DeviceImpl, CtxImpl, granularity_mode::recommended, Size); uintptr_t StartPtr = 0; size_t AlignedSize = alignByteSize(Size, Granularity); // See if we can find an allocation to reuse diff --git a/sycl/source/detail/graph/memory_pool.hpp b/sycl/source/detail/graph/memory_pool.hpp index 73ef5fd436634..aa7c0a9cb1bac 100644 --- a/sycl/source/detail/graph/memory_pool.hpp +++ b/sycl/source/detail/graph/memory_pool.hpp @@ -12,7 +12,7 @@ #include // For physical_mem_impl #include // For context #include // For device -#include // For get_mem_granularity +#include // For unmap namespace sycl { inline namespace _V1 { diff --git a/sycl/source/detail/virtual_mem.hpp b/sycl/source/detail/virtual_mem.hpp new file mode 100644 index 0000000000000..719ee83f54fbf --- /dev/null +++ b/sycl/source/detail/virtual_mem.hpp @@ -0,0 +1,25 @@ +//==---------------- virtual_mem.hpp ---------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +size_t +get_mem_granularity_for_allocation_size(const detail::device_impl &SyclDevice, + const detail::context_impl &SyclContext, + granularity_mode Mode, + size_t AllocationSize); + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/virtual_mem.cpp b/sycl/source/virtual_mem.cpp index ea5550ee4db9b..9150e8a50dae2 100644 --- a/sycl/source/virtual_mem.cpp +++ b/sycl/source/virtual_mem.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include // System headers for querying page-size. #ifdef _WIN32 @@ -22,9 +22,11 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi::experimental { -__SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice, - const context &SyclContext, - granularity_mode Mode) { +size_t +get_mem_granularity_for_allocation_size(const detail::device_impl &SyclDevice, + const detail::context_impl &SyclContext, + granularity_mode Mode, + const size_t AllocationSize) { if (!SyclDevice.has(aspect::ext_oneapi_virtual_mem)) throw sycl::exception( sycl::make_error_code(sycl::errc::feature_not_supported), @@ -45,13 +47,15 @@ __SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice, #ifndef NDEBUG size_t InfoOutputSize = 0; Adapter->call( - urCtx, urDevice, GranularityQuery, 0u, nullptr, &InfoOutputSize); + urCtx, urDevice, AllocationSize, GranularityQuery, 0u, nullptr, + &InfoOutputSize); assert(InfoOutputSize == sizeof(size_t) && "Unexpected output size of granularity info query."); #endif // NDEBUG size_t Granularity = 0; Adapter->call( - urCtx, urDevice, GranularityQuery, sizeof(size_t), &Granularity, nullptr); + urCtx, urDevice, AllocationSize, GranularityQuery, sizeof(size_t), + &Granularity, nullptr); if (Granularity == 0) throw sycl::exception( sycl::make_error_code(sycl::errc::invalid), @@ -59,6 +63,14 @@ __SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice, return Granularity; } +__SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice, + const context &SyclContext, + granularity_mode Mode) { + return get_mem_granularity_for_allocation_size( + *detail::getSyclObjImpl(SyclDevice), *detail::getSyclObjImpl(SyclContext), + Mode, 1); +} + __SYCL_EXPORT size_t get_mem_granularity(const context &SyclContext, granularity_mode Mode) { const std::vector Devices = SyclContext.get_devices(); diff --git a/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_different_sizes.cpp b/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_different_sizes.cpp new file mode 100644 index 0000000000000..009cadf75616a --- /dev/null +++ b/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_different_sizes.cpp @@ -0,0 +1,7 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %if level_zero %{%{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +#define GRAPH_E2E_EXPLICIT + +#include "../Inputs/async_alloc_different_sizes.cpp" diff --git a/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_different_sizes.cpp b/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_different_sizes.cpp new file mode 100644 index 0000000000000..bca37b6ec4f08 --- /dev/null +++ b/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_different_sizes.cpp @@ -0,0 +1,34 @@ +// Tests async allocations with different sizes. + +#include "../../graph_common.hpp" +#include + +void asyncAllocWorksWithSize(size_t Size) { + queue Queue{}; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + void *AsyncPtr = nullptr; + // Add alloc node + auto AllocNode = add_node(Graph, Queue, [&](handler &CGH) { + AsyncPtr = exp_ext::async_malloc(CGH, usm::alloc::device, Size); + }); + + add_node( + Graph, Queue, + [&](handler &CGH) { + depends_on_helper(CGH, AllocNode); + exp_ext::async_free(CGH, AsyncPtr); + }, + AllocNode); + + auto GraphExec = Graph.finalize(); +} + +int main() { + asyncAllocWorksWithSize(1); + asyncAllocWorksWithSize(131); + asyncAllocWorksWithSize(10071); + asyncAllocWorksWithSize(1007177); + asyncAllocWorksWithSize(191439360); +} diff --git a/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_different_sizes.cpp b/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_different_sizes.cpp new file mode 100644 index 0000000000000..cac939c88ab32 --- /dev/null +++ b/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_different_sizes.cpp @@ -0,0 +1,7 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// RUN: %if level_zero %{%{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +#define GRAPH_E2E_RECORD_REPLAY + +#include "../Inputs/async_alloc_different_sizes.cpp" \ No newline at end of file diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 577bb4d5b2c89..8baf407095795 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -4993,6 +4993,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -15324,6 +15327,7 @@ typedef struct ur_loader_init_params_t { typedef struct ur_virtual_mem_granularity_get_info_params_t { ur_context_handle_t *phContext; ur_device_handle_t *phDevice; + size_t *pallocationSize; ur_virtual_mem_granularity_info_t *ppropName; size_t *ppropSize; void **ppPropValue; diff --git a/unified-runtime/include/ur_ddi.h b/unified-runtime/include/ur_ddi.h index cb944b6c395d6..5f58d4c560fe3 100644 --- a/unified-runtime/include/ur_ddi.h +++ b/unified-runtime/include/ur_ddi.h @@ -1834,8 +1834,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetUsmP2PExpProcAddrTable_t)( /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urVirtualMemGranularityGetInfo typedef ur_result_t(UR_APICALL *ur_pfnVirtualMemGranularityGetInfo_t)( - ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, - size_t, void *, size_t *); + ur_context_handle_t, ur_device_handle_t, size_t, + ur_virtual_mem_granularity_info_t, size_t, void *, size_t *); /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urVirtualMemReserve diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 7fc43237a2fbd..c7dc701db3624 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -20319,6 +20319,11 @@ inline std::ostream &operator<<( ur::details::printPtr(os, *(params->phDevice)); + os << ", "; + os << ".allocationSize = "; + + os << *(params->pallocationSize); + os << ", "; os << ".propName = "; diff --git a/unified-runtime/scripts/core/virtual_memory.yml b/unified-runtime/scripts/core/virtual_memory.yml index 61fca47d1b457..ec34ca4895bb2 100644 --- a/unified-runtime/scripts/core/virtual_memory.yml +++ b/unified-runtime/scripts/core/virtual_memory.yml @@ -41,6 +41,9 @@ params: [in][optional] is the device to get the granularity from, if the device is null then the granularity is suitable for all devices in context. + - type: size_t + name: allocationSize + desc: "[in] allocation size in bytes for which the alignment is being queried." - type: $x_virtual_mem_granularity_info_t name: propName desc: "[in] type of the info to query." diff --git a/unified-runtime/source/adapters/cuda/virtual_mem.cpp b/unified-runtime/source/adapters/cuda/virtual_mem.cpp index 29908ad1d4fd7..38f70e031dbca 100644 --- a/unified-runtime/source/adapters/cuda/virtual_mem.cpp +++ b/unified-runtime/source/adapters/cuda/virtual_mem.cpp @@ -18,6 +18,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_context_handle_t, ur_device_handle_t hDevice, + [[maybe_unused]] size_t allocationSize, ur_virtual_mem_granularity_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); diff --git a/unified-runtime/source/adapters/hip/virtual_mem.cpp b/unified-runtime/source/adapters/hip/virtual_mem.cpp index 12cf9f838ed9c..1effbbfa06357 100644 --- a/unified-runtime/source/adapters/hip/virtual_mem.cpp +++ b/unified-runtime/source/adapters/hip/virtual_mem.cpp @@ -14,8 +14,8 @@ #include "physical_mem.hpp" UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( - ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, - size_t, void *, size_t *) { + ur_context_handle_t, ur_device_handle_t, size_t, + ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp index 5e9fad25cbf55..bbbe1fce9690a 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp @@ -165,8 +165,8 @@ ur_result_t urUSMPoolGetInfo(ur_usm_pool_handle_t hPool, void *pPropValue, size_t *pPropSizeRet); ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_virtual_mem_granularity_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet); + size_t allocationSize, ur_virtual_mem_granularity_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet); ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, const void *pStart, size_t size, void **ppStart); diff --git a/unified-runtime/source/adapters/level_zero/virtual_mem.cpp b/unified-runtime/source/adapters/level_zero/virtual_mem.cpp index f61c8fd43fe2f..0488d2102318c 100644 --- a/unified-runtime/source/adapters/level_zero/virtual_mem.cpp +++ b/unified-runtime/source/adapters/level_zero/virtual_mem.cpp @@ -23,8 +23,8 @@ namespace ur::level_zero { ur_result_t urVirtualMemGranularityGetInfo( ur_context_handle_t hContext, ur_device_handle_t hDevice, - ur_virtual_mem_granularity_info_t propName, size_t propSize, - void *pPropValue, size_t *pPropSizeRet) { + size_t allocationSize, ur_virtual_mem_granularity_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: @@ -34,7 +34,8 @@ ur_result_t urVirtualMemGranularityGetInfo( // aligned size. size_t PageSize; ZE2UR_CALL(zeVirtualMemQueryPageSize, - (hContext->getZeHandle(), hDevice->ZeDevice, 1, &PageSize)); + (hContext->getZeHandle(), hDevice->ZeDevice, allocationSize, + &PageSize)); return ReturnValue(PageSize); } default: diff --git a/unified-runtime/source/adapters/mock/ur_mockddi.cpp b/unified-runtime/source/adapters/mock/ur_mockddi.cpp index 0a4a45d0898b2..4b88e0fbaf7cb 100644 --- a/unified-runtime/source/adapters/mock/ur_mockddi.cpp +++ b/unified-runtime/source/adapters/mock/ur_mockddi.cpp @@ -2729,6 +2729,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -2744,7 +2747,8 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( ur_result_t result = UR_RESULT_SUCCESS; ur_virtual_mem_granularity_get_info_params_t params = { - &hContext, &hDevice, &propName, &propSize, &pPropValue, &pPropSizeRet}; + &hContext, &hDevice, &allocationSize, &propName, + &propSize, &pPropValue, &pPropSizeRet}; auto beforeCallback = reinterpret_cast( mock::getCallbacks().get_before_callback( diff --git a/unified-runtime/source/adapters/native_cpu/virtual_mem.cpp b/unified-runtime/source/adapters/native_cpu/virtual_mem.cpp index 131b480ac14b2..6697902564aa9 100644 --- a/unified-runtime/source/adapters/native_cpu/virtual_mem.cpp +++ b/unified-runtime/source/adapters/native_cpu/virtual_mem.cpp @@ -13,8 +13,8 @@ #include "physical_mem.hpp" UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( - ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, - size_t, void *, size_t *) { + ur_context_handle_t, ur_device_handle_t, size_t, + ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/adapters/opencl/virtual_mem.cpp b/unified-runtime/source/adapters/opencl/virtual_mem.cpp index 7c411d9b7b468..c7db068eca057 100644 --- a/unified-runtime/source/adapters/opencl/virtual_mem.cpp +++ b/unified-runtime/source/adapters/opencl/virtual_mem.cpp @@ -13,8 +13,8 @@ #include "physical_mem.hpp" UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( - ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t, - size_t, void *, size_t *) { + ur_context_handle_t, ur_device_handle_t, size_t, + ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index 3539a2d2a5267..f8f7c58bf5c3c 100644 --- a/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -272,10 +272,13 @@ size_t GetKernelPrivateMemorySize(ur_kernel_handle_t Kernel, size_t GetVirtualMemGranularity(ur_context_handle_t Context, ur_device_handle_t Device) { size_t Size; + const size_t allocationSize = + 1; // probably we want to use actual allocation size [[maybe_unused]] auto Result = getContext()->urDdiTable.VirtualMem.pfnGranularityGetInfo( - Context, Device, UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED, - sizeof(Size), &Size, nullptr); + Context, Device, allocationSize, + UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED, sizeof(Size), &Size, + nullptr); assert(Result == UR_RESULT_SUCCESS); return Size; } diff --git a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp index e0d57228e4a61..0abbb7604c57d 100644 --- a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp +++ b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp @@ -2236,6 +2236,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -2255,7 +2258,8 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; ur_virtual_mem_granularity_get_info_params_t params = { - &hContext, &hDevice, &propName, &propSize, &pPropValue, &pPropSizeRet}; + &hContext, &hDevice, &allocationSize, &propName, + &propSize, &pPropValue, &pPropSizeRet}; uint64_t instance = getContext()->notify_begin(UR_FUNCTION_VIRTUAL_MEM_GRANULARITY_GET_INFO, "urVirtualMemGranularityGetInfo", ¶ms); @@ -2263,8 +2267,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( auto &logger = getContext()->logger; UR_LOG_L(logger, INFO, " ---> urVirtualMemGranularityGetInfo\n"); - ur_result_t result = pfnGranularityGetInfo( - hContext, hDevice, propName, propSize, pPropValue, pPropSizeRet); + ur_result_t result = + pfnGranularityGetInfo(hContext, hDevice, allocationSize, propName, + propSize, pPropValue, pPropSizeRet); getContext()->notify_end(UR_FUNCTION_VIRTUAL_MEM_GRANULARITY_GET_INFO, "urVirtualMemGranularityGetInfo", ¶ms, &result, diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp index 979eb3ef22746..b61356afd2b35 100644 --- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp +++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp @@ -2182,6 +2182,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -2228,8 +2231,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( URLOG_CTX_INVALID_REFERENCE(hDevice); } - ur_result_t result = pfnGranularityGetInfo( - hContext, hDevice, propName, propSize, pPropValue, pPropSizeRet); + ur_result_t result = + pfnGranularityGetInfo(hContext, hDevice, allocationSize, propName, + propSize, pPropValue, pPropSizeRet); return result; } diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp index d8dfaf8e8a9d4..bc2efdca055c0 100644 --- a/unified-runtime/source/loader/ur_ldrddi.cpp +++ b/unified-runtime/source/loader/ur_ldrddi.cpp @@ -1238,6 +1238,9 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -1258,8 +1261,8 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( return UR_RESULT_ERROR_UNINITIALIZED; // forward to device-platform - return pfnGranularityGetInfo(hContext, hDevice, propName, propSize, - pPropValue, pPropSizeRet); + return pfnGranularityGetInfo(hContext, hDevice, allocationSize, propName, + propSize, pPropValue, pPropSizeRet); } /////////////////////////////////////////////////////////////////////////////// diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp index 1261145424445..cad6de4dd9c38 100644 --- a/unified-runtime/source/loader/ur_libapi.cpp +++ b/unified-runtime/source/loader/ur_libapi.cpp @@ -2725,6 +2725,9 @@ ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. @@ -2742,8 +2745,8 @@ ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( if (nullptr == pfnGranularityGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - return pfnGranularityGetInfo(hContext, hDevice, propName, propSize, - pPropValue, pPropSizeRet); + return pfnGranularityGetInfo(hContext, hDevice, allocationSize, propName, + propSize, pPropValue, pPropSizeRet); } catch (...) { return exceptionToResult(std::current_exception()); } diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp index cc69811f5782b..426ca95027401 100644 --- a/unified-runtime/source/ur_api.cpp +++ b/unified-runtime/source/ur_api.cpp @@ -2410,6 +2410,9 @@ ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( /// device is null then the granularity is suitable for all devices in /// context. ur_device_handle_t hDevice, + /// [in] allocation size in bytes for which the alignment is being + /// queried. + size_t allocationSize, /// [in] type of the info to query. ur_virtual_mem_granularity_info_t propName, /// [in] size in bytes of the memory pointed to by pPropValue. diff --git a/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index 327728bb5ace3..fa3eb3f4b5483 100644 --- a/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/unified-runtime/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -491,11 +491,11 @@ struct urEnqueueKernelLaunchWithVirtualMemory : uur::urKernelExecutionTest { GTEST_SKIP() << "Virtual memory is not supported."; } + alloc_size = 1024; ASSERT_SUCCESS(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, + context, device, alloc_size, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, sizeof(granularity), &granularity, nullptr)); - alloc_size = 1024; virtual_page_size = uur::RoundUpToNearestFactor(alloc_size, granularity); ASSERT_SUCCESS(urPhysicalMemCreate(context, device, virtual_page_size, diff --git a/unified-runtime/test/conformance/testing/include/uur/fixtures.h b/unified-runtime/test/conformance/testing/include/uur/fixtures.h index b67eddd8f8182..fff0be4a0107e 100644 --- a/unified-runtime/test/conformance/testing/include/uur/fixtures.h +++ b/unified-runtime/test/conformance/testing/include/uur/fixtures.h @@ -976,9 +976,12 @@ struct urVirtualMemGranularityTest : urContextTest { GTEST_SKIP() << "Virtual memory is not supported."; } + const size_t allocationSize = + 1; // assuming allocations in test are small enough and minimal granularity is used ASSERT_SUCCESS(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - sizeof(granularity), &granularity, nullptr)); + context, device, allocationSize, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, sizeof(granularity), + &granularity, nullptr)); } size_t granularity; }; @@ -995,10 +998,12 @@ struct urVirtualMemGranularityTestWithParam : urContextTestWithParam { if (!virtual_memory_support) { GTEST_SKIP() << "Virtual memory is not supported."; } - + const size_t allocationSize = + 1; // assuming allocations in test are small and use smallest granularity ASSERT_SUCCESS(urVirtualMemGranularityGetInfo( - this->context, this->device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - sizeof(granularity), &granularity, nullptr)); + this->context, this->device, allocationSize, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, sizeof(granularity), + &granularity, nullptr)); ASSERT_NE(granularity, 0); } diff --git a/unified-runtime/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp b/unified-runtime/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp index 0507b8903a361..cd4e3ed0768e0 100644 --- a/unified-runtime/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp +++ b/unified-runtime/test/conformance/virtual_memory/urVirtualMemGranularityGetInfo.cpp @@ -20,89 +20,96 @@ struct urVirtualMemGranularityGetInfoTest : uur::urContextTest { UUR_INSTANTIATE_DEVICE_TEST_SUITE(urVirtualMemGranularityGetInfoTest); -TEST_P(urVirtualMemGranularityGetInfoTest, SuccessMinimum) { +void urVirtualMemGranularityGetInfoTest_successCase( + ur_context_handle_t context, ur_device_handle_t device, + const ur_virtual_mem_granularity_info_t property_name, + const size_t allocation_size) { size_t property_size = 0; - const ur_virtual_mem_granularity_info_t property_name = - UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; ASSERT_SUCCESS_OR_OPTIONAL_QUERY( - urVirtualMemGranularityGetInfo(context, device, property_name, 0, nullptr, - &property_size), + urVirtualMemGranularityGetInfo(context, device, allocation_size, + property_name, 0, nullptr, &property_size), property_name); ASSERT_EQ(sizeof(size_t), property_size); size_t property_value = 0; ASSERT_QUERY_RETURNS_VALUE( - urVirtualMemGranularityGetInfo(context, device, property_name, - property_size, &property_value, nullptr), + urVirtualMemGranularityGetInfo(context, device, allocation_size, + property_name, property_size, + &property_value, nullptr), property_value); ASSERT_GT(property_value, 0); } -TEST_P(urVirtualMemGranularityGetInfoTest, SuccessRecommended) { - size_t property_size = 0; - const ur_virtual_mem_granularity_info_t property_name = - UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; +TEST_P(urVirtualMemGranularityGetInfoTest, SuccessMinimum_smallAllocation) { + urVirtualMemGranularityGetInfoTest_successCase( + context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 1); +} - ASSERT_SUCCESS_OR_OPTIONAL_QUERY( - urVirtualMemGranularityGetInfo(context, device, property_name, 0, nullptr, - &property_size), - property_name); - ASSERT_EQ(sizeof(size_t), property_size); +TEST_P(urVirtualMemGranularityGetInfoTest, SuccessMinimum_largeAllocation) { + urVirtualMemGranularityGetInfoTest_successCase( + context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 191439360); +} - size_t property_value = 0; - ASSERT_QUERY_RETURNS_VALUE( - urVirtualMemGranularityGetInfo(context, device, property_name, - property_size, &property_value, nullptr), - property_value); +TEST_P(urVirtualMemGranularityGetInfoTest, SuccessRecommended_smallAllocation) { + urVirtualMemGranularityGetInfoTest_successCase( + context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 19); +} - ASSERT_GT(property_value, 0); +TEST_P(urVirtualMemGranularityGetInfoTest, SuccessRecommended_largeAllocation) { + urVirtualMemGranularityGetInfoTest_successCase( + context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 211739367); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidNullHandleContext) { size_t property_size = 0; - ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - nullptr, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - 0, nullptr, &property_size), - UR_RESULT_ERROR_INVALID_NULL_HANDLE); + ASSERT_EQ_RESULT( + urVirtualMemGranularityGetInfo(nullptr, device, 1, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 0, + nullptr, &property_size), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidEnumeration) { size_t property_size = 0; ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - context, device, + context, device, 1, UR_VIRTUAL_MEM_GRANULARITY_INFO_FORCE_UINT32, 0, nullptr, &property_size), UR_RESULT_ERROR_INVALID_ENUMERATION); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidNullPointerPropSizeRet) { - ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - 0, nullptr, nullptr), - UR_RESULT_ERROR_INVALID_NULL_POINTER); + ASSERT_EQ_RESULT( + urVirtualMemGranularityGetInfo(context, device, 1, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 0, + nullptr, nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidNullPointerPropValue) { - ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - sizeof(size_t), nullptr, nullptr), - UR_RESULT_ERROR_INVALID_NULL_POINTER); + ASSERT_EQ_RESULT( + urVirtualMemGranularityGetInfo(context, device, 1, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, + sizeof(size_t), nullptr, nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidPropSizeZero) { size_t minimum = 0; - ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - 0, &minimum, nullptr), - UR_RESULT_ERROR_INVALID_SIZE); + ASSERT_EQ_RESULT( + urVirtualMemGranularityGetInfo(context, device, 1, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, 0, + &minimum, nullptr), + UR_RESULT_ERROR_INVALID_SIZE); } TEST_P(urVirtualMemGranularityGetInfoTest, InvalidSizePropSizeSmall) { size_t minimum = 0; - ASSERT_EQ_RESULT(urVirtualMemGranularityGetInfo( - context, device, UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, - sizeof(size_t) - 1, &minimum, nullptr), - UR_RESULT_ERROR_INVALID_SIZE); + ASSERT_EQ_RESULT( + urVirtualMemGranularityGetInfo(context, device, 1, + UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM, + sizeof(size_t) - 1, &minimum, nullptr), + UR_RESULT_ERROR_INVALID_SIZE); }