Skip to content

[SYCL][Graph] async_malloc use allocation size for zeVirtualMemQueryPageSize #19402

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ set(UR_BUILD_TESTS "${SYCL_UR_BUILD_TESTS}" CACHE BOOL "" FORCE)
# UR tests require the examples to be built
set(UR_BUILD_EXAMPLES "${SYCL_UR_BUILD_TESTS}" CACHE BOOL "" FORCE)

option(SYCL_UR_FORMAT_CPP_STYLE "Format code style of UR C++ sources" OFF)
set(UR_FORMAT_CPP_STYLE "${SYCL_UR_FORMAT_CPP_STYLE}" CACHE BOOL "" FORCE)

# Here we override the defaults to unified-runtime
set(UR_BUILD_XPTI_LIBS OFF CACHE BOOL "")
set(UR_ENABLE_SYMBOLIZER ON CACHE BOOL "Enable symbolizer for sanitizer layer.")
Expand Down
22 changes: 13 additions & 9 deletions sycl/source/detail/context_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,22 +365,26 @@ void GetCapabilitiesIntersectionSet(const std::vector<sycl::device> &Devices,

// We're under sycl/source and these won't be exported but it's way more
// convenient to be able to reference them without extra `detail::`.
inline auto get_ur_handles(sycl::detail::context_impl &Ctx) {
inline auto get_ur_handles(const detail::context_impl &Ctx) {
ur_context_handle_t urCtx = Ctx.getHandleRef();
return std::tuple{urCtx, &Ctx.getAdapter()};
}
inline auto get_ur_handles(const sycl::context &syclContext) {
return get_ur_handles(*sycl::detail::getSyclObjImpl(syclContext));
inline auto get_ur_handles(const context &syclContext) {
return get_ur_handles(*detail::getSyclObjImpl(syclContext));
}
inline auto get_ur_handles(const sycl::device &syclDevice,
const sycl::context &syclContext) {
inline auto get_ur_handles(const detail::device_impl &syclDevice,
const detail::context_impl &syclContext) {
auto [urCtx, Adapter] = get_ur_handles(syclContext);
ur_device_handle_t urDevice =
sycl::detail::getSyclObjImpl(syclDevice)->getHandleRef();
ur_device_handle_t urDevice = syclDevice.getHandleRef();
return std::tuple{urDevice, urCtx, Adapter};
}
inline auto get_ur_handles(const sycl::device &syclDevice) {
auto &implDevice = *sycl::detail::getSyclObjImpl(syclDevice);
inline auto get_ur_handles(const device &syclDevice,
const context &syclContext) {
return get_ur_handles(*detail::getSyclObjImpl(syclDevice),
*detail::getSyclObjImpl(syclContext));
}
inline auto get_ur_handles(const device &syclDevice) {
auto &implDevice = *detail::getSyclObjImpl(syclDevice);
ur_device_handle_t urDevice = implDevice.getHandleRef();
return std::tuple{urDevice, &implDevice.getAdapter()};
}
Expand Down
9 changes: 6 additions & 3 deletions sycl/source/detail/graph/memory_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//

#include "memory_pool.hpp"
#include "detail/virtual_mem.hpp"
#include "graph_impl.hpp"

#include <optional>
Expand Down Expand Up @@ -39,10 +40,12 @@ void *graph_mem_pool::malloc(size_t Size, usm::alloc AllocType,
switch (AllocType) {
case usm::alloc::device: {

context_impl &CtxImpl = *getSyclObjImpl(MContext);
adapter_impl &Adapter = CtxImpl.getAdapter();
const context_impl &CtxImpl = *getSyclObjImpl(MContext);
const adapter_impl &Adapter = CtxImpl.getAdapter();
const device_impl &DeviceImpl = *getSyclObjImpl(MDevice);

size_t Granularity = get_mem_granularity(MDevice, MContext);
const size_t Granularity = get_mem_granularity_for_allocation_size(
DeviceImpl, CtxImpl, granularity_mode::recommended, Size);
uintptr_t StartPtr = 0;
size_t AlignedSize = alignByteSize(Size, Granularity);
// See if we can find an allocation to reuse
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/graph/memory_pool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <detail/physical_mem_impl.hpp> // For physical_mem_impl
#include <sycl/context.hpp> // For context
#include <sycl/device.hpp> // For device
#include <sycl/ext/oneapi/virtual_mem/virtual_mem.hpp> // For get_mem_granularity
#include <sycl/ext/oneapi/virtual_mem/virtual_mem.hpp> // For unmap

namespace sycl {
inline namespace _V1 {
Expand Down
25 changes: 25 additions & 0 deletions sycl/source/detail/virtual_mem.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//==---------------- virtual_mem.hpp ---------------------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <sycl/ext/oneapi/virtual_mem/virtual_mem.hpp>

namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

size_t
get_mem_granularity_for_allocation_size(const detail::device_impl &SyclDevice,
const detail::context_impl &SyclContext,
granularity_mode Mode,
size_t AllocationSize);

} // namespace ext::oneapi::experimental
} // namespace _V1
} // namespace sycl
24 changes: 18 additions & 6 deletions sycl/source/virtual_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <detail/context_impl.hpp>
#include <detail/device_impl.hpp>
#include <detail/physical_mem_impl.hpp>
#include <sycl/ext/oneapi/virtual_mem/virtual_mem.hpp>
#include <detail/virtual_mem.hpp>

// System headers for querying page-size.
#ifdef _WIN32
Expand All @@ -22,9 +22,11 @@ namespace sycl {
inline namespace _V1 {
namespace ext::oneapi::experimental {

__SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice,
const context &SyclContext,
granularity_mode Mode) {
size_t
get_mem_granularity_for_allocation_size(const detail::device_impl &SyclDevice,
const detail::context_impl &SyclContext,
granularity_mode Mode,
const size_t AllocationSize) {
if (!SyclDevice.has(aspect::ext_oneapi_virtual_mem))
throw sycl::exception(
sycl::make_error_code(sycl::errc::feature_not_supported),
Expand All @@ -45,20 +47,30 @@ __SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice,
#ifndef NDEBUG
size_t InfoOutputSize = 0;
Adapter->call<sycl::detail::UrApiKind::urVirtualMemGranularityGetInfo>(
urCtx, urDevice, GranularityQuery, 0u, nullptr, &InfoOutputSize);
urCtx, urDevice, AllocationSize, GranularityQuery, 0u, nullptr,
&InfoOutputSize);
assert(InfoOutputSize == sizeof(size_t) &&
"Unexpected output size of granularity info query.");
#endif // NDEBUG
size_t Granularity = 0;
Adapter->call<sycl::detail::UrApiKind::urVirtualMemGranularityGetInfo>(
urCtx, urDevice, GranularityQuery, sizeof(size_t), &Granularity, nullptr);
urCtx, urDevice, AllocationSize, GranularityQuery, sizeof(size_t),
&Granularity, nullptr);
if (Granularity == 0)
throw sycl::exception(
sycl::make_error_code(sycl::errc::invalid),
"Unexpected granularity result: memory granularity shouldn't be 0.");
return Granularity;
}

__SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice,
const context &SyclContext,
granularity_mode Mode) {
return get_mem_granularity_for_allocation_size(
*detail::getSyclObjImpl(SyclDevice), *detail::getSyclObjImpl(SyclContext),
Mode, 1);
}

__SYCL_EXPORT size_t get_mem_granularity(const context &SyclContext,
granularity_mode Mode) {
const std::vector<device> Devices = SyclContext.get_devices();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// RUN: %if level_zero %{%{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

#define GRAPH_E2E_EXPLICIT

#include "../Inputs/async_alloc_different_sizes.cpp"
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Tests async allocations with different sizes.

#include "../../graph_common.hpp"
#include <sycl/ext/oneapi/experimental/async_alloc/async_alloc.hpp>

void asyncAllocWorksWithSize(size_t Size) {
queue Queue{};

exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};

void *AsyncPtr = nullptr;
// Add alloc node
auto AllocNode = add_node(Graph, Queue, [&](handler &CGH) {
AsyncPtr = exp_ext::async_malloc(CGH, usm::alloc::device, Size);
});

add_node(
Graph, Queue,
[&](handler &CGH) {
depends_on_helper(CGH, AllocNode);
exp_ext::async_free(CGH, AsyncPtr);
},
AllocNode);

auto GraphExec = Graph.finalize();
}

int main() {
asyncAllocWorksWithSize(1);
asyncAllocWorksWithSize(131);
asyncAllocWorksWithSize(10071);
asyncAllocWorksWithSize(1007177);
asyncAllocWorksWithSize(191439360);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out
// RUN: %if level_zero %{%{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}

#define GRAPH_E2E_RECORD_REPLAY

#include "../Inputs/async_alloc_different_sizes.cpp"
4 changes: 4 additions & 0 deletions unified-runtime/include/ur_api.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions unified-runtime/include/ur_ddi.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions unified-runtime/include/ur_print.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions unified-runtime/scripts/core/virtual_memory.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ params:
[in][optional] is the device to get the granularity from, if the
device is null then the granularity is suitable for all devices in
context.
- type: size_t
name: allocationSize
desc: "[in] allocation size in bytes for which the alignment is being queried."
- type: $x_virtual_mem_granularity_info_t
name: propName
desc: "[in] type of the info to query."
Expand Down
1 change: 1 addition & 0 deletions unified-runtime/source/adapters/cuda/virtual_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
ur_context_handle_t, ur_device_handle_t hDevice,
[[maybe_unused]] size_t allocationSize,
ur_virtual_mem_granularity_info_t propName, size_t propSize,
void *pPropValue, size_t *pPropSizeRet) {
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
Expand Down
4 changes: 2 additions & 2 deletions unified-runtime/source/adapters/hip/virtual_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
#include "physical_mem.hpp"

UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t,
size_t, void *, size_t *) {
ur_context_handle_t, ur_device_handle_t, size_t,
ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions unified-runtime/source/adapters/level_zero/virtual_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ namespace ur::level_zero {

ur_result_t urVirtualMemGranularityGetInfo(
ur_context_handle_t hContext, ur_device_handle_t hDevice,
ur_virtual_mem_granularity_info_t propName, size_t propSize,
void *pPropValue, size_t *pPropSizeRet) {
size_t allocationSize, ur_virtual_mem_granularity_info_t propName,
size_t propSize, void *pPropValue, size_t *pPropSizeRet) {
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
switch (propName) {
case UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM:
Expand All @@ -34,7 +34,8 @@ ur_result_t urVirtualMemGranularityGetInfo(
// aligned size.
size_t PageSize;
ZE2UR_CALL(zeVirtualMemQueryPageSize,
(hContext->getZeHandle(), hDevice->ZeDevice, 1, &PageSize));
(hContext->getZeHandle(), hDevice->ZeDevice, allocationSize,
&PageSize));
return ReturnValue(PageSize);
}
default:
Expand Down
6 changes: 5 additions & 1 deletion unified-runtime/source/adapters/mock/ur_mockddi.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions unified-runtime/source/adapters/native_cpu/virtual_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#include "physical_mem.hpp"

UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t,
size_t, void *, size_t *) {
ur_context_handle_t, ur_device_handle_t, size_t,
ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

Expand Down
4 changes: 2 additions & 2 deletions unified-runtime/source/adapters/opencl/virtual_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#include "physical_mem.hpp"

UR_APIEXPORT ur_result_t UR_APICALL urVirtualMemGranularityGetInfo(
ur_context_handle_t, ur_device_handle_t, ur_virtual_mem_granularity_info_t,
size_t, void *, size_t *) {
ur_context_handle_t, ur_device_handle_t, size_t,
ur_virtual_mem_granularity_info_t, size_t, void *, size_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,10 +272,13 @@ size_t GetKernelPrivateMemorySize(ur_kernel_handle_t Kernel,
size_t GetVirtualMemGranularity(ur_context_handle_t Context,
ur_device_handle_t Device) {
size_t Size;
const size_t allocationSize =
1; // probably we want to use actual allocation size
[[maybe_unused]] auto Result =
getContext()->urDdiTable.VirtualMem.pfnGranularityGetInfo(
Context, Device, UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED,
sizeof(Size), &Size, nullptr);
Context, Device, allocationSize,
UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED, sizeof(Size), &Size,
nullptr);
assert(Result == UR_RESULT_SUCCESS);
return Size;
}
Expand Down
Loading
Loading