diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 38d5f2512a1c4..54357d1377c77 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -82,6 +82,7 @@ def AspectExt_intel_fpga_task_sequence : Aspect<"ext_intel_fpga_task_sequence">; def AspectExt_oneapi_limited_graph : Aspect<"ext_oneapi_limited_graph">; def AspectExt_oneapi_private_alloca : Aspect<"ext_oneapi_private_alloca">; def AspectExt_oneapi_queue_profiling_tag : Aspect<"ext_oneapi_queue_profiling_tag">; +def AspectExt_oneapi_virtual_mem : Aspect<"ext_oneapi_virtual_mem">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; def AspectInt64_extended_atomics : Aspect<"int64_extended_atomics">; @@ -139,7 +140,7 @@ def : TargetInfo<"__TestAspectList", AspectExt_oneapi_ballot_group, AspectExt_oneapi_fixed_size_group, AspectExt_oneapi_opportunistic_group, AspectExt_oneapi_tangle_group, AspectExt_intel_matrix, AspectExt_oneapi_is_composite, AspectExt_oneapi_is_component, AspectExt_oneapi_graph, AspectExt_intel_fpga_task_sequence, AspectExt_oneapi_limited_graph, - AspectExt_oneapi_private_alloca, AspectExt_oneapi_queue_profiling_tag], + AspectExt_oneapi_private_alloca, AspectExt_oneapi_queue_profiling_tag, AspectExt_oneapi_virtual_mem], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc new file mode 100644 index 0000000000000..72a6e1ed14f55 --- /dev/null +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_virtual_mem.asciidoc @@ -0,0 +1,398 @@ += sycl_ext_oneapi_virtual_mem + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2023 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 8 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + + +== Status + +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. +*Shipping software products should not rely on APIs defined in this +specification.* + + +== Backend support status + +The APIs in this extension may be used only on a device that has +`aspect::ext_oneapi_virtual_mem`. The application must check that the devices +in the corresponding context have this aspect before using any of the APIs +introduced in this extension. If the application fails to do this, the +implementation throws a synchronous exception with the +`errc::feature_not_supported` error code. + +== Overview + +This extension adds the notion of "virtual memory ranges" to SYCL, introducing +a way to map an address range onto multiple allocations of physical memory, +allowing users to avoid expensive reallocations and potentially running out of +device memory while relocating the corresponding memory. + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_VIRTUAL_MEM` to one of the values defined in the table +below. Applications can test for the existence of this macro to determine if +the implementation supports this feature, or applications can test the macro's +value to determine which of the extension's features the implementation +supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. +|=== + + +=== Device aspect + +Support for the features introduced in this extension can be queried using the +new `aspect::ext_oneapi_virtual_mem` defined as: + +```c++ +namespace sycl { + +enum class aspect : /* unspecified */ { + ... + ext_oneapi_virtual_mem +} + +} // namespace sycl +``` + + +=== Memory granularity + +Working with virtual address ranges and the underlying physical memory requires +the user to align and adjust in accordance with a specified minimum granularity. + +The interfaces make the distinction between device granularity, which is the +granularity required for physical memory allocations, and context granularity, +which is the granularity required for virtual memory range reservations. + +The queries provide both a minimum and a recommended granularity. The minimum +device granularity is the smallest granularity that is supported for physical +memory allocations, and the minimum context granularity is the smallest +granularity that is supported from virtual memory range reservations. However, +the recommended granularity may be larger than these minimums and may provide +better performance. + +The interfaces for querying these granularities are defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +enum class granularity_mode : /*unspecified*/ { + minimum, + recommended +}; + +size_t get_mem_granularity(const device &syclDevice, const context &syclContext, + granularity_mode mode = granularity_mode::recommended); + +size_t get_mem_granularity(const context &syclContext, + granularity_mode mode = granularity_mode::recommended); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`size_t get_mem_granularity(const device &syclDevice, const context &syclContext, granularity_mode mode = granularity_mode::recommended)` | +Returns the granularity of physical memory allocations on `syclDevice` in the +`syclContext`. The `mode` argument specifies whether the query is for the +minimum or recommended granularity. + +If `syclDevice` does not have `aspect::ext_oneapi_virtual_mem` the call throws +an exception with `errc::feature_not_supported`. + +|`size_t get_mem_granularity(const context &syclContext, granularity_mode mode = granularity_mode::recommended)` | +Returns the granularity of virtual memory range reservations in the +`syclContext`. The `mode` argument specifies whether the query is for the +minimum or recommended granularity. + +If any device in `syclContext` does not have `aspect::ext_oneapi_virtual_mem` +the call throws an exception with `errc::feature_not_supported`. + +|===================== + +=== Reserving virtual address ranges + +Virtual address ranges are represented by a `uintptr_t` and a number of bytes +reserved for it. The `uintptr_t` must be aligned in accordance with the minimum +granularity of the corresponding `context`, as queried through +`get_mem_granularity`, and likewise the number of bytes must be a multiple of +this granularity. It is the responsibility of the user to manage the +constituents of any virtual address range they reserve. + +The interfaces for reserving, freeing, and manipulating the access mode of a +virtual address range are defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +uintptr_t reserve_virtual_mem(uintptr_t start, size_t numBytes, const context &syclContext); +uintptr_t reserve_virtual_mem(size_t numBytes, const context &syclContext); + +void free_virtual_mem(uintptr_t ptr, size_t numBytes, const context &syclContext); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`uintptr_t reserve_virtual_mem(uintptr_t start, size_t numBytes, const context &syclContext)` | +Reserves a virtual memory range in `syclContext` with `numBytes` bytes. + +`start` specifies the requested start of the new virtual memory range +reservation. If the implementation is unable to reserve the virtual memory range +at the specified address, the implementation will pick another suitable address. + +`start` must be aligned in accordance with the minimum granularity for +`syclContext`, as returned by a call to `get_mem_granularity`. Likewise, +`numBytes` must be a multiple of the minimum granularity. Attempting to call +this function without meeting these requirements results in undefined behavior. + +If any of the devices in `syclContext` do not have +`aspect::ext_oneapi_virtual_mem` the call throws an exception with +`errc::feature_not_supported`. + +|`uintptr_t reserve_virtual_mem(size_t numBytes, const context &syclContext)` | +Same as `reserve_virtual_mem(0, numBytes, syclContext)`. + +|`void free_virtual_mem(uintptr_t ptr, size_t numBytes, const context &syclContext)` | +Frees a virtual memory range specified by `ptr` and `numBytes`. `ptr` must be +the same as returned by a call to `reserve_virtual_mem` and `numBytes` must be +the same as the size of the range specified in the reservation call. + +The virtual memory range must not currently be mapped to physical memory. A call +to this function with a mapped virtual memory range results in undefined +behavior. + +|===================== + + +=== Physical memory representation + +:crs: https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:reference-semantics + +To represent the underlying physical device memory a virtual address is mapped +to, the `physical_mem` class is added. This new class is defined as: + +```c++ +namespace sycl::ext::oneapi::experimental { + +enum class address_access_mode : /*unspecified*/ { + none, + read, + read_write +}; + +class physical_mem { +public: + physical_mem(const device &syclDevice, const context &syclContext, size_t numBytes); + physical_mem(const queue &syclQueue, size_t numBytes); + + /* -- common interface members -- */ + + void *map(uintptr_t ptr, size_t numBytes, address_access_mode mode, size_t offset = 0) const; + + context get_context() const; + device get_device() const; + + size_t size() const noexcept; +}; + +} // namespace sycl::ext::oneapi::experimental +``` + +`physical_mem` has common reference semantics, as described in +{crs}[section 4.5.2. Common reference semantics]. + +[frame="topbot",options="header,footer"] +|============================ +|Member function |Description + +|`physical_mem(const device &syclDevice, const context &syclContext, size_t numBytes)` | +Constructs a `physical_mem` instance using the `syclDevice` provided. This +device must either be contained by `syclContext` or it must be a descendent +device of some device that is contained by that context, otherwise this function +throws a synchronous exception with the `errc::invalid` error code. + +This will allocate `numBytes` of physical memory on the device. `numBytes` must +be a multiple of the granularity for `syclDevice`, as returned by a call to +`get_mem_granularity`. + +If `syclDevice` does not have `aspect::ext_oneapi_virtual_mem` the call throws +an exception with `errc::feature_not_supported`. + +If the constructor is unable to allocate the required memory on `syclDevice`, +the call throws an exception with `errc::memory_allocation`. + +|`physical_mem(const queue &syclQueue, size_t numBytes)` | +Same as `physical_mem(syclQueue.get_device(), syclQueue.get_context, numBytes)`. + +|`void *map(uintptr_t ptr, size_t numBytes, address_access_mode mode, size_t offset = 0)` | +Maps a virtual memory range, specified by `ptr` and `numBytes`, to the physical +memory corresponding to this instance of `physical_mem`, starting at an offset +of `offset` bytes. + +It is required that `offset + numBytes` is less than or equal to `size()` and +that `ptr`, `numBytes` and `offset` are all multiples of the minimum granularity +for the device associated with this instance of `physical_mem`. + +If `mode` is `address_access_mode::read` or `address_access_mode::read_write` +the returned pointer is accessible after the call as read-only or read-write +respectively. Otherwise, it is considered inaccessible and accessing it will +result in undefined behavior. + +The returned pointer is equivalent to `reinterpret_cast(ptr)`. + +Writing to any address in the virtual memory range with access mode set to +`access_mode::read` results in undefined behavior. + +An accessible pointer behaves the same as a pointer to device USM memory and can +be used in place of a device USM pointer in any interface accepting one. + +A virtual memory range cannot be simultaneously mapped to more than one +physical memory region. Likewise, multiple virtual memory ranges cannot be +mapped onto the same physical memory region. Attempting to violate either of +these restrictions will result in undefined behavior. + +|`context get_context() const` | +Returns the SYCL context associated with the instance of `physical_mem`. + +|`device get_device() const` | +Returns the SYCL device associated with the instance of `physical_mem`. + +|`size_t size() const` | +Returns the size of the corresponding physical memory in bytes. + +|============================ + +Virtual memory address ranges are mapped to the a `physical_mem` through the +`map` member functions, where the access mode can also be specified. +To further get or set the access mode of a mapped virtual address range, the +user does not need to know the associated `physical_mem` and can just call the +following free functions. + +```c++ +namespace sycl::ext::oneapi::experimental { + +void set_access_mode(const void *ptr, size_t numBytes, address_access_mode mode, const context &syclContext); + +address_access_mode get_access_mode(const void *ptr, size_t numBytes, const context &syclContext); + +void unmap(const void *ptr, size_t numBytes, const context &syclContext); + +} // namespace sycl::ext::oneapi::experimental +``` + +[frame="topbot",options="header,footer"] +|===================== +|Function |Description + +|`void set_access_mode(const void *ptr, size_t numBytes, address_access_mode mode, const context &syclContext)` | +Changes the access mode of a mapped virtual memory range specified by `ptr` and +`numBytes`. + +If `mode` is `address_access_mode::read` or `address_access_mode::read_write` +`ptr` pointer is accessible after the call as read-only or read-write +respectively. Otherwise, it is considered inaccessible and accessing it will +result in undefined behavior. + +The virtual memory range specified by `ptr` and `numBytes` must be a sub-range +of virtual memory ranges previously mapped to `physical_mem`. `ptr` +must be aligned to the minimum memory granularity of the device associated with +the `physical_mem` the range is mapped to and `numBytes` must be a multiple of +the minimum memory granularity of the device associated with the `physical_mem` +the range is mapped to. + +Writing to any address in the virtual memory range with access mode set to +`address_access_mode::read` results in undefined behavior. + +An accessible pointer behaves the same as a pointer to device USM memory and can +be used in place of a device USM pointer in any interface accepting one. + +|`address_access_mode get_access_mode(const void *ptr, size_t numBytes, const context &syclContext)` | +Returns the access mode of the mapped virtual memory range specified by `ptr` +and `numBytes`. + +The virtual memory range specified by `ptr` and `numBytes` must be a sub-range +of virtual memory ranges previously mapped to `physical_mem`. `ptr` +must be aligned to the minimum memory granularity of the device associated with +the `physical_mem` the range is mapped to and `numBytes` must be a multiple of +the minimum memory granularity of the device associated with the `physical_mem` +the range is mapped to. + +|`void unmap(const void *ptr, size_t numBytes, const device &syclDevice, const context &syclContext)` | +Unmaps the range specified by `ptr` and `numBytes`. The range must have been +mapped through a call to `physical_mem::map()` prior to calling this. The range +must not be a proper sub-range of a previously mapped range. `syclContext` must +be the same as the context returned by the `get_context()` member function on +the `physical_mem` the address range is currently mapped to. + +After this call, the full range will again be ready to be mapped through a call +to `physical_mem::map()`. + +[_Note:_ Unmapping ranges that span multiple contiguous mapped ranges is not +supported. Doing so will result in undefined behavior. This restriction may be +lifted in the future. _{endnote}_] + +[_Note:_ The destructor for `physical_mem` will not unmap ranges mapped to it. +As such, the user must call `unmap` on ranges mapped to `physical_mem` objects +prior to their destruction. _{endnote}_] + +|===================== \ No newline at end of file diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 995579d612afb..3090b2d488ee0 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -215,4 +215,16 @@ _PI_API(piextDestroyExternalSemaphore) _PI_API(piextWaitExternalSemaphore) _PI_API(piextSignalExternalSemaphore) +// Virtual memory +_PI_API(piextVirtualMemGranularityGetInfo) +_PI_API(piextPhysicalMemCreate) +_PI_API(piextPhysicalMemRetain) +_PI_API(piextPhysicalMemRelease) +_PI_API(piextVirtualMemReserve) +_PI_API(piextVirtualMemFree) +_PI_API(piextVirtualMemMap) +_PI_API(piextVirtualMemUnmap) +_PI_API(piextVirtualMemSetAccess) +_PI_API(piextVirtualMemGetInfo) + #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 79d67791ffc8d..ce7d34ef75899 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -191,9 +191,13 @@ // `win32_nt_dx12_resource` value. // the `pi_external_semaphore_handle_type` enum now has a new // `win32_nt_dx12_fence` value. +// 15.54 Added piextVirtualMem* functions, and piextPhysicalMem* functions, +// PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM device info descriptor, +// _pi_virtual_mem_granularity_info enum, _pi_virtual_mem_info enum and +// pi_virtual_access_flags bit flags. #define _PI_H_VERSION_MAJOR 15 -#define _PI_H_VERSION_MINOR 53 +#define _PI_H_VERSION_MINOR 54 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -505,6 +509,9 @@ typedef enum { // Timestamp enqueue PI_EXT_ONEAPI_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT = 0x2011D, + + // Virtual memory support + PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM = 0x2011E, } _pi_device_info; typedef enum { @@ -756,6 +763,15 @@ typedef enum { PI_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS = 0x1143, } _pi_sampler_cubemap_filter_mode; +typedef enum { + PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM = 0x30100, + PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED = 0x30101, +} _pi_virtual_mem_granularity_info; + +typedef enum { + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE = 0x30200, +} _pi_virtual_mem_info; + using pi_context_properties = intptr_t; using pi_device_exec_capabilities = pi_bitfield; @@ -848,6 +864,10 @@ constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_NO_IMMEDIATE = (1 << constexpr pi_queue_properties PI_EXT_QUEUE_FLAG_SUBMISSION_IMMEDIATE = (1 << 8); // clang-format on +using pi_virtual_access_flags = pi_bitfield; +constexpr pi_virtual_access_flags PI_VIRTUAL_ACCESS_FLAG_RW = (1 << 0); +constexpr pi_virtual_access_flags PI_VIRTUAL_ACCESS_FLAG_READ_ONLY = (1 << 1); + typedef enum { // No preference for SLM or data cache. PI_EXT_KERNEL_EXEC_INFO_CACHE_DEFAULT = 0x0, @@ -889,6 +909,8 @@ using pi_program_binary_type = _pi_program_binary_type; using pi_kernel_info = _pi_kernel_info; using pi_profiling_info = _pi_profiling_info; using pi_kernel_cache_config = _pi_kernel_cache_config; +using pi_virtual_mem_granularity_info = _pi_virtual_mem_granularity_info; +using pi_virtual_mem_info = _pi_virtual_mem_info; using pi_image_copy_flags = _pi_image_copy_flags; @@ -1241,6 +1263,7 @@ struct _pi_program; struct _pi_kernel; struct _pi_event; struct _pi_sampler; +struct _pi_physical_mem; using pi_platform = _pi_platform *; using pi_device = _pi_device *; @@ -1255,6 +1278,7 @@ using pi_image_handle = pi_uint64; using pi_image_mem_handle = void *; using pi_interop_mem_handle = pi_uint64; using pi_interop_semaphore_handle = pi_uint64; +using pi_physical_mem = _pi_physical_mem *; typedef struct { pi_image_channel_order image_channel_order; @@ -2338,6 +2362,125 @@ pi_result piextEnqueueDeviceGlobalVariableRead( size_t count, size_t offset, void *dst, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event); +/// +/// Virtual memory +/// + +/// API for getting information about the minimum and recommended granularity +/// of physical and virtual memory. +/// +/// \param context is the context to get the granularity from. +/// \param device is the device to get the granularity from. +/// \param param_name is the type of query to perform. +/// \param param_value_size is the size of the result in bytes. +/// \param param_value is the result. +/// \param param_value_size_ret is how many bytes were written. +__SYCL_EXPORT pi_result piextVirtualMemGranularityGetInfo( + pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); + +/// API for creating a physical memory handle that virtual memory can be mapped +/// to. +/// +/// \param context is the context within which the physical memory is allocated. +/// \param device is the device the physical memory is on. +/// \param mem_size is the size of physical memory to allocate. This must be a +/// multiple of the minimum virtual memory granularity. +/// \param ret_physical_mem is the handle for the resulting physical memory. +__SYCL_EXPORT pi_result +piextPhysicalMemCreate(pi_context context, pi_device device, size_t mem_size, + pi_physical_mem *ret_physical_mem); + +/// API for retaining a physical memory handle. +/// +/// \param physical_mem is the handle for the physical memory to retain. +__SYCL_EXPORT pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem); + +/// API for releasing a physical memory handle. +/// +/// \param physical_mem is the handle for the physical memory to free. +__SYCL_EXPORT pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem); + +/// API for reserving a virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// reserved. +/// \param start is a pointer to the start of the region to reserve. If nullptr +/// the implementation selects a start address. +/// \param range_size is the size of the virtual address range to reserve in +/// bytes. +/// \param ret_ptr is the pointer to the start of the resulting virtual memory +/// range. +__SYCL_EXPORT pi_result piextVirtualMemReserve(pi_context context, + const void *start, + size_t range_size, + void **ret_ptr); + +/// API for freeing a virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// reserved. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range. +__SYCL_EXPORT pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size); + +/// API for mapping a virtual memory range to a a physical memory allocation at +/// a given offset. +/// +/// \param context is the context within which both the virtual memory range is +/// reserved and the physical memory is allocated. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range. +/// \param physical_mem is the handle for the physical memory to map ptr to. +/// \param offset is the offset into physical_mem in bytes to map ptr to. +/// \param flags is the access flags to set for the mapping. +__SYCL_EXPORT pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, + pi_physical_mem physical_mem, + size_t offset, + pi_virtual_access_flags flags); + +/// API for unmapping a virtual memory range previously mapped in a context. +/// After a call to this function, the virtual memory range is left in a state +/// ready to be remapped. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +__SYCL_EXPORT pi_result piextVirtualMemUnmap(pi_context context, + const void *ptr, + size_t range_size); + +/// API for setting the access mode of a mapped virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +/// \param flags is the access flags to set for the mapped virtual access range. +__SYCL_EXPORT pi_result piextVirtualMemSetAccess(pi_context context, + const void *ptr, + size_t range_size, + pi_virtual_access_flags flags); + +/// API for getting info about a mapped virtual memory range. +/// +/// \param context is the context within which the virtual memory range is +/// currently mapped. +/// \param ptr is the pointer to the start of the virtual memory range. +/// \param range_size is the size of the virtual address range in bytes. +/// \param param_name is the type of query to perform. +/// \param param_value_size is the size of the result in bytes. +/// \param param_value is the result. +/// \param param_value_size_ret is how many bytes were written. +__SYCL_EXPORT pi_result +piextVirtualMemGetInfo(pi_context context, const void *ptr, size_t range_size, + pi_virtual_mem_info param_name, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); + /// /// Plugin /// diff --git a/sycl/include/sycl/detail/pi.hpp b/sycl/include/sycl/detail/pi.hpp index 3500c576bb599..1fe21d36a8aaa 100644 --- a/sycl/include/sycl/detail/pi.hpp +++ b/sycl/include/sycl/detail/pi.hpp @@ -146,6 +146,8 @@ using PiExternalMemDescriptor = ::pi_external_mem_descriptor; using PiExternalSemaphoreDescriptor = ::pi_external_semaphore_descriptor; using PiImageOffset = ::pi_image_offset_struct; using PiImageRegion = ::pi_image_region_struct; +using PiPhysicalMem = ::pi_physical_mem; +using PiVirtualAccessFlags = ::pi_virtual_access_flags; __SYCL_EXPORT void contextSetExtendedDeleter(const sycl::context &constext, pi_context_extended_deleter func, diff --git a/sycl/include/sycl/device_aspect_macros.hpp b/sycl/include/sycl/device_aspect_macros.hpp index df6c827de60f2..d756b0a62e88a 100644 --- a/sycl/include/sycl/device_aspect_macros.hpp +++ b/sycl/include/sycl/device_aspect_macros.hpp @@ -381,6 +381,11 @@ #define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_queue_profiling_tag__ 0 #endif +#ifndef __SYCL_ALL_DEVICES_HAVE_ext_oneapi_virtual_mem__ +// __SYCL_ASPECT(ext_oneapi_virtual_mem, 74) +#define __SYCL_ALL_DEVICES_HAVE_ext_oneapi_virtual_mem__ 0 +#endif + #ifndef __SYCL_ANY_DEVICE_HAS_host__ // __SYCL_ASPECT(host, 0) #define __SYCL_ANY_DEVICE_HAS_host__ 0 @@ -750,3 +755,8 @@ // __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73) #define __SYCL_ANY_DEVICE_HAS_ext_oneapi_queue_profiling_tag__ 0 #endif + +#ifndef __SYCL_ANY_DEVICE_HAS_ext_oneapi_virtual_mem__ +// __SYCL_ASPECT(ext_oneapi_virtual_mem, 74) +#define __SYCL_ANY_DEVICE_HAS_ext_oneapi_virtual_mem__ 0 +#endif diff --git a/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp b/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp new file mode 100644 index 0000000000000..24d371fe8c6fd --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/virtual_mem/physical_mem.hpp @@ -0,0 +1,81 @@ +//==--- physical_mem.hpp - sycl_ext_oneapi_virtual_mem physical_mem class --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { + +namespace detail { +class physical_mem_impl; +} // namespace detail + +namespace ext::oneapi::experimental { + +enum class address_access_mode : char { none = 0, read = 1, read_write = 2 }; + +class __SYCL_EXPORT physical_mem + : public sycl::detail::OwnerLessBase { +public: + physical_mem(const device &SyclDevice, const context &SyclContext, + size_t NumBytes); + + physical_mem(const queue &SyclQueue, size_t NumBytes) + : physical_mem(SyclQueue.get_device(), SyclQueue.get_context(), + NumBytes) {} + + physical_mem(const physical_mem &rhs) = default; + physical_mem(physical_mem &&rhs) = default; + + physical_mem &operator=(const physical_mem &rhs) = default; + physical_mem &operator=(physical_mem &&rhs) = default; + + ~physical_mem() noexcept(false) {}; + + bool operator==(const physical_mem &rhs) const { return impl == rhs.impl; } + bool operator!=(const physical_mem &rhs) const { return !(*this == rhs); } + + void *map(uintptr_t Ptr, size_t NumBytes, address_access_mode Mode, + size_t Offset = 0) const; + + context get_context() const; + device get_device() const; + + size_t size() const noexcept; + +private: + std::shared_ptr impl; + + template + friend decltype(Obj::impl) + sycl::detail::getSyclObjImpl(const Obj &SyclObject); + + template + friend T sycl::detail::createSyclObjFromImpl(decltype(T::impl) ImplObj); +}; + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl + +namespace std { +template <> struct hash { + size_t operator()( + const sycl::ext::oneapi::experimental::physical_mem &PhysicalMem) const { + return hash>()( + sycl::detail::getSyclObjImpl(PhysicalMem)); + } +}; +} // namespace std diff --git a/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp b/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp new file mode 100644 index 0000000000000..74a42354eaa01 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/virtual_mem/virtual_mem.hpp @@ -0,0 +1,61 @@ +//==- virtual_mem.hpp - sycl_ext_oneapi_virtual_mem virtual mem free funcs -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +enum class granularity_mode : char { + minimum = 0, + recommended = 1, +}; + +__SYCL_EXPORT size_t +get_mem_granularity(const device &SyclDevice, const context &SyclContext, + granularity_mode Mode = granularity_mode::recommended); + +__SYCL_EXPORT size_t +get_mem_granularity(const context &SyclContext, + granularity_mode Mode = granularity_mode::recommended); + +__SYCL_EXPORT uintptr_t reserve_virtual_mem(uintptr_t Start, size_t NumBytes, + const context &SyclContext); + +inline uintptr_t reserve_virtual_mem(size_t NumBytes, + const context &SyclContext) { + return reserve_virtual_mem(0, NumBytes, SyclContext); +} + +__SYCL_EXPORT void free_virtual_mem(uintptr_t Ptr, size_t NumBytes, + const context &SyclContext); + +__SYCL_EXPORT void set_access_mode(const void *Ptr, size_t NumBytes, + address_access_mode Mode, + const context &SyclContext); + +__SYCL_EXPORT address_access_mode get_access_mode(const void *Ptr, + size_t NumBytes, + const context &SyclContext); + +__SYCL_EXPORT void unmap(const void *Ptr, size_t NumBytes, + const context &SyclContext); + +} // Namespace ext::oneapi::experimental +} // namespace _V1 +} // Namespace sycl diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 2d9cee1351d7a..3b744a89dbb90 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -68,3 +68,4 @@ __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_2d, 70) __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d_usm, 71) __SYCL_ASPECT(ext_oneapi_bindless_sampled_image_fetch_3d, 72) __SYCL_ASPECT(ext_oneapi_queue_profiling_tag, 73) +__SYCL_ASPECT(ext_oneapi_virtual_mem, 74) diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 53a60381f0b8d..16b5e8f0f6c40 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -111,4 +111,6 @@ #include #include #include +#include +#include #include diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 0077b245905db..1628b1537fae5 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1298,6 +1298,69 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index 2b5d77b26ea9d..8c5112f4cc9d1 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -81,4 +82,8 @@ struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ { using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_; }; +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; + #endif // PI_CUDA_HPP diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 33b7388f9c884..c3324463690eb 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -1301,6 +1301,69 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/hip/pi_hip.hpp b/sycl/plugins/hip/pi_hip.hpp index 018d069f5fe7f..bec26c9866fdb 100644 --- a/sycl/plugins/hip/pi_hip.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -94,4 +95,8 @@ struct _pi_ext_command_buffer : ur_exp_command_buffer_handle_t_ { using ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_; }; +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; + #endif // PI_HIP_HPP diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index f88e8c1ed3cd3..bab365effe85f 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1424,6 +1424,144 @@ piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { return pi2ur::piextCommandBufferReleaseCommand(Command); } +/// API for getting information about the minimum and recommended granularity +/// of physical and virtual memory. +/// +/// \param Context is the context to get the granularity from. +/// \param Device is the device to get the granularity from. +/// \param MemSize is the potentially unadjusted size to get granularity for. +/// \param ParamName is the type of query to perform. +/// \param ParamValueSize is the size of the result in bytes. +/// \param ParamValue is the result. +/// \param ParamValueSizeRet is how many bytes were written. +pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +/// API for creating a physical memory handle that virtual memory can be mapped +/// to. +/// +/// \param Context is the context within which the physical memory is allocated. +/// \param Device is the device the physical memory is on. +/// \param MemSize is the size of physical memory to allocate. This must be a +/// multiple of the minimum virtual memory granularity. +/// \param RetPhysicalMem is the handle for the resulting physical memory. +pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhysicalMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhysicalMem); +} + +/// API for retaining a physical memory handle. +/// +/// \param PhysicalMem is the handle for the physical memory to retain. +pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +/// API for releasing a physical memory handle. +/// +/// \param PhysicalMem is the handle for the physical memory to free. +pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +/// API for reserving a virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// reserved. +/// \param Start is a pointer to the start of the region to reserve. If nullptr +/// the implementation selects a start address. +/// \param RangeSize is the size of the virtual address range to reserve in +/// bytes. +/// \param RetPtr is the pointer to the start of the resulting virtual memory +/// range. +pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +/// API for freeing a virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// reserved. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range. +pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +/// API for mapping a virtual memory range to a a physical memory allocation at +/// a given offset. +/// +/// \param Context is the context within which both the virtual memory range is +/// reserved and the physical memory is allocated. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range. +/// \param PhysicalMem is the handle for the physical memory to map Ptr to. +/// \param Offset is the offset into PhysicalMem in bytes to map Ptr to. +/// \param Flags is the access flags to set for the mapping. +pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, pi_physical_mem PhysicalMem, + size_t Offset, pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +/// API for unmapping a virtual memory range previously mapped in a context. +/// After a call to this function, the virtual memory range is left in a state +/// ready to be remapped. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +/// API for setting the access mode of a mapped virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +/// \param Flags is the access flags to set for the mapped virtual access range. +pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +/// API for getting info about a mapped virtual memory range. +/// +/// \param Context is the context within which the virtual memory range is +/// currently mapped. +/// \param Ptr is the pointer to the start of the virtual memory range. +/// \param RangeSize is the size of the virtual address range in bytes. +/// \param ParamName is the type of query to perform. +/// \param ParamValueSize is the size of the result in bytes. +/// \param ParamValue is the result. +/// \param ParamValueSizeRet is how many bytes were written. +pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + const char SupportedVersion[] = _PI_LEVEL_ZERO_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { // missing diff --git a/sycl/plugins/native_cpu/pi_native_cpu.cpp b/sycl/plugins/native_cpu/pi_native_cpu.cpp index d867caea5e23d..2276e9f78f7ea 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.cpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.cpp @@ -1321,6 +1321,69 @@ pi_result piextKernelSuggestMaxCooperativeGroupCount( return PI_ERROR_UNSUPPORTED_FEATURE; } +pi_result +piextVirtualMemGranularityGetInfo(pi_context context, pi_device device, + pi_virtual_mem_granularity_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGranularityGetInfo(context, device, param_name, + param_value_size, param_value, + param_value_size_ret); +} + +pi_result piextPhysicalMemCreate(pi_context context, pi_device device, + size_t mem_size, + pi_physical_mem *ret_physical_mem) { + return pi2ur::piextPhysicalMemCreate(context, device, mem_size, + ret_physical_mem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRetain(physical_mem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem physical_mem) { + return pi2ur::piextPhysicalMemRelease(physical_mem); +} + +pi_result piextVirtualMemReserve(pi_context context, const void *start, + size_t range_size, void **ret_ptr) { + return pi2ur::piextVirtualMemReserve(context, start, range_size, ret_ptr); +} + +pi_result piextVirtualMemFree(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemFree(context, ptr, range_size); +} + +pi_result piextVirtualMemMap(pi_context context, const void *ptr, + size_t range_size, pi_physical_mem physical_mem, + size_t offset, pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemMap(context, ptr, range_size, physical_mem, + offset, flags); +} + +pi_result piextVirtualMemUnmap(pi_context context, const void *ptr, + size_t range_size) { + return pi2ur::piextVirtualMemUnmap(context, ptr, range_size); +} + +pi_result piextVirtualMemSetAccess(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_access_flags flags) { + return pi2ur::piextVirtualMemSetAccess(context, ptr, range_size, flags); +} + +pi_result piextVirtualMemGetInfo(pi_context context, const void *ptr, + size_t range_size, + pi_virtual_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + return pi2ur::piextVirtualMemGetInfo(context, ptr, range_size, param_name, + param_value_size, param_value, + param_value_size_ret); +} + // Initialize function table with stubs. #define _PI_API(api) \ (PluginInit->PiFunctionTable).api = (decltype(&::api))(&api); diff --git a/sycl/plugins/native_cpu/pi_native_cpu.hpp b/sycl/plugins/native_cpu/pi_native_cpu.hpp index 1d92580997b76..287b3c03115b6 100644 --- a/sycl/plugins/native_cpu/pi_native_cpu.hpp +++ b/sycl/plugins/native_cpu/pi_native_cpu.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -43,3 +44,7 @@ struct _pi_program : ur_program_handle_t_ { struct _pi_queue : ur_queue_handle_t_ { using ur_queue_handle_t_::ur_queue_handle_t_; }; + +struct _pi_physical_mem : ur_physical_mem_handle_t_ { + using ur_physical_mem_handle_t_::ur_physical_mem_handle_t_; +}; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 1fef329d179af..1d340b5685f4e 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1228,6 +1228,69 @@ pi_result piextPeerAccessGetInfo(pi_device command_device, ParamValueSizeRet); } +pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhysicalMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhysicalMem); +} + +pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, pi_physical_mem PhysicalMem, + size_t Offset, pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + pi_result piTearDown(void *PluginParameter) { return pi2ur::piTearDown(PluginParameter); } diff --git a/sycl/plugins/unified_runtime/pi2ur.hpp b/sycl/plugins/unified_runtime/pi2ur.hpp index 3ee63a025593b..f22e672d84423 100644 --- a/sycl/plugins/unified_runtime/pi2ur.hpp +++ b/sycl/plugins/unified_runtime/pi2ur.hpp @@ -678,6 +678,31 @@ inline pi_result ur2piSamplerInfoValue(ur_sampler_info_t ParamName, } } +inline pi_result ur2piVirtualMemInfoValue(ur_virtual_mem_info_t ParamName, + size_t ParamValueSizePI, + size_t *ParamValueSizeUR, + void *ParamValue) { + + ConvertHelper Value(ParamValueSizePI, ParamValue, ParamValueSizeUR); + switch (ParamName) { + case UR_VIRTUAL_MEM_INFO_ACCESS_MODE: { + auto ConvertFunc = [](ur_virtual_mem_access_flags_t UrValue) { + pi_virtual_access_flags PiValue = 0; + if (UrValue & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE) + PiValue |= PI_VIRTUAL_ACCESS_FLAG_RW; + if (UrValue & UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY) + PiValue |= PI_VIRTUAL_ACCESS_FLAG_READ_ONLY; + return PiValue; + }; + return Value + .convert( + ConvertFunc); + } + default: + return PI_SUCCESS; + } +} + // Translate UR device info values to PI info values inline pi_result ur2piUSMAllocInfoValue(ur_usm_alloc_info_t ParamName, size_t ParamValueSizePI, @@ -1311,6 +1336,8 @@ inline pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, PI_TO_UR_MAP_DEVICE_INFO( PI_EXT_ONEAPI_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP) + PI_TO_UR_MAP_DEVICE_INFO(PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM, + UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT) #undef PI_TO_UR_MAP_DEVICE_INFO default: return PI_ERROR_UNKNOWN; @@ -5665,4 +5692,194 @@ inline pi_result piextSignalExternalSemaphore( // Bindless Images Extension /////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// +// Virtual Memory + +inline pi_result +piextVirtualMemGranularityGetInfo(pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_device_handle_t UrDevice = reinterpret_cast(Device); + + ur_virtual_mem_granularity_info_t InfoType{}; + switch (ParamName) { + case PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM: + InfoType = UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; + break; + case PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED: + InfoType = UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; + break; + default: + return PI_ERROR_UNKNOWN; + } + + HANDLE_ERRORS(urVirtualMemGranularityGetInfo(UrContext, UrDevice, InfoType, + ParamValueSize, ParamValue, + ParamValueSizeRet)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemCreate(pi_context Context, pi_device Device, + size_t MemSize, + pi_physical_mem *RetPhyscialMem) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_device_handle_t UrDevice = reinterpret_cast(Device); + + ur_physical_mem_handle_t *UrPhysicalMem = + reinterpret_cast(RetPhyscialMem); + + HANDLE_ERRORS(urPhysicalMemCreate(UrContext, UrDevice, MemSize, nullptr, + UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); + + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast(PhysicalMem); + + HANDLE_ERRORS(urPhysicalMemRetain(UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast(PhysicalMem); + + HANDLE_ERRORS(urPhysicalMemRelease(UrPhysicalMem)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemReserve(pi_context Context, const void *Start, + size_t RangeSize, void **RetPtr) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(RetPtr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + HANDLE_ERRORS(urVirtualMemReserve(UrContext, Start, RangeSize, RetPtr)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + HANDLE_ERRORS(urVirtualMemFree(UrContext, Ptr, RangeSize)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemSetAccess(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_access_flags Flags) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_virtual_mem_access_flags_t UrFlags = 0; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_RW) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; + + HANDLE_ERRORS(urVirtualMemSetAccess(UrContext, Ptr, RangeSize, UrFlags)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_physical_mem PhysicalMem, size_t Offset, + pi_virtual_access_flags Flags) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + PI_ASSERT(PhysicalMem, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + ur_physical_mem_handle_t UrPhysicalMem = + reinterpret_cast(PhysicalMem); + + ur_virtual_mem_access_flags_t UrFlags = 0; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_RW) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE; + if (Flags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + UrFlags |= UR_VIRTUAL_MEM_ACCESS_FLAG_READ_ONLY; + + HANDLE_ERRORS(urVirtualMemMap(UrContext, Ptr, RangeSize, UrPhysicalMem, + Offset, UrFlags)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemUnmap(pi_context Context, const void *Ptr, + size_t RangeSize) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + HANDLE_ERRORS(urVirtualMemUnmap(UrContext, Ptr, RangeSize)); + + return PI_SUCCESS; +} + +inline pi_result piextVirtualMemGetInfo(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_virtual_mem_info ParamName, + size_t ParamValueSize, void *ParamValue, + size_t *ParamValueSizeRet) { + PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); + PI_ASSERT(Ptr, PI_ERROR_INVALID_ARG_VALUE); + + ur_context_handle_t UrContext = + reinterpret_cast(Context); + + ur_virtual_mem_info_t InfoType{}; + switch (ParamName) { + case PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE: + InfoType = UR_VIRTUAL_MEM_INFO_ACCESS_MODE; + break; + default: + return PI_ERROR_UNKNOWN; + } + + HANDLE_ERRORS(urVirtualMemGetInfo(UrContext, Ptr, RangeSize, InfoType, + ParamValueSize, ParamValue, + ParamValueSizeRet)); + ur2piVirtualMemInfoValue(InfoType, ParamValueSize, &ParamValueSize, + ParamValue); + + return PI_SUCCESS; +} + +// Virtual Memory +/////////////////////////////////////////////////////////////////////////////// + } // namespace pi2ur diff --git a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp index 30ba9a7afc8b1..7e268199bba77 100644 --- a/sycl/plugins/unified_runtime/pi_unified_runtime.cpp +++ b/sycl/plugins/unified_runtime/pi_unified_runtime.cpp @@ -1189,6 +1189,72 @@ piextCommandBufferReleaseCommand(pi_ext_command_buffer_command Command) { return pi2ur::piextCommandBufferReleaseCommand(Command); } +__SYCL_EXPORT pi_result piextVirtualMemGranularityGetInfo( + pi_context Context, pi_device Device, + pi_virtual_mem_granularity_info ParamName, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGranularityGetInfo(Context, Device, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + +__SYCL_EXPORT pi_result +piextPhysicalMemCreate(pi_context Context, pi_device Device, size_t MemSize, + pi_physical_mem *RetPhsycialMem) { + return pi2ur::piextPhysicalMemCreate(Context, Device, MemSize, + RetPhsycialMem); +} + +__SYCL_EXPORT pi_result piextPhysicalMemRetain(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRetain(PhysicalMem); +} + +__SYCL_EXPORT pi_result piextPhysicalMemRelease(pi_physical_mem PhysicalMem) { + return pi2ur::piextPhysicalMemRelease(PhysicalMem); +} + +__SYCL_EXPORT pi_result piextVirtualMemReserve(pi_context Context, + const void *Start, + size_t RangeSize, + void **RetPtr) { + return pi2ur::piextVirtualMemReserve(Context, Start, RangeSize, RetPtr); +} + +__SYCL_EXPORT pi_result piextVirtualMemFree(pi_context Context, const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemFree(Context, Ptr, RangeSize); +} + +__SYCL_EXPORT pi_result +piextVirtualMemSetAccess(pi_context Context, const void *Ptr, size_t RangeSize, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemSetAccess(Context, Ptr, RangeSize, Flags); +} + +__SYCL_EXPORT pi_result piextVirtualMemMap(pi_context Context, const void *Ptr, + size_t RangeSize, + pi_physical_mem PhysicalMem, + size_t Offset, + pi_virtual_access_flags Flags) { + return pi2ur::piextVirtualMemMap(Context, Ptr, RangeSize, PhysicalMem, Offset, + Flags); +} + +__SYCL_EXPORT pi_result piextVirtualMemUnmap(pi_context Context, + const void *Ptr, + size_t RangeSize) { + return pi2ur::piextVirtualMemUnmap(Context, Ptr, RangeSize); +} + +__SYCL_EXPORT pi_result +piextVirtualMemGetInfo(pi_context Context, const void *Ptr, size_t RangeSize, + pi_virtual_mem_info ParamName, size_t ParamValueSize, + void *ParamValue, size_t *ParamValueSizeRet) { + return pi2ur::piextVirtualMemGetInfo(Context, Ptr, RangeSize, ParamName, + ParamValueSize, ParamValue, + ParamValueSizeRet); +} + __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 7ef8ff587f0e2..e433dad9049f3 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -256,11 +256,13 @@ set(SYCL_COMMON_SOURCES "interop_handle.cpp" "kernel.cpp" "kernel_bundle.cpp" + "physical_mem.cpp" "platform.cpp" "queue.cpp" "sampler.cpp" "stream.cpp" "spirv_ops.cpp" + "virtual_mem.cpp" "$<$:detail/windows_pi.cpp>" "$<$,$>:detail/posix_pi.cpp>" ) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index ef02558bba55a..5d12b845c4cff 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -746,6 +746,14 @@ bool device_impl::has(aspect Aspect) const { sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; return call_successful && support; } + case aspect::ext_oneapi_virtual_mem: { + pi_bool support = PI_FALSE; + bool call_successful = + getPlugin()->call_nocheck( + MDevice, PI_EXT_ONEAPI_DEVICE_INFO_SUPPORTS_VIRTUAL_MEM, + sizeof(pi_bool), &support, nullptr) == PI_SUCCESS; + return call_successful && support; + } } throw runtime_error("This device aspect has not been implemented yet.", PI_ERROR_INVALID_DEVICE); diff --git a/sycl/source/detail/physical_mem_impl.hpp b/sycl/source/detail/physical_mem_impl.hpp new file mode 100644 index 0000000000000..9fb38f1202257 --- /dev/null +++ b/sycl/source/detail/physical_mem_impl.hpp @@ -0,0 +1,95 @@ +//==- physical_mem_impl.hpp - sycl_ext_oneapi_virtual_mem physical_mem impl ==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +inline sycl::detail::pi::PiVirtualAccessFlags AccessModeToVirtualAccessFlags( + ext::oneapi::experimental::address_access_mode Mode) { + switch (Mode) { + case ext::oneapi::experimental::address_access_mode::read: + return PI_VIRTUAL_ACCESS_FLAG_READ_ONLY; + case ext::oneapi::experimental::address_access_mode::read_write: + return PI_VIRTUAL_ACCESS_FLAG_RW; + case ext::oneapi::experimental::address_access_mode::none: + return 0; + default: + throw sycl::exception(make_error_code(errc::invalid), + "Invalid address_access_mode."); + } +} + +class physical_mem_impl { +public: + physical_mem_impl(const device &SyclDevice, const context &SyclContext, + size_t NumBytes) + : MDevice(getSyclObjImpl(SyclDevice)), + MContext(getSyclObjImpl(SyclContext)), MNumBytes(NumBytes) { + const PluginPtr &Plugin = MContext->getPlugin(); + + auto Err = Plugin->call_nocheck( + MContext->getHandleRef(), MDevice->getHandleRef(), MNumBytes, + &MPhysicalMem); + + if (Err == PI_ERROR_OUT_OF_RESOURCES || Err == PI_ERROR_OUT_OF_HOST_MEMORY) + throw sycl::exception(make_error_code(errc::memory_allocation), + "Failed to allocate physical memory."); + Plugin->checkPiResult(Err); + } + + ~physical_mem_impl() noexcept(false) { + const PluginPtr &Plugin = MContext->getPlugin(); + Plugin->call(MPhysicalMem); + } + + void *map(uintptr_t Ptr, size_t NumBytes, + ext::oneapi::experimental::address_access_mode Mode, + size_t Offset) const { + sycl::detail::pi::PiVirtualAccessFlags AccessFlags = + AccessModeToVirtualAccessFlags(Mode); + const PluginPtr &Plugin = MContext->getPlugin(); + void *ResultPtr = reinterpret_cast(Ptr); + Plugin->call( + MContext->getHandleRef(), ResultPtr, NumBytes, MPhysicalMem, Offset, + AccessFlags); + return ResultPtr; + } + + context get_context() const { + return createSyclObjFromImpl(MContext); + } + device get_device() const { return createSyclObjFromImpl(MDevice); } + size_t size() const noexcept { return MNumBytes; } + + sycl::detail::pi::PiPhysicalMem &getHandleRef() { return MPhysicalMem; } + const sycl::detail::pi::PiPhysicalMem &getHandleRef() const { + return MPhysicalMem; + } + +private: + sycl::detail::pi::PiPhysicalMem MPhysicalMem = nullptr; + const std::shared_ptr MDevice; + const std::shared_ptr MContext; + const size_t MNumBytes; +}; + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index ce88520fe50dd..f7e023c718462 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -86,6 +86,7 @@ inline namespace _V1 { #define SYCL_EXT_ONEAPI_ANNOTATED_ARG 1 #define SYCL_EXT_ONEAPI_ANNOTATED_PTR 1 #define SYCL_EXT_ONEAPI_COPY_OPTIMIZE 1 +#define SYCL_EXT_ONEAPI_VIRTUAL_MEM 1 #define SYCL_EXT_ONEAPI_USM_MALLOC_PROPERTIES 1 #cmakedefine01 SYCL_ENABLE_KERNEL_FUSION #if SYCL_ENABLE_KERNEL_FUSION diff --git a/sycl/source/physical_mem.cpp b/sycl/source/physical_mem.cpp new file mode 100644 index 0000000000000..d9d6073a68e89 --- /dev/null +++ b/sycl/source/physical_mem.cpp @@ -0,0 +1,38 @@ +//==--- physical_mem.cpp - sycl_ext_oneapi_virtual_mem physical_mem class --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +physical_mem::physical_mem(const device &SyclDevice, const context &SyclContext, + size_t NumBytes) { + if (!SyclDevice.has(aspect::ext_oneapi_virtual_mem)) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_virtual_mem."); + + impl = std::make_shared( + SyclDevice, SyclContext, NumBytes); +} + +void *physical_mem::map(uintptr_t Ptr, size_t NumBytes, + address_access_mode Mode, size_t Offset) const { + return impl->map(Ptr, NumBytes, Mode, Offset); +} + +context physical_mem::get_context() const { return impl->get_context(); } +device physical_mem::get_device() const { return impl->get_device(); } +size_t physical_mem::size() const noexcept { return impl->size(); } + +} // namespace ext::oneapi::experimental +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/virtual_mem.cpp b/sycl/source/virtual_mem.cpp new file mode 100644 index 0000000000000..8cdc5ffba0223 --- /dev/null +++ b/sycl/source/virtual_mem.cpp @@ -0,0 +1,183 @@ +//==- virtual_mem.cpp - sycl_ext_oneapi_virtual_mem virtual mem free funcs -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +// System headers for querying page-size. +#ifdef _WIN32 +#include +#else +#include +#endif + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental { + +__SYCL_EXPORT size_t get_mem_granularity(const device &SyclDevice, + const context &SyclContext, + granularity_mode Mode) { + if (!SyclDevice.has(aspect::ext_oneapi_virtual_mem)) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_virtual_mem."); + + pi_virtual_mem_granularity_info GranularityQuery = [=]() { + switch (Mode) { + case granularity_mode::minimum: + return PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM; + case granularity_mode::recommended: + return PI_EXT_ONEAPI_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED; + } + throw sycl::exception(sycl::make_error_code(sycl::errc::invalid), + "Unrecognized granularity mode."); + }(); + + std::shared_ptr DeviceImpl = + sycl::detail::getSyclObjImpl(SyclDevice); + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); +#ifndef NDEBUG + size_t InfoOutputSize; + Plugin->call( + ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), GranularityQuery, + 0, nullptr, &InfoOutputSize); + assert(InfoOutputSize == sizeof(size_t) && + "Unexpected output size of granularity info query."); +#endif // NDEBUG + size_t Granularity = 0; + Plugin->call( + ContextImpl->getHandleRef(), DeviceImpl->getHandleRef(), GranularityQuery, + sizeof(size_t), &Granularity, nullptr); + return Granularity; +} + +__SYCL_EXPORT size_t get_mem_granularity(const context &SyclContext, + granularity_mode Mode) { + const std::vector Devices = SyclContext.get_devices(); + if (!std::all_of(Devices.cbegin(), Devices.cend(), [](const device &Dev) { + return Dev.has(aspect::ext_oneapi_virtual_mem); + })) { + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "One or more devices in the context does not support " + "aspect::ext_oneapi_virtual_mem."); + } + + // CUDA only needs page-size granularity. + if (SyclContext.get_backend() == backend::ext_oneapi_cuda) { +#ifdef _WIN32 + SYSTEM_INFO SystemInfo; + GetSystemInfo(&SystemInfo); + return static_cast(SystemInfo.dwPageSize); +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif + } + + // Otherwise, we find the least common multiple of granularity of the devices + // in the context. + size_t LCMGranularity = get_mem_granularity(Devices[0], SyclContext, Mode); + for (size_t I = 1; I < Devices.size(); ++I) { + size_t DevGranularity = get_mem_granularity(Devices[I], SyclContext, Mode); + size_t GCD = LCMGranularity; + size_t Rem = DevGranularity % GCD; + while (Rem != 0) { + std::swap(GCD, Rem); + Rem %= GCD; + } + LCMGranularity *= DevGranularity / GCD; + } + return LCMGranularity; +} + +__SYCL_EXPORT uintptr_t reserve_virtual_mem(uintptr_t Start, size_t NumBytes, + const context &SyclContext) { + std::vector Devs = SyclContext.get_devices(); + if (std::any_of(Devs.cbegin(), Devs.cend(), [](const device &Dev) { + return !Dev.has(aspect::ext_oneapi_virtual_mem); + })) + throw sycl::exception( + sycl::make_error_code(sycl::errc::feature_not_supported), + "One or more devices in the supplied context does not support " + "aspect::ext_oneapi_virtual_mem."); + + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + void *OutPtr = nullptr; + Plugin->call( + ContextImpl->getHandleRef(), reinterpret_cast(Start), NumBytes, + &OutPtr); + return reinterpret_cast(OutPtr); +} + +__SYCL_EXPORT void free_virtual_mem(uintptr_t Ptr, size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), reinterpret_cast(Ptr), NumBytes); +} + +__SYCL_EXPORT void set_access_mode(const void *Ptr, size_t NumBytes, + address_access_mode Mode, + const context &SyclContext) { + sycl::detail::pi::PiVirtualAccessFlags AccessFlags = + sycl::detail::AccessModeToVirtualAccessFlags(Mode); + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, AccessFlags); +} + +__SYCL_EXPORT address_access_mode get_access_mode(const void *Ptr, + size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); +#ifndef NDEBUG + size_t InfoOutputSize; + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE, 0, nullptr, &InfoOutputSize); + assert(InfoOutputSize == sizeof(sycl::detail::pi::PiVirtualAccessFlags) && + "Unexpected output size of access mode info query."); +#endif // NDEBUG + sycl::detail::pi::PiVirtualAccessFlags AccessFlags; + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes, + PI_EXT_ONEAPI_VIRTUAL_MEM_INFO_ACCESS_MODE, + sizeof(sycl::detail::pi::PiVirtualAccessFlags), &AccessFlags, nullptr); + + if (AccessFlags & PI_VIRTUAL_ACCESS_FLAG_RW) + return address_access_mode::read_write; + if (AccessFlags & PI_VIRTUAL_ACCESS_FLAG_READ_ONLY) + return address_access_mode::read; + return address_access_mode::none; +} + +__SYCL_EXPORT void unmap(const void *Ptr, size_t NumBytes, + const context &SyclContext) { + std::shared_ptr ContextImpl = + sycl::detail::getSyclObjImpl(SyclContext); + const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin(); + Plugin->call( + ContextImpl->getHandleRef(), Ptr, NumBytes); +} + +} // Namespace ext::oneapi::experimental +} // namespace _V1 +} // Namespace sycl diff --git a/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp b/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp new file mode 100644 index 0000000000000..cbbcf52e3ab25 --- /dev/null +++ b/sycl/test-e2e/VirtualMem/vector_with_virtual_mem.cpp @@ -0,0 +1,236 @@ +// REQUIRES: aspect-ext_oneapi_virtual_mem, usm_shared_allocations + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include + +#include +#include +#include + +namespace syclext = sycl::ext::oneapi::experimental; + +// Find the least common multiple of the context and device granularities. This +// value can be used for aligning both physical memory allocations and for +// reserving virtual memory ranges. +size_t GetLCMGranularity(const sycl::device &Dev, const sycl::context &Ctx) { + size_t CtxGranularity = syclext::get_mem_granularity(MContext); + size_t DevGranularity = syclext::get_mem_granularity(MDevice, MContext); + + size_t GCD = CtxGranularity; + size_t Rem = DevGranularity % GCD; + while (Rem != 0) { + std::swap(GCD, Rem); + Rem %= GCD; + } + return (DevGranularity / GCD) * LCMGranularity; +} + +template class VirtualVector { +public: + VirtualVector(sycl::queue &Q) + : MDevice{Q.get_device()}, MContext{Q.get_context()}, + MGranularity{GetLCMGranularity(MDevice, MContext)} {}; + + ~VirtualVector() { + // Free all mapped ranges. + unmap_all(); + for (const VirtualAddressRange &VARange : MVARanges) + syclext::free_virtual_mem(VARange.Ptr, VARange.Size, MContext); + // Physical memory allocations will be freed when the physical_mem objects + // die with MPhysicalMemMappings. + } + + void reserve(size_t NewSize) { + // If we already have more memory than required, we can return. + size_t NewByteSize = sizeof(T) * NewSize; + if (NewByteSize <= MByteSize) { + MSize = NewSize; + return; + } + + // Align the size by the granularity. + size_t AlignedNewByteSize = AlignByteSize(NewByteSize); + size_t AlignedNewVARangeSize = AlignedNewByteSize - MByteSize; + + // Try to reserve virtual memory at the end of the existing one. + uintptr_t CurrentEnd = reinterpret_cast(MBasePtr) + MByteSize; + uintptr_t NewVAPtr = syclext::reserve_virtual_mem( + CurrentEnd, AlignedNewVARangeSize, MContext); + + // If we failed to get a ptr to the end of the current range, we need to + // recreate the whole range. + if (CurrentEnd && NewVAPtr != CurrentEnd) { + // First we need to free the virtual address range we just reserved. + syclext::free_virtual_mem(NewVAPtr, AlignedNewVARangeSize, MContext); + + // Recreate the full range and update the new VA ptr. CurrentEnd is no + // longer valid after this call. + NewVAPtr = RecreateAddressRange(AlignedNewByteSize); + } else { + // Otherwise we need to register the new range. + MVARanges.emplace_back(NewVAPtr, AlignedNewVARangeSize); + + // If there was no base pointer previously, this is now the new base. + if (!MBasePtr) + MBasePtr = reinterpret_cast(NewVAPtr); + } + + // Create new physical memory allocation and map the new range to it. + syclext::physical_mem NewPhysicalMem{MDevice, MContext, + AlignedNewVARangeSize}; + void *MappedPtr = + NewPhysicalMem.map(NewVAPtr, AlignedNewVARangeSize, + syclext::address_access_mode::read_write); + MPhysicalMemMappings.push_back( + std::make_pair(std::move(NewPhysicalMem), MappedPtr)); + + // Update the byte size of the vector. + MSize = NewSize; + MByteSize = AlignedNewByteSize; + } + + size_t size() const noexcept { return MSize; } + T *data() const noexcept { return MBasePtr; } + +private: + size_t AlignByteSize(size_t UnalignedByteSize) const { + return ((UnalignedByteSize + MGranularity - 1) / MGranularity) * + MGranularity; + } + + void unmap_all() { + for (std::pair &Mapping : + MPhysicalMemMappings) { + if (Mapping.second == 0) + continue; + syclext::unmap(Mapping.second, Mapping.first.size(), MContext); + Mapping.second = 0; + } + } + + uintptr_t RecreateAddressRange(size_t AlignedNewByteSize) { + // Reserve the full range. + uintptr_t NewFullVAPtr = + syclext::reserve_virtual_mem(AlignedNewByteSize, MContext); + + // Unmap the old virtual address ranges. + unmap_all(); + + // Remap all existing ranges. + uintptr_t NewEnd = NewFullVAPtr; + for (std::pair &Mapping : + MPhysicalMemMappings) { + Mapping.second = + Mapping.first.map(NewEnd, Mapping.first.size(), + syclext::address_access_mode::read_write); + NewEnd += Mapping.first.size(); + } + + // Free the old ranges. + for (const VirtualAddressRange &VARange : MVARanges) + syclext::free_virtual_mem(VARange.Ptr, VARange.Size, MContext); + + // Insert the newly reserved range to the saved ranges. + MVARanges.clear(); + MVARanges.emplace_back(NewFullVAPtr, AlignedNewByteSize); + + // Update the base pointer to point to the new start. + MBasePtr = reinterpret_cast(NewFullVAPtr); + + // Return the new end of the mapped ranges. + return NewEnd; + } + + struct VirtualAddressRange { + VirtualAddressRange(uintptr_t Ptr, size_t Size) : Ptr{Ptr}, Size{Size} {} + + uintptr_t Ptr; + size_t Size; + }; + + sycl::device MDevice; + sycl::context MContext; + + std::vector MVARanges; + std::vector> MPhysicalMemMappings; + + T *MBasePtr = nullptr; + size_t MSize = 0; + size_t MByteSize = 0; + + const size_t MGranularity = 0; +}; + +static constexpr size_t NumIters = 10; +static constexpr size_t WriteValueOffset = 42; +static constexpr size_t NumWorkItems = 512; + +int main() { + sycl::queue Q; + + VirtualVector Vec(Q); + + // To better test the functionality, try to allocate below the granularity + // but enough to require more memory for some iterations. + size_t SizeIncrement = 11; + size_t MinSizeGran = + syclext::get_mem_granularity(Q.get_device(), Q.get_context()) / + sizeof(int); + SizeIncrement = std::max(MinSizeGran / 2 - 1, SizeIncrement); + + // Each work-item will work on multiple elements. + size_t NumElemsPerWI = 1 + (SizeIncrement - 1) / NumWorkItems; + + for (size_t I = 0; I < NumIters; ++I) { + // Increment the size of the vector. + size_t NewVecSize = (I + 1) * SizeIncrement; + Vec.reserve(NewVecSize); + assert(Vec.size() == NewVecSize); + + // Populate to the new memory + int *VecDataPtr = Vec.data(); + size_t StartOffset = I * SizeIncrement; + size_t IterWriteValueOffset = WriteValueOffset * (I + 1); + Q.parallel_for(sycl::range<1>{NumWorkItems}, [=](sycl::item<1> Idx) { + for (size_t J = 0; J < NumElemsPerWI; ++J) { + size_t LoopIdx = J * Idx.get_range(0) + Idx; + size_t OffsetIdx = StartOffset + LoopIdx; + if (OffsetIdx < NewVecSize) + VecDataPtr[OffsetIdx] = LoopIdx + IterWriteValueOffset; + } + }).wait_and_throw(); + + // Copy back the values and verify. + int *CopyBack = sycl::malloc_shared(NewVecSize, Q); + + // TODO: Level-zero (excluding on PVC) does not currently allow copy across + // virtual memory ranges, even if they are consequtive. + syclext::architecture DevArch = + Q.get_device().get_info(); + if (Q.get_backend() == sycl::backend::ext_oneapi_level_zero && + DevArch != syclext::architecture::intel_gpu_pvc && + DevArch != syclext::architecture::intel_gpu_pvc_vg) { + Q.parallel_for(sycl::range<1>{NewVecSize}, [=](sycl::id<1> Idx) { + CopyBack[Idx] = VecDataPtr[Idx]; + }).wait_and_throw(); + } else { + Q.copy(VecDataPtr, CopyBack, NewVecSize).wait_and_throw(); + } + + for (size_t J = 0; J < NewVecSize; ++J) { + int ExpectedVal = + J % SizeIncrement + WriteValueOffset * (J / SizeIncrement + 1); + if (CopyBack[J] != ExpectedVal) { + std::cout << "Comparison failed at index " << J << ": " << CopyBack[J] + << " != " << ExpectedVal << std::endl; + return 1; + } + } + sycl::free(CopyBack, Q); + } + + return 0; +} diff --git a/sycl/test/abi/pi_cuda_symbol_check.dump b/sycl/test/abi/pi_cuda_symbol_check.dump index d3047c6bb1cd0..e6b19e97d1b87 100644 --- a/sycl/test/abi/pi_cuda_symbol_check.dump +++ b/sycl/test/abi/pi_cuda_symbol_check.dump @@ -146,6 +146,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_hip_symbol_check.dump b/sycl/test/abi/pi_hip_symbol_check.dump index c83b4a4ba6122..530ad95722494 100644 --- a/sycl/test/abi/pi_hip_symbol_check.dump +++ b/sycl/test/abi/pi_hip_symbol_check.dump @@ -146,6 +146,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index d6cc82870c669..93cd4c4de10bb 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -145,6 +145,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -170,4 +173,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_nativecpu_symbol_check.dump b/sycl/test/abi/pi_nativecpu_symbol_check.dump index 850e6d22fdb72..c63f579ca6b53 100644 --- a/sycl/test/abi/pi_nativecpu_symbol_check.dump +++ b/sycl/test/abi/pi_nativecpu_symbol_check.dump @@ -146,6 +146,9 @@ piextMemSampledImageHandleDestroy piextMemUnsampledImageCreate piextMemUnsampledImageHandleDestroy piextPeerAccessGetInfo +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextPlatformCreateWithNativeHandle piextPlatformGetNativeHandle piextPluginGetOpaqueData @@ -171,4 +174,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index daaf7bbee5de5..8807d1647ebdc 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -133,6 +133,9 @@ piextMemGetNativeHandle piextMemImageAllocate piextMemImageCopy piextMemImageCreateWithNativeHandle +piextPhysicalMemCreate +piextPhysicalMemRelease +piextPhysicalMemRetain piextMemImageFree piextMemImageGetInfo piextMemImportOpaqueFD @@ -170,4 +173,11 @@ piextUSMImport piextUSMPitchedAlloc piextUSMRelease piextUSMSharedAlloc +piextVirtualMemFree +piextVirtualMemGetInfo +piextVirtualMemGranularityGetInfo +piextVirtualMemMap +piextVirtualMemReserve +piextVirtualMemSetAccess +piextVirtualMemUnmap piextWaitExternalSemaphore diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 6391a69978a56..ddef17e8966ca 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2989,6 +2989,15 @@ _ZN4sycl3_V13ext5intel12experimental15online_compilerILNS3_15source_languageE1EE _ZN4sycl3_V13ext5intel12experimental9pipe_base13get_pipe_nameB5cxx11EPKv _ZN4sycl3_V13ext5intel12experimental9pipe_base17wait_non_blockingERKNS0_5eventE _ZN4sycl3_V13ext6oneapi10level_zero6detail11make_deviceERKNS0_8platformEm +_ZN4sycl3_V13ext6oneapi12experimental12physical_memC1ERKNS0_6deviceERKNS0_7contextEm +_ZN4sycl3_V13ext6oneapi12experimental12physical_memC2ERKNS0_6deviceERKNS0_7contextEm +_ZN4sycl3_V13ext6oneapi12experimental15get_access_modeEPKvmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental15set_access_modeEPKvmNS3_19address_access_modeERKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental16free_virtual_memEmmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental19get_mem_granularityERKNS0_6deviceERKNS0_7contextENS3_16granularity_modeE +_ZN4sycl3_V13ext6oneapi12experimental19get_mem_granularityERKNS0_7contextENS3_16granularity_modeE +_ZN4sycl3_V13ext6oneapi12experimental19reserve_virtual_memEmmRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental5unmapEPKvmRKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_5queueE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_16image_descriptorERKNS0_6deviceERKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental12create_imageENS3_16image_mem_handleERKNS3_22bindless_image_samplerERKNS3_16image_descriptorERKNS0_5queueE @@ -3584,6 +3593,10 @@ _ZNK4sycl3_V114interop_handle16getNativeContextEv _ZNK4sycl3_V115device_selector13select_deviceEv _ZNK4sycl3_V116default_selectorclERKNS0_6deviceE _ZNK4sycl3_V120accelerator_selectorclERKNS0_6deviceE +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem10get_deviceEv +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem11get_contextEv +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem3mapEmmNS3_19address_access_modeEm +_ZNK4sycl3_V13ext6oneapi12experimental12physical_mem4sizeEv _ZNK4sycl3_V13ext6oneapi12experimental4node14get_successorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node16get_predecessorsEv _ZNK4sycl3_V13ext6oneapi12experimental4node8get_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index bcfdab110778d..5c8d4805f14df 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -607,6 +607,10 @@ ??0kernel_id@_V1@sycl@@AEAA@PEBD@Z ??0kernel_id@_V1@sycl@@QEAA@$$QEAV012@@Z ??0kernel_id@_V1@sycl@@QEAA@AEBV012@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV012345@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBVqueue@45@_K@Z +??0physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBVdevice@45@AEBVcontext@45@_K@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEAA@AEBV?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV0123456@@Z ??0modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBV0123456@@Z @@ -677,6 +681,7 @@ ??1kernel@_V1@sycl@@QEAA@XZ ??1kernel_bundle_plain@detail@_V1@sycl@@QEAA@XZ ??1kernel_id@_V1@sycl@@QEAA@XZ +??1physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1node@experimental@oneapi@ext@_V1@sycl@@QEAA@XZ ??1platform@_V1@sycl@@QEAA@XZ @@ -694,6 +699,8 @@ ??4?$OwnerLessBase@Vkernel@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z +??4?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z +??4?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z ??4?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEAAAEAV0123@$$QEAV0123@@Z @@ -759,6 +766,8 @@ ??4kernel_bundle_plain@detail@_V1@sycl@@QEAAAEAV0123@AEBV0123@@Z ??4kernel_id@_V1@sycl@@QEAAAEAV012@$$QEAV012@@Z ??4kernel_id@_V1@sycl@@QEAAAEAV012@AEBV012@@Z +??4physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z +??4physical_mem@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@AEBV012345@@Z ??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@$$QEAV0123456@@Z ??4modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV0123456@AEBV0123456@@Z ??4node@experimental@oneapi@ext@_V1@sycl@@QEAAAEAV012345@$$QEAV012345@@Z @@ -780,6 +789,7 @@ ??8kernel@_V1@sycl@@QEBA_NAEBV012@@Z ??8kernel_bundle_plain@detail@_V1@sycl@@QEBA_NAEBV0123@@Z ??8kernel_id@_V1@sycl@@QEBA_NAEBV012@@Z +??8physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_NAEBV012345@@Z ??8platform@_V1@sycl@@QEBA_NAEBV012@@Z ??8queue@_V1@sycl@@QEBA_NAEBV012@@Z ??8sampler@_V1@sycl@@QEBA_NAEBV012@@Z @@ -792,6 +802,7 @@ ??9kernel@_V1@sycl@@QEBA_NAEBV012@@Z ??9kernel_bundle_plain@detail@_V1@sycl@@QEBA_NAEBV0123@@Z ??9kernel_id@_V1@sycl@@QEBA_NAEBV012@@Z +??9physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_NAEBV012345@@Z ??9platform@_V1@sycl@@QEBA_NAEBV012@@Z ??9queue@_V1@sycl@@QEBA_NAEBV012@@Z ??9sampler@_V1@sycl@@QEBA_NAEBV012@@Z @@ -4035,6 +4046,8 @@ ?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVkernel@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vkernel_id@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vkernel_id@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVkernel_id@34@@Z +?ext_oneapi_owner_before@?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@2oneapi@ext@34@@Z +?ext_oneapi_owner_before@?$OwnerLessBase@Vphysical_mem@experimental@oneapi@ext@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVphysical_mem@experimental@oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vplatform@_V1@sycl@@@2oneapi@ext@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vplatform@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBVplatform@34@@Z ?ext_oneapi_owner_before@?$OwnerLessBase@Vqueue@_V1@sycl@@@detail@_V1@sycl@@QEBA_NAEBV?$weak_object_base@Vqueue@_V1@sycl@@@2oneapi@ext@34@@Z @@ -4072,12 +4085,14 @@ ?find_device_intersection@detail@_V1@sycl@@YA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@V?$kernel_bundle@$00@_V1@sycl@@V?$allocator@V?$kernel_bundle@$00@_V1@sycl@@@std@@@5@@Z ?free@_V1@sycl@@YAXPEAXAEBVcontext@12@AEBUcode_location@detail@12@@Z ?free@_V1@sycl@@YAXPEAXAEBVqueue@12@AEBUcode_location@detail@12@@Z -?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z +?free_virtual_mem@experimental@oneapi@ext@_V1@sycl@@YAX_K0AEBVcontext@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVqueue@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@W4image_type@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@W4image_type@12345@AEBVqueue@45@@Z +?free_image_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVqueue@45@@Z +?free_mipmap_mem@experimental@oneapi@ext@_V1@sycl@@YAXUimage_mem_handle@12345@AEBVdevice@45@AEBVcontext@45@@Z ?frexp_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@PEAH@Z ?frexp_impl@detail@_V1@sycl@@YAMMPEAH@Z ?frexp_impl@detail@_V1@sycl@@YANNPEAH@Z @@ -4167,6 +4182,7 @@ ?getStartTime@HostProfilingInfo@detail@_V1@sycl@@QEBA_KXZ ?getType@handler@_V1@sycl@@AEAA?AW4CGTYPE@CG@detail@23@XZ ?getValueFromDynamicParameter@detail@_V1@sycl@@YAPEAXAEAVdynamic_parameter_base@1experimental@oneapi@ext@23@@Z +?get_access_mode@experimental@oneapi@ext@_V1@sycl@@YA?AW4address_access_mode@12345@PEBX_KAEBVcontext@45@@Z ?get_addressing_mode@sampler@_V1@sycl@@QEBA?AW4addressing_mode@23@XZ ?get_allocator_internal@buffer_plain@detail@_V1@sycl@@IEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ ?get_allocator_internal@image_plain@detail@_V1@sycl@@IEBAAEBV?$unique_ptr@VSYCLMemObjAllocator@detail@_V1@sycl@@U?$default_delete@VSYCLMemObjAllocator@detail@_V1@sycl@@@std@@@std@@XZ @@ -4186,10 +4202,12 @@ ?get_context@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVcontext@56@XZ ?get_context@kernel@_V1@sycl@@QEBA?AVcontext@23@XZ ?get_context@kernel_bundle_plain@detail@_V1@sycl@@QEBA?AVcontext@34@XZ +?get_context@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVcontext@56@XZ ?get_context@queue@_V1@sycl@@QEBA?AVcontext@23@XZ ?get_coordinate_normalization_mode@sampler@_V1@sycl@@QEBA?AW4coordinate_normalization_mode@23@XZ ?get_count@image_plain@detail@_V1@sycl@@IEBA_KXZ ?get_descriptor@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBAAEBUimage_descriptor@23456@XZ +?get_device@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVdevice@56@XZ ?get_device@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AVdevice@56@XZ ?get_device@queue@_V1@sycl@@QEBA?AVdevice@23@XZ ?get_devices@context@_V1@sycl@@QEBA?AV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@XZ @@ -4215,6 +4233,8 @@ ?get_kernel_ids@_V1@sycl@@YA?AV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@std@@XZ ?get_kernel_ids@kernel_bundle_plain@detail@_V1@sycl@@QEBA?AV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@std@@XZ ?get_max_statement_size@stream@_V1@sycl@@QEBA_KXZ +?get_mem_granularity@experimental@oneapi@ext@_V1@sycl@@YA_KAEBVcontext@45@W4granularity_mode@12345@@Z +?get_mem_granularity@experimental@oneapi@ext@_V1@sycl@@YA_KAEBVdevice@45@AEBVcontext@45@W4granularity_mode@12345@@Z ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVdevice@45@AEBVcontext@45@@Z ?get_mip_level_mem_handle@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@U612345@IAEBVqueue@45@@Z ?get_mip_level_mem_handle@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AUimage_mem_handle@23456@I@Z @@ -4324,6 +4344,7 @@ ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVdevice@12@AEBVcontext@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBUcode_location@detail@12@@Z ?malloc_shared@_V1@sycl@@YAPEAX_KAEBVqueue@12@AEBVproperty_list@12@AEBUcode_location@detail@12@@Z +?map@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBAPEAX_K0W4address_access_mode@23456@0@Z ?map_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z ?map_external_image_memory@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVqueue@45@@Z ?map_external_memory_array@experimental@oneapi@ext@_V1@sycl@@YA?AUimage_mem_handle@12345@Uinterop_mem_handle@12345@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z @@ -4383,6 +4404,7 @@ ?remquo_impl@detail@_V1@sycl@@YA?AVhalf@half_impl@123@V45123@0PEAH@Z ?remquo_impl@detail@_V1@sycl@@YAMMMPEAH@Z ?remquo_impl@detail@_V1@sycl@@YANNNPEAH@Z +?reserve_virtual_mem@experimental@oneapi@ext@_V1@sycl@@YA_K_K0AEBVcontext@45@@Z ?reset@filter_selector@ONEAPI@_V1@sycl@@QEBAXXZ ?reset@filter_selector@oneapi@ext@_V1@sycl@@QEBAXXZ ?sampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@PEBXIAEBUcode_location@123@@Z @@ -4406,6 +4428,7 @@ ?setStateSpecConstSet@handler@_V1@sycl@@AEAAXXZ ?setType@handler@_V1@sycl@@AEAAXW4CGTYPE@CG@detail@23@@Z ?setUserFacingNodeType@handler@_V1@sycl@@AEAAXW4node_type@experimental@oneapi@ext@23@@Z +?set_access_mode@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KW4address_access_mode@12345@AEBVcontext@45@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z ?set_final_data_internal@buffer_plain@detail@_V1@sycl@@IEAAXXZ ?set_final_data_internal@image_plain@detail@_V1@sycl@@IEAAXAEBV?$function@$$A6AXAEBV?$function@$$A6AXPEAX@Z@std@@@Z@std@@@Z @@ -4421,6 +4444,7 @@ ?sincos_impl@detail@_V1@sycl@@YANNPEAN@Z ?single_task@handler@_V1@sycl@@QEAAXVkernel@23@@Z ?size@exception_list@_V1@sycl@@QEBA_KXZ +?size@physical_mem@experimental@oneapi@ext@_V1@sycl@@QEBA_KXZ ?size@stream@_V1@sycl@@QEBA_KXZ ?start@HostProfilingInfo@detail@_V1@sycl@@QEAAXXZ ?start_fusion@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEAAXXZ @@ -4435,6 +4459,7 @@ ?sycl_category@_V1@sycl@@YAAEBVerror_category@std@@XZ ?throwIfActionIsCreated@handler@_V1@sycl@@AEAAXXZ ?throw_asynchronous@queue@_V1@sycl@@QEAAXXZ +?unmap@experimental@oneapi@ext@_V1@sycl@@YAXPEBX_KAEBVcontext@45@@Z ?unsampledImageConstructorNotification@detail@_V1@sycl@@YAXPEAX0AEBV?$optional@W4image_target@_V1@sycl@@@std@@W4mode@access@23@PEBXIAEBUcode_location@123@@Z ?unsampledImageConstructorNotification@image_plain@detail@_V1@sycl@@IEAAXAEBUcode_location@234@PEAXPEBXIQEA_KW4image_format@34@@Z ?unsampledImageDestructorNotification@image_plain@detail@_V1@sycl@@IEAAXPEAX@Z diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 56803e7eab5bb..a598be1f6be7d 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1330,6 +1330,61 @@ inline pi_result mock_piextEnqueueDeviceGlobalVariableRead( return PI_SUCCESS; } +inline pi_result +mock_piextVirtualMemGranularityGetInfo(pi_context, pi_device, + pi_virtual_mem_granularity_info, size_t, + void *, size_t *) { + return PI_SUCCESS; +} + +inline pi_result +mock_piextPhysicalMemCreate(pi_context, pi_device, size_t, + pi_physical_mem *ret_physical_mem) { + *ret_physical_mem = createDummyHandle(); + return PI_SUCCESS; +} + +inline pi_result mock_piextPhysicalMemRetain(pi_physical_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextPhysicalMemRelease(pi_physical_mem) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemReserve(pi_context, const void *start, + size_t range_size, + void **ret_ptr) { + *ret_ptr = + start ? const_cast(start) : createDummyHandle(range_size); + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemFree(pi_context, const void *, size_t) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemMap(pi_context, const void *, size_t, + pi_physical_mem, size_t, + pi_virtual_access_flags) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemUnmap(pi_context, const void *, size_t) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemSetAccess(pi_context, const void *, size_t, + pi_virtual_access_flags) { + return PI_SUCCESS; +} + +inline pi_result mock_piextVirtualMemGetInfo(pi_context, const void *, size_t, + pi_virtual_mem_info, size_t, + void *, size_t *) { + return PI_SUCCESS; +} + inline pi_result mock_piextPluginGetOpaqueData(void *opaque_data_param, void **opaque_data_return) { return PI_SUCCESS;