Skip to content

Candidate for v0.8.2 release tag #1163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ option(UR_USE_MSAN "enable MemorySanitizer" OFF)
option(UR_USE_TSAN "enable ThreadSanitizer" OFF)
option(UR_ENABLE_TRACING "enable api tracing through xpti" OFF)
option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF)
option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" OFF)
option(UMF_ENABLE_POOL_TRACKING "Build UMF with pool tracking" ON)
option(UR_BUILD_ADAPTER_L0 "build level 0 adapter from SYCL" OFF)
option(UR_BUILD_ADAPTER_OPENCL "build opencl adapter from SYCL" OFF)
option(UR_BUILD_ADAPTER_CUDA "build cuda adapter from SYCL" OFF)
Expand Down
20 changes: 20 additions & 0 deletions include/ur.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ class ur_structure_type_v(IntEnum):
KERNEL_EXEC_INFO_PROPERTIES = 31 ## ::ur_kernel_exec_info_properties_t
KERNEL_ARG_VALUE_PROPERTIES = 32 ## ::ur_kernel_arg_value_properties_t
KERNEL_ARG_LOCAL_PROPERTIES = 33 ## ::ur_kernel_arg_local_properties_t
USM_ALLOC_LOCATION_DESC = 35 ## ::ur_usm_alloc_location_desc_t
EXP_COMMAND_BUFFER_DESC = 0x1000 ## ::ur_exp_command_buffer_desc_t
EXP_SAMPLER_MIP_PROPERTIES = 0x2000 ## ::ur_exp_sampler_mip_properties_t
EXP_INTEROP_MEM_DESC = 0x2001 ## ::ur_exp_interop_mem_desc_t
Expand Down Expand Up @@ -1530,6 +1531,25 @@ class ur_usm_device_desc_t(Structure):
("flags", ur_usm_device_mem_flags_t) ## [in] device memory allocation flags.
]

###############################################################################
## @brief USM allocation location descriptor
##
## @details
## - Specify these properties in ::urUSMHostAlloc, ::urUSMDeviceAlloc and
## ::urUSMSharedAlloc via ::ur_usm_desc_t as part of a `pNext` chain.
##
## @remarks
## _Analogues_
## - cl_intel_mem_alloc_buffer_location
class ur_usm_alloc_location_desc_t(Structure):
_fields_ = [
("stype", ur_structure_type_t), ## [in] type of this structure, must be
## ::UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC
("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure
## NOTE(review): c_ulong is ctypes' C `unsigned long`, whose width is
## platform-dependent, while the C API header declares this member as
## `uint32_t` -- confirm the binding's type mapping is intended.
("location", c_ulong) ## [in] ID of the global memory partition in which the memory
## should be allocated.
]

###############################################################################
## @brief USM pool descriptor type
class ur_usm_pool_desc_t(Structure):
Expand Down
23 changes: 23 additions & 0 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ typedef enum ur_structure_type_t {
UR_STRUCTURE_TYPE_KERNEL_EXEC_INFO_PROPERTIES = 31, ///< ::ur_kernel_exec_info_properties_t
UR_STRUCTURE_TYPE_KERNEL_ARG_VALUE_PROPERTIES = 32, ///< ::ur_kernel_arg_value_properties_t
UR_STRUCTURE_TYPE_KERNEL_ARG_LOCAL_PROPERTIES = 33, ///< ::ur_kernel_arg_local_properties_t
UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC = 35, ///< ::ur_usm_alloc_location_desc_t
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC = 0x1000, ///< ::ur_exp_command_buffer_desc_t
UR_STRUCTURE_TYPE_EXP_SAMPLER_MIP_PROPERTIES = 0x2000, ///< ::ur_exp_sampler_mip_properties_t
UR_STRUCTURE_TYPE_EXP_INTEROP_MEM_DESC = 0x2001, ///< ::ur_exp_interop_mem_desc_t
Expand Down Expand Up @@ -3220,6 +3221,25 @@ typedef struct ur_usm_device_desc_t {

} ur_usm_device_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief USM allocation location descriptor
///
/// @details
/// - Specify these properties in ::urUSMHostAlloc, ::urUSMDeviceAlloc and
/// ::urUSMSharedAlloc via ::ur_usm_desc_t as part of a `pNext` chain.
/// - Carries a single value, `location`, naming the global memory partition
/// requested for the allocation.
///
/// @remarks
/// _Analogues_
/// - cl_intel_mem_alloc_buffer_location
typedef struct ur_usm_alloc_location_desc_t {
ur_structure_type_t stype; ///< [in] type of this structure, must be
///< ::UR_STRUCTURE_TYPE_USM_ALLOC_LOCATION_DESC
const void *pNext; ///< [in][optional] pointer to extension-specific structure
uint32_t location; ///< [in] ID of the global memory partition in which the memory
///< should be allocated.

} ur_usm_alloc_location_desc_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief USM pool descriptor type
typedef struct ur_usm_pool_desc_t {
Expand Down Expand Up @@ -3257,6 +3277,7 @@ typedef struct ur_usm_pool_limits_desc_t {
/// - Any flags/hints passed through pUSMDesc only affect the single
/// allocation.
/// - See also ::ur_usm_host_desc_t.
/// - See also ::ur_usm_alloc_location_desc_t.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
Expand Down Expand Up @@ -3300,6 +3321,7 @@ urUSMHostAlloc(
/// - Any flags/hints passed through pUSMDesc only affect the single
/// allocation.
/// - See also ::ur_usm_device_desc_t.
/// - See also ::ur_usm_alloc_location_desc_t.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
Expand Down Expand Up @@ -3346,6 +3368,7 @@ urUSMDeviceAlloc(
/// allocation.
/// - See also ::ur_usm_host_desc_t.
/// - See also ::ur_usm_device_desc_t.
/// - See also ::ur_usm_alloc_location_desc_t.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
Expand Down
3 changes: 3 additions & 0 deletions scripts/core/registry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -645,3 +645,6 @@ etors:
- name: KERNEL_ARG_LOCAL_PROPERTIES
desc: $x_kernel_arg_local_properties_t
value: '33'
- name: USM_ALLOC_LOCATION_DESC
desc: $x_usm_alloc_location_desc_t
value: '35'
20 changes: 20 additions & 0 deletions scripts/core/usm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,23 @@ members:
desc: "[in] device memory allocation flags."
--- #--------------------------------------------------------------------------
type: struct
desc: "USM allocation location desc"
details:
- Specify these properties in $xUSMHostAlloc, $xUSMDeviceAlloc and
$xUSMSharedAlloc via $x_usm_desc_t as part of a `pNext` chain.
analogue:
- "cl_intel_mem_alloc_buffer_location"
class: $xUSM
name: $x_usm_alloc_location_desc_t
base: $x_base_desc_t
members:
- type: uint32_t
name: location
desc: >
[in] Identifies the ID of global memory partition to which the memory
should be allocated.
--- #--------------------------------------------------------------------------
type: struct
desc: "USM pool descriptor type"
class: $xUSM
name: $x_usm_pool_desc_t
Expand Down Expand Up @@ -212,6 +229,7 @@ details:
- "Allocations served from different memory pools must be isolated and must not reside on the same page."
- "Any flags/hints passed through pUSMDesc only affect the single allocation."
- "See also $x_usm_host_desc_t."
- "See also $x_usm_alloc_location_desc_t."
params:
- type: $x_context_handle_t
name: hContext
Expand Down Expand Up @@ -253,6 +271,7 @@ details:
- "Allocations served from different memory pools must be isolated and must not reside on the same page."
- "Any flags/hints passed through pUSMDesc only affect the single allocation."
- "See also $x_usm_device_desc_t."
- "See also $x_usm_alloc_location_desc_t."
params:
- type: $x_context_handle_t
name: hContext
Expand Down Expand Up @@ -298,6 +317,7 @@ details:
- "Any flags/hints passed through pUSMDesc only affect the single allocation."
- "See also $x_usm_host_desc_t."
- "See also $x_usm_device_desc_t."
- "See also $x_usm_alloc_location_desc_t."
params:
- type: $x_context_handle_t
name: hContext
Expand Down
12 changes: 10 additions & 2 deletions scripts/templates/ldrddi.cpp.mako
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ namespace ur_loader
break;
}
adapterIndex++;
if (adapterIndex == NumEntries) {
break;
}
}
}

Expand Down Expand Up @@ -142,14 +145,17 @@ namespace ur_loader
%else:
<%param_replacements={}%>
%for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)):
%if 0 == i:
%if not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle':
// extract platform's function pointer table
auto dditable = reinterpret_cast<${item['obj']}*>( ${item['pointer']}${item['name']} )->dditable;
auto ${th.make_pfn_name(n, tags, obj)} = dditable->${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)};
if( nullptr == ${th.make_pfn_name(n, tags, obj)} )
return ${X}_RESULT_ERROR_UNINITIALIZED;

<%break%>
%endif
%endfor
%for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)):
%if 'range' in item:
<%
add_local = True
Expand All @@ -158,13 +164,15 @@ namespace ur_loader
for( size_t i = ${item['range'][0]}; i < ${item['range'][1]}; ++i )
${item['name']}Local[ i ] = reinterpret_cast<${item['obj']}*>( ${item['name']}[ i ] )->handle;
%else:
%if not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle':
// convert loader handle to platform handle
%if item['optional']:
${item['name']} = ( ${item['name']} ) ? reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle : nullptr;
%else:
${item['name']} = reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle;
%endif
%endif
%endif

%endfor
// forward to device-platform
Expand All @@ -185,7 +193,7 @@ namespace ur_loader
%if item['release']:
// release loader handle
${item['factory']}.release( ${item['name']} );
%else:
%elif not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle':
try
{
%if 'range' in item:
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
ur_context_handle_t hContext, ur_device_handle_t hDevice)
: Context(hContext),
Device(hDevice), CudaGraph{nullptr}, CudaGraphExec{nullptr}, RefCount{1} {
: Context(hContext), Device(hDevice), CudaGraph{nullptr},
CudaGraphExec{nullptr}, RefCount{1}, NextSyncPoint{0} {
urContextRetain(hContext);
urDeviceRetain(hDevice);
}
Expand Down
6 changes: 3 additions & 3 deletions source/adapters/cuda/command_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ struct ur_exp_command_buffer_handle_t_ {

void RegisterSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint,
std::shared_ptr<CUgraphNode> CuNode) {
SyncPoints[SyncPoint] = CuNode;
SyncPoints[SyncPoint] = std::move(CuNode);
NextSyncPoint++;
}

Expand All @@ -193,12 +193,12 @@ struct ur_exp_command_buffer_handle_t_ {
}

// Helper to register next sync point
// @param CuNode Node to register as next sycn point
// @param CuNode Node to register as next sync point
// @return Pointer to the sync that registers the Node
ur_exp_command_buffer_sync_point_t
AddSyncPoint(std::shared_ptr<CUgraphNode> CuNode) {
ur_exp_command_buffer_sync_point_t SyncPoint = NextSyncPoint;
RegisterSyncPoint(SyncPoint, CuNode);
RegisterSyncPoint(SyncPoint, std::move(CuNode));
return SyncPoint;
}

Expand Down
15 changes: 8 additions & 7 deletions source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1143,17 +1143,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
if (Result != UR_RESULT_SUCCESS)
return Result;

ur_platform_handle_t *Plat = static_cast<ur_platform_handle_t *>(
malloc(NumPlatforms * sizeof(ur_platform_handle_t)));
Result = urPlatformGet(&AdapterHandle, 1, NumPlatforms, Plat, nullptr);
std::vector<ur_platform_handle_t> Platforms(NumPlatforms);

Result =
urPlatformGet(&AdapterHandle, 1, NumPlatforms, Platforms.data(), nullptr);
if (Result != UR_RESULT_SUCCESS)
return Result;

// Iterate through platforms to find device that matches nativeHandle
for (uint32_t j = 0; j < NumPlatforms; ++j) {
auto SearchRes =
std::find_if(begin(Plat[j]->Devices), end(Plat[j]->Devices), IsDevice);
if (SearchRes != end(Plat[j]->Devices)) {
for (const auto Platform : Platforms) {
auto SearchRes = std::find_if(std::begin(Platform->Devices),
std::end(Platform->Devices), IsDevice);
if (SearchRes != end(Platform->Devices)) {
*phDevice = static_cast<ur_device_handle_t>((*SearchRes).get());
return UR_RESULT_SUCCESS;
}
Expand Down
29 changes: 9 additions & 20 deletions source/adapters/cuda/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
//===----------------------------------------------------------------------===//

#include "event.hpp"
#include "common.hpp"
#include "context.hpp"
#include "device.hpp"
#include "queue.hpp"
Expand All @@ -19,35 +18,25 @@

ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type,
ur_context_handle_t Context,
ur_queue_handle_t Queue, CUstream Stream,
ur_queue_handle_t Queue,
native_type EvEnd, native_type EvQueued,
native_type EvStart, CUstream Stream,
uint32_t StreamToken)
: CommandType{Type}, RefCount{1}, HasOwnership{true},
HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false},
StreamToken{StreamToken}, EvEnd{nullptr}, EvStart{nullptr},
EvQueued{nullptr}, Queue{Queue}, Stream{Stream}, Context{Context} {

bool ProfilingEnabled = Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE;

UR_CHECK_ERROR(cuEventCreate(
&EvEnd, ProfilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING));

if (ProfilingEnabled) {
UR_CHECK_ERROR(cuEventCreate(&EvQueued, CU_EVENT_DEFAULT));
UR_CHECK_ERROR(cuEventCreate(&EvStart, CU_EVENT_DEFAULT));
}

if (Queue != nullptr) {
urQueueRetain(Queue);
}
StreamToken{StreamToken}, EventID{0}, EvEnd{EvEnd}, EvStart{EvStart},
EvQueued{EvQueued}, Queue{Queue}, Stream{Stream}, Context{Context} {
urQueueRetain(Queue);
urContextRetain(Context);
}

ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context,
CUevent EventNative)
: CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false},
HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false},
StreamToken{std::numeric_limits<uint32_t>::max()}, EvEnd{EventNative},
EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, Context{Context} {
StreamToken{std::numeric_limits<uint32_t>::max()}, EventID{0},
EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr},
Stream{nullptr}, Context{Context} {
urContextRetain(Context);
}

Expand Down
18 changes: 15 additions & 3 deletions source/adapters/cuda/event.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <cuda.h>
#include <ur/ur.hpp>

#include "common.hpp"
#include "queue.hpp"

/// UR Event mapping to CUevent
Expand Down Expand Up @@ -82,8 +83,18 @@ struct ur_event_handle_t_ {
static ur_event_handle_t
makeNative(ur_command_t Type, ur_queue_handle_t Queue, CUstream Stream,
uint32_t StreamToken = std::numeric_limits<uint32_t>::max()) {
return new ur_event_handle_t_(Type, Queue->getContext(), Queue, Stream,
StreamToken);
const bool ProfilingEnabled =
Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE;
native_type EvEnd = nullptr, EvQueued = nullptr, EvStart = nullptr;
UR_CHECK_ERROR(cuEventCreate(
&EvEnd, ProfilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING));

if (ProfilingEnabled) {
UR_CHECK_ERROR(cuEventCreate(&EvQueued, CU_EVENT_DEFAULT));
UR_CHECK_ERROR(cuEventCreate(&EvStart, CU_EVENT_DEFAULT));
}
return new ur_event_handle_t_(Type, Queue->getContext(), Queue, EvEnd,
EvQueued, EvStart, Stream, StreamToken);
}

static ur_event_handle_t makeWithNative(ur_context_handle_t context,
Expand All @@ -99,7 +110,8 @@ struct ur_event_handle_t_ {
// This constructor is private to force programmers to use the makeNative /
// make_user static members in order to create a pi_event for CUDA.
ur_event_handle_t_(ur_command_t Type, ur_context_handle_t Context,
ur_queue_handle_t Queue, CUstream Stream,
ur_queue_handle_t Queue, native_type EvEnd,
native_type EvQueued, native_type EvStart, CUstream Stream,
uint32_t StreamToken);

// This constructor is private to force programmers to use the
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/cuda/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ cudaToUrImageChannelFormat(CUarray_format cuda_format,

ur_result_t urTextureCreate(ur_sampler_handle_t hSampler,
const ur_image_desc_t *pImageDesc,
CUDA_RESOURCE_DESC ResourceDesc,
const CUDA_RESOURCE_DESC &ResourceDesc,
ur_exp_image_handle_t *phRetImage) {

try {
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/cuda/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,15 @@ struct ur_mem_handle_t_ {
/// Constructs the UR allocation for an unsampled image object
ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array,
CUsurfObject Surf, ur_mem_type_t ImageType)
: Context{Context}, RefCount{1}, MemType{Type::Surface},
: Context{Context}, RefCount{1}, MemType{Type::Surface}, MemFlags{0},
Mem{ImageMem{Array, (void *)Surf, ImageType, nullptr}} {
urContextRetain(Context);
}

/// Constructs the UR allocation for a sampled image object
ur_mem_handle_t_(ur_context_handle_t Context, CUarray Array, CUtexObject Tex,
ur_sampler_handle_t Sampler, ur_mem_type_t ImageType)
: Context{Context}, RefCount{1}, MemType{Type::Texture},
: Context{Context}, RefCount{1}, MemType{Type::Texture}, MemFlags{0},
Mem{ImageMem{Array, (void *)Tex, ImageType, Sampler}} {
urContextRetain(Context);
}
Expand Down
3 changes: 2 additions & 1 deletion source/adapters/cuda/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) {

if (!this->BuildOptions.empty()) {
unsigned int MaxRegs;
bool Valid = getMaxRegistersJitOptionValue(BuildOptions, MaxRegs);
const bool Valid =
getMaxRegistersJitOptionValue(this->BuildOptions, MaxRegs);
if (Valid) {
Options.push_back(CU_JIT_MAX_REGISTERS);
OptionVals.push_back(reinterpret_cast<void *>(MaxRegs));
Expand Down
Loading