From 84ddd10df9001062cf7b15415feeaa8460929a55 Mon Sep 17 00:00:00 2001 From: rbegam Date: Tue, 11 Feb 2020 16:46:13 -0800 Subject: [PATCH 01/19] [SYCL] Replaces some of the CL_* enums with PI_* enums. Signed-off-by: rbegam --- sycl/include/CL/sycl/detail/pi.h | 312 +++++++++++------- sycl/include/CL/sycl/queue.hpp | 2 +- .../detail/error_handling/enqueue_kernel.cpp | 9 +- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/detail/event_info.hpp | 2 +- sycl/source/detail/program_impl.cpp | 8 +- .../program_manager/program_manager.cpp | 8 +- sycl/source/detail/queue_impl.cpp | 2 +- sycl/source/detail/queue_impl.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 6 +- sycl/source/queue.cpp | 2 +- sycl/test/usm/memadvise.cpp | 4 +- 12 files changed, 225 insertions(+), 134 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 686fdc49f753a..e6495688ae605 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -83,16 +83,6 @@ typedef enum { PI_EVENT_QUEUED = CL_QUEUED } _pi_event_status; -typedef enum { - PI_COMMAND_KERNEL_LAUNCH = CL_COMMAND_NDRANGE_KERNEL, - PI_COMMAND_MEMBUFFER_WRITE = CL_COMMAND_WRITE_BUFFER, - PI_COMMAND_MEMBUFFER_READ = CL_COMMAND_READ_BUFFER, - PI_COMMAND_USER = CL_COMMAND_USER, - PI_COMMAND_EVENTS_WAIT = CL_COMMAND_MARKER, - PI_COMMAND_MEMBUFFER_COPY = CL_COMMAND_COPY_BUFFER, - PI_COMMAND_MEMBUFFER_FILL = CL_COMMAND_FILL_BUFFER -} _pi_command_type; - typedef enum { PI_PLATFORM_INFO_EXTENSIONS = CL_PLATFORM_EXTENSIONS, PI_PLATFORM_INFO_NAME = CL_PLATFORM_NAME, @@ -101,17 +91,6 @@ typedef enum { PI_PLATFORM_INFO_VERSION = CL_PLATFORM_VERSION, } _pi_platform_info; -typedef enum { - PI_PROGRAM_INFO_REFERENCE_COUNT = CL_PROGRAM_REFERENCE_COUNT, - PI_PROGRAM_INFO_CONTEXT = CL_PROGRAM_CONTEXT, - PI_PROGRAM_INFO_NUM_DEVICES = CL_PROGRAM_NUM_DEVICES, - PI_PROGRAM_INFO_DEVICES = CL_PROGRAM_DEVICES, - PI_PROGRAM_INFO_SOURCE = CL_PROGRAM_SOURCE, - PI_PROGRAM_INFO_BINARY_SIZES = CL_PROGRAM_BINARY_SIZES, - PI_PROGRAM_INFO_BINARIES = CL_PROGRAM_BINARIES, - PI_PROGRAM_INFO_KERNEL_NAMES = CL_PROGRAM_KERNEL_NAMES -} _pi_program_info; - typedef enum { PI_PROGRAM_BUILD_INFO_STATUS = CL_PROGRAM_BUILD_STATUS, PI_PROGRAM_BUILD_INFO_OPTIONS = CL_PROGRAM_BUILD_OPTIONS, @@ -134,13 +113,61 @@ typedef enum : pi_uint64 { PI_DEVICE_TYPE_ACC = CL_DEVICE_TYPE_ACCELERATOR } _pi_device_type; +typedef enum { + PI_DEVICE_MEM_CACHE_TYPE_NONE = CL_NONE, + PI_DEVICE_MEM_CACHE_TYPE_READ_ONLY_CACHE = CL_READ_ONLY_CACHE, + PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE = CL_READ_WRITE_CACHE +} _pi_device_mem_cache_type; + +typedef enum { + PI_DEVICE_LOCAL_MEM_TYPE_LOCAL = CL_LOCAL, + PI_DEVICE_LOCAL_MEM_TYPE_GLOBAL = CL_GLOBAL +} _pi_device_local_mem_type; + typedef enum { PI_DEVICE_INFO_TYPE = CL_DEVICE_TYPE, + PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, + PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, PI_DEVICE_INFO_VENDOR_ID = CL_DEVICE_VENDOR_ID, + PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, + PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, + PI_DEVICE_INFO_PROFILE = CL_DEVICE_PROFILE, + PI_DEVICE_INFO_VERSION = CL_DEVICE_VERSION, + PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, + PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, + PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, + PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, + PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = CL_DEVICE_PARTITION_MAX_SUB_DEVICES, + PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, + PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, + PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, + PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, + PI_DEVICE_INFO_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, + PI_DEVICE_INFO_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, PI_DEVICE_INFO_MAX_COMPUTE_UNITS = CL_DEVICE_MAX_COMPUTE_UNITS, + PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES = CL_DEVICE_MAX_WORK_ITEM_SIZES, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE, + PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES = CL_DEVICE_MAX_WORK_ITEM_SIZES, + PI_DEVICE_INFO_SINGLE_FP_CONFIG = CL_DEVICE_SINGLE_FP_CONFIG, + PI_DEVICE_INFO_HALF_FP_CONFIG = CL_DEVICE_HALF_FP_CONFIG, + PI_DEVICE_INFO_DOUBLE_FP_CONFIG = CL_DEVICE_DOUBLE_FP_CONFIG, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, + PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, + PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, + PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, + PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = CL_DEVICE_PROFILING_TIMER_RESOLUTION, + PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, + PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, + PI_DEVICE_INFO_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, + PI_DEVICE_INFO_AVAILABLE = CL_DEVICE_AVAILABLE, + PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, + PI_DEVICE_INFO_QUEUE_PROPERTIES = CL_DEVICE_QUEUE_PROPERTIES, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, @@ -148,102 +175,68 @@ typedef enum { PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY = CL_DEVICE_MAX_CLOCK_FREQUENCY, PI_DEVICE_INFO_ADDRESS_BITS = CL_DEVICE_ADDRESS_BITS, - PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, - PI_DEVICE_INFO_IMAGE_SUPPORT = CL_DEVICE_IMAGE_SUPPORT, PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS = CL_DEVICE_MAX_READ_IMAGE_ARGS, PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = CL_DEVICE_MAX_WRITE_IMAGE_ARGS, - PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT = CL_DEVICE_IMAGE2D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH = CL_DEVICE_IMAGE2D_MAX_WIDTH, - PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = CL_DEVICE_IMAGE3D_MAX_HEIGHT, + PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT = CL_DEVICE_IMAGE2D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH = CL_DEVICE_IMAGE3D_MAX_WIDTH, + PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = CL_DEVICE_IMAGE3D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH = CL_DEVICE_IMAGE3D_MAX_DEPTH, PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, - PI_DEVICE_INFO_MAX_SAMPLERS = CL_DEVICE_MAX_SAMPLERS, + PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, + PI_DEVICE_INFO_IMAGE_SUPPORT = CL_DEVICE_IMAGE_SUPPORT, PI_DEVICE_INFO_MAX_PARAMETER_SIZE = CL_DEVICE_MAX_PARAMETER_SIZE, + PI_DEVICE_INFO_MAX_SAMPLERS = CL_DEVICE_MAX_SAMPLERS, PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = CL_DEVICE_MEM_BASE_ADDR_ALIGN, - PI_DEVICE_INFO_HALF_FP_CONFIG = CL_DEVICE_HALF_FP_CONFIG, - PI_DEVICE_INFO_SINGLE_FP_CONFIG = CL_DEVICE_SINGLE_FP_CONFIG, - PI_DEVICE_INFO_DOUBLE_FP_CONFIG = CL_DEVICE_DOUBLE_FP_CONFIG, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, - PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, - PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, - PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, - PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, - PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = CL_DEVICE_PROFILING_TIMER_RESOLUTION, - PI_DEVICE_INFO_IS_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, - PI_DEVICE_INFO_IS_AVAILABLE = CL_DEVICE_AVAILABLE, - PI_DEVICE_INFO_IS_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, - PI_DEVICE_INFO_IS_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, - PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, - PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, - PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, - PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, - PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, - PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, - PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, - PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, - PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, - PI_DEVICE_INFO_PROFILE = CL_DEVICE_PROFILE, - PI_DEVICE_INFO_VERSION = CL_DEVICE_VERSION, - PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, - PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, - PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, - PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, - PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, - PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = CL_DEVICE_PARTITION_MAX_SUB_DEVICES, - PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, - PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, - PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT = CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT = CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL } _pi_device_info; typedef enum { - PI_LOCAL_MEM_TYPE_LOCAL = CL_LOCAL, - PI_LOCAL_MEM_TYPE_GLOBAL = CL_GLOBAL -} _pi_local_mem_type; + PI_PROGRAM_INFO_REFERENCE_COUNT = CL_PROGRAM_REFERENCE_COUNT, + PI_PROGRAM_INFO_CONTEXT = CL_PROGRAM_CONTEXT, + PI_PROGRAM_INFO_NUM_DEVICES = CL_PROGRAM_NUM_DEVICES, + PI_PROGRAM_INFO_DEVICES = CL_PROGRAM_DEVICES, + PI_PROGRAM_INFO_SOURCE = CL_PROGRAM_SOURCE, + PI_PROGRAM_INFO_BINARY_SIZES = CL_PROGRAM_BINARY_SIZES, + PI_PROGRAM_INFO_BINARIES = CL_PROGRAM_BINARIES, + PI_PROGRAM_INFO_KERNEL_NAMES = CL_PROGRAM_KERNEL_NAMES +} _pi_program_info; -// TODO: populate typedef enum { PI_CONTEXT_INFO_DEVICES = CL_CONTEXT_DEVICES, - PI_CONTEXT_INFO_NUM_DEVICES = CL_CONTEXT_NUM_DEVICES, - PI_CONTEXT_INFO_REFERENCE_COUNT = CL_CONTEXT_REFERENCE_COUNT + PI_CONTEXT_INFO_NUM_DEVICES = CL_CONTEXT_NUM_DEVICES, + PI_CONTEXT_INFO_PROPERTIES = CL_CONTEXT_PROPERTIES, + PI_CONTEXT_INFO_REFERENCE_COUNT = CL_CONTEXT_REFERENCE_COUNT, } _pi_context_info; -// TODO: populate typedef enum { + PI_QUEUE_INFO_CONTEXT = CL_QUEUE_CONTEXT, PI_QUEUE_INFO_DEVICE = CL_QUEUE_DEVICE, - PI_QUEUE_INFO_REFERENCE_COUNT = CL_QUEUE_REFERENCE_COUNT, + PI_QUEUE_INFO_DEVICE_DEFAULT = CL_QUEUE_DEVICE_DEFAULT, PI_QUEUE_INFO_PROPERTIES = CL_QUEUE_PROPERTIES, - PI_QUEUE_INFO_CONTEXT = CL_QUEUE_CONTEXT + PI_QUEUE_INFO_REFERENCE_COUNT = CL_QUEUE_REFERENCE_COUNT, + PI_QUEUE_INFO_SIZE = CL_QUEUE_SIZE } _pi_queue_info; -typedef enum { - PI_KERNEL_INFO_FUNCTION_NAME = CL_KERNEL_FUNCTION_NAME, - PI_KERNEL_INFO_NUM_ARGS = CL_KERNEL_NUM_ARGS, - PI_KERNEL_INFO_REFERENCE_COUNT = CL_KERNEL_REFERENCE_COUNT, - PI_KERNEL_INFO_CONTEXT = CL_KERNEL_CONTEXT, - PI_KERNEL_INFO_PROGRAM = CL_KERNEL_PROGRAM -} _pi_kernel_info; - -typedef enum { +/*typedef enum { PI_KERNEL_GROUP_INFO_SIZE = CL_KERNEL_WORK_GROUP_SIZE, PI_KERNEL_COMPILE_GROUP_INFO_SIZE = CL_KERNEL_COMPILE_WORK_GROUP_SIZE, PI_KERNEL_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE -} _pi_kernel_group_info; +} _pi_kernel_group_info;*/ typedef enum { PI_IMAGE_INFO_FORMAT = CL_IMAGE_FORMAT, @@ -255,6 +248,74 @@ typedef enum { PI_IMAGE_INFO_DEPTH = CL_IMAGE_DEPTH } _pi_image_info; +typedef enum { + PI_KERNEL_INFO_FUNCTION_NAME = CL_KERNEL_FUNCTION_NAME, + PI_KERNEL_INFO_NUM_ARGS = CL_KERNEL_NUM_ARGS, + PI_KERNEL_INFO_REFERENCE_COUNT = CL_KERNEL_REFERENCE_COUNT, + PI_KERNEL_INFO_CONTEXT = CL_KERNEL_CONTEXT, + PI_KERNEL_INFO_PROGRAM = CL_KERNEL_PROGRAM, + PI_KERNEL_INFO_ATTRIBUTES = CL_KERNEL_ATTRIBUTES +} _pi_kernel_info; + +typedef enum { + PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE = CL_KERNEL_GLOBAL_WORK_SIZE, + PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE = CL_KERNEL_WORK_GROUP_SIZE, + PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE = CL_KERNEL_COMPILE_WORK_GROUP_SIZE, + PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE, + PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE +} _pi_kernel_group_info; + +typedef enum { + PI_EVENT_INFO_COMMAND_QUEUE = CL_EVENT_COMMAND_QUEUE, + PI_EVENT_INFO_CONTEXT = CL_EVENT_CONTEXT, + PI_EVENT_INFO_COMMAND_TYPE = CL_EVENT_COMMAND_TYPE, + PI_EVENT_INFO_COMMAND_EXECUTION_STATUS = CL_EVENT_COMMAND_EXECUTION_STATUS, + PI_EVENT_INFO_REFERENCE_COUNT = CL_EVENT_REFERENCE_COUNT +} _pi_event_info; + +typedef enum { + PI_COMMAND_TYPE_NDRANGE_KERNEL = CL_COMMAND_NDRANGE_KERNEL, + PI_COMMAND_TYPE_MEM_BUFFER_READ = CL_COMMAND_READ_BUFFER, + PI_COMMAND_TYPE_MEM_BUFFER_WRITE = CL_COMMAND_WRITE_BUFFER, + PI_COMMAND_TYPE_MEM_BUFFER_COPY = CL_COMMAND_COPY_BUFFER, + PI_COMMAND_TYPE_MEM_BUFFER_MAP = CL_COMMAND_MAP_BUFFER, + PI_COMMAND_TYPE_MEM_BUFFER_UNMAP = CL_COMMAND_UNMAP_MEM_OBJECT, + PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT = CL_COMMAND_READ_BUFFER_RECT, + PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT = CL_COMMAND_WRITE_BUFFER_RECT, + PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT = CL_COMMAND_COPY_BUFFER_RECT, + PI_COMMAND_TYPE_USER = CL_COMMAND_USER, + PI_COMMAND_TYPE_MEM_BUFFER_FILL = CL_COMMAND_FILL_BUFFER, + PI_COMMAND_TYPE_IMAGE_READ = CL_COMMAND_READ_IMAGE, + PI_COMMAND_TYPE_IMAGE_WRITE = CL_COMMAND_WRITE_IMAGE, + PI_COMMAND_TYPE_IMAGE_COPY = CL_COMMAND_COPY_IMAGE, + PI_COMMAND_TYPE_NATIVE_KERNEL = CL_COMMAND_NATIVE_KERNEL, + PI_COMMAND_TYPE_COPY_BUFFER_TO_IMAGE = CL_COMMAND_COPY_BUFFER_TO_IMAGE, + PI_COMMAND_TYPE_COPY_IMAGE_TO_BUFFER = CL_COMMAND_COPY_IMAGE_TO_BUFFER, + PI_COMMAND_TYPE_MAP_IMAGE = CL_COMMAND_MAP_IMAGE, + PI_COMMAND_TYPE_MARKER = CL_COMMAND_MARKER, + PI_COMMAND_TYPE_ACQUIRE_GL_OBJECTS = CL_COMMAND_ACQUIRE_GL_OBJECTS, + PI_COMMAND_TYPE_RELEASE_GL_OBJECTS = CL_COMMAND_RELEASE_GL_OBJECTS, + PI_COMMAND_TYPE_BARRIER = CL_COMMAND_BARRIER, + PI_COMMAND_TYPE_MIGRATE_MEM_OBJECTS = CL_COMMAND_MIGRATE_MEM_OBJECTS, + PI_COMMAND_TYPE_FILL_IMAGE = CL_COMMAND_FILL_IMAGE, + PI_COMMAND_TYPE_SVM_FREE = CL_COMMAND_SVM_FREE, + PI_COMMAND_TYPE_SVM_MEMCPY = CL_COMMAND_SVM_MEMCPY, + PI_COMMAND_TYPE_SVM_MEMFILL = CL_COMMAND_SVM_MEMFILL, + PI_COMMAND_TYPE_SVM_MAP = CL_COMMAND_SVM_MAP, + PI_COMMAND_TYPE_SVM_UNMAP = CL_COMMAND_SVM_UNMAP +} _pi_command_type; + +/*typedef enum { + PI_COMMAND_KERNEL_LAUNCH = CL_COMMAND_NDRANGE_KERNEL, + PI_COMMAND_MEMBUFFER_WRITE = CL_COMMAND_WRITE_BUFFER, + PI_COMMAND_MEMBUFFER_READ = CL_COMMAND_READ_BUFFER, + PI_COMMAND_USER = CL_COMMAND_USER, + PI_COMMAND_EVENTS_WAIT = CL_COMMAND_MARKER, + PI_COMMAND_MEMBUFFER_COPY = CL_COMMAND_COPY_BUFFER, + PI_COMMAND_MEMBUFFER_FILL = CL_COMMAND_FILL_BUFFER +} _pi_command_type;*/ + typedef enum { PI_MEM_TYPE_BUFFER = CL_MEM_OBJECT_BUFFER, PI_MEM_TYPE_IMAGE2D = CL_MEM_OBJECT_IMAGE2D, @@ -265,6 +326,19 @@ typedef enum { PI_MEM_TYPE_IMAGE1D_BUFFER = CL_MEM_OBJECT_IMAGE1D_BUFFER } _pi_mem_type; +typedef enum { + PI_MEM_ADVICE_SET_READ_MOSTLY = 0, // hints that memory will be read from frequently and written to rarely + PI_MEM_ADVICE_CLEAR_READ_MOSTLY, // removes the affect of PI_MEM_ADVICE_SET_READ_MOSTLY + PI_MEM_ADVICE_SET_PREFERRED_LOCATION, // hints that the preferred memory location is the specified device + PI_MEM_ADVICE_CLEAR_PREFERRED_LOCATION, // removes the affect of PI_MEM_ADVICE_SET_PREFERRED_LOCATION + PI_MEM_ADVICE_SET_ACCESSED_BY, // hints that memory will be accessed by the specified device + PI_MEM_ADVICE_CLEAR_ACCESSED_BY, // removes the affect of PI_MEM_ADVICE_SET_ACCESSED_BY + PI_MEM_ADVICE_SET_NON_ATOMIC_MOSTLY, // hints that memory will mostly be accessed non-atomically + PI_MEM_ADVICE_CLEAR_NON_ATOMIC_MOSTLY, // removes the affect of PI_MEM_ADVICE_SET_NON_ATOMIC_MOSTLY + PI_MEM_ADVICE_BIAS_CACHED, // hints that memory should be cached + PI_MEM_ADVICE_BIAS_UNCACHED // hints that memory should not be cached +} _pi_mem_advice; + typedef enum { PI_IMAGE_CHANNEL_ORDER_A = CL_A, PI_IMAGE_CHANNEL_ORDER_R = CL_R, @@ -304,7 +378,6 @@ typedef enum { PI_BUFFER_CREATE_TYPE_REGION = CL_BUFFER_CREATE_TYPE_REGION } _pi_buffer_create_type; -typedef pi_bitfield pi_sampler_properties; const pi_bool PI_TRUE = CL_TRUE; const pi_bool PI_FALSE = CL_FALSE; @@ -332,14 +405,14 @@ typedef enum { PI_SAMPLER_FILTER_MODE_LINEAR = CL_FILTER_LINEAR, } _pi_sampler_filter_mode; -typedef enum { - PI_EVENT_INFO_QUEUE = CL_EVENT_COMMAND_QUEUE, - PI_EVENT_INFO_COMMAND_TYPE = CL_EVENT_COMMAND_TYPE, - PI_EVENT_INFO_REFERENCE_COUNT = CL_EVENT_REFERENCE_COUNT, - PI_EVENT_INFO_COMMAND_EXECUTION_STATUS = CL_EVENT_COMMAND_EXECUTION_STATUS, - PI_EVENT_INFO_CONTEXT = CL_EVENT_CONTEXT -} _pi_event_info; +typedef pi_bitfield pi_device_exec_capabilities; +const pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_KERNEL = CL_EXEC_KERNEL; +const pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL = CL_EXEC_NATIVE_KERNEL; +typedef pi_bitfield pi_sampler_properties; +const pi_sampler_properties PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS = CL_SAMPLER_NORMALIZED_COORDS; +const pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = CL_SAMPLER_ADDRESSING_MODE; +const pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = CL_SAMPLER_FILTER_MODE; // NOTE: this is made 64-bit to match the size of cl_mem_flags to // make the translation to OpenCL transparent. @@ -365,11 +438,19 @@ const pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT = typedef _pi_result pi_result; typedef _pi_platform_info pi_platform_info; typedef _pi_device_type pi_device_type; +typedef _pi_device_mem_cache_type pi_device_mem_cache_type; +typedef _pi_device_local_mem_type pi_device_local_mem_type; typedef _pi_device_info pi_device_info; +typedef _pi_program_info pi_program_info; typedef _pi_context_info pi_context_info; typedef _pi_queue_info pi_queue_info; typedef _pi_image_info pi_image_info; +typedef _pi_kernel_info pi_kernel_info; +typedef _pi_kernel_group_info pi_kernel_group_info; +typedef _pi_event_info pi_event_info; +typedef _pi_command_type pi_command_type; typedef _pi_mem_type pi_mem_type; +typedef _pi_mem_advice pi_mem_advice; typedef _pi_image_channel_order pi_image_channel_order; typedef _pi_image_channel_type pi_image_channel_type; typedef _pi_buffer_create_type pi_buffer_create_type; @@ -377,13 +458,23 @@ typedef _pi_sampler_addressing_mode pi_sampler_addressing_mode; typedef _pi_sampler_filter_mode pi_sampler_filter_mode; typedef _pi_sampler_info pi_sampler_info; typedef _pi_event_status pi_event_status; -typedef _pi_event_info pi_event_info; -typedef _pi_command_type pi_command_type; -typedef _pi_program_info pi_program_info; typedef _pi_program_build_info pi_program_build_info; typedef _pi_program_build_status pi_program_build_status; typedef _pi_kernel_info pi_kernel_info; -typedef _pi_kernel_group_info pi_kernel_group_info; + +// For compatibility with OpenCL define this not as enum. +typedef intptr_t pi_device_partition_property; +static const pi_device_partition_property + PI_DEVICE_PARTITION_EQUALLY = CL_DEVICE_PARTITION_EQUALLY; +static const pi_device_partition_property + PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + +// For compatibility with OpenCL define this not as enum. +typedef pi_bitfield pi_device_affinity_domain; +static const pi_device_affinity_domain + PI_DEVICE_AFFINITY_DOMAIN_NUMA = CL_DEVICE_AFFINITY_DOMAIN_NUMA; +static const pi_device_affinity_domain + PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; // Entry type, matches OpenMP for compatibility struct _pi_offload_entry_struct { @@ -598,7 +689,7 @@ pi_result piDeviceRelease(pi_device device); pi_result piDevicePartition( pi_device device, - const cl_device_partition_property * properties, // TODO: untie from OpenCL + const pi_device_partition_property * properties, pi_uint32 num_devices, pi_device * out_devices, pi_uint32 * out_num_devices); @@ -656,10 +747,10 @@ pi_result piContextRelease(pi_context context); // Queue // pi_result piQueueCreate( - pi_context context, - pi_device device, - pi_queue_properties properties, - pi_queue * queue); + pi_context context, + pi_device device, + pi_queue_properties properties, + pi_queue * queue); pi_result piQueueGetInfo( pi_queue command_queue, @@ -712,7 +803,6 @@ pi_result piMemRetain( pi_result piMemRelease( pi_mem mem); - pi_result piMemBufferPartition( pi_mem buffer, pi_mem_flags flags, @@ -883,7 +973,7 @@ pi_result piEventCreate( pi_result piEventGetInfo( pi_event event, - cl_event_info param_name, // TODO: untie from OpenCL + pi_event_info param_name, size_t param_value_size, void * param_value, size_t * param_value_size_ret); @@ -1301,7 +1391,7 @@ pi_result piextUSMEnqueueMemAdvise( pi_queue queue, const void * ptr, size_t length, - int advice, + pi_mem_advice advice, pi_event * event); /// API to query information about USM allocated pointers diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index bb811decac723..489216dfbf68c 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -239,7 +239,7 @@ class queue { /// @param Length is a number of bytes in the allocation. /// @param Advice is a device-defined advice for the specified allocation. /// @return an event representing advice operation. - event mem_advise(const void *Ptr, size_t Length, int Advice); + event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice); /// Provides hints to the runtime library that data should be made available /// on a device earlier than Unified Shared Memory would normally require it diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 5d733ca7bbfe2..3a7ccf82dd9df 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -40,8 +40,8 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, size_t CompileWGSize[3] = {0}; Plugin.call( - Kernel, Device, PI_KERNEL_COMPILE_GROUP_INFO_SIZE, sizeof(size_t) * 3, - CompileWGSize, nullptr); + Kernel, Device, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, + sizeof(size_t) * 3, CompileWGSize, nullptr); if (CompileWGSize[0] != 0) { // OpenCL 1.x && 2.0: @@ -90,10 +90,11 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the // total number of work-items in the work-group computed as // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater - // than the value specified by PI_KERNEL_GROUP_INFO_SIZE in table 5.21. + // than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in + // table 5.21. size_t KernelWGSize = 0; Plugin.call( - Kernel, Device, PI_KERNEL_GROUP_INFO_SIZE, sizeof(size_t), + Kernel, Device, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t), &KernelWGSize, nullptr); const size_t TotalNumberOfWIs = NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index a7b3b467b8988..c60f3bd0e457c 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -74,7 +74,7 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) RT::PiContext TempContext; getPlugin().call( - MEvent, CL_EVENT_CONTEXT, sizeof(RT::PiContext), &TempContext, nullptr); + MEvent, PI_EVENT_INFO_CONTEXT, sizeof(RT::PiContext), &TempContext, nullptr); if (MContext->getHandleRef() != TempContext) { throw cl::sycl::invalid_parameter_error( "The syclContext must match the OpenCL context associated with the " diff --git a/sycl/source/detail/event_info.hpp b/sycl/source/detail/event_info.hpp index e2ba912cd6d70..e3f487f063ce7 100644 --- a/sycl/source/detail/event_info.hpp +++ b/sycl/source/detail/event_info.hpp @@ -36,7 +36,7 @@ template struct get_event_info { static RetType get(RT::PiEvent Event, const plugin &Plugin) { RetType Result = (RetType)0; // TODO catch an exception and put it to list of asynchronous exceptions - Plugin.call(Event, cl_profiling_info(Param), + Plugin.call(Event, pi_event_info(Param), sizeof(Result), &Result, nullptr); return Result; } diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index 3125008515487..acf0fc32435b9 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -81,10 +81,10 @@ program_impl::program_impl(ContextImplPtr Context, RT::PiProgram Program) : MProgram(Program), MContext(Context), MLinkable(true) { // TODO handle the case when cl_program build is in progress - cl_uint NumDevices; + pi_uint32 NumDevices; const detail::plugin &Plugin = getPlugin(); Plugin.call( - Program, PI_PROGRAM_INFO_NUM_DEVICES, sizeof(cl_uint), &NumDevices, nullptr); + Program, PI_PROGRAM_INFO_NUM_DEVICES, sizeof(pi_uint32), &NumDevices, nullptr); vector_class PiDevices(NumDevices); Plugin.call(Program, PI_PROGRAM_INFO_DEVICES, sizeof(RT::PiDevice) * NumDevices, @@ -402,10 +402,10 @@ cl_uint program_impl::get_info() const { if (is_host()) { throw invalid_object_error("This instance of program is a host instance"); } - cl_uint Result; + pi_uint32 Result; const detail::plugin &Plugin = getPlugin(); Plugin.call(MProgram, PI_PROGRAM_INFO_REFERENCE_COUNT, - sizeof(cl_uint), &Result, nullptr); + sizeof(pi_uint32), &Result, nullptr); return Result; } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index e2959329ce441..74d08711d8643 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -70,7 +70,7 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, // FIXME: we don't yet support multiple devices with a single binary. const detail::plugin &Plugin = Context->getPlugin(); #ifndef _NDEBUG - cl_uint NumDevices = 0; + pi_uint32 NumDevices = 0; Plugin.call(Context->getHandleRef(), PI_CONTEXT_INFO_NUM_DEVICES, sizeof(NumDevices), &NumDevices, @@ -435,7 +435,7 @@ ProgramManager::getClProgramFromClKernel(RT::PiKernel Kernel, RT::PiProgram Program; const detail::plugin &Plugin = Context->getPlugin(); Plugin.call( - Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(cl_program), &Program, nullptr); + Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(RT::PiProgram), &Program, nullptr); return Program; } @@ -446,8 +446,8 @@ string_class ProgramManager::getProgramBuildLog(const RT::PiProgram &Program, Plugin.call(Program, PI_PROGRAM_INFO_DEVICES, 0, nullptr, &Size); vector_class PIDevices(Size / sizeof(RT::PiDevice)); - Plugin.call(Program, PI_PROGRAM_INFO_DEVICES, Size, - PIDevices.data(), nullptr); + Plugin.call(Program, PI_PROGRAM_INFO_DEVICES, + Size, PIDevices.data(), nullptr); string_class Log = "The program was built for " + std::to_string(PIDevices.size()) + " devices"; for (RT::PiDevice &Device : PIDevices) { diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 5b7690cf5fa36..b079577eb3e3d 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -64,7 +64,7 @@ event queue_impl::memcpy(shared_ptr_class Impl, void *Dest, return ResEvent; } -event queue_impl::mem_advise(const void *Ptr, size_t Length, int Advice) { +event queue_impl::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice) { context Context = get_context(); if (Context.is_host()) { return event(); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 2d1fd58e8489a..c8434f0176b1a 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -332,7 +332,7 @@ class queue_impl { /// @param Ptr is a USM pointer to the allocation. /// @param Length is a number of bytes in the allocation. /// @param Advice is a device-defined advice for the specified allocation. - event mem_advise(const void *Ptr, size_t Length, int Advice); + event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice); /// Puts exception to the list of asynchronous ecxeptions. /// diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d9859929191f6..6dfdc29d2726e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -934,11 +934,11 @@ cl_int ExecCGCommand::enqueueImp() { pi_mem MemArg = (pi_mem)AllocaCmd->getMemAllocation(); Plugin.call(Kernel, Arg.MIndex, &MemArg); #else - cl_mem MemArg = (cl_mem)AllocaCmd->getMemAllocation(); + RT::PiMem MemArg = (RT::PiMem)AllocaCmd->getMemAllocation(); Plugin.call(Kernel, Arg.MIndex, - sizeof(cl_mem), &MemArg); + sizeof(RT::PiMem), &MemArg); Plugin.call(Kernel, Arg.MIndex, - sizeof(cl_mem), &MemArg); + sizeof(RT::PiMem), &MemArg); #endif break; } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index c47eba506958d..ea057815b8524 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -102,7 +102,7 @@ event queue::memcpy(void *dest, const void *src, size_t count) { return impl->memcpy(impl, dest, src, count); } -event queue::mem_advise(const void *ptr, size_t length, int advice) { +event queue::mem_advise(const void *ptr, size_t length, pi_mem_advice advice) { return impl->mem_advise(ptr, length, advice); } diff --git a/sycl/test/usm/memadvise.cpp b/sycl/test/usm/memadvise.cpp index a7e152b02d946..111519169c7ac 100644 --- a/sycl/test/usm/memadvise.cpp +++ b/sycl/test/usm/memadvise.cpp @@ -37,7 +37,7 @@ int main() { if (s_head == nullptr) { return -1; } - q.mem_advise(s_head, sizeof(Node), 42); + q.mem_advise(s_head, sizeof(Node), PI_MEM_ADVICE_SET_READ_MOSTLY); Node *s_cur = s_head; for (int i = 0; i < numNodes; i++) { @@ -48,7 +48,7 @@ int main() { if (s_cur->pNext == nullptr) { return -1; } - q.mem_advise(s_cur->pNext, sizeof(Node), 42); + q.mem_advise(s_cur->pNext, sizeof(Node), PI_MEM_ADVICE_SET_READ_MOSTLY); } else { s_cur->pNext = nullptr; } From 6193e298d6b55298b5b4a4bb20ce8ef3cbde22d9 Mon Sep 17 00:00:00 2001 From: rbegam Date: Fri, 28 Feb 2020 17:12:41 -0800 Subject: [PATCH 02/19] [SYCL] Rearranges enum pi_device_info. Signed-off-by: rbegam --- sycl/include/CL/sycl/detail/pi.h | 90 ++++++++++++++++---------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index e6495688ae605..b9bf34fd7d17a 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -126,47 +126,14 @@ typedef enum { typedef enum { PI_DEVICE_INFO_TYPE = CL_DEVICE_TYPE, - PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, - PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, PI_DEVICE_INFO_VENDOR_ID = CL_DEVICE_VENDOR_ID, - PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, - PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, - PI_DEVICE_INFO_PROFILE = CL_DEVICE_PROFILE, - PI_DEVICE_INFO_VERSION = CL_DEVICE_VERSION, - PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, - PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, - PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, - PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, - PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = CL_DEVICE_PARTITION_MAX_SUB_DEVICES, - PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, - PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, - PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, - PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, - PI_DEVICE_INFO_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, - PI_DEVICE_INFO_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, PI_DEVICE_INFO_MAX_COMPUTE_UNITS = CL_DEVICE_MAX_COMPUTE_UNITS, - PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES = CL_DEVICE_MAX_WORK_ITEM_SIZES, + PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE, PI_DEVICE_INFO_SINGLE_FP_CONFIG = CL_DEVICE_SINGLE_FP_CONFIG, PI_DEVICE_INFO_HALF_FP_CONFIG = CL_DEVICE_HALF_FP_CONFIG, PI_DEVICE_INFO_DOUBLE_FP_CONFIG = CL_DEVICE_DOUBLE_FP_CONFIG, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, - PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, - PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, - PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, - PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = CL_DEVICE_PROFILING_TIMER_RESOLUTION, - PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, - PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, - PI_DEVICE_INFO_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, - PI_DEVICE_INFO_AVAILABLE = CL_DEVICE_AVAILABLE, - PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, PI_DEVICE_INFO_QUEUE_PROPERTIES = CL_DEVICE_QUEUE_PROPERTIES, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, @@ -175,8 +142,17 @@ typedef enum { PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY = CL_DEVICE_MAX_CLOCK_FREQUENCY, PI_DEVICE_INFO_ADDRESS_BITS = CL_DEVICE_ADDRESS_BITS, + PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, + PI_DEVICE_INFO_IMAGE_SUPPORT = CL_DEVICE_IMAGE_SUPPORT, PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS = CL_DEVICE_MAX_READ_IMAGE_ARGS, PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = CL_DEVICE_MAX_WRITE_IMAGE_ARGS, PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH = CL_DEVICE_IMAGE2D_MAX_WIDTH, @@ -186,18 +162,44 @@ typedef enum { PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH = CL_DEVICE_IMAGE3D_MAX_DEPTH, PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, - PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, - PI_DEVICE_INFO_IMAGE_SUPPORT = CL_DEVICE_IMAGE_SUPPORT, - PI_DEVICE_INFO_MAX_PARAMETER_SIZE = CL_DEVICE_MAX_PARAMETER_SIZE, PI_DEVICE_INFO_MAX_SAMPLERS = CL_DEVICE_MAX_SAMPLERS, + PI_DEVICE_INFO_MAX_PARAMETER_SIZE = CL_DEVICE_MAX_PARAMETER_SIZE, PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = CL_DEVICE_MEM_BASE_ADDR_ALIGN, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, + PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, + PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, + PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, + PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, + PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = CL_DEVICE_PROFILING_TIMER_RESOLUTION, + PI_DEVICE_INFO_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, + PI_DEVICE_INFO_AVAILABLE = CL_DEVICE_AVAILABLE, + PI_DEVICE_INFO_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, + PI_DEVICE_INFO_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, + PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, + PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, + PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, + PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, + PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, + PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, + PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, + PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, + PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, + PI_DEVICE_INFO_PROFILE = CL_DEVICE_PROFILE, + PI_DEVICE_INFO_VERSION = CL_DEVICE_VERSION, + PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, + PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, + PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, + PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, + PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, + PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, + PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = CL_DEVICE_PARTITION_MAX_SUB_DEVICES, + PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, + PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, PI_DEVICE_INFO_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, PI_DEVICE_INFO_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, From 071df8dec8e2b3454c080a578a3c2f6dfd5ec6ac Mon Sep 17 00:00:00 2001 From: rbegam Date: Fri, 28 Feb 2020 17:33:37 -0800 Subject: [PATCH 03/19] [SYCL] removes unneccessasy comments. Signed-off-by: rbegam --- sycl/include/CL/sycl/detail/pi.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index b9bf34fd7d17a..dffa5aab9611f 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -234,12 +234,6 @@ typedef enum { PI_QUEUE_INFO_SIZE = CL_QUEUE_SIZE } _pi_queue_info; -/*typedef enum { - PI_KERNEL_GROUP_INFO_SIZE = CL_KERNEL_WORK_GROUP_SIZE, - PI_KERNEL_COMPILE_GROUP_INFO_SIZE = CL_KERNEL_COMPILE_WORK_GROUP_SIZE, - PI_KERNEL_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE -} _pi_kernel_group_info;*/ - typedef enum { PI_IMAGE_INFO_FORMAT = CL_IMAGE_FORMAT, PI_IMAGE_INFO_ELEMENT_SIZE = CL_IMAGE_ELEMENT_SIZE, @@ -308,16 +302,6 @@ typedef enum { PI_COMMAND_TYPE_SVM_UNMAP = CL_COMMAND_SVM_UNMAP } _pi_command_type; -/*typedef enum { - PI_COMMAND_KERNEL_LAUNCH = CL_COMMAND_NDRANGE_KERNEL, - PI_COMMAND_MEMBUFFER_WRITE = CL_COMMAND_WRITE_BUFFER, - PI_COMMAND_MEMBUFFER_READ = CL_COMMAND_READ_BUFFER, - PI_COMMAND_USER = CL_COMMAND_USER, - PI_COMMAND_EVENTS_WAIT = CL_COMMAND_MARKER, - PI_COMMAND_MEMBUFFER_COPY = CL_COMMAND_COPY_BUFFER, - PI_COMMAND_MEMBUFFER_FILL = CL_COMMAND_FILL_BUFFER -} _pi_command_type;*/ - typedef enum { PI_MEM_TYPE_BUFFER = CL_MEM_OBJECT_BUFFER, PI_MEM_TYPE_IMAGE2D = CL_MEM_OBJECT_IMAGE2D, From b3a9426ee40c6fd89f01d69efba7a4b0b248fc90 Mon Sep 17 00:00:00 2001 From: John Pennycook Date: Sat, 29 Feb 2020 04:21:41 -0600 Subject: [PATCH 04/19] [SYCL] Fix mismatch between sub_group headers (#1215) sub_group::store took T& for host and const T& for device. Signed-off-by: John Pennycook --- sycl/include/CL/sycl/intel/sub_group_host.hpp | 2 +- .../regression/sub-group-store-const-ref.cpp | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 sycl/test/regression/sub-group-store-const-ref.cpp diff --git a/sycl/include/CL/sycl/intel/sub_group_host.hpp b/sycl/include/CL/sycl/intel/sub_group_host.hpp index d6fade163b117..16d9514fa4333 100644 --- a/sycl/include/CL/sycl/intel/sub_group_host.hpp +++ b/sycl/include/CL/sycl/intel/sub_group_host.hpp @@ -136,7 +136,7 @@ struct sub_group { } template - void store(multi_ptr dst, T &x) const { + void store(multi_ptr dst, const T &x) const { throw runtime_error("Subgroups are not supported on host device. "); } diff --git a/sycl/test/regression/sub-group-store-const-ref.cpp b/sycl/test/regression/sub-group-store-const-ref.cpp new file mode 100644 index 0000000000000..991f56f30a130 --- /dev/null +++ b/sycl/test/regression/sub-group-store-const-ref.cpp @@ -0,0 +1,16 @@ +// RUN: %clangxx -I %sycl_include -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics +// +//==-- sub-group-store-const-ref.cpp ---------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This test checks that sub_group::store supports const reference. +//===----------------------------------------------------------------------===// +#include +using namespace sycl; + +void test(intel::sub_group sg, global_ptr ptr) { sg.store(ptr, 1); } From 3da5473d7487e6330166099c3049918a51bd81c8 Mon Sep 17 00:00:00 2001 From: Sergey Semenov <43845535+sergey-semenov@users.noreply.github.com> Date: Sun, 1 Mar 2020 14:29:51 +0300 Subject: [PATCH 05/19] [SYCL] Fix command cleanup invoked from multiple threads (#1214) This patch fixes a sporadic bug where one thread attempted to clean up a command already deleted by another. Signed-off-by: Sergey Semenov --- sycl/source/detail/event_impl.cpp | 6 +-- sycl/source/detail/scheduler/scheduler.cpp | 8 +++- sycl/source/detail/scheduler/scheduler.hpp | 2 +- .../scheduler/CommandCleanupThreadSafety.cpp | 40 +++++++++++++++++++ sycl/test/scheduler/FinishedCmdCleanup.cpp | 5 ++- 5 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 sycl/test/scheduler/CommandCleanupThreadSafety.cpp diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index a7b3b467b8988..398e0cc7bdc9a 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -95,16 +95,14 @@ event_impl::event_impl(QueueImplPtr Queue) : MQueue(Queue) { void event_impl::wait( std::shared_ptr Self) const { - if (MEvent) // presence of MEvent means the command has been enqueued, so no need to // go via the slow path event waiting in the scheduler waitInternal(); else if (MCommand) - detail::Scheduler::getInstance().waitForEvent(std::move(Self)); + detail::Scheduler::getInstance().waitForEvent(Self); if (MCommand && !SYCLConfig::get()) - detail::Scheduler::getInstance().cleanupFinishedCommands( - static_cast(MCommand)); + detail::Scheduler::getInstance().cleanupFinishedCommands(std::move(Self)); } void event_impl::wait_and_throw( diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 4da29c0a23299..37c2529d44863 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -123,9 +123,13 @@ void Scheduler::waitForEvent(EventImplPtr Event) { GraphProcessor::waitForEvent(std::move(Event)); } -void Scheduler::cleanupFinishedCommands(Command *FinishedCmd) { +void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { std::lock_guard lock(MGraphLock); - MGraphBuilder.cleanupFinishedCommands(FinishedCmd); + Command *FinishedCmd = static_cast(FinishedEvent->getCommand()); + // The command might have been cleaned up (and set to nullptr) by another + // thread + if (FinishedCmd) + MGraphBuilder.cleanupFinishedCommands(FinishedCmd); } void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index e0429510eed1b..90000f6ab558c 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -79,7 +79,7 @@ class Scheduler { // Removes finished non-leaf non-alloca commands from the subgraph (assuming // that all its commands have been waited for). - void cleanupFinishedCommands(Command *FinishedCmd); + void cleanupFinishedCommands(EventImplPtr FinishedEvent); // Creates nodes in the graph, that update Req with the pointer to the host // memory which contains the latest data of the memory object. New diff --git a/sycl/test/scheduler/CommandCleanupThreadSafety.cpp b/sycl/test/scheduler/CommandCleanupThreadSafety.cpp new file mode 100644 index 0000000000000..f1d39db4e8995 --- /dev/null +++ b/sycl/test/scheduler/CommandCleanupThreadSafety.cpp @@ -0,0 +1,40 @@ +// UNSUPPORTED: windows +// RUN: %clangxx -fsycl %s -o %t.out -lpthread +// RUN: %CPU_RUN_PLACEHOLDER %t.out +#include + +#include +#include +#include +#include + +// This test checks that the command graph cleanup works properly when +// invoked from multiple threads. +using namespace cl::sycl; + +class Foo; + +event submitTask(queue &Q, buffer &Buf) { + return Q.submit([&](handler &Cgh) { + auto Acc = Buf.get_access(Cgh); + Cgh.single_task([=]() { Acc[0] = 42; }); + }); +} + +int main() { + queue Q; + buffer Buf(range<1>(1)); + + // Create multiple commands, each one dependent on the previous + std::vector Events; + const std::size_t NTasks = 16; + for (std::size_t I = 0; I < NTasks; ++I) + Events.push_back(submitTask(Q, Buf)); + + // Initiate cleanup from multiple threads + std::vector Threads; + for (event &E : Events) + Threads.emplace_back([&]() { E.wait(); }); + for (std::thread &T : Threads) + T.join(); +} diff --git a/sycl/test/scheduler/FinishedCmdCleanup.cpp b/sycl/test/scheduler/FinishedCmdCleanup.cpp index e0f736886b040..cf036d81d65fa 100644 --- a/sycl/test/scheduler/FinishedCmdCleanup.cpp +++ b/sycl/test/scheduler/FinishedCmdCleanup.cpp @@ -1,6 +1,7 @@ // RUN: %clangxx -fsycl -I %sycl_source_dir %s -o %t.out // RUN: %t.out #include +#include #include #include @@ -76,7 +77,9 @@ int main() { addEdge(InnerA, &LeafA, &AllocaA); addEdge(InnerA, InnerB, &AllocaB); - TS.cleanupFinishedCommands(InnerA); + std::shared_ptr Event{new detail::event_impl{}}; + Event->setCommand(InnerA); + TS.cleanupFinishedCommands(Event); TS.removeRecordForMemObj(detail::getSyclObjImpl(BufC).get()); assert(NInnerCommandsAlive == 0); From 4b5d25b2f97953da0355b146185440510e4dc2c6 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Sun, 1 Mar 2020 17:54:45 +0300 Subject: [PATCH 06/19] [SYCL][NFC] Add clang-format configuration file for SYCL LIT tests (#1224) By default LLVM style limit code lines size to 80 characters. It should not be applied to LIT tests as clang-format can't preserve LIT commands semantic written in the code comments. This configuration file was shamelessly copied from the clang project. Signed-off-by: Alexey Bader --- sycl/test/.clang-format | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 sycl/test/.clang-format diff --git a/sycl/test/.clang-format b/sycl/test/.clang-format new file mode 100644 index 0000000000000..4799b66f3e9a6 --- /dev/null +++ b/sycl/test/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: LLVM +ColumnLimit: 0 From c220eb8e49fded764e2dcd618dccbeaf685131f4 Mon Sep 17 00:00:00 2001 From: jbrodman Date: Mon, 2 Mar 2020 03:57:41 -0600 Subject: [PATCH 07/19] [SYCL] Make context constructors explicit to avoid unintended conversions (#1219) Signed-off-by: James Brodman --- sycl/include/CL/sycl/context.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/context.hpp b/sycl/include/CL/sycl/context.hpp index 926bb22aebfdd..f640bd2f53e23 100644 --- a/sycl/include/CL/sycl/context.hpp +++ b/sycl/include/CL/sycl/context.hpp @@ -49,8 +49,8 @@ class context { /// @param AsyncHandler is an instance of async_handler. /// @param UseCUDAPrimaryContext is a bool determining whether to use the /// primary context in the CUDA backend. - context(const device &Device, async_handler AsyncHandler = {}, - bool UseCUDAPrimaryContext = false); + explicit context(const device &Device, async_handler AsyncHandler = {}, + bool UseCUDAPrimaryContext = false); /// Constructs a SYCL context instance using the provided platform. /// @@ -63,8 +63,8 @@ class context { /// @param AsyncHandler is an instance of async_handler. /// @param UseCUDAPrimaryContext is a bool determining whether to use the /// primary context in the CUDA backend. - context(const platform &Platform, async_handler AsyncHandler = {}, - bool UseCUDAPrimaryContext = false); + explicit context(const platform &Platform, async_handler AsyncHandler = {}, + bool UseCUDAPrimaryContext = false); /// Constructs a SYCL context instance using list of devices. /// @@ -78,8 +78,9 @@ class context { /// @param AsyncHandler is an instance of async_handler. /// @param UseCUDAPrimaryContext is a bool determining whether to use the /// primary context in the CUDA backend. - context(const vector_class &DeviceList, - async_handler AsyncHandler = {}, bool UseCUDAPrimaryContext = false); + explicit context(const vector_class &DeviceList, + async_handler AsyncHandler = {}, + bool UseCUDAPrimaryContext = false); /// Constructs a SYCL context instance from OpenCL cl_context. /// From 3035170074ac422207b4566a0134016bcaa0bf52 Mon Sep 17 00:00:00 2001 From: vladimirlaz Date: Mon, 2 Mar 2020 17:33:04 +0300 Subject: [PATCH 08/19] [SYCL] Disable tests which take more than 5 minutes (#1220) Signed-off-by: Vladimir Lazarev --- buildbot/testlist.cfg | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/buildbot/testlist.cfg b/buildbot/testlist.cfg index e60fea347dc99..01aa2bd10c151 100644 --- a/buildbot/testlist.cfg +++ b/buildbot/testlist.cfg @@ -11,7 +11,7 @@ :test_exception_handling :test_group :test_h_item -:test_handler +#:test_handler :test_header :test_hierarchical :test_id @@ -20,7 +20,7 @@ :test_item :test_kernel :test_kernel_args -:test_math_builtin_api +#:test_math_builtin_api :test_multi_ptr :test_nd_item :test_nd_range @@ -38,8 +38,7 @@ :test_vector_api :test_vector_constructors :test_vector_load_store -# Disable test to speedup testing until JIT is optimized #:test_vector_operators :test_vector_swizzle_assignment -:test_vector_swizzles -:test_vector_swizzles_opencl +#:test_vector_swizzles +#:test_vector_swizzles_opencl From aa0619c2891075e235a749ef0f9750671db5b4ae Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Mon, 2 Mar 2020 18:55:12 +0300 Subject: [PATCH 09/19] [SYCL] Fix check-sycl-deploy target problems (#1165) - deploy-lit.site.cfg configuration file used to owerwrite llvm-lit to use binaries from deploy, which sometimes do not exist. Removed deploy-lit.site.cfg and made all configuration files flexible using llvm-lit command line options. - check-all target used to run check-sycl-deploy target. This caused problems if CMAKE_INSTALL_PREFIX wasn't specified, because in this case check-sycl-deploy target leads to trying to install sycl binaries to system directories. Excluded check-sycl-deploy target from check-all. - Reduced copy-paste in check-sycl-deploy target dependencies. - Reduced number of variables passed from cmake to lit. Signed-off-by: Mariya Podchishchaeva --- sycl/test/CMakeLists.txt | 31 ++++--------------- .../test/basic_tests/handler/interop_task.cpp | 2 +- sycl/test/deploy-lit.site.cfg.py.in | 25 --------------- sycl/test/devicelib/assert-windows.cpp | 2 +- sycl/test/devicelib/assert.cpp | 2 +- .../devicelib/c99_complex_math_fp64_test.cpp | 2 +- sycl/test/devicelib/c99_complex_math_test.cpp | 2 +- sycl/test/devicelib/cmath_test.cpp | 2 +- sycl/test/devicelib/cmath_test_fp64.cpp | 2 +- sycl/test/devicelib/math_fp64_test.cpp | 2 +- sycl/test/devicelib/math_override_test.cpp | 2 +- sycl/test/devicelib/math_test.cpp | 2 +- .../devicelib/std_complex_math_fp64_test.cpp | 2 +- sycl/test/devicelib/std_complex_math_test.cpp | 2 +- sycl/test/lit.cfg.py | 31 +++++++++---------- sycl/test/lit.site.cfg.py.in | 12 +++---- 16 files changed, 37 insertions(+), 86 deletions(-) delete mode 100644 sycl/test/deploy-lit.site.cfg.py.in diff --git a/sycl/test/CMakeLists.txt b/sycl/test/CMakeLists.txt index 95dacdcffe48e..5d891547560fa 100644 --- a/sycl/test/CMakeLists.txt +++ b/sycl/test/CMakeLists.txt @@ -1,23 +1,11 @@ -set(LLVM_BUILD_LIBRARY_DIRS "${LLVM_BINARY_DIR}/lib/") -set(LLVM_BUILD_BINARY_DIRS "${LLVM_BINARY_DIR}/bin/") set(LLVM_TOOLS_DIR "${LLVM_BINARY_DIR}/bin/") -set(CLANG_IN_BUILD "${LLVM_BINARY_DIR}/bin/clang") -set(CLANGXX_IN_BUILD "${LLVM_BINARY_DIR}/bin/clang++") -set(CLANGCL_IN_BUILD "${LLVM_BINARY_DIR}/bin/clang-cl") - -set(LLVM_DEPLOY_LIBRARY_DIRS "${CMAKE_INSTALL_PREFIX}/lib/") -set(LLVM_DEPLOY_BINARY_DIRS "${CMAKE_INSTALL_PREFIX}/bin/") -set(CLANG_IN_DEPLOY "${CMAKE_INSTALL_PREFIX}/bin/clang") -set(CLANGXX_IN_DEPLOY "${CMAKE_INSTALL_PREFIX}/bin/clang++") -set(CLANGCL_IN_DEPLOY "${CMAKE_INSTALL_PREFIX}/bin/clang-cl") get_target_property(SYCL_BINARY_DIR sycl-toolchain BINARY_DIR) set(SYCL_INCLUDE "${dst_dir}") -set(SYCL_DEPLOY_INCLUDE "${dst_deploy_dir}") set(RT_TEST_ARGS ${RT_TEST_ARGS} "-v") -set(DEPLOY_RT_TEST_ARGS ${DEPLOY_RT_TEST_ARGS} "-v --config-prefix=deploy-lit") +set(DEPLOY_RT_TEST_ARGS ${DEPLOY_RT_TEST_ARGS} "-v -D SYCL_TOOLS_DIR=${CMAKE_INSTALL_PREFIX}/bin -D SYCL_LIBS_DIR=${CMAKE_INSTALL_PREFIX}/lib -D SYCL_INCLUDE=${dst_deploy_dir}") configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in @@ -33,13 +21,6 @@ configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.cfg.py ) -configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/deploy-lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/deploy-lit.site.cfg.py - MAIN_CONFIG - ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py - ) - list(APPEND SYCL_TEST_DEPS sycl-toolchain FileCheck @@ -49,11 +30,8 @@ list(APPEND SYCL_TEST_DEPS ) list(APPEND SYCL_DEPLOY_TEST_DEPS + ${SYCL_TEST_DEPS} deploy-sycl-toolchain - FileCheck - not - get_device_count_by_type - llvm-config ) add_lit_testsuite(check-sycl "Running the SYCL regression tests" @@ -62,10 +40,13 @@ add_lit_testsuite(check-sycl "Running the SYCL regression tests" PARAMS "SYCL_BE=PI_OPENCL" DEPENDS ${SYCL_TEST_DEPS} ) + add_lit_testsuite(check-sycl-deploy "Running the SYCL regression tests" ${CMAKE_CURRENT_BINARY_DIR} ARGS ${DEPLOY_RT_TEST_ARGS} + PARAMS "SYCL_BE=PI_OPENCL" DEPENDS ${SYCL_DEPLOY_TEST_DEPS} + EXCLUDE_FROM_CHECK_ALL ) set_target_properties(check-sycl PROPERTIES FOLDER "SYCL tests") @@ -76,7 +57,7 @@ add_lit_testsuites(SYCL ${CMAKE_CURRENT_SOURCE_DIR} if(SYCL_BUILD_PI_CUDA) add_lit_testsuite(check-sycl-cuda "Running the SYCL regression tests for CUDA" ${CMAKE_CURRENT_BINARY_DIR} - ARGS ${RT_TEST_ARGS} + ARGS ${RT_TEST_ARGS} PARAMS "SYCL_BE=PI_CUDA" DEPENDS ${SYCL_TEST_DEPS} ) diff --git a/sycl/test/basic_tests/handler/interop_task.cpp b/sycl/test/basic_tests/handler/interop_task.cpp index 1857a0e359db5..dba8cf0d8be2d 100644 --- a/sycl/test/basic_tests/handler/interop_task.cpp +++ b/sycl/test/basic_tests/handler/interop_task.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl %s -o %t.out -lOpenCL +// RUN: %clangxx -fsycl %s -o %t.out -L %opencl_libs_dir -lOpenCL // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // REQUIRES: opencl diff --git a/sycl/test/deploy-lit.site.cfg.py.in b/sycl/test/deploy-lit.site.cfg.py.in deleted file mode 100644 index e99cee34e1098..0000000000000 --- a/sycl/test/deploy-lit.site.cfg.py.in +++ /dev/null @@ -1,25 +0,0 @@ -@LIT_SITE_CFG_IN_HEADER@ - -import sys - -config.clang = "@CLANG_IN_DEPLOY@" -config.clangxx = "@CLANGXX_IN_DEPLOY@" -config.clang_cl = "@CLANGCL_IN_DEPLOY@" -config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" -config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" -config.llvm_build_libs_dir = "@LLVM_DEPLOY_LIBRARY_DIRS@" -config.llvm_build_bins_dir = "@LLVM_DEPLOY_BINARY_DIRS@" -config.llvm_binary_dir = "@LLVM_BINARY_DIR@" -config.sycl_include = "@SYCL_DEPLOY_INCLUDE@" -config.sycl_obj_root = "@SYCL_BINARY_DIR@" -config.opencl_lib = "@OpenCL_LIBRARIES@" -config.opencl_libs_dir = os.path.dirname("@OpenCL_LIBRARIES@") - -config.llvm_enable_projects = "@LLVM_ENABLE_PROJECTS@" - - -import lit.llvm -lit.llvm.initialize(lit_config, config) - -# Let the main config do the real work. -lit_config.load_config(config, "@SYCL_SOURCE_DIR@/test/lit.cfg.py") diff --git a/sycl/test/devicelib/assert-windows.cpp b/sycl/test/devicelib/assert-windows.cpp index 1451431b46946..266ec3845d766 100644 --- a/sycl/test/devicelib/assert-windows.cpp +++ b/sycl/test/devicelib/assert-windows.cpp @@ -5,7 +5,7 @@ // XFAIL: * // // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/../bin/libsycl-msvc.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/../bin/libsycl-msvc.o -o %t.out // // MSVC implementation of assert does not call an unreachable built-in, so the // program doesn't terminate when fallback is used. diff --git a/sycl/test/devicelib/assert.cpp b/sycl/test/devicelib/assert.cpp index d0f18fe8cb544..757647322cb64 100644 --- a/sycl/test/devicelib/assert.cpp +++ b/sycl/test/devicelib/assert.cpp @@ -1,6 +1,6 @@ // REQUIRES: cpu,linux // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-glibc.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-glibc.o -o %t.out // (see the other RUN lines below; it is a bit complicated) // // assert() call in device code guarantees nothing: on some devices it behaves diff --git a/sycl/test/devicelib/c99_complex_math_fp64_test.cpp b/sycl/test/devicelib/c99_complex_math_fp64_test.cpp index 7fdd07d4c3cd7..b7da3b6a9e37a 100644 --- a/sycl/test/devicelib/c99_complex_math_fp64_test.cpp +++ b/sycl/test/devicelib/c99_complex_math_fp64_test.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: windows // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-complex-fp64.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-complex-fp64.o -o %t.out #include #include #include diff --git a/sycl/test/devicelib/c99_complex_math_test.cpp b/sycl/test/devicelib/c99_complex_math_test.cpp index 8b28e943d6547..9637ccd4a0568 100644 --- a/sycl/test/devicelib/c99_complex_math_test.cpp +++ b/sycl/test/devicelib/c99_complex_math_test.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: windows // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-complex.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-complex.o -o %t.out #include #include #include diff --git a/sycl/test/devicelib/cmath_test.cpp b/sycl/test/devicelib/cmath_test.cpp index ded8d047a54d8..217ad4121f6c8 100644 --- a/sycl/test/devicelib/cmath_test.cpp +++ b/sycl/test/devicelib/cmath_test.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: windows // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-cmath.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath.o -o %t.out #include #include #include diff --git a/sycl/test/devicelib/cmath_test_fp64.cpp b/sycl/test/devicelib/cmath_test_fp64.cpp index 5ec7f1b34c24e..1c8b7afa5d4a5 100644 --- a/sycl/test/devicelib/cmath_test_fp64.cpp +++ b/sycl/test/devicelib/cmath_test_fp64.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: windows // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-cmath-fp64.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath-fp64.o -o %t.out #include #include #include diff --git a/sycl/test/devicelib/math_fp64_test.cpp b/sycl/test/devicelib/math_fp64_test.cpp index b0eedae1e8f7e..30c21dbc3b77e 100644 --- a/sycl/test/devicelib/math_fp64_test.cpp +++ b/sycl/test/devicelib/math_fp64_test.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: windows // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-cmath-fp64.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath-fp64.o -o %t.out #include #include #include diff --git a/sycl/test/devicelib/math_override_test.cpp b/sycl/test/devicelib/math_override_test.cpp index 92b419bfdca8b..62166b95ab1ab 100644 --- a/sycl/test/devicelib/math_override_test.cpp +++ b/sycl/test/devicelib/math_override_test.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: windows // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-cmath.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath.o -o %t.out #include #include #include diff --git a/sycl/test/devicelib/math_test.cpp b/sycl/test/devicelib/math_test.cpp index 4afba887681a2..1b5aa4332fe8e 100644 --- a/sycl/test/devicelib/math_test.cpp +++ b/sycl/test/devicelib/math_test.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: windows // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-cmath.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-cmath.o -o %t.out #include #include #include diff --git a/sycl/test/devicelib/std_complex_math_fp64_test.cpp b/sycl/test/devicelib/std_complex_math_fp64_test.cpp index b59591b578981..e1213a713ad5b 100644 --- a/sycl/test/devicelib/std_complex_math_fp64_test.cpp +++ b/sycl/test/devicelib/std_complex_math_fp64_test.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: windows // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-complex-fp64.o %llvm_build_libs_dir/libsycl-cmath-fp64.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-complex-fp64.o %sycl_libs_dir/libsycl-cmath-fp64.o -o %t.out #include #include #include "math_utils.hpp" diff --git a/sycl/test/devicelib/std_complex_math_test.cpp b/sycl/test/devicelib/std_complex_math_test.cpp index 9c817e714a88f..f3fe5ae9ae510 100644 --- a/sycl/test/devicelib/std_complex_math_test.cpp +++ b/sycl/test/devicelib/std_complex_math_test.cpp @@ -1,6 +1,6 @@ // UNSUPPORTED: windows // RUN: %clangxx -fsycl -c %s -o %t.o -// RUN: %clangxx -fsycl %t.o %llvm_build_libs_dir/libsycl-complex.o %llvm_build_libs_dir/libsycl-cmath.o -o %t.out +// RUN: %clangxx -fsycl %t.o %sycl_libs_dir/libsycl-complex.o %sycl_libs_dir/libsycl-cmath.o -o %t.out #include #include #include "math_utils.hpp" diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index f4839e086efdb..999b6d1cd5584 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -38,21 +38,21 @@ config.available_features.add('linux') # Propagate 'LD_LIBRARY_PATH' through the environment. if 'LD_LIBRARY_PATH' in os.environ: - config.environment['LD_LIBRARY_PATH'] = os.path.pathsep.join((config.environment['LD_LIBRARY_PATH'], config.llvm_build_libs_dir)) + config.environment['LD_LIBRARY_PATH'] = os.path.pathsep.join((config.environment['LD_LIBRARY_PATH'], config.sycl_libs_dir)) else: - config.environment['LD_LIBRARY_PATH'] = config.llvm_build_libs_dir + config.environment['LD_LIBRARY_PATH'] = config.sycl_libs_dir elif platform.system() == "Windows": config.available_features.add('windows') if 'LIB' in os.environ: - config.environment['LIB'] = os.path.pathsep.join((config.environment['LIB'], config.llvm_build_libs_dir)) + config.environment['LIB'] = os.path.pathsep.join((config.environment['LIB'], config.sycl_libs_dir)) else: - config.environment['LIB'] = config.llvm_build_libs_dir + config.environment['LIB'] = config.sycl_libs_dir if 'PATH' in os.environ: - config.environment['PATH'] = os.path.pathsep.join((config.environment['PATH'], config.llvm_build_bins_dir)) + config.environment['PATH'] = os.path.pathsep.join((config.environment['PATH'], config.sycl_tools_dir)) else: - config.environment['PATH'] = config.llvm_build_bins_dir + config.environment['PATH'] = config.sycl_tools_dir elif platform.system() == "Darwin": # FIXME: surely there is a more elegant way to instantiate the Xcode directories. @@ -61,34 +61,31 @@ else: config.environment['CPATH'] = "/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/include/c++/v1" config.environment['CPATH'] = os.path.pathsep.join((config.environment['CPATH'], "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/")) - config.environment['DYLD_LIBRARY_PATH'] = config.llvm_build_libs_dir + config.environment['DYLD_LIBRARY_PATH'] = config.sycl_libs_dir # propagate the environment variable OCL_ICD_FILANEMES to use proper runtime. if 'OCL_ICD_FILENAMES' in os.environ: config.environment['OCL_ICD_FILENAMES'] = os.environ['OCL_ICD_FILENAMES'] -config.substitutions.append( ('%clang_cc1', ' ' + config.clang + ' -cc1 ') ) -config.substitutions.append( ('%clangxx', ' ' + config.clangxx ) ) -config.substitutions.append( ('%clang_cl', ' ' + config.clang_cl ) ) -config.substitutions.append( ('%clang', ' ' + config.clang ) ) -config.substitutions.append( ('%llvm_build_libs_dir', config.llvm_build_libs_dir ) ) +config.substitutions.append( ('%sycl_libs_dir', config.sycl_libs_dir ) ) config.substitutions.append( ('%sycl_include', config.sycl_include ) ) config.substitutions.append( ('%opencl_libs_dir', config.opencl_libs_dir) ) config.substitutions.append( ('%sycl_source_dir', config.sycl_source_dir) ) +llvm_config.use_clang() + tools = ['llvm-spirv'] -tool_dirs = [config.llvm_tools_dir] +tool_dirs = [config.sycl_tools_dir] llvm_config.add_tool_substitutions(tools, tool_dirs) if "opencl-aot" in config.llvm_enable_projects: if 'PATH' in os.environ: print("Adding path to opencl-aot tool to PATH") - os.environ['PATH'] = os.path.pathsep.join((os.getenv('PATH'), config.llvm_build_bins_dir)) + os.environ['PATH'] = os.path.pathsep.join((os.getenv('PATH'), config.sycl_tools_dir)) backend=lit_config.params.get('SYCL_BE', "PI_OPENCL") -get_device_count_by_type_path = os.path.join(config.llvm_binary_dir, - "bin", "get_device_count_by_type") +get_device_count_by_type_path = os.path.join(config.llvm_tools_dir, "get_device_count_by_type") def getDeviceCount(device_type): is_cuda = False; @@ -186,7 +183,7 @@ def getDeviceCount(device_type): path = config.environment['PATH'] -path = os.path.pathsep.join((config.llvm_tools_dir, path)) +path = os.path.pathsep.join((config.sycl_tools_dir, path)) config.environment['PATH'] = path # Device AOT compilation tools aren't part of the SYCL project, diff --git a/sycl/test/lit.site.cfg.py.in b/sycl/test/lit.site.cfg.py.in index f1a66894b1f0f..18ef72f3a94e7 100644 --- a/sycl/test/lit.site.cfg.py.in +++ b/sycl/test/lit.site.cfg.py.in @@ -2,18 +2,16 @@ import sys -config.clang = "@CLANG_IN_BUILD@" -config.clangxx = "@CLANGXX_IN_BUILD@" -config.clang_cl = "@CLANGCL_IN_BUILD@" config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" +config.sycl_tools_dir = lit_config.params.get('SYCL_TOOLS_DIR', "@LLVM_TOOLS_DIR@") config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" -config.llvm_build_libs_dir = "@LLVM_BUILD_LIBRARY_DIRS@" -config.llvm_build_bins_dir = "@LLVM_BUILD_BINARY_DIRS@" -config.llvm_binary_dir = "@LLVM_BINARY_DIR@" -config.sycl_include = "@SYCL_INCLUDE@" +config.sycl_include = lit_config.params.get('SYCL_INCLUDE', "@SYCL_INCLUDE@") config.sycl_obj_root = "@SYCL_BINARY_DIR@" config.sycl_source_dir = "@SYCL_SOURCE_DIR@/source" config.opencl_libs_dir = os.path.dirname("@OpenCL_LIBRARIES@") +config.sycl_libs_dir = lit_config.params.get('SYCL_LIBS_DIR', "@LLVM_LIBS_DIR@") +config.target_triple = "@TARGET_TRIPLE@" +config.host_triple = "@LLVM_HOST_TRIPLE@" config.llvm_enable_projects = "@LLVM_ENABLE_PROJECTS@" From 745e7599f3230808dd3e8d84473f4f61ff161cb4 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Mon, 2 Mar 2020 17:30:52 +0100 Subject: [PATCH 10/19] [SYCL][CUDA] Handle the case of not having any CUDA device (#1212) If CUDA initialisation fails or there are no CUDA devices, do not return a CUDA-based SYCL platform rather than aborting or throwing an exception. Signed-off-by: Andrea Bocci --- sycl/plugins/cuda/pi_cuda.cpp | 72 +++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index fe42b1d8dc3a1..fe466ce5b832a 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -528,43 +528,57 @@ pi_result cuda_piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms) { try { - static constexpr pi_uint32 numPlatforms = 1; + static std::once_flag initFlag; + static pi_uint32 numPlatforms = 1; + static _pi_platform platformId; - if (num_platforms != nullptr) { - *num_platforms = numPlatforms; + if (num_entries == 0 and platforms != nullptr) { + return PI_INVALID_VALUE; + } + if (platforms == nullptr and num_platforms == nullptr) { + return PI_INVALID_VALUE; } pi_result err = PI_SUCCESS; - if (platforms != nullptr) { - - assert(num_entries != 0); - - static std::once_flag initFlag; - static _pi_platform platformId; - std::call_once( - initFlag, - [](pi_result &err) { - err = PI_CHECK_ERROR(cuInit(0)); - - int numDevices = 0; - err = PI_CHECK_ERROR(cuDeviceGetCount(&numDevices)); + std::call_once( + initFlag, + [](pi_result &err) { + if (cuInit(0) != CUDA_SUCCESS) { + numPlatforms = 0; + return; + } + int numDevices = 0; + err = PI_CHECK_ERROR(cuDeviceGetCount(&numDevices)); + if (numDevices == 0) { + numPlatforms = 0; + return; + } + try { platformId.devices_.reserve(numDevices); - try { - for (int i = 0; i < numDevices; ++i) { - CUdevice device; - err = PI_CHECK_ERROR(cuDeviceGet(&device, i)); - platformId.devices_.emplace_back( - new _pi_device{device, &platformId}); - } - } catch (...) { - // Clear and rethrow to allow retry - platformId.devices_.clear(); - throw; + for (int i = 0; i < numDevices; ++i) { + CUdevice device; + err = PI_CHECK_ERROR(cuDeviceGet(&device, i)); + platformId.devices_.emplace_back( + new _pi_device{device, &platformId}); } - }, - err); + } catch (const std::bad_alloc &) { + // Signal out-of-memory situation + platformId.devices_.clear(); + err = PI_OUT_OF_HOST_MEMORY; + } catch (...) { + // Clear and rethrow to allow retry + platformId.devices_.clear(); + throw; + } + }, + err); + if (num_platforms != nullptr) { + *num_platforms = numPlatforms; + } + + if (platforms != nullptr) { *platforms = &platformId; } From b1aa2220be175b3307dcb4d4071731bff68488e0 Mon Sep 17 00:00:00 2001 From: Bjoern Knafla Date: Thu, 27 Feb 2020 11:36:22 +0000 Subject: [PATCH 11/19] [CUDA][PI] clang-format pi.h Align `pi.h` formatting style with LLVM formatting style and rest of project. Signed-off-by: Bjoern Knafla --- sycl/include/CL/sycl/detail/pi.h | 1347 +++++++++++++----------------- 1 file changed, 576 insertions(+), 771 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 686fdc49f753a..eb0a2e4a316e6 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -31,19 +31,19 @@ // elsewhere, e.g. in the pi_opencl, but constants/enums mapping is now // done here, for efficiency and simplicity. // -#include #include +#include #include #ifdef __cplusplus extern "C" { #endif // __cplusplus -typedef int32_t pi_int32; -typedef uint32_t pi_uint32; -typedef uint64_t pi_uint64; -typedef pi_uint32 pi_bool; -typedef pi_uint64 pi_bitfield; +typedef int32_t pi_int32; +typedef uint32_t pi_uint32; +typedef uint64_t pi_uint64; +typedef pi_uint32 pi_bool; +typedef pi_uint64 pi_bitfield; // // NOTE: prefer to map 1:1 to OpenCL so that no translation is needed @@ -53,75 +53,75 @@ typedef pi_uint64 pi_bitfield; // TODO: populate PI enums. // typedef enum { - PI_SUCCESS = CL_SUCCESS, - PI_RESULT_INVALID_KERNEL_NAME = CL_INVALID_KERNEL_NAME, - PI_INVALID_OPERATION = CL_INVALID_OPERATION, - PI_INVALID_KERNEL = CL_INVALID_KERNEL, - PI_INVALID_QUEUE_PROPERTIES = CL_INVALID_QUEUE_PROPERTIES, - PI_INVALID_VALUE = CL_INVALID_VALUE, - PI_INVALID_CONTEXT = CL_INVALID_CONTEXT, - PI_INVALID_PLATFORM = CL_INVALID_PLATFORM, - PI_INVALID_DEVICE = CL_INVALID_DEVICE, - PI_INVALID_BINARY = CL_INVALID_BINARY, - PI_INVALID_QUEUE = CL_INVALID_COMMAND_QUEUE, - PI_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY, - PI_INVALID_PROGRAM = CL_INVALID_PROGRAM, - PI_INVALID_MEM_OBJECT = CL_INVALID_MEM_OBJECT, - PI_OUT_OF_RESOURCES = CL_OUT_OF_RESOURCES, - PI_INVALID_EVENT = CL_INVALID_EVENT, - PI_INVALID_EVENT_WAIT_LIST = CL_INVALID_EVENT_WAIT_LIST, + PI_SUCCESS = CL_SUCCESS, + PI_RESULT_INVALID_KERNEL_NAME = CL_INVALID_KERNEL_NAME, + PI_INVALID_OPERATION = CL_INVALID_OPERATION, + PI_INVALID_KERNEL = CL_INVALID_KERNEL, + PI_INVALID_QUEUE_PROPERTIES = CL_INVALID_QUEUE_PROPERTIES, + PI_INVALID_VALUE = CL_INVALID_VALUE, + PI_INVALID_CONTEXT = CL_INVALID_CONTEXT, + PI_INVALID_PLATFORM = CL_INVALID_PLATFORM, + PI_INVALID_DEVICE = CL_INVALID_DEVICE, + PI_INVALID_BINARY = CL_INVALID_BINARY, + PI_INVALID_QUEUE = CL_INVALID_COMMAND_QUEUE, + PI_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY, + PI_INVALID_PROGRAM = CL_INVALID_PROGRAM, + PI_INVALID_MEM_OBJECT = CL_INVALID_MEM_OBJECT, + PI_OUT_OF_RESOURCES = CL_OUT_OF_RESOURCES, + PI_INVALID_EVENT = CL_INVALID_EVENT, + PI_INVALID_EVENT_WAIT_LIST = CL_INVALID_EVENT_WAIT_LIST, PI_MISALIGNED_SUB_BUFFER_OFFSET = CL_MISALIGNED_SUB_BUFFER_OFFSET, - PI_BUILD_PROGRAM_FAILURE = CL_BUILD_PROGRAM_FAILURE, - PI_INVALID_WORK_GROUP_SIZE = CL_INVALID_WORK_GROUP_SIZE, - PI_ERROR_UNKNOWN = -999 + PI_BUILD_PROGRAM_FAILURE = CL_BUILD_PROGRAM_FAILURE, + PI_INVALID_WORK_GROUP_SIZE = CL_INVALID_WORK_GROUP_SIZE, + PI_ERROR_UNKNOWN = -999 } _pi_result; typedef enum { - PI_EVENT_COMPLETE = CL_COMPLETE, - PI_EVENT_RUNNING = CL_RUNNING, + PI_EVENT_COMPLETE = CL_COMPLETE, + PI_EVENT_RUNNING = CL_RUNNING, PI_EVENT_SUBMITTED = CL_SUBMITTED, - PI_EVENT_QUEUED = CL_QUEUED + PI_EVENT_QUEUED = CL_QUEUED } _pi_event_status; typedef enum { - PI_COMMAND_KERNEL_LAUNCH = CL_COMMAND_NDRANGE_KERNEL, + PI_COMMAND_KERNEL_LAUNCH = CL_COMMAND_NDRANGE_KERNEL, PI_COMMAND_MEMBUFFER_WRITE = CL_COMMAND_WRITE_BUFFER, - PI_COMMAND_MEMBUFFER_READ = CL_COMMAND_READ_BUFFER, - PI_COMMAND_USER = CL_COMMAND_USER, - PI_COMMAND_EVENTS_WAIT = CL_COMMAND_MARKER, - PI_COMMAND_MEMBUFFER_COPY = CL_COMMAND_COPY_BUFFER, - PI_COMMAND_MEMBUFFER_FILL = CL_COMMAND_FILL_BUFFER + PI_COMMAND_MEMBUFFER_READ = CL_COMMAND_READ_BUFFER, + PI_COMMAND_USER = CL_COMMAND_USER, + PI_COMMAND_EVENTS_WAIT = CL_COMMAND_MARKER, + PI_COMMAND_MEMBUFFER_COPY = CL_COMMAND_COPY_BUFFER, + PI_COMMAND_MEMBUFFER_FILL = CL_COMMAND_FILL_BUFFER } _pi_command_type; typedef enum { PI_PLATFORM_INFO_EXTENSIONS = CL_PLATFORM_EXTENSIONS, - PI_PLATFORM_INFO_NAME = CL_PLATFORM_NAME, - PI_PLATFORM_INFO_PROFILE = CL_PLATFORM_PROFILE, - PI_PLATFORM_INFO_VENDOR = CL_PLATFORM_VENDOR, - PI_PLATFORM_INFO_VERSION = CL_PLATFORM_VERSION, + PI_PLATFORM_INFO_NAME = CL_PLATFORM_NAME, + PI_PLATFORM_INFO_PROFILE = CL_PLATFORM_PROFILE, + PI_PLATFORM_INFO_VENDOR = CL_PLATFORM_VENDOR, + PI_PLATFORM_INFO_VERSION = CL_PLATFORM_VERSION, } _pi_platform_info; typedef enum { PI_PROGRAM_INFO_REFERENCE_COUNT = CL_PROGRAM_REFERENCE_COUNT, - PI_PROGRAM_INFO_CONTEXT = CL_PROGRAM_CONTEXT, - PI_PROGRAM_INFO_NUM_DEVICES = CL_PROGRAM_NUM_DEVICES, - PI_PROGRAM_INFO_DEVICES = CL_PROGRAM_DEVICES, - PI_PROGRAM_INFO_SOURCE = CL_PROGRAM_SOURCE, - PI_PROGRAM_INFO_BINARY_SIZES = CL_PROGRAM_BINARY_SIZES, - PI_PROGRAM_INFO_BINARIES = CL_PROGRAM_BINARIES, - PI_PROGRAM_INFO_KERNEL_NAMES = CL_PROGRAM_KERNEL_NAMES + PI_PROGRAM_INFO_CONTEXT = CL_PROGRAM_CONTEXT, + PI_PROGRAM_INFO_NUM_DEVICES = CL_PROGRAM_NUM_DEVICES, + PI_PROGRAM_INFO_DEVICES = CL_PROGRAM_DEVICES, + PI_PROGRAM_INFO_SOURCE = CL_PROGRAM_SOURCE, + PI_PROGRAM_INFO_BINARY_SIZES = CL_PROGRAM_BINARY_SIZES, + PI_PROGRAM_INFO_BINARIES = CL_PROGRAM_BINARIES, + PI_PROGRAM_INFO_KERNEL_NAMES = CL_PROGRAM_KERNEL_NAMES } _pi_program_info; typedef enum { - PI_PROGRAM_BUILD_INFO_STATUS = CL_PROGRAM_BUILD_STATUS, + PI_PROGRAM_BUILD_INFO_STATUS = CL_PROGRAM_BUILD_STATUS, PI_PROGRAM_BUILD_INFO_OPTIONS = CL_PROGRAM_BUILD_OPTIONS, - PI_PROGRAM_BUILD_INFO_LOG = CL_PROGRAM_BUILD_LOG + PI_PROGRAM_BUILD_INFO_LOG = CL_PROGRAM_BUILD_LOG } _pi_program_build_info; typedef enum { - PI_PROGRAM_BUILD_STATUS_NONE = CL_BUILD_NONE, - PI_PROGRAM_BUILD_STATUS_ERROR = CL_BUILD_ERROR, - PI_PROGRAM_BUILD_STATUS_SUCCESS = CL_BUILD_SUCCESS, + PI_PROGRAM_BUILD_STATUS_NONE = CL_BUILD_NONE, + PI_PROGRAM_BUILD_STATUS_ERROR = CL_BUILD_ERROR, + PI_PROGRAM_BUILD_STATUS_SUCCESS = CL_BUILD_SUCCESS, PI_PROGRAM_BUILD_STATUS_IN_PROGRESS = CL_BUILD_IN_PROGRESS } _pi_program_build_status; @@ -135,80 +135,96 @@ typedef enum : pi_uint64 { } _pi_device_type; typedef enum { - PI_DEVICE_INFO_TYPE = CL_DEVICE_TYPE, - PI_DEVICE_INFO_VENDOR_ID = CL_DEVICE_VENDOR_ID, - PI_DEVICE_INFO_MAX_COMPUTE_UNITS = CL_DEVICE_MAX_COMPUTE_UNITS, - PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES = CL_DEVICE_MAX_WORK_ITEM_SIZES, - PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG = CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, - PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY = CL_DEVICE_MAX_CLOCK_FREQUENCY, - PI_DEVICE_INFO_ADDRESS_BITS = CL_DEVICE_ADDRESS_BITS, - PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, - PI_DEVICE_INFO_IMAGE_SUPPORT = CL_DEVICE_IMAGE_SUPPORT, - PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS = CL_DEVICE_MAX_READ_IMAGE_ARGS, - PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = CL_DEVICE_MAX_WRITE_IMAGE_ARGS, - PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT = CL_DEVICE_IMAGE2D_MAX_HEIGHT, - PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH = CL_DEVICE_IMAGE2D_MAX_WIDTH, - PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = CL_DEVICE_IMAGE3D_MAX_HEIGHT, - PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH = CL_DEVICE_IMAGE3D_MAX_WIDTH, - PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH = CL_DEVICE_IMAGE3D_MAX_DEPTH, - PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, - PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, - PI_DEVICE_INFO_MAX_SAMPLERS = CL_DEVICE_MAX_SAMPLERS, - PI_DEVICE_INFO_MAX_PARAMETER_SIZE = CL_DEVICE_MAX_PARAMETER_SIZE, - PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = CL_DEVICE_MEM_BASE_ADDR_ALIGN, - PI_DEVICE_INFO_HALF_FP_CONFIG = CL_DEVICE_HALF_FP_CONFIG, - PI_DEVICE_INFO_SINGLE_FP_CONFIG = CL_DEVICE_SINGLE_FP_CONFIG, - PI_DEVICE_INFO_DOUBLE_FP_CONFIG = CL_DEVICE_DOUBLE_FP_CONFIG, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, - PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, - PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, - PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, - PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, - PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = CL_DEVICE_PROFILING_TIMER_RESOLUTION, - PI_DEVICE_INFO_IS_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, - PI_DEVICE_INFO_IS_AVAILABLE = CL_DEVICE_AVAILABLE, - PI_DEVICE_INFO_IS_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, - PI_DEVICE_INFO_IS_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, - PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, - PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, - PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, - PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, - PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, - PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, - PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, - PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, - PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, - PI_DEVICE_INFO_PROFILE = CL_DEVICE_PROFILE, - PI_DEVICE_INFO_VERSION = CL_DEVICE_VERSION, - PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, - PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, - PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, - PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, - PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, - PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = CL_DEVICE_PARTITION_MAX_SUB_DEVICES, - PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, - PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, - PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, + PI_DEVICE_INFO_TYPE = CL_DEVICE_TYPE, + PI_DEVICE_INFO_VENDOR_ID = CL_DEVICE_VENDOR_ID, + PI_DEVICE_INFO_MAX_COMPUTE_UNITS = CL_DEVICE_MAX_COMPUTE_UNITS, + PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES = CL_DEVICE_MAX_WORK_ITEM_SIZES, + PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = + CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = + CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = + CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY = CL_DEVICE_MAX_CLOCK_FREQUENCY, + PI_DEVICE_INFO_ADDRESS_BITS = CL_DEVICE_ADDRESS_BITS, + PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, + PI_DEVICE_INFO_IMAGE_SUPPORT = CL_DEVICE_IMAGE_SUPPORT, + PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS = CL_DEVICE_MAX_READ_IMAGE_ARGS, + PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = CL_DEVICE_MAX_WRITE_IMAGE_ARGS, + PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT = CL_DEVICE_IMAGE2D_MAX_HEIGHT, + PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH = CL_DEVICE_IMAGE2D_MAX_WIDTH, + PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = CL_DEVICE_IMAGE3D_MAX_HEIGHT, + PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH = CL_DEVICE_IMAGE3D_MAX_WIDTH, + PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH = CL_DEVICE_IMAGE3D_MAX_DEPTH, + PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, + PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, + PI_DEVICE_INFO_MAX_SAMPLERS = CL_DEVICE_MAX_SAMPLERS, + PI_DEVICE_INFO_MAX_PARAMETER_SIZE = CL_DEVICE_MAX_PARAMETER_SIZE, + PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = CL_DEVICE_MEM_BASE_ADDR_ALIGN, + PI_DEVICE_INFO_HALF_FP_CONFIG = CL_DEVICE_HALF_FP_CONFIG, + PI_DEVICE_INFO_SINGLE_FP_CONFIG = CL_DEVICE_SINGLE_FP_CONFIG, + PI_DEVICE_INFO_DOUBLE_FP_CONFIG = CL_DEVICE_DOUBLE_FP_CONFIG, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = + CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, + PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, + PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, + PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, + PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, + PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = + CL_DEVICE_PROFILING_TIMER_RESOLUTION, + PI_DEVICE_INFO_IS_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, + PI_DEVICE_INFO_IS_AVAILABLE = CL_DEVICE_AVAILABLE, + PI_DEVICE_INFO_IS_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, + PI_DEVICE_INFO_IS_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, + PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, + PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = + CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, + PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, + PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, + PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, + PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, + PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, + PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, + PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, + PI_DEVICE_INFO_PROFILE = CL_DEVICE_PROFILE, + PI_DEVICE_INFO_VERSION = CL_DEVICE_VERSION, + PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, + PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, + PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, + PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = + CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, + PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, + PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = + CL_DEVICE_PARTITION_MAX_SUB_DEVICES, + PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, + PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = + CL_DEVICE_PARTITION_AFFINITY_DOMAIN, + PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, } _pi_device_info; typedef enum { @@ -218,86 +234,86 @@ typedef enum { // TODO: populate typedef enum { - PI_CONTEXT_INFO_DEVICES = CL_CONTEXT_DEVICES, - PI_CONTEXT_INFO_NUM_DEVICES = CL_CONTEXT_NUM_DEVICES, + PI_CONTEXT_INFO_DEVICES = CL_CONTEXT_DEVICES, + PI_CONTEXT_INFO_NUM_DEVICES = CL_CONTEXT_NUM_DEVICES, PI_CONTEXT_INFO_REFERENCE_COUNT = CL_CONTEXT_REFERENCE_COUNT } _pi_context_info; // TODO: populate typedef enum { - PI_QUEUE_INFO_DEVICE = CL_QUEUE_DEVICE, + PI_QUEUE_INFO_DEVICE = CL_QUEUE_DEVICE, PI_QUEUE_INFO_REFERENCE_COUNT = CL_QUEUE_REFERENCE_COUNT, - PI_QUEUE_INFO_PROPERTIES = CL_QUEUE_PROPERTIES, - PI_QUEUE_INFO_CONTEXT = CL_QUEUE_CONTEXT + PI_QUEUE_INFO_PROPERTIES = CL_QUEUE_PROPERTIES, + PI_QUEUE_INFO_CONTEXT = CL_QUEUE_CONTEXT } _pi_queue_info; typedef enum { - PI_KERNEL_INFO_FUNCTION_NAME = CL_KERNEL_FUNCTION_NAME, - PI_KERNEL_INFO_NUM_ARGS = CL_KERNEL_NUM_ARGS, + PI_KERNEL_INFO_FUNCTION_NAME = CL_KERNEL_FUNCTION_NAME, + PI_KERNEL_INFO_NUM_ARGS = CL_KERNEL_NUM_ARGS, PI_KERNEL_INFO_REFERENCE_COUNT = CL_KERNEL_REFERENCE_COUNT, - PI_KERNEL_INFO_CONTEXT = CL_KERNEL_CONTEXT, - PI_KERNEL_INFO_PROGRAM = CL_KERNEL_PROGRAM + PI_KERNEL_INFO_CONTEXT = CL_KERNEL_CONTEXT, + PI_KERNEL_INFO_PROGRAM = CL_KERNEL_PROGRAM } _pi_kernel_info; typedef enum { - PI_KERNEL_GROUP_INFO_SIZE = CL_KERNEL_WORK_GROUP_SIZE, + PI_KERNEL_GROUP_INFO_SIZE = CL_KERNEL_WORK_GROUP_SIZE, PI_KERNEL_COMPILE_GROUP_INFO_SIZE = CL_KERNEL_COMPILE_WORK_GROUP_SIZE, - PI_KERNEL_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE + PI_KERNEL_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE } _pi_kernel_group_info; typedef enum { - PI_IMAGE_INFO_FORMAT = CL_IMAGE_FORMAT, + PI_IMAGE_INFO_FORMAT = CL_IMAGE_FORMAT, PI_IMAGE_INFO_ELEMENT_SIZE = CL_IMAGE_ELEMENT_SIZE, - PI_IMAGE_INFO_ROW_PITCH = CL_IMAGE_ROW_PITCH, - PI_IMAGE_INFO_SLICE_PITCH = CL_IMAGE_SLICE_PITCH, - PI_IMAGE_INFO_WIDTH = CL_IMAGE_WIDTH, - PI_IMAGE_INFO_HEIGHT = CL_IMAGE_HEIGHT, - PI_IMAGE_INFO_DEPTH = CL_IMAGE_DEPTH + PI_IMAGE_INFO_ROW_PITCH = CL_IMAGE_ROW_PITCH, + PI_IMAGE_INFO_SLICE_PITCH = CL_IMAGE_SLICE_PITCH, + PI_IMAGE_INFO_WIDTH = CL_IMAGE_WIDTH, + PI_IMAGE_INFO_HEIGHT = CL_IMAGE_HEIGHT, + PI_IMAGE_INFO_DEPTH = CL_IMAGE_DEPTH } _pi_image_info; typedef enum { - PI_MEM_TYPE_BUFFER = CL_MEM_OBJECT_BUFFER, - PI_MEM_TYPE_IMAGE2D = CL_MEM_OBJECT_IMAGE2D, - PI_MEM_TYPE_IMAGE3D = CL_MEM_OBJECT_IMAGE3D, - PI_MEM_TYPE_IMAGE2D_ARRAY = CL_MEM_OBJECT_IMAGE2D_ARRAY, - PI_MEM_TYPE_IMAGE1D = CL_MEM_OBJECT_IMAGE1D, - PI_MEM_TYPE_IMAGE1D_ARRAY = CL_MEM_OBJECT_IMAGE1D_ARRAY, + PI_MEM_TYPE_BUFFER = CL_MEM_OBJECT_BUFFER, + PI_MEM_TYPE_IMAGE2D = CL_MEM_OBJECT_IMAGE2D, + PI_MEM_TYPE_IMAGE3D = CL_MEM_OBJECT_IMAGE3D, + PI_MEM_TYPE_IMAGE2D_ARRAY = CL_MEM_OBJECT_IMAGE2D_ARRAY, + PI_MEM_TYPE_IMAGE1D = CL_MEM_OBJECT_IMAGE1D, + PI_MEM_TYPE_IMAGE1D_ARRAY = CL_MEM_OBJECT_IMAGE1D_ARRAY, PI_MEM_TYPE_IMAGE1D_BUFFER = CL_MEM_OBJECT_IMAGE1D_BUFFER } _pi_mem_type; typedef enum { - PI_IMAGE_CHANNEL_ORDER_A = CL_A, - PI_IMAGE_CHANNEL_ORDER_R = CL_R, - PI_IMAGE_CHANNEL_ORDER_RG = CL_RG, - PI_IMAGE_CHANNEL_ORDER_RA = CL_RA, - PI_IMAGE_CHANNEL_ORDER_RGB = CL_RGB, - PI_IMAGE_CHANNEL_ORDER_RGBA = CL_RGBA, - PI_IMAGE_CHANNEL_ORDER_BGRA = CL_BGRA, - PI_IMAGE_CHANNEL_ORDER_ARGB = CL_ARGB, - PI_IMAGE_CHANNEL_ORDER_ABGR = CL_ABGR, + PI_IMAGE_CHANNEL_ORDER_A = CL_A, + PI_IMAGE_CHANNEL_ORDER_R = CL_R, + PI_IMAGE_CHANNEL_ORDER_RG = CL_RG, + PI_IMAGE_CHANNEL_ORDER_RA = CL_RA, + PI_IMAGE_CHANNEL_ORDER_RGB = CL_RGB, + PI_IMAGE_CHANNEL_ORDER_RGBA = CL_RGBA, + PI_IMAGE_CHANNEL_ORDER_BGRA = CL_BGRA, + PI_IMAGE_CHANNEL_ORDER_ARGB = CL_ARGB, + PI_IMAGE_CHANNEL_ORDER_ABGR = CL_ABGR, PI_IMAGE_CHANNEL_ORDER_INTENSITY = CL_INTENSITY, PI_IMAGE_CHANNEL_ORDER_LUMINANCE = CL_LUMINANCE, - PI_IMAGE_CHANNEL_ORDER_Rx = CL_Rx, - PI_IMAGE_CHANNEL_ORDER_RGx = CL_RGx, - PI_IMAGE_CHANNEL_ORDER_RGBx = CL_RGBx + PI_IMAGE_CHANNEL_ORDER_Rx = CL_Rx, + PI_IMAGE_CHANNEL_ORDER_RGx = CL_RGx, + PI_IMAGE_CHANNEL_ORDER_RGBx = CL_RGBx } _pi_image_channel_order; typedef enum { - PI_IMAGE_CHANNEL_TYPE_SNORM_INT8 = CL_SNORM_INT8, - PI_IMAGE_CHANNEL_TYPE_SNORM_INT16 = CL_SNORM_INT16, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT8 = CL_UNORM_INT8, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT16 = CL_UNORM_INT16, - PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = CL_UNORM_SHORT_565, - PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = CL_UNORM_SHORT_555, + PI_IMAGE_CHANNEL_TYPE_SNORM_INT8 = CL_SNORM_INT8, + PI_IMAGE_CHANNEL_TYPE_SNORM_INT16 = CL_SNORM_INT16, + PI_IMAGE_CHANNEL_TYPE_UNORM_INT8 = CL_UNORM_INT8, + PI_IMAGE_CHANNEL_TYPE_UNORM_INT16 = CL_UNORM_INT16, + PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = CL_UNORM_SHORT_565, + PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = CL_UNORM_SHORT_555, PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010 = CL_UNORM_INT_101010, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = CL_SIGNED_INT8, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = CL_SIGNED_INT16, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = CL_SIGNED_INT32, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = CL_UNSIGNED_INT8, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = CL_UNSIGNED_INT16, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = CL_UNSIGNED_INT32, - PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT = CL_HALF_FLOAT, - PI_IMAGE_CHANNEL_TYPE_FLOAT = CL_FLOAT + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = CL_SIGNED_INT8, + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = CL_SIGNED_INT16, + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = CL_SIGNED_INT32, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = CL_UNSIGNED_INT8, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = CL_UNSIGNED_INT16, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = CL_UNSIGNED_INT32, + PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT = CL_HALF_FLOAT, + PI_IMAGE_CHANNEL_TYPE_FLOAT = CL_FLOAT } _pi_image_channel_type; typedef enum { @@ -309,81 +325,80 @@ const pi_bool PI_TRUE = CL_TRUE; const pi_bool PI_FALSE = CL_FALSE; typedef enum { - PI_SAMPLER_INFO_REFERENCE_COUNT = CL_SAMPLER_REFERENCE_COUNT, - PI_SAMPLER_INFO_CONTEXT = CL_SAMPLER_CONTEXT, - PI_SAMPLER_INFO_NORMALIZED_COORDS = CL_SAMPLER_NORMALIZED_COORDS, - PI_SAMPLER_INFO_ADDRESSING_MODE = CL_SAMPLER_ADDRESSING_MODE, - PI_SAMPLER_INFO_FILTER_MODE = CL_SAMPLER_FILTER_MODE, - PI_SAMPLER_INFO_MIP_FILTER_MODE = CL_SAMPLER_MIP_FILTER_MODE, - PI_SAMPLER_INFO_LOD_MIN = CL_SAMPLER_LOD_MIN, - PI_SAMPLER_INFO_LOD_MAX = CL_SAMPLER_LOD_MAX + PI_SAMPLER_INFO_REFERENCE_COUNT = CL_SAMPLER_REFERENCE_COUNT, + PI_SAMPLER_INFO_CONTEXT = CL_SAMPLER_CONTEXT, + PI_SAMPLER_INFO_NORMALIZED_COORDS = CL_SAMPLER_NORMALIZED_COORDS, + PI_SAMPLER_INFO_ADDRESSING_MODE = CL_SAMPLER_ADDRESSING_MODE, + PI_SAMPLER_INFO_FILTER_MODE = CL_SAMPLER_FILTER_MODE, + PI_SAMPLER_INFO_MIP_FILTER_MODE = CL_SAMPLER_MIP_FILTER_MODE, + PI_SAMPLER_INFO_LOD_MIN = CL_SAMPLER_LOD_MIN, + PI_SAMPLER_INFO_LOD_MAX = CL_SAMPLER_LOD_MAX } _pi_sampler_info; typedef enum { PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = CL_ADDRESS_MIRRORED_REPEAT, - PI_SAMPLER_ADDRESSING_MODE_REPEAT = CL_ADDRESS_REPEAT, - PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = CL_ADDRESS_CLAMP_TO_EDGE, - PI_SAMPLER_ADDRESSING_MODE_CLAMP = CL_ADDRESS_CLAMP, - PI_SAMPLER_ADDRESSING_MODE_NONE = CL_ADDRESS_NONE + PI_SAMPLER_ADDRESSING_MODE_REPEAT = CL_ADDRESS_REPEAT, + PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = CL_ADDRESS_CLAMP_TO_EDGE, + PI_SAMPLER_ADDRESSING_MODE_CLAMP = CL_ADDRESS_CLAMP, + PI_SAMPLER_ADDRESSING_MODE_NONE = CL_ADDRESS_NONE } _pi_sampler_addressing_mode; typedef enum { PI_SAMPLER_FILTER_MODE_NEAREST = CL_FILTER_NEAREST, - PI_SAMPLER_FILTER_MODE_LINEAR = CL_FILTER_LINEAR, + PI_SAMPLER_FILTER_MODE_LINEAR = CL_FILTER_LINEAR, } _pi_sampler_filter_mode; typedef enum { - PI_EVENT_INFO_QUEUE = CL_EVENT_COMMAND_QUEUE, - PI_EVENT_INFO_COMMAND_TYPE = CL_EVENT_COMMAND_TYPE, - PI_EVENT_INFO_REFERENCE_COUNT = CL_EVENT_REFERENCE_COUNT, + PI_EVENT_INFO_QUEUE = CL_EVENT_COMMAND_QUEUE, + PI_EVENT_INFO_COMMAND_TYPE = CL_EVENT_COMMAND_TYPE, + PI_EVENT_INFO_REFERENCE_COUNT = CL_EVENT_REFERENCE_COUNT, PI_EVENT_INFO_COMMAND_EXECUTION_STATUS = CL_EVENT_COMMAND_EXECUTION_STATUS, - PI_EVENT_INFO_CONTEXT = CL_EVENT_CONTEXT + PI_EVENT_INFO_CONTEXT = CL_EVENT_CONTEXT } _pi_event_info; - // NOTE: this is made 64-bit to match the size of cl_mem_flags to // make the translation to OpenCL transparent. // TODO: populate // typedef pi_bitfield pi_mem_flags; // Access -const pi_mem_flags PI_MEM_FLAGS_ACCESS_RW = CL_MEM_READ_WRITE; +const pi_mem_flags PI_MEM_FLAGS_ACCESS_RW = CL_MEM_READ_WRITE; // Host pointer -const pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE = CL_MEM_USE_HOST_PTR; +const pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE = CL_MEM_USE_HOST_PTR; const pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY = CL_MEM_COPY_HOST_PTR; // NOTE: queue properties are implemented this way to better support bit // manipulations typedef pi_bitfield pi_queue_properties; const pi_queue_properties PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; const pi_queue_properties PI_QUEUE_PROFILING_ENABLE = CL_QUEUE_PROFILING_ENABLE; const pi_queue_properties PI_QUEUE_ON_DEVICE = CL_QUEUE_ON_DEVICE; const pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT = - CL_QUEUE_ON_DEVICE_DEFAULT; - -typedef _pi_result pi_result; -typedef _pi_platform_info pi_platform_info; -typedef _pi_device_type pi_device_type; -typedef _pi_device_info pi_device_info; -typedef _pi_context_info pi_context_info; -typedef _pi_queue_info pi_queue_info; -typedef _pi_image_info pi_image_info; -typedef _pi_mem_type pi_mem_type; -typedef _pi_image_channel_order pi_image_channel_order; -typedef _pi_image_channel_type pi_image_channel_type; -typedef _pi_buffer_create_type pi_buffer_create_type; + CL_QUEUE_ON_DEVICE_DEFAULT; + +typedef _pi_result pi_result; +typedef _pi_platform_info pi_platform_info; +typedef _pi_device_type pi_device_type; +typedef _pi_device_info pi_device_info; +typedef _pi_context_info pi_context_info; +typedef _pi_queue_info pi_queue_info; +typedef _pi_image_info pi_image_info; +typedef _pi_mem_type pi_mem_type; +typedef _pi_image_channel_order pi_image_channel_order; +typedef _pi_image_channel_type pi_image_channel_type; +typedef _pi_buffer_create_type pi_buffer_create_type; typedef _pi_sampler_addressing_mode pi_sampler_addressing_mode; -typedef _pi_sampler_filter_mode pi_sampler_filter_mode; -typedef _pi_sampler_info pi_sampler_info; -typedef _pi_event_status pi_event_status; -typedef _pi_event_info pi_event_info; -typedef _pi_command_type pi_command_type; -typedef _pi_program_info pi_program_info; -typedef _pi_program_build_info pi_program_build_info; -typedef _pi_program_build_status pi_program_build_status; -typedef _pi_kernel_info pi_kernel_info; -typedef _pi_kernel_group_info pi_kernel_group_info; +typedef _pi_sampler_filter_mode pi_sampler_filter_mode; +typedef _pi_sampler_info pi_sampler_info; +typedef _pi_event_status pi_event_status; +typedef _pi_event_info pi_event_info; +typedef _pi_command_type pi_command_type; +typedef _pi_program_info pi_program_info; +typedef _pi_program_build_info pi_program_build_info; +typedef _pi_program_build_status pi_program_build_status; +typedef _pi_kernel_info pi_kernel_info; +typedef _pi_kernel_group_info pi_kernel_group_info; // Entry type, matches OpenMP for compatibility struct _pi_offload_entry_struct { @@ -394,17 +409,17 @@ struct _pi_offload_entry_struct { int32_t reserved; }; -typedef _pi_offload_entry_struct * _pi_offload_entry; +typedef _pi_offload_entry_struct *_pi_offload_entry; /// Types of device binary. typedef uint8_t pi_device_binary_type; // format is not determined -static const pi_device_binary_type PI_DEVICE_BINARY_TYPE_NONE = 0; +static const pi_device_binary_type PI_DEVICE_BINARY_TYPE_NONE = 0; // specific to a device -static const pi_device_binary_type PI_DEVICE_BINARY_TYPE_NATIVE = 1; +static const pi_device_binary_type PI_DEVICE_BINARY_TYPE_NATIVE = 1; // portable binary types go next // SPIR-V -static const pi_device_binary_type PI_DEVICE_BINARY_TYPE_SPIRV = 2; +static const pi_device_binary_type PI_DEVICE_BINARY_TYPE_SPIRV = 2; // LLVM bitcode static const pi_device_binary_type PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3; @@ -433,10 +448,10 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; #define PI_DEVICE_BINARY_TARGET_SPIRV64_FPGA "spir64_fpga" /// This struct is a record of the device binary information. If the Kind field -/// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec field -/// can still be specific and denote e.g. FPGA target. -/// It must match the __tgt_device_image structure generated by -/// the clang-offload-wrapper tool when their Version field match. +/// denotes a portable binary type (SPIR-V or LLVM IR), the DeviceTargetSpec +/// field can still be specific and denote e.g. FPGA target. It must match the +/// __tgt_device_image structure generated by the clang-offload-wrapper tool +/// when their Version field match. struct pi_device_binary_struct { /// version of this structure - for backward compatibility; /// all modifications which change order/type/offsets of existing fields @@ -473,7 +488,7 @@ struct pi_device_binary_struct { _pi_offload_entry EntriesBegin; _pi_offload_entry EntriesEnd; }; -typedef pi_device_binary_struct * pi_device_binary; +typedef pi_device_binary_struct *pi_device_binary; // Offload binaries descriptor version supported by this library. static const uint16_t PI_DEVICE_BINARIES_VERSION = 1; @@ -494,7 +509,7 @@ struct pi_device_binaries_struct { _pi_offload_entry *HostEntriesBegin; _pi_offload_entry *HostEntriesEnd; }; -typedef pi_device_binaries_struct * pi_device_binaries; +typedef pi_device_binaries_struct *pi_device_binaries; // Opaque types that make reading build log errors easier. struct _pi_platform; @@ -507,15 +522,15 @@ struct _pi_kernel; struct _pi_event; struct _pi_sampler; -typedef _pi_platform * pi_platform; -typedef _pi_device * pi_device; -typedef _pi_context * pi_context; -typedef _pi_queue * pi_queue; -typedef _pi_mem * pi_mem; -typedef _pi_program * pi_program; -typedef _pi_kernel * pi_kernel; -typedef _pi_event * pi_event; -typedef _pi_sampler * pi_sampler; +typedef _pi_platform *pi_platform; +typedef _pi_device *pi_device; +typedef _pi_context *pi_context; +typedef _pi_queue *pi_queue; +typedef _pi_mem *pi_mem; +typedef _pi_program *pi_program; +typedef _pi_kernel *pi_kernel; +typedef _pi_event *pi_event; +typedef _pi_sampler *pi_sampler; typedef struct { pi_image_channel_order image_channel_order; @@ -535,8 +550,8 @@ typedef struct { pi_mem buffer; } _pi_image_desc; -typedef _pi_image_format pi_image_format; -typedef _pi_image_desc pi_image_desc; +typedef _pi_image_format pi_image_format; +typedef _pi_image_desc pi_image_desc; // // Following section contains SYCL RT Plugin Interface (PI) functions. // They are 3 distinct categories: @@ -563,54 +578,39 @@ pi_result piPluginInit(pi_plugin *plugin_info); // // Platform // -pi_result piPlatformsGet( - pi_uint32 num_entries, - pi_platform * platforms, - pi_uint32 * num_platforms); - -pi_result piPlatformGetInfo( - pi_platform platform, - pi_platform_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); +pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, + pi_uint32 *num_platforms); + +pi_result piPlatformGetInfo(pi_platform platform, pi_platform_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); // // Device // -pi_result piDevicesGet( - pi_platform platform, - pi_device_type device_type, - pi_uint32 num_entries, - pi_device * devices, - pi_uint32 * num_devices); - -pi_result piDeviceGetInfo( - pi_device device, - pi_device_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); +pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, + pi_uint32 num_entries, pi_device *devices, + pi_uint32 *num_devices); + +pi_result piDeviceGetInfo(pi_device device, pi_device_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); pi_result piDeviceRetain(pi_device device); pi_result piDeviceRelease(pi_device device); pi_result piDevicePartition( - pi_device device, - const cl_device_partition_property * properties, // TODO: untie from OpenCL - pi_uint32 num_devices, - pi_device * out_devices, - pi_uint32 * out_num_devices); + pi_device device, + const cl_device_partition_property *properties, // TODO: untie from OpenCL + pi_uint32 num_devices, pi_device *out_devices, pi_uint32 *out_num_devices); /// Selects the most appropriate device binary based on runtime information /// and the IR characteristics. /// -pi_result piextDeviceSelectBinary( - pi_device device, - pi_device_binary * binaries, - pi_uint32 num_binaries, - pi_device_binary * selected_binary); +pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *binaries, + pi_uint32 num_binaries, + pi_device_binary *selected_binary); /// Retrieves a device function pointer to a user-defined function /// \arg \c function_name. \arg \c function_pointer_ret is set to 0 if query @@ -620,33 +620,23 @@ pi_result piextDeviceSelectBinary( /// must present in the list of devices returned by \c get_device method for /// \arg \c program. /// -pi_result piextGetDeviceFunctionPointer( - pi_device device, - pi_program program, - const char * function_name, - pi_uint64 * function_pointer_ret); +pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, + const char *function_name, + pi_uint64 *function_pointer_ret); // // Context // pi_result piContextCreate( - const cl_context_properties * properties, // TODO: untie from OpenCL - pi_uint32 num_devices, - const pi_device * devices, - void (* pfn_notify)( - const char * errinfo, - const void * private_info, - size_t cb, - void * user_data), - void * user_data, - pi_context * ret_context); - -pi_result piContextGetInfo( - pi_context context, - pi_context_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); + const cl_context_properties *properties, // TODO: untie from OpenCL + pi_uint32 num_devices, const pi_device *devices, + void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, + void *user_data), + void *user_data, pi_context *ret_context); + +pi_result piContextGetInfo(pi_context context, pi_context_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); pi_result piContextRetain(pi_context context); @@ -655,18 +645,12 @@ pi_result piContextRelease(pi_context context); // // Queue // -pi_result piQueueCreate( - pi_context context, - pi_device device, - pi_queue_properties properties, - pi_queue * queue); - -pi_result piQueueGetInfo( - pi_queue command_queue, - pi_queue_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); +pi_result piQueueCreate(pi_context context, pi_device device, + pi_queue_properties properties, pi_queue *queue); + +pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); pi_result piQueueRetain(pi_queue command_queue); @@ -677,118 +661,75 @@ pi_result piQueueFinish(pi_queue command_queue); // // Memory // -pi_result piMemBufferCreate( - pi_context context, - pi_mem_flags flags, - size_t size, - void * host_ptr, - pi_mem * ret_mem); - -pi_result piMemImageCreate( - pi_context context, - pi_mem_flags flags, - const pi_image_format * image_format, - const pi_image_desc * image_desc, - void * host_ptr, - pi_mem * ret_mem); - -pi_result piMemGetInfo( - pi_mem mem, - cl_mem_info param_name, // TODO: untie from OpenCL - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); - -pi_result piMemImageGetInfo ( - pi_mem image, - pi_image_info param_name, - size_t param_value_size, - void * param_value , - size_t * param_value_size_ret); - -pi_result piMemRetain( - pi_mem mem); - -pi_result piMemRelease( - pi_mem mem); - - -pi_result piMemBufferPartition( - pi_mem buffer, - pi_mem_flags flags, - pi_buffer_create_type buffer_create_type, - void * buffer_create_info, - pi_mem * ret_mem); +pi_result piMemBufferCreate(pi_context context, pi_mem_flags flags, size_t size, + void *host_ptr, pi_mem *ret_mem); + +pi_result piMemImageCreate(pi_context context, pi_mem_flags flags, + const pi_image_format *image_format, + const pi_image_desc *image_desc, void *host_ptr, + pi_mem *ret_mem); + +pi_result piMemGetInfo(pi_mem mem, + cl_mem_info param_name, // TODO: untie from OpenCL + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); + +pi_result piMemImageGetInfo(pi_mem image, pi_image_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); + +pi_result piMemRetain(pi_mem mem); + +pi_result piMemRelease(pi_mem mem); + +pi_result piMemBufferPartition(pi_mem buffer, pi_mem_flags flags, + pi_buffer_create_type buffer_create_type, + void *buffer_create_info, pi_mem *ret_mem); // // Program // -pi_result piProgramCreate( - pi_context context, - const void * il, - size_t length, - pi_program * res_program); - -pi_result piclProgramCreateWithSource( - pi_context context, - pi_uint32 count, - const char ** strings, - const size_t * lengths, - pi_program * ret_program); - -pi_result piclProgramCreateWithBinary( - pi_context context, - pi_uint32 num_devices, - const pi_device * device_list, - const size_t * lengths, - const unsigned char ** binaries, - pi_int32 * binary_status, - pi_program * ret_program); - -pi_result piProgramGetInfo( - pi_program program, - pi_program_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); - -pi_result piProgramLink( - pi_context context, - pi_uint32 num_devices, - const pi_device * device_list, - const char * options, - pi_uint32 num_input_programs, - const pi_program * input_programs, - void (* pfn_notify)(pi_program program, - void * user_data), - void * user_data, - pi_program * ret_program); +pi_result piProgramCreate(pi_context context, const void *il, size_t length, + pi_program *res_program); + +pi_result piclProgramCreateWithSource(pi_context context, pi_uint32 count, + const char **strings, + const size_t *lengths, + pi_program *ret_program); + +pi_result piclProgramCreateWithBinary(pi_context context, pi_uint32 num_devices, + const pi_device *device_list, + const size_t *lengths, + const unsigned char **binaries, + pi_int32 *binary_status, + pi_program *ret_program); + +pi_result piProgramGetInfo(pi_program program, pi_program_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); + +pi_result piProgramLink(pi_context context, pi_uint32 num_devices, + const pi_device *device_list, const char *options, + pi_uint32 num_input_programs, + const pi_program *input_programs, + void (*pfn_notify)(pi_program program, void *user_data), + void *user_data, pi_program *ret_program); pi_result piProgramCompile( - pi_program program, - pi_uint32 num_devices, - const pi_device * device_list, - const char * options, - pi_uint32 num_input_headers, - const pi_program * input_headers, - const char ** header_include_names, - void (* pfn_notify)(pi_program program, void * user_data), - void * user_data); - -pi_result piProgramBuild( - pi_program program, - pi_uint32 num_devices, - const pi_device * device_list, - const char * options, - void (* pfn_notify)(pi_program program, void * user_data), - void * user_data); + pi_program program, pi_uint32 num_devices, const pi_device *device_list, + const char *options, pi_uint32 num_input_headers, + const pi_program *input_headers, const char **header_include_names, + void (*pfn_notify)(pi_program program, void *user_data), void *user_data); + +pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, + const pi_device *device_list, const char *options, + void (*pfn_notify)(pi_program program, + void *user_data), + void *user_data); pi_result piProgramGetBuildInfo( - pi_program program, - pi_device device, - cl_program_build_info param_name, // TODO: untie from OpenCL - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); + pi_program program, pi_device device, + cl_program_build_info param_name, // TODO: untie from OpenCL + size_t param_value_size, void *param_value, size_t *param_value_size_ret); pi_result piProgramRetain(pi_program program); @@ -802,50 +743,35 @@ typedef enum { /// indicates that the kernel might access data through USM ptrs PI_USM_INDIRECT_ACCESS, /// provides an explicit list of pointers that the kernel will access - PI_USM_PTRS = CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL + PI_USM_PTRS = CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL } _pi_kernel_exec_info; -typedef _pi_kernel_exec_info pi_kernel_exec_info; - -pi_result piKernelCreate( - pi_program program, - const char * kernel_name, - pi_kernel * ret_kernel); - -pi_result piKernelSetArg( - pi_kernel kernel, - pi_uint32 arg_index, - size_t arg_size, - const void * arg_value); - -pi_result piKernelGetInfo( - pi_kernel kernel, - pi_kernel_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); - -pi_result piKernelGetGroupInfo( - pi_kernel kernel, - pi_device device, - pi_kernel_group_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); +typedef _pi_kernel_exec_info pi_kernel_exec_info; + +pi_result piKernelCreate(pi_program program, const char *kernel_name, + pi_kernel *ret_kernel); + +pi_result piKernelSetArg(pi_kernel kernel, pi_uint32 arg_index, size_t arg_size, + const void *arg_value); + +pi_result piKernelGetInfo(pi_kernel kernel, pi_kernel_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); + +pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, + pi_kernel_group_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); pi_result piKernelGetSubGroupInfo( - pi_kernel kernel, - pi_device device, - cl_kernel_sub_group_info param_name, // TODO: untie from OpenCL - size_t input_value_size, - const void* input_value, - size_t param_value_size, - void* param_value, - size_t* param_value_size_ret); + pi_kernel kernel, pi_device device, + cl_kernel_sub_group_info param_name, // TODO: untie from OpenCL + size_t input_value_size, const void *input_value, size_t param_value_size, + void *param_value, size_t *param_value_size_ret); -pi_result piKernelRetain(pi_kernel kernel); +pi_result piKernelRetain(pi_kernel kernel); -pi_result piKernelRelease(pi_kernel kernel); +pi_result piKernelRelease(pi_kernel kernel); /// Sets up pointer arguments for CL kernels. An extra indirection /// is required due to CL argument conventions. @@ -854,11 +780,8 @@ pi_result piKernelRelease(pi_kernel kernel); /// @param arg_index is the index of the kernel argument /// @param arg_size is the size in bytes of the argument (ignored in CL) /// @param arg_value is the pointer argument -pi_result piextKernelSetArgPointer( - pi_kernel kernel, - pi_uint32 arg_index, - size_t arg_size, - const void * arg_value); +pi_result piextKernelSetArgPointer(pi_kernel kernel, pi_uint32 arg_index, + size_t arg_size, const void *arg_value); /// API to set attributes controlling kernel execution /// @@ -877,39 +800,29 @@ pi_result piKernelSetExecInfo(pi_kernel kernel, pi_kernel_exec_info value_name, // // Events // -pi_result piEventCreate( - pi_context context, - pi_event * ret_event); - -pi_result piEventGetInfo( - pi_event event, - cl_event_info param_name, // TODO: untie from OpenCL - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); - -pi_result piEventGetProfilingInfo( - pi_event event, - cl_profiling_info param_name, // TODO: untie from OpenCL - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); - -pi_result piEventsWait( - pi_uint32 num_events, - const pi_event * event_list); - -pi_result piEventSetCallback( - pi_event event, - pi_int32 command_exec_callback_type, - void (* pfn_notify)(pi_event event, - pi_int32 event_command_status, - void * user_data), - void * user_data); - -pi_result piEventSetStatus( - pi_event event, - pi_int32 execution_status); +pi_result piEventCreate(pi_context context, pi_event *ret_event); + +pi_result piEventGetInfo(pi_event event, + cl_event_info param_name, // TODO: untie from OpenCL + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); + +pi_result +piEventGetProfilingInfo(pi_event event, + cl_profiling_info param_name, // TODO: untie from OpenCL + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); + +pi_result piEventsWait(pi_uint32 num_events, const pi_event *event_list); + +pi_result piEventSetCallback(pi_event event, + pi_int32 command_exec_callback_type, + void (*pfn_notify)(pi_event event, + pi_int32 event_command_status, + void *user_data), + void *user_data); + +pi_result piEventSetStatus(pi_event event, pi_int32 execution_status); pi_result piEventRetain(pi_event event); @@ -918,17 +831,13 @@ pi_result piEventRelease(pi_event event); // // Sampler // -pi_result piSamplerCreate( - pi_context context, - const pi_sampler_properties * sampler_properties, - pi_sampler * result_sampler); - -pi_result piSamplerGetInfo( - pi_sampler sampler, - pi_sampler_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); +pi_result piSamplerCreate(pi_context context, + const pi_sampler_properties *sampler_properties, + pi_sampler *result_sampler); + +pi_result piSamplerGetInfo(pi_sampler sampler, pi_sampler_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); pi_result piSamplerRetain(pi_sampler sampler); @@ -938,232 +847,156 @@ pi_result piSamplerRelease(pi_sampler sampler); // Queue Commands // pi_result piEnqueueKernelLaunch( - pi_queue queue, - pi_kernel kernel, - pi_uint32 work_dim, - const size_t * global_work_offset, - const size_t * global_work_size, - const size_t * local_work_size, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueNativeKernel( - pi_queue queue, - void (*user_func)(void *), - void * args, - size_t cb_args, - pi_uint32 num_mem_objects, - const pi_mem * mem_list, - const void ** args_mem_loc, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueEventsWait( - pi_queue command_queue, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueMemBufferRead( - pi_queue queue, - pi_mem buffer, - pi_bool blocking_read, - size_t offset, - size_t size, - void * ptr, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueMemBufferReadRect( - pi_queue command_queue, - pi_mem buffer, - pi_bool blocking_read, - const size_t * buffer_offset, - const size_t * host_offset, - const size_t * region, - size_t buffer_row_pitch, - size_t buffer_slice_pitch, - size_t host_row_pitch, - size_t host_slice_pitch, - void * ptr, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueMemBufferWrite( - pi_queue command_queue, - pi_mem buffer, - pi_bool blocking_write, - size_t offset, - size_t size, - const void * ptr, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueMemBufferWriteRect( - pi_queue command_queue, - pi_mem buffer, - pi_bool blocking_write, - const size_t * buffer_offset, - const size_t * host_offset, - const size_t * region, - size_t buffer_row_pitch, - size_t buffer_slice_pitch, - size_t host_row_pitch, - size_t host_slice_pitch, - const void * ptr, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueMemBufferCopy( - pi_queue command_queue, - pi_mem src_buffer, - pi_mem dst_buffer, - size_t src_offset, - size_t dst_offset, - size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); + pi_queue queue, pi_kernel kernel, pi_uint32 work_dim, + const size_t *global_work_offset, const size_t *global_work_size, + const size_t *local_work_size, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +pi_result +piEnqueueNativeKernel(pi_queue queue, void (*user_func)(void *), void *args, + size_t cb_args, pi_uint32 num_mem_objects, + const pi_mem *mem_list, const void **args_mem_loc, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +pi_result piEnqueueEventsWait(pi_queue command_queue, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +pi_result piEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, + pi_bool blocking_read, size_t offset, + size_t size, void *ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); + +pi_result +piEnqueueMemBufferReadRect(pi_queue command_queue, pi_mem buffer, + pi_bool blocking_read, const size_t *buffer_offset, + const size_t *host_offset, const size_t *region, + size_t buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, + void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, + pi_bool blocking_write, size_t offset, + size_t size, const void *ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); + +pi_result +piEnqueueMemBufferWriteRect(pi_queue command_queue, pi_mem buffer, + pi_bool blocking_write, const size_t *buffer_offset, + const size_t *host_offset, const size_t *region, + size_t buffer_row_pitch, size_t buffer_slice_pitch, + size_t host_row_pitch, size_t host_slice_pitch, + const void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, + pi_mem dst_buffer, size_t src_offset, + size_t dst_offset, size_t size, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); pi_result piEnqueueMemBufferCopyRect( - pi_queue command_queue, - pi_mem src_buffer, - pi_mem dst_buffer, - const size_t * src_origin, - const size_t * dst_origin, - const size_t * region, - size_t src_row_pitch, - size_t src_slice_pitch, - size_t dst_row_pitch, - size_t dst_slice_pitch, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueMemBufferFill( - pi_queue command_queue, - pi_mem buffer, - const void * pattern, - size_t pattern_size, - size_t offset, - size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueMemImageRead( - pi_queue command_queue, - pi_mem image, - pi_bool blocking_read, - const size_t * origin, - const size_t * region, - size_t row_pitch, - size_t slice_pitch, - void * ptr, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueMemImageWrite( - pi_queue command_queue, - pi_mem image, - pi_bool blocking_write, - const size_t * origin, - const size_t * region, - size_t input_row_pitch, - size_t input_slice_pitch, - const void * ptr, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueMemImageCopy( - pi_queue command_queue, - pi_mem src_image, - pi_mem dst_image, - const size_t * src_origin, - const size_t * dst_origin, - const size_t * region, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piEnqueueMemImageFill( - pi_queue command_queue, - pi_mem image, - const void * fill_color, - const size_t * origin, - const size_t * region, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); + pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, + const size_t *src_origin, const size_t *dst_origin, const size_t *region, + size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, + size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, + const void *pattern, size_t pattern_size, + size_t offset, size_t size, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); + +pi_result piEnqueueMemImageRead(pi_queue command_queue, pi_mem image, + pi_bool blocking_read, const size_t *origin, + const size_t *region, size_t row_pitch, + size_t slice_pitch, void *ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); + +pi_result piEnqueueMemImageWrite(pi_queue command_queue, pi_mem image, + pi_bool blocking_write, const size_t *origin, + const size_t *region, size_t input_row_pitch, + size_t input_slice_pitch, const void *ptr, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); + +pi_result piEnqueueMemImageCopy(pi_queue command_queue, pi_mem src_image, + pi_mem dst_image, const size_t *src_origin, + const size_t *dst_origin, const size_t *region, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); + +pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, + const void *fill_color, const size_t *origin, + const size_t *region, + pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, + pi_event *event); pi_result piEnqueueMemBufferMap( - pi_queue command_queue, - pi_mem buffer, - pi_bool blocking_map, - cl_map_flags map_flags, // TODO: untie from OpenCL - size_t offset, - size_t size, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event, - void* * ret_map); - -pi_result piEnqueueMemUnmap( - pi_queue command_queue, - pi_mem memobj, - void * mapped_ptr, - pi_uint32 num_events_in_wait_list, - const pi_event * event_wait_list, - pi_event * event); - -pi_result piextKernelSetArgMemObj( - pi_kernel kernel, - pi_uint32 arg_index, - const pi_mem *arg_value); + pi_queue command_queue, pi_mem buffer, pi_bool blocking_map, + cl_map_flags map_flags, // TODO: untie from OpenCL + size_t offset, size_t size, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event, void **ret_map); + +pi_result piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj, + void *mapped_ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); + +pi_result piextKernelSetArgMemObj(pi_kernel kernel, pi_uint32 arg_index, + const pi_mem *arg_value); /// // USM /// typedef enum { - PI_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, - PI_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, - PI_USM_SINGLE_SHARED_SUPPORT = CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, - PI_USM_CROSS_SHARED_SUPPORT = CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + PI_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, + PI_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, + PI_USM_SINGLE_SHARED_SUPPORT = + CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + PI_USM_CROSS_SHARED_SUPPORT = + CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, PI_USM_SYSTEM_SHARED_SUPPORT = CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL } _pi_usm_capability_query; typedef enum : pi_bitfield { - PI_USM_ACCESS = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL, - PI_USM_ATOMIC_ACCESS = CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL, - PI_USM_CONCURRENT_ACCESS = CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL, - PI_USM_CONCURRENT_ATOMIC_ACCESS = CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL + PI_USM_ACCESS = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL, + PI_USM_ATOMIC_ACCESS = CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL, + PI_USM_CONCURRENT_ACCESS = CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL, + PI_USM_CONCURRENT_ATOMIC_ACCESS = + CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL } _pi_usm_capabilities; typedef enum { - PI_MEM_ALLOC_TYPE = CL_MEM_ALLOC_TYPE_INTEL, - PI_MEM_ALLOC_BASE_PTR = CL_MEM_ALLOC_BASE_PTR_INTEL, - PI_MEM_ALLOC_SIZE = CL_MEM_ALLOC_SIZE_INTEL, - PI_MEM_ALLOC_DEVICE = CL_MEM_ALLOC_DEVICE_INTEL, - PI_MEM_ALLOC_INFO_TBD0 = CL_MEM_ALLOC_INFO_TBD0_INTEL, - PI_MEM_ALLOC_INFO_TBD1 = CL_MEM_ALLOC_INFO_TBD1_INTEL, + PI_MEM_ALLOC_TYPE = CL_MEM_ALLOC_TYPE_INTEL, + PI_MEM_ALLOC_BASE_PTR = CL_MEM_ALLOC_BASE_PTR_INTEL, + PI_MEM_ALLOC_SIZE = CL_MEM_ALLOC_SIZE_INTEL, + PI_MEM_ALLOC_DEVICE = CL_MEM_ALLOC_DEVICE_INTEL, + PI_MEM_ALLOC_INFO_TBD0 = CL_MEM_ALLOC_INFO_TBD0_INTEL, + PI_MEM_ALLOC_INFO_TBD1 = CL_MEM_ALLOC_INFO_TBD1_INTEL, } _pi_mem_info; typedef enum { PI_MEM_TYPE_UNKNOWN = CL_MEM_TYPE_UNKNOWN_INTEL, - PI_MEM_TYPE_HOST = CL_MEM_TYPE_HOST_INTEL, - PI_MEM_TYPE_DEVICE = CL_MEM_TYPE_DEVICE_INTEL, - PI_MEM_TYPE_SHARED = CL_MEM_TYPE_SHARED_INTEL + PI_MEM_TYPE_HOST = CL_MEM_TYPE_HOST_INTEL, + PI_MEM_TYPE_DEVICE = CL_MEM_TYPE_DEVICE_INTEL, + PI_MEM_TYPE_SHARED = CL_MEM_TYPE_SHARED_INTEL } _pi_usm_type; -typedef enum : pi_bitfield { +typedef enum : pi_bitfield { PI_MEM_ALLOC_FLAGS = CL_MEM_ALLOC_FLAGS_INTEL } _pi_usm_mem_properties; @@ -1171,12 +1004,12 @@ typedef enum : pi_bitfield { PI_USM_MIGRATION_TBD0 = (1 << 0) } _pi_usm_migration_flags; -typedef _pi_usm_capability_query pi_usm_capability_query; -typedef _pi_usm_capabilities pi_usm_capabilities; -typedef _pi_mem_info pi_mem_info; -typedef _pi_usm_type pi_usm_type; -typedef _pi_usm_mem_properties pi_usm_mem_properties; -typedef _pi_usm_migration_flags pi_usm_migration_flags; +typedef _pi_usm_capability_query pi_usm_capability_query; +typedef _pi_usm_capabilities pi_usm_capabilities; +typedef _pi_mem_info pi_mem_info; +typedef _pi_usm_type pi_usm_type; +typedef _pi_usm_mem_properties pi_usm_mem_properties; +typedef _pi_usm_migration_flags pi_usm_migration_flags; /// Allocates host memory accessible by the device. /// @@ -1185,12 +1018,9 @@ typedef _pi_usm_migration_flags pi_usm_migration_flags; /// @param pi_usm_mem_properties are optional allocation properties /// @param size_t is the size of the allocation /// @param alignment is the desired alignment of the allocation -pi_result piextUSMHostAlloc( - void ** result_ptr, - pi_context context, - pi_usm_mem_properties * properties, - size_t size, - pi_uint32 alignment); +pi_result piextUSMHostAlloc(void **result_ptr, pi_context context, + pi_usm_mem_properties *properties, size_t size, + pi_uint32 alignment); /// Allocates device memory /// @@ -1200,13 +1030,10 @@ pi_result piextUSMHostAlloc( /// @param pi_usm_mem_properties are optional allocation properties /// @param size_t is the size of the allocation /// @param alignment is the desired alignment of the allocation -pi_result piextUSMDeviceAlloc( - void ** result_ptr, - pi_context context, - pi_device device, - pi_usm_mem_properties * properties, - size_t size, - pi_uint32 alignment); +pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, + pi_device device, + pi_usm_mem_properties *properties, size_t size, + pi_uint32 alignment); /// Allocates memory accessible on both host and device /// @@ -1216,40 +1043,32 @@ pi_result piextUSMDeviceAlloc( /// @param pi_usm_mem_properties are optional allocation properties /// @param size_t is the size of the allocation /// @param alignment is the desired alignment of the allocation -pi_result piextUSMSharedAlloc( - void ** result_ptr, - pi_context context, - pi_device device, - pi_usm_mem_properties * properties, - size_t size, - pi_uint32 alignment); +pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, + pi_device device, + pi_usm_mem_properties *properties, size_t size, + pi_uint32 alignment); /// Frees allocated USM memory /// /// @param context is the pi_context of the allocation /// @param ptr is the memory to be freed -pi_result piextUSMFree( - pi_context context, - void * ptr); +pi_result piextUSMFree(pi_context context, void *ptr); /// USM Memset API /// /// @param queue is the queue to submit to /// @param ptr is the ptr to memset -/// @param value is value to set. It is interpreted as an 8-bit value and the upper +/// @param value is value to set. It is interpreted as an 8-bit value and the +/// upper /// 24 bits are ignored /// @param count is the size in bytes to memset /// @param num_events_in_waitlist is the number of events to wait on /// @param events_waitlist is an array of events to wait on /// @param event is the event that represents this operation -pi_result piextUSMEnqueueMemset( - pi_queue queue, - void * ptr, - pi_int32 value, - size_t count, - pi_uint32 num_events_in_waitlist, - const pi_event * events_waitlist, - pi_event * event); +pi_result piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value, + size_t count, pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event); /// USM Memcpy API /// @@ -1261,15 +1080,11 @@ pi_result piextUSMEnqueueMemset( /// @param num_events_in_waitlist is the number of events to wait on /// @param events_waitlist is an array of events to wait on /// @param event is the event that represents this operation -pi_result piextUSMEnqueueMemcpy( - pi_queue queue, - pi_bool blocking, - void * dst_ptr, - const void * src_ptr, - size_t size, - pi_uint32 num_events_in_waitlist, - const pi_event * events_waitlist, - pi_event * event); +pi_result piextUSMEnqueueMemcpy(pi_queue queue, pi_bool blocking, void *dst_ptr, + const void *src_ptr, size_t size, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event); /// Hint to migrate memory to the device /// @@ -1280,14 +1095,11 @@ pi_result piextUSMEnqueueMemcpy( /// @param num_events_in_waitlist is the number of events to wait on /// @param events_waitlist is an array of events to wait on /// @param event is the event that represents this operation -pi_result piextUSMEnqueuePrefetch( - pi_queue queue, - const void * ptr, - size_t size, - pi_usm_migration_flags flags, - pi_uint32 num_events_in_waitlist, - const pi_event * events_waitlist, - pi_event * event); +pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size, + pi_usm_migration_flags flags, + pi_uint32 num_events_in_waitlist, + const pi_event *events_waitlist, + pi_event *event); /// USM Memadvise API /// @@ -1297,12 +1109,8 @@ pi_result piextUSMEnqueuePrefetch( /// @param advice is device specific advice /// @param event is the event that represents this operation // USM memadvise API to govern behavior of automatic migration mechanisms -pi_result piextUSMEnqueueMemAdvise( - pi_queue queue, - const void * ptr, - size_t length, - int advice, - pi_event * event); +pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr, + size_t length, int advice, pi_event *event); /// API to query information about USM allocated pointers /// Valid Queries: @@ -1320,13 +1128,10 @@ pi_result piextUSMEnqueueMemAdvise( /// @param param_value_size is the size of the result in bytes /// @param param_value is the result /// @param param_value_ret is how many bytes were written -pi_result piextUSMGetMemAllocInfo( - pi_context context, - const void * ptr, - pi_mem_info param_name, - size_t param_value_size, - void * param_value, - size_t * param_value_size_ret); +pi_result piextUSMGetMemAllocInfo(pi_context context, const void *ptr, + pi_mem_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret); struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin From 5e7ea068395611b97a782d539e38b291b8df61a1 Mon Sep 17 00:00:00 2001 From: Bjoern Knafla Date: Thu, 27 Feb 2020 11:40:01 +0000 Subject: [PATCH 12/19] [SYCL][CUDA] Fix context creation property parsing Align the property parsing of `piContextCreate` with the way OpenCL properties are provided, i.e., a property list contains keys/IDs and values, never just a key/ID. Furthermore enable property lists that just contain a list terminating `0`. Introduce a PI type for context properties to untie from OpenCL. Adapt the SYCL runtime and tests accordingly. Signed-off-by: Bjoern Knafla --- sycl/include/CL/sycl/backend/cuda.hpp | 4 +- sycl/include/CL/sycl/detail/pi.h | 14 ++-- sycl/plugins/cuda/pi_cuda.cpp | 82 ++++++++++++++------ sycl/plugins/opencl/pi_opencl.cpp | 12 +-- sycl/source/detail/context_impl.cpp | 5 +- sycl/unittests/pi/cuda/test_base_objects.cpp | 53 ++++++++++++- 6 files changed, 129 insertions(+), 41 deletions(-) diff --git a/sycl/include/CL/sycl/backend/cuda.hpp b/sycl/include/CL/sycl/backend/cuda.hpp index a0dfae334497f..aff3e96d54e4a 100644 --- a/sycl/include/CL/sycl/backend/cuda.hpp +++ b/sycl/include/CL/sycl/backend/cuda.hpp @@ -18,7 +18,9 @@ namespace cuda { // Mem Object info: Retrieve the raw CUDA pointer from a cl_mem #define PI_CUDA_RAW_POINTER (0xFF01) -// Context creation: Use the primary context instead of a custom one +// Context creation: Use a primary CUDA context instead of a custom one by +// providing a property value of PI_TRUE for the following +// property ID. #define PI_CONTEXT_PROPERTIES_CUDA_PRIMARY (0xFF02) // PI Command Queue using Default stream diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index eb0a2e4a316e6..1275524e8c71d 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -232,6 +232,8 @@ typedef enum { PI_LOCAL_MEM_TYPE_GLOBAL = CL_GLOBAL } _pi_local_mem_type; +typedef intptr_t pi_context_properties; + // TODO: populate typedef enum { PI_CONTEXT_INFO_DEVICES = CL_CONTEXT_DEVICES, @@ -627,12 +629,12 @@ pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, // // Context // -pi_result piContextCreate( - const cl_context_properties *properties, // TODO: untie from OpenCL - pi_uint32 num_devices, const pi_device *devices, - void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, - void *user_data), - void *user_data, pi_context *ret_context); +pi_result piContextCreate(const pi_context_properties *properties, + pi_uint32 num_devices, const pi_device *devices, + void (*pfn_notify)(const char *errinfo, + const void *private_info, + size_t cb, void *user_data), + void *user_data, pi_context *ret_context); pi_result piContextGetInfo(pi_context context, pi_context_info param_name, size_t param_value_size, void *param_value, diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index fe466ce5b832a..66b514bb29408 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1124,12 +1124,30 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } /* Context APIs */ -pi_result cuda_piContextCreate(const cl_context_properties *properties, - pi_uint32 num_devices, const pi_device *devices, - void (*pfn_notify)(const char *errinfo, - const void *private_info, - size_t cb, void *user_data), - void *user_data, pi_context *retcontext) { + +/// Create a PI CUDA context. +/// +/// By default creates a scoped context and keeps the last active CUDA context +/// on top of the CUDA context stack. +/// With the PI_CONTEXT_PROPERTIES_CUDA_PRIMARY key/id and a value of PI_TRUE +/// creates a primary CUDA context and activates it on the CUDA context stack. +/// +/// @param[in] properties 0 terminated array of key/id-value combinations. Can +/// be nullptr. Only accepts property key/id PI_CONTEXT_PROPERTIES_CUDA_PRIMARY +/// with a pi_bool value. +/// @param[in] num_devices Number of devices to create the context for. +/// @param[in] devices Devices to create the context for. +/// @param[in] pfn_notify Callback, currently unused. +/// @param[in] user_data User data for callback. +/// @param[out] retcontext Set to created context on success. +/// +/// @return PI_SUCCESS on success, otherwise an error return code. +pi_result cuda_piContextCreate(const pi_context_properties *properties, + pi_uint32 num_devices, const pi_device *devices, + void (*pfn_notify)(const char *errinfo, + const void *private_info, + size_t cb, void *user_data), + void *user_data, pi_context *retcontext) { assert(devices != nullptr); // TODO: How to implement context callback? @@ -1141,31 +1159,51 @@ pi_result cuda_piContextCreate(const cl_context_properties *properties, assert(retcontext != nullptr); pi_result errcode_ret = PI_SUCCESS; + // Parse properties. + bool property_cuda_primary = false; + while (properties && (0 != *properties)) { + // Consume property ID. + pi_context_properties id = *properties; + ++properties; + // Consume property value. + pi_context_properties value = *properties; + ++properties; + switch (id) { + case PI_CONTEXT_PROPERTIES_CUDA_PRIMARY: + assert(value == PI_FALSE || value == PI_TRUE); + property_cuda_primary = static_cast(value); + break; + default: + // Unknown property. + assert(!"Unknown piContextCreate property in property list"); + return PI_INVALID_VALUE; + } + } + std::unique_ptr<_pi_context> piContextPtr{nullptr}; try { - if (properties && *properties != PI_CONTEXT_PROPERTIES_CUDA_PRIMARY) { - throw pi_result(CL_INVALID_VALUE); - } else if (!properties) { + if (property_cuda_primary) { + // Use the CUDA primary context and assume that we want to use it + // immediately as we want to forge context switches. + CUcontext Ctxt; + errcode_ret = PI_CHECK_ERROR( + cuDevicePrimaryCtxRetain(&Ctxt, devices[0]->cuDevice_)); + piContextPtr = std::unique_ptr<_pi_context>( + new _pi_context{_pi_context::kind::primary, Ctxt, *devices}); + errcode_ret = PI_CHECK_ERROR(cuCtxPushCurrent(Ctxt)); + } else { + // Create a scoped context. CUcontext newContext, current; PI_CHECK_ERROR(cuCtxGetCurrent(¤t)); - errcode_ret = PI_CHECK_ERROR(cuCtxCreate(&newContext, CU_CTX_MAP_HOST, - (*devices)->cuDevice_)); + errcode_ret = PI_CHECK_ERROR( + cuCtxCreate(&newContext, CU_CTX_MAP_HOST, devices[0]->cuDevice_)); piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{ _pi_context::kind::user_defined, newContext, *devices}); + // For scoped contexts keep the last active CUDA one on top of the stack + // as `cuCtxCreate` replaces it implicitly otherwise. if (current != nullptr) { - // If there was an existing context on the thread we recover it PI_CHECK_ERROR(cuCtxSetCurrent(current)); } - } else if (properties - && *properties == PI_CONTEXT_PROPERTIES_CUDA_PRIMARY) { - CUcontext Ctxt; - errcode_ret = PI_CHECK_ERROR(cuDevicePrimaryCtxRetain( - &Ctxt, (*devices)->cuDevice_)); - piContextPtr = std::unique_ptr<_pi_context>( - new _pi_context{_pi_context::kind::primary, Ctxt, *devices}); - errcode_ret = PI_CHECK_ERROR(cuCtxPushCurrent(Ctxt)); - } else { - throw pi_result(CL_INVALID_VALUE); } *retcontext = piContextPtr.release(); diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 2d32088886560..97f084207976a 100755 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -451,12 +451,12 @@ pi_result OCL(piextGetDeviceFunctionPointer)(pi_device device, function_pointer_ret)); } -pi_result OCL(piContextCreate)( - const cl_context_properties *properties, // TODO: untie from OpenCL - pi_uint32 num_devices, const pi_device *devices, - void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, - void *user_data1), - void *user_data, pi_context *retcontext) { +pi_result OCL(piContextCreate)(const pi_context_properties *properties, + pi_uint32 num_devices, const pi_device *devices, + void (*pfn_notify)(const char *errinfo, + const void *private_info, + size_t cb, void *user_data1), + void *user_data, pi_context *retcontext) { pi_result ret = PI_INVALID_OPERATION; *retcontext = cast( clCreateContext(properties, cast(num_devices), diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 7039f30ee9401..1eced4ec82ce0 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -44,9 +44,8 @@ context_impl::context_impl(const vector_class Devices, if (MPlatform->is_cuda()) { #if USE_PI_CUDA - const cl_context_properties props[] = { - PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, - 0}; + const pi_context_properties props[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, + UseCUDAPrimaryContext, 0}; getPlugin().call(props, DeviceIds.size(), DeviceIds.data(), nullptr, nullptr, &MContext); diff --git a/sycl/unittests/pi/cuda/test_base_objects.cpp b/sycl/unittests/pi/cuda/test_base_objects.cpp index d854441088db3..3c4f8888a4bc2 100644 --- a/sycl/unittests/pi/cuda/test_base_objects.cpp +++ b/sycl/unittests/pi/cuda/test_base_objects.cpp @@ -73,7 +73,7 @@ TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreate) { ASSERT_EQ(cuErr, CUDA_SUCCESS); } -TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimary) { +TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimaryTrue) { pi_uint32 numPlatforms = 0; pi_platform platform; pi_device device; @@ -91,11 +91,12 @@ TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimary) { ASSERT_EQ((Plugins[0].call_nocheck( platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)), PI_SUCCESS); - cl_context_properties properties = PI_CONTEXT_PROPERTIES_CUDA_PRIMARY; + pi_context_properties properties[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, + PI_TRUE, 0}; pi_context ctxt; ASSERT_EQ((Plugins[0].call_nocheck( - &properties, 1, &device, nullptr, nullptr, &ctxt)), + properties, 1, &device, nullptr, nullptr, &ctxt)), PI_SUCCESS); EXPECT_NE(ctxt, nullptr); EXPECT_EQ(ctxt->get_device(), device); @@ -118,6 +119,52 @@ TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimary) { PI_SUCCESS); } +TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimaryFalse) { + pi_uint32 numPlatforms = 0; + pi_platform platform; + pi_device device; + + ASSERT_EQ((Plugins[0].call_nocheck( + 0, nullptr, &numPlatforms)), + PI_SUCCESS) + << "piPlatformsGet failed.\n"; + + ASSERT_EQ((Plugins[0].call_nocheck( + numPlatforms, &platform, nullptr)), + PI_SUCCESS) + << "piPlatformsGet failed.\n"; + + ASSERT_EQ((Plugins[0].call_nocheck( + platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)), + PI_SUCCESS); + pi_context_properties properties[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY, + PI_FALSE, 0}; + + pi_context ctxt; + ASSERT_EQ((Plugins[0].call_nocheck( + properties, 1, &device, nullptr, nullptr, &ctxt)), + PI_SUCCESS); + EXPECT_NE(ctxt, nullptr); + EXPECT_EQ(ctxt->get_device(), device); + EXPECT_FALSE(ctxt->is_primary()); + + // Retrieve the cuCtxt to check information is correct + CUcontext cudaContext = ctxt->get(); + unsigned int version = 0; + CUresult cuErr = cuCtxGetApiVersion(cudaContext, &version); + ASSERT_EQ(cuErr, CUDA_SUCCESS); + EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION); + + // Current context in the stack? + CUcontext current; + cuErr = cuCtxGetCurrent(¤t); + ASSERT_EQ(cuErr, CUDA_SUCCESS); + ASSERT_EQ(current, cudaContext); + ASSERT_EQ( + (Plugins[0].call_nocheck(ctxt)), + PI_SUCCESS); +} + TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreateChildThread) { pi_uint32 numPlatforms = 0; pi_platform platform; From d2147180af723a2c58e9a60f34576cbf1877e7b3 Mon Sep 17 00:00:00 2001 From: rbegam <60943159+rbegam@users.noreply.github.com> Date: Mon, 2 Mar 2020 10:42:34 -0800 Subject: [PATCH 13/19] Update sycl/include/CL/sycl/detail/pi.h Rehana Begam Co-Authored-By: Alexey Bader --- sycl/include/CL/sycl/detail/pi.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index dffa5aab9611f..da15cce88d3f9 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -449,17 +449,17 @@ typedef _pi_program_build_status pi_program_build_status; typedef _pi_kernel_info pi_kernel_info; // For compatibility with OpenCL define this not as enum. -typedef intptr_t pi_device_partition_property; -static const pi_device_partition_property +using pi_device_partition_property intptr_t; +static constexpr pi_device_partition_property PI_DEVICE_PARTITION_EQUALLY = CL_DEVICE_PARTITION_EQUALLY; -static const pi_device_partition_property +static constexpr pi_device_partition_property PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; // For compatibility with OpenCL define this not as enum. -typedef pi_bitfield pi_device_affinity_domain; -static const pi_device_affinity_domain +using pi_device_affinity_domain pi_bitfield; +static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NUMA = CL_DEVICE_AFFINITY_DOMAIN_NUMA; -static const pi_device_affinity_domain +static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; // Entry type, matches OpenMP for compatibility From 399acefe110b4cab7a073ffbad3e11e3db8d0685 Mon Sep 17 00:00:00 2001 From: rbegam <60943159+rbegam@users.noreply.github.com> Date: Mon, 2 Mar 2020 10:44:01 -0800 Subject: [PATCH 14/19] Update sycl/include/CL/sycl/detail/pi.h Rehana Begam Co-Authored-By: Alexey Bader --- sycl/include/CL/sycl/detail/pi.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index da15cce88d3f9..6b416fa256d1d 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -391,14 +391,14 @@ typedef enum { PI_SAMPLER_FILTER_MODE_LINEAR = CL_FILTER_LINEAR, } _pi_sampler_filter_mode; -typedef pi_bitfield pi_device_exec_capabilities; -const pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_KERNEL = CL_EXEC_KERNEL; -const pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL = CL_EXEC_NATIVE_KERNEL; - -typedef pi_bitfield pi_sampler_properties; -const pi_sampler_properties PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS = CL_SAMPLER_NORMALIZED_COORDS; -const pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = CL_SAMPLER_ADDRESSING_MODE; -const pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = CL_SAMPLER_FILTER_MODE; +using pi_device_exec_capabilities pi_bitfield; +constexpr pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_KERNEL = CL_EXEC_KERNEL; +constexpr pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL = CL_EXEC_NATIVE_KERNEL; + +using pi_sampler_properties pi_bitfield; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS = CL_SAMPLER_NORMALIZED_COORDS; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = CL_SAMPLER_ADDRESSING_MODE; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = CL_SAMPLER_FILTER_MODE; // NOTE: this is made 64-bit to match the size of cl_mem_flags to // make the translation to OpenCL transparent. From ef68270e1d81502b3a55b9fcb970a1dc1938f8d0 Mon Sep 17 00:00:00 2001 From: Steffen Larsen <56076654+steffenlarsen@users.noreply.github.com> Date: Mon, 2 Mar 2020 21:04:27 +0000 Subject: [PATCH 15/19] [SYCL][CUDA] Fixes context release and unnamed context scope (#1207) This PR fixes the following bugs: 1 Some times upon deletion, the deleted context would stay on the CUDA context stack and cause subsequent CUDA operations to fail. To fix this the context stack is now checked after destruction and if the dead context is still there it is removed. 2 In one location a ScopedContext was created without being assigned to a variable and would therefore die immediately. It should now live for the entirety of the surrounding scope. Signed-off-by: Steffen Larsen --- sycl/plugins/cuda/pi_cuda.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 66b514bb29408..bc5fcdc6591b2 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1230,11 +1230,14 @@ pi_result cuda_piContextRelease(pi_context ctxt) { CUcontext cuCtxt = ctxt->get(); CUcontext current = nullptr; cuCtxGetCurrent(¤t); - if(cuCtxt != current) - { - PI_CHECK_ERROR(cuCtxSetCurrent(cuCtxt)); + if (cuCtxt != current) { + PI_CHECK_ERROR(cuCtxPushCurrent(cuCtxt)); } PI_CHECK_ERROR(cuCtxSynchronize()); + cuCtxGetCurrent(¤t); + if (cuCtxt == current) { + PI_CHECK_ERROR(cuCtxPopCurrent(¤t)); + } return PI_CHECK_ERROR(cuCtxDestroy(cuCtxt)); } else { // Primary context is not destroyed, but released @@ -1305,6 +1308,7 @@ pi_result cuda_piMemRelease(pi_mem memObj) { pi_result ret = PI_SUCCESS; try { + // Do nothing if there are other references if (memObj->decrement_reference_count() > 0) { return PI_SUCCESS; @@ -1315,7 +1319,7 @@ pi_result cuda_piMemRelease(pi_mem memObj) { if (!memObj->is_sub_buffer()) { - ScopedContext(uniqueMemObj->get_context()); + ScopedContext active(uniqueMemObj->get_context()); switch (uniqueMemObj->allocMode_) { case _pi_mem::alloc_mode::classic: From a2bf2f12c4af75d66737b0b5b496d14cd28ee80f Mon Sep 17 00:00:00 2001 From: rbegam Date: Tue, 11 Feb 2020 16:46:13 -0800 Subject: [PATCH 16/19] [SYCL] Replaces some of the CL_* enums with PI_* enums. Signed-off-by: rbegam --- sycl/include/CL/sycl/detail/pi.h | 462 ++++++++++-------- sycl/include/CL/sycl/queue.hpp | 2 +- .../detail/error_handling/enqueue_kernel.cpp | 9 +- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/detail/event_info.hpp | 2 +- sycl/source/detail/program_impl.cpp | 8 +- .../program_manager/program_manager.cpp | 8 +- sycl/source/detail/queue_impl.cpp | 2 +- sycl/source/detail/queue_impl.hpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 6 +- sycl/source/queue.cpp | 2 +- sycl/test/usm/memadvise.cpp | 4 +- 12 files changed, 285 insertions(+), 224 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 1275524e8c71d..add17030ef8dc 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -83,16 +83,6 @@ typedef enum { PI_EVENT_QUEUED = CL_QUEUED } _pi_event_status; -typedef enum { - PI_COMMAND_KERNEL_LAUNCH = CL_COMMAND_NDRANGE_KERNEL, - PI_COMMAND_MEMBUFFER_WRITE = CL_COMMAND_WRITE_BUFFER, - PI_COMMAND_MEMBUFFER_READ = CL_COMMAND_READ_BUFFER, - PI_COMMAND_USER = CL_COMMAND_USER, - PI_COMMAND_EVENTS_WAIT = CL_COMMAND_MARKER, - PI_COMMAND_MEMBUFFER_COPY = CL_COMMAND_COPY_BUFFER, - PI_COMMAND_MEMBUFFER_FILL = CL_COMMAND_FILL_BUFFER -} _pi_command_type; - typedef enum { PI_PLATFORM_INFO_EXTENSIONS = CL_PLATFORM_EXTENSIONS, PI_PLATFORM_INFO_NAME = CL_PLATFORM_NAME, @@ -102,20 +92,9 @@ typedef enum { } _pi_platform_info; typedef enum { - PI_PROGRAM_INFO_REFERENCE_COUNT = CL_PROGRAM_REFERENCE_COUNT, - PI_PROGRAM_INFO_CONTEXT = CL_PROGRAM_CONTEXT, - PI_PROGRAM_INFO_NUM_DEVICES = CL_PROGRAM_NUM_DEVICES, - PI_PROGRAM_INFO_DEVICES = CL_PROGRAM_DEVICES, - PI_PROGRAM_INFO_SOURCE = CL_PROGRAM_SOURCE, - PI_PROGRAM_INFO_BINARY_SIZES = CL_PROGRAM_BINARY_SIZES, - PI_PROGRAM_INFO_BINARIES = CL_PROGRAM_BINARIES, - PI_PROGRAM_INFO_KERNEL_NAMES = CL_PROGRAM_KERNEL_NAMES -} _pi_program_info; - -typedef enum { - PI_PROGRAM_BUILD_INFO_STATUS = CL_PROGRAM_BUILD_STATUS, + PI_PROGRAM_BUILD_INFO_STATUS = CL_PROGRAM_BUILD_STATUS, PI_PROGRAM_BUILD_INFO_OPTIONS = CL_PROGRAM_BUILD_OPTIONS, - PI_PROGRAM_BUILD_INFO_LOG = CL_PROGRAM_BUILD_LOG + PI_PROGRAM_BUILD_INFO_LOG = CL_PROGRAM_BUILD_LOG } _pi_program_build_info; typedef enum { @@ -135,194 +114,256 @@ typedef enum : pi_uint64 { } _pi_device_type; typedef enum { - PI_DEVICE_INFO_TYPE = CL_DEVICE_TYPE, - PI_DEVICE_INFO_VENDOR_ID = CL_DEVICE_VENDOR_ID, - PI_DEVICE_INFO_MAX_COMPUTE_UNITS = CL_DEVICE_MAX_COMPUTE_UNITS, - PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES = CL_DEVICE_MAX_WORK_ITEM_SIZES, - PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = - CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = - CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = - CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG = - CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = - CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = - CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = - CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = - CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = - CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = - CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, - PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY = CL_DEVICE_MAX_CLOCK_FREQUENCY, - PI_DEVICE_INFO_ADDRESS_BITS = CL_DEVICE_ADDRESS_BITS, - PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, - PI_DEVICE_INFO_IMAGE_SUPPORT = CL_DEVICE_IMAGE_SUPPORT, - PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS = CL_DEVICE_MAX_READ_IMAGE_ARGS, - PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = CL_DEVICE_MAX_WRITE_IMAGE_ARGS, - PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT = CL_DEVICE_IMAGE2D_MAX_HEIGHT, - PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH = CL_DEVICE_IMAGE2D_MAX_WIDTH, - PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = CL_DEVICE_IMAGE3D_MAX_HEIGHT, - PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH = CL_DEVICE_IMAGE3D_MAX_WIDTH, - PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH = CL_DEVICE_IMAGE3D_MAX_DEPTH, - PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, - PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, - PI_DEVICE_INFO_MAX_SAMPLERS = CL_DEVICE_MAX_SAMPLERS, - PI_DEVICE_INFO_MAX_PARAMETER_SIZE = CL_DEVICE_MAX_PARAMETER_SIZE, - PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = CL_DEVICE_MEM_BASE_ADDR_ALIGN, - PI_DEVICE_INFO_HALF_FP_CONFIG = CL_DEVICE_HALF_FP_CONFIG, - PI_DEVICE_INFO_SINGLE_FP_CONFIG = CL_DEVICE_SINGLE_FP_CONFIG, - PI_DEVICE_INFO_DOUBLE_FP_CONFIG = CL_DEVICE_DOUBLE_FP_CONFIG, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = - CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, - PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, - PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, - PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, - PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, - PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = - CL_DEVICE_PROFILING_TIMER_RESOLUTION, - PI_DEVICE_INFO_IS_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, - PI_DEVICE_INFO_IS_AVAILABLE = CL_DEVICE_AVAILABLE, - PI_DEVICE_INFO_IS_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, - PI_DEVICE_INFO_IS_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, - PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, - PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = - CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, - PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, - PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, - PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, - PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, - PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, - PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, - PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, - PI_DEVICE_INFO_PROFILE = CL_DEVICE_PROFILE, - PI_DEVICE_INFO_VERSION = CL_DEVICE_VERSION, - PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, - PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, - PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, - PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = - CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, - PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, - PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = - CL_DEVICE_PARTITION_MAX_SUB_DEVICES, - PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, - PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = - CL_DEVICE_PARTITION_AFFINITY_DOMAIN, - PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, + PI_DEVICE_MEM_CACHE_TYPE_NONE = CL_NONE, + PI_DEVICE_MEM_CACHE_TYPE_READ_ONLY_CACHE = CL_READ_ONLY_CACHE, + PI_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE = CL_READ_WRITE_CACHE +} _pi_device_mem_cache_type; + +typedef enum { + PI_DEVICE_LOCAL_MEM_TYPE_LOCAL = CL_LOCAL, + PI_DEVICE_LOCAL_MEM_TYPE_GLOBAL = CL_GLOBAL +} _pi_device_local_mem_type; + +typedef enum { + PI_DEVICE_INFO_TYPE = CL_DEVICE_TYPE, + PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, + PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, + PI_DEVICE_INFO_VENDOR_ID = CL_DEVICE_VENDOR_ID, + PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, + PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, + PI_DEVICE_INFO_PROFILE = CL_DEVICE_PROFILE, + PI_DEVICE_INFO_VERSION = CL_DEVICE_VERSION, + PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, + PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, + PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, + PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, + PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = CL_DEVICE_PARTITION_MAX_SUB_DEVICES, + PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, + PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, + PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, + PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, + PI_DEVICE_INFO_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, + PI_DEVICE_INFO_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, + PI_DEVICE_INFO_MAX_COMPUTE_UNITS = CL_DEVICE_MAX_COMPUTE_UNITS, + PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, + PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE, + PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES = CL_DEVICE_MAX_WORK_ITEM_SIZES, + PI_DEVICE_INFO_SINGLE_FP_CONFIG = CL_DEVICE_SINGLE_FP_CONFIG, + PI_DEVICE_INFO_HALF_FP_CONFIG = CL_DEVICE_HALF_FP_CONFIG, + PI_DEVICE_INFO_DOUBLE_FP_CONFIG = CL_DEVICE_DOUBLE_FP_CONFIG, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, + PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, + PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, + PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, + PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = CL_DEVICE_PROFILING_TIMER_RESOLUTION, + PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, + PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, + PI_DEVICE_INFO_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, + PI_DEVICE_INFO_AVAILABLE = CL_DEVICE_AVAILABLE, + PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, + PI_DEVICE_INFO_QUEUE_PROPERTIES = CL_DEVICE_QUEUE_PROPERTIES, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG = CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY = CL_DEVICE_MAX_CLOCK_FREQUENCY, + PI_DEVICE_INFO_ADDRESS_BITS = CL_DEVICE_ADDRESS_BITS, + PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS = CL_DEVICE_MAX_READ_IMAGE_ARGS, + PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = CL_DEVICE_MAX_WRITE_IMAGE_ARGS, + PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH = CL_DEVICE_IMAGE2D_MAX_WIDTH, + PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT = CL_DEVICE_IMAGE2D_MAX_HEIGHT, + PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH = CL_DEVICE_IMAGE3D_MAX_WIDTH, + PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = CL_DEVICE_IMAGE3D_MAX_HEIGHT, + PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH = CL_DEVICE_IMAGE3D_MAX_DEPTH, + PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, + PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, + PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, + PI_DEVICE_INFO_IMAGE_SUPPORT = CL_DEVICE_IMAGE_SUPPORT, + PI_DEVICE_INFO_MAX_PARAMETER_SIZE = CL_DEVICE_MAX_PARAMETER_SIZE, + PI_DEVICE_INFO_MAX_SAMPLERS = CL_DEVICE_MAX_SAMPLERS, + PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = CL_DEVICE_MEM_BASE_ADDR_ALIGN, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT = CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT = CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL } _pi_device_info; typedef enum { - PI_LOCAL_MEM_TYPE_LOCAL = CL_LOCAL, - PI_LOCAL_MEM_TYPE_GLOBAL = CL_GLOBAL -} _pi_local_mem_type; + PI_PROGRAM_INFO_REFERENCE_COUNT = CL_PROGRAM_REFERENCE_COUNT, + PI_PROGRAM_INFO_CONTEXT = CL_PROGRAM_CONTEXT, + PI_PROGRAM_INFO_NUM_DEVICES = CL_PROGRAM_NUM_DEVICES, + PI_PROGRAM_INFO_DEVICES = CL_PROGRAM_DEVICES, + PI_PROGRAM_INFO_SOURCE = CL_PROGRAM_SOURCE, + PI_PROGRAM_INFO_BINARY_SIZES = CL_PROGRAM_BINARY_SIZES, + PI_PROGRAM_INFO_BINARIES = CL_PROGRAM_BINARIES, + PI_PROGRAM_INFO_KERNEL_NAMES = CL_PROGRAM_KERNEL_NAMES +} _pi_program_info; typedef intptr_t pi_context_properties; -// TODO: populate typedef enum { - PI_CONTEXT_INFO_DEVICES = CL_CONTEXT_DEVICES, - PI_CONTEXT_INFO_NUM_DEVICES = CL_CONTEXT_NUM_DEVICES, - PI_CONTEXT_INFO_REFERENCE_COUNT = CL_CONTEXT_REFERENCE_COUNT + PI_CONTEXT_INFO_DEVICES = CL_CONTEXT_DEVICES, + PI_CONTEXT_INFO_NUM_DEVICES = CL_CONTEXT_NUM_DEVICES, + PI_CONTEXT_INFO_PROPERTIES = CL_CONTEXT_PROPERTIES, + PI_CONTEXT_INFO_REFERENCE_COUNT = CL_CONTEXT_REFERENCE_COUNT, } _pi_context_info; -// TODO: populate typedef enum { - PI_QUEUE_INFO_DEVICE = CL_QUEUE_DEVICE, + PI_QUEUE_INFO_CONTEXT = CL_QUEUE_CONTEXT, + PI_QUEUE_INFO_DEVICE = CL_QUEUE_DEVICE, + PI_QUEUE_INFO_DEVICE_DEFAULT = CL_QUEUE_DEVICE_DEFAULT, + PI_QUEUE_INFO_PROPERTIES = CL_QUEUE_PROPERTIES, PI_QUEUE_INFO_REFERENCE_COUNT = CL_QUEUE_REFERENCE_COUNT, - PI_QUEUE_INFO_PROPERTIES = CL_QUEUE_PROPERTIES, - PI_QUEUE_INFO_CONTEXT = CL_QUEUE_CONTEXT + PI_QUEUE_INFO_SIZE = CL_QUEUE_SIZE } _pi_queue_info; typedef enum { - PI_KERNEL_INFO_FUNCTION_NAME = CL_KERNEL_FUNCTION_NAME, - PI_KERNEL_INFO_NUM_ARGS = CL_KERNEL_NUM_ARGS, - PI_KERNEL_INFO_REFERENCE_COUNT = CL_KERNEL_REFERENCE_COUNT, - PI_KERNEL_INFO_CONTEXT = CL_KERNEL_CONTEXT, - PI_KERNEL_INFO_PROGRAM = CL_KERNEL_PROGRAM + PI_IMAGE_INFO_FORMAT = CL_IMAGE_FORMAT, + PI_IMAGE_INFO_ELEMENT_SIZE = CL_IMAGE_ELEMENT_SIZE, + PI_IMAGE_INFO_ROW_PITCH = CL_IMAGE_ROW_PITCH, + PI_IMAGE_INFO_SLICE_PITCH = CL_IMAGE_SLICE_PITCH, + PI_IMAGE_INFO_WIDTH = CL_IMAGE_WIDTH, + PI_IMAGE_INFO_HEIGHT = CL_IMAGE_HEIGHT, + PI_IMAGE_INFO_DEPTH = CL_IMAGE_DEPTH +} _pi_image_info; + +typedef enum { + PI_KERNEL_INFO_FUNCTION_NAME = CL_KERNEL_FUNCTION_NAME, + PI_KERNEL_INFO_NUM_ARGS = CL_KERNEL_NUM_ARGS, + PI_KERNEL_INFO_REFERENCE_COUNT = CL_KERNEL_REFERENCE_COUNT, + PI_KERNEL_INFO_CONTEXT = CL_KERNEL_CONTEXT, + PI_KERNEL_INFO_PROGRAM = CL_KERNEL_PROGRAM, + PI_KERNEL_INFO_ATTRIBUTES = CL_KERNEL_ATTRIBUTES } _pi_kernel_info; typedef enum { - PI_KERNEL_GROUP_INFO_SIZE = CL_KERNEL_WORK_GROUP_SIZE, - PI_KERNEL_COMPILE_GROUP_INFO_SIZE = CL_KERNEL_COMPILE_WORK_GROUP_SIZE, - PI_KERNEL_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE + PI_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE = CL_KERNEL_GLOBAL_WORK_SIZE, + PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE = CL_KERNEL_WORK_GROUP_SIZE, + PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE = CL_KERNEL_COMPILE_WORK_GROUP_SIZE, + PI_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE = CL_KERNEL_LOCAL_MEM_SIZE, + PI_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE } _pi_kernel_group_info; typedef enum { - PI_IMAGE_INFO_FORMAT = CL_IMAGE_FORMAT, - PI_IMAGE_INFO_ELEMENT_SIZE = CL_IMAGE_ELEMENT_SIZE, - PI_IMAGE_INFO_ROW_PITCH = CL_IMAGE_ROW_PITCH, - PI_IMAGE_INFO_SLICE_PITCH = CL_IMAGE_SLICE_PITCH, - PI_IMAGE_INFO_WIDTH = CL_IMAGE_WIDTH, - PI_IMAGE_INFO_HEIGHT = CL_IMAGE_HEIGHT, - PI_IMAGE_INFO_DEPTH = CL_IMAGE_DEPTH -} _pi_image_info; + PI_EVENT_INFO_COMMAND_QUEUE = CL_EVENT_COMMAND_QUEUE, + PI_EVENT_INFO_CONTEXT = CL_EVENT_CONTEXT, + PI_EVENT_INFO_COMMAND_TYPE = CL_EVENT_COMMAND_TYPE, + PI_EVENT_INFO_COMMAND_EXECUTION_STATUS = CL_EVENT_COMMAND_EXECUTION_STATUS, + PI_EVENT_INFO_REFERENCE_COUNT = CL_EVENT_REFERENCE_COUNT +} _pi_event_info; + +typedef enum { + PI_COMMAND_TYPE_NDRANGE_KERNEL = CL_COMMAND_NDRANGE_KERNEL, + PI_COMMAND_TYPE_MEM_BUFFER_READ = CL_COMMAND_READ_BUFFER, + PI_COMMAND_TYPE_MEM_BUFFER_WRITE = CL_COMMAND_WRITE_BUFFER, + PI_COMMAND_TYPE_MEM_BUFFER_COPY = CL_COMMAND_COPY_BUFFER, + PI_COMMAND_TYPE_MEM_BUFFER_MAP = CL_COMMAND_MAP_BUFFER, + PI_COMMAND_TYPE_MEM_BUFFER_UNMAP = CL_COMMAND_UNMAP_MEM_OBJECT, + PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT = CL_COMMAND_READ_BUFFER_RECT, + PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT = CL_COMMAND_WRITE_BUFFER_RECT, + PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT = CL_COMMAND_COPY_BUFFER_RECT, + PI_COMMAND_TYPE_USER = CL_COMMAND_USER, + PI_COMMAND_TYPE_MEM_BUFFER_FILL = CL_COMMAND_FILL_BUFFER, + PI_COMMAND_TYPE_IMAGE_READ = CL_COMMAND_READ_IMAGE, + PI_COMMAND_TYPE_IMAGE_WRITE = CL_COMMAND_WRITE_IMAGE, + PI_COMMAND_TYPE_IMAGE_COPY = CL_COMMAND_COPY_IMAGE, + PI_COMMAND_TYPE_NATIVE_KERNEL = CL_COMMAND_NATIVE_KERNEL, + PI_COMMAND_TYPE_COPY_BUFFER_TO_IMAGE = CL_COMMAND_COPY_BUFFER_TO_IMAGE, + PI_COMMAND_TYPE_COPY_IMAGE_TO_BUFFER = CL_COMMAND_COPY_IMAGE_TO_BUFFER, + PI_COMMAND_TYPE_MAP_IMAGE = CL_COMMAND_MAP_IMAGE, + PI_COMMAND_TYPE_MARKER = CL_COMMAND_MARKER, + PI_COMMAND_TYPE_ACQUIRE_GL_OBJECTS = CL_COMMAND_ACQUIRE_GL_OBJECTS, + PI_COMMAND_TYPE_RELEASE_GL_OBJECTS = CL_COMMAND_RELEASE_GL_OBJECTS, + PI_COMMAND_TYPE_BARRIER = CL_COMMAND_BARRIER, + PI_COMMAND_TYPE_MIGRATE_MEM_OBJECTS = CL_COMMAND_MIGRATE_MEM_OBJECTS, + PI_COMMAND_TYPE_FILL_IMAGE = CL_COMMAND_FILL_IMAGE, + PI_COMMAND_TYPE_SVM_FREE = CL_COMMAND_SVM_FREE, + PI_COMMAND_TYPE_SVM_MEMCPY = CL_COMMAND_SVM_MEMCPY, + PI_COMMAND_TYPE_SVM_MEMFILL = CL_COMMAND_SVM_MEMFILL, + PI_COMMAND_TYPE_SVM_MAP = CL_COMMAND_SVM_MAP, + PI_COMMAND_TYPE_SVM_UNMAP = CL_COMMAND_SVM_UNMAP +} _pi_command_type; typedef enum { - PI_MEM_TYPE_BUFFER = CL_MEM_OBJECT_BUFFER, - PI_MEM_TYPE_IMAGE2D = CL_MEM_OBJECT_IMAGE2D, - PI_MEM_TYPE_IMAGE3D = CL_MEM_OBJECT_IMAGE3D, - PI_MEM_TYPE_IMAGE2D_ARRAY = CL_MEM_OBJECT_IMAGE2D_ARRAY, - PI_MEM_TYPE_IMAGE1D = CL_MEM_OBJECT_IMAGE1D, - PI_MEM_TYPE_IMAGE1D_ARRAY = CL_MEM_OBJECT_IMAGE1D_ARRAY, + PI_MEM_TYPE_BUFFER = CL_MEM_OBJECT_BUFFER, + PI_MEM_TYPE_IMAGE2D = CL_MEM_OBJECT_IMAGE2D, + PI_MEM_TYPE_IMAGE3D = CL_MEM_OBJECT_IMAGE3D, + PI_MEM_TYPE_IMAGE2D_ARRAY = CL_MEM_OBJECT_IMAGE2D_ARRAY, + PI_MEM_TYPE_IMAGE1D = CL_MEM_OBJECT_IMAGE1D, + PI_MEM_TYPE_IMAGE1D_ARRAY = CL_MEM_OBJECT_IMAGE1D_ARRAY, PI_MEM_TYPE_IMAGE1D_BUFFER = CL_MEM_OBJECT_IMAGE1D_BUFFER } _pi_mem_type; typedef enum { - PI_IMAGE_CHANNEL_ORDER_A = CL_A, - PI_IMAGE_CHANNEL_ORDER_R = CL_R, - PI_IMAGE_CHANNEL_ORDER_RG = CL_RG, - PI_IMAGE_CHANNEL_ORDER_RA = CL_RA, - PI_IMAGE_CHANNEL_ORDER_RGB = CL_RGB, - PI_IMAGE_CHANNEL_ORDER_RGBA = CL_RGBA, - PI_IMAGE_CHANNEL_ORDER_BGRA = CL_BGRA, - PI_IMAGE_CHANNEL_ORDER_ARGB = CL_ARGB, - PI_IMAGE_CHANNEL_ORDER_ABGR = CL_ABGR, + PI_MEM_ADVICE_SET_READ_MOSTLY = 0, // hints that memory will be read from frequently and written to rarely + PI_MEM_ADVICE_CLEAR_READ_MOSTLY, // removes the affect of PI_MEM_ADVICE_SET_READ_MOSTLY + PI_MEM_ADVICE_SET_PREFERRED_LOCATION, // hints that the preferred memory location is the specified device + PI_MEM_ADVICE_CLEAR_PREFERRED_LOCATION, // removes the affect of PI_MEM_ADVICE_SET_PREFERRED_LOCATION + PI_MEM_ADVICE_SET_ACCESSED_BY, // hints that memory will be accessed by the specified device + PI_MEM_ADVICE_CLEAR_ACCESSED_BY, // removes the affect of PI_MEM_ADVICE_SET_ACCESSED_BY + PI_MEM_ADVICE_SET_NON_ATOMIC_MOSTLY, // hints that memory will mostly be accessed non-atomically + PI_MEM_ADVICE_CLEAR_NON_ATOMIC_MOSTLY, // removes the affect of PI_MEM_ADVICE_SET_NON_ATOMIC_MOSTLY + PI_MEM_ADVICE_BIAS_CACHED, // hints that memory should be cached + PI_MEM_ADVICE_BIAS_UNCACHED // hints that memory should not be cached +} _pi_mem_advice; + +typedef enum { + PI_IMAGE_CHANNEL_ORDER_A = CL_A, + PI_IMAGE_CHANNEL_ORDER_R = CL_R, + PI_IMAGE_CHANNEL_ORDER_RG = CL_RG, + PI_IMAGE_CHANNEL_ORDER_RA = CL_RA, + PI_IMAGE_CHANNEL_ORDER_RGB = CL_RGB, + PI_IMAGE_CHANNEL_ORDER_RGBA = CL_RGBA, + PI_IMAGE_CHANNEL_ORDER_BGRA = CL_BGRA, + PI_IMAGE_CHANNEL_ORDER_ARGB = CL_ARGB, + PI_IMAGE_CHANNEL_ORDER_ABGR = CL_ABGR, PI_IMAGE_CHANNEL_ORDER_INTENSITY = CL_INTENSITY, PI_IMAGE_CHANNEL_ORDER_LUMINANCE = CL_LUMINANCE, - PI_IMAGE_CHANNEL_ORDER_Rx = CL_Rx, - PI_IMAGE_CHANNEL_ORDER_RGx = CL_RGx, - PI_IMAGE_CHANNEL_ORDER_RGBx = CL_RGBx + PI_IMAGE_CHANNEL_ORDER_Rx = CL_Rx, + PI_IMAGE_CHANNEL_ORDER_RGx = CL_RGx, + PI_IMAGE_CHANNEL_ORDER_RGBx = CL_RGBx } _pi_image_channel_order; typedef enum { - PI_IMAGE_CHANNEL_TYPE_SNORM_INT8 = CL_SNORM_INT8, - PI_IMAGE_CHANNEL_TYPE_SNORM_INT16 = CL_SNORM_INT16, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT8 = CL_UNORM_INT8, - PI_IMAGE_CHANNEL_TYPE_UNORM_INT16 = CL_UNORM_INT16, - PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = CL_UNORM_SHORT_565, - PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = CL_UNORM_SHORT_555, + PI_IMAGE_CHANNEL_TYPE_SNORM_INT8 = CL_SNORM_INT8, + PI_IMAGE_CHANNEL_TYPE_SNORM_INT16 = CL_SNORM_INT16, + PI_IMAGE_CHANNEL_TYPE_UNORM_INT8 = CL_UNORM_INT8, + PI_IMAGE_CHANNEL_TYPE_UNORM_INT16 = CL_UNORM_INT16, + PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 = CL_UNORM_SHORT_565, + PI_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 = CL_UNORM_SHORT_555, PI_IMAGE_CHANNEL_TYPE_UNORM_INT_101010 = CL_UNORM_INT_101010, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = CL_SIGNED_INT8, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = CL_SIGNED_INT16, - PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = CL_SIGNED_INT32, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = CL_UNSIGNED_INT8, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = CL_UNSIGNED_INT16, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = CL_UNSIGNED_INT32, - PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT = CL_HALF_FLOAT, - PI_IMAGE_CHANNEL_TYPE_FLOAT = CL_FLOAT + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT8 = CL_SIGNED_INT8, + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT16 = CL_SIGNED_INT16, + PI_IMAGE_CHANNEL_TYPE_SIGNED_INT32 = CL_SIGNED_INT32, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 = CL_UNSIGNED_INT8, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 = CL_UNSIGNED_INT16, + PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 = CL_UNSIGNED_INT32, + PI_IMAGE_CHANNEL_TYPE_HALF_FLOAT = CL_HALF_FLOAT, + PI_IMAGE_CHANNEL_TYPE_FLOAT = CL_FLOAT } _pi_image_channel_type; typedef enum { PI_BUFFER_CREATE_TYPE_REGION = CL_BUFFER_CREATE_TYPE_REGION } _pi_buffer_create_type; -typedef pi_bitfield pi_sampler_properties; const pi_bool PI_TRUE = CL_TRUE; const pi_bool PI_FALSE = CL_FALSE; @@ -350,13 +391,14 @@ typedef enum { PI_SAMPLER_FILTER_MODE_LINEAR = CL_FILTER_LINEAR, } _pi_sampler_filter_mode; -typedef enum { - PI_EVENT_INFO_QUEUE = CL_EVENT_COMMAND_QUEUE, - PI_EVENT_INFO_COMMAND_TYPE = CL_EVENT_COMMAND_TYPE, - PI_EVENT_INFO_REFERENCE_COUNT = CL_EVENT_REFERENCE_COUNT, - PI_EVENT_INFO_COMMAND_EXECUTION_STATUS = CL_EVENT_COMMAND_EXECUTION_STATUS, - PI_EVENT_INFO_CONTEXT = CL_EVENT_CONTEXT -} _pi_event_info; +typedef pi_bitfield pi_device_exec_capabilities; +const pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_KERNEL = CL_EXEC_KERNEL; +const pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL = CL_EXEC_NATIVE_KERNEL; + +typedef pi_bitfield pi_sampler_properties; +const pi_sampler_properties PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS = CL_SAMPLER_NORMALIZED_COORDS; +const pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = CL_SAMPLER_ADDRESSING_MODE; +const pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = CL_SAMPLER_FILTER_MODE; // NOTE: this is made 64-bit to match the size of cl_mem_flags to // make the translation to OpenCL transparent. @@ -379,28 +421,46 @@ const pi_queue_properties PI_QUEUE_ON_DEVICE = CL_QUEUE_ON_DEVICE; const pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT = CL_QUEUE_ON_DEVICE_DEFAULT; -typedef _pi_result pi_result; -typedef _pi_platform_info pi_platform_info; -typedef _pi_device_type pi_device_type; -typedef _pi_device_info pi_device_info; -typedef _pi_context_info pi_context_info; -typedef _pi_queue_info pi_queue_info; -typedef _pi_image_info pi_image_info; -typedef _pi_mem_type pi_mem_type; -typedef _pi_image_channel_order pi_image_channel_order; -typedef _pi_image_channel_type pi_image_channel_type; -typedef _pi_buffer_create_type pi_buffer_create_type; +typedef _pi_result pi_result; +typedef _pi_platform_info pi_platform_info; +typedef _pi_device_type pi_device_type; +typedef _pi_device_mem_cache_type pi_device_mem_cache_type; +typedef _pi_device_local_mem_type pi_device_local_mem_type; +typedef _pi_device_info pi_device_info; +typedef _pi_program_info pi_program_info; +typedef _pi_context_info pi_context_info; +typedef _pi_queue_info pi_queue_info; +typedef _pi_image_info pi_image_info; +typedef _pi_kernel_info pi_kernel_info; +typedef _pi_kernel_group_info pi_kernel_group_info; +typedef _pi_event_info pi_event_info; +typedef _pi_command_type pi_command_type; +typedef _pi_mem_type pi_mem_type; +typedef _pi_mem_advice pi_mem_advice; +typedef _pi_image_channel_order pi_image_channel_order; +typedef _pi_image_channel_type pi_image_channel_type; +typedef _pi_buffer_create_type pi_buffer_create_type; typedef _pi_sampler_addressing_mode pi_sampler_addressing_mode; -typedef _pi_sampler_filter_mode pi_sampler_filter_mode; -typedef _pi_sampler_info pi_sampler_info; -typedef _pi_event_status pi_event_status; -typedef _pi_event_info pi_event_info; -typedef _pi_command_type pi_command_type; -typedef _pi_program_info pi_program_info; -typedef _pi_program_build_info pi_program_build_info; -typedef _pi_program_build_status pi_program_build_status; -typedef _pi_kernel_info pi_kernel_info; -typedef _pi_kernel_group_info pi_kernel_group_info; +typedef _pi_sampler_filter_mode pi_sampler_filter_mode; +typedef _pi_sampler_info pi_sampler_info; +typedef _pi_event_status pi_event_status; +typedef _pi_program_build_info pi_program_build_info; +typedef _pi_program_build_status pi_program_build_status; +typedef _pi_kernel_info pi_kernel_info; + +// For compatibility with OpenCL define this not as enum. +typedef intptr_t pi_device_partition_property; +static const pi_device_partition_property + PI_DEVICE_PARTITION_EQUALLY = CL_DEVICE_PARTITION_EQUALLY; +static const pi_device_partition_property + PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + +// For compatibility with OpenCL define this not as enum. +typedef pi_bitfield pi_device_affinity_domain; +static const pi_device_affinity_domain + PI_DEVICE_AFFINITY_DOMAIN_NUMA = CL_DEVICE_AFFINITY_DOMAIN_NUMA; +static const pi_device_affinity_domain + PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; // Entry type, matches OpenMP for compatibility struct _pi_offload_entry_struct { diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index bb811decac723..489216dfbf68c 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -239,7 +239,7 @@ class queue { /// @param Length is a number of bytes in the allocation. /// @param Advice is a device-defined advice for the specified allocation. /// @return an event representing advice operation. - event mem_advise(const void *Ptr, size_t Length, int Advice); + event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice); /// Provides hints to the runtime library that data should be made available /// on a device earlier than Unified Shared Memory would normally require it diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 5d733ca7bbfe2..3a7ccf82dd9df 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -40,8 +40,8 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, size_t CompileWGSize[3] = {0}; Plugin.call( - Kernel, Device, PI_KERNEL_COMPILE_GROUP_INFO_SIZE, sizeof(size_t) * 3, - CompileWGSize, nullptr); + Kernel, Device, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, + sizeof(size_t) * 3, CompileWGSize, nullptr); if (CompileWGSize[0] != 0) { // OpenCL 1.x && 2.0: @@ -90,10 +90,11 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the // total number of work-items in the work-group computed as // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater - // than the value specified by PI_KERNEL_GROUP_INFO_SIZE in table 5.21. + // than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in + // table 5.21. size_t KernelWGSize = 0; Plugin.call( - Kernel, Device, PI_KERNEL_GROUP_INFO_SIZE, sizeof(size_t), + Kernel, Device, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t), &KernelWGSize, nullptr); const size_t TotalNumberOfWIs = NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 398e0cc7bdc9a..dea1f622b1ca4 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -74,7 +74,7 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) RT::PiContext TempContext; getPlugin().call( - MEvent, CL_EVENT_CONTEXT, sizeof(RT::PiContext), &TempContext, nullptr); + MEvent, PI_EVENT_INFO_CONTEXT, sizeof(RT::PiContext), &TempContext, nullptr); if (MContext->getHandleRef() != TempContext) { throw cl::sycl::invalid_parameter_error( "The syclContext must match the OpenCL context associated with the " diff --git a/sycl/source/detail/event_info.hpp b/sycl/source/detail/event_info.hpp index e2ba912cd6d70..e3f487f063ce7 100644 --- a/sycl/source/detail/event_info.hpp +++ b/sycl/source/detail/event_info.hpp @@ -36,7 +36,7 @@ template struct get_event_info { static RetType get(RT::PiEvent Event, const plugin &Plugin) { RetType Result = (RetType)0; // TODO catch an exception and put it to list of asynchronous exceptions - Plugin.call(Event, cl_profiling_info(Param), + Plugin.call(Event, pi_event_info(Param), sizeof(Result), &Result, nullptr); return Result; } diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index 915c210393579..6339213dcb090 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -81,10 +81,10 @@ program_impl::program_impl(ContextImplPtr Context, RT::PiProgram Program) : MProgram(Program), MContext(Context), MLinkable(true) { // TODO handle the case when cl_program build is in progress - cl_uint NumDevices; + pi_uint32 NumDevices; const detail::plugin &Plugin = getPlugin(); Plugin.call( - Program, PI_PROGRAM_INFO_NUM_DEVICES, sizeof(cl_uint), &NumDevices, nullptr); + Program, PI_PROGRAM_INFO_NUM_DEVICES, sizeof(pi_uint32), &NumDevices, nullptr); vector_class PiDevices(NumDevices); Plugin.call(Program, PI_PROGRAM_INFO_DEVICES, sizeof(RT::PiDevice) * NumDevices, @@ -402,10 +402,10 @@ cl_uint program_impl::get_info() const { if (is_host()) { throw invalid_object_error("This instance of program is a host instance"); } - cl_uint Result; + pi_uint32 Result; const detail::plugin &Plugin = getPlugin(); Plugin.call(MProgram, PI_PROGRAM_INFO_REFERENCE_COUNT, - sizeof(cl_uint), &Result, nullptr); + sizeof(pi_uint32), &Result, nullptr); return Result; } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index e2959329ce441..74d08711d8643 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -70,7 +70,7 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, // FIXME: we don't yet support multiple devices with a single binary. const detail::plugin &Plugin = Context->getPlugin(); #ifndef _NDEBUG - cl_uint NumDevices = 0; + pi_uint32 NumDevices = 0; Plugin.call(Context->getHandleRef(), PI_CONTEXT_INFO_NUM_DEVICES, sizeof(NumDevices), &NumDevices, @@ -435,7 +435,7 @@ ProgramManager::getClProgramFromClKernel(RT::PiKernel Kernel, RT::PiProgram Program; const detail::plugin &Plugin = Context->getPlugin(); Plugin.call( - Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(cl_program), &Program, nullptr); + Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(RT::PiProgram), &Program, nullptr); return Program; } @@ -446,8 +446,8 @@ string_class ProgramManager::getProgramBuildLog(const RT::PiProgram &Program, Plugin.call(Program, PI_PROGRAM_INFO_DEVICES, 0, nullptr, &Size); vector_class PIDevices(Size / sizeof(RT::PiDevice)); - Plugin.call(Program, PI_PROGRAM_INFO_DEVICES, Size, - PIDevices.data(), nullptr); + Plugin.call(Program, PI_PROGRAM_INFO_DEVICES, + Size, PIDevices.data(), nullptr); string_class Log = "The program was built for " + std::to_string(PIDevices.size()) + " devices"; for (RT::PiDevice &Device : PIDevices) { diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 5b7690cf5fa36..b079577eb3e3d 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -64,7 +64,7 @@ event queue_impl::memcpy(shared_ptr_class Impl, void *Dest, return ResEvent; } -event queue_impl::mem_advise(const void *Ptr, size_t Length, int Advice) { +event queue_impl::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice) { context Context = get_context(); if (Context.is_host()) { return event(); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 2d1fd58e8489a..c8434f0176b1a 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -332,7 +332,7 @@ class queue_impl { /// @param Ptr is a USM pointer to the allocation. /// @param Length is a number of bytes in the allocation. /// @param Advice is a device-defined advice for the specified allocation. - event mem_advise(const void *Ptr, size_t Length, int Advice); + event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice); /// Puts exception to the list of asynchronous ecxeptions. /// diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index d9859929191f6..6dfdc29d2726e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -934,11 +934,11 @@ cl_int ExecCGCommand::enqueueImp() { pi_mem MemArg = (pi_mem)AllocaCmd->getMemAllocation(); Plugin.call(Kernel, Arg.MIndex, &MemArg); #else - cl_mem MemArg = (cl_mem)AllocaCmd->getMemAllocation(); + RT::PiMem MemArg = (RT::PiMem)AllocaCmd->getMemAllocation(); Plugin.call(Kernel, Arg.MIndex, - sizeof(cl_mem), &MemArg); + sizeof(RT::PiMem), &MemArg); Plugin.call(Kernel, Arg.MIndex, - sizeof(cl_mem), &MemArg); + sizeof(RT::PiMem), &MemArg); #endif break; } diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index c47eba506958d..ea057815b8524 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -102,7 +102,7 @@ event queue::memcpy(void *dest, const void *src, size_t count) { return impl->memcpy(impl, dest, src, count); } -event queue::mem_advise(const void *ptr, size_t length, int advice) { +event queue::mem_advise(const void *ptr, size_t length, pi_mem_advice advice) { return impl->mem_advise(ptr, length, advice); } diff --git a/sycl/test/usm/memadvise.cpp b/sycl/test/usm/memadvise.cpp index a7e152b02d946..111519169c7ac 100644 --- a/sycl/test/usm/memadvise.cpp +++ b/sycl/test/usm/memadvise.cpp @@ -37,7 +37,7 @@ int main() { if (s_head == nullptr) { return -1; } - q.mem_advise(s_head, sizeof(Node), 42); + q.mem_advise(s_head, sizeof(Node), PI_MEM_ADVICE_SET_READ_MOSTLY); Node *s_cur = s_head; for (int i = 0; i < numNodes; i++) { @@ -48,7 +48,7 @@ int main() { if (s_cur->pNext == nullptr) { return -1; } - q.mem_advise(s_cur->pNext, sizeof(Node), 42); + q.mem_advise(s_cur->pNext, sizeof(Node), PI_MEM_ADVICE_SET_READ_MOSTLY); } else { s_cur->pNext = nullptr; } From ec0b39d87417bfd7e63c201ab8378ec6856c6865 Mon Sep 17 00:00:00 2001 From: rbegam Date: Fri, 28 Feb 2020 17:12:41 -0800 Subject: [PATCH 17/19] [SYCL] Rearranges enum pi_device_info. Signed-off-by: rbegam --- sycl/include/CL/sycl/detail/pi.h | 90 ++++++++++++++++---------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index add17030ef8dc..a9dc8724cf5da 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -126,47 +126,14 @@ typedef enum { typedef enum { PI_DEVICE_INFO_TYPE = CL_DEVICE_TYPE, - PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, - PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, PI_DEVICE_INFO_VENDOR_ID = CL_DEVICE_VENDOR_ID, - PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, - PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, - PI_DEVICE_INFO_PROFILE = CL_DEVICE_PROFILE, - PI_DEVICE_INFO_VERSION = CL_DEVICE_VERSION, - PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, - PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, - PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, - PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, - PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = CL_DEVICE_PARTITION_MAX_SUB_DEVICES, - PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, - PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, - PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, - PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, - PI_DEVICE_INFO_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, - PI_DEVICE_INFO_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, PI_DEVICE_INFO_MAX_COMPUTE_UNITS = CL_DEVICE_MAX_COMPUTE_UNITS, - PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS = CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES = CL_DEVICE_MAX_WORK_ITEM_SIZES, + PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE, PI_DEVICE_INFO_SINGLE_FP_CONFIG = CL_DEVICE_SINGLE_FP_CONFIG, PI_DEVICE_INFO_HALF_FP_CONFIG = CL_DEVICE_HALF_FP_CONFIG, PI_DEVICE_INFO_DOUBLE_FP_CONFIG = CL_DEVICE_DOUBLE_FP_CONFIG, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, - PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, - PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, - PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, - PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = CL_DEVICE_PROFILING_TIMER_RESOLUTION, - PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, - PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, - PI_DEVICE_INFO_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, - PI_DEVICE_INFO_AVAILABLE = CL_DEVICE_AVAILABLE, - PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, PI_DEVICE_INFO_QUEUE_PROPERTIES = CL_DEVICE_QUEUE_PROPERTIES, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, @@ -175,8 +142,17 @@ typedef enum { PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY = CL_DEVICE_MAX_CLOCK_FREQUENCY, PI_DEVICE_INFO_ADDRESS_BITS = CL_DEVICE_ADDRESS_BITS, + PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, + PI_DEVICE_INFO_IMAGE_SUPPORT = CL_DEVICE_IMAGE_SUPPORT, PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS = CL_DEVICE_MAX_READ_IMAGE_ARGS, PI_DEVICE_INFO_MAX_WRITE_IMAGE_ARGS = CL_DEVICE_MAX_WRITE_IMAGE_ARGS, PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH = CL_DEVICE_IMAGE2D_MAX_WIDTH, @@ -186,18 +162,44 @@ typedef enum { PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH = CL_DEVICE_IMAGE3D_MAX_DEPTH, PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, - PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, - PI_DEVICE_INFO_IMAGE_SUPPORT = CL_DEVICE_IMAGE_SUPPORT, - PI_DEVICE_INFO_MAX_PARAMETER_SIZE = CL_DEVICE_MAX_PARAMETER_SIZE, PI_DEVICE_INFO_MAX_SAMPLERS = CL_DEVICE_MAX_SAMPLERS, + PI_DEVICE_INFO_MAX_PARAMETER_SIZE = CL_DEVICE_MAX_PARAMETER_SIZE, PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = CL_DEVICE_MEM_BASE_ADDR_ALIGN, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, + PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, + PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, + PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, + PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, + PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = CL_DEVICE_PROFILING_TIMER_RESOLUTION, + PI_DEVICE_INFO_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, + PI_DEVICE_INFO_AVAILABLE = CL_DEVICE_AVAILABLE, + PI_DEVICE_INFO_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, + PI_DEVICE_INFO_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, + PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, + PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, + PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, + PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, + PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, + PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, + PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, + PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR, + PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION, + PI_DEVICE_INFO_PROFILE = CL_DEVICE_PROFILE, + PI_DEVICE_INFO_VERSION = CL_DEVICE_VERSION, + PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, + PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, + PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, + PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, + PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, + PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, + PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = CL_DEVICE_PARTITION_MAX_SUB_DEVICES, + PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, + PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, PI_DEVICE_INFO_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, PI_DEVICE_INFO_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, From 5bf8bc918b437594b041578e16d23fdb26ae689b Mon Sep 17 00:00:00 2001 From: rbegam Date: Mon, 2 Mar 2020 14:40:08 -0800 Subject: [PATCH 18/19] [SYCL] Update sycl/include/CL/sycl/detail/pi.h Signed-off-by: rbegam --- sycl/include/CL/sycl/detail/pi.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index a9dc8724cf5da..a36abce10914e 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -393,14 +393,14 @@ typedef enum { PI_SAMPLER_FILTER_MODE_LINEAR = CL_FILTER_LINEAR, } _pi_sampler_filter_mode; -typedef pi_bitfield pi_device_exec_capabilities; -const pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_KERNEL = CL_EXEC_KERNEL; -const pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL = CL_EXEC_NATIVE_KERNEL; +using pi_device_exec_capabilities pi_bitfield; +constexpr pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_KERNEL = CL_EXEC_KERNEL; +constexpr pi_device_exec_capabilities PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL = CL_EXEC_NATIVE_KERNEL; -typedef pi_bitfield pi_sampler_properties; -const pi_sampler_properties PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS = CL_SAMPLER_NORMALIZED_COORDS; -const pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = CL_SAMPLER_ADDRESSING_MODE; -const pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = CL_SAMPLER_FILTER_MODE; +using pi_sampler_properties pi_bitfield; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_NORMALIZED_COORDS = CL_SAMPLER_NORMALIZED_COORDS; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = CL_SAMPLER_ADDRESSING_MODE; +constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = CL_SAMPLER_FILTER_MODE; // NOTE: this is made 64-bit to match the size of cl_mem_flags to // make the translation to OpenCL transparent. @@ -451,17 +451,17 @@ typedef _pi_program_build_status pi_program_build_status; typedef _pi_kernel_info pi_kernel_info; // For compatibility with OpenCL define this not as enum. -typedef intptr_t pi_device_partition_property; -static const pi_device_partition_property +using pi_device_partition_property intptr_t; +static constexpr pi_device_partition_property PI_DEVICE_PARTITION_EQUALLY = CL_DEVICE_PARTITION_EQUALLY; -static const pi_device_partition_property +static constexpr pi_device_partition_property PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; // For compatibility with OpenCL define this not as enum. -typedef pi_bitfield pi_device_affinity_domain; -static const pi_device_affinity_domain +using pi_device_affinity_domain pi_bitfield; +static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NUMA = CL_DEVICE_AFFINITY_DOMAIN_NUMA; -static const pi_device_affinity_domain +static constexpr pi_device_affinity_domain PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; // Entry type, matches OpenMP for compatibility From b6ec99910281ec64c059cd1b84850f24ebba0e97 Mon Sep 17 00:00:00 2001 From: rbegam Date: Mon, 2 Mar 2020 16:07:40 -0800 Subject: [PATCH 19/19] [SYCL] Resolves conflicts for sycl/include/CL/sycl/detail/pi.h Signed-off-by: rbegam --- sycl/include/CL/sycl/detail/pi.h | 181 ++++++++++++++++++------------- 1 file changed, 107 insertions(+), 74 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index a36abce10914e..83a6dd0d8c9c7 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -53,42 +53,42 @@ typedef pi_uint64 pi_bitfield; // TODO: populate PI enums. // typedef enum { - PI_SUCCESS = CL_SUCCESS, - PI_RESULT_INVALID_KERNEL_NAME = CL_INVALID_KERNEL_NAME, - PI_INVALID_OPERATION = CL_INVALID_OPERATION, - PI_INVALID_KERNEL = CL_INVALID_KERNEL, - PI_INVALID_QUEUE_PROPERTIES = CL_INVALID_QUEUE_PROPERTIES, - PI_INVALID_VALUE = CL_INVALID_VALUE, - PI_INVALID_CONTEXT = CL_INVALID_CONTEXT, - PI_INVALID_PLATFORM = CL_INVALID_PLATFORM, - PI_INVALID_DEVICE = CL_INVALID_DEVICE, - PI_INVALID_BINARY = CL_INVALID_BINARY, - PI_INVALID_QUEUE = CL_INVALID_COMMAND_QUEUE, - PI_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY, - PI_INVALID_PROGRAM = CL_INVALID_PROGRAM, - PI_INVALID_MEM_OBJECT = CL_INVALID_MEM_OBJECT, - PI_OUT_OF_RESOURCES = CL_OUT_OF_RESOURCES, - PI_INVALID_EVENT = CL_INVALID_EVENT, - PI_INVALID_EVENT_WAIT_LIST = CL_INVALID_EVENT_WAIT_LIST, + PI_SUCCESS = CL_SUCCESS, + PI_RESULT_INVALID_KERNEL_NAME = CL_INVALID_KERNEL_NAME, + PI_INVALID_OPERATION = CL_INVALID_OPERATION, + PI_INVALID_KERNEL = CL_INVALID_KERNEL, + PI_INVALID_QUEUE_PROPERTIES = CL_INVALID_QUEUE_PROPERTIES, + PI_INVALID_VALUE = CL_INVALID_VALUE, + PI_INVALID_CONTEXT = CL_INVALID_CONTEXT, + PI_INVALID_PLATFORM = CL_INVALID_PLATFORM, + PI_INVALID_DEVICE = CL_INVALID_DEVICE, + PI_INVALID_BINARY = CL_INVALID_BINARY, + PI_INVALID_QUEUE = CL_INVALID_COMMAND_QUEUE, + PI_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY, + PI_INVALID_PROGRAM = CL_INVALID_PROGRAM, + PI_INVALID_MEM_OBJECT = CL_INVALID_MEM_OBJECT, + PI_OUT_OF_RESOURCES = CL_OUT_OF_RESOURCES, + PI_INVALID_EVENT = CL_INVALID_EVENT, + PI_INVALID_EVENT_WAIT_LIST = CL_INVALID_EVENT_WAIT_LIST, PI_MISALIGNED_SUB_BUFFER_OFFSET = CL_MISALIGNED_SUB_BUFFER_OFFSET, - PI_BUILD_PROGRAM_FAILURE = CL_BUILD_PROGRAM_FAILURE, - PI_INVALID_WORK_GROUP_SIZE = CL_INVALID_WORK_GROUP_SIZE, - PI_ERROR_UNKNOWN = -999 + PI_BUILD_PROGRAM_FAILURE = CL_BUILD_PROGRAM_FAILURE, + PI_INVALID_WORK_GROUP_SIZE = CL_INVALID_WORK_GROUP_SIZE, + PI_ERROR_UNKNOWN = -999 } _pi_result; typedef enum { - PI_EVENT_COMPLETE = CL_COMPLETE, - PI_EVENT_RUNNING = CL_RUNNING, + PI_EVENT_COMPLETE = CL_COMPLETE, + PI_EVENT_RUNNING = CL_RUNNING, PI_EVENT_SUBMITTED = CL_SUBMITTED, - PI_EVENT_QUEUED = CL_QUEUED + PI_EVENT_QUEUED = CL_QUEUED } _pi_event_status; typedef enum { PI_PLATFORM_INFO_EXTENSIONS = CL_PLATFORM_EXTENSIONS, - PI_PLATFORM_INFO_NAME = CL_PLATFORM_NAME, - PI_PLATFORM_INFO_PROFILE = CL_PLATFORM_PROFILE, - PI_PLATFORM_INFO_VENDOR = CL_PLATFORM_VENDOR, - PI_PLATFORM_INFO_VERSION = CL_PLATFORM_VERSION, + PI_PLATFORM_INFO_NAME = CL_PLATFORM_NAME, + PI_PLATFORM_INFO_PROFILE = CL_PLATFORM_PROFILE, + PI_PLATFORM_INFO_VENDOR = CL_PLATFORM_VENDOR, + PI_PLATFORM_INFO_VERSION = CL_PLATFORM_VERSION, } _pi_platform_info; typedef enum { @@ -98,9 +98,9 @@ typedef enum { } _pi_program_build_info; typedef enum { - PI_PROGRAM_BUILD_STATUS_NONE = CL_BUILD_NONE, - PI_PROGRAM_BUILD_STATUS_ERROR = CL_BUILD_ERROR, - PI_PROGRAM_BUILD_STATUS_SUCCESS = CL_BUILD_SUCCESS, + PI_PROGRAM_BUILD_STATUS_NONE = CL_BUILD_NONE, + PI_PROGRAM_BUILD_STATUS_ERROR = CL_BUILD_ERROR, + PI_PROGRAM_BUILD_STATUS_SUCCESS = CL_BUILD_SUCCESS, PI_PROGRAM_BUILD_STATUS_IN_PROGRESS = CL_BUILD_IN_PROGRESS } _pi_program_build_status; @@ -135,20 +135,34 @@ typedef enum { PI_DEVICE_INFO_HALF_FP_CONFIG = CL_DEVICE_HALF_FP_CONFIG, PI_DEVICE_INFO_DOUBLE_FP_CONFIG = CL_DEVICE_DOUBLE_FP_CONFIG, PI_DEVICE_INFO_QUEUE_PROPERTIES = CL_DEVICE_QUEUE_PROPERTIES, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG = CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, - PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, - PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, + PI_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF = + CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR = + CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT = + CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT = + CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG = + CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT = + CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE = + CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, + PI_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF = + CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, PI_DEVICE_INFO_MAX_CLOCK_FREQUENCY = CL_DEVICE_MAX_CLOCK_FREQUENCY, PI_DEVICE_INFO_ADDRESS_BITS = CL_DEVICE_ADDRESS_BITS, PI_DEVICE_INFO_MAX_MEM_ALLOC_SIZE = CL_DEVICE_MAX_MEM_ALLOC_SIZE, @@ -160,29 +174,39 @@ typedef enum { PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH = CL_DEVICE_IMAGE3D_MAX_WIDTH, PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT = CL_DEVICE_IMAGE3D_MAX_HEIGHT, PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH = CL_DEVICE_IMAGE3D_MAX_DEPTH, - PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, + PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE = + CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, PI_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, PI_DEVICE_INFO_MAX_SAMPLERS = CL_DEVICE_MAX_SAMPLERS, PI_DEVICE_INFO_MAX_PARAMETER_SIZE = CL_DEVICE_MAX_PARAMETER_SIZE, PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN = CL_DEVICE_MEM_BASE_ADDR_ALIGN, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, - PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE = + CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHELINE_SIZE = + CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, + PI_DEVICE_INFO_GLOBAL_MEM_CACHE_SIZE = + CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, PI_DEVICE_INFO_GLOBAL_MEM_SIZE = CL_DEVICE_GLOBAL_MEM_SIZE, - PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + PI_DEVICE_INFO_MAX_CONSTANT_BUFFER_SIZE = + CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, PI_DEVICE_INFO_MAX_CONSTANT_ARGS = CL_DEVICE_MAX_CONSTANT_ARGS, PI_DEVICE_INFO_LOCAL_MEM_TYPE = CL_DEVICE_LOCAL_MEM_TYPE, PI_DEVICE_INFO_LOCAL_MEM_SIZE = CL_DEVICE_LOCAL_MEM_SIZE, - PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = CL_DEVICE_ERROR_CORRECTION_SUPPORT, + PI_DEVICE_INFO_ERROR_CORRECTION_SUPPORT = + CL_DEVICE_ERROR_CORRECTION_SUPPORT, PI_DEVICE_INFO_HOST_UNIFIED_MEMORY = CL_DEVICE_HOST_UNIFIED_MEMORY, - PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = CL_DEVICE_PROFILING_TIMER_RESOLUTION, + PI_DEVICE_INFO_PROFILING_TIMER_RESOLUTION = + CL_DEVICE_PROFILING_TIMER_RESOLUTION, PI_DEVICE_INFO_ENDIAN_LITTLE = CL_DEVICE_ENDIAN_LITTLE, PI_DEVICE_INFO_AVAILABLE = CL_DEVICE_AVAILABLE, PI_DEVICE_INFO_COMPILER_AVAILABLE = CL_DEVICE_COMPILER_AVAILABLE, PI_DEVICE_INFO_LINKER_AVAILABLE = CL_DEVICE_LINKER_AVAILABLE, - PI_DEVICE_INFO_EXECUTION_CAPABILITIES = CL_DEVICE_EXECUTION_CAPABILITIES, - PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, - PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, + PI_DEVICE_INFO_EXECUTION_CAPABILITIES = + CL_DEVICE_EXECUTION_CAPABILITIES, + PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES = + CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, + PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES = + CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS, PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT, @@ -194,17 +218,25 @@ typedef enum { PI_DEVICE_INFO_OPENCL_C_VERSION = CL_DEVICE_OPENCL_C_VERSION, PI_DEVICE_INFO_EXTENSIONS = CL_DEVICE_EXTENSIONS, PI_DEVICE_INFO_PRINTF_BUFFER_SIZE = CL_DEVICE_PRINTF_BUFFER_SIZE, - PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, + PI_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = + CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, PI_DEVICE_INFO_PARENT_DEVICE = CL_DEVICE_PARENT_DEVICE, PI_DEVICE_INFO_PARTITION_PROPERTIES = CL_DEVICE_PARTITION_PROPERTIES, - PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = CL_DEVICE_PARTITION_MAX_SUB_DEVICES, - PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = CL_DEVICE_PARTITION_AFFINITY_DOMAIN, + PI_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = + CL_DEVICE_PARTITION_MAX_SUB_DEVICES, + PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = + CL_DEVICE_PARTITION_AFFINITY_DOMAIN, PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, - PI_DEVICE_INFO_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, - PI_DEVICE_INFO_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, - PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, - PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT = CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, - PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT = CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL + PI_DEVICE_INFO_USM_HOST_SUPPORT = + CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_DEVICE_SUPPORT = + CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT = + CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_CROSS_SHARED_SUPPORT = + CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + PI_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT = + CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL } _pi_device_info; typedef enum { @@ -215,6 +247,7 @@ typedef enum { PI_PROGRAM_INFO_SOURCE = CL_PROGRAM_SOURCE, PI_PROGRAM_INFO_BINARY_SIZES = CL_PROGRAM_BINARY_SIZES, PI_PROGRAM_INFO_BINARIES = CL_PROGRAM_BINARIES, + PI_PROGRAM_INFO_NUM_KERNELS = CL_PROGRAM_NUM_KERNELS, PI_PROGRAM_INFO_KERNEL_NAMES = CL_PROGRAM_KERNEL_NAMES } _pi_program_info; @@ -370,27 +403,27 @@ const pi_bool PI_TRUE = CL_TRUE; const pi_bool PI_FALSE = CL_FALSE; typedef enum { - PI_SAMPLER_INFO_REFERENCE_COUNT = CL_SAMPLER_REFERENCE_COUNT, - PI_SAMPLER_INFO_CONTEXT = CL_SAMPLER_CONTEXT, + PI_SAMPLER_INFO_REFERENCE_COUNT = CL_SAMPLER_REFERENCE_COUNT, + PI_SAMPLER_INFO_CONTEXT = CL_SAMPLER_CONTEXT, PI_SAMPLER_INFO_NORMALIZED_COORDS = CL_SAMPLER_NORMALIZED_COORDS, - PI_SAMPLER_INFO_ADDRESSING_MODE = CL_SAMPLER_ADDRESSING_MODE, - PI_SAMPLER_INFO_FILTER_MODE = CL_SAMPLER_FILTER_MODE, - PI_SAMPLER_INFO_MIP_FILTER_MODE = CL_SAMPLER_MIP_FILTER_MODE, - PI_SAMPLER_INFO_LOD_MIN = CL_SAMPLER_LOD_MIN, - PI_SAMPLER_INFO_LOD_MAX = CL_SAMPLER_LOD_MAX + PI_SAMPLER_INFO_ADDRESSING_MODE = CL_SAMPLER_ADDRESSING_MODE, + PI_SAMPLER_INFO_FILTER_MODE = CL_SAMPLER_FILTER_MODE, + PI_SAMPLER_INFO_MIP_FILTER_MODE = CL_SAMPLER_MIP_FILTER_MODE, + PI_SAMPLER_INFO_LOD_MIN = CL_SAMPLER_LOD_MIN, + PI_SAMPLER_INFO_LOD_MAX = CL_SAMPLER_LOD_MAX } _pi_sampler_info; typedef enum { PI_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT = CL_ADDRESS_MIRRORED_REPEAT, - PI_SAMPLER_ADDRESSING_MODE_REPEAT = CL_ADDRESS_REPEAT, - PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = CL_ADDRESS_CLAMP_TO_EDGE, - PI_SAMPLER_ADDRESSING_MODE_CLAMP = CL_ADDRESS_CLAMP, - PI_SAMPLER_ADDRESSING_MODE_NONE = CL_ADDRESS_NONE + PI_SAMPLER_ADDRESSING_MODE_REPEAT = CL_ADDRESS_REPEAT, + PI_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE = CL_ADDRESS_CLAMP_TO_EDGE, + PI_SAMPLER_ADDRESSING_MODE_CLAMP = CL_ADDRESS_CLAMP, + PI_SAMPLER_ADDRESSING_MODE_NONE = CL_ADDRESS_NONE } _pi_sampler_addressing_mode; typedef enum { PI_SAMPLER_FILTER_MODE_NEAREST = CL_FILTER_NEAREST, - PI_SAMPLER_FILTER_MODE_LINEAR = CL_FILTER_LINEAR, + PI_SAMPLER_FILTER_MODE_LINEAR = CL_FILTER_LINEAR, } _pi_sampler_filter_mode; using pi_device_exec_capabilities pi_bitfield;