diff --git a/sycl/plugins/opencl/CMakeLists.txt b/sycl/plugins/opencl/CMakeLists.txt index 9943827e48788..f516dee7a2574 100644 --- a/sycl/plugins/opencl/CMakeLists.txt +++ b/sycl/plugins/opencl/CMakeLists.txt @@ -10,12 +10,22 @@ #TODO: Currently, the pi.h header is common between sycl and plugin library sources. #This can be changed by copying the pi.h file in the plugins project. +find_package(Threads REQUIRED) + add_sycl_plugin(opencl SOURCES + "../unified_runtime/pi2ur.hpp" + "../unified_runtime/pi2ur.cpp" + "../unified_runtime/ur/ur.hpp" + "../unified_runtime/ur/ur.cpp" "${sycl_inc_dir}/sycl/detail/pi.h" "pi_opencl.cpp" + INCLUDE_DIRS + ${CMAKE_CURRENT_SOURCE_DIR}/../unified_runtime # for Unified Runtime LIBRARIES OpenCL-ICD + Threads::Threads + UnifiedRuntime-Headers ) set_target_properties(pi_opencl PROPERTIES LINKER_LANGUAGE CXX) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 8c30389285c83..18020ac100eb3 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -17,7 +17,6 @@ #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #include -#include #include #include @@ -79,6 +78,37 @@ constexpr size_t MaxMessageSize = 256; thread_local pi_result ErrorMessageCode = PI_SUCCESS; thread_local char ErrorMessage[MaxMessageSize]; +// Following are helper data structures to extend OpenCL plugin behavior. +// These data structures are persistent during run-time. +// TODO: Optimizations to clean-up resources during CL objects deletion +// A longer term solution will be to extend pi_* data structures to add new +// fields and get rid of these data structures. + +// This data structure is used to represent information about cslice subdevices. +struct csliceSubDevInfo { + cl_device_id cl_dev; // device to which the cslice belongs + size_t family; + size_t index; +}; + +// This data structure is used to store all cslice subdevices. +// For a regular pi_device, cl_device_id can be obtained by a simple typecast. +// For a cslice subdevice, we explicitly store the cl_device_id and then +// retrieve it when needed. +static std::map cslice_devices; + +// This map is used to capture pi_device info during queue creation and retrieve +// it during getinfo calls. +static std::map queue2dev; + +// This map is used to capture pi_device info during context creation and +// retrieve it during getinfo calls. +static std::map> context2devlist; + +// This map is used to capture pi_device info during program creation and +// retrieve it during getinfo calls. +static std::map> program2devlist; + // Utility function for setting a message and warning [[maybe_unused]] static void setErrorMessage(const char *message, pi_result error_code) { @@ -262,9 +292,65 @@ static pi_result USMSetIndirectAccess(pi_kernel kernel) { extern "C" { +// Helper functions + +// Returns true if the device is a cslice subdevice. +static bool isCCS(pi_device device) { + if (!device) + return false; + return cslice_devices.find(device) != cslice_devices.end(); +} + +// Returns the underlying CL device. +// For a regular pi_device, cl_device_id can be obtained by a simple typecast. +// For a cslice subdevice, we explicitly store the cl_device_id and then +// retrieve it when needed. +static cl_device_id getClDevice(pi_device device) { + assert(device); + if (isCCS(device)) + return cslice_devices[device].cl_dev; + else + return cast(device); +} + +// Returns true if the device is a root device. +static bool isRootDevice(pi_device device) { + if (!device) + return false; + if (isCCS(device)) + return false; + cl_device_id parentId = nullptr; + clGetDeviceInfo(getClDevice(device), CL_DEVICE_PARENT_DEVICE, + sizeof(cl_device_id), &parentId, NULL); + if (parentId == nullptr) + return true; + return false; +} + +// Returns the list of underlying cl_devices. +static std::vector getClDevices(pi_uint32 num_devices, + const pi_device *devices) { + std::vector cl_devices(num_devices); + for (size_t i = 0; i < num_devices; ++i) + cl_devices[i] = getClDevice(devices[i]); + return cl_devices; +} + +// Return true if the device is a Data Center GPU Max series (PVC) device. +static bool isPVC(pi_device device) { + cl_uint deviceId; + cl_int res = clGetDeviceInfo(getClDevice(device), CL_DEVICE_ID_INTEL, + sizeof(cl_uint), &deviceId, nullptr); + return (res == CL_SUCCESS) && ((deviceId & 0xff0) == 0xbd0); +} + +// End of helper functions + pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) { + PI_ASSERT(device, PI_ERROR_INVALID_DEVICE); + ReturnHelper return_value(paramValueSize, paramValue, paramValueSizeRet); switch (paramName) { // TODO: Check regularly to see if support in enabled in OpenCL. // Intel GPU EU device-specific information extensions. @@ -291,7 +377,7 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, bool supported = false; ret_err = checkDeviceExtensions( - cast(device), + getClDevice(device), {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, supported); if (ret_err != CL_SUCCESS) @@ -314,7 +400,7 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, } case PI_DEVICE_INFO_BUILD_ON_SUBDEVICE: { cl_device_type devType = CL_DEVICE_TYPE_DEFAULT; - cl_int res = clGetDeviceInfo(cast(device), CL_DEVICE_TYPE, + cl_int res = clGetDeviceInfo(getClDevice(device), CL_DEVICE_TYPE, sizeof(cl_device_type), &devType, nullptr); // FIXME: here we assume that program built for a root GPU device can be @@ -346,14 +432,170 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, return PI_SUCCESS; } + case PI_DEVICE_INFO_PARTITION_PROPERTIES: { + // SYCL spec says: if this SYCL device cannot be partitioned into at least + // two sub devices then the returned vector must be empty. + pi_uint32 num_sub_devices = 0; + clGetDeviceInfo(getClDevice(device), CL_DEVICE_PARTITION_MAX_SUB_DEVICES, + sizeof(num_sub_devices), &num_sub_devices, nullptr); + // Check is done later for devices at root level. + + // Helper function to populate property and return success/failure. + auto ReturnHelper = [&](auto... Partitions) { + struct { + pi_device_partition_property arr[sizeof...(Partitions) + 1]; + } partition_properties = {{Partitions..., 0}}; + return return_value(partition_properties); + }; + + // Partition property for non PVC backends. + // For non-GPU backends, partition property are obtained by calling + // clGetDeviceInfo. + if (!isPVC(device)) { + if (num_sub_devices < 2) + return return_value(pi_device_partition_property{0}); + cl_int result = + clGetDeviceInfo(getClDevice(device), cast(paramName), + paramValueSize, paramValue, paramValueSizeRet); + return static_cast(result); + } else { + // Partition property for GPU + if (isRootDevice(device)) { + if (num_sub_devices < 2) + return return_value(pi_device_partition_property{0}); + return ReturnHelper(PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN); + } else if (!isCCS(device)) { // it is subdevice + // Find out number of CCSes. + bool supported = false; + cl_int ret_err = CL_SUCCESS; + ret_err = checkDeviceExtensions(getClDevice(device), + {"cl_intel_command_queue_families"}, + supported); + if (ret_err != CL_SUCCESS) + return static_cast(ret_err); + if (!supported) + return return_value(pi_device_partition_property{0}); + cl_queue_family_properties_intel qfprops[3]; + size_t qsize = 0; + clGetDeviceInfo(getClDevice(device), + CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, + sizeof(qfprops), qfprops, &qsize); + qsize = qsize / sizeof(cl_queue_family_properties_intel); + for (size_t q = 0; q < qsize; q++) { + if (qfprops[q].capabilities == CL_QUEUE_DEFAULT_CAPABILITIES_INTEL && + qfprops[q].count > num_sub_devices) { + num_sub_devices = qfprops[q].count; + } + } + if (num_sub_devices < 2) { + return return_value(pi_device_partition_property{0}); + } + return ReturnHelper(PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE); + } else // it is CCS + return return_value(pi_device_partition_property{0}); + } + } + case PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN: + return return_value(pi_device_affinity_domain{ + PI_DEVICE_AFFINITY_DOMAIN_NUMA | + PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE}); + case PI_DEVICE_INFO_PARTITION_TYPE: { + if (!isPVC(device)) { + cl_int result = + clGetDeviceInfo(getClDevice(device), cast(paramName), + paramValueSize, paramValue, paramValueSizeRet); + return static_cast(result); + } else { + // For root-device there is no partitioning to report. + if (isRootDevice(device)) + return return_value(pi_device_partition_property{0}); + if (!isCCS(device)) { // is subdevice + struct { + pi_device_partition_property arr[3]; + } partition_properties = {{PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, + PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, + 0}}; + return return_value(partition_properties); + } else { // it is CCS + struct { + pi_device_partition_property arr[2]; + } partition_properties = {{PI_EXT_INTEL_DEVICE_PARTITION_BY_CSLICE, 0}}; + return return_value(partition_properties); + } + } + return return_value(pi_device_partition_property{0}); + } + default: - cl_int result = clGetDeviceInfo( - cast(device), cast(paramName), - paramValueSize, paramValue, paramValueSizeRet); + cl_int result = + clGetDeviceInfo(getClDevice(device), cast(paramName), + paramValueSize, paramValue, paramValueSizeRet); return static_cast(result); } } +pi_result piDevicePartition(pi_device device, + const pi_device_partition_property *properties, + pi_uint32 num_devices, pi_device *out_devices, + pi_uint32 *out_num_devices) { + cl_int result = CL_DEVICE_NOT_FOUND; + if (isRootDevice(device)) { + result = clCreateSubDevices( + getClDevice(device), + cast(properties), + cast(num_devices), cast(out_devices), + out_num_devices); + } else if (!isCCS(device)) { + cl_queue_family_properties_intel qfprops[3]; + size_t qsize = 0; + pi_uint32 family = 0; + cl_uint sub_device_count = 0; + clGetDeviceInfo(getClDevice(device), + CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, sizeof(qfprops), + qfprops, &qsize); + qsize = qsize / sizeof(cl_queue_family_properties_intel); + for (size_t q = 0; q < qsize; q++) { + if (qfprops[q].capabilities == CL_QUEUE_DEFAULT_CAPABILITIES_INTEL && + qfprops[q].count > sub_device_count) { + family = q; + sub_device_count = qfprops[q].count; + } + } + *out_num_devices = sub_device_count; + if (!out_devices) + return PI_SUCCESS; + for (uint32_t i = 0; i < *out_num_devices; ++i) { + out_devices[i] = cast(new cl_device_id()); + csliceSubDevInfo info; + info.cl_dev = cast(device); + info.family = family; + info.index = i; + cslice_devices.insert({out_devices[i], info}); + auto res = clRetainDevice(info.cl_dev); + if (res) + return cast(res); + } + return PI_SUCCESS; + } + // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in out_num_devices. + if (result == CL_DEVICE_NOT_FOUND) { + assert(out_num_devices != 0); + *out_num_devices = 0; + return PI_SUCCESS; + } + return cast(result); +} + +pi_result piDeviceRetain(pi_device device) { + cl_int result = clRetainDevice(getClDevice(device)); + return cast(result); +} + +pi_result piDeviceRelease(pi_device device) { + cl_int result = clReleaseDevice(getClDevice(device)); + return cast(result); +} + pi_result piPlatformsGet(pi_uint32 num_entries, pi_platform *platforms, pi_uint32 *num_platforms) { cl_int result = clGetPlatformIDs(cast(num_entries), @@ -384,7 +626,6 @@ pi_result piDevicesGet(pi_platform platform, pi_device_type device_type, cast(platform), cast(device_type), cast(num_entries), cast(devices), cast(num_devices)); - // Absorb the CL_DEVICE_NOT_FOUND and just return 0 in num_devices if (result == CL_DEVICE_NOT_FOUND) { assert(num_devices != 0); @@ -417,7 +658,7 @@ pi_result piextDeviceSelectBinary(pi_device device, pi_device_binary *images, cl_device_type device_type; constexpr pi_uint32 invalid_ind = std::numeric_limits::max(); cl_int ret_err = - clGetDeviceInfo(cast(device), CL_DEVICE_TYPE, + clGetDeviceInfo(getClDevice(device), CL_DEVICE_TYPE, sizeof(cl_device_type), &device_type, nullptr); if (ret_err != CL_SUCCESS) { *selected_image_ind = invalid_ind; @@ -484,6 +725,7 @@ pi_result piextQueueCreate(pi_context Context, pi_device Device, assert(Properties[2] == 0); if (Properties[2] != 0) return PI_ERROR_INVALID_VALUE; + queue2dev.insert({*Queue, Device}); return piQueueCreate(Context, Device, Flags, Queue); } pi_result piQueueCreate(pi_context context, pi_device device, @@ -492,7 +734,7 @@ pi_result piQueueCreate(pi_context context, pi_device device, cl_platform_id curPlatform; cl_int ret_err = - clGetDeviceInfo(cast(device), CL_DEVICE_PLATFORM, + clGetDeviceInfo(getClDevice(device), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &curPlatform, nullptr); CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err); @@ -514,20 +756,39 @@ pi_result piQueueCreate(pi_context context, pi_device device, CHECK_ERR_SET_NULL_RET(ret_err, queue, ret_err); - if (version >= OCLV::V2_0) { + if (version < OCLV::V2_0) { *queue = cast(clCreateCommandQueue( - cast(context), cast(device), + cast(context), getClDevice(device), cast(properties) & SupportByOpenCL, &ret_err)); return cast(ret_err); } - cl_queue_properties CreationFlagProperties[] = { - CL_QUEUE_PROPERTIES, - cast(properties) & SupportByOpenCL, 0}; - *queue = cast(clCreateCommandQueueWithProperties( - cast(context), cast(device), - CreationFlagProperties, &ret_err)); + if (isCCS(device)) { + auto family = cslice_devices[device].family; + auto index = cslice_devices[device].index; + cl_queue_properties CreationFlagProperties[] = { + CL_QUEUE_PROPERTIES, + cast(properties) & SupportByOpenCL, + CL_QUEUE_FAMILY_INTEL, + family, + CL_QUEUE_INDEX_INTEL, + index, + 0}; + *queue = cast(clCreateCommandQueueWithProperties( + cast(context), getClDevice(device), CreationFlagProperties, + &ret_err)); + + } else { + cl_queue_properties CreationFlagProperties[] = { + CL_QUEUE_PROPERTIES, + cast(properties) & SupportByOpenCL, 0}; + *queue = cast(clCreateCommandQueueWithProperties( + cast(context), getClDevice(device), CreationFlagProperties, + &ret_err)); + } + if (ret_err == CL_SUCCESS) + queue2dev.insert({*queue, device}); return cast(ret_err); } @@ -542,6 +803,17 @@ pi_result piQueueGetInfo(pi_queue queue, pi_queue_info param_name, case PI_EXT_ONEAPI_QUEUE_INFO_EMPTY: // OpenCL doesn't provide API to check the status of the queue. return PI_ERROR_INVALID_VALUE; + case PI_QUEUE_INFO_DEVICE: { + if (queue2dev.find(queue) != queue2dev.end()) { + pi_device dev = queue2dev[queue]; + if (param_value) + std::memcpy(param_value, &dev, sizeof(dev)); + if (param_value_size_ret) + *param_value_size_ret = sizeof(pi_device); + return PI_SUCCESS; + } else + return PI_ERROR_INVALID_VALUE; + } default: cl_int CLErr = clGetCommandQueueInfo( cast(queue), cast(param_name), @@ -794,9 +1066,9 @@ pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program, // If clGetDeviceFunctionPointer is in list of extensions if (FuncT) { - pi_ret_err = cast(FuncT(cast(device), - cast(program), func_name, - function_pointer_ret)); + pi_ret_err = + cast(FuncT(getClDevice(device), cast(program), + func_name, function_pointer_ret)); // GPU runtime sometimes returns PI_ERROR_INVALID_ARG_VALUE if func address // cannot be found even if kernel exits. As the kernel does exist return // that the address is not available @@ -815,11 +1087,16 @@ pi_result piContextCreate(const pi_context_properties *properties, size_t cb, void *user_data1), void *user_data, pi_context *retcontext) { pi_result ret = PI_ERROR_INVALID_OPERATION; + std::vector cl_devices = getClDevices(num_devices, devices); *retcontext = cast( - clCreateContext(properties, cast(num_devices), - cast(devices), pfn_notify, - user_data, cast(&ret))); - + clCreateContext(properties, cast(num_devices), cl_devices.data(), + pfn_notify, user_data, cast(&ret))); + if (ret == PI_SUCCESS) { + std::vector device_list_vec(num_devices); + for (size_t i = 0; i < num_devices; ++i) + device_list_vec[i] = devices[i]; + context2devlist.insert({*retcontext, device_list_vec}); + } return ret; } @@ -834,6 +1111,10 @@ pi_result piextContextCreateWithNativeHandle(pi_native_handle nativeHandle, assert(piContext != nullptr); assert(ownNativeHandle == false); *piContext = reinterpret_cast(nativeHandle); + std::vector device_list_vec(num_devices); + for (size_t i = 0; i < num_devices; ++i) + device_list_vec[i] = devices[i]; + context2devlist.insert({*piContext, device_list_vec}); return PI_SUCCESS; } @@ -849,6 +1130,19 @@ pi_result piContextGetInfo(pi_context context, pi_context_info paramName, std::memcpy(paramValue, &result, sizeof(cl_bool)); return PI_SUCCESS; } + case PI_CONTEXT_INFO_DEVICES: { + if (context2devlist.find(context) != context2devlist.end()) { + auto devlist = context2devlist[context]; + size_t num_devices = devlist.size(); + if (paramValueSizeRet) + *paramValueSizeRet = num_devices * sizeof(pi_device); + if (paramValue) + std::memcpy(paramValue, devlist.data(), + num_devices * sizeof(pi_device)); + return PI_SUCCESS; + } + [[fallthrough]]; + } default: cl_int result = clGetContextInfo( cast(context), cast(paramName), @@ -940,13 +1234,72 @@ pi_result piProgramCreateWithBinary( (void)num_metadata_entries; pi_result ret_err = PI_ERROR_INVALID_OPERATION; + std::vector cl_devices = getClDevices(num_devices, device_list); *ret_program = cast(clCreateProgramWithBinary( - cast(context), cast(num_devices), - cast(device_list), lengths, binaries, - cast(binary_status), cast(&ret_err))); + cast(context), cast(num_devices), cl_devices.data(), + lengths, binaries, cast(binary_status), + cast(&ret_err))); + if (ret_err == PI_SUCCESS) { + std::vector device_list_vec(num_devices); + for (size_t i = 0; i < num_devices; ++i) + device_list_vec[i] = device_list[i]; + program2devlist.insert({*ret_program, device_list_vec}); + } return ret_err; } +pi_result piProgramGetInfo(pi_program program, pi_program_info paramName, + size_t paramValueSize, void *paramValue, + size_t *paramValueSizeRet) { + assert(program != nullptr); + switch (paramName) { + case PI_PROGRAM_INFO_DEVICES: { + if (program2devlist.find(program) != program2devlist.end()) { + auto devlist = program2devlist[program]; + size_t num_devices = devlist.size(); + if (paramValueSizeRet) + *paramValueSizeRet = num_devices * sizeof(pi_device); + if (paramValue) + std::memcpy(paramValue, devlist.data(), + num_devices * sizeof(pi_device)); + return PI_SUCCESS; + } + [[fallthrough]]; + } + default: + cl_int result = clGetProgramInfo( + cast(program), cast(paramName), + paramValueSize, paramValue, paramValueSizeRet); + return static_cast(result); + } +} + +pi_result piProgramCompile( + pi_program program, pi_uint32 num_devices, const pi_device *device_list, + const char *options, pi_uint32 num_input_headers, + const pi_program *input_headers, const char **header_include_names, + void (*pfn_notify)(pi_program program, void *user_data), void *user_data) { + std::vector cl_devices = getClDevices(num_devices, device_list); + cl_int result = clCompileProgram( + cast(program), cast(num_devices), cl_devices.data(), + options, cast(num_input_headers), + cast(input_headers), header_include_names, + cast(pfn_notify), user_data); + return static_cast(result); +} + +pi_result piProgramBuild(pi_program program, pi_uint32 num_devices, + const pi_device *device_list, const char *options, + void (*pfn_notify)(pi_program program, + void *user_data), + void *user_data) { + std::vector cl_devices = getClDevices(num_devices, device_list); + cl_int result = clBuildProgram( + cast(program), cast(num_devices), cl_devices.data(), + options, cast(pfn_notify), user_data); + return static_cast(result); +} + pi_result piProgramLink(pi_context context, pi_uint32 num_devices, const pi_device *device_list, const char *options, pi_uint32 num_input_programs, @@ -955,16 +1308,33 @@ pi_result piProgramLink(pi_context context, pi_uint32 num_devices, void *user_data, pi_program *ret_program) { pi_result ret_err = PI_ERROR_INVALID_OPERATION; - *ret_program = cast( - clLinkProgram(cast(context), cast(num_devices), - cast(device_list), options, - cast(num_input_programs), - cast(input_programs), - cast(pfn_notify), user_data, - cast(&ret_err))); + std::vector cl_devices = getClDevices(num_devices, device_list); + *ret_program = cast(clLinkProgram( + cast(context), cast(num_devices), cl_devices.data(), + options, cast(num_input_programs), + cast(input_programs), + cast(pfn_notify), user_data, + cast(&ret_err))); + if (ret_err == PI_SUCCESS) { + std::vector device_list_vec(num_devices); + for (size_t i = 0; i < num_devices; ++i) + device_list_vec[i] = device_list[i]; + program2devlist.insert({*ret_program, device_list_vec}); + } return ret_err; } +pi_result piProgramGetBuildInfo(pi_program program, pi_device device, + pi_program_build_info param_name, + size_t param_value_size, void *param_value, + size_t *param_value_size_ret) { + cl_int result = clGetProgramBuildInfo( + cast(program), getClDevice(device), + cast(param_name), param_value_size, param_value, + param_value_size_ret); + return static_cast(result); +} + pi_result piKernelCreate(pi_program program, const char *kernel_name, pi_kernel *ret_kernel) { @@ -987,7 +1357,7 @@ pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device, return PI_ERROR_INVALID_VALUE; default: cl_int result = clGetKernelWorkGroupInfo( - cast(kernel), cast(device), + cast(kernel), getClDevice(device), cast(param_name), param_value_size, param_value, param_value_size_ret); return static_cast(result); @@ -1029,7 +1399,7 @@ pi_result piKernelGetSubGroupInfo(pi_kernel kernel, pi_device device, } ret_err = cast(clGetKernelSubGroupInfo( - cast(kernel), cast(device), + cast(kernel), getClDevice(device), cast(param_name), input_value_size, input_value, sizeof(size_t), &ret_val, param_value_size_ret)); @@ -1150,7 +1520,7 @@ pi_result piextUSMDeviceAlloc(void **result_ptr, pi_context context, context, &FuncPtr); if (FuncPtr) { - Ptr = FuncPtr(cast(context), cast(device), + Ptr = FuncPtr(cast(context), getClDevice(device), cast(properties), size, alignment, cast(&RetVal)); } @@ -1188,7 +1558,7 @@ pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context, context, &FuncPtr); if (FuncPtr) { - Ptr = FuncPtr(cast(context), cast(device), + Ptr = FuncPtr(cast(context), getClDevice(device), cast(properties), size, alignment, cast(&RetVal)); } @@ -1747,6 +2117,7 @@ pi_result piextKernelGetNativeHandle(pi_kernel kernel, // This API is called by Sycl RT to notify the end of the plugin lifetime. // TODO: add a global variable lifetime management code here (see // pi_level_zero.cpp for reference) Currently this is just a NOOP. +// We clear all the 'map' variables here. pi_result piTearDown(void *PluginParameter) { (void)PluginParameter; return PI_SUCCESS; @@ -1816,9 +2187,9 @@ pi_result piPluginInit(pi_plugin *PluginInit) { // Device _PI_CL(piDevicesGet, piDevicesGet) _PI_CL(piDeviceGetInfo, piDeviceGetInfo) - _PI_CL(piDevicePartition, clCreateSubDevices) - _PI_CL(piDeviceRetain, clRetainDevice) - _PI_CL(piDeviceRelease, clReleaseDevice) + _PI_CL(piDevicePartition, piDevicePartition) + _PI_CL(piDeviceRetain, piDeviceRetain) + _PI_CL(piDeviceRelease, piDeviceRelease) _PI_CL(piextDeviceSelectBinary, piextDeviceSelectBinary) _PI_CL(piextGetDeviceFunctionPointer, piextGetDeviceFunctionPointer) _PI_CL(piextDeviceGetNativeHandle, piextDeviceGetNativeHandle) @@ -1854,11 +2225,11 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piProgramCreate, piProgramCreate) _PI_CL(piclProgramCreateWithSource, piclProgramCreateWithSource) _PI_CL(piProgramCreateWithBinary, piProgramCreateWithBinary) - _PI_CL(piProgramGetInfo, clGetProgramInfo) - _PI_CL(piProgramCompile, clCompileProgram) - _PI_CL(piProgramBuild, clBuildProgram) + _PI_CL(piProgramGetInfo, piProgramGetInfo) + _PI_CL(piProgramCompile, piProgramCompile) + _PI_CL(piProgramBuild, piProgramBuild) _PI_CL(piProgramLink, piProgramLink) - _PI_CL(piProgramGetBuildInfo, clGetProgramBuildInfo) + _PI_CL(piProgramGetBuildInfo, piProgramGetBuildInfo) _PI_CL(piProgramRetain, clRetainProgram) _PI_CL(piProgramRelease, clReleaseProgram) _PI_CL(piextProgramSetSpecializationConstant, diff --git a/sycl/plugins/opencl/pi_opencl.hpp b/sycl/plugins/opencl/pi_opencl.hpp index 7835df8c4cb6e..68315128af080 100644 --- a/sycl/plugins/opencl/pi_opencl.hpp +++ b/sycl/plugins/opencl/pi_opencl.hpp @@ -18,9 +18,12 @@ #define PI_OPENCL_HPP #include +#include +#include #include #include - +#include +#include // This version should be incremented for any change made to this file or its // corresponding .cpp file. #define _PI_OPENCL_PLUGIN_VERSION 1 diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 7925dfcbc6b53..db8d5a2f1b09a 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -10,6 +10,9 @@ piContextCreate piContextGetInfo piDeviceGetInfo +piDevicePartition +piDeviceRelease +piDeviceRetain piDevicesGet piEnqueueMemBufferMap piEventCreate @@ -24,8 +27,12 @@ piMemImageCreate piPlatformsGet piPluginGetLastError piPluginInit +piProgramBuild +piProgramCompile piProgramCreate piProgramCreateWithBinary +piProgramGetBuildInfo +piProgramGetInfo piProgramLink piQueueCreate piextQueueCreate