diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index d193f1028af03..45d3eeca3cc49 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -185,10 +185,9 @@ template <> ze_result_t zeHostSynchronize(ze_event_handle_t Handle) { template <> ze_result_t zeHostSynchronize(ze_command_queue_handle_t Handle) { return zeHostSynchronizeImpl(zeCommandQueueSynchronize, Handle); } -// template <> -// ze_result_t zeHostSynchronize(ze_fence_handle_t Handle) { -// return zeHostSynchronizeImpl(zeFenceHostSynchronize, Handle); -// } +template <> ze_result_t zeHostSynchronize(ze_fence_handle_t Handle) { + return zeHostSynchronizeImpl(zeFenceHostSynchronize, Handle); +} template pi_result getInfoImpl(size_t param_value_size, void *param_value, @@ -4229,6 +4228,22 @@ static pi_result cleanupAfterEvent(pi_event Event) { if (it == Queue->ZeCommandListFenceMap.end()) { die("Missing command-list completition fence"); } + + // Workaround for VM_BIND mode. + // Make sure that the command-list doing memcpy is reset before non-USM + // host memory potentially involved in the memcpy is freed. + // + // NOTE: it is valid to wait for the fence here as long as we aren't + // doing batching on the involved command-list. Today memcpy goes by + // itself in a command list. + // + // TODO: this will unnecessarily(?) wait for non-USM memory buffers too, + // so we might need to add a new command type to differentiate. + // + if (Event->CommandType == PI_COMMAND_TYPE_MEM_BUFFER_COPY) { + ZE_CALL(zeHostSynchronize, (it->second.ZeFence)); + } + ze_result_t ZeResult = ZE_CALL_NOCHECK(zeFenceQueryStatus, (it->second.ZeFence)); if (ZeResult == ZE_RESULT_SUCCESS) {