diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 0e737c0256fc9..3b7bee9d6c64c 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -141,8 +141,10 @@ _PI_API(piPluginGetLastError) _PI_API(piTearDown) + _PI_API(piextUSMEnqueueFill2D) _PI_API(piextUSMEnqueueMemset2D) _PI_API(piextUSMEnqueueMemcpy2D) +_PI_API(piGetDeviceAndHostTimer) #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 2b70daa7aeeac..05d003437d0b5 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -74,9 +74,10 @@ // PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT, and // PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT context info query // descriptors. +// 12.22 Add piGetDeviceAndHostTimer to query device wall-clock timestamp #define _PI_H_VERSION_MAJOR 12 -#define _PI_H_VERSION_MINOR 21 +#define _PI_H_VERSION_MINOR 22 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -1898,9 +1899,24 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// /// \return PI_SUCCESS if plugin is indicating non-fatal warning. Any other /// error code indicates that plugin considers this to be a fatal error and the -/// runtime must handle it or end the application. +/// Returns the global timestamp from \param device , and syncronized host +/// timestamp __SYCL_EXPORT pi_result piPluginGetLastError(char **message); +/// Queries device for it's global timestamp in nanoseconds, and updates +/// HostTime with the value of the host timer at the closest possible point in +/// time to that at which DeviceTime was returned. +/// +/// \param Device device to query for timestamp +/// \param DeviceTime pointer to store device timestamp in nanoseconds. Optional +/// argument, can be nullptr +/// \param HostTime pointer to store host timestamp in +/// nanoseconds. Optional argurment, can be nullptr in which case timestamp will +/// not be written +__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, + uint64_t *DeviceTime, + uint64_t *HostTime); + struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin // checks and writes the appropriate Function Pointers in diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index a8ae7a44bee38..b71fab48946ae 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -2134,7 +2135,6 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{ _pi_context::kind::user_defined, newContext, *devices}); } - static std::once_flag initFlag; std::call_once( initFlag, @@ -3889,6 +3889,7 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, switch (param_name) { case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: + // Note: No user for this case return getInfo(param_value_size, param_value, param_value_size_ret, event->get_queued_time()); case PI_PROFILING_INFO_COMMAND_START: @@ -5486,6 +5487,35 @@ pi_result cuda_piTearDown(void *) { return PI_SUCCESS; } +pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + _pi_event::native_type event; + ScopedContext active(Device->get_context()); + + if (DeviceTime) { + PI_CHECK_ERROR(cuEventCreate(&event, CU_EVENT_DEFAULT)); + PI_CHECK_ERROR(cuEventRecord(event, 0)); + } + if (HostTime) { + + using namespace std::chrono; + *HostTime = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); + } + + if (DeviceTime) { + PI_CHECK_ERROR(cuEventSynchronize(event)); + + float elapsedTime = 0.0f; + PI_CHECK_ERROR( + cuEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event)); + *DeviceTime = (uint64_t)(elapsedTime * (double)1e6); + } + + return PI_SUCCESS; +} + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -5634,6 +5664,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, cuda_piextKernelSetArgSampler) _PI_CL(piPluginGetLastError, cuda_piPluginGetLastError) _PI_CL(piTearDown, cuda_piTearDown) + _PI_CL(piGetDeviceAndHostTimer, cuda_piGetDeviceAndHostTimer) #undef _PI_CL diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index 542a8bfe4368a..8514083346434 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -88,6 +88,7 @@ struct _pi_device { native_type cuDevice_; std::atomic_uint32_t refCount_; pi_platform platform_; + pi_context context_; static constexpr pi_uint32 max_work_item_dimensions = 3u; size_t max_work_item_sizes[max_work_item_dimensions]; @@ -103,6 +104,10 @@ struct _pi_device { pi_platform get_platform() const noexcept { return platform_; }; + void set_context(pi_context ctx) { context_ = ctx; }; + + pi_context get_context() { return context_; }; + void save_max_work_item_sizes(size_t size, size_t *save_max_work_item_sizes) noexcept { memcpy(max_work_item_sizes, save_max_work_item_sizes, size); @@ -178,6 +183,7 @@ struct _pi_context { bool backend_owns = true) : kind_{k}, cuContext_{ctxt}, deviceId_{devId}, refCount_{1}, has_ownership{backend_owns} { + deviceId_->set_context(this); cuda_piDeviceRetain(deviceId_); }; diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 1988d8f0db53c..9fdf6cb9d30b6 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2049,6 +2049,12 @@ pi_result piTearDown(void *) { return PI_SUCCESS; } +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { + PiTrace( + "Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); + return PI_SUCCESS; +} const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index f4d316db6593a..202626dcfb9b6 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -605,7 +606,7 @@ pi_uint64 _pi_event::get_start_time() const { assert(is_started()); PI_CHECK_ERROR( - hipEventElapsedTime(&miliSeconds, context_->evBase_, evStart_)); + hipEventElapsedTime(&miliSeconds, _pi_platform::evBase_, evStart_)); return static_cast(miliSeconds * 1.0e6); } @@ -613,7 +614,8 @@ pi_uint64 _pi_event::get_end_time() const { float miliSeconds = 0.0f; assert(is_started() && is_recorded()); - PI_CHECK_ERROR(hipEventElapsedTime(&miliSeconds, context_->evBase_, evEnd_)); + PI_CHECK_ERROR( + hipEventElapsedTime(&miliSeconds, _pi_platform::evBase_, evEnd_)); return static_cast(miliSeconds * 1.0e6); } @@ -1988,10 +1990,16 @@ pi_result hip_piContextCreate(const pi_context_properties *properties, _pi_context::kind::user_defined, newContext, *devices}); } - // Use default stream to record base event counter - PI_CHECK_ERROR( - hipEventCreateWithFlags(&piContextPtr->evBase_, hipEventDefault)); - PI_CHECK_ERROR(hipEventRecord(piContextPtr->evBase_, 0)); + static std::once_flag initFlag; + std::call_once( + initFlag, + [](pi_result &err) { + // Use default stream to record base event counter + PI_CHECK_ERROR( + hipEventCreateWithFlags(&_pi_platform::evBase_, hipEventDefault)); + PI_CHECK_ERROR(hipEventRecord(_pi_platform::evBase_, 0)); + }, + errcode_ret); // For non-primary scoped contexts keep the last active on top of the stack // as `cuCtxCreate` replaces it implicitly otherwise. @@ -2021,8 +2029,6 @@ pi_result hip_piContextRelease(pi_context ctxt) { std::unique_ptr<_pi_context> context{ctxt}; - PI_CHECK_ERROR(hipEventDestroy(context->evBase_)); - if (!ctxt->is_primary()) { hipCtx_t hipCtxt = ctxt->get(); // hipCtxSynchronize is not supported for AMD platform so we can just @@ -3707,6 +3713,7 @@ pi_result hip_piEventGetProfilingInfo(pi_event event, switch (param_name) { case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: + // Note: No user for this case return getInfo(param_value_size, param_value, param_value_size_ret, event->get_queued_time()); case PI_PROFILING_INFO_COMMAND_START: @@ -5208,6 +5215,34 @@ pi_result hip_piTearDown(void *PluginParameter) { return PI_SUCCESS; } +pi_result hip_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + _pi_event::native_type event; + + ScopedContext active(Device->get_context()); + + if (DeviceTime) { + PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); + PI_CHECK_ERROR(hipEventRecord(event)); + } + if (HostTime) { + using namespace std::chrono; + *HostTime = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); + } + + if (DeviceTime) { + PI_CHECK_ERROR(hipEventSynchronize(event)); + + float elapsedTime = 0.0f; + PI_CHECK_ERROR( + hipEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event)); + *DeviceTime = (uint64_t)(elapsedTime * (double)1e6); + } + return PI_SUCCESS; +} + const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -5350,6 +5385,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, hip_piextKernelSetArgSampler) _PI_CL(piPluginGetLastError, hip_piPluginGetLastError) _PI_CL(piTearDown, hip_piTearDown) + _PI_CL(piGetDeviceAndHostTimer, hip_piGetDeviceAndHostTimer) #undef _PI_CL @@ -5357,3 +5393,5 @@ pi_result piPluginInit(pi_plugin *PluginInit) { } } // extern "C" + +hipEvent_t _pi_platform::evBase_{nullptr}; \ No newline at end of file diff --git a/sycl/plugins/hip/pi_hip.hpp b/sycl/plugins/hip/pi_hip.hpp index af2ff53d6fa6f..02e96570d5d88 100644 --- a/sycl/plugins/hip/pi_hip.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -65,6 +65,7 @@ using _pi_stream_guard = std::unique_lock; /// when devices are used. /// struct _pi_platform { + static hipEvent_t evBase_; // HIP event used as base counter std::vector> devices_; }; @@ -80,6 +81,7 @@ struct _pi_device { native_type cuDevice_; std::atomic_uint32_t refCount_; pi_platform platform_; + pi_context context_; public: _pi_device(native_type cuDevice, pi_platform platform) @@ -90,6 +92,10 @@ struct _pi_device { pi_uint32 get_reference_count() const noexcept { return refCount_; } pi_platform get_platform() const noexcept { return platform_; }; + + void set_context(pi_context ctx) { context_ = ctx; }; + + pi_context get_context() { return context_; }; }; /// PI context mapping to a HIP context object. @@ -146,11 +152,9 @@ struct _pi_context { _pi_device *deviceId_; std::atomic_uint32_t refCount_; - hipEvent_t evBase_; // HIP event used as base counter - _pi_context(kind k, hipCtx_t ctxt, _pi_device *devId) - : kind_{k}, hipContext_{ctxt}, deviceId_{devId}, refCount_{1}, - evBase_(nullptr) { + : kind_{k}, hipContext_{ctxt}, deviceId_{devId}, refCount_{1} { + deviceId_->set_context(this); hip_piDeviceRetain(deviceId_); }; diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 887e1ffdf52a3..79f9cbe6791fe 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5988,7 +5988,10 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, } case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: - // TODO: Support these when Level Zero supported is added. + // Note: No users for this case + // TODO: Implement commmand submission time when needed, + // by recording device timestamp (using zeDeviceGetGlobalTimestamps) + // before submitting command to device return ReturnValue(uint64_t{0}); default: zePrint("piEventGetProfilingInfo: not supported ParamName\n"); @@ -9354,4 +9357,22 @@ pi_result _pi_buffer::free() { return PI_SUCCESS; } +pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + const uint64_t &ZeTimerResolution = + Device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxCount = + ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); + uint64_t DeviceClockCount, Dummy; + + ZE_CALL(zeDeviceGetGlobalTimestamps, + (Device->ZeDevice, HostTime == nullptr ? &Dummy : HostTime, + &DeviceClockCount)); + + if (DeviceTime != nullptr) { + + *DeviceTime = (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; + } + return PI_SUCCESS; +} } // extern "C" diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index e71f23e0d2e4d..c00e24a014acc 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1665,6 +1665,46 @@ pi_result piTearDown(void *PluginParameter) { return PI_SUCCESS; } +pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + OCLV::OpenCLVersion devVer, platVer; + cl_platform_id platform; + cl_device_id deviceID = cast(Device); + + // TODO: Cache OpenCL version for each device and platform + auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &platform, nullptr); + if (ret_err != CL_SUCCESS) { + return cast(ret_err); + } + + ret_err = getDeviceVersion(deviceID, devVer); + + if (ret_err != CL_SUCCESS) { + return cast(ret_err); + } + + ret_err = getPlatformVersion(platform, platVer); + + if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { + setErrorMessage( + "OpenCL version for device and/or platform is less than 2.1", + PI_ERROR_INVALID_OPERATION); + return PI_ERROR_INVALID_OPERATION; + } + + if (DeviceTime) { + uint64_t dummy; + clGetDeviceAndHostTimer(deviceID, DeviceTime, + HostTime == nullptr ? &dummy : HostTime); + + } else if (HostTime) { + clGetHostTimer(deviceID, HostTime); + } + + return PI_SUCCESS; +} + const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -1802,6 +1842,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, piextKernelSetArgSampler) _PI_CL(piPluginGetLastError, piPluginGetLastError) _PI_CL(piTearDown, piTearDown) + _PI_CL(piGetDeviceAndHostTimer, piGetDeviceAndHostTimer) #undef _PI_CL diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index f039e99afd2a4..b030b5f78dc9d 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -36,7 +36,8 @@ device_impl::device_impl(RT::PiDevice Device, const plugin &Plugin) device_impl::device_impl(pi_native_handle InteropDeviceHandle, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin) - : MDevice(Device), MIsHostDevice(false) { + : MDevice(Device), MIsHostDevice(false), + MDeviceHostBaseTime(std::make_pair(0, 0)) { bool InteroperabilityConstructor = false; if (Device == nullptr) { @@ -435,6 +436,59 @@ std::string device_impl::getDeviceName() const { return MDeviceName; } +/* On first call this function queries for device timestamp + along with host synchronized timestamp + and stores it in memeber varaible deviceTimePair. + Subsequent calls to this function would just retrieve the host timestamp , + compute difference against the host timestamp in deviceTimePair + and calculate the device timestamp based on the difference. + deviceTimePair is refreshed with new device and host timestamp after a + certain interval (determined by timeTillRefresh) to account for clock drift + between host and device. +*/ + +uint64_t device_impl::getCurrentDeviceTime() { + // To account for potential clock drift between host clock and device clock. + // The value set is arbitrary: 200 seconds + constexpr uint64_t timeTillRefresh = 200e9; + + uint64_t hostTime; + if (MIsHostDevice) { + using namespace std::chrono; + return duration_cast(steady_clock::now().time_since_epoch()) + .count(); + } + auto plugin = getPlugin(); + RT::PiResult result = + plugin.call_nocheck( + MDevice, nullptr, &hostTime); + plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS + : result); + + if (result == PI_ERROR_INVALID_OPERATION) { + std::string errorMsg{}; + char *p; + plugin.call_nocheck(&p); + while (*p != '\0') { + errorMsg += *p; + p++; + } + throw sycl::feature_not_supported( + "Device and/or backend does not support querying timestamp: " + + errorMsg, + result); + } + uint64_t diff = hostTime - MDeviceHostBaseTime.second; + + if (diff > timeTillRefresh || diff <= 0) { + plugin.call( + MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second); + diff = 0; + } + + return MDeviceHostBaseTime.first + diff; +} + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 17fdd6c746367..41cb1c9fdb62c 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -18,6 +18,7 @@ #include #include +#include namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { @@ -237,6 +238,10 @@ class device_impl { std::string getDeviceName() const; + /// Gets the current device timestamp + /// @throw sycl::feature_not_supported if feature is not supported on device + uint64_t getCurrentDeviceTime(); + private: explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin); @@ -248,6 +253,7 @@ class device_impl { bool MIsAssertFailSupported = false; mutable std::string MDeviceName; mutable std::once_flag MDeviceNameFlag; + std::pair MDeviceHostBaseTime; }; // class device_impl } // namespace detail diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index d95173c47e7fb..3e2cd116cfaad 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -265,16 +265,7 @@ template <> uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); - if (!MHostEvent) { - if (MEvent) - return get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); - return 0; - } - if (!MHostProfilingInfo) - throw invalid_object_error("Profiling info is not available.", - PI_ERROR_PROFILING_INFO_NOT_AVAILABLE); - return MHostProfilingInfo->getStartTime(); + return MSubmitTime; } template <> @@ -424,6 +415,22 @@ void event_impl::cleanDepEventsThroughOneLevel() { } } +void event_impl::setSubmissionTime() { + if (!MIsProfilingEnabled) + return; + if (QueueImplPtr Queue = MQueue.lock()) { + try { + MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); + } catch (feature_not_supported &e) { + throw sycl::exception(make_error_code(errc::profiling), + std::string("Unable to get command group submission time: ") + + e.what()); + } + } +} + +uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } + bool event_impl::isCompleted() { return get_info() == info::event_command_status::complete; diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 3330654501fa4..464d285acde76 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -221,6 +221,13 @@ class event_impl { MSubmittedQueue = SubmittedQueue; }; + /// Calling this function queries the current device timestamp and sets it as + /// submission time for the command associated with this event. + void setSubmissionTime(); + + /// @return Submission time for command associated with this event + uint64_t getSubmissionTime(); + QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed @@ -257,6 +264,8 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; + // Stores submission time of command associated with event + uint64_t MSubmitTime = 0; ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index e2e7f5df48cee..eed28df954632 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -137,6 +137,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, NewEvent = Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; } + NewEvent->setSubmissionTime(); } if (ShouldEnqueue) { diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 7950e70162d5e..7892de69cce81 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -77,6 +77,15 @@ event::get_info() const { return impl->template get_info(); } +template +typename detail::is_event_profiling_info_desc::return_type +event::get_profiling_info() const { + if constexpr (!std::is_same_v) { + impl->wait(impl); + } + return impl->template get_profiling_info(); +} + #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ template __SYCL_EXPORT ReturnT event::get_info() const; @@ -85,12 +94,8 @@ event::get_info() const { #undef __SYCL_PARAM_TRAITS_SPEC #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ - template <> \ - __SYCL_EXPORT ReturnT event::get_profiling_info() \ - const { \ - impl->wait(impl); \ - return impl->get_profiling_info(); \ - } + template __SYCL_EXPORT ReturnT \ + event::get_profiling_info() const; #include diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index c0b5e3881818c..4d6d2c878fb29 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -205,6 +205,8 @@ event handler::finalize() { NewEvent->setStateIncomplete(); OutEvent = &NewEvent->getHandleRef(); + NewEvent->setSubmissionTime(); + if (PI_SUCCESS != EnqueueKernel()) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 44f6d2f781ca3..54229d45356a4 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -41,6 +41,7 @@ piEventRetain piEventSetCallback piEventSetStatus piEventsWait +piGetDeviceAndHostTimer piKernelCreate piKernelGetGroupInfo piKernelGetInfo diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 1f359bf582458..e54388658985e 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -13,6 +13,7 @@ piDeviceGetInfo piDevicesGet piEnqueueMemBufferMap piEventCreate +piGetDeviceAndHostTimer piKernelCreate piKernelGetGroupInfo piKernelGetSubGroupInfo diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index e08b2a015748e..c8c46da7ec357 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -14,6 +14,7 @@ #include #include +#include #include // Helpers for dummy handles @@ -1094,3 +1095,21 @@ inline pi_result mock_piTearDown(void *PluginParameter) { return PI_SUCCESS; } inline pi_result mock_piPluginGetLastError(char **message) { return PI_SUCCESS; } + +// Returns the wall-clock timestamp of host for deviceTime and hostTime +inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, + uint64_t *deviceTime, + uint64_t *hostTime) { + + using namespace std::chrono; + auto timeNanoseconds = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); + if (deviceTime) { + *deviceTime = timeNanoseconds; + } + if (hostTime) { + *hostTime = timeNanoseconds; + } + return PI_SUCCESS; +} diff --git a/sycl/unittests/queue/GetProfilingInfo.cpp b/sycl/unittests/queue/GetProfilingInfo.cpp index f410811e63445..7cc9750255baf 100644 --- a/sycl/unittests/queue/GetProfilingInfo.cpp +++ b/sycl/unittests/queue/GetProfilingInfo.cpp @@ -17,6 +17,7 @@ #include #include +#include #include @@ -316,3 +317,94 @@ TEST(GetProfilingInfo, check_if_now_dead_queue_property_not_set) { // The test passes without this, but keep it still, just in case. sycl::detail::getSyclObjImpl(Ctx)->getKernelProgramCache().reset(); } + +bool DeviceTimerCalled; + +pi_result redefinedPiGetDeviceAndHostTimer(pi_device Device, + uint64_t *DeviceTime, + uint64_t *HostTime) { + DeviceTimerCalled = true; + return PI_SUCCESS; +} + +TEST(GetProfilingInfo, + check_no_command_submission_time_when_event_profiling_disabled) { + using namespace sycl; + unittest::PiMock Mock; + platform Plt = Mock.getPlatform(); + Mock.redefine( + redefinedPiGetDeviceAndHostTimer); + device Dev = Plt.get_devices()[0]; + context Ctx{Dev}; + queue Queue{Ctx, Dev}; + DeviceTimerCalled = false; + + event E = Queue.submit( + [&](handler &cgh) { cgh.single_task>([]() {}); }); + EXPECT_FALSE(DeviceTimerCalled); +} + +// Checks to see if command submit time is calculated before queue.submit +// returns. A host accessor is contructed before submitting the command, to +// ensure command submission time is calculated even if command may not be +// enqueued due to overlap in data dependencies between the kernel and host +// accessor +TEST(GetProfilingInfo, check_command_submission_time_with_host_accessor) { + using namespace sycl; + unittest::PiMock Mock; + platform Plt = Mock.getPlatform(); + Mock.redefine( + redefinedPiGetDeviceAndHostTimer); + device Dev = Plt.get_devices()[0]; + context Ctx{Dev}; + queue Queue{Ctx, Dev, property::queue::enable_profiling()}; + int data[1024]; + buffer Buf{data, range<1>{1024}}; + DeviceTimerCalled = false; + + accessor host_acc = Buf.get_access(); + event E = Queue.submit([&](handler &cgh) { + accessor writeRes{Buf, cgh, read_write}; + + cgh.single_task>([]() {}); + }); + + EXPECT_TRUE(DeviceTimerCalled); +} + +pi_result redefinedFailedPiGetDeviceAndHostTimer(pi_device Device, + uint64_t *DeviceTime, + uint64_t *HostTime) { + return PI_ERROR_INVALID_OPERATION; +} + +pi_result redefinedPiPluginGetLastError(char **message) { + static char messageString[50] = "Plugin version not supported"; + *message = messageString; + return PI_SUCCESS; +} + +TEST(GetProfilingInfo, submission_time_exception_check) { + using namespace sycl; + unittest::PiMock Mock; + platform Plt = Mock.getPlatform(); + Mock.redefine( + redefinedFailedPiGetDeviceAndHostTimer); + Mock.redefine( + redefinedPiPluginGetLastError); + device Dev = Plt.get_devices()[0]; + context Ctx{Dev}; + queue Queue{Ctx, Dev, property::queue::enable_profiling()}; + + try { + event E = Queue.submit( + [&](handler &cgh) { cgh.single_task>([]() {}); }); + FAIL(); + } catch (sycl::exception &e) { + EXPECT_STREQ( + e.what(), + "Unable to get command group submission time: " + "Device and/or backend does not support querying timestamp: " + "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION)"); + } +}