diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 4a96d7a3d7fab..4965549e61449 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -134,7 +134,7 @@ class NDRDescT { class HostKernelBase { public: // The method executes lambda stored using NDRange passed. - virtual void call(const NDRDescT &NDRDesc) = 0; + virtual void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) = 0; // Return pointer to the lambda object. // Used to extract captured variables. virtual char *getPtr() = 0; @@ -149,7 +149,7 @@ class HostKernel : public HostKernelBase { public: HostKernel(KernelType Kernel) : MKernel(Kernel) {} - void call(const NDRDescT &NDRDesc) override { + void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override { // adjust ND range for serial host: NDRDescT AdjustedRange; bool Adjust = false; @@ -167,7 +167,11 @@ class HostKernel : public HostKernelBase { Adjust = true; } const NDRDescT &R = Adjust ? AdjustedRange : NDRDesc; + if (HPI) + HPI->start(); runOnHost(R); + if (HPI) + HPI->end(); } char *getPtr() override { return reinterpret_cast(&MKernel); } diff --git a/sycl/include/CL/sycl/detail/event_impl.hpp b/sycl/include/CL/sycl/detail/event_impl.hpp index 148ae108aadda..c402585480138 100644 --- a/sycl/include/CL/sycl/detail/event_impl.hpp +++ b/sycl/include/CL/sycl/detail/event_impl.hpp @@ -21,11 +21,26 @@ class context; namespace detail { class context_impl; using ContextImplPtr = std::shared_ptr; +class queue_impl; + +// Profiling info for the host execution. +class HostProfilingInfo { + cl_ulong StartTime = 0; + cl_ulong EndTime = 0; + +public: + cl_ulong getStartTime() const { return StartTime; } + cl_ulong getEndTime() const { return EndTime; } + + void start(); + void end(); +}; class event_impl { public: event_impl() = default; event_impl(cl_event CLEvent, const context &SyclContext); + event_impl(std::shared_ptr Queue); // Threat all devices that don't support interoperability as host devices to // avoid attempts to call method get on such events. @@ -65,11 +80,16 @@ class event_impl { void setCommand(void *Command) { m_Command = Command; } + HostProfilingInfo *getHostProfilingInfo() { + return m_HostProfilingInfo.get(); + } + private: RT::PiEvent m_Event = nullptr; ContextImplPtr m_Context; bool m_OpenCLInterop = false; bool m_HostEvent = true; + std::unique_ptr m_HostProfilingInfo; void *m_Command = nullptr; }; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 80ad516c721d7..a5f5daa722ac2 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -11,6 +11,8 @@ #include #include +#include + namespace cl { namespace sycl { namespace detail { @@ -81,6 +83,15 @@ event_impl::event_impl(cl_event CLEvent, const context &SyclContext) PI_CALL(RT::piEventRetain(m_Event)); } +event_impl::event_impl(std::shared_ptr Queue) { + if (Queue->is_host() && + Queue->has_property()) { + m_HostProfilingInfo.reset(new HostProfilingInfo()); + if (!m_HostProfilingInfo) + throw runtime_error("Out of host memory"); + } +} + void event_impl::wait( std::shared_ptr Self) const { @@ -110,8 +121,9 @@ event_impl::get_profiling_info() const { return get_event_profiling_info< info::event_profiling::command_submit>::_(this->getHandleRef()); } - assert(!"Not implemented for host device."); - return (cl_ulong)0; + if (!m_HostProfilingInfo) + throw invalid_object_error("Profiling info is not available."); + return m_HostProfilingInfo->getStartTime(); } template <> @@ -121,8 +133,9 @@ event_impl::get_profiling_info() const { return get_event_profiling_info::_( this->getHandleRef()); } - assert(!"Not implemented for host device."); - return (cl_ulong)0; + if (!m_HostProfilingInfo) + throw invalid_object_error("Profiling info is not available."); + return m_HostProfilingInfo->getStartTime(); } template <> @@ -132,8 +145,9 @@ event_impl::get_profiling_info() const { return get_event_profiling_info::_( this->getHandleRef()); } - assert(!"Not implemented for host device."); - return (cl_ulong)0; + if (!m_HostProfilingInfo) + throw invalid_object_error("Profiling info is not available."); + return m_HostProfilingInfo->getEndTime(); } template <> cl_uint event_impl::get_info() const { @@ -141,8 +155,7 @@ template <> cl_uint event_impl::get_info() const { return get_event_info::_( this->getHandleRef()); } - assert(!"Not implemented for host device."); - return (cl_ulong)0; + return 0; } template <> @@ -152,10 +165,18 @@ event_impl::get_info() const { return get_event_info::_( this->getHandleRef()); } - assert(!"Not implemented for host device."); return info::event_command_status::complete; } +static uint64_t getTimestamp() { + auto ts = std::chrono::high_resolution_clock::now().time_since_epoch(); + return std::chrono::duration_cast(ts).count(); +} + +void HostProfilingInfo::start() { StartTime = getTimestamp(); } + +void HostProfilingInfo::end() { EndTime = getTimestamp(); } + } // namespace detail } // namespace sycl } // namespace cl diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 66b8944082144..d673bacdd5628 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -125,7 +125,7 @@ std::vector Command::prepareEvents(ContextImplPtr Context) { Command::Command(CommandType Type, QueueImplPtr Queue, bool UseExclusiveQueue) : MQueue(std::move(Queue)), MUseExclusiveQueue(UseExclusiveQueue), MType(Type), MEnqueued(false) { - MEvent.reset(new detail::event_impl()); + MEvent.reset(new detail::event_impl(MQueue)); MEvent->setCommand(this); MEvent->setContextImpl(detail::getSyclObjImpl(MQueue->get_context())); } @@ -550,7 +550,7 @@ void DispatchNativeKernel(void *Blob) { void **NextArg = (void **)Blob + 1; for (detail::Requirement *Req : HostTask->MRequirements) Req->MData = *(NextArg++); - HostTask->MHostKernel->call(HostTask->MNDRDesc); + HostTask->MHostKernel->call(HostTask->MNDRDesc, nullptr); } cl_int ExecCGCommand::enqueueImp() { @@ -702,7 +702,8 @@ cl_int ExecCGCommand::enqueueImp() { } if (!RawEvents.empty()) PI_CALL(RT::piEventsWait(RawEvents.size(), &RawEvents[0])); - ExecKernel->MHostKernel->call(NDRDesc); + ExecKernel->MHostKernel->call(NDRDesc, + getEvent()->getHostProfilingInfo()); return CL_SUCCESS; } diff --git a/sycl/test/basic_tests/event_profiling_info.cpp b/sycl/test/basic_tests/event_profiling_info.cpp index dd17124f40246..0913391abc312 100644 --- a/sycl/test/basic_tests/event_profiling_info.cpp +++ b/sycl/test/basic_tests/event_profiling_info.cpp @@ -1,7 +1,6 @@ // RUN: %clangxx -fsycl %s -o %t.out // -// Profiling info is not supported on host device so far. -// +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -14,6 +13,7 @@ //===----------------------------------------------------------------------===// #include +#include using namespace cl; @@ -25,7 +25,15 @@ int main() { CGH.single_task([=]() {}); }); - Event.get_profiling_info(); + auto Submit = + Event.get_profiling_info(); + auto Start = + Event.get_profiling_info(); + auto End = + Event.get_profiling_info(); + + assert(Submit <= Start); + assert(Start <= End); bool Fail = sycl::info::event_command_status::complete != Event.get_info();