diff --git a/sycl/include/CL/sycl/detail/memory_manager.hpp b/sycl/include/CL/sycl/detail/memory_manager.hpp index ded30d827db37..dfeb1b1365f6a 100644 --- a/sycl/include/CL/sycl/detail/memory_manager.hpp +++ b/sycl/include/CL/sycl/detail/memory_manager.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -140,18 +141,43 @@ class __SYCL_EXPORT MemoryManager { void *MappedPtr, std::vector DepEvents, RT::PiEvent &OutEvent); + static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len, + void *DstMem, std::vector DepEvents, + RT::PiEvent *OutEvent); + + __SYCL_DEPRECATED("copy_usm() accepting PiEvent& is deprecated, use " + "copy_usm() accepting PiEvent* instead") static void copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len, void *DstMem, std::vector DepEvents, RT::PiEvent &OutEvent); + static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, + int Pattern, std::vector DepEvents, + RT::PiEvent *OutEvent); + + __SYCL_DEPRECATED("fill_usm() accepting PiEvent& is deprecated, use " + "fill_usm() accepting PiEvent* instead") static void fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, int Pattern, std::vector DepEvents, RT::PiEvent &OutEvent); + static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, + std::vector DepEvents, + RT::PiEvent *OutEvent); + + __SYCL_DEPRECATED("prefetch_usm() accepting PiEvent& is deprecated, use " + "prefetch_usm() accepting PiEvent* instead") static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, std::vector DepEvents, RT::PiEvent &OutEvent); + static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, + pi_mem_advice Advice, + std::vector DepEvents, + RT::PiEvent *OutEvent); + + __SYCL_DEPRECATED("advise_usm() accepting PiEvent& is deprecated, use " + "advise_usm() accepting PiEvent* instead") static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, pi_mem_advice Advice, std::vector DepEvents, diff --git a/sycl/include/CL/sycl/detail/property_helper.hpp b/sycl/include/CL/sycl/detail/property_helper.hpp index d87e03c53167d..12bc497ee2a70 100644 --- a/sycl/include/CL/sycl/detail/property_helper.hpp +++ b/sycl/include/CL/sycl/detail/property_helper.hpp @@ -33,8 +33,9 @@ enum DataLessPropKind { UsePrimaryContext = 6, InitializeToIdentity = 7, UseDefaultStream = 8, + DiscardEvents = 9, // Indicates the last known dataless property. - LastKnownDataLessPropKind = 8, + LastKnownDataLessPropKind = 9, // Exceeding 32 may cause ABI breaking change on some of OSes. DataLessPropKindSize = 32 }; diff --git a/sycl/include/CL/sycl/feature_test.hpp.in b/sycl/include/CL/sycl/feature_test.hpp.in index 4e719225258b9..4e945b19c601e 100644 --- a/sycl/include/CL/sycl/feature_test.hpp.in +++ b/sycl/include/CL/sycl/feature_test.hpp.in @@ -35,6 +35,7 @@ namespace sycl { #define SYCL_EXT_ONEAPI_MATRIX 2 #endif #define SYCL_EXT_ONEAPI_ASSERT 1 +#define SYCL_EXT_ONEAPI_DISCARD_QUEUE_EVENTS 1 #define SYCL_EXT_ONEAPI_ENQUEUE_BARRIER 1 #define SYCL_EXT_ONEAPI_FREE_FUNCTION_QUERIES 1 #define SYCL_EXT_ONEAPI_GROUP_ALGORITHMS 1 diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 1f0170ef8a455..d9b688b37507b 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -1354,18 +1354,12 @@ class __SYCL_EXPORT handler { /// Registers event dependencies on this command group. /// /// \param Event is a valid SYCL event to wait on. - void depends_on(event Event) { - MEvents.push_back(detail::getSyclObjImpl(Event)); - } + void depends_on(event Event); /// Registers event dependencies on this command group. /// /// \param Events is a vector of valid SYCL events to wait on. - void depends_on(const std::vector &Events) { - for (const event &Event : Events) { - MEvents.push_back(detail::getSyclObjImpl(Event)); - } - } + void depends_on(const std::vector &Events); template using remove_cv_ref_t = diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index f47ff2b53399a..9dd57d5324997 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -287,7 +287,10 @@ enum class event : cl_event_info { enum class event_command_status : cl_int { submitted = CL_SUBMITTED, running = CL_RUNNING, - complete = CL_COMPLETE + complete = CL_COMPLETE, + // Since all BE values are positive, it is safe to use a negative value If you + // add other ext_oneapi values + ext_oneapi_unknown = -1 }; enum class event_profiling : cl_profiling_info { diff --git a/sycl/include/CL/sycl/properties/queue_properties.hpp b/sycl/include/CL/sycl/properties/queue_properties.hpp index ecae0a3cd8062..76a3bfaea9373 100644 --- a/sycl/include/CL/sycl/properties/queue_properties.hpp +++ b/sycl/include/CL/sycl/properties/queue_properties.hpp @@ -23,6 +23,14 @@ class enable_profiling namespace ext { namespace oneapi { + +namespace property { +namespace queue { +class discard_events : public ::cl::sycl::detail::DataLessProperty< + ::cl::sycl::detail::DiscardEvents> {}; +} // namespace queue +} // namespace property + namespace cuda { namespace property { namespace queue { @@ -52,6 +60,9 @@ template <> struct is_property : std::true_type {}; template <> struct is_property : std::true_type {}; template <> +struct is_property + : std::true_type {}; +template <> struct is_property : std::true_type { }; template <> @@ -64,6 +75,9 @@ template <> struct is_property_of : std::true_type {}; template <> +struct is_property_of + : std::true_type {}; +template <> struct is_property_of : std::true_type {}; template <> diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 4026d42c2d978..375ba0e969fd4 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -251,8 +251,6 @@ class __SYCL_EXPORT queue { template event submit(T CGF _CODELOCPARAM(&CodeLoc)) { _CODELOCARG(&CodeLoc); - event Event; - #if __SYCL_USE_FALLBACK_ASSERT if (!is_host()) { auto PostProcess = [this, &CodeLoc](bool IsKernel, bool KernelUsesAssert, @@ -268,14 +266,14 @@ class __SYCL_EXPORT queue { } }; - Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess); + auto Event = submit_impl_and_postprocess(CGF, CodeLoc, PostProcess); + return discard_or_return(Event); } else #endif // __SYCL_USE_FALLBACK_ASSERT { - Event = submit_impl(CGF, CodeLoc); + auto Event = submit_impl(CGF, CodeLoc); + return discard_or_return(Event); } - - return Event; } /// Submits a command group function object to the queue, in order to be @@ -293,8 +291,6 @@ class __SYCL_EXPORT queue { event submit(T CGF, queue &SecondaryQueue _CODELOCPARAM(&CodeLoc)) { _CODELOCARG(&CodeLoc); - event Event; - #if __SYCL_USE_FALLBACK_ASSERT if (!is_host()) { auto PostProcess = [this, &SecondaryQueue, &CodeLoc]( @@ -315,15 +311,15 @@ class __SYCL_EXPORT queue { } }; - Event = submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc, - PostProcess); + auto Event = submit_impl_and_postprocess(CGF, SecondaryQueue, CodeLoc, + PostProcess); + return discard_or_return(Event); } else #endif // __SYCL_USE_FALLBACK_ASSERT { - Event = submit_impl(CGF, SecondaryQueue, CodeLoc); + auto Event = submit_impl(CGF, SecondaryQueue, CodeLoc); + return discard_or_return(Event); } - - return Event; } /// Prevents any commands submitted afterward to this queue from executing @@ -1089,6 +1085,10 @@ class __SYCL_EXPORT queue { event submit_impl(std::function CGH, queue secondQueue, const detail::code_location &CodeLoc); + /// Checks if the event needs to be discarded and if so, discards it and + /// returns a discarded event. Otherwise, it returns input event. + event discard_or_return(const event &Event); + // Function to postprocess submitted command // Arguments: // bool IsKernel - true if the submitted command was kernel, false otherwise diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 8a4c1138e84df..4bafe6a55f38b 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -56,6 +56,11 @@ void event_impl::waitInternal() const { return; } + if (MState == HES_Discarded) + throw sycl::exception( + make_error_code(errc::invalid), + "waitInternal method cannot be used for a discarded event."); + while (MState != HES_Complete) ; } @@ -93,7 +98,8 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { MState = HES_NotComplete; } -event_impl::event_impl() : MIsFlushed(true), MState(HES_Complete) {} +event_impl::event_impl(HostEventState State) + : MIsFlushed(true), MState(State) {} event_impl::event_impl(RT::PiEvent Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), @@ -188,6 +194,10 @@ void event_impl::instrumentationEpilog(void *TelemetryEvent, void event_impl::wait( std::shared_ptr Self) const { + if (MState == HES_Discarded) + throw sycl::exception(make_error_code(errc::invalid), + "wait method cannot be used for a discarded event."); + #ifdef XPTI_ENABLE_INSTRUMENTATION void *TelemetryEvent = nullptr; uint64_t IId; @@ -304,6 +314,9 @@ template <> cl_uint event_impl::get_info() const { template <> info::event_command_status event_impl::get_info() const { + if (MState == HES_Discarded) + return info::event_command_status::ext_oneapi_unknown; + if (!MHostEvent && MEvent) { return get_event_info::get( this->getHandleRef(), this->getPlugin()); @@ -333,6 +346,11 @@ pi_native_handle event_impl::getNative() const { } std::vector event_impl::getWaitList() { + if (MState == HES_Discarded) + throw sycl::exception( + make_error_code(errc::invalid), + "get_wait_list() cannot be used for a discarded event."); + std::lock_guard Lock(MMutex); std::vector Result; diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 7255e5ccdfe5f..fb8214ddf6b8d 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -32,10 +32,16 @@ using EventImplPtr = std::shared_ptr; class event_impl { public: + enum HostEventState : int { + HES_NotComplete = 0, + HES_Complete, + HES_Discarded + }; + /// Constructs a ready SYCL event. /// /// If the constructed SYCL event is waited on it will complete immediately. - event_impl(); + event_impl(HostEventState State = HES_Complete); /// Constructs an event instance from a plug-in event handle. /// /// The SyclContext must match the plug-in context associated with the @@ -190,6 +196,11 @@ class event_impl { /// Cleans dependencies of this event_impl void cleanupDependencyEvents(); + /// Checks if this event is discarded by SYCL implementation. + /// + /// \return true if this event is discarded. + bool isDiscarded() const { return MState == HES_Discarded; } + private: // When instrumentation is enabled emits trace event for event wait begin and // returns the telemetry event generated for the wait @@ -215,8 +226,6 @@ class event_impl { /// the queue to the device. std::atomic MIsFlushed = false; - enum HostEventState : int { HES_NotComplete = 0, HES_Complete }; - // State of host event. Employed only for host events and event with no // backend's representation (e.g. alloca). Used values are listed in // HostEventState enum. diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index da1c0acebdcc6..67c308ba4c8ff 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -647,14 +647,14 @@ void MemoryManager::unmap(SYCLMemObjI *, void *Mem, QueueImplPtr Queue, void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, size_t Len, void *DstMem, std::vector DepEvents, - RT::PiEvent &OutEvent) { + RT::PiEvent *OutEvent) { sycl::context Context = SrcQueue->get_context(); if (!Len) { // no-op, but ensure DepEvents will still be waited on if (!Context.is_host() && !DepEvents.empty()) { SrcQueue->getPlugin().call( SrcQueue->getHandleRef(), DepEvents.size(), DepEvents.data(), - &OutEvent); + OutEvent); } return; } @@ -670,19 +670,19 @@ void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr SrcQueue, Plugin.call(SrcQueue->getHandleRef(), /* blocking */ false, DstMem, SrcMem, Len, DepEvents.size(), - DepEvents.data(), &OutEvent); + DepEvents.data(), OutEvent); } } void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, int Pattern, std::vector DepEvents, - RT::PiEvent &OutEvent) { + RT::PiEvent *OutEvent) { sycl::context Context = Queue->get_context(); if (!Length) { // no-op, but ensure DepEvents will still be waited on if (!Context.is_host() && !DepEvents.empty()) { Queue->getPlugin().call( - Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), &OutEvent); + Queue->getHandleRef(), DepEvents.size(), DepEvents.data(), OutEvent); } return; } @@ -697,13 +697,13 @@ void MemoryManager::fill_usm(void *Mem, QueueImplPtr Queue, size_t Length, const detail::plugin &Plugin = Queue->getPlugin(); Plugin.call( Queue->getHandleRef(), Mem, Pattern, Length, DepEvents.size(), - DepEvents.data(), &OutEvent); + DepEvents.data(), OutEvent); } } void MemoryManager::prefetch_usm(void *Mem, QueueImplPtr Queue, size_t Length, std::vector DepEvents, - RT::PiEvent &OutEvent) { + RT::PiEvent *OutEvent) { sycl::context Context = Queue->get_context(); if (Context.is_host()) { @@ -712,23 +712,52 @@ void MemoryManager::prefetch_usm(void *Mem, QueueImplPtr Queue, size_t Length, const detail::plugin &Plugin = Queue->getPlugin(); Plugin.call( Queue->getHandleRef(), Mem, Length, _pi_usm_migration_flags(0), - DepEvents.size(), DepEvents.data(), &OutEvent); + DepEvents.size(), DepEvents.data(), OutEvent); } } void MemoryManager::advise_usm(const void *Mem, QueueImplPtr Queue, size_t Length, pi_mem_advice Advice, std::vector /*DepEvents*/, - RT::PiEvent &OutEvent) { + RT::PiEvent *OutEvent) { sycl::context Context = Queue->get_context(); if (!Context.is_host()) { const detail::plugin &Plugin = Queue->getPlugin(); Plugin.call(Queue->getHandleRef(), Mem, - Length, Advice, &OutEvent); + Length, Advice, OutEvent); } } +// TODO: Delete this function when ABI breaking changes are allowed. +void MemoryManager::copy_usm(const void *SrcMem, QueueImplPtr Queue, size_t Len, + void *DstMem, std::vector DepEvents, + RT::PiEvent &OutEvent) { + copy_usm(SrcMem, Queue, Len, DstMem, DepEvents, &OutEvent); +} + +// TODO: Delete this function when ABI breaking changes are allowed. +void MemoryManager::fill_usm(void *DstMem, QueueImplPtr Queue, size_t Len, + int Pattern, std::vector DepEvents, + RT::PiEvent &OutEvent) { + fill_usm(DstMem, Queue, Len, Pattern, DepEvents, &OutEvent); +} + +// TODO: Delete this function when ABI breaking changes are allowed. +void MemoryManager::prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, + std::vector DepEvents, + RT::PiEvent &OutEvent) { + prefetch_usm(Ptr, Queue, Len, DepEvents, &OutEvent); +} + +// TODO: Delete this function when ABI breaking changes are allowed. +void MemoryManager::advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, + pi_mem_advice Advice, + std::vector DepEvents, + RT::PiEvent &OutEvent) { + advise_usm(Ptr, Queue, Len, Advice, DepEvents, &OutEvent); +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 072007d107d97..c67645ec36c57 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -51,15 +51,26 @@ prepareUSMEvent(const std::shared_ptr &QueueImpl, return detail::createSyclObjFromImpl(EventImpl); } +static event createDiscardedEvent() { + EventImplPtr EventImpl = + std::make_shared(event_impl::HES_Discarded); + return createSyclObjFromImpl(EventImpl); +} + event queue_impl::memset(const std::shared_ptr &Self, void *Ptr, int Value, size_t Count, const std::vector &DepEvents) { + if (MHasDiscardEventsSupport) { + MemoryManager::fill_usm(Ptr, Self, Count, Value, + getOrWaitEvents(DepEvents, MContext), nullptr); + return createDiscardedEvent(); + } RT::PiEvent NativeEvent{}; MemoryManager::fill_usm(Ptr, Self, Count, Value, - getOrWaitEvents(DepEvents, MContext), NativeEvent); + getOrWaitEvents(DepEvents, MContext), &NativeEvent); if (MContext->is_host()) - return event(); + return MDiscardEvents ? createDiscardedEvent() : event(); event ResEvent = prepareUSMEvent(Self, NativeEvent); // Track only if we won't be able to handle it with piQueueFinish. @@ -68,18 +79,23 @@ event queue_impl::memset(const std::shared_ptr &Self, if (!MSupportOOO || getPlugin().getBackend() == backend::ext_oneapi_level_zero) addSharedEvent(ResEvent); - return ResEvent; + return MDiscardEvents ? createDiscardedEvent() : ResEvent; } event queue_impl::memcpy(const std::shared_ptr &Self, void *Dest, const void *Src, size_t Count, const std::vector &DepEvents) { + if (MHasDiscardEventsSupport) { + MemoryManager::copy_usm(Src, Self, Count, Dest, + getOrWaitEvents(DepEvents, MContext), nullptr); + return createDiscardedEvent(); + } RT::PiEvent NativeEvent{}; MemoryManager::copy_usm(Src, Self, Count, Dest, - getOrWaitEvents(DepEvents, MContext), NativeEvent); + getOrWaitEvents(DepEvents, MContext), &NativeEvent); if (MContext->is_host()) - return event(); + return MDiscardEvents ? createDiscardedEvent() : event(); event ResEvent = prepareUSMEvent(Self, NativeEvent); // Track only if we won't be able to handle it with piQueueFinish. @@ -88,19 +104,24 @@ event queue_impl::memcpy(const std::shared_ptr &Self, if (!MSupportOOO || getPlugin().getBackend() == backend::ext_oneapi_level_zero) addSharedEvent(ResEvent); - return ResEvent; + return MDiscardEvents ? createDiscardedEvent() : ResEvent; } event queue_impl::mem_advise(const std::shared_ptr &Self, const void *Ptr, size_t Length, pi_mem_advice Advice, const std::vector &DepEvents) { + if (MHasDiscardEventsSupport) { + MemoryManager::advise_usm(Ptr, Self, Length, Advice, + getOrWaitEvents(DepEvents, MContext), nullptr); + return createDiscardedEvent(); + } RT::PiEvent NativeEvent{}; MemoryManager::advise_usm(Ptr, Self, Length, Advice, - getOrWaitEvents(DepEvents, MContext), NativeEvent); + getOrWaitEvents(DepEvents, MContext), &NativeEvent); if (MContext->is_host()) - return event(); + return MDiscardEvents ? createDiscardedEvent() : event(); event ResEvent = prepareUSMEvent(Self, NativeEvent); // Track only if we won't be able to handle it with piQueueFinish. @@ -109,7 +130,7 @@ event queue_impl::mem_advise(const std::shared_ptr &Self, if (!MSupportOOO || getPlugin().getBackend() == backend::ext_oneapi_level_zero) addSharedEvent(ResEvent); - return ResEvent; + return MDiscardEvents ? createDiscardedEvent() : ResEvent; } void queue_impl::addEvent(const event &Event) { diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 3a95ef495e267..a2d24846ba555 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -94,7 +94,20 @@ class queue_impl { : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler), MPropList(PropList), MHostQueue(MDevice->is_host()), MAssertHappenedBuffer(range<1>{1}), - MIsInorder(has_property()) { + MIsInorder(has_property()), + MDiscardEvents( + has_property()), + MHasDiscardEventsSupport( + MDiscardEvents && + (MHostQueue ? true + : (MIsInorder && getPlugin().getBackend() != + backend::ext_oneapi_level_zero))) { + if (has_property() && + has_property()) { + throw sycl::exception(make_error_code(errc::invalid), + "Queue cannot be constructed with both of " + "discard_events and enable_profiling."); + } if (!Context->hasDevice(Device)) throw cl::sycl::invalid_parameter_error( "Queue cannot be constructed with the given context and device " @@ -119,7 +132,20 @@ class queue_impl { const async_handler &AsyncHandler) : MContext(Context), MAsyncHandler(AsyncHandler), MPropList(), MHostQueue(false), MAssertHappenedBuffer(range<1>{1}), - MIsInorder(has_property()) { + MIsInorder(has_property()), + MDiscardEvents( + has_property()), + MHasDiscardEventsSupport( + MDiscardEvents && + (MHostQueue ? true + : (MIsInorder && getPlugin().getBackend() != + backend::ext_oneapi_level_zero))) { + if (has_property() && + has_property()) { + throw sycl::exception(make_error_code(errc::invalid), + "Queue cannot be constructed with both of " + "discard_events and enable_profiling."); + } MQueues.push_back(pi::cast(PiQueue)); @@ -167,6 +193,9 @@ class queue_impl { /// \return true if this queue is a SYCL host queue. bool is_host() const { return MHostQueue; } + /// \return true if this queue has discard_events support. + bool has_discard_events_support() const { return MHasDiscardEventsSupport; } + /// Queries SYCL queue for information. /// /// The return type depends on information being queried. @@ -411,9 +440,12 @@ class queue_impl { } private: - void finalizeHandler(handler &Handler, bool NeedSeparateDependencyMgmt, + void finalizeHandler(handler &Handler, const CG::CGTYPE &Type, event &EventRet) { if (MIsInorder) { + bool NeedSeparateDependencyMgmt = + (Type == CG::CGTYPE::CodeplayHostTask || + Type == CG::CGTYPE::CodeplayInteropTask); // Accessing and changing of an event isn't atomic operation. // Hence, here is the lock for thread-safety. std::lock_guard Lock{MLastEventMtx}; @@ -452,10 +484,6 @@ class queue_impl { // Host and interop tasks, however, are not submitted to low-level runtimes // and require separate dependency management. const CG::CGTYPE Type = Handler.getType(); - bool NeedSeparateDependencyMgmt = - MIsInorder && (Type == CG::CGTYPE::CodeplayHostTask || - Type == CG::CGTYPE::CodeplayInteropTask); - event Event; if (PostProcess) { @@ -468,11 +496,11 @@ class queue_impl { ProgramManager::getInstance().kernelUsesAssert( Handler.MOSModuleHandle, Handler.MKernelName); - finalizeHandler(Handler, NeedSeparateDependencyMgmt, Event); + finalizeHandler(Handler, Type, Event); (*PostProcess)(IsKernel, KernelUsesAssert, Event); } else - finalizeHandler(Handler, NeedSeparateDependencyMgmt, Event); + finalizeHandler(Handler, Type, Event); addEvent(Event); return Event; @@ -534,6 +562,18 @@ class queue_impl { std::mutex MLastEventMtx; const bool MIsInorder; + +public: + // Queue constructed with the discard_events property + const bool MDiscardEvents; + +private: + // This flag says if we can discard events based on a queue "setup" which will + // be common for all operations submitted to the queue. This is a must + // condition for discarding, but even if it's true, in some cases, we won't be + // able to discard events, because the final decision is made right before the + // operation itself. + const bool MHasDiscardEventsSupport; }; } // namespace detail diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 6724608227699..7bc32896e0280 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1757,7 +1757,7 @@ static pi_result SetKernelParamsAndLaunch( const QueueImplPtr &Queue, std::vector &Args, const std::shared_ptr &DeviceImageImpl, RT::PiKernel Kernel, NDRDescT &NDRDesc, std::vector &RawEvents, - const EventImplPtr &EventImpl, + RT::PiEvent *OutEvent, const ProgramManager::KernelArgMask &EliminatedArgMask, const std::function &getMemAllocationFunc) { const detail::plugin &Plugin = Queue->getPlugin(); @@ -1880,8 +1880,7 @@ static pi_result SetKernelParamsAndLaunch( pi_result Error = Plugin.call_nocheck( Queue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], LocalSize, RawEvents.size(), - RawEvents.empty() ? nullptr : &RawEvents[0], - (EventImpl ? &EventImpl->getHandleRef() : nullptr)); + RawEvents.empty() ? nullptr : &RawEvents[0], OutEvent); return Error; } @@ -1910,7 +1909,7 @@ cl_int enqueueImpKernel( const std::shared_ptr &KernelBundleImplPtr, const std::shared_ptr &MSyclKernel, const std::string &KernelName, const detail::OSModuleHandle &OSModuleHandle, - std::vector &RawEvents, const EventImplPtr &EventImpl, + std::vector &RawEvents, RT::PiEvent *OutEvent, const std::function &getMemAllocationFunc) { // Run OpenCL kernel @@ -1977,11 +1976,11 @@ cl_int enqueueImpKernel( // For cacheable kernels, we use per-kernel mutex std::lock_guard Lock(*KernelMutex); Error = SetKernelParamsAndLaunch(Queue, Args, DeviceImageImpl, Kernel, - NDRDesc, RawEvents, EventImpl, + NDRDesc, RawEvents, OutEvent, EliminatedArgMask, getMemAllocationFunc); } else { Error = SetKernelParamsAndLaunch(Queue, Args, DeviceImageImpl, Kernel, - NDRDesc, RawEvents, EventImpl, + NDRDesc, RawEvents, OutEvent, EliminatedArgMask, getMemAllocationFunc); } @@ -2003,8 +2002,10 @@ cl_int ExecCGCommand::enqueueImp() { auto RawEvents = getPiEvents(EventImpls); flushCrossQueueDeps(EventImpls, getWorkerQueue()); - RT::PiEvent &Event = MEvent->getHandleRef(); - + RT::PiEvent *Event = (MQueue->has_discard_events_support() && + MCommandGroup->MRequirements.size() == 0) + ? nullptr + : &MEvent->getHandleRef(); switch (MCommandGroup->getType()) { case CG::CGTYPE::UpdateHost: { @@ -2022,7 +2023,7 @@ cl_int ExecCGCommand::enqueueImp() { Req->MElemSize, Copy->getDst(), Scheduler::getInstance().getDefaultHostQueue(), Req->MDims, Req->MAccessRange, Req->MAccessRange, /*DstOffset=*/{0, 0, 0}, - Req->MElemSize, std::move(RawEvents), Event); + Req->MElemSize, std::move(RawEvents), MEvent->getHandleRef()); return CL_SUCCESS; } @@ -2033,13 +2034,13 @@ cl_int ExecCGCommand::enqueueImp() { Scheduler::getInstance().getDefaultHostQueue(); - MemoryManager::copy(AllocaCmd->getSYCLMemObj(), Copy->getSrc(), - Scheduler::getInstance().getDefaultHostQueue(), - Req->MDims, Req->MAccessRange, Req->MAccessRange, - /*SrcOffset*/ {0, 0, 0}, Req->MElemSize, - AllocaCmd->getMemAllocation(), MQueue, Req->MDims, - Req->MMemoryRange, Req->MAccessRange, Req->MOffset, - Req->MElemSize, std::move(RawEvents), Event); + MemoryManager::copy( + AllocaCmd->getSYCLMemObj(), Copy->getSrc(), + Scheduler::getInstance().getDefaultHostQueue(), Req->MDims, + Req->MAccessRange, Req->MAccessRange, + /*SrcOffset*/ {0, 0, 0}, Req->MElemSize, AllocaCmd->getMemAllocation(), + MQueue, Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, + Req->MElemSize, std::move(RawEvents), MEvent->getHandleRef()); return CL_SUCCESS; } @@ -2056,7 +2057,8 @@ cl_int ExecCGCommand::enqueueImp() { ReqSrc->MDims, ReqSrc->MMemoryRange, ReqSrc->MAccessRange, ReqSrc->MOffset, ReqSrc->MElemSize, AllocaCmdDst->getMemAllocation(), MQueue, ReqDst->MDims, ReqDst->MMemoryRange, ReqDst->MAccessRange, - ReqDst->MOffset, ReqDst->MElemSize, std::move(RawEvents), Event); + ReqDst->MOffset, ReqDst->MElemSize, std::move(RawEvents), + MEvent->getHandleRef()); return CL_SUCCESS; } @@ -2069,7 +2071,7 @@ cl_int ExecCGCommand::enqueueImp() { AllocaCmd->getSYCLMemObj(), AllocaCmd->getMemAllocation(), MQueue, Fill->MPattern.size(), Fill->MPattern.data(), Req->MDims, Req->MMemoryRange, Req->MAccessRange, Req->MOffset, Req->MElemSize, - std::move(RawEvents), Event); + std::move(RawEvents), MEvent->getHandleRef()); return CL_SUCCESS; } @@ -2131,7 +2133,7 @@ cl_int ExecCGCommand::enqueueImp() { MQueue->getHandleRef(), DispatchNativeKernel, (void *)ArgsBlob.data(), ArgsBlob.size() * sizeof(ArgsBlob[0]), Buffers.size(), Buffers.data(), const_cast(MemLocs.data()), RawEvents.size(), - RawEvents.empty() ? nullptr : RawEvents.data(), &Event); + RawEvents.empty() ? nullptr : RawEvents.data(), Event); switch (Error) { case PI_INVALID_OPERATION: @@ -2185,10 +2187,24 @@ cl_int ExecCGCommand::enqueueImp() { return AllocaCmd->getMemAllocation(); }; + const std::shared_ptr &SyclKernel = + ExecKernel->MSyclKernel; + const std::string &KernelName = ExecKernel->MKernelName; + const detail::OSModuleHandle &OSModuleHandle = ExecKernel->MOSModuleHandle; + + if (!Event) { + // Kernel only uses assert if it's non interop one + bool KernelUsesAssert = !(SyclKernel && SyclKernel->isInterop()) && + ProgramManager::getInstance().kernelUsesAssert( + OSModuleHandle, KernelName); + if (KernelUsesAssert) { + Event = &MEvent->getHandleRef(); + } + } + return enqueueImpKernel( - MQueue, NDRDesc, Args, ExecKernel->getKernelBundle(), - ExecKernel->MSyclKernel, ExecKernel->MKernelName, - ExecKernel->MOSModuleHandle, RawEvents, MEvent, getMemAllocationFunc); + MQueue, NDRDesc, Args, ExecKernel->getKernelBundle(), SyclKernel, + KernelName, OSModuleHandle, RawEvents, Event, getMemAllocationFunc); } case CG::CGTYPE::CopyUSM: { CGCopyUSM *Copy = (CGCopyUSM *)MCommandGroup.get(); @@ -2243,7 +2259,7 @@ cl_int ExecCGCommand::enqueueImp() { interop_handler InteropHandler(std::move(ReqMemObjs), MQueue); ExecInterop->MInteropTask->call(InteropHandler); Plugin.call(MQueue->getHandleRef(), 0, - nullptr, &Event); + nullptr, Event); return CL_SUCCESS; } @@ -2309,7 +2325,7 @@ cl_int ExecCGCommand::enqueueImp() { } const detail::plugin &Plugin = MQueue->getPlugin(); Plugin.call( - MQueue->getHandleRef(), 0, nullptr, &Event); + MQueue->getHandleRef(), 0, nullptr, Event); return PI_SUCCESS; } @@ -2324,7 +2340,7 @@ cl_int ExecCGCommand::enqueueImp() { } const detail::plugin &Plugin = MQueue->getPlugin(); Plugin.call( - MQueue->getHandleRef(), PiEvents.size(), &PiEvents[0], &Event); + MQueue->getHandleRef(), PiEvents.size(), &PiEvents[0], Event); return PI_SUCCESS; } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 4a556f8a5567e..e90ff2422d3bc 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -508,7 +508,7 @@ cl_int enqueueImpKernel( const std::shared_ptr &KernelBundleImplPtr, const std::shared_ptr &MSyclKernel, const std::string &KernelName, const detail::OSModuleHandle &OSModuleHandle, - std::vector &RawEvents, const EventImplPtr &EventImpl, + std::vector &RawEvents, RT::PiEvent *OutEvent, const std::function &getMemAllocationFunc); /// The exec CG command enqueues execution of kernel or explicit memory diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index e1c27d2650898..4dc70eaef76b2 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -198,25 +198,45 @@ event handler::finalize() { // bypassing scheduler and avoiding CommandGroup, Command objects creation. std::vector RawEvents; - detail::EventImplPtr NewEvent = - std::make_shared(MQueue); - NewEvent->setContextImpl(MQueue->getContextImplPtr()); + detail::EventImplPtr NewEvent; + RT::PiEvent *OutEvent = nullptr; + + auto EnqueueKernel = [&]() { + if (MQueue->is_host()) { + MHostKernel->call( + MNDRDesc, (NewEvent) ? NewEvent->getHostProfilingInfo() : nullptr); + return CL_SUCCESS; + } + return enqueueImpKernel(MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, + MKernel, MKernelName, MOSModuleHandle, RawEvents, + OutEvent, nullptr); + }; + + bool DiscardEvent = false; + if (MQueue->has_discard_events_support()) { + // Kernel only uses assert if it's non interop one + bool KernelUsesAssert = + !(MKernel && MKernel->isInterop()) && + detail::ProgramManager::getInstance().kernelUsesAssert( + MOSModuleHandle, MKernelName); + DiscardEvent = !KernelUsesAssert; + } - cl_int Res = CL_SUCCESS; - if (MQueue->is_host()) { - MHostKernel->call(MNDRDesc, NewEvent->getHostProfilingInfo()); + if (DiscardEvent) { + if (CL_SUCCESS != EnqueueKernel()) + throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); } else { - Res = enqueueImpKernel(MQueue, MNDRDesc, MArgs, KernelBundleImpPtr, - MKernel, MKernelName, MOSModuleHandle, RawEvents, - NewEvent, nullptr); - } + NewEvent = std::make_shared(MQueue); + NewEvent->setContextImpl(MQueue->getContextImplPtr()); + OutEvent = &NewEvent->getHandleRef(); - if (CL_SUCCESS != Res) - throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); - else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) - NewEvent->setComplete(); + if (CL_SUCCESS != EnqueueKernel()) + throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) + NewEvent->setComplete(); - MLastEvent = detail::createSyclObjFromImpl(NewEvent); + MLastEvent = detail::createSyclObjFromImpl(NewEvent); + } return MLastEvent; } @@ -692,5 +712,26 @@ void handler::use_kernel_bundle( setHandlerKernelBundle(detail::getSyclObjImpl(ExecBundle)); } +void handler::depends_on(event Event) { + auto EventImpl = detail::getSyclObjImpl(Event); + if (EventImpl->isDiscarded()) { + throw sycl::exception(make_error_code(errc::invalid), + "Queue operation cannot depend on discarded event."); + } + MEvents.push_back(EventImpl); +} + +void handler::depends_on(const std::vector &Events) { + for (const event &Event : Events) { + auto EventImpl = detail::getSyclObjImpl(Event); + if (EventImpl->isDiscarded()) { + throw sycl::exception( + make_error_code(errc::invalid), + "Queue operation cannot depend on discarded event."); + } + MEvents.push_back(EventImpl); + } +} + } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 03c3d00607830..46ebc1d3a4d15 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -122,6 +123,15 @@ event queue::mem_advise(const void *Ptr, size_t Length, int Advice, return impl->mem_advise(impl, Ptr, Length, pi_mem_advice(Advice), DepEvents); } +event queue::discard_or_return(const event &Event) { + if (impl->MDiscardEvents) { + using detail::event_impl; + auto Impl = std::make_shared(event_impl::HES_Discarded); + return detail::createSyclObjFromImpl(Impl); + } + return Event; +} + event queue::submit_impl(std::function CGH, const detail::code_location &CodeLoc) { return impl->submit(CGH, impl, CodeLoc); diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 4d8a377088db6..641e775ecd7a1 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3694,6 +3694,7 @@ _ZN2cl4sycl5queue10mem_adviseEPKvmiRKSt6vectorINS0_5eventESaIS5_EE _ZN2cl4sycl5queue10wait_proxyERKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationE +_ZN2cl4sycl5queue17discard_or_returnERKNS0_5eventE _ZN2cl4sycl5queue18throw_asynchronousEv _ZN2cl4sycl5queue20wait_and_throw_proxyERKNS0_6detail13code_locationE _ZN2cl4sycl5queue23getAssertHappenedBufferEv @@ -3821,7 +3822,9 @@ _ZN2cl4sycl6detail12sampler_implC2EP11_cl_samplerRKNS0_7contextE _ZN2cl4sycl6detail12sampler_implD1Ev _ZN2cl4sycl6detail12sampler_implD2Ev _ZN2cl4sycl6detail12split_stringERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEc +_ZN2cl4sycl6detail13MemoryManager10advise_usmEPKvSt10shared_ptrINS1_10queue_implEEm14_pi_mem_adviceSt6vectorIP9_pi_eventSaISB_EEPSB_ _ZN2cl4sycl6detail13MemoryManager10advise_usmEPKvSt10shared_ptrINS1_10queue_implEEm14_pi_mem_adviceSt6vectorIP9_pi_eventSaISB_EERSB_ +_ZN2cl4sycl6detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EEPS9_ _ZN2cl4sycl6detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EERS9_ _ZN2cl4sycl6detail13MemoryManager13releaseMemObjESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvS8_ _ZN2cl4sycl6detail13MemoryManager16allocateMemImageESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRK14_pi_image_descRK16_pi_image_formatRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event @@ -3839,7 +3842,9 @@ _ZN2cl4sycl6detail13MemoryManager4fillEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_1 _ZN2cl4sycl6detail13MemoryManager5unmapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEES5_St6vectorIP9_pi_eventSaISB_EERSB_ _ZN2cl4sycl6detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event _ZN2cl4sycl6detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event +_ZN2cl4sycl6detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EEPSB_ _ZN2cl4sycl6detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EERSB_ +_ZN2cl4sycl6detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EEPS9_ _ZN2cl4sycl6detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EERS9_ _ZN2cl4sycl6detail13make_platformEmNS0_7backendE _ZN2cl4sycl6detail14getBorderColorENS0_19image_channel_orderE @@ -3937,6 +3942,8 @@ _ZN2cl4sycl7contextC2ERKSt6vectorINS0_6deviceESaIS3_EERKNS0_13property_listE _ZN2cl4sycl7contextC2ERKSt6vectorINS0_6deviceESaIS3_EESt8functionIFvNS0_14exception_listEEERKNS0_13property_listE _ZN2cl4sycl7contextC2ERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE _ZN2cl4sycl7contextC2ESt10shared_ptrINS0_6detail12context_implEE +_ZN2cl4sycl7handler10depends_onERKSt6vectorINS0_5eventESaIS3_EE +_ZN2cl4sycl7handler10depends_onENS0_5eventE _ZN2cl4sycl7handler10mem_adviseEPKvmi _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmb _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 3d4a525f89651..c4fdfa0ac79d3 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1057,6 +1057,7 @@ ?addReduction@handler@sycl@cl@@AEAAXAEBV?$shared_ptr@$$CBX@std@@@Z ?addStream@handler@sycl@cl@@AEAAXAEBV?$shared_ptr@Vstream_impl@detail@sycl@cl@@@std@@@Z ?advise_usm@MemoryManager@detail@sycl@cl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KW4_pi_mem_advice@@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@AEAPEAU_pi_event@@@Z +?advise_usm@MemoryManager@detail@sycl@cl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KW4_pi_mem_advice@@V?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?alignedAlloc@OSUtil@detail@sycl@cl@@SAPEAX_K0@Z ?alignedFree@OSUtil@detail@sycl@cl@@SAXPEAX@Z ?aligned_alloc@sycl@cl@@YAPEAX_K0AEBVdevice@12@AEBVcontext@12@W4alloc@usm@12@@Z @@ -1462,6 +1463,7 @@ ?convertChannelType@detail@sycl@cl@@YA?AW4image_channel_type@23@W4_pi_image_channel_type@@@Z ?copy@MemoryManager@detail@sycl@cl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@IV?$range@$02@34@3V?$id@$02@34@I12I334IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z ?copy_usm@MemoryManager@detail@sycl@cl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KPEAXV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@AEAPEAU_pi_event@@@Z +?copy_usm@MemoryManager@detail@sycl@cl@@SAXPEBXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KPEAXV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?copysign@__host_std@cl@@YA?AV?$vec@M$00@sycl@2@V342@0@Z ?copysign@__host_std@cl@@YA?AV?$vec@M$01@sycl@2@V342@0@Z ?copysign@__host_std@cl@@YA?AV?$vec@M$02@sycl@2@V342@0@Z @@ -1634,6 +1636,7 @@ ?determineHostPtr@SYCLMemObjT@detail@sycl@cl@@IEAAXAEBV?$shared_ptr@Vcontext_impl@detail@sycl@cl@@@std@@_NAEAPEAXAEA_N@Z ?device_has@queue@sycl@cl@@QEBA_NW4aspect@23@@Z ?die@pi@detail@sycl@cl@@YAXPEBD@Z +?discard_or_return@queue@sycl@cl@@AEAA?AVevent@23@AEBV423@@Z ?distance@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V34562@0@Z ?distance@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$01@62@0@Z ?distance@__host_std@cl@@YA?AVhalf@half_impl@detail@sycl@2@V?$vec@Vhalf@half_impl@detail@sycl@cl@@$02@62@0@Z @@ -1867,6 +1870,7 @@ ?fdim@__host_std@cl@@YANNN@Z ?fill@MemoryManager@detail@sycl@cl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z ?fill_usm@MemoryManager@detail@sycl@cl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@AEAPEAU_pi_event@@@Z +?fill_usm@MemoryManager@detail@sycl@cl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?finalize@handler@sycl@cl@@AEAA?AVevent@23@XZ ?getESIMDDeviceInterface@detail@sycl@cl@@YAPEAUESIMDDeviceInterface@123@XZ ?find_device_intersection@detail@sycl@cl@@YA?AV?$vector@Vdevice@sycl@cl@@V?$allocator@Vdevice@sycl@cl@@@std@@@std@@AEBV?$vector@V?$kernel_bundle@$00@sycl@cl@@V?$allocator@V?$kernel_bundle@$00@sycl@cl@@@std@@@5@@Z @@ -3027,6 +3031,7 @@ ?prefetch@queue@sycl@cl@@QEAA?AVevent@23@PEBX_KAEBV?$vector@Vevent@sycl@cl@@V?$allocator@Vevent@sycl@cl@@@std@@@std@@@Z ?prefetch@queue@sycl@cl@@QEAA?AVevent@23@PEBX_KV423@@Z ?prefetch_usm@MemoryManager@detail@sycl@cl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@AEAPEAU_pi_event@@@Z +?prefetch_usm@MemoryManager@detail@sycl@cl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@PEAPEAU_pi_event@@@Z ?processArg@handler@sycl@cl@@AEAAXPEAXAEBW4kernel_param_kind_t@detail@23@H_KAEA_K_N4@Z ?processArg@handler@sycl@cl@@AEAAXPEAXAEBW4kernel_param_kind_t@detail@23@H_KAEA_K_N@Z ?radians@__host_std@cl@@YA?AV?$vec@M$00@sycl@2@V342@@Z