From 89fc65c52b61163092f123aa24be1e4534e37137 Mon Sep 17 00:00:00 2001 From: Vlad Romanov Date: Wed, 29 Jul 2020 17:32:10 +0300 Subject: [PATCH 1/4] [SYCL] Refactoring of queue classes 1. Aligned variables names 2. Replaced "take by value" to "take by reference" in several functions 3. Reduced scope of locks 4. Always use vector of queues instead of accessing one dedicated queue. --- sycl/include/CL/sycl/queue.hpp | 18 ++-- sycl/source/detail/queue_impl.cpp | 87 ++++++++-------- sycl/source/detail/queue_impl.hpp | 160 ++++++++++++++---------------- sycl/source/queue.cpp | 84 ++++++++-------- 4 files changed, 172 insertions(+), 177 deletions(-) diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index 19ce5f6ace5bd..e85d7982131b6 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -145,17 +145,17 @@ class __SYCL_EXPORT queue { queue(cl_command_queue ClQueue, const context &SyclContext, const async_handler &AsyncHandler = {}); - queue(const queue &rhs) = default; + queue(const queue &RHS) = default; - queue(queue &&rhs) = default; + queue(queue &&RHS) = default; - queue &operator=(const queue &rhs) = default; + queue &operator=(const queue &RHS) = default; - queue &operator=(queue &&rhs) = default; + queue &operator=(queue &&RHS) = default; - bool operator==(const queue &rhs) const { return impl == rhs.impl; } + bool operator==(const queue &RHS) const { return impl == RHS.impl; } - bool operator!=(const queue &rhs) const { return !(*this == rhs); } + bool operator!=(const queue &RHS) const { return !(*this == RHS); } /// \return a valid instance of OpenCL queue, which is retained before being /// returned. @@ -317,7 +317,7 @@ class __SYCL_EXPORT queue { /// \return a copy of the property of type PropertyT that the queue was /// constructed with. If the queue was not constructed with the PropertyT /// property, an invalid_object_error SYCL exception. - template propertyT get_property() const; + template PropertyT get_property() const; /// Fills the memory pointed by a USM pointer with the value specified. /// @@ -900,10 +900,10 @@ class __SYCL_EXPORT queue { namespace std { template <> struct hash { - size_t operator()(const cl::sycl::queue &q) const { + size_t operator()(const cl::sycl::queue &Q) const { return std::hash< cl::sycl::shared_ptr_class>()( - cl::sycl::detail::getSyclObjImpl(q)); + cl::sycl::detail::getSyclObjImpl(Q)); } }; } // namespace std diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 1ae5f1a68809a..68bd769eceb64 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -27,7 +27,7 @@ template <> cl_uint queue_impl::get_info() const { RT::PiResult result = PI_SUCCESS; if (!is_host()) getPlugin().call( - MCommandQueue, PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, + MQueues[0], PI_QUEUE_INFO_REFERENCE_COUNT, sizeof(result), &result, nullptr); return result; } @@ -40,49 +40,47 @@ template <> device queue_impl::get_info() const { return get_device(); } -static event prepareUSMEvent(shared_ptr_class QueueImpl, - RT::PiEvent NativeEvent) { +static event +prepareUSMEvent(const shared_ptr_class &QueueImpl, + RT::PiEvent NativeEvent) { auto EventImpl = std::make_shared(QueueImpl); EventImpl->getHandleRef() = NativeEvent; EventImpl->setContextImpl(detail::getSyclObjImpl(QueueImpl->get_context())); return detail::createSyclObjFromImpl(EventImpl); } -event queue_impl::memset(shared_ptr_class Impl, void *Ptr, - int Value, size_t Count) { - context Context = get_context(); +event queue_impl::memset(const shared_ptr_class &Self, + void *Ptr, int Value, size_t Count) { RT::PiEvent NativeEvent = nullptr; - MemoryManager::fill_usm(Ptr, Impl, Count, Value, /*DepEvents*/ {}, + MemoryManager::fill_usm(Ptr, Self, Count, Value, /*DepEvents*/ {}, NativeEvent); - if (Context.is_host()) + if (MContext->is_host()) return event(); - event ResEvent = prepareUSMEvent(Impl, NativeEvent); + event ResEvent = prepareUSMEvent(Self, NativeEvent); addUSMEvent(ResEvent); return ResEvent; } -event queue_impl::memcpy(shared_ptr_class Impl, void *Dest, - const void *Src, size_t Count) { - context Context = get_context(); +event queue_impl::memcpy(const shared_ptr_class &Self, + void *Dest, const void *Src, size_t Count) { RT::PiEvent NativeEvent = nullptr; - MemoryManager::copy_usm(Src, Impl, Count, Dest, /*DepEvents*/ {}, + MemoryManager::copy_usm(Src, Self, Count, Dest, /*DepEvents*/ {}, NativeEvent); - if (Context.is_host()) + if (MContext->is_host()) return event(); - event ResEvent = prepareUSMEvent(Impl, NativeEvent); + event ResEvent = prepareUSMEvent(Self, NativeEvent); addUSMEvent(ResEvent); return ResEvent; } -event queue_impl::mem_advise(shared_ptr_class Impl, +event queue_impl::mem_advise(const shared_ptr_class &Self, const void *Ptr, size_t Length, pi_mem_advice Advice) { - context Context = get_context(); - if (Context.is_host()) { + if (MContext->is_host()) { return event(); } @@ -92,26 +90,30 @@ event queue_impl::mem_advise(shared_ptr_class Impl, Plugin.call(getHandleRef(), Ptr, Length, Advice, &NativeEvent); - event ResEvent = prepareUSMEvent(Impl, NativeEvent); + event ResEvent = prepareUSMEvent(Self, NativeEvent); addUSMEvent(ResEvent); return ResEvent; } -void queue_impl::addEvent(event Event) { +void queue_impl::addEvent(const event &Event) { std::weak_ptr EventWeakPtr{getSyclObjImpl(Event)}; - std::lock_guard Guard(MMutex); + std::lock_guard Lock(MMutex); MEvents.push_back(std::move(EventWeakPtr)); } -void queue_impl::addUSMEvent(event Event) { - std::lock_guard Guard(MMutex); - MUSMEvents.push_back(std::move(Event)); +void queue_impl::addUSMEvent(const event &Event) { + std::lock_guard Lock(MMutex); + MUSMEvents.push_back(Event); } void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc, string_class &Name, int32_t StreamID, uint64_t &IId) { void *TraceEvent = nullptr; + (void)CodeLoc; + (void)Name; + (void)StreamID; + (void)IId; #ifdef XPTI_ENABLE_INSTRUMENTATION xpti::trace_event_data_t *WaitEvent = nullptr; if (!xptiTraceEnabled()) @@ -172,6 +174,10 @@ void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc, void queue_impl::instrumentationEpilog(void *TelemetryEvent, string_class &Name, int32_t StreamID, uint64_t IId) { + (void)TelemetryEvent; + (void)Name; + (void)StreamID; + (void)IId; #ifdef XPTI_ENABLE_INSTRUMENTATION if (!(xptiTraceEnabled() && TelemetryEvent)) return; @@ -184,6 +190,7 @@ void queue_impl::instrumentationEpilog(void *TelemetryEvent, string_class &Name, } void queue_impl::wait(const detail::code_location &CodeLoc) { + (void)CodeLoc; #ifdef XPTI_ENABLE_INSTRUMENTATION void *TelemetryEvent = nullptr; uint64_t IId; @@ -192,24 +199,20 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { TelemetryEvent = instrumentationProlog(CodeLoc, Name, StreamID, IId); #endif - std::vector> Events; + vector_class> Events; + vector_class USMEvents; { - std::lock_guard Guard(MMutex); - for (std::weak_ptr &EventImplWeakPtr : MEvents) - if (std::shared_ptr EventImplPtr = EventImplWeakPtr.lock()) - Events.push_back(EventImplPtr); + std::lock_guard Lock(MMutex); + Events.swap(MEvents); + USMEvents.swap(MUSMEvents); } - for (std::shared_ptr &Event : Events) { - Event->wait(Event); - } + for (std::weak_ptr &EventImplWeakPtr : Events) + if (std::shared_ptr EventImplPtr = EventImplWeakPtr.lock()) + EventImplPtr->wait(EventImplPtr); - for (event &Event : MUSMEvents) { + for (event &Event : USMEvents) Event.wait(); - } - - MEvents.clear(); - MUSMEvents.clear(); #ifdef XPTI_ENABLE_INSTRUMENTATION instrumentationEpilog(TelemetryEvent, Name, StreamID, IId); @@ -222,9 +225,9 @@ void queue_impl::initHostTaskAndEventCallbackThreadPool() { int Size = 1; - if (const char *val = std::getenv("SYCL_QUEUE_THREAD_POOL_SIZE")) + if (const char *Val = std::getenv("SYCL_QUEUE_THREAD_POOL_SIZE")) try { - Size = std::stoi(val); + Size = std::stoi(Val); } catch (...) { throw invalid_parameter_error( "Invalid value for SYCL_QUEUE_THREAD_POOL_SIZE environment variable", @@ -241,9 +244,9 @@ void queue_impl::initHostTaskAndEventCallbackThreadPool() { } pi_native_handle queue_impl::getNative() const { - auto Plugin = getPlugin(); - pi_native_handle Handle; - Plugin.call(MCommandQueue, &Handle); + const detail::plugin &Plugin = getPlugin(); + pi_native_handle Handle{}; + Plugin.call(MQueues[0], &Handle); return Handle; } diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 5cdcf8c3ee1f8..a6647a6503edb 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -31,14 +31,14 @@ using ContextImplPtr = std::shared_ptr; using DeviceImplPtr = shared_ptr_class; /// Sets max number of queues supported by FPGA RT. -const size_t MaxNumQueues = 256; +constexpr size_t MaxNumQueues = 256; //// Possible CUDA context types supported by PI CUDA backend /// TODO: Implement this as a property once there is an extension document -enum class cuda_context_type : char { primary, custom }; +enum class CUDAContextT : char { primary, custom }; /// Default context type created for CUDA backend -constexpr cuda_context_type DefaultContextType = cuda_context_type::custom; +constexpr CUDAContextT DefaultContextType = CUDAContextT::custom; enum QueueOrder { Ordered, OOO }; @@ -51,12 +51,12 @@ class queue_impl { /// to the queue. /// \param AsyncHandler is a SYCL asynchronous exception handler. /// \param PropList is a list of properties to use for queue construction. - queue_impl(DeviceImplPtr Device, async_handler AsyncHandler, + queue_impl(const DeviceImplPtr &Device, const async_handler &AsyncHandler, const property_list &PropList) : queue_impl(Device, - detail::getSyclObjImpl(context( - createSyclObjFromImpl(Device), {}, - (DefaultContextType == cuda_context_type::primary))), + detail::getSyclObjImpl( + context(createSyclObjFromImpl(Device), {}, + (DefaultContextType == CUDAContextT::primary))), AsyncHandler, PropList){}; /// Constructs a SYCL queue with an async_handler and property_list provided @@ -68,22 +68,21 @@ class queue_impl { /// constructed. /// \param AsyncHandler is a SYCL asynchronous exception handler. /// \param PropList is a list of properties to use for queue construction. - queue_impl(DeviceImplPtr Device, ContextImplPtr Context, - async_handler AsyncHandler, const property_list &PropList) + queue_impl(const DeviceImplPtr &Device, const ContextImplPtr &Context, + const async_handler &AsyncHandler, const property_list &PropList) : MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler), - MPropList(PropList), MHostQueue(MDevice->is_host()), - MOpenCLInterop(!MHostQueue) { + MPropList(PropList), MHostQueue(MDevice->is_host()) { if (!Context->hasDevice(Device)) throw cl::sycl::invalid_parameter_error( "Queue cannot be constructed with the given context and device " "as the context does not contain the given device.", PI_INVALID_DEVICE); if (!MHostQueue) { - const QueueOrder qorder = + const QueueOrder QOrder = MPropList.has_property() ? QueueOrder::Ordered : QueueOrder::OOO; - MCommandQueue = createQueue(qorder); + MQueues.push_back(createQueue(QOrder)); } } @@ -93,37 +92,36 @@ class queue_impl { /// \param Context is a SYCL context to associate with the queue being /// constructed. /// \param AsyncHandler is a SYCL asynchronous exception handler. - queue_impl(RT::PiQueue PiQueue, ContextImplPtr Context, + queue_impl(RT::PiQueue PiQueue, const ContextImplPtr &Context, const async_handler &AsyncHandler) - : MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false), - MOpenCLInterop(true) { + : MContext(Context), MAsyncHandler(AsyncHandler), MHostQueue(false) { - MCommandQueue = pi::cast(PiQueue); + MQueues.push_back(pi::cast(PiQueue)); RT::PiDevice Device = nullptr; const detail::plugin &Plugin = getPlugin(); // TODO catch an exception and put it to list of asynchronous exceptions - Plugin.call(MCommandQueue, PI_QUEUE_INFO_DEVICE, + Plugin.call(MQueues[0], PI_QUEUE_INFO_DEVICE, sizeof(Device), &Device, nullptr); MDevice = DeviceImplPtr(new device_impl(Device, Context->getPlatformImpl())); // TODO catch an exception and put it to list of asynchronous exceptions - Plugin.call(MCommandQueue); + Plugin.call(MQueues[0]); } ~queue_impl() { throw_asynchronous(); if (!MHostQueue) { - getPlugin().call(MCommandQueue); + getPlugin().call(MQueues[0]); } } /// \return an OpenCL interoperability queue handle. cl_command_queue get() { - if (MOpenCLInterop) { - getPlugin().call(MCommandQueue); - return pi::cast(MCommandQueue); + if (!MHostQueue) { + getPlugin().call(MQueues[0]); + return pi::cast(MQueues[0]); } throw invalid_object_error( "This instance of queue doesn't support OpenCL interoperability", @@ -148,8 +146,8 @@ class queue_impl { /// Queries SYCL queue for information. /// /// The return type depends on information being queried. - template - typename info::param_traits::return_type get_info() const; + template + typename info::param_traits::return_type get_info() const; /// Submits a command group function object to the queue, in order to be /// scheduled for execution on the device. @@ -164,14 +162,14 @@ class queue_impl { /// \return a SYCL event object, which corresponds to the queue the command /// group is being enqueued on. event submit(const function_class &CGF, - shared_ptr_class Self, - shared_ptr_class SecondQueue, + const shared_ptr_class &Self, + const shared_ptr_class &SecondQueue, const detail::code_location &Loc) { try { return submit_impl(CGF, Self, Loc); } catch (...) { { - std::lock_guard Guard(MMutex); + std::lock_guard Lock(MMutex); MExceptions.PushBack(std::current_exception()); } return SecondQueue->submit(CGF, SecondQueue, Loc); @@ -186,9 +184,9 @@ class queue_impl { /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event object for the submitted command group. event submit(const function_class &CGF, - shared_ptr_class Self, + const shared_ptr_class &Self, const detail::code_location &Loc) { - return submit_impl(CGF, std::move(Self), Loc); + return submit_impl(CGF, Self, Loc); } /// Performs a blocking wait for the completion of all enqueued tasks in the @@ -215,20 +213,19 @@ class queue_impl { /// queue on construction. If no async_handler was provided then /// asynchronous exceptions will be lost. void throw_asynchronous() { - std::unique_lock lock(MMutex); - - if (MAsyncHandler && MExceptions.size()) { - exception_list Exceptions; + if (!MAsyncHandler) + return; + exception_list Exceptions; + { + std::unique_lock Lock(MMutex); std::swap(MExceptions, Exceptions); - - // Unlock the mutex before calling user-provided handler to avoid - // potential deadlock if the same queue is somehow referenced in the - // handler. - lock.unlock(); - - MAsyncHandler(std::move(Exceptions)); } + // Unlock the mutex before calling user-provided handler to avoid + // potential deadlock if the same queue is somehow referenced in the + // handler. + if (Exceptions.size()) + MAsyncHandler(std::move(Exceptions)); } /// Creates PI queue. @@ -244,7 +241,7 @@ class queue_impl { if (MPropList.has_property()) { CreationFlags |= PI_QUEUE_PROFILING_ENABLE; } - RT::PiQueue Queue; + RT::PiQueue Queue{}; RT::PiContext Context = MContext->getHandleRef(); RT::PiDevice Device = MDevice->getHandleRef(); const detail::plugin &Plugin = getPlugin(); @@ -269,42 +266,40 @@ class queue_impl { /// \return a raw PI handle for a free queue. The returned handle is not /// retained. It is caller responsibility to make sure queue is still alive. RT::PiQueue &getExclusiveQueueHandleRef() { - std::lock_guard Guard(MMutex); - - // To achieve parallelism for FPGA with in order execution model with - // possibility of two kernels to share data with each other we shall - // create a queue for every kernel enqueued. - if (MQueues.size() < MaxNumQueues) { - MQueues.push_back(createQueue(QueueOrder::Ordered)); - return MQueues.back(); + RT::PiQueue *PIQ = nullptr; + bool ReuseQueue = false; + { + std::lock_guard Lock(MMutex); + + // To achieve parallelism for FPGA with in order execution model with + // possibility of two kernels to share data with each other we shall + // create a queue for every kernel enqueued. + if (MQueues.size() < MaxNumQueues) { + MQueues.push_back({}); + PIQ = &MQueues.back(); + } else { + // If the limit of OpenCL queues is going to be exceeded - take the + // earliest used queue, wait until it finished and then reuse it. + // MQueueNumber %= MaxNumQueues; + PIQ = &MQueues[MNextQueueID]; + MNextQueueID = (MNextQueueID + 1) % MaxNumQueues; + ReuseQueue = true; + } } - // If the limit of OpenCL queues is going to be exceeded - take the - // earliest used queue, wait until it finished and then reuse it. - MQueueNumber %= MaxNumQueues; - size_t FreeQueueNum = MQueueNumber++; + if (!ReuseQueue) + *PIQ = createQueue(QueueOrder::Ordered); + else + getPlugin().call(*PIQ); - getPlugin().call(MQueues[FreeQueueNum]); - return MQueues[FreeQueueNum]; + return *PIQ; } /// \return a raw PI queue handle. The returned handle is not retained. It /// is caller responsibility to make sure queue is still alive. RT::PiQueue &getHandleRef() { - if (MSupportOOO) { - return MCommandQueue; - } - - { - // Reduce the scope since this mutex is also - // locked inside of getExclusiveQueueHandleRef() - std::lock_guard Guard(MMutex); - - if (MQueues.empty()) { - MQueues.push_back(MCommandQueue); - return MCommandQueue; - } - } + if (MSupportOOO) + return MQueues[0]; return getExclusiveQueueHandleRef(); } @@ -329,7 +324,7 @@ class queue_impl { /// \param Value is a value to be set. Value is cast as an unsigned char. /// \param Count is a number of bytes to fill. /// \return an event representing fill operation. - event memset(shared_ptr_class Impl, void *Ptr, int Value, + event memset(const shared_ptr_class &Self, void *Ptr, int Value, size_t Count); /// Copies data from one memory region to another, both pointed by /// USM pointers. @@ -338,8 +333,8 @@ class queue_impl { /// \param Dest is a USM pointer to the destination memory. /// \param Src is a USM pointer to the source memory. /// \param Count is a number of bytes to copy. - event memcpy(shared_ptr_class Impl, void *Dest, const void *Src, - size_t Count); + event memcpy(const shared_ptr_class &Self, void *Dest, + const void *Src, size_t Count); /// Provides additional information to the underlying runtime about how /// different allocations are used. /// @@ -347,14 +342,14 @@ class queue_impl { /// \param Ptr is a USM pointer to the allocation. /// \param Length is a number of bytes in the allocation. /// \param Advice is a device-defined advice for the specified allocation. - event mem_advise(shared_ptr_class Impl, const void *Ptr, + event mem_advise(const shared_ptr_class &Self, const void *Ptr, size_t Length, pi_mem_advice Advice); /// Puts exception to the list of asynchronous ecxeptions. /// /// \param ExceptionPtr is a pointer to exception to be put. - void reportAsyncException(std::exception_ptr ExceptionPtr) { - std::lock_guard Guard(MMutex); + void reportAsyncException(const std::exception_ptr &ExceptionPtr) { + std::lock_guard Lock(MMutex); MExceptions.PushBack(ExceptionPtr); } @@ -378,9 +373,9 @@ class queue_impl { /// \param Loc is the code location of the submit call (default argument) /// \return a SYCL event representing submitted command group. event submit_impl(const function_class &CGF, - shared_ptr_class Self, + const shared_ptr_class &Self, const detail::code_location &Loc) { - handler Handler(std::move(Self), MHostQueue); + handler Handler(Self, MHostQueue); Handler.saveCodeLoc(Loc); CGF(Handler); event Event = Handler.finalize(); @@ -402,12 +397,12 @@ class queue_impl { /// Stores a USM operation event that should be associated with the queue /// /// \param Event is the event to be stored - void addUSMEvent(event Event); + void addUSMEvent(const event &Event); /// Stores an event that should be associated with the queue /// /// \param Event is the event to be stored - void addEvent(event Event); + void addEvent(const event &Event); /// Protects all the fields that can be changed by class' methods. mutex_class MMutex; @@ -422,15 +417,12 @@ class queue_impl { const async_handler MAsyncHandler; const property_list MPropList; - RT::PiQueue MCommandQueue = nullptr; - /// List of queues created for FPGA device from a single SYCL queue. vector_class MQueues; /// Iterator through MQueues. - size_t MQueueNumber = 0; + size_t MNextQueueID = 0; const bool MHostQueue = false; - const bool MOpenCLInterop = false; // Assume OOO support by default. bool MSupportOOO = true; diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 827575527f639..8098b4adac955 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -18,49 +18,49 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -queue::queue(const context &syclContext, const device_selector &deviceSelector, - const async_handler &asyncHandler, const property_list &propList) { +queue::queue(const context &SyclContext, const device_selector &DeviceSelector, + const async_handler &AsyncHandler, const property_list &PropList) { - const vector_class Devs = syclContext.get_devices(); + const vector_class Devs = SyclContext.get_devices(); - auto Comp = [&deviceSelector](const device &d1, const device &d2) { - return deviceSelector(d1) < deviceSelector(d2); + auto Comp = [&DeviceSelector](const device &d1, const device &d2) { + return DeviceSelector(d1) < DeviceSelector(d2); }; - const device &syclDevice = *std::max_element(Devs.begin(), Devs.end(), Comp); + const device &SyclDevice = *std::max_element(Devs.begin(), Devs.end(), Comp); impl = std::make_shared( - detail::getSyclObjImpl(syclDevice), detail::getSyclObjImpl(syclContext), - asyncHandler, propList); + detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext), + AsyncHandler, PropList); } -queue::queue(const context &syclContext, - const device &syclDevice, - const async_handler &asyncHandler, - const property_list &propList) { +queue::queue(const context &SyclContext, + const device &SyclDevice, + const async_handler &AsyncHandler, + const property_list &PropList) { impl = std::make_shared( - detail::getSyclObjImpl(syclDevice), detail::getSyclObjImpl(syclContext), - asyncHandler, propList); + detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext), + AsyncHandler, PropList); } -queue::queue(const device &syclDevice, const async_handler &asyncHandler, - const property_list &propList) { +queue::queue(const device &SyclDevice, const async_handler &AsyncHandler, + const property_list &PropList) { impl = std::make_shared( - detail::getSyclObjImpl(syclDevice), asyncHandler, propList); + detail::getSyclObjImpl(SyclDevice), AsyncHandler, PropList); } -queue::queue(cl_command_queue clQueue, const context &syclContext, - const async_handler &asyncHandler) { +queue::queue(cl_command_queue clQueue, const context &SyclContext, + const async_handler &AsyncHandler) { impl = std::make_shared( reinterpret_cast(clQueue), - detail::getSyclObjImpl(syclContext), asyncHandler); + detail::getSyclObjImpl(SyclContext), AsyncHandler); } -queue::queue(const context &syclContext, const device_selector &deviceSelector, - const property_list &propList) - : queue(syclContext, deviceSelector, - detail::getSyclObjImpl(syclContext)->get_async_handler(), - propList) {} +queue::queue(const context &SyclContext, const device_selector &deviceSelector, + const property_list &PropList) + : queue(SyclContext, deviceSelector, + detail::getSyclObjImpl(SyclContext)->get_async_handler(), + PropList) {} queue::queue(const context &SyclContext, const device &SyclDevice, const property_list &PropList) @@ -79,16 +79,16 @@ bool queue::is_host() const { return impl->is_host(); } void queue::throw_asynchronous() { impl->throw_asynchronous(); } -event queue::memset(void *ptr, int value, size_t count) { - return impl->memset(impl, ptr, value, count); +event queue::memset(void *Ptr, int Value, size_t Count) { + return impl->memset(impl, Ptr, Value, Count); } -event queue::memcpy(void *dest, const void *src, size_t count) { - return impl->memcpy(impl, dest, src, count); +event queue::memcpy(void *Dest, const void *Src, size_t Count) { + return impl->memcpy(impl, Dest, Src, Count); } -event queue::mem_advise(const void *ptr, size_t length, pi_mem_advice advice) { - return impl->mem_advise(impl, ptr, length, advice); +event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice) { + return impl->mem_advise(impl, Ptr, Length, Advice); } event queue::submit_impl(function_class CGH, @@ -96,9 +96,9 @@ event queue::submit_impl(function_class CGH, return impl->submit(CGH, impl, CodeLoc); } -event queue::submit_impl(function_class CGH, queue secondQueue, +event queue::submit_impl(function_class CGH, queue SecondQueue, const detail::code_location &CodeLoc) { - return impl->submit(CGH, impl, secondQueue.impl, CodeLoc); + return impl->submit(CGH, impl, SecondQueue.impl, CodeLoc); } void queue::wait_proxy(const detail::code_location &CodeLoc) { @@ -109,26 +109,26 @@ void queue::wait_and_throw_proxy(const detail::code_location &CodeLoc) { impl->wait_and_throw(CodeLoc); } -template -typename info::param_traits::return_type +template +typename info::param_traits::return_type queue::get_info() const { - return impl->get_info(); + return impl->get_info(); } -#define PARAM_TRAITS_SPEC(param_type, param, ret_type) \ - template __SYCL_EXPORT ret_type queue::get_info() \ +#define PARAM_TRAITS_SPEC(ParamType, Param, RetType) \ + template __SYCL_EXPORT RetType queue::get_info() \ const; #include #undef PARAM_TRAITS_SPEC -template bool queue::has_property() const { - return impl->has_property(); +template bool queue::has_property() const { + return impl->has_property(); } -template propertyT queue::get_property() const { - return impl->get_property(); +template PropertyT queue::get_property() const { + return impl->get_property(); } template __SYCL_EXPORT bool From 6c767b90ef0c9a5d584c60d0f8b3d7e760877ca0 Mon Sep 17 00:00:00 2001 From: Vlad Romanov Date: Wed, 29 Jul 2020 18:15:31 +0300 Subject: [PATCH 2/4] fix clang format --- sycl/source/queue.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index 8098b4adac955..21ec93eb5eb4a 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -34,10 +34,8 @@ queue::queue(const context &SyclContext, const device_selector &DeviceSelector, AsyncHandler, PropList); } -queue::queue(const context &SyclContext, - const device &SyclDevice, - const async_handler &AsyncHandler, - const property_list &PropList) { +queue::queue(const context &SyclContext, const device &SyclDevice, + const async_handler &AsyncHandler, const property_list &PropList) { impl = std::make_shared( detail::getSyclObjImpl(SyclDevice), detail::getSyclObjImpl(SyclContext), AsyncHandler, PropList); From cfe43b8e920b8afee2ec39eed68c38103c713648 Mon Sep 17 00:00:00 2001 From: Vlad Romanov Date: Thu, 30 Jul 2020 14:40:45 +0300 Subject: [PATCH 3/4] some commengs addressed --- sycl/source/detail/queue_impl.cpp | 5 +++-- sycl/source/detail/queue_impl.hpp | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 68bd769eceb64..eb2e351ef851f 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -14,6 +14,7 @@ #include #include +#include #ifdef XPTI_ENABLE_INSTRUMENTATION #include "xpti_trace_framework.hpp" @@ -203,8 +204,8 @@ void queue_impl::wait(const detail::code_location &CodeLoc) { vector_class USMEvents; { std::lock_guard Lock(MMutex); - Events.swap(MEvents); - USMEvents.swap(MUSMEvents); + Events = std::move(MEvents); + USMEvents = std::move(MUSMEvents); } for (std::weak_ptr &EventImplWeakPtr : Events) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index a6647a6503edb..b5d79e43bd076 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -23,6 +23,8 @@ #include #include +#include + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { @@ -31,7 +33,7 @@ using ContextImplPtr = std::shared_ptr; using DeviceImplPtr = shared_ptr_class; /// Sets max number of queues supported by FPGA RT. -constexpr size_t MaxNumQueues = 256; +static constexpr size_t MaxNumQueues = 256; //// Possible CUDA context types supported by PI CUDA backend /// TODO: Implement this as a property once there is an extension document @@ -219,7 +221,7 @@ class queue_impl { exception_list Exceptions; { std::unique_lock Lock(MMutex); - std::swap(MExceptions, Exceptions); + Exceptions = std::move(MExceptions); } // Unlock the mutex before calling user-provided handler to avoid // potential deadlock if the same queue is somehow referenced in the From bebfbfb64d155aa593d437f9a748af28d42a5a44 Mon Sep 17 00:00:00 2001 From: Vlad Romanov Date: Thu, 30 Jul 2020 15:05:38 +0300 Subject: [PATCH 4/4] some commengs addressed --- sycl/source/detail/queue_impl.cpp | 6 +++--- sycl/source/detail/queue_impl.hpp | 11 +++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index eb2e351ef851f..52894ed3b9b20 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -52,7 +52,7 @@ prepareUSMEvent(const shared_ptr_class &QueueImpl, event queue_impl::memset(const shared_ptr_class &Self, void *Ptr, int Value, size_t Count) { - RT::PiEvent NativeEvent = nullptr; + RT::PiEvent NativeEvent{}; MemoryManager::fill_usm(Ptr, Self, Count, Value, /*DepEvents*/ {}, NativeEvent); @@ -66,7 +66,7 @@ event queue_impl::memset(const shared_ptr_class &Self, event queue_impl::memcpy(const shared_ptr_class &Self, void *Dest, const void *Src, size_t Count) { - RT::PiEvent NativeEvent = nullptr; + RT::PiEvent NativeEvent{}; MemoryManager::copy_usm(Src, Self, Count, Dest, /*DepEvents*/ {}, NativeEvent); @@ -86,7 +86,7 @@ event queue_impl::mem_advise(const shared_ptr_class &Self, } // non-Host device - RT::PiEvent NativeEvent = nullptr; + RT::PiEvent NativeEvent{}; const detail::plugin &Plugin = getPlugin(); Plugin.call(getHandleRef(), Ptr, Length, Advice, &NativeEvent); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index b5d79e43bd076..3c22bc73b8d73 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -100,7 +100,7 @@ class queue_impl { MQueues.push_back(pi::cast(PiQueue)); - RT::PiDevice Device = nullptr; + RT::PiDevice Device{}; const detail::plugin &Plugin = getPlugin(); // TODO catch an exception and put it to list of asynchronous exceptions Plugin.call(MQueues[0], PI_QUEUE_INFO_DEVICE, @@ -220,7 +220,7 @@ class queue_impl { exception_list Exceptions; { - std::unique_lock Lock(MMutex); + std::lock_guard Lock(MMutex); Exceptions = std::move(MExceptions); } // Unlock the mutex before calling user-provided handler to avoid @@ -282,9 +282,8 @@ class queue_impl { } else { // If the limit of OpenCL queues is going to be exceeded - take the // earliest used queue, wait until it finished and then reuse it. - // MQueueNumber %= MaxNumQueues; - PIQ = &MQueues[MNextQueueID]; - MNextQueueID = (MNextQueueID + 1) % MaxNumQueues; + PIQ = &MQueues[MNextQueueIdx]; + MNextQueueIdx = (MNextQueueIdx + 1) % MaxNumQueues; ReuseQueue = true; } } @@ -422,7 +421,7 @@ class queue_impl { /// List of queues created for FPGA device from a single SYCL queue. vector_class MQueues; /// Iterator through MQueues. - size_t MNextQueueID = 0; + size_t MNextQueueIdx = 0; const bool MHostQueue = false; // Assume OOO support by default.