From 885b27a3bc230cc670cb4702d66c66cbc3e3e426 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 10 Aug 2020 16:10:18 +0300 Subject: [PATCH 01/19] [SYCL] Split read/write lockings Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 42 +++-- sycl/source/detail/scheduler/commands.hpp | 12 +- .../source/detail/scheduler/graph_builder.cpp | 166 ++++++++++++------ .../detail/scheduler/graph_processor.cpp | 18 +- sycl/source/detail/scheduler/scheduler.cpp | 136 +++++++++----- sycl/source/detail/scheduler/scheduler.hpp | 61 ++++--- 6 files changed, 294 insertions(+), 141 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f21d39d8efc3d..cc3cbd4f15086 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -229,7 +229,7 @@ class DispatchHostTask { // Thus we employ read-lock of graph. { Scheduler &Sched = Scheduler::getInstance(); - std::shared_lock Lock(Sched.MGraphLock); + Scheduler::ReadLockT Lock(Sched.MGraphLock); std::vector Deps = MThisCmd->MDeps; @@ -239,7 +239,7 @@ class DispatchHostTask { EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; for (const DepDesc &Dep : Deps) - Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement); + Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement, Lock); } } }; @@ -475,7 +475,7 @@ void Command::makeTraceEventEpilog() { #endif } -void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { +Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { const ContextImplPtr &Context = getContext(); // 1. Async work is not supported for host device. @@ -486,25 +486,29 @@ void Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { // call to waitInternal() is in waitForPreparedHostEvents() as it's called // from enqueue process functions MPreparedHostDepsEvents.push_back(DepEvent); - return; + return nullptr; } + Command *ConnectionCmd = nullptr; ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match we'll connect them using host task if (DepEventContext != Context && !Context->is_host()) { Scheduler::GraphBuilder &GB = Scheduler::getInstance().MGraphBuilder; - GB.connectDepEvent(this, DepEvent, Dep); + ConnectionCmd = GB.connectDepEvent(this, DepEvent, Dep); } else MPreparedDepsEvents.push_back(std::move(DepEvent)); + + return ConnectionCmd; } ContextImplPtr Command::getContext() const { return detail::getSyclObjImpl(MQueue->get_context()); } -void Command::addDep(DepDesc NewDep) { +Command *Command::addDep(DepDesc NewDep) { + Command *ConnectionCmd = nullptr; if (NewDep.MDepCommand) { - processDepEvent(NewDep.MDepCommand->getEvent(), NewDep); + ConnectionCmd = processDepEvent(NewDep.MDepCommand->getEvent(), NewDep); } MDeps.push_back(NewDep); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -512,9 +516,11 @@ void Command::addDep(DepDesc NewDep) { NewDep.MDepCommand, (void *)NewDep.MDepRequirement->MSYCLMemObj, accessModeToString(NewDep.MDepRequirement->MAccessMode), true); #endif + + return ConnectionCmd; } -void Command::addDep(EventImplPtr Event) { +Command *Command::addDep(EventImplPtr Event) { #ifdef XPTI_ENABLE_INSTRUMENTATION // We need this for just the instrumentation, so guarding it will prevent // unused variable warnings when instrumentation is turned off @@ -524,7 +530,7 @@ void Command::addDep(EventImplPtr Event) { emitEdgeEventForEventDependence(Cmd, PiEventAddr); #endif - processDepEvent(std::move(Event), DepDesc{nullptr, nullptr, nullptr}); + return processDepEvent(std::move(Event), DepDesc{nullptr, nullptr, nullptr}); } void Command::emitEnqueuedEventSignal(RT::PiEvent &PiEventAddr) { @@ -718,7 +724,10 @@ AllocaCommand::AllocaCommand(QueueImplPtr Queue, Requirement Req, // Node event must be created before the dependent edge is added to this node, // so this call must be before the addDep() call. emitInstrumentationDataProxy(); - addDep(DepDesc(nullptr, getRequirement(), this)); + // "Nothing to depend on" + Command *ConnectionCmd = addDep(DepDesc(nullptr, getRequirement(), this)); + assert(ConnectionCmd == nullptr); + (void)ConnectionCmd; } void AllocaCommand::emitInstrumentationData() { @@ -789,7 +798,8 @@ void AllocaCommand::printDot(std::ostream &Stream) const { } AllocaSubBufCommand::AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req, - AllocaCommandBase *ParentAlloca) + AllocaCommandBase *ParentAlloca, + std::vector &ToEnqueue) : AllocaCommandBase(CommandType::ALLOCA_SUB_BUF, std::move(Queue), std::move(Req), /*LinkedAllocaCmd*/ nullptr), @@ -798,7 +808,10 @@ AllocaSubBufCommand::AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req, // is added to this node, so this call must be before // the addDep() call. emitInstrumentationDataProxy(); - addDep(DepDesc(MParentAlloca, getRequirement(), MParentAlloca)); + Command *ConnectionCmd = + addDep(DepDesc(MParentAlloca, getRequirement(), MParentAlloca)); + if (ConnectionCmd) + ToEnqueue.push_back(ConnectionCmd); } void AllocaSubBufCommand::emitInstrumentationData() { @@ -1328,7 +1341,10 @@ void EmptyCommand::addRequirement(Command *DepCmd, AllocaCommandBase *AllocaCmd, MRequirements.emplace_back(ReqRef); const Requirement *const StoredReq = &MRequirements.back(); - addDep(DepDesc{DepCmd, StoredReq, AllocaCmd}); + // EmptyCommand is always host one, so we believe that result of addDep is nil + Command *Cmd = addDep(DepDesc{DepCmd, StoredReq, AllocaCmd}); + assert(Cmd == nullptr && "Conection command should be null for EmptyCommand"); + (void)Cmd; } void EmptyCommand::emitInstrumentationData() { diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 04d85d5206739..8bd8e9da941ff 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -105,9 +105,11 @@ class Command { Command(CommandType Type, QueueImplPtr Queue); - void addDep(DepDesc NewDep); + /// \return an optional connection cmd to enqueue + Command *addDep(DepDesc NewDep); - void addDep(EventImplPtr Event); + /// \return an optional connection cmd to enqueue + Command *addDep(EventImplPtr Event); void addUser(Command *NewUser) { MUsers.insert(NewUser); } @@ -192,13 +194,14 @@ class Command { /// Perform glueing of events from different contexts /// \param DepEvent event this commands should depend on /// \param Dep optional DepDesc to perform connection of events properly + /// \return returns an optional connection command to enqueue /// /// Glueing (i.e. connecting) will be performed if and only if DepEvent is /// not from host context and its context doesn't match to context of this /// command. Context of this command is fetched via getContext(). /// /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. - void processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep); + Command *processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep); /// Private interface. Derived classes should implement this method. virtual cl_int enqueueImp() = 0; @@ -378,7 +381,8 @@ class AllocaCommand : public AllocaCommandBase { class AllocaSubBufCommand : public AllocaCommandBase { public: AllocaSubBufCommand(QueueImplPtr Queue, Requirement Req, - AllocaCommandBase *ParentAlloca); + AllocaCommandBase *ParentAlloca, + std::vector &ToEnqueue); void *getMemAllocation() const final override; void printDot(std::ostream &Stream) const final override; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index bab4b0c10c7ba..10ff2b4f3b2c0 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -193,7 +193,9 @@ void Scheduler::GraphBuilder::updateLeaves(const std::set &Cmds, void Scheduler::GraphBuilder::addNodeToLeaves(MemObjRecord *Record, Command *Cmd, - access::mode AccessMode) { + access::mode AccessMode, + std::vector &ToEnqueue) +{ CircularBuffer &Leaves{AccessMode == access::mode::read ? Record->MReadLeaves : Record->MWriteLeaves}; @@ -206,7 +208,8 @@ void Scheduler::GraphBuilder::addNodeToLeaves(MemObjRecord *Record, // the requirements for the current record DepDesc Dep = findDepForRecord(Cmd, Record); Dep.MDepCommand = OldLeaf; - Cmd->addDep(Dep); + if (Command *ConnectionCmd = Cmd->addDep(Dep)) + ToEnqueue.push_back(ConnectionCmd); OldLeaf->addUser(Cmd); --(OldLeaf->MLeafCounter); } @@ -215,7 +218,8 @@ void Scheduler::GraphBuilder::addNodeToLeaves(MemObjRecord *Record, } UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( - MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue) { + MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, + std::vector &ToEnqueue) { AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, Queue->getContextImplPtr()); assert(AllocaCmd && "There must be alloca for requirement!"); @@ -228,11 +232,14 @@ UpdateHostRequirementCommand *Scheduler::GraphBuilder::insertUpdateHostReqCmd( std::set Deps = findDepsForReq(Record, Req, Queue->getContextImplPtr()); for (Command *Dep : Deps) { - UpdateCommand->addDep(DepDesc{Dep, StoredReq, AllocaCmd}); + Command *ConnCmd = + UpdateCommand->addDep(DepDesc{Dep, StoredReq, AllocaCmd}); + if (ConnCmd) + ToEnqueue.push_back(ConnCmd); Dep->addUser(UpdateCommand); } updateLeaves(Deps, Record, Req->MAccessMode); - addNodeToLeaves(Record, UpdateCommand, Req->MAccessMode); + addNodeToLeaves(Record, UpdateCommand, Req->MAccessMode, ToEnqueue); return UpdateCommand; } @@ -265,11 +272,14 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc, return MapCmd; } -Command *Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record, - Requirement *Req, - const QueueImplPtr &Queue) { +Command * +Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record, + Requirement *Req, + const QueueImplPtr &Queue, + std::vector &ToEnqueue) { - AllocaCommandBase *AllocaCmdDst = getOrCreateAllocaForReq(Record, Req, Queue); + AllocaCommandBase *AllocaCmdDst = + getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); if (!AllocaCmdDst) throw runtime_error("Out of host memory", PI_OUT_OF_HOST_MEMORY); @@ -332,17 +342,21 @@ Command *Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record, } for (Command *Dep : Deps) { - NewCmd->addDep(DepDesc{Dep, NewCmd->getRequirement(), AllocaCmdDst}); + Command *ConnCmd = + NewCmd->addDep(DepDesc{Dep, NewCmd->getRequirement(), AllocaCmdDst}); + if (ConnCmd) + ToEnqueue.push_back(ConnCmd); Dep->addUser(NewCmd); } updateLeaves(Deps, Record, access::mode::read_write); - addNodeToLeaves(Record, NewCmd, access::mode::read_write); + addNodeToLeaves(Record, NewCmd, access::mode::read_write, ToEnqueue); Record->MCurContext = Queue->getContextImplPtr(); return NewCmd; } Command *Scheduler::GraphBuilder::remapMemoryObject( - MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd) { + MemObjRecord *Record, Requirement *Req, AllocaCommandBase *HostAllocaCmd, + std::vector &ToEnqueue) { assert(HostAllocaCmd->getQueue()->is_host() && "Host alloca command expected"); assert(HostAllocaCmd->MIsActive && "Active alloca command expected"); @@ -365,22 +379,29 @@ Command *Scheduler::GraphBuilder::remapMemoryObject( &HostAllocaCmd->MMemAllocation, LinkedAllocaCmd->getQueue(), MapMode); for (Command *Dep : Deps) { - UnMapCmd->addDep(DepDesc{Dep, UnMapCmd->getRequirement(), LinkedAllocaCmd}); + Command *ConnCmd = UnMapCmd->addDep( + DepDesc{Dep, UnMapCmd->getRequirement(), LinkedAllocaCmd}); + if (ConnCmd) + ToEnqueue.push_back(ConnCmd); Dep->addUser(UnMapCmd); } - MapCmd->addDep(DepDesc{UnMapCmd, MapCmd->getRequirement(), HostAllocaCmd}); + Command *ConnCmd = MapCmd->addDep( + DepDesc{UnMapCmd, MapCmd->getRequirement(), HostAllocaCmd}); + if (ConnCmd) + ToEnqueue.push_back(ConnCmd); UnMapCmd->addUser(MapCmd); updateLeaves(Deps, Record, access::mode::read_write); - addNodeToLeaves(Record, MapCmd, access::mode::read_write); + addNodeToLeaves(Record, MapCmd, access::mode::read_write, ToEnqueue); Record->MHostAccess = MapMode; return MapCmd; } // The function adds copy operation of the up to date'st memory to the memory // pointed by Req. -Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) { +Command *Scheduler::GraphBuilder::addCopyBack( + Requirement *Req, std::vector &ToEnqueue) { QueueImplPtr HostQueue = Scheduler::getInstance().getDefaultHostQueue(); SYCLMemObjI *MemObj = Req->MSYCLMemObj; @@ -406,12 +427,15 @@ Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) { MemCpyCommandHost *MemCpyCmd = MemCpyCmdUniquePtr.release(); for (Command *Dep : Deps) { - MemCpyCmd->addDep(DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd}); + Command *ConnCmd = MemCpyCmd->addDep( + DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd}); + if (ConnCmd) + ToEnqueue.push_back(ConnCmd); Dep->addUser(MemCpyCmd); } updateLeaves(Deps, Record, Req->MAccessMode); - addNodeToLeaves(Record, MemCpyCmd, Req->MAccessMode); + addNodeToLeaves(Record, MemCpyCmd, Req->MAccessMode, ToEnqueue); if (MPrintOptionsArray[AfterAddCopyBack]) printGraphAsDot("after_addCopyBack"); return MemCpyCmd; @@ -419,7 +443,8 @@ Command *Scheduler::GraphBuilder::addCopyBack(Requirement *Req) { // The function implements SYCL host accessor logic: host accessor // should provide access to the buffer in user space. -Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req) { +Command *Scheduler::GraphBuilder::addHostAccessor( + Requirement *Req, std::vector &ToEnqueue) { const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue(); @@ -429,20 +454,22 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req) { markModifiedIfWrite(Record, Req); AllocaCommandBase *HostAllocaCmd = - getOrCreateAllocaForReq(Record, Req, HostQueue); + getOrCreateAllocaForReq(Record, Req, HostQueue, ToEnqueue); if (sameCtx(HostAllocaCmd->getQueue()->getContextImplPtr(), Record->MCurContext)) { if (!isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) - remapMemoryObject(Record, Req, HostAllocaCmd); + remapMemoryObject(Record, Req, HostAllocaCmd, ToEnqueue); } else - insertMemoryMove(Record, Req, HostQueue); + insertMemoryMove(Record, Req, HostQueue, ToEnqueue); - Command *UpdateHostAccCmd = insertUpdateHostReqCmd(Record, Req, HostQueue); + Command *UpdateHostAccCmd = + insertUpdateHostReqCmd(Record, Req, HostQueue, ToEnqueue); // Need empty command to be blocked until host accessor is destructed EmptyCommand *EmptyCmd = addEmptyCmd( - UpdateHostAccCmd, {Req}, HostQueue, Command::BlockReason::HostAccessor); + UpdateHostAccCmd, {Req}, HostQueue, Command::BlockReason::HostAccessor, + ToEnqueue); Req->MBlockedCmd = EmptyCmd; @@ -453,13 +480,14 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req) { } Command *Scheduler::GraphBuilder::addCGUpdateHost( - std::unique_ptr CommandGroup, QueueImplPtr HostQueue) { + std::unique_ptr CommandGroup, QueueImplPtr HostQueue, + std::vector &ToEnqueue) { auto UpdateHost = static_cast(CommandGroup.get()); Requirement *Req = UpdateHost->getReqToUpdate(); MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req); - return insertMemoryMove(Record, Req, HostQueue); + return insertMemoryMove(Record, Req, HostQueue, ToEnqueue); } /// Start the search for the record from list of "leaf" commands and check if @@ -564,7 +592,8 @@ Scheduler::GraphBuilder::findAllocaForReq(MemObjRecord *Record, // Note, creation of new allocation command can lead to the current context // (Record->MCurContext) change. AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( - MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue) { + MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue, + std::vector &ToEnqueue) { AllocaCommandBase *AllocaCmd = findAllocaForReq(Record, Req, Queue->getContextImplPtr()); @@ -580,8 +609,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( /*Working with bytes*/ sizeof(char)); auto *ParentAlloca = - getOrCreateAllocaForReq(Record, &ParentRequirement, Queue); - AllocaCmd = new AllocaSubBufCommand(Queue, *Req, ParentAlloca); + getOrCreateAllocaForReq(Record, &ParentRequirement, Queue, ToEnqueue); + AllocaCmd = new AllocaSubBufCommand(Queue, *Req, ParentAlloca, ToEnqueue); } else { const Requirement FullReq(/*Offset*/ {0, 0, 0}, Req->MMemoryRange, @@ -617,15 +646,20 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // Update linked command if (LinkedAllocaCmd) { - AllocaCmd->addDep(DepDesc{LinkedAllocaCmd, AllocaCmd->getRequirement(), - LinkedAllocaCmd}); + Command *ConnCmd = AllocaCmd->addDep( + DepDesc{LinkedAllocaCmd, AllocaCmd->getRequirement(), + LinkedAllocaCmd}); + if (ConnCmd) + ToEnqueue.push_back(ConnCmd); LinkedAllocaCmd->addUser(AllocaCmd); LinkedAllocaCmd->MLinkedAllocaCmd = AllocaCmd; // To ensure that the leader allocation is removed first - AllocaCmd->getReleaseCmd()->addDep( + ConnCmd = AllocaCmd->getReleaseCmd()->addDep( DepDesc(LinkedAllocaCmd->getReleaseCmd(), AllocaCmd->getRequirement(), LinkedAllocaCmd)); + if (ConnCmd) + ToEnqueue.push_back(ConnCmd); // Device allocation takes ownership of the host ptr during // construction, host allocation doesn't. So, device allocation should @@ -640,11 +674,14 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( std::set Deps = findDepsForReq(Record, Req, Queue->getContextImplPtr()); for (Command *Dep : Deps) { - AllocaCmd->addDep(DepDesc{Dep, Req, LinkedAllocaCmd}); + Command *ConnCmd = + AllocaCmd->addDep(DepDesc{Dep, Req, LinkedAllocaCmd}); + if (ConnCmd) + ToEnqueue.push_back(ConnCmd); Dep->addUser(AllocaCmd); } updateLeaves(Deps, Record, Req->MAccessMode); - addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode); + addNodeToLeaves(Record, AllocaCmd, Req->MAccessMode, ToEnqueue); } } } @@ -677,7 +714,8 @@ typename std::enable_if< EmptyCommand *>::type Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector &Reqs, const QueueImplPtr &Queue, - Command::BlockReason Reason) { + Command::BlockReason Reason, + std::vector &ToEnqueue) { EmptyCommand *EmptyCmd = new EmptyCommand(Scheduler::getInstance().getDefaultHostQueue()); @@ -690,7 +728,8 @@ Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector &Reqs, for (T *Req : Reqs) { MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req); - AllocaCommandBase *AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue); + AllocaCommandBase *AllocaCmd = + getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); EmptyCmd->addRequirement(Cmd, AllocaCmd, Req); } @@ -702,7 +741,7 @@ Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector &Reqs, MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); updateLeaves({Cmd}, Record, Req->MAccessMode); - addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode); + addNodeToLeaves(Record, EmptyCmd, Req->MAccessMode, ToEnqueue); } return EmptyCmd; @@ -720,7 +759,8 @@ static bool isInteropHostTask(const std::unique_ptr &Cmd) { Command * Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, - QueueImplPtr Queue) { + QueueImplPtr Queue, + std::vector &ToEnqueue) { const std::vector &Reqs = CommandGroup->MRequirements; const std::vector &Events = CommandGroup->MEvents; const CG::CGTYPE CGType = CommandGroup->getType(); @@ -748,7 +788,8 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, Record = getOrInsertMemObjRecord(QueueForAlloca, Req); markModifiedIfWrite(Record, Req); - AllocaCmd = getOrCreateAllocaForReq(Record, Req, QueueForAlloca); + AllocaCmd = + getOrCreateAllocaForReq(Record, Req, QueueForAlloca, ToEnqueue); isSameCtx = sameCtx(QueueForAlloca->getContextImplPtr(), Record->MCurContext); @@ -761,7 +802,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, // required access mode is valid, remap if not. if (Record->MCurContext->is_host() && !isAccessModeAllowed(Req->MAccessMode, Record->MHostAccess)) - remapMemoryObject(Record, Req, AllocaCmd); + remapMemoryObject(Record, Req, AllocaCmd, ToEnqueue); } else { // Cannot directly copy memory from OpenCL device to OpenCL device - // create two copies: device->host and host->device. @@ -781,14 +822,16 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, if (NeedMemMoveToHost) insertMemoryMove(Record, Req, - Scheduler::getInstance().getDefaultHostQueue()); - insertMemoryMove(Record, Req, MemMoveTargetQueue); + Scheduler::getInstance().getDefaultHostQueue(), + ToEnqueue); + insertMemoryMove(Record, Req, MemMoveTargetQueue, ToEnqueue); } std::set Deps = findDepsForReq(Record, Req, Queue->getContextImplPtr()); for (Command *Dep : Deps) - NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd}); + if (Command *ConnCmd = NewCmd->addDep(DepDesc{Dep, Req, AllocaCmd})) + ToEnqueue.push_back(ConnCmd); } // Set new command as user for dependencies and update leaves. @@ -801,17 +844,19 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, const Requirement *Req = Dep.MDepRequirement; MemObjRecord *Record = getMemObjRecord(Req->MSYCLMemObj); updateLeaves({Dep.MDepCommand}, Record, Req->MAccessMode); - addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode); + addNodeToLeaves(Record, NewCmd.get(), Req->MAccessMode, ToEnqueue); } // Register all the events as dependencies for (detail::EventImplPtr e : Events) { - NewCmd->addDep(e); + if (Command *ConnCmd = NewCmd->addDep(e)) + ToEnqueue.push_back(ConnCmd); } if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) NewCmd->MEmptyCmd = addEmptyCmd(NewCmd.get(), NewCmd->getCG().MRequirements, - Queue, Command::BlockReason::HostTask); + Queue, Command::BlockReason::HostTask, + ToEnqueue); if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); @@ -981,9 +1026,9 @@ void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) { // requirement in Dep we make ConnectCmd depend on DepEvent's command with this // requirement. // Optionality of Dep is set by Dep.MDepCommand equal to nullptr. -void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, - EventImplPtr DepEvent, - const DepDesc &Dep) { +Command *Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, + EventImplPtr DepEvent, + const DepDesc &Dep) { assert(Cmd->getContext() != DepEvent->getContextImpl()); // construct Host Task type command manually and make it depend on DepEvent @@ -1011,6 +1056,8 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, if (Dep.MDepRequirement) { // make ConnectCmd depend on requirement + // Dismiss the result here as it's not a connection now, + // 'cause ConnectCmd is host one ConnectCmd->addDep(Dep); assert(reinterpret_cast(DepEvent->getCommand()) == Dep.MDepCommand); @@ -1019,12 +1066,16 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, MemObjRecord *Record = getMemObjRecord(Dep.MDepRequirement->MSYCLMemObj); updateLeaves({Dep.MDepCommand}, Record, Dep.MDepRequirement->MAccessMode); - addNodeToLeaves(Record, ConnectCmd, Dep.MDepRequirement->MAccessMode); + std::vector ToEnqueue; + addNodeToLeaves(Record, ConnectCmd, Dep.MDepRequirement->MAccessMode, + ToEnqueue); + assert(ToEnqueue.size() == 0); const std::vector Reqs(1, Dep.MDepRequirement); EmptyCmd = addEmptyCmd(ConnectCmd, Reqs, Scheduler::getInstance().getDefaultHostQueue(), - Command::BlockReason::HostTask); + Command::BlockReason::HostTask, ToEnqueue); + assert(ToEnqueue.size() == 0); // Dependencies for EmptyCmd are set in addEmptyCmd for provided Reqs. // Depend Cmd on empty command @@ -1032,19 +1083,27 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, DepDesc CmdDep = Dep; CmdDep.MDepCommand = EmptyCmd; + // Dismiss the result here as it's not a connection now, + // 'cause EmptyCmd is host one Cmd->addDep(CmdDep); } } else { + std::vector ToEnqueue; EmptyCmd = addEmptyCmd( ConnectCmd, {}, Scheduler::getInstance().getDefaultHostQueue(), - Command::BlockReason::HostTask); + Command::BlockReason::HostTask, ToEnqueue); + assert(ToEnqueue.size() == 0); // There is no requirement thus, empty command will only depend on // ConnectCmd via its event. + // Dismiss the result here as it's not a connection now, + // 'cause ConnectCmd is host one. EmptyCmd->addDep(ConnectCmd->getEvent()); ConnectCmd->addDep(DepEvent); // Depend Cmd on empty command + // Dismiss the result here as it's not a connection now, + // 'cause EmptyCmd is host one Cmd->addDep(EmptyCmd->getEvent()); } @@ -1052,6 +1111,8 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, ConnectCmd->MEmptyCmd = EmptyCmd; + return ConnectCmd; +#if 0 // FIXME graph builder shouldn't really enqueue commands. We're in the middle // of enqueue process for some command Cmd. We're going to add a dependency // for it. Need some nice and cute solution to enqueue ConnectCmd via standard @@ -1062,6 +1123,7 @@ void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Failed to enqueue a sync event between two contexts", PI_INVALID_OPERATION); +#endif } } // namespace detail diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index 7b9f10efef295..25c4916dc1eb7 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -36,7 +36,8 @@ Scheduler::GraphProcessor::getWaitList(EventImplPtr Event) { return Result; } -void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event) { +void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event, + ReadLockT &GraphReadLock) { Command *Cmd = getCommand(Event); // Command can be nullptr if user creates cl::sycl::event explicitly or the // event has been waited on by another thread @@ -44,16 +45,19 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event) { return; EnqueueResultT Res; - bool Enqueued = enqueueCommand(Cmd, Res, BLOCKING); + bool Enqueued = enqueueCommand(Cmd, Res, GraphReadLock, BLOCKING); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) // TODO: Reschedule commands. throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + GraphReadLock.unlock(); + Cmd->getEvent()->waitInternal(); } bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, EnqueueResultT &EnqueueResult, + ReadLockT &GraphReadLock, BlockingT Blocking) { if (!Cmd || Cmd->isSuccessfullyEnqueued()) return true; @@ -63,7 +67,7 @@ bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, for (DepDesc &Dep : Cmd->MDeps) { const bool Enqueued = - enqueueCommand(Dep.MDepCommand, EnqueueResult, Blocking); + enqueueCommand(Dep.MDepCommand, EnqueueResult, GraphReadLock, Blocking); if (!Enqueued) switch (EnqueueResult.MResult) { case EnqueueResultT::SyclEnqueueFailed: @@ -85,7 +89,13 @@ bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, if (BlockedByDep) return false; - return Cmd->enqueue(EnqueueResult, Blocking); + { + GraphReadLock.unlock(); + bool Result = Cmd->enqueue(EnqueueResult, Blocking); + GraphReadLock.lock(); + + return Result; + } } } // namespace detail diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index e4766d734d2be..a287eaf17207e 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -20,36 +20,38 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { -void Scheduler::waitForRecordToFinish(MemObjRecord *Record) { +void Scheduler::waitForRecordToFinish(MemObjRecord *Record, + ReadLockT &GraphReadLock) { #ifdef XPTI_ENABLE_INSTRUMENTATION // Will contain the list of dependencies for the Release Command std::set DepCommands; #endif for (Command *Cmd : Record->MReadLeaves) { EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, GraphReadLock); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); #ifdef XPTI_ENABLE_INSTRUMENTATION // Capture the dependencies DepCommands.insert(Cmd); #endif - GraphProcessor::waitForEvent(Cmd->getEvent()); + GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock); } for (Command *Cmd : Record->MWriteLeaves) { EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, GraphReadLock); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); #ifdef XPTI_ENABLE_INSTRUMENTATION DepCommands.insert(Cmd); #endif - GraphProcessor::waitForEvent(Cmd->getEvent()); + GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock); } for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { Command *ReleaseCmd = AllocaCmd->getReleaseCmd(); EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res); + bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res, + GraphReadLock); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -57,7 +59,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record) { // reported as edges ReleaseCmd->resolveReleaseDependencies(DepCommands); #endif - GraphProcessor::waitForEvent(ReleaseCmd->getEvent()); + GraphProcessor::waitForEvent(ReleaseCmd->getEvent(), GraphReadLock); } } @@ -65,29 +67,40 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, QueueImplPtr Queue) { Command *NewCmd = nullptr; const bool IsKernel = CommandGroup->getType() == CG::KERNEL; + std::vector AuxiliaryCmds; { - std::unique_lock Lock(MGraphLock, std::defer_lock); - lockSharedTimedMutex(Lock); + WriteLockT Lock(MGraphLock, std::defer_lock); + acquireWriteLock(Lock); switch (CommandGroup->getType()) { case CG::UPDATE_HOST: NewCmd = MGraphBuilder.addCGUpdateHost(std::move(CommandGroup), - DefaultHostQueue); + DefaultHostQueue, AuxiliaryCmds); break; case CG::CODEPLAY_HOST_TASK: - NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), DefaultHostQueue); + NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), DefaultHostQueue, + AuxiliaryCmds); break; default: - NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue)); + NewCmd = MGraphBuilder.addCG(std::move(CommandGroup), std::move(Queue), + AuxiliaryCmds); } } { - std::shared_lock Lock(MGraphLock); + ReadLockT Lock(MGraphLock); - // TODO: Check if lazy mode. EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res); + bool Enqueued; + + for (Command *Cmd : AuxiliaryCmds) { + Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, Lock); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + } + + // TODO: Check if lazy mode. + Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res, Lock); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); } @@ -99,17 +112,30 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, } EventImplPtr Scheduler::addCopyBack(Requirement *Req) { - std::unique_lock Lock(MGraphLock, std::defer_lock); - lockSharedTimedMutex(Lock); - Command *NewCmd = MGraphBuilder.addCopyBack(Req); - // Command was not creted because there were no operations with - // buffer. - if (!NewCmd) - return nullptr; + std::vector AuxiliaryCmds; + Command *NewCmd = nullptr; + { + WriteLockT Lock(MGraphLock, std::defer_lock); + acquireWriteLock(Lock); + NewCmd = MGraphBuilder.addCopyBack(Req, AuxiliaryCmds); + // Command was not creted because there were no operations with + // buffer. + if (!NewCmd) + return nullptr; + } try { + ReadLockT Lock(MGraphLock); EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res); + bool Enqueued; + + for (Command *Cmd : AuxiliaryCmds) { + Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, Lock); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + } + + Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res, Lock); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); } catch (...) { @@ -132,20 +158,20 @@ Scheduler Scheduler::instance; Scheduler &Scheduler::getInstance() { return instance; } std::vector Scheduler::getWaitList(EventImplPtr Event) { - std::shared_lock Lock(MGraphLock); + ReadLockT Lock(MGraphLock); return GraphProcessor::getWaitList(std::move(Event)); } void Scheduler::waitForEvent(EventImplPtr Event) { - std::shared_lock Lock(MGraphLock); - GraphProcessor::waitForEvent(std::move(Event)); + ReadLockT Lock(MGraphLock); + GraphProcessor::waitForEvent(std::move(Event), Lock); } void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { // Avoiding deadlock situation, where one thread is in the process of // enqueueing (with a locked mutex) a currently blocked task that waits for // another thread which is stuck at attempting cleanup. - std::unique_lock Lock(MGraphLock, std::try_to_lock); + WriteLockT Lock(MGraphLock, std::try_to_lock); if (Lock.owns_lock()) { Command *FinishedCmd = static_cast(FinishedEvent->getCommand()); // The command might have been cleaned up (and set to nullptr) by another @@ -157,10 +183,10 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) { void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { MemObjRecord *Record = nullptr; - std::unique_lock Lock(MGraphLock, std::defer_lock); + WriteLockT Lock(MGraphLock, std::defer_lock); { - lockSharedTimedMutex(Lock); + acquireWriteLock(Lock); Record = MGraphBuilder.getMemObjRecord(MemObj); if (!Record) @@ -173,12 +199,12 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { { // This only needs a shared mutex as it only involves enqueueing and // awaiting for events - std::shared_lock Lock(MGraphLock); - waitForRecordToFinish(Record); + ReadLockT Lock(MGraphLock); + waitForRecordToFinish(Record, Lock); } { - lockSharedTimedMutex(Lock); + acquireWriteLock(Lock); MGraphBuilder.decrementLeafCountersForRecord(Record); MGraphBuilder.cleanupCommandsForRecord(Record); MGraphBuilder.removeRecordForMemObj(MemObj); @@ -186,39 +212,58 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { } EventImplPtr Scheduler::addHostAccessor(Requirement *Req) { - std::unique_lock Lock(MGraphLock, std::defer_lock); - lockSharedTimedMutex(Lock); + std::vector AuxiliaryCmds; + Command *NewCmd = nullptr; + + { + WriteLockT Lock(MGraphLock, std::defer_lock); + acquireWriteLock(Lock); - Command *NewCmd = MGraphBuilder.addHostAccessor(Req); + NewCmd = MGraphBuilder.addHostAccessor(Req, AuxiliaryCmds); + } if (!NewCmd) return nullptr; - EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + + { + ReadLockT ReadLock(MGraphLock); + EnqueueResultT Res; + bool Enqueued; + + for (Command *Cmd : AuxiliaryCmds) { + Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ReadLock); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + } + + Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res, ReadLock); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + } + return NewCmd->getEvent(); } void Scheduler::releaseHostAccessor(Requirement *Req) { Command *const BlockedCmd = Req->MBlockedCmd; - std::shared_lock Lock(MGraphLock); + ReadLockT Lock(MGraphLock); assert(BlockedCmd && "Can't find appropriate command to unblock"); BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - enqueueLeavesOfReqUnlocked(Req); + enqueueLeavesOfReqUnlocked(Req, Lock); } // static -void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req) { +void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req, + ReadLockT &GraphReadLock) { MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get(); - auto EnqueueLeaves = [](CircularBuffer &Leaves) { + auto EnqueueLeaves = [&GraphReadLock](CircularBuffer &Leaves) { for (Command *Cmd : Leaves) { EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, GraphReadLock); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); } @@ -234,8 +279,7 @@ Scheduler::Scheduler() { /*PropList=*/{})); } -void Scheduler::lockSharedTimedMutex( - std::unique_lock &Lock) { +void Scheduler::acquireWriteLock(WriteLockT &Lock) { #ifdef _WIN32 // Avoiding deadlock situation for MSVC. std::shared_timed_mutex specification // does not specify a priority for shared and exclusive accesses. It will be a diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index aaaf40e178f54..5925e4dca2276 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -430,17 +430,24 @@ class Scheduler { QueueImplPtr getDefaultHostQueue() { return DefaultHostQueue; } protected: + // TODO: after switching to C++17, change std::shared_timed_mutex to + // std::shared_mutex + using RWLockT = std::shared_timed_mutex; + using ReadLockT = std::shared_lock; + using WriteLockT = std::unique_lock; + Scheduler(); static Scheduler instance; /// Provides exclusive access to std::shared_timed_mutex object with deadlock /// avoidance /// - /// \param Lock is an instance of std::unique_lock + /// \param Lock is an instance of WriteLockT /// class - void lockSharedTimedMutex(std::unique_lock &Lock); + void acquireWriteLock(WriteLockT &Lock); - static void enqueueLeavesOfReqUnlocked(const Requirement *const Req); + static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, + ReadLockT &GraphReadLock); /// Graph builder class. /// @@ -458,24 +465,26 @@ class Scheduler { /// /// \return a command that represents command group execution. Command *addCG(std::unique_ptr CommandGroup, - QueueImplPtr Queue); + QueueImplPtr Queue, std::vector &ToEnqueue); /// Registers a \ref CG "command group" that updates host memory to the /// latest state. /// /// \return a command that represents command group execution. Command *addCGUpdateHost(std::unique_ptr CommandGroup, - QueueImplPtr HostQueue); + QueueImplPtr HostQueue, + std::vector &ToEnqueue); /// Enqueues a command to update memory to the latest state. /// /// \param Req is a requirement, that describes memory object. - Command *addCopyBack(Requirement *Req); + Command *addCopyBack(Requirement *Req, std::vector &ToEnqueue); /// Enqueues a command to create a host accessor. /// /// \param Req points to memory being accessed. - Command *addHostAccessor(Requirement *Req); + Command *addHostAccessor(Requirement *Req, + std::vector &ToEnqueue); /// [Provisional] Optimizes the whole graph. void optimize(); @@ -516,7 +525,8 @@ class Scheduler { /// Adds new command to leaves if needed. void addNodeToLeaves(MemObjRecord *Record, Command *Cmd, - access::mode AccessMode); + access::mode AccessMode, + std::vector &ToEnqueue); /// Removes commands from leaves. void updateLeaves(const std::set &Cmds, MemObjRecord *Record, @@ -526,10 +536,11 @@ class Scheduler { /// \param Cmd dependant command /// \param DepEvent event to depend on /// \param Dep optional DepDesc to perform connection properly + /// \returns the connecting command which is to be enqueued /// /// Optionality of Dep is set by Dep.MDepCommand equal to nullptr. - void connectDepEvent(Command *const Cmd, EventImplPtr DepEvent, - const DepDesc &Dep); + Command *connectDepEvent(Command *const Cmd, EventImplPtr DepEvent, + const DepDesc &Dep); std::vector MMemObjs; @@ -544,16 +555,19 @@ class Scheduler { /// \param Req is a Requirement describing destination. /// \param Queue is a queue that is bound to target context. Command *insertMemoryMove(MemObjRecord *Record, Requirement *Req, - const QueueImplPtr &Queue); + const QueueImplPtr &Queue, + std::vector &ToEnqueue); // Inserts commands required to remap the memory object to its current host // context so that the required access mode becomes valid. Command *remapMemoryObject(MemObjRecord *Record, Requirement *Req, - AllocaCommandBase *HostAllocaCmd); + AllocaCommandBase *HostAllocaCmd, + std::vector &ToEnqueue); UpdateHostRequirementCommand * insertUpdateHostReqCmd(MemObjRecord *Record, Requirement *Req, - const QueueImplPtr &Queue); + const QueueImplPtr &Queue, + std::vector &ToEnqueue); /// Finds dependencies for the requirement. std::set findDepsForReq(MemObjRecord *Record, @@ -565,7 +579,8 @@ class Scheduler { std::is_same::type, Requirement>::value, EmptyCommand *>::type addEmptyCmd(Command *Cmd, const std::vector &Req, - const QueueImplPtr &Queue, Command::BlockReason Reason); + const QueueImplPtr &Queue, Command::BlockReason Reason, + std::vector &ToEnqueue); protected: /// Finds a command dependency corresponding to the record. @@ -584,9 +599,9 @@ class Scheduler { /// Searches for suitable alloca in memory record. /// /// If none found, creates new one. - AllocaCommandBase *getOrCreateAllocaForReq(MemObjRecord *Record, - const Requirement *Req, - QueueImplPtr Queue); + AllocaCommandBase *getOrCreateAllocaForReq( + MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue, + std::vector &ToEnqueue); void markModifiedIfWrite(MemObjRecord *Record, Requirement *Req); @@ -686,22 +701,24 @@ class Scheduler { static std::vector getWaitList(EventImplPtr Event); /// Waits for the command, associated with Event passed, is completed. - static void waitForEvent(EventImplPtr Event); + /// \param GraphReadLock read-lock which is already acquired for reading + static void waitForEvent(EventImplPtr Event, + ReadLockT &GraphReadLock); /// Enqueues the command and all its dependencies. /// /// \param EnqueueResult is set to specific status if enqueue failed. + /// \param GraphReadLock read-lock which is already acquired for reading /// \return true if the command is successfully enqueued. static bool enqueueCommand(Command *Cmd, EnqueueResultT &EnqueueResult, + ReadLockT &GraphReadLock, BlockingT Blocking = NON_BLOCKING); }; - void waitForRecordToFinish(MemObjRecord *Record); + void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock); GraphBuilder MGraphBuilder; - // TODO: after switching to C++17, change std::shared_timed_mutex to - // std::shared_mutex - std::shared_timed_mutex MGraphLock; + RWLockT MGraphLock; QueueImplPtr DefaultHostQueue; From da0402c2aac3736f171894582a954cadabf5663a Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 27 Aug 2020 15:23:16 +0300 Subject: [PATCH 02/19] [SYCL] Fix merge glitch Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 2e0d428034bf1..48887d5003f69 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -107,7 +107,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, if (NewCmd) { // TODO: Check if lazy mode. EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res); + bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res, Lock); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); From 6358b17c62e690647fc6aaec55698bf683c6239d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Wed, 14 Oct 2020 18:11:58 +0300 Subject: [PATCH 03/19] Fix merge glitch Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_builder.cpp | 6 +++--- sycl/source/detail/scheduler/leaves_collection.cpp | 8 +++++--- sycl/source/detail/scheduler/leaves_collection.hpp | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 51b194d5e6ebd..cbb270b6e010c 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -167,13 +167,13 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, const size_t LeafLimit = 8; LeavesCollection::AllocateDependencyF AllocateDependency = [this](Command *Dependant, Command *Dependency, MemObjRecord *Record, - std::vector *ToEnqueue) { + LeavesCollection::EnqueueListT *ToEnqueue) { // Add the old leaf as a dependency for the new one by duplicating one // of the requirements for the current record DepDesc Dep = findDepForRecord(Dependant, Record); Dep.MDepCommand = Dependency; if (Command *ConnectionCmd = Dependant->addDep(Dep)) - ToEnqueue.push_back(ConnectionCmd); + ToEnqueue->push_back(ConnectionCmd); Dependency->addUser(Dependant); --(Dependency->MLeafCounter); }; @@ -682,7 +682,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } Record->MAllocaCommands.push_back(AllocaCmd); - Record->MWriteLeaves.push_back(AllocaCmd); + Record->MWriteLeaves.push_back(AllocaCmd, &ToEnqueue); ++(AllocaCmd->MLeafCounter); } return AllocaCmd; diff --git a/sycl/source/detail/scheduler/leaves_collection.cpp b/sycl/source/detail/scheduler/leaves_collection.cpp index 9628e61cb43e1..b847bb425d106 100644 --- a/sycl/source/detail/scheduler/leaves_collection.cpp +++ b/sycl/source/detail/scheduler/leaves_collection.cpp @@ -53,7 +53,8 @@ bool LeavesCollection::push_back(value_type Cmd, EnqueueListT *ToEnqueue) { bool Result = false; if (isHostAccessorCmd(Cmd)) - Result = addHostAccessorCommand(static_cast(Cmd)); + Result = + addHostAccessorCommand(static_cast(Cmd), ToEnqueue); else Result = addGenericCommand(Cmd, ToEnqueue); @@ -72,7 +73,8 @@ std::vector LeavesCollection::toVector() const { return Result; } -bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd) { +bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd, + EnqueueListT *ToEnqueue) { // 1. find the oldest command with doOverlap() = true amongst the List // => OldCmd HostAccessorCommandSingleXRefT OldCmdIt; @@ -97,7 +99,7 @@ bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd) { // when circular buffer is full. if (OldCmdIt != MHostAccessorCommands.end()) { // allocate dependency - MAllocateDependency(Cmd, *OldCmdIt, MRecord); + MAllocateDependency(Cmd, *OldCmdIt, MRecord, ToEnqueue); // erase the old cmd as it's tracked via dependency now eraseHostAccessorCommand(static_cast(*OldCmdIt)); diff --git a/sycl/source/detail/scheduler/leaves_collection.hpp b/sycl/source/detail/scheduler/leaves_collection.hpp index d694975751ef7..bf1308119437b 100644 --- a/sycl/source/detail/scheduler/leaves_collection.hpp +++ b/sycl/source/detail/scheduler/leaves_collection.hpp @@ -127,7 +127,7 @@ class LeavesCollection { AllocateDependencyF MAllocateDependency; bool addGenericCommand(value_type Cmd, EnqueueListT *ToEnqueue); - bool addHostAccessorCommand(EmptyCommand *Cmd); + bool addHostAccessorCommand(EmptyCommand *Cmd, EnqueueListT *ToEnqueue); // inserts a command to the end of list for its mem object void insertHostAccessorCommand(EmptyCommand *Cmd); From fbb2246763118e9e0d1a0a73e27a4273d91feee8 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 15 Oct 2020 15:26:14 +0300 Subject: [PATCH 04/19] Fix merge glitch Signed-off-by: Sergey Kanaev --- sycl/unittests/scheduler/BlockedCommands.cpp | 22 +++++++++++++------ sycl/unittests/scheduler/FailedCommands.cpp | 4 +++- .../scheduler/FinishedCmdCleanup.cpp | 6 +++-- sycl/unittests/scheduler/LeafLimit.cpp | 3 ++- sycl/unittests/scheduler/LeavesCollection.cpp | 18 ++++++++++----- .../scheduler/LinkedAllocaDependencies.cpp | 8 ++++--- .../scheduler/SchedulerTestUtils.hpp | 21 +++++++++++++----- 7 files changed, 58 insertions(+), 24 deletions(-) diff --git a/sycl/unittests/scheduler/BlockedCommands.cpp b/sycl/unittests/scheduler/BlockedCommands.cpp index c447a8f1e606b..35db395333001 100644 --- a/sycl/unittests/scheduler/BlockedCommands.cpp +++ b/sycl/unittests/scheduler/BlockedCommands.cpp @@ -19,9 +19,11 @@ TEST_F(SchedulerTest, BlockedCommands) { MockCmd.MIsBlockable = true; MockCmd.MRetVal = CL_DEVICE_PARTITION_EQUALLY; + MockScheduler MS; + auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = - MockScheduler::enqueueCommand(&MockCmd, Res, detail::NON_BLOCKING); + MockScheduler::enqueueCommand(&MockCmd, Res, Lock, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED\n"; @@ -30,7 +32,7 @@ TEST_F(SchedulerTest, BlockedCommands) { Res.MResult = detail::EnqueueResultT::SyclEnqueueSuccess; MockCmd.MRetVal = CL_DEVICE_PARTITION_EQUALLY; - Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); + Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, Lock, detail::BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueFailed, Res.MResult) << "The command is expected to fail to enqueue.\n"; @@ -41,7 +43,7 @@ TEST_F(SchedulerTest, BlockedCommands) { Res = detail::EnqueueResultT{}; MockCmd.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; MockCmd.MRetVal = CL_SUCCESS; - Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); + Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, Lock, detail::BLOCKING); ASSERT_TRUE(Enqueued && Res.MResult == detail::EnqueueResultT::SyclEnqueueSuccess) << "The command is expected to be successfully enqueued.\n"; @@ -84,8 +86,10 @@ TEST_F(SchedulerTest, DontEnqueueDepsIfOneOfThemIsBlocked) { EXPECT_CALL(C, enqueue(_, _)).Times(0); EXPECT_CALL(D, enqueue(_, _)).Times(0); + MockScheduler MS; + auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; - bool Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); + bool Enqueued = MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED.\n"; @@ -112,8 +116,10 @@ TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { EXPECT_CALL(A, enqueue(_, _)).Times(0); EXPECT_CALL(B, enqueue(_, _)).Times(0); + MockScheduler MS; + auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; - bool Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); + bool Enqueued = MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED.\n"; @@ -124,7 +130,7 @@ TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { EXPECT_CALL(A, enqueue(_, _)).Times(0); EXPECT_CALL(B, enqueue(_, _)).Times(1); - Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::BLOCKING); + Enqueued = MockScheduler::enqueueCommand(&A, Res, Lock, detail::BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueFailed, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED.\n"; @@ -163,8 +169,10 @@ TEST_F(SchedulerTest, EnqueueHostDependency) { EXPECT_CALL(A, enqueue(_, _)).Times(1); EXPECT_CALL(B, enqueue(_, _)).Times(1); + MockScheduler MS; + auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; - bool Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); + bool Enqueued = MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); ASSERT_TRUE(Enqueued) << "The command should be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueSuccess, Res.MResult) << "Enqueue operation should return successfully.\n"; diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 02c9f9ba1d865..1e0b9f2c3b4d6 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -20,9 +20,11 @@ TEST_F(SchedulerTest, FailedDependency) { MUser.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; MDep.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueFailed; + MockScheduler MS; + auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = - MockScheduler::enqueueCommand(&MUser, Res, detail::NON_BLOCKING); + MockScheduler::enqueueCommand(&MUser, Res, Lock, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Enqueue process must fail\n"; ASSERT_EQ(Res.MCmd, &MDep) << "Wrong failed command\n"; diff --git a/sycl/unittests/scheduler/FinishedCmdCleanup.cpp b/sycl/unittests/scheduler/FinishedCmdCleanup.cpp index abafde6cd2c5a..d82005d30f463 100644 --- a/sycl/unittests/scheduler/FinishedCmdCleanup.cpp +++ b/sycl/unittests/scheduler/FinishedCmdCleanup.cpp @@ -59,13 +59,15 @@ TEST_F(SchedulerTest, FinishedCmdCleanup) { detail::getSyclObjImpl(MQueue), MockReqA, Callback); addEdge(InnerC, &AllocaA, &AllocaA); + std::vector ToEnqueue; + MockCommand LeafB{detail::getSyclObjImpl(MQueue), MockReqB}; addEdge(&LeafB, &AllocaB, &AllocaB); - MS.addNodeToLeaves(RecC, &LeafB); + MS.addNodeToLeaves(RecC, &LeafB, access::mode::read, ToEnqueue); MockCommand LeafA{detail::getSyclObjImpl(MQueue), MockReqA}; addEdge(&LeafA, InnerC, &AllocaA); - MS.addNodeToLeaves(RecC, &LeafA); + MS.addNodeToLeaves(RecC, &LeafA, access::mode::read, ToEnqueue); MockCommand *InnerB = new MockCommandWithCallback( detail::getSyclObjImpl(MQueue), MockReqB, Callback); diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp index d840099a0048d..ab59ce2d33b9a 100644 --- a/sycl/unittests/scheduler/LeafLimit.cpp +++ b/sycl/unittests/scheduler/LeafLimit.cpp @@ -44,9 +44,10 @@ TEST_F(SchedulerTest, LeafLimit) { Leaf->addDep( detail::DepDesc{MockDepCmd.get(), Leaf->getRequirement(), nullptr}); } + std::vector ToEnqueue; // Add edges as leaves and exceed the leaf limit for (auto &LeafPtr : LeavesToAdd) { - MS.addNodeToLeaves(Rec, LeafPtr.get()); + MS.addNodeToLeaves(Rec, LeafPtr.get(), access::mode::read, ToEnqueue); } // Check that the oldest leaf has been removed from the leaf list // and added as a dependency of the newest one instead diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp index ee5a3952cb0c6..a339a5dce424f 100644 --- a/sycl/unittests/scheduler/LeavesCollection.cpp +++ b/sycl/unittests/scheduler/LeavesCollection.cpp @@ -51,8 +51,13 @@ TEST_F(LeavesCollectionTest, PushBack) { size_t TimesGenericWasFull; + std::vector ToEnqueue; + LeavesCollection::AllocateDependencyF AllocateDependency = - [&](Command *, Command *, MemObjRecord *) { ++TimesGenericWasFull; }; + [&](Command *, Command *, MemObjRecord *, + std::vector *) { + ++TimesGenericWasFull; + }; // add only generic commands { @@ -65,7 +70,7 @@ TEST_F(LeavesCollectionTest, PushBack) { for (size_t Idx = 0; Idx < GenericCmdsCapacity * 2; ++Idx) { Cmds.push_back(createGenericCommand(getSyclObjImpl(MQueue))); - LE.push_back(Cmds.back().get()); + LE.push_back(Cmds.back().get(), &ToEnqueue); } ASSERT_EQ(TimesGenericWasFull, GenericCmdsCapacity) @@ -95,7 +100,7 @@ TEST_F(LeavesCollectionTest, PushBack) { : createEmptyCommand(getSyclObjImpl(MQueue), MockReq); Cmds.push_back(Cmd); - LE.push_back(Cmds.back().get()); + LE.push_back(Cmds.back().get(), &ToEnqueue); } ASSERT_EQ(TimesGenericWasFull, GenericCmdsCapacity) @@ -112,8 +117,11 @@ TEST_F(LeavesCollectionTest, PushBack) { TEST_F(LeavesCollectionTest, Remove) { static constexpr size_t GenericCmdsCapacity = 8; + std::vector ToEnqueue; + LeavesCollection::AllocateDependencyF AllocateDependency = - [](Command *, Command *Old, MemObjRecord *) { --Old->MLeafCounter; }; + [](Command *, Command *Old, MemObjRecord *, + std::vector *) { --Old->MLeafCounter; }; { cl::sycl::buffer Buf(cl::sycl::range<1>(1)); @@ -129,7 +137,7 @@ TEST_F(LeavesCollectionTest, Remove) { : createEmptyCommand(getSyclObjImpl(MQueue), MockReq); Cmds.push_back(Cmd); - if (LE.push_back(Cmds.back().get())) + if (LE.push_back(Cmds.back().get(), &ToEnqueue)) ++Cmd->MLeafCounter; } diff --git a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp index ef590676212ff..49a904fba0e48 100644 --- a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp +++ b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp @@ -55,7 +55,8 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { /*PropList=*/{})); auto AllocaDep = [](cl::sycl::detail::Command *, cl::sycl::detail::Command *, - cl::sycl::detail::MemObjRecord *) {}; + cl::sycl::detail::MemObjRecord *, + std::vector *) {}; std::shared_ptr Record{ new cl::sycl::detail::MemObjRecord(DefaultHostQueue->getContextImplPtr(), @@ -71,11 +72,12 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { MockCommand DepDepCmd(DefaultHostQueue, Req); DepCmd.MDeps.push_back({&DepDepCmd, DepDepCmd.getRequirement(), &AllocaCmd1}); DepDepCmd.MUsers.insert(&DepCmd); - Record->MWriteLeaves.push_back(&DepCmd); + std::vector ToEnqueue; + Record->MWriteLeaves.push_back(&DepCmd, &ToEnqueue); MockScheduler MS; cl::sycl::detail::Command *AllocaCmd2 = - MS.getOrCreateAllocaForReq(Record.get(), &Req, Q1); + MS.getOrCreateAllocaForReq(Record.get(), &Req, Q1, ToEnqueue); ASSERT_TRUE(!!AllocaCmd1.MLinkedAllocaCmd) << "No link appeared in existing command"; diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index e5348f401236c..d5934e788d823 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -20,6 +20,10 @@ cl::sycl::detail::Requirement getMockRequirement(); +namespace cl { namespace sycl { namespace detail { + class Command; +}}} + class MockCommand : public cl::sycl::detail::Command { public: MockCommand(cl::sycl::detail::QueueImplPtr Queue, @@ -103,21 +107,28 @@ class MockScheduler : public cl::sycl::detail::Scheduler { void addNodeToLeaves( cl::sycl::detail::MemObjRecord *Rec, cl::sycl::detail::Command *Cmd, - cl::sycl::access::mode Mode = cl::sycl::access::mode::read_write) { - return MGraphBuilder.addNodeToLeaves(Rec, Cmd, Mode); + cl::sycl::access::mode Mode, + std::vector &ToEnqueue) { + return MGraphBuilder.addNodeToLeaves(Rec, Cmd, Mode, ToEnqueue); } static bool enqueueCommand(cl::sycl::detail::Command *Cmd, cl::sycl::detail::EnqueueResultT &EnqueueResult, + ReadLockT &GraphReadLock, cl::sycl::detail::BlockingT Blocking) { - return GraphProcessor::enqueueCommand(Cmd, EnqueueResult, Blocking); + return GraphProcessor::enqueueCommand(Cmd, EnqueueResult, GraphReadLock, Blocking); } cl::sycl::detail::AllocaCommandBase * getOrCreateAllocaForReq(cl::sycl::detail::MemObjRecord *Record, const cl::sycl::detail::Requirement *Req, - cl::sycl::detail::QueueImplPtr Queue) { - return MGraphBuilder.getOrCreateAllocaForReq(Record, Req, Queue); + cl::sycl::detail::QueueImplPtr Queue, + std::vector &ToEnqueue) { + return MGraphBuilder.getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); + } + + ReadLockT acquireGraphReadLock() { + return ReadLockT{MGraphLock}; } }; From a503fa28f93f0373412b73e4062235ac392cd74e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 11 May 2021 21:40:04 +0300 Subject: [PATCH 05/19] Fix merge glitch Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 2 +- sycl/source/detail/scheduler/graph_builder.cpp | 15 ++++++++------- sycl/source/detail/scheduler/graph_processor.cpp | 2 +- sycl/source/detail/scheduler/scheduler.cpp | 1 - sycl/source/detail/scheduler/scheduler.hpp | 3 ++- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 499a881343458..c2bda55ae6cce 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -501,7 +501,7 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep) { // Do not add redundant event dependencies for in-order queues. if (Dep.MDepCommand && Dep.MDepCommand->getWorkerQueue() == WorkerQueue && WorkerQueue->has_property()) - return; + return nullptr; ContextImplPtr DepEventContext = DepEvent->getContextImpl(); // If contexts don't match we'll connect them using host task diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 3b0c84516c06d..294019753d46a 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -174,7 +174,8 @@ MemObjRecord *Scheduler::GraphBuilder::getMemObjRecord(SYCLMemObjI *MemObject) { MemObjRecord * Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, - const Requirement *Req) { + const Requirement *Req, + std::vector &ToEnqueue) { SYCLMemObjI *MemObject = Req->MSYCLMemObj; MemObjRecord *Record = getMemObjRecord(MemObject); @@ -214,7 +215,7 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, MemObject->MRecord.reset( new MemObjRecord{InteropCtxPtr, LeafLimit, AllocateDependency}); - getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr); + getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr, ToEnqueue); } else MemObject->MRecord.reset(new MemObjRecord{Queue->getContextImplPtr(), LeafLimit, AllocateDependency}); @@ -486,7 +487,7 @@ Command *Scheduler::GraphBuilder::addHostAccessor( const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue(); - MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req); + MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue); if (MPrintOptionsArray[BeforeAddHostAcc]) printGraphAsDot("before_addHostAccessor"); markModifiedIfWrite(Record, Req); @@ -524,7 +525,7 @@ Command *Scheduler::GraphBuilder::addCGUpdateHost( auto UpdateHost = static_cast(CommandGroup.get()); Requirement *Req = UpdateHost->getReqToUpdate(); - MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req); + MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req, ToEnqueue); return insertMemoryMove(Record, Req, HostQueue, ToEnqueue); } @@ -702,7 +703,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( DefaultHostQueue, FullReq, true /* InitFromUserData */, nullptr /* LinkedAllocaCmd */); Record->MAllocaCommands.push_back(HostAllocaCmd); - Record->MWriteLeaves.push_back(HostAllocaCmd); + Record->MWriteLeaves.push_back(HostAllocaCmd, &ToEnqueue); ++(HostAllocaCmd->MLeafCounter); Record->MCurContext = DefaultHostQueue->getContextImplPtr(); } @@ -831,7 +832,7 @@ Scheduler::GraphBuilder::addEmptyCmd(Command *Cmd, const std::vector &Reqs, EmptyCmd->MBlockReason = Reason; for (T *Req : Reqs) { - MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req); + MemObjRecord *Record = getOrInsertMemObjRecord(Queue, Req, ToEnqueue); AllocaCommandBase *AllocaCmd = getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); EmptyCmd->addRequirement(Cmd, AllocaCmd, Req); @@ -913,7 +914,7 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, ? static_cast(NewCmd->getCG()).MQueue : Queue; - Record = getOrInsertMemObjRecord(QueueForAlloca, Req); + Record = getOrInsertMemObjRecord(QueueForAlloca, Req, ToEnqueue); markModifiedIfWrite(Record, Req); AllocaCmd = diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index f800ecd8d5556..666f6324c27a6 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -51,8 +51,8 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event, throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); GraphReadLock.unlock(); - Cmd->getEvent()->waitInternal(); + GraphReadLock.lock(); } bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index f2fb38c244913..0f45486d06957 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -243,7 +243,6 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) { { acquireWriteLock(Lock); - lockSharedTimedMutex(Lock); Record = MGraphBuilder.getMemObjRecord(MemObj); if (!Record) diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 126667834e899..d88769933a4b7 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -531,7 +531,8 @@ class Scheduler { /// \return a pointer to MemObjRecord for pointer to memory object. If the /// record is not found, nullptr is returned. MemObjRecord *getOrInsertMemObjRecord(const QueueImplPtr &Queue, - const Requirement *Req); + const Requirement *Req, + std::vector &ToEnqueue); /// Decrements leaf counters for all leaves of the record. void decrementLeafCountersForRecord(MemObjRecord *Record); From a7f93d979965bbd94d51ca894f089ef95780fefa Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 13 May 2021 11:36:35 +0300 Subject: [PATCH 06/19] Fix build issues Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.hpp | 6 +- .../source/detail/scheduler/graph_builder.cpp | 62 +++++++++---------- .../detail/scheduler/graph_processor.cpp | 4 +- sycl/source/detail/scheduler/scheduler.cpp | 7 ++- sycl/source/detail/scheduler/scheduler.hpp | 14 ++--- sycl/unittests/scheduler/AllocaLinking.cpp | 24 ++++--- sycl/unittests/scheduler/BlockedCommands.cpp | 2 +- sycl/unittests/scheduler/FailedCommands.cpp | 2 +- .../scheduler/FinishedCmdCleanup.cpp | 5 +- sycl/unittests/scheduler/InOrderQueueDeps.cpp | 21 ++++--- sycl/unittests/scheduler/LeafLimit.cpp | 7 ++- .../scheduler/MemObjCommandCleanup.cpp | 5 +- .../scheduler/NoHostUnifiedMemory.cpp | 41 +++++++----- .../scheduler/SchedulerTestUtils.hpp | 15 +++-- .../scheduler/StreamInitDependencyOnHost.cpp | 3 +- sycl/unittests/scheduler/utils.cpp | 3 +- 16 files changed, 124 insertions(+), 97 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index acdb256239c85..87fab13513696 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -108,10 +108,10 @@ class Command { Command(CommandType Type, QueueImplPtr Queue); /// \return an optional connection cmd to enqueue - Command *addDep(DepDesc NewDep); + [[nodiscard]] Command *addDep(DepDesc NewDep); /// \return an optional connection cmd to enqueue - Command *addDep(EventImplPtr Event); + [[nodiscard]] Command *addDep(EventImplPtr Event); void addUser(Command *NewUser) { MUsers.insert(NewUser); } @@ -213,7 +213,7 @@ class Command { /// command. Context of this command is fetched via getWorkerContext(). /// /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. - Command *processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep); + [[nodiscard]] Command *processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep); /// Private interface. Derived classes should implement this method. virtual cl_int enqueueImp() = 0; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 294019753d46a..060928ed4035f 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -173,9 +173,9 @@ MemObjRecord *Scheduler::GraphBuilder::getMemObjRecord(SYCLMemObjI *MemObject) { } MemObjRecord * -Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, - const Requirement *Req, - std::vector &ToEnqueue) { +Scheduler::GraphBuilder::getOrInsertMemObjRecord( + const QueueImplPtr &Queue, const Requirement *Req, + std::vector &ToEnqueue) { SYCLMemObjI *MemObject = Req->MSYCLMemObj; MemObjRecord *Record = getMemObjRecord(MemObject); @@ -215,7 +215,8 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, MemObject->MRecord.reset( new MemObjRecord{InteropCtxPtr, LeafLimit, AllocateDependency}); - getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr, ToEnqueue); + getOrCreateAllocaForReq(MemObject->MRecord.get(), Req, InteropQueuePtr, + ToEnqueue); } else MemObject->MRecord.reset(new MemObjRecord{Queue->getContextImplPtr(), LeafLimit, AllocateDependency}); @@ -238,11 +239,9 @@ void Scheduler::GraphBuilder::updateLeaves(const std::set &Cmds, } } -void Scheduler::GraphBuilder::addNodeToLeaves(MemObjRecord *Record, - Command *Cmd, - access::mode AccessMode, - std::vector &ToEnqueue) -{ +void Scheduler::GraphBuilder::addNodeToLeaves( + MemObjRecord *Record, Command *Cmd, access::mode AccessMode, + std::vector &ToEnqueue) { LeavesCollection &Leaves{AccessMode == access::mode::read ? Record->MReadLeaves : Record->MWriteLeaves}; @@ -306,10 +305,9 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc, } Command * -Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record, - Requirement *Req, - const QueueImplPtr &Queue, - std::vector &ToEnqueue) { +Scheduler::GraphBuilder::insertMemoryMove( + MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, + std::vector &ToEnqueue) { AllocaCommandBase *AllocaCmdDst = getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); @@ -439,9 +437,9 @@ Command *Scheduler::GraphBuilder::remapMemoryObject( // The function adds copy operation of the up to date'st memory to the memory // pointed by Req. -Command *Scheduler::GraphBuilder::addCopyBack( - Requirement *Req, std::vector &ToEnqueue) { - +Command * +Scheduler::GraphBuilder::addCopyBack(Requirement *Req, + std::vector &ToEnqueue) { QueueImplPtr HostQueue = Scheduler::getInstance().getDefaultHostQueue(); SYCLMemObjI *MemObj = Req->MSYCLMemObj; MemObjRecord *Record = getMemObjRecord(MemObj); @@ -482,8 +480,9 @@ Command *Scheduler::GraphBuilder::addCopyBack( // The function implements SYCL host accessor logic: host accessor // should provide access to the buffer in user space. -Command *Scheduler::GraphBuilder::addHostAccessor( - Requirement *Req, std::vector &ToEnqueue) { +Command * +Scheduler::GraphBuilder::addHostAccessor(Requirement *Req, + std::vector &ToEnqueue) { const QueueImplPtr &HostQueue = getInstance().getDefaultHostQueue(); @@ -506,9 +505,9 @@ Command *Scheduler::GraphBuilder::addHostAccessor( insertUpdateHostReqCmd(Record, Req, HostQueue, ToEnqueue); // Need empty command to be blocked until host accessor is destructed - EmptyCommand *EmptyCmd = addEmptyCmd( - UpdateHostAccCmd, {Req}, HostQueue, Command::BlockReason::HostAccessor, - ToEnqueue); + EmptyCommand *EmptyCmd = + addEmptyCmd(UpdateHostAccCmd, {Req}, HostQueue, + Command::BlockReason::HostAccessor, ToEnqueue); Req->MBlockedCmd = EmptyCmd; @@ -750,9 +749,8 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( // Update linked command if (LinkedAllocaCmd) { - Command *ConnCmd = AllocaCmd->addDep( - DepDesc{LinkedAllocaCmd, AllocaCmd->getRequirement(), - LinkedAllocaCmd}); + Command *ConnCmd = AllocaCmd->addDep(DepDesc{ + LinkedAllocaCmd, AllocaCmd->getRequirement(), LinkedAllocaCmd}); if (ConnCmd) ToEnqueue.push_back(ConnCmd); LinkedAllocaCmd->addUser(AllocaCmd); @@ -983,9 +981,9 @@ Scheduler::GraphBuilder::addCG(std::unique_ptr CommandGroup, } if (CGType == CG::CGTYPE::CODEPLAY_HOST_TASK) - NewCmd->MEmptyCmd = addEmptyCmd(NewCmd.get(), NewCmd->getCG().MRequirements, - Queue, Command::BlockReason::HostTask, - ToEnqueue); + NewCmd->MEmptyCmd = + addEmptyCmd(NewCmd.get(), NewCmd->getCG().MRequirements, Queue, + Command::BlockReason::HostTask, ToEnqueue); if (MPrintOptionsArray[AfterAddCG]) printGraphAsDot("after_addCG"); @@ -1211,7 +1209,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, // make ConnectCmd depend on requirement // Dismiss the result here as it's not a connection now, // 'cause ConnectCmd is host one - ConnectCmd->addDep(Dep); + (void)ConnectCmd->addDep(Dep); assert(reinterpret_cast(DepEvent->getCommand()) == Dep.MDepCommand); // add user to Dep.MDepCommand is already performed beyond this if branch @@ -1238,7 +1236,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, // Dismiss the result here as it's not a connection now, // 'cause EmptyCmd is host one - Cmd->addDep(CmdDep); + (void)Cmd->addDep(CmdDep); } } else { std::vector ToEnqueue; @@ -1251,13 +1249,13 @@ Command *Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, // ConnectCmd via its event. // Dismiss the result here as it's not a connection now, // 'cause ConnectCmd is host one. - EmptyCmd->addDep(ConnectCmd->getEvent()); - ConnectCmd->addDep(DepEvent); + (void)EmptyCmd->addDep(ConnectCmd->getEvent()); + (void)ConnectCmd->addDep(DepEvent); // Depend Cmd on empty command // Dismiss the result here as it's not a connection now, // 'cause EmptyCmd is host one - Cmd->addDep(EmptyCmd->getEvent()); + (void)Cmd->addDep(EmptyCmd->getEvent()); } EmptyCmd->addUser(Cmd); diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index 666f6324c27a6..77be1841c7de0 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -71,8 +71,8 @@ bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, // Recursively enqueue all the dependencies first and // exit immediately if any of the commands cannot be enqueued. for (DepDesc &Dep : Cmd->MDeps) { - if (!enqueueCommand( - Dep.MDepCommand, EnqueueResult, GraphReadLock, Blocking)) + if (!enqueueCommand(Dep.MDepCommand, EnqueueResult, GraphReadLock, + Blocking)) return false; } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 0f45486d06957..83b97f21b1519 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -56,8 +56,8 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { Command *ReleaseCmd = AllocaCmd->getReleaseCmd(); EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res, - GraphReadLock); + bool Enqueued = + GraphProcessor::enqueueCommand(ReleaseCmd, Res, GraphReadLock); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -119,7 +119,8 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, for (Command *Cmd : AuxiliaryCmds) { Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, Lock); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + throw runtime_error( + "Auxiliary enqueue process failed.", PI_INVALID_OPERATION); } Command *NewCmd = static_cast(NewEvent->getCommand()); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index d88769933a4b7..3d059e97ccacf 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -481,8 +481,8 @@ class Scheduler { /// \sa queue::submit, Scheduler::addCG /// /// \return a command that represents command group execution. - Command *addCG(std::unique_ptr CommandGroup, - QueueImplPtr Queue, std::vector &ToEnqueue); + Command *addCG(std::unique_ptr CommandGroup, QueueImplPtr Queue, + std::vector &ToEnqueue); /// Registers a \ref CG "command group" that updates host memory to the /// latest state. @@ -621,9 +621,10 @@ class Scheduler { /// Searches for suitable alloca in memory record. /// /// If none found, creates new one. - AllocaCommandBase *getOrCreateAllocaForReq( - MemObjRecord *Record, const Requirement *Req, QueueImplPtr Queue, - std::vector &ToEnqueue); + AllocaCommandBase * + getOrCreateAllocaForReq(MemObjRecord *Record, const Requirement *Req, + QueueImplPtr Queue, + std::vector &ToEnqueue); void markModifiedIfWrite(MemObjRecord *Record, Requirement *Req); @@ -728,8 +729,7 @@ class Scheduler { /// Waits for the command, associated with Event passed, is completed. /// \param GraphReadLock read-lock which is already acquired for reading - static void waitForEvent(EventImplPtr Event, - ReadLockT &GraphReadLock); + static void waitForEvent(EventImplPtr Event, ReadLockT &GraphReadLock); /// Enqueues the command and all its dependencies. /// diff --git a/sycl/unittests/scheduler/AllocaLinking.cpp b/sycl/unittests/scheduler/AllocaLinking.cpp index 129bab848cb3b..64691fc1b20bc 100644 --- a/sycl/unittests/scheduler/AllocaLinking.cpp +++ b/sycl/unittests/scheduler/AllocaLinking.cpp @@ -68,11 +68,13 @@ TEST_F(SchedulerTest, AllocaLinking) { buffer Buf(range<1>(1)); detail::Requirement Req = getMockRequirement(Buf); - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req); + std::vector AuxCmds; + detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( + QImpl, &Req, AuxCmds); detail::AllocaCommandBase *NonHostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, QImpl); + MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue); + MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); EXPECT_FALSE(HostAllocaCmd->MLinkedAllocaCmd); EXPECT_FALSE(NonHostAllocaCmd->MLinkedAllocaCmd); @@ -83,11 +85,13 @@ TEST_F(SchedulerTest, AllocaLinking) { range<1>(1), {ext::oneapi::property::buffer::use_pinned_host_memory()}); detail::Requirement Req = getMockRequirement(Buf); - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req); + std::vector AuxCmds; + detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( + QImpl, &Req, AuxCmds); detail::AllocaCommandBase *NonHostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, QImpl); + MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue); + MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); EXPECT_EQ(HostAllocaCmd->MLinkedAllocaCmd, NonHostAllocaCmd); EXPECT_EQ(NonHostAllocaCmd->MLinkedAllocaCmd, HostAllocaCmd); @@ -98,11 +102,13 @@ TEST_F(SchedulerTest, AllocaLinking) { buffer Buf(range<1>(1)); detail::Requirement Req = getMockRequirement(Buf); - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req); + std::vector AuxCmds; + detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( + QImpl, &Req, AuxCmds); detail::AllocaCommandBase *NonHostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, QImpl); + MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue); + MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); EXPECT_EQ(HostAllocaCmd->MLinkedAllocaCmd, NonHostAllocaCmd); EXPECT_EQ(NonHostAllocaCmd->MLinkedAllocaCmd, HostAllocaCmd); diff --git a/sycl/unittests/scheduler/BlockedCommands.cpp b/sycl/unittests/scheduler/BlockedCommands.cpp index 35db395333001..900d4b3710ba3 100644 --- a/sycl/unittests/scheduler/BlockedCommands.cpp +++ b/sycl/unittests/scheduler/BlockedCommands.cpp @@ -154,7 +154,7 @@ TEST_F(SchedulerTest, EnqueueHostDependency) { new cl::sycl::detail::event_impl(detail::getSyclObjImpl(MQueue))}; DepEvent->setCommand(&B); - A.addDep(DepEvent); + (void)A.addDep(DepEvent); // We have such a "graph": // diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 1e0b9f2c3b4d6..36ac78a65140f 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -16,7 +16,7 @@ TEST_F(SchedulerTest, FailedDependency) { MockCommand MDep(detail::getSyclObjImpl(MQueue)); MockCommand MUser(detail::getSyclObjImpl(MQueue)); MDep.addUser(&MUser); - MUser.addDep(detail::DepDesc{&MDep, &MockReq, nullptr}); + (void)MUser.addDep(detail::DepDesc{&MDep, &MockReq, nullptr}); MUser.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; MDep.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueFailed; diff --git a/sycl/unittests/scheduler/FinishedCmdCleanup.cpp b/sycl/unittests/scheduler/FinishedCmdCleanup.cpp index d82005d30f463..34401108187e1 100644 --- a/sycl/unittests/scheduler/FinishedCmdCleanup.cpp +++ b/sycl/unittests/scheduler/FinishedCmdCleanup.cpp @@ -21,8 +21,9 @@ TEST_F(SchedulerTest, FinishedCmdCleanup) { detail::Requirement MockReqA = getMockRequirement(BufA); detail::Requirement MockReqB = getMockRequirement(BufB); detail::Requirement MockReqC = getMockRequirement(BufC); - detail::MemObjRecord *RecC = - MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(MQueue), &MockReqC); + std::vector AuxCmds; + detail::MemObjRecord *RecC = MS.getOrInsertMemObjRecord( + detail::getSyclObjImpl(MQueue), &MockReqC, AuxCmds); // Create a graph and check that all inner nodes have been deleted and // their users have had the corresponding dependency replaced with a diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index cec7c1772852c..0a2bb258138f6 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -112,18 +112,21 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { buffer Buf(&val, range<1>(1)); detail::Requirement Req = getMockRequirement(Buf); + std::vector AuxCmds; detail::MemObjRecord *Record = - MS.getOrInsertMemObjRecord(InOrderQueueImpl, &Req); - MS.getOrCreateAllocaForReq(Record, &Req, InOrderQueueImpl); - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue); + MS.getOrInsertMemObjRecord(InOrderQueueImpl, &Req, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, InOrderQueueImpl, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); // Check that sequential memory movements submitted to the same in-order // queue do not depend on each other. - detail::Command *Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue); + detail::Command *Cmd = MS.insertMemoryMove( + Record, &Req, DefaultHostQueue, AuxCmds); detail::EnqueueResultT Res; - MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); - Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl); - MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); - Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue); - MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); + auto ReadLock = MS.acquireGraphReadLock(); + MockScheduler::enqueueCommand(Cmd, Res, ReadLock, detail::NON_BLOCKING); + Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl, AuxCmds); + MockScheduler::enqueueCommand(Cmd, Res, ReadLock, detail::NON_BLOCKING); + Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue, AuxCmds); + MockScheduler::enqueueCommand(Cmd, Res, ReadLock, detail::NON_BLOCKING); } diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp index ab59ce2d33b9a..03dfea7803b6c 100644 --- a/sycl/unittests/scheduler/LeafLimit.cpp +++ b/sycl/unittests/scheduler/LeafLimit.cpp @@ -29,8 +29,9 @@ TEST_F(SchedulerTest, LeafLimit) { MockDepCmd = std::make_unique(detail::getSyclObjImpl(MQueue), MockReq); - detail::MemObjRecord *Rec = - MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(MQueue), &MockReq); + std::vector AuxCmds; + detail::MemObjRecord *Rec = MS.getOrInsertMemObjRecord( + detail::getSyclObjImpl(MQueue), &MockReq, AuxCmds); // Create commands that will be added as leaves exceeding the limit by 1 for (std::size_t i = 0; i < Rec->MWriteLeaves.genericCommandsCapacity() + 1; @@ -41,7 +42,7 @@ TEST_F(SchedulerTest, LeafLimit) { // Create edges: all soon-to-be leaves are direct users of MockDep for (auto &Leaf : LeavesToAdd) { MockDepCmd->addUser(Leaf.get()); - Leaf->addDep( + (void)Leaf->addDep( detail::DepDesc{MockDepCmd.get(), Leaf->getRequirement(), nullptr}); } std::vector ToEnqueue; diff --git a/sycl/unittests/scheduler/MemObjCommandCleanup.cpp b/sycl/unittests/scheduler/MemObjCommandCleanup.cpp index d35ece2454203..17429831f4257 100644 --- a/sycl/unittests/scheduler/MemObjCommandCleanup.cpp +++ b/sycl/unittests/scheduler/MemObjCommandCleanup.cpp @@ -17,8 +17,9 @@ TEST_F(SchedulerTest, MemObjCommandCleanup) { buffer BufB(range<1>(1)); detail::Requirement MockReqA = getMockRequirement(BufA); detail::Requirement MockReqB = getMockRequirement(BufB); - detail::MemObjRecord *RecA = - MS.getOrInsertMemObjRecord(detail::getSyclObjImpl(MQueue), &MockReqA); + std::vector AuxCmds; + detail::MemObjRecord *RecA = MS.getOrInsertMemObjRecord( + detail::getSyclObjImpl(MQueue), &MockReqA, AuxCmds); // Create 2 fake allocas, one of which will be cleaned up detail::AllocaCommand *MockAllocaA = diff --git a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp index 9dc561295eb86..dedfe10bfeaeb 100644 --- a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp +++ b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp @@ -89,9 +89,11 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { buffer Buf(&val, range<1>(1)); detail::Requirement Req = getMockRequirement(Buf); - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req); + std::vector AuxCmds; + detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( + QImpl, &Req, AuxCmds); detail::AllocaCommandBase *NonHostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, QImpl); + MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); // Both non-host and host allocations should be created in this case in // order to perform a memory move. @@ -102,7 +104,8 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { EXPECT_TRUE(!NonHostAllocaCmd->MLinkedAllocaCmd); EXPECT_TRUE(Record->MCurContext->is_host()); - detail::Command *MemoryMove = MS.insertMemoryMove(Record, &Req, QImpl); + detail::Command *MemoryMove = MS.insertMemoryMove( + Record, &Req, QImpl, AuxCmds); EXPECT_EQ(MemoryMove->getType(), detail::Command::COPY_MEMORY); } // Check non-host alloca with discard access modes @@ -116,8 +119,10 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // No need to create a host allocation in this case since the data can be // discarded. - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req); - MS.getOrCreateAllocaForReq(Record, &DiscardReq, QImpl); + std::vector AuxCmds; + detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( + QImpl, &Req, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &DiscardReq, QImpl, AuxCmds); EXPECT_EQ(Record->MAllocaCommands.size(), 1U); } // Check non-host alloca without user pointer @@ -127,8 +132,10 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // No need to create a host allocation in this case since there's no data to // initialize the buffer with. - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req); - MS.getOrCreateAllocaForReq(Record, &Req, QImpl); + std::vector AuxCmds; + detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( + QImpl, &Req, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); EXPECT_EQ(Record->MAllocaCommands.size(), 1U); } // Check host -> non-host alloca @@ -139,18 +146,20 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // No special handling required: alloca commands are created one after // another and the transfer is done via a write operation. + std::vector AuxCmds; detail::MemObjRecord *Record = - MS.getOrInsertMemObjRecord(DefaultHostQueue, &Req); + MS.getOrInsertMemObjRecord(DefaultHostQueue, &Req, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue); + MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); EXPECT_EQ(Record->MAllocaCommands.size(), 1U); detail::AllocaCommandBase *NonHostAllocaCmd = - MS.getOrCreateAllocaForReq(Record, &Req, QImpl); + MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); EXPECT_EQ(Record->MAllocaCommands.size(), 2U); EXPECT_TRUE(!HostAllocaCmd->MLinkedAllocaCmd); EXPECT_TRUE(!NonHostAllocaCmd->MLinkedAllocaCmd); - detail::Command *MemoryMove = MS.insertMemoryMove(Record, &Req, QImpl); + detail::Command *MemoryMove = MS.insertMemoryMove( + Record, &Req, QImpl, AuxCmds); EXPECT_EQ(MemoryMove->getType(), detail::Command::COPY_MEMORY); } // Check that memory movement operations work correctly with/after discard @@ -163,13 +172,15 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { detail::Requirement DiscardReq = getMockRequirement(Buf); DiscardReq.MAccessMode = access::mode::discard_read_write; - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord(QImpl, &Req); - MS.getOrCreateAllocaForReq(Record, &Req, QImpl); - MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue); + std::vector AuxCmds; + detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( + QImpl, &Req, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); + MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); // Memory movement operations should be omitted for discard access modes. detail::Command *MemoryMove = - MS.insertMemoryMove(Record, &DiscardReq, DefaultHostQueue); + MS.insertMemoryMove(Record, &DiscardReq, DefaultHostQueue, AuxCmds); EXPECT_TRUE(MemoryMove == nullptr); // The current context for the record should still be modified. EXPECT_EQ(Record->MCurContext, DefaultHostQueue->getContextImplPtr()); diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 0396093566d26..f78a635918e95 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -95,8 +95,9 @@ class MockScheduler : public cl::sycl::detail::Scheduler { public: cl::sycl::detail::MemObjRecord * getOrInsertMemObjRecord(const cl::sycl::detail::QueueImplPtr &Queue, - cl::sycl::detail::Requirement *Req) { - return MGraphBuilder.getOrInsertMemObjRecord(Queue, Req); + cl::sycl::detail::Requirement *Req, + std::vector &ToEnqueue) { + return MGraphBuilder.getOrInsertMemObjRecord(Queue, Req, ToEnqueue); } void removeRecordForMemObj(cl::sycl::detail::SYCLMemObjI *MemObj) { @@ -138,14 +139,16 @@ class MockScheduler : public cl::sycl::detail::Scheduler { cl::sycl::detail::Command * insertMemoryMove(cl::sycl::detail::MemObjRecord *Record, cl::sycl::detail::Requirement *Req, - const cl::sycl::detail::QueueImplPtr &Queue) { - return MGraphBuilder.insertMemoryMove(Record, Req, Queue); + const cl::sycl::detail::QueueImplPtr &Queue, + std::vector &ToEnqueue) { + return MGraphBuilder.insertMemoryMove(Record, Req, Queue, ToEnqueue); } cl::sycl::detail::Command * addCG(std::unique_ptr CommandGroup, - cl::sycl::detail::QueueImplPtr Queue) { - return MGraphBuilder.addCG(std::move(CommandGroup), Queue); + cl::sycl::detail::QueueImplPtr Queue, + std::vector &ToEnqueue) { + return MGraphBuilder.addCG(std::move(CommandGroup), Queue, ToEnqueue); } }; diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 083fc3eb40630..720247e38c27b 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -128,7 +128,8 @@ TEST_F(SchedulerTest, StreamInitDependencyOnHost) { initStream(Streams[0], HQueueImpl); MockScheduler MS; - detail::Command *NewCmd = MS.addCG(std::move(MainCG), HQueueImpl); + std::vector AuxCmds; + detail::Command *NewCmd = MS.addCG(std::move(MainCG), HQueueImpl, AuxCmds); ASSERT_TRUE(!!NewCmd) << "Failed to add command group into scheduler"; ASSERT_GT(NewCmd->MDeps.size(), 0u) << "No deps appeared in the new exec kernel command"; diff --git a/sycl/unittests/scheduler/utils.cpp b/sycl/unittests/scheduler/utils.cpp index 3e80c485bc458..b6bb23b4325d8 100644 --- a/sycl/unittests/scheduler/utils.cpp +++ b/sycl/unittests/scheduler/utils.cpp @@ -10,7 +10,8 @@ void addEdge(cl::sycl::detail::Command *User, cl::sycl::detail::Command *Dep, cl::sycl::detail::AllocaCommandBase *Alloca) { - User->addDep(cl::sycl::detail::DepDesc{Dep, User->getRequirement(), Alloca}); + (void)User->addDep( + cl::sycl::detail::DepDesc{Dep, User->getRequirement(), Alloca}); Dep->addUser(User); } From d9fb9e7e594679485b677838b880c19e990ee24e Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 13 May 2021 12:05:17 +0300 Subject: [PATCH 07/19] Fix style issues Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.hpp | 3 ++- .../source/detail/scheduler/graph_builder.cpp | 6 ++--- sycl/source/detail/scheduler/scheduler.cpp | 4 +-- sycl/unittests/scheduler/AllocaLinking.cpp | 12 ++++----- sycl/unittests/scheduler/BlockedCommands.cpp | 15 +++++++---- sycl/unittests/scheduler/InOrderQueueDeps.cpp | 4 +-- .../scheduler/NoHostUnifiedMemory.cpp | 24 +++++++++--------- .../scheduler/SchedulerTestUtils.hpp | 25 +++++++++++-------- 8 files changed, 50 insertions(+), 43 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 87fab13513696..f6262a907e8d6 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -213,7 +213,8 @@ class Command { /// command. Context of this command is fetched via getWorkerContext(). /// /// Optionality of Dep is set by Dep.MDepCommand not equal to nullptr. - [[nodiscard]] Command *processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep); + [[nodiscard]] Command *processDepEvent(EventImplPtr DepEvent, + const DepDesc &Dep); /// Private interface. Derived classes should implement this method. virtual cl_int enqueueImp() = 0; diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 060928ed4035f..8b362c1801ec1 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -172,8 +172,7 @@ MemObjRecord *Scheduler::GraphBuilder::getMemObjRecord(SYCLMemObjI *MemObject) { return MemObject->MRecord.get(); } -MemObjRecord * -Scheduler::GraphBuilder::getOrInsertMemObjRecord( +MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord( const QueueImplPtr &Queue, const Requirement *Req, std::vector &ToEnqueue) { SYCLMemObjI *MemObject = Req->MSYCLMemObj; @@ -304,8 +303,7 @@ static Command *insertMapUnmapForLinkedCmds(AllocaCommandBase *AllocaCmdSrc, return MapCmd; } -Command * -Scheduler::GraphBuilder::insertMemoryMove( +Command *Scheduler::GraphBuilder::insertMemoryMove( MemObjRecord *Record, Requirement *Req, const QueueImplPtr &Queue, std::vector &ToEnqueue) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 83b97f21b1519..120aee9c3f6c7 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -119,8 +119,8 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, for (Command *Cmd : AuxiliaryCmds) { Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, Lock); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error( - "Auxiliary enqueue process failed.", PI_INVALID_OPERATION); + throw runtime_error("Auxiliary enqueue process failed.", + PI_INVALID_OPERATION); } Command *NewCmd = static_cast(NewEvent->getCommand()); diff --git a/sycl/unittests/scheduler/AllocaLinking.cpp b/sycl/unittests/scheduler/AllocaLinking.cpp index 64691fc1b20bc..33348079e0abf 100644 --- a/sycl/unittests/scheduler/AllocaLinking.cpp +++ b/sycl/unittests/scheduler/AllocaLinking.cpp @@ -69,8 +69,8 @@ TEST_F(SchedulerTest, AllocaLinking) { detail::Requirement Req = getMockRequirement(Buf); std::vector AuxCmds; - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( - QImpl, &Req, AuxCmds); + detail::MemObjRecord *Record = + MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds); detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = @@ -86,8 +86,8 @@ TEST_F(SchedulerTest, AllocaLinking) { detail::Requirement Req = getMockRequirement(Buf); std::vector AuxCmds; - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( - QImpl, &Req, AuxCmds); + detail::MemObjRecord *Record = + MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds); detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = @@ -103,8 +103,8 @@ TEST_F(SchedulerTest, AllocaLinking) { detail::Requirement Req = getMockRequirement(Buf); std::vector AuxCmds; - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( - QImpl, &Req, AuxCmds); + detail::MemObjRecord *Record = + MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds); detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); detail::AllocaCommandBase *HostAllocaCmd = diff --git a/sycl/unittests/scheduler/BlockedCommands.cpp b/sycl/unittests/scheduler/BlockedCommands.cpp index 900d4b3710ba3..8cb055ffcb90d 100644 --- a/sycl/unittests/scheduler/BlockedCommands.cpp +++ b/sycl/unittests/scheduler/BlockedCommands.cpp @@ -32,7 +32,8 @@ TEST_F(SchedulerTest, BlockedCommands) { Res.MResult = detail::EnqueueResultT::SyclEnqueueSuccess; MockCmd.MRetVal = CL_DEVICE_PARTITION_EQUALLY; - Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, Lock, detail::BLOCKING); + Enqueued = + MockScheduler::enqueueCommand(&MockCmd, Res, Lock, detail::BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueFailed, Res.MResult) << "The command is expected to fail to enqueue.\n"; @@ -43,7 +44,8 @@ TEST_F(SchedulerTest, BlockedCommands) { Res = detail::EnqueueResultT{}; MockCmd.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; MockCmd.MRetVal = CL_SUCCESS; - Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, Lock, detail::BLOCKING); + Enqueued = + MockScheduler::enqueueCommand(&MockCmd, Res, Lock, detail::BLOCKING); ASSERT_TRUE(Enqueued && Res.MResult == detail::EnqueueResultT::SyclEnqueueSuccess) << "The command is expected to be successfully enqueued.\n"; @@ -89,7 +91,8 @@ TEST_F(SchedulerTest, DontEnqueueDepsIfOneOfThemIsBlocked) { MockScheduler MS; auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; - bool Enqueued = MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); + bool Enqueued = + MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED.\n"; @@ -119,7 +122,8 @@ TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { MockScheduler MS; auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; - bool Enqueued = MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); + bool Enqueued = + MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED.\n"; @@ -172,7 +176,8 @@ TEST_F(SchedulerTest, EnqueueHostDependency) { MockScheduler MS; auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; - bool Enqueued = MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); + bool Enqueued = + MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); ASSERT_TRUE(Enqueued) << "The command should be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueSuccess, Res.MResult) << "Enqueue operation should return successfully.\n"; diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index 0a2bb258138f6..8d56b26d084fa 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -120,8 +120,8 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { // Check that sequential memory movements submitted to the same in-order // queue do not depend on each other. - detail::Command *Cmd = MS.insertMemoryMove( - Record, &Req, DefaultHostQueue, AuxCmds); + detail::Command *Cmd = + MS.insertMemoryMove(Record, &Req, DefaultHostQueue, AuxCmds); detail::EnqueueResultT Res; auto ReadLock = MS.acquireGraphReadLock(); MockScheduler::enqueueCommand(Cmd, Res, ReadLock, detail::NON_BLOCKING); diff --git a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp index dedfe10bfeaeb..ed66cc0c9f60f 100644 --- a/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp +++ b/sycl/unittests/scheduler/NoHostUnifiedMemory.cpp @@ -90,8 +90,8 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { detail::Requirement Req = getMockRequirement(Buf); std::vector AuxCmds; - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( - QImpl, &Req, AuxCmds); + detail::MemObjRecord *Record = + MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds); detail::AllocaCommandBase *NonHostAllocaCmd = MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); @@ -104,8 +104,8 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { EXPECT_TRUE(!NonHostAllocaCmd->MLinkedAllocaCmd); EXPECT_TRUE(Record->MCurContext->is_host()); - detail::Command *MemoryMove = MS.insertMemoryMove( - Record, &Req, QImpl, AuxCmds); + detail::Command *MemoryMove = + MS.insertMemoryMove(Record, &Req, QImpl, AuxCmds); EXPECT_EQ(MemoryMove->getType(), detail::Command::COPY_MEMORY); } // Check non-host alloca with discard access modes @@ -120,8 +120,8 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // No need to create a host allocation in this case since the data can be // discarded. std::vector AuxCmds; - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( - QImpl, &Req, AuxCmds); + detail::MemObjRecord *Record = + MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds); MS.getOrCreateAllocaForReq(Record, &DiscardReq, QImpl, AuxCmds); EXPECT_EQ(Record->MAllocaCommands.size(), 1U); } @@ -133,8 +133,8 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { // No need to create a host allocation in this case since there's no data to // initialize the buffer with. std::vector AuxCmds; - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( - QImpl, &Req, AuxCmds); + detail::MemObjRecord *Record = + MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds); MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); EXPECT_EQ(Record->MAllocaCommands.size(), 1U); } @@ -158,8 +158,8 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { EXPECT_TRUE(!HostAllocaCmd->MLinkedAllocaCmd); EXPECT_TRUE(!NonHostAllocaCmd->MLinkedAllocaCmd); - detail::Command *MemoryMove = MS.insertMemoryMove( - Record, &Req, QImpl, AuxCmds); + detail::Command *MemoryMove = + MS.insertMemoryMove(Record, &Req, QImpl, AuxCmds); EXPECT_EQ(MemoryMove->getType(), detail::Command::COPY_MEMORY); } // Check that memory movement operations work correctly with/after discard @@ -173,8 +173,8 @@ TEST_F(SchedulerTest, NoHostUnifiedMemory) { DiscardReq.MAccessMode = access::mode::discard_read_write; std::vector AuxCmds; - detail::MemObjRecord *Record = MS.getOrInsertMemObjRecord( - QImpl, &Req, AuxCmds); + detail::MemObjRecord *Record = + MS.getOrInsertMemObjRecord(QImpl, &Req, AuxCmds); MS.getOrCreateAllocaForReq(Record, &Req, QImpl, AuxCmds); MS.getOrCreateAllocaForReq(Record, &Req, DefaultHostQueue, AuxCmds); diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index f78a635918e95..5485a6adb55af 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -22,9 +22,13 @@ cl::sycl::detail::Requirement getMockRequirement(); -namespace cl { namespace sycl { namespace detail { - class Command; -}}} +namespace cl { +namespace sycl { +namespace detail { +class Command; +} // namespace detail +} // namespace sycl +} // namespace cl class MockCommand : public cl::sycl::detail::Command { public: @@ -110,10 +114,10 @@ class MockScheduler : public cl::sycl::detail::Scheduler { MGraphBuilder.cleanupCommandsForRecord(Rec, StreamsToDeallocate); } - void addNodeToLeaves( - cl::sycl::detail::MemObjRecord *Rec, cl::sycl::detail::Command *Cmd, - cl::sycl::access::mode Mode, - std::vector &ToEnqueue) { + void addNodeToLeaves(cl::sycl::detail::MemObjRecord *Rec, + cl::sycl::detail::Command *Cmd, + cl::sycl::access::mode Mode, + std::vector &ToEnqueue) { return MGraphBuilder.addNodeToLeaves(Rec, Cmd, Mode, ToEnqueue); } @@ -121,7 +125,8 @@ class MockScheduler : public cl::sycl::detail::Scheduler { cl::sycl::detail::EnqueueResultT &EnqueueResult, ReadLockT &GraphReadLock, cl::sycl::detail::BlockingT Blocking) { - return GraphProcessor::enqueueCommand(Cmd, EnqueueResult, GraphReadLock, Blocking); + return GraphProcessor::enqueueCommand(Cmd, EnqueueResult, GraphReadLock, + Blocking); } cl::sycl::detail::AllocaCommandBase * @@ -132,9 +137,7 @@ class MockScheduler : public cl::sycl::detail::Scheduler { return MGraphBuilder.getOrCreateAllocaForReq(Record, Req, Queue, ToEnqueue); } - ReadLockT acquireGraphReadLock() { - return ReadLockT{MGraphLock}; - } + ReadLockT acquireGraphReadLock() { return ReadLockT{MGraphLock}; } cl::sycl::detail::Command * insertMemoryMove(cl::sycl::detail::MemObjRecord *Record, From a20ec6d9b3a57cd435db83b1e1d08352258dc6c8 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 13 May 2021 18:08:38 +0300 Subject: [PATCH 08/19] Fix unit test Signed-off-by: Sergey Kanaev --- sycl/unittests/scheduler/LeafLimit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/scheduler/LeafLimit.cpp b/sycl/unittests/scheduler/LeafLimit.cpp index 03dfea7803b6c..ffed74ba0e1ec 100644 --- a/sycl/unittests/scheduler/LeafLimit.cpp +++ b/sycl/unittests/scheduler/LeafLimit.cpp @@ -48,7 +48,7 @@ TEST_F(SchedulerTest, LeafLimit) { std::vector ToEnqueue; // Add edges as leaves and exceed the leaf limit for (auto &LeafPtr : LeavesToAdd) { - MS.addNodeToLeaves(Rec, LeafPtr.get(), access::mode::read, ToEnqueue); + MS.addNodeToLeaves(Rec, LeafPtr.get(), access::mode::write, ToEnqueue); } // Check that the oldest leaf has been removed from the leaf list // and added as a dependency of the newest one instead From 0cf12172883dc8979da5db04319f2852d418716c Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Mon, 24 May 2021 16:50:32 +0300 Subject: [PATCH 09/19] Eliminate runtime failure Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/commands.cpp | 2 +- .../detail/scheduler/graph_processor.cpp | 23 +++++++++---- sycl/source/detail/scheduler/scheduler.cpp | 32 ++++++++++--------- sycl/source/detail/scheduler/scheduler.hpp | 4 +-- 4 files changed, 35 insertions(+), 26 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index c2bda55ae6cce..93abcdd2f6c9b 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -245,7 +245,7 @@ class DispatchHostTask { EmptyCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; for (const DepDesc &Dep : Deps) - Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement, Lock); + Scheduler::enqueueLeavesOfReqUnlocked(Dep.MDepRequirement); } } }; diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index 77be1841c7de0..4d09c00ecd11c 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -45,7 +45,7 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event, return; EnqueueResultT Res; - bool Enqueued = enqueueCommand(Cmd, Res, GraphReadLock, BLOCKING); + bool Enqueued = enqueueCommand(Cmd, Res, BLOCKING); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) // TODO: Reschedule commands. throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); @@ -57,7 +57,6 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event, bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, EnqueueResultT &EnqueueResult, - ReadLockT &GraphReadLock, BlockingT Blocking) { if (!Cmd || Cmd->isSuccessfullyEnqueued()) return true; @@ -71,8 +70,7 @@ bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, // Recursively enqueue all the dependencies first and // exit immediately if any of the commands cannot be enqueued. for (DepDesc &Dep : Cmd->MDeps) { - if (!enqueueCommand(Dep.MDepCommand, EnqueueResult, GraphReadLock, - Blocking)) + if (!enqueueCommand(Dep.MDepCommand, EnqueueResult, Blocking)) return false; } @@ -88,14 +86,25 @@ bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, // implemented. for (const EventImplPtr &Event : Cmd->getPreparedHostDepsEvents()) { if (Command *DepCmd = static_cast(Event->getCommand())) - if (!enqueueCommand(DepCmd, EnqueueResult, GraphReadLock, Blocking)) + if (!enqueueCommand(DepCmd, EnqueueResult, Blocking)) return false; } { - GraphReadLock.unlock(); + // Only graph read lock is to be held here. + // Enqueue process of a command may last quite a time. Having graph locked + // can introduce some thread starving (i.e. when the other thread attempts + // to acquire write lock and add a command to graph). + // Releasing read lock without other safety measures isn't an option here as + // the other thread could go into graph cleanup process (due to some event + // complete) and remove some dependencies from dependencies of the user of + // this command. An example: command A depends on commands B and C. This + // wants to enqueue A. Hence, it needs to enqueue B and C. So this thread + // gets into dependency list and starts enqueueing B right away. The other + // thread waits on completion of C and starts cleanup process. This thread + // is still in the middle of enqueue of B. The other thread modifies + // dependency list of A by removing C out of it. Iterators become invalid. bool Result = Cmd->enqueue(EnqueueResult, Blocking); - GraphReadLock.lock(); return Result; } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 6f2260a2b3416..d797460bab121 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -34,7 +34,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, #endif for (Command *Cmd : Record->MReadLeaves) { EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, GraphReadLock); + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -45,7 +45,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, } for (Command *Cmd : Record->MWriteLeaves) { EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, GraphReadLock); + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -57,7 +57,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, Command *ReleaseCmd = AllocaCmd->getReleaseCmd(); EnqueueResultT Res; bool Enqueued = - GraphProcessor::enqueueCommand(ReleaseCmd, Res, GraphReadLock); + GraphProcessor::enqueueCommand(ReleaseCmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -113,6 +113,8 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, { ReadLockT Lock(MGraphLock); + Command *NewCmd = static_cast(NewEvent->getCommand()); + EnqueueResultT Res; bool Enqueued; @@ -127,19 +129,19 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, }; for (Command *Cmd : AuxiliaryCmds) { - Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, Lock); + Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); try { if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Auxiliary enqueue process failed.", PI_INVALID_OPERATION); } catch (...) { + // enqueueCommand() func and if statement above may throw an exception, + // so destroy required resources to avoid memory leak CleanUp(); std::rethrow_exception(std::current_exception()); } } - Command *NewCmd = static_cast(NewEvent->getCommand()); - if (NewCmd) { // TODO: Check if lazy mode. EnqueueResultT Res; @@ -188,12 +190,12 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) { bool Enqueued; for (Command *Cmd : AuxiliaryCmds) { - Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, Lock); + Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); } - Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res, Lock); + Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); } catch (...) { @@ -307,12 +309,12 @@ EventImplPtr Scheduler::addHostAccessor(Requirement *Req) { bool Enqueued; for (Command *Cmd : AuxiliaryCmds) { - Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, ReadLock); + Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); } - Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res, ReadLock); + Enqueued = GraphProcessor::enqueueCommand(NewCmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); } @@ -329,21 +331,21 @@ void Scheduler::releaseHostAccessor(Requirement *Req) { BlockedCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueReady; - enqueueLeavesOfReqUnlocked(Req, Lock); + enqueueLeavesOfReqUnlocked(Req); } // static -void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req, - ReadLockT &GraphReadLock) { +void Scheduler::enqueueLeavesOfReqUnlocked(const Requirement *const Req) { MemObjRecord *Record = Req->MSYCLMemObj->MRecord.get(); - auto EnqueueLeaves = [&GraphReadLock](LeavesCollection &Leaves) { + auto EnqueueLeaves = [](LeavesCollection &Leaves) { for (Command *Cmd : Leaves) { EnqueueResultT Res; - bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res, GraphReadLock); + bool Enqueued = GraphProcessor::enqueueCommand(Cmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); } }; + EnqueueLeaves(Record->MReadLeaves); EnqueueLeaves(Record->MWriteLeaves); } diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 3d059e97ccacf..285f9c246a908 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -463,8 +463,7 @@ class Scheduler { /// class void acquireWriteLock(WriteLockT &Lock); - static void enqueueLeavesOfReqUnlocked(const Requirement *const Req, - ReadLockT &GraphReadLock); + static void enqueueLeavesOfReqUnlocked(const Requirement *const Req); /// Graph builder class. /// @@ -737,7 +736,6 @@ class Scheduler { /// \param GraphReadLock read-lock which is already acquired for reading /// \return true if the command is successfully enqueued. static bool enqueueCommand(Command *Cmd, EnqueueResultT &EnqueueResult, - ReadLockT &GraphReadLock, BlockingT Blocking = NON_BLOCKING); }; From d1e5b2cfb54b4541d5aab3e4d956a64c43282998 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 25 May 2021 12:30:56 +0300 Subject: [PATCH 10/19] Fix code style issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/scheduler.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index d797460bab121..5ebedb0ee9656 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -56,8 +56,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, for (AllocaCommandBase *AllocaCmd : Record->MAllocaCommands) { Command *ReleaseCmd = AllocaCmd->getReleaseCmd(); EnqueueResultT Res; - bool Enqueued = - GraphProcessor::enqueueCommand(ReleaseCmd, Res); + bool Enqueued = GraphProcessor::enqueueCommand(ReleaseCmd, Res); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); #ifdef XPTI_ENABLE_INSTRUMENTATION From 97e5b43024c47946b0c714d1f7ff5579b2a43e19 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 25 May 2021 12:53:53 +0300 Subject: [PATCH 11/19] Fix build issues Signed-off-by: Sergey Kanaev --- sycl/unittests/scheduler/BlockedCommands.cpp | 14 +++++++------- sycl/unittests/scheduler/FailedCommands.cpp | 2 +- sycl/unittests/scheduler/InOrderQueueDeps.cpp | 6 +++--- sycl/unittests/scheduler/SchedulerTestUtils.hpp | 4 +--- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/sycl/unittests/scheduler/BlockedCommands.cpp b/sycl/unittests/scheduler/BlockedCommands.cpp index 8cb055ffcb90d..19db8ba7447b6 100644 --- a/sycl/unittests/scheduler/BlockedCommands.cpp +++ b/sycl/unittests/scheduler/BlockedCommands.cpp @@ -23,7 +23,7 @@ TEST_F(SchedulerTest, BlockedCommands) { auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = - MockScheduler::enqueueCommand(&MockCmd, Res, Lock, detail::NON_BLOCKING); + MockScheduler::enqueueCommand(&MockCmd, Res, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED\n"; @@ -33,7 +33,7 @@ TEST_F(SchedulerTest, BlockedCommands) { MockCmd.MRetVal = CL_DEVICE_PARTITION_EQUALLY; Enqueued = - MockScheduler::enqueueCommand(&MockCmd, Res, Lock, detail::BLOCKING); + MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueFailed, Res.MResult) << "The command is expected to fail to enqueue.\n"; @@ -45,7 +45,7 @@ TEST_F(SchedulerTest, BlockedCommands) { MockCmd.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; MockCmd.MRetVal = CL_SUCCESS; Enqueued = - MockScheduler::enqueueCommand(&MockCmd, Res, Lock, detail::BLOCKING); + MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); ASSERT_TRUE(Enqueued && Res.MResult == detail::EnqueueResultT::SyclEnqueueSuccess) << "The command is expected to be successfully enqueued.\n"; @@ -92,7 +92,7 @@ TEST_F(SchedulerTest, DontEnqueueDepsIfOneOfThemIsBlocked) { auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = - MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); + MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED.\n"; @@ -123,7 +123,7 @@ TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = - MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); + MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED.\n"; @@ -134,7 +134,7 @@ TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { EXPECT_CALL(A, enqueue(_, _)).Times(0); EXPECT_CALL(B, enqueue(_, _)).Times(1); - Enqueued = MockScheduler::enqueueCommand(&A, Res, Lock, detail::BLOCKING); + Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueFailed, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED.\n"; @@ -177,7 +177,7 @@ TEST_F(SchedulerTest, EnqueueHostDependency) { auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = - MockScheduler::enqueueCommand(&A, Res, Lock, detail::NON_BLOCKING); + MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); ASSERT_TRUE(Enqueued) << "The command should be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueSuccess, Res.MResult) << "Enqueue operation should return successfully.\n"; diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 36ac78a65140f..37a7a71a4afdc 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -24,7 +24,7 @@ TEST_F(SchedulerTest, FailedDependency) { auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; bool Enqueued = - MockScheduler::enqueueCommand(&MUser, Res, Lock, detail::NON_BLOCKING); + MockScheduler::enqueueCommand(&MUser, Res, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Enqueue process must fail\n"; ASSERT_EQ(Res.MCmd, &MDep) << "Wrong failed command\n"; diff --git a/sycl/unittests/scheduler/InOrderQueueDeps.cpp b/sycl/unittests/scheduler/InOrderQueueDeps.cpp index 8d56b26d084fa..14c8a802d1772 100644 --- a/sycl/unittests/scheduler/InOrderQueueDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueDeps.cpp @@ -124,9 +124,9 @@ TEST_F(SchedulerTest, InOrderQueueDeps) { MS.insertMemoryMove(Record, &Req, DefaultHostQueue, AuxCmds); detail::EnqueueResultT Res; auto ReadLock = MS.acquireGraphReadLock(); - MockScheduler::enqueueCommand(Cmd, Res, ReadLock, detail::NON_BLOCKING); + MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); Cmd = MS.insertMemoryMove(Record, &Req, InOrderQueueImpl, AuxCmds); - MockScheduler::enqueueCommand(Cmd, Res, ReadLock, detail::NON_BLOCKING); + MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); Cmd = MS.insertMemoryMove(Record, &Req, DefaultHostQueue, AuxCmds); - MockScheduler::enqueueCommand(Cmd, Res, ReadLock, detail::NON_BLOCKING); + MockScheduler::enqueueCommand(Cmd, Res, detail::NON_BLOCKING); } diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 5485a6adb55af..ba244fa7c0bb8 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -123,10 +123,8 @@ class MockScheduler : public cl::sycl::detail::Scheduler { static bool enqueueCommand(cl::sycl::detail::Command *Cmd, cl::sycl::detail::EnqueueResultT &EnqueueResult, - ReadLockT &GraphReadLock, cl::sycl::detail::BlockingT Blocking) { - return GraphProcessor::enqueueCommand(Cmd, EnqueueResult, GraphReadLock, - Blocking); + return GraphProcessor::enqueueCommand(Cmd, EnqueueResult, Blocking); } cl::sycl::detail::AllocaCommandBase * From 69732b033ba269b21e74c9875161fbdfd1ed9e45 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Tue, 25 May 2021 13:17:52 +0300 Subject: [PATCH 12/19] Fix code style issue Signed-off-by: Sergey Kanaev --- sycl/unittests/scheduler/BlockedCommands.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/sycl/unittests/scheduler/BlockedCommands.cpp b/sycl/unittests/scheduler/BlockedCommands.cpp index 19db8ba7447b6..967b3ee75531c 100644 --- a/sycl/unittests/scheduler/BlockedCommands.cpp +++ b/sycl/unittests/scheduler/BlockedCommands.cpp @@ -32,8 +32,7 @@ TEST_F(SchedulerTest, BlockedCommands) { Res.MResult = detail::EnqueueResultT::SyclEnqueueSuccess; MockCmd.MRetVal = CL_DEVICE_PARTITION_EQUALLY; - Enqueued = - MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); + Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueFailed, Res.MResult) << "The command is expected to fail to enqueue.\n"; @@ -44,8 +43,7 @@ TEST_F(SchedulerTest, BlockedCommands) { Res = detail::EnqueueResultT{}; MockCmd.MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueReady; MockCmd.MRetVal = CL_SUCCESS; - Enqueued = - MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); + Enqueued = MockScheduler::enqueueCommand(&MockCmd, Res, detail::BLOCKING); ASSERT_TRUE(Enqueued && Res.MResult == detail::EnqueueResultT::SyclEnqueueSuccess) << "The command is expected to be successfully enqueued.\n"; @@ -91,8 +89,7 @@ TEST_F(SchedulerTest, DontEnqueueDepsIfOneOfThemIsBlocked) { MockScheduler MS; auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; - bool Enqueued = - MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); + bool Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED.\n"; @@ -122,8 +119,7 @@ TEST_F(SchedulerTest, EnqueueBlockedCommandEarlyExit) { MockScheduler MS; auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; - bool Enqueued = - MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); + bool Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); ASSERT_FALSE(Enqueued) << "Blocked command should not be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueBlocked, Res.MResult) << "Result of enqueueing blocked command should be BLOCKED.\n"; @@ -176,8 +172,7 @@ TEST_F(SchedulerTest, EnqueueHostDependency) { MockScheduler MS; auto Lock = MS.acquireGraphReadLock(); detail::EnqueueResultT Res; - bool Enqueued = - MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); + bool Enqueued = MockScheduler::enqueueCommand(&A, Res, detail::NON_BLOCKING); ASSERT_TRUE(Enqueued) << "The command should be enqueued\n"; ASSERT_EQ(detail::EnqueueResultT::SyclEnqueueSuccess, Res.MResult) << "Enqueue operation should return successfully.\n"; From 95cb36dacd0e375ab2de3de19ee7c21da239efb2 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Thu, 10 Jun 2021 19:20:00 +0300 Subject: [PATCH 13/19] Address review comments Signed-off-by: Sergey Kanaev --- .../source/detail/scheduler/graph_builder.cpp | 22 +++---------- .../detail/scheduler/graph_processor.cpp | 32 ++++++++----------- .../detail/scheduler/leaves_collection.cpp | 6 ++-- .../detail/scheduler/leaves_collection.hpp | 4 +-- sycl/source/detail/scheduler/scheduler.hpp | 11 +++++-- 5 files changed, 33 insertions(+), 42 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 8b362c1801ec1..383bcee2f9d7e 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -184,13 +184,13 @@ MemObjRecord *Scheduler::GraphBuilder::getOrInsertMemObjRecord( const size_t LeafLimit = 8; LeavesCollection::AllocateDependencyF AllocateDependency = [this](Command *Dependant, Command *Dependency, MemObjRecord *Record, - LeavesCollection::EnqueueListT *ToEnqueue) { + LeavesCollection::EnqueueListT &ToEnqueue) { // Add the old leaf as a dependency for the new one by duplicating one // of the requirements for the current record DepDesc Dep = findDepForRecord(Dependant, Record); Dep.MDepCommand = Dependency; if (Command *ConnectionCmd = Dependant->addDep(Dep)) - ToEnqueue->push_back(ConnectionCmd); + ToEnqueue.push_back(ConnectionCmd); Dependency->addUser(Dependant); --(Dependency->MLeafCounter); }; @@ -244,7 +244,7 @@ void Scheduler::GraphBuilder::addNodeToLeaves( LeavesCollection &Leaves{AccessMode == access::mode::read ? Record->MReadLeaves : Record->MWriteLeaves}; - if (Leaves.push_back(Cmd, &ToEnqueue)) + if (Leaves.push_back(Cmd, ToEnqueue)) ++Cmd->MLeafCounter; } @@ -700,7 +700,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( DefaultHostQueue, FullReq, true /* InitFromUserData */, nullptr /* LinkedAllocaCmd */); Record->MAllocaCommands.push_back(HostAllocaCmd); - Record->MWriteLeaves.push_back(HostAllocaCmd, &ToEnqueue); + Record->MWriteLeaves.push_back(HostAllocaCmd, ToEnqueue); ++(HostAllocaCmd->MLeafCounter); Record->MCurContext = DefaultHostQueue->getContextImplPtr(); } @@ -787,7 +787,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( } Record->MAllocaCommands.push_back(AllocaCmd); - Record->MWriteLeaves.push_back(AllocaCmd, &ToEnqueue); + Record->MWriteLeaves.push_back(AllocaCmd, ToEnqueue); ++(AllocaCmd->MLeafCounter); } return AllocaCmd; @@ -1261,18 +1261,6 @@ Command *Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd, ConnectCmd->MEmptyCmd = EmptyCmd; return ConnectCmd; -#if 0 - // FIXME graph builder shouldn't really enqueue commands. We're in the middle - // of enqueue process for some command Cmd. We're going to add a dependency - // for it. Need some nice and cute solution to enqueue ConnectCmd via standard - // scheduler/graph processor mechanisms. - // Though, we need this call to enqueue to launch ConnectCmd. - EnqueueResultT Res; - bool Enqueued = Scheduler::GraphProcessor::enqueueCommand(ConnectCmd, Res); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw runtime_error("Failed to enqueue a sync event between two contexts", - PI_INVALID_OPERATION); -#endif } } // namespace detail diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index 4d09c00ecd11c..478c70fdac927 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -90,24 +90,20 @@ bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, return false; } - { - // Only graph read lock is to be held here. - // Enqueue process of a command may last quite a time. Having graph locked - // can introduce some thread starving (i.e. when the other thread attempts - // to acquire write lock and add a command to graph). - // Releasing read lock without other safety measures isn't an option here as - // the other thread could go into graph cleanup process (due to some event - // complete) and remove some dependencies from dependencies of the user of - // this command. An example: command A depends on commands B and C. This - // wants to enqueue A. Hence, it needs to enqueue B and C. So this thread - // gets into dependency list and starts enqueueing B right away. The other - // thread waits on completion of C and starts cleanup process. This thread - // is still in the middle of enqueue of B. The other thread modifies - // dependency list of A by removing C out of it. Iterators become invalid. - bool Result = Cmd->enqueue(EnqueueResult, Blocking); - - return Result; - } + // Only graph read lock is to be held here. + // Enqueue process of a command may last quite a time. Having graph locked can + // introduce some thread starving (i.e. when the other thread attempts to + // acquire write lock and add a command to graph). Releasing read lock without + // other safety measures isn't an option here as the other thread could go + // into graph cleanup process (due to some event complete) and remove some + // dependencies from dependencies of the user of this command. + // An example: command A depends on commands B and C. This thread wants to + // enqueue A. Hence, it needs to enqueue B and C. So this thread gets into + // dependency list and starts enqueueing B right away. The other thread waits + // on completion of C and starts cleanup process. This thread is still in the + // middle of enqueue of B. The other thread modifies dependency list of A by + // removing C out of it. Iterators become invalid. + return Cmd->enqueue(EnqueueResult, Blocking); } } // namespace detail diff --git a/sycl/source/detail/scheduler/leaves_collection.cpp b/sycl/source/detail/scheduler/leaves_collection.cpp index b847bb425d106..0ae0bcfbb9c0e 100644 --- a/sycl/source/detail/scheduler/leaves_collection.cpp +++ b/sycl/source/detail/scheduler/leaves_collection.cpp @@ -49,7 +49,7 @@ size_t LeavesCollection::remove(value_type Cmd) { return eraseHostAccessorCommand(static_cast(Cmd)); } -bool LeavesCollection::push_back(value_type Cmd, EnqueueListT *ToEnqueue) { +bool LeavesCollection::push_back(value_type Cmd, EnqueueListT &ToEnqueue) { bool Result = false; if (isHostAccessorCmd(Cmd)) @@ -74,7 +74,7 @@ std::vector LeavesCollection::toVector() const { } bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd, - EnqueueListT *ToEnqueue) { + EnqueueListT &ToEnqueue) { // 1. find the oldest command with doOverlap() = true amongst the List // => OldCmd HostAccessorCommandSingleXRefT OldCmdIt; @@ -112,7 +112,7 @@ bool LeavesCollection::addHostAccessorCommand(EmptyCommand *Cmd, } bool LeavesCollection::addGenericCommand(Command *Cmd, - EnqueueListT *ToEnqueue) { + EnqueueListT &ToEnqueue) { if (MGenericCommands.full()) { Command *OldLeaf = MGenericCommands.front(); diff --git a/sycl/source/detail/scheduler/leaves_collection.hpp b/sycl/source/detail/scheduler/leaves_collection.hpp index bf1308119437b..dd9da9ff049b2 100644 --- a/sycl/source/detail/scheduler/leaves_collection.hpp +++ b/sycl/source/detail/scheduler/leaves_collection.hpp @@ -43,7 +43,7 @@ class LeavesCollection { // Make first command depend on the second using AllocateDependencyF = - std::function; + std::function; template class IteratorT; @@ -82,7 +82,7 @@ class LeavesCollection { } /// Returns true if insertion took place. Returns false otherwise. - bool push_back(value_type Cmd, EnqueueListT *ToEnqueue); + bool push_back(value_type Cmd, EnqueueListT &ToEnqueue); /// Replacement for std::remove with subsequent call to erase(newEnd, end()). /// This function is introduced here due to complexity of iterator. diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index 285f9c246a908..f8ac5a1f11e16 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -459,8 +459,7 @@ class Scheduler { /// Provides exclusive access to std::shared_timed_mutex object with deadlock /// avoidance /// - /// \param Lock is an instance of WriteLockT - /// class + /// \param Lock is an instance of WriteLockT, created with \c std::defer_lock void acquireWriteLock(WriteLockT &Lock); static void enqueueLeavesOfReqUnlocked(const Requirement *const Req); @@ -739,6 +738,14 @@ class Scheduler { BlockingT Blocking = NON_BLOCKING); }; + /// This function waits on all of the graph leaves which somehow use the + /// memory object which is represented by \c Record. The function is called + /// upon destruction of memory buffer. + /// \param Record memory record to await graph leaves of to finish + /// \param GraphReadLock locked graph read lock + /// + /// GraphReadLock will be unlocked/locked as needed. Upon return from the + /// function, GraphReadLock will be left in locked state. void waitForRecordToFinish(MemObjRecord *Record, ReadLockT &GraphReadLock); GraphBuilder MGraphBuilder; From f6b4d749b2513e75b09c9c9815e653ace4fbad18 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 10:32:10 +0300 Subject: [PATCH 14/19] Fix build issues Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/leaves_collection.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/leaves_collection.hpp b/sycl/source/detail/scheduler/leaves_collection.hpp index dd9da9ff049b2..54b162693355a 100644 --- a/sycl/source/detail/scheduler/leaves_collection.hpp +++ b/sycl/source/detail/scheduler/leaves_collection.hpp @@ -126,8 +126,8 @@ class LeavesCollection { AllocateDependencyF MAllocateDependency; - bool addGenericCommand(value_type Cmd, EnqueueListT *ToEnqueue); - bool addHostAccessorCommand(EmptyCommand *Cmd, EnqueueListT *ToEnqueue); + bool addGenericCommand(value_type Cmd, EnqueueListT &ToEnqueue); + bool addHostAccessorCommand(EmptyCommand *Cmd, EnqueueListT &ToEnqueue); // inserts a command to the end of list for its mem object void insertHostAccessorCommand(EmptyCommand *Cmd); From 949c42c8aa9765b3bc62cd4d344fce2c1c6b3786 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 10:32:37 +0300 Subject: [PATCH 15/19] Use event instead of command Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_processor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index 478c70fdac927..84267b93d4513 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -50,8 +50,10 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event, // TODO: Reschedule commands. throw runtime_error("Enqueue process failed.", PI_INVALID_OPERATION); + assert(Cmd->getEvent() == Event); + GraphReadLock.unlock(); - Cmd->getEvent()->waitInternal(); + Event->waitInternal(); GraphReadLock.lock(); } From 3dd86fe896187d3ee828b19884cf01d03e674b23 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 10:32:48 +0300 Subject: [PATCH 16/19] Add comments Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/scheduler.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index f8ac5a1f11e16..edc8848333e42 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -727,6 +727,9 @@ class Scheduler { /// Waits for the command, associated with Event passed, is completed. /// \param GraphReadLock read-lock which is already acquired for reading + /// + /// The function may unlock and lock GraphReadLock as needed. Upon return + /// the lock is left in locked state. static void waitForEvent(EventImplPtr Event, ReadLockT &GraphReadLock); /// Enqueues the command and all its dependencies. @@ -734,6 +737,9 @@ class Scheduler { /// \param EnqueueResult is set to specific status if enqueue failed. /// \param GraphReadLock read-lock which is already acquired for reading /// \return true if the command is successfully enqueued. + /// + /// The function may unlock and lock GraphReadLock as needed. Upon return + /// the lock is left in locked state. static bool enqueueCommand(Command *Cmd, EnqueueResultT &EnqueueResult, BlockingT Blocking = NON_BLOCKING); }; From 2702a2178d60cd6c841753653e82cf131d1ab414 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 14:15:31 +0300 Subject: [PATCH 17/19] Fix build issues Signed-off-by: Sergey Kanaev --- sycl/unittests/scheduler/LeavesCollection.cpp | 10 +++++----- sycl/unittests/scheduler/LinkedAllocaDependencies.cpp | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/unittests/scheduler/LeavesCollection.cpp b/sycl/unittests/scheduler/LeavesCollection.cpp index a339a5dce424f..a731f960c0c4a 100644 --- a/sycl/unittests/scheduler/LeavesCollection.cpp +++ b/sycl/unittests/scheduler/LeavesCollection.cpp @@ -55,7 +55,7 @@ TEST_F(LeavesCollectionTest, PushBack) { LeavesCollection::AllocateDependencyF AllocateDependency = [&](Command *, Command *, MemObjRecord *, - std::vector *) { + std::vector &) { ++TimesGenericWasFull; }; @@ -70,7 +70,7 @@ TEST_F(LeavesCollectionTest, PushBack) { for (size_t Idx = 0; Idx < GenericCmdsCapacity * 2; ++Idx) { Cmds.push_back(createGenericCommand(getSyclObjImpl(MQueue))); - LE.push_back(Cmds.back().get(), &ToEnqueue); + LE.push_back(Cmds.back().get(), ToEnqueue); } ASSERT_EQ(TimesGenericWasFull, GenericCmdsCapacity) @@ -100,7 +100,7 @@ TEST_F(LeavesCollectionTest, PushBack) { : createEmptyCommand(getSyclObjImpl(MQueue), MockReq); Cmds.push_back(Cmd); - LE.push_back(Cmds.back().get(), &ToEnqueue); + LE.push_back(Cmds.back().get(), ToEnqueue); } ASSERT_EQ(TimesGenericWasFull, GenericCmdsCapacity) @@ -121,7 +121,7 @@ TEST_F(LeavesCollectionTest, Remove) { LeavesCollection::AllocateDependencyF AllocateDependency = [](Command *, Command *Old, MemObjRecord *, - std::vector *) { --Old->MLeafCounter; }; + std::vector &) { --Old->MLeafCounter; }; { cl::sycl::buffer Buf(cl::sycl::range<1>(1)); @@ -137,7 +137,7 @@ TEST_F(LeavesCollectionTest, Remove) { : createEmptyCommand(getSyclObjImpl(MQueue), MockReq); Cmds.push_back(Cmd); - if (LE.push_back(Cmds.back().get(), &ToEnqueue)) + if (LE.push_back(Cmds.back().get(), ToEnqueue)) ++Cmd->MLeafCounter; } diff --git a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp index 08a28de3ee649..aa10bb446c32c 100644 --- a/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp +++ b/sycl/unittests/scheduler/LinkedAllocaDependencies.cpp @@ -69,7 +69,7 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { auto AllocaDep = [](cl::sycl::detail::Command *, cl::sycl::detail::Command *, cl::sycl::detail::MemObjRecord *, - std::vector *) {}; + std::vector &) {}; std::shared_ptr Record{ new cl::sycl::detail::MemObjRecord(DefaultHostQueue->getContextImplPtr(), @@ -86,7 +86,7 @@ TEST_F(SchedulerTest, LinkedAllocaDependencies) { DepCmd.MDeps.push_back({&DepDepCmd, DepDepCmd.getRequirement(), &AllocaCmd1}); DepDepCmd.MUsers.insert(&DepCmd); std::vector ToEnqueue; - Record->MWriteLeaves.push_back(&DepCmd, &ToEnqueue); + Record->MWriteLeaves.push_back(&DepCmd, ToEnqueue); MockScheduler MS; cl::sycl::detail::Command *AllocaCmd2 = From e9fa4345834be3c20729fcb83f8fbd13a89c53c4 Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 14:29:39 +0300 Subject: [PATCH 18/19] Eliminate lock-unlock sequence with no payload Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/graph_processor.cpp | 7 +++++-- sycl/source/detail/scheduler/scheduler.cpp | 4 +++- sycl/source/detail/scheduler/scheduler.hpp | 6 ++++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/scheduler/graph_processor.cpp b/sycl/source/detail/scheduler/graph_processor.cpp index 84267b93d4513..cc2e0cb15067c 100644 --- a/sycl/source/detail/scheduler/graph_processor.cpp +++ b/sycl/source/detail/scheduler/graph_processor.cpp @@ -37,7 +37,8 @@ Scheduler::GraphProcessor::getWaitList(EventImplPtr Event) { } void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event, - ReadLockT &GraphReadLock) { + ReadLockT &GraphReadLock, + bool LockTheLock) { Command *Cmd = getCommand(Event); // Command can be nullptr if user creates cl::sycl::event explicitly or the // event has been waited on by another thread @@ -54,7 +55,9 @@ void Scheduler::GraphProcessor::waitForEvent(EventImplPtr Event, GraphReadLock.unlock(); Event->waitInternal(); - GraphReadLock.lock(); + + if (LockTheLock) + GraphReadLock.lock(); } bool Scheduler::GraphProcessor::enqueueCommand(Command *Cmd, diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 5ebedb0ee9656..f64ebc45dcf4d 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -214,7 +214,9 @@ std::vector Scheduler::getWaitList(EventImplPtr Event) { void Scheduler::waitForEvent(EventImplPtr Event) { ReadLockT Lock(MGraphLock); - GraphProcessor::waitForEvent(std::move(Event), Lock); + // It's fine to leave the lock unlocked upon return from waitForEvent as + // there's no more actions to do here with graph + GraphProcessor::waitForEvent(std::move(Event), Lock, /*LockTheLock=*/ false); } static void deallocateStreams( diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index edc8848333e42..5193d61858849 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -727,10 +727,12 @@ class Scheduler { /// Waits for the command, associated with Event passed, is completed. /// \param GraphReadLock read-lock which is already acquired for reading + /// \param LockTheLock selects if graph lock should be locked upon return /// /// The function may unlock and lock GraphReadLock as needed. Upon return - /// the lock is left in locked state. - static void waitForEvent(EventImplPtr Event, ReadLockT &GraphReadLock); + /// the lock is left in locked state if and only if LockTheLock is true. + static void waitForEvent(EventImplPtr Event, ReadLockT &GraphReadLock, + bool LockTheLock = true); /// Enqueues the command and all its dependencies. /// From 68e86a6db049e3029b9ab601e3aa30cd21a15d0d Mon Sep 17 00:00:00 2001 From: Sergey Kanaev Date: Fri, 11 Jun 2021 14:37:00 +0300 Subject: [PATCH 19/19] Address style issue Signed-off-by: Sergey Kanaev --- sycl/source/detail/scheduler/scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index f64ebc45dcf4d..1f646ff6acf5b 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -216,7 +216,7 @@ void Scheduler::waitForEvent(EventImplPtr Event) { ReadLockT Lock(MGraphLock); // It's fine to leave the lock unlocked upon return from waitForEvent as // there's no more actions to do here with graph - GraphProcessor::waitForEvent(std::move(Event), Lock, /*LockTheLock=*/ false); + GraphProcessor::waitForEvent(std::move(Event), Lock, /*LockTheLock=*/false); } static void deallocateStreams(