From 31f4ab5ddc4225ab41121ef13a565fca894cef83 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Wed, 19 Aug 2020 17:45:34 +0300 Subject: [PATCH 1/9] Add support of multiple devices within a context This patch adds support of multiple devices within a context. Programs can be created from images or from SPIR-V binaries. Only kernels, created using invoking kernels functions (parallel_for, single_task, ...) are supported. Kernels, created in OpenCL interoperability mode (using sycl::program and sycl::kernel functions) are not supported. --- sycl/source/detail/kernel_program_cache.hpp | 6 +- sycl/source/detail/program_impl.cpp | 14 ++- .../program_manager/program_manager.cpp | 116 +++++++++--------- .../program_manager/program_manager.hpp | 25 ++-- sycl/source/detail/scheduler/commands.cpp | 35 +++--- .../context-with-multiple-devices.cpp | 49 ++++++++ 6 files changed, 156 insertions(+), 89 deletions(-) create mode 100644 sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 83261ce661142..478c1cc5cf3b8 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -67,7 +67,8 @@ class KernelProgramCache { using PiProgramT = std::remove_pointer::type; using PiProgramPtrT = std::atomic; using ProgramWithBuildStateT = BuildResult; - using ProgramCacheKeyT = std::pair; + using ProgramCacheKeyT = + std::pair, RT::PiDevice>; using ProgramCacheT = std::map; using ContextPtr = context_impl *; @@ -75,7 +76,8 @@ class KernelProgramCache { using PiKernelPtrT = std::atomic; using KernelWithBuildStateT = BuildResult; - using KernelByNameT = std::map; + using KernelByNameT = + std::map, KernelWithBuildStateT>; using KernelCacheT = std::map; ~KernelProgramCache(); diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index a73064e455067..d2652bdbc03c1 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -233,7 +233,7 @@ void program_impl::build_with_kernel_name(string_class KernelName, if (is_cacheable_with_options(BuildOptions)) { MProgramAndKernelCachingAllowed = true; MProgram = ProgramManager::getInstance().getBuiltPIProgram( - Module, get_context(), KernelName, this, + Module, get_context(), get_devices()[0], KernelName, this, /*JITCompilationIsRequired=*/(!BuildOptions.empty())); const detail::plugin &Plugin = getPlugin(); Plugin.call(MProgram); @@ -356,7 +356,7 @@ void program_impl::build(const string_class &Options) { check_device_feature_support(MDevices); vector_class Devices(get_pi_devices()); const detail::plugin &Plugin = getPlugin(); - ProgramManager::getInstance().flushSpecConstants(*this); + ProgramManager::getInstance().flushSpecConstants(*this, get_pi_devices()[0]); RT::PiResult Err = Plugin.call_nocheck( MProgram, Devices.size(), Devices.data(), Options.c_str(), nullptr, nullptr); @@ -404,7 +404,8 @@ RT::PiKernel program_impl::get_pi_kernel(const string_class &KernelName) const { if (is_cacheable()) { std::tie(Kernel, std::ignore) = ProgramManager::getInstance().getOrCreateKernel( - MProgramModuleHandle, get_context(), KernelName, this); + MProgramModuleHandle, get_context(), get_devices()[0], KernelName, + this); getPlugin().call(Kernel); } else { const detail::plugin &Plugin = getPlugin(); @@ -453,9 +454,10 @@ void program_impl::create_pi_program_with_kernel_name( bool JITCompilationIsRequired) { assert(!MProgram && "This program already has an encapsulated PI program"); ProgramManager &PM = ProgramManager::getInstance(); - RTDeviceBinaryImage &Img = PM.getDeviceImage( - Module, KernelName, get_context(), JITCompilationIsRequired); - MProgram = PM.createPIProgram(Img, get_context()); + RTDeviceBinaryImage &Img = + PM.getDeviceImage(Module, KernelName, get_context(), get_devices()[0], + JITCompilationIsRequired); + MProgram = PM.createPIProgram(Img, get_context(), get_devices()[0]); } template <> diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index fc45b310c94c6..75695566d640b 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -52,29 +52,10 @@ ProgramManager &ProgramManager::getInstance() { return Instance; } -static RT::PiDevice getFirstDevice(const ContextImplPtr &Context) { - pi_uint32 NumDevices = 0; - const detail::plugin &Plugin = Context->getPlugin(); - Plugin.call(Context->getHandleRef(), - PI_CONTEXT_INFO_NUM_DEVICES, - sizeof(NumDevices), &NumDevices, - /*param_value_size_ret=*/nullptr); - assert(NumDevices > 0 && "Context without devices?"); - - vector_class Devices(NumDevices); - size_t ParamValueSize = 0; - Plugin.call( - Context->getHandleRef(), PI_CONTEXT_INFO_DEVICES, - sizeof(cl_device_id) * NumDevices, &Devices[0], &ParamValueSize); - assert(ParamValueSize == sizeof(cl_device_id) * NumDevices && - "Number of CL_CONTEXT_DEVICES should match CL_CONTEXT_NUM_DEVICES."); - return Devices[0]; -} - static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, + const device &Device, const unsigned char *Data, size_t DataLen) { - // FIXME: we don't yet support multiple devices with a single binary. const detail::plugin &Plugin = Context->getPlugin(); #ifndef _NDEBUG pi_uint32 NumDevices = 0; @@ -87,10 +68,10 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, #endif RT::PiProgram Program; - RT::PiDevice Device = getFirstDevice(Context); + const auto PiDevice = getSyclObjImpl(Device)->getHandleRef(); pi_int32 BinaryStatus = CL_SUCCESS; Plugin.call( - Context->getHandleRef(), 1 /*one binary*/, &Device, &DataLen, &Data, + Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data, &BinaryStatus, &Program); if (BinaryStatus != CL_SUCCESS) { @@ -112,14 +93,16 @@ static RT::PiProgram createSpirvProgram(const ContextImplPtr Context, RTDeviceBinaryImage & ProgramManager::getDeviceImage(OSModuleHandle M, const string_class &KernelName, - const context &Context, + const context &Context, const device &Device, bool JITCompilationIsRequired) { if (DbgProgMgr > 0) std::cerr << ">>> ProgramManager::getDeviceImage(" << M << ", \"" - << KernelName << "\", " << getRawSyclObjImpl(Context) << ")\n"; + << KernelName << "\", " << getRawSyclObjImpl(Context) << ", " + << getRawSyclObjImpl(Device) << ", " << JITCompilationIsRequired + << ")\n"; KernelSetId KSId = getKernelSetId(M, KernelName); - return getDeviceImage(M, KSId, Context, JITCompilationIsRequired); + return getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired); } template @@ -290,9 +273,12 @@ static const char *getFormatStr(RT::PiDeviceBinaryType Format) { } RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, - const context &Context) { + const context &Context, + const device &Device) { if (DbgProgMgr > 0) - std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ")\n"; + std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", " + << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device) + << ")\n"; const pi_device_binary_struct &RawImg = Img.getRawData(); // perform minimal sanity checks on the device image and the descriptor @@ -329,12 +315,13 @@ RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, RT::PiProgram Res = Format == PI_DEVICE_BINARY_TYPE_SPIRV ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize) - : createBinaryProgram(Ctx, RawImg.BinaryStart, ImgSize); + : createBinaryProgram(Ctx, Device, RawImg.BinaryStart, ImgSize); { std::lock_guard Lock(MNativeProgramsMutex); // associate the PI program with the image it was created for - NativePrograms[Res] = &Img; + const auto PiDevice = getSyclObjImpl(Device)->getHandleRef(); + NativePrograms.emplace(std::make_pair(std::make_pair(Res, PiDevice), &Img)); } if (DbgProgMgr > 1) @@ -346,6 +333,7 @@ RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, const context &Context, + const device &Device, const string_class &KernelName, const program_impl *Prg, bool JITCompilationIsRequired) { @@ -364,15 +352,17 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, auto GetF = [](const Locked &LockedCache) -> ProgramCacheT & { return LockedCache.get(); }; - auto BuildF = [this, &M, &KSId, &Context, Prg, &JITCompilationIsRequired] { + auto BuildF = [this, &M, &KSId, &Context, &Device, Prg, + &JITCompilationIsRequired] { const RTDeviceBinaryImage &Img = - getDeviceImage(M, KSId, Context, JITCompilationIsRequired); + getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired); ContextImplPtr ContextImpl = getSyclObjImpl(Context); const detail::plugin &Plugin = ContextImpl->getPlugin(); - RT::PiProgram NativePrg = createPIProgram(Img, Context); + RT::PiProgram NativePrg = createPIProgram(Img, Context, Device); if (Prg) - flushSpecConstants(*Prg, NativePrg, &Img); + flushSpecConstants(*Prg, getSyclObjImpl(Device)->getHandleRef(), + NativePrg, &Img); ProgramPtr ProgramManaged( NativePrg, Plugin.getPiPlugin().PiFunctionTable.piProgramRelease); @@ -386,12 +376,8 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, !SYCLConfig::get()) DeviceLibReqMask = getDeviceLibReqMask(Img); - const std::vector &Devices = ContextImpl->getDevices(); - std::vector PiDevices(Devices.size()); - std::transform(Devices.begin(), Devices.end(), PiDevices.begin(), - [](const device Dev) { - return getRawSyclObjImpl(Dev)->getHandleRef(); - }); + vector_class PiDevices; + PiDevices.push_back(getRawSyclObjImpl(Device)->getHandleRef()); ProgramPtr BuiltProgram = build(std::move(ProgramManaged), ContextImpl, Img.getCompileOptions(), @@ -400,31 +386,36 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, { std::lock_guard Lock(MNativeProgramsMutex); - NativePrograms[BuiltProgram.get()] = &Img; + const auto PiDevice = getSyclObjImpl(Device)->getHandleRef(); + NativePrograms.emplace( + std::make_pair(std::make_pair(BuiltProgram.get(), PiDevice), &Img)); } return BuiltProgram.release(); }; - using KeyT = KernelProgramCache::ProgramCacheKeyT; SerializedObj SpecConsts; if (Prg) Prg->stableSerializeSpecConstRegistry(SpecConsts); + const auto PiDevice = getRawSyclObjImpl(Device)->getHandleRef(); auto BuildResult = getOrBuild( - Cache, KeyT(std::move(SpecConsts), KSId), AcquireF, GetF, BuildF); + Cache, + std::make_pair(std::make_pair(std::move(SpecConsts), KSId), PiDevice), + AcquireF, GetF, BuildF); return BuildResult->Ptr.load(); } -std::pair -ProgramManager::getOrCreateKernel(OSModuleHandle M, const context &Context, - const string_class &KernelName, - const program_impl *Prg) { +std::pair ProgramManager::getOrCreateKernel( + OSModuleHandle M, const context &Context, const device &Device, + const string_class &KernelName, const program_impl *Prg) { if (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getOrCreateKernel(" << M << ", " - << getRawSyclObjImpl(Context) << ", " << KernelName << ")\n"; + << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device) + << ", " << KernelName << ")\n"; } - RT::PiProgram Program = getBuiltPIProgram(M, Context, KernelName, Prg); + RT::PiProgram Program = + getBuiltPIProgram(M, Context, Device, KernelName, Prg); const ContextImplPtr Ctx = getSyclObjImpl(Context); using PiKernelT = KernelProgramCache::PiKernelT; @@ -457,8 +448,9 @@ ProgramManager::getOrCreateKernel(OSModuleHandle M, const context &Context, return Result; }; + const auto PiDevice = getRawSyclObjImpl(Device)->getHandleRef(); auto BuildResult = getOrBuild( - Cache, KernelName, AcquireF, GetF, BuildF); + Cache, std::make_pair(KernelName, PiDevice), AcquireF, GetF, BuildF); return std::make_pair(BuildResult->Ptr.load(), &(BuildResult->MBuildResultMutex)); } @@ -642,11 +634,13 @@ ProgramManager::ProgramManager() { RTDeviceBinaryImage & ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId, - const context &Context, + const context &Context, const device &Device, bool JITCompilationIsRequired) { if (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getDeviceImage(" << M << ", \"" << KSId - << "\", " << getRawSyclObjImpl(Context) << ")\n"; + << "\", " << getRawSyclObjImpl(Context) << ", " + << getRawSyclObjImpl(Device) << ", " << JITCompilationIsRequired + << ")\n"; std::cerr << "available device images:\n"; debugPrintBinaryImages(); @@ -668,7 +662,8 @@ ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId, RawImgs[I] = const_cast(&Imgs[I]->getRawData()); Ctx->getPlugin().call( - getFirstDevice(Ctx), RawImgs.data(), (cl_uint)RawImgs.size(), &ImgInd); + getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(), + (cl_uint)RawImgs.size(), &ImgInd); if (JITCompilationIsRequired) { // If the image is already compiled with AOT, throw an exception. @@ -1001,6 +996,7 @@ void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img, } void ProgramManager::flushSpecConstants(const program_impl &Prg, + RT::PiDevice Device, RT::PiProgram NativePrg, const RTDeviceBinaryImage *Img) { if (DbgProgMgr > 2) { @@ -1018,7 +1014,7 @@ void ProgramManager::flushSpecConstants(const program_impl &Prg, // caller hasn't provided the image object - find it { // make sure NativePrograms map access is synchronized std::lock_guard Lock(MNativeProgramsMutex); - auto It = NativePrograms.find(NativePrg); + auto It = NativePrograms.find(std::make_pair(NativePrg, Device)); if (It == NativePrograms.end()) throw sycl::experimental::spec_const_error( "spec constant is set in a program w/o a binary image", @@ -1054,15 +1050,18 @@ uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) { // TODO consider another approach with storing the masks in the integration // header instead. ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask( - OSModuleHandle M, const context &Context, pi::PiProgram NativePrg, - const string_class &KernelName, bool KnownProgram) { + OSModuleHandle M, const context &Context, const device &Device, + pi::PiProgram NativePrg, const string_class &KernelName, + bool KnownProgram) { // If instructed to use a spv file, assume no eliminated arguments. if (m_UseSpvFile && M == OSUtil::ExeModuleHandle) return {}; + const auto PiDevice = getSyclObjImpl(Device)->getHandleRef(); + { std::lock_guard Lock(MNativeProgramsMutex); - auto ImgIt = NativePrograms.find(NativePrg); + auto ImgIt = NativePrograms.find(std::make_pair(NativePrg, PiDevice)); if (ImgIt != NativePrograms.end()) { auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second); if (MapIt != m_EliminatedKernelArgMasks.end()) @@ -1089,10 +1088,11 @@ ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask( return {}; std::rethrow_exception(std::current_exception()); } - RTDeviceBinaryImage &Img = getDeviceImage(M, KSId, Context); + RTDeviceBinaryImage &Img = getDeviceImage(M, KSId, Context, Device); { std::lock_guard Lock(MNativeProgramsMutex); - NativePrograms[NativePrg] = &Img; + NativePrograms.emplace( + std::make_pair(std::make_pair(NativePrg, PiDevice), &Img)); } auto MapIt = m_EliminatedKernelArgMasks.find(&Img); if (MapIt != m_EliminatedKernelArgMasks.end()) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 1e4329b3fd925..18294f85f57e8 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -67,14 +67,16 @@ class ProgramManager { RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M, const string_class &KernelName, const context &Context, + const device &Device, bool JITCompilationIsRequired = false); RT::PiProgram createPIProgram(const RTDeviceBinaryImage &Img, - const context &Context); + const context &Context, const device &Device); /// Builds or retrieves from cache a program defining the kernel with given /// name. /// \param M idenfies the OS module the kernel comes from (multiple OS modules /// may have kernels with the same name) /// \param Context the context to build the program with + /// \param Device the device for which the program is built /// \param KernelName the kernel's name /// \param Prg provides build context information, such as /// current specialization constants settings; can be nullptr. @@ -83,12 +85,14 @@ class ProgramManager { /// \param JITCompilationIsRequired If JITCompilationIsRequired is true /// add a check that kernel is compiled, otherwise don't add the check. RT::PiProgram getBuiltPIProgram(OSModuleHandle M, const context &Context, + const device &Device, const string_class &KernelName, const program_impl *Prg = nullptr, bool JITCompilationIsRequired = false); std::pair getOrCreateKernel(OSModuleHandle M, const context &Context, - const string_class &KernelName, const program_impl *Prg); + const device &Device, const string_class &KernelName, + const program_impl *Prg); RT::PiProgram getPiProgramFromPiKernel(RT::PiKernel Kernel, const ContextImplPtr Context); @@ -102,13 +106,14 @@ class ProgramManager { /// \param Prg the program object to get spec constant settings from. /// Passing program_impl by raw reference is OK, since it is not /// captured anywhere once the function returns. + /// \param Device the device assosiated with the native program /// \param NativePrg the native program, target for spec constant setting; if /// not null then overrides the native program in Prg /// \param Img A source of the information about which constants need /// setting and symboling->integer spec constnant ID mapping. If not /// null, overrides native program->binary image binding maintained by /// the program manager. - void flushSpecConstants(const program_impl &Prg, + void flushSpecConstants(const program_impl &Prg, pi::PiDevice Device, pi::PiProgram NativePrg = nullptr, const RTDeviceBinaryImage *Img = nullptr); uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); @@ -118,16 +123,16 @@ class ProgramManager { /// \param M identifies the OS module the kernel comes from (multiple OS /// modules may have kernels with the same name). /// \param Context the context associated with the kernel. + /// \param Device the device associated with the context. /// \param NativePrg the PI program associated with the kernel. /// \param KernelName the name of the kernel. /// \param KnownProgram indicates whether the PI program is guaranteed to /// be known to program manager (built with its API) or not (not /// cacheable or constructed with interoperability). - KernelArgMask getEliminatedKernelArgMask(OSModuleHandle M, - const context &Context, - pi::PiProgram NativePrg, - const string_class &KernelName, - bool KnownProgram); + KernelArgMask + getEliminatedKernelArgMask(OSModuleHandle M, const context &Context, + const device &Device, pi::PiProgram NativePrg, + const string_class &KernelName, bool KnownProgram); private: ProgramManager(); @@ -137,6 +142,7 @@ class ProgramManager { RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M, KernelSetId KSId, const context &Context, + const device &Device, bool JITCompilationIsRequired = false); using ProgramPtr = unique_ptr_class, decltype(&::piProgramRelease)>; @@ -203,7 +209,8 @@ class ProgramManager { // the underlying program disposed of), so the map can't be used in any way // other than binary image lookup with known live PiProgram as the key. // NOTE: access is synchronized via the MNativeProgramsMutex - std::unordered_map NativePrograms; + std::map, const RTDeviceBinaryImage *> + NativePrograms; /// Protects NativePrograms that can be changed by class' methods. std::mutex MNativeProgramsMutex; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index efbe0373f06e6..01f5121d54993 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1909,6 +1909,12 @@ cl_int ExecCGCommand::enqueueImp() { if (nullptr != ExecKernel->MSyclKernel) { assert(ExecKernel->MSyclKernel->get_info() == Context); + if (Context.get_devices().size() > 1) { + throw feature_not_supported( + "multiple devices within a context are not supported with " + "sycl::program and sycl::kernel", + PI_INVALID_OPERATION); + } Kernel = ExecKernel->MSyclKernel->getHandleRef(); auto SyclProg = detail::getSyclObjImpl( @@ -1920,15 +1926,15 @@ cl_int ExecCGCommand::enqueueImp() { detail::ProgramManager::getInstance().getOrCreateKernel( ExecKernel->MOSModuleHandle, ExecKernel->MSyclKernel->get_info(), - ExecKernel->MKernelName, SyclProg.get()); + MQueue->get_device(), ExecKernel->MKernelName, SyclProg.get()); assert(FoundKernel == Kernel); } else KnownProgram = false; } else { std::tie(Kernel, KernelMutex) = detail::ProgramManager::getInstance().getOrCreateKernel( - ExecKernel->MOSModuleHandle, Context, ExecKernel->MKernelName, - nullptr); + ExecKernel->MOSModuleHandle, Context, MQueue->get_device(), + ExecKernel->MKernelName, nullptr); MQueue->getPlugin().call( Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(RT::PiProgram), &Program, nullptr); @@ -1940,8 +1946,8 @@ cl_int ExecCGCommand::enqueueImp() { !ExecKernel->MSyclKernel->isCreatedFromSource()) { EliminatedArgMask = detail::ProgramManager::getInstance().getEliminatedKernelArgMask( - ExecKernel->MOSModuleHandle, Context, Program, - ExecKernel->MKernelName, KnownProgram); + ExecKernel->MOSModuleHandle, Context, MQueue->get_device(), + Program, ExecKernel->MKernelName, KnownProgram); } if (KernelMutex != nullptr) { // For cacheable kernels, we use per-kernel mutex @@ -1996,15 +2002,16 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call(RawEvents.size(), &RawEvents[0]); } std::vector ReqMemObjs; - // Extract the Mem Objects for all Requirements, to ensure they are available if - // a user ask for them inside the interop task scope - const auto& HandlerReq = ExecInterop->MRequirements; - std::for_each(std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement* Req) { - AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); - interop_handler::ReqToMem ReqToMem = std::make_pair(Req, MemArg); - ReqMemObjs.emplace_back(ReqToMem); - }); + // Extract the Mem Objects for all Requirements, to ensure they are + // available if a user ask for them inside the interop task scope + const auto &HandlerReq = ExecInterop->MRequirements; + std::for_each( + std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement *Req) { + AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); + auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); + interop_handler::ReqToMem ReqToMem = std::make_pair(Req, MemArg); + ReqMemObjs.emplace_back(ReqToMem); + }); std::sort(std::begin(ReqMemObjs), std::end(ReqMemObjs)); interop_handler InteropHandler(std::move(ReqMemObjs), MQueue); diff --git a/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp new file mode 100644 index 0000000000000..86d056dc06ed1 --- /dev/null +++ b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp @@ -0,0 +1,49 @@ +// REQUIRES: cpu, accelerator, aoc + +// UNSUPPORTED: cuda, level_zero + +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t1.out +// RUN: %CPU_RUN_PLACEHOLDER CL_CONFIG_CPU_EMULATE_DEVICES=2 %t1.out +// RUN: %CPU_RUN_PLACEHOLDER CL_CONFIG_CPU_EMULATE_DEVICES=4 %t1.out +// RUN: %clangxx -fsycl -fintelfpga -fsycl-unnamed-lambda %s -o %t2.out +// RUN: %ACC_RUN_PLACEHOLDER CL_CONFIG_CPU_EMULATE_DEVICES=2 %t2.out + +#include + +using namespace cl::sycl; + +void exceptionHandler(sycl::exception_list exceptions) { + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception const &e) { + std::cout << "Caught asynchronous SYCL exception:\n" + << e.what() << std::endl; + } + } +} + +int main() { + std::vector DeviceList = sycl::device::get_devices(); + + // remove host device from the list + DeviceList.erase(std::remove_if(DeviceList.begin(), DeviceList.end(), + [](auto Device) { return Device.is_host(); }), + DeviceList.end()); + + sycl::context Context(DeviceList, &exceptionHandler); + + std::vector QueueList; + for (const auto &Device : Context.get_devices()) { + sycl::queue Queue(Context, Device, &exceptionHandler); + QueueList.push_back(Queue); + } + + for (auto &Queue : QueueList) { + Queue.submit([&](sycl::handler &cgh) { + cgh.parallel_for(range<1>(100), [=](id<1> i) {}); + }); + } + + return 0; +} \ No newline at end of file From 481b600c00aca6e0ca2efea867ecdc271bfa7513 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Wed, 19 Aug 2020 18:14:38 +0300 Subject: [PATCH 2/9] Fix formatting --- sycl/source/detail/scheduler/commands.cpp | 19 +++++++++---------- .../context-with-multiple-devices.cpp | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 01f5121d54993..184f3942e298e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2002,16 +2002,15 @@ cl_int ExecCGCommand::enqueueImp() { Plugin.call(RawEvents.size(), &RawEvents[0]); } std::vector ReqMemObjs; - // Extract the Mem Objects for all Requirements, to ensure they are - // available if a user ask for them inside the interop task scope - const auto &HandlerReq = ExecInterop->MRequirements; - std::for_each( - std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement *Req) { - AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); - interop_handler::ReqToMem ReqToMem = std::make_pair(Req, MemArg); - ReqMemObjs.emplace_back(ReqToMem); - }); + // Extract the Mem Objects for all Requirements, to ensure they are available if + // a user ask for them inside the interop task scope + const auto& HandlerReq = ExecInterop->MRequirements; + std::for_each(std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement *Req) { + AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); + auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); + interop_handler::ReqToMem ReqToMem = std::make_pair(Req, MemArg); + ReqMemObjs.emplace_back(ReqToMem); + }); std::sort(std::begin(ReqMemObjs), std::end(ReqMemObjs)); interop_handler InteropHandler(std::move(ReqMemObjs), MQueue); diff --git a/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp index 86d056dc06ed1..e87ff6deb90dc 100644 --- a/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp +++ b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp @@ -46,4 +46,4 @@ int main() { } return 0; -} \ No newline at end of file +} From c1fa7d383e6f52068bd523c94813953697b9da06 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Wed, 19 Aug 2020 18:16:19 +0300 Subject: [PATCH 3/9] Remove unnecessary formating by clang-format --- sycl/source/detail/scheduler/commands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 184f3942e298e..7dd611e63cf2f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2005,7 +2005,7 @@ cl_int ExecCGCommand::enqueueImp() { // Extract the Mem Objects for all Requirements, to ensure they are available if // a user ask for them inside the interop task scope const auto& HandlerReq = ExecInterop->MRequirements; - std::for_each(std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement *Req) { + std::for_each(std::begin(HandlerReq), std::end(HandlerReq), [&](Requirement* Req) { AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); auto MemArg = reinterpret_cast(AllocaCmd->getMemAllocation()); interop_handler::ReqToMem ReqToMem = std::make_pair(Req, MemArg); From edb74ceea18eaee5170e0fa4760a000679e705a1 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Thu, 20 Aug 2020 17:10:23 +0300 Subject: [PATCH 4/9] Fix impl to make all LIT tests pass; apply CR comments; minor improvements --- sycl/source/detail/program_impl.cpp | 22 +++++++++++++++- .../program_manager/program_manager.cpp | 25 ++++++++++++++++++- sycl/source/detail/scheduler/commands.cpp | 6 ----- .../context-with-multiple-devices.cpp | 8 +++--- 4 files changed, 48 insertions(+), 13 deletions(-) diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index d2652bdbc03c1..522579acad7e6 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -28,7 +28,14 @@ program_impl::program_impl(ContextImplPtr Context) program_impl::program_impl(ContextImplPtr Context, vector_class DeviceList) - : MContext(Context), MDevices(DeviceList) {} + : MContext(Context), MDevices(DeviceList) { + if (Context->getDevices().size() > 1) { + throw feature_not_supported( + "multiple devices within a context are not supported with " + "sycl::program and sycl::kernel", + PI_INVALID_OPERATION); + } +} program_impl::program_impl( vector_class> ProgramList, @@ -51,6 +58,12 @@ program_impl::program_impl( } MContext = ProgramList[0]->MContext; + if (MContext->getDevices().size() > 1) { + throw feature_not_supported( + "multiple devices within a context are not supported with " + "sycl::program and sycl::kernel", + PI_INVALID_OPERATION); + } MDevices = ProgramList[0]->MDevices; vector_class DevicesSorted; if (!is_host()) { @@ -105,6 +118,13 @@ program_impl::program_impl(ContextImplPtr Context, RT::PiProgram Program) : MProgram(Program), MContext(Context), MLinkable(true) { + if (Context->getDevices().size() > 1) { + throw feature_not_supported( + "multiple devices within a context are not supported with " + "sycl::program and sycl::kernel", + PI_INVALID_OPERATION); + } + const detail::plugin &Plugin = getPlugin(); if (MProgram == nullptr) { assert(InteropProgram && diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 75695566d640b..a19c0da5f0d4e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -376,8 +376,31 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, !SYCLConfig::get()) DeviceLibReqMask = getDeviceLibReqMask(Img); + bool ContextHasSubDevices = false; + const vector_class &Devices = ContextImpl->getDevices(); + for (const auto &Device : Devices) { + try { + // Device.get_info(); should throw + // sycl::invalid_object_error exception if Device is not a sub device. + // If the exception doesn't throw, it means that context has a sub + // device and we can quit the loop. + Device.get_info(); + ContextHasSubDevices = true; + break; + } catch (sycl::invalid_object_error const &E) { + } + } + vector_class PiDevices; - PiDevices.push_back(getRawSyclObjImpl(Device)->getHandleRef()); + if (ContextHasSubDevices) { + PiDevices.resize(Devices.size()); + std::transform(Devices.begin(), Devices.end(), PiDevices.begin(), + [](const device Dev) { + return getRawSyclObjImpl(Dev)->getHandleRef(); + }); + } else { + PiDevices.push_back(getRawSyclObjImpl(Device)->getHandleRef()); + } ProgramPtr BuiltProgram = build(std::move(ProgramManaged), ContextImpl, Img.getCompileOptions(), diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 7dd611e63cf2f..34fd1616253c9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1909,12 +1909,6 @@ cl_int ExecCGCommand::enqueueImp() { if (nullptr != ExecKernel->MSyclKernel) { assert(ExecKernel->MSyclKernel->get_info() == Context); - if (Context.get_devices().size() > 1) { - throw feature_not_supported( - "multiple devices within a context are not supported with " - "sycl::program and sycl::kernel", - PI_INVALID_OPERATION); - } Kernel = ExecKernel->MSyclKernel->getHandleRef(); auto SyclProg = detail::getSyclObjImpl( diff --git a/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp index e87ff6deb90dc..6b12c987ce8b4 100644 --- a/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp +++ b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp @@ -10,13 +10,11 @@ #include -using namespace cl::sycl; - void exceptionHandler(sycl::exception_list exceptions) { for (std::exception_ptr const &e : exceptions) { try { std::rethrow_exception(e); - } catch (cl::sycl::exception const &e) { + } catch (sycl::exception const &e) { std::cout << "Caught asynchronous SYCL exception:\n" << e.what() << std::endl; } @@ -24,7 +22,7 @@ void exceptionHandler(sycl::exception_list exceptions) { } int main() { - std::vector DeviceList = sycl::device::get_devices(); + std::vector DeviceList = sycl::device::get_devices(); // remove host device from the list DeviceList.erase(std::remove_if(DeviceList.begin(), DeviceList.end(), @@ -41,7 +39,7 @@ int main() { for (auto &Queue : QueueList) { Queue.submit([&](sycl::handler &cgh) { - cgh.parallel_for(range<1>(100), [=](id<1> i) {}); + cgh.parallel_for(100, [=](auto i) {}); }); } From 476dd3e46708734da1433959642be6f1a067d108 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Thu, 20 Aug 2020 17:59:27 +0300 Subject: [PATCH 5/9] Fix comp error on Windows and fix clang-format --- sycl/source/detail/program_manager/program_manager.cpp | 2 +- .../context-with-multiple-devices.cpp | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index a19c0da5f0d4e..c0733ef2fcc9c 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -387,7 +387,7 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, Device.get_info(); ContextHasSubDevices = true; break; - } catch (sycl::invalid_object_error const &E) { + } catch (sycl::invalid_object_error const &) { } } diff --git a/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp index 6b12c987ce8b4..47c7da34e46a2 100644 --- a/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp +++ b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp @@ -38,9 +38,8 @@ int main() { } for (auto &Queue : QueueList) { - Queue.submit([&](sycl::handler &cgh) { - cgh.parallel_for(100, [=](auto i) {}); - }); + Queue.submit( + [&](sycl::handler &cgh) { cgh.parallel_for(100, [=](auto i) {}); }); } return 0; From 112c27f32acc7976e86a4b2215ac6b46592160c9 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 21 Aug 2020 17:29:02 +0300 Subject: [PATCH 6/9] Apply CR comments --- sycl/source/detail/program_impl.cpp | 8 +++--- .../program_manager/program_manager.cpp | 26 +++++++------------ .../program_manager/program_manager.hpp | 3 +-- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index 522579acad7e6..61e7ac555f1a7 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -474,10 +474,10 @@ void program_impl::create_pi_program_with_kernel_name( bool JITCompilationIsRequired) { assert(!MProgram && "This program already has an encapsulated PI program"); ProgramManager &PM = ProgramManager::getInstance(); - RTDeviceBinaryImage &Img = - PM.getDeviceImage(Module, KernelName, get_context(), get_devices()[0], - JITCompilationIsRequired); - MProgram = PM.createPIProgram(Img, get_context(), get_devices()[0]); + const device FirstDevice = get_devices()[0]; + RTDeviceBinaryImage &Img = PM.getDeviceImage( + Module, KernelName, get_context(), FirstDevice, JITCompilationIsRequired); + MProgram = PM.createPIProgram(Img, get_context(), FirstDevice); } template <> diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index c0733ef2fcc9c..c473cb0a1e8fc 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -68,7 +68,7 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, #endif RT::PiProgram Program; - const auto PiDevice = getSyclObjImpl(Device)->getHandleRef(); + const RT::PiDevice PiDevice = getSyclObjImpl(Device)->getHandleRef(); pi_int32 BinaryStatus = CL_SUCCESS; Plugin.call( Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data, @@ -320,8 +320,7 @@ RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, { std::lock_guard Lock(MNativeProgramsMutex); // associate the PI program with the image it was created for - const auto PiDevice = getSyclObjImpl(Device)->getHandleRef(); - NativePrograms.emplace(std::make_pair(std::make_pair(Res, PiDevice), &Img)); + NativePrograms[Res] = &Img; } if (DbgProgMgr > 1) @@ -378,7 +377,7 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, bool ContextHasSubDevices = false; const vector_class &Devices = ContextImpl->getDevices(); - for (const auto &Device : Devices) { + for (const device &Device : Devices) { try { // Device.get_info(); should throw // sycl::invalid_object_error exception if Device is not a sub device. @@ -395,7 +394,7 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, if (ContextHasSubDevices) { PiDevices.resize(Devices.size()); std::transform(Devices.begin(), Devices.end(), PiDevices.begin(), - [](const device Dev) { + [](const device &Dev) { return getRawSyclObjImpl(Dev)->getHandleRef(); }); } else { @@ -409,9 +408,7 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, { std::lock_guard Lock(MNativeProgramsMutex); - const auto PiDevice = getSyclObjImpl(Device)->getHandleRef(); - NativePrograms.emplace( - std::make_pair(std::make_pair(BuiltProgram.get(), PiDevice), &Img)); + NativePrograms[BuiltProgram.get()] = &Img; } return BuiltProgram.release(); }; @@ -420,7 +417,7 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, if (Prg) Prg->stableSerializeSpecConstRegistry(SpecConsts); - const auto PiDevice = getRawSyclObjImpl(Device)->getHandleRef(); + const RT::PiDevice PiDevice = getRawSyclObjImpl(Device)->getHandleRef(); auto BuildResult = getOrBuild( Cache, std::make_pair(std::make_pair(std::move(SpecConsts), KSId), PiDevice), @@ -471,7 +468,7 @@ std::pair ProgramManager::getOrCreateKernel( return Result; }; - const auto PiDevice = getRawSyclObjImpl(Device)->getHandleRef(); + const RT::PiDevice PiDevice = getRawSyclObjImpl(Device)->getHandleRef(); auto BuildResult = getOrBuild( Cache, std::make_pair(KernelName, PiDevice), AcquireF, GetF, BuildF); return std::make_pair(BuildResult->Ptr.load(), @@ -1037,7 +1034,7 @@ void ProgramManager::flushSpecConstants(const program_impl &Prg, // caller hasn't provided the image object - find it { // make sure NativePrograms map access is synchronized std::lock_guard Lock(MNativeProgramsMutex); - auto It = NativePrograms.find(std::make_pair(NativePrg, Device)); + auto It = NativePrograms.find(NativePrg); if (It == NativePrograms.end()) throw sycl::experimental::spec_const_error( "spec constant is set in a program w/o a binary image", @@ -1080,11 +1077,9 @@ ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask( if (m_UseSpvFile && M == OSUtil::ExeModuleHandle) return {}; - const auto PiDevice = getSyclObjImpl(Device)->getHandleRef(); - { std::lock_guard Lock(MNativeProgramsMutex); - auto ImgIt = NativePrograms.find(std::make_pair(NativePrg, PiDevice)); + auto ImgIt = NativePrograms.find(NativePrg); if (ImgIt != NativePrograms.end()) { auto MapIt = m_EliminatedKernelArgMasks.find(ImgIt->second); if (MapIt != m_EliminatedKernelArgMasks.end()) @@ -1114,8 +1109,7 @@ ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask( RTDeviceBinaryImage &Img = getDeviceImage(M, KSId, Context, Device); { std::lock_guard Lock(MNativeProgramsMutex); - NativePrograms.emplace( - std::make_pair(std::make_pair(NativePrg, PiDevice), &Img)); + NativePrograms[NativePrg] = &Img; } auto MapIt = m_EliminatedKernelArgMasks.find(&Img); if (MapIt != m_EliminatedKernelArgMasks.end()) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 18294f85f57e8..7d1c852bbfc93 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -209,8 +209,7 @@ class ProgramManager { // the underlying program disposed of), so the map can't be used in any way // other than binary image lookup with known live PiProgram as the key. // NOTE: access is synchronized via the MNativeProgramsMutex - std::map, const RTDeviceBinaryImage *> - NativePrograms; + std::map NativePrograms; /// Protects NativePrograms that can be changed by class' methods. std::mutex MNativeProgramsMutex; From ec7cd646494c30ae773c9575e3d0d52e9cabc679 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Tue, 25 Aug 2020 13:39:12 +0300 Subject: [PATCH 7/9] Improve devicelib usage and fix CR comments --- sycl/source/detail/context_impl.hpp | 6 +- .../program_manager/program_manager.cpp | 150 +++++++----------- .../program_manager/program_manager.hpp | 15 +- .../context-with-multiple-devices.cpp | 7 +- 4 files changed, 74 insertions(+), 104 deletions(-) diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index c6d1953b23d98..1bcee24a9401d 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -134,7 +134,8 @@ class context_impl { /// more details. /// /// \returns a map with device library programs. - std::map &getCachedLibPrograms() { + std::map, RT::PiProgram> & + getCachedLibPrograms() { return MCachedLibPrograms; } @@ -155,7 +156,8 @@ class context_impl { PlatformImplPtr MPlatform; bool MHostContext; bool MUseCUDAPrimaryContext; - std::map MCachedLibPrograms; + std::map, RT::PiProgram> + MCachedLibPrograms; mutable KernelProgramCache MKernelProgramCache; }; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index c473cb0a1e8fc..5fec8f1f26cef 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -375,35 +375,9 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, !SYCLConfig::get()) DeviceLibReqMask = getDeviceLibReqMask(Img); - bool ContextHasSubDevices = false; - const vector_class &Devices = ContextImpl->getDevices(); - for (const device &Device : Devices) { - try { - // Device.get_info(); should throw - // sycl::invalid_object_error exception if Device is not a sub device. - // If the exception doesn't throw, it means that context has a sub - // device and we can quit the loop. - Device.get_info(); - ContextHasSubDevices = true; - break; - } catch (sycl::invalid_object_error const &) { - } - } - - vector_class PiDevices; - if (ContextHasSubDevices) { - PiDevices.resize(Devices.size()); - std::transform(Devices.begin(), Devices.end(), PiDevices.begin(), - [](const device &Dev) { - return getRawSyclObjImpl(Dev)->getHandleRef(); - }); - } else { - PiDevices.push_back(getRawSyclObjImpl(Device)->getHandleRef()); - } - ProgramPtr BuiltProgram = build(std::move(ProgramManaged), ContextImpl, Img.getCompileOptions(), - Img.getLinkOptions(), PiDevices, + Img.getLinkOptions(), getRawSyclObjImpl(Device)->getHandleRef(), ContextImpl->getCachedLibPrograms(), DeviceLibReqMask); { @@ -575,13 +549,15 @@ static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { static RT::PiProgram loadDeviceLibFallback( const ContextImplPtr Context, DeviceLibExt Extension, - const std::vector &Devices, - std::map &CachedLibPrograms) { + const RT::PiDevice &Device, + std::map, RT::PiProgram> + &CachedLibPrograms) { const char *LibFileName = getDeviceLibFilename(Extension); - auto CacheResult = CachedLibPrograms.insert({Extension, nullptr}); + auto CacheResult = CachedLibPrograms.emplace( + std::make_pair(std::make_pair(Extension, Device), nullptr)); bool Cached = !CacheResult.second; - std::map::iterator LibProgIt = CacheResult.first; + auto LibProgIt = CacheResult.first; RT::PiProgram &LibProg = LibProgIt->second; if (Cached) @@ -597,8 +573,7 @@ static RT::PiProgram loadDeviceLibFallback( // TODO no spec constants are used in the std libraries, support in the future RT::PiResult Error = Plugin.call_nocheck( LibProg, - // Assume that Devices contains all devices from Context. - Devices.size(), Devices.data(), + /*num devices = */ 1, &Device, // Do not use compile options for library programs: it is not clear // if user options (image options) are supposed to be applied to // library program as well, and what actually happens to a SPIR-V @@ -718,11 +693,11 @@ static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { return ((DeviceLibReqMask & Mask) == Mask); } -static std::vector -getDeviceLibPrograms(const ContextImplPtr Context, - const std::vector &Devices, - std::map &CachedLibPrograms, - uint32_t DeviceLibReqMask) { +static std::vector getDeviceLibPrograms( + const ContextImplPtr Context, const RT::PiDevice &Device, + std::map, RT::PiProgram> + &CachedLibPrograms, + uint32_t DeviceLibReqMask) { std::vector Programs; std::pair RequiredDeviceLibExt[] = { @@ -736,68 +711,61 @@ getDeviceLibPrograms(const ContextImplPtr Context, // Disable all devicelib extensions requiring fp64 support if at least // one underlying device doesn't support cl_khr_fp64. bool fp64Support = true; - for (RT::PiDevice Dev : Devices) { - std::string DevExtList = - get_device_info::get( - Dev, Context->getPlugin()); - fp64Support = - fp64Support && (DevExtList.npos != DevExtList.find("cl_khr_fp64")); - } + std::string DevExtList = + get_device_info::get( + Device, Context->getPlugin()); + fp64Support = + fp64Support && (DevExtList.npos != DevExtList.find("cl_khr_fp64")); - // Load a fallback library for an extension if at least one device does not + // Load a fallback library for an extension if the device does not // support it. - for (RT::PiDevice Dev : Devices) { - std::string DevExtList = - get_device_info::get( - Dev, Context->getPlugin()); - for (auto &Pair : RequiredDeviceLibExt) { - DeviceLibExt Ext = Pair.first; - bool &FallbackIsLoaded = Pair.second; - - if (FallbackIsLoaded) { - continue; - } + for (auto &Pair : RequiredDeviceLibExt) { + DeviceLibExt Ext = Pair.first; + bool &FallbackIsLoaded = Pair.second; - if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) { - continue; - } - if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 || - Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64) && - !fp64Support) { - continue; - } + if (FallbackIsLoaded) { + continue; + } - const char *ExtStr = getDeviceLibExtensionStr(Ext); + if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) { + continue; + } + if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 || + Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64) && + !fp64Support) { + continue; + } - bool InhibitNativeImpl = false; - if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { - InhibitNativeImpl = strstr(Env, ExtStr) != nullptr; - } + const char *ExtStr = getDeviceLibExtensionStr(Ext); - bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtStr); + bool InhibitNativeImpl = false; + if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { + InhibitNativeImpl = strstr(Env, ExtStr) != nullptr; + } - if (!DeviceSupports || InhibitNativeImpl) { - Programs.push_back( - loadDeviceLibFallback(Context, Ext, Devices, CachedLibPrograms)); - FallbackIsLoaded = true; - } + bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtStr); + + if (!DeviceSupports || InhibitNativeImpl) { + Programs.push_back( + loadDeviceLibFallback(Context, Ext, Device, CachedLibPrograms)); + FallbackIsLoaded = true; } } return Programs; } -ProgramManager::ProgramPtr -ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, - const string_class &CompileOptions, - const string_class &LinkOptions, - const std::vector &Devices, - std::map &CachedLibPrograms, - uint32_t DeviceLibReqMask) { +ProgramManager::ProgramPtr ProgramManager::build( + ProgramPtr Program, const ContextImplPtr Context, + const string_class &CompileOptions, const string_class &LinkOptions, + const RT::PiDevice &Device, + std::map, RT::PiProgram> + &CachedLibPrograms, + uint32_t DeviceLibReqMask) { if (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::build(" << Program.get() << ", " - << CompileOptions << ", " << LinkOptions << ", ... " - << Devices.size() << ")\n"; + << CompileOptions << ", " << LinkOptions << ", ... " << Device + << ")\n"; } bool LinkDeviceLibs = (DeviceLibReqMask != 0); @@ -828,7 +796,7 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, std::vector LinkPrograms; if (LinkDeviceLibs) { - LinkPrograms = getDeviceLibPrograms(Context, Devices, CachedLibPrograms, + LinkPrograms = getDeviceLibPrograms(Context, Device, CachedLibPrograms, DeviceLibReqMask); } @@ -837,7 +805,7 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, std::string Opts(CompileOpts); RT::PiResult Error = Plugin.call_nocheck( - Program.get(), Devices.size(), Devices.data(), Opts.c_str(), nullptr, + Program.get(), /*num devices =*/1, &Device, Opts.c_str(), nullptr, nullptr); if (Error != PI_SUCCESS) throw compile_program_error(getProgramBuildLog(Program.get(), Context), @@ -846,14 +814,14 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, } // Include the main program and compile/link everything together - Plugin.call(Program.get(), Devices.size(), - Devices.data(), CompileOpts, 0, - nullptr, nullptr, nullptr, nullptr); + Plugin.call(Program.get(), /*num devices =*/1, + &Device, CompileOpts, 0, nullptr, + nullptr, nullptr, nullptr); LinkPrograms.push_back(Program.get()); RT::PiProgram LinkedProg = nullptr; RT::PiResult Error = Plugin.call_nocheck( - Context->getHandleRef(), Devices.size(), Devices.data(), LinkOpts, + Context->getHandleRef(), /*num devices =*/1, &Device, LinkOpts, LinkPrograms.size(), LinkPrograms.data(), nullptr, nullptr, &LinkedProg); // Link program call returns a new program object if all parameters are valid, diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 7d1c852bbfc93..e667060f744ec 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -146,12 +146,13 @@ class ProgramManager { bool JITCompilationIsRequired = false); using ProgramPtr = unique_ptr_class, decltype(&::piProgramRelease)>; - ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context, - const string_class &CompileOptions, - const string_class &LinkOptions, - const std::vector &Devices, - std::map &CachedLibPrograms, - uint32_t DeviceLibReqMask); + ProgramPtr + build(ProgramPtr Program, const ContextImplPtr Context, + const string_class &CompileOptions, const string_class &LinkOptions, + const RT::PiDevice &Device, + std::map, RT::PiProgram> + &CachedLibPrograms, + uint32_t DeviceLibReqMask); /// Provides a new kernel set id for grouping kernel names together KernelSetId getNextKernelSetId() const; /// Returns the kernel set associated with the kernel, handles some special @@ -209,7 +210,7 @@ class ProgramManager { // the underlying program disposed of), so the map can't be used in any way // other than binary image lookup with known live PiProgram as the key. // NOTE: access is synchronized via the MNativeProgramsMutex - std::map NativePrograms; + std::unordered_map NativePrograms; /// Protects NativePrograms that can be changed by class' methods. std::mutex MNativeProgramsMutex; diff --git a/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp index 47c7da34e46a2..8409d77cdf3b2 100644 --- a/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp +++ b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp @@ -15,14 +15,14 @@ void exceptionHandler(sycl::exception_list exceptions) { try { std::rethrow_exception(e); } catch (sycl::exception const &e) { - std::cout << "Caught asynchronous SYCL exception:\n" + std::cerr << "Caught asynchronous SYCL exception:\n" << e.what() << std::endl; } } } int main() { - std::vector DeviceList = sycl::device::get_devices(); + auto DeviceList = sycl::device::get_devices(); // remove host device from the list DeviceList.erase(std::remove_if(DeviceList.begin(), DeviceList.end(), @@ -33,8 +33,7 @@ int main() { std::vector QueueList; for (const auto &Device : Context.get_devices()) { - sycl::queue Queue(Context, Device, &exceptionHandler); - QueueList.push_back(Queue); + QueueList.emplace_back(Context, Device, &exceptionHandler); } for (auto &Queue : QueueList) { From bd5ea0432a026ec984413fd6d17657ce938e6990 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Tue, 25 Aug 2020 13:42:44 +0300 Subject: [PATCH 8/9] Fix clang-format --- .../detail/program_manager/program_manager.hpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index e667060f744ec..93ddf954856db 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -146,13 +146,12 @@ class ProgramManager { bool JITCompilationIsRequired = false); using ProgramPtr = unique_ptr_class, decltype(&::piProgramRelease)>; - ProgramPtr - build(ProgramPtr Program, const ContextImplPtr Context, - const string_class &CompileOptions, const string_class &LinkOptions, - const RT::PiDevice &Device, - std::map, RT::PiProgram> - &CachedLibPrograms, - uint32_t DeviceLibReqMask); + ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context, + const string_class &CompileOptions, + const string_class &LinkOptions, const RT::PiDevice &Device, + std::map, + RT::PiProgram> &CachedLibPrograms, + uint32_t DeviceLibReqMask); /// Provides a new kernel set id for grouping kernel names together KernelSetId getNextKernelSetId() const; /// Returns the kernel set associated with the kernel, handles some special From d659650b2fd070e3d26529bbda5e5bc013167064 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Tue, 25 Aug 2020 14:40:30 +0300 Subject: [PATCH 9/9] Apply CR comments --- sycl/source/detail/program_manager/program_manager.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 5fec8f1f26cef..1ddbf00b79739 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -710,12 +710,10 @@ static std::vector getDeviceLibPrograms( // Disable all devicelib extensions requiring fp64 support if at least // one underlying device doesn't support cl_khr_fp64. - bool fp64Support = true; std::string DevExtList = get_device_info::get( Device, Context->getPlugin()); - fp64Support = - fp64Support && (DevExtList.npos != DevExtList.find("cl_khr_fp64")); + const bool fp64Support = (DevExtList.npos != DevExtList.find("cl_khr_fp64")); // Load a fallback library for an extension if the device does not // support it.