diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index c6d1953b23d98..1bcee24a9401d 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -134,7 +134,8 @@ class context_impl { /// more details. /// /// \returns a map with device library programs. - std::map &getCachedLibPrograms() { + std::map, RT::PiProgram> & + getCachedLibPrograms() { return MCachedLibPrograms; } @@ -155,7 +156,8 @@ class context_impl { PlatformImplPtr MPlatform; bool MHostContext; bool MUseCUDAPrimaryContext; - std::map MCachedLibPrograms; + std::map, RT::PiProgram> + MCachedLibPrograms; mutable KernelProgramCache MKernelProgramCache; }; diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 83261ce661142..478c1cc5cf3b8 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -67,7 +67,8 @@ class KernelProgramCache { using PiProgramT = std::remove_pointer::type; using PiProgramPtrT = std::atomic; using ProgramWithBuildStateT = BuildResult; - using ProgramCacheKeyT = std::pair; + using ProgramCacheKeyT = + std::pair, RT::PiDevice>; using ProgramCacheT = std::map; using ContextPtr = context_impl *; @@ -75,7 +76,8 @@ class KernelProgramCache { using PiKernelPtrT = std::atomic; using KernelWithBuildStateT = BuildResult; - using KernelByNameT = std::map; + using KernelByNameT = + std::map, KernelWithBuildStateT>; using KernelCacheT = std::map; ~KernelProgramCache(); diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index a73064e455067..61e7ac555f1a7 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -28,7 +28,14 @@ program_impl::program_impl(ContextImplPtr Context) program_impl::program_impl(ContextImplPtr Context, vector_class DeviceList) - : MContext(Context), MDevices(DeviceList) {} + : MContext(Context), MDevices(DeviceList) { + if (Context->getDevices().size() > 1) { + throw feature_not_supported( + "multiple devices within a context are not supported with " + "sycl::program and sycl::kernel", + PI_INVALID_OPERATION); + } +} program_impl::program_impl( vector_class> ProgramList, @@ -51,6 +58,12 @@ program_impl::program_impl( } MContext = ProgramList[0]->MContext; + if (MContext->getDevices().size() > 1) { + throw feature_not_supported( + "multiple devices within a context are not supported with " + "sycl::program and sycl::kernel", + PI_INVALID_OPERATION); + } MDevices = ProgramList[0]->MDevices; vector_class DevicesSorted; if (!is_host()) { @@ -105,6 +118,13 @@ program_impl::program_impl(ContextImplPtr Context, RT::PiProgram Program) : MProgram(Program), MContext(Context), MLinkable(true) { + if (Context->getDevices().size() > 1) { + throw feature_not_supported( + "multiple devices within a context are not supported with " + "sycl::program and sycl::kernel", + PI_INVALID_OPERATION); + } + const detail::plugin &Plugin = getPlugin(); if (MProgram == nullptr) { assert(InteropProgram && @@ -233,7 +253,7 @@ void program_impl::build_with_kernel_name(string_class KernelName, if (is_cacheable_with_options(BuildOptions)) { MProgramAndKernelCachingAllowed = true; MProgram = ProgramManager::getInstance().getBuiltPIProgram( - Module, get_context(), KernelName, this, + Module, get_context(), get_devices()[0], KernelName, this, /*JITCompilationIsRequired=*/(!BuildOptions.empty())); const detail::plugin &Plugin = getPlugin(); Plugin.call(MProgram); @@ -356,7 +376,7 @@ void program_impl::build(const string_class &Options) { check_device_feature_support(MDevices); vector_class Devices(get_pi_devices()); const detail::plugin &Plugin = getPlugin(); - ProgramManager::getInstance().flushSpecConstants(*this); + ProgramManager::getInstance().flushSpecConstants(*this, get_pi_devices()[0]); RT::PiResult Err = Plugin.call_nocheck( MProgram, Devices.size(), Devices.data(), Options.c_str(), nullptr, nullptr); @@ -404,7 +424,8 @@ RT::PiKernel program_impl::get_pi_kernel(const string_class &KernelName) const { if (is_cacheable()) { std::tie(Kernel, std::ignore) = ProgramManager::getInstance().getOrCreateKernel( - MProgramModuleHandle, get_context(), KernelName, this); + MProgramModuleHandle, get_context(), get_devices()[0], KernelName, + this); getPlugin().call(Kernel); } else { const detail::plugin &Plugin = getPlugin(); @@ -453,9 +474,10 @@ void program_impl::create_pi_program_with_kernel_name( bool JITCompilationIsRequired) { assert(!MProgram && "This program already has an encapsulated PI program"); ProgramManager &PM = ProgramManager::getInstance(); + const device FirstDevice = get_devices()[0]; RTDeviceBinaryImage &Img = PM.getDeviceImage( - Module, KernelName, get_context(), JITCompilationIsRequired); - MProgram = PM.createPIProgram(Img, get_context()); + Module, KernelName, get_context(), FirstDevice, JITCompilationIsRequired); + MProgram = PM.createPIProgram(Img, get_context(), FirstDevice); } template <> diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index fc45b310c94c6..1ddbf00b79739 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -52,29 +52,10 @@ ProgramManager &ProgramManager::getInstance() { return Instance; } -static RT::PiDevice getFirstDevice(const ContextImplPtr &Context) { - pi_uint32 NumDevices = 0; - const detail::plugin &Plugin = Context->getPlugin(); - Plugin.call(Context->getHandleRef(), - PI_CONTEXT_INFO_NUM_DEVICES, - sizeof(NumDevices), &NumDevices, - /*param_value_size_ret=*/nullptr); - assert(NumDevices > 0 && "Context without devices?"); - - vector_class Devices(NumDevices); - size_t ParamValueSize = 0; - Plugin.call( - Context->getHandleRef(), PI_CONTEXT_INFO_DEVICES, - sizeof(cl_device_id) * NumDevices, &Devices[0], &ParamValueSize); - assert(ParamValueSize == sizeof(cl_device_id) * NumDevices && - "Number of CL_CONTEXT_DEVICES should match CL_CONTEXT_NUM_DEVICES."); - return Devices[0]; -} - static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, + const device &Device, const unsigned char *Data, size_t DataLen) { - // FIXME: we don't yet support multiple devices with a single binary. const detail::plugin &Plugin = Context->getPlugin(); #ifndef _NDEBUG pi_uint32 NumDevices = 0; @@ -87,10 +68,10 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context, #endif RT::PiProgram Program; - RT::PiDevice Device = getFirstDevice(Context); + const RT::PiDevice PiDevice = getSyclObjImpl(Device)->getHandleRef(); pi_int32 BinaryStatus = CL_SUCCESS; Plugin.call( - Context->getHandleRef(), 1 /*one binary*/, &Device, &DataLen, &Data, + Context->getHandleRef(), 1 /*one binary*/, &PiDevice, &DataLen, &Data, &BinaryStatus, &Program); if (BinaryStatus != CL_SUCCESS) { @@ -112,14 +93,16 @@ static RT::PiProgram createSpirvProgram(const ContextImplPtr Context, RTDeviceBinaryImage & ProgramManager::getDeviceImage(OSModuleHandle M, const string_class &KernelName, - const context &Context, + const context &Context, const device &Device, bool JITCompilationIsRequired) { if (DbgProgMgr > 0) std::cerr << ">>> ProgramManager::getDeviceImage(" << M << ", \"" - << KernelName << "\", " << getRawSyclObjImpl(Context) << ")\n"; + << KernelName << "\", " << getRawSyclObjImpl(Context) << ", " + << getRawSyclObjImpl(Device) << ", " << JITCompilationIsRequired + << ")\n"; KernelSetId KSId = getKernelSetId(M, KernelName); - return getDeviceImage(M, KSId, Context, JITCompilationIsRequired); + return getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired); } template @@ -290,9 +273,12 @@ static const char *getFormatStr(RT::PiDeviceBinaryType Format) { } RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, - const context &Context) { + const context &Context, + const device &Device) { if (DbgProgMgr > 0) - std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ")\n"; + std::cerr << ">>> ProgramManager::createPIProgram(" << &Img << ", " + << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device) + << ")\n"; const pi_device_binary_struct &RawImg = Img.getRawData(); // perform minimal sanity checks on the device image and the descriptor @@ -329,7 +315,7 @@ RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, RT::PiProgram Res = Format == PI_DEVICE_BINARY_TYPE_SPIRV ? createSpirvProgram(Ctx, RawImg.BinaryStart, ImgSize) - : createBinaryProgram(Ctx, RawImg.BinaryStart, ImgSize); + : createBinaryProgram(Ctx, Device, RawImg.BinaryStart, ImgSize); { std::lock_guard Lock(MNativeProgramsMutex); @@ -346,6 +332,7 @@ RT::PiProgram ProgramManager::createPIProgram(const RTDeviceBinaryImage &Img, RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, const context &Context, + const device &Device, const string_class &KernelName, const program_impl *Prg, bool JITCompilationIsRequired) { @@ -364,15 +351,17 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, auto GetF = [](const Locked &LockedCache) -> ProgramCacheT & { return LockedCache.get(); }; - auto BuildF = [this, &M, &KSId, &Context, Prg, &JITCompilationIsRequired] { + auto BuildF = [this, &M, &KSId, &Context, &Device, Prg, + &JITCompilationIsRequired] { const RTDeviceBinaryImage &Img = - getDeviceImage(M, KSId, Context, JITCompilationIsRequired); + getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired); ContextImplPtr ContextImpl = getSyclObjImpl(Context); const detail::plugin &Plugin = ContextImpl->getPlugin(); - RT::PiProgram NativePrg = createPIProgram(Img, Context); + RT::PiProgram NativePrg = createPIProgram(Img, Context, Device); if (Prg) - flushSpecConstants(*Prg, NativePrg, &Img); + flushSpecConstants(*Prg, getSyclObjImpl(Device)->getHandleRef(), + NativePrg, &Img); ProgramPtr ProgramManaged( NativePrg, Plugin.getPiPlugin().PiFunctionTable.piProgramRelease); @@ -386,16 +375,9 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, !SYCLConfig::get()) DeviceLibReqMask = getDeviceLibReqMask(Img); - const std::vector &Devices = ContextImpl->getDevices(); - std::vector PiDevices(Devices.size()); - std::transform(Devices.begin(), Devices.end(), PiDevices.begin(), - [](const device Dev) { - return getRawSyclObjImpl(Dev)->getHandleRef(); - }); - ProgramPtr BuiltProgram = build(std::move(ProgramManaged), ContextImpl, Img.getCompileOptions(), - Img.getLinkOptions(), PiDevices, + Img.getLinkOptions(), getRawSyclObjImpl(Device)->getHandleRef(), ContextImpl->getCachedLibPrograms(), DeviceLibReqMask); { @@ -405,26 +387,29 @@ RT::PiProgram ProgramManager::getBuiltPIProgram(OSModuleHandle M, return BuiltProgram.release(); }; - using KeyT = KernelProgramCache::ProgramCacheKeyT; SerializedObj SpecConsts; if (Prg) Prg->stableSerializeSpecConstRegistry(SpecConsts); + const RT::PiDevice PiDevice = getRawSyclObjImpl(Device)->getHandleRef(); auto BuildResult = getOrBuild( - Cache, KeyT(std::move(SpecConsts), KSId), AcquireF, GetF, BuildF); + Cache, + std::make_pair(std::make_pair(std::move(SpecConsts), KSId), PiDevice), + AcquireF, GetF, BuildF); return BuildResult->Ptr.load(); } -std::pair -ProgramManager::getOrCreateKernel(OSModuleHandle M, const context &Context, - const string_class &KernelName, - const program_impl *Prg) { +std::pair ProgramManager::getOrCreateKernel( + OSModuleHandle M, const context &Context, const device &Device, + const string_class &KernelName, const program_impl *Prg) { if (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getOrCreateKernel(" << M << ", " - << getRawSyclObjImpl(Context) << ", " << KernelName << ")\n"; + << getRawSyclObjImpl(Context) << ", " << getRawSyclObjImpl(Device) + << ", " << KernelName << ")\n"; } - RT::PiProgram Program = getBuiltPIProgram(M, Context, KernelName, Prg); + RT::PiProgram Program = + getBuiltPIProgram(M, Context, Device, KernelName, Prg); const ContextImplPtr Ctx = getSyclObjImpl(Context); using PiKernelT = KernelProgramCache::PiKernelT; @@ -457,8 +442,9 @@ ProgramManager::getOrCreateKernel(OSModuleHandle M, const context &Context, return Result; }; + const RT::PiDevice PiDevice = getRawSyclObjImpl(Device)->getHandleRef(); auto BuildResult = getOrBuild( - Cache, KernelName, AcquireF, GetF, BuildF); + Cache, std::make_pair(KernelName, PiDevice), AcquireF, GetF, BuildF); return std::make_pair(BuildResult->Ptr.load(), &(BuildResult->MBuildResultMutex)); } @@ -563,13 +549,15 @@ static const char *getDeviceLibExtensionStr(DeviceLibExt Extension) { static RT::PiProgram loadDeviceLibFallback( const ContextImplPtr Context, DeviceLibExt Extension, - const std::vector &Devices, - std::map &CachedLibPrograms) { + const RT::PiDevice &Device, + std::map, RT::PiProgram> + &CachedLibPrograms) { const char *LibFileName = getDeviceLibFilename(Extension); - auto CacheResult = CachedLibPrograms.insert({Extension, nullptr}); + auto CacheResult = CachedLibPrograms.emplace( + std::make_pair(std::make_pair(Extension, Device), nullptr)); bool Cached = !CacheResult.second; - std::map::iterator LibProgIt = CacheResult.first; + auto LibProgIt = CacheResult.first; RT::PiProgram &LibProg = LibProgIt->second; if (Cached) @@ -585,8 +573,7 @@ static RT::PiProgram loadDeviceLibFallback( // TODO no spec constants are used in the std libraries, support in the future RT::PiResult Error = Plugin.call_nocheck( LibProg, - // Assume that Devices contains all devices from Context. - Devices.size(), Devices.data(), + /*num devices = */ 1, &Device, // Do not use compile options for library programs: it is not clear // if user options (image options) are supposed to be applied to // library program as well, and what actually happens to a SPIR-V @@ -642,11 +629,13 @@ ProgramManager::ProgramManager() { RTDeviceBinaryImage & ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId, - const context &Context, + const context &Context, const device &Device, bool JITCompilationIsRequired) { if (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getDeviceImage(" << M << ", \"" << KSId - << "\", " << getRawSyclObjImpl(Context) << ")\n"; + << "\", " << getRawSyclObjImpl(Context) << ", " + << getRawSyclObjImpl(Device) << ", " << JITCompilationIsRequired + << ")\n"; std::cerr << "available device images:\n"; debugPrintBinaryImages(); @@ -668,7 +657,8 @@ ProgramManager::getDeviceImage(OSModuleHandle M, KernelSetId KSId, RawImgs[I] = const_cast(&Imgs[I]->getRawData()); Ctx->getPlugin().call( - getFirstDevice(Ctx), RawImgs.data(), (cl_uint)RawImgs.size(), &ImgInd); + getSyclObjImpl(Device)->getHandleRef(), RawImgs.data(), + (cl_uint)RawImgs.size(), &ImgInd); if (JITCompilationIsRequired) { // If the image is already compiled with AOT, throw an exception. @@ -703,11 +693,11 @@ static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { return ((DeviceLibReqMask & Mask) == Mask); } -static std::vector -getDeviceLibPrograms(const ContextImplPtr Context, - const std::vector &Devices, - std::map &CachedLibPrograms, - uint32_t DeviceLibReqMask) { +static std::vector getDeviceLibPrograms( + const ContextImplPtr Context, const RT::PiDevice &Device, + std::map, RT::PiProgram> + &CachedLibPrograms, + uint32_t DeviceLibReqMask) { std::vector Programs; std::pair RequiredDeviceLibExt[] = { @@ -720,69 +710,60 @@ getDeviceLibPrograms(const ContextImplPtr Context, // Disable all devicelib extensions requiring fp64 support if at least // one underlying device doesn't support cl_khr_fp64. - bool fp64Support = true; - for (RT::PiDevice Dev : Devices) { - std::string DevExtList = - get_device_info::get( - Dev, Context->getPlugin()); - fp64Support = - fp64Support && (DevExtList.npos != DevExtList.find("cl_khr_fp64")); - } + std::string DevExtList = + get_device_info::get( + Device, Context->getPlugin()); + const bool fp64Support = (DevExtList.npos != DevExtList.find("cl_khr_fp64")); - // Load a fallback library for an extension if at least one device does not + // Load a fallback library for an extension if the device does not // support it. - for (RT::PiDevice Dev : Devices) { - std::string DevExtList = - get_device_info::get( - Dev, Context->getPlugin()); - for (auto &Pair : RequiredDeviceLibExt) { - DeviceLibExt Ext = Pair.first; - bool &FallbackIsLoaded = Pair.second; - - if (FallbackIsLoaded) { - continue; - } + for (auto &Pair : RequiredDeviceLibExt) { + DeviceLibExt Ext = Pair.first; + bool &FallbackIsLoaded = Pair.second; - if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) { - continue; - } - if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 || - Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64) && - !fp64Support) { - continue; - } + if (FallbackIsLoaded) { + continue; + } - const char *ExtStr = getDeviceLibExtensionStr(Ext); + if (!isDeviceLibRequired(Ext, DeviceLibReqMask)) { + continue; + } + if ((Ext == DeviceLibExt::cl_intel_devicelib_math_fp64 || + Ext == DeviceLibExt::cl_intel_devicelib_complex_fp64) && + !fp64Support) { + continue; + } - bool InhibitNativeImpl = false; - if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { - InhibitNativeImpl = strstr(Env, ExtStr) != nullptr; - } + const char *ExtStr = getDeviceLibExtensionStr(Ext); - bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtStr); + bool InhibitNativeImpl = false; + if (const char *Env = getenv("SYCL_DEVICELIB_INHIBIT_NATIVE")) { + InhibitNativeImpl = strstr(Env, ExtStr) != nullptr; + } - if (!DeviceSupports || InhibitNativeImpl) { - Programs.push_back( - loadDeviceLibFallback(Context, Ext, Devices, CachedLibPrograms)); - FallbackIsLoaded = true; - } + bool DeviceSupports = DevExtList.npos != DevExtList.find(ExtStr); + + if (!DeviceSupports || InhibitNativeImpl) { + Programs.push_back( + loadDeviceLibFallback(Context, Ext, Device, CachedLibPrograms)); + FallbackIsLoaded = true; } } return Programs; } -ProgramManager::ProgramPtr -ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, - const string_class &CompileOptions, - const string_class &LinkOptions, - const std::vector &Devices, - std::map &CachedLibPrograms, - uint32_t DeviceLibReqMask) { +ProgramManager::ProgramPtr ProgramManager::build( + ProgramPtr Program, const ContextImplPtr Context, + const string_class &CompileOptions, const string_class &LinkOptions, + const RT::PiDevice &Device, + std::map, RT::PiProgram> + &CachedLibPrograms, + uint32_t DeviceLibReqMask) { if (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::build(" << Program.get() << ", " - << CompileOptions << ", " << LinkOptions << ", ... " - << Devices.size() << ")\n"; + << CompileOptions << ", " << LinkOptions << ", ... " << Device + << ")\n"; } bool LinkDeviceLibs = (DeviceLibReqMask != 0); @@ -813,7 +794,7 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, std::vector LinkPrograms; if (LinkDeviceLibs) { - LinkPrograms = getDeviceLibPrograms(Context, Devices, CachedLibPrograms, + LinkPrograms = getDeviceLibPrograms(Context, Device, CachedLibPrograms, DeviceLibReqMask); } @@ -822,7 +803,7 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, std::string Opts(CompileOpts); RT::PiResult Error = Plugin.call_nocheck( - Program.get(), Devices.size(), Devices.data(), Opts.c_str(), nullptr, + Program.get(), /*num devices =*/1, &Device, Opts.c_str(), nullptr, nullptr); if (Error != PI_SUCCESS) throw compile_program_error(getProgramBuildLog(Program.get(), Context), @@ -831,14 +812,14 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, } // Include the main program and compile/link everything together - Plugin.call(Program.get(), Devices.size(), - Devices.data(), CompileOpts, 0, - nullptr, nullptr, nullptr, nullptr); + Plugin.call(Program.get(), /*num devices =*/1, + &Device, CompileOpts, 0, nullptr, + nullptr, nullptr, nullptr); LinkPrograms.push_back(Program.get()); RT::PiProgram LinkedProg = nullptr; RT::PiResult Error = Plugin.call_nocheck( - Context->getHandleRef(), Devices.size(), Devices.data(), LinkOpts, + Context->getHandleRef(), /*num devices =*/1, &Device, LinkOpts, LinkPrograms.size(), LinkPrograms.data(), nullptr, nullptr, &LinkedProg); // Link program call returns a new program object if all parameters are valid, @@ -1001,6 +982,7 @@ void ProgramManager::dumpImage(const RTDeviceBinaryImage &Img, } void ProgramManager::flushSpecConstants(const program_impl &Prg, + RT::PiDevice Device, RT::PiProgram NativePrg, const RTDeviceBinaryImage *Img) { if (DbgProgMgr > 2) { @@ -1054,8 +1036,9 @@ uint32_t ProgramManager::getDeviceLibReqMask(const RTDeviceBinaryImage &Img) { // TODO consider another approach with storing the masks in the integration // header instead. ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask( - OSModuleHandle M, const context &Context, pi::PiProgram NativePrg, - const string_class &KernelName, bool KnownProgram) { + OSModuleHandle M, const context &Context, const device &Device, + pi::PiProgram NativePrg, const string_class &KernelName, + bool KnownProgram) { // If instructed to use a spv file, assume no eliminated arguments. if (m_UseSpvFile && M == OSUtil::ExeModuleHandle) return {}; @@ -1089,7 +1072,7 @@ ProgramManager::KernelArgMask ProgramManager::getEliminatedKernelArgMask( return {}; std::rethrow_exception(std::current_exception()); } - RTDeviceBinaryImage &Img = getDeviceImage(M, KSId, Context); + RTDeviceBinaryImage &Img = getDeviceImage(M, KSId, Context, Device); { std::lock_guard Lock(MNativeProgramsMutex); NativePrograms[NativePrg] = &Img; diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 1e4329b3fd925..93ddf954856db 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -67,14 +67,16 @@ class ProgramManager { RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M, const string_class &KernelName, const context &Context, + const device &Device, bool JITCompilationIsRequired = false); RT::PiProgram createPIProgram(const RTDeviceBinaryImage &Img, - const context &Context); + const context &Context, const device &Device); /// Builds or retrieves from cache a program defining the kernel with given /// name. /// \param M idenfies the OS module the kernel comes from (multiple OS modules /// may have kernels with the same name) /// \param Context the context to build the program with + /// \param Device the device for which the program is built /// \param KernelName the kernel's name /// \param Prg provides build context information, such as /// current specialization constants settings; can be nullptr. @@ -83,12 +85,14 @@ class ProgramManager { /// \param JITCompilationIsRequired If JITCompilationIsRequired is true /// add a check that kernel is compiled, otherwise don't add the check. RT::PiProgram getBuiltPIProgram(OSModuleHandle M, const context &Context, + const device &Device, const string_class &KernelName, const program_impl *Prg = nullptr, bool JITCompilationIsRequired = false); std::pair getOrCreateKernel(OSModuleHandle M, const context &Context, - const string_class &KernelName, const program_impl *Prg); + const device &Device, const string_class &KernelName, + const program_impl *Prg); RT::PiProgram getPiProgramFromPiKernel(RT::PiKernel Kernel, const ContextImplPtr Context); @@ -102,13 +106,14 @@ class ProgramManager { /// \param Prg the program object to get spec constant settings from. /// Passing program_impl by raw reference is OK, since it is not /// captured anywhere once the function returns. + /// \param Device the device assosiated with the native program /// \param NativePrg the native program, target for spec constant setting; if /// not null then overrides the native program in Prg /// \param Img A source of the information about which constants need /// setting and symboling->integer spec constnant ID mapping. If not /// null, overrides native program->binary image binding maintained by /// the program manager. - void flushSpecConstants(const program_impl &Prg, + void flushSpecConstants(const program_impl &Prg, pi::PiDevice Device, pi::PiProgram NativePrg = nullptr, const RTDeviceBinaryImage *Img = nullptr); uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); @@ -118,16 +123,16 @@ class ProgramManager { /// \param M identifies the OS module the kernel comes from (multiple OS /// modules may have kernels with the same name). /// \param Context the context associated with the kernel. + /// \param Device the device associated with the context. /// \param NativePrg the PI program associated with the kernel. /// \param KernelName the name of the kernel. /// \param KnownProgram indicates whether the PI program is guaranteed to /// be known to program manager (built with its API) or not (not /// cacheable or constructed with interoperability). - KernelArgMask getEliminatedKernelArgMask(OSModuleHandle M, - const context &Context, - pi::PiProgram NativePrg, - const string_class &KernelName, - bool KnownProgram); + KernelArgMask + getEliminatedKernelArgMask(OSModuleHandle M, const context &Context, + const device &Device, pi::PiProgram NativePrg, + const string_class &KernelName, bool KnownProgram); private: ProgramManager(); @@ -137,14 +142,15 @@ class ProgramManager { RTDeviceBinaryImage &getDeviceImage(OSModuleHandle M, KernelSetId KSId, const context &Context, + const device &Device, bool JITCompilationIsRequired = false); using ProgramPtr = unique_ptr_class, decltype(&::piProgramRelease)>; ProgramPtr build(ProgramPtr Program, const ContextImplPtr Context, const string_class &CompileOptions, - const string_class &LinkOptions, - const std::vector &Devices, - std::map &CachedLibPrograms, + const string_class &LinkOptions, const RT::PiDevice &Device, + std::map, + RT::PiProgram> &CachedLibPrograms, uint32_t DeviceLibReqMask); /// Provides a new kernel set id for grouping kernel names together KernelSetId getNextKernelSetId() const; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index efbe0373f06e6..34fd1616253c9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1920,15 +1920,15 @@ cl_int ExecCGCommand::enqueueImp() { detail::ProgramManager::getInstance().getOrCreateKernel( ExecKernel->MOSModuleHandle, ExecKernel->MSyclKernel->get_info(), - ExecKernel->MKernelName, SyclProg.get()); + MQueue->get_device(), ExecKernel->MKernelName, SyclProg.get()); assert(FoundKernel == Kernel); } else KnownProgram = false; } else { std::tie(Kernel, KernelMutex) = detail::ProgramManager::getInstance().getOrCreateKernel( - ExecKernel->MOSModuleHandle, Context, ExecKernel->MKernelName, - nullptr); + ExecKernel->MOSModuleHandle, Context, MQueue->get_device(), + ExecKernel->MKernelName, nullptr); MQueue->getPlugin().call( Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(RT::PiProgram), &Program, nullptr); @@ -1940,8 +1940,8 @@ cl_int ExecCGCommand::enqueueImp() { !ExecKernel->MSyclKernel->isCreatedFromSource()) { EliminatedArgMask = detail::ProgramManager::getInstance().getEliminatedKernelArgMask( - ExecKernel->MOSModuleHandle, Context, Program, - ExecKernel->MKernelName, KnownProgram); + ExecKernel->MOSModuleHandle, Context, MQueue->get_device(), + Program, ExecKernel->MKernelName, KnownProgram); } if (KernelMutex != nullptr) { // For cacheable kernels, we use per-kernel mutex diff --git a/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp new file mode 100644 index 0000000000000..8409d77cdf3b2 --- /dev/null +++ b/sycl/test/context-with-multiple-devices/context-with-multiple-devices.cpp @@ -0,0 +1,45 @@ +// REQUIRES: cpu, accelerator, aoc + +// UNSUPPORTED: cuda, level_zero + +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t1.out +// RUN: %CPU_RUN_PLACEHOLDER CL_CONFIG_CPU_EMULATE_DEVICES=2 %t1.out +// RUN: %CPU_RUN_PLACEHOLDER CL_CONFIG_CPU_EMULATE_DEVICES=4 %t1.out +// RUN: %clangxx -fsycl -fintelfpga -fsycl-unnamed-lambda %s -o %t2.out +// RUN: %ACC_RUN_PLACEHOLDER CL_CONFIG_CPU_EMULATE_DEVICES=2 %t2.out + +#include + +void exceptionHandler(sycl::exception_list exceptions) { + for (std::exception_ptr const &e : exceptions) { + try { + std::rethrow_exception(e); + } catch (sycl::exception const &e) { + std::cerr << "Caught asynchronous SYCL exception:\n" + << e.what() << std::endl; + } + } +} + +int main() { + auto DeviceList = sycl::device::get_devices(); + + // remove host device from the list + DeviceList.erase(std::remove_if(DeviceList.begin(), DeviceList.end(), + [](auto Device) { return Device.is_host(); }), + DeviceList.end()); + + sycl::context Context(DeviceList, &exceptionHandler); + + std::vector QueueList; + for (const auto &Device : Context.get_devices()) { + QueueList.emplace_back(Context, Device, &exceptionHandler); + } + + for (auto &Queue : QueueList) { + Queue.submit( + [&](sycl::handler &cgh) { cgh.parallel_for(100, [=](auto i) {}); }); + } + + return 0; +}