From b05fa0817395fe5526a39ae8f2ac4b6612d6f505 Mon Sep 17 00:00:00 2001 From: Vlad Romanov Date: Mon, 7 Feb 2022 12:30:16 +0300 Subject: [PATCH 1/4] Improve get_kernel_bundle performance --- sycl/source/backend.cpp | 2 +- sycl/source/detail/device_image_impl.hpp | 20 +- sycl/source/detail/kernel_bundle_impl.hpp | 6 - .../program_manager/program_manager.cpp | 201 +++++++++--------- .../program_manager/program_manager.hpp | 24 ++- sycl/source/kernel_bundle.cpp | 4 +- .../program_manager/EliminatedArgMask.cpp | 9 +- 7 files changed, 135 insertions(+), 131 deletions(-) diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 691bdbf0ab91c..5db30231415fa 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -187,7 +187,7 @@ make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext, // this by pre-building the device image and extracting kernel info. We can't // do the same to user images, since they may contain references to undefined // symbols (e.g. when kernel_bundle is supposed to be joined with another). - std::vector KernelIDs{}; + std::shared_ptr> KernelIDs{new std::vector}; auto DevImgImpl = std::make_shared( nullptr, TargetContext, Devices, State, KernelIDs, PiProgram); device_image_plain DevImg{DevImgImpl}; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 42310d97d3dbf..100f926d4788d 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -32,6 +32,12 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { +template struct LessByHash { + bool operator()(const T &LHS, const T &RHS) const { + return getSyclObjImpl(LHS) < getSyclObjImpl(RHS); + } +}; + // The class is impl counterpart for sycl::device_image // It can represent a program in different states, kernel_id's it has and state // of specialization constants for it @@ -51,7 +57,7 @@ class device_image_impl { device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector Devices, bundle_state State, - std::vector KernelIDs, RT::PiProgram Program) + std::shared_ptr> KernelIDs, RT::PiProgram Program) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::move(KernelIDs)) { @@ -60,7 +66,7 @@ class device_image_impl { device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector Devices, bundle_state State, - std::vector KernelIDs, RT::PiProgram Program, + std::shared_ptr> KernelIDs, RT::PiProgram Program, const SpecConstMapT &SpecConstMap, const std::vector &SpecConstsBlob) : MBinImage(BinImage), MContext(std::move(Context)), @@ -69,8 +75,8 @@ class device_image_impl { MSpecConstSymMap(SpecConstMap) {} bool has_kernel(const kernel_id &KernelIDCand) const noexcept { - return std::binary_search(MKernelIDs.begin(), MKernelIDs.end(), - KernelIDCand, LessByNameComp{}); + return std::binary_search(MKernelIDs->begin(), MKernelIDs->end(), + KernelIDCand, LessByHash{}); } bool has_kernel(const kernel_id &KernelIDCand, @@ -83,7 +89,7 @@ class device_image_impl { } const std::vector &get_kernel_ids() const noexcept { - return MKernelIDs; + return *MKernelIDs; } bool has_specialization_constants() const noexcept { @@ -176,7 +182,7 @@ class device_image_impl { const context &get_context() const noexcept { return MContext; } - std::vector &get_kernel_ids_ref() noexcept { return MKernelIDs; } + std::shared_ptr> &get_kernel_ids_ref() noexcept { return MKernelIDs; } std::vector &get_spec_const_blob_ref() noexcept { return MSpecConstsBlob; @@ -312,7 +318,7 @@ class device_image_impl { RT::PiProgram MProgram = nullptr; // List of kernel ids available in this image, elements should be sorted // according to LessByNameComp - std::vector MKernelIDs; + std::shared_ptr> MKernelIDs; // A mutex for sycnhronizing access to spec constants blob. Mutable because // needs to be locked in the const method for getting spec constant value. diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index a6828b6640e08..cc8bf4b38249f 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -28,12 +28,6 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { -template struct LessByHash { - bool operator()(const T &LHS, const T &RHS) const { - return getSyclObjImpl(LHS) < getSyclObjImpl(RHS); - } -}; - static bool checkAllDevicesAreInContext(const std::vector &Devices, const context &Context) { const std::vector &ContextDevices = Context.get_devices(); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 7febb0427d549..a1a5e4bad496e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1070,28 +1070,9 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) { ArgMaskMap[Info->Name] = createKernelArgMask(pi::DeviceBinaryProperty(Info).asByteArray()); } - // Use the entry information if it's available - if (EntriesB != EntriesE) { - // The kernel sets for any pair of images are either disjoint or - // identical, look up the kernel set using the first kernel name... - StrToKSIdMap &KSIdMap = m_KernelSets[M]; - auto KSIdIt = KSIdMap.find(EntriesB->name); - if (KSIdIt != KSIdMap.end()) { - for (_pi_offload_entry EntriesIt = EntriesB + 1; EntriesIt != EntriesE; - ++EntriesIt) - assert(KSIdMap[EntriesIt->name] == KSIdIt->second && - "Kernel sets are not disjoint"); - auto &Imgs = m_DeviceImages[KSIdIt->second]; - assert(Imgs && "Device image vector should have been already created"); - cacheKernelUsesAssertInfo(M, *Img); - - Imgs->push_back(std::move(Img)); - continue; - } - // ... or create the set first if it hasn't been - KernelSetId KSId = getNextKernelSetId(); - { + // Fill maps for kernel bundles + if (EntriesB != EntriesE) { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); // Register all exported symbols @@ -1099,11 +1080,12 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) { for (const pi_device_binary_property &ExportedSymbol : ExportedSymbols) m_ExportedSymbols.insert(ExportedSymbol->Name); + + m_BinImg2KernelIDs[Img.get()].reset(new std::vector); + + for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; ++EntriesIt) { - auto Result = KSIdMap.insert(std::make_pair(EntriesIt->name, KSId)); - (void)Result; - assert(Result.second && "Kernel sets are not disjoint"); // Skip creating unique kernel ID if it is a service kernel. // SYCL service kernels are identified by having @@ -1126,8 +1108,49 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) { std::make_shared(EntriesIt->name); sycl::kernel_id KernelID = detail::createSyclObjFromImpl(KernelIDImpl); - m_KernelIDs.insert( - std::make_pair(EntriesIt->name, std::move(KernelID))); + + m_KernelName2KernelIDs.insert( + std::make_pair(EntriesIt->name, KernelID)); + + m_KernelIDs2BinImage.insert(std::make_pair(KernelID, Img.get())); + m_BinImg2KernelIDs[Img.get()]->push_back(KernelID); + + } + + // Sort kernel ids for faster search + std::sort(m_BinImg2KernelIDs[Img.get()]->begin(), + m_BinImg2KernelIDs[Img.get()]->end(), LessByHash{}); + } + + // TODO: Remove the code below once program manager works trought kernel + // bundles only + // Use the entry information if it's available + if (EntriesB != EntriesE) { + // The kernel sets for any pair of images are either disjoint or + // identical, look up the kernel set using the first kernel name... + StrToKSIdMap &KSIdMap = m_KernelSets[M]; + auto KSIdIt = KSIdMap.find(EntriesB->name); + if (KSIdIt != KSIdMap.end()) { + for (_pi_offload_entry EntriesIt = EntriesB + 1; EntriesIt != EntriesE; + ++EntriesIt) + assert(KSIdMap[EntriesIt->name] == KSIdIt->second && + "Kernel sets are not disjoint"); + auto &Imgs = m_DeviceImages[KSIdIt->second]; + assert(Imgs && "Device image vector should have been already created"); + + cacheKernelUsesAssertInfo(M, *Img); + + Imgs->push_back(std::move(Img)); + continue; + } + // ... or create the set first if it hasn't been + KernelSetId KSId = getNextKernelSetId(); + { + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + + for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; + ++EntriesIt) { + KSIdMap.insert(std::make_pair(EntriesIt->name, KSId)); } } m_DeviceImages[KSId].reset(new std::vector()); @@ -1347,8 +1370,8 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, kernel_id ProgramManager::getSYCLKernelID(const std::string &KernelName) { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - auto KernelID = m_KernelIDs.find(KernelName); - if (KernelID == m_KernelIDs.end()) + auto KernelID = m_KernelName2KernelIDs.find(KernelName); + if (KernelID == m_KernelName2KernelIDs.end()) throw runtime_error("No kernel found with the specified name", PI_INVALID_KERNEL_NAME); @@ -1359,8 +1382,9 @@ std::vector ProgramManager::getAllSYCLKernelIDs() { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); std::vector AllKernelIDs; - AllKernelIDs.reserve(m_KernelIDs.size()); - for (std::pair KernelID : m_KernelIDs) { + AllKernelIDs.reserve(m_KernelName2KernelIDs.size()); + // TODO: Replace with inserts of vectors from m_BinImg2KernelIDs ? + for (std::pair KernelID : m_KernelName2KernelIDs) { AllKernelIDs.push_back(KernelID.second); } return AllKernelIDs; @@ -1382,80 +1406,58 @@ kernel_id ProgramManager::getBuiltInKernelID(const std::string &KernelName) { std::vector ProgramManager::getSYCLDeviceImagesWithCompatibleState( const context &Ctx, const std::vector &Devs, - bundle_state TargetState) { - - // Collect raw device images - std::vector BinImages; - { + bundle_state TargetState, const std::vector &KernelIDs) { + + // Collect unique raw device images taking into account kernel ids passed + // TODO: Can we avoid repacking? + std::set BinImages; + if (!KernelIDs.empty()) { + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + for (const kernel_id &KID : KernelIDs) { + auto Range = m_KernelIDs2BinImage.equal_range(KID); + for (auto It = Range.first, End = Range.second; It != End; ++It) + BinImages.insert(It->second); + } + } else { std::lock_guard Guard(Sync::getGlobalLock()); for (auto &ImagesSets : m_DeviceImages) { auto &ImagesUPtrs = *ImagesSets.second.get(); - for (auto &ImageUPtr : ImagesUPtrs) { - const RTDeviceBinaryImage *BinImage = ImageUPtr.get(); - const bundle_state ImgState = getBinImageState(BinImage); - - // Ignore images with incompatible state. Image is considered compatible - // with a target state if an image is already in the target state or can - // be brought to target state by compiling/linking/building. - // - // Example: an image in "executable" state is not compatible with - // "input" target state - there is no operation to convert the image it - // to "input" state. An image in "input" state is compatible with - // "executable" target state because it can be built to get into - // "executable" state. - if (ImgState > TargetState) - continue; - - BinImages.push_back(ImageUPtr.get()); - } + for (auto &ImageUPtr : ImagesUPtrs) + BinImages.insert(ImageUPtr.get()); } } - // TODO: Add a diagnostic on multiple device images with conflicting kernel - // names, and remove OSModuleHandle usage, as conflicting kernel names will be - // an error. + assert(BinImages.size() > 0 && "Expected to find at least on device image"); - // TODO: Cache device_image objects - // Create SYCL device image from those that have compatible state and at least - // one device std::vector SYCLDeviceImages; for (RTDeviceBinaryImage *BinImage : BinImages) { const bundle_state ImgState = getBinImageState(BinImage); + // Ignore images with incompatible state. Image is considered compatible + // with a target state if an image is already in the target state or can + // be brought to target state by compiling/linking/building. + // + // Example: an image in "executable" state is not compatible with + // "input" target state - there is no operation to convert the image it + // to "input" state. An image in "input" state is compatible with + // "executable" target state because it can be built to get into + // "executable" state. + if (ImgState > TargetState) + continue; + for (const sycl::device &Dev : Devs) { if (!compatibleWithDevice(BinImage, Dev)) continue; - std::vector KernelIDs; + std::shared_ptr> KernelIDs; // Collect kernel names for the image - pi_device_binary DevBin = - const_cast(&BinImage->getRawData()); { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - for (_pi_offload_entry EntriesIt = DevBin->EntriesBegin; - EntriesIt != DevBin->EntriesEnd; ++EntriesIt) { - auto KernelID = m_KernelIDs.find(EntriesIt->name); - - if (KernelID == m_KernelIDs.end()) { - // Service kernels and exported symbols do not have kernel IDs - assert((m_ServiceKernels.find(EntriesIt->name) != - m_ServiceKernels.end() || - m_ExportedSymbols.find(EntriesIt->name) != - m_ExportedSymbols.end()) && - "Kernel ID in device binary missing from cache"); - continue; - } - - KernelIDs.push_back(KernelID->second); - } + KernelIDs = m_BinImg2KernelIDs[BinImage]; + // If the image does not contain any non-service kernels we can skip it. + if (KernelIDs->empty()) + continue; } - // If the image does not contain any non-service kernels we can skip it. - if (KernelIDs.empty()) - continue; - - // device_image_impl expects kernel ids to be sorted for fast search - std::sort(KernelIDs.begin(), KernelIDs.end(), LessByNameComp{}); - DeviceImageImplPtr Impl = std::make_shared( BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr); @@ -1556,8 +1558,9 @@ std::vector ProgramManager::getSYCLDeviceImages( { std::lock_guard BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex); - for (const kernel_id &ID : KernelIDs) { - if (m_BuiltInKernelIDs.find(ID.get_name()) != m_BuiltInKernelIDs.end()) + for (auto &It : m_BuiltInKernelIDs) { + if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) != + KernelIDs.end()) throw sycl::exception(make_error_code(errc::kernel_argument), "Attempting to use a built-in kernel. They are " "not fully supported"); @@ -1566,19 +1569,7 @@ std::vector ProgramManager::getSYCLDeviceImages( // Collect device images with compatible state std::vector DeviceImages = - getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState); - - // Filter out images that have no kernel_ids specified - auto It = std::remove_if(DeviceImages.begin(), DeviceImages.end(), - [&KernelIDs](const device_image_plain &Image) { - return std::none_of( - KernelIDs.begin(), KernelIDs.end(), - [&Image](const sycl::kernel_id &KernelID) { - return Image.has_kernel(KernelID); - }); - }); - - DeviceImages.erase(It, DeviceImages.end()); + getSYCLDeviceImagesWithCompatibleState(Ctx, Devs, TargetState, KernelIDs); // Brind device images with compatible state to desired state bringSYCLDeviceImagesToState(DeviceImages, TargetState); @@ -1683,15 +1674,15 @@ ProgramManager::link(const std::vector &DeviceImages, Plugin.reportPiError(Error, "link()"); } - std::vector KernelIDs; + std::shared_ptr> KernelIDs{new std::vector}; for (const device_image_plain &DeviceImage : DeviceImages) { // Duplicates are not expected here, otherwise piProgramLink should fail - KernelIDs.insert(KernelIDs.end(), - getSyclObjImpl(DeviceImage)->get_kernel_ids().begin(), - getSyclObjImpl(DeviceImage)->get_kernel_ids().end()); + KernelIDs->insert(KernelIDs->end(), + getSyclObjImpl(DeviceImage)->get_kernel_ids_ref()->begin(), + getSyclObjImpl(DeviceImage)->get_kernel_ids_ref()->end()); } // device_image_impl expects kernel ids to be sorted for fast search - std::sort(KernelIDs.begin(), KernelIDs.end(), LessByNameComp{}); + std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash{}); DeviceImageImplPtr ExecutableImpl = std::make_shared( diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 1d1442c337b09..99c01373e859d 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -184,10 +184,9 @@ class ProgramManager { // The function returns a vector of SYCL device images that are compiled with // the required state and at least one device from the passed list of devices. - std::vector - getSYCLDeviceImagesWithCompatibleState(const context &Ctx, - const std::vector &Devs, - bundle_state TargetState); + std::vector getSYCLDeviceImagesWithCompatibleState( + const context &Ctx, const std::vector &Devs, + bundle_state TargetState, const std::vector &KernelIDs = {}); // Brind images in the passed vector to the required state. Does it inplace void @@ -310,7 +309,22 @@ class ProgramManager { /// TODO: Use std::unordered_set with transparent hash and equality functions /// when C++20 is enabled for the runtime library. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_map m_KernelIDs; + // + std::unordered_map m_KernelName2KernelIDs; + + // Maps KernelIDs to device binary images. There can be more than one image + // in case of SPIRV + AOT. + /// Access must be guarded by the m_KernelIDsMutex mutex. + std::unordered_multimap m_KernelIDs2BinImage; + + // Maps device binary image to a vector of kernel ids in this image. + // Using shared_ptr to avoid expensive copy of the vector. + // The vector is initialized in addImages function and is supposed to be + // immutable afterwards. + /// Access must be guarded by the m_KernelIDsMutex mutex. + std::unordered_map>> + m_BinImg2KernelIDs; /// Protects kernel ID cache. /// NOTE: This may be acquired while \ref Sync::getGlobalLock() is held so to diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index d196764ca670c..18db9a87d8525 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -218,8 +218,8 @@ bool has_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, const std::shared_ptr &DeviceImageImpl = getSyclObjImpl(DeviceImage); - CombinedKernelIDs.insert(DeviceImageImpl->get_kernel_ids_ref().begin(), - DeviceImageImpl->get_kernel_ids_ref().end()); + CombinedKernelIDs.insert(DeviceImageImpl->get_kernel_ids_ref()->begin(), + DeviceImageImpl->get_kernel_ids_ref()->end()); } const bool AllKernelIDsRepresented = diff --git a/sycl/unittests/program_manager/EliminatedArgMask.cpp b/sycl/unittests/program_manager/EliminatedArgMask.cpp index c5c3bfe4c2c61..5301ea986ad94 100644 --- a/sycl/unittests/program_manager/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/EliminatedArgMask.cpp @@ -167,11 +167,10 @@ sycl::detail::ProgramManager::KernelArgMask getKernelArgMaskFromBundle( EXPECT_TRUE(KernelBundleImplPtr) << "Expect command group to contain kernel bundle"; - auto KernelIDImpl = - std::make_shared(ExecKernel->MKernelName); - sycl::kernel SyclKernel = KernelBundleImplPtr->get_kernel( - sycl::detail::createSyclObjFromImpl(KernelIDImpl), - KernelBundleImplPtr); + auto KernelID = sycl::detail::ProgramManager::getInstance().getSYCLKernelID( + ExecKernel->MKernelName); + sycl::kernel SyclKernel = + KernelBundleImplPtr->get_kernel(KernelID, KernelBundleImplPtr); auto SyclKernelImpl = sycl::detail::getSyclObjImpl(SyclKernel); std::shared_ptr DeviceImageImpl = SyclKernelImpl->getDeviceImage(); From 88fc918ce939d8d662c6ae5d1f44c34daae1d288 Mon Sep 17 00:00:00 2001 From: Vlad Romanov Date: Wed, 9 Feb 2022 20:55:55 +0300 Subject: [PATCH 2/4] Avoid kernel id duplication --- .../program_manager/program_manager.cpp | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index a1a5e4bad496e..9576bb18a6e0b 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1104,16 +1104,22 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) { continue; // ... and create a unique kernel ID for the entry - std::shared_ptr KernelIDImpl = - std::make_shared(EntriesIt->name); - sycl::kernel_id KernelID = - detail::createSyclObjFromImpl(KernelIDImpl); + auto It = m_KernelName2KernelIDs.find(EntriesIt->name); + if(It == m_KernelName2KernelIDs.end()) { + std::shared_ptr KernelIDImpl = + std::make_shared(EntriesIt->name); + sycl::kernel_id KernelID = + detail::createSyclObjFromImpl(KernelIDImpl); + + It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, + KernelID); + } m_KernelName2KernelIDs.insert( - std::make_pair(EntriesIt->name, KernelID)); + std::make_pair(EntriesIt->name, It->second)); - m_KernelIDs2BinImage.insert(std::make_pair(KernelID, Img.get())); - m_BinImg2KernelIDs[Img.get()]->push_back(KernelID); + m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); + m_BinImg2KernelIDs[Img.get()]->push_back(It->second); } From c5c59eab4f7298b93d62ce788d6096e6af50b6f1 Mon Sep 17 00:00:00 2001 From: Vlad Romanov Date: Fri, 11 Feb 2022 13:59:08 +0300 Subject: [PATCH 3/4] Comments --- sycl/source/backend.cpp | 2 +- sycl/source/detail/device_image_impl.hpp | 11 ++- .../program_manager/program_manager.cpp | 88 +++++++++---------- .../program_manager/program_manager.hpp | 3 +- sycl/source/kernel_bundle.cpp | 4 +- 5 files changed, 54 insertions(+), 54 deletions(-) diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 5db30231415fa..37407c35e61da 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -187,7 +187,7 @@ make_kernel_bundle(pi_native_handle NativeHandle, const context &TargetContext, // this by pre-building the device image and extracting kernel info. We can't // do the same to user images, since they may contain references to undefined // symbols (e.g. when kernel_bundle is supposed to be joined with another). - std::shared_ptr> KernelIDs{new std::vector}; + auto KernelIDs = std::make_shared>(); auto DevImgImpl = std::make_shared( nullptr, TargetContext, Devices, State, KernelIDs, PiProgram); device_image_plain DevImg{DevImgImpl}; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 100f926d4788d..4886c5c8ae25a 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -57,7 +57,8 @@ class device_image_impl { device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector Devices, bundle_state State, - std::shared_ptr> KernelIDs, RT::PiProgram Program) + std::shared_ptr> KernelIDs, + RT::PiProgram Program) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::move(KernelIDs)) { @@ -66,8 +67,8 @@ class device_image_impl { device_image_impl(const RTDeviceBinaryImage *BinImage, context Context, std::vector Devices, bundle_state State, - std::shared_ptr> KernelIDs, RT::PiProgram Program, - const SpecConstMapT &SpecConstMap, + std::shared_ptr> KernelIDs, + RT::PiProgram Program, const SpecConstMapT &SpecConstMap, const std::vector &SpecConstsBlob) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), @@ -182,7 +183,9 @@ class device_image_impl { const context &get_context() const noexcept { return MContext; } - std::shared_ptr> &get_kernel_ids_ref() noexcept { return MKernelIDs; } + std::shared_ptr> &get_kernel_ids_ptr() noexcept { + return MKernelIDs; + } std::vector &get_spec_const_blob_ref() noexcept { return MSpecConstsBlob; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 9576bb18a6e0b..f7581a3a837b4 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1073,55 +1073,51 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) { // Fill maps for kernel bundles if (EntriesB != EntriesE) { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - - // Register all exported symbols - auto ExportedSymbols = Img->getExportedSymbols(); - for (const pi_device_binary_property &ExportedSymbol : ExportedSymbols) - m_ExportedSymbols.insert(ExportedSymbol->Name); + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + // Register all exported symbols + auto ExportedSymbols = Img->getExportedSymbols(); + for (const pi_device_binary_property &ExportedSymbol : ExportedSymbols) + m_ExportedSymbols.insert(ExportedSymbol->Name); - m_BinImg2KernelIDs[Img.get()].reset(new std::vector); + m_BinImg2KernelIDs[Img.get()].reset(new std::vector); + for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; + ++EntriesIt) { - for (_pi_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; - ++EntriesIt) { + // Skip creating unique kernel ID if it is a service kernel. + // SYCL service kernels are identified by having + // __sycl_service_kernel__ in the mangled name, primarily as part of + // the namespace of the name type. + if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) { + m_ServiceKernels.insert(EntriesIt->name); + continue; + } - // Skip creating unique kernel ID if it is a service kernel. - // SYCL service kernels are identified by having - // __sycl_service_kernel__ in the mangled name, primarily as part of - // the namespace of the name type. - if (std::strstr(EntriesIt->name, "__sycl_service_kernel__")) { - m_ServiceKernels.insert(EntriesIt->name); - continue; - } + // Skip creating unique kernel ID if it is an exported device + // function. Exported device functions appear in the offload entries + // among kernels, but are identifiable by being listed in properties. + if (m_ExportedSymbols.find(EntriesIt->name) != m_ExportedSymbols.end()) + continue; - // Skip creating unique kernel ID if it is an exported device - // function. Exported device functions appear in the offload entries - // among kernels, but are identifiable by being listed in properties. - if (m_ExportedSymbols.find(EntriesIt->name) != - m_ExportedSymbols.end()) - continue; - - // ... and create a unique kernel ID for the entry - auto It = m_KernelName2KernelIDs.find(EntriesIt->name); - if(It == m_KernelName2KernelIDs.end()) { - std::shared_ptr KernelIDImpl = - std::make_shared(EntriesIt->name); - sycl::kernel_id KernelID = - detail::createSyclObjFromImpl(KernelIDImpl); - - It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, - KernelID); - } + // ... and create a unique kernel ID for the entry + auto It = m_KernelName2KernelIDs.find(EntriesIt->name); + if (It == m_KernelName2KernelIDs.end()) { + std::shared_ptr KernelIDImpl = + std::make_shared(EntriesIt->name); + sycl::kernel_id KernelID = + detail::createSyclObjFromImpl(KernelIDImpl); - m_KernelName2KernelIDs.insert( - std::make_pair(EntriesIt->name, It->second)); + It = m_KernelName2KernelIDs.emplace_hint(It, EntriesIt->name, + KernelID); + } - m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); - m_BinImg2KernelIDs[Img.get()]->push_back(It->second); + m_KernelName2KernelIDs.insert( + std::make_pair(EntriesIt->name, It->second)); - } + m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); + m_BinImg2KernelIDs[Img.get()]->push_back(It->second); + } // Sort kernel ids for faster search std::sort(m_BinImg2KernelIDs[Img.get()]->begin(), @@ -1389,7 +1385,6 @@ std::vector ProgramManager::getAllSYCLKernelIDs() { std::vector AllKernelIDs; AllKernelIDs.reserve(m_KernelName2KernelIDs.size()); - // TODO: Replace with inserts of vectors from m_BinImg2KernelIDs ? for (std::pair KernelID : m_KernelName2KernelIDs) { AllKernelIDs.push_back(KernelID.second); } @@ -1616,7 +1611,7 @@ ProgramManager::compile(const device_image_plain &DeviceImage, DeviceImageImplPtr ObjectImpl = std::make_shared( InputImpl->get_bin_image_ref(), InputImpl->get_context(), Devs, - bundle_state::object, InputImpl->get_kernel_ids_ref(), Prog, + bundle_state::object, InputImpl->get_kernel_ids_ptr(), Prog, InputImpl->get_spec_const_data_ref(), InputImpl->get_spec_const_blob_ref()); @@ -1683,9 +1678,10 @@ ProgramManager::link(const std::vector &DeviceImages, std::shared_ptr> KernelIDs{new std::vector}; for (const device_image_plain &DeviceImage : DeviceImages) { // Duplicates are not expected here, otherwise piProgramLink should fail - KernelIDs->insert(KernelIDs->end(), - getSyclObjImpl(DeviceImage)->get_kernel_ids_ref()->begin(), - getSyclObjImpl(DeviceImage)->get_kernel_ids_ref()->end()); + KernelIDs->insert( + KernelIDs->end(), + getSyclObjImpl(DeviceImage)->get_kernel_ids_ptr()->begin(), + getSyclObjImpl(DeviceImage)->get_kernel_ids_ptr()->end()); } // device_image_impl expects kernel ids to be sorted for fast search std::sort(KernelIDs->begin(), KernelIDs->end(), LessByHash{}); @@ -1857,7 +1853,7 @@ device_image_plain ProgramManager::build(const device_image_plain &DeviceImage, DeviceImageImplPtr ExecImpl = std::make_shared( InputImpl->get_bin_image_ref(), Context, Devs, bundle_state::executable, - InputImpl->get_kernel_ids_ref(), ResProgram, + InputImpl->get_kernel_ids_ptr(), ResProgram, InputImpl->get_spec_const_data_ref(), InputImpl->get_spec_const_blob_ref()); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index 99c01373e859d..3fb4ef6ab239a 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -315,7 +315,8 @@ class ProgramManager { // Maps KernelIDs to device binary images. There can be more than one image // in case of SPIRV + AOT. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap m_KernelIDs2BinImage; + std::unordered_multimap + m_KernelIDs2BinImage; // Maps device binary image to a vector of kernel ids in this image. // Using shared_ptr to avoid expensive copy of the vector. diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 18db9a87d8525..21f308883bbf2 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -218,8 +218,8 @@ bool has_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, const std::shared_ptr &DeviceImageImpl = getSyclObjImpl(DeviceImage); - CombinedKernelIDs.insert(DeviceImageImpl->get_kernel_ids_ref()->begin(), - DeviceImageImpl->get_kernel_ids_ref()->end()); + CombinedKernelIDs.insert(DeviceImageImpl->get_kernel_ids_ptr()->begin(), + DeviceImageImpl->get_kernel_ids_ptr()->end()); } const bool AllKernelIDsRepresented = From a2303351667e07766498e2d079882ea97045195a Mon Sep 17 00:00:00 2001 From: Vlad Romanov Date: Thu, 17 Feb 2022 16:13:36 +0300 Subject: [PATCH 4/4] Remove unnecesary line --- sycl/source/detail/program_manager/program_manager.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 67f2eb6c77443..f7659172dbc5e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1112,9 +1112,6 @@ void ProgramManager::addImages(pi_device_binaries DeviceBinary) { KernelID); } - m_KernelName2KernelIDs.insert( - std::make_pair(EntriesIt->name, It->second)); - m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); m_BinImg2KernelIDs[Img.get()]->push_back(It->second); }