From 63a2c7640c14d4473207dce32b8eb073c80ec3e9 Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Fri, 12 Aug 2022 09:37:02 -0700 Subject: [PATCH] Introduce ExternalTypeRefCache LLDB would like to cache typeref information to accelerate finding type information. This patch adds an optional interface that allows users of typeref to register and provide field descriptor information for faster lookups. --- include/swift/Reflection/ReflectionContext.h | 63 ++++++++------- include/swift/Reflection/TypeRefBuilder.h | 22 +++++- include/swift/Remote/ExternalTypeRefCache.h | 75 ++++++++++++++++++ stdlib/public/Reflection/TypeRefBuilder.cpp | 79 ++++++++++++++++--- .../SwiftRemoteMirror/SwiftRemoteMirror.cpp | 2 +- 5 files changed, 197 insertions(+), 44 deletions(-) create mode 100644 include/swift/Remote/ExternalTypeRefCache.h diff --git a/include/swift/Reflection/ReflectionContext.h b/include/swift/Reflection/ReflectionContext.h index 125a42029e705..77a32127d0ddb 100644 --- a/include/swift/Reflection/ReflectionContext.h +++ b/include/swift/Reflection/ReflectionContext.h @@ -211,9 +211,10 @@ class ReflectionContext uint32_t ThreadPort; }; - explicit ReflectionContext(std::shared_ptr reader) - : super(std::move(reader), *this) - {} + explicit ReflectionContext( + std::shared_ptr reader, + remote::ExternalTypeRefCache *externalCache = nullptr) + : super(std::move(reader), *this, externalCache) {} ReflectionContext(const ReflectionContext &other) = delete; ReflectionContext &operator=(const ReflectionContext &other) = delete; @@ -227,8 +228,10 @@ class ReflectionContext return sizeof(StoredPointer) * 2; } + /// On success returns the ID of the newly registered Reflection Info. 
template - bool readMachOSections( + llvm::Optional + readMachOSections( RemoteAddress ImageStart, llvm::SmallVector PotentialModuleNames = {}) { auto Buf = @@ -350,7 +353,7 @@ class ReflectionContext {MPEnumMdSec.first, MPEnumMdSec.second}, PotentialModuleNames}; - this->addReflectionInfo(info); + auto InfoID = this->addReflectionInfo(info); // Find the __DATA segment. for (unsigned I = 0; I < NumCommands; ++I) { @@ -374,10 +377,11 @@ class ReflectionContext savedBuffers.push_back(std::move(Buf)); savedBuffers.push_back(std::move(Sections)); - return true; + return InfoID; } - bool readPECOFFSections( + /// On success returns the ID of the newly registered Reflection Info. + llvm::Optional readPECOFFSections( RemoteAddress ImageStart, llvm::SmallVector PotentialModuleNames = {}) { auto DOSHdrBuf = this->getReader().readBytes( @@ -477,11 +481,11 @@ class ReflectionContext {ConformMdSec.first, ConformMdSec.second}, {MPEnumMdSec.first, MPEnumMdSec.second}, PotentialModuleNames}; - this->addReflectionInfo(Info); - return true; + return this->addReflectionInfo(Info); } - bool readPECOFF(RemoteAddress ImageStart, + /// On success returns the ID of the newly registered Reflection Info. + llvm::Optional readPECOFF(RemoteAddress ImageStart, llvm::SmallVector PotentialModuleNames = {}) { auto Buf = this->getReader().readBytes(ImageStart, sizeof(llvm::object::dos_header)); @@ -504,8 +508,9 @@ class ReflectionContext return readPECOFFSections(ImageStart, PotentialModuleNames); } + /// On success returns the ID of the newly registered Reflection Info. template - bool readELFSections( + llvm::Optional readELFSections( RemoteAddress ImageStart, llvm::Optional FileBuffer, llvm::SmallVector PotentialModuleNames = {}) { @@ -673,8 +678,7 @@ class ReflectionContext {MPEnumMdSec.first, MPEnumMdSec.second}, PotentialModuleNames}; - this->addReflectionInfo(info); - return true; + return this->addReflectionInfo(info); } /// Parses metadata information from an ELF image. 
Because the Section @@ -693,22 +697,22 @@ class ReflectionContext /// instance's memory reader. /// /// \return - /// /b True if the metadata information was parsed successfully, - /// /b false otherwise. - bool + /// \b The newly added reflection info ID if successful, + /// \b llvm::None otherwise. + llvm::Optional readELF(RemoteAddress ImageStart, llvm::Optional FileBuffer, llvm::SmallVector PotentialModuleNames = {}) { auto Buf = this->getReader().readBytes(ImageStart, sizeof(llvm::ELF::Elf64_Ehdr)); if (!Buf) - return false; + return llvm::None; // Read the header. auto Hdr = reinterpret_cast(Buf.get()); if (!Hdr->checkMagic()) - return false; + return llvm::None; // Check if we have a ELFCLASS32 or ELFCLASS64 unsigned char FileClass = Hdr->getFileClass(); @@ -719,11 +723,12 @@ class ReflectionContext return readELFSections>( ImageStart, FileBuffer, PotentialModuleNames); } else { - return false; + return llvm::None; } } - bool + /// On success returns the ID of the newly registered Reflection Info. + llvm::Optional addImage(RemoteAddress ImageStart, llvm::SmallVector PotentialModuleNames = {}) { // Read the first few bytes to look for a magic header. @@ -761,7 +766,7 @@ class ReflectionContext } // We don't recognize the format. - return false; + return llvm::None; } /// Adds an image using the FindSection closure to find the swift metadata @@ -770,9 +775,9 @@ class ReflectionContext /// of freeing the memory buffer in the RemoteRef return value. /// process. /// \return - /// \b True if any of the reflection sections were registered, - /// \b false otherwise. - bool + /// \b The newly added reflection info ID if successful, + /// \b llvm::None otherwise. + llvm::Optional addImage(llvm::function_ref< std::pair, uint64_t>(ReflectionSectionKind)> FindSection, @@ -798,7 +803,7 @@ class ReflectionContext // If we didn't find any sections, return. 
if (llvm::all_of(Pairs, [](const auto &Pair) { return !Pair.first; })) - return false; + return {}; ReflectionInfo Info = {{Pairs[0].first, Pairs[0].second}, {Pairs[1].first, Pairs[1].second}, @@ -809,12 +814,12 @@ class ReflectionContext {Pairs[6].first, Pairs[6].second}, {Pairs[7].first, Pairs[7].second}, PotentialModuleNames}; - this->addReflectionInfo(Info); - return true; + return addReflectionInfo(Info); } - void addReflectionInfo(ReflectionInfo I) { - getBuilder().addReflectionInfo(I); + /// Adds the reflection info and returns its ID. + uint32_t addReflectionInfo(ReflectionInfo I) { + return getBuilder().addReflectionInfo(I); } bool ownsObject(RemoteAddress ObjectAddress) { diff --git a/include/swift/Reflection/TypeRefBuilder.h b/include/swift/Reflection/TypeRefBuilder.h index 83731f7a802bd..33ddfbb8ed5e8 100644 --- a/include/swift/Reflection/TypeRefBuilder.h +++ b/include/swift/Reflection/TypeRefBuilder.h @@ -18,6 +18,7 @@ #ifndef SWIFT_REFLECTION_TYPEREFBUILDER_H #define SWIFT_REFLECTION_TYPEREFBUILDER_H +#include "swift/Remote/ExternalTypeRefCache.h" #include "swift/Remote/MetadataReader.h" #include "swift/Reflection/MetadataSourceBuilder.h" #include "swift/Reflection/Records.h" @@ -426,6 +427,8 @@ class TypeRefBuilder { TypeConverter TC; MetadataSourceBuilder MSB; + remote::ExternalTypeRefCache *ExternalTypeRefCache = nullptr; + #define TYPEREF(Id, Parent) \ std::unordered_map Id##TypeRefs; @@ -915,10 +918,17 @@ class TypeRefBuilder { /// Parsing reflection metadata /// - void addReflectionInfo(ReflectionInfo I) { + /// Add the ReflectionInfo and return a unique ID for the reflection image + /// added. Since we only add reflection infos, the ID can be its index. + /// We return a uint32_t since it's extremely unlikely we'll run out of + /// indexes. 
+ uint32_t addReflectionInfo(ReflectionInfo I) { ReflectionInfos.push_back(I); + auto InfoID = ReflectionInfos.size() - 1; + assert(InfoID <= UINT32_MAX && "ReflectionInfo ID overflow"); + return InfoID; } - + const std::vector &getReflectionInfos() { return ReflectionInfos; } @@ -943,6 +953,9 @@ class TypeRefBuilder { llvm::Optional> findFieldDescriptorAtIndex(size_t Index, const std::string &MangledName); + llvm::Optional> + getFieldDescriptorFromExternalCache(const std::string &MangledName); + public: RemoteRef readTypeRef(uint64_t remoteAddr); @@ -983,8 +996,9 @@ class TypeRefBuilder { public: template - TypeRefBuilder(remote::MetadataReader &reader) - : TC(*this), + TypeRefBuilder(remote::MetadataReader &reader, + remote::ExternalTypeRefCache *externalCache = nullptr) + : TC(*this), ExternalTypeRefCache(externalCache), PointerSize(sizeof(typename Runtime::StoredPointer)), TypeRefDemangler( [this, &reader](RemoteRef string, bool useOpaqueTypeSymbolicReferences) -> Demangle::Node * { diff --git a/include/swift/Remote/ExternalTypeRefCache.h b/include/swift/Remote/ExternalTypeRefCache.h new file mode 100644 index 0000000000000..8ac79f136a3e6 --- /dev/null +++ b/include/swift/Remote/ExternalTypeRefCache.h @@ -0,0 +1,75 @@ +//===--- ExternalTypeRefCache.h - Abstract access to external caches of +//typeref ------*- C++ -*-===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===----------------------------------------------------------------------===// +// +/// @file +/// This file declares an abstract interface for external caches of +/// typeref information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SWIFT_REMOTE_EXTERNALTYPEREFCACHE_H +#define SWIFT_REMOTE_EXTERNALTYPEREFCACHE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Optional.h" + +#include + +namespace swift { +namespace reflection { + +template +class ReflectionSection; +class FieldDescriptorIterator; +using FieldSection = ReflectionSection; +} + +namespace remote { +/// A struct with the information required to locate a specific field +/// descriptor. +struct FieldDescriptorLocator { + /// The reflection info ID the field descriptor belongs to. + uint64_t InfoID; + + /// The offset of the field descriptor in the FieldSection buffer. + uint64_t Offset; +}; + +/// An abstract interface for providing external type layout information. +struct ExternalTypeRefCache { + virtual ~ExternalTypeRefCache() = default; + + /// Cache the field descriptors of a reflection info with a given id with + /// their corresponding mangled names. The number of field descriptors and + /// mangled names must be the same. If a field descriptor does not have a + /// mangled name a corresponding empty string must be in the MangledNames + /// array. + virtual void + cacheFieldDescriptors(uint64_t InfoID, + const swift::reflection::FieldSection &FieldDescriptors, + llvm::ArrayRef MangledNames) = 0; + + /// Retrieve a pair representing the reflection info id and the offset of a + /// field descriptor in the field section buffer, if available. + virtual llvm::Optional + getFieldDescriptorLocator(const std::string &Name) = 0; + + /// Returns whether the reflection info with the corresponding ID has been + /// cached already. 
+ virtual bool isReflectionInfoCached(uint64_t InfoID) = 0; +}; + +} // namespace remote +} // namespace swift +#endif diff --git a/stdlib/public/Reflection/TypeRefBuilder.cpp b/stdlib/public/Reflection/TypeRefBuilder.cpp index 1e18379972426..ad376c92878de 100644 --- a/stdlib/public/Reflection/TypeRefBuilder.cpp +++ b/stdlib/public/Reflection/TypeRefBuilder.cpp @@ -221,22 +221,32 @@ void TypeRefBuilder::populateFieldTypeInfoCacheWithReflectionAtIndex( if (ProcessedReflectionInfoIndexes.contains(Index)) return; + llvm::SmallVector Names; const auto &Info = ReflectionInfos[Index]; for (auto FD : Info.Field) { - if (!FD->hasMangledTypeName()) - continue; - auto CandidateMangledName = readTypeRef(FD, FD->MangledTypeName); - if (auto NormalizedName = normalizeReflectionName(CandidateMangledName)) { - FieldTypeInfoCache[std::move(*NormalizedName)] = FD; + if (FD->hasMangledTypeName()) { + auto CandidateMangledName = readTypeRef(FD, FD->MangledTypeName); + if (auto NormalizedName = normalizeReflectionName(CandidateMangledName)) { + if (ExternalTypeRefCache) + Names.push_back(*NormalizedName); + FieldTypeInfoCache[std::move(*NormalizedName)] = FD; + } + } else if (ExternalTypeRefCache) { + // Mark the lack of a mangled name for this field descriptor with an empty + // string. 
+ Names.push_back(""); } } + if (ExternalTypeRefCache) + ExternalTypeRefCache->cacheFieldDescriptors(Index, Info.Field, Names); + ProcessedReflectionInfoIndexes.insert(Index); } llvm::Optional> -TypeRefBuilder::findFieldDescriptorAtIndex(size_t Index, - const std::string &MangledName) { +TypeRefBuilder::findFieldDescriptorAtIndex( + size_t Index, const std::string &MangledName) { populateFieldTypeInfoCacheWithReflectionAtIndex(Index); auto Found = FieldTypeInfoCache.find(MangledName); if (Found != FieldTypeInfoCache.end()) { @@ -245,6 +255,41 @@ TypeRefBuilder::findFieldDescriptorAtIndex(size_t Index, return llvm::None; } +llvm::Optional> +TypeRefBuilder::getFieldDescriptorFromExternalCache( + const std::string &MangledName) { + if (!ExternalTypeRefCache) + return llvm::None; + + if (auto Locator = ExternalTypeRefCache->getFieldDescriptorLocator(MangledName)) { + if (Locator->InfoID >= ReflectionInfos.size()) + return llvm::None; + + auto &Field = ReflectionInfos[Locator->InfoID].Field; + auto Addr = Field.startAddress().getAddressData() + Locator->Offset; + + // Validate that we've got the correct field descriptor offset by parsing + // the mangled name for that specific offset and making sure it's the one + // we're looking for. 
+ for (auto FD : Field) { + if (FD.getAddressData() == Addr) { + if (!FD->hasMangledTypeName()) + break; + auto CandidateMangledName = readTypeRef(FD, FD->MangledTypeName); + if (auto NormalizedName = + normalizeReflectionName(CandidateMangledName)) { + FieldTypeInfoCache[std::move(*NormalizedName)] = FD; + break; + } + } + } + auto Found = FieldTypeInfoCache.find(MangledName); + if (Found != FieldTypeInfoCache.end()) + return Found->second; + } + return llvm::None; +} + RemoteRef TypeRefBuilder::getFieldTypeInfo(const TypeRef *TR) { const std::string *MangledName; NodePointer Node; @@ -263,23 +308,37 @@ RemoteRef TypeRefBuilder::getFieldTypeInfo(const TypeRef *TR) { if (Found != FieldTypeInfoCache.end()) return Found->second; + if (auto FD = getFieldDescriptorFromExternalCache(*MangledName)) + return *FD; + // Heuristic: find the outermost Module node available, and try to parse the // ReflectionInfos with a matching name first. auto ModuleName = FindOutermostModuleName(Node); // If we couldn't find a module name or the type is imported (__C module) we // don't any useful information on which image to look for the type. - if (ModuleName && ModuleName != llvm::StringRef("__C")) - for (size_t i = 0; i < ReflectionInfos.size(); ++i) + if (ModuleName && !ModuleName->equals("__C")) { + for (size_t i = 0; i < ReflectionInfos.size(); ++i) { + // If the external cache already has the contents of this reflection info, + // and the previous lookup in the cache failed, then the field descriptor + // we're looking for isn't in this reflection info. + if (ExternalTypeRefCache && + ExternalTypeRefCache->isReflectionInfoCached(i)) + continue; if (llvm::is_contained(ReflectionInfos[i].PotentialModuleNames, ModuleName)) if (auto FD = findFieldDescriptorAtIndex(i, *MangledName)) return *FD; + } + } // On failure, fill out the cache, ReflectionInfo by ReflectionInfo, // until we find the field descriptor we're looking for. 
- for (size_t i = 0; i < ReflectionInfos.size(); ++i) + for (size_t i = 0; i < ReflectionInfos.size(); ++i) { + if (ExternalTypeRefCache && ExternalTypeRefCache->isReflectionInfoCached(i)) + continue; if (auto FD = findFieldDescriptorAtIndex(i, *MangledName)) return *FD; + } return nullptr; } diff --git a/stdlib/public/SwiftRemoteMirror/SwiftRemoteMirror.cpp b/stdlib/public/SwiftRemoteMirror/SwiftRemoteMirror.cpp index 6eb1577f2f602..64d61e2f620e6 100644 --- a/stdlib/public/SwiftRemoteMirror/SwiftRemoteMirror.cpp +++ b/stdlib/public/SwiftRemoteMirror/SwiftRemoteMirror.cpp @@ -278,7 +278,7 @@ int swift_reflection_addImage(SwiftReflectionContextRef ContextRef, swift_addr_t imageStart) { auto Context = ContextRef->nativeContext; - return Context->addImage(RemoteAddress(imageStart)); + return Context->addImage(RemoteAddress(imageStart)).hasValue(); } int