Skip to content

[PGO][OpenMP] Instrumentation for GPU devices #76587

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 36 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
530eb98
Add profiling functions to libomptarget
EthanLuisMcDonough Dec 16, 2023
fb067d4
Fix PGO instrumentation for GPU targets
EthanLuisMcDonough Dec 16, 2023
7a0e0ef
Change global visibility on GPU targets
EthanLuisMcDonough Dec 21, 2023
fddc079
Make names global public on GPU
EthanLuisMcDonough Dec 23, 2023
e9db03c
Read and print GPU device PGO globals
EthanLuisMcDonough Dec 29, 2023
aa83bd2
Merge branch 'main' into gpuprof
EthanLuisMcDonough Dec 29, 2023
e468760
Fix rebase bug
EthanLuisMcDonough Jan 3, 2024
ec18ce9
Refactor portions to be more idiomatic
EthanLuisMcDonough Jan 3, 2024
0872556
Reformat DeviceRTL prof functions
EthanLuisMcDonough Jan 3, 2024
94f47f3
Merge branch 'main' into gpuprof
EthanLuisMcDonough Jan 3, 2024
62f31d1
Style changes + catch name error
EthanLuisMcDonough Jan 9, 2024
0c4bbeb
Add GPU PGO test
EthanLuisMcDonough Jan 18, 2024
c7ae2a7
Fix PGO test formatting
EthanLuisMcDonough Jan 18, 2024
9e66bfb
Merge branch 'main' into gpuprof
EthanLuisMcDonough Jan 19, 2024
8bb2207
Refactor visibility logic
EthanLuisMcDonough Jan 19, 2024
9f13943
Add LLVM instrumentation support
EthanLuisMcDonough Jan 24, 2024
b28d4a9
Merge branch 'main' into gpuprof
EthanLuisMcDonough Jan 24, 2024
23d7fe2
Merge branch 'main' into gpuprof
EthanLuisMcDonough Feb 14, 2024
0606f0d
Use explicit addrspace instead of unqual
EthanLuisMcDonough Feb 14, 2024
23f75b2
Merge branch 'main' into gpuprof
EthanLuisMcDonough Feb 15, 2024
c1f9be3
Remove redundant namespaces
EthanLuisMcDonough Feb 16, 2024
721dac6
Merge branch 'main' into gpuprof
EthanLuisMcDonough Feb 16, 2024
6a3ae40
Clang format
EthanLuisMcDonough Feb 16, 2024
6866862
Use getAddrSpaceCast
EthanLuisMcDonough Feb 16, 2024
052394f
Revert "Use getAddrSpaceCast"
EthanLuisMcDonough Feb 27, 2024
4be80e5
Merge branch 'main' into gpuprof
EthanLuisMcDonough May 7, 2024
f6a1545
Merge branch 'main' into gpuprof
EthanLuisMcDonough May 9, 2024
1dbde8e
Merge branch 'main' into gpuprof
EthanLuisMcDonough May 13, 2024
ed2a289
Merge branch 'main' into gpuprof_ptrcastfix
EthanLuisMcDonough May 13, 2024
aa895a1
Fix elf obj file
EthanLuisMcDonough Mar 19, 2024
2031e49
Add more addrspace casts for GPU targets
EthanLuisMcDonough May 7, 2024
2ba27e8
Merge branch 'main' into gpuprof
EthanLuisMcDonough May 21, 2024
9cddcf4
Merge branch 'main' into gpuprof
EthanLuisMcDonough Jun 1, 2024
344e357
Tighten PGO test requirements
EthanLuisMcDonough Jun 1, 2024
488cb4a
Apply requested formatting changes
EthanLuisMcDonough Jun 26, 2024
b90c015
Add memop function shim to DeviceRTL
EthanLuisMcDonough Jun 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions clang/lib/CodeGen/CodeGenPGO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1197,10 +1197,15 @@ void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S,

unsigned Counter = (*RegionCounterMap)[S];

llvm::Value *Args[] = {FuncNameVar,
Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters),
Builder.getInt32(Counter), StepV};
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
auto *NormalizedFuncNameVarPtr =
llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));

llvm::Value *Args[] = {
NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash),
Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV};

if (llvm::EnableSingleByteCoverage)
Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover),
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,9 @@ __OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
__OMP_RTL(__kmpc_syncwarp, false, Void, Int64)

__OMP_RTL(__llvm_profile_register_function, false, Void, VoidPtr)
__OMP_RTL(__llvm_profile_register_names_function, false, Void, VoidPtr, Int64)

__OMP_RTL(__last, false, Void, )

#undef __OMP_RTL
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/ProfileData/InstrProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ inline StringRef getInstrProfCounterBiasVarName() {
/// Return the marker used to separate PGO names during serialization.
inline StringRef getInstrProfNameSeparator() { return "\01"; }

/// Determines whether module targets a GPU eligable for PGO
/// instrumentation
bool isGPUProfTarget(const Module &M);

/// Please use getIRPGOFuncName for LLVM IR instrumentation. This function is
/// for front-end (Clang, etc) instrumentation.
/// Return the modified name for function \c F suitable to be
Expand Down
25 changes: 20 additions & 5 deletions llvm/lib/ProfileData/InstrProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,13 +432,31 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
return VarName;
}

bool isGPUProfTarget(const Module &M) {
const auto &T = Triple(M.getTargetTriple());
return T.isAMDGPU() || T.isNVPTX();
}

void setPGOFuncVisibility(Module &M, GlobalVariable *FuncNameVar) {
// If the target is a GPU, make the symbol protected so it can
// be read from the host device
if (isGPUProfTarget(M))
FuncNameVar->setVisibility(GlobalValue::ProtectedVisibility);
// Hide the symbol so that we correctly get a copy for each executable.
else if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
}

GlobalVariable *createPGOFuncNameVar(Module &M,
GlobalValue::LinkageTypes Linkage,
StringRef PGOFuncName) {
// Ensure profiling variables on GPU are visible to be read from host
if (isGPUProfTarget(M))
Linkage = GlobalValue::ExternalLinkage;
// We generally want to match the function's linkage, but available_externally
// and extern_weak both have the wrong semantics, and anything that doesn't
// need to link across compilation units doesn't need to be visible at all.
if (Linkage == GlobalValue::ExternalWeakLinkage)
else if (Linkage == GlobalValue::ExternalWeakLinkage)
Linkage = GlobalValue::LinkOnceAnyLinkage;
else if (Linkage == GlobalValue::AvailableExternallyLinkage)
Linkage = GlobalValue::LinkOnceODRLinkage;
Expand All @@ -452,10 +470,7 @@ GlobalVariable *createPGOFuncNameVar(Module &M,
new GlobalVariable(M, Value->getType(), true, Linkage, Value,
getPGOFuncNameVarName(PGOFuncName, Linkage));

// Hide the symbol so that we correctly get a copy for each executable.
if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);

setPGOFuncVisibility(M, FuncNameVar);
return FuncNameVar;
}

Expand Down
44 changes: 34 additions & 10 deletions llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,8 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
llvm::InstrProfValueKind::IPVK_MemOPSize);
CallInst *Call = nullptr;
auto *TLI = &GetTLI(*Ind->getFunction());
auto *NormalizedDataVarPtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
DataVar, PointerType::get(M.getContext(), 0));

// To support value profiling calls within Windows exception handlers, funclet
// information contained within operand bundles needs to be copied over to
Expand All @@ -877,11 +879,13 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
SmallVector<OperandBundleDef, 1> OpBundles;
Ind->getOperandBundlesAsDefs(OpBundles);
if (!IsMemOpSize) {
Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
Builder.getInt32(Index)};
Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
OpBundles);
} else {
Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
Value *Args[3] = {Ind->getTargetValue(), NormalizedDataVarPtr,
Builder.getInt32(Index)};
Call = Builder.CreateCall(
getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
Args, OpBundles);
Expand Down Expand Up @@ -1575,7 +1579,8 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
ValuesVar->setAlignment(Align(8));
maybeSetComdat(ValuesVar, Fn, CntsVarName);
ValuesPtrExpr = ValuesVar;
ValuesPtrExpr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
ValuesVar, PointerType::get(Fn->getContext(), 0));
}

uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
Expand All @@ -1599,6 +1604,10 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);

if (isGPUProfTarget(M)) {
Linkage = GlobalValue::ExternalLinkage;
Visibility = GlobalValue::ProtectedVisibility;
}
// If the data variable is not referenced by code (if we don't emit
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
// data variable live under linker GC, the data variable can be private. This
Expand All @@ -1610,9 +1619,9 @@ void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
// If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
// that other copies must have the same CFG and cannot have value profiling.
// If no hash suffix, other profd copies may be referenced by code.
if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
(TT.isOSBinFormatELF() ||
(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
else if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
(TT.isOSBinFormatELF() ||
(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
Linkage = GlobalValue::PrivateLinkage;
Visibility = GlobalValue::DefaultVisibility;
}
Expand Down Expand Up @@ -1735,6 +1744,13 @@ void InstrLowerer::emitNameData() {
NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
GlobalValue::PrivateLinkage, NamesVal,
getInstrProfNamesVarName());

// Make names variable public if current target is a GPU
if (isGPUProfTarget(M)) {
NamesVar->setLinkage(GlobalValue::ExternalLinkage);
NamesVar->setVisibility(GlobalValue::VisibilityTypes::ProtectedVisibility);
}

NamesSize = CompressedNameStr.size();
setGlobalVariableLargeSection(TT, *NamesVar);
NamesVar->setSection(
Expand Down Expand Up @@ -1801,10 +1817,13 @@ void InstrLowerer::emitRegistration() {
IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
for (Value *Data : CompilerUsedVars)
if (!isa<Function>(Data))
IRB.CreateCall(RuntimeRegisterF, Data);
// Check for addrspace cast when profiling GPU
IRB.CreateCall(RuntimeRegisterF,
IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));
for (Value *Data : UsedVars)
if (Data != NamesVar && !isa<Function>(Data))
IRB.CreateCall(RuntimeRegisterF, Data);
IRB.CreateCall(RuntimeRegisterF,
IRB.CreatePointerBitCastOrAddrSpaceCast(Data, VoidPtrTy));

if (NamesVar) {
Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
Expand All @@ -1813,7 +1832,9 @@ void InstrLowerer::emitRegistration() {
auto *NamesRegisterF =
Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
getInstrProfNamesRegFuncName(), M);
IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});
IRB.CreateCall(NamesRegisterF, {IRB.CreatePointerBitCastOrAddrSpaceCast(
NamesVar, VoidPtrTy),
IRB.getInt64(NamesSize)});
}

IRB.CreateRetVoid();
Expand All @@ -1834,7 +1855,10 @@ bool InstrLowerer::emitRuntimeHook() {
auto *Var =
new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, getInstrProfRuntimeHookVarName());
Var->setVisibility(GlobalValue::HiddenVisibility);
if (isGPUProfTarget(M))
Var->setVisibility(GlobalValue::ProtectedVisibility);
else
Var->setVisibility(GlobalValue::HiddenVisibility);

if (TT.isOSBinFormatELF() && !TT.isPS()) {
// Mark the user variable as used so that it isn't stripped out.
Expand Down
24 changes: 18 additions & 6 deletions llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -882,14 +882,18 @@ static void instrumentOneFunc(
auto Name = FuncInfo.FuncNameVar;
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
FuncInfo.FunctionHash);
// Make sure that pointer to global is passed in with zero addrspace
// This is relevant during GPU profiling
auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
Name, PointerType::get(M->getContext(), 0));
if (PGOFunctionEntryCoverage) {
auto &EntryBB = F.getEntryBlock();
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
// llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
// i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
{Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
{NormalizedNamePtr, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
return;
}

Expand Down Expand Up @@ -944,7 +948,8 @@ static void instrumentOneFunc(
// i32 <index>)
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
{NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
Builder.getInt32(I)});
I += PGOBlockCoverage ? 8 : 1;
}

Expand All @@ -958,7 +963,8 @@ static void instrumentOneFunc(
Intrinsic::getDeclaration(M, PGOBlockCoverage
? Intrinsic::instrprof_cover
: Intrinsic::instrprof_increment),
{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
{NormalizedNamePtr, CFGHash, Builder.getInt32(NumCounters),
Builder.getInt32(I++)});
}

// Now instrument select instructions:
Expand Down Expand Up @@ -1001,11 +1007,14 @@ static void instrumentOneFunc(
ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
assert(ToProfile && "value profiling Value is of unexpected type");

auto *NormalizedNamePtr = ConstantExpr::getPointerBitCastOrAddrSpaceCast(
Name, PointerType::get(M->getContext(), 0));

SmallVector<OperandBundleDef, 1> OpBundles;
populateEHOperandBundle(Cand, BlockColors, OpBundles);
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
{FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
{NormalizedNamePtr, Builder.getInt64(FuncInfo.FunctionHash),
ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
OpBundles);
}
Expand Down Expand Up @@ -1679,10 +1688,13 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
IRBuilder<> Builder(&SI);
Type *Int64Ty = Builder.getInt64Ty();
auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
auto *NormalizedFuncNameVarPtr =
ConstantExpr::getPointerBitCastOrAddrSpaceCast(
FuncNameVar, PointerType::get(M->getContext(), 0));
Builder.CreateCall(
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
{FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
Builder.getInt32(*CurCtrIdx), Step});
{NormalizedFuncNameVarPtr, Builder.getInt64(FuncHash),
Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
++(*CurCtrIdx);
}

Expand Down
2 changes: 2 additions & 0 deletions offload/DeviceRTL/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ set(include_files
${include_directory}/Interface.h
${include_directory}/LibC.h
${include_directory}/Mapping.h
${include_directory}/Profiling.h
${include_directory}/State.h
${include_directory}/Synchronization.h
${include_directory}/Types.h
Expand All @@ -92,6 +93,7 @@ set(src_files
${source_directory}/Mapping.cpp
${source_directory}/Misc.cpp
${source_directory}/Parallelism.cpp
${source_directory}/Profiling.cpp
${source_directory}/Reduction.cpp
${source_directory}/State.cpp
${source_directory}/Synchronization.cpp
Expand Down
21 changes: 21 additions & 0 deletions offload/DeviceRTL/include/Profiling.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-------- Profiling.h - OpenMP interface ---------------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

#ifndef OMPTARGET_DEVICERTL_PROFILING_H
#define OMPTARGET_DEVICERTL_PROFILING_H

extern "C" {
void __llvm_profile_register_function(void *Ptr);
void __llvm_profile_register_names_function(void *Ptr, long int I);
void __llvm_profile_instrument_memop(long int I, void *Ptr, int I2);
}

#endif
22 changes: 22 additions & 0 deletions offload/DeviceRTL/src/Profiling.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===------- Profiling.cpp ---------------------------------------- C++ ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "Profiling.h"

#pragma omp begin declare target device_type(nohost)

extern "C" {

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe provide a comment here or in the header why we need these empty functions.
Also, style (Ptr, and I)

// Provides empty implementations for certain functions in compiler-rt
// that are emitted by the PGO instrumentation.
void __llvm_profile_register_function(void *Ptr) {}
void __llvm_profile_register_names_function(void *Ptr, long int I) {}
void __llvm_profile_instrument_memop(long int I, void *Ptr, int I2) {}
}

#pragma omp end declare target
29 changes: 28 additions & 1 deletion offload/plugins-nextgen/common/include/GlobalHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H

#include <string>
#include <type_traits>

#include "llvm/ADT/DenseMap.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"

#include "Shared/Debug.h"
#include "Shared/Utils.h"
Expand Down Expand Up @@ -55,6 +56,23 @@ class GlobalTy {
void setPtr(void *P) { Ptr = P; }
};

using IntPtrT = void *;
struct __llvm_profile_data {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
std::remove_const<Type>::type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};

/// PGO profiling data extracted from a GPU device
struct GPUProfGlobals {
SmallVector<uint8_t> NamesData;
SmallVector<SmallVector<int64_t>> Counts;
SmallVector<__llvm_profile_data> Data;
Triple TargetTriple;

void dump() const;
};

/// Subclass of GlobalTy that holds the memory for a global of \p Ty.
template <typename Ty> class StaticGlobalTy : public GlobalTy {
Ty Data;
Expand Down Expand Up @@ -164,6 +182,15 @@ class GenericGlobalHandlerTy {
return moveGlobalBetweenDeviceAndHost(Device, Image, HostGlobal,
/*D2H=*/false);
}

/// Checks whether a given image contains profiling globals.
bool hasProfilingGlobals(GenericDeviceTy &Device, DeviceImageTy &Image);

/// Reads profiling data from a GPU image to supplied profdata struct.
/// Iterates through the image symbol table and stores global values
/// with profiling prefixes.
Expected<GPUProfGlobals> readProfilingGlobals(GenericDeviceTy &Device,
DeviceImageTy &Image);
};

} // namespace plugin
Expand Down
Loading