-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[MemProf] Add option to hint allocations at a given cold byte percentage #120301
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MemProf] Add option to hint allocations at a given cold byte percentage #120301
Conversation
Optionally unconditionally hint allocations as cold or not cold during the matching step if the percentage of bytes allocated is at least that of the given threshold.
@llvm/pr-subscribers-llvm-transforms Author: Teresa Johnson (teresajohnson) ChangesOptionally unconditionally hint allocations as cold or not cold during Full diff: https://github.com/llvm/llvm-project/pull/120301.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index c980869a1c0d82..497fe4b7594ea7 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -173,6 +173,10 @@ static cl::opt<std::string>
extern cl::opt<bool> MemProfReportHintedSizes;
+static cl::opt<unsigned> MinMatchedColdBytePercent(
+ "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
+ cl::desc("Min percent of cold bytes matched to hint allocation cold"));
+
// Instrumentation statistics
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
@@ -1074,6 +1078,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
// contexts. Add them to a Trie specialized for trimming the contexts to
// the minimal needed to disambiguate contexts with unique behavior.
CallStackTrie AllocTrie;
+ uint64_t TotalSize = 0;
+ uint64_t TotalColdSize = 0;
for (auto *AllocInfo : AllocInfoIter->second) {
// Check the full inlined call stack against this one.
// If we found and thus matched all frames on the call, include
@@ -1085,6 +1091,9 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes)
FullStackId = computeFullStackId(AllocInfo->CallStack);
auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
+ TotalSize += AllocInfo->Info.getTotalSize();
+ if (AllocType == AllocationType::Cold)
+ TotalColdSize += AllocInfo->Info.getTotalSize();
// Record information about the allocation if match info printing
// was requested.
if (ClPrintMemProfMatchInfo) {
@@ -1094,6 +1103,16 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
}
}
}
+ // If the threshold for the percent of cold bytes is less than 100%,
+ // and not all bytes are cold, see if we should still hint this
+ // allocation as cold without context sensitivity.
+ if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
+ TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
+ AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold,
+ "dominant");
+ continue;
+ }
+
// We might not have matched any to the full inlined call stack.
// But if we did, create and attach metadata, or a function attribute if
// all contexts have identical profiled behavior.
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index f1b361de0fbba5..7e47c8ded4e4a7 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -64,7 +64,10 @@
; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO
;; Check that the total sizes are reported if requested.
-; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZES
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZES
+
+;; Check that we hint additional allocations with a threshold < 100%
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes -memprof-matching-cold-threshold=60 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZESTHRESH60
;; Make sure we emit a random hotness seed if requested.
; RUN: llvm-profdata merge -memprof-random-hotness %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand 2>&1 | FileCheck %s --check-prefix=RAND
@@ -348,8 +351,14 @@ for.end: ; preds = %for.cond
;; For non-context sensitive allocations that get attributes we emit a message
;; with the full allocation context hash, type, and size in bytes.
-; TOTALSIZES: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10
-; TOTALSIZES: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 8525406123785421946 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 11714230664165068698 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 5725971306423925017 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 16342802530253093571 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 18254812774972004394 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 1093248920606587996 and dominant alloc type cold: 10
+; TOTALSIZESSINGLE: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10
+; TOTALSIZESSINGLE: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10
;; For context sensitive allocations the full context hash and size in bytes
;; are in separate metadata nodes included on the MIB metadata.
; TOTALSIZES: !"cold", ![[CONTEXT1:[0-9]+]]}
|
@llvm/pr-subscribers-pgo Author: Teresa Johnson (teresajohnson) ChangesOptionally unconditionally hint allocations as cold or not cold during Full diff: https://github.com/llvm/llvm-project/pull/120301.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index c980869a1c0d82..497fe4b7594ea7 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -173,6 +173,10 @@ static cl::opt<std::string>
extern cl::opt<bool> MemProfReportHintedSizes;
+static cl::opt<unsigned> MinMatchedColdBytePercent(
+ "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
+ cl::desc("Min percent of cold bytes matched to hint allocation cold"));
+
// Instrumentation statistics
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
@@ -1074,6 +1078,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
// contexts. Add them to a Trie specialized for trimming the contexts to
// the minimal needed to disambiguate contexts with unique behavior.
CallStackTrie AllocTrie;
+ uint64_t TotalSize = 0;
+ uint64_t TotalColdSize = 0;
for (auto *AllocInfo : AllocInfoIter->second) {
// Check the full inlined call stack against this one.
// If we found and thus matched all frames on the call, include
@@ -1085,6 +1091,9 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes)
FullStackId = computeFullStackId(AllocInfo->CallStack);
auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
+ TotalSize += AllocInfo->Info.getTotalSize();
+ if (AllocType == AllocationType::Cold)
+ TotalColdSize += AllocInfo->Info.getTotalSize();
// Record information about the allocation if match info printing
// was requested.
if (ClPrintMemProfMatchInfo) {
@@ -1094,6 +1103,16 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
}
}
}
+ // If the threshold for the percent of cold bytes is less than 100%,
+ // and not all bytes are cold, see if we should still hint this
+ // allocation as cold without context sensitivity.
+ if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
+ TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
+ AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold,
+ "dominant");
+ continue;
+ }
+
// We might not have matched any to the full inlined call stack.
// But if we did, create and attach metadata, or a function attribute if
// all contexts have identical profiled behavior.
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index f1b361de0fbba5..7e47c8ded4e4a7 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -64,7 +64,10 @@
; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO
;; Check that the total sizes are reported if requested.
-; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZES
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZES
+
+;; Check that we hint additional allocations with a threshold < 100%
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes -memprof-matching-cold-threshold=60 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZESTHRESH60
;; Make sure we emit a random hotness seed if requested.
; RUN: llvm-profdata merge -memprof-random-hotness %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand 2>&1 | FileCheck %s --check-prefix=RAND
@@ -348,8 +351,14 @@ for.end: ; preds = %for.cond
;; For non-context sensitive allocations that get attributes we emit a message
;; with the full allocation context hash, type, and size in bytes.
-; TOTALSIZES: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10
-; TOTALSIZES: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 8525406123785421946 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 11714230664165068698 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 5725971306423925017 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 16342802530253093571 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 18254812774972004394 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for full allocation context hash 1093248920606587996 and dominant alloc type cold: 10
+; TOTALSIZESSINGLE: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10
+; TOTALSIZESSINGLE: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10
;; For context sensitive allocations the full context hash and size in bytes
;; are in separate metadata nodes included on the MIB metadata.
; TOTALSIZES: !"cold", ![[CONTEXT1:[0-9]+]]}
|
// allocation as cold without context sensitivity. | ||
if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 && | ||
TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) { | ||
AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we account for this in FullStackIdToAllocMatchInfo
above so that we can print more accurate statistics?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All of the contexts matched and in the trie are already entered into this map above, which only is used to report whether contexts were matched, not how they were ultimately hinted. If hint reporting is enabled, addSingleAllocTypeAttribute called here will report the hinting as cold.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
Optionally unconditionally hint allocations as cold or not cold during
the matching step if the percentage of bytes allocated is at least that
of the given threshold.