Skip to content

Commit a39e56e

Browse files
committed
Change implementation of query cache hit counts
1 parent e4b9d01 commit a39e56e

File tree

2 files changed

+78
-23
lines changed

2 files changed

+78
-23
lines changed

compiler/rustc_data_structures/src/profiling.rs

Lines changed: 77 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,14 @@ use std::fmt::Display;
8888
use std::intrinsics::unlikely;
8989
use std::path::Path;
9090
use std::sync::Arc;
91+
use std::sync::atomic::{AtomicU64, Ordering};
9192
use std::time::{Duration, Instant};
9293
use std::{fs, process};
9394

9495
pub use measureme::EventId;
9596
use measureme::{EventIdBuilder, Profiler, SerializableString, StringId};
9697
use parking_lot::RwLock;
98+
use rustc_index::Idx;
9799
use smallvec::SmallVec;
98100
use tracing::warn;
99101

@@ -117,6 +119,7 @@ bitflags::bitflags! {
117119

118120
const DEFAULT = Self::GENERIC_ACTIVITIES.bits() |
119121
Self::QUERY_PROVIDERS.bits() |
122+
Self::QUERY_CACHE_HITS.bits() |
120123
Self::QUERY_BLOCKED.bits() |
121124
Self::INCR_CACHE_LOADS.bits() |
122125
Self::INCR_RESULT_HASHING.bits() |
@@ -145,8 +148,20 @@ const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
145148
];
146149

147150
/// Something that uniquely identifies a query invocation.
151+
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
148152
pub struct QueryInvocationId(pub u32);
149153

154+
impl Idx for QueryInvocationId {
155+
#[inline]
156+
fn new(idx: usize) -> Self {
157+
Self(idx as u32)
158+
}
159+
#[inline]
160+
fn index(self) -> usize {
161+
self.0 as usize
162+
}
163+
}
164+
150165
/// Which format to use for `-Z time-passes`
151166
#[derive(Clone, Copy, PartialEq, Hash, Debug)]
152167
pub enum TimePassesFormat {
@@ -411,10 +426,7 @@ impl SelfProfilerRef {
411426
#[inline(never)]
412427
#[cold]
413428
fn cold_call(profiler_ref: &SelfProfilerRef, query_invocation_id: QueryInvocationId) {
414-
profiler_ref.instant_query_event(
415-
|profiler| profiler.query_cache_hit_event_kind,
416-
query_invocation_id,
417-
);
429+
profiler_ref.profiler.as_ref().unwrap().increment_query_cache_hit(query_invocation_id);
418430
}
419431

420432
if unlikely(self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS)) {
@@ -459,22 +471,6 @@ impl SelfProfilerRef {
459471
})
460472
}
461473

462-
#[inline(always)]
463-
fn instant_query_event(
464-
&self,
465-
event_kind: fn(&SelfProfiler) -> StringId,
466-
query_invocation_id: QueryInvocationId,
467-
) {
468-
let event_id = StringId::new_virtual(query_invocation_id.0);
469-
let thread_id = get_thread_id();
470-
let profiler = self.profiler.as_ref().unwrap();
471-
profiler.profiler.record_instant_event(
472-
event_kind(profiler),
473-
EventId::from_virtual(event_id),
474-
thread_id,
475-
);
476-
}
477-
478474
pub fn with_profiler(&self, f: impl FnOnce(&SelfProfiler)) {
479475
if let Some(profiler) = &self.profiler {
480476
f(profiler)
@@ -489,6 +485,30 @@ impl SelfProfilerRef {
489485
self.profiler.as_ref().map(|p| p.get_or_alloc_cached_string(s))
490486
}
491487

488+
/// Store query cache hits to the self-profile log.
489+
/// Should be called once at the end of the compilation session.
490+
///
491+
/// The cache hits are stored per **query invocation**, not **per query kind/type**.
492+
/// `analyzeme` can later deduplicate individual query labels from the QueryInvocationId event
493+
/// IDs.
494+
pub fn store_query_cache_hits(&self) {
495+
if self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS) {
496+
let profiler = self.profiler.as_ref().unwrap();
497+
let query_hits = profiler.query_hits.read();
498+
let builder = EventIdBuilder::new(&profiler.profiler);
499+
let thread_id = get_thread_id();
500+
for (query_invocation, hit_count) in query_hits.iter().enumerate() {
501+
let event_id = builder.from_label(StringId::new_virtual(query_invocation as u64));
502+
profiler.profiler.record_integer_event(
503+
profiler.query_cache_hit_count_event_kind,
504+
event_id,
505+
thread_id,
506+
hit_count.load(Ordering::Relaxed),
507+
);
508+
}
509+
}
510+
}
511+
492512
#[inline]
493513
pub fn enabled(&self) -> bool {
494514
self.profiler.is_some()
@@ -537,13 +557,27 @@ pub struct SelfProfiler {
537557

538558
string_cache: RwLock<FxHashMap<String, StringId>>,
539559

560+
/// Recording individual query cache hits as "instant" measureme events
561+
/// is incredibly expensive. Instead of doing that, we simply aggregate
562+
/// cache hit *counts* per query invocation, and then store the final count
563+
/// of cache hits per invocation at the end of the compilation session.
564+
///
565+
/// With this approach, we don't know the individual thread IDs and timestamps
566+
/// of cache hits, but it has very little overhead on top of `-Zself-profile`.
567+
/// Recording the cache hits as individual events made compilation 3-5x slower.
568+
///
569+
/// Query invocation IDs should be monotonic integers, so we can store them in a vec,
570+
/// rather than using a hashmap.
571+
query_hits: RwLock<Vec<AtomicU64>>,
572+
540573
query_event_kind: StringId,
541574
generic_activity_event_kind: StringId,
542575
incremental_load_result_event_kind: StringId,
543576
incremental_result_hashing_event_kind: StringId,
544577
query_blocked_event_kind: StringId,
545-
query_cache_hit_event_kind: StringId,
546578
artifact_size_event_kind: StringId,
579+
/// Total cache hits per query invocation
580+
query_cache_hit_count_event_kind: StringId,
547581
}
548582

549583
impl SelfProfiler {
@@ -571,8 +605,8 @@ impl SelfProfiler {
571605
let incremental_result_hashing_event_kind =
572606
profiler.alloc_string("IncrementalResultHashing");
573607
let query_blocked_event_kind = profiler.alloc_string("QueryBlocked");
574-
let query_cache_hit_event_kind = profiler.alloc_string("QueryCacheHit");
575608
let artifact_size_event_kind = profiler.alloc_string("ArtifactSize");
609+
let query_cache_hit_count_event_kind = profiler.alloc_string("QueryCacheHitCount");
576610

577611
let mut event_filter_mask = EventFilter::empty();
578612

@@ -616,8 +650,9 @@ impl SelfProfiler {
616650
incremental_load_result_event_kind,
617651
incremental_result_hashing_event_kind,
618652
query_blocked_event_kind,
619-
query_cache_hit_event_kind,
620653
artifact_size_event_kind,
654+
query_cache_hit_count_event_kind,
655+
query_hits: Default::default(),
621656
})
622657
}
623658

@@ -627,6 +662,25 @@ impl SelfProfiler {
627662
self.profiler.alloc_string(s)
628663
}
629664

665+
/// Store a cache hit of a query invocation
666+
pub fn increment_query_cache_hit(&self, id: QueryInvocationId) {
667+
// Fast path: assume that the query was already encountered before, and just record
668+
// a cache hit.
669+
let mut guard = self.query_hits.upgradable_read();
670+
let query_hits = &guard;
671+
let index = id.0 as usize;
672+
if index < query_hits.len() {
673+
// We only want to increment the count, no other synchronization is required
674+
query_hits[index].fetch_add(1, Ordering::Relaxed);
675+
} else {
676+
// If not, we need to extend the query hit map to the highest observed ID
677+
guard.with_upgraded(|vec| {
678+
vec.resize_with(index + 1, || AtomicU64::new(0));
679+
vec[index] = AtomicU64::from(1);
680+
});
681+
}
682+
}
683+
630684
/// Gets a `StringId` for the given string. This method makes sure that
631685
/// any strings going through it will only be allocated once in the
632686
/// profiling data.

compiler/rustc_query_impl/src/profiling_support.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,4 +259,5 @@ pub fn alloc_self_profile_query_strings(tcx: TyCtxt<'_>) {
259259
for alloc in super::ALLOC_SELF_PROFILE_QUERY_STRINGS.iter() {
260260
alloc(tcx, &mut string_cache)
261261
}
262+
tcx.sess.prof.store_query_cache_hits();
262263
}

0 commit comments

Comments
 (0)