@@ -88,6 +88,7 @@ use std::fmt::Display;
 use std::intrinsics::unlikely;
 use std::path::Path;
 use std::sync::Arc;
+use std::sync::atomic::{AtomicU64, Ordering};
 use std::time::{Duration, Instant};
 use std::{fs, process};
 
@@ -105,6 +106,8 @@ bitflags::bitflags! {
     struct EventFilter: u16 {
         const GENERIC_ACTIVITIES = 1 << 0;
         const QUERY_PROVIDERS = 1 << 1;
+        /// Store a detailed instant event, including timestamp and thread ID,
+        /// for each query cache hit. Note that this is quite expensive.
         const QUERY_CACHE_HITS = 1 << 2;
         const QUERY_BLOCKED = 1 << 3;
         const INCR_CACHE_LOADS = 1 << 4;
@@ -113,16 +116,20 @@ bitflags::bitflags! {
         const FUNCTION_ARGS = 1 << 6;
         const LLVM = 1 << 7;
         const INCR_RESULT_HASHING = 1 << 8;
-        const ARTIFACT_SIZES = 1 << 9;
+        const ARTIFACT_SIZES         = 1 << 9;
+        /// Store aggregated counts of cache hits per query invocation.
+        const QUERY_CACHE_HIT_COUNTS = 1 << 10;
 
         const DEFAULT = Self::GENERIC_ACTIVITIES.bits() |
             Self::QUERY_PROVIDERS.bits() |
             Self::QUERY_BLOCKED.bits() |
             Self::INCR_CACHE_LOADS.bits() |
             Self::INCR_RESULT_HASHING.bits() |
-            Self::ARTIFACT_SIZES.bits();
+            Self::ARTIFACT_SIZES.bits() |
+            Self::QUERY_CACHE_HIT_COUNTS.bits();
 
         const ARGS = Self::QUERY_KEYS.bits() | Self::FUNCTION_ARGS.bits();
+        const QUERY_CACHE_HIT_COMBINED = Self::QUERY_CACHE_HITS.bits() | Self::QUERY_CACHE_HIT_COUNTS.bits();
     }
 }
 
@@ -134,6 +141,7 @@ const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
     ("generic-activity", EventFilter::GENERIC_ACTIVITIES),
     ("query-provider", EventFilter::QUERY_PROVIDERS),
     ("query-cache-hit", EventFilter::QUERY_CACHE_HITS),
+    ("query-cache-hit-count", EventFilter::QUERY_CACHE_HIT_COUNTS),
     ("query-blocked", EventFilter::QUERY_BLOCKED),
     ("incr-cache-load", EventFilter::INCR_CACHE_LOADS),
     ("query-keys", EventFilter::QUERY_KEYS),
@@ -411,13 +419,24 @@ impl SelfProfilerRef {
         #[inline(never)]
         #[cold]
         fn cold_call(profiler_ref: &SelfProfilerRef, query_invocation_id: QueryInvocationId) {
-            profiler_ref.instant_query_event(
-                |profiler| profiler.query_cache_hit_event_kind,
-                query_invocation_id,
-            );
+            if profiler_ref.event_filter_mask.contains(EventFilter::QUERY_CACHE_HIT_COUNTS) {
+                profiler_ref
+                    .profiler
+                    .as_ref()
+                    .unwrap()
+                    .increment_query_cache_hit_counters(QueryInvocationId(query_invocation_id.0));
+            }
+            if unlikely(profiler_ref.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS)) {
+                profiler_ref.instant_query_event(
+                    |profiler| profiler.query_cache_hit_event_kind,
+                    query_invocation_id,
+                );
+            }
         }
 
-        if unlikely(self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HITS)) {
+        // We check both kinds of query cache hit events at once, to reduce overhead in the
+        // common case (with self-profile disabled).
+        if unlikely(self.event_filter_mask.intersects(EventFilter::QUERY_CACHE_HIT_COMBINED)) {
             cold_call(self, query_invocation_id);
         }
     }
@@ -489,6 +508,30 @@ impl SelfProfilerRef {
         self.profiler.as_ref().map(|p| p.get_or_alloc_cached_string(s))
     }
 
+    /// Store query cache hits in the self-profile log.
+    /// Should be called once at the end of the compilation session.
+    ///
+    /// The cache hits are stored per **query invocation**, not per **query kind/type**.
+    /// `analyzeme` can later deduplicate individual query labels from the QueryInvocationId
+    /// event IDs.
+    pub fn store_query_cache_hits(&self) {
+        if self.event_filter_mask.contains(EventFilter::QUERY_CACHE_HIT_COUNTS) {
+            let profiler = self.profiler.as_ref().unwrap();
+            let query_hits = profiler.query_hits.read();
+            let builder = EventIdBuilder::new(&profiler.profiler);
+            let thread_id = get_thread_id();
+            for (query_invocation, hit_count) in query_hits.iter().enumerate() {
+                let event_id = builder.from_label(StringId::new_virtual(query_invocation as u64));
+                profiler.profiler.record_integer_event(
+                    profiler.query_cache_hit_count_event_kind,
+                    event_id,
+                    thread_id,
+                    hit_count.load(Ordering::Relaxed),
+                );
+            }
+        }
+    }
+
     #[inline]
     pub fn enabled(&self) -> bool {
         self.profiler.is_some()
@@ -537,13 +580,28 @@ pub struct SelfProfiler {
 
     string_cache: RwLock<FxHashMap<String, StringId>>,
 
+    /// Recording individual query cache hits as "instant" measureme events
+    /// is incredibly expensive. Instead of doing that, we simply aggregate
+    /// cache hit *counts* per query invocation, and then store the final count
+    /// of cache hits per invocation at the end of the compilation session.
+    ///
+    /// With this approach, we don't know the individual thread IDs and timestamps
+    /// of cache hits, but it has very little overhead on top of `-Zself-profile`.
+    /// Recording the cache hits as individual events made compilation 3-5x slower.
+    ///
+    /// Query invocation IDs should be monotonic integers, so we can store them in a vec,
+    /// rather than using a hashmap.
+    query_hits: RwLock<Vec<AtomicU64>>,
+
     query_event_kind: StringId,
     generic_activity_event_kind: StringId,
     incremental_load_result_event_kind: StringId,
     incremental_result_hashing_event_kind: StringId,
     query_blocked_event_kind: StringId,
     query_cache_hit_event_kind: StringId,
     artifact_size_event_kind: StringId,
+    /// Total cache hits per query invocation
+    query_cache_hit_count_event_kind: StringId,
 }
 
 impl SelfProfiler {
@@ -573,6 +631,7 @@ impl SelfProfiler {
         let query_blocked_event_kind = profiler.alloc_string("QueryBlocked");
         let query_cache_hit_event_kind = profiler.alloc_string("QueryCacheHit");
         let artifact_size_event_kind = profiler.alloc_string("ArtifactSize");
+        let query_cache_hit_count_event_kind = profiler.alloc_string("QueryCacheHitCount");
 
         let mut event_filter_mask = EventFilter::empty();
 
@@ -618,6 +677,8 @@ impl SelfProfiler {
             query_blocked_event_kind,
             query_cache_hit_event_kind,
             artifact_size_event_kind,
+            query_cache_hit_count_event_kind,
+            query_hits: Default::default(),
         })
     }
 
@@ -627,6 +688,25 @@ impl SelfProfiler {
         self.profiler.alloc_string(s)
     }
 
+    /// Store a cache hit of a query invocation.
+    pub fn increment_query_cache_hit_counters(&self, id: QueryInvocationId) {
+        // Fast path: assume that the query was already encountered before, and just record
+        // a cache hit.
+        let mut guard = self.query_hits.upgradable_read();
+        let query_hits = &guard;
+        let index = id.0 as usize;
+        if index < query_hits.len() {
+            // We only want to increment the count, no other synchronization is required.
+            query_hits[index].fetch_add(1, Ordering::Relaxed);
+        } else {
+            // If not, we need to extend the query hit map to the highest observed ID.
+            guard.with_upgraded(|vec| {
+                vec.resize_with(index + 1, || AtomicU64::new(0));
+                vec[index] = AtomicU64::from(1);
+            });
+        }
+    }
+
     /// Gets a `StringId` for the given string. This method makes sure that
     /// any strings going through it will only be allocated once in the
     /// profiling data.
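
Editor's note: to see the locking pattern of `increment_query_cache_hit_counters` in isolation, here is a minimal standalone sketch of the same grow-only counter vector. It assumes the `parking_lot` crate (this file already uses `parking_lot::RwLock` for `string_cache`); the `HitCounters` type and the `main` driver are hypothetical, for illustration only.

use std::sync::atomic::{AtomicU64, Ordering};

use parking_lot::RwLock;

/// Hypothetical standalone version of the `query_hits` field above:
/// a grow-only vector of per-invocation hit counters.
struct HitCounters {
    hits: RwLock<Vec<AtomicU64>>,
}

impl HitCounters {
    fn increment(&self, index: usize) {
        // An upgradable read guard coexists with plain readers, and can be
        // upgraded to exclusive access without being released in between.
        let mut guard = self.hits.upgradable_read();
        if index < guard.len() {
            // Fast path: the slot already exists, so a relaxed atomic add is
            // enough; the atomic allows mutation through the shared guard.
            guard[index].fetch_add(1, Ordering::Relaxed);
        } else {
            // Slow path: upgrade to a write lock, grow the vector to cover
            // the new index, and record the first hit.
            guard.with_upgraded(|vec| {
                vec.resize_with(index + 1, || AtomicU64::new(0));
                vec[index] = AtomicU64::from(1);
            });
        }
    }
}

fn main() {
    let counters = HitCounters { hits: RwLock::new(Vec::new()) };
    counters.increment(3); // slow path: grows the vector to length 4
    counters.increment(3); // fast path: relaxed increment
    assert_eq!(counters.hits.read()[3].load(Ordering::Relaxed), 2);
}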
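A note on the design choice: `parking_lot` admits only one upgradable reader at a time, so concurrent increments serialize on that guard, but plain readers (such as the final `store_query_cache_hits` pass) are never blocked by the fast path. On the command line, assuming the `EVENT_FILTERS_BY_NAME` table above is what `-Zself-profile-events` parses, the new counters are recorded by a plain `rustc -Zself-profile main.rs`, since they are part of `EventFilter::DEFAULT`, or can be named explicitly via `-Zself-profile-events=default,query-cache-hit-count`.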