@@ -88,12 +88,14 @@ use std::fmt::Display;
88
88
use std:: intrinsics:: unlikely;
89
89
use std:: path:: Path ;
90
90
use std:: sync:: Arc ;
91
+ use std:: sync:: atomic:: { AtomicU64 , Ordering } ;
91
92
use std:: time:: { Duration , Instant } ;
92
93
use std:: { fs, process} ;
93
94
94
95
pub use measureme:: EventId ;
95
96
use measureme:: { EventIdBuilder , Profiler , SerializableString , StringId } ;
96
97
use parking_lot:: RwLock ;
98
+ use rustc_index:: Idx ;
97
99
use smallvec:: SmallVec ;
98
100
use tracing:: warn;
99
101
@@ -117,6 +119,7 @@ bitflags::bitflags! {
117
119
118
120
const DEFAULT = Self :: GENERIC_ACTIVITIES . bits( ) |
119
121
Self :: QUERY_PROVIDERS . bits( ) |
122
+ Self :: QUERY_CACHE_HITS . bits( ) |
120
123
Self :: QUERY_BLOCKED . bits( ) |
121
124
Self :: INCR_CACHE_LOADS . bits( ) |
122
125
Self :: INCR_RESULT_HASHING . bits( ) |
@@ -145,8 +148,20 @@ const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[
145
148
] ;
146
149
147
150
/// Something that uniquely identifies a query invocation.
151
+ #[ derive( PartialEq , Eq , Hash , Copy , Clone , Debug ) ]
148
152
pub struct QueryInvocationId ( pub u32 ) ;
149
153
154
+ impl Idx for QueryInvocationId {
155
+ #[ inline]
156
+ fn new ( idx : usize ) -> Self {
157
+ Self ( idx as u32 )
158
+ }
159
+ #[ inline]
160
+ fn index ( self ) -> usize {
161
+ self . 0 as usize
162
+ }
163
+ }
164
+
150
165
/// Which format to use for `-Z time-passes`
151
166
#[ derive( Clone , Copy , PartialEq , Hash , Debug ) ]
152
167
pub enum TimePassesFormat {
@@ -411,10 +426,7 @@ impl SelfProfilerRef {
411
426
#[ inline( never) ]
412
427
#[ cold]
413
428
fn cold_call ( profiler_ref : & SelfProfilerRef , query_invocation_id : QueryInvocationId ) {
414
- profiler_ref. instant_query_event (
415
- |profiler| profiler. query_cache_hit_event_kind ,
416
- query_invocation_id,
417
- ) ;
429
+ profiler_ref. profiler . as_ref ( ) . unwrap ( ) . increment_query_cache_hit ( query_invocation_id) ;
418
430
}
419
431
420
432
if unlikely ( self . event_filter_mask . contains ( EventFilter :: QUERY_CACHE_HITS ) ) {
@@ -459,22 +471,6 @@ impl SelfProfilerRef {
459
471
} )
460
472
}
461
473
462
- #[ inline( always) ]
463
- fn instant_query_event (
464
- & self ,
465
- event_kind : fn ( & SelfProfiler ) -> StringId ,
466
- query_invocation_id : QueryInvocationId ,
467
- ) {
468
- let event_id = StringId :: new_virtual ( query_invocation_id. 0 ) ;
469
- let thread_id = get_thread_id ( ) ;
470
- let profiler = self . profiler . as_ref ( ) . unwrap ( ) ;
471
- profiler. profiler . record_instant_event (
472
- event_kind ( profiler) ,
473
- EventId :: from_virtual ( event_id) ,
474
- thread_id,
475
- ) ;
476
- }
477
-
478
474
pub fn with_profiler ( & self , f : impl FnOnce ( & SelfProfiler ) ) {
479
475
if let Some ( profiler) = & self . profiler {
480
476
f ( profiler)
@@ -489,6 +485,30 @@ impl SelfProfilerRef {
489
485
self . profiler . as_ref ( ) . map ( |p| p. get_or_alloc_cached_string ( s) )
490
486
}
491
487
488
+ /// Store query cache hits to the self-profile log.
489
+ /// Should be called once at the end of the compilation session.
490
+ ///
491
+ /// The cache hits are stored per **query invocation**, not **per query kind/type**.
492
+ /// `analyzeme` can later deduplicate individual query labels from the QueryInvocationId event
493
+ /// IDs.
494
+ pub fn store_query_cache_hits ( & self ) {
495
+ if self . event_filter_mask . contains ( EventFilter :: QUERY_CACHE_HITS ) {
496
+ let profiler = self . profiler . as_ref ( ) . unwrap ( ) ;
497
+ let query_hits = profiler. query_hits . read ( ) ;
498
+ let builder = EventIdBuilder :: new ( & profiler. profiler ) ;
499
+ let thread_id = get_thread_id ( ) ;
500
+ for ( query_invocation, hit_count) in query_hits. iter ( ) . enumerate ( ) {
501
+ let event_id = builder. from_label ( StringId :: new_virtual ( query_invocation as u64 ) ) ;
502
+ profiler. profiler . record_integer_event (
503
+ profiler. query_cache_hit_count_event_kind ,
504
+ event_id,
505
+ thread_id,
506
+ hit_count. load ( Ordering :: Relaxed ) ,
507
+ ) ;
508
+ }
509
+ }
510
+ }
511
+
492
512
#[ inline]
493
513
pub fn enabled ( & self ) -> bool {
494
514
self . profiler . is_some ( )
@@ -537,13 +557,27 @@ pub struct SelfProfiler {
537
557
538
558
string_cache : RwLock < FxHashMap < String , StringId > > ,
539
559
560
+ /// Recording individual query cache hits as "instant" measureme events
561
+ /// is incredibly expensive. Instead of doing that, we simply aggregate
562
+ /// cache hit *counts* per query invocation, and then store the final count
563
+ /// of cache hits per invocation at the end of the compilation session.
564
+ ///
565
+ /// With this approach, we don't know the individual thread IDs and timestamps
566
+ /// of cache hits, but it has very little overhead on top of `-Zself-profile`.
567
+ /// Recording the cache hits as individual events made compilation 3-5x slower.
568
+ ///
569
+ /// Query invocation IDs should be monotonic integers, so we can store them in a vec,
570
+ /// rather than using a hashmap.
571
+ query_hits : RwLock < Vec < AtomicU64 > > ,
572
+
540
573
query_event_kind : StringId ,
541
574
generic_activity_event_kind : StringId ,
542
575
incremental_load_result_event_kind : StringId ,
543
576
incremental_result_hashing_event_kind : StringId ,
544
577
query_blocked_event_kind : StringId ,
545
- query_cache_hit_event_kind : StringId ,
546
578
artifact_size_event_kind : StringId ,
579
+ /// Total cache hits per query invocation
580
+ query_cache_hit_count_event_kind : StringId ,
547
581
}
548
582
549
583
impl SelfProfiler {
@@ -571,8 +605,8 @@ impl SelfProfiler {
571
605
let incremental_result_hashing_event_kind =
572
606
profiler. alloc_string ( "IncrementalResultHashing" ) ;
573
607
let query_blocked_event_kind = profiler. alloc_string ( "QueryBlocked" ) ;
574
- let query_cache_hit_event_kind = profiler. alloc_string ( "QueryCacheHit" ) ;
575
608
let artifact_size_event_kind = profiler. alloc_string ( "ArtifactSize" ) ;
609
+ let query_cache_hit_count_event_kind = profiler. alloc_string ( "QueryCacheHitCount" ) ;
576
610
577
611
let mut event_filter_mask = EventFilter :: empty ( ) ;
578
612
@@ -616,8 +650,9 @@ impl SelfProfiler {
616
650
incremental_load_result_event_kind,
617
651
incremental_result_hashing_event_kind,
618
652
query_blocked_event_kind,
619
- query_cache_hit_event_kind,
620
653
artifact_size_event_kind,
654
+ query_cache_hit_count_event_kind,
655
+ query_hits : Default :: default ( ) ,
621
656
} )
622
657
}
623
658
@@ -627,6 +662,25 @@ impl SelfProfiler {
627
662
self . profiler . alloc_string ( s)
628
663
}
629
664
665
+ /// Store a cache hit of a query invocation
666
+ pub fn increment_query_cache_hit ( & self , id : QueryInvocationId ) {
667
+ // Fast path: assume that the query was already encountered before, and just record
668
+ // a cache hit.
669
+ let mut guard = self . query_hits . upgradable_read ( ) ;
670
+ let query_hits = & guard;
671
+ let index = id. 0 as usize ;
672
+ if index < query_hits. len ( ) {
673
+ // We only want to increment the count, no other synchronization is required
674
+ query_hits[ index] . fetch_add ( 1 , Ordering :: Relaxed ) ;
675
+ } else {
676
+ // If not, we need to extend the query hit map to the highest observed ID
677
+ guard. with_upgraded ( |vec| {
678
+ vec. resize_with ( index + 1 , || AtomicU64 :: new ( 0 ) ) ;
679
+ vec[ index] = AtomicU64 :: from ( 1 ) ;
680
+ } ) ;
681
+ }
682
+ }
683
+
630
684
/// Gets a `StringId` for the given string. This method makes sure that
631
685
/// any strings going through it will only be allocated once in the
632
686
/// profiling data.
0 commit comments