From a7f832fb57b459dc07c577db551bf88b39fa0629 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 1 Nov 2018 06:24:27 +0100 Subject: [PATCH 1/4] [benchmark] Legacy factor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds optional `legacyFactor` to the `BenchmarkInfo`, which allows for linear modification of constants that unnecessarily inflate the base workload of benchmarks, while maintaining the continuity of long-term benchmark tracking. For example, if a benchmark uses `for _ in N*10_000` in its run function, we could lower this to `for _ in N*1_000` and add a `legacyFactor: 10` to its `BenchmarkInfo`. Note that this doesn’t affect the real measurements gathered from the `--verbose` output. The `BenchmarkDoctor` has been slightly adjusted to work with these real samples, therefore `Benchmark_Driver check` will not flag these benchmarks for slow run time reported in the summary, if their real runtimes fall into the recommended range. 
--- benchmark/scripts/Benchmark_Driver | 12 +++++++----- benchmark/scripts/test_Benchmark_Driver.py | 5 +++-- benchmark/utils/DriverUtils.swift | 4 ++++ benchmark/utils/TestsUtils.swift | 17 ++++++++++------- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 3e9cf0fe8225a..6e9af9703b4fc 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -343,7 +343,7 @@ class BenchmarkDoctor(object): setup, ratio = BenchmarkDoctor._setup_overhead(measurements) setup = 0 if ratio < 0.05 else setup runtime = min( - [(result.min - correction) for i_series in + [(result.samples.min - correction) for i_series in [BenchmarkDoctor._select(measurements, num_iters=i) for correction in [(setup / i) for i in [1, 2]] ] for result in i_series]) @@ -367,7 +367,8 @@ class BenchmarkDoctor(object): def _setup_overhead(measurements): select = BenchmarkDoctor._select ti1, ti2 = [float(min(mins)) for mins in - [[result.min for result in i_series] for i_series in + [[result.samples.min for result in i_series] + for i_series in [select(measurements, num_iters=i) for i in [1, 2]]]] setup = int(round(2.0 * (ti1 - ti2))) ratio = (setup / ti1) if ti1 > 0 else 0 @@ -439,8 +440,9 @@ class BenchmarkDoctor(object): Returns a dictionary with benchmark name and `PerformanceTestResult`s. 
""" self.log.debug('Calibrating num-samples for {0}:'.format(benchmark)) - r = self.driver.run(benchmark, num_samples=3, num_iters=1) # calibrate - num_samples = self._adjusted_1s_samples(r.min) + r = self.driver.run(benchmark, num_samples=3, num_iters=1, + verbose=True) # calibrate + num_samples = self._adjusted_1s_samples(r.samples.min) def capped(s): return min(s, 2048) @@ -449,7 +451,7 @@ class BenchmarkDoctor(object): opts = opts if isinstance(opts, list) else [opts] self.log.debug( 'Runtime {0} μs yields {1} adjusted samples per second.'.format( - r.min, num_samples)) + r.samples.min, num_samples)) self.log.debug( 'Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)'.format( benchmark, run_args[0][0], run_args[1][0])) diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py index f3adeb6fa7edd..4ff7c32a0e28c 100644 --- a/benchmark/scripts/test_Benchmark_Driver.py +++ b/benchmark/scripts/test_Benchmark_Driver.py @@ -423,7 +423,7 @@ def test_no_prefix_for_base_logging(self): def _PTR(min=700, mem_pages=1000, setup=None): """Create PerformanceTestResult Stub.""" - return Stub(min=min, mem_pages=mem_pages, setup=setup) + return Stub(samples=Stub(min=min), mem_pages=mem_pages, setup=setup) def _run(test, num_samples=None, num_iters=None, verbose=None, @@ -483,7 +483,8 @@ def test_measure_10_independent_1s_benchmark_series(self): """ driver = BenchmarkDriverMock(tests=['B1'], responses=([ # calibration run, returns a stand-in for PerformanceTestResult - (_run('B1', num_samples=3, num_iters=1), _PTR(min=300))] + + (_run('B1', num_samples=3, num_iters=1, + verbose=True), _PTR(min=300))] + # 5x i1 series, with 300 μs runtime its possible to take 4098 # samples/s, but it should be capped at 2k ([(_run('B1', num_samples=2048, num_iters=1, diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift index fc7b9211aa2eb..3785c8961fbb0 100644 --- a/benchmark/utils/DriverUtils.swift +++ 
b/benchmark/utils/DriverUtils.swift @@ -524,6 +524,10 @@ final class TestRunner { } test.tearDownFunction?() + if let lf = test.legacyFactor { + logVerbose(" Applying legacy factor: \(lf)") + samples = samples.map { $0 * lf } + } return BenchResults(samples, maxRSS: measureMemoryUsage()) } diff --git a/benchmark/utils/TestsUtils.swift b/benchmark/utils/TestsUtils.swift index 8d6f52f33de57..d698e1cdf7578 100644 --- a/benchmark/utils/TestsUtils.swift +++ b/benchmark/utils/TestsUtils.swift @@ -26,18 +26,18 @@ public enum BenchmarkCategory : String { case runtime, refcount, metadata // Other general areas of compiled code validation. case abstraction, safetychecks, exceptions, bridging, concurrency - + // Algorithms are "micro" that test some well-known algorithm in isolation: // sorting, searching, hashing, fibonaci, crypto, etc. case algorithm - + // Miniapplications are contrived to mimic some subset of application behavior // in a way that can be easily measured. They are larger than micro-benchmarks, // combining multiple APIs, data structures, or algorithms. This includes small // standardized benchmarks, pieces of real applications that have been extracted // into a benchmark, important functionality like JSON parsing, etc. case miniapplication - + // Regression benchmarks is a catch-all for less important "micro" // benchmarks. This could be a random piece of code that was attached to a bug // report. We want to make sure the optimizer as a whole continues to handle @@ -46,12 +46,12 @@ public enum BenchmarkCategory : String { // as highly as "validation" benchmarks and likely won't be the subject of // future investigation unless they significantly regress. case regression - + // Most benchmarks are assumed to be "stable" and will be regularly tracked at // each commit. A handful may be marked unstable if continually tracking them is // counterproductive. case unstable - + // CPU benchmarks represent instrinsic Swift performance. 
They are useful for // measuring a fully baked Swift implementation across different platforms and // hardware. The benchmark should also be reasonably applicable to real Swift @@ -151,16 +151,20 @@ public struct BenchmarkInfo { return _tearDownFunction } + public var legacyFactor: Int? + public init(name: String, runFunction: @escaping (Int) -> (), tags: [BenchmarkCategory], setUpFunction: (() -> ())? = nil, tearDownFunction: (() -> ())? = nil, - unsupportedPlatforms: BenchmarkPlatformSet = []) { + unsupportedPlatforms: BenchmarkPlatformSet = [], + legacyFactor: Int? = nil) { self.name = name self._runFunction = runFunction self.tags = Set(tags) self._setUpFunction = setUpFunction self._tearDownFunction = tearDownFunction self.unsupportedPlatforms = unsupportedPlatforms + self.legacyFactor = legacyFactor } /// Returns true if this benchmark should be run on the current platform. @@ -266,4 +270,3 @@ public func getString(_ s: String) -> String { return s } // The same for Substring. @inline(never) public func getSubstring(_ s: Substring) -> Substring { return s } - From 04d1384b2c795fadc32d91eeb7a6896e53fa9739 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 1 Nov 2018 06:24:57 +0100 Subject: [PATCH 2/4] [benchmark] Legacy factor AnyHashableWithAClass Lowered the base workload by a factor of 500 --- benchmark/single-source/AnyHashableWithAClass.swift | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmark/single-source/AnyHashableWithAClass.swift b/benchmark/single-source/AnyHashableWithAClass.swift index d3e2ef864df05..eac874e8d6eb1 100644 --- a/benchmark/single-source/AnyHashableWithAClass.swift +++ b/benchmark/single-source/AnyHashableWithAClass.swift @@ -25,9 +25,12 @@ import TestsUtils public var AnyHashableWithAClass = BenchmarkInfo( name: "AnyHashableWithAClass", runFunction: run_AnyHashableWithAClass, - tags: [.abstraction, .runtime, .cpubench] + tags: [.abstraction, .runtime, .cpubench], + legacyFactor: lf ) +let lf = 
500 + class TestHashableBase : Hashable { var value: Int init(_ value: Int) { @@ -55,8 +58,7 @@ class TestHashableDerived5 : TestHashableDerived4 {} @inline(never) public func run_AnyHashableWithAClass(_ N: Int) { let c = TestHashableDerived5(10) - for _ in 0...(N*500000) { + for _ in 0...(N*500000/lf) { _ = AnyHashable(c) } } - From 435e55f0c0592585d08a55b53c4207a24731d7c2 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 1 Nov 2018 06:29:14 +0100 Subject: [PATCH 3/4] [benchmark] Legacy factor ArrayOf[Generic]Ref Lowered the base workload by a factor of 10 --- benchmark/single-source/ArrayOfGenericRef.swift | 11 ++++++----- benchmark/single-source/ArrayOfRef.swift | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/benchmark/single-source/ArrayOfGenericRef.swift b/benchmark/single-source/ArrayOfGenericRef.swift index b4bfac1328dc7..a1f365084d7c3 100644 --- a/benchmark/single-source/ArrayOfGenericRef.swift +++ b/benchmark/single-source/ArrayOfGenericRef.swift @@ -13,14 +13,15 @@ // This benchmark tests creation and destruction of an array of enum // and generic type bound to nontrivial types. // -// For comparison, we always create three arrays of 10,000 words. +// For comparison, we always create three arrays of 1,000 words. 
import TestsUtils public let ArrayOfGenericRef = BenchmarkInfo( name: "ArrayOfGenericRef", runFunction: run_ArrayOfGenericRef, - tags: [.validation, .api, .Array]) + tags: [.validation, .api, .Array], + legacyFactor: 10) protocol Constructible { associatedtype Element @@ -31,8 +32,8 @@ class ConstructibleArray { init(_ e:T.Element) { array = [T]() - array.reserveCapacity(10_000) - for _ in 0...10_000 { + array.reserveCapacity(1_000) + for _ in 0...1_000 { array.append(T(e:e) as T) } } @@ -65,7 +66,7 @@ func genCommonRefArray() { class RefArray { var array: [T] - init(_ i:T, count:Int = 10_000) { + init(_ i:T, count:Int = 1_000) { array = [T](repeating: i, count: count) } } diff --git a/benchmark/single-source/ArrayOfRef.swift b/benchmark/single-source/ArrayOfRef.swift index d333487e8f107..85b11eca5954e 100644 --- a/benchmark/single-source/ArrayOfRef.swift +++ b/benchmark/single-source/ArrayOfRef.swift @@ -14,14 +14,15 @@ // references. It is meant to be a baseline for comparison against // ArrayOfGenericRef. // -// For comparison, we always create four arrays of 10,000 words. +// For comparison, we always create four arrays of 1,000 words. 
import TestsUtils public let ArrayOfRef = BenchmarkInfo( name: "ArrayOfRef", runFunction: run_ArrayOfRef, - tags: [.validation, .api, .Array]) + tags: [.validation, .api, .Array], + legacyFactor: 10) protocol Constructible { associatedtype Element @@ -32,8 +33,8 @@ class ConstructibleArray { init(_ e:T.Element) { array = [T]() - array.reserveCapacity(10_000) - for _ in 0...10_000 { + array.reserveCapacity(1_000) + for _ in 0...1_000 { array.append(T(e:e) as T) } } @@ -77,7 +78,7 @@ enum RefEnum { class RefArray { var array : [T] - init(_ i:T, count:Int = 10_000) { + init(_ i:T, count:Int = 1_000) { array = [T](repeating: i, count: count) } } From f121ee12313742713542034f210e2e11da4879b4 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 1 Nov 2018 06:32:06 +0100 Subject: [PATCH 4/4] [benchmark] Legacy factor ArraySetElement Lowered base workload by a factor of 10. --- benchmark/single-source/ArraySetElement.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/single-source/ArraySetElement.swift b/benchmark/single-source/ArraySetElement.swift index 292b6a345d4dc..9d6d59d33203e 100644 --- a/benchmark/single-source/ArraySetElement.swift +++ b/benchmark/single-source/ArraySetElement.swift @@ -18,7 +18,8 @@ import TestsUtils public var ArraySetElement = BenchmarkInfo( name: "ArraySetElement", runFunction: run_ArraySetElement, - tags: [.runtime, .cpubench, .unstable] + tags: [.runtime, .cpubench, .unstable], + legacyFactor: 10 ) // This is an effort to defeat isUniquelyReferenced optimization. Ideally @@ -29,9 +30,8 @@ func storeArrayElement(_ array: inout [Int], _ i: Int) { } public func run_ArraySetElement(_ N: Int) { - let scale = 10 var array = [Int](repeating: 0, count: 10000) - for _ in 0..