From a7f832fb57b459dc07c577db551bf88b39fa0629 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 1 Nov 2018 06:24:27 +0100 Subject: [PATCH 1/4] [benchmark] Legacy factor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds optional `legacyFactor` to the `BenchmarkInfo`, which allows for linear modification of constants that unnecessarily inflate the base workload of benchmarks, while maintaining the continuity of long-term benchmark tracking. For example, if a benchmark uses `for _ in N*10_000` in its run function, we could lower this to `for _ in N*1_000` and add a `legacyFactor: 10` to its `BenchmarkInfo`. Note that this doesn’t affect the real measurements gathered from the `--verbose` output. The `BenchmarkDoctor` has been slightly adjusted to work with these real samples, therefore `Benchmark_Driver check` will not flag these benchmarks for slow run time reported in the summary, if their real runtimes fall into the recommended range. 
--- benchmark/scripts/Benchmark_Driver | 12 +++++++----- benchmark/scripts/test_Benchmark_Driver.py | 5 +++-- benchmark/utils/DriverUtils.swift | 4 ++++ benchmark/utils/TestsUtils.swift | 17 ++++++++++------- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/benchmark/scripts/Benchmark_Driver b/benchmark/scripts/Benchmark_Driver index 3e9cf0fe8225a..6e9af9703b4fc 100755 --- a/benchmark/scripts/Benchmark_Driver +++ b/benchmark/scripts/Benchmark_Driver @@ -343,7 +343,7 @@ class BenchmarkDoctor(object): setup, ratio = BenchmarkDoctor._setup_overhead(measurements) setup = 0 if ratio < 0.05 else setup runtime = min( - [(result.min - correction) for i_series in + [(result.samples.min - correction) for i_series in [BenchmarkDoctor._select(measurements, num_iters=i) for correction in [(setup / i) for i in [1, 2]] ] for result in i_series]) @@ -367,7 +367,8 @@ class BenchmarkDoctor(object): def _setup_overhead(measurements): select = BenchmarkDoctor._select ti1, ti2 = [float(min(mins)) for mins in - [[result.min for result in i_series] for i_series in + [[result.samples.min for result in i_series] + for i_series in [select(measurements, num_iters=i) for i in [1, 2]]]] setup = int(round(2.0 * (ti1 - ti2))) ratio = (setup / ti1) if ti1 > 0 else 0 @@ -439,8 +440,9 @@ class BenchmarkDoctor(object): Returns a dictionary with benchmark name and `PerformanceTestResult`s. 
""" self.log.debug('Calibrating num-samples for {0}:'.format(benchmark)) - r = self.driver.run(benchmark, num_samples=3, num_iters=1) # calibrate - num_samples = self._adjusted_1s_samples(r.min) + r = self.driver.run(benchmark, num_samples=3, num_iters=1, + verbose=True) # calibrate + num_samples = self._adjusted_1s_samples(r.samples.min) def capped(s): return min(s, 2048) @@ -449,7 +451,7 @@ class BenchmarkDoctor(object): opts = opts if isinstance(opts, list) else [opts] self.log.debug( 'Runtime {0} μs yields {1} adjusted samples per second.'.format( - r.min, num_samples)) + r.samples.min, num_samples)) self.log.debug( 'Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)'.format( benchmark, run_args[0][0], run_args[1][0])) diff --git a/benchmark/scripts/test_Benchmark_Driver.py b/benchmark/scripts/test_Benchmark_Driver.py index f3adeb6fa7edd..4ff7c32a0e28c 100644 --- a/benchmark/scripts/test_Benchmark_Driver.py +++ b/benchmark/scripts/test_Benchmark_Driver.py @@ -423,7 +423,7 @@ def test_no_prefix_for_base_logging(self): def _PTR(min=700, mem_pages=1000, setup=None): """Create PerformanceTestResult Stub.""" - return Stub(min=min, mem_pages=mem_pages, setup=setup) + return Stub(samples=Stub(min=min), mem_pages=mem_pages, setup=setup) def _run(test, num_samples=None, num_iters=None, verbose=None, @@ -483,7 +483,8 @@ def test_measure_10_independent_1s_benchmark_series(self): """ driver = BenchmarkDriverMock(tests=['B1'], responses=([ # calibration run, returns a stand-in for PerformanceTestResult - (_run('B1', num_samples=3, num_iters=1), _PTR(min=300))] + + (_run('B1', num_samples=3, num_iters=1, + verbose=True), _PTR(min=300))] + # 5x i1 series, with 300 μs runtime its possible to take 4098 # samples/s, but it should be capped at 2k ([(_run('B1', num_samples=2048, num_iters=1, diff --git a/benchmark/utils/DriverUtils.swift b/benchmark/utils/DriverUtils.swift index fc7b9211aa2eb..3785c8961fbb0 100644 --- a/benchmark/utils/DriverUtils.swift +++ 
b/benchmark/utils/DriverUtils.swift @@ -524,6 +524,10 @@ final class TestRunner { } test.tearDownFunction?() + if let lf = test.legacyFactor { + logVerbose(" Applying legacy factor: \(lf)") + samples = samples.map { $0 * lf } + } return BenchResults(samples, maxRSS: measureMemoryUsage()) } diff --git a/benchmark/utils/TestsUtils.swift b/benchmark/utils/TestsUtils.swift index 8d6f52f33de57..d698e1cdf7578 100644 --- a/benchmark/utils/TestsUtils.swift +++ b/benchmark/utils/TestsUtils.swift @@ -26,18 +26,18 @@ public enum BenchmarkCategory : String { case runtime, refcount, metadata // Other general areas of compiled code validation. case abstraction, safetychecks, exceptions, bridging, concurrency - + // Algorithms are "micro" that test some well-known algorithm in isolation: // sorting, searching, hashing, fibonaci, crypto, etc. case algorithm - + // Miniapplications are contrived to mimic some subset of application behavior // in a way that can be easily measured. They are larger than micro-benchmarks, // combining multiple APIs, data structures, or algorithms. This includes small // standardized benchmarks, pieces of real applications that have been extracted // into a benchmark, important functionality like JSON parsing, etc. case miniapplication - + // Regression benchmarks is a catch-all for less important "micro" // benchmarks. This could be a random piece of code that was attached to a bug // report. We want to make sure the optimizer as a whole continues to handle @@ -46,12 +46,12 @@ public enum BenchmarkCategory : String { // as highly as "validation" benchmarks and likely won't be the subject of // future investigation unless they significantly regress. case regression - + // Most benchmarks are assumed to be "stable" and will be regularly tracked at // each commit. A handful may be marked unstable if continually tracking them is // counterproductive. case unstable - + // CPU benchmarks represent instrinsic Swift performance. 
They are useful for // measuring a fully baked Swift implementation across different platforms and // hardware. The benchmark should also be reasonably applicable to real Swift @@ -151,16 +151,20 @@ public struct BenchmarkInfo { return _tearDownFunction } + public var legacyFactor: Int? + public init(name: String, runFunction: @escaping (Int) -> (), tags: [BenchmarkCategory], setUpFunction: (() -> ())? = nil, tearDownFunction: (() -> ())? = nil, - unsupportedPlatforms: BenchmarkPlatformSet = []) { + unsupportedPlatforms: BenchmarkPlatformSet = [], + legacyFactor: Int? = nil) { self.name = name self._runFunction = runFunction self.tags = Set(tags) self._setUpFunction = setUpFunction self._tearDownFunction = tearDownFunction self.unsupportedPlatforms = unsupportedPlatforms + self.legacyFactor = legacyFactor } /// Returns true if this benchmark should be run on the current platform. @@ -266,4 +270,3 @@ public func getString(_ s: String) -> String { return s } // The same for Substring. @inline(never) public func getSubstring(_ s: Substring) -> Substring { return s } - From 04d1384b2c795fadc32d91eeb7a6896e53fa9739 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 1 Nov 2018 06:24:57 +0100 Subject: [PATCH 2/4] [benchmark] Legacy factor AnyHashableWithAClass Lowered the base workload by a factor of 500 --- benchmark/single-source/AnyHashableWithAClass.swift | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/benchmark/single-source/AnyHashableWithAClass.swift b/benchmark/single-source/AnyHashableWithAClass.swift index d3e2ef864df05..eac874e8d6eb1 100644 --- a/benchmark/single-source/AnyHashableWithAClass.swift +++ b/benchmark/single-source/AnyHashableWithAClass.swift @@ -25,9 +25,12 @@ import TestsUtils public var AnyHashableWithAClass = BenchmarkInfo( name: "AnyHashableWithAClass", runFunction: run_AnyHashableWithAClass, - tags: [.abstraction, .runtime, .cpubench] + tags: [.abstraction, .runtime, .cpubench], + legacyFactor: lf ) +let lf = 
500 + class TestHashableBase : Hashable { var value: Int init(_ value: Int) { @@ -55,8 +58,7 @@ class TestHashableDerived5 : TestHashableDerived4 {} @inline(never) public func run_AnyHashableWithAClass(_ N: Int) { let c = TestHashableDerived5(10) - for _ in 0...(N*500000) { + for _ in 0...(N*500000/lf) { _ = AnyHashable(c) } } - From 435e55f0c0592585d08a55b53c4207a24731d7c2 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 1 Nov 2018 06:29:14 +0100 Subject: [PATCH 3/4] [benchmark] Legacy factor ArrayOf[Generic]Ref Lowered the base workload by a factor of 10 --- benchmark/single-source/ArrayOfGenericRef.swift | 11 ++++++----- benchmark/single-source/ArrayOfRef.swift | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/benchmark/single-source/ArrayOfGenericRef.swift b/benchmark/single-source/ArrayOfGenericRef.swift index b4bfac1328dc7..a1f365084d7c3 100644 --- a/benchmark/single-source/ArrayOfGenericRef.swift +++ b/benchmark/single-source/ArrayOfGenericRef.swift @@ -13,14 +13,15 @@ // This benchmark tests creation and destruction of an array of enum // and generic type bound to nontrivial types. // -// For comparison, we always create three arrays of 10,000 words. +// For comparison, we always create three arrays of 1,000 words. 
import TestsUtils public let ArrayOfGenericRef = BenchmarkInfo( name: "ArrayOfGenericRef", runFunction: run_ArrayOfGenericRef, - tags: [.validation, .api, .Array]) + tags: [.validation, .api, .Array], + legacyFactor: 10) protocol Constructible { associatedtype Element @@ -31,8 +32,8 @@ class ConstructibleArray { init(_ e:T.Element) { array = [T]() - array.reserveCapacity(10_000) - for _ in 0...10_000 { + array.reserveCapacity(1_000) + for _ in 0...1_000 { array.append(T(e:e) as T) } } @@ -65,7 +66,7 @@ func genCommonRefArray() { class RefArray { var array: [T] - init(_ i:T, count:Int = 10_000) { + init(_ i:T, count:Int = 1_000) { array = [T](repeating: i, count: count) } } diff --git a/benchmark/single-source/ArrayOfRef.swift b/benchmark/single-source/ArrayOfRef.swift index d333487e8f107..85b11eca5954e 100644 --- a/benchmark/single-source/ArrayOfRef.swift +++ b/benchmark/single-source/ArrayOfRef.swift @@ -14,14 +14,15 @@ // references. It is meant to be a baseline for comparison against // ArrayOfGenericRef. // -// For comparison, we always create four arrays of 10,000 words. +// For comparison, we always create four arrays of 1,000 words. 
import TestsUtils public let ArrayOfRef = BenchmarkInfo( name: "ArrayOfRef", runFunction: run_ArrayOfRef, - tags: [.validation, .api, .Array]) + tags: [.validation, .api, .Array], + legacyFactor: 10) protocol Constructible { associatedtype Element @@ -32,8 +33,8 @@ class ConstructibleArray { init(_ e:T.Element) { array = [T]() - array.reserveCapacity(10_000) - for _ in 0...10_000 { + array.reserveCapacity(1_000) + for _ in 0...1_000 { array.append(T(e:e) as T) } } @@ -77,7 +78,7 @@ enum RefEnum { class RefArray { var array : [T] - init(_ i:T, count:Int = 10_000) { + init(_ i:T, count:Int = 1_000) { array = [T](repeating: i, count: count) } } From f121ee12313742713542034f210e2e11da4879b4 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Thu, 1 Nov 2018 06:32:06 +0100 Subject: [PATCH 4/4] [benchmark] Legacy factor ArraySetElement Lowered base workload by a factor of 10. --- benchmark/single-source/ArraySetElement.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/single-source/ArraySetElement.swift b/benchmark/single-source/ArraySetElement.swift index 292b6a345d4dc..9d6d59d33203e 100644 --- a/benchmark/single-source/ArraySetElement.swift +++ b/benchmark/single-source/ArraySetElement.swift @@ -18,7 +18,8 @@ import TestsUtils public var ArraySetElement = BenchmarkInfo( name: "ArraySetElement", runFunction: run_ArraySetElement, - tags: [.runtime, .cpubench, .unstable] + tags: [.runtime, .cpubench, .unstable], + legacyFactor: 10 ) // This is an effort to defeat isUniquelyReferenced optimization. Ideally @@ -29,9 +30,8 @@ func storeArrayElement(_ array: inout [Int], _ i: Int) { } public func run_ArraySetElement(_ N: Int) { - let scale = 10 var array = [Int](repeating: 0, count: 10000) - for _ in 0..