From 5bffb905e553f1f4235b3707f56569291e496f4a Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Tue, 30 May 2017 13:10:22 +0200
Subject: [PATCH 1/2] In markdown break test names with zero-width space

In the `--markdown` format, which is used to display performance test
comparisons on GitHub, break the long test names on camel-case word
boundaries using a zero-width space. This prevents the results table from
overflowing badly, especially in mobile view. But the **downside** is that
you can no longer simply find the tests in the project by the names copied
from comparison results, because they now contain invisible characters.

Using the zero-width space required moving some strings to unicode;
otherwise the column justification and formatting broke on multibyte
characters in UTF-8.

Also updated class declarations and private methods to better follow
Python conventions, and used list comprehensions where they are clearer
than map.

Added tests for parts of compare_perf_tests.py and their `lit` invocation
in `validation-test`.
---
 .gitignore                                   |   2 +
 benchmark/scripts/compare_perf_tests.py      | 112 +++++----
 benchmark/scripts/test_compare_perf_tests.py | 215 ++++++++++++++++++
 .../Python/benchmark-scripts.test-sh         |   2 +
 4 files changed, 286 insertions(+), 45 deletions(-)
 create mode 100644 benchmark/scripts/test_compare_perf_tests.py
 create mode 100644 validation-test/Python/benchmark-scripts.test-sh
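Illustration (not part of the patch): the invisible break described above is
U+200B, so a name copied out of a results table can be made searchable again
by stripping it. The variable names here are hypothetical:

    # -*- coding: utf-8 -*-
    # A name taken from a --markdown results table contains invisible
    # U+200B characters; removing them recovers the original test name.
    copied = u'Angry\u200bPhonebook'
    print(copied.replace(u'\u200b', u'') == u'AngryPhonebook')  # True
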
diff --git a/.gitignore b/.gitignore
index 6b9ce48e3ab00..6d1abc0930ad7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,5 @@ compile_commands.json
 8
 4
 SortedCFDatabase.def
+.coverage
+htmlcov
diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py
index 8f636364c586c..c5073019b45d9 100755
--- a/benchmark/scripts/compare_perf_tests.py
+++ b/benchmark/scripts/compare_perf_tests.py
@@ -17,11 +17,12 @@
 
 import argparse
 import csv
+import re
 import sys
 from math import sqrt
 
 
-class PerformanceTestResult:
+class PerformanceTestResult(object):
     def __init__(self, csv_row):
         # csv_row[0] is just an ordinal number of the test - skip that
         self.name = csv_row[1]  # Name of the performance test
@@ -36,7 +37,6 @@ def __init__(self, csv_row):
         self.median = int(csv_row[7])  # Median runtime (ms)
         self.max_rss = (  # Maximum Resident Set Size (B)
             int(csv_row[8]) if len(csv_row) > 8 else None)
-        # TODO if we really want to compute mean MAX_RSS: self.S_memory
 
     @property
     def sd(self):  # Standard Deviation (ms)
@@ -65,7 +65,6 @@ def push(x):
             (self.samples, self.mean, self.S_runtime) = state
 
         # Merging test results with up to 3 samples is exact
-        # TODO investigate how to best handle merge of higher sample counts
         values = [r.min, r.max, r.median, r.mean][:min(r.samples, 4)]
         map(push, values)
 
@@ -73,15 +72,19 @@ def push(x):
 
     # Tuple of values formatted for display in results table:
     # (name, min value, max value, mean value, max_rss)
-    def values(self):
-        return (self.name, str(self.min), str(self.max), str(int(self.mean)),
-                str(self.max_rss) if self.max_rss else '-')
+    def values(self, break_words=False):
+        return (
+            break_word_camel_case(self.name) if break_words else self.name,
+            str(self.min), str(self.max), str(int(self.mean)),
+            str(self.max_rss) if self.max_rss else '—'
+        )
 
 
-class ResultComparison:
+class ResultComparison(object):
     def __init__(self, old, new):
         self.old = old
         self.new = new
+        assert(old.name == new.name)
         self.name = old.name  # Test name, convenience accessor
 
         # Speedup ratio
@@ -100,18 +103,19 @@ def __init__(self, old, new):
 
     # Tuple of values formatted for display in results table:
     # (name, old value, new value, delta [%], speedup ratio)
-    def values(self):
-        return (self.name, str(self.old.min), str(self.new.min),
+    def values(self, break_words=False):
+        return (break_word_camel_case(self.name) if break_words else self.name,
+                str(self.old.min), str(self.new.min),
                 '{0:+.1f}%'.format(self.delta),
                 '{0:.2f}x{1}'.format(self.ratio, self.is_dubious))
 
 
-class TestComparator:
-    def __init__(self, old_file, new_file, delta_threshold, changes_only):
+class TestComparator(object):
+    def __init__(self, old_file, new_file, delta_threshold):
         def load_from_CSV(filename):  # handles output from Benchmark_O and
             def skip_totals(row):  # Benchmark_Driver (added MAX_RSS column)
-                return len(row) > 7 and row[0].isdigit()
+                return len(row) > 7 and row[0].isdigit()
 
             tests = map(PerformanceTestResult,
                         filter(skip_totals, csv.reader(open(filename))))
@@ -131,9 +135,9 @@ def add_or_merge(names, r):
         added_tests = new_tests.difference(old_tests)
         removed_tests = old_tests.difference(new_tests)
-        self.added = sorted(map(lambda t: new_results[t], added_tests),
+        self.added = sorted([new_results[t] for t in added_tests],
                             key=lambda r: r.name)
-        self.removed = sorted(map(lambda t: old_results[t], removed_tests),
+        self.removed = sorted([old_results[t] for t in removed_tests],
                               key=lambda r: r.name)
 
         def compare(name):
@@ -144,58 +148,60 @@ def compare(name):
         def partition(l, p):
             return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))
 
-        # TODO take standard deviation (SD) into account
         decreased, not_decreased = partition(
             comparisons, lambda c: c.ratio < (1 - delta_threshold))
         increased, unchanged = partition(
             not_decreased, lambda c: c.ratio > (1 + delta_threshold))
 
         # sorted partitions
-        names = map(lambda c: c.name, comparisons)
+        names = [c.name for c in comparisons]
         comparisons = dict(zip(names, comparisons))
-        self.decreased = map(lambda c: comparisons[c.name],
-                             sorted(decreased, key=lambda c: -c.delta))
-        self.increased = map(lambda c: comparisons[c.name],
-                             sorted(increased, key=lambda c: c.delta))
-        self.unchanged = map(lambda c: comparisons[c.name],
-                             sorted(unchanged, key=lambda c: c.name))
+        self.decreased = [comparisons[c.name]
+                          for c in sorted(decreased, key=lambda c: -c.delta)]
+        self.increased = [comparisons[c.name]
+                          for c in sorted(increased, key=lambda c: c.delta)]
+        self.unchanged = [comparisons[c.name]
+                          for c in sorted(unchanged, key=lambda c: c.name)]
 
 
-class ReportFormatter:
+class ReportFormatter(object):
     def __init__(self, comparator, old_branch, new_branch, changes_only):
         self.comparator = comparator
         self.old_branch = old_branch
         self.new_branch = new_branch
         self.changes_only = changes_only
+        self.break_words = False
 
-    MARKDOWN_DETAIL = """
+    MARKDOWN_DETAIL = u"""
 <details {3}>
   <summary>{0} ({1})</summary>
   {2}
 </details>
""" - GIT_DETAIL = """ + GIT_DETAIL = u""" {0} ({1}): {2}""" def markdown(self): - return self.__formatted_text( - ROW='{0} | {1} | {2} | {3} | {4} \n', - HEADER_SEPARATOR='---', + self.break_words = True + return self._formatted_text( + ROW=u'{0} | {1} | {2} | {3} | {4} \n', + HEADER_SEPARATOR=u'---', DETAIL=self.MARKDOWN_DETAIL) def git(self): - return self.__formatted_text( - ROW='{0} {1} {2} {3} {4} \n', - HEADER_SEPARATOR=' ', + return self._formatted_text( + ROW=u'{0} {1} {2} {3} {4} \n', + HEADER_SEPARATOR=u' ', DETAIL=self.GIT_DETAIL) - def __column_widths(self): + def _column_widths(self): changed = self.comparator.decreased + self.comparator.increased comparisons = (changed if self.changes_only else changed + self.comparator.unchanged) comparisons += self.comparator.added + self.comparator.removed - values = map(lambda c: c.values(), comparisons) + values = [c.values(break_words=self.break_words) + for c in comparisons] widths = map(lambda columns: map(len, columns), [PerformanceTestResult.header, ResultComparison.header] + values) @@ -205,8 +211,8 @@ def max_widths(maximum, widths): return reduce(max_widths, widths, tuple([0] * 5)) - def __formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL): - widths = self.__column_widths() + def _formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL): + widths = self._column_widths() def justify_columns(contents): return tuple(map(lambda (w, c): c.ljust(w), zip(widths, contents))) @@ -223,7 +229,9 @@ def format_columns(r, strong): def table(title, results, is_strong=False, is_open=False): rows = [ - row(format_columns(result_comparison.values(), is_strong)) + row(format_columns( + result_comparison.values(break_words=self.break_words), + is_strong)) for result_comparison in results ] return ('' if not rows else @@ -241,9 +249,9 @@ def table(title, results, is_strong=False, is_open=False): table('No Changes', self.comparator.unchanged)), table('Added', self.comparator.added, is_open=True), table('Removed', self.comparator.removed, is_open=True) - ]) + ]).encode('utf-8') - HTML = """ + HTML = u""" @@ -268,7 +276,7 @@ def table(title, results, is_strong=False, is_open=False): """ - HTML_HEADER_ROW = """ + HTML_HEADER_ROW = u""" {0} ({1}) {2} @@ -278,7 +286,7 @@ def table(title, results, is_strong=False, is_open=False): """ - HTML_ROW = """ + HTML_ROW = u""" {0} {1} @@ -315,7 +323,7 @@ def table(title, results, speedup_color): table('No Changes', self.comparator.unchanged, 'black')), table('Added', self.comparator.added, ''), table('Removed', self.comparator.removed, '') - ])) + ])).encode('utf-8') def main(): @@ -343,7 +351,7 @@ def main(): args = parser.parse_args() comparator = TestComparator(args.old_file, args.new_file, - float(args.delta_threshold), args.changes_only) + float(args.delta_threshold)) formatter = ReportFormatter(comparator, args.old_branch, args.new_branch, args.changes_only) @@ -368,13 +376,27 @@ def main(): sys.exit(1) +FIRST_CAP_RE = re.compile('(.)([A-Z][a-z]+)') +ALL_CAP_RE = re.compile('([a-z0-9])([A-Z])') + + +def break_word_camel_case(name, separator='\xe2\x80\x8b'): + """Insert sperator between words in the camelCased name. + Default separator is zero-width space (U+200B). 
+ """ + replacement = r'\1' + separator + r'\2' + name = FIRST_CAP_RE.sub(replacement, name) + name = ALL_CAP_RE.sub(replacement, name) + return unicode(name, encoding='utf-8') + + def write_to_file(file_name, data): """ Write data to given file """ - file = open(file_name, 'w') - file.write(data) - file.close + output_file = open(file_name, 'w') + output_file.write(data) + output_file.close if __name__ == '__main__': diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py new file mode 100644 index 0000000000000..9308e3ab2939b --- /dev/null +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -0,0 +1,215 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# ===--- benchmark_utils.py ----------------------------------------------===// +# +# This source file is part of the Swift.org open source project +# +# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors +# Licensed under Apache License v2.0 with Runtime Library Exception +# +# See https://swift.org/LICENSE.txt for license information +# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +# +# ===---------------------------------------------------------------------===// + +import unittest + +from compare_perf_tests import PerformanceTestResult +from compare_perf_tests import ResultComparison +from compare_perf_tests import break_word_camel_case + + +class TestPerformanceTestResult(unittest.TestCase): + """PerformanceTestResult holds results from executing individual benchmark + from Swift Benchmark Suite as reported by test driver (Benchmark_O, + Benchmark_Onone, Benchmark_Ounchecked or Benchmark_Driver). + + It depends on the log format emmited by the test driver in the form: + #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B) + + The last column, MAX_RSS, is emmited only for runs instrumented by the + Benchmark_Driver to measure rough memory use during the execution of the + benchmark. + """ + + def test_init(self): + """PerformanceTestResult instance is created from an iterable with + length of 8 or 9. (Like a row provided by the CSV parser.) + """ + log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' + r = PerformanceTestResult(log_line.split(',')) + self.assertEquals(r.name, 'AngryPhonebook') + self.assertEquals((r.samples, r.min, r.max, r.mean, r.sd, r.median), + (20, 10664, 12933, 11035, 576, 10884)) + + log_line = '1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336' + r = PerformanceTestResult(log_line.split(',')) + self.assertEquals(r.max_rss, 10510336) + + def test_header(self): + """Provides column labels for header row in results table""" + self.assertEquals(PerformanceTestResult.header, + ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS')) + + def test_values(self): + """Values property for display in results table comparisons + in format: ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS'). + Test name can have invisible breaks inserted into camel case word + boundaries to prevent overly long tables when displayed on GitHub. + The `break_words` attribute controls this behavior, it is off by + default. 
+ """ + log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' + r = PerformanceTestResult(log_line.split(',')) + self.assertEquals( + r.values(break_words=True), + (u'Angry\u200bPhonebook', '10664', '12933', '11035', '—') + ) + self.assertEquals( + r.values(), + ('AngryPhonebook', '10664', '12933', '11035', '—') + ) + log_line = '1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336' + r = PerformanceTestResult(log_line.split(',')) + self.assertEquals(r.values(break_words=True), + (u'Angry\u200bPhonebook', + '12045', '12045', '12045', '10510336')) + + def test_merge(self): + """Merging test results recomputes min and max. + It attempts to recompute mean and standard deviation when all_samples + are available. There is no correct way to compute these values from + test results that are summaries from more than 3 samples. + + The use case here is comparing tests results parsed from concatenated + log files from multiple runs of benchmark driver. + """ + tests = """1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336 +1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336 +1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144 +1,AngryPhonebook,1,12270,12270,12270,0,12270,10498048""".split('\n') + results = map(PerformanceTestResult, + [line.split(',') for line in tests]) + + def as_tuple(r): + return (r.min, r.max, round(r.mean, 2), round(r.sd, 2), r.median, + r.max_rss) + + r = results[0] + self.assertEquals(as_tuple(r), + (12045, 12045, 12045, 0, 12045, 10510336)) + r.merge(results[1]) + self.assertEquals(as_tuple(r), + (12045, 12325, 12185, 197.99, 12045, 10510336)) + r.merge(results[2]) + self.assertEquals(as_tuple(r), + (11616, 12325, 11995.33, 357.10, 12045, 10510336)) + r.merge(results[3]) + self.assertEquals(as_tuple(r), + (11616, 12325, 12064, 322.29, 12045, 10510336)) + + +class TestBreakWordsCamelCase(unittest.TestCase): + def test_break_words_camel_case(self): + """Inset zero-width space (\xe2\x80\x8b) into word boundaries of + CamelCase names. + """ + b = break_word_camel_case + self.assertEquals(b('Unchanged'), 'Unchanged') + self.assertEquals(b('AngryPhonebook'), u'Angry\u200bPhonebook') + self.assertEquals(b('AngryPhonebook', separator='_'), + 'Angry_Phonebook') + self.assertEquals(b('ArrayAppendUTF16', separator='_'), + 'Array_Append_UTF16') + self.assertEquals(b('AnyHashableWithAClass', separator='_'), + 'Any_Hashable_With_A_Class') + self.assertEquals(b('ObjectiveCBridgeToNSArray', separator='_'), + 'Objective_C_Bridge_To_NS_Array') + self.assertEquals(b('SuffixAnySeqCRangeIterLazy', separator='_'), + 'Suffix_Any_Seq_C_Range_Iter_Lazy') + + +class TestResultComparison(unittest.TestCase): + """ResultComparison compares MINs from new and old PerformanceTestResult. + It computes speedup ratio and improvement delta (%). 
+ """ + def setUp(self): + self.r0 = PerformanceTestResult( + '101,GlobalClass,20,0,0,0,0,0,10185728'.split(',')) + self.r01 = PerformanceTestResult( + '101,GlobalClass,20,20,20,20,0,0,10185728'.split(',')) + self.r1 = PerformanceTestResult( + '1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336'.split(',')) + self.r2 = PerformanceTestResult( + '1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144'.split(',')) + + def test_init(self): + rc = ResultComparison(self.r1, self.r2) + self.assertEquals(rc.name, 'AngryPhonebook') + self.assertAlmostEquals(rc.ratio, 12325.0 / 11616.0) + self.assertAlmostEquals(rc.delta, (((11616.0 / 12325.0) - 1) * 100), + places=3) + # handle test results that sometimes change to zero, when compiler + # optimizes out the body of the incorrectly written test + rc = ResultComparison(self.r0, self.r0) + self.assertEquals(rc.name, 'GlobalClass') + self.assertAlmostEquals(rc.ratio, 1) + self.assertAlmostEquals(rc.delta, 0, places=3) + rc = ResultComparison(self.r0, self.r01) + self.assertAlmostEquals(rc.ratio, 0, places=3) + self.assertAlmostEquals(rc.delta, 2000000, places=3) + rc = ResultComparison(self.r01, self.r0) + self.assertAlmostEquals(rc.ratio, 20001) + self.assertAlmostEquals(rc.delta, -99.995, places=3) + # disallow comparison of different test results + self.assertRaises( + AssertionError, + ResultComparison, self.r0, self.r1 + ) + + def test_header(self): + """Provides column labels for header row in results table""" + self.assertEquals(ResultComparison.header, + ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')) + + def test_values(self): + """Values property for display in results table comparisons + in format: ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP'). + Test name can have invisible breaks inserted into camel case word + boundaries to prevent overly long tables when displayed on GitHub. + The `break_words` attribute controls this behavior, it is off by + default. + """ + rc = ResultComparison(self.r1, self.r2) + self.assertEquals( + rc.values(break_words=True), + (u'Angry\u200bPhonebook', '12325', '11616', '-5.8%', '1.06x') + ) + self.assertEquals( + rc.values(), + ('AngryPhonebook', '12325', '11616', '-5.8%', '1.06x') + ) + # other way around + rc = ResultComparison(self.r2, self.r1) + self.assertEquals( + rc.values(), + ('AngryPhonebook', '11616', '12325', '+6.1%', '0.94x') + ) + + def test_values_is_dubious(self): + """Add ' (?)' to the speedup column as of indication dubious changes: + result's MIN falls inside the (MIN, MAX) interval of result they are + being compared with. + """ + self.r2.max = self.r1.min + 1 + # new.min < old.min < new.max + rc = ResultComparison(self.r1, self.r2) + self.assertEquals(rc.values()[4], '1.06x (?)') + # other way around: old.min < new.min < old.max + rc = ResultComparison(self.r2, self.r1) + self.assertEquals(rc.values()[4], '0.94x (?)') + + +if __name__ == '__main__': + unittest.main() diff --git a/validation-test/Python/benchmark-scripts.test-sh b/validation-test/Python/benchmark-scripts.test-sh new file mode 100644 index 0000000000000..3919f56bd921b --- /dev/null +++ b/validation-test/Python/benchmark-scripts.test-sh @@ -0,0 +1,2 @@ +// RUN: %{python} -m unittest discover -s %swift_src_root/benchmark/scripts/ +// REQUIRES: OS=macosx From ddb31f9a4a6d2b07a74021d4b95c94f45ec5fc06 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Wed, 31 May 2017 17:51:24 +0200 Subject: [PATCH 2/2] Moved documentation from tests to implementation. 
From ddb31f9a4a6d2b07a74021d4b95c94f45ec5fc06 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Wed, 31 May 2017 17:51:24 +0200
Subject: [PATCH 2/2] Moved documentation from tests to implementation.

---
 benchmark/scripts/compare_perf_tests.py      | 65 ++++++++++++++----
 benchmark/scripts/test_compare_perf_tests.py | 48 ---------------
 2 files changed, 54 insertions(+), 59 deletions(-)

diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py
index c5073019b45d9..689de65fddc8c 100755
--- a/benchmark/scripts/compare_perf_tests.py
+++ b/benchmark/scripts/compare_perf_tests.py
@@ -23,7 +23,21 @@
 
 
 class PerformanceTestResult(object):
+    """PerformanceTestResult holds results from executing an individual
+    benchmark from the Swift Benchmark Suite, as reported by the test driver
+    (Benchmark_O, Benchmark_Onone, Benchmark_Ounchecked or Benchmark_Driver).
+
+    It depends on the log format emitted by the test driver in the form:
+    #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
+
+    The last column, MAX_RSS, is emitted only for runs instrumented by the
+    Benchmark_Driver to measure rough memory use during the execution of the
+    benchmark.
+    """
     def __init__(self, csv_row):
+        """PerformanceTestResult instance is created from an iterable with
+        length of 8 or 9. (Like a row provided by the CSV parser.)
+        """
         # csv_row[0] is just an ordinal number of the test - skip that
         self.name = csv_row[1]  # Name of the performance test
         self.samples = int(csv_row[2])  # Number of measurement samples taken
@@ -39,22 +53,33 @@ def __init__(self, csv_row):
             int(csv_row[8]) if len(csv_row) > 8 else None)
 
     @property
-    def sd(self):  # Standard Deviation (ms)
+    def sd(self):
+        """Standard Deviation (ms)"""
         return (0 if self.samples < 2 else
                 sqrt(self.S_runtime / (self.samples - 1)))
 
-    # Compute running variance, B. P. Welford's method
-    # See Knuth TAOCP vol 2, 3rd edition, page 232, or
-    # https://www.johndcook.com/blog/standard_deviation/
-    # M is mean, Standard Deviation is defined as sqrt(S/k-1)
     @staticmethod
     def running_mean_variance((k, M_, S_), x):
+        """
+        Compute running variance, B. P. Welford's method
+        See Knuth TAOCP vol 2, 3rd edition, page 232, or
+        https://www.johndcook.com/blog/standard_deviation/
+        M is mean, Standard Deviation is defined as sqrt(S/k-1)
+        """
         k = float(k + 1)
         M = M_ + (x - M_) / k
         S = S_ + (x - M_) * (x - M)
         return (k, M, S)
 
     def merge(self, r):
+        """Merging test results recomputes min and max.
+        It attempts to recompute mean and standard deviation when all samples
+        are available. There is no correct way to compute these values from
+        test results that are summaries of more than 3 samples.
+
+        The use case here is comparing test results parsed from concatenated
+        log files from multiple runs of the benchmark driver.
+        """
         self.min = min(self.min, r.min)
         self.max = max(self.max, r.max)
         # self.median = None # unclear what to do here
@@ -65,14 +90,20 @@ def push(x):
             (self.samples, self.mean, self.S_runtime) = state
 
         # Merging test results with up to 3 samples is exact
-        values = [r.min, r.max, r.median, r.mean][:min(r.samples, 4)]
+        values = [r.min, r.max, r.median][:min(r.samples, 3)]
         map(push, values)
 
+    # Column labels for header row in results table
     header = ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS')
 
-    # Tuple of values formatted for display in results table:
-    # (name, min value, max value, mean value, max_rss)
     def values(self, break_words=False):
+        """Values property for display in results table comparisons
+        in format: ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS').
+        Test name can have invisible breaks inserted into camel case word
+        boundaries to prevent overly long tables when displayed on GitHub.
+        The `break_words` parameter controls this behavior; it is off by
+        default.
+        """
         return (
             break_word_camel_case(self.name) if break_words else self.name,
             str(self.min), str(self.max), str(int(self.mean)),
@@ -81,6 +112,9 @@ def values(self, break_words=False):
 
 
 class ResultComparison(object):
+    """ResultComparison compares MINs from new and old PerformanceTestResult.
+    It computes speedup ratio and improvement delta (%).
+    """
     def __init__(self, old, new):
         self.old = old
         self.new = new
@@ -94,16 +128,25 @@ def __init__(self, old, new):
         ratio = (new.min + 0.001) / (old.min + 0.001)
         self.delta = ((ratio - 1) * 100)
 
-        self.is_dubious = (  # FIXME this is legacy
+        # Add ' (?)' to the speedup column as an indication of dubious
+        # changes: the result's MIN falls inside the (MIN, MAX) interval
+        # of the result it is being compared with.
+        self.is_dubious = (
             ' (?)' if ((old.min < new.min and new.min < old.max) or
                        (new.min < old.min and old.min < new.max))
             else '')
 
+    # Column labels for header row in results table
     header = ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')
 
-    # Tuple of values formatted for display in results table:
-    # (name, old value, new value, delta [%], speedup ratio)
     def values(self, break_words=False):
+        """Values property for display in results table comparisons
+        in format: ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP').
+        Test name can have invisible breaks inserted into camel case word
+        boundaries to prevent overly long tables when displayed on GitHub.
+        The `break_words` parameter controls this behavior; it is off by
+        default.
+        """
         return (break_word_camel_case(self.name) if break_words else self.name,
                 str(self.old.min), str(self.new.min),
                 '{0:+.1f}%'.format(self.delta),
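A worked example of the comparison arithmetic documented above (illustration
only, not part of the patch; `compare_mins` is a hypothetical name). The
+0.001 guards keep the division meaningful when an incorrectly written test
optimizes down to a 0 μs MIN:

    def compare_mins(old_min, new_min):
        # Same formulas as ResultComparison.__init__ above.
        ratio = (old_min + 0.001) / (new_min + 0.001)   # speedup
        delta = (((new_min + 0.001) / (old_min + 0.001)) - 1) * 100
        return '{0:.2f}x'.format(ratio), '{0:+.1f}%'.format(delta)

    print(compare_mins(12325, 11616))  # ('1.06x', '-5.8%'), as in test_values
    print(compare_mins(0, 20))         # ('0.00x', '+2000000.0%'), zeroed case
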
- """ log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' r = PerformanceTestResult(log_line.split(',')) self.assertEquals( @@ -77,14 +55,6 @@ def test_values(self): '12045', '12045', '12045', '10510336')) def test_merge(self): - """Merging test results recomputes min and max. - It attempts to recompute mean and standard deviation when all_samples - are available. There is no correct way to compute these values from - test results that are summaries from more than 3 samples. - - The use case here is comparing tests results parsed from concatenated - log files from multiple runs of benchmark driver. - """ tests = """1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336 1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336 1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144 @@ -112,9 +82,6 @@ def as_tuple(r): class TestBreakWordsCamelCase(unittest.TestCase): def test_break_words_camel_case(self): - """Inset zero-width space (\xe2\x80\x8b) into word boundaries of - CamelCase names. - """ b = break_word_camel_case self.assertEquals(b('Unchanged'), 'Unchanged') self.assertEquals(b('AngryPhonebook'), u'Angry\u200bPhonebook') @@ -131,9 +98,6 @@ def test_break_words_camel_case(self): class TestResultComparison(unittest.TestCase): - """ResultComparison compares MINs from new and old PerformanceTestResult. - It computes speedup ratio and improvement delta (%). - """ def setUp(self): self.r0 = PerformanceTestResult( '101,GlobalClass,20,0,0,0,0,0,10185728'.split(',')) @@ -169,18 +133,10 @@ def test_init(self): ) def test_header(self): - """Provides column labels for header row in results table""" self.assertEquals(ResultComparison.header, ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')) def test_values(self): - """Values property for display in results table comparisons - in format: ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP'). - Test name can have invisible breaks inserted into camel case word - boundaries to prevent overly long tables when displayed on GitHub. - The `break_words` attribute controls this behavior, it is off by - default. - """ rc = ResultComparison(self.r1, self.r2) self.assertEquals( rc.values(break_words=True), @@ -198,10 +154,6 @@ def test_values(self): ) def test_values_is_dubious(self): - """Add ' (?)' to the speedup column as of indication dubious changes: - result's MIN falls inside the (MIN, MAX) interval of result they are - being compared with. - """ self.r2.max = self.r1.min + 1 # new.min < old.min < new.max rc = ResultComparison(self.r1, self.r2)