From 5bffb905e553f1f4235b3707f56569291e496f4a Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Tue, 30 May 2017 13:10:22 +0200
Subject: [PATCH 1/2] In markdown break test names with zero-width space

In the `--markdown` format, which is used to display performance test
comparisons on GitHub, break the long test names on camel-case word
boundaries using a zero-width space. This prevents the results table from
overflowing badly, especially in mobile view. But the **downside** is that
you can no longer simply find the tests in the project by the names copied
from comparison results, because they now contain invisible characters.

Using the zero-width space required moving some strings to unicode;
otherwise the column justification and formatting broke on multibyte
characters in UTF-8.

Also updated class declarations and private methods to better follow
Python conventions, and used list comprehensions where they are clearer
than map.

Added tests for parts of compare_perf_tests.py and their `lit` invocation
in `validation-test`.
---
 .gitignore                                   |   2 +
 benchmark/scripts/compare_perf_tests.py      | 112 +++++----
 benchmark/scripts/test_compare_perf_tests.py | 215 ++++++++++++++++++
 .../Python/benchmark-scripts.test-sh         |   2 +
 4 files changed, 286 insertions(+), 45 deletions(-)
 create mode 100644 benchmark/scripts/test_compare_perf_tests.py
 create mode 100644 validation-test/Python/benchmark-scripts.test-sh
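Illustration (not part of the patch): the invisible break described above is
U+200B, so a name copied out of a results table can be made searchable again
by stripping it. The variable names here are hypothetical:

    # -*- coding: utf-8 -*-
    # A name taken from a --markdown results table contains invisible
    # U+200B characters; removing them recovers the original test name.
    copied = u'Angry\u200bPhonebook'
    print(copied.replace(u'\u200b', u'') == u'AngryPhonebook')  # True
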
diff --git a/.gitignore b/.gitignore
index 6b9ce48e3ab00..6d1abc0930ad7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,5 @@ compile_commands.json
 8
 4
 SortedCFDatabase.def
+.coverage
+htmlcov
diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py
index 8f636364c586c..c5073019b45d9 100755
--- a/benchmark/scripts/compare_perf_tests.py
+++ b/benchmark/scripts/compare_perf_tests.py
@@ -17,11 +17,12 @@
 
 import argparse
 import csv
+import re
 import sys
 from math import sqrt
 
 
-class PerformanceTestResult:
+class PerformanceTestResult(object):
     def __init__(self, csv_row):
         # csv_row[0] is just an ordinal number of the test - skip that
         self.name = csv_row[1]  # Name of the performance test
@@ -36,7 +37,6 @@ def __init__(self, csv_row):
         self.median = int(csv_row[7])  # Median runtime (ms)
         self.max_rss = (  # Maximum Resident Set Size (B)
             int(csv_row[8]) if len(csv_row) > 8 else None)
-        # TODO if we really want to compute mean MAX_RSS: self.S_memory
 
     @property
     def sd(self):  # Standard Deviation (ms)
@@ -65,7 +65,6 @@ def push(x):
             (self.samples, self.mean, self.S_runtime) = state
 
         # Merging test results with up to 3 samples is exact
-        # TODO investigate how to best handle merge of higher sample counts
         values = [r.min, r.max, r.median, r.mean][:min(r.samples, 4)]
         map(push, values)
 
@@ -73,15 +72,19 @@ def push(x):
 
     # Tuple of values formatted for display in results table:
     # (name, min value, max value, mean value, max_rss)
-    def values(self):
-        return (self.name, str(self.min), str(self.max), str(int(self.mean)),
-                str(self.max_rss) if self.max_rss else '-')
+    def values(self, break_words=False):
+        return (
+            break_word_camel_case(self.name) if break_words else self.name,
+            str(self.min), str(self.max), str(int(self.mean)),
+            str(self.max_rss) if self.max_rss else '—'
+        )
 
 
-class ResultComparison:
+class ResultComparison(object):
     def __init__(self, old, new):
         self.old = old
         self.new = new
+        assert(old.name == new.name)
         self.name = old.name  # Test name, convenience accessor
 
         # Speedup ratio
@@ -100,18 +103,19 @@ def __init__(self, old, new):
 
     # Tuple of values formatted for display in results table:
     # (name, old value, new value, delta [%], speedup ratio)
-    def values(self):
-        return (self.name, str(self.old.min), str(self.new.min),
+    def values(self, break_words=False):
+        return (break_word_camel_case(self.name) if break_words else self.name,
+                str(self.old.min), str(self.new.min),
                 '{0:+.1f}%'.format(self.delta),
                 '{0:.2f}x{1}'.format(self.ratio, self.is_dubious))
 
 
-class TestComparator:
-    def __init__(self, old_file, new_file, delta_threshold, changes_only):
+class TestComparator(object):
+    def __init__(self, old_file, new_file, delta_threshold):
         def load_from_CSV(filename):  # handles output from Benchmark_O and
             def skip_totals(row):  # Benchmark_Driver (added MAX_RSS column)
-                return len(row) > 7 and row[0].isdigit()
+                return len(row) > 7 and row[0].isdigit()
 
             tests = map(PerformanceTestResult,
                         filter(skip_totals, csv.reader(open(filename))))
@@ -131,9 +135,9 @@ def add_or_merge(names, r):
         added_tests = new_tests.difference(old_tests)
         removed_tests = old_tests.difference(new_tests)
-        self.added = sorted(map(lambda t: new_results[t], added_tests),
+        self.added = sorted([new_results[t] for t in added_tests],
                             key=lambda r: r.name)
-        self.removed = sorted(map(lambda t: old_results[t], removed_tests),
+        self.removed = sorted([old_results[t] for t in removed_tests],
                               key=lambda r: r.name)
 
         def compare(name):
@@ -144,58 +148,60 @@ def compare(name):
         def partition(l, p):
             return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))
 
-        # TODO take standard deviation (SD) into account
         decreased, not_decreased = partition(
             comparisons, lambda c: c.ratio < (1 - delta_threshold))
         increased, unchanged = partition(
             not_decreased, lambda c: c.ratio > (1 + delta_threshold))
 
         # sorted partitions
-        names = map(lambda c: c.name, comparisons)
+        names = [c.name for c in comparisons]
         comparisons = dict(zip(names, comparisons))
-        self.decreased = map(lambda c: comparisons[c.name],
-                             sorted(decreased, key=lambda c: -c.delta))
-        self.increased = map(lambda c: comparisons[c.name],
-                             sorted(increased, key=lambda c: c.delta))
-        self.unchanged = map(lambda c: comparisons[c.name],
-                             sorted(unchanged, key=lambda c: c.name))
+        self.decreased = [comparisons[c.name]
+                          for c in sorted(decreased, key=lambda c: -c.delta)]
+        self.increased = [comparisons[c.name]
+                          for c in sorted(increased, key=lambda c: c.delta)]
+        self.unchanged = [comparisons[c.name]
+                          for c in sorted(unchanged, key=lambda c: c.name)]
 
 
-class ReportFormatter:
+class ReportFormatter(object):
     def __init__(self, comparator, old_branch, new_branch, changes_only):
         self.comparator = comparator
         self.old_branch = old_branch
         self.new_branch = new_branch
         self.changes_only = changes_only
+        self.break_words = False
 
-    MARKDOWN_DETAIL = """
+    MARKDOWN_DETAIL = u"""
 <details {3}>
   <summary>{0} ({1})</summary>
   {2}
 </details>
""" - GIT_DETAIL = """ + GIT_DETAIL = u""" {0} ({1}): {2}""" def markdown(self): - return self.__formatted_text( - ROW='{0} | {1} | {2} | {3} | {4} \n', - HEADER_SEPARATOR='---', + self.break_words = True + return self._formatted_text( + ROW=u'{0} | {1} | {2} | {3} | {4} \n', + HEADER_SEPARATOR=u'---', DETAIL=self.MARKDOWN_DETAIL) def git(self): - return self.__formatted_text( - ROW='{0} {1} {2} {3} {4} \n', - HEADER_SEPARATOR=' ', + return self._formatted_text( + ROW=u'{0} {1} {2} {3} {4} \n', + HEADER_SEPARATOR=u' ', DETAIL=self.GIT_DETAIL) - def __column_widths(self): + def _column_widths(self): changed = self.comparator.decreased + self.comparator.increased comparisons = (changed if self.changes_only else changed + self.comparator.unchanged) comparisons += self.comparator.added + self.comparator.removed - values = map(lambda c: c.values(), comparisons) + values = [c.values(break_words=self.break_words) + for c in comparisons] widths = map(lambda columns: map(len, columns), [PerformanceTestResult.header, ResultComparison.header] + values) @@ -205,8 +211,8 @@ def max_widths(maximum, widths): return reduce(max_widths, widths, tuple([0] * 5)) - def __formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL): - widths = self.__column_widths() + def _formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL): + widths = self._column_widths() def justify_columns(contents): return tuple(map(lambda (w, c): c.ljust(w), zip(widths, contents))) @@ -223,7 +229,9 @@ def format_columns(r, strong): def table(title, results, is_strong=False, is_open=False): rows = [ - row(format_columns(result_comparison.values(), is_strong)) + row(format_columns( + result_comparison.values(break_words=self.break_words), + is_strong)) for result_comparison in results ] return ('' if not rows else @@ -241,9 +249,9 @@ def table(title, results, is_strong=False, is_open=False): table('No Changes', self.comparator.unchanged)), table('Added', self.comparator.added, is_open=True), table('Removed', self.comparator.removed, is_open=True) - ]) + ]).encode('utf-8') - HTML = """ + HTML = u""" @@ -268,7 +276,7 @@ def table(title, results, is_strong=False, is_open=False): """ - HTML_HEADER_ROW = """ + HTML_HEADER_ROW = u""" {0} ({1}) {2} @@ -278,7 +286,7 @@ def table(title, results, is_strong=False, is_open=False): """ - HTML_ROW = """ + HTML_ROW = u""" {0} {1} @@ -315,7 +323,7 @@ def table(title, results, speedup_color): table('No Changes', self.comparator.unchanged, 'black')), table('Added', self.comparator.added, ''), table('Removed', self.comparator.removed, '') - ])) + ])).encode('utf-8') def main(): @@ -343,7 +351,7 @@ def main(): args = parser.parse_args() comparator = TestComparator(args.old_file, args.new_file, - float(args.delta_threshold), args.changes_only) + float(args.delta_threshold)) formatter = ReportFormatter(comparator, args.old_branch, args.new_branch, args.changes_only) @@ -368,13 +376,27 @@ def main(): sys.exit(1) +FIRST_CAP_RE = re.compile('(.)([A-Z][a-z]+)') +ALL_CAP_RE = re.compile('([a-z0-9])([A-Z])') + + +def break_word_camel_case(name, separator='\xe2\x80\x8b'): + """Insert sperator between words in the camelCased name. + Default separator is zero-width space (U+200B). 
+ """ + replacement = r'\1' + separator + r'\2' + name = FIRST_CAP_RE.sub(replacement, name) + name = ALL_CAP_RE.sub(replacement, name) + return unicode(name, encoding='utf-8') + + def write_to_file(file_name, data): """ Write data to given file """ - file = open(file_name, 'w') - file.write(data) - file.close + output_file = open(file_name, 'w') + output_file.write(data) + output_file.close if __name__ == '__main__': diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py new file mode 100644 index 0000000000000..9308e3ab2939b --- /dev/null +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -0,0 +1,215 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# ===--- benchmark_utils.py ----------------------------------------------===// +# +# This source file is part of the Swift.org open source project +# +# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors +# Licensed under Apache License v2.0 with Runtime Library Exception +# +# See https://swift.org/LICENSE.txt for license information +# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +# +# ===---------------------------------------------------------------------===// + +import unittest + +from compare_perf_tests import PerformanceTestResult +from compare_perf_tests import ResultComparison +from compare_perf_tests import break_word_camel_case + + +class TestPerformanceTestResult(unittest.TestCase): + """PerformanceTestResult holds results from executing individual benchmark + from Swift Benchmark Suite as reported by test driver (Benchmark_O, + Benchmark_Onone, Benchmark_Ounchecked or Benchmark_Driver). + + It depends on the log format emmited by the test driver in the form: + #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B) + + The last column, MAX_RSS, is emmited only for runs instrumented by the + Benchmark_Driver to measure rough memory use during the execution of the + benchmark. + """ + + def test_init(self): + """PerformanceTestResult instance is created from an iterable with + length of 8 or 9. (Like a row provided by the CSV parser.) + """ + log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' + r = PerformanceTestResult(log_line.split(',')) + self.assertEquals(r.name, 'AngryPhonebook') + self.assertEquals((r.samples, r.min, r.max, r.mean, r.sd, r.median), + (20, 10664, 12933, 11035, 576, 10884)) + + log_line = '1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336' + r = PerformanceTestResult(log_line.split(',')) + self.assertEquals(r.max_rss, 10510336) + + def test_header(self): + """Provides column labels for header row in results table""" + self.assertEquals(PerformanceTestResult.header, + ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS')) + + def test_values(self): + """Values property for display in results table comparisons + in format: ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS'). + Test name can have invisible breaks inserted into camel case word + boundaries to prevent overly long tables when displayed on GitHub. + The `break_words` attribute controls this behavior, it is off by + default. 
+ """ + log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' + r = PerformanceTestResult(log_line.split(',')) + self.assertEquals( + r.values(break_words=True), + (u'Angry\u200bPhonebook', '10664', '12933', '11035', '—') + ) + self.assertEquals( + r.values(), + ('AngryPhonebook', '10664', '12933', '11035', '—') + ) + log_line = '1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336' + r = PerformanceTestResult(log_line.split(',')) + self.assertEquals(r.values(break_words=True), + (u'Angry\u200bPhonebook', + '12045', '12045', '12045', '10510336')) + + def test_merge(self): + """Merging test results recomputes min and max. + It attempts to recompute mean and standard deviation when all_samples + are available. There is no correct way to compute these values from + test results that are summaries from more than 3 samples. + + The use case here is comparing tests results parsed from concatenated + log files from multiple runs of benchmark driver. + """ + tests = """1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336 +1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336 +1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144 +1,AngryPhonebook,1,12270,12270,12270,0,12270,10498048""".split('\n') + results = map(PerformanceTestResult, + [line.split(',') for line in tests]) + + def as_tuple(r): + return (r.min, r.max, round(r.mean, 2), round(r.sd, 2), r.median, + r.max_rss) + + r = results[0] + self.assertEquals(as_tuple(r), + (12045, 12045, 12045, 0, 12045, 10510336)) + r.merge(results[1]) + self.assertEquals(as_tuple(r), + (12045, 12325, 12185, 197.99, 12045, 10510336)) + r.merge(results[2]) + self.assertEquals(as_tuple(r), + (11616, 12325, 11995.33, 357.10, 12045, 10510336)) + r.merge(results[3]) + self.assertEquals(as_tuple(r), + (11616, 12325, 12064, 322.29, 12045, 10510336)) + + +class TestBreakWordsCamelCase(unittest.TestCase): + def test_break_words_camel_case(self): + """Inset zero-width space (\xe2\x80\x8b) into word boundaries of + CamelCase names. + """ + b = break_word_camel_case + self.assertEquals(b('Unchanged'), 'Unchanged') + self.assertEquals(b('AngryPhonebook'), u'Angry\u200bPhonebook') + self.assertEquals(b('AngryPhonebook', separator='_'), + 'Angry_Phonebook') + self.assertEquals(b('ArrayAppendUTF16', separator='_'), + 'Array_Append_UTF16') + self.assertEquals(b('AnyHashableWithAClass', separator='_'), + 'Any_Hashable_With_A_Class') + self.assertEquals(b('ObjectiveCBridgeToNSArray', separator='_'), + 'Objective_C_Bridge_To_NS_Array') + self.assertEquals(b('SuffixAnySeqCRangeIterLazy', separator='_'), + 'Suffix_Any_Seq_C_Range_Iter_Lazy') + + +class TestResultComparison(unittest.TestCase): + """ResultComparison compares MINs from new and old PerformanceTestResult. + It computes speedup ratio and improvement delta (%). 
+ """ + def setUp(self): + self.r0 = PerformanceTestResult( + '101,GlobalClass,20,0,0,0,0,0,10185728'.split(',')) + self.r01 = PerformanceTestResult( + '101,GlobalClass,20,20,20,20,0,0,10185728'.split(',')) + self.r1 = PerformanceTestResult( + '1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336'.split(',')) + self.r2 = PerformanceTestResult( + '1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144'.split(',')) + + def test_init(self): + rc = ResultComparison(self.r1, self.r2) + self.assertEquals(rc.name, 'AngryPhonebook') + self.assertAlmostEquals(rc.ratio, 12325.0 / 11616.0) + self.assertAlmostEquals(rc.delta, (((11616.0 / 12325.0) - 1) * 100), + places=3) + # handle test results that sometimes change to zero, when compiler + # optimizes out the body of the incorrectly written test + rc = ResultComparison(self.r0, self.r0) + self.assertEquals(rc.name, 'GlobalClass') + self.assertAlmostEquals(rc.ratio, 1) + self.assertAlmostEquals(rc.delta, 0, places=3) + rc = ResultComparison(self.r0, self.r01) + self.assertAlmostEquals(rc.ratio, 0, places=3) + self.assertAlmostEquals(rc.delta, 2000000, places=3) + rc = ResultComparison(self.r01, self.r0) + self.assertAlmostEquals(rc.ratio, 20001) + self.assertAlmostEquals(rc.delta, -99.995, places=3) + # disallow comparison of different test results + self.assertRaises( + AssertionError, + ResultComparison, self.r0, self.r1 + ) + + def test_header(self): + """Provides column labels for header row in results table""" + self.assertEquals(ResultComparison.header, + ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')) + + def test_values(self): + """Values property for display in results table comparisons + in format: ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP'). + Test name can have invisible breaks inserted into camel case word + boundaries to prevent overly long tables when displayed on GitHub. + The `break_words` attribute controls this behavior, it is off by + default. + """ + rc = ResultComparison(self.r1, self.r2) + self.assertEquals( + rc.values(break_words=True), + (u'Angry\u200bPhonebook', '12325', '11616', '-5.8%', '1.06x') + ) + self.assertEquals( + rc.values(), + ('AngryPhonebook', '12325', '11616', '-5.8%', '1.06x') + ) + # other way around + rc = ResultComparison(self.r2, self.r1) + self.assertEquals( + rc.values(), + ('AngryPhonebook', '11616', '12325', '+6.1%', '0.94x') + ) + + def test_values_is_dubious(self): + """Add ' (?)' to the speedup column as of indication dubious changes: + result's MIN falls inside the (MIN, MAX) interval of result they are + being compared with. + """ + self.r2.max = self.r1.min + 1 + # new.min < old.min < new.max + rc = ResultComparison(self.r1, self.r2) + self.assertEquals(rc.values()[4], '1.06x (?)') + # other way around: old.min < new.min < old.max + rc = ResultComparison(self.r2, self.r1) + self.assertEquals(rc.values()[4], '0.94x (?)') + + +if __name__ == '__main__': + unittest.main() diff --git a/validation-test/Python/benchmark-scripts.test-sh b/validation-test/Python/benchmark-scripts.test-sh new file mode 100644 index 0000000000000..3919f56bd921b --- /dev/null +++ b/validation-test/Python/benchmark-scripts.test-sh @@ -0,0 +1,2 @@ +// RUN: %{python} -m unittest discover -s %swift_src_root/benchmark/scripts/ +// REQUIRES: OS=macosx From ddb31f9a4a6d2b07a74021d4b95c94f45ec5fc06 Mon Sep 17 00:00:00 2001 From: Pavol Vaskovic Date: Wed, 31 May 2017 17:51:24 +0200 Subject: [PATCH 2/2] Moved documentation from tests to implementation. 
From ddb31f9a4a6d2b07a74021d4b95c94f45ec5fc06 Mon Sep 17 00:00:00 2001
From: Pavol Vaskovic
Date: Wed, 31 May 2017 17:51:24 +0200
Subject: [PATCH 2/2] Moved documentation from tests to implementation.

---
 benchmark/scripts/compare_perf_tests.py      | 65 ++++++++++++++----
 benchmark/scripts/test_compare_perf_tests.py | 48 ---------------
 2 files changed, 54 insertions(+), 59 deletions(-)

diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py
index c5073019b45d9..689de65fddc8c 100755
--- a/benchmark/scripts/compare_perf_tests.py
+++ b/benchmark/scripts/compare_perf_tests.py
@@ -23,7 +23,21 @@
 
 
 class PerformanceTestResult(object):
+    """PerformanceTestResult holds results from executing an individual
+    benchmark from the Swift Benchmark Suite, as reported by the test driver
+    (Benchmark_O, Benchmark_Onone, Benchmark_Ounchecked or Benchmark_Driver).
+
+    It depends on the log format emitted by the test driver in the form:
+    #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
+
+    The last column, MAX_RSS, is emitted only for runs instrumented by the
+    Benchmark_Driver to measure rough memory use during the execution of the
+    benchmark.
+    """
     def __init__(self, csv_row):
+        """PerformanceTestResult instance is created from an iterable with
+        length of 8 or 9. (Like a row provided by the CSV parser.)
+        """
         # csv_row[0] is just an ordinal number of the test - skip that
         self.name = csv_row[1]  # Name of the performance test
         self.samples = int(csv_row[2])  # Number of measurement samples taken
@@ -39,22 +53,33 @@ def __init__(self, csv_row):
             int(csv_row[8]) if len(csv_row) > 8 else None)
 
     @property
-    def sd(self):  # Standard Deviation (ms)
+    def sd(self):
+        """Standard Deviation (ms)"""
         return (0 if self.samples < 2 else
                 sqrt(self.S_runtime / (self.samples - 1)))
 
-    # Compute running variance, B. P. Welford's method
-    # See Knuth TAOCP vol 2, 3rd edition, page 232, or
-    # https://www.johndcook.com/blog/standard_deviation/
-    # M is mean, Standard Deviation is defined as sqrt(S/k-1)
     @staticmethod
     def running_mean_variance((k, M_, S_), x):
+        """
+        Compute running variance, B. P. Welford's method
+        See Knuth TAOCP vol 2, 3rd edition, page 232, or
+        https://www.johndcook.com/blog/standard_deviation/
+        M is mean, Standard Deviation is defined as sqrt(S/k-1)
+        """
         k = float(k + 1)
         M = M_ + (x - M_) / k
         S = S_ + (x - M_) * (x - M)
         return (k, M, S)
 
     def merge(self, r):
+        """Merging test results recomputes min and max.
+        It attempts to recompute mean and standard deviation when all samples
+        are available. There is no correct way to compute these values from
+        test results that are summaries of more than 3 samples.
+
+        The use case here is comparing test results parsed from concatenated
+        log files from multiple runs of the benchmark driver.
+        """
         self.min = min(self.min, r.min)
         self.max = max(self.max, r.max)
         # self.median = None # unclear what to do here
@@ -65,14 +90,20 @@ def push(x):
             (self.samples, self.mean, self.S_runtime) = state
 
         # Merging test results with up to 3 samples is exact
-        values = [r.min, r.max, r.median, r.mean][:min(r.samples, 4)]
+        values = [r.min, r.max, r.median][:min(r.samples, 3)]
         map(push, values)
 
+    # Column labels for header row in results table
     header = ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS')
 
-    # Tuple of values formatted for display in results table:
-    # (name, min value, max value, mean value, max_rss)
     def values(self, break_words=False):
+        """Values property for display in results table comparisons
+        in format: ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS').
+        Test name can have invisible breaks inserted into camel case word
+        boundaries to prevent overly long tables when displayed on GitHub.
+        The `break_words` parameter controls this behavior; it is off by
+        default.
+        """
         return (
             break_word_camel_case(self.name) if break_words else self.name,
             str(self.min), str(self.max), str(int(self.mean)),
@@ -81,6 +112,9 @@ def values(self, break_words=False):
 
 
 class ResultComparison(object):
+    """ResultComparison compares MINs from new and old PerformanceTestResult.
+    It computes speedup ratio and improvement delta (%).
+    """
     def __init__(self, old, new):
         self.old = old
         self.new = new
@@ -94,16 +128,25 @@ def __init__(self, old, new):
         ratio = (new.min + 0.001) / (old.min + 0.001)
         self.delta = ((ratio - 1) * 100)
 
-        self.is_dubious = (  # FIXME this is legacy
+        # Add ' (?)' to the speedup column as an indication of dubious
+        # changes: the result's MIN falls inside the (MIN, MAX) interval
+        # of the result it is being compared with.
+        self.is_dubious = (
             ' (?)' if ((old.min < new.min and new.min < old.max) or
                        (new.min < old.min and old.min < new.max))
             else '')
 
+    # Column labels for header row in results table
     header = ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')
 
-    # Tuple of values formatted for display in results table:
-    # (name, old value, new value, delta [%], speedup ratio)
     def values(self, break_words=False):
+        """Values property for display in results table comparisons
+        in format: ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP').
+        Test name can have invisible breaks inserted into camel case word
+        boundaries to prevent overly long tables when displayed on GitHub.
+        The `break_words` parameter controls this behavior; it is off by
+        default.
+        """
         return (break_word_camel_case(self.name) if break_words else self.name,
                 str(self.old.min), str(self.new.min),
                 '{0:+.1f}%'.format(self.delta),
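A worked example of the comparison arithmetic documented above (illustration
only, not part of the patch; `compare_mins` is a hypothetical name). The
+0.001 guards keep the division meaningful when an incorrectly written test
optimizes down to a 0 μs MIN:

    def compare_mins(old_min, new_min):
        # Same formulas as ResultComparison.__init__ above.
        ratio = (old_min + 0.001) / (new_min + 0.001)   # speedup
        delta = (((new_min + 0.001) / (old_min + 0.001)) - 1) * 100
        return '{0:.2f}x'.format(ratio), '{0:+.1f}%'.format(delta)

    print(compare_mins(12325, 11616))  # ('1.06x', '-5.8%'), as in test_values
    print(compare_mins(0, 20))         # ('0.00x', '+2000000.0%'), zeroed case
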
- """ log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' r = PerformanceTestResult(log_line.split(',')) self.assertEquals( @@ -77,14 +55,6 @@ def test_values(self): '12045', '12045', '12045', '10510336')) def test_merge(self): - """Merging test results recomputes min and max. - It attempts to recompute mean and standard deviation when all_samples - are available. There is no correct way to compute these values from - test results that are summaries from more than 3 samples. - - The use case here is comparing tests results parsed from concatenated - log files from multiple runs of benchmark driver. - """ tests = """1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336 1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336 1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144 @@ -112,9 +82,6 @@ def as_tuple(r): class TestBreakWordsCamelCase(unittest.TestCase): def test_break_words_camel_case(self): - """Inset zero-width space (\xe2\x80\x8b) into word boundaries of - CamelCase names. - """ b = break_word_camel_case self.assertEquals(b('Unchanged'), 'Unchanged') self.assertEquals(b('AngryPhonebook'), u'Angry\u200bPhonebook') @@ -131,9 +98,6 @@ def test_break_words_camel_case(self): class TestResultComparison(unittest.TestCase): - """ResultComparison compares MINs from new and old PerformanceTestResult. - It computes speedup ratio and improvement delta (%). - """ def setUp(self): self.r0 = PerformanceTestResult( '101,GlobalClass,20,0,0,0,0,0,10185728'.split(',')) @@ -169,18 +133,10 @@ def test_init(self): ) def test_header(self): - """Provides column labels for header row in results table""" self.assertEquals(ResultComparison.header, ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')) def test_values(self): - """Values property for display in results table comparisons - in format: ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP'). - Test name can have invisible breaks inserted into camel case word - boundaries to prevent overly long tables when displayed on GitHub. - The `break_words` attribute controls this behavior, it is off by - default. - """ rc = ResultComparison(self.r1, self.r2) self.assertEquals( rc.values(break_words=True), @@ -198,10 +154,6 @@ def test_values(self): ) def test_values_is_dubious(self): - """Add ' (?)' to the speedup column as of indication dubious changes: - result's MIN falls inside the (MIN, MAX) interval of result they are - being compared with. - """ self.r2.max = self.r1.min + 1 # new.min < old.min < new.max rc = ResultComparison(self.r1, self.r2)