diff --git a/devops/actions/run-tests/benchmark/action.yml b/devops/actions/run-tests/benchmark/action.yml index 8062ccdbcaea0..16b6529b11b02 100644 --- a/devops/actions/run-tests/benchmark/action.yml +++ b/devops/actions/run-tests/benchmark/action.yml @@ -157,12 +157,16 @@ runs: --preset "$PRESET" \ --timestamp-override "$SAVE_TIMESTAMP" \ --detect-version sycl,compute_runtime + echo "-----" python3 ./devops/scripts/benchmarks/compare.py to_hist \ + --avg-type EWMA \ + --cutoff "$(date -u -d '7 days ago' +'%Y%m%d_%H%M%S')" \ --name "$SAVE_NAME" \ --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \ --results-dir "./llvm-ci-perf-results/results/" \ - --regression-filter '^[a-z_]+_sycl ' + --regression-filter '^[a-z_]+_sycl ' \ + --verbose echo "-----" - name: Cache changes to benchmark folder for archival purposes diff --git a/devops/scripts/benchmarks/compare.py b/devops/scripts/benchmarks/compare.py index 31b31f05f368d..9b1a9213810bc 100644 --- a/devops/scripts/benchmarks/compare.py +++ b/devops/scripts/benchmarks/compare.py @@ -1,4 +1,4 @@ -from utils.aggregate import Aggregator, SimpleMedian +from utils.aggregate import Aggregator, SimpleMedian, EWMA from utils.validate import Validate from utils.result import Result, BenchmarkRun from options import options @@ -13,6 +13,8 @@ from dataclasses import dataclass, asdict +verbose = False + @dataclass class BenchmarkHistoricAverage: """Contains historic average information for 1 benchmark""" @@ -225,6 +227,11 @@ def perf_diff_entry() -> dict: elif halfway_round(delta, 2) < -options.regression_threshold: regression.append(perf_diff_entry()) + if verbose: + print( + f"{test.name}: expect {hist_avg[test.name].value}, got {test.value}" + ) + return improvement, regression def to_hist( @@ -255,8 +262,12 @@ def to_hist( from the average for this benchmark run. 
""" - if avg_type != "median": - print("Only median is currently supported: Refusing to continue.") + if avg_type == "median": + aggregator_type = SimpleMedian + elif avg_type == "EWMA": + aggregator_type = EWMA + else: + print("Error: Unsupported avg_type f{avg_type}.") exit(1) try: @@ -282,6 +293,7 @@ def to_hist( result_dir, compare_result.hostname, cutoff, + aggregator=aggregator_type, exclude=[Path(compare_file).stem], ) return Compare.to_hist_avg(hist_avg, compare_result) @@ -320,6 +332,11 @@ def to_hist( help="Timestamp (in YYYYMMDD_HHMMSS) of oldest result to include in historic average calculation", default="20000101_010101", ) + parser_avg.add_argument( + "--verbose", + action="store_true", + help="Increase output verbosity", + ) parser_avg.add_argument( "--regression-filter", type=str, @@ -329,15 +346,19 @@ def to_hist( args = parser.parse_args() + if args.verbose: + verbose = True + print("-- Compare.py --") + if args.operation == "to_hist": - if args.avg_type != "median": - print("Only median is currently supported: exiting.") - exit(1) if not Validate.timestamp(args.cutoff): raise ValueError("Timestamp must be provided as YYYYMMDD_HHMMSS.") + if args.avg_type not in ["median", "EWMA"]: + print("Only median, EWMA is currently supported: exiting.") + exit(1) improvements, regressions = Compare.to_hist( - "median", args.name, args.compare_file, args.results_dir, args.cutoff + args.avg_type, args.name, args.compare_file, args.results_dir, args.cutoff ) # Not all regressions are of concern: if a filter is provided, filter diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index 04a7e76be43e3..06f5568387d57 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -72,6 +72,7 @@ class Options: exit_on_failure: bool = False # Options intended for CI: + regression_threshold: float = 0.05 # It's necessary in CI to compare or redo benchmark runs. 
Instead of
     # generating a new timestamp each run by default, specify a single timestamp
@@ -94,6 +95,17 @@ class Options:
     archive_baseline_days: int = 30  # Archive Baseline_* runs after 30 days
     archive_pr_days: int = 7  # Archive other (PR/dev) runs after 7 days
 
+    # EWMA Options:
+
+    # The smoothing factor is alpha in the EWMA equation. Generally, a higher
+    # smoothing factor results in newer data having more weight, and a lower
+    # smoothing factor results in older data having more weight.
+    #
+    # Valid values for this smoothing factor are in the range (0, 1). Note that
+    # no value of the smoothing factor will result in older elements having
+    # more weight than newer elements.
+    EWMA_smoothing_factor: float = 0.15
+
     detect_versions: DetectVersionsOptions = field(
         default_factory=DetectVersionsOptions
     )
diff --git a/devops/scripts/benchmarks/tests/test_aggregate.py b/devops/scripts/benchmarks/tests/test_aggregate.py
new file mode 100644
index 0000000000000..4b75f16fdbff4
--- /dev/null
+++ b/devops/scripts/benchmarks/tests/test_aggregate.py
@@ -0,0 +1,53 @@
+import math
+import os
+import sys
+
+sys.path.append(f"{os.path.dirname(__file__)}/../")
+from options import options
+from utils.aggregate import *
+
+
+def run_testcase(aggregator: Aggregator, src: list, expected: float) -> bool:
+    """Feed `src` into a fresh aggregator and check the resulting average.
+
+    Returns True when the average matches `expected` (None must match None;
+    numbers are compared with a floating-point tolerance), False otherwise.
+    """
+    aggr = aggregator()
+    for n in src:
+        aggr.add(n)
+    res = aggr.get_avg()
+    if res is None or expected is None:
+        passed = res is expected
+    else:
+        # Averages are floats: exact != comparison is brittle.
+        passed = math.isclose(res, expected)
+    if not passed:
+        print(f"Failed: {aggregator}, {src} -- expected {expected}, got {res}")
+    return passed
+
+
+def test_EWMA():
+    """Check EWMA against hand-computed averages for a smoothing factor of 0.5."""
+    saved_factor = options.EWMA_smoothing_factor
+    options.EWMA_smoothing_factor = 0.5
+    testcases = [
+        ([], None),
+        ([100], 100),
+        ([100, 100, 100, 100, 100], 100),
+        ([100, 105, 103, 108, 107], 106.1875),
+    ]
+    try:
+        results = [run_testcase(EWMA, *t) for t in testcases]
+    finally:
+        # Do not leak the test's smoothing factor into other users of options.
+        options.EWMA_smoothing_factor = saved_factor
+    fails = results.count(False)
+    successes = len(results) - fails
+    print(f"EWMA test: {successes} successes, {fails} fails.")
+    # Printing alone never fails a test run; assert so failures are reported.
+    assert fails == 0, f"{fails} EWMA testcase(s) failed"
+
+
+if __name__ == "__main__":
+    test_EWMA()
diff --git a/devops/scripts/benchmarks/utils/aggregate.py 
b/devops/scripts/benchmarks/utils/aggregate.py
index 36ee7cbecaae6..8938320536593 100644
--- a/devops/scripts/benchmarks/utils/aggregate.py
+++ b/devops/scripts/benchmarks/utils/aggregate.py
@@ -1,6 +1,8 @@
 import statistics
 from abc import ABC, abstractmethod
 
+from options import options
+
 
 class Aggregator(ABC):
     """
@@ -51,3 +53,38 @@ def add(self, n: float):
 
     def get_avg(self) -> float:
         return statistics.median(self.elements)
+
+
+class EWMA(Aggregator):
+    """
+    Exponentially weighted moving average based on all elements added to the
+    aggregator.
+
+    Elements are weighted in insertion order: the first element seeds the
+    average and each later element x_t updates it as
+    alpha * x_t + (1 - alpha) * previous, with alpha read from
+    options.EWMA_smoothing_factor at the time get_avg() is called.
+    """
+
+    def __init__(self, starting_elements: list = None):
+        # Copy instead of aliasing: a shared `[]` default (or the caller's own
+        # list) would otherwise be mutated by add() and leak between instances.
+        self.elements = [] if starting_elements is None else list(starting_elements)
+
+    @staticmethod
+    def get_type() -> str:
+        return "EWMA"
+
+    def add(self, n: float):
+        self.elements.append(n)
+
+    def get_avg(self) -> float:
+        """Return the EWMA of all added elements, or None if there are none."""
+        if not self.elements:
+            return None  # No elements collected, cannot provide an average
+
+        alpha = options.EWMA_smoothing_factor
+        ewma_t = self.elements[0]
+        for x_t in self.elements[1:]:
+            ewma_t = alpha * x_t + (1 - alpha) * ewma_t
+        return ewma_t