intel · sarnex · Jul 8, 2025 · Jun 2, 2025 · Jun 5, 2025 · Jun 6, 2025
@@ -157,12 +157,16 @@ runs:
         --preset "$PRESET" \
         --timestamp-override "$SAVE_TIMESTAMP" \
         --detect-version sycl,compute_runtime
+
       echo "-----"
       python3 ./devops/scripts/benchmarks/compare.py to_hist \
+        --avg-type EWMA \
+        --cutoff "$(date -u -d '7 days ago' +'%Y%m%d_%H%M%S')" \
         --name "$SAVE_NAME" \
         --compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \
         --results-dir "./llvm-ci-perf-results/results/" \
-        --regression-filter '^[a-z_]+_sycl '
+        --regression-filter '^[a-z_]+_sycl ' \
+        --verbose
       echo "-----"
 
   - name: Cache changes to benchmark folder for archival purposes

@@ -1,4 +1,4 @@
-from utils.aggregate import Aggregator, SimpleMedian
+from utils.aggregate import Aggregator, SimpleMedian, EWMA
 from utils.validate import Validate
 from utils.result import Result, BenchmarkRun
 from options import options
@@ -13,6 +13,8 @@
 from dataclasses import dataclass, asdict
 
 
+verbose = False
+
 @dataclass
 class BenchmarkHistoricAverage:
     """Contains historic average information for 1 benchmark"""
@@ -225,6 +227,11 @@ def perf_diff_entry() -> dict:
             elif halfway_round(delta, 2) < -options.regression_threshold:
                 regression.append(perf_diff_entry())
 
+            if verbose:
+                print(
+                    f"{test.name}: expect {hist_avg[test.name].value}, got {test.value}"
+                )
+
         return improvement, regression
 
     def to_hist(
@@ -255,8 +262,12 @@ def to_hist(
             from the average for this benchmark run.
         """
 
-        if avg_type != "median":
-            print("Only median is currently supported: Refusing to continue.")
+        if avg_type == "median":
+            aggregator_type = SimpleMedian
+        elif avg_type == "EWMA":
+            aggregator_type = EWMA
+        else:
+            print("Error: Unsupported avg_type f{avg_type}.")
             exit(1)
 
         try:
@@ -282,6 +293,7 @@ def to_hist(
             result_dir,
             compare_result.hostname,
             cutoff,
+            aggregator=aggregator_type,
             exclude=[Path(compare_file).stem],
         )
         return Compare.to_hist_avg(hist_avg, compare_result)
@@ -320,6 +332,11 @@ def to_hist(
         help="Timestamp (in YYYYMMDD_HHMMSS) of oldest result to include in historic average calculation",
         default="20000101_010101",
     )
+    parser_avg.add_argument(
+        "--verbose",
+        action="store_true",
+        help="Increase output verbosity",
+    )
     parser_avg.add_argument(
         "--regression-filter",
         type=str,
@@ -329,15 +346,19 @@ def to_hist(
 
     args = parser.parse_args()
 
+    if args.verbose:
+        verbose = True
+        print("-- Compare.py --")
+
     if args.operation == "to_hist":
-        if args.avg_type != "median":
-            print("Only median is currently supported: exiting.")
-            exit(1)
         if not Validate.timestamp(args.cutoff):
             raise ValueError("Timestamp must be provided as YYYYMMDD_HHMMSS.")
+        if args.avg_type not in ["median", "EWMA"]:
+            print("Only median, EWMA is currently supported: exiting.")
+            exit(1)
 
         improvements, regressions = Compare.to_hist(
-            "median", args.name, args.compare_file, args.results_dir, args.cutoff
+            args.avg_type, args.name, args.compare_file, args.results_dir, args.cutoff
         )
 
         # Not all regressions are of concern: if a filter is provided, filter

@@ -72,6 +72,7 @@ class Options:
     exit_on_failure: bool = False
 
     # Options intended for CI:
+
     regression_threshold: float = 0.05
     # It's necessary in CI to compare or redo benchmark runs. Instead of
     # generating a new timestamp each run by default, specify a single timestamp
@@ -94,6 +95,17 @@ class Options:
     archive_baseline_days: int = 30  # Archive Baseline_* runs after 30 days
     archive_pr_days: int = 7  # Archive other (PR/dev) runs after 7 days
 
+    # EWMA Options:
+
+    # The smoothing factor is alpha in the EWMA equation. Generally, a higher
+    # smoothing factor results in newer data having more weight, and a lower
+    # smoothing factor results in older data having more weight.
+    #
+    # Valid values for this smoothing factor lie in the open interval (0, 1).
+    # Note that no value of the smoothing factor will result in older elements
+    # having more weight than newer elements.
+    EWMA_smoothing_factor: float = 0.15
+
     detect_versions: DetectVersionsOptions = field(
         default_factory=DetectVersionsOptions
     )

@@ -0,0 +1,39 @@
+import sys
+import os
+
+sys.path.append(f"{os.path.dirname(__file__)}/../")
+from options import options
+from utils.aggregate import *
+
+
+def run_testcase(aggregator: Aggregator, src: list, expected: float) -> bool:
+    """
+    Feed every element of src into a fresh aggregator and check its average.
+
+    Args:
+        aggregator: Aggregator class (not an instance); it is called with no
+            arguments to build the aggregator under test.
+        src: Elements passed to add(), in order.
+        expected: Expected result of get_avg(), or None when no average is
+            expected.
+
+    Returns:
+        True if the computed average matches expected, False otherwise. A
+        mismatch is also reported on stdout.
+    """
+    # Function-scope import so this helper does not depend on the file's
+    # top-level import block.
+    import math
+
+    aggr = aggregator()
+    for n in src:
+        aggr.add(n)
+    res = aggr.get_avg()
+
+    if res is None or expected is None:
+        # None is the "no average" result; it only matches None.
+        matches = res is expected
+    else:
+        # Averages are floating-point results: compare with a tolerance so
+        # rounding noise is not reported as a failure.
+        matches = math.isclose(res, expected, rel_tol=1e-9, abs_tol=1e-12)
+
+    if not matches:
+        print(f"Failed: {aggregator}, {src} -- expected {expected}, got {res}")
+        return False
+    return True
+
+
+def test_EWMA():
+    """
+    Check EWMA.get_avg() against hand-computed averages.
+
+    The smoothing factor is pinned to 0.5 for the duration of the test and
+    restored afterwards, so the global options object is left as it was found.
+
+    Raises:
+        AssertionError: If any testcase fails, so that test runners and CI see
+            the failure instead of only a printed summary.
+    """
+    saved_smoothing_factor = options.EWMA_smoothing_factor
+    options.EWMA_smoothing_factor = 0.5
+    try:
+        # Each testcase is (input elements, expected average).
+        testcases = [
+            ([], None),
+            ([100], 100),
+            ([100, 100, 100, 100, 100], 100),
+            ([100, 105, 103, 108, 107], 106.1875),
+        ]
+        successes = 0
+        fails = 0
+        for t in testcases:
+            if run_testcase(EWMA, *t):
+                successes += 1
+            else:
+                fails += 1
+    finally:
+        # Do not leak the test-only smoothing factor into other code.
+        options.EWMA_smoothing_factor = saved_smoothing_factor
+
+    print(f"EWMA test: {successes} successes, {fails} fails.")
+    assert fails == 0, f"{fails} EWMA testcase(s) failed"
+
+
+# Script entry point: run the EWMA testcases when this file is executed
+# directly; test_EWMA() prints a success/failure summary to stdout.
+if __name__ == "__main__":
+    test_EWMA()
@@ -1,6 +1,8 @@
 import statistics
 from abc import ABC, abstractmethod
 
+from options import options
+
 
 class Aggregator(ABC):
     """
@@ -51,3 +53,30 @@ def add(self, n: float):
 
     def get_avg(self) -> float:
         return statistics.median(self.elements)
+
+
+class EWMA(Aggregator):
+    """
+    Exponentially weighted moving average based on all elements added to the
+    aggregator.
+
+    Elements are folded in insertion order: the first element seeds the
+    average, and every later element x updates it to
+    alpha * x + (1 - alpha) * previous_average, where alpha is
+    options.EWMA_smoothing_factor as read when get_avg() is called.
+    """
+
+    def __init__(self, starting_elements: list = None):
+        """
+        Args:
+            starting_elements: Optional initial elements, oldest first.
+        """
+        # A mutable default ([]) would be shared by every instance created
+        # without arguments, leaking samples from one aggregator into the
+        # next. Copying also keeps add() from mutating the caller's list.
+        self.elements = [] if starting_elements is None else list(starting_elements)
+
+    @staticmethod
+    def get_type() -> str:
+        """Return the name identifying this aggregator type."""
+        return "EWMA"
+
+    def add(self, n: float):
+        """Append n as the newest element."""
+        self.elements.append(n)
+
+    def get_avg(self) -> float:
+        """
+        Return the exponentially weighted moving average of all elements, or
+        None if no elements have been added.
+        """
+        if not self.elements:
+            return None  # No elements collected, cannot provide an average
+
+        alpha = options.EWMA_smoothing_factor
+        ewma_t = self.elements[0]
+        for x_t in self.elements[1:]:
+            ewma_t = alpha * x_t + (1 - alpha) * ewma_t
+        return ewma_t