diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py
index 0295514d3cc24..843ee8b63dff1 100644
--- a/devops/scripts/benchmarks/history.py
+++ b/devops/scripts/benchmarks/history.py
@@ -9,7 +9,7 @@
import socket
from utils.result import Result, BenchmarkRun
from options import Compare, options
-from datetime import datetime, timezone
+from datetime import datetime, timezone, timedelta
from utils.utils import run
from utils.validate import Validate
@@ -223,3 +223,27 @@ def get_compare(self, name: str) -> BenchmarkRun:
return self.compute_average(data)
raise Exception("invalid compare type")
+
+ def partition_runs_by_age(self) -> tuple[list[BenchmarkRun], list[BenchmarkRun]]:
+ """
+ Partition runs into current and archived based on their age.
+ Returns:
+ tuple: (current_runs, archived_runs)
+ """
+ current_runs = []
+ archived_runs = []
+
+ for run in self.runs:
+ archive_after = (
+ options.archive_baseline_days
+ if run.name.startswith("Baseline_")
+ else options.archive_pr_days
+ )
+ cutoff_date = datetime.now(timezone.utc) - timedelta(days=archive_after)
+
+ if run.date > cutoff_date:
+ current_runs.append(run)
+ else:
+ archived_runs.append(run)
+
+ return current_runs, archived_runs
diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html
index 18639c10a1f6a..431946792a8a1 100644
--- a/devops/scripts/benchmarks/html/index.html
+++ b/devops/scripts/benchmarks/html/index.html
@@ -53,6 +53,10 @@
Display Options
Adjust Y-axis for comparisons
+                <label>
+                    <input type="checkbox" id="show-archived-data">
+                    Include archived runs
+                </label>
diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js
index 12ff80a1d02e3..8d3f93fa0ecf3 100644
--- a/devops/scripts/benchmarks/html/scripts.js
+++ b/devops/scripts/benchmarks/html/scripts.js
@@ -7,13 +7,15 @@
let activeRuns = new Set(defaultCompareNames);
let chartInstances = new Map();
let suiteNames = new Set();
-let timeseriesData, barChartsData, allRunNames;
let activeTags = new Set();
+let timeseriesData, barChartsData, allRunNames;
let layerComparisonsData;
let latestRunsLookup = new Map();
let pendingCharts = new Map(); // Store chart data for lazy loading
let chartObserver; // Intersection observer for lazy loading charts
let annotationsOptions = new Map(); // Global options map for annotations
+let archivedDataLoaded = false;
+let loadedBenchmarkRuns = []; // Loaded results from the js/json files
// DOM Elements
let runSelect, selectedRunsDiv, suiteFiltersContainer, tagFiltersContainer;
@@ -21,7 +23,7 @@ let runSelect, selectedRunsDiv, suiteFiltersContainer, tagFiltersContainer;
// Observer for lazy loading charts
function initChartObserver() {
if (chartObserver) return;
-
+
chartObserver = new IntersectionObserver((entries) => {
entries.forEach(entry => {
if (entry.isIntersecting) {
@@ -196,7 +198,7 @@ function createChart(data, containerId, type) {
maxTicksLimit: 10
}
};
-
+
// Add dependencies version change annotations
if (Object.keys(data.runs).length > 0) {
ChartAnnotations.addVersionChangeAnnotations(data, options);
@@ -210,8 +212,8 @@ function createChart(data, containerId, type) {
...runData,
// For timeseries (historical results charts) use runName,
// otherwise use displayLabel (for layer comparison charts)
- label: containerId.startsWith('timeseries') ?
- runData.runName :
+ label: containerId.startsWith('timeseries') ?
+ runData.runName :
(runData.displayLabel || runData.label)
}))
} : {
@@ -223,12 +225,12 @@ function createChart(data, containerId, type) {
const chart = new Chart(ctx, chartConfig);
chartInstances.set(containerId, chart);
-
+
// Add annotation interaction handlers for time-series charts
if (type === 'time') {
ChartAnnotations.setupAnnotationListeners(chart, ctx, options);
}
-
+
return chart;
}
@@ -263,7 +265,7 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData, filteredLayer
chartInstances.forEach(chart => chart.destroy());
chartInstances.clear();
pendingCharts.clear();
-
+
initChartObserver(); // For lazy loading charts
// Create timeseries charts
@@ -394,7 +396,7 @@ function metadataForLabel(label, type) {
if (benchmarkMetadata[label]?.type === type) {
return benchmarkMetadata[label];
}
-
+
// Then fall back to prefix match for backward compatibility
for (const [key, metadata] of Object.entries(benchmarkMetadata)) {
if (metadata.type === type && label.startsWith(key)) {
@@ -405,10 +407,10 @@ function metadataForLabel(label, type) {
}
// Pre-compute a lookup for the latest run per label
-function createLatestRunsLookup(benchmarkRuns) {
+function createLatestRunsLookup() {
const latestRunsMap = new Map();
- benchmarkRuns.forEach(run => {
+ loadedBenchmarkRuns.forEach(run => {
const runDate = run.date;
run.results.forEach(result => {
const label = result.label;
@@ -578,6 +580,12 @@ function updateURL() {
url.searchParams.set('customRange', 'true');
}
+ if (!isArchivedDataEnabled()) {
+ url.searchParams.delete('archived');
+ } else {
+ url.searchParams.set('archived', 'true');
+ }
+
history.replaceState(null, '', url);
}
@@ -615,10 +623,10 @@ function getActiveSuites() {
}
// Data processing
-function processTimeseriesData(benchmarkRuns) {
+function processTimeseriesData() {
const resultsByLabel = {};
- benchmarkRuns.forEach(run => {
+ loadedBenchmarkRuns.forEach(run => {
run.results.forEach(result => {
const metadata = metadataForLabel(result.label, 'benchmark');
@@ -641,10 +649,10 @@ function processTimeseriesData(benchmarkRuns) {
return Object.values(resultsByLabel);
}
-function processBarChartsData(benchmarkRuns) {
+function processBarChartsData() {
const groupedResults = {};
- benchmarkRuns.forEach(run => {
+ loadedBenchmarkRuns.forEach(run => {
run.results.forEach(result => {
const resultMetadata = metadataForLabel(result.label, 'benchmark');
const explicitGroup = resultMetadata?.explicit_group || result?.explicit_group;
@@ -718,11 +726,11 @@ function getLayerTags(metadata) {
return layerTags;
}
-function processLayerComparisonsData(benchmarkRuns) {
+function processLayerComparisonsData() {
const groupedResults = {};
const labelsByGroup = {};
- benchmarkRuns.forEach(run => {
+ loadedBenchmarkRuns.forEach(run => {
run.results.forEach(result => {
const resultMetadata = metadataForLabel(result.label, 'benchmark');
const explicitGroup = resultMetadata?.explicit_group || result.explicit_group;
@@ -735,7 +743,7 @@ function processLayerComparisonsData(benchmarkRuns) {
});
});
- benchmarkRuns.forEach(run => {
+ loadedBenchmarkRuns.forEach(run => {
run.results.forEach(result => {
// Get explicit_group from metadata
const resultMetadata = metadataForLabel(result.label, 'benchmark');
@@ -835,6 +843,9 @@ function setupRunSelector() {
runSelect = document.getElementById('run-select');
selectedRunsDiv = document.getElementById('selected-runs');
+ // Clear existing options first to prevent duplicates when reloading with archived data
+ runSelect.innerHTML = '';
+
allRunNames.forEach(name => {
const option = document.createElement('option');
option.value = name;
@@ -848,7 +859,10 @@ function setupRunSelector() {
function setupSuiteFilters() {
suiteFiltersContainer = document.getElementById('suite-filters');
- benchmarkRuns.forEach(run => {
+ // Clear existing suite filters before adding new ones
+ suiteFiltersContainer.innerHTML = '';
+
+ loadedBenchmarkRuns.forEach(run => {
run.results.forEach(result => {
suiteNames.add(result.suite);
});
@@ -883,10 +897,16 @@ function isCustomRangesEnabled() {
return rangesToggle.checked;
}
+function isArchivedDataEnabled() {
+ const archivedDataToggle = document.getElementById('show-archived-data');
+ return archivedDataToggle.checked;
+}
+
function setupToggles() {
const notesToggle = document.getElementById('show-notes');
const unstableToggle = document.getElementById('show-unstable');
const customRangeToggle = document.getElementById('custom-range');
+ const archivedDataToggle = document.getElementById('show-archived-data');
notesToggle.addEventListener('change', function () {
// Update all note elements visibility
@@ -909,9 +929,25 @@ function setupToggles() {
updateCharts();
});
+ // Add event listener for archived data toggle
+ if (archivedDataToggle) {
+ archivedDataToggle.addEventListener('change', function() {
+ if (archivedDataToggle.checked) {
+ loadArchivedData();
+ } else {
+ if (archivedDataLoaded) {
+ // Reload the page to reset
+ location.reload();
+ }
+ }
+ updateURL();
+ });
+ }
+
// Initialize from URL params if present
const notesParam = getQueryParam('notes');
const unstableParam = getQueryParam('unstable');
+ const archivedParam = getQueryParam('archived');
if (notesParam !== null) {
let showNotes = notesParam === 'true';
@@ -927,11 +963,22 @@ function setupToggles() {
if (customRangesParam !== null) {
customRangeToggle.checked = customRangesParam === 'true';
}
+
+ if (archivedDataToggle && archivedParam !== null) {
+ archivedDataToggle.checked = archivedParam === 'true';
+
+ if (archivedDataToggle.checked) {
+ loadArchivedData();
+ }
+ }
}
function setupTagFilters() {
tagFiltersContainer = document.getElementById('tag-filters');
+ // Clear existing tag filters before adding new ones
+ tagFiltersContainer.innerHTML = '';
+
const allTags = [];
if (benchmarkTags) {
@@ -1000,14 +1047,14 @@ function toggleAllTags(select) {
function initializeCharts() {
// Process raw data
- timeseriesData = processTimeseriesData(benchmarkRuns);
- barChartsData = processBarChartsData(benchmarkRuns);
- layerComparisonsData = processLayerComparisonsData(benchmarkRuns);
- allRunNames = [...new Set(benchmarkRuns.map(run => run.name))];
- latestRunsLookup = createLatestRunsLookup(benchmarkRuns);
+ timeseriesData = processTimeseriesData();
+ barChartsData = processBarChartsData();
+ layerComparisonsData = processLayerComparisonsData();
+ allRunNames = [...new Set(loadedBenchmarkRuns.map(run => run.name))];
+ latestRunsLookup = createLatestRunsLookup();
// Create global options map for annotations
- annotationsOptions = createAnnotationsOptions(benchmarkRuns);
+ annotationsOptions = createAnnotationsOptions();
// Make it available to the ChartAnnotations module
window.annotationsOptions = annotationsOptions;
@@ -1087,6 +1134,42 @@ window.addSelectedRun = addSelectedRun;
window.removeRun = removeRun;
window.toggleAllTags = toggleAllTags;
+// Helper function to fetch and process benchmark data
+function fetchAndProcessData(url, isArchived = false) {
+ const loadingIndicator = document.getElementById('loading-indicator');
+
+ return fetch(url)
+ .then(response => {
+ if (!response.ok) { throw new Error(`Got response status ${response.status}.`) }
+ return response.json();
+ })
+ .then(data => {
+ const newRuns = data.runs || data;
+
+ if (isArchived) {
+ // Merge with existing data for archived data
+ loadedBenchmarkRuns = loadedBenchmarkRuns.concat(newRuns);
+ archivedDataLoaded = true;
+ } else {
+ // Replace existing data for current data
+ loadedBenchmarkRuns = newRuns;
+ }
+ // The following variables have same values regardless of whether
+ // we load archived or current data
+ benchmarkMetadata = data.metadata || benchmarkMetadata || {};
+ benchmarkTags = data.tags || benchmarkTags || {};
+
+ initializeCharts();
+ })
+ .catch(error => {
+ console.error(`Error fetching ${isArchived ? 'archived' : 'remote'} data:`, error);
+ loadingIndicator.textContent = 'Fetching remote data failed.';
+ })
+ .finally(() => {
+ loadingIndicator.style.display = 'none';
+ });
+}
+
// Load data based on configuration
function loadData() {
const loadingIndicator = document.getElementById('loading-indicator');
@@ -1094,41 +1177,64 @@ function loadData() {
if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') {
// Fetch data from remote URL
- fetch(remoteDataUrl)
- .then(response => {
- if (!response.ok) { throw new Error(`Got response status ${response.status}.`) }
- return response.json();
- })
- .then(data => {
- benchmarkRuns = data.runs || data;
- benchmarkMetadata = data.metadata || benchmarkMetadata || {};
- benchmarkTags = data.tags || benchmarkTags || {};
- initializeCharts();
- })
- .catch(error => {
- console.error('Error fetching remote data:', error);
- loadingIndicator.textContent = 'Fetching remote data failed.';
- })
- .finally(() => {
- loadingIndicator.style.display = 'none'; // Hide loading indicator
- });
+ const url = remoteDataUrl.endsWith('/') ? remoteDataUrl + 'data.json' : remoteDataUrl + '/data.json';
+ fetchAndProcessData(url);
} else {
- // Use local data (benchmarkRuns and benchmarkMetadata should be defined in data.js)
+ // Use local data
+ loadedBenchmarkRuns = benchmarkRuns;
initializeCharts();
loadingIndicator.style.display = 'none'; // Hide loading indicator
}
}
+// Function to load archived data and merge with current data
+// Archived data consists of older benchmark results that have been separated from
+// the primary dataset but are still available for historical analysis.
+function loadArchivedData() {
+ const loadingIndicator = document.getElementById('loading-indicator');
+ loadingIndicator.style.display = 'block';
+
+ if (archivedDataLoaded) {
+ updateCharts();
+ loadingIndicator.style.display = 'none';
+ return;
+ }
+
+ if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') {
+ // Fetch data from remote URL
+ const url = remoteDataUrl.endsWith('/') ? remoteDataUrl + 'data_archive.json' : remoteDataUrl + '/data_archive.json';
+ fetchAndProcessData(url, true);
+ } else {
+ // For local data use a static js file
+ const script = document.createElement('script');
+ script.src = 'data_archive.js';
+ script.onload = () => {
+ // Merge the archived runs with current runs
+ loadedBenchmarkRuns = loadedBenchmarkRuns.concat(benchmarkRuns);
+ archivedDataLoaded = true;
+ initializeCharts();
+ loadingIndicator.style.display = 'none';
+ };
+
+ script.onerror = () => {
+ console.error('Failed to load data_archive.js');
+ loadingIndicator.style.display = 'none';
+ };
+
+ document.head.appendChild(script);
+ }
+}
+
// Initialize when DOM is ready
document.addEventListener('DOMContentLoaded', () => {
loadData();
});
// Process all benchmark runs to create a global options map for annotations
-function createAnnotationsOptions(benchmarkRuns) {
+function createAnnotationsOptions() {
const repoMap = new Map();
- benchmarkRuns.forEach(run => {
+ loadedBenchmarkRuns.forEach(run => {
run.results.forEach(result => {
if (result.git_url && !repoMap.has(result.git_url)) {
const suiteName = result.suite;
diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 081c76f5d7a0f..db4551b1baad7 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -316,7 +316,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
if options.output_directory is None:
html_path = os.path.join(os.path.dirname(__file__), "html")
- generate_html(history.runs, compare_names, html_path, metadata)
+ generate_html(history, compare_names, html_path, metadata)
def validate_and_parse_env_args(env_args):
@@ -558,6 +558,22 @@ def validate_and_parse_env_args(env_args):
help="Location of detect_version.cpp used to query e.g. DPC++, L0",
default=None,
)
+ parser.add_argument(
+ "--archive-baseline-after",
+ type=int,
+ help="Archive baseline results (runs starting with 'Baseline_') older than this many days. "
+ "Archived results are stored separately and can be viewed in the HTML UI by enabling "
+ "'Include archived runs'. This helps manage the size of the primary dataset.",
+ default=options.archive_baseline_days,
+ )
+ parser.add_argument(
+ "--archive-pr-after",
+ type=int,
+ help="Archive PR and other non-baseline results older than this many days. "
+ "Archived results are stored separately and can be viewed in the HTML UI by enabling "
+ "'Include archived runs'. PR runs typically have a shorter retention period than baselines.",
+ default=options.archive_pr_days,
+ )
args = parser.parse_args()
additional_env_vars = validate_and_parse_env_args(args.env)
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index a7b65e752d450..4a263f5d9672c 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -2,8 +2,6 @@
from enum import Enum
import multiprocessing
-from presets import presets
-
class Compare(Enum):
LATEST = "latest"
@@ -41,6 +39,7 @@ class DetectVersionsOptions:
# Max amount of api calls permitted on each run of the benchmark scripts
max_api_calls = 4
+
@dataclass
class Options:
workdir: str = None
@@ -88,6 +87,11 @@ class Options:
# CI scripts vs SYCl build source.
github_repo_override: str = None
git_commit_override: str = None
+ # Archiving settings
+ # Archived runs are stored separately from the main dataset but are still accessible
+ # via the HTML UI when "Include archived runs" is enabled
+ archive_baseline_days: int = 30 # Archive Baseline_* runs after 30 days
+ archive_pr_days: int = 7 # Archive other (PR/dev) runs after 7 days
detect_versions: DetectVersionsOptions = field(
default_factory=DetectVersionsOptions
diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py
index 54e17043631e4..d527bdd7576b7 100644
--- a/devops/scripts/benchmarks/output_html.py
+++ b/devops/scripts/benchmarks/output_html.py
@@ -5,55 +5,82 @@
import json
import os
+
from options import options
from utils.result import BenchmarkMetadata, BenchmarkOutput
-from benches.base import benchmark_tags, benchmark_tags_dict
+from history import BenchmarkHistory
+from benches.base import benchmark_tags_dict
-def generate_html(
- benchmark_runs: list,
- compare_names: list[str],
- html_path: str,
- metadata: dict[str, BenchmarkMetadata],
-):
- benchmark_runs.sort(key=lambda run: run.date, reverse=True)
- # Sorted in reverse, such that runs are ordered from newest to oldest
-
- # Create the comprehensive output object
- output = BenchmarkOutput(
- runs=benchmark_runs,
- metadata=metadata,
- tags=benchmark_tags_dict,
- default_compare_names=compare_names,
- )
+def _write_output_to_file(
+ output: BenchmarkOutput, html_path: str, archive: bool = False
+) -> None:
+ """
+ Helper function to write the BenchmarkOutput to a file in JSON format.
+ """
+ # Define variable configuration based on whether we're archiving or not
+ filename = "data_archive" if archive else "data"
if options.output_html == "local":
- data_path = os.path.join(html_path, "data.js")
+ data_path = os.path.join(html_path, f"{filename}.js")
with open(data_path, "w") as f:
# For local format, we need to write JavaScript variable assignments
f.write("benchmarkRuns = ")
json.dump(json.loads(output.to_json())["runs"], f, indent=2)
f.write(";\n\n")
- f.write("benchmarkMetadata = ")
+ f.write(f"benchmarkMetadata = ")
json.dump(json.loads(output.to_json())["metadata"], f, indent=2)
f.write(";\n\n")
- f.write("benchmarkTags = ")
+ f.write(f"benchmarkTags = ")
json.dump(json.loads(output.to_json())["tags"], f, indent=2)
f.write(";\n\n")
- f.write("defaultCompareNames = ")
+ f.write(f"defaultCompareNames = ")
json.dump(output.default_compare_names, f, indent=2)
f.write(";\n")
- print(f"See {os.getcwd()}/html/index.html for the results.")
+ if not archive:
+ print(f"See {os.getcwd()}/html/index.html for the results.")
else:
# For remote format, we write a single JSON file
- data_path = os.path.join(html_path, "data.json")
+ data_path = os.path.join(html_path, f"{filename}.json")
with open(data_path, "w") as f:
json.dump(json.loads(output.to_json()), f, indent=2)
-
print(
f"Upload {data_path} to a location set in config.js remoteDataUrl argument."
)
+
+
+def generate_html(
+ history: BenchmarkHistory,
+ compare_names: list[str],
+ html_path: str,
+ metadata: dict[str, BenchmarkMetadata],
+):
+ """Generate HTML output for benchmark results."""
+ current_runs, archived_runs = history.partition_runs_by_age()
+
+ # Sorted in reverse, such that runs are ordered from newest to oldest
+ current_runs.sort(key=lambda run: run.date, reverse=True)
+
+ # Create the comprehensive output object
+ output = BenchmarkOutput(
+ runs=current_runs,
+ metadata=metadata,
+ tags=benchmark_tags_dict,
+ default_compare_names=compare_names,
+ )
+ _write_output_to_file(output, html_path)
+
+ # Generate a separate file for archived runs if any
+ if archived_runs:
+ archived_runs.sort(key=lambda run: run.date, reverse=True)
+ archived_output = BenchmarkOutput(
+ runs=archived_runs,
+ metadata=metadata,
+ tags=benchmark_tags_dict,
+ default_compare_names=compare_names,
+ )
+ _write_output_to_file(archived_output, html_path, archive=True)