diff --git a/devops/scripts/benchmarks/history.py b/devops/scripts/benchmarks/history.py index 0295514d3cc24..843ee8b63dff1 100644 --- a/devops/scripts/benchmarks/history.py +++ b/devops/scripts/benchmarks/history.py @@ -9,7 +9,7 @@ import socket from utils.result import Result, BenchmarkRun from options import Compare, options -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta from utils.utils import run from utils.validate import Validate @@ -223,3 +223,27 @@ def get_compare(self, name: str) -> BenchmarkRun: return self.compute_average(data) raise Exception("invalid compare type") + + def partition_runs_by_age(self) -> tuple[list[BenchmarkRun], list[BenchmarkRun]]: + """ + Partition runs into current and archived based on their age. + Returns: + tuple: (current_runs, archived_runs) + """ + current_runs = [] + archived_runs = [] + + for run in self.runs: + archive_after = ( + options.archive_baseline_days + if run.name.startswith("Baseline_") + else options.archive_pr_days + ) + cutoff_date = datetime.now(timezone.utc) - timedelta(days=archive_after) + + if run.date > cutoff_date: + current_runs.append(run) + else: + archived_runs.append(run) + + return current_runs, archived_runs diff --git a/devops/scripts/benchmarks/html/index.html b/devops/scripts/benchmarks/html/index.html index 18639c10a1f6a..431946792a8a1 100644 --- a/devops/scripts/benchmarks/html/index.html +++ b/devops/scripts/benchmarks/html/index.html @@ -53,6 +53,10 @@

Display Options

Adjust Y-axis for comparisons + diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 12ff80a1d02e3..8d3f93fa0ecf3 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -7,13 +7,15 @@ let activeRuns = new Set(defaultCompareNames); let chartInstances = new Map(); let suiteNames = new Set(); -let timeseriesData, barChartsData, allRunNames; let activeTags = new Set(); +let timeseriesData, barChartsData, allRunNames; let layerComparisonsData; let latestRunsLookup = new Map(); let pendingCharts = new Map(); // Store chart data for lazy loading let chartObserver; // Intersection observer for lazy loading charts let annotationsOptions = new Map(); // Global options map for annotations +let archivedDataLoaded = false; +let loadedBenchmarkRuns = []; // Loaded results from the js/json files // DOM Elements let runSelect, selectedRunsDiv, suiteFiltersContainer, tagFiltersContainer; @@ -21,7 +23,7 @@ let runSelect, selectedRunsDiv, suiteFiltersContainer, tagFiltersContainer; // Observer for lazy loading charts function initChartObserver() { if (chartObserver) return; - + chartObserver = new IntersectionObserver((entries) => { entries.forEach(entry => { if (entry.isIntersecting) { @@ -196,7 +198,7 @@ function createChart(data, containerId, type) { maxTicksLimit: 10 } }; - + // Add dependencies version change annotations if (Object.keys(data.runs).length > 0) { ChartAnnotations.addVersionChangeAnnotations(data, options); @@ -210,8 +212,8 @@ function createChart(data, containerId, type) { ...runData, // For timeseries (historical results charts) use runName, // otherwise use displayLabel (for layer comparison charts) - label: containerId.startsWith('timeseries') ? - runData.runName : + label: containerId.startsWith('timeseries') ? 
+ runData.runName : (runData.displayLabel || runData.label) })) } : { @@ -223,12 +225,12 @@ function createChart(data, containerId, type) { const chart = new Chart(ctx, chartConfig); chartInstances.set(containerId, chart); - + // Add annotation interaction handlers for time-series charts if (type === 'time') { ChartAnnotations.setupAnnotationListeners(chart, ctx, options); } - + return chart; } @@ -263,7 +265,7 @@ function drawCharts(filteredTimeseriesData, filteredBarChartsData, filteredLayer chartInstances.forEach(chart => chart.destroy()); chartInstances.clear(); pendingCharts.clear(); - + initChartObserver(); // For lazy loading charts // Create timeseries charts @@ -394,7 +396,7 @@ function metadataForLabel(label, type) { if (benchmarkMetadata[label]?.type === type) { return benchmarkMetadata[label]; } - + // Then fall back to prefix match for backward compatibility for (const [key, metadata] of Object.entries(benchmarkMetadata)) { if (metadata.type === type && label.startsWith(key)) { @@ -405,10 +407,10 @@ function metadataForLabel(label, type) { } // Pre-compute a lookup for the latest run per label -function createLatestRunsLookup(benchmarkRuns) { +function createLatestRunsLookup() { const latestRunsMap = new Map(); - benchmarkRuns.forEach(run => { + loadedBenchmarkRuns.forEach(run => { const runDate = run.date; run.results.forEach(result => { const label = result.label; @@ -578,6 +580,12 @@ function updateURL() { url.searchParams.set('customRange', 'true'); } + if (!isArchivedDataEnabled()) { + url.searchParams.delete('archived'); + } else { + url.searchParams.set('archived', 'true'); + } + history.replaceState(null, '', url); } @@ -615,10 +623,10 @@ function getActiveSuites() { } // Data processing -function processTimeseriesData(benchmarkRuns) { +function processTimeseriesData() { const resultsByLabel = {}; - benchmarkRuns.forEach(run => { + loadedBenchmarkRuns.forEach(run => { run.results.forEach(result => { const metadata = 
metadataForLabel(result.label, 'benchmark'); @@ -641,10 +649,10 @@ function processTimeseriesData(benchmarkRuns) { return Object.values(resultsByLabel); } -function processBarChartsData(benchmarkRuns) { +function processBarChartsData() { const groupedResults = {}; - benchmarkRuns.forEach(run => { + loadedBenchmarkRuns.forEach(run => { run.results.forEach(result => { const resultMetadata = metadataForLabel(result.label, 'benchmark'); const explicitGroup = resultMetadata?.explicit_group || result?.explicit_group; @@ -718,11 +726,11 @@ function getLayerTags(metadata) { return layerTags; } -function processLayerComparisonsData(benchmarkRuns) { +function processLayerComparisonsData() { const groupedResults = {}; const labelsByGroup = {}; - benchmarkRuns.forEach(run => { + loadedBenchmarkRuns.forEach(run => { run.results.forEach(result => { const resultMetadata = metadataForLabel(result.label, 'benchmark'); const explicitGroup = resultMetadata?.explicit_group || result.explicit_group; @@ -735,7 +743,7 @@ function processLayerComparisonsData(benchmarkRuns) { }); }); - benchmarkRuns.forEach(run => { + loadedBenchmarkRuns.forEach(run => { run.results.forEach(result => { // Get explicit_group from metadata const resultMetadata = metadataForLabel(result.label, 'benchmark'); @@ -835,6 +843,9 @@ function setupRunSelector() { runSelect = document.getElementById('run-select'); selectedRunsDiv = document.getElementById('selected-runs'); + // Clear existing options first to prevent duplicates when reloading with archived data + runSelect.innerHTML = ''; + allRunNames.forEach(name => { const option = document.createElement('option'); option.value = name; @@ -848,7 +859,10 @@ function setupRunSelector() { function setupSuiteFilters() { suiteFiltersContainer = document.getElementById('suite-filters'); - benchmarkRuns.forEach(run => { + // Clear existing suite filters before adding new ones + suiteFiltersContainer.innerHTML = ''; + + loadedBenchmarkRuns.forEach(run => { 
run.results.forEach(result => { suiteNames.add(result.suite); }); @@ -883,10 +897,16 @@ function isCustomRangesEnabled() { return rangesToggle.checked; } +function isArchivedDataEnabled() { + const archivedDataToggle = document.getElementById('show-archived-data'); + return archivedDataToggle.checked; +} + function setupToggles() { const notesToggle = document.getElementById('show-notes'); const unstableToggle = document.getElementById('show-unstable'); const customRangeToggle = document.getElementById('custom-range'); + const archivedDataToggle = document.getElementById('show-archived-data'); notesToggle.addEventListener('change', function () { // Update all note elements visibility @@ -909,9 +929,25 @@ function setupToggles() { updateCharts(); }); + // Add event listener for archived data toggle + if (archivedDataToggle) { + archivedDataToggle.addEventListener('change', function() { + if (archivedDataToggle.checked) { + loadArchivedData(); + } else { + if (archivedDataLoaded) { + // Reload the page to reset + location.reload(); + } + } + updateURL(); + }); + } + // Initialize from URL params if present const notesParam = getQueryParam('notes'); const unstableParam = getQueryParam('unstable'); + const archivedParam = getQueryParam('archived'); if (notesParam !== null) { let showNotes = notesParam === 'true'; @@ -927,11 +963,22 @@ function setupToggles() { if (customRangesParam !== null) { customRangeToggle.checked = customRangesParam === 'true'; } + + if (archivedDataToggle && archivedParam !== null) { + archivedDataToggle.checked = archivedParam === 'true'; + + if (archivedDataToggle.checked) { + loadArchivedData(); + } + } } function setupTagFilters() { tagFiltersContainer = document.getElementById('tag-filters'); + // Clear existing tag filters before adding new ones + tagFiltersContainer.innerHTML = ''; + const allTags = []; if (benchmarkTags) { @@ -1000,14 +1047,14 @@ function toggleAllTags(select) { function initializeCharts() { // Process raw data - 
timeseriesData = processTimeseriesData(benchmarkRuns); - barChartsData = processBarChartsData(benchmarkRuns); - layerComparisonsData = processLayerComparisonsData(benchmarkRuns); - allRunNames = [...new Set(benchmarkRuns.map(run => run.name))]; - latestRunsLookup = createLatestRunsLookup(benchmarkRuns); + timeseriesData = processTimeseriesData(); + barChartsData = processBarChartsData(); + layerComparisonsData = processLayerComparisonsData(); + allRunNames = [...new Set(loadedBenchmarkRuns.map(run => run.name))]; + latestRunsLookup = createLatestRunsLookup(); // Create global options map for annotations - annotationsOptions = createAnnotationsOptions(benchmarkRuns); + annotationsOptions = createAnnotationsOptions(); // Make it available to the ChartAnnotations module window.annotationsOptions = annotationsOptions; @@ -1087,6 +1134,42 @@ window.addSelectedRun = addSelectedRun; window.removeRun = removeRun; window.toggleAllTags = toggleAllTags; +// Helper function to fetch and process benchmark data +function fetchAndProcessData(url, isArchived = false) { + const loadingIndicator = document.getElementById('loading-indicator'); + + return fetch(url) + .then(response => { + if (!response.ok) { throw new Error(`Got response status ${response.status}.`) } + return response.json(); + }) + .then(data => { + const newRuns = data.runs || data; + + if (isArchived) { + // Merge with existing data for archived data + loadedBenchmarkRuns = loadedBenchmarkRuns.concat(newRuns); + archivedDataLoaded = true; + } else { + // Replace existing data for current data + loadedBenchmarkRuns = newRuns; + } + // The following variables have same values regardless of whether + // we load archived or current data + benchmarkMetadata = data.metadata || benchmarkMetadata || {}; + benchmarkTags = data.tags || benchmarkTags || {}; + + initializeCharts(); + }) + .catch(error => { + console.error(`Error fetching ${isArchived ? 
'archived' : 'remote'} data:`, error); + loadingIndicator.textContent = 'Fetching remote data failed.'; + }) + .finally(() => { + loadingIndicator.style.display = 'none'; + }); +} + // Load data based on configuration function loadData() { const loadingIndicator = document.getElementById('loading-indicator'); @@ -1094,41 +1177,64 @@ function loadData() { if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') { // Fetch data from remote URL - fetch(remoteDataUrl) - .then(response => { - if (!response.ok) { throw new Error(`Got response status ${response.status}.`) } - return response.json(); - }) - .then(data => { - benchmarkRuns = data.runs || data; - benchmarkMetadata = data.metadata || benchmarkMetadata || {}; - benchmarkTags = data.tags || benchmarkTags || {}; - initializeCharts(); - }) - .catch(error => { - console.error('Error fetching remote data:', error); - loadingIndicator.textContent = 'Fetching remote data failed.'; - }) - .finally(() => { - loadingIndicator.style.display = 'none'; // Hide loading indicator - }); + const url = remoteDataUrl.endsWith('/') ? remoteDataUrl + 'data.json' : remoteDataUrl + '/data.json'; + fetchAndProcessData(url); } else { - // Use local data (benchmarkRuns and benchmarkMetadata should be defined in data.js) + // Use local data + loadedBenchmarkRuns = benchmarkRuns; initializeCharts(); loadingIndicator.style.display = 'none'; // Hide loading indicator } } +// Function to load archived data and merge with current data +// Archived data consists of older benchmark results that have been separated from +// the primary dataset but are still available for historical analysis. 
+function loadArchivedData() { + const loadingIndicator = document.getElementById('loading-indicator'); + loadingIndicator.style.display = 'block'; + + if (archivedDataLoaded) { + updateCharts(); + loadingIndicator.style.display = 'none'; + return; + } + + if (typeof remoteDataUrl !== 'undefined' && remoteDataUrl !== '') { + // Fetch data from remote URL + const url = remoteDataUrl.endsWith('/') ? remoteDataUrl + 'data_archive.json' : remoteDataUrl + '/data_archive.json'; + fetchAndProcessData(url, true); + } else { + // For local data use a static js file + const script = document.createElement('script'); + script.src = 'data_archive.js'; + script.onload = () => { + // Merge the archived runs with current runs + loadedBenchmarkRuns = loadedBenchmarkRuns.concat(benchmarkRuns); + archivedDataLoaded = true; + initializeCharts(); + loadingIndicator.style.display = 'none'; + }; + + script.onerror = () => { + console.error('Failed to load data_archive.js'); + loadingIndicator.style.display = 'none'; + }; + + document.head.appendChild(script); + } +} + // Initialize when DOM is ready document.addEventListener('DOMContentLoaded', () => { loadData(); }); // Process all benchmark runs to create a global options map for annotations -function createAnnotationsOptions(benchmarkRuns) { +function createAnnotationsOptions() { const repoMap = new Map(); - benchmarkRuns.forEach(run => { + loadedBenchmarkRuns.forEach(run => { run.results.forEach(result => { if (result.git_url && !repoMap.has(result.git_url)) { const suiteName = result.suite; diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py index 081c76f5d7a0f..db4551b1baad7 100755 --- a/devops/scripts/benchmarks/main.py +++ b/devops/scripts/benchmarks/main.py @@ -316,7 +316,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): if options.output_directory is None: html_path = os.path.join(os.path.dirname(__file__), "html") - generate_html(history.runs, compare_names, 
html_path, metadata) + generate_html(history, compare_names, html_path, metadata) def validate_and_parse_env_args(env_args): @@ -558,6 +558,22 @@ def validate_and_parse_env_args(env_args): help="Location of detect_version.cpp used to query e.g. DPC++, L0", default=None, ) + parser.add_argument( + "--archive-baseline-after", + type=int, + help="Archive baseline results (runs starting with 'Baseline_') older than this many days. " + "Archived results are stored separately and can be viewed in the HTML UI by enabling " + "'Include archived runs'. This helps manage the size of the primary dataset.", + default=options.archive_baseline_days, + ) + parser.add_argument( + "--archive-pr-after", + type=int, + help="Archive PR and other non-baseline results older than this many days. " + "Archived results are stored separately and can be viewed in the HTML UI by enabling " + "'Include archived runs'. PR runs typically have a shorter retention period than baselines.", + default=options.archive_pr_days, + ) args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py index a7b65e752d450..4a263f5d9672c 100644 --- a/devops/scripts/benchmarks/options.py +++ b/devops/scripts/benchmarks/options.py @@ -2,8 +2,6 @@ from enum import Enum import multiprocessing -from presets import presets - class Compare(Enum): LATEST = "latest" @@ -41,6 +39,7 @@ class DetectVersionsOptions: # Max amount of api calls permitted on each run of the benchmark scripts max_api_calls = 4 + @dataclass class Options: workdir: str = None @@ -88,6 +87,11 @@ class Options: # CI scripts vs SYCl build source. 
github_repo_override: str = None git_commit_override: str = None + # Archiving settings + # Archived runs are stored separately from the main dataset but are still accessible + # via the HTML UI when "Include archived runs" is enabled + archive_baseline_days: int = 30 # Archive Baseline_* runs after 30 days + archive_pr_days: int = 7 # Archive other (PR/dev) runs after 7 days detect_versions: DetectVersionsOptions = field( default_factory=DetectVersionsOptions diff --git a/devops/scripts/benchmarks/output_html.py b/devops/scripts/benchmarks/output_html.py index 54e17043631e4..d527bdd7576b7 100644 --- a/devops/scripts/benchmarks/output_html.py +++ b/devops/scripts/benchmarks/output_html.py @@ -5,55 +5,82 @@ import json import os + from options import options from utils.result import BenchmarkMetadata, BenchmarkOutput -from benches.base import benchmark_tags, benchmark_tags_dict +from history import BenchmarkHistory +from benches.base import benchmark_tags_dict -def generate_html( - benchmark_runs: list, - compare_names: list[str], - html_path: str, - metadata: dict[str, BenchmarkMetadata], -): - benchmark_runs.sort(key=lambda run: run.date, reverse=True) - # Sorted in reverse, such that runs are ordered from newest to oldest - - # Create the comprehensive output object - output = BenchmarkOutput( - runs=benchmark_runs, - metadata=metadata, - tags=benchmark_tags_dict, - default_compare_names=compare_names, - ) +def _write_output_to_file( + output: BenchmarkOutput, html_path: str, archive: bool = False +) -> None: + """ + Helper function to write the BenchmarkOutput to a file in JSON format. 
+ """ + # Define variable configuration based on whether we're archiving or not + filename = "data_archive" if archive else "data" if options.output_html == "local": - data_path = os.path.join(html_path, "data.js") + data_path = os.path.join(html_path, f"{filename}.js") with open(data_path, "w") as f: # For local format, we need to write JavaScript variable assignments f.write("benchmarkRuns = ") json.dump(json.loads(output.to_json())["runs"], f, indent=2) f.write(";\n\n") - f.write("benchmarkMetadata = ") + f.write(f"benchmarkMetadata = ") json.dump(json.loads(output.to_json())["metadata"], f, indent=2) f.write(";\n\n") - f.write("benchmarkTags = ") + f.write(f"benchmarkTags = ") json.dump(json.loads(output.to_json())["tags"], f, indent=2) f.write(";\n\n") - f.write("defaultCompareNames = ") + f.write(f"defaultCompareNames = ") json.dump(output.default_compare_names, f, indent=2) f.write(";\n") - print(f"See {os.getcwd()}/html/index.html for the results.") + if not archive: + print(f"See {os.getcwd()}/html/index.html for the results.") else: # For remote format, we write a single JSON file - data_path = os.path.join(html_path, "data.json") + data_path = os.path.join(html_path, f"{filename}.json") with open(data_path, "w") as f: json.dump(json.loads(output.to_json()), f, indent=2) - print( f"Upload {data_path} to a location set in config.js remoteDataUrl argument." 
) + + +def generate_html( + history: BenchmarkHistory, + compare_names: list[str], + html_path: str, + metadata: dict[str, BenchmarkMetadata], +): + """Generate HTML output for benchmark results.""" + current_runs, archived_runs = history.partition_runs_by_age() + + # Sorted in reverse, such that runs are ordered from newest to oldest + current_runs.sort(key=lambda run: run.date, reverse=True) + + # Create the comprehensive output object + output = BenchmarkOutput( + runs=current_runs, + metadata=metadata, + tags=benchmark_tags_dict, + default_compare_names=compare_names, + ) + _write_output_to_file(output, html_path) + + # Generate a separate file for archived runs if any + if archived_runs: + archived_runs.sort(key=lambda run: run.date, reverse=True) + archived_output = BenchmarkOutput( + runs=archived_runs, + metadata=metadata, + tags=benchmark_tags_dict, + default_compare_names=compare_names, + ) + _write_output_to_file(archived_output, html_path, archive=True)