From b500e76a1e26e7efc7d387a551ddf27e00b71d8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patryk=20Kami=C5=84ski?=
Date: Thu, 3 Jul 2025 13:01:15 +0000
Subject: [PATCH] [CI][Benchmarks] Fix exit on benchmark failure

---
 devops/scripts/benchmarks/main.py    | 23 ++++++++++++++++-------
 devops/scripts/benchmarks/options.py |  1 +
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/devops/scripts/benchmarks/main.py b/devops/scripts/benchmarks/main.py
index 7449148ba2efb..d90824bbb8c38 100755
--- a/devops/scripts/benchmarks/main.py
+++ b/devops/scripts/benchmarks/main.py
@@ -43,14 +43,22 @@ def run_iterations(
         print(f"running {benchmark.name()}, iteration {iter}... ", flush=True)
         bench_results = benchmark.run(env_vars)
         if bench_results is None:
-            failures[benchmark.name()] = "benchmark produced no results!"
-            break
+            if options.exit_on_failure:
+                raise RuntimeError(f"Benchmark {benchmark.name()} produced no results!")
+            else:
+                failures[benchmark.name()] = "benchmark produced no results!"
+                break
 
         for bench_result in bench_results:
             if not bench_result.passed:
-                failures[bench_result.label] = "verification failed"
-                print(f"complete ({bench_result.label}: verification failed).")
-                continue
+                if options.exit_on_failure:
+                    raise RuntimeError(
+                        f"Benchmark {benchmark.name()} failed: {bench_result.label} verification failed."
+                    )
+                else:
+                    failures[bench_result.label] = "verification failed"
+                    print(f"complete ({bench_result.label}: verification failed).")
+                    continue
 
             print(
                 f"{benchmark.name()} complete ({bench_result.label}: {bench_result.value:.3f} {bench_result.unit})."
@@ -220,7 +228,6 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             benchmark.setup()
             if options.verbose:
                 print(f"{benchmark.name()} setup complete.")
-
         except Exception as e:
             if options.exit_on_failure:
                 raise e
@@ -405,7 +412,9 @@ def validate_and_parse_env_args(env_args):
         "--verbose", help="Print output of all the commands.", action="store_true"
     )
     parser.add_argument(
-        "--exit-on-failure", help="Exit on first failure.", action="store_true"
+        "--exit-on-failure",
+        help="Exit on first benchmark failure.",
+        action="store_true",
    )
    parser.add_argument(
        "--compare-type",
diff --git a/devops/scripts/benchmarks/options.py b/devops/scripts/benchmarks/options.py
index ff94274d6647e..04a7e76be43e3 100644
--- a/devops/scripts/benchmarks/options.py
+++ b/devops/scripts/benchmarks/options.py
@@ -69,6 +69,7 @@ class Options:
     current_run_name: str = "This PR"
     preset: str = "Full"
     build_jobs: int = multiprocessing.cpu_count()
+    exit_on_failure: bool = False
 
     # Options intended for CI:
     regression_threshold: float = 0.05
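
The sketch below is a minimal standalone illustration (not part of the patch) of the failure-handling pattern the diff introduces: when exit_on_failure is set, the first failing benchmark raises immediately and aborts the run, matching CI's need for a nonzero exit; otherwise the failure is recorded in the failures dict and iteration continues. FakeBenchmark and the trimmed-down Options here are hypothetical stand-ins, not the repository's real classes.

# Illustrative sketch only. FakeBenchmark and this trimmed-down Options are
# hypothetical stand-ins for the real classes in devops/scripts/benchmarks.
from dataclasses import dataclass


@dataclass
class Options:
    exit_on_failure: bool = False


class FakeBenchmark:
    def name(self):
        return "fake-bench"

    def run(self, env_vars):
        return None  # simulate a benchmark that produces no results


def run_iterations(benchmark, options, failures):
    results = benchmark.run({})
    if results is None:
        if options.exit_on_failure:
            # CI mode: abort the whole run on the first failure.
            raise RuntimeError(f"Benchmark {benchmark.name()} produced no results!")
        # Default mode: record the failure and let the caller move on.
        failures[benchmark.name()] = "benchmark produced no results!"


failures = {}
run_iterations(FakeBenchmark(), Options(exit_on_failure=False), failures)
print(failures)  # {'fake-bench': 'benchmark produced no results!'}

With Options(exit_on_failure=True) the same call raises RuntimeError instead of populating the dict, which is the behavior the patch wires up behind the --exit-on-failure flag.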