From c700a9bb072a403b446b262a305fa957444b95ea Mon Sep 17 00:00:00 2001
From: SadhviNarayanan
Date: Mon, 28 Apr 2025 12:08:18 -0700
Subject: [PATCH] updated the script to use an external file for comparison and
 condensed the grep logic for embench

---
 .../embench}/expected_results.json | 11 ++-
 bin/regression-wally               | 72 ++++++++++++-------
 2 files changed, 49 insertions(+), 34 deletions(-)
 rename {bin => benchmarks/embench}/expected_results.json (61%)

diff --git a/bin/expected_results.json b/benchmarks/embench/expected_results.json
similarity index 61%
rename from bin/expected_results.json
rename to benchmarks/embench/expected_results.json
index 8ffd582a8..949f88acc 100644
--- a/bin/expected_results.json
+++ b/benchmarks/embench/expected_results.json
@@ -1,23 +1,20 @@
 {
-    "coremark": {
-        "coremark/mhz": 3.38
-    },
     "embench_rv32imc": {
         "wallySizeOpt_size": {
             "size geometric mean": 1.04,
             "size geometric standard deviation": 1.26
         },
         "wallySizeOpt_speed": {
-            "size geometric mean": 1.07,
-            "size geometric standard deviation": 1.51
+            "speed geometric mean": 1.07,
+            "speed geometric standard deviation": 1.51
         },
         "wallySpeedOpt_size": {
             "size geometric mean": 1.21,
             "size geometric standard deviation": 1.28
         },
         "wallySpeedOpt_speed": {
-            "size geometric mean": 1.15,
-            "size geometric standard deviation": 1.61
+            "speed geometric mean": 1.15,
+            "speed geometric standard deviation": 1.61
         }
     }
 }
\ No newline at end of file
diff --git a/bin/regression-wally b/bin/regression-wally
index d8af9ffaa..b76d5685c 100755
--- a/bin/regression-wally
+++ b/bin/regression-wally
@@ -14,6 +14,7 @@
 #
 ##################################
 import argparse
+import json
 import multiprocessing
 import os
 import shutil
@@ -279,7 +280,7 @@ lockstepwaivers = [
 # Data Types & Functions
 ##################################
 
-TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'diffcommand', 'simlog'])
+TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'altcommand', 'simlog'])
 # name: the name of this test configuration (used in printing human-readable
 #       output and picking logfile names)
 # cmd: the command to run to test (should include the logfile as '{}', and
@@ -288,7 +289,7 @@ TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfil
 #       grep finds that string in the logfile (is used by grep, so it may
 #       be any pattern grep accepts, see `man 1 grep` for more info).
 # grepfile: a string containing the location of the file to be searched for output
-# diffcommand: the command to run, if enabled, to check the output of the CoreMark benchmark
+# altcommand: the command to run, if enabled, to check the output of the CoreMark benchmark
 #       against the expected output. The command should write to the
 #       validation log file. If None, no diff command is run.
 # simlog: the name of the logfile to be used for this test.
@@ -321,7 +322,7 @@ def addTests(testList, sim, coverStr, configs):
                 cmd=f"{cmdPrefix} {t} > {sim_log}",
                 grepstr=gs,
                 grepfile = grepfile,
-                diffcommand=None,
+                altcommand=None,
                 simlog=sim_log)
             configs.append(tc)
 
@@ -359,7 +360,7 @@ def addTestsByDir(testDir, config, sim, coverStr, configs, lockstepMode=0, breke
                 cmd=f"{cmdPrefix} {fullfile} > {sim_log}",
                 grepstr=gs,
                 grepfile = sim_log,
-                diffcommand=None,
+                altcommand=None,
                 simlog=sim_log)
             configs.append(tc)
 
@@ -376,7 +377,7 @@ def search_log_for_text(text, grepfile):
 def run_test_case(config, dryrun: bool = False):
     grepfile = config.grepfile
     cmd = config.cmd
-    diffcommand = config.diffcommand
+    altcommand = config.altcommand
     if dryrun:
         print(f"Executing {cmd}", flush=True)
         return 0
@@ -386,9 +387,9 @@ def run_test_case(config, dryrun: bool = False):
         print(f"{bcolors.FAIL}{cmd}: Failed to execute{bcolors.ENDC}", flush=True)
         print(f" Check {grepfile} for more details.", flush=True)
         return 1
-    elif diffcommand:
+    elif altcommand:
         sim_log = config.simlog
-        check_ret_code = os.system(diffcommand)
+        check_ret_code = os.system(altcommand)
         with open(sim_log, 'a') as f:
             if check_ret_code == 0:
                 # Success message
@@ -451,7 +452,7 @@ def process_args(args):
     elif args.branch:
         TIMEOUT_DUR = 120*60 # seconds
     elif args.nightly or args.performance:
-        TIMEOUT_DUR = 5*3600 # seconds
+        TIMEOUT_DUR = 60*60 # seconds
     else:
         TIMEOUT_DUR = 10*60 # seconds
 
@@ -467,7 +468,7 @@ def selectTests(args, sims, coverStr):
             cmd=f"lint-wally {'--nightly' if args.nightly else ''} | tee {regressionDir}/verilator/logs/all_lints.log",
             grepstr="lints run with no errors or warnings",
             grepfile = f"{regressionDir}/verilator/logs/all_lints.log",
-            diffcommand=None,
+            altcommand=None,
             simlog=f"{regressionDir}/verilator/logs/all_lints.log"),
     ]
 
@@ -534,38 +535,55 @@ def selectTests(args, sims, coverStr):
                 cmd=f"wsim --tb testbench_fp --sim {testfloatsim} {config} {test} > {sim_log}",
                 grepstr="All Tests completed with 0 errors",
                 grepfile = sim_log,
-                diffcommand=None,
+                altcommand=None,
                 simlog=sim_log)
             configs.append(tc)
 
     if (args.performance or args.nightly): # RUNNING THE EMBENCH TEST
+        embench_expected_json = f"{WALLY}/benchmarks/embench/expected_results.json"
+        with open(embench_expected_json) as f:
+            embench_expected = json.load(f)
+
+        # As of now, we are only benchmarking rv32imc for embench performance
+        embench_expected_values = embench_expected["embench_rv32imc"]
+
+        # Paths to the JSON files holding the actual benchmark results
         wallySizeOpt_size_json = f"{WALLY}/benchmarks/embench/wallySizeOpt_size.json"
         wallySizeOpt_speed_json = f"{WALLY}/benchmarks/embench/wallySizeOpt_speed.json"
         wallySpeedOpt_size_json = f"{WALLY}/benchmarks/embench/wallySpeedOpt_size.json"
         wallySpeedOpt_speed_json = f"{WALLY}/benchmarks/embench/wallySpeedOpt_speed.json"
 
+        # Map each results file to its expected values
+        file_to_expectations = {
+            wallySizeOpt_size_json: embench_expected_values["wallySizeOpt_size"],
+            wallySizeOpt_speed_json: embench_expected_values["wallySizeOpt_speed"],
+            wallySpeedOpt_size_json: embench_expected_values["wallySpeedOpt_size"],
+            wallySpeedOpt_speed_json: embench_expected_values["wallySpeedOpt_speed"]
+        }
+
+        # Dynamically generate the grep commands, one per geometric mean / standard deviation check
+        grep_commands = []
+        for filepath, checks in file_to_expectations.items():
+            for key, expected_value in checks.items():
+                grep_commands.append(f"grep -q '{key}.*{expected_value}' {filepath}")
+
+        # Join all grep commands with "&&" so that all need to pass
+        grep_cmds_combined = " && ".join(grep_commands)
+
         embench_test = TestCase(
             name="embench",
-            variant="rv32gc", # is this the correct variant here? or rv32imac_zicsr
+            variant="rv32gc",
             cmd=(
-                f"touch $WALLY/benchmarks/embench/run.log && ("
-                f"cd $WALLY/benchmarks/embench && make run | tee run.log && "
-                f"grep -q 'size geometric mean.*1.04' {wallySizeOpt_size_json} && "
-                f"grep -q 'size geometric standard deviation.*1.26' {wallySizeOpt_size_json} && "
-                f"grep -q 'speed geometric mean.*1.07' {wallySizeOpt_speed_json} && "
-                f"grep -q 'speed geometric standard deviation.*1.51' {wallySizeOpt_speed_json} && "
-                f"grep -q 'size geometric mean.*1.21' {wallySpeedOpt_size_json} && "
-                f"grep -q 'size geometric standard deviation.*1.28' {wallySpeedOpt_size_json} && "
-                f"grep -q 'speed geometric mean.*1.15' {wallySpeedOpt_speed_json} && "
-                f"grep -q 'speed geometric standard deviation.*1.61' {wallySpeedOpt_speed_json}"
-                f") && echo 'EMBENCH_TEST_PASSED' >> $WALLY/benchmarks/embench/run.log "
-                f"|| (echo 'EMBENCH_TEST_FAILED: Values did not match expected metrics' >> $WALLY/benchmarks/embench/run.log && exit 1)"
-            ),
-
+                f"touch $WALLY/benchmarks/embench/run.log && ("
+                f"cd $WALLY/benchmarks/embench && make run | tee run.log && "
+                f"{grep_cmds_combined}"
+                f") && echo 'EMBENCH_TEST_PASSED' >> $WALLY/benchmarks/embench/run.log "
+                f"|| (echo 'EMBENCH_TEST_FAILED: Values did not match expected metrics' >> $WALLY/benchmarks/embench/run.log && exit 1)"
+            ),
             grepstr="EMBENCH_TEST_PASSED", # make sure this matches the grep string in the command
             grepfile=os.path.expandvars("$WALLY/benchmarks/embench/run.log"),
-            diffcommand=None,
+            altcommand=None,
             simlog=os.path.expandvars("$WALLY/benchmarks/embench/run.log")
         )
         configs.append(embench_test)
@@ -588,7 +606,7 @@ def selectTests(args, sims, coverStr):
             cmd=(f"python3 {coremark_sweep_test_dir}"),
             grepstr=None,
             grepfile=None,
-            diffcommand=f"diff -q {actual_coremark_values_csv} {expected_coremark_values_csv}",
+            altcommand=f"diff -q {actual_coremark_values_csv} {expected_coremark_values_csv}",
             simlog=sim_log
         )
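
Note on the altcommand flow: run_test_case treats a zero exit status from altcommand as a pass and anything else as a failure, which is what lets the CoreMark sweep above substitute a `diff -q` for the usual grep check. A minimal sketch of that contract, with hypothetical file names, not the script's actual code:

    import os

    # Any shell command can serve as the alternative check; a zero exit
    # status marks the test as passed, matching the check_ret_code == 0
    # branch in run_test_case (CSV names below are hypothetical).
    altcommand = "diff -q actual_values.csv expected_values.csv"
    passed = os.system(altcommand) == 0
    print("PASS" if passed else "FAIL")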
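
Note on the generated grep checks: the expected values are interpolated into unanchored grep patterns, so the '.' in a value such as 1.04 matches any character, and a longer number such as 11.04 would also satisfy the pattern. If stricter matching is ever needed, the same file_to_expectations mapping could drive a numeric comparison instead of a grep chain. A minimal sketch, assuming (hypothetically) that each wally*.json results file is a flat JSON object keyed by metric name:

    import json

    def check_benchmark_results(file_to_expectations, tolerance=0.005):
        # file_to_expectations maps a results-file path to a dict of
        # {metric name: expected float}, as built in selectTests above.
        for filepath, checks in file_to_expectations.items():
            with open(filepath) as f:
                actual = json.load(f)
            for key, expected_value in checks.items():
                # tolerance absorbs rounding: expected_results.json stores
                # values rounded to two decimal places
                if abs(float(actual[key]) - float(expected_value)) > tolerance:
                    return False
        return True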