updated the script to use an external expected-results file for comparison and condensed the grep logic for embench

This commit is contained in:
SadhviNarayanan 2025-04-28 12:08:18 -07:00
parent 381bb34879
commit c700a9bb07
2 changed files with 49 additions and 34 deletions

@@ -1,23 +1,20 @@
{
"coremark": {
"coremark/mhz": 3.38
},
"embench_rv32imc": {
"wallySizeOpt_size": {
"size geometric mean": 1.04,
"size geometric standard deviation": 1.26
},
"wallySizeOpt_speed": {
"size geometric mean": 1.07,
"size geometric standard deviation": 1.51
"speed geometric mean": 1.07,
"speed geometric standard deviation": 1.51
},
"wallySpeedOpt_size": {
"size geometric mean": 1.21,
"size geometric standard deviation": 1.28
},
"wallySpeedOpt_speed": {
"size geometric mean": 1.15,
"size geometric standard deviation": 1.61
"speed geometric mean": 1.15,
"speed geometric standard deviation": 1.61
}
}
}

@@ -14,6 +14,7 @@
#
##################################
import argparse
import json
import multiprocessing
import os
import shutil
@@ -279,7 +280,7 @@ lockstepwaivers = [
# Data Types & Functions
##################################
TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'diffcommand', 'simlog'])
TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'altcommand', 'simlog'])
# name: the name of this test configuration (used in printing human-readable
# output and picking logfile names)
# cmd: the command to run to test (should include the logfile as '{}', and
@@ -288,7 +289,7 @@ TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfil
# grep finds that string in the logfile (is used by grep, so it may
# be any pattern grep accepts, see `man 1 grep` for more info).
# grepfile: a string containing the location of the file to be searched for output
# diffcommand: the command to run, if enabled, to check the output of the CoreMark benchmark
# altcommand: the command to run, if enabled, to check the output of the CoreMark benchmark
# against the expected output. The command should write to the
# validation log file. If None, no diff command is run.
# simlog: the name of the logfile to be used for this test.
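For reference, a minimal sketch of a TestCase built with the renamed altcommand field, following the CoreMark sweep entry at the end of this diff; the field values here are placeholders for illustration, not taken from the commit:

    # Hypothetical TestCase that uses altcommand instead of grepstr as the pass/fail check
    example = TestCase(
        name="coremark_sweep",                         # human-readable test name (placeholder)
        variant="rv32gc",                              # configuration variant (placeholder)
        cmd="python3 coremark_sweep.py > sweep.log",   # command under test (placeholder)
        grepstr=None,                                  # no pattern search in the log
        grepfile=None,
        altcommand="diff -q actual.csv expected.csv",  # run instead of grep; its exit code decides pass/fail
        simlog="sweep.log")                            # logfile the result message is appended to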
@@ -321,7 +322,7 @@ def addTests(testList, sim, coverStr, configs):
cmd=f"{cmdPrefix} {t} > {sim_log}",
grepstr=gs,
grepfile = grepfile,
diffcommand=None,
altcommand=None,
simlog=sim_log)
configs.append(tc)
@@ -359,7 +360,7 @@ def addTestsByDir(testDir, config, sim, coverStr, configs, lockstepMode=0, breke
cmd=f"{cmdPrefix} {fullfile} > {sim_log}",
grepstr=gs,
grepfile = sim_log,
diffcommand=None,
altcommand=None,
simlog=sim_log)
configs.append(tc)
@@ -376,7 +377,7 @@ def search_log_for_text(text, grepfile):
def run_test_case(config, dryrun: bool = False):
grepfile = config.grepfile
cmd = config.cmd
diffcommand = config.diffcommand
altcommand = config.altcommand
if dryrun:
print(f"Executing {cmd}", flush=True)
return 0
@@ -386,9 +387,9 @@ def run_test_case(config, dryrun: bool = False):
print(f"{bcolors.FAIL}{cmd}: Failed to execute{bcolors.ENDC}", flush=True)
print(f" Check {grepfile} for more details.", flush=True)
return 1
elif diffcommand:
elif altcommand:
sim_log = config.simlog
check_ret_code = os.system(diffcommand)
check_ret_code = os.system(altcommand)
with open(sim_log, 'a') as f:
if check_ret_code == 0:
# Success message
@@ -451,7 +452,7 @@ def process_args(args):
elif args.branch:
TIMEOUT_DUR = 120*60 # seconds
elif args.nightly or args.performance:
TIMEOUT_DUR = 5*3600 # seconds
TIMEOUT_DUR = 60*60 # seconds
else:
TIMEOUT_DUR = 10*60 # seconds
@@ -467,7 +468,7 @@ def selectTests(args, sims, coverStr):
cmd=f"lint-wally {'--nightly' if args.nightly else ''} | tee {regressionDir}/verilator/logs/all_lints.log",
grepstr="lints run with no errors or warnings",
grepfile = f"{regressionDir}/verilator/logs/all_lints.log",
diffcommand=None,
altcommand=None,
simlog=f"{regressionDir}/verilator/logs/all_lints.log"),
]
@@ -534,38 +535,55 @@ def selectTests(args, sims, coverStr):
cmd=f"wsim --tb testbench_fp --sim {testfloatsim} {config} {test} > {sim_log}",
grepstr="All Tests completed with 0 errors",
grepfile = sim_log,
diffcommand=None,
altcommand=None,
simlog=sim_log)
configs.append(tc)
if (args.performance or args.nightly):
# RUNNING THE EMBENCH TEST
embench_expected_json = f"{WALLY}/benchmarks/embench/expected_results.json"
with open(embench_expected_json) as f:
embench_expected = json.load(f)
# As of now, we are only benchmarking rv32imc for embench performance
embench_expected_values = embench_expected["embench_rv32imc"]
# Paths to the JSON result files produced by the embench run (the measured values to check)
wallySizeOpt_size_json = f"{WALLY}/benchmarks/embench/wallySizeOpt_size.json"
wallySizeOpt_speed_json = f"{WALLY}/benchmarks/embench/wallySizeOpt_speed.json"
wallySpeedOpt_size_json = f"{WALLY}/benchmarks/embench/wallySpeedOpt_size.json"
wallySpeedOpt_speed_json = f"{WALLY}/benchmarks/embench/wallySpeedOpt_speed.json"
# Map file names to their expected values
file_to_expectations = {
wallySizeOpt_size_json: embench_expected_values["wallySizeOpt_size"],
wallySizeOpt_speed_json: embench_expected_values["wallySizeOpt_speed"],
wallySpeedOpt_size_json: embench_expected_values["wallySpeedOpt_size"],
wallySpeedOpt_speed_json: embench_expected_values["wallySpeedOpt_speed"]
}
# Dynamically generate one grep command per expected geometric mean / standard deviation
grep_commands = []
for filepath, checks in file_to_expectations.items():
for key, expected_value in checks.items():
grep_commands.append(f"grep -q '{key}.*{expected_value}' {filepath}")
# Join all grep commands with "&&" so that all need to pass
grep_cmds_combined = " && ".join(grep_commands)
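# For reference, with the expected_results.json values shown earlier in this commit,
# grep_cmds_combined expands to an &&-chained string equivalent to the hard-coded
# checks it replaces, e.g. (paths shown with $WALLY for brevity; the generated
# string contains the expanded path):
#   grep -q 'size geometric mean.*1.04' $WALLY/benchmarks/embench/wallySizeOpt_size.json &&
#   grep -q 'size geometric standard deviation.*1.26' $WALLY/benchmarks/embench/wallySizeOpt_size.json &&
#   grep -q 'speed geometric mean.*1.07' $WALLY/benchmarks/embench/wallySizeOpt_speed.json && ...
# (and so on for the remaining SpeedOpt size/speed metrics), so a single failing metric fails the test.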
embench_test = TestCase(
name="embench",
variant="rv32gc", # is this the correct variant here? or rv32imac_zicsr
variant="rv32gc",
cmd=(
f"touch $WALLY/benchmarks/embench/run.log && ("
f"cd $WALLY/benchmarks/embench && make run | tee run.log && "
f"grep -q 'size geometric mean.*1.04' {wallySizeOpt_size_json} && "
f"grep -q 'size geometric standard deviation.*1.26' {wallySizeOpt_size_json} && "
f"grep -q 'speed geometric mean.*1.07' {wallySizeOpt_speed_json} && "
f"grep -q 'speed geometric standard deviation.*1.51' {wallySizeOpt_speed_json} && "
f"grep -q 'size geometric mean.*1.21' {wallySpeedOpt_size_json} && "
f"grep -q 'size geometric standard deviation.*1.28' {wallySpeedOpt_size_json} && "
f"grep -q 'speed geometric mean.*1.15' {wallySpeedOpt_speed_json} && "
f"grep -q 'speed geometric standard deviation.*1.61' {wallySpeedOpt_speed_json}"
f"{grep_cmds_combined}"
f") && echo 'EMBENCH_TEST_PASSED' >> $WALLY/benchmarks/embench/run.log "
f"|| (echo 'EMBENCH_TEST_FAILED: Values did not match expected metrics' >> $WALLY/benchmarks/embench/run.log && exit 1)"
),
grepstr="EMBENCH_TEST_PASSED", # make sure this matches the grep string in the command
grepfile=os.path.expandvars("$WALLY/benchmarks/embench/run.log"),
diffcommand=None,
altcommand=None,
simlog=os.path.expandvars("$WALLY/benchmarks/embench/run.log")
)
configs.append(embench_test)
@@ -588,7 +606,7 @@ def selectTests(args, sims, coverStr):
cmd=(f"python3 {coremark_sweep_test_dir}"),
grepstr=None,
grepfile=None,
diffcommand=f"diff -q {actual_coremark_values_csv} {expected_coremark_values_csv}",
altcommand=f"diff -q {actual_coremark_values_csv} {expected_coremark_values_csv}",
simlog=sim_log
)
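
To close the loop on how this coremark_sweep entry reports its result: run_test_case (changed earlier in this diff) executes altcommand with os.system and appends a pass/fail line to simlog. A minimal sketch of that flow, with the message text and return values as assumptions since they lie outside the visible hunk:

    # Sketch of the altcommand branch in run_test_case (messages are placeholders)
    check_ret_code = os.system(altcommand)       # e.g. diff -q actual.csv expected.csv
    with open(sim_log, 'a') as f:
        if check_ret_code == 0:
            f.write("Validation passed\n")       # placeholder success message
        else:
            f.write("Validation failed\n")       # placeholder failure message
    return 0 if check_ret_code == 0 else 1       # assumption: the exit code drives pass/fail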