From c700a9bb072a403b446b262a305fa957444b95ea Mon Sep 17 00:00:00 2001
From: SadhviNarayanan
Date: Mon, 28 Apr 2025 12:08:18 -0700
Subject: [PATCH] updated the script to use an external file for comparison and
 condensed the grep logic for embench

---
 .../embench}/expected_results.json | 11 ++-
 bin/regression-wally               | 72 ++++++++++++-------
 2 files changed, 49 insertions(+), 34 deletions(-)
 rename {bin => benchmarks/embench}/expected_results.json (61%)

diff --git a/bin/expected_results.json b/benchmarks/embench/expected_results.json
similarity index 61%
rename from bin/expected_results.json
rename to benchmarks/embench/expected_results.json
index 8ffd582a8..949f88acc 100644
--- a/bin/expected_results.json
+++ b/benchmarks/embench/expected_results.json
@@ -1,23 +1,20 @@
 {
-    "coremark": {
-        "coremark/mhz": 3.38
-    },
     "embench_rv32imc": {
         "wallySizeOpt_size": {
             "size geometric mean": 1.04,
             "size geometric standard deviation": 1.26
         },
         "wallySizeOpt_speed": {
-            "size geometric mean": 1.07,
-            "size geometric standard deviation": 1.51
+            "speed geometric mean": 1.07,
+            "speed geometric standard deviation": 1.51
         },
         "wallySpeedOpt_size": {
             "size geometric mean": 1.21,
             "size geometric standard deviation": 1.28
         },
         "wallySpeedOpt_speed": {
-            "size geometric mean": 1.15,
-            "size geometric standard deviation": 1.61
+            "speed geometric mean": 1.15,
+            "speed geometric standard deviation": 1.61
         }
     }
 }
\ No newline at end of file
diff --git a/bin/regression-wally b/bin/regression-wally
index d8af9ffaa..b76d5685c 100755
--- a/bin/regression-wally
+++ b/bin/regression-wally
@@ -14,6 +14,7 @@
 #
 ##################################
 import argparse
+import json
 import multiprocessing
 import os
 import shutil
@@ -279,7 +280,7 @@ lockstepwaivers = [
 # Data Types & Functions
 ##################################
 
-TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'diffcommand', 'simlog'])
+TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'altcommand', 'simlog'])
 # name: the name of this test configuration (used in printing human-readable
 #       output and picking logfile names)
 # cmd: the command to run to test (should include the logfile as '{}', and
@@ -288,7 +289,7 @@ TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfil
 #       grep finds that string in the logfile (is used by grep, so it may
 #       be any pattern grep accepts, see `man 1 grep` for more info).
 # grepfile: a string containing the location of the file to be searched for output
-# diffcommand: the command to run, if enabled, to check the output of the CoreMark benchmark
+# altcommand: the command to run, if enabled, to check the output of the CoreMark benchmark
 #       against the expected output. The command should write to the
 #       validation log file. If None, no diff command is run.
 # simlog: the name of the logfile to be used for this test.
@@ -321,7 +322,7 @@ def addTests(testList, sim, coverStr, configs):
                 cmd=f"{cmdPrefix} {t} > {sim_log}",
                 grepstr=gs,
                 grepfile = grepfile,
-                diffcommand=None,
+                altcommand=None,
                 simlog=sim_log)
             configs.append(tc)
 
@@ -359,7 +360,7 @@ def addTestsByDir(testDir, config, sim, coverStr, configs, lockstepMode=0, breke
                 cmd=f"{cmdPrefix} {fullfile} > {sim_log}",
                 grepstr=gs,
                 grepfile = sim_log,
-                diffcommand=None,
+                altcommand=None,
                 simlog=sim_log)
             configs.append(tc)
 
@@ -376,7 +377,7 @@ def search_log_for_text(text, grepfile):
 def run_test_case(config, dryrun: bool = False):
     grepfile = config.grepfile
     cmd = config.cmd
-    diffcommand = config.diffcommand
+    altcommand = config.altcommand
     if dryrun:
         print(f"Executing {cmd}", flush=True)
         return 0
@@ -386,9 +387,9 @@ def run_test_case(config, dryrun: bool = False):
         print(f"{bcolors.FAIL}{cmd}: Failed to execute{bcolors.ENDC}", flush=True)
         print(f" Check {grepfile} for more details.", flush=True)
         return 1
-    elif diffcommand:
+    elif altcommand:
         sim_log = config.simlog
-        check_ret_code = os.system(diffcommand)
+        check_ret_code = os.system(altcommand)
         with open(sim_log, 'a') as f:
             if check_ret_code == 0:
                 # Success message
@@ -451,7 +452,7 @@ def process_args(args):
     elif args.branch:
         TIMEOUT_DUR = 120*60 # seconds
     elif args.nightly or args.performance:
-        TIMEOUT_DUR = 5*3600 # seconds
+        TIMEOUT_DUR = 60*60 # seconds
     else:
         TIMEOUT_DUR = 10*60 # seconds
 
@@ -467,7 +468,7 @@ def selectTests(args, sims, coverStr):
             cmd=f"lint-wally {'--nightly' if args.nightly else ''} | tee {regressionDir}/verilator/logs/all_lints.log",
             grepstr="lints run with no errors or warnings",
             grepfile = f"{regressionDir}/verilator/logs/all_lints.log",
-            diffcommand=None,
+            altcommand=None,
             simlog=f"{regressionDir}/verilator/logs/all_lints.log"),
     ]
 
@@ -534,38 +535,55 @@ def selectTests(args, sims, coverStr):
                 cmd=f"wsim --tb testbench_fp --sim {testfloatsim} {config} {test} > {sim_log}",
                 grepstr="All Tests completed with 0 errors",
                 grepfile = sim_log,
-                diffcommand=None,
+                altcommand=None,
                 simlog=sim_log)
             configs.append(tc)
 
     if (args.performance or args.nightly): # RUNNING THE EMBENCH TEST
+        embench_expected_json = f"{WALLY}/benchmarks/embench/expected_results.json"
+        with open(embench_expected_json) as f:
+            embench_expected = json.load(f)
+
+        # As of now, we are only benchmarking rv32imc for embench performance
+        embench_expected_values = embench_expected["embench_rv32imc"]
+
+        # Paths to the JSON files holding the actual benchmark results
         wallySizeOpt_size_json = f"{WALLY}/benchmarks/embench/wallySizeOpt_size.json"
         wallySizeOpt_speed_json = f"{WALLY}/benchmarks/embench/wallySizeOpt_speed.json"
         wallySpeedOpt_size_json = f"{WALLY}/benchmarks/embench/wallySpeedOpt_size.json"
         wallySpeedOpt_speed_json = f"{WALLY}/benchmarks/embench/wallySpeedOpt_speed.json"
 
+        # Map each results file to its expected values
+        file_to_expectations = {
+            wallySizeOpt_size_json: embench_expected_values["wallySizeOpt_size"],
+            wallySizeOpt_speed_json: embench_expected_values["wallySizeOpt_speed"],
+            wallySpeedOpt_size_json: embench_expected_values["wallySpeedOpt_size"],
+            wallySpeedOpt_speed_json: embench_expected_values["wallySpeedOpt_speed"]
+        }
+
+        # Dynamically generate the grep commands, one per geometric mean / standard deviation check
+        grep_commands = []
+        for filepath, checks in file_to_expectations.items():
+            for key, expected_value in checks.items():
+                grep_commands.append(f"grep -q '{key}.*{expected_value}' {filepath}")
+
+        # Join all grep commands with "&&" so that all need to pass
+        grep_cmds_combined = " && ".join(grep_commands)
+
         embench_test = TestCase(
             name="embench",
-            variant="rv32gc", # is this the correct variant here? or rv32imac_zicsr
+            variant="rv32gc",
             cmd=(
-                f"touch $WALLY/benchmarks/embench/run.log && ("
-                f"cd $WALLY/benchmarks/embench && make run | tee run.log && "
-                f"grep -q 'size geometric mean.*1.04' {wallySizeOpt_size_json} && "
-                f"grep -q 'size geometric standard deviation.*1.26' {wallySizeOpt_size_json} && "
-                f"grep -q 'speed geometric mean.*1.07' {wallySizeOpt_speed_json} && "
-                f"grep -q 'speed geometric standard deviation.*1.51' {wallySizeOpt_speed_json} && "
-                f"grep -q 'size geometric mean.*1.21' {wallySpeedOpt_size_json} && "
-                f"grep -q 'size geometric standard deviation.*1.28' {wallySpeedOpt_size_json} && "
-                f"grep -q 'speed geometric mean.*1.15' {wallySpeedOpt_speed_json} && "
-                f"grep -q 'speed geometric standard deviation.*1.61' {wallySpeedOpt_speed_json}"
-                f") && echo 'EMBENCH_TEST_PASSED' >> $WALLY/benchmarks/embench/run.log "
-                f"|| (echo 'EMBENCH_TEST_FAILED: Values did not match expected metrics' >> $WALLY/benchmarks/embench/run.log && exit 1)"
-            ),
-
+                f"touch $WALLY/benchmarks/embench/run.log && ("
+                f"cd $WALLY/benchmarks/embench && make run | tee run.log && "
+                f"{grep_cmds_combined}"
+                f") && echo 'EMBENCH_TEST_PASSED' >> $WALLY/benchmarks/embench/run.log "
+                f"|| (echo 'EMBENCH_TEST_FAILED: Values did not match expected metrics' >> $WALLY/benchmarks/embench/run.log && exit 1)"
+            ),
             grepstr="EMBENCH_TEST_PASSED", # make sure this matches the grep string in the command
             grepfile=os.path.expandvars("$WALLY/benchmarks/embench/run.log"),
-            diffcommand=None,
+            altcommand=None,
             simlog=os.path.expandvars("$WALLY/benchmarks/embench/run.log")
         )
         configs.append(embench_test)
@@ -588,7 +606,7 @@ def selectTests(args, sims, coverStr):
             cmd=(f"python3 {coremark_sweep_test_dir}"),
             grepstr=None,
             grepfile=None,
-            diffcommand=f"diff -q {actual_coremark_values_csv} {expected_coremark_values_csv}",
+            altcommand=f"diff -q {actual_coremark_values_csv} {expected_coremark_values_csv}",
             simlog=sim_log
         )
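
Note on the altcommand flow: run_test_case treats a zero exit status from altcommand as a pass and anything else as a failure, which is what lets the CoreMark sweep above substitute a `diff -q` for the usual grep check. A minimal sketch of that contract, with hypothetical file names, not the script's actual code:

    import os

    # Any shell command can serve as the alternative check; a zero exit
    # status marks the test as passed, matching the check_ret_code == 0
    # branch in run_test_case (CSV names below are hypothetical).
    altcommand = "diff -q actual_values.csv expected_values.csv"
    passed = os.system(altcommand) == 0
    print("PASS" if passed else "FAIL")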
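
Note on the generated grep checks: the expected values are interpolated into unanchored grep patterns, so the '.' in a value such as 1.04 matches any character, and a longer number such as 11.04 would also satisfy the pattern. If stricter matching is ever needed, the same file_to_expectations mapping could drive a numeric comparison instead of a grep chain. A minimal sketch, assuming (hypothetically) that each wally*.json results file is a flat JSON object keyed by metric name:

    import json

    def check_benchmark_results(file_to_expectations, tolerance=0.005):
        # file_to_expectations maps a results-file path to a dict of
        # {metric name: expected float}, as built in selectTests above.
        for filepath, checks in file_to_expectations.items():
            with open(filepath) as f:
                actual = json.load(f)
            for key, expected_value in checks.items():
                # tolerance absorbs rounding: expected_results.json stores
                # values rounded to two decimal places
                if abs(float(actual[key]) - float(expected_value)) > tolerance:
                    return False
        return True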