From d923d961c1a319b097ebc42655c3cc67acdd543d Mon Sep 17 00:00:00 2001
From: SadhviNarayanan
Date: Sun, 4 May 2025 12:03:15 -0700
Subject: [PATCH] diff logic for embench - directory level

---
 benchmarks/embench/Makefile              |  8 +-
 .../wallySizeOpt_size.json}              |  0
 .../wallySizeOpt_speed.json}             |  0
 .../wallySpeedOpt_size.json}             |  0
 .../wallySpeedOpt_speed.json}            |  0
 .../wallySizeOpt_size.json               | 29 +++++++
 .../wallySizeOpt_speed.json              | 29 +++++++
 .../wallySpeedOpt_size.json              | 29 +++++++
 .../wallySpeedOpt_speed.json             | 29 +++++++
 benchmarks/embench/expected_results.json | 20 -----
 benchmarks/graphGen.py                   |  8 +-
 bin/regression-wally                     | 87 ++++++------------
 12 files changed, 150 insertions(+), 89 deletions(-)
 rename benchmarks/embench/{expected_embench_results/expected_wallySizeOpt_size.json => actual_embench_results/wallySizeOpt_size.json} (100%)
 rename benchmarks/embench/{expected_embench_results/expected_wallySizeOpt_speed.json => actual_embench_results/wallySizeOpt_speed.json} (100%)
 rename benchmarks/embench/{expected_embench_results/expected_wallySpeedOpt_size.json => actual_embench_results/wallySpeedOpt_size.json} (100%)
 rename benchmarks/embench/{expected_embench_results/expected_wallySpeedOpt_speed.json => actual_embench_results/wallySpeedOpt_speed.json} (100%)
 create mode 100644 benchmarks/embench/expected_embench_results/wallySizeOpt_size.json
 create mode 100644 benchmarks/embench/expected_embench_results/wallySizeOpt_speed.json
 create mode 100644 benchmarks/embench/expected_embench_results/wallySpeedOpt_size.json
 create mode 100644 benchmarks/embench/expected_embench_results/wallySpeedOpt_speed.json
 delete mode 100644 benchmarks/embench/expected_results.json

diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile
index 0a52f79ae..00504d741 100644
--- a/benchmarks/embench/Makefile
+++ b/benchmarks/embench/Makefile
@@ -65,13 +65,13 @@ spike_run:
 
 # python wrapper to present results of embench size benchmark
 size: buildsize
-	$(embench_dir)/benchmark_size.py --builddir=bd_speedopt_size --json-output > wallySpeedOpt_size.json
-	$(embench_dir)/benchmark_size.py --builddir=bd_sizeopt_size --json-output > wallySizeOpt_size.json
+	$(embench_dir)/benchmark_size.py --builddir=bd_speedopt_size --json-output > actual_embench_results/wallySpeedOpt_size.json
+	$(embench_dir)/benchmark_size.py --builddir=bd_sizeopt_size --json-output > actual_embench_results/wallySizeOpt_size.json
 
 # python wrapper to present results of embench speed benchmark
 speed:
-	$(embench_dir)/benchmark_speed.py --builddir=bd_sizeopt_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySizeOpt_speed.json
-	$(embench_dir)/benchmark_speed.py --builddir=bd_speedopt_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySpeedOpt_speed.json
+	$(embench_dir)/benchmark_speed.py --builddir=bd_sizeopt_speed --target-module run_wally --cpu-mhz=1 --json-output > actual_embench_results/wallySizeOpt_speed.json
+	$(embench_dir)/benchmark_speed.py --builddir=bd_speedopt_speed --target-module run_wally --cpu-mhz=1 --json-output > actual_embench_results/wallySpeedOpt_speed.json
 
 # deletes all files
 clean:
diff --git a/benchmarks/embench/expected_embench_results/expected_wallySizeOpt_size.json b/benchmarks/embench/actual_embench_results/wallySizeOpt_size.json
similarity index 100%
rename from benchmarks/embench/expected_embench_results/expected_wallySizeOpt_size.json
rename to benchmarks/embench/actual_embench_results/wallySizeOpt_size.json
diff --git a/benchmarks/embench/expected_embench_results/expected_wallySizeOpt_speed.json b/benchmarks/embench/actual_embench_results/wallySizeOpt_speed.json
similarity index 100%
rename from benchmarks/embench/expected_embench_results/expected_wallySizeOpt_speed.json
rename to benchmarks/embench/actual_embench_results/wallySizeOpt_speed.json
diff --git a/benchmarks/embench/expected_embench_results/expected_wallySpeedOpt_size.json b/benchmarks/embench/actual_embench_results/wallySpeedOpt_size.json
similarity index 100%
rename from benchmarks/embench/expected_embench_results/expected_wallySpeedOpt_size.json
rename to benchmarks/embench/actual_embench_results/wallySpeedOpt_size.json
diff --git a/benchmarks/embench/expected_embench_results/expected_wallySpeedOpt_speed.json b/benchmarks/embench/actual_embench_results/wallySpeedOpt_speed.json
similarity index 100%
rename from benchmarks/embench/expected_embench_results/expected_wallySpeedOpt_speed.json
rename to benchmarks/embench/actual_embench_results/wallySpeedOpt_speed.json
diff --git a/benchmarks/embench/expected_embench_results/wallySizeOpt_size.json b/benchmarks/embench/expected_embench_results/wallySizeOpt_size.json
new file mode 100644
index 000000000..4c80ab462
--- /dev/null
+++ b/benchmarks/embench/expected_embench_results/wallySizeOpt_size.json
@@ -0,0 +1,29 @@
+{ "size results" :
+  { "detailed size results" :
+    { "aha-mont64" : 0.96,
+      "crc32" : 0.74,
+      "cubic" : 2.01,
+      "edn" : 1.09,
+      "huffbench" : 1.16,
+      "matmult-int" : 0.87,
+      "md5sum" : 1.00,
+      "minver" : 0.87,
+      "nbody" : 0.92,
+      "nettle-aes" : 1.26,
+      "nettle-sha256" : 1.62,
+      "nsichneu" : 1.42,
+      "picojpeg" : 1.14,
+      "primecount" : 0.73,
+      "qrduino" : 1.03,
+      "sglib-combined" : 1.01,
+      "slre" : 1.05,
+      "st" : 0.93,
+      "statemate" : 0.82,
+      "tarfind" : 0.95,
+      "ud" : 0.96,
+      "wikisort" : 0.94
+    },
+    "size geometric mean" : 1.04,
+    "size geometric standard deviation" : 1.26
+  }
+}
diff --git a/benchmarks/embench/expected_embench_results/wallySizeOpt_speed.json b/benchmarks/embench/expected_embench_results/wallySizeOpt_speed.json
new file mode 100644
index 000000000..f9a787973
--- /dev/null
+++ b/benchmarks/embench/expected_embench_results/wallySizeOpt_speed.json
@@ -0,0 +1,29 @@
+{ "speed results" :
+  { "detailed speed results" :
+    { "aha-mont64" : 0.81,
+      "crc32" : 1.00,
+      "cubic" : 0.42,
+      "edn" : 0.88,
+      "huffbench" : 1.38,
+      "matmult-int" : 1.11,
+      "md5sum" : 2.00,
+      "minver" : 0.63,
+      "nbody" : 0.67,
+      "nettle-aes" : 0.82,
+      "nettle-sha256" : 0.96,
+      "nsichneu" : 1.14,
+      "picojpeg" : 0.79,
+      "primecount" : 1.30,
+      "qrduino" : 1.22,
+      "sglib-combined" : 1.17,
+      "slre" : 1.25,
+      "st" : 0.84,
+      "statemate" : 2.15,
+      "tarfind" : 2.42,
+      "ud" : 0.88,
+      "wikisort" : 1.71
+    },
+    "speed geometric mean" : 1.07,
+    "speed geometric standard deviation" : 1.51
+  }
+}
diff --git a/benchmarks/embench/expected_embench_results/wallySpeedOpt_size.json b/benchmarks/embench/expected_embench_results/wallySpeedOpt_size.json
new file mode 100644
index 000000000..00c54abd7
--- /dev/null
+++ b/benchmarks/embench/expected_embench_results/wallySpeedOpt_size.json
@@ -0,0 +1,29 @@
+{ "size results" :
+  { "detailed size results" :
+    { "aha-mont64" : 1.50,
+      "crc32" : 0.70,
+      "cubic" : 2.04,
+      "edn" : 1.10,
+      "huffbench" : 1.27,
+      "matmult-int" : 1.15,
+      "md5sum" : 1.18,
+      "minver" : 1.10,
+      "nbody" : 1.12,
+      "nettle-aes" : 1.37,
+      "nettle-sha256" : 1.71,
+      "nsichneu" : 1.51,
+      "picojpeg" : 1.67,
+      "primecount" : 0.73,
+      "qrduino" : 1.43,
+      "sglib-combined" : 1.13,
+      "slre" : 1.28,
+      "st" : 1.29,
+      "statemate" : 0.87,
+      "tarfind" : 1.09,
+      "ud" : 1.14,
+      "wikisort" : 1.13
+    },
+    "size geometric mean" : 1.21,
+    "size geometric standard deviation" : 1.28
+  }
+}
diff --git a/benchmarks/embench/expected_embench_results/wallySpeedOpt_speed.json b/benchmarks/embench/expected_embench_results/wallySpeedOpt_speed.json
new file mode 100644
index 000000000..837d3ef2a
--- /dev/null
+++ b/benchmarks/embench/expected_embench_results/wallySpeedOpt_speed.json
@@ -0,0 +1,29 @@
+{ "speed results" :
+  { "detailed speed results" :
+    { "aha-mont64" : 0.84,
+      "crc32" : 1.05,
+      "cubic" : 0.42,
+      "edn" : 1.06,
+      "huffbench" : 1.58,
+      "matmult-int" : 1.11,
+      "md5sum" : 1.92,
+      "minver" : 0.65,
+      "nbody" : 0.67,
+      "nettle-aes" : 0.93,
+      "nettle-sha256" : 0.99,
+      "nsichneu" : 0.70,
+      "picojpeg" : 0.99,
+      "primecount" : 1.41,
+      "qrduino" : 1.32,
+      "sglib-combined" : 1.41,
+      "slre" : 1.54,
+      "st" : 0.86,
+      "statemate" : 3.13,
+      "tarfind" : 3.31,
+      "ud" : 0.94,
+      "wikisort" : 1.74
+    },
+    "speed geometric mean" : 1.15,
+    "speed geometric standard deviation" : 1.61
+  }
+}
diff --git a/benchmarks/embench/expected_results.json b/benchmarks/embench/expected_results.json
deleted file mode 100644
index 949f88acc..000000000
--- a/benchmarks/embench/expected_results.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-    "embench_rv32imc": {
-        "wallySizeOpt_size": {
-            "size geometric mean": 1.04,
-            "size geometric standard deviation": 1.26
-        },
-        "wallySizeOpt_speed": {
-            "speed geometric mean": 1.07,
-            "speed geometric standard deviation": 1.51
-        },
-        "wallySpeedOpt_size": {
-            "size geometric mean": 1.21,
-            "size geometric standard deviation": 1.28
-        },
-        "wallySpeedOpt_speed": {
-            "speed geometric mean": 1.15,
-            "speed geometric standard deviation": 1.61
-        }
-    }
-}
\ No newline at end of file
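Note: the four golden files above use the JSON shape emitted by embench's benchmark_size.py and benchmark_speed.py, which is also what graphGen.py's loadEmbench consumes: a top-level "size results" / "speed results" object holding per-benchmark relative scores plus a geometric mean and geometric standard deviation. A minimal sketch of reading the summary numbers back out of one of these reports (the load_embench_summary helper is hypothetical, not part of this patch):

import json

def load_embench_summary(path, kind="size"):
    # Return (geometric mean, geometric standard deviation) from an embench
    # JSON report; use kind="speed" for the *_speed.json reports.
    with open(path) as f:
        results = json.load(f)[f"{kind} results"]
    return (results[f"{kind} geometric mean"],
            results[f"{kind} geometric standard deviation"])

# e.g. yields (1.04, 1.26) for the golden size-optimized size report above
mean, std = load_embench_summary(
    "benchmarks/embench/expected_embench_results/wallySizeOpt_size.json")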
diff --git a/benchmarks/graphGen.py b/benchmarks/graphGen.py
index fd283190e..878bb8847 100755
--- a/benchmarks/graphGen.py
+++ b/benchmarks/graphGen.py
@@ -95,10 +95,10 @@ def main():
     embenchSizeOpt_SizeData = {}
     embenchSpeedOpt_SizeData = {}
     coremarkData = loadCoremark(coremarkData)
-    embenchSpeedOpt_SpeedData = loadEmbench("embench/wallySpeedOpt_speed.json", embenchSpeedOpt_SpeedData)
-    embenchSizeOpt_SpeedData = loadEmbench("embench/wallySizeOpt_speed.json", embenchSizeOpt_SpeedData)
-    embenchSpeedOpt_SizeData = loadEmbench("embench/wallySpeedOpt_size.json", embenchSpeedOpt_SizeData)
-    embenchSizeOpt_SizeData = loadEmbench("embench/wallySizeOpt_size.json", embenchSizeOpt_SizeData)
+    embenchSpeedOpt_SpeedData = loadEmbench("embench/actual_embench_results/wallySpeedOpt_speed.json", embenchSpeedOpt_SpeedData)
+    embenchSizeOpt_SpeedData = loadEmbench("embench/actual_embench_results/wallySizeOpt_speed.json", embenchSizeOpt_SpeedData)
+    embenchSpeedOpt_SizeData = loadEmbench("embench/actual_embench_results/wallySpeedOpt_size.json", embenchSpeedOpt_SizeData)
+    embenchSizeOpt_SizeData = loadEmbench("embench/actual_embench_results/wallySizeOpt_size.json", embenchSizeOpt_SizeData)
     graphEmbench(embenchSpeedOpt_SpeedData, embenchSizeOpt_SpeedData, embenchSpeedOpt_SizeData, embenchSizeOpt_SizeData)
diff --git a/bin/regression-wally b/bin/regression-wally
index 4455008e2..f037d804b 100755
--- a/bin/regression-wally
+++ b/bin/regression-wally
@@ -279,7 +279,7 @@ lockstepwaivers = [
 # Data Types & Functions
 ##################################
 
-TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'simlog', 'altcommand'], defaults=[None]) # applies the None default to altcommand
+TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'altcommand'], defaults=[None]) # applies the None default to altcommand
 # name: the name of this test configuration (used in printing human-readable
 #       output and picking logfile names)
 # cmd: the command to run to test (should include the logfile as '{}', and
@@ -288,7 +288,6 @@ TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfil
 #       grep finds that string in the logfile (is used by grep, so it may
 #       be any pattern grep accepts, see `man 1 grep` for more info).
 # grepfile: a string containing the location of the file to be searched for output
-# simlog: the name of the logfile to be used for this test.
 # altcommand: the command, if enabled, performs a validation check other than grep
 #             on the log files. None by default, and if specified the command will be run
 class bcolors:
@@ -319,8 +318,7 @@ def addTests(testList, sim, coverStr, configs):
                 variant=config,
                 cmd=f"{cmdPrefix} {t} > {sim_log}",
                 grepstr=gs,
-                grepfile = grepfile,
-                simlog=sim_log)
+                grepfile = grepfile)
        configs.append(tc)
@@ -356,8 +354,7 @@ def addTestsByDir(testDir, config, sim, coverStr, configs, lockstepMode=0, breke
                 variant=config,
                 cmd=f"{cmdPrefix} {fullfile} > {sim_log}",
                 grepstr=gs,
-                grepfile = sim_log,
-                simlog=sim_log)
+                grepfile = sim_log)
             configs.append(tc)
 
 def search_log_for_text(text, grepfile):
@@ -384,7 +381,7 @@ def run_test_case(config, dryrun: bool = False):
            print(f"  Check {grepfile} for more details.", flush=True)
            return 1
    elif altcommand:
-        sim_log = config.simlog
+        sim_log = config.grepfile
        check_ret_code = os.system(altcommand)
        with open(sim_log, 'a') as f:
            if check_ret_code == 0:
@@ -462,8 +459,7 @@ def selectTests(args, sims, coverStr):
                  variant="all",
                  cmd=f"lint-wally {'--nightly' if args.nightly else ''} | tee {regressionDir}/verilator/logs/all_lints.log",
                  grepstr="lints run with no errors or warnings",
-                 grepfile = f"{regressionDir}/verilator/logs/all_lints.log",
-                 simlog=f"{regressionDir}/verilator/logs/all_lints.log")
+                 grepfile = f"{regressionDir}/verilator/logs/all_lints.log")
     ]
 
     # run full buildroot boot simulation (slow) if buildroot flag is set. Start it early to overlap with other tests
@@ -527,85 +523,54 @@ def selectTests(args, sims, coverStr):
                     variant=config,
                     cmd=f"wsim --tb testbench_fp --sim {testfloatsim} {config} {test} > {sim_log}",
                     grepstr="All Tests completed with 0 errors",
-                    grepfile = sim_log,
-                    simlog=sim_log)
+                    grepfile = sim_log)
                 configs.append(tc)
 
     if (args.performance or args.nightly):
         # RUNNING THE EMBENCH TEST
-        actual_json_path_start = f"{WALLY}/benchmarks/embench/"
-        expected_json_path_start = f"{WALLY}/benchmarks/embench/expected_embench_results/"
-        file_name_order = ["wallySizeOpt_size.json", "wallySizeOpt_speed.json", "wallySpeedOpt_size.json", "wallySpeedOpt_speed.json"]
-        # These are the actual file paths names that embench generates
-        file_paths_actual = [f"{actual_json_path_start}{file_name}" for file_name in file_name_order]
-        # The expected file paths are the same as the actual ones, but with "expected_" in front
-        file_paths_expected = [f"{expected_json_path_start}expected_{file_name}" for file_name in file_name_order]
-
+        actual_embench_directory = f"{WALLY}/benchmarks/embench/actual_embench_results/"
+        expected_embench_directory = f"{WALLY}/benchmarks/embench/expected_embench_results/"
         embench_logfile_path = os.path.expandvars("$WALLY/benchmarks/embench/run.log")
-        diff_commands = []
-        for expected, actual in zip(file_paths_expected, file_paths_actual):
-            diff_commands.append(
-                f'echo "\n==============================================" >> "{embench_logfile_path}"\n'
-                f'echo "Comparing {expected} and {actual}:" >> "{embench_logfile_path}"\n'
-                # Run diff
-                f'echo "\nDiff output:" >> "{embench_logfile_path}"\n'
-                f'diff -u "{actual}" "{expected}" >> "{embench_logfile_path}" 2>&1\n'
-                f'if [ $? -ne 0 ]; then\n'
-                f'  echo "MISMATCH found for {expected} and {actual}" >> "{embench_logfile_path}"\n'
-                f'  mismatch_detected=1;\n'
-                f'else\n'
-                f'  echo "Files match." >> "{embench_logfile_path}"\n'
-                f'fi\n'
-                f'echo "==============================================" >> "{embench_logfile_path}"'
-            )
-
-        diff_cmds_combined = "\n".join(diff_commands)
-
-        final_check_block = (
-            f'if [ $mismatch_detected -eq 1 ]; then\n'
-            f'  echo "EMBENCH_TEST_FAILED: Values did not match expected metrics" >> "{embench_logfile_path}";\n'
-            f'  exit 1;\n'
-            f'else\n'
-            f'  echo "EMBENCH_TEST_PASSED" >> "{embench_logfile_path}";\n'
-            f'fi'
-        )
-
-        full_shell_block = f"mismatch_detected=0\n{diff_cmds_combined}\n{final_check_block}"
-
+        # Create the file if it doesn't exist
+        with open(embench_logfile_path, 'w'):
+            pass
+        # Combine everything into the embench_test command
+        # finds any differences between the two embench directories and appends them to the log file
         embench_test = TestCase(
             name="embench",
            variant="rv32gc",
-            cmd = (
-                f"rm -f {embench_logfile_path} && "
-                f"touch {embench_logfile_path} && "
+            cmd=(
                 f"cd $WALLY/benchmarks/embench && "
-                f"make run >> {embench_logfile_path} 2>&1 && "
-                f"bash << 'EOF'\n{full_shell_block}\nEOF"
+                f"make run >> {embench_logfile_path} 2>&1"
             ),
-            grepstr="EMBENCH_TEST_PASSED", # Make sure this matches success message
+            grepstr=None,
             grepfile=embench_logfile_path,
-            simlog=embench_logfile_path
+            altcommand=f"diff -ru {actual_embench_directory} {expected_embench_directory} >> {embench_logfile_path}"
         )
         configs.append(embench_test)
 
         # RUNNING THE COREMARK TEST
         sim_log = f"{regressionDir}/{defaultsim}/logs/validation.log"
-        # Create the directory if it doesn't exist
-        os.makedirs(os.path.dirname(sim_log), exist_ok=True)
+        coremark_logfile_path = os.path.expandvars(sim_log)
+        # Create the directory (and file) if it doesn't exist
+        os.makedirs(os.path.dirname(coremark_logfile_path), exist_ok=True)
+        with open(coremark_logfile_path, 'w'):
+            pass
         coremark_sweep_test_file = f"{WALLY}/benchmarks/coremark/coremark_sweep.py"
         actual_coremark_values_csv = f"{WALLY}/benchmarks/coremark/coremark_results.csv"
         expected_coremark_values_csv = f"{WALLY}/benchmarks/coremark/expected_coremark_results.csv"
+
+        # calculates the difference between the coremark expected outcomes and appends them to the log file
         coremark_test = TestCase(
             name="validate_coremark_sweep",
             variant="coremark check",
             cmd=(f"python3 {coremark_sweep_test_file}"),
             grepstr=None,
-            grepfile=None,
-            simlog=sim_log,
-            altcommand=f"diff -u {actual_coremark_values_csv} {expected_coremark_values_csv} >> {sim_log}"
+            grepfile=coremark_logfile_path,
+            altcommand=f"diff -u {actual_coremark_values_csv} {expected_coremark_values_csv} >> {coremark_logfile_path}"
        )
        configs.append(coremark_test)