diff logic for embench - directory level

SadhviNarayanan 2025-05-04 12:03:15 -07:00
parent 60f4b90929
commit d923d961c1
12 changed files with 150 additions and 89 deletions

@@ -65,13 +65,13 @@ spike_run:
# python wrapper to present results of embench size benchmark
size: buildsize
-$(embench_dir)/benchmark_size.py --builddir=bd_speedopt_size --json-output > wallySpeedOpt_size.json
-$(embench_dir)/benchmark_size.py --builddir=bd_sizeopt_size --json-output > wallySizeOpt_size.json
+$(embench_dir)/benchmark_size.py --builddir=bd_speedopt_size --json-output > actual_embench_results/wallySpeedOpt_size.json
+$(embench_dir)/benchmark_size.py --builddir=bd_sizeopt_size --json-output > actual_embench_results/wallySizeOpt_size.json
# python wrapper to present results of embench speed benchmark
speed:
-$(embench_dir)/benchmark_speed.py --builddir=bd_sizeopt_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySizeOpt_speed.json
-$(embench_dir)/benchmark_speed.py --builddir=bd_speedopt_speed --target-module run_wally --cpu-mhz=1 --json-output > wallySpeedOpt_speed.json
+$(embench_dir)/benchmark_speed.py --builddir=bd_sizeopt_speed --target-module run_wally --cpu-mhz=1 --json-output > actual_embench_results/wallySizeOpt_speed.json
+$(embench_dir)/benchmark_speed.py --builddir=bd_speedopt_speed --target-module run_wally --cpu-mhz=1 --json-output > actual_embench_results/wallySpeedOpt_speed.json
# deletes all files
clean:

@@ -0,0 +1,29 @@
{ "size results" :
{ "detailed size results" :
{ "aha-mont64" : 0.96,
"crc32" : 0.74,
"cubic" : 2.01,
"edn" : 1.09,
"huffbench" : 1.16,
"matmult-int" : 0.87,
"md5sum" : 1.00,
"minver" : 0.87,
"nbody" : 0.92,
"nettle-aes" : 1.26,
"nettle-sha256" : 1.62,
"nsichneu" : 1.42,
"picojpeg" : 1.14,
"primecount" : 0.73,
"qrduino" : 1.03,
"sglib-combined" : 1.01,
"slre" : 1.05,
"st" : 0.93,
"statemate" : 0.82,
"tarfind" : 0.95,
"ud" : 0.96,
"wikisort" : 0.94
},
"size geometric mean" : 1.04,
"size geometric standard deviation" : 1.26
}
}

@@ -0,0 +1,29 @@
{ "speed results" :
{ "detailed speed results" :
{ "aha-mont64" : 0.81,
"crc32" : 1.00,
"cubic" : 0.42,
"edn" : 0.88,
"huffbench" : 1.38,
"matmult-int" : 1.11,
"md5sum" : 2.00,
"minver" : 0.63,
"nbody" : 0.67,
"nettle-aes" : 0.82,
"nettle-sha256" : 0.96,
"nsichneu" : 1.14,
"picojpeg" : 0.79,
"primecount" : 1.30,
"qrduino" : 1.22,
"sglib-combined" : 1.17,
"slre" : 1.25,
"st" : 0.84,
"statemate" : 2.15,
"tarfind" : 2.42,
"ud" : 0.88,
"wikisort" : 1.71
},
"speed geometric mean" : 1.07,
"speed geometric standard deviation" : 1.51
}
}

@@ -0,0 +1,29 @@
{ "size results" :
{ "detailed size results" :
{ "aha-mont64" : 1.50,
"crc32" : 0.70,
"cubic" : 2.04,
"edn" : 1.10,
"huffbench" : 1.27,
"matmult-int" : 1.15,
"md5sum" : 1.18,
"minver" : 1.10,
"nbody" : 1.12,
"nettle-aes" : 1.37,
"nettle-sha256" : 1.71,
"nsichneu" : 1.51,
"picojpeg" : 1.67,
"primecount" : 0.73,
"qrduino" : 1.43,
"sglib-combined" : 1.13,
"slre" : 1.28,
"st" : 1.29,
"statemate" : 0.87,
"tarfind" : 1.09,
"ud" : 1.14,
"wikisort" : 1.13
},
"size geometric mean" : 1.21,
"size geometric standard deviation" : 1.28
}
}

@@ -0,0 +1,29 @@
{ "speed results" :
{ "detailed speed results" :
{ "aha-mont64" : 0.84,
"crc32" : 1.05,
"cubic" : 0.42,
"edn" : 1.06,
"huffbench" : 1.58,
"matmult-int" : 1.11,
"md5sum" : 1.92,
"minver" : 0.65,
"nbody" : 0.67,
"nettle-aes" : 0.93,
"nettle-sha256" : 0.99,
"nsichneu" : 0.70,
"picojpeg" : 0.99,
"primecount" : 1.41,
"qrduino" : 1.32,
"sglib-combined" : 1.41,
"slre" : 1.54,
"st" : 0.86,
"statemate" : 3.13,
"tarfind" : 3.31,
"ud" : 0.94,
"wikisort" : 1.74
},
"speed geometric mean" : 1.15,
"speed geometric standard deviation" : 1.61
}
}

@@ -1,20 +0,0 @@
{
"embench_rv32imc": {
"wallySizeOpt_size": {
"size geometric mean": 1.04,
"size geometric standard deviation": 1.26
},
"wallySizeOpt_speed": {
"speed geometric mean": 1.07,
"speed geometric standard deviation": 1.51
},
"wallySpeedOpt_size": {
"size geometric mean": 1.21,
"size geometric standard deviation": 1.28
},
"wallySpeedOpt_speed": {
"speed geometric mean": 1.15,
"speed geometric standard deviation": 1.61
}
}
}

@@ -95,10 +95,10 @@ def main():
embenchSizeOpt_SizeData = {}
embenchSpeedOpt_SizeData = {}
coremarkData = loadCoremark(coremarkData)
embenchSpeedOpt_SpeedData = loadEmbench("embench/wallySpeedOpt_speed.json", embenchSpeedOpt_SpeedData)
embenchSizeOpt_SpeedData = loadEmbench("embench/wallySizeOpt_speed.json", embenchSizeOpt_SpeedData)
embenchSpeedOpt_SizeData = loadEmbench("embench/wallySpeedOpt_size.json", embenchSpeedOpt_SizeData)
embenchSizeOpt_SizeData = loadEmbench("embench/wallySizeOpt_size.json", embenchSizeOpt_SizeData)
embenchSpeedOpt_SpeedData = loadEmbench("embench/actual_embench_results/wallySpeedOpt_speed.json", embenchSpeedOpt_SpeedData)
embenchSizeOpt_SpeedData = loadEmbench("embench/actual_embench_results/wallySizeOpt_speed.json", embenchSizeOpt_SpeedData)
embenchSpeedOpt_SizeData = loadEmbench("embench/actual_embench_results/wallySpeedOpt_size.json", embenchSpeedOpt_SizeData)
embenchSizeOpt_SizeData = loadEmbench("embench/actual_embench_results/wallySizeOpt_size.json", embenchSizeOpt_SizeData)
graphEmbench(embenchSpeedOpt_SpeedData, embenchSizeOpt_SpeedData, embenchSpeedOpt_SizeData, embenchSizeOpt_SizeData)

@@ -279,7 +279,7 @@ lockstepwaivers = [
# Data Types & Functions
##################################
-TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'simlog', 'altcommand'], defaults=[None]) # applies the None default to altcommand
+TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'altcommand'], defaults=[None]) # applies the None default to altcommand
# name: the name of this test configuration (used in printing human-readable
# output and picking logfile names)
# cmd: the command to run to test (should include the logfile as '{}', and
@@ -288,7 +288,6 @@ TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfil
# grep finds that string in the logfile (is used by grep, so it may
# be any pattern grep accepts, see `man 1 grep` for more info).
# grepfile: a string containing the location of the file to be searched for output
-# simlog: the name of the logfile to be used for this test.
# altcommand: the command, if enabled, performs a validation check other than grep
# on the log files. None by default, and if specified the command will be run
class bcolors:
@@ -319,8 +318,7 @@ def addTests(testList, sim, coverStr, configs):
variant=config,
cmd=f"{cmdPrefix} {t} > {sim_log}",
grepstr=gs,
-grepfile = grepfile,
-simlog=sim_log)
+grepfile = grepfile)
configs.append(tc)
@@ -356,8 +354,7 @@ def addTestsByDir(testDir, config, sim, coverStr, configs, lockstepMode=0, breke
variant=config,
cmd=f"{cmdPrefix} {fullfile} > {sim_log}",
grepstr=gs,
-grepfile = sim_log,
-simlog=sim_log)
+grepfile = sim_log)
configs.append(tc)
def search_log_for_text(text, grepfile):
@@ -384,7 +381,7 @@ def run_test_case(config, dryrun: bool = False):
print(f" Check {grepfile} for more details.", flush=True)
return 1
elif altcommand:
-sim_log = config.simlog
+sim_log = config.grepfile
check_ret_code = os.system(altcommand)
with open(sim_log, 'a') as f:
if check_ret_code == 0:
@@ -462,8 +459,7 @@ def selectTests(args, sims, coverStr):
variant="all",
cmd=f"lint-wally {'--nightly' if args.nightly else ''} | tee {regressionDir}/verilator/logs/all_lints.log",
grepstr="lints run with no errors or warnings",
grepfile = f"{regressionDir}/verilator/logs/all_lints.log",
simlog=f"{regressionDir}/verilator/logs/all_lints.log")
grepfile = f"{regressionDir}/verilator/logs/all_lints.log")
]
# run full buildroot boot simulation (slow) if buildroot flag is set. Start it early to overlap with other tests
@@ -527,85 +523,54 @@ def selectTests(args, sims, coverStr):
variant=config,
cmd=f"wsim --tb testbench_fp --sim {testfloatsim} {config} {test} > {sim_log}",
grepstr="All Tests completed with 0 errors",
-grepfile = sim_log,
-simlog=sim_log)
+grepfile = sim_log)
configs.append(tc)
if (args.performance or args.nightly):
# RUNNING THE EMBENCH TEST
-actual_json_path_start = f"{WALLY}/benchmarks/embench/"
-expected_json_path_start = f"{WALLY}/benchmarks/embench/expected_embench_results/"
-file_name_order = ["wallySizeOpt_size.json", "wallySizeOpt_speed.json", "wallySpeedOpt_size.json", "wallySpeedOpt_speed.json"]
-# These are the actual file paths names that embench generates
-file_paths_actual = [f"{actual_json_path_start}{file_name}" for file_name in file_name_order]
-# The expected file paths are the same as the actual ones, but with "expected_" in front
-file_paths_expected = [f"{expected_json_path_start}expected_{file_name}" for file_name in file_name_order]
+actual_embench_directory = f"{WALLY}/benchmarks/embench/actual_embench_results/"
+expected_embench_directory = f"{WALLY}/benchmarks/embench/expected_embench_results/"
embench_logfile_path = os.path.expandvars("$WALLY/benchmarks/embench/run.log")
-diff_commands = []
-for expected, actual in zip(file_paths_expected, file_paths_actual):
-diff_commands.append(
-f'echo "\n==============================================" >> "{embench_logfile_path}"\n'
-f'echo "Comparing {expected} and {actual}:" >> "{embench_logfile_path}"\n'
-# Run diff
-f'echo "\nDiff output:" >> "{embench_logfile_path}"\n'
-f'diff -u "{actual}" "{expected}" >> "{embench_logfile_path}" 2>&1\n'
-f'if [ $? -ne 0 ]; then\n'
-f' echo "MISMATCH found for {expected} and {actual}" >> "{embench_logfile_path}"\n'
-f' mismatch_detected=1;\n'
-f'else\n'
-f' echo "Files match." >> "{embench_logfile_path}"\n'
-f'fi\n'
-f'echo "==============================================" >> "{embench_logfile_path}"'
-)
-diff_cmds_combined = "\n".join(diff_commands)
-final_check_block = (
-f'if [ $mismatch_detected -eq 1 ]; then\n'
-f' echo "EMBENCH_TEST_FAILED: Values did not match expected metrics" >> "{embench_logfile_path}";\n'
-f' exit 1;\n'
-f'else\n'
-f' echo "EMBENCH_TEST_PASSED" >> "{embench_logfile_path}";\n'
-f'fi'
-)
-full_shell_block = f"mismatch_detected=0\n{diff_cmds_combined}\n{final_check_block}"
# Create the file if it doesn't exist
with open(embench_logfile_path, 'w'):
pass
-# Combine everything into the embench_test command
+# finds any differences between the two embench directories and appends them to the log file
embench_test = TestCase(
name="embench",
variant="rv32gc",
-cmd = (
-f"rm -f {embench_logfile_path} && "
-f"touch {embench_logfile_path} && "
+cmd=(
f"cd $WALLY/benchmarks/embench && "
f"make run >> {embench_logfile_path} 2>&1 && "
f"bash << 'EOF'\n{full_shell_block}\nEOF"
f"make run >> {embench_logfile_path} 2>&1"
),
grepstr="EMBENCH_TEST_PASSED", # Make sure this matches success message
grepstr=None,
grepfile=embench_logfile_path,
-simlog=embench_logfile_path
+altcommand=f"diff -ru {actual_embench_directory} {expected_embench_directory} >> {embench_logfile_path}"
)
configs.append(embench_test)
# RUNNING THE COREMARK TEST
sim_log = f"{regressionDir}/{defaultsim}/logs/validation.log"
-# Create the directory if it doesn't exist
-os.makedirs(os.path.dirname(sim_log), exist_ok=True)
+coremark_logfile_path = os.path.expandvars(sim_log)
+# Create the directory (and file) if it doesn't exist
+os.makedirs(os.path.dirname(coremark_logfile_path), exist_ok=True)
+with open(coremark_logfile_path, 'w'):
+pass
coremark_sweep_test_file = f"{WALLY}/benchmarks/coremark/coremark_sweep.py"
actual_coremark_values_csv = f"{WALLY}/benchmarks/coremark/coremark_results.csv"
expected_coremark_values_csv = f"{WALLY}/benchmarks/coremark/expected_coremark_results.csv"
# calculates the difference between the coremark expected outcomes and appends them to the log file
coremark_test = TestCase(
name="validate_coremark_sweep",
variant="coremark check",
cmd=(f"python3 {coremark_sweep_test_file}"),
grepstr=None,
-grepfile=None,
-simlog=sim_log,
-altcommand=f"diff -u {actual_coremark_values_csv} {expected_coremark_values_csv} >> {sim_log}"
+grepfile=coremark_logfile_path,
+altcommand=f"diff -u {actual_coremark_values_csv} {expected_coremark_values_csv} >> {coremark_logfile_path}"
)
configs.append(coremark_test)
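
Below is a minimal standalone sketch (not part of the commit) of the directory-level check that the new embench altcommand performs. The directory and log paths are taken from the diff above; the regression script itself simply shells out to diff -ru through os.system and inspects the return code.

import os
import subprocess

# Paths as used by the commit (assumes the WALLY environment variable is set)
WALLY = os.environ["WALLY"]
actual_dir = f"{WALLY}/benchmarks/embench/actual_embench_results/"
expected_dir = f"{WALLY}/benchmarks/embench/expected_embench_results/"
logfile = f"{WALLY}/benchmarks/embench/run.log"

# diff -ru walks both directories recursively; it exits 0 when every file
# matches and 1 when any file differs, which is the pass/fail signal the
# regression harness checks.
with open(logfile, "a") as log:
    result = subprocess.run(["diff", "-ru", actual_dir, expected_dir],
                            stdout=log, stderr=subprocess.STDOUT)
print("embench results match" if result.returncode == 0 else "embench results differ")

A single recursive diff over the results directory replaces the four per-file comparisons and the heredoc shell block this commit deletes, which is also why grepstr can drop to None: pass or fail now comes from the altcommand's exit status rather than from grepping the log.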