updated the script to use an external expected-results file for comparison and condensed the grep logic for embench

This commit is contained in:
SadhviNarayanan 2025-04-28 12:08:18 -07:00
parent 381bb34879
commit c700a9bb07
2 changed files with 49 additions and 34 deletions

@@ -1,23 +1,20 @@
{
"coremark": {
"coremark/mhz": 3.38
},
"embench_rv32imc": {
"wallySizeOpt_size": {
"size geometric mean": 1.04,
"size geometric standard deviation": 1.26
},
"wallySizeOpt_speed": {
"size geometric mean": 1.07,
"size geometric standard deviation": 1.51
"speed geometric mean": 1.07,
"speed geometric standard deviation": 1.51
},
"wallySpeedOpt_size": {
"size geometric mean": 1.21,
"size geometric standard deviation": 1.28
},
"wallySpeedOpt_speed": {
"size geometric mean": 1.15,
"size geometric standard deviation": 1.61
"speed geometric mean": 1.15,
"speed geometric standard deviation": 1.61
}
}
}

@@ -14,6 +14,7 @@
#
##################################
import argparse
import json
import multiprocessing
import os
import shutil
@@ -279,7 +280,7 @@ lockstepwaivers = [
# Data Types & Functions
##################################
TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'diffcommand', 'simlog'])
TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfile', 'altcommand', 'simlog'])
# name: the name of this test configuration (used in printing human-readable
# output and picking logfile names)
# cmd: the command to run to test (should include the logfile as '{}', and
@@ -288,7 +289,7 @@ TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr', 'grepfil
# grep finds that string in the logfile (is used by grep, so it may
# be any pattern grep accepts, see `man 1 grep` for more info).
# grepfile: a string containing the location of the file to be searched for output
# diffcommand: the command to run, if enabled, to check the output of the CoreMark benchmark
# altcommand: the command to run, if enabled, to check the output of the CoreMark benchmark
# against the expected output. The command should write to the
# validation log file. If None, no diff command is run.
# simlog: the name of the logfile to be used for this test.
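For reference, a minimal sketch of a TestCase built with the renamed altcommand field, following the CoreMark sweep entry at the end of this diff; the field values here are placeholders for illustration, not taken from the commit:

    # Hypothetical TestCase that uses altcommand instead of grepstr as the pass/fail check
    example = TestCase(
        name="coremark_sweep",                         # human-readable test name (placeholder)
        variant="rv32gc",                              # configuration variant (placeholder)
        cmd="python3 coremark_sweep.py > sweep.log",   # command under test (placeholder)
        grepstr=None,                                  # no pattern search in the log
        grepfile=None,
        altcommand="diff -q actual.csv expected.csv",  # run instead of grep; its exit code decides pass/fail
        simlog="sweep.log")                            # logfile the result message is appended to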
@@ -321,7 +322,7 @@ def addTests(testList, sim, coverStr, configs):
cmd=f"{cmdPrefix} {t} > {sim_log}",
grepstr=gs,
grepfile = grepfile,
diffcommand=None,
altcommand=None,
simlog=sim_log)
configs.append(tc)
@@ -359,7 +360,7 @@ def addTestsByDir(testDir, config, sim, coverStr, configs, lockstepMode=0, breke
cmd=f"{cmdPrefix} {fullfile} > {sim_log}",
grepstr=gs,
grepfile = sim_log,
diffcommand=None,
altcommand=None,
simlog=sim_log)
configs.append(tc)
@@ -376,7 +377,7 @@ def search_log_for_text(text, grepfile):
def run_test_case(config, dryrun: bool = False):
grepfile = config.grepfile
cmd = config.cmd
diffcommand = config.diffcommand
altcommand = config.altcommand
if dryrun:
print(f"Executing {cmd}", flush=True)
return 0
@@ -386,9 +387,9 @@ def run_test_case(config, dryrun: bool = False):
print(f"{bcolors.FAIL}{cmd}: Failed to execute{bcolors.ENDC}", flush=True)
print(f" Check {grepfile} for more details.", flush=True)
return 1
elif diffcommand:
elif altcommand:
sim_log = config.simlog
check_ret_code = os.system(diffcommand)
check_ret_code = os.system(altcommand)
with open(sim_log, 'a') as f:
if check_ret_code == 0:
# Success message
@@ -451,7 +452,7 @@ def process_args(args):
elif args.branch:
TIMEOUT_DUR = 120*60 # seconds
elif args.nightly or args.performance:
TIMEOUT_DUR = 5*3600 # seconds
TIMEOUT_DUR = 60*60 # seconds
else:
TIMEOUT_DUR = 10*60 # seconds
@@ -467,7 +468,7 @@ def selectTests(args, sims, coverStr):
cmd=f"lint-wally {'--nightly' if args.nightly else ''} | tee {regressionDir}/verilator/logs/all_lints.log",
grepstr="lints run with no errors or warnings",
grepfile = f"{regressionDir}/verilator/logs/all_lints.log",
diffcommand=None,
altcommand=None,
simlog=f"{regressionDir}/verilator/logs/all_lints.log"),
]
@@ -534,38 +535,55 @@ def selectTests(args, sims, coverStr):
cmd=f"wsim --tb testbench_fp --sim {testfloatsim} {config} {test} > {sim_log}",
grepstr="All Tests completed with 0 errors",
grepfile = sim_log,
diffcommand=None,
altcommand=None,
simlog=sim_log)
configs.append(tc)
if (args.performance or args.nightly):
# RUNNING THE EMBENCH TEST
embench_expected_json = f"{WALLY}/benchmarks/embench/expected_results.json"
with open(embench_expected_json) as f:
embench_expected = json.load(f)
# As of now, we are only benchmarking rv32imc for embench performance
embench_expected_values = embench_expected["embench_rv32imc"]
# Paths to the JSON result files produced by the embench run (the measured values to check)
wallySizeOpt_size_json = f"{WALLY}/benchmarks/embench/wallySizeOpt_size.json"
wallySizeOpt_speed_json = f"{WALLY}/benchmarks/embench/wallySizeOpt_speed.json"
wallySpeedOpt_size_json = f"{WALLY}/benchmarks/embench/wallySpeedOpt_size.json"
wallySpeedOpt_speed_json = f"{WALLY}/benchmarks/embench/wallySpeedOpt_speed.json"
# Map file names to their expected values
file_to_expectations = {
wallySizeOpt_size_json: embench_expected_values["wallySizeOpt_size"],
wallySizeOpt_speed_json: embench_expected_values["wallySizeOpt_speed"],
wallySpeedOpt_size_json: embench_expected_values["wallySpeedOpt_size"],
wallySpeedOpt_speed_json: embench_expected_values["wallySpeedOpt_speed"]
}
# Dynamically generate one grep command per expected geometric mean / standard deviation
grep_commands = []
for filepath, checks in file_to_expectations.items():
for key, expected_value in checks.items():
grep_commands.append(f"grep -q '{key}.*{expected_value}' {filepath}")
# Join all grep commands with "&&" so that all need to pass
grep_cmds_combined = " && ".join(grep_commands)
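# For reference, with the expected_results.json values shown earlier in this commit,
# grep_cmds_combined expands to an &&-chained string equivalent to the hard-coded
# checks it replaces, e.g. (paths shown with $WALLY for brevity; the generated
# string contains the expanded path):
#   grep -q 'size geometric mean.*1.04' $WALLY/benchmarks/embench/wallySizeOpt_size.json &&
#   grep -q 'size geometric standard deviation.*1.26' $WALLY/benchmarks/embench/wallySizeOpt_size.json &&
#   grep -q 'speed geometric mean.*1.07' $WALLY/benchmarks/embench/wallySizeOpt_speed.json && ...
# (and so on for the remaining SpeedOpt size/speed metrics), so a single failing metric fails the test.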
embench_test = TestCase(
name="embench",
variant="rv32gc", # is this the correct variant here? or rv32imac_zicsr
variant="rv32gc",
cmd=(
f"touch $WALLY/benchmarks/embench/run.log && ("
f"cd $WALLY/benchmarks/embench && make run | tee run.log && "
f"grep -q 'size geometric mean.*1.04' {wallySizeOpt_size_json} && "
f"grep -q 'size geometric standard deviation.*1.26' {wallySizeOpt_size_json} && "
f"grep -q 'speed geometric mean.*1.07' {wallySizeOpt_speed_json} && "
f"grep -q 'speed geometric standard deviation.*1.51' {wallySizeOpt_speed_json} && "
f"grep -q 'size geometric mean.*1.21' {wallySpeedOpt_size_json} && "
f"grep -q 'size geometric standard deviation.*1.28' {wallySpeedOpt_size_json} && "
f"grep -q 'speed geometric mean.*1.15' {wallySpeedOpt_speed_json} && "
f"grep -q 'speed geometric standard deviation.*1.61' {wallySpeedOpt_speed_json}"
f"{grep_cmds_combined}"
f") && echo 'EMBENCH_TEST_PASSED' >> $WALLY/benchmarks/embench/run.log "
f"|| (echo 'EMBENCH_TEST_FAILED: Values did not match expected metrics' >> $WALLY/benchmarks/embench/run.log && exit 1)"
),
grepstr="EMBENCH_TEST_PASSED", # make sure this matches the grep string in the command
grepfile=os.path.expandvars("$WALLY/benchmarks/embench/run.log"),
diffcommand=None,
altcommand=None,
simlog=os.path.expandvars("$WALLY/benchmarks/embench/run.log")
)
configs.append(embench_test)
@@ -588,7 +606,7 @@ def selectTests(args, sims, coverStr):
cmd=(f"python3 {coremark_sweep_test_dir}"),
grepstr=None,
grepfile=None,
diffcommand=f"diff -q {actual_coremark_values_csv} {expected_coremark_values_csv}",
altcommand=f"diff -q {actual_coremark_values_csv} {expected_coremark_values_csv}",
simlog=sim_log
)
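
To close the loop on how this coremark_sweep entry reports its result: run_test_case (changed earlier in this diff) executes altcommand with os.system and appends a pass/fail line to simlog. A minimal sketch of that flow, with the message text and return values as assumptions since they lie outside the visible hunk:

    # Sketch of the altcommand branch in run_test_case (messages are placeholders)
    check_ret_code = os.system(altcommand)       # e.g. diff -q actual.csv expected.csv
    with open(sim_log, 'a') as f:
        if check_ret_code == 0:
            f.write("Validation passed\n")       # placeholder success message
        else:
            f.write("Validation failed\n")       # placeholder failure message
    return 0 if check_ret_code == 0 else 1       # assumption: the exit code drives pass/fail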