mirror of https://github.com/openhwgroup/cvw.git
synced 2025-06-28 01:32:49 -04:00
added benchmarking scripts to check for performance hits
This commit is contained in:
parent 0ca90e74a4
commit b2958f228f
5 changed files with 257 additions and 3 deletions
@@ -33,6 +33,13 @@ import csv
import os
import re

WALLY = os.environ.get("WALLY")

# Set working directory to where the Makefile is
coremark_dir = os.path.join(WALLY, "benchmarks/coremark")
os.chdir(coremark_dir)


# list of architectures to run.
arch_list = [
    "rv32i_zicsr",
@@ -54,7 +61,8 @@ mt_regex = r"Elapsed MTIME: (\d+).*?Elapsed MINSTRET: (\d+).*?COREMARK/MHz Score
#cpi_regex = r"CPI: \d+ / \d+ = (\d+\.\d+)"
#cmhz_regex = r"COREMARK/MHz Score: [\d,]+ / [\d,]+ = (\d+\.\d+)"
# Open a CSV file to write the results
resultfile = 'coremark_results.csv'
resultfile = os.path.join(coremark_dir, 'coremark_results.csv')
# resultfile = 'coremark_results.csv'
with open(resultfile, mode='w', newline='') as csvfile:
    fieldnames = ['Architecture', 'CM / MHz','CPI','MTIME','MINSTRET','Load Stalls','Store Stalls','D$ Accesses',
                  'D$ Misses','I$ Accesses','I$ Misses','Branches','Branch Mispredicts','BTB Misses',
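The hunk above belongs to the sweep script that scrapes performance counters out of each CoreMark simulation log with mt_regex and writes them to a results CSV. A minimal, self-contained sketch of that scrape-and-write pattern follows; the sample log text, the shortened regex, and the demo filename are illustrative assumptions, since the full mt_regex is truncated in the hunk header and the real script records many more counters.

import csv
import re

# Illustrative log text and simplified regex; the real mt_regex also captures
# the COREMARK/MHz score, and the real script records many more fields.
sample_log = "Elapsed MTIME: 2954414 ... Elapsed MINSTRET: 2624181"
m = re.search(r"Elapsed MTIME: (\d+).*?Elapsed MINSTRET: (\d+)", sample_log, re.DOTALL)
if m:
    mtime, minstret = m.groups()
    with open("coremark_results_demo.csv", "w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=["Architecture", "MTIME", "MINSTRET"])
        writer.writeheader()
        writer.writerow({"Architecture": "rv32im_zicsr_zba_zbb_zbs",
                         "MTIME": mtime, "MINSTRET": minstret})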
benchmarks/coremark/expected_coremark_results.csv (Normal file, 13 lines added)
@@ -0,0 +1,13 @@
Architecture,CM / MHz,CPI,MTIME,MINSTRET,Load Stalls,Store Stalls,D$ Accesses,D$ Misses,I$ Accesses,I$ Misses,Branches,Branch Mispredicts,BTB Misses,Jump/JR,RAS Wrong,Returns,BP Class Pred Wrong
rv32i_zicsr,1.20,1.16,8269385,7124630,261886,22307,716317,73,7827908,1040,2009578,443447,5476,122015,3468,113645,20699
rv32im_zicsr,3.26,1.12,3061233,2716910,264489,22198,690506,73,2975498,827,543885,45067,5483,30033,69,15237,20898
rv32imc_zicsr,3.24,1.13,3085767,2716550,264404,23853,690507,75,3011253,285,543761,44223,5615,29675,171,15237,20295
rv32im_zicsr_zba_zbb_zbs,3.38,1.12,2954414,2624181,266850,22258,689232,75,2878922,494,544408,42375,4295,29685,18,15249,14980
rv32gc,3.24,1.13,3085783,2716550,264134,23852,690500,74,3010201,286,543485,44182,5563,29668,162,15230,20108
rv32gc_zba_zbb_zbs,3.32,1.14,3003843,2624181,272635,22141,689226,74,2929468,280,543245,44490,6087,29680,23,15242,22189
rv64i_zicsr,1.02,1.13,9731538,8559698,273929,22198,720375,85,9242621,588,2340171,459594,4842,128954,178,109383,15941
rv64im_zicsr,2.86,1.10,3493939,3156218,271101,22149,691714,83,3406099,340,547671,42901,4651,34669,14,15099,15726
rv64imc_zicsr,2.82,1.12,3545301,3156218,271029,25304,691715,86,3457798,263,547535,43990,4970,34671,5,15099,15889
rv64im_zicsr_zba_zbb_zbs,3.08,1.11,3241375,2901479,273442,24665,689242,85,3150626,424,547796,41798,4635,34680,43,15111,16143
rv64gc,2.82,1.12,3545281,3156218,270740,25304,691708,86,3456812,264,547229,43969,4970,34664,5,15092,15889
rv64gc_zba_zbb_zbs,3.03,1.13,3297540,2901479,273107,26696,689236,83,3200848,250,547359,46238,6197,34675,73,15104,21328
bin/expected_results.json (Normal file, 23 lines added)
@@ -0,0 +1,23 @@
{
    "coremark": {
        "coremark/mhz": 3.38
    },
    "embench_rv32imc": {
        "wallySizeOpt_size": {
            "size geometric mean": 1.04,
            "size geometric standard deviation": 1.26
        },
        "wallySizeOpt_speed": {
            "size geometric mean": 1.07,
            "size geometric standard deviation": 1.51
        },
        "wallySpeedOpt_size": {
            "size geometric mean": 1.21,
            "size geometric standard deviation": 1.28
        },
        "wallySpeedOpt_speed": {
            "size geometric mean": 1.15,
            "size geometric standard deviation": 1.61
        }
    }
}
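These expected values are read back later in this commit by bin/validate_performance.py. A short sketch of the lookups, with the nested keys taken from the JSON above and the path resolved from the WALLY environment variable as the other scripts in this commit do:

import json
import os

WALLY = os.environ.get("WALLY")
with open(f"{WALLY}/bin/expected_results.json") as f:
    expected = json.load(f)

# Nested keys follow the structure of the file above.
print(expected["coremark"]["coremark/mhz"])                                     # 3.38
print(expected["embench_rv32imc"]["wallySizeOpt_size"]["size geometric mean"])  # 1.04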
@@ -395,6 +395,8 @@ def parse_args():
    parser.add_argument("--fp", help="Include floating-point tests in coverage (slower runtime)", action="store_true") # Currently not used
    parser.add_argument("--breker", help="Run Breker tests", action="store_true") # Requires a license for the breker tool. See tests/breker/README.md for details
    parser.add_argument("--dryrun", help="Print commands invoked to console without running regression", action="store_true")

    parser.add_argument("--performance", help="Check for performance changes or discrepancies in embench and coremark", action="store_true")
    return parser.parse_args()
@@ -415,7 +417,7 @@ def process_args(args):
        TIMEOUT_DUR = 30*60
        shutil.rmtree(f"{regressionDir}/questa/fcov_ucdb", ignore_errors=True)
        os.makedirs(f"{regressionDir}/questa/fcov_ucdb", exist_ok=True)
    elif args.buildroot:
    elif args.buildroot or args.performance: # TODO: fix timing on this because performance should also be limited on nightly
        TIMEOUT_DUR = 60*3600 # 2.5 days
    elif args.testfloat:
        sims = [testfloatsim]
@@ -423,7 +425,7 @@
    elif args.branch:
        TIMEOUT_DUR = 120*60 # seconds
    elif args.nightly:
        TIMEOUT_DUR = 30*60 # seconds
        TIMEOUT_DUR = 5*3600 # seconds # NOTE: changed this to 5 hours for nightly regression
    else:
        TIMEOUT_DUR = 10*60 # seconds
@@ -505,6 +507,41 @@ def selectTests(args, sims, coverStr):
            grepstr="All Tests completed with 0 errors",
            grepfile = sim_log)
        configs.append(tc)

    if (args.performance or args.nightly):
        # RUNNING THE EMBENCH TEST
        embench_test = TestCase(
            name="embench",
            variant="rv32gc", # is this the correct variant here? or rv32imac_zicsr
            cmd="cd $WALLY/benchmarks/embench && make run | tee run.log", # do we want to pipe result out? | tee run.log
            grepstr="SUCCESS", # not sure what keyword to put here
            grepfile=os.path.expandvars("$WALLY/benchmarks/embench/run.log")
        )
        configs.append(embench_test)

        validation_log = f"{WALLY}/bin/logs/validation.log"
        os.makedirs(os.path.dirname(validation_log), exist_ok=True)

        validate_test_dir = f"{WALLY}/bin/validate_performance.py"
        coremark_sweep_test_dir = f"{WALLY}/benchmarks/coremark/coremark_sweep.py"

        # Remove './' since you're using full paths
        # Ensure the log file exists before writing (with `touch`)
        # Also chain commands properly and safely
        performance_test = TestCase(
            name="validate_performance",
            variant="performance check",
            cmd=(
                f"touch {validation_log} && "
                f"python3 {coremark_sweep_test_dir} && "
                f"python3 {validate_test_dir} | tee {validation_log}"
            ),
            grepstr="Validation Tests completed with 0 errors", # adjust if message differs
            grepfile=validation_log
        )

        configs.append(performance_test)

    return configs
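TestCase itself is defined elsewhere in the regression script and is not part of this diff. A minimal stand-in with the same five fields used above, plus the grep-style pass check that the grepstr/grepfile pair implies, might look like the sketch below; the class name and the passed() method are assumptions, not the repository's actual implementation.

from dataclasses import dataclass

@dataclass
class TestCaseSketch:
    name: str
    variant: str
    cmd: str       # shell command the regression runner launches
    grepstr: str   # string that must appear in the log for the test to pass
    grepfile: str  # log file to search

    def passed(self) -> bool:
        # Pass if the expected success string appears anywhere in the log.
        with open(self.grepfile) as log:
            return self.grepstr in log.read()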
bin/validate_performance.py (Executable file, 173 lines added)
@@ -0,0 +1,173 @@
#!/usr/bin/env python3
import csv
import json
import os

# global variables
WALLY = os.environ.get('WALLY')
coremarkDir = f'{WALLY}/benchmarks/coremark/coremark_results.csv'
coremarkDir_expected = f'{WALLY}/benchmarks/coremark/expected_coremark_results.csv'
embenchDir = f'{WALLY}/benchmarks/embench'


def create_expected_results_json():
    # Create a dictionary to hold the expected results
    validation_data = {
        "coremark": {
            "coremark/mhz": 3.38
        },
        "embench_rv32imc": {
            "wallySizeOpt_size": {
                "size geometric mean": 1.04,
                "size geometric standard deviation": 1.26
            },
            "wallySizeOpt_speed": {
                "size geometric mean": 1.07,
                "size geometric standard deviation": 1.51
            },
            "wallySpeedOpt_size": {
                "size geometric mean": 1.21,
                "size geometric standard deviation": 1.28
            },
            "wallySpeedOpt_speed": {
                "size geometric mean": 1.15,
                "size geometric standard deviation": 1.61
            }
        }
    }

    # Write the data to a JSON file (use the same path that validate_results reads)
    with open(f'{WALLY}/bin/expected_results.json', 'w') as json_file:
        json.dump(validation_data, json_file, indent=4)  # Use validation_data instead of 'data'


def validate_results():
    # EMBENCH VALIDATION
    failing_value = ""
    # Create a list to keep track of all the JSON result files generated by embench
    embench_csv_files = ["wallySizeOpt_size", "wallySizeOpt_speed", "wallySpeedOpt_size", "wallySpeedOpt_speed"]

    for json_file in embench_csv_files:
        directory = f"{embenchDir}/{json_file}.json"
        # Open and read the JSON file
        with open(directory) as file:
            embench_log_csv_data = json.load(file)

        # Extract whether the file is a speed or size test (based on the filename)
        type_speed_size = json_file.split("_")[1]

        # Create the keys for the JSON data
        level1 = f"{type_speed_size} results"
        key_level2_mean = f"{type_speed_size} geometric mean"
        key_level2_std_dev = f"{type_speed_size} geometric standard deviation"

        # Extract the actual geometric mean and std. dev. from the JSON data
        actual_size_geometric_mean = embench_log_csv_data[level1][key_level2_mean]
        actual_size_geometric_std_dev = embench_log_csv_data[level1][key_level2_std_dev]

        # Load the expected results from the expected_results.json file
        expected_results_path = f"{WALLY}/bin/expected_results.json"
        with open(expected_results_path) as file:
            expected_data = json.load(file)

        # Extract geometric means from the expected results for comparison
        expected_wally_geometric_mean = expected_data['embench_rv32imc'][json_file]['size geometric mean']
        expected_wally_geometric_std_dev = expected_data['embench_rv32imc'][json_file]['size geometric standard deviation']

        # Compare the actual and expected results
        if (actual_size_geometric_mean != expected_wally_geometric_mean):
            failing_value += f"embench {json_file}'s geometric mean of {actual_size_geometric_mean} does not match expected value of {expected_wally_geometric_mean}\n"

        # # Update expected results file if smaller / better results
        # if actual_size_geometric_mean < expected_wally_geometric_mean:
        #     updated_expected_json = True
        #     expected_data['embench_rv32imc'][json_file]['size geometric mean'] = actual_size_geometric_mean
        #     print(f"Updated expected geometric mean for {json_file} to {actual_size_geometric_mean}")

        if (actual_size_geometric_std_dev != expected_wally_geometric_std_dev):
            failing_value += f"embench {json_file}'s geometric std. dev. of {actual_size_geometric_std_dev} does not match expected value of {expected_wally_geometric_std_dev}\n"

        # # Update expected results file if smaller / better results
        # if actual_size_geometric_std_dev < expected_wally_geometric_std_dev:
        #     updated_expected_json = True
        #     expected_data['embench_rv32imc'][json_file]['size geometric standard deviation'] = actual_size_geometric_std_dev
        #     print(f"Updated expected std. dev. for {json_file} to {actual_size_geometric_std_dev}")

    # if (updated_expected_json):
    #     with open(expected_results_path, 'w') as f:
    #         json.dump(expected_data, f, indent=4)

    #     # automatically push the expected_results.json file to github
    #     subprocess.run(["git", "add", expected_results_path])
    #     subprocess.run(["git", "commit", "-m", "Update expected results with improved metrics"])
    #     subprocess.run(["git", "push"])

    # COREMARK VALIDATION
    # coremark_run = {}
    # with open(coremarkDir, newline='') as csvfile:
    #     reader = csv.DictReader(csvfile)
    #     for row in reader:
    #         arch = row["Architecture"]
    #         coremark_run[arch] = row
    # Now you can directly index into it
    # actual_CM_MHz = coremark_run["rv32im_zicsr_zba_zbb_zbs"]["CM / MHz"]
    # expected_wally_CM_Mhz = expected_data['coremark']['coremark/mhz']
    # if (str(actual_CM_MHz) != str(expected_wally_CM_Mhz)):
    #     failure = True
    #     failing_value += f"coremark's actual CM/MHz of {actual_CM_MHz} does not match expected value of {expected_wally_CM_Mhz}\n"

    # Read in the expected results from the expected_coremark_results.csv file,
    # as well as the new one created by the test
    actual_results = read_csv_as_sorted_list(coremarkDir)
    expected_results = read_csv_as_sorted_list(coremarkDir_expected)

    # Compare the two CSV files
    if actual_results != expected_results:
        failing_value = "Coremark results do not match expected results.\n"
        print(failing_value)
        failing_value += f"Coremark results:\n{actual_results}\n"
        failing_value += f"Expected results:\n{expected_results}\n"
        exit(1)

    # Check if there were any failures
    if (failing_value == ""):
        print("Validation Tests completed with 0 errors")
    else:
        print(failing_value)
        exit(1)


def read_csv_as_sorted_list(filename):
    with open(filename, newline='') as f:
        reader = csv.reader(f)
        rows = list(reader)
    rows.sort()  # sort rows for consistent ordering
    return rows


def main():
    create_expected_results_json()  # NOTE: regenerates the expected_results.json file on every run
    validate_results()


if __name__ == "__main__":
    main()


# do we only want to trigger with nightly - yes
# is there a reason we only care about the 3.38 from the rv32im_zicsr_zba_zbb_zbs arch - most complete
# how do I know if the two tests that produce the results I scrape from are running - just running these by default
# cd $WALLY/benchmarks/coremark
# ./coremark_sweep.py

# cd $WALLY/benchmarks/embench
# make run

# automatically push to github if better results?

# coremark sweep - creates the csv of values for diff arch
# embench benchmark - creates the 4 json files for speed/size

# check if there are differences between runs of coremark sweep on the csv --> done
# need to standardize timeout duration between the performance flag and nightly
# need to make sure it is failing when results differ
# need to check if I need to validate more values in this file (maybe do a diff for the csv) --> done this part (more to come in the future likely)
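For reference, a sketch of the embench summary JSON shape that the validate_results() loop assumes, reconstructed from how level1 and the key_level2_* names are built; the numbers are placeholders, not measured Wally results.

# Placeholder values only; keys mirror the f-strings in validate_results().
example_speed_json = {
    "speed results": {
        "speed geometric mean": 1.00,
        "speed geometric standard deviation": 1.00
    }
}
# validate_results() indexes it as:
#   example_speed_json["speed results"]["speed geometric mean"]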