added benchmarking scripts to check for performance hits

SadhviNarayanan 2025-04-26 01:32:29 -07:00
parent 0ca90e74a4
commit b2958f228f
5 changed files with 257 additions and 3 deletions

benchmarks/coremark/coremark_sweep.py

@@ -33,6 +33,13 @@ import csv
 import os
 import re
+WALLY = os.environ.get("WALLY")
+# Set working directory to where the Makefile is
+coremark_dir = os.path.join(WALLY, "benchmarks/coremark")
+os.chdir(coremark_dir)
 # list of architectures to run.
 arch_list = [
     "rv32i_zicsr",
@@ -54,7 +61,8 @@ mt_regex = r"Elapsed MTIME: (\d+).*?Elapsed MINSTRET: (\d+).*?COREMARK/MHz Score
 #cpi_regex = r"CPI: \d+ / \d+ = (\d+\.\d+)"
 #cmhz_regex = r"COREMARK/MHz Score: [\d,]+ / [\d,]+ = (\d+\.\d+)"
 # Open a CSV file to write the results
-resultfile = 'coremark_results.csv'
+resultfile = os.path.join(coremark_dir, 'coremark_results.csv')
+# resultfile = 'coremark_results.csv'
 with open(resultfile, mode='w', newline='') as csvfile:
     fieldnames = ['Architecture', 'CM / MHz','CPI','MTIME','MINSTRET','Load Stalls','Store Stalls','D$ Accesses',
                   'D$ Misses','I$ Accesses','I$ Misses','Branches','Branch Mispredicts','BTB Misses',
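Because the sweep now anchors its working directory and output path to $WALLY, it can be invoked from any directory, which the regression hook added below depends on. A minimal sketch of such an invocation (the subprocess wrapper is illustrative, not part of this commit):

import os
import subprocess

WALLY = os.environ["WALLY"]  # must be set, just as coremark_sweep.py assumes
sweep = os.path.join(WALLY, "benchmarks/coremark/coremark_sweep.py")
subprocess.run(["python3", sweep], check=True)
# coremark_results.csv lands in $WALLY/benchmarks/coremark regardless of the caller's cwd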

benchmarks/coremark/expected_coremark_results.csv Normal file

@@ -0,0 +1,13 @@
Architecture,CM / MHz,CPI,MTIME,MINSTRET,Load Stalls,Store Stalls,D$ Accesses,D$ Misses,I$ Accesses,I$ Misses,Branches,Branch Mispredicts,BTB Misses,Jump/JR,RAS Wrong,Returns,BP Class Pred Wrong
rv32i_zicsr,1.20,1.16,8269385,7124630,261886,22307,716317,73,7827908,1040,2009578,443447,5476,122015,3468,113645,20699
rv32im_zicsr,3.26,1.12,3061233,2716910,264489,22198,690506,73,2975498,827,543885,45067,5483,30033,69,15237,20898
rv32imc_zicsr,3.24,1.13,3085767,2716550,264404,23853,690507,75,3011253,285,543761,44223,5615,29675,171,15237,20295
rv32im_zicsr_zba_zbb_zbs,3.38,1.12,2954414,2624181,266850,22258,689232,75,2878922,494,544408,42375,4295,29685,18,15249,14980
rv32gc,3.24,1.13,3085783,2716550,264134,23852,690500,74,3010201,286,543485,44182,5563,29668,162,15230,20108
rv32gc_zba_zbb_zbs,3.32,1.14,3003843,2624181,272635,22141,689226,74,2929468,280,543245,44490,6087,29680,23,15242,22189
rv64i_zicsr,1.02,1.13,9731538,8559698,273929,22198,720375,85,9242621,588,2340171,459594,4842,128954,178,109383,15941
rv64im_zicsr,2.86,1.10,3493939,3156218,271101,22149,691714,83,3406099,340,547671,42901,4651,34669,14,15099,15726
rv64imc_zicsr,2.82,1.12,3545301,3156218,271029,25304,691715,86,3457798,263,547535,43990,4970,34671,5,15099,15889
rv64im_zicsr_zba_zbb_zbs,3.08,1.11,3241375,2901479,273442,24665,689242,85,3150626,424,547796,41798,4635,34680,43,15111,16143
rv64gc,2.82,1.12,3545281,3156218,270740,25304,691708,86,3456812,264,547229,43969,4970,34664,5,15092,15889
rv64gc_zba_zbb_zbs,3.03,1.13,3297540,2901479,273107,26696,689236,83,3200848,250,547359,46238,6197,34675,73,15104,21328

bin/expected_results.json Normal file

@@ -0,0 +1,23 @@
{
    "coremark": {
        "coremark/mhz": 3.38
    },
    "embench_rv32imc": {
        "wallySizeOpt_size": {
            "size geometric mean": 1.04,
            "size geometric standard deviation": 1.26
        },
        "wallySizeOpt_speed": {
            "size geometric mean": 1.07,
            "size geometric standard deviation": 1.51
        },
        "wallySpeedOpt_size": {
            "size geometric mean": 1.21,
            "size geometric standard deviation": 1.28
        },
        "wallySpeedOpt_speed": {
            "size geometric mean": 1.15,
            "size geometric standard deviation": 1.61
        }
    }
}

bin/regression-wally

@@ -395,6 +395,8 @@ def parse_args():
     parser.add_argument("--fp", help="Include floating-point tests in coverage (slower runtime)", action="store_true") # Currently not used
     parser.add_argument("--breker", help="Run Breker tests", action="store_true") # Requires a license for the breker tool. See tests/breker/README.md for details
     parser.add_argument("--dryrun", help="Print commands invoked to console without running regression", action="store_true")
+    parser.add_argument("--performance", help="Check for performance changes or discrepancies in embench and coremark", action="store_true")
     return parser.parse_args()
@@ -415,7 +417,7 @@ def process_args(args):
         TIMEOUT_DUR = 30*60
         shutil.rmtree(f"{regressionDir}/questa/fcov_ucdb", ignore_errors=True)
         os.makedirs(f"{regressionDir}/questa/fcov_ucdb", exist_ok=True)
-    elif args.buildroot:
+    elif args.buildroot or args.performance: # TODO: fix timing on this because performance should also be limited on nightly
         TIMEOUT_DUR = 60*3600 # 2.5 days
     elif args.testfloat:
         sims = [testfloatsim]
@@ -423,7 +425,7 @@ def process_args(args):
     elif args.branch:
         TIMEOUT_DUR = 120*60 # seconds
     elif args.nightly:
-        TIMEOUT_DUR = 30*60 # seconds
+        TIMEOUT_DUR = 5*3600 # seconds # NOTE: changed to 5 hours for nightly regression
     else:
         TIMEOUT_DUR = 10*60 # seconds
@@ -505,6 +507,41 @@ def selectTests(args, sims, coverStr):
             grepstr="All Tests completed with 0 errors",
             grepfile = sim_log)
         configs.append(tc)
+    if args.performance or args.nightly:
+        # Run the embench benchmark
+        embench_test = TestCase(
+            name="embench",
+            variant="rv32gc", # is this the correct variant here? or rv32imac_zicsr
+            cmd="cd $WALLY/benchmarks/embench && make run | tee run.log", # tee keeps a log for grepfile
+            grepstr="SUCCESS", # not sure what keyword to put here
+            grepfile=os.path.expandvars("$WALLY/benchmarks/embench/run.log")
+        )
+        configs.append(embench_test)
+        validation_log = f"{WALLY}/bin/logs/validation.log"
+        os.makedirs(os.path.dirname(validation_log), exist_ok=True)
+        validate_script = f"{WALLY}/bin/validate_performance.py"
+        coremark_sweep_script = f"{WALLY}/benchmarks/coremark/coremark_sweep.py"
+        # Use absolute paths, ensure the log file exists before writing,
+        # and chain the commands so a failed sweep aborts validation
+        performance_test = TestCase(
+            name="validate_performance",
+            variant="performance check",
+            cmd=(
+                f"touch {validation_log} && "
+                f"python3 {coremark_sweep_script} && "
+                f"python3 {validate_script} | tee {validation_log}"
+            ),
+            grepstr="Validation Tests completed with 0 errors", # adjust if the success message differs
+            grepfile=validation_log
+        )
+        configs.append(performance_test)
     return configs
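Both new entries rely on the regression harness's grep-based pass/fail convention: after cmd finishes, the runner searches grepfile for grepstr. A minimal sketch of that convention, assuming this is how regression-wally judges a TestCase (the helper name below is hypothetical, not the repo's code):

def test_passed(grepfile: str, grepstr: str) -> bool:
    # Pass only if the expected success string appears in the log file
    with open(grepfile, errors="ignore") as f:
        return any(grepstr in line for line in f)

This is why grepstr="Validation Tests completed with 0 errors" has to match the success message printed by validate_performance.py exactly.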

bin/validate_performance.py Executable file

@@ -0,0 +1,173 @@
#!/usr/bin/env python3
import csv
import json
import os

# global variables
WALLY = os.environ.get('WALLY')
coremark_csv = f'{WALLY}/benchmarks/coremark/coremark_results.csv'
expected_coremark_csv = f'{WALLY}/benchmarks/coremark/expected_coremark_results.csv'
embenchDir = f'{WALLY}/benchmarks/embench'
def create_expected_results_json():
    # Create a dictionary to hold the expected results
    validation_data = {
        "coremark": {
            "coremark/mhz": 3.38
        },
        "embench_rv32imc": {
            "wallySizeOpt_size": {
                "size geometric mean": 1.04,
                "size geometric standard deviation": 1.26
            },
            "wallySizeOpt_speed": {
                "size geometric mean": 1.07,
                "size geometric standard deviation": 1.51
            },
            "wallySpeedOpt_size": {
                "size geometric mean": 1.21,
                "size geometric standard deviation": 1.28
            },
            "wallySpeedOpt_speed": {
                "size geometric mean": 1.15,
                "size geometric standard deviation": 1.61
            }
        }
    }
    # Write the data to bin/expected_results.json so validate_results() reads the
    # same file regardless of the current working directory
    with open(f'{WALLY}/bin/expected_results.json', 'w') as json_file:
        json.dump(validation_data, json_file, indent=4)
def validate_results():
    failing_value = ""
    # Load the expected results once from expected_results.json
    expected_results_path = f"{WALLY}/bin/expected_results.json"
    with open(expected_results_path) as file:
        expected_data = json.load(file)

    # EMBENCH VALIDATION
    # The four JSON result files generated by embench
    embench_json_files = ["wallySizeOpt_size", "wallySizeOpt_speed", "wallySpeedOpt_size", "wallySpeedOpt_speed"]
    for json_file in embench_json_files:
        filepath = f"{embenchDir}/{json_file}.json"
        # Open and read the JSON file
        with open(filepath) as file:
            embench_data = json.load(file)
        # Extract whether the file is a speed or size test (based on the filename)
        test_type = json_file.split("_")[1]
        # Build the keys used in the embench JSON data
        level1 = f"{test_type} results"
        key_level2_mean = f"{test_type} geometric mean"
        key_level2_std_dev = f"{test_type} geometric standard deviation"
        # Extract the actual geometric mean and std. dev. from the JSON data
        actual_geometric_mean = embench_data[level1][key_level2_mean]
        actual_geometric_std_dev = embench_data[level1][key_level2_std_dev]
        # Extract geometric means from the expected results for comparison
        expected_wally_geometric_mean = expected_data['embench_rv32imc'][json_file]['size geometric mean']
        expected_wally_geometric_std_dev = expected_data['embench_rv32imc'][json_file]['size geometric standard deviation']
        # Compare the actual and expected results
        if actual_geometric_mean != expected_wally_geometric_mean:
            failing_value += f"embench {json_file}'s geometric mean of {actual_geometric_mean} does not match expected value of {expected_wally_geometric_mean}\n"
            # # Update expected results file if smaller / better results
            # if actual_geometric_mean < expected_wally_geometric_mean:
            #     updated_expected_json = True
            #     expected_data['embench_rv32imc'][json_file]['size geometric mean'] = actual_geometric_mean
            #     print(f"Updated expected geometric mean for {json_file} to {actual_geometric_mean}")
        if actual_geometric_std_dev != expected_wally_geometric_std_dev:
            failing_value += f"embench {json_file}'s geometric std. dev. of {actual_geometric_std_dev} does not match expected value of {expected_wally_geometric_std_dev}\n"
            # # Update expected results file if smaller / better results
            # if actual_geometric_std_dev < expected_wally_geometric_std_dev:
            #     updated_expected_json = True
            #     expected_data['embench_rv32imc'][json_file]['size geometric standard deviation'] = actual_geometric_std_dev
            #     print(f"Updated expected std. dev. for {json_file} to {actual_geometric_std_dev}")
    # if updated_expected_json:
    #     with open(expected_results_path, 'w') as f:
    #         json.dump(expected_data, f, indent=4)
    #     # automatically push the expected_results.json file to github
    #     subprocess.run(["git", "add", expected_results_path])
    #     subprocess.run(["git", "commit", "-m", "Update expected results with improved metrics"])
    #     subprocess.run(["git", "push"])
    # COREMARK VALIDATION
    # coremark_run = {}
    # with open(coremark_csv, newline='') as csvfile:
    #     reader = csv.DictReader(csvfile)
    #     for row in reader:
    #         arch = row["Architecture"]
    #         coremark_run[arch] = row
    # # Index directly into the per-architecture results
    # actual_CM_MHz = coremark_run["rv32im_zicsr_zba_zbb_zbs"]["CM / MHz"]
    # expected_wally_CM_Mhz = expected_data['coremark']['coremark/mhz']
    # if str(actual_CM_MHz) != str(expected_wally_CM_Mhz):
    #     failure = True
    #     failing_value += f"coremark's actual CM/MHz of {actual_CM_MHz} does not match expected value of {expected_wally_CM_Mhz}\n"

    # Read in the expected results from expected_coremark_results.csv,
    # as well as the new CSV created by the test
    actual_results = read_csv_as_sorted_list(coremark_csv)
    expected_results = read_csv_as_sorted_list(expected_coremark_csv)
    # Compare the two CSV files; accumulate the failure so it is reported below
    if actual_results != expected_results:
        failing_value += "Coremark results do not match expected results.\n"
        failing_value += f"Coremark results:\n{actual_results}\n"
        failing_value += f"Expected results:\n{expected_results}\n"
    # Check if there were any failures
    if failing_value == "":
        print("Validation Tests completed with 0 errors")
    else:
        print(failing_value)
        exit(1)
def read_csv_as_sorted_list(filename):
    with open(filename, newline='') as f:
        reader = csv.reader(f)
        rows = list(reader)
    rows.sort() # sort rows for consistent ordering
    return rows
def main():
    create_expected_results_json() # regenerate expected_results.json so validation compares against the current expected values
    validate_results()

if __name__ == "__main__":
    main()
# do we only want to trigger with nightly - yes
# is there a reason we only care about the 3.38 from the rv32im_zicsr_zba_zbb_zbs arch - most complete
# how do I know the two tests that produce the results I scrape are running - just run these defaults:
# cd $WALLY/benchmarks/coremark
# ./coremark_sweep.py
# cd $WALLY/benchmarks/embench
# make run
# automatically push to github if better results?
# coremark sweep - creates the csv of values for diff arch
# embench benchmark - creates the 4 json files for speed/size
# check if there are differences between runs of coremark sweep on the csv --> done
# need to standardize timeout duration between performance flag and nightly
# need to make sure it is failing when different
# need to check if I need to validate more values in this file (maybe do a diff for the csv) --> done this part (more to come in future likely)
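The "diff for the csv" note above points at a limitation of the current exact-match check: any change in float formatting, or a one-cycle wobble in a counter, fails the whole comparison. A possible refinement, sketched here with hypothetical names (compare_csv_with_tolerance and rel_tol are not part of this commit), compares numeric fields per architecture with a relative tolerance:

import csv

def compare_csv_with_tolerance(actual_file, expected_file, rel_tol=0.01):
    """Return a list of mismatch messages; an empty list means the files agree."""
    mismatches = []
    with open(actual_file, newline='') as fa, open(expected_file, newline='') as fe:
        actual_rows = {row["Architecture"]: row for row in csv.DictReader(fa)}
        expected_rows = {row["Architecture"]: row for row in csv.DictReader(fe)}
    for arch, expected in expected_rows.items():
        actual = actual_rows.get(arch)
        if actual is None:
            mismatches.append(f"{arch}: missing from actual results")
            continue
        for field, exp_val in expected.items():
            if field == "Architecture":
                continue
            act_val = actual[field]
            try:
                exp_num, act_num = float(exp_val), float(act_val)
                # Numeric fields may drift slightly; allow a relative tolerance
                if abs(act_num - exp_num) > rel_tol * max(abs(exp_num), 1e-9):
                    mismatches.append(f"{arch} {field}: {act_val} vs expected {exp_val}")
            except ValueError:
                # Non-numeric fields must still match exactly
                if act_val != exp_val:
                    mismatches.append(f"{arch} {field}: {act_val} vs expected {exp_val}")
    return mismatches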