#!/usr/bin/env python3

import csv
import json
import os
import sys

# global variables
WALLY = os.environ.get('WALLY')
coremarkDir = f'{WALLY}/benchmarks/coremark/coremark_results.csv'
coremarkDir_expected = f'{WALLY}/benchmarks/coremark/expected_coremark_results.csv'
embenchDir = f'{WALLY}/benchmarks/embench'
expectedResultsPath = f'{WALLY}/bin/expected_results.json'
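# NOTE (assumption): WALLY must be set in the environment (e.g. by sourcing the
# repo's setup script) before running; os.environ.get() returns None when it is
# unset, which would turn these paths into strings like 'None/benchmarks/...'.

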
def create_expected_results_json():
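    """Write baseline benchmark metrics to expected_results.json.

    These constants are the expected CoreMark/MHz score and the embench
    geometric means and standard deviations that validate_results()
    compares against.
    """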
    # Create a dictionary to hold the expected results
    validation_data = {
        "coremark": {
            "coremark/mhz": 3.38
        },
        "embench_rv32imc": {
            "wallySizeOpt_size": {
                "size geometric mean": 1.04,
                "size geometric standard deviation": 1.26
            },
            "wallySizeOpt_speed": {
                "size geometric mean": 1.07,
                "size geometric standard deviation": 1.51
            },
            "wallySpeedOpt_size": {
                "size geometric mean": 1.21,
                "size geometric standard deviation": 1.28
            },
            "wallySpeedOpt_speed": {
                "size geometric mean": 1.15,
                "size geometric standard deviation": 1.61
            }
        }
    }

    # Write the data to the same JSON file that validate_results() reads
    with open(expectedResultsPath, 'w') as json_file:
        json.dump(validation_data, json_file, indent=4)


def validate_results():
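    """Compare the latest benchmark results against the stored expected results.

    Checks the four embench JSON reports against expected_results.json and
    diffs the coremark results CSV against the expected CSV. Prints every
    mismatch and exits nonzero if any check fails.
    """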
    # EMBENCH VALIDATION
    failing_value = ""

    # Load the expected results from the expected_results.json file
    with open(expectedResultsPath) as file:
        expected_data = json.load(file)

    # The four JSON result files generated by embench
    embench_json_files = ["wallySizeOpt_size", "wallySizeOpt_speed", "wallySpeedOpt_size", "wallySpeedOpt_speed"]

    for json_file in embench_json_files:
        json_path = f"{embenchDir}/{json_file}.json"
        # Open and read the JSON file
        with open(json_path) as file:
            embench_data = json.load(file)
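
        # The embench report is assumed to look like the sketch below (keys
        # inferred from the lookups that follow; values illustrative only):
        #   {"size results": {"size geometric mean": 1.04,
        #                     "size geometric standard deviation": 1.26}}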

        # Extract whether this is a speed or size run (based on the filename)
        metric = json_file.split("_")[1]

        # Build the keys used in the embench JSON data
        level1 = f"{metric} results"
        key_level2_mean = f"{metric} geometric mean"
        key_level2_std_dev = f"{metric} geometric standard deviation"

        # Extract the actual geometric mean and std. dev. from the JSON data
        actual_geometric_mean = embench_data[level1][key_level2_mean]
        actual_geometric_std_dev = embench_data[level1][key_level2_std_dev]

        # Extract geometric means from the expected results for comparison.
        # Note: expected_results.json uses the key names "size geometric mean" and
        # "size geometric standard deviation" for all four runs, including the speed runs.
        expected_wally_geometric_mean = expected_data['embench_rv32imc'][json_file]['size geometric mean']
        expected_wally_geometric_std_dev = expected_data['embench_rv32imc'][json_file]['size geometric standard deviation']

        # Compare the actual and expected results
        if actual_geometric_mean != expected_wally_geometric_mean:
            failing_value += f"embench {json_file}'s geometric mean of {actual_geometric_mean} does not match expected value of {expected_wally_geometric_mean}\n"

        # # Update expected results file if smaller / better results
        # if actual_geometric_mean < expected_wally_geometric_mean:
        #     updated_expected_json = True
        #     expected_data['embench_rv32imc'][json_file]['size geometric mean'] = actual_geometric_mean
        #     print(f"Updated expected geometric mean for {json_file} to {actual_geometric_mean}")

        if actual_geometric_std_dev != expected_wally_geometric_std_dev:
            failing_value += f"embench {json_file}'s geometric std. dev. of {actual_geometric_std_dev} does not match expected value of {expected_wally_geometric_std_dev}\n"

        # # Update expected results file if smaller / better results
        # if actual_geometric_std_dev < expected_wally_geometric_std_dev:
        #     updated_expected_json = True
        #     expected_data['embench_rv32imc'][json_file]['size geometric standard deviation'] = actual_geometric_std_dev
        #     print(f"Updated expected std. dev. for {json_file} to {actual_geometric_std_dev}")

    # if updated_expected_json:
    #     with open(expectedResultsPath, 'w') as f:
    #         json.dump(expected_data, f, indent=4)
    #
    #     # automatically push the expected_results.json file to github (requires `import subprocess`)
    #     subprocess.run(["git", "add", expectedResultsPath])
    #     subprocess.run(["git", "commit", "-m", "Update expected results with improved metrics"])
    #     subprocess.run(["git", "push"])

    # COREMARK VALIDATION
    # coremark_run = {}
    # with open(coremarkDir, newline='') as csvfile:
    #     reader = csv.DictReader(csvfile)
    #     for row in reader:
    #         arch = row["Architecture"]
    #         coremark_run[arch] = row
    # # Now you can directly index into it
    # actual_CM_MHz = coremark_run["rv32im_zicsr_zba_zbb_zbs"]["CM / MHz"]
    # expected_wally_CM_Mhz = expected_data['coremark']['coremark/mhz']
    # if str(actual_CM_MHz) != str(expected_wally_CM_Mhz):
    #     failure = True
    #     failing_value += f"coremark's actual CM/MHz of {actual_CM_MHz} does not match expected value of {expected_wally_CM_Mhz}\n"

    # Read in the expected results from the expected_coremark_results.csv file,
    # as well as the new results created by the test run
    actual_results = read_csv_as_sorted_list(coremarkDir)
    expected_results = read_csv_as_sorted_list(coremarkDir_expected)

    # Compare the two CSV files, appending (not overwriting) so any embench
    # failures recorded above are still reported
    if actual_results != expected_results:
        failing_value += "Coremark results do not match expected results.\n"
        failing_value += f"Coremark results:\n{actual_results}\n"
        failing_value += f"Expected results:\n{expected_results}\n"

    # Check if there were any failures
    if failing_value == "":
        print("Validation Tests completed with 0 errors")
    else:
        print(failing_value)
        sys.exit(1)


def read_csv_as_sorted_list(filename):
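    """Return a CSV file's rows as a sorted list of lists, so two files can be compared independent of row order."""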
    with open(filename, newline='') as f:
        reader = csv.reader(f)
        rows = list(reader)
        rows.sort()  # sort rows for consistent ordering
        return rows


def main():
    create_expected_results_json()  # NOTE: comment this line out to validate against a previously saved expected_results.json
    validate_results()


if __name__ == "__main__":
    main()


# Notes / open questions:
# do we only want to trigger with nightly - yes
# is there a reason we only care about the 3.38 from the rv32im_zicsr_zba_zbb_zbs arch - most complete
# how do I know if the two tests that produce the results I scrape from are running - just run these defaults:
#   cd $WALLY/benchmarks/coremark
#   ./coremark_sweep.py
#
#   cd $WALLY/benchmarks/embench
#   make run
#
# automatically push to github if better results?
#
# coremark sweep - creates the csv of values for different architectures
# embench benchmark - creates the 4 json files for speed/size
#
# check if there are differences between runs of coremark sweep on the csv --> done
# need to standardize timeout duration between the performance flag and nightly
# need to make sure it is failing when results differ
# need to check if I need to validate more values in this file (maybe do a diff for the csv) --> done this part (more to come in the future likely)