#!/usr/bin/env python3
import csv
import json
import os
import sys

# global variables
WALLY = os.environ.get('WALLY')
coremarkDir = f'{WALLY}/benchmarks/coremark/coremark_results.csv'
coremarkDir_expected = f'{WALLY}/benchmarks/coremark/expected_coremark_results.csv'
embenchDir = f'{WALLY}/benchmarks/embench'
expected_results_path = f'{WALLY}/bin/expected_results.json'


def create_expected_results_json():
    # Create a dictionary to hold the expected results
    validation_data = {
        "coremark": {
            "coremark/mhz": 3.38
        },
        "embench_rv32imc": {
            "wallySizeOpt_size": {
                "size geometric mean": 1.04,
                "size geometric standard deviation": 1.26
            },
            "wallySizeOpt_speed": {
                "size geometric mean": 1.07,
                "size geometric standard deviation": 1.51
            },
            "wallySpeedOpt_size": {
                "size geometric mean": 1.21,
                "size geometric standard deviation": 1.28
            },
            "wallySpeedOpt_speed": {
                "size geometric mean": 1.15,
                "size geometric standard deviation": 1.61
            }
        }
    }

    # Write the expected results to a JSON file
    with open(expected_results_path, 'w') as json_file:
        json.dump(validation_data, json_file, indent=4)


def validate_results():
    failing_value = ""

    # Load the expected results from the expected_results.json file
    # (used by both the embench and coremark checks)
    with open(expected_results_path) as file:
        expected_data = json.load(file)

    # EMBENCH VALIDATION
    # List of the JSON result files generated by embench
    embench_json_files = ["wallySizeOpt_size", "wallySizeOpt_speed",
                          "wallySpeedOpt_size", "wallySpeedOpt_speed"]

    for json_file in embench_json_files:
        json_path = f"{embenchDir}/{json_file}.json"

        # Open and read the JSON file
        with open(json_path) as file:
            embench_log_data = json.load(file)

        # Extract whether the file is a speed or size test (based on the filename)
        test_type = json_file.split("_")[1]

        # Build the keys used to index into the embench JSON data
        level1 = f"{test_type} results"
        key_level2_mean = f"{test_type} geometric mean"
        key_level2_std_dev = f"{test_type} geometric standard deviation"

        # Extract the actual geometric mean and std. dev. from the JSON data
        actual_geometric_mean = embench_log_data[level1][key_level2_mean]
        actual_geometric_std_dev = embench_log_data[level1][key_level2_std_dev]

        # Extract the expected geometric mean and std. dev. for comparison
        expected_wally_geometric_mean = expected_data['embench_rv32imc'][json_file]['size geometric mean']
        expected_wally_geometric_std_dev = expected_data['embench_rv32imc'][json_file]['size geometric standard deviation']

        # Compare the actual and expected results
        if actual_geometric_mean != expected_wally_geometric_mean:
            failing_value += f"embench {json_file}'s geometric mean of {actual_geometric_mean} does not match expected value of {expected_wally_geometric_mean}\n"
        # # Update expected results file if smaller / better results
        # if actual_geometric_mean < expected_wally_geometric_mean:
        #     updated_expected_json = True
        #     expected_data['embench_rv32imc'][json_file]['size geometric mean'] = actual_geometric_mean
        #     print(f"Updated expected geometric mean for {json_file} to {actual_geometric_mean}")

        if actual_geometric_std_dev != expected_wally_geometric_std_dev:
            failing_value += f"embench {json_file}'s geometric std. dev. of {actual_geometric_std_dev} does not match expected value of {expected_wally_geometric_std_dev}\n"
        # # Update expected results file if smaller / better results
        # if actual_geometric_std_dev < expected_wally_geometric_std_dev:
        #     updated_expected_json = True
        #     expected_data['embench_rv32imc'][json_file]['size geometric standard deviation'] = actual_geometric_std_dev
        #     print(f"Updated expected std. dev. for {json_file} to {actual_geometric_std_dev}")

    # if updated_expected_json:
    #     with open(expected_results_path, 'w') as f:
    #         json.dump(expected_data, f, indent=4)
    #     # automatically push the expected_results.json file to github
    #     subprocess.run(["git", "add", expected_results_path])
    #     subprocess.run(["git", "commit", "-m", "Update expected results with improved metrics"])
    #     subprocess.run(["git", "push"])

    # COREMARK VALIDATION
    # coremark_run = {}
    # with open(coremarkDir, newline='') as csvfile:
    #     reader = csv.DictReader(csvfile)
    #     for row in reader:
    #         arch = row["Architecture"]
    #         coremark_run[arch] = row  # Now you can directly index into it

    # actual_CM_MHz = coremark_run["rv32im_zicsr_zba_zbb_zbs"]["CM / MHz"]
    # expected_wally_CM_Mhz = expected_data['coremark']['coremark/mhz']
    # if str(actual_CM_MHz) != str(expected_wally_CM_Mhz):
    #     failing_value += f"coremark's actual CM/MHz of {actual_CM_MHz} does not match expected value of {expected_wally_CM_Mhz}\n"

    # Read in the expected results from the expected_coremark_results.csv file,
    # as well as the new one created by the test
    actual_results = read_csv_as_sorted_list(coremarkDir)
    expected_results = read_csv_as_sorted_list(coremarkDir_expected)

    # Compare the two CSV files
    if actual_results != expected_results:
        failing_value += "Coremark results do not match expected results.\n"
        failing_value += f"Coremark results:\n{actual_results}\n"
        failing_value += f"Expected results:\n{expected_results}\n"

    # Check if there were any failures
    if failing_value == "":
        print("Validation Tests completed with 0 errors")
    else:
        print(failing_value)
        sys.exit(1)


def read_csv_as_sorted_list(filename):
    with open(filename, newline='') as f:
        reader = csv.reader(f)
        rows = list(reader)
    rows.sort()  # sort rows for consistent ordering
    return rows


def main():
    create_expected_results_json()  # NOTE: this call (re)creates expected_results.json; comment it out to validate against an existing file
    validate_results()


if __name__ == "__main__":
    main()

# do we only want to trigger with nightly - yes
# is there a reason we only care about the 3.38 from the rv32im_zicsr_zba_zbb_zbs arch - most complete
# how do I know if the two tests that produce the results I scrape from are running - just run these defaults:
#   cd $WALLY/benchmarks/coremark
#   ./coremark_sweep.py
#   cd $WALLY/benchmarks/embench
#   make run
# automatically push to github if better results?
# coremark sweep - creates the csv of values for the different architectures
# embench benchmark - creates the 4 json files for speed/size
# check if there are differences between runs of coremark sweep on the csv --> done
# need to standardize the timeout duration between the performance flag and nightly
# need to make sure it fails when results differ
# need to check whether more values in this file should be validated (maybe do a diff for the csv) --> done for this part (more to come in the future, likely)
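
# Optional sketch, not wired into validate_results() above: if dumping both full CSVs on a
# coremark mismatch proves too noisy, the comparison could report only the rows that differ.
# This is a suggestion under that assumption, not part of the current flow; it takes the row
# lists already produced by read_csv_as_sorted_list() and needs no extra imports.
def report_coremark_differences(actual_rows, expected_rows):
    # Collect rows present in one list but missing from the other, preserving sorted order
    actual_only = [row for row in actual_rows if row not in expected_rows]
    expected_only = [row for row in expected_rows if row not in actual_rows]
    message = ""
    if actual_only:
        message += f"Rows only in coremark_results.csv: {actual_only}\n"
    if expected_only:
        message += f"Rows only in expected_coremark_results.csv: {expected_only}\n"
    return message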