[dv] Improve results reporting

This commit adds a YAML-based intermediate format for test results.
compare.py serialises a TestRunResult (a named tuple type) into this
format for each test run it checks. collect_results.py reads them all
back in to produce reports.

Three reports are output:
- regr.log - plain text report much like the one previously produced
- regr_junit.xml, regr_junit_merged.xml - JUnit-format reports; the
  _merged version batches multiple test runs together so that they appear
  as a single test case under each test suite, which gives better results
  with Azure's JUnit reporting.
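
As an illustration of the round trip this enables, here is a minimal
standalone sketch. The field names are the ones defined in
test_run_result.py later in this diff; the test values are invented, and
yaml.safe_dump is only a stand-in for illustration (compare.py emits the
YAML itself via a small yaml_format helper).

import collections
import yaml

fields = ['name', 'idx', 'seed', 'binary', 'uvm_log', 'rtl_trace',
          'rtl_trace_csv', 'iss_trace', 'iss_trace_csv', 'comparison_log',
          'passed', 'failure_message']
TestRunResult = collections.namedtuple('TestRunResult', fields)

trr = TestRunResult(name='my_test', idx=0, seed=123, binary=None,
                    uvm_log=None, rtl_trace=None, rtl_trace_csv=None,
                    iss_trace=None, iss_trace_csv=None, comparison_log=None,
                    passed=True, failure_message=None)

# Write the result out as YAML (the compare.py side) ...
with open('test-result.yml', 'w', encoding='UTF-8') as f:
    yaml.safe_dump(dict(trr._asdict()), f)

# ... and read it back into a TestRunResult (the collect_results.py side).
with open('test-result.yml', encoding='UTF-8') as f:
    loaded = TestRunResult(**yaml.load(f, Loader=yaml.SafeLoader))
assert loaded == trr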
Greg Chadwick 2021-06-24 13:24:48 +01:00 committed by Greg Chadwick
parent 6daae3509a
commit ef545a8bd8
5 changed files with 319 additions and 90 deletions

Makefile

@@ -426,17 +426,18 @@ rtl_sim: $(rtl-sim-logs)
#
# The comparison script compares these and writes to a result file at
#
# $(OUT-SEED)/rtl_sim/$(TEST).$(SEED)/comparison-result.txt
# $(OUT-SEED)/rtl_sim/$(TEST).$(SEED)/test-result.yml
#
# with PASSED or FAILED, depending.
comp-results := $(addsuffix /comparison-result.txt,$(rtl-sim-dirs))
comp-results := $(addsuffix /test-result.yml,$(rtl-sim-dirs))
$(comp-results): \
%/comparison-result.txt: \
%/test-result.yml: \
compare.py $(metadata)/instr_gen.iss.stamp $(rtl-sim-logs)
@echo Comparing traces for $*
$(verb)./compare.py \
--instr-gen-bin-dir $(OUT-SEED)/instr_gen/asm_test \
--iss $(ISS) \
--iss-log-dir $(OUT-SEED)/instr_gen/$(ISS)_sim \
--start-seed $(SEED) \
@@ -446,7 +447,7 @@ $(comp-results): \
$(OUT-SEED)/regr.log: collect_results.py $(comp-results)
@echo "Collecting up results (report at $@)"
$(verb)./collect_results.py -o $@ $(comp-results)
$(verb)./collect_results.py -o $(@D) $(comp-results)
.PHONY: post_compare
post_compare: $(OUT-SEED)/regr.log

collect_results.py

@@ -4,30 +4,50 @@
# SPDX-License-Identifier: Apache-2.0
import argparse
import junit_xml
import os.path
import sys
from typing import TextIO
import yaml
from test_run_result import (TestRunResult, test_run_result_fields,
check_test_run_result)
from typing import List, TextIO
def parse_log(path: str) -> bool:
first_line = '(empty file)'
with open(path, 'r', encoding='UTF-8') as log:
for line in log:
first_line = line.rstrip()
break
def parse_test_run_result(path: str) -> TestRunResult:
try:
with open(path) as yaml_file:
test_run_result_dict = yaml.load(yaml_file, Loader=yaml.SafeLoader)
loaded_fields = test_run_result_dict.keys()
if set(loaded_fields) != set(test_run_result_fields):
raise RuntimeError(f'Error loading YAML at {path}: does not '
'contain the correct set of fields')
if first_line == 'PASS':
return True
if first_line.startswith('FAIL'):
return False
raise RuntimeError('Strange first line ({!r})'.format(first_line))
trr = TestRunResult(**test_run_result_dict)
try:
check_test_run_result(trr)
except AssertionError:
raise RuntimeError(f'Error loading YAML at path {path}: '
'field types were incorrect')
return trr
except (IOError, yaml.YAMLError) as e:
raise RuntimeError(f'Error loading YAML at path {path}: {e}')
def dump_log(path: str, dest: TextIO) -> None:
print('\nLog at {}:'.format(path), file=dest)
with open(path, 'r') as fd:
for line in fd:
dest.write('> ' + line)
def build_broken_test_run_result(err: str) -> TestRunResult:
return TestRunResult(
name='unknown',
idx=0,
seed=0,
binary=None,
uvm_log=None,
rtl_trace=None,
rtl_trace_csv=None,
iss_trace=None,
iss_trace_csv=None,
comparison_log=None,
passed=False,
failure_message=err
)
def box_comment(line: str) -> str:
@@ -35,47 +55,163 @@ def box_comment(line: str) -> str:
return hr + '\n# ' + line + '\n' + hr
def gen_summary_line(passing_tests: List[TestRunResult], failing_tests:
List[TestRunResult]) -> str:
'''Generate a string summarising test results'''
total_tests = len(passing_tests) + len(failing_tests)
pass_pct = (len(passing_tests) / total_tests) * 100
return f'{pass_pct:0.2f}% PASS {len(passing_tests)} PASSED, ' \
f'{len(failing_tests)} FAILED'
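
For instance, with three passing results and one failing result the
summary line reads '75.00% PASS 3 PASSED, 1 FAILED'. A quick standalone
check that simply reproduces the format string above (placeholder list
entries, since only the lengths matter):

passing = [None, None, None]  # stand-ins: gen_summary_line only needs len()
failing = [None]
pass_pct = (len(passing) / (len(passing) + len(failing))) * 100
print(f'{pass_pct:0.2f}% PASS {len(passing)} PASSED, {len(failing)} FAILED')
# prints: 75.00% PASS 3 PASSED, 1 FAILED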
def gen_test_run_result_text(test_run_result: TestRunResult) -> str:
'''Generate a string describing a TestRunResult.
The string includes details of logs, binary run and the failure message if
the test did not pass.'''
test_name_idx = f'{test_run_result.name}.{test_run_result.seed}'
test_underline = '-' * len(test_name_idx)
info_lines = [test_name_idx, test_underline]
if (test_run_result.binary):
info_lines.append(f'Test binary: {test_run_result.binary}')
if (test_run_result.uvm_log):
info_lines.append(f'UVM log: {test_run_result.uvm_log}')
if (test_run_result.rtl_trace):
info_lines.append(f'RTL trace: {test_run_result.rtl_trace}')
if (test_run_result.iss_trace):
info_lines.append(f'ISS trace: {test_run_result.iss_trace}')
if (test_run_result.comparison_log):
info_lines.append(f'Comparison log: {test_run_result.comparison_log}')
if (test_run_result.passed):
info_lines.append('[PASSED]')
else:
info_lines.append(f'{test_run_result.failure_message}')
return '\n'.join(info_lines) + '\n'
def output_results_text(passing_tests: List[TestRunResult], failing_tests:
List[TestRunResult], dest: TextIO):
'''Write results in text form to dest'''
if failing_tests:
print(box_comment('Details of failing tests'), file=dest)
for trr in failing_tests:
print(gen_test_run_result_text(trr), file=dest)
if passing_tests:
print(box_comment('Details of passing tests'), file=dest)
for trr in passing_tests:
print(gen_test_run_result_text(trr), file=dest)
dest.write('\n')
print(gen_summary_line(passing_tests, failing_tests), file=dest)
def output_run_results_junit_xml(passing_tests: List[TestRunResult],
failing_tests: List[TestRunResult],
junit_dest: TextIO,
junit_merged_dest: TextIO):
'''Write results to JUnit XML
Two versions are produced: a normal version and a merged version. In the
normal version there is a test suite per unique test name with a different
test case per seed run. In the merged version there is a single test case
under the test suite with information for the individual runs merged
together. This is to aid use of the Azure Pipelines JUnit dashboard, which
doesn't neatly handle the test suite/test case hierarchy
'''
all_tests = passing_tests + failing_tests
test_suite_info = {}
for trr in all_tests:
# test_case_info contains a tuple per unique test name. The first
# element is a list of junit_xml.TestCase, one per test run with that
# name. The other merges together all of the test outputs to produce
# the merged output.
unmerged, merged = \
test_suite_info.setdefault(trr.name, ([], {'stdout': '',
'failures': ''}))
result_text = gen_test_run_result_text(trr)
# Create a test case for the TestRunResult. stdout holds the text
# describing the run. Add the same text to failures if the test failed.
test_case = junit_xml.TestCase(f'{trr.name}.{trr.seed}')
test_case.stdout = result_text
merged['stdout'] += result_text
if not trr.passed:
test_case.add_failure_info(output=result_text)
merged['failures'] += result_text
unmerged.append(test_case)
# Output the normal JUnit XML
test_suites = [junit_xml.TestSuite(name, test_cases) for
name, (test_cases, _) in test_suite_info.items()]
junit_dest.write(junit_xml.to_xml_report_string(test_suites))
# Output the merged version of the JUnit XML
merged_test_suites = []
for name, (_, merged_test_info) in test_suite_info.items():
test_case = junit_xml.TestCase(name)
test_case.stdout = merged_test_info['stdout']
test_case.add_failure_info(output=merged_test_info['failures'])
merged_test_suites.append(junit_xml.TestSuite(name, [test_case]))
junit_merged_dest.write(junit_xml.to_xml_report_string(merged_test_suites))
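
To make the difference between the two XML flavours concrete, here is a
minimal standalone sketch (test names and messages invented) that builds
one suite each way, using only junit_xml calls that already appear above:

import junit_xml

runs = [('my_test.123', True, 'my_test.123 passed'),
        ('my_test.456', False, 'my_test.456: ISS/RTL trace mismatch')]

# Unmerged flavour: one TestCase per seed under the 'my_test' suite.
cases = []
for name, passed, text in runs:
    tc = junit_xml.TestCase(name)
    tc.stdout = text
    if not passed:
        tc.add_failure_info(output=text)
    cases.append(tc)
unmerged = junit_xml.TestSuite('my_test', cases)

# Merged flavour: a single TestCase carrying the concatenated output, which
# renders more readably in Azure Pipelines' JUnit view.
merged_case = junit_xml.TestCase('my_test')
merged_case.stdout = '\n'.join(text for _, _, text in runs)
merged_case.add_failure_info(output=runs[1][2])
merged = junit_xml.TestSuite('my_test', [merged_case])

print(junit_xml.to_xml_report_string([unmerged]))
print(junit_xml.to_xml_report_string([merged]))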
def main() -> int:
parser = argparse.ArgumentParser()
parser.add_argument('--output', '-o', required=True)
parser.add_argument('log', nargs='*')
parser.add_argument('--output_dir', '-o', required=True)
parser.add_argument('test_run_result', nargs='*')
args = parser.parse_args()
bad_logs = []
good_logs = []
for log_path in args.log:
passing_tests = []
failing_tests = []
for test_run_result_path in args.test_run_result:
try:
passed = parse_log(log_path)
test_run_result = parse_test_run_result(test_run_result_path)
if test_run_result.passed:
passing_tests.append(test_run_result)
else:
failing_tests.append(test_run_result)
except RuntimeError as e:
print(f'Failed to parse run results at {log_path:!r}: {e}',
file=sys.stderr)
passed = False
failing_tests.append(build_broken_test_run_result(str(e)))
if passed:
good_logs.append(log_path)
else:
bad_logs.append(log_path)
regr_log_path = os.path.join(args.output_dir, 'regr.log')
junit_xml_path = os.path.join(args.output_dir, 'regr_junit.xml')
junit_xml_merged_path = os.path.join(args.output_dir,
'regr_junit_merged.xml')
msg = '{} PASSED, {} FAILED'.format(len(good_logs), len(bad_logs))
with open(args.output, 'w', encoding='UTF-8') as outfile:
print(msg, file=outfile)
if bad_logs:
print('\n\n' + box_comment('Details of failing tests'),
file=outfile)
for log_path in bad_logs:
dump_log(log_path, outfile)
with open(regr_log_path, 'w', encoding='UTF-8') as outfile:
output_results_text(passing_tests, failing_tests, outfile)
if good_logs:
print('\n\n' + box_comment('Details of passing tests'),
file=outfile)
for log_path in good_logs:
dump_log(log_path, outfile)
with open(junit_xml_path, 'w', encoding='UTF-8') as junit_xml, \
open(junit_xml_merged_path, 'w', encoding='UTF-8') as \
junit_merged_xml:
output_run_results_junit_xml(passing_tests, failing_tests, junit_xml,
junit_merged_xml)
print(msg)
print(gen_summary_line(passing_tests, failing_tests))
# Succeed if no tests failed
return 1 if bad_logs else 0
return 1 if failing_tests else 0
if __name__ == '__main__':

compare.py

@@ -11,9 +11,10 @@ import argparse
import os
import re
import sys
from typing import Dict, Optional, TextIO, Tuple
from typing import Dict, Optional, TextIO, Tuple, Union
from test_entry import TestEntry, get_test_entry
from test_run_result import TestRunResult
_CORE_IBEX = os.path.normpath(os.path.join(os.path.dirname(__file__)))
_IBEX_ROOT = os.path.normpath(os.path.join(_CORE_IBEX, '../../..'))
@@ -58,7 +59,8 @@ def compare_test_run(test: TestEntry,
seed: int,
rtl_log_dir: str,
iss: str,
iss_log_dir: str) -> _CompareResult:
iss_log_dir: str,
instr_gen_bin_dir: str) -> TestRunResult:
'''Compare results for a single run of a single test
Here, test is a dictionary describing the test (read from the testlist YAML
@@ -76,30 +78,50 @@ def compare_test_run(test: TestEntry,
test_name = test['test']
assert isinstance(test_name, str)
uvm_log = os.path.join(rtl_log_dir, 'sim.log')
elf = os.path.join(instr_gen_bin_dir, '{}_{}.o'.format(test_name, idx))
rtl_trace = os.path.join(rtl_log_dir, 'trace_core_00000000.log')
kv_data = {
'test name': test_name,
'iteration': str(idx),
'seed': str(seed),
'UVM log': uvm_log
'name': test_name,
'idx': idx,
'seed': seed,
'binary': elf,
'uvm_log': uvm_log,
'rtl_trace': rtl_trace,
'rtl_trace_csv': None,
'iss_trace': None,
'iss_trace_csv': None,
'comparison_log': None,
'passed': False,
'failure_message': None
}
# Have a look at the UVM log. Report a failure if an issue is seen in the
# log.
uvm_pass, uvm_log_lines = check_ibex_uvm_log(uvm_log)
try:
uvm_pass, uvm_log_lines = check_ibex_uvm_log(uvm_log)
except IOError as e:
kv_data['failure_message'] = str(e)
kv_data['failure_message'] += \
'\n[FAILED] Could not open simulation log'
return TestRunResult(**kv_data)
if not uvm_pass:
return (False, 'simulation error', kv_data)
kv_data['failure_message'] = '\n'.join(uvm_log_lines)
kv_data['failure_message'] += '\n[FAILED]: sim error seen'
return TestRunResult(**kv_data)
rtl_log = os.path.join(rtl_log_dir, 'trace_core_00000000.log')
rtl_csv = os.path.join(rtl_log_dir, 'trace_core_00000000.csv')
rtl_trace_csv = os.path.join(rtl_log_dir, 'trace_core_00000000.csv')
kv_data['rtl log'] = rtl_log
kv_data['rtl csv'] = rtl_csv
kv_data['rtl_trace_csv'] = rtl_trace_csv
try:
# Convert the RTL log file to a trace CSV.
process_ibex_sim_log(rtl_log, rtl_csv, 1)
process_ibex_sim_log(rtl_trace, rtl_trace_csv, 1)
except (OSError, RuntimeError) as e:
return (False, f'RTL log processing failed ({e})', kv_data)
kv_data['failure_message'] = \
'[FAILED]: Log processing failed: {}'.format(e)
return TestRunResult(**kv_data)
no_post_compare = test.get('no_post_compare', False)
assert isinstance(no_post_compare, bool)
@@ -107,14 +129,15 @@ def compare_test_run(test: TestEntry,
# no_post_compare skips the final ISS v RTL log check, so if we've reached
# here we're done when no_post_compare is set.
if no_post_compare:
return (True, None, kv_data)
kv_data['passed'] = True
return TestRunResult(**kv_data)
# There were no UVM errors. Process the log file from the ISS.
iss_log = os.path.join(iss_log_dir, '{}.{}.log'.format(test_name, idx))
iss_csv = os.path.join(iss_log_dir, '{}.{}.csv'.format(test_name, idx))
kv_data['ISS log'] = iss_log
kv_data['ISS csv'] = iss_csv
kv_data['iss_trace'] = iss_log
kv_data['iss_trace_csv'] = iss_csv
try:
if iss == "spike":
process_spike_sim_log(iss_log, iss_csv)
@@ -122,10 +145,12 @@ def compare_test_run(test: TestEntry,
assert iss == 'ovpsim' # (should be checked by argparse)
process_ovpsim_sim_log(iss_log, iss_csv)
except (OSError, RuntimeError) as e:
return (False, f'ISS log processing failed ({e})', kv_data)
kv_data['failure_message'] = \
'[FAILED]: Log processing failed: {}'.format(e)
return TestRunResult(**kv_data)
compare_log = os.path.join(rtl_log_dir, 'compare.log')
kv_data['comparison log'] = compare_log
kv_data['comparison_log'] = compare_log
# Delete any existing file at compare_log (the compare_trace_csv function
# would append to it, which is rather confusing).
@@ -135,36 +160,59 @@ def compare_test_run(test: TestEntry,
pass
compare_result = \
compare_trace_csv(rtl_csv, iss_csv, "ibex", iss, compare_log,
compare_trace_csv(rtl_trace_csv, iss_csv, "ibex", iss, compare_log,
**test.get('compare_opts', {}))
try:
compare_log_file = open(compare_log)
compare_log_contents = compare_log_file.read()
compare_log_file.close()
except IOError as e:
kv_data['failure_message'] = \
'[FAILED]: Could not read compare log: {}'.format(e)
return TestRunResult(**kv_data)
# Rather oddly, compare_result is a string. The comparison passed if it
# starts with '[PASSED]' and failed otherwise.
compare_passed = compare_result.startswith('[PASSED]: ')
if not compare_passed:
assert compare_result.startswith('[FAILED]: ')
# compare_result[10:] will look like "123 matched, 321 mismatch",
# meaning that 123 instructions matched and 321 instructions didn't.
kv_data['compared instructions'] = compare_result[10:]
return (False, 'mismatch between ISS and RTL', kv_data)
kv_data['failure_message'] = ('RTL / ISS trace comparison failed\n' +
compare_log_contents)
return TestRunResult(**kv_data)
# compare_result[10:] will look like "123 matched", meaning that 123
# instructions matched.
kv_data['compared instructions'] = compare_result[10:]
return (True, None, kv_data)
kv_data['passed'] = True
return TestRunResult(**kv_data)
def on_result(result: _CompareResult, output: TextIO) -> None:
passed, err_msg, kv_data = result
# If any of these characters are present in a string output it in multi-line
# mode. This will either be because the string contains newlines or other
# characters that would otherwise need escaping
_YAML_MULTILINE_CHARS = ['[', ']', ':', "'", '"', '\n']
if passed:
assert err_msg is None
output.write('PASS\n\n')
else:
assert err_msg is not None
output.write('FAIL\n\n')
output.write(f'Test failed: {err_msg}\n')
output.write('---\n\n')
def yaml_format(val: Union[int, str, bool]) -> str:
'''Format a value for yaml output.
For int, str and bool the value can just be converted to str, with special
handling for some strings
'''
# If val is a multi-line string
if isinstance(val, str) and any(c in val for c in _YAML_MULTILINE_CHARS):
# Split into individual lines and output them after a suitable yaml
# multi-line string indicator ('|-') indenting each line.
lines = val.split('\n')
return '|-\n' + '\n'.join([f' {line}' for line in lines])
if val is None:
return ''
return str(val)
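
Mirroring the logic above, a multi-line failure_message comes out as a
YAML block scalar ('|-' followed by indented lines), which PyYAML reads
back as a single string. A quick self-contained check of that behaviour,
using a made-up message:

import yaml

msg = 'saw a UVM_ERROR\n[FAILED]: sim error seen'  # invented failure message
formatted = '|-\n' + '\n'.join(f'  {line}' for line in msg.split('\n'))
doc = f'failure_message: {formatted}\n'
assert yaml.load(doc, Loader=yaml.SafeLoader)['failure_message'] == msg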
def on_result(result: TestRunResult, output: TextIO) -> None:
kv_data = result._asdict()
klen = 1
for k in kv_data:
@@ -172,11 +220,12 @@ def on_result(result: _CompareResult, output: TextIO) -> None:
for k, v in kv_data.items():
kpad = ' ' * (klen - len(k))
output.write(f'{k}:{kpad} | {v}\n')
output.write(f'{k}:{kpad} {yaml_format(v)}\n')
def main() -> int:
parser = argparse.ArgumentParser()
parser.add_argument('--instr-gen-bin-dir', required=True)
parser.add_argument('--iss', required=True, choices=['spike', 'ovpsim'])
parser.add_argument('--iss-log-dir', required=True)
parser.add_argument('--start-seed', type=int, required=True)
@@ -200,7 +249,8 @@ def main() -> int:
entry = get_test_entry(testname)
result = compare_test_run(entry, iteration, seed,
args.rtl_log_dir, args.iss, args.iss_log_dir)
args.rtl_log_dir, args.iss, args.iss_log_dir,
args.instr_gen_bin_dir)
with open(args.output, 'w', encoding='UTF-8') as outfile:
on_result(result, outfile)

test_run_result.py (new file)

@@ -0,0 +1,41 @@
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
import collections
# name, idx, seed and passed must never be None. Other fields can be
# None.
test_run_result_fields = [
'name', # Name of test
'idx', # Index of test
'seed', # Seed of test
'binary', # Path to test binary
'uvm_log', # Path to UVM DV simulation log
'rtl_trace', # Path to RTL ibex trace output
'rtl_trace_csv', # Path to RTL ibex trace CSV
'iss_trace', # Path to spike trace
'iss_trace_csv', # Path to spike trace CSV
'comparison_log', # Path to trace comparison log
'passed', # True if test passed
'failure_message' # Message describing failure, includes a
# '[FAILED]: XXXX' line at the end. Must not be
# None if passed is False
]
TestRunResult = collections.namedtuple('TestRunResult', test_run_result_fields)
def check_test_run_result(trr: TestRunResult):
assert (trr.name is not None and isinstance(trr.name, str))
assert (trr.idx is not None and isinstance(trr.idx, int))
assert (trr.seed is not None and isinstance(trr.seed, int))
assert (trr.binary is None or isinstance(trr.binary, str))
assert (trr.uvm_log is None or isinstance(trr.uvm_log, str))
assert (trr.rtl_trace is None or isinstance(trr.rtl_trace, str))
assert (trr.rtl_trace_csv is None or isinstance(trr.rtl_trace_csv, str))
assert (trr.iss_trace is None or isinstance(trr.iss_trace, str))
assert (trr.iss_trace_csv is None or isinstance(trr.iss_trace_csv, str))
assert (trr.comparison_log is None or isinstance(trr.comparison_log, str))
assert (isinstance(trr.passed, bool))
assert (trr.passed or isinstance(trr.failure_message, str))
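
A short usage sketch (not part of this file), with invented field values,
showing the intended contract:

from test_run_result import TestRunResult, check_test_run_result

good = TestRunResult(name='my_test', idx=0, seed=1, binary=None,
                     uvm_log=None, rtl_trace=None, rtl_trace_csv=None,
                     iss_trace=None, iss_trace_csv=None,
                     comparison_log=None, passed=True, failure_message=None)
check_test_run_result(good)    # all asserts hold

bad = good._replace(seed='1')  # seed must be an int, not a str
try:
    check_test_run_result(bad)
except AssertionError:
    print('caught badly-typed TestRunResult')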

python-requirements.txt

@@ -10,6 +10,7 @@ git+https://github.com/lowRISC/fusesoc.git@ot
pyyaml
mako
junit-xml
# Needed by dvsim.py (not actually used in Ibex)
hjson