diff --git a/dv/uvm/core_ibex/Makefile b/dv/uvm/core_ibex/Makefile
index 740f0638..6ba4ad7f 100644
--- a/dv/uvm/core_ibex/Makefile
+++ b/dv/uvm/core_ibex/Makefile
@@ -426,17 +426,18 @@ rtl_sim: $(rtl-sim-logs)
 #
 # The comparison script compares these and writes to a result file at
 #
-#    $(OUT-SEED)/rtl_sim/$(TEST).$(SEED)/comparison-result.txt
+#    $(OUT-SEED)/rtl_sim/$(TEST).$(SEED)/test-result.yml
 #
 # with PASSED or FAILED, depending.
-comp-results := $(addsuffix /comparison-result.txt,$(rtl-sim-dirs))
+comp-results := $(addsuffix /test-result.yml,$(rtl-sim-dirs))
 $(comp-results): \
-  %/comparison-result.txt: \
+  %/test-result.yml: \
   compare.py $(metadata)/instr_gen.iss.stamp $(rtl-sim-logs)
 	@echo Comparing traces for $*
 	$(verb)./compare.py \
+	  --instr-gen-bin-dir $(OUT-SEED)/instr_gen/asm_test \
 	  --iss $(ISS) \
 	  --iss-log-dir $(OUT-SEED)/instr_gen/$(ISS)_sim \
 	  --start-seed $(SEED) \
@@ -446,7 +447,7 @@ $(comp-results): \
 
 $(OUT-SEED)/regr.log: collect_results.py $(comp-results)
 	@echo "Collecting up results (report at $@)"
-	$(verb)./collect_results.py -o $@ $(comp-results)
+	$(verb)./collect_results.py -o $(@D) $(comp-results)
 
 .PHONY: post_compare
 post_compare: $(OUT-SEED)/regr.log
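For reference, the sketch below shows roughly what one of these test-result.yml files might contain and how collect_results.py reads it back into a TestRunResult. The test name, seed and paths are invented, and the snippet assumes it is run from dv/uvm/core_ibex so that test_run_result and pyyaml are importable.

# Illustrative only: field names match test_run_result_fields; values invented.
import yaml

from test_run_result import TestRunResult, check_test_run_result

example_result = '''
name: riscv_arithmetic_basic_test
idx: 0
seed: 1
binary: out/seed-1/instr_gen/asm_test/riscv_arithmetic_basic_test_0.o
uvm_log: out/seed-1/rtl_sim/riscv_arithmetic_basic_test.1/sim.log
rtl_trace: out/seed-1/rtl_sim/riscv_arithmetic_basic_test.1/trace_core_00000000.log
rtl_trace_csv: out/seed-1/rtl_sim/riscv_arithmetic_basic_test.1/trace_core_00000000.csv
iss_trace: out/seed-1/instr_gen/spike_sim/riscv_arithmetic_basic_test.1.log
iss_trace_csv: out/seed-1/instr_gen/spike_sim/riscv_arithmetic_basic_test.1.csv
comparison_log: out/seed-1/rtl_sim/riscv_arithmetic_basic_test.1/compare.log
passed: True
failure_message:
'''

trr = TestRunResult(**yaml.load(example_result, Loader=yaml.SafeLoader))
check_test_run_result(trr)  # all field types are as expected
print(f'{trr.name}.{trr.seed}: passed={trr.passed}')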
diff --git a/dv/uvm/core_ibex/collect_results.py b/dv/uvm/core_ibex/collect_results.py
index f1592fec..51cb50e3 100755
--- a/dv/uvm/core_ibex/collect_results.py
+++ b/dv/uvm/core_ibex/collect_results.py
@@ -4,30 +4,50 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import argparse
+import junit_xml
+import os.path
 import sys
-from typing import TextIO
+import yaml
+
+from test_run_result import (TestRunResult, test_run_result_fields,
+                             check_test_run_result)
+from typing import List, TextIO
 
 
-def parse_log(path: str) -> bool:
-    first_line = '(empty file)'
-    with open(path, 'r', encoding='UTF-8') as log:
-        for line in log:
-            first_line = line.rstrip()
-            break
+def parse_test_run_result(path: str) -> TestRunResult:
+    try:
+        with open(path) as yaml_file:
+            test_run_result_dict = yaml.load(yaml_file, Loader=yaml.SafeLoader)
+            loaded_fields = test_run_result_dict.keys()
+            if set(loaded_fields) != set(test_run_result_fields):
+                raise RuntimeError(f'Error loading YAML at {path}: does not '
+                                   'contain the correct set of fields')
 
-    if first_line == 'PASS':
-        return True
-    if first_line.startswith('FAIL'):
-        return False
-
-    raise RuntimeError('Strange first line ({!r})'.format(first_line))
+            trr = TestRunResult(**test_run_result_dict)
+            try:
+                check_test_run_result(trr)
+            except AssertionError:
+                raise RuntimeError(f'Error loading YAML at path {path}: '
+                                   'field types were incorrect')
 
+            return trr
+    except (IOError, yaml.YAMLError) as e:
+        raise RuntimeError(f'Error loading YAML at path {path}: {e}')
 
-def dump_log(path: str, dest: TextIO) -> None:
-    print('\nLog at {}:'.format(path), file=dest)
-    with open(path, 'r') as fd:
-        for line in fd:
-            dest.write('> ' + line)
+
+def build_broken_test_run_result(err: str) -> TestRunResult:
+    return TestRunResult(
+        name='unknown',
+        idx=0,
+        seed=0,
+        binary=None,
+        uvm_log=None,
+        rtl_trace=None,
+        rtl_trace_csv=None,
+        iss_trace=None,
+        iss_trace_csv=None,
+        comparison_log=None,
+        passed=False,
+        failure_message=err
+    )
 
 
 def box_comment(line: str) -> str:
@@ -35,47 +55,163 @@
     return hr + '\n# ' + line + '\n' + hr
 
 
+def gen_summary_line(passing_tests: List[TestRunResult], failing_tests:
+                     List[TestRunResult]) -> str:
+    '''Generate a string summarising test results'''
+    total_tests = len(passing_tests) + len(failing_tests)
+    pass_pct = (len(passing_tests) / total_tests) * 100
+
+    return f'{pass_pct:0.2f}% PASS {len(passing_tests)} PASSED, ' \
+           f'{len(failing_tests)} FAILED'
+
+
+def gen_test_run_result_text(test_run_result: TestRunResult) -> str:
+    '''Generate a string describing a TestRunResult.
+
+    The string includes details of logs, binary run and the failure message if
+    the test did not pass.'''
+    test_name_idx = f'{test_run_result.name}.{test_run_result.seed}'
+    test_underline = '-' * len(test_name_idx)
+    info_lines = [test_name_idx, test_underline]
+
+    if (test_run_result.binary):
+        info_lines.append(f'Test binary: {test_run_result.binary}')
+
+    if (test_run_result.uvm_log):
+        info_lines.append(f'UVM log: {test_run_result.uvm_log}')
+
+    if (test_run_result.rtl_trace):
+        info_lines.append(f'RTL trace: {test_run_result.rtl_trace}')
+
+    if (test_run_result.iss_trace):
+        info_lines.append(f'ISS trace: {test_run_result.iss_trace}')
+
+    if (test_run_result.comparison_log):
+        info_lines.append(f'Comparison log: {test_run_result.comparison_log}')
+
+    if (test_run_result.passed):
+        info_lines.append('[PASSED]')
+    else:
+        info_lines.append(f'{test_run_result.failure_message}')
+
+    return '\n'.join(info_lines) + '\n'
+
+
+def output_results_text(passing_tests: List[TestRunResult], failing_tests:
+                        List[TestRunResult], dest: TextIO):
+    '''Write results in text form to dest'''
+
+    if failing_tests:
+        print(box_comment('Details of failing tests'), file=dest)
+        for trr in failing_tests:
+            print(gen_test_run_result_text(trr), file=dest)
+
+    if passing_tests:
+        print(box_comment('Details of passing tests'), file=dest)
+        for trr in passing_tests:
+            print(gen_test_run_result_text(trr), file=dest)
+
+    dest.write('\n')
+
+    print(gen_summary_line(passing_tests, failing_tests), file=dest)
+
+
+def output_run_results_junit_xml(passing_tests: List[TestRunResult],
+                                 failing_tests: List[TestRunResult],
+                                 junit_dest: TextIO,
+                                 junit_merged_dest: TextIO):
+    '''Write results to JUnit XML
+
+    Two versions are produced: a normal version and a merged version. In the
+    normal version there is a test suite per unique test name with a different
+    test case per seed run. In the merged version there is a single test case
+    under the test suite with information for the individual runs merged
+    together. This is to aid use of the Azure Pipelines JUnit dashboard, which
+    doesn't neatly handle the test suite/test case hierarchy.
+    '''
+
+    all_tests = passing_tests + failing_tests
+
+    test_suite_info = {}
+    for trr in all_tests:
+        # test_suite_info maps a unique test name to a tuple. The first
+        # element is a list of junit_xml.TestCase, one per test run with that
+        # name. The second element merges together all of the test outputs to
+        # produce the merged output.
+        unmerged, merged = \
+            test_suite_info.setdefault(trr.name, ([], {'stdout': '',
+                                                       'failures': ''}))
+        result_text = gen_test_run_result_text(trr)
+
+        # Create a test case for the TestRunResult. stdout holds the text
+        # describing the run. Add the same text to failures if the test
+        # failed.
+        test_case = junit_xml.TestCase(f'{trr.name}.{trr.seed}')
+        test_case.stdout = result_text
+
+        merged['stdout'] += result_text
+
+        if not trr.passed:
+            test_case.add_failure_info(output=result_text)
+            merged['failures'] += result_text
+
+        unmerged.append(test_case)
+
+    # Output the normal JUnit XML
+    test_suites = [junit_xml.TestSuite(name, test_cases) for
+                   name, (test_cases, _) in test_suite_info.items()]
+
+    junit_dest.write(junit_xml.to_xml_report_string(test_suites))
+
+    # Output the merged version of the JUnit XML
+    merged_test_suites = []
+
+    for name, (_, merged_test_info) in test_suite_info.items():
+        test_case = junit_xml.TestCase(name)
+        test_case.stdout = merged_test_info['stdout']
+        test_case.add_failure_info(output=merged_test_info['failures'])
+
+        merged_test_suites.append(junit_xml.TestSuite(name, [test_case]))
+
+    junit_merged_dest.write(junit_xml.to_xml_report_string(merged_test_suites))
+
+
 def main() -> int:
     parser = argparse.ArgumentParser()
-    parser.add_argument('--output', '-o', required=True)
-    parser.add_argument('log', nargs='*')
+    parser.add_argument('--output_dir', '-o', required=True)
+    parser.add_argument('test_run_result', nargs='*')
 
     args = parser.parse_args()
 
-    bad_logs = []
-    good_logs = []
-    for log_path in args.log:
+    passing_tests = []
+    failing_tests = []
+    for test_run_result_path in args.test_run_result:
         try:
-            passed = parse_log(log_path)
+            test_run_result = parse_test_run_result(test_run_result_path)
+            if test_run_result.passed:
+                passing_tests.append(test_run_result)
+            else:
+                failing_tests.append(test_run_result)
         except RuntimeError as e:
-            print(f'Failed to parse run results at {log_path:!r}: {e}',
-                  file=sys.stderr)
-            passed = False
+            failing_tests.append(build_broken_test_run_result(str(e)))
 
-        if passed:
-            good_logs.append(log_path)
-        else:
-            bad_logs.append(log_path)
+    regr_log_path = os.path.join(args.output_dir, 'regr.log')
+    junit_xml_path = os.path.join(args.output_dir, 'regr_junit.xml')
+    junit_xml_merged_path = os.path.join(args.output_dir,
+                                         'regr_junit_merged.xml')
 
-    msg = '{} PASSED, {} FAILED'.format(len(good_logs), len(bad_logs))
-    with open(args.output, 'w', encoding='UTF-8') as outfile:
-        print(msg, file=outfile)
-        if bad_logs:
-            print('\n\n' + box_comment('Details of failing tests'),
-                  file=outfile)
-            for log_path in bad_logs:
-                dump_log(log_path, outfile)
+    with open(regr_log_path, 'w', encoding='UTF-8') as outfile:
+        output_results_text(passing_tests, failing_tests, outfile)
 
-        if good_logs:
-            print('\n\n' + box_comment('Details of passing tests'),
-                  file=outfile)
-            for log_path in good_logs:
-                dump_log(log_path, outfile)
+    with open(junit_xml_path, 'w', encoding='UTF-8') as junit_xml, \
+         open(junit_xml_merged_path, 'w', encoding='UTF-8') as \
+         junit_merged_xml:
+        output_run_results_junit_xml(passing_tests, failing_tests, junit_xml,
+                                     junit_merged_xml)
 
-    print(msg)
+    print(gen_summary_line(passing_tests, failing_tests))
 
     # Succeed if no tests failed
-    return 1 if bad_logs else 0
+    return 1 if failing_tests else 0
 
 
 if __name__ == '__main__':
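The junit-xml calls that output_run_results_junit_xml relies on are sketched below with invented test names and output text; to_xml_report_string assumes a junit-xml release that provides it (1.9 or later).

# Minimal sketch of the junit-xml API used above (names and text invented).
import junit_xml

passing = junit_xml.TestCase('riscv_arithmetic_basic_test.1')
passing.stdout = 'riscv_arithmetic_basic_test.1\n[PASSED]\n'

failing = junit_xml.TestCase('riscv_arithmetic_basic_test.2')
failing.stdout = 'riscv_arithmetic_basic_test.2\n[FAILED]: sim error seen\n'
failing.add_failure_info(output='[FAILED]: sim error seen')

suite = junit_xml.TestSuite('riscv_arithmetic_basic_test', [passing, failing])
print(junit_xml.to_xml_report_string([suite]))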
diff --git a/dv/uvm/core_ibex/compare.py b/dv/uvm/core_ibex/compare.py
index a25f3c36..0ea2ad55 100755
--- a/dv/uvm/core_ibex/compare.py
+++ b/dv/uvm/core_ibex/compare.py
@@ -11,9 +11,10 @@ import argparse
 import os
 import re
 import sys
-from typing import Dict, Optional, TextIO, Tuple
+from typing import Dict, Optional, TextIO, Tuple, Union
 
 from test_entry import TestEntry, get_test_entry
+from test_run_result import TestRunResult
 
 _CORE_IBEX = os.path.normpath(os.path.join(os.path.dirname(__file__)))
 _IBEX_ROOT = os.path.normpath(os.path.join(_CORE_IBEX, '../../..'))
@@ -58,7 +59,8 @@ def compare_test_run(test: TestEntry,
                      seed: int,
                      rtl_log_dir: str,
                      iss: str,
-                     iss_log_dir: str) -> _CompareResult:
+                     iss_log_dir: str,
+                     instr_gen_bin_dir: str) -> TestRunResult:
     '''Compare results for a single run of a single test
 
     Here, test is a dictionary describing the test (read from the testlist YAML
@@ -76,30 +78,50 @@ def compare_test_run(test: TestEntry,
     test_name = test['test']
     assert isinstance(test_name, str)
     uvm_log = os.path.join(rtl_log_dir, 'sim.log')
+    elf = os.path.join(instr_gen_bin_dir, '{}_{}.o'.format(test_name, idx))
+
+    rtl_trace = os.path.join(rtl_log_dir, 'trace_core_00000000.log')
 
     kv_data = {
-        'test name': test_name,
-        'iteration': str(idx),
-        'seed': str(seed),
-        'UVM log': uvm_log
+        'name': test_name,
+        'idx': idx,
+        'seed': seed,
+        'binary': elf,
+        'uvm_log': uvm_log,
+        'rtl_trace': rtl_trace,
+        'rtl_trace_csv': None,
+        'iss_trace': None,
+        'iss_trace_csv': None,
+        'comparison_log': None,
+        'passed': False,
+        'failure_message': None
     }
 
     # Have a look at the UVM log. Report a failure if an issue is seen in the
    # log.
-    uvm_pass, uvm_log_lines = check_ibex_uvm_log(uvm_log)
+    try:
+        uvm_pass, uvm_log_lines = check_ibex_uvm_log(uvm_log)
+    except IOError as e:
+        kv_data['failure_message'] = str(e)
+        kv_data['failure_message'] += \
+            '\n[FAILED] Could not open simulation log'
+        return TestRunResult(**kv_data)
+
     if not uvm_pass:
-        return (False, 'simulation error', kv_data)
+        kv_data['failure_message'] = '\n'.join(uvm_log_lines)
+        kv_data['failure_message'] += '\n[FAILED]: sim error seen'
+        return TestRunResult(**kv_data)
 
-    rtl_log = os.path.join(rtl_log_dir, 'trace_core_00000000.log')
-    rtl_csv = os.path.join(rtl_log_dir, 'trace_core_00000000.csv')
+    rtl_trace_csv = os.path.join(rtl_log_dir, 'trace_core_00000000.csv')
 
-    kv_data['rtl log'] = rtl_log
-    kv_data['rtl csv'] = rtl_csv
+    kv_data['rtl_trace_csv'] = rtl_trace_csv
 
     try:
         # Convert the RTL log file to a trace CSV.
-        process_ibex_sim_log(rtl_log, rtl_csv, 1)
+        process_ibex_sim_log(rtl_trace, rtl_trace_csv, 1)
     except (OSError, RuntimeError) as e:
-        return (False, f'RTL log processing failed ({e})', kv_data)
+        kv_data['failure_message'] = \
+            '[FAILED]: Log processing failed: {}'.format(e)
+        return TestRunResult(**kv_data)
 
     no_post_compare = test.get('no_post_compare', False)
     assert isinstance(no_post_compare, bool)
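A rough sketch of driving the updated compare_test_run signature directly (all argument values invented; importing compare assumes the riscv-dv scripts it loads are available on the Python path). The remaining hunks of compare_test_run follow below.

from compare import compare_test_run
from test_entry import get_test_entry

entry = get_test_entry('riscv_arithmetic_basic_test')
result = compare_test_run(entry, 0, 1,
                          'out/seed-1/rtl_sim/riscv_arithmetic_basic_test.1',
                          'spike',
                          'out/seed-1/instr_gen/spike_sim',
                          'out/seed-1/instr_gen/asm_test')
print(result.passed, result.failure_message)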
@@ -107,14 +129,15 @@
     # no_post_compare skips the final ISS v RTL log check, so if we've reached
     # here we're done when no_post_compare is set.
     if no_post_compare:
-        return (True, None, kv_data)
+        kv_data['passed'] = True
+        return TestRunResult(**kv_data)
 
     # There were no UVM errors. Process the log file from the ISS.
     iss_log = os.path.join(iss_log_dir, '{}.{}.log'.format(test_name, idx))
     iss_csv = os.path.join(iss_log_dir, '{}.{}.csv'.format(test_name, idx))
-    kv_data['ISS log'] = iss_log
-    kv_data['ISS csv'] = iss_csv
+    kv_data['iss_trace'] = iss_log
+    kv_data['iss_trace_csv'] = iss_csv
 
     try:
         if iss == "spike":
             process_spike_sim_log(iss_log, iss_csv)
@@ -122,10 +145,12 @@
             assert iss == 'ovpsim'  # (should be checked by argparse)
             process_ovpsim_sim_log(iss_log, iss_csv)
     except (OSError, RuntimeError) as e:
-        return (False, f'ISS log processing failed ({e})', kv_data)
+        kv_data['failure_message'] = \
+            '[FAILED]: Log processing failed: {}'.format(e)
+        return TestRunResult(**kv_data)
 
     compare_log = os.path.join(rtl_log_dir, 'compare.log')
-    kv_data['comparison log'] = compare_log
+    kv_data['comparison_log'] = compare_log
 
     # Delete any existing file at compare_log (the compare_trace_csv function
     # would append to it, which is rather confusing).
@@ -135,36 +160,59 @@
             pass
 
     compare_result = \
-        compare_trace_csv(rtl_csv, iss_csv, "ibex", iss, compare_log,
+        compare_trace_csv(rtl_trace_csv, iss_csv, "ibex", iss, compare_log,
                           **test.get('compare_opts', {}))
 
+    try:
+        compare_log_file = open(compare_log)
+        compare_log_contents = compare_log_file.read()
+        compare_log_file.close()
+    except IOError as e:
+        kv_data['failure_message'] = \
+            '[FAILED]: Could not read compare log: {}'.format(e)
+        return TestRunResult(**kv_data)
+
     # Rather oddly, compare_result is a string. The comparison passed if it
     # starts with '[PASSED]' and failed otherwise.
     compare_passed = compare_result.startswith('[PASSED]: ')
     if not compare_passed:
         assert compare_result.startswith('[FAILED]: ')
-        # compare_result[10:] will look like "123 matched, 321 mismatch",
-        # meaning that 123 instructions matched and 321 instructions didn't.
-        kv_data['compared instructions'] = compare_result[10:]
-        return (False, 'mismatch between ISS and RTL', kv_data)
+        kv_data['failure_message'] = ('RTL / ISS trace comparison failed\n' +
+                                      compare_log_contents)
+        return TestRunResult(**kv_data)
 
-    # compare_result[10:] will look like "123 matched", meaning that 123
-    # instructions matched.
-    kv_data['compared instructions'] = compare_result[10:]
-    return (True, None, kv_data)
+    kv_data['passed'] = True
+    return TestRunResult(**kv_data)
 
 
-def on_result(result: _CompareResult, output: TextIO) -> None:
-    passed, err_msg, kv_data = result
+# If any of these characters are present in a string, output it in multi-line
+# mode. This will either be because the string contains newlines or other
+# characters that would otherwise need escaping.
+_YAML_MULTILINE_CHARS = ['[', ']', ':', "'", '"', '\n']
 
-    if passed:
-        assert err_msg is None
-        output.write('PASS\n\n')
-    else:
-        assert err_msg is not None
-        output.write('FAIL\n\n')
-        output.write(f'Test failed: {err_msg}\n')
-        output.write('---\n\n')
+
+def yaml_format(val: Union[int, str, bool]) -> str:
+    '''Format a value for YAML output.
+
+    For int, str and bool the value can just be converted to str, with special
+    handling for multi-line strings.
+    '''
+
+    # If val is a multi-line string
+    if isinstance(val, str) and any(c in val for c in _YAML_MULTILINE_CHARS):
+        # Split into individual lines and output them after a suitable YAML
+        # multi-line string indicator ('|-'), indenting each line.
+        lines = val.split('\n')
+        return '|-\n' + '\n'.join([f'  {line}' for line in lines])
+
+    if val is None:
+        return ''
+
+    return str(val)
+
+
+def on_result(result: TestRunResult, output: TextIO) -> None:
+    kv_data = result._asdict()
 
     klen = 1
     for k in kv_data:
@@ -172,11 +220,12 @@
     for k, v in kv_data.items():
         kpad = ' ' * (klen - len(k))
-        output.write(f'{k}:{kpad} | {v}\n')
+        output.write(f'{k}:{kpad} {yaml_format(v)}\n')
 
 
 def main() -> int:
     parser = argparse.ArgumentParser()
+    parser.add_argument('--instr-gen-bin-dir', required=True)
     parser.add_argument('--iss', required=True, choices=['spike', 'ovpsim'])
     parser.add_argument('--iss-log-dir', required=True)
     parser.add_argument('--start-seed', type=int, required=True)
@@ -200,7 +249,8 @@ def main() -> int:
     entry = get_test_entry(testname)
 
     result = compare_test_run(entry, iteration, seed,
-                              args.rtl_log_dir, args.iss, args.iss_log_dir)
+                              args.rtl_log_dir, args.iss, args.iss_log_dir,
+                              args.instr_gen_bin_dir)
 
     with open(args.output, 'w', encoding='UTF-8') as outfile:
         on_result(result, outfile)
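To illustrate the yaml_format behaviour above, a small sketch with an invented failure message (again assuming compare can be imported; the expected output is shown in comments):

from compare import yaml_format

msg = 'RTL / ISS trace comparison failed\n[FAILED]: 12 matched, 1 mismatch'
print('failure_message: ' + yaml_format(msg))
# Prints a YAML block scalar:
#   failure_message: |-
#     RTL / ISS trace comparison failed
#     [FAILED]: 12 matched, 1 mismatch
print('passed: ' + yaml_format(False))  # simple values stay inline: 'passed: False'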
diff --git a/dv/uvm/core_ibex/test_run_result.py b/dv/uvm/core_ibex/test_run_result.py
new file mode 100644
index 00000000..779249d7
--- /dev/null
+++ b/dv/uvm/core_ibex/test_run_result.py
@@ -0,0 +1,41 @@
+# Copyright lowRISC contributors.
+# Licensed under the Apache License, Version 2.0, see LICENSE for details.
+# SPDX-License-Identifier: Apache-2.0
+
+import collections
+
+# name, idx, seed and passed must never be None. Other fields can be
+# None.
+test_run_result_fields = [
+    'name',             # Name of test
+    'idx',              # Index of test
+    'seed',             # Seed of test
+    'binary',           # Path to test binary
+    'uvm_log',          # Path to UVM DV simulation log
+    'rtl_trace',        # Path to RTL ibex trace output
+    'rtl_trace_csv',    # Path to RTL ibex trace CSV
+    'iss_trace',        # Path to spike trace
+    'iss_trace_csv',    # Path to spike trace CSV
+    'comparison_log',   # Path to trace comparison log
+    'passed',           # True if test passed
+    'failure_message'   # Message describing failure, includes a
+                        # '[FAILED]: XXXX' line at the end. Must not be
+                        # None if passed is False
+    ]
+
+TestRunResult = collections.namedtuple('TestRunResult', test_run_result_fields)
+
+
+def check_test_run_result(trr: TestRunResult):
+    assert (trr.name is not None and isinstance(trr.name, str))
+    assert (trr.idx is not None and isinstance(trr.idx, int))
+    assert (trr.seed is not None and isinstance(trr.seed, int))
+    assert (trr.binary is None or isinstance(trr.binary, str))
+    assert (trr.uvm_log is None or isinstance(trr.uvm_log, str))
+    assert (trr.rtl_trace is None or isinstance(trr.rtl_trace, str))
+    assert (trr.rtl_trace_csv is None or isinstance(trr.rtl_trace_csv, str))
+    assert (trr.iss_trace is None or isinstance(trr.iss_trace, str))
+    assert (trr.iss_trace_csv is None or isinstance(trr.iss_trace_csv, str))
+    assert (trr.comparison_log is None or isinstance(trr.comparison_log, str))
+    assert (isinstance(trr.passed, bool))
+    assert (trr.passed or isinstance(trr.failure_message, str))
diff --git a/python-requirements.txt b/python-requirements.txt
index 19140000..b32bd7f8 100644
--- a/python-requirements.txt
+++ b/python-requirements.txt
@@ -10,6 +10,7 @@ git+https://github.com/lowRISC/fusesoc.git@ot
 
 pyyaml
 mako
+junit-xml
 
 # Needed by dvsim.py (not actually used in Ibex)
 hjson
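Finally, a sketch of check_test_run_result rejecting a badly typed record, which is the failure mode that parse_test_run_result in collect_results.py converts into a RuntimeError (field values invented; runnable from dv/uvm/core_ibex since test_run_result only needs the standard library):

from test_run_result import TestRunResult, check_test_run_result

bad = TestRunResult(name='riscv_arithmetic_basic_test', idx='0', seed=1,
                    binary=None, uvm_log=None, rtl_trace=None,
                    rtl_trace_csv=None, iss_trace=None, iss_trace_csv=None,
                    comparison_log=None, passed=False,
                    failure_message='[FAILED]: example')

try:
    check_test_run_result(bad)
except AssertionError:
    print('idx must be an int, not a string')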