From 3650e08e4e862b8d2148463402e988b7d6e92c57 Mon Sep 17 00:00:00 2001
From: Harry Callahan
Date: Tue, 4 Oct 2022 14:14:35 +0100
Subject: [PATCH] Record test failure due to timeout in regr.log

This commit adds a new field to the trr (test-run-result) structured data
that records the failure_mode of a testcase. If the test failed due to a
timeout, print a small addendum {T} next to its log-line in the summary,
e.g.

23.33% PASS 7 PASSED, 23 FAILED
riscv_debug_basic_test.21259: PASS
riscv_debug_basic_test.21260: FAILED {T}
riscv_debug_basic_test.21261: PASS
riscv_debug_basic_test.21262: FAILED {T}
riscv_debug_basic_test.21263: FAILED {T}
riscv_debug_instr_test.21259: FAILED {T}
riscv_debug_instr_test.21260: FAILED {T}
riscv_debug_instr_test.21261: FAILED {T}
riscv_debug_instr_test.21262: PASS
riscv_debug_instr_test.21263: FAILED {T}
riscv_dret_test.21259: FAILED
riscv_dret_test.21260: FAILED
riscv_dret_test.21261: FAILED {T}
---
 dv/uvm/core_ibex/scripts/check_logs.py      | 12 +++++++++++-
 dv/uvm/core_ibex/scripts/collect_results.py |  8 ++++++--
 dv/uvm/core_ibex/scripts/metadata.py        |  1 +
 dv/uvm/core_ibex/scripts/run_rtl.py         | 20 +++++++++++++-------
 dv/uvm/core_ibex/scripts/scripts_lib.py     | 17 ++++++++++-------
 dv/uvm/core_ibex/scripts/test_run_result.py | 14 ++++++++++++++
 6 files changed, 55 insertions(+), 17 deletions(-)

diff --git a/dv/uvm/core_ibex/scripts/check_logs.py b/dv/uvm/core_ibex/scripts/check_logs.py
index edd4bdbb..5be679aa 100755
--- a/dv/uvm/core_ibex/scripts/check_logs.py
+++ b/dv/uvm/core_ibex/scripts/check_logs.py
@@ -14,7 +14,7 @@ import sys
 import pathlib3x as pathlib
 
 from test_entry import read_test_dot_seed
-from test_run_result import TestRunResult
+from test_run_result import TestRunResult, Failure_Modes
 
 from spike_log_to_trace_csv import process_spike_sim_log  # type: ignore
 from ibex_log_to_trace_csv import (process_ibex_sim_log,  # type: ignore
@@ -29,15 +29,23 @@ def compare_test_run(trr: TestRunResult) -> TestRunResult:
 
     Use any log-processing scripts available to check for errors.
     """
+
+    # If the test timed-out, return early to avoid overwriting the failure_mode.
+    if (trr.failure_mode == Failure_Modes.TIMEOUT):
+        trr.passed = False
+        return trr
+
     # Have a look at the UVM log. Report a failure if an issue is seen.
     try:
         logger.debug(f"About to do Log processing: {trr.rtl_log}")
         uvm_pass, uvm_log_lines = check_ibex_uvm_log(trr.rtl_log)
     except IOError as e:
         trr.passed = False
+        trr.failure_mode = Failure_Modes.PARSE_ERROR
         trr.failure_message = f"[FAILED] Could not open simulation log: {e}\n"
         return trr
 
     if not uvm_pass:
+        trr.failure_mode = Failure_Modes.LOG_ERROR
         trr.failure_message = f"\n[FAILURE]: sim error seen in '{trr.rtl_log.name}'\n"
         if uvm_log_lines:
             trr.failure_message += \
@@ -54,6 +62,7 @@ def compare_test_run(trr: TestRunResult) -> TestRunResult:
         process_ibex_sim_log(trr.rtl_trace, trr.dir_test/'rtl_trace.csv')
     except (OSError, RuntimeError) as e:
         trr.passed = False
+        trr.failure_mode = Failure_Modes.LOG_ERROR
         trr.failure_message = f"[FAILED]: Log processing failed: {e}"
         return trr
 
@@ -65,6 +74,7 @@ def compare_test_run(trr: TestRunResult) -> TestRunResult:
             raise RuntimeError('Unsupported simulator for cosim')
     except (OSError, RuntimeError) as e:
         trr.passed = False
+        trr.failure_mode = Failure_Modes.LOG_ERROR
         trr.failure_message = f"[FAILED]: Log processing failed: {e}"
         return trr
 
diff --git a/dv/uvm/core_ibex/scripts/collect_results.py b/dv/uvm/core_ibex/scripts/collect_results.py
index 6f1c70e5..217941cd 100755
--- a/dv/uvm/core_ibex/scripts/collect_results.py
+++ b/dv/uvm/core_ibex/scripts/collect_results.py
@@ -10,7 +10,7 @@ import io
 import pathlib3x as pathlib
 import dataclasses
 from metadata import RegressionMetadata, LockedMetadata
-from test_run_result import TestRunResult
+from test_run_result import TestRunResult, Failure_Modes
 import scripts_lib as ibex_lib
 
 from typing import List, TextIO
@@ -159,10 +159,14 @@ def main() -> int:
     for f in md.tests_pickle_files:
         try:
             trr = TestRunResult.construct_from_pickle(f)
-            summary_dict[f"{trr.testname}.{trr.seed}"] = ('PASS' if trr.passed else 'FAILED')
+            summary_dict[f"{trr.testname}.{trr.seed}"] = \
+                ('PASS' if trr.passed else
+                 'FAILED' + (" {T}" if (trr.failure_mode == Failure_Modes.TIMEOUT) else ""))
             if trr.passed:
                 passing_tests.append(trr)
             else:
+                if (trr.failure_mode == Failure_Modes.TIMEOUT):
+                    trr.failure_message = f"[FAILURE] Simulation timed-out [{md.run_rtl_timeout_s}s].\n"
                 failing_tests.append(trr)
         except RuntimeError as e:
             failing_tests.append(
diff --git a/dv/uvm/core_ibex/scripts/metadata.py b/dv/uvm/core_ibex/scripts/metadata.py
index 71363713..d7e4ce2b 100755
--- a/dv/uvm/core_ibex/scripts/metadata.py
+++ b/dv/uvm/core_ibex/scripts/metadata.py
@@ -62,6 +62,7 @@ class RegressionMetadata(scripts_lib.testdata_cls):
     tests_and_counts: List[Tuple[str, int]] = field(default_factory=list)
     isa_ibex: Optional[str] = None
     isa_iss: Optional[str] = None
+    run_rtl_timeout_s: int = 1800
 
     # Files that control the regression, specify configurations, tests, etc
     ibex_configs : pathlib.Path = field(init=False, compare=False, default_factory=pathlib.Path)
diff --git a/dv/uvm/core_ibex/scripts/run_rtl.py b/dv/uvm/core_ibex/scripts/run_rtl.py
index 66730b11..f46da52b 100755
--- a/dv/uvm/core_ibex/scripts/run_rtl.py
+++ b/dv/uvm/core_ibex/scripts/run_rtl.py
@@ -7,6 +7,7 @@ import argparse
 import os
 import sys
+import subprocess
 
 import pathlib3x as pathlib
 
 from ibex_cmd import get_sim_opts
@@ -14,7 +15,7 @@ import riscvdv_interface
 from scripts_lib import run_one, format_to_cmd
 from test_entry import read_test_dot_seed, get_test_entry
 from metadata import RegressionMetadata
-from test_run_result import TestRunResult
+from test_run_result import TestRunResult, Failure_Modes
 
 import logging
 logger = logging.getLogger(__name__)
@@ -80,17 +81,22 @@ def _main() -> int:
 
     trr.dir_test.mkdir(exist_ok=True, parents=True)
     trr.rtl_cmds = [format_to_cmd(cmd) for cmd in sim_cmds]
     trr.rtl_stdout = trr.dir_test / 'rtl_sim_stdstreams.log'
-    trr.export(write_yaml=True)
 
     # Write all sim_cmd output into a single logfile
     with open(trr.rtl_stdout, 'wb') as sim_fd:
-        for cmd in trr.rtl_cmds:
-            # Note that we don't capture the success or failure of the subprocess:
-
-            sim_fd.write(f"Running run-rtl command :\n{' '.join(cmd)}\n".encode())
-            run_one(md.verbose, cmd, redirect_stdstreams=sim_fd, timeout_s=1800)
+        try:
+            for cmd in trr.rtl_cmds:
+                # Note that we don't capture the success or failure of the subprocess:
+                sim_fd.write(f"Running run-rtl command :\n{' '.join(cmd)}\n".encode())
+                run_one(md.verbose, cmd,
+                        redirect_stdstreams=sim_fd,
+                        timeout_s=md.run_rtl_timeout_s,
+                        reraise=True)  # Allow us to catch timeout exceptions at this level
+        except subprocess.TimeoutExpired:
+            trr.failure_mode = Failure_Modes.TIMEOUT
+    trr.export(write_yaml=True)
 
     # Always return 0 (success), even if the test failed. We've successfully
     # generated a log either way.
     return 0
diff --git a/dv/uvm/core_ibex/scripts/scripts_lib.py b/dv/uvm/core_ibex/scripts/scripts_lib.py
index 15bf4908..ca7f5b5e 100644
--- a/dv/uvm/core_ibex/scripts/scripts_lib.py
+++ b/dv/uvm/core_ibex/scripts/scripts_lib.py
@@ -25,6 +25,7 @@ def run_one(verbose: bool,
             cmd: List[str],
             redirect_stdstreams: Optional[Union[str, pathlib.Path, IOBase]] = None,
             timeout_s: Optional[int] = None,
+            reraise: bool = False,
             env: Dict[str, str] = None) -> int:
     """Run a command, returning its retcode.
 
@@ -52,18 +53,17 @@ def run_one(verbose: bool,
                          f"redirect_stdstream called as {redirect_stdstreams} "
                          f"but that argument is invalid.")
 
+    cmd_str = ' '.join(shlex.quote(w) for w in cmd)
     if verbose:
         # The equivalent of bash -x
-        cmd_str = ' '.join(shlex.quote(w) for w in cmd)
-        redir_cmd = cmd_str
         if redirect_stdstreams is not None:
             if isinstance(redirect_stdstreams, str):
                 redir = f'>{shlex.quote(redirect_stdstreams)}'
             else:
                 redir = f'>>{shlex.quote(redirect_stdstreams.name)}'
-            redir_cmd = f'{cmd_str} {redir} 2>&1'
+            cmd_str = f'{cmd_str} {redir} 2>&1'
 
-        print('+ ' + redir_cmd, file=sys.stderr)
+        print('+ ' + cmd_str, file=sys.stderr)
 
         # Try to print the command to the file as well. This will fail if it's
         # a binary file: ignore the failure.
@@ -91,9 +91,12 @@ def run_one(verbose: bool,
             print(e)
             # print(ps.communicate()[0])
             return(1)
-        except subprocess.TimeoutExpired:
-            print("Error: Timeout[{}s]: {}".format(timeout_s, cmd))
-            return(1)
+        except subprocess.TimeoutExpired as e:
+            print("Error: Timeout[{}s]: {}".format(timeout_s, cmd_str))
+            if reraise:
+                raise e
+            else:
+                return(1)
         finally:
             if needs_closing:
                 stdstream_dest.close()
diff --git a/dv/uvm/core_ibex/scripts/test_run_result.py b/dv/uvm/core_ibex/scripts/test_run_result.py
index 1e641ca8..651ece4a 100644
--- a/dv/uvm/core_ibex/scripts/test_run_result.py
+++ b/dv/uvm/core_ibex/scripts/test_run_result.py
@@ -4,6 +4,7 @@
 # Licensed under the Apache License, Version 2.0, see LICENSE for details.
 # SPDX-License-Identifier: Apache-2.0
 
+from enum import Enum
 import pathlib3x as pathlib
 from typing import Optional, List
 import dataclasses
@@ -15,6 +16,18 @@ import logging
 logger = logging.getLogger(__name__)
 
 
+class Failure_Modes(Enum):
+    """Descriptive enum for the mode in which a test fails"""
+
+    NONE = 0
+    TIMEOUT = 1
+    PARSE_ERROR = 2
+    LOG_ERROR = 3
+
+    def __str__(self):
+        """Print enumerated values as e.g. TIMEOUT(1)"""
+        return f'{self.name}({self.value})'
+
 @typechecked
 @dataclasses.dataclass
 class TestRunResult(scripts_lib.testdata_cls):
@@ -26,6 +39,7 @@ class TestRunResult(scripts_lib.testdata_cls):
     """
     passed: Optional[bool] = None  # True if test passed
     # Message describing failure, includes a '[FAILED]: XXXX' line at the end.
+    failure_mode: Optional[Failure_Modes] = None
     failure_message: Optional[str] = None
 
     testdotseed: Optional[str] = None