Add pydantic schema + common configs for directed tests

This commit introduces the use of a pydantic model to validate the
data used to define directed tests. 'pydantic' is a python data validation
and settings management library which allows for structured data to be
checked against 'pydantic models'. In this case, we define a 'model' for
how we expect the directed-test yaml to look, including fields and datatypes,
and then validate that the imported file matches this expectation.
In effect, it allows the checking of the data to be separated from its use.

dv/uvm/core_ibex/scripts/directed_test_schema.py defines the pydantic model,
and provides a function 'import_model()' to check the file-contents and
return a valid dictionary of the structured data.
This commit is contained in:
Saad Khalid 2022-11-29 11:58:53 +00:00 committed by Greg Chadwick
parent 2a852685cb
commit ce53589e11
8 changed files with 324 additions and 118 deletions

View file

@ -6,20 +6,25 @@
# This file largely copies the formatting of the testlist.yaml
# used by riscv-dv, but only specifies directed tests.
#
# - All paths are relative to THIS FILE.
# - Each 'test' can specify a config by name to re-use common configuration
# - If a test redefines a key already in the config, the test option takes priority.
##########################################################
- config: base
ld_script: link.ld
includes: .
gcc_opts: -static -mcmodel=medany -fvisibility=hidden -nostdlib -nostartfiles
rtl_test: core_ibex_base_test
rtl_params:
PMPEnable: 1
timeout_s: 300
- test: empty
desc: >
Empty directed test
iterations: 1
test_srcs: empty/empty.S
ld_script: "link.ld"
includes: "."
gcc_opts: "-static -mcmodel=medany -fvisibility=hidden -nostdlib -nostartfiles"
rtl_test: core_ibex_base_test
rtl_params:
PMPEnable: 1
timeout_s: 300
config: base

View file

@ -68,13 +68,13 @@ def output_results_text(passing_tests: List[TestRunResult],
dest: TextIO):
'''Write results in text form to dest'''
print(box_comment('Details of failing tests'), file=dest)
print('\n'+box_comment('Details of failing tests'), file=dest)
if not bool(failing_tests):
print("No failing tests. Nice job!", file=dest)
for trr in failing_tests:
print(gen_test_run_result_text(trr), file=dest)
print(box_comment('Details of passing tests'), file=dest)
print('\n'+box_comment('Details of passing tests'), file=dest)
if not bool(passing_tests):
print("No passing tests. Hmmmm...", file=dest)
for trr in passing_tests:

View file

@ -79,7 +79,6 @@ def get_riscvdv_compile_cmds(md: RegressionMetadata, trr: TestRunResult) -> List
for word in cmd:
for old, new in rewrites:
word = word.replace(old, new)
if str(placeholder) in word:
raise RuntimeError("Couldn't replace every copy of "
f"placeholder in {cmd}")
@ -98,24 +97,15 @@ def get_directed_compile_cmds(md: RegressionMetadata, trr: TestRunResult) -> Lis
if e not in env:
raise RuntimeError("Missing required environment variables for the RISCV TOOLCHAIN")
# Get the data from the directed test yaml that we need to construct the command.
directed_data = read_yaml(md.directed_test_data)
trr.directed_data = next(filter(lambda item: (item.get('test') == trr.testname), directed_data), None)
directed_dir = md.directed_test_dir
includes = directed_dir/(pathlib.Path(trr.directed_data.get('includes')))
ld = directed_dir/(pathlib.Path(trr.directed_data.get('ld_script')))
trr.assembly = directed_dir/trr.directed_data.get('test_srcs')
trr.assembly = trr.directed_data.get('test_srcs')
trr.objectfile = trr.dir_test/'test.o'
trr.binary = trr.dir_test/'test.bin'
# Compose the compilation command
riscv_gcc_arg = trr.directed_data.get('gcc_opts') + \
f" -I{includes}" + \
f" -T{ld}"
# Compose the compilation commands
riscv_gcc_cmd = " ".join([env.get('RISCV_GCC'),
riscv_gcc_arg,
trr.directed_data.get('gcc_opts'),
f"-I{trr.directed_data.get('includes')}",
f"-T{trr.directed_data.get('ld_script')}",
f"-o {trr.objectfile}",
f"{trr.assembly}"])
riscv_gcc_bin_cmd = " ".join([env.get('RISCV_OBJCOPY'),
@ -127,8 +117,7 @@ def get_directed_compile_cmds(md: RegressionMetadata, trr: TestRunResult) -> Lis
def _main() -> int:
parser = argparse.ArgumentParser()
parser.add_argument('--dir-metadata', type=pathlib.Path, required=True)
parser.add_argument('--test-dot-seed', type=read_test_dot_seed, required=False)
parser.add_argument('--bin', type=pathlib.Path, required=False)
parser.add_argument('--test-dot-seed', type=read_test_dot_seed, required=True)
args = parser.parse_args()
tds = args.test_dot_seed
md = RegressionMetadata.construct_from_metadata_dir(args.dir_metadata)
@ -141,12 +130,13 @@ def _main() -> int:
cmds = get_directed_compile_cmds(md, trr)
trr.compile_asm_log = trr.dir_test/'compile.directed.log'
# Finally, run all the commands
trr.compile_asm_cmds = [format_to_cmd(cmd) for cmd in cmds]
trr.export(write_yaml=True)
# Finally, run all the commands
with trr.compile_asm_log.open('wb') as fd:
for cmd in trr.compile_asm_cmds:
ret = run_one(md.verbose, cmd)
ret = run_one(md.verbose, cmd, redirect_stdstreams=fd)
if ret != 0:
return ret

View file

@ -0,0 +1,220 @@
#!/usr/bin/env python3
# Copyright lowRISC contributors.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
"""Define a pydantic-schema for specifying directed tests."""
import sys
import pydantic
import pathlib3x as pathlib
from typing import List, Any
import scripts_lib
import logging
logger = logging.getLogger(__name__)
def make_valid_pathlib_path(cls, v: Any) -> pathlib.Path:
    """Coerce a raw field value into a pathlib.Path.

    Intended as a shared pre-validator helper for the pydantic models below:
    ``cls`` is the model class (unused) and ``v`` is the raw yaml value.
    Raises ValueError (pydantic's expected failure type) if coercion fails.
    """
    # Pass existing Path objects straight through untouched.
    if isinstance(v, pathlib.Path):
        return v
    try:
        coerced = pathlib.Path(v)
    except TypeError:
        # Re-raise as ValueError so pydantic reports it as a validation error.
        raise ValueError(f"Could not convert input ({v}) to valid Path")
    return coerced
def validate_path_exists(v: pathlib.Path, dt: pathlib.Path) -> pathlib.Path:
    """Resolve *v* relative to the file *dt* and check it exists on disk.

    All paths in the directed-test yaml are relative to the yaml file
    itself, so the resolved path is ``<dir containing dt>/v``. Returns the
    resolved path; raises ValueError if nothing exists at that location.
    """
    resolved = pathlib.Path(dt).parent / v
    if resolved.exists():
        return resolved
    raise ValueError(f"Path object does not exist on disk : {resolved}")
class DConfig(pydantic.BaseModel):  # noqa
    """Represent a common configuration for building directed tests.

    This object contains information that one or more tests will require
    to build, in a way that encourages reuse of common code.
    """

    class Config:  # noqa
        # Needed so pydantic accepts pathlib3x.Path, which it has no
        # built-in validator for.
        arbitrary_types_allowed = True

    ##################################
    #         DConfig.FIELDS         #
    ##################################

    config: str  # name of config (each DTest __must__ specify this too)

    # RTL Simulation Options
    rtl_test: str  # name of the RTL test (e.g. core_ibex_base_test)
    rtl_params: dict  # RTL parameter key/value pairs (e.g. PMPEnable: 1)
    timeout_s: pydantic.conint(gt=0)  # simulation timeout, must be positive

    # Directed Test Build Options
    gcc_opts: str  # any options that don't specify a path eg. "-O3 -g -static"
    ld_script: pathlib.Path  # -T<path>
    includes: pathlib.Path  # -I<path>

    ##################################
    #       DConfig.VALIDATORS       #
    ##################################

    @pydantic.validator('ld_script', 'includes', pre=True)
    def _make_valid_paths(cls, v: Any) -> pathlib.Path:
        # pre=True: coerce yaml strings to Path before pydantic's own checks.
        return make_valid_pathlib_path(cls, v)
class DTest(DConfig):  # noqa
    """Represent an entry for a single directed test.

    Each directed test (DTest) inherits from a directed config (DConfig),
    which can specify how the test's sources are built into a testcase. The
    inheritance structure allows multiple tests to inherit common
    configuration from a single config item, reusing the fields and
    reducing code duplication.
    """

    class Config:  # noqa
        # Needed so pydantic accepts pathlib3x.Path fields.
        arbitrary_types_allowed = True

    ##################################
    #          DTest.FIELDS          #
    ##################################

    test: str  # unique name of the test
    desc: str  # human-readable description of the test
    test_srcs: pathlib.Path  # test source file(s), relative to the yaml file
    iterations: pydantic.conint(gt=0)  # number of runs, must be positive

    ##################################
    #        DTest.VALIDATORS        #
    ##################################

    @pydantic.validator('test_srcs', pre=True)
    def _make_valid_paths(cls, v: Any) -> pathlib.Path:
        # pre=True: coerce yaml strings to Path before pydantic's own checks.
        return make_valid_pathlib_path(cls, v)
class DirectedTestsYaml(pydantic.BaseModel):  # noqa
    """Represent the schema for the <directed-tests>.yaml file.

    The file on-disk should be of the form...
    - A flat list of both DConfig and DTest items
    - Each DTest must specify an existing DConfig item with the key 'config'

    Note that on-disk representation of this file is slightly-different to
    the validation schema defined here, and as part of the validation process
    (see import_model()) we need to account for this.
    """

    class Config:  # noqa
        # Needed so pydantic accepts pathlib3x.Path fields.
        arbitrary_types_allowed = True

    yaml: pathlib.Path  # Represents the <directed-tests>.yaml config file
    configs: List[DConfig]  # all config entries found in the file
    tests: List[DTest]  # all test entries, pre-merged with their config

    @pydantic.validator('yaml')
    def yaml_file_must_exist(cls, v: pathlib.Path):
        """Check that the yaml file exists on disk.

        This field needs its own validator, as other files are checked
        relative to the yaml file.
        """
        if not v.exists():
            raise ValueError(f"Path object not found in filesystem : {v}")
        return v

    @pydantic.root_validator()
    def test_config_must_exist(cls, values):
        """Check that if a test specifies a common config, it exists in the list available."""
        configs = [c.config for c in values.get('configs')]
        for test in values.get('tests'):
            if test.config not in configs:
                raise ValueError(
                    f"Test '{test.test}' gave the config '{test.config}', but "
                    "this config does not exist in the file "
                    f"'{values.get('yaml')}'. Configs detected : {configs} \n")
        return values

    @pydantic.root_validator()
    def all_paths_must_exist(cls, values):
        """Check that all fields specifying files exist on disk.

        We need to check all fields recursively for pathlib.Path fields,
        then ensure that those files exist, relative to the yaml file.
        """
        def check_model_path_fields_exist(model):
            # Find every declared field typed as a Path...
            for f in filter(lambda f: (f.type_ == pathlib.Path),
                            model.__fields__.values()):
                # ...and replace it with its resolved, existence-checked form
                # (relative to the yaml file's directory).
                p = validate_path_exists(getattr(model, f.name),
                                         values.get('yaml'))
                setattr(model, f.name, p)
        for c in values.get('configs'):
            check_model_path_fields_exist(c)
        for t in values.get('tests'):
            check_model_path_fields_exist(t)
        return values
def import_model(directed_test_yaml: pathlib.Path) -> dict:
    """Import and validate data against the model schema, return data as dict.

    If validation errors occur, print them and exit immediately.

    EXAMPLE VALIDATION ERROR

    ERROR:directed_test_schema:
    ################################################################################
    The following errors were encountered while validating :
    --------------------------------------------------------------------------------
    2 validation errors for DirectedTestsYaml
    configs -> 1 -> rtl_test
      field required (type=value_error.missing)
    tests -> 0 -> iterations
      ensure this value is greater than 0 (type=value_error.number.not_gt; limit_value=0)
    ################################################################################

    MEANING
    --> The config entry at index 1 (2nd in the file) is missing the key 'rtl_test'
    --> The test entry at index 0 (1st in the file) has a 'iterations' value that is not >0

    TODO print file/linenum for each validation error
         https://github.com/pydantic/pydantic/issues/1273
    """
    yaml_data = scripts_lib.read_yaml(directed_test_yaml)

    # The on-disk file is a flat list: entries without a 'test' key are
    # configs, entries with one are tests.
    tests = []
    configs = list(filter((lambda i: i.get('test') is None), yaml_data))
    for t in filter((lambda i: i.get('test') is not None), yaml_data):
        # For each test, get the matching config and join the sets together.
        # This works because DTest inherits from DConfig.
        try:
            t_config = next(filter(
                lambda i: i.get('config') == t.get('config'), configs))
        except StopIteration:
            raise ValueError(
                f"Test '{t['test']}' gave the config '{t['config']}', but "
                "this config does not exist in the file "
                f"'{directed_test_yaml}'.\n")
        # Test keys take priority over the config's keys (dict-merge order).
        tests.append({**t_config, **t})

    try:
        m = DirectedTestsYaml(
            yaml=directed_test_yaml,
            configs=configs,
            tests=tests)
    except pydantic.ValidationError as e:
        hl = 80*"#" + "\n"  # hash-line
        dl = 80*"-" + "\n"  # dash-line
        # Log a framed summary of all validation errors, then exit non-zero.
        logger.error(f"\n{hl}\n"
                     "The following errors were encountered while validating :"
                     f"\n{dl}\n{e}\n\n{hl}")
        sys.exit(1)

    return m.dict()

View file

@ -20,7 +20,6 @@ from dataclasses import field
from typeguard import typechecked
import portalocker
import signal
import shutil
import setup_imports
import scripts_lib
@ -28,6 +27,7 @@ import ibex_cmd
import ibex_config
import lib as riscvdv_lib
from test_run_result import TestRunResult, TestType
import directed_test_schema
import logging
logger = logging.getLogger(__name__)
@ -115,11 +115,12 @@ class RegressionMetadata(scripts_lib.testdata_cls):
dir_cov_merged : pathlib.Path = field(init=False, compare=False, default_factory=pathlib.Path)
dir_cov_report : pathlib.Path = field(init=False, compare=False, default_factory=pathlib.Path)
tests_pickle_files: Optional[List[pathlib.Path]] = None
tests_pickle_files: List[pathlib.Path] = field(init=False, compare=False, default_factory=lambda:[])
def __post_init__(self):
"""Construct all the dependent metadata."""
self._setup_directories()
self.pickle_file = self.dir_metadata/'metadata.pickle'
self.yaml_file = self.dir_metadata/'metadata.yaml'
self.ibex_configs = self.ibex_root/'ibex_configs.yaml'
@ -133,20 +134,6 @@ class RegressionMetadata(scripts_lib.testdata_cls):
self.environment_variables = dict(os.environ)
def _get_ibex_metadata(self):
"""Get the desired ibex_config parameters.
# Any extra derivative data can be setup here.
"""
if self.iterations is not None and self.iterations <= 0:
raise RuntimeError('Bad --iterations argument: must be positive')
if self.seed < 0:
raise RuntimeError('Bad --start_seed argument: must be non-negative')
cfg = ibex_cmd.get_config(self.ibex_config)
self.isa_ibex, self.isa_iss = ibex_cmd.get_isas_for_config(cfg)
def _setup_directories(self):
"""Set the directory variables which contain all other build factors."""
self.ibex_root = setup_imports._IBEX_ROOT
@ -164,6 +151,19 @@ class RegressionMetadata(scripts_lib.testdata_cls):
self.dir_cov_merged = self.dir_cov/'merged'
self.dir_cov_report = self.dir_cov/'report'
def _get_ibex_metadata(self):
"""Get the desired ibex_config parameters.
# Any extra derivative data can be setup here.
"""
if self.iterations is not None and self.iterations <= 0:
raise RuntimeError('Bad --iterations argument: must be positive')
if self.seed < 0:
raise RuntimeError('Bad --start_seed argument: must be non-negative')
cfg = ibex_cmd.get_config(self.ibex_config)
self.isa_ibex, self.isa_iss = ibex_cmd.get_isas_for_config(cfg)
@classmethod
def arg_list_initializer(cls,
dir_metadata: pathlib.Path,
@ -182,22 +182,17 @@ class RegressionMetadata(scripts_lib.testdata_cls):
Returns a constructed RegressionMetadata object.
"""
if dir_out is pathlib.Path():
raise RuntimeError("self.dir_metadata must be initialized)")
if dir_metadata is pathlib.Path():
raise RuntimeError("self.dir_metadata must be initialized)")
dummy_obj = RegressionMetadata()
dummy = dataclasses.asdict(dummy_obj)
logger.debug(dummy) # Useful to see types of all the k,v pairs
# Any fields declared in the class initialization (see above) can be populated
# by constructing a dict with keys matching the fields, and then passing **dict
# to the construction of the class. We do this here to populate from 'args_list'.
# Any fields declared in the class initialization (see above) can be
# populated by constructing a dict with keys matching the fields, and
# then passing **dict to the construction of the class. We do this here
# to populate from 'args_list'.
args_dict = {}
args_dict['raw_args_str'] = args_list
args_dict['raw_args_dict'] = {k: v for k, v in
(pair.split('=', maxsplit=1)
args_dict['raw_args_dict'] = {k: v for k, v in (pair.split('=', maxsplit=1)
for pair in shlex.split(args_list))}
kv_tuples = (pair.split('=', maxsplit=1) for pair in shlex.split(args_list))
@ -243,14 +238,6 @@ class RegressionMetadata(scripts_lib.testdata_cls):
dir_metadata=dir_metadata.resolve(),
**args_dict)
# Fetch/set more derivative metadata specific to the ibex
md._get_ibex_metadata()
# Setup the tests/counts we are going to use, by parsing the
# riscv-dv/directed-test structured data.
# eg. testlist.yaml / directed_testlist.yaml
md.tests_and_counts = md.get_tests_and_counts()
return md
@classmethod
@ -271,9 +258,7 @@ class RegressionMetadata(scripts_lib.testdata_cls):
If iterations is provided, it should be a positive number and overrides the
number of iterations for each test.
"""
riscvdv_matched_list: ibex_cmd._TestEntries = self.process_riscvdv_testlist()
directed_matched_list: ibex_cmd._TestEntries = self.process_directed_testlist()
@ -291,49 +276,44 @@ class RegressionMetadata(scripts_lib.testdata_cls):
# Convert to desired output format (and check for well-formedness)
ret = []
for test in riscvdv_filtered_list:
name = test['test']
iterations = test['iterations']
assert isinstance(name, str) and isinstance(iterations, int)
assert iterations > 0
name, iterations = (test['test'], test['iterations'])
assert isinstance(name, str) and isinstance(iterations, int) \
and iterations > 0
ret.append((name, iterations, TestType.RISCVDV))
for test in directed_filtered_list:
name = test['test']
iterations = test['iterations']
assert isinstance(name, str) and isinstance(iterations, int)
assert iterations > 0
name, iterations = (test['test'], test['iterations'])
assert isinstance(name, str) and isinstance(iterations, int) \
and iterations > 0
ret.append((name, iterations, TestType.DIRECTED))
return ret
def process_riscvdv_testlist(self) -> [ibex_cmd._TestEntries]:
def process_riscvdv_testlist(self) -> ibex_cmd._TestEntries:
"""Extract test information from the riscvdv testlist yaml."""
matched_list: [ibex_cmd._TestEntries] = []
matched_list: ibex_cmd._TestEntries = []
# Get all the tests from the 'testlist' that match the 'test' argument.
riscvdv_lib.process_regression_list(
testlist=self.ibex_riscvdv_testlist,
test=(self.test if self.test is not None else 'all'),
test=(self.test or 'all'),
iterations=(self.iterations or 0),
matched_list=matched_list,
riscv_dv_root=self.riscvdv_root)
return matched_list
def process_directed_testlist(self) -> [ibex_cmd._TestEntries]:
def process_directed_testlist(self) -> ibex_cmd._TestEntries:
"""Extract test information from the directed_test yaml.
Employ a similar format to the riscv-dv testlist structure to
define directed tests.
"""
m = directed_test_schema.import_model(self.directed_test_data)
matched_list: ibex_cmd._TestEntries = []
yaml_data = scripts_lib.read_yaml(self.directed_test_data)
mult_test = self.test.split(',')
for entry in yaml_data:
if (entry['test'] in mult_test) or (self.test == "all"):
if (self.iterations is not None) and (entry['iterations'] > 0):
entry['iterations'] = self.iterations
for entry in m.get('tests'):
if (entry.get('test') in self.test.split(',')) or (self.test == "all"):
entry.update({'iterations': (self.iterations or entry['iterations'])})
if entry['iterations'] > 0:
matched_list.append(entry)
@ -346,7 +326,7 @@ class Ops(Enum):
CREATE = 'create_metadata'
PRINT_FIELD = 'print_field'
def __str__(self):
def __str__(self): # noqa
return self.value
@ -357,12 +337,8 @@ def _main():
parser.add_argument('--dir-out', type=pathlib.Path, required=False)
parser.add_argument('--args-list', type=str, required=False)
parser.add_argument('--field', type=str, required=False)
args = parser.parse_args()
# Parse all variables from the argument string, and then add them
# to the metadata object
if args.op == Ops.CREATE:
"""
Use the --args-list input, a string of 'KEY=VALUE KEY2=VALUE2',
@ -374,19 +350,26 @@ def _main():
logger.error("Build metadata already exists, not recreating from scratch.")
return
md = RegressionMetadata.arg_list_initializer(dir_metadata=args.dir_metadata,
md = RegressionMetadata.arg_list_initializer(
dir_metadata=args.dir_metadata,
dir_out=args.dir_out,
args_list=args.args_list)
# Setup metadata objects for each of the tests to be run. Construct a list of these
# objects inside the regression_metadata object constructed above, so we can easily
# find and import them later, and give each test object a link back to this top-level
# object that defines the wider regression.
md.tests_pickle_files = []
# Fetch/set more derivative metadata specific to the ibex
md._get_ibex_metadata()
# Setup the tests/counts we are going to use, by parsing the
# riscv-dv/directed-test structured data.
# eg. testlist.yaml / directed_testlist.yaml
md.tests_and_counts = md.get_tests_and_counts()
if not md.tests_and_counts:
raise RuntimeError("md.tests_and_counts is empty, cant get TEST.SEED strings.")
raise RuntimeError("md.tests_and_counts is empty, cannot get TEST.SEED strings.")
# Setup metadata objects for each of the tests to be run. Construct a
# list of these objects inside the regression_metadata object
# constructed above, so we can easily find and import them later, and
# give each test object a link back to this top-level object that
# defines the wider regression.
for test, count, testtype in md.tests_and_counts:
for testseed in range(md.seed, md.seed + count):
tds_str = f"{test}.{testseed}"
@ -406,6 +389,12 @@ def _main():
pickle_file=md.dir_metadata/(tds_str + ".pickle"),
yaml_file=md.dir_tests/tds_str/'trr.yaml')
# Get the data from the directed test yaml that we need to construct the command.
if testtype == TestType.DIRECTED:
trr.directed_data = (next(filter(
lambda i: i['test'] == test,
directed_test_schema.import_model(md.directed_test_data).get('tests'))))
# Save the path into a list in the regression metadata object for later.
md.tests_pickle_files.append(trr.pickle_file)
# Export the trr structure to disk.

View file

@ -31,10 +31,13 @@ def _main() -> int:
md = RegressionMetadata.construct_from_metadata_dir(args.dir_metadata)
trr = TestRunResult.construct_from_metadata_dir(args.dir_metadata, f"{tds[0]}.{tds[1]}")
testopts = get_test_entry(testname=trr.testname,
testlist=(md.ibex_riscvdv_testlist
if (trr.testtype == TestType.RISCVDV) else
md.directed_test_data))
if (trr.testtype == TestType.RISCVDV):
testopts = get_test_entry(trr.testname, md.ibex_riscvdv_testlist)
elif (trr.testtype == TestType.DIRECTED):
testopts = trr.directed_data
trr.rtl_test = testopts['rtl_test']
trr.timeout_s = testopts.get('timeout_s') or md.run_rtl_timeout_s
if not os.path.exists(trr.binary):
raise RuntimeError(
@ -49,15 +52,13 @@ def _main() -> int:
if sim_opts_raw:
sim_opts += sim_opts_raw.replace('\n', '')
trr.timeout_s = (testopts.get('timeout_s') if (testopts.get('timeout_s') is not None) else
md.run_rtl_timeout_s)
trr.rtl_log = trr.dir_test / 'rtl_sim.log'
trr.rtl_trace = trr.dir_test / 'trace_core_00000000.log'
trr.iss_cosim_trace = trr.dir_test / f'{md.iss}_cosim_trace_core_00000000.log'
subst_vars_dict = {
'cwd': md.ibex_root,
'test_name': testopts['test'],
'rtl_test': testopts['rtl_test'],
'test_name': trr.testname,
'rtl_test': trr.rtl_test,
'seed': str(trr.seed),
'binary': trr.binary,
'test_dir': trr.dir_test,

View file

@ -35,7 +35,7 @@ def get_test_entry(testname: str, testlist: pathlib.Path) -> TestEntry:
yaml_data = scripts_lib.read_yaml(testlist)
for entry in yaml_data:
if entry['test'] == testname:
if entry.get('test') == testname:
return entry
raise RuntimeError('No matching test entry for {!r}'.format(testname))

View file

@ -16,6 +16,7 @@ pathlib3x # Backports some useful features
typing-utils # Ditto
typeguard
portalocker
pydantic
# Needed by dvsim.py (not actually used in Ibex)
hjson