diff --git a/README.md b/README.md
index e839f24bf..691a46a04 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,9 @@ CVA6 is a 6-stage, single-issue, in-order CPU which implements the 64-bit RISC-V
It has a configurable size, separate TLBs, a hardware PTW and branch-prediction (branch target buffer and branch history table). The primary design goal was on reducing critical path length.
+A performance model of CVA6 is available in the `perf-model/` folder of this repository.
+It can be used to investigate performance-related micro-architecture changes.
+
diff --git a/perf-model/README.md b/perf-model/README.md
new file mode 100644
index 000000000..77ba8575a
--- /dev/null
+++ b/perf-model/README.md
@@ -0,0 +1,79 @@
+# CVA6 cycle-accurate performance model
+
+This folder contains a cycle-accurate performance model of the CVA6 control path.
+
+It was developed to explore microarchitecture changes in CVA6 before implementing them.
+
+To cite this model, please head to the end of this document.
+
+
+## Getting started
+
+### Adapt RVFI trace generation
+
+The model parses the RVFI trace with a regular expression which expects the cycle number to be present on each line.
+The value is not used by the model but it is used to compare the model and CVA6.
+
+To emit cycle number in RVFI trace, modify `corev_apu/tb/rvfi_tracer.sv` in CVA6 repository as below.
+
+```diff
+- $fwrite(f, "core 0: 0x%h (0x%h) DASM(%h)\n",
+- pc64, rvfi_i[i].insn, rvfi_i[i].insn);
++ $fwrite(f, "core 0: 0x%h (0x%h) @%d DASM(%h)\n",
++ pc64, rvfi_i[i].insn, cycles, rvfi_i[i].insn);
+```
+
+
+### Generate an RVFI trace
+
+To generate an RVFI trace, follow the instructions in the CVA6 repository to run a simulation.
+The RVFI trace will be in `verif/sim/out_<date>/<simulator>/<test-name>.log`.
+
+
+### Running the model
+
+```bash
+python3 model.py verif/sim/out_<date>/<simulator>/<test-name>.log
+```
+
+
+### Exploring design space
+
+In `model.py`, the `main` function runs the model with arguments which override default values.
+Generic parameters are available in `Model.__init__`.
+You can add new parameters to explore here.
+
+To perform exploration, run the model in a loop, like `issue_commit_graph` does.
+The `display_scores` function is meant to print a 3D plot if you have `matplotlib`.
+`issue_commit_graph` prints the scores so that you can store them and display the figure without re-running the model.
+
+
+## Files
+
+| Name | Description |
+| :--- | :--- |
+| `cycle_diff.py` | Calculates duration of each instruction in an RVFI trace |
+| `isa.py` | Module to create Python objects from RISC-V instructions |
+| `model.py` | The CVA6 performance model |
+
+
+## Citing
+
+```bibtex
+@inproceedings{cf24,
+ author = {Allart, C\^{o}me and Coulon, Jean-Roch and Sintzoff, Andr\'{e} and Potin, Olivier and Rigaud, Jean-Baptiste},
+ title = {Using a Performance Model to Implement a Superscalar CVA6},
+ year = {2024},
+ isbn = {9798400704925},
+ publisher = {Association for Computing Machinery},
+ url = {https://doi.org/10.1145/3637543.3652871},
+ doi = {10.1145/3637543.3652871},
+ abstract = {A performance model of CVA6 RISC-V processor is built to evaluate performance-related modifications before implementing them in RTL. Its accuracy is 99.2\% on CoreMark. This model is used to evaluate a superscalar feature for CVA6. During design phase, the model helped detecting and fixing performance bugs. The superscalar feature resulted in a CVA6 performance improvement of 40\% on CoreMark.},
+ booktitle = {Proceedings of the 21st ACM International Conference on Computing Frontiers: Workshops and Special Sessions},
+ pages = {43–46},
+ numpages = {4},
+ keywords = {CVA6, Cycle-Based Model, Multi-Issue, Performance, RISC-V, Superscalar},
+ location = {Ischia, Italy},
+ series = {CF '24 Companion}
+}
+```
diff --git a/perf-model/cycle_diff.py b/perf-model/cycle_diff.py
new file mode 100644
index 000000000..f9b67bd4c
--- /dev/null
+++ b/perf-model/cycle_diff.py
@@ -0,0 +1,80 @@
+# Copyright 2024 Thales Silicon Security
+#
+# Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+# You may obtain a copy of the License at https://solderpad.org/licenses/
+#
+# Original Author: Côme ALLART - Thales
+
+import re
+import sys
+
+re_csrr_minstret = re.compile(r"^csrr\s+\w+,\s*minstret$")
+re_full = re.compile(
+ r"([a-z]+)\s+0:\s*0x00000000([0-9a-f]+)\s*\(([0-9a-fx]+)\)\s*(\S*)@\s*([0-9]+)\s*(.*)"
+)
+
class Trace:
    """One instruction parsed from a line of the RVFI trace"""

    def __init__(self, addr, cycle, mnemo, flags):
        # addr is stored exactly as received; report() prints it after "0x"
        self.addr, self.cycle = addr, cycle
        self.mnemo, self.flags = mnemo, flags
        # Not known at construction time: assigned later by the caller
        self.delta = None

    def report(self):
        """Format the instruction as '+<delta> <flags> 0x<addr>: <mnemo>'"""
        return "+{} {} 0x{}: {}".format(
            self.delta, self.flags, self.addr, self.mnemo
        )
+
def print_data(name, value):
    "Prints 'name = value', padding the name to 24 columns to align the '='"
    print(f"{name.ljust(24)} = {value}")
+
def read_traces(input_file):
    """
    Collect stage traces from file

    Only lines matching `re_full` are considered. Each instruction matching
    `re_csrr_minstret` toggles recording on or off and is itself never
    kept; recording is off at the start of the file.
    Returns the list of recorded Trace objects, in file order.
    """
    kept = []
    recording = False
    with open(input_file, "r", encoding="utf8") as stream:
        for raw in stream:
            match = re_full.search(raw.strip())
            if match is None:
                continue
            trace = Trace(
                match.group(2),       # address (hex digits)
                int(match.group(5)),  # cycle number
                match.group(6),       # mnemonic text
                match.group(4),       # flags
            )
            if re_csrr_minstret.search(trace.mnemo):
                # Marker instruction: flip the recording state, drop the line
                recording = not recording
            elif recording:
                kept.append(trace)
    return kept
+
def write_traces(outfile, traces):
    "Write the report line of every instruction to the output file"
    print("output file:", outfile)
    with open(outfile, "w", encoding="utf8") as out:
        out.writelines(trace.report() + "\n" for trace in traces)
+
def main(input_file: str):
    """
    Main function

    Reads the RVFI trace `input_file`, computes for each recorded
    instruction the number of cycles elapsed since the previous one,
    prints global statistics and writes the per-instruction report to
    `traceout.log`.
    Exits with an error message when the trace has no recorded instruction.
    """
    traces = read_traces(input_file)
    if not traces:
        # Without this guard, traces[0] below fails with a bare IndexError
        sys.exit(f"error: no recorded instruction found in {input_file}")
    previous = traces[0].cycle
    cycle_number = traces[-1].cycle - previous + 1
    for trace in traces:
        trace.delta = trace.cycle - previous
        previous = trace.cycle
    print_data("cycle number", cycle_number)
    # NOTE(review): presumably assumes the recorded region is a single
    # CoreMark iteration -- confirm against the benchmark setup
    print_data("Coremark/MHz", 1000000 / cycle_number)
    print_data("instruction number", len(traces))
    print_data("IPC", len(traces) / cycle_number)
    write_traces("traceout.log", traces)

if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <rvfi-trace.log>")
    main(sys.argv[1])
diff --git a/perf-model/isa.py b/perf-model/isa.py
new file mode 100644
index 000000000..90cd772a4
--- /dev/null
+++ b/perf-model/isa.py
@@ -0,0 +1,574 @@
+# Copyright 2024 Thales Silicon Security
+#
+# Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+# You may obtain a copy of the License at https://solderpad.org/licenses/
+#
+# Original Author: Côme ALLART - Thales
+
+"""
+Represents the instruction set
+"""
+
+from dataclasses import dataclass
+
class Reg:
    """Integer register numbers, available under both naming schemes"""
    # ABI names
    zero, ra, sp, gp, tp = range(5)
    t0, t1, t2 = 5, 6, 7
    s0 = fp = 8  # two ABI names for the same register
    s1 = 9
    a0, a1, a2, a3, a4, a5, a6, a7 = range(10, 18)
    s2, s3, s4, s5, s6, s7, s8, s9, s10, s11 = range(18, 28)
    t3, t4, t5, t6 = range(28, 32)
    # Register names
    (
        x0, x1, x2, x3, x4, x5, x6, x7,
        x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23,
        x24, x25, x26, x27, x28, x29, x30, x31,
    ) = range(32)
+
def sign_ext(imm, index, xlen=32):
    """
    Sign extends a value
    imm: value to sign extend, must fit in index + 1 bits
    index: index of the sign bit of the value
    xlen: target length for the sign extended value
    Returns the xlen-bit two's complement pattern as a non-negative integer
    """
    width = index + 1
    assert (imm >> width) == 0
    sign = imm >> index
    # As many ones as there are bits above the sign bit
    fill = (1 << (xlen - width)) - 1
    return imm | ((sign * fill) << width)
+
@dataclass
class AddrFields:
    """What is needed to build a memory address: a register and an offset"""
    # Number of the register providing the base address
    base_reg: int
    # Offset associated with the base register
    offset: int
+
class Rtype:
    """R-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit encoding `instr.bin` into the R-type fields"""
        self.funct7 = instr.bin >> 25
        self.rs2 = (instr.bin >> 20) & 31
        self.rs1 = (instr.bin >> 15) & 31
        self.funct3 = (instr.bin >> 12) & 7
        self.rd = (instr.bin >> 7) & 31
        # The opcode is 7 bits wide (bits 6:0); a 6-bit mask drops bit 6
        self.opcode = instr.bin & 0x7f
+
class Itype:
    """I-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit encoding `instr.bin` into the I-type fields"""
        self.rs1 = (instr.bin >> 15) & 31
        self.funct3 = (instr.bin >> 12) & 7
        self.rd = (instr.bin >> 7) & 31
        # The opcode is 7 bits wide (bits 6:0); a 6-bit mask drops bit 6
        self.opcode = instr.bin & 0x7f
        # 12-bit immediate in bits 31:20, sign bit at index 11
        self.imm = sign_ext(instr.bin >> 20, 11)
+
class Stype:
    """S-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit encoding `instr.bin` into the S-type fields"""
        self.rs2 = (instr.bin >> 20) & 31
        self.rs1 = (instr.bin >> 15) & 31
        self.funct3 = (instr.bin >> 12) & 7
        # The opcode is 7 bits wide (bits 6:0); a 6-bit mask drops bit 6
        self.opcode = instr.bin & 0x7f
        # imm[11:5] comes from bits 31:25, imm[4:0] from bits 11:7
        self.imm = sign_ext(
            ((instr.bin >> 25) << 5)
            | ((instr.bin >> 7) & 31),
            11,
        )
+
class Btype:
    """B-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit encoding `instr.bin` into the B-type fields"""
        self.rs2 = (instr.bin >> 20) & 31
        self.rs1 = (instr.bin >> 15) & 31
        self.funct3 = (instr.bin >> 12) & 7
        # The opcode is 7 bits wide (bits 6:0); a 6-bit mask drops bit 6
        self.opcode = instr.bin & 0x7f
        # imm[12] = bit 31, imm[11] = bit 7, imm[10:5] = bits 30:25,
        # imm[4:1] = bits 11:8; the sign bit is at index 12
        self.imm = sign_ext(
            ((instr.bin >> 31) << 12)
            | (((instr.bin >> 7) & 1) << 11)
            | (((instr.bin >> 25) & 0x3f) << 5)
            | (((instr.bin >> 8) & 15) << 1),
            12,
        )
+
class Utype:
    """U-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit encoding `instr.bin` into the U-type fields"""
        self.imm_31_12 = instr.bin >> 12
        # Same bits as rd (bits 11:7)
        self.imm_4_0 = (instr.bin >> 7) & 31
        self.rd = (instr.bin >> 7) & 31
        # The opcode is 7 bits wide (bits 6:0); a 6-bit mask drops bit 6
        self.opcode = instr.bin & 0x7f
        # Immediate placed back at bits 31:12 (not sign extended)
        self.imm = self.imm_31_12 << 12
+
class Jtype:
    """J-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit encoding `instr.bin` into the J-type fields"""
        self.rd = (instr.bin >> 7) & 31
        # The opcode is 7 bits wide (bits 6:0); a 6-bit mask drops bit 6
        self.opcode = instr.bin & 0x7f
        # imm[20] = bit 31, imm[19:12] = bits 19:12, imm[11] = bit 20,
        # imm[10:1] = bits 30:21; the sign bit is at index 20
        self.imm = sign_ext(
            ((instr.bin >> 31) << 20)
            | (((instr.bin >> 12) & 0xff) << 12)
            | (((instr.bin >> 20) & 1) << 11)
            | (((instr.bin >> 21) & 0x3ff) << 1),
            20,
        )
+
class MOItype:
    """Memory ordering instructions"""
    def __init__(self, instr):
        """Split the 32-bit encoding `instr.bin` into the fence fields"""
        self.fm = instr.bin >> 28
        # Predecessor set, bits 27:24
        self.PI = (instr.bin >> 27) & 1
        self.PO = (instr.bin >> 26) & 1
        self.PR = (instr.bin >> 25) & 1
        self.PW = (instr.bin >> 24) & 1
        # Successor set, bits 23:20
        self.SI = (instr.bin >> 23) & 1
        self.SO = (instr.bin >> 22) & 1
        self.SR = (instr.bin >> 21) & 1
        self.SW = (instr.bin >> 20) & 1
        self.rs1 = (instr.bin >> 15) & 31
        self.funct3 = (instr.bin >> 12) & 7
        self.rd = (instr.bin >> 7) & 31
        # The opcode is 7 bits wide (bits 6:0); a 6-bit mask drops bit 6
        self.opcode = instr.bin & 0x7f
+
class CRtype:
    """Compressed register"""
    control = ['C.JR', 'C.JALR']
    regreg = ['C.MV', 'C.ADD']

    def __init__(self, instr):
        """
        Split the 16-bit encoding `instr.bin` into the CR-type fields and
        resolve the shared 'C.J[AL]R/C.MV/C.ADD' slot into `self.name`.
        `rd` is only set for the register-register instructions.
        """
        word = instr.bin
        self.funct4 = word >> 12
        reg = (word >> 7) & 31
        self.rs2 = (word >> 2) & 31
        self.op = word & 3
        self.rs1 = reg
        name = instr.base()
        if name == 'C.J[AL]R/C.MV/C.ADD':
            # The low bit of funct4 together with rs2 == 0 / reg == 0
            # selects the actual instruction
            low_bit = self.funct4 & 1
            if self.rs2 != 0:
                name = 'C.ADD' if low_bit else 'C.MV'
            elif not low_bit:
                name = 'C.JR'
            elif reg == 0:
                name = 'C.EBREAK'
            else:
                name = 'C.JALR'
        if name in CRtype.regreg:
            self.rd = reg
        self.name = name
+
class CItype:
    """Compressed immediate"""
    SPload = ['C.LWSP', 'C.LDSP', 'C.LQSP', 'C.FLWSP', 'C.FLDSP']
    constgen = ['C.LI', 'C.LUI']
    regimm = ['C.ADDI', 'C.ADDIW', 'C.ADDI16SP', 'C.SLLI']

    # The decoders below are plain functions looked up through the class
    # (e.g. `CItype.imm(bits)`); they take the raw 16-bit encoding.

    def Woffset(i):
        """offset[5] = bit 12, offset[4:2] = bits 6:4, offset[7:6] = bits 3:2"""
        return (((i >> 12) & 1) << 5) | (((i >> 4) & 7) << 2) \
            | (((i >> 2) & 3) << 6)

    def Doffset(i):
        """offset[5] = bit 12, offset[4:3] = bits 6:5, offset[8:6] = bits 4:2"""
        return (((i >> 12) & 1) << 5) | (((i >> 5) & 3) << 3) \
            | (((i >> 2) & 7) << 6)

    def Qoffset(i):
        """offset[5] = bit 12, offset[4] = bit 6, offset[9:6] = bits 5:2"""
        return (((i >> 12) & 1) << 5) | (((i >> 6) & 1) << 4) \
            | (((i >> 2) & 15) << 6)

    def imm(i):
        """imm[5] = bit 12, imm[4:0] = bits 6:2 (not sign extended)"""
        return (((i >> 12) & 1) << 5) | ((i >> 2) & 31)

    def immsp(i):
        """imm[9] = bit 12, imm[4] = bit 6, imm[6] = bit 5,
        imm[8:7] = bits 4:3, imm[5] = bit 2 (not sign extended)"""
        return (((i >> 12) & 1) << 9) | (((i >> 6) & 1) << 4) \
            | (((i >> 5) & 1) << 6) | (((i >> 3) & 3) << 7) \
            | (((i >> 2) & 1) << 5)

    offset = {
        'C.LWSP': Woffset,
        'C.LDSP': Doffset,
        'C.LQSP': Qoffset,
        'C.FLWSP': Woffset,
        'C.FLDSP': Doffset,
    }

    def __init__(self, instr):
        """
        Split the 16-bit encoding `instr.bin` into the CI-type fields.
        Which of rd / rs1 / offset / imm / nzimm / shamt are set depends
        on the base instruction.
        """
        word = instr.bin
        self.funct3 = word >> 13
        reg = (word >> 7) & 31
        self.op = word & 3
        base = instr.base()
        if base == 'C.LUI/C.ADDI16SP':
            # Shared slot: the register field tells the two apart
            base = 'C.ADDI16SP' if reg == Reg.sp else 'C.LUI'
        if base in CItype.SPload:
            self.rd = reg
            self.rs1 = Reg.sp
            # zero-extended offset
            self.offset = CItype.offset[base](word)
        elif base in CItype.constgen:
            self.rd = reg
            if base == 'C.LI':
                self.imm = sign_ext(CItype.imm(word), 5)
            elif base == 'C.LUI':
                self.nzimm = sign_ext(CItype.imm(word) << 12, 17)
        elif base in CItype.regimm:
            self.rd = reg
            self.rs1 = reg
            if base == 'C.ADDI':
                self.nzimm = sign_ext(CItype.imm(word), 5)
            elif base == 'C.ADDIW':
                self.imm = sign_ext(CItype.imm(word), 5)
            elif base == 'C.ADDI16SP':
                self.nzimm = sign_ext(CItype.immsp(word), 9)
            elif base == 'C.SLLI':
                self.shamt = CItype.imm(word)
+
class CSStype:
    """Compressed stack-relative store"""

    # Plain functions looked up through the class; they take the raw
    # 16-bit encoding and return the zero-extended offset.

    def Woffset(i):
        """offset[5:2] = bits 12:9, offset[7:6] = bits 8:7"""
        return (((i >> 9) & 15) << 2) | (((i >> 7) & 3) << 6)

    def Doffset(i):
        """offset[5:3] = bits 12:10, offset[8:6] = bits 9:7"""
        return (((i >> 10) & 7) << 3) | (((i >> 7) & 7) << 6)

    def Qoffset(i):
        """offset[5:4] = bits 12:11, offset[9:6] = bits 10:7"""
        return (((i >> 11) & 3) << 4) | (((i >> 7) & 15) << 6)

    offset = {
        'C.SWSP': Woffset,
        'C.SDSP': Doffset,
        'C.SQSP': Qoffset,
        'C.FSWSP': Woffset,
        'C.FSDSP': Doffset,
    }

    def __init__(self, instr):
        """Split the 16-bit encoding `instr.bin` into the CSS-type fields"""
        word = instr.bin
        self.funct3 = word >> 13
        # The base register is always the stack pointer
        self.rs1 = Reg.sp
        self.rs2 = (word >> 2) & 31
        self.op = word & 3
        # zero-extended offset
        self.offset = CSStype.offset[instr.base()](word)
+
class CIWtype:
    """Compressed wide immediate"""
    def __init__(self, instr):
        """Split the 16-bit encoding `instr.bin` into the CIW-type fields"""
        word = instr.bin
        self.funct3 = word >> 13
        # 3-bit register field, mapped onto x8..x15
        self.rd = ((word >> 2) & 7) + 8
        self.op = word & 3
        # zero-extended (unsigned) non-zero immediate:
        # nzuimm[5:4] = bits 12:11, nzuimm[9:6] = bits 10:7,
        # nzuimm[2] = bit 6, nzuimm[3] = bit 5
        self.nzuimm = (
            (((word >> 11) & 3) << 4)
            | (((word >> 7) & 15) << 6)
            | (((word >> 6) & 1) << 2)
            | (((word >> 5) & 1) << 3)
        )
        if instr.base() == 'C.ADDI4SPN':
            self.rs1 = Reg.sp
+
def CLS_Woffset(i):
    """Word offset of compressed loads/stores:
    offset[5:3] = bits 12:10, offset[2] = bit 6, offset[6] = bit 5"""
    low = (((i >> 10) & 7) << 3) | (((i >> 6) & 1) << 2)
    return low | (((i >> 5) & 1) << 6)


def CLS_Doffset(i):
    """Double-word offset of compressed loads/stores:
    offset[5:3] = bits 12:10, offset[7:6] = bits 6:5"""
    return (((i >> 10) & 7) << 3) | (((i >> 5) & 3) << 6)


def CLS_Qoffset(i):
    """Quad-word offset of compressed loads/stores:
    offset[5:4] = bits 12:11, offset[8] = bit 10, offset[7:6] = bits 6:5"""
    low = (((i >> 11) & 3) << 4) | (((i >> 10) & 1) << 8)
    return low | (((i >> 5) & 3) << 6)
+
class CLtype:
    """Compressed load"""
    # Offset decoder to use for each base instruction
    offset = {
        'C.LW': CLS_Woffset,
        'C.LD': CLS_Doffset,
        'C.LQ': CLS_Qoffset,
        'C.FLW': CLS_Woffset,
        'C.FLD': CLS_Doffset,
    }

    def __init__(self, instr):
        """Split the 16-bit encoding `instr.bin` into the CL-type fields"""
        word = instr.bin
        self.funct3 = word >> 13
        # 3-bit register fields, mapped onto x8..x15
        self.rs1 = ((word >> 7) & 7) + 8
        self.rd = ((word >> 2) & 7) + 8
        self.op = word & 3
        # zero-extended offset
        self.offset = CLtype.offset[instr.base()](word)
+
class CStype:
    """Compressed store"""
    # Offset decoder to use for each base instruction
    offset = {
        'C.SW': CLS_Woffset,
        'C.SD': CLS_Doffset,
        'C.SQ': CLS_Qoffset,
        'C.FSW': CLS_Woffset,
        'C.FSD': CLS_Doffset,
    }

    def __init__(self, instr):
        """Split the 16-bit encoding `instr.bin` into the CS-type fields"""
        word = instr.bin
        self.funct3 = word >> 13
        # 3-bit register fields, mapped onto x8..x15
        self.rs1 = ((word >> 7) & 7) + 8
        self.rs2 = ((word >> 2) & 7) + 8
        self.op = word & 3
        # zero-extended offset
        self.offset = CStype.offset[instr.base()](word)
+
class CAtype:
    """Compressed arithmetic"""
    def __init__(self, instr):
        """Split the 16-bit encoding `instr.bin` into the CA-type fields"""
        word = instr.bin
        self.funct6 = word >> 10
        # 3-bit register fields, mapped onto x8..x15; the first one is
        # both destination and first source
        dest = ((word >> 7) & 7) + 8
        self.rd = dest
        self.rs1 = dest
        self.funct2 = (word >> 5) & 3
        self.rs2 = ((word >> 2) & 7) + 8
        self.op = word & 3
+
class CBtype:
    """Compressed branch"""
    branch = ['C.BEQZ', 'C.BNEZ']
    regimm = ['C.SRLI', 'C.SRAI', 'C.ANDI']

    def __init__(self, instr):
        """
        Split the 16-bit encoding `instr.bin` into the CB-type fields.
        For branches `offset` is the sign-extended branch offset; for the
        register-immediate instructions `shamt` and `rd` are set as well.
        """
        word = instr.bin
        base = instr.base()
        self.funct3 = word >> 13
        # Raw bits 12:10; replaced below for branches
        self.offset = (word >> 10) & 7
        # 3-bit register field, mapped onto x8..x15
        self.rs1 = ((word >> 7) & 7) + 8
        self.op = word & 3
        if base in CBtype.branch:
            # offset[8] = bit 12, offset[4:3] = bits 11:10,
            # offset[7:6] = bits 6:5, offset[2:1] = bits 4:3,
            # offset[5] = bit 2
            raw = (
                (((word >> 12) & 1) << 8)
                | (((word >> 10) & 3) << 3)
                | (((word >> 5) & 3) << 6)
                | (((word >> 3) & 3) << 1)
                | (((word >> 2) & 1) << 5)
            )
            self.offset = sign_ext(raw, 8)
        if base in CBtype.regimm:
            amount = CItype.imm(word)
            # Only C.ANDI takes a signed immediate
            self.shamt = sign_ext(amount, 5) if base == 'C.ANDI' else amount
            self.rd = self.rs1
+
class CJtype:
    """Compressed jump"""
    def __init__(self, instr):
        """
        Split the 16-bit encoding `instr.bin` into the CJ-type fields.
        `offset` is the sign-extended jump offset, `jump_target` the raw
        11-bit field (bits 12:2) it is decoded from.
        """
        self.funct3 = instr.bin >> 13
        assert instr.base() in ['C.J', 'C.JAL']
        self.offset = sign_ext(CJtype.offset(instr.bin), 11)
        self.jump_target = (instr.bin >> 2) & 0x7ff
        self.op = instr.bin & 3

    def offset(i):
        """
        Decode the (not yet sign-extended) jump offset of encoding `i`.
        Plain function looked up through the class: `CJtype.offset(bits)`.

        offset[11] = bit 12, offset[4] = bit 11, offset[9:8] = bits 10:9,
        offset[10] = bit 8, offset[6] = bit 7, offset[7] = bit 6,
        offset[3:1] = bits 5:3, offset[5] = bit 2
        """
        return (
            (((i >> 12) & 1) << 11)
            # bit 11 must be shifted *down* to be extracted
            | (((i >> 11) & 1) << 4)
            | (((i >> 9) & 3) << 8)
            | (((i >> 8) & 1) << 10)
            | (((i >> 7) & 1) << 6)
            | (((i >> 6) & 1) << 7)
            # three bits (5:3), not one
            | (((i >> 3) & 7) << 1)
            | (((i >> 2) & 1) << 5)
        )
+
class Instr:
    """Instructions"""

    # Base names of compressed instructions: [bits 1:0][bits 15:13]
    table_16_4_RV32 = [
        ['C.ADDI4SPN', 'C.FLD', 'C.LW', 'C.FLW',
         'Reserved', 'C.FSD', 'C.SW', 'C.FSW'],
        ['C.ADDI', 'C.JAL', 'C.LI', 'C.LUI/C.ADDI16SP',
         'MISC-ALU', 'C.J', 'C.BEQZ', 'C.BNEZ'],
        ['C.SLLI', 'C.FLDSP', 'C.LWSP', 'C.FLWSP',
         'C.J[AL]R/C.MV/C.ADD', 'C.FSDSP', 'C.SWSP', 'C.FSWSP'],
    ]

    # Base names of uncompressed instructions: [bits 6:5][bits 4:2]
    table_24_1 = [
        ['LOAD', 'LOAD-FP', 'custom-0', 'MISC-MEM', 'OP-IMM', 'AUIPC', 'OP-IMM-32', '48b'],
        ['STORE', 'STORE-FP', 'custom-1', 'AMO', 'OP', 'LUI', 'OP-32', '64b'],
        ['MADD', 'MSUB', 'NMSUB', 'NMADD', 'OP-FP', 'reserved', 'custom-2/rv128', '48b'],
        ['BRANCH', 'JALR', 'reserved', 'JAL', 'SYSTEM', 'reserved', 'custom-3/rv128', '80b'],
    ]

    # Class decoding the fields of each supported base instruction
    type_of_base = {
        'OP-IMM': Itype,
        'LUI': Utype,
        'AUIPC': Utype,
        'OP': Rtype,
        'OP-32': Rtype,
        'JAL': Jtype,
        'JALR': Itype,
        'BRANCH': Btype,
        'LOAD': Itype,
        'STORE': Stype,
        'SYSTEM': Itype,
        'C.LWSP': CItype,
        'C.LDSP': CItype,
        'C.LQSP': CItype,
        'C.FLWSP': CItype,
        'C.FLDSP': CItype,
        'C.SWSP': CSStype,
        'C.SDSP': CSStype,
        'C.SQSP': CSStype,
        'C.FSWSP': CSStype,
        'C.FSDSP': CSStype,
        'C.LW': CLtype,
        'C.LD': CLtype,
        'C.LQ': CLtype,
        'C.FLW': CLtype,
        'C.FLD': CLtype,
        'C.SW': CStype,
        'C.SD': CStype,
        'C.SQ': CStype,
        'C.FSW': CStype,
        'C.FSD': CStype,
        'C.J': CJtype,
        'C.JAL': CJtype,
        'C.J[AL]R/C.MV/C.ADD': CRtype,
        'C.BEQZ': CBtype,
        'C.BNEZ': CBtype,
        'C.LI': CItype,
        'C.LUI/C.ADDI16SP': CItype,
        'C.ADDI': CItype,
        'C.ADDIW': CItype,
        'C.ADDI4SPN': CIWtype,
        'C.SLLI': CItype,
        'MISC-ALU': CAtype,
    }
    iloads = ['C.LW', 'C.LWSP', 'LOAD']
    floads = ['C.FLD', 'C.FLW', 'C.FLDSP', 'C.FLWSP', 'LOAD-FP']
    istores = ['C.SW', 'C.SWSP', 'STORE']
    fstores = ['C.FSD', 'C.FSW', 'C.FSDSP', 'C.FSWSP', 'STORE-FP']
    loads = iloads + floads
    stores = istores + fstores

    def __init__(self, bincode):
        """bincode: the instruction encoding as an integer"""
        self.bin = bincode
        self.inst_1_0 = bincode & 3

    def base(self):
        """Get the name of the base instruction"""
        word = self.bin
        if self.is_compressed():
            return Instr.table_16_4_RV32[word & 3][(word >> 13) & 7]
        return Instr.table_24_1[(word >> 5) & 3][(word >> 2) & 7]

    def fields(self):
        """
        Get an object with the fields of the instruction
        Raises KeyError when the base instruction has no entry in
        `type_of_base`.
        """
        decoder = Instr.type_of_base[self.base()]
        return decoder(self)

    def is_compressed(self):
        """Is the instruction from the C extension?"""
        return (self.bin & 3) < 3

    def size(self):
        """Size of the instruction in bytes"""
        if self.is_compressed():
            return 2
        return 4

    def is_load(self):
        """Is the instruction a load?"""
        return self.base() in Instr.loads

    def is_store(self):
        """Is the instruction a store?"""
        return self.base() in Instr.stores

    def is_branch(self):
        """Is it a taken/not taken branch?"""
        return self.base() in ['C.BEQZ', 'C.BNEZ', 'BRANCH']

    def is_regjump(self):
        """Is it a register jump?"""
        base = self.base()
        if base in ['JALR']:
            return True
        # The compressed slot is shared with C.MV / C.ADD: check the
        # resolved name
        if base != 'C.J[AL]R/C.MV/C.ADD':
            return False
        return self.fields().name in ['C.JALR', 'C.JR']

    def is_jump(self):
        """Is it an immediate jump?"""
        return self.base() in ['JAL', 'C.JAL', 'C.J']

    def is_muldiv(self):
        """Is it a muldiv instruction?"""
        return self.base() in ['OP', 'OP-32'] and self.fields().funct7 == 1

    def offset(self):
        """Get offset from instr (sometimes it is just 'imm' in RISCV spec)"""
        decoded = self.fields()
        try:
            return decoded.offset
        except AttributeError:
            return decoded.imm

    def addr_fields(self):
        """Get the register and offset to build an address"""
        return AddrFields(self.fields().rs1, self.offset())

    def has_WAW_from(self, other):
        """b.has_WAW_from(a) if a.rd == b.rd"""
        a = other.fields()
        b = self.fields()
        if not hasattr(a, 'rd') or not hasattr(b, 'rd'):
            return False
        # Writes to the zero register never conflict
        return a.rd == b.rd and a.rd != Reg.zero

    def has_RAW_from(self, other):
        """b.has_RAW_from(a) if b.rsX == a.rd"""
        a = other.fields()
        b = self.fields()
        # No destination, or the zero register: nothing to depend on
        if getattr(a, 'rd', Reg.zero) == Reg.zero:
            return False
        return a.rd in (getattr(b, 'rs1', None), getattr(b, 'rs2', None))

    def has_WAR_from(self, other):
        """b.has_WAR_from(a) if b.rd == a.rsX"""
        a = other.fields()
        b = self.fields()
        # No destination, or the zero register: nothing is overwritten
        if getattr(b, 'rd', Reg.zero) == Reg.zero:
            return False
        return b.rd in (getattr(a, 'rs1', None), getattr(a, 'rs2', None))
diff --git a/perf-model/model.py b/perf-model/model.py
new file mode 100644
index 000000000..736298a89
--- /dev/null
+++ b/perf-model/model.py
@@ -0,0 +1,666 @@
+# Copyright 2024 Thales Silicon Security
+#
+# Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
+# You may obtain a copy of the License at https://solderpad.org/licenses/
+#
+# Original Author: Côme ALLART - Thales
+
+"""
+Performance model of the cva6
+"""
+
+import sys
+import re
+
+from dataclasses import dataclass
+from enum import Enum
+from collections import defaultdict
+
+#from matplotlib import pyplot as plt
+
+from isa import Instr, Reg
+
class EventKind(Enum):
    """Kinds of events which can be logged on an instruction"""
    # Data hazards
    WAW = 1
    WAR = 2
    RAW = 3
    # Branch prediction outcome
    BMISS = 4
    BHIT = 5
    # Structural hazard
    STRUCT = 6
    # Pipeline progress
    issue = 7
    done = 8
    commit = 9
+
def to_signed(value, xlen=32):
    """
    Interpret an xlen-bit pattern as a two's complement number
    value: the bit pattern, as an integer
    xlen: width of the pattern in bits
    """
    if value >> (xlen - 1):
        # Sign bit (or anything above it) is set: wrap around
        return value - (1 << xlen)
    return value
+
class Event:
    """Something which happened to an instruction at a given cycle"""
    def __init__(self, kind, cycle):
        self.kind, self.cycle = kind, cycle

    def __repr__(self):
        return "@{}: {}".format(self.cycle, self.kind)
+
class Instruction(Instr):
    """Represents a RISC-V instruction with annotations"""

    # Registers taken into account by is_ret() and is_call()
    _ret_regs = [Reg.ra, Reg.t0]

    def __init__(self, line, address, hex_code, mnemo):
        """
        line: value stored as-is in `self.line`
        address: address of the instruction, as a hexadecimal string
        hex_code: encoding of the instruction, as a hexadecimal string
        mnemo: disassembled text of the instruction
        """
        super().__init__(int(hex_code, base=16))
        self.line = line
        self.address = int(address, base=16)
        self.hex_code = hex_code
        self.mnemo = mnemo
        # Events logged on this instruction, in logging order
        self.events = []

    def mnemo_name(self):
        """The name of the instruction (first word of the mnemo)"""
        words = self.mnemo.split()
        return words[0]

    def next_addr(self):
        """Address of next instruction"""
        return self.address + self.size()

    def is_ret(self):
        "Does CVA6 consider this instruction as a ret?"
        f = self.fields()
        # Strange conditions, no imm check, no rd-discard check
        if not self.is_regjump():
            return False
        if f.rs1 not in Instruction._ret_regs:
            return False
        return self.is_compressed() or f.rs1 != f.rd

    def is_call(self):
        "Does CVA6 consider this instruction as a call?"
        base = self.base()
        f = self.fields()
        if base == 'C.JAL':
            return True
        if base == 'C.J[AL]R/C.MV/C.ADD' and f.name == 'C.JALR':
            return True
        return base in ['JAL', 'JALR'] and f.rd in Instruction._ret_regs

    def __repr__(self):
        return self.mnemo
+
@dataclass
class Entry:
    """A scoreboard entry"""
    instr: Instruction
    # No annotation: a plain class attribute rather than a dataclass field,
    # so it is not a constructor parameter. Instances start from this
    # shared 0 and get their own value on first assignment.
    cycles_since_issue = 0
    done: bool = False

    def __repr__(self):
        state = "DONE" if self.done else "WIP "
        return (
            f"{state} 0x{self.instr.address:08X}:`{self.instr}`"
            f" for {self.cycles_since_issue}"
        )
+
@dataclass
class LastIssue:
    """Remembers the last issued instruction and when it was issued"""
    # The instruction itself
    instr: Instruction
    # Cycle at which it was issued
    issue_cycle: int
+
class IqLen:
    """Model of the instruction queue with only a size counter"""
    def __init__(self, fetch_size, debug=False):
        """
        fetch_size: requested number of bytes per fetch; the smallest
                    power of two which is at least 4 and at least this
                    value is used
        debug: print a message on every operation
        """
        size = 4
        while size < fetch_size:
            size *= 2
        self.fetch_size = size
        self.debug = debug
        # The queue starts with one fetch worth of bytes
        self.len = size
        self.new_fetch = True

    def fetch(self):
        """Fetch bytes"""
        self.len += self.fetch_size
        self._debug(f"fetched {self.fetch_size}, got {self.len}")
        self.new_fetch = True

    def flush(self):
        """Flush instruction queue (bmiss or exception)"""
        self.len = 0
        self._debug(f"flushed, got {self.len}")
        self.new_fetch = False

    def jump(self):
        """Lose a fetch cycle and truncate (jump, branch hit taken)"""
        if self.new_fetch:
            # The bytes of the latest fetch are dropped
            self.len -= self.fetch_size
            self._debug(f"jumping, removed {self.fetch_size}, got {self.len}")
            self.new_fetch = False
        self._truncate()
        self._debug(f"jumped, got {self.len}")

    def has(self, instr):
        """Does the instruction queue have this instruction?"""
        available = self.len
        if self._is_crossword(instr):
            available -= self.fetch_size - 2
        self._debug(f"comparing {available} to {instr.size()} ({instr})")
        return available >= instr.size()

    def remove(self, instr):
        """Remove instruction from queue"""
        self.len -= instr.size()
        self._debug(f"removed {instr.size()}, got {self.len}")
        self._truncate(self._addr_index(instr.next_addr()))
        if instr.is_jump():
            self.jump()

    def _addr_index(self, addr):
        # Position within a fetch block (fetch_size is a power of two)
        return addr & (self.fetch_size - 1)

    def _is_crossword(self, instr):
        # A 4-byte instruction starting on the last 2 bytes of a block
        if self._addr_index(instr.address) != self.fetch_size - 2:
            return False
        return not instr.is_compressed()

    def _truncate(self, index=0):
        occupancy = self.fetch_size - self._addr_index(self.len)
        # index is below fetch_size and occupancy is in 1..fetch_size, so
        # the modulo is the same as adding fetch_size once when negative
        to_remove = (index - occupancy) % self.fetch_size
        self.len -= to_remove
        self._debug(f"truncated, removed {to_remove}, got {self.len}")

    def _debug(self, message):
        if not self.debug:
            return
        print(f"iq: {message}")
+
class Ras:
    "Return Address Stack"
    def __init__(self, depth=2, debug=False):
        # The stack keeps at most depth - 1 addresses
        self.depth = depth - 1
        self.stack = []
        self.debug = debug
        # Result of the latest drop(), which is what read() returns
        self.last_dropped = None

    def push(self, addr):
        "Push an address on the stack, forget oldest entry if full"
        self.stack.append(addr)
        self._debug(f"pushed 0x{addr:08X}")
        if len(self.stack) <= self.depth:
            return
        self.stack.pop(0)
        self._debug("overflown")

    def drop(self):
        "Drop an address from the stack"
        self._debug("dropping")
        if self.stack:
            self.last_dropped = self.stack.pop()
            return
        self.last_dropped = None
        self._debug("was already empty")

    def read(self):
        "Read the address removed by the latest drop, None if there is none"
        self._debug("reading")
        addr = self.last_dropped
        if addr is None:
            self._debug("was empty")
            return None
        self._debug(f"read 0x{addr:08X}")
        return addr

    def resolve(self, instr):
        "Push or pop depending on the instruction"
        self._debug(f"issuing {instr}")
        # An instruction can be both a ret and a call: drop first, then push
        if instr.is_ret():
            self._debug("detected ret")
            self.drop()
        if instr.is_call():
            self._debug("detected call")
            self.push(instr.next_addr())

    def _debug(self, message):
        if not self.debug:
            return
        print(f"RAS: {message}")
+
class Bht:
    "Branch History Table"

    @dataclass
    class Entry:
        "A BHT entry"
        # Set once the entry has been updated by resolve()
        valid: bool = False
        # Saturating counter, kept within 0..3 by resolve()
        sat_counter: int = 0

    def __init__(self, entries=128):
        self.contents = [Bht.Entry() for _ in range(entries)]

    def predict(self, addr):
        "Is the branch taken? None if don't know"
        entry = self.contents[self._index(addr)]
        if not entry.valid:
            return None
        return entry.sat_counter >= 2

    def resolve(self, addr, taken):
        "Update branch prediction"
        entry = self.contents[self._index(addr)]
        entry.valid = True
        # Move the counter one step towards the outcome, saturating
        if taken and entry.sat_counter < 3:
            entry.sat_counter += 1
        elif not taken and entry.sat_counter > 0:
            entry.sat_counter -= 1

    def _index(self, addr):
        # Bit 0 of the address is ignored
        return (addr >> 1) % len(self.contents)
+
class Fu(Enum):
    """Functional units an instruction can be issued to"""
    ALU = 1
    MUL = 2
    BRANCH = 3
    LDU = 4
    STU = 5
+
+# We have
+# - FLU gathering ALU + BRANCH (+ CSR, not significant in CoreMark)
+# - LSU for loads and stores
+# - FP gathering MUL + second ALU (+ Floating, unused in CoreMark)
+# This way we do not have more write-back ports than currently with F
+
def to_fu(instr):
    """
    Functional unit executing the instruction
    The checks are made in a fixed order (branch or register jump,
    muldiv, load, store); anything else goes to the ALU.
    """
    if instr.is_branch() or instr.is_regjump():
        unit = Fu.BRANCH
    elif instr.is_muldiv():
        unit = Fu.MUL
    elif instr.is_load():
        unit = Fu.LDU
    elif instr.is_store():
        unit = Fu.STU
    else:
        unit = Fu.ALU
    return unit
+
class FusBusy:
    "Is each functional unit busy"
    def __init__(self, has_alu2 = False):
        self.has_alu2 = has_alu2

        # One busy flag per unit, all free at the start
        self.alu = self.mul = self.branch = False
        self.ldu = self.stu = self.alu2 = False

        # Remembers a multiplication issued during the current cycle
        self.issued_mul = False

    def _alu2_ready(self):
        return self.has_alu2 and not self.alu2

    def is_ready(self, fu):
        "Can the functional unit accept an instruction?"
        busy = {
            # The ALU is only busy when the second ALU cannot help
            Fu.ALU: self.alu and not self._alu2_ready(),
            Fu.MUL: self.mul,
            Fu.BRANCH: self.branch,
            Fu.LDU: self.ldu,
            Fu.STU: self.stu,
        }
        return not busy[fu]

    def is_ready_for(self, instr):
        "Can the functional unit of this instruction accept it?"
        return self.is_ready(to_fu(instr))

    def issue(self, instr):
        "Mark as busy the units taken by this instruction"
        handlers = {
            Fu.ALU: FusBusy.issue_alu,
            Fu.MUL: FusBusy.issue_mul,
            Fu.BRANCH: FusBusy.issue_branch,
            Fu.LDU: FusBusy.issue_ldu,
            Fu.STU: FusBusy.issue_stu,
        }
        handler = handlers[to_fu(instr)]
        return handler(self)

    def issue_mul(self):
        self.mul = True
        self.issued_mul = True

    def issue_alu(self):
        # The second ALU is used first when it is available
        if self._alu2_ready():
            self.alu2 = True
            return
        assert not self.alu
        self.alu = True
        self.branch = True

    def issue_branch(self):
        self.alu = True
        self.branch = True
        # Stores are not allowed yet
        self.stu = True

    def issue_ldu(self):
        # A load blocks both memory units
        self.ldu = True
        self.stu = True

    def issue_stu(self):
        # A store blocks both memory units
        self.stu = True
        self.ldu = True

    def cycle(self):
        "Move to the next cycle"
        # A multiplication issued in the previous cycle keeps the ALU and
        # the branch unit busy for this one
        self.alu = self.issued_mul
        self.branch = self.issued_mul
        self.mul = False
        self.ldu = False
        self.stu = False
        self.alu2 = False
        self.issued_mul = False
+
+class Model:
+ """Models the scheduling of CVA6"""
+
+ re_instr = re.compile(
+ r"([a-z]+)\s+0:\s*0x00000000([0-9a-f]+)\s*\(([0-9a-fx]+)\)\s*@\s*([0-9]+)\s*(.*)"
+ )
+
+ def __init__(
+ self,
+ debug=False,
+ issue=1,
+ commit=2,
+ sb_len=8,
+ fetch_size=None,
+ has_forwarding=True,
+ has_renaming=True):
+ self.ras = Ras(debug=debug)
+ self.bht = Bht()
+ self.instr_queue = []
+ self.scoreboard = []
+ self.fus = FusBusy(issue > 1)
+ self.last_issued = None
+ self.last_committed = None
+ self.retired = []
+ self.sb_len = sb_len
+ self.debug = debug
+ self.iqlen = IqLen(fetch_size or 4 * issue, debug)
+ self.issue_width = issue
+ self.commit_width = commit
+ self.has_forwarding = has_forwarding
+ self.has_renaming = has_renaming
+ self.log = []
+
+ def log_event_on(self, instr, kind, cycle):
+ """Log an event on the instruction"""
+ if self.debug:
+ print(f"{instr}: {kind}")
+ event = Event(kind, cycle)
+ instr.events.append(event)
+ self.log.append((event, instr))
+
+ def predict_branch(self, instr):
+ """Predict if branch is taken or not"""
+ pred = self.bht.predict(instr.address)
+ if pred is not None:
+ return pred
+ return instr.offset() >> 31 != 0
+
+ def predict_regjump(self, instr):
+ """Predict destination address of indirect jump"""
+ if instr.is_ret():
+ return self.ras.read() or 0
+ return 0 # always miss, as there is no btb yet
+
+ def predict_pc(self, last):
+ """Predict next program counter depending on last issued instruction"""
+ if last.is_branch():
+ taken = self.predict_branch(last)
+ offset = to_signed(last.offset()) if taken else last.size()
+ return last.address + offset
+ if last.is_regjump():
+ return self.predict_regjump(last)
+ return None
+
+ def issue_manage_last_branch(self, instr, cycle):
+ """Flush IQ if branch miss, jump if branch hit"""
+ if self.last_issued is not None:
+ last = self.last_issued.instr
+ pred = self.predict_pc(last)
+ if pred is not None:
+ bmiss = pred != instr.address
+ resolved = cycle >= self.last_issued.issue_cycle + 6
+ if bmiss and not resolved:
+ self.iqlen.flush()
+ branch = EventKind.BMISS if bmiss else EventKind.BHIT
+ if branch not in [e.kind for e in instr.events]:
+ self.log_event_on(instr, branch, cycle)
+ taken = instr.address != last.next_addr()
+ if taken and not bmiss:
+ # last (not instr) was like a jump
+ self.iqlen.jump()
+
+    def commit_manage_last_branch(self, instr, cycle):
+        """Train the BHT with the outcome of the previously committed branch.
+
+        If the previously committed instruction was a branch, it is considered
+        taken when ``instr`` (the instruction committing now) is not located at
+        its ``next_addr()``, and the BHT is updated with that outcome.
+        ``instr`` then becomes the last committed instruction. ``cycle`` is
+        accepted but not used.
+        """
+        previous = self.last_committed
+        if previous is not None and previous.is_branch():
+            outcome = instr.address != previous.next_addr()
+            self.bht.resolve(previous.address, outcome)
+        self.last_committed = instr
+
+    def find_data_hazards(self, instr, cycle):
+        """Log the data hazards of ``instr`` against the scoreboard.
+
+        Every scoreboard entry is checked, and one event is logged per hazard
+        found:
+
+        * WAW, unless the model has register renaming;
+        * RAW, unless the model has forwarding and the entry is done.
+
+        Returns ``True`` if at least one hazard was logged, else ``False``.
+        """
+        blocked = False
+        for slot in self.scoreboard:
+            producer = slot.instr
+            waw = instr.has_WAW_from(producer) and not self.has_renaming
+            if waw:
+                self.log_event_on(instr, EventKind.WAW, cycle)
+            forwardable = self.has_forwarding and slot.done
+            raw = instr.has_RAW_from(producer) and not forwardable
+            if raw:
+                self.log_event_on(instr, EventKind.RAW, cycle)
+            if waw or raw:
+                blocked = True
+        return blocked
+
+    def find_structural_hazard(self, instr, cycle):
+        """Log a STRUCT event if the functional units cannot accept ``instr``.
+
+        Returns ``True`` when the functional units are not ready for the
+        instruction (the event is then logged), ``False`` otherwise.
+        """
+        busy = not self.fus.is_ready_for(instr)
+        if busy:
+            self.log_event_on(instr, EventKind.STRUCT, cycle)
+        return busy
+
+    def try_issue(self, cycle):
+        """Issue the instruction at the head of the queue if nothing blocks it.
+
+        Nothing is attempted when the instruction queue is empty or the
+        scoreboard is full. Otherwise data hazards, structural hazards and the
+        outcome of the last issued branch are all evaluated (each of them may
+        log events or alter the fetch queue), and the instruction is issued
+        only if there is no hazard and the fetch queue holds it.
+        """
+        if len(self.instr_queue) == 0 or len(self.scoreboard) >= self.sb_len:
+            return
+
+        candidate = self.instr_queue[0]
+        # All checks run unconditionally and in this order: they are called
+        # for their side effects as much as for their result.
+        data_hazard = self.find_data_hazards(candidate, cycle)
+        struct_hazard = self.find_structural_hazard(candidate, cycle)
+        self.issue_manage_last_branch(candidate, cycle)
+        fetched = self.iqlen.has(candidate)
+        if data_hazard or struct_hazard or not fetched:
+            return
+
+        self.iqlen.remove(candidate)
+        issued = self.instr_queue.pop(0)
+        self.log_event_on(issued, EventKind.issue, cycle)
+        self.scoreboard.append(Entry(issued))
+        self.fus.issue(issued)
+        self.last_issued = LastIssue(issued, cycle)
+        self.ras.resolve(issued)
+
+    def try_execute(self, cycle):
+        """Advance every scoreboard entry by one cycle of execution.
+
+        Loads, stores and mul/div instructions take 2 cycles, everything else
+        takes 1. An entry is marked done, and a ``done`` event is logged, on
+        the cycle where its age since issue equals that duration.
+        """
+        for slot in self.scoreboard:
+            slot.cycles_since_issue += 1
+            op = slot.instr
+            is_memory = op.is_load() or op.is_store()
+            is_muldiv = op.is_muldiv()
+            latency = 2 if (is_memory or is_muldiv) else 1
+            if slot.cycles_since_issue != latency:
+                continue
+            self.log_event_on(op, EventKind.done, cycle)
+            slot.done = True
+
+    def try_commit(self, cycle, commit_port):
+        """Commit the oldest scoreboard entry through ``commit_port`` if allowed.
+
+        The entry at the head of the scoreboard commits only when it is done.
+        A store additionally needs commit port 0: it is refused on any other
+        port. On commit, the instruction leaves the scoreboard, gets a
+        ``commit`` event, is appended to ``self.retired`` and is used to
+        resolve the previously committed branch.
+        """
+        if len(self.scoreboard) == 0:
+            return
+        head = self.scoreboard[0]
+        store_on_secondary_port = commit_port > 0 and head.instr.is_store()
+        if store_on_secondary_port or not head.done:
+            return
+        committed = self.scoreboard.pop(0).instr
+        self.log_event_on(committed, EventKind.commit, cycle)
+        self.retired.append(committed)
+        self.commit_manage_last_branch(committed, cycle)
+
+    def run_cycle(self, cycle):
+        """Simulate one clock cycle of the whole pipeline.
+
+        After the functional units have been stepped, the stages are evaluated
+        from the back of the pipeline to the front: one commit attempt per
+        commit port, then execution, then one issue attempt per issue slot,
+        and finally a fetch into the instruction queue.
+        """
+        self.fus.cycle()
+
+        for port in range(self.commit_width):
+            self.try_commit(cycle, port)
+
+        self.try_execute(cycle)
+
+        slots_left = self.issue_width
+        while slots_left > 0:
+            self.try_issue(cycle)
+            slots_left -= 1
+
+        self.iqlen.fetch()
+
+    def load_file(self, path):
+        """Append the instructions found in a trace file to the instruction queue.
+
+        The file is read as UTF-8. Each stripped line is searched with
+        ``Model.re_instr``; lines that do not match are ignored. For a match,
+        groups 2, 3 and 5 provide the address, the hexadecimal encoding and the
+        mnemonic handed to ``Instruction`` together with the line itself.
+        """
+        with open(path, "r", encoding="utf8") as trace:
+            entries = [raw.strip() for raw in trace]
+
+        for text in entries:
+            match = Model.re_instr.search(text)
+            if not match:
+                continue
+            address, hex_code, mnemo = match.group(2, 3, 5)
+            self.instr_queue.append(Instruction(text, address, hex_code, mnemo))
+
+    def run(self, cycles=None):
+        """Simulate cycles until the model drains, and return the cycle count.
+
+        The simulation ends when both the instruction queue and the scoreboard
+        are empty. If ``cycles`` is given, it also ends as soon as the cycle
+        counter, once incremented, is greater than ``cycles``. In debug mode
+        the scoreboard and the fetch queue length are printed after each cycle.
+        """
+        elapsed = 0
+        while True:
+            if len(self.instr_queue) == 0 and len(self.scoreboard) == 0:
+                return elapsed
+
+            self.run_cycle(elapsed)
+            if self.debug:
+                print(f"Scoreboard @{elapsed}")
+                for entry in self.scoreboard:
+                    print(f" {entry}")
+                print(f"iqlen = {self.iqlen.len}")
+                print()
+            elapsed += 1
+
+            if cycles is not None and elapsed > cycles:
+                return elapsed
+
+def write_trace(output_file, instructions):
+    """Write a copy of the trace annotated with the modelled commit cycles.
+
+    For each instruction, every ``@<number>`` stamp found in its original
+    trace line is replaced by ``@ <cycle>``, where ``<cycle>`` is the cycle of
+    the instruction's last event. One line is written per instruction.
+
+    The file is written as UTF-8, the encoding used to read the trace.
+
+    Raises:
+        AssertionError: if the last event of an instruction is not a commit.
+    """
+    stamp = re.compile(r"@\s*[0-9]+")
+
+    # Build every line before touching the file, so that a failed assertion
+    # does not leave a truncated output behind.
+    lines = []
+    for instr in instructions:
+        commit_event = instr.events[-1]
+        assert commit_event.kind == EventKind.commit
+        annotated = stamp.sub(f"@ {commit_event.cycle}", instr.line)
+        # Hazard tags (e.g. " #STRUCT", " #RAW") could be appended here from
+        # the kinds found in instr.events.
+        lines.append(f"{annotated}\n")
+
+    with open(output_file, "w", encoding="utf8") as f:
+        f.writelines(lines)
+
+def print_data(name, value, ts=24, sep='='):
+    """Print ``name <sep> value`` with the separator aligned across calls.
+
+    ``name`` is padded with spaces up to ``ts`` characters; a longer name is
+    printed as is, without padding.
+    """
+    padding = ' ' * max(ts - len(name), 0)
+    line = f"{name}{padding} {sep} {value}"
+    print(line)
+
+def display_scores(scores):
+    """Prepare the bars of a 3D graph of scores against commit/issue width.
+
+    ``scores`` is a list of rows; ``scores[i][j]`` becomes the height of the
+    bar located at ``(i, j)``. Only the bar coordinates are computed: the
+    plotting calls themselves are currently commented out, so nothing is
+    displayed and nothing is returned.
+    """
+    bars = [
+        (row, col, score)
+        for row, line in enumerate(scores)
+        for col, score in enumerate(line)
+    ]
+
+    # Anchor (x, y, z) and extent (dx, dy, dz) of each bar.
+    x = [bar[0] for bar in bars]
+    y = [bar[1] for bar in bars]
+    z = [0] * len(bars)
+    dx = [.5] * len(bars)
+    dy = [.5] * len(bars)
+    dz = [bar[2] for bar in bars]
+
+    #fig = plt.figure()
+    #ax1 = fig.add_subplot(111, projection='3d')
+    #ax1.bar3d(x, y, z, dx, dy, dz)
+    #ax1.set_xlabel("issue")
+    #ax1.set_ylabel("commit")
+    #ax1.set_zlabel("CoreMark/MHz")
+    #plt.show()
+
+def issue_commit_graph(input_file, n = 3):
+    """Compute and plot the score for every issue/commit width combination.
+
+    With a trace file, the model is run for each issue width and commit width
+    from 1 to ``n``; the score of a run is 1000000 divided by the number of
+    cycles of the timed part, stored at ``scores[issue][commit]`` (row 0 and
+    column 0 stay at 0). Without a trace file (``input_file`` is ``None``),
+    a table of previously recorded scores is used instead. The table is
+    printed, then handed to ``display_scores``.
+    """
+    if input_file is None:
+        scores = [
+            [0, 0, 0, 0, 0, 0],
+            [0, 2.651936045910317, 2.651936045910317, 2.651936045910317,
+             2.651936045910317, 2.651936045910317],
+            [0, 3.212779150348426, 3.6292766488711137, 3.6292766488711137,
+             3.6292766488711137, 3.6292766488711137],
+            [0, 3.2550388000624966, 3.900216852056974, 3.914997572701505,
+             3.914997572701505, 3.914997572701505],
+            [0, 3.2596436557555526, 3.9257869239889134, 3.9420984578510834,
+             3.9421606193922765, 3.9421606193922765],
+            [0, 3.260695897718491, 3.944757614368385, 3.9623576027736505,
+             3.9625460150656, 3.9625460150656],
+        ]
+    else:
+        scores = [[0] * (n + 1) for _ in range(n + 1)]
+        widths = range(1, n + 1)
+        for issue in widths:
+            for commit in widths:
+                print("running", issue, commit)
+                model = Model(issue=issue, commit=commit)
+                model.load_file(input_file)
+                model.run()
+                timed = filter_timed_part(model.retired)
+                scores[issue][commit] = 1000000 / count_cycles(timed)
+    print(scores)
+    display_scores(scores)
+
+def filter_timed_part(all_instructions):
+    """Return the instructions located inside the timed part(s) of a trace.
+
+    A ``csrr <reg>, minstret`` instruction (with a two-character register
+    name) acts as a toggle: the first one opens a timed section, the next one
+    closes it, and so on. The markers themselves are never kept.
+    """
+    marker = re.compile(r"^csrr\s+\w\w,\s*minstret$")
+    kept = []
+    in_timed_part = False
+    for instr in all_instructions:
+        if marker.search(instr.mnemo):
+            in_timed_part = not in_timed_part
+        elif in_timed_part:
+            kept.append(instr)
+    return kept
+
+def count_cycles(retired):
+    """Return the number of cycles spanned by a list of retired instructions.
+
+    The span goes from the earliest event of the first instruction to the
+    latest event of the last instruction.
+    """
+    first, last = retired[0], retired[-1]
+    earliest = min(event.cycle for event in first.events)
+    latest = max(event.cycle for event in last.events)
+    return latest - earliest
+
+def print_stats(instructions):
+    """Print the cycle count, the score and the event rates of a trace section.
+
+    The score is 1000000 divided by the number of cycles spanned by
+    ``instructions``. Each kind of event found on the instructions is then
+    reported as a percentage of the number of instructions.
+
+    An empty list (e.g. a trace without any timed part) only reports an
+    instruction count of 0, and a zero-cycle span reports the score as "n/a".
+    """
+    n_instr = len(instructions)
+    if n_instr == 0:
+        # Neither the cycle span nor the per-instruction rates are defined.
+        print_data("instruction number", n_instr)
+        return
+
+    ecount = defaultdict(int)
+    for instr in instructions:
+        for e in instr.events:
+            ecount[e.kind] += 1
+
+    n_cycles = count_cycles(instructions)
+    score = 1000000 / n_cycles if n_cycles else "n/a"
+
+    print_data("cycle number", n_cycles)
+    print_data("Coremark/MHz", score)
+    print_data("instruction number", n_instr)
+    for ek, count in ecount.items():
+        print_data(f"{ek}/instr", f"{100 * count / n_instr:.2f}%")
+
+def main(input_file: str):
+    """Run the model on a trace, then write the annotated trace and the stats.
+
+    The model runs in debug mode with 2 issue ports and 2 commit ports. The
+    annotated trace of all retired instructions goes to ``annotated.log``;
+    statistics are printed for the timed part only.
+    """
+    model = Model(debug=True, issue=2, commit=2)
+    model.load_file(input_file)
+    model.run()
+
+    retired = model.retired
+    write_trace('annotated.log', retired)
+    timed_part = filter_timed_part(retired)
+    print_stats(timed_part)
+
+if __name__ == "__main__":
+    # Exit with a usage message rather than an IndexError traceback when the
+    # trace path is missing; extra arguments are still ignored.
+    if len(sys.argv) < 2:
+        sys.exit(f"usage: {sys.argv[0]} <rvfi-trace.log>")
+    main(sys.argv[1])