mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-19 03:44:46 -04:00
Add CVA6 performance model (#2880)
This commit is contained in:
parent
4a1bffa87a
commit
7b3054156e
5 changed files with 1402 additions and 0 deletions
|
@ -4,6 +4,9 @@ CVA6 is a 6-stage, single-issue, in-order CPU which implements the 64-bit RISC-V
|
||||||
|
|
||||||
It has a configurable size, separate TLBs, a hardware PTW and branch-prediction (branch target buffer and branch history table). The primary design goal was on reducing critical path length.
|
It has a configurable size, separate TLBs, a hardware PTW and branch-prediction (branch target buffer and branch history table). The primary design goal was on reducing critical path length.
|
||||||
|
|
||||||
|
A performance model of CVA6 is available in the `perf-model/` folder of this repository.
|
||||||
|
It can be used to investigate performance-related micro-architecture changes.
|
||||||
|
|
||||||
<img src="docs/03_cva6_design/_static/ariane_overview.drawio.png"/>
|
<img src="docs/03_cva6_design/_static/ariane_overview.drawio.png"/>
|
||||||
|
|
||||||
|
|
||||||
|
|
79
perf-model/README.md
Normal file
79
perf-model/README.md
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
# CVA6 cycle-accurate performance model
|
||||||
|
|
||||||
|
This repository contains a cycle-accurate performance model of CVA6 control-path.
|
||||||
|
|
||||||
|
It was developed to explore microarchitecture changes in CVA6 before implementing them.
|
||||||
|
|
||||||
|
To cite this model, please head to the end of this document.
|
||||||
|
|
||||||
|
|
||||||
|
## Getting started
|
||||||
|
|
||||||
|
### Adapt RVFI trace generation
|
||||||
|
|
||||||
|
The regular expression expects the cycle number to be in the RVFI trace.
|
||||||
|
The value is not used by the model but it is used to compare the model and CVA6.
|
||||||
|
|
||||||
|
To emit the cycle number in the RVFI trace, modify `corev_apu/tb/rvfi_tracer.sv` in the CVA6 repository as shown below.
|
||||||
|
|
||||||
|
```diff
|
||||||
|
- $fwrite(f, "core 0: 0x%h (0x%h) DASM(%h)\n",
|
||||||
|
- pc64, rvfi_i[i].insn, rvfi_i[i].insn);
|
||||||
|
+ $fwrite(f, "core 0: 0x%h (0x%h) @%d DASM(%h)\n",
|
||||||
|
+ pc64, rvfi_i[i].insn, cycles, rvfi_i[i].insn);
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Generate an RVFI trace
|
||||||
|
|
||||||
|
To generate an RVFI trace, follow the instructions in the CVA6 repository to run a simulation.
|
||||||
|
The RVFI trace will be in `verif/sim/out_<date>/<simulator>/<test-name>.log`.
|
||||||
|
|
||||||
|
|
||||||
|
### Running the model
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python3 model.py verif/sim/out_<date>/<simulator>/<test-name>.log
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Exploring design space
|
||||||
|
|
||||||
|
In `model.py`, the `main` function runs the model with arguments which override default values.
|
||||||
|
Generic parameters are available in `Model.__init__`.
|
||||||
|
You can add new parameters to explore here.
|
||||||
|
|
||||||
|
To perform exploration, run the model in a loop, like `issue_commit_graph` does.
|
||||||
|
The `display_scores` function is meant to print a 3D plot if you have `matplotlib`.
|
||||||
|
`issue_commit_graph` prints the scores so that you can store them and display the figure without re-running the model.
|
||||||
|
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| :--- | :--- |
|
||||||
|
| `cycle_diff.py` | Calculates duration of each instruction in an RVFI trace |
|
||||||
|
| `isa.py` | Module to create Python objects from RISC-V instructions |
|
||||||
|
| `model.py` | The CVA6 performance model |
|
||||||
|
|
||||||
|
|
||||||
|
## Citing
|
||||||
|
|
||||||
|
```bibtex
|
||||||
|
@inproceedings{cf24,
|
||||||
|
author = {Allart, C\^{o}me and Coulon, Jean-Roch and Sintzoff, Andr\'{e} and Potin, Olivier and Rigaud, Jean-Baptiste},
|
||||||
|
title = {Using a Performance Model to Implement a Superscalar CVA6},
|
||||||
|
year = {2024},
|
||||||
|
isbn = {9798400704925},
|
||||||
|
publisher = {Association for Computing Machinery},
|
||||||
|
url = {https://doi.org/10.1145/3637543.3652871},
|
||||||
|
doi = {10.1145/3637543.3652871},
|
||||||
|
abstract = {A performance model of CVA6 RISC-V processor is built to evaluate performance-related modifications before implementing them in RTL. Its accuracy is 99.2\% on CoreMark. This model is used to evaluate a superscalar feature for CVA6. During design phase, the model helped detecting and fixing performance bugs. The superscalar feature resulted in a CVA6 performance improvement of 40\% on CoreMark.},
|
||||||
|
booktitle = {Proceedings of the 21st ACM International Conference on Computing Frontiers: Workshops and Special Sessions},
|
||||||
|
pages = {43--46},
|
||||||
|
numpages = {4},
|
||||||
|
keywords = {CVA6, Cycle-Based Model, Multi-Issue, Performance, RISC-V, Superscalar},
|
||||||
|
location = {Ischia, Italy},
|
||||||
|
series = {CF '24 Companion}
|
||||||
|
}
|
||||||
|
```
|
80
perf-model/cycle_diff.py
Normal file
80
perf-model/cycle_diff.py
Normal file
|
@ -0,0 +1,80 @@
|
||||||
|
# Copyright 2024 Thales Silicon Security
|
||||||
|
#
|
||||||
|
# Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||||
|
# You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||||
|
#
|
||||||
|
# Original Author: Côme ALLART - Thales
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Matches a mnemonic of the form `csrr <reg>, minstret` (whole string).
re_csrr_minstret = re.compile(r"^csrr\s+\w+,\s*minstret$")
# Matches one trace line. Capture groups:
#   1: lowercase word preceding " 0:"
#   2: hex digits of the address following a literal "0x00000000" prefix
#   3: the parenthesised instruction code
#   4: optional non-blank text located just before the "@"
#   5: decimal cycle number following the "@"
#   6: remainder of the line
re_full = re.compile(
    r"([a-z]+)\s+0:\s*0x00000000([0-9a-f]+)\s*\(([0-9a-fx]+)\)\s*(\S*)@\s*([0-9]+)\s*(.*)"
)
|
||||||
|
|
||||||
|
class Trace:
    """One instruction read from the trace file.

    addr, mnemo and flags are the strings captured from the trace line,
    cycle is the integer cycle number of that line.
    """

    def __init__(self, addr, cycle, mnemo, flags):
        self.addr = addr
        self.cycle = cycle
        self.mnemo = mnemo
        self.flags = flags
        # Not known at construction time; assigned by the caller afterwards
        self.delta = None

    def report(self):
        """Return the one-line text describing this instruction and its delta"""
        return f"+{self.delta} {self.flags} 0x{self.addr}: {self.mnemo}"
|
||||||
|
|
||||||
|
def print_data(name, value):
    "Print 'name = value', padding the name so that the '=' signs line up"
    # A non-positive width (name of 24 characters or more) yields no padding
    pad_width = 24 - len(name)
    print(name, ' ' * pad_width, " = ", value, sep="")
|
||||||
|
|
||||||
|
def read_traces(input_file):
    """Collect the traces located between `csrr ..., minstret` markers.

    Every line of input_file matching re_full is considered. A line whose
    mnemonic matches re_csrr_minstret toggles recording and is itself never
    recorded; recording is off at the start of the file.

    Returns the list of recorded Trace objects, in file order.
    """
    traces = []
    accepting = False
    with open(input_file, "r", encoding="utf8") as f:
        # Stream the file line by line instead of loading it entirely
        for raw_line in f:
            found = re_full.search(raw_line.strip())
            if not found:
                continue
            mnemo = found.group(6)
            if re_csrr_minstret.search(mnemo):
                accepting = not accepting
                continue
            if accepting:
                addr = found.group(2)
                flags = found.group(4)
                cycle = int(found.group(5))
                traces.append(Trace(addr, cycle, mnemo, flags))
    return traces
|
||||||
|
|
||||||
|
def write_traces(outfile, traces):
    "Announce the output file, then write one report line per trace to it"
    print("output file:", outfile)
    with open(outfile, "w", encoding="utf8") as out:
        out.writelines(trace.report() + "\n" for trace in traces)
|
||||||
|
|
||||||
|
def main(input_file: str):
    """Print cycle statistics of a trace file and write the per-instruction report.

    input_file: path of the trace file to read.

    Exits with an error message when no instruction was recorded, since no
    statistic can be computed then.
    """
    traces = read_traces(input_file)
    if not traces:
        # traces[0] below would otherwise fail with a bare IndexError
        sys.exit(f"{input_file}: no instruction found between minstret reads")
    cycle = traces[0].cycle
    cycle_number = traces[-1].cycle - cycle + 1
    # delta is the number of cycles elapsed since the previous recorded trace
    for trace in traces:
        trace.delta = trace.cycle - cycle
        cycle = trace.cycle
    print_data("cycle number", cycle_number)
    print_data("Coremark/MHz", 1000000 / cycle_number)
    print_data("instruction number", len(traces))
    print_data("IPC", len(traces) / cycle_number)
    write_traces("traceout.log", traces)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Print a usage line instead of failing with an IndexError on sys.argv[1]
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <rvfi-trace-file>")
    main(sys.argv[1])
|
574
perf-model/isa.py
Normal file
574
perf-model/isa.py
Normal file
|
@ -0,0 +1,574 @@
|
||||||
|
# Copyright 2024 Thales Silicon Security
|
||||||
|
#
|
||||||
|
# Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||||
|
# You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||||
|
#
|
||||||
|
# Original Author: Côme ALLART - Thales
|
||||||
|
|
||||||
|
"""
|
||||||
|
Represents the instruction set
|
||||||
|
"""
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
class Reg:
    """Integer register numbers, by ABI name and by xN name"""
    # ABI names
    zero, ra, sp, gp, tp, t0, t1, t2 = range(0, 8)
    s0 = fp = 8
    s1 = 9
    a0, a1, a2, a3, a4, a5, a6, a7 = range(10, 18)
    s2, s3, s4, s5, s6, s7, s8, s9, s10, s11 = range(18, 28)
    t3, t4, t5, t6 = range(28, 32)
    # Register names
    x0, x1, x2, x3, x4, x5, x6, x7 = range(0, 8)
    x8, x9, x10, x11, x12, x13, x14, x15 = range(8, 16)
    x16, x17, x18, x19, x20, x21, x22, x23 = range(16, 24)
    x24, x25, x26, x27, x28, x29, x30, x31 = range(24, 32)
|
||||||
|
|
||||||
|
def sign_ext(imm, index, xlen=32):
    """
    Sign extends a value, returning it as a non-negative xlen-bit pattern
    imm: value to sign extend (must fit in index + 1 bits)
    index: index of the sign bit of the value
    xlen: target width in bits of the sign extended value
    """
    width = index + 1
    assert (imm >> width) == 0
    # Ones covering bits [xlen-1:width], kept only when the sign bit is set
    upper_ones = (1 << (xlen - width)) - 1
    fill = (imm >> index) * upper_ones
    return (fill << width) | imm
|
||||||
|
|
||||||
|
@dataclass
class AddrFields:
    """Represents the data used to build a memory address"""
    # Number of the register providing the base of the address
    base_reg: int
    # Offset decoded from the instruction
    offset: int
|
||||||
|
|
||||||
|
class Rtype:
    """R-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the R-type fields"""
        word = instr.bin
        self.funct7 = word >> 25
        self.rs2 = (word >> 20) & 31
        self.rs1 = (word >> 15) & 31
        self.funct3 = (word >> 12) & 7
        self.rd = (word >> 7) & 31
        # The opcode field is inst[6:0], i.e. 7 bits wide
        self.opcode = word & 0x7f
|
||||||
|
|
||||||
|
class Itype:
    """I-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the I-type fields"""
        word = instr.bin
        self.rs1 = (word >> 15) & 31
        self.funct3 = (word >> 12) & 7
        self.rd = (word >> 7) & 31
        # The opcode field is inst[6:0], i.e. 7 bits wide
        self.opcode = word & 0x7f
        # imm[11:0] is inst[31:20], sign bit at index 11
        self.imm = sign_ext(word >> 20, 11)
|
||||||
|
|
||||||
|
class Stype:
    """S-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the S-type fields"""
        word = instr.bin
        self.rs2 = (word >> 20) & 31
        self.rs1 = (word >> 15) & 31
        self.funct3 = (word >> 12) & 7
        # The opcode field is inst[6:0], i.e. 7 bits wide
        self.opcode = word & 0x7f
        # imm[11:5] is inst[31:25], imm[4:0] is inst[11:7]
        self.imm = sign_ext(
            ((word >> 25) << 5)
            | ((word >> 7) & 31),
            11)
|
||||||
|
|
||||||
|
class Btype:
    """B-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the B-type fields"""
        word = instr.bin
        self.rs2 = (word >> 20) & 31
        self.rs1 = (word >> 15) & 31
        self.funct3 = (word >> 12) & 7
        # The opcode field is inst[6:0], i.e. 7 bits wide
        self.opcode = word & 0x7f
        # imm[12|11|10:5|4:1] comes from inst[31|7|30:25|11:8]
        self.imm = sign_ext(
            ((word >> 31) << 12)
            | (((word >> 7) & 1) << 11)
            | (((word >> 25) & 0x3f) << 5)
            | (((word >> 8) & 15) << 1),
            12)
|
||||||
|
|
||||||
|
class Utype:
    """U-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the U-type fields"""
        word = instr.bin
        self.imm_31_12 = word >> 12
        # Same bits as rd (inst[11:7])
        self.imm_4_0 = (word >> 7) & 31
        self.rd = (word >> 7) & 31
        # The opcode field is inst[6:0], i.e. 7 bits wide
        self.opcode = word & 0x7f
        self.imm = self.imm_31_12 << 12
|
||||||
|
|
||||||
|
class Jtype:
    """J-type instructions"""
    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the J-type fields"""
        word = instr.bin
        self.rd = (word >> 7) & 31
        # The opcode field is inst[6:0], i.e. 7 bits wide
        self.opcode = word & 0x7f
        # imm[20|19:12|11|10:1] comes from inst[31|19:12|20|30:21]
        self.imm = sign_ext(
            ((word >> 31) << 20)
            | (((word >> 12) & 0xff) << 12)
            | (((word >> 20) & 1) << 11)
            | (((word >> 21) & 0x3ff) << 1),
            20)
|
||||||
|
|
||||||
|
class MOItype:
    """Memory ordering instructions"""
    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the memory ordering fields"""
        word = instr.bin
        self.fm = word >> 28
        # Single-bit flags stored in inst[27:24] (P*) and inst[23:20] (S*)
        self.PI = (word >> 27) & 1
        self.PO = (word >> 26) & 1
        self.PR = (word >> 25) & 1
        self.PW = (word >> 24) & 1
        self.SI = (word >> 23) & 1
        self.SO = (word >> 22) & 1
        self.SR = (word >> 21) & 1
        self.SW = (word >> 20) & 1
        self.rs1 = (word >> 15) & 31
        self.funct3 = (word >> 12) & 7
        self.rd = (word >> 7) & 31
        # The opcode field is inst[6:0], i.e. 7 bits wide
        self.opcode = word & 0x7f
|
||||||
|
|
||||||
|
class CRtype:
    """Compressed register"""

    control = ['C.JR', 'C.JALR']
    regreg = ['C.MV', 'C.ADD']

    def __init__(self, instr):
        """Decode the CR fields and resolve the exact instruction name"""
        word = instr.bin
        self.funct4 = word >> 12
        reg = (word >> 7) & 31
        self.rs2 = (word >> 2) & 31
        self.op = word & 3
        self.rs1 = reg
        name = instr.base()
        if name == 'C.J[AL]R/C.MV/C.ADD':
            # The lowest bit of funct4 and the zero-ness of rs2 / rd select
            # the actual instruction inside this group
            has_rs2 = self.rs2 != 0
            if self.funct4 & 1:
                if has_rs2:
                    name = 'C.ADD'
                elif reg == 0:
                    name = 'C.EBREAK'
                else:
                    name = 'C.JALR'
            else:
                name = 'C.MV' if has_rs2 else 'C.JR'
        # Only register-register operations get a destination register
        if name in CRtype.regreg:
            self.rd = reg
        self.name = name
|
||||||
|
|
||||||
|
class CItype:
    """Compressed immediate"""

    SPload = ['C.LWSP', 'C.LDSP', 'C.LQSP', 'C.FLWSP', 'C.FLDSP']
    constgen = ['C.LI', 'C.LUI']
    regimm = ['C.ADDI', 'C.ADDIW', 'C.ADDI16SP', 'C.SLLI']

    # Each decoder below gathers scattered instruction bits into a value.
    # offset[5] <- inst[12], offset[4:2] <- inst[6:4], offset[7:6] <- inst[3:2]
    Woffset = lambda i: (((i >> 12) & 1) << 5) | (((i >> 4) & 7) << 2) \
        | (((i >> 2) & 3) << 6)
    # offset[5] <- inst[12], offset[4:3] <- inst[6:5], offset[8:6] <- inst[4:2]
    Doffset = lambda i: (((i >> 12) & 1) << 5) | (((i >> 5) & 3) << 3) \
        | (((i >> 2) & 7) << 6)
    # offset[5] <- inst[12], offset[4] <- inst[6], offset[9:6] <- inst[5:2]
    Qoffset = lambda i: (((i >> 12) & 1) << 5) | (((i >> 6) & 1) << 4) \
        | (((i >> 2) & 15) << 6)
    # imm[5] <- inst[12], imm[4:0] <- inst[6:2]
    imm = lambda i: (((i >> 12) & 1) << 5) | ((i >> 2) & 31)
    # imm[9] <- inst[12], imm[4] <- inst[6], imm[6] <- inst[5],
    # imm[8:7] <- inst[4:3], imm[5] <- inst[2]
    immsp = lambda i: (((i >> 12) & 1) << 9) | (((i >> 6) & 1) << 4) \
        | (((i >> 5) & 1) << 6) | (((i >> 3) & 3) << 7) \
        | (((i >> 2) & 1) << 5)

    offset = {
        'C.LWSP': Woffset,
        'C.LDSP': Doffset,
        'C.LQSP': Qoffset,
        'C.FLWSP': Woffset,
        'C.FLDSP': Doffset,
    }

    def __init__(self, instr):
        """Decode the CI fields relevant to the instruction found in `instr`"""
        word = instr.bin
        self.funct3 = word >> 13
        reg = (word >> 7) & 31
        self.op = word & 3
        kind = instr.base()
        if kind == 'C.LUI/C.ADDI16SP':
            # Both share an encoding slot; the register number tells them apart
            kind = 'C.ADDI16SP' if reg == Reg.sp else 'C.LUI'
        if kind in CItype.SPload:
            self.rd = reg
            self.rs1 = Reg.sp
            # zero-extended offset
            self.offset = CItype.offset[kind](word)
        elif kind == 'C.LI':
            self.rd = reg
            self.imm = sign_ext(CItype.imm(word), 5)
        elif kind == 'C.LUI':
            self.rd = reg
            self.nzimm = sign_ext(CItype.imm(word) << 12, 17)
        elif kind in CItype.regimm:
            # Register-immediate operations read and write the same register
            self.rd = reg
            self.rs1 = reg
            if kind == 'C.ADDI':
                self.nzimm = sign_ext(CItype.imm(word), 5)
            elif kind == 'C.ADDIW':
                self.imm = sign_ext(CItype.imm(word), 5)
            elif kind == 'C.ADDI16SP':
                self.nzimm = sign_ext(CItype.immsp(word), 9)
            elif kind == 'C.SLLI':
                self.shamt = CItype.imm(word)
|
||||||
|
|
||||||
|
class CSStype:
    """Compressed stack-relative store"""

    # offset[5:2] <- inst[12:9], offset[7:6] <- inst[8:7]
    Woffset = lambda i: (((i >> 9) & 15) << 2) | (((i >> 7) & 3) << 6)
    # offset[5:3] <- inst[12:10], offset[8:6] <- inst[9:7]
    Doffset = lambda i: (((i >> 10) & 7) << 3) | (((i >> 7) & 7) << 6)
    # offset[5:4] <- inst[12:11], offset[9:6] <- inst[10:7]
    Qoffset = lambda i: (((i >> 11) & 3) << 4) | (((i >> 7) & 15) << 6)

    offset = {
        'C.SWSP': Woffset,
        'C.SDSP': Doffset,
        'C.SQSP': Qoffset,
        'C.FSWSP': Woffset,
        'C.FSDSP': Doffset,
    }

    def __init__(self, instr):
        """Decode the CSS fields; the base register is always sp"""
        word = instr.bin
        self.funct3 = word >> 13
        self.rs1 = Reg.sp
        self.rs2 = (word >> 2) & 31
        self.op = word & 3
        # zero-extended offset, decoder chosen from the instruction name
        decode_offset = CSStype.offset[instr.base()]
        self.offset = decode_offset(word)
|
||||||
|
|
||||||
|
class CIWtype:
    """Compressed wide immediate"""
    def __init__(self, instr):
        """Decode the CIW fields of `instr`"""
        word = instr.bin
        self.funct3 = word >> 13
        # 3-bit register field, mapped onto register numbers 8 to 15
        self.rd = ((word >> 2) & 7) + 8
        self.op = word & 3
        # zero-extended (unsigned) non-zero immediate, gathered as
        # nzuimm[5:4] <- inst[12:11], nzuimm[9:6] <- inst[10:7],
        # nzuimm[2] <- inst[6], nzuimm[3] <- inst[5]
        pieces = ((11, 3, 4), (7, 15, 6), (6, 1, 2), (5, 1, 3))
        nzuimm = 0
        for shift, mask, position in pieces:
            nzuimm |= ((word >> shift) & mask) << position
        self.nzuimm = nzuimm
        if instr.base() == 'C.ADDI4SPN':
            self.rs1 = Reg.sp
|
||||||
|
|
||||||
|
def CLS_Woffset(i):
    """offset[5:3] <- inst[12:10], offset[2] <- inst[6], offset[6] <- inst[5]"""
    high = ((i >> 10) & 7) << 3
    return high | (((i >> 6) & 1) << 2) | (((i >> 5) & 1) << 6)


def CLS_Doffset(i):
    """offset[5:3] <- inst[12:10], offset[7:6] <- inst[6:5]"""
    high = ((i >> 10) & 7) << 3
    return high | (((i >> 5) & 3) << 6)


def CLS_Qoffset(i):
    """offset[5:4] <- inst[12:11], offset[8] <- inst[10], offset[7:6] <- inst[6:5]"""
    high = ((i >> 11) & 3) << 4
    return high | (((i >> 10) & 1) << 8) | (((i >> 5) & 3) << 6)
|
||||||
|
|
||||||
|
class CLtype:
    """Compressed load"""

    offset = {
        'C.LW': CLS_Woffset,
        'C.LD': CLS_Doffset,
        'C.LQ': CLS_Qoffset,
        'C.FLW': CLS_Woffset,
        'C.FLD': CLS_Doffset,
    }

    def __init__(self, instr):
        """Decode the CL fields of `instr`"""
        word = instr.bin
        self.funct3 = word >> 13
        # 3-bit register fields, mapped onto register numbers 8 to 15
        self.rs1 = ((word >> 7) & 7) + 8
        self.rd = ((word >> 2) & 7) + 8
        self.op = word & 3
        # zero-extended offset, decoder chosen from the instruction name
        decode_offset = CLtype.offset[instr.base()]
        self.offset = decode_offset(word)
|
||||||
|
|
||||||
|
class CStype:
    """Compressed store"""

    offset = {
        'C.SW': CLS_Woffset,
        'C.SD': CLS_Doffset,
        'C.SQ': CLS_Qoffset,
        'C.FSW': CLS_Woffset,
        'C.FSD': CLS_Doffset,
    }

    def __init__(self, instr):
        """Decode the CS fields of `instr`"""
        word = instr.bin
        self.funct3 = word >> 13
        # 3-bit register fields, mapped onto register numbers 8 to 15
        self.rs1 = ((word >> 7) & 7) + 8
        self.rs2 = ((word >> 2) & 7) + 8
        self.op = word & 3
        # zero-extended offset, decoder chosen from the instruction name
        decode_offset = CStype.offset[instr.base()]
        self.offset = decode_offset(word)
|
||||||
|
|
||||||
|
class CAtype:
    """Compressed arithmetic"""
    def __init__(self, instr):
        """Decode the CA fields of `instr`"""
        word = instr.bin
        self.funct6 = word >> 10
        # Destination and first source share one 3-bit field (registers 8 to 15)
        self.rd = self.rs1 = ((word >> 7) & 7) + 8
        self.funct2 = (word >> 5) & 3
        self.rs2 = ((word >> 2) & 7) + 8
        self.op = word & 3
|
||||||
|
|
||||||
|
class CBtype:
    """Compressed branch"""

    branch = ['C.BEQZ', 'C.BNEZ']
    regimm = ['C.SRLI', 'C.SRAI', 'C.ANDI']

    def __init__(self, instr):
        """Decode the CB fields of `instr`"""
        word = instr.bin
        kind = instr.base()
        self.funct3 = word >> 13
        # Raw 3-bit field inst[12:10]; replaced below for branches
        self.offset = (word >> 10) & 7
        self.rs1 = ((word >> 7) & 7) + 8
        self.op = word & 3
        if kind in CBtype.branch:
            # offset[8] <- inst[12], offset[4:3] <- inst[11:10],
            # offset[7:6] <- inst[6:5], offset[2:1] <- inst[4:3],
            # offset[5] <- inst[2]
            pieces = ((12, 1, 8), (10, 3, 3), (5, 3, 6), (3, 3, 1), (2, 1, 5))
            raw = 0
            for shift, mask, position in pieces:
                raw |= ((word >> shift) & mask) << position
            self.offset = sign_ext(raw, 8)
        if kind in CBtype.regimm:
            amount = CItype.imm(word)
            # Only C.ANDI takes a sign-extended immediate
            self.shamt = sign_ext(amount, 5) if kind == 'C.ANDI' else amount
            self.rd = self.rs1
|
||||||
|
|
||||||
|
class CJtype:
    """Compressed jump"""
    def __init__(self, instr):
        """Decode the CJ fields of `instr` (C.J or C.JAL only)"""
        self.funct3 = instr.bin >> 13
        assert instr.base() in ['C.J', 'C.JAL']
        # Signed byte offset, sign bit at index 11
        self.offset = sign_ext(CJtype.offset(instr.bin), 11)
        # Raw 11-bit field inst[12:2], bits left in encoding order
        self.jump_target = (instr.bin >> 2) & 0x7ff
        self.op = instr.bin & 3

    # Gathers inst[12:2] = offset[11|4|9:8|10|6|7|3:1|5] into offset[11:1].
    # offset[4] comes from inst[11] (right shift) and offset[3:1] is the
    # 3-bit field inst[5:3].
    offset = lambda i: (((i >> 12) & 1) << 11) | (((i >> 11) & 1) << 4) \
        | (((i >> 9) & 3) << 8) | (((i >> 8) & 1) << 10) \
        | (((i >> 7) & 1) << 6) | (((i >> 6) & 1) << 7) \
        | (((i >> 3) & 7) << 1) | (((i >> 2) & 1) << 5)
|
||||||
|
|
||||||
|
class Instr:
    """Instructions"""

    # Names of 16-bit instructions, indexed by [inst[1:0]][inst[15:13]]
    table_16_4_RV32 = [
        ['C.ADDI4SPN', 'C.FLD', 'C.LW', 'C.FLW',
         'Reserved', 'C.FSD', 'C.SW', 'C.FSW'],
        ['C.ADDI', 'C.JAL', 'C.LI', 'C.LUI/C.ADDI16SP',
         'MISC-ALU', 'C.J', 'C.BEQZ', 'C.BNEZ'],
        ['C.SLLI', 'C.FLDSP', 'C.LWSP', 'C.FLWSP',
         'C.J[AL]R/C.MV/C.ADD', 'C.FSDSP', 'C.SWSP', 'C.FSWSP'],
    ]

    # Names of other instructions, indexed by [inst[6:5]][inst[4:2]]
    table_24_1 = [
        ['LOAD', 'LOAD-FP', 'custom-0', 'MISC-MEM', 'OP-IMM', 'AUIPC', 'OP-IMM-32', '48b'],
        ['STORE', 'STORE-FP', 'custom-1', 'AMO', 'OP', 'LUI', 'OP-32', '64b'],
        ['MADD', 'MSUB', 'NMSUB', 'NMADD', 'OP-FP', 'reserved', 'custom-2/rv128', '48b'],
        ['BRANCH', 'JALR', 'reserved', 'JAL', 'SYSTEM', 'reserved', 'custom-3/rv128', '80b'],
    ]

    # Class used to decode the fields of each supported base name
    type_of_base = {
        'OP-IMM': Itype,
        'LUI': Utype,
        'AUIPC': Utype,
        'OP': Rtype,
        'OP-32': Rtype,
        'JAL': Jtype,
        'JALR': Itype,
        'BRANCH': Btype,
        'LOAD': Itype,
        'STORE': Stype,
        'SYSTEM': Itype,
        'C.LWSP': CItype,
        'C.LDSP': CItype,
        'C.LQSP': CItype,
        'C.FLWSP': CItype,
        'C.FLDSP': CItype,
        'C.SWSP': CSStype,
        'C.SDSP': CSStype,
        'C.SQSP': CSStype,
        'C.FSWSP': CSStype,
        'C.FSDSP': CSStype,
        'C.LW': CLtype,
        'C.LD': CLtype,
        'C.LQ': CLtype,
        'C.FLW': CLtype,
        'C.FLD': CLtype,
        'C.SW': CStype,
        'C.SD': CStype,
        'C.SQ': CStype,
        'C.FSW': CStype,
        'C.FSD': CStype,
        'C.J': CJtype,
        'C.JAL': CJtype,
        'C.J[AL]R/C.MV/C.ADD': CRtype,
        'C.BEQZ': CBtype,
        'C.BNEZ': CBtype,
        'C.LI': CItype,
        'C.LUI/C.ADDI16SP': CItype,
        'C.ADDI': CItype,
        'C.ADDIW': CItype,
        'C.ADDI4SPN': CIWtype,
        'C.SLLI': CItype,
        'MISC-ALU': CAtype,
    }

    iloads = ['C.LW', 'C.LWSP', 'LOAD']
    floads = ['C.FLD', 'C.FLW', 'C.FLDSP', 'C.FLWSP', 'LOAD-FP']
    istores = ['C.SW', 'C.SWSP', 'STORE']
    fstores = ['C.FSD', 'C.FSW', 'C.FSDSP', 'C.FSWSP', 'STORE-FP']
    loads = iloads + floads
    stores = istores + fstores

    def __init__(self, bincode):
        """Store the instruction word `bincode` and its two lowest bits"""
        self.bin = bincode
        self.inst_1_0 = bincode & 3

    def base(self):
        """Look up the base name of the instruction in the decoding tables"""
        word = self.bin
        if self.is_compressed():
            return Instr.table_16_4_RV32[word & 3][(word >> 13) & 7]
        return Instr.table_24_1[(word >> 5) & 3][(word >> 2) & 7]

    def fields(self):
        """Build the field object matching the base name (KeyError if unsupported)"""
        decoder = Instr.type_of_base[self.base()]
        return decoder(self)

    def is_compressed(self):
        """True when the two lowest bits are not both set (C extension)"""
        return (self.bin & 3) < 3

    def size(self):
        """Number of bytes taken by the instruction: 2 if compressed, else 4"""
        if self.is_compressed():
            return 2
        return 4

    def is_load(self):
        """True when the base name is one of the load names"""
        return self.base() in Instr.loads

    def is_store(self):
        """True when the base name is one of the store names"""
        return self.base() in Instr.stores

    def is_branch(self):
        """True for conditional branches (compressed or not)"""
        return self.base() in ['C.BEQZ', 'C.BNEZ', 'BRANCH']

    def is_regjump(self):
        """True for jumps whose target comes from a register"""
        name = self.base()
        if name in ['JALR']:
            return True
        if name != 'C.J[AL]R/C.MV/C.ADD':
            return False
        # The compressed group must be resolved to its exact instruction
        return self.fields().name in ['C.JALR', 'C.JR']

    def is_jump(self):
        """True for jumps whose target comes from an immediate"""
        return self.base() in ['JAL', 'C.JAL', 'C.J']

    def is_muldiv(self):
        """True for OP / OP-32 instructions whose funct7 equals 1"""
        if self.base() not in ['OP', 'OP-32']:
            return False
        return self.fields().funct7 == 1

    def offset(self):
        """Return the `offset` field if the instruction has one, else `imm`"""
        decoded = self.fields()
        try:
            return decoded.offset
        except AttributeError:
            return decoded.imm

    def addr_fields(self):
        """Pair the rs1 register with the offset as an AddrFields"""
        base_reg = self.fields().rs1
        return AddrFields(base_reg, self.offset())

    def has_WAW_from(self, other):
        """b.has_WAW_from(a): both write the same register, other than zero"""
        a = other.fields()
        b = self.fields()
        both_write = hasattr(a, 'rd') and hasattr(b, 'rd')
        return both_write and a.rd == b.rd and a.rd != Reg.zero

    def has_RAW_from(self, other):
        """b.has_RAW_from(a): b reads (rs1 or rs2) the register written by a"""
        a = other.fields()
        b = self.fields()
        if not hasattr(a, 'rd') or a.rd == Reg.zero:
            return False
        return any(
            hasattr(b, source) and getattr(b, source) == a.rd
            for source in ('rs1', 'rs2')
        )

    def has_WAR_from(self, other):
        """b.has_WAR_from(a): b writes a register read (rs1 or rs2) by a"""
        a = other.fields()
        b = self.fields()
        if not hasattr(b, 'rd') or b.rd == Reg.zero:
            return False
        return any(
            hasattr(a, source) and getattr(a, source) == b.rd
            for source in ('rs1', 'rs2')
        )
|
666
perf-model/model.py
Normal file
666
perf-model/model.py
Normal file
|
@ -0,0 +1,666 @@
|
||||||
|
# Copyright 2024 Thales Silicon Security
|
||||||
|
#
|
||||||
|
# Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||||
|
# You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||||
|
#
|
||||||
|
# Original Author: Côme ALLART - Thales
|
||||||
|
|
||||||
|
"""
|
||||||
|
Performance model of the cva6
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from enum import Enum
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
#from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
from isa import Instr, Reg
|
||||||
|
|
||||||
|
# Kinds of events that can be attached to an instruction (see Event.kind).
# Built with the functional Enum API from the list of member names below.
EventKind = Enum('EventKind', [
    'WAW', 'WAR', 'RAW',
    'BMISS', 'BHIT',
    'STRUCT',
    'issue', 'done', 'commit',
])
|
||||||
|
|
||||||
|
def to_signed(value, xlen=32):
    """Return `value` minus 2**xlen when bit xlen-1 (or above) is set, else `value`"""
    sign_set = value >> (xlen - 1)
    return value - (1 << xlen) if sign_set else value
|
||||||
|
|
||||||
|
class Event:
    """Something that happened to an instruction at a given cycle"""
    def __init__(self, kind, cycle):
        self.kind, self.cycle = kind, cycle

    def __repr__(self):
        return "@{}: {}".format(self.cycle, self.kind)
|
||||||
|
|
||||||
|
class Instruction(Instr):
    """Represents a RISC-V instruction with annotations"""

    def __init__(self, line, address, hex_code, mnemo):
        """
        line: stored as is in `self.line`
        address: address of the instruction, as a hexadecimal string
        hex_code: encoding of the instruction, as a hexadecimal string
        mnemo: textual form of the instruction
        """
        Instr.__init__(self, int(hex_code, base=16))
        self.line = line
        self.address = int(address, base=16)
        self.hex_code = hex_code
        self.mnemo = mnemo
        self.events = []

    def mnemo_name(self):
        """The name of the instruction (first word of the mnemo)"""
        return self.mnemo.split()[0]

    def next_addr(self):
        """Address of next instruction"""
        return self.address + self.size()

    # Registers treated as holding a return address
    _ret_regs = [Reg.ra, Reg.t0]

    def is_ret(self):
        "Does CVA6 consider this instruction as a ret?"
        f = self.fields()
        # Strange conditions, no imm check, no rd-discard check
        return self.is_regjump() \
            and f.rs1 in Instruction._ret_regs \
            and (self.is_compressed() or f.rs1 != f.rd)

    def is_call(self):
        "Does CVA6 consider this instruction as a call?"
        base = self.base()
        f = self.fields()
        # `and` binds tighter than `or`; parentheses only make it explicit
        return base == 'C.JAL' \
            or (base == 'C.J[AL]R/C.MV/C.ADD' and f.name == 'C.JALR') \
            or (base in ['JAL', 'JALR'] and f.rd in Instruction._ret_regs)

    def __repr__(self):
        return self.mnemo
|
||||||
|
|
||||||
|
@dataclass
class Entry:
    """A scoreboard entry"""
    # Instruction held by this entry
    instr: Instruction
    # NOTE(review): this name has no type annotation, so @dataclass does not
    # turn it into a field: it is a plain class attribute defaulting to 0 and
    # is not a constructor argument -- confirm this is intended.
    cycles_since_issue = 0
    # Completion flag, shown as "DONE" / "WIP " by __repr__
    done: bool = False

    def __repr__(self):
        # Format: "<status> 0x<address as 8 uppercase hex digits>:`<instr>` for <cycles>"
        status = "DONE" if self.done else "WIP "
        addr = f"0x{self.instr.address:08X}"
        return f"{status} {addr}:`{self.instr}` for {self.cycles_since_issue}"
|
||||||
|
|
||||||
|
@dataclass
class LastIssue:
    """To store the last issued instruction"""
    # The instruction that was issued last
    instr: Instruction
    # Cycle number associated with that issue
    issue_cycle: int
|
||||||
|
|
||||||
|
class IqLen:
    """Instruction queue reduced to a single size counter.

    Only the amount of fetched data (`len`) is tracked, not the instructions
    themselves. `fetch_size` is the amount added per fetch, and `new_fetch`
    records whether a fetch happened since the last flush or jump.
    """

    def __init__(self, fetch_size, debug=False):
        """Create a queue holding one fetch.

        The fetch width is the smallest power of two that is at least 4 and
        at least `fetch_size`.
        """
        width = 4
        while width < fetch_size:
            width *= 2
        self.fetch_size = width
        self.debug = debug
        self.len = width
        self.new_fetch = True

    def fetch(self):
        """Add one fetch worth of data to the queue."""
        self.len = self.len + self.fetch_size
        self._debug(f"fetched {self.fetch_size}, got {self.len}")
        self.new_fetch = True

    def flush(self):
        """Empty the queue (branch miss or exception)."""
        self.len = 0
        self._debug(f"flushed, got {self.len}")
        self.new_fetch = False

    def jump(self):
        """Lose a fetch cycle and truncate (jump, branch hit taken)."""
        if self.new_fetch:
            # The most recent fetch is discarded, at most once per fetch.
            self.len -= self.fetch_size
            self._debug(f"jumping, removed {self.fetch_size}, got {self.len}")
            self.new_fetch = False
        self._truncate()
        self._debug(f"jumped, got {self.len}")

    def has(self, instr):
        """Tell whether enough data is queued to issue `instr`."""
        available = self.len
        if self._is_crossword(instr):
            # An instruction straddling two fetch words needs the next word.
            available -= self.fetch_size - 2
        needed = instr.size()
        self._debug(f"comparing {available} to {needed} ({instr})")
        return available >= needed

    def remove(self, instr):
        """Account for `instr` leaving the queue."""
        consumed = instr.size()
        self.len -= consumed
        self._debug(f"removed {consumed}, got {self.len}")
        self._truncate(self._addr_index(instr.next_addr()))
        if instr.is_jump():
            self.jump()

    def _addr_index(self, addr):
        """Return the offset of `addr` inside a fetch word."""
        mask = self.fetch_size - 1
        return addr & mask

    def _is_crossword(self, instr):
        """Tell whether a non-compressed `instr` starts in the last slot."""
        last_slot = self.fetch_size - 2
        if self._addr_index(instr.address) != last_slot:
            return False
        return not instr.is_compressed()

    def _truncate(self, index=0):
        """Drop queued data so that the queue lines up with `index`."""
        occupancy = self.fetch_size - self._addr_index(self.len)
        excess = index - occupancy
        if excess < 0:
            excess += self.fetch_size
        self.len -= excess
        self._debug(f"truncated, removed {excess}, got {self.len}")

    def _debug(self, message):
        """Print `message` when debugging is enabled."""
        if not self.debug:
            return
        print(f"iq: {message}")
|
||||||
|
|
||||||
|
class Ras:
    """Return Address Stack.

    `push` stores return addresses and `drop` pops one, remembering it in
    `last_dropped`. `read` reports that remembered address rather than the
    current top of the stack.
    """

    def __init__(self, depth=2, debug=False):
        """Create an empty stack that keeps at most `depth - 1` addresses."""
        self.debug = debug
        self.stack = []
        self.last_dropped = None
        self.depth = depth - 1

    def push(self, addr):
        """Push an address on the stack, forgetting the oldest one if full."""
        self.stack.append(addr)
        self._debug(f"pushed 0x{addr:08X}")
        if len(self.stack) <= self.depth:
            return
        del self.stack[0]
        self._debug("overflown")

    def drop(self):
        """Pop the newest address and remember it as the last dropped one."""
        self._debug("dropping")
        if self.stack:
            self.last_dropped = self.stack.pop()
            return
        self.last_dropped = None
        self._debug("was already empty")

    def read(self):
        """Return the address removed by the latest drop, or None."""
        self._debug("reading")
        top = self.last_dropped
        if top is None:
            self._debug("was empty")
            return None
        self._debug(f"read 0x{top:08X}")
        return top

    def resolve(self, instr):
        """Drop on a ret, then push the next address on a call."""
        self._debug(f"issuing {instr}")
        if instr.is_ret():
            self._debug("detected ret")
            self.drop()
        if instr.is_call():
            self._debug("detected call")
            self.push(instr.next_addr())

    def _debug(self, message):
        """Print `message` when debugging is enabled."""
        if not self.debug:
            return
        print(f"RAS: {message}")
|
||||||
|
|
||||||
|
class Bht:
    """Branch History Table made of 2-bit saturating counters."""

    @dataclass
    class Entry:
        """A BHT entry: a validity flag and a saturating counter."""
        valid: bool = False
        sat_counter: int = 0

    def __init__(self, entries=128):
        """Create a table of `entries` invalid entries."""
        self.contents = [Bht.Entry() for _ in range(entries)]

    def predict(self, addr):
        """Return the taken prediction for `addr`, or None when unknown."""
        slot = self.contents[self._index(addr)]
        if not slot.valid:
            return None
        return slot.sat_counter >= 2

    def resolve(self, addr, taken):
        """Record the outcome of the branch at `addr`."""
        slot = self.contents[self._index(addr)]
        slot.valid = True
        # The counter saturates at 0 and 3.
        if taken and slot.sat_counter < 3:
            slot.sat_counter += 1
        elif not taken and slot.sat_counter > 0:
            slot.sat_counter -= 1

    def _index(self, addr):
        """Map an address to a table index, ignoring its lowest bit."""
        halfword = addr >> 1
        return halfword % len(self.contents)
|
||||||
|
|
||||||
|
# Functional units an instruction can be dispatched to (see to_fu)
Fu = Enum('Fu', ['ALU', 'MUL', 'BRANCH', 'LDU', 'STU'])
|
||||||
|
|
||||||
|
# We have
|
||||||
|
# - FLU gathering ALU + BRANCH (+ CSR, not significant in CoreMark)
|
||||||
|
# - LSU for loads and stores
|
||||||
|
# - FP gathering MUL + second ALU (+ Floating, unused in CoreMark)
|
||||||
|
# This way we do not have more write-back ports than currently with F
|
||||||
|
|
||||||
|
def to_fu(instr):
    """Return the functional unit that executes `instr`.

    Branches and register jumps go to BRANCH, multiplications/divisions to
    MUL, loads to LDU, stores to STU and everything else to ALU.
    """
    if instr.is_branch() or instr.is_regjump():
        return Fu.BRANCH
    # Checked in order; the first matching predicate wins.
    checks = (
        (instr.is_muldiv, Fu.MUL),
        (instr.is_load, Fu.LDU),
        (instr.is_store, Fu.STU),
    )
    for matches, unit in checks:
        if matches():
            return unit
    return Fu.ALU
|
||||||
|
|
||||||
|
class FusBusy:
    """Busy flags of the functional units.

    Issuing on one unit may also mark other units busy. `cycle` clears the
    flags at the start of a new cycle.
    """

    def __init__(self, has_alu2=False):
        """Start with every unit free; `has_alu2` enables a second ALU."""
        self.has_alu2 = has_alu2

        self.alu = self.mul = self.branch = False
        self.ldu = self.stu = self.alu2 = False

        self.issued_mul = False

    def _alu2_ready(self):
        """Tell whether a second ALU exists and is free."""
        return self.has_alu2 and not self.alu2

    def is_ready(self, fu):
        """Tell whether functional unit `fu` can accept an instruction."""
        readiness = {
            Fu.ALU: self._alu2_ready() or not self.alu,
            Fu.MUL: not self.mul,
            Fu.BRANCH: not self.branch,
            Fu.LDU: not self.ldu,
            Fu.STU: not self.stu,
        }
        return readiness[fu]

    def is_ready_for(self, instr):
        """Tell whether the unit needed by `instr` is free."""
        unit = to_fu(instr)
        return self.is_ready(unit)

    def issue(self, instr):
        """Mark busy the units taken by issuing `instr`."""
        handlers = {
            Fu.ALU: FusBusy.issue_alu,
            Fu.MUL: FusBusy.issue_mul,
            Fu.BRANCH: FusBusy.issue_branch,
            Fu.LDU: FusBusy.issue_ldu,
            Fu.STU: FusBusy.issue_stu,
        }
        handler = handlers[to_fu(instr)]
        return handler(self)

    def issue_mul(self):
        """Take the multiplier and remember that a MUL was issued."""
        self.issued_mul = True
        self.mul = True

    def issue_alu(self):
        """Take the second ALU when it is free, else the main ALU."""
        if self._alu2_ready():
            self.alu2 = True
            return
        assert not self.alu
        # The main ALU and the branch unit are taken together.
        self.alu = True
        self.branch = True

    def issue_branch(self):
        """Take the branch unit, the main ALU and the store unit."""
        self.alu = True
        self.branch = True
        # Stores are not allowed yet
        self.stu = True

    def issue_ldu(self):
        """Take both the load and the store units."""
        self.ldu = True
        self.stu = True

    def issue_stu(self):
        """Take both the store and the load units."""
        self.stu = True
        self.ldu = True

    def cycle(self):
        """Start a new cycle: free every unit.

        The ALU and the branch unit stay busy when a MUL was issued in the
        previous cycle.
        """
        after_mul = self.issued_mul
        self.alu = after_mul
        self.branch = after_mul
        self.mul = False
        self.ldu = False
        self.stu = False
        self.alu2 = False
        self.issued_mul = False
|
||||||
|
|
||||||
|
class Model:
    """Models the scheduling of CVA6

    Instructions loaded from a trace wait in `instr_queue`, move to
    `scoreboard` when they are issued and end up in `retired` once committed.
    Every event logged on an instruction is also appended to `log`.
    """

    # Matches one instruction line of the trace. load_file reads group 2 (the
    # address digits following the "0x00000000" prefix), group 3 (the
    # instruction code between parentheses) and group 5 (the mnemonic text).
    # Group 4 is the number following "@".
    re_instr = re.compile(
        r"([a-z]+)\s+0:\s*0x00000000([0-9a-f]+)\s*\(([0-9a-fx]+)\)\s*@\s*([0-9]+)\s*(.*)"
    )

    def __init__(
            self,
            debug=False,
            issue=1,
            commit=2,
            sb_len=8,
            fetch_size=None,
            has_forwarding=True,
            has_renaming=True):
        """Create an empty model.

        debug enables debug printing in the model, the RAS and the
        instruction queue. issue and commit are the number of issue and
        commit attempts per cycle. sb_len is the scoreboard capacity.
        fetch_size is passed to IqLen and defaults to 4 * issue.
        has_forwarding and has_renaming are read by find_data_hazards.
        """
        self.ras = Ras(debug=debug)
        self.bht = Bht()
        self.instr_queue = []
        self.scoreboard = []
        # A second ALU is modelled only when more than one issue port exists
        self.fus = FusBusy(issue > 1)
        self.last_issued = None
        self.last_committed = None
        self.retired = []
        self.sb_len = sb_len
        self.debug = debug
        self.iqlen = IqLen(fetch_size or 4 * issue, debug)
        self.issue_width = issue
        self.commit_width = commit
        self.has_forwarding = has_forwarding
        self.has_renaming = has_renaming
        self.log = []

    def log_event_on(self, instr, kind, cycle):
        """Log an event on the instruction"""
        if self.debug:
            print(f"{instr}: {kind}")
        event = Event(kind, cycle)
        # The event is stored both on the instruction and in the global log
        instr.events.append(event)
        self.log.append((event, instr))

    def predict_branch(self, instr):
        """Predict if branch is taken or not"""
        pred = self.bht.predict(instr.address)
        if pred is not None:
            return pred
        # No BHT information: predict taken when bit 31 (or above) of the
        # offset is set
        return instr.offset() >> 31 != 0

    def predict_regjump(self, instr):
        """Predict destination address of indirect jump"""
        if instr.is_ret():
            # 0 is used when the RAS has nothing to offer
            return self.ras.read() or 0
        return 0 # always miss, as there is no btb yet

    def predict_pc(self, last):
        """Predict next program counter depending on last issued instruction"""
        if last.is_branch():
            taken = self.predict_branch(last)
            offset = to_signed(last.offset()) if taken else last.size()
            return last.address + offset
        if last.is_regjump():
            return self.predict_regjump(last)
        # Nothing to predict for other instructions
        return None

    def issue_manage_last_branch(self, instr, cycle):
        """Flush IQ if branch miss, jump if branch hit"""
        if self.last_issued is not None:
            last = self.last_issued.instr
            pred = self.predict_pc(last)
            if pred is not None:
                bmiss = pred != instr.address
                # The previous instruction counts as resolved 6 cycles after
                # its issue; a miss only flushes before that point
                resolved = cycle >= self.last_issued.issue_cycle + 6
                if bmiss and not resolved:
                    self.iqlen.flush()
                branch = EventKind.BMISS if bmiss else EventKind.BHIT
                # Log the hit/miss only once per instruction
                if branch not in [e.kind for e in instr.events]:
                    self.log_event_on(instr, branch, cycle)
                taken = instr.address != last.next_addr()
                if taken and not bmiss:
                    # last (not instr) was like a jump
                    self.iqlen.jump()

    def commit_manage_last_branch(self, instr, cycle):
        "Resolve branch prediction"
        if self.last_committed is not None:
            last = self.last_committed
            if last.is_branch():
                # The branch was taken if instr is not its sequential successor
                taken = instr.address != last.next_addr()
                self.bht.resolve(last.address, taken)
        self.last_committed = instr

    def find_data_hazards(self, instr, cycle):
        """Detect and log data hazards"""
        found = False
        for entry in self.scoreboard:
            # WAW only blocks when renaming is disabled
            if instr.has_WAW_from(entry.instr) and not self.has_renaming:
                self.log_event_on(instr, EventKind.WAW, cycle)
                found = True
            # RAW is harmless when the producer is done and forwarding exists
            can_forward = self.has_forwarding and entry.done
            if instr.has_RAW_from(entry.instr) and not can_forward:
                self.log_event_on(instr, EventKind.RAW, cycle)
                found = True
        return found

    def find_structural_hazard(self, instr, cycle):
        """Detect and log structural hazards"""
        if not self.fus.is_ready_for(instr):
            self.log_event_on(instr, EventKind.STRUCT, cycle)
            return True
        return False

    def try_issue(self, cycle):
        """Try to issue an instruction"""
        if len(self.instr_queue) == 0 or len(self.scoreboard) >= self.sb_len:
            return
        can_issue = True
        instr = self.instr_queue[0]
        # Every check runs even after one has failed, so all hazards get logged
        if self.find_data_hazards(instr, cycle):
            can_issue = False
        if self.find_structural_hazard(instr, cycle):
            can_issue = False
        self.issue_manage_last_branch(instr, cycle)
        if not self.iqlen.has(instr):
            can_issue = False
        if can_issue:
            self.iqlen.remove(instr)
            instr = self.instr_queue.pop(0)
            self.log_event_on(instr, EventKind.issue, cycle)
            entry = Entry(instr)
            self.scoreboard.append(entry)
            self.fus.issue(instr)
            self.last_issued = LastIssue(instr, cycle)
            self.ras.resolve(instr)

    def try_execute(self, cycle):
        """Try to execute instructions"""
        for entry in self.scoreboard:
            entry.cycles_since_issue += 1
            instr = entry.instr
            # One cycle by default, two for loads, stores and mul/div
            duration = 1
            if instr.is_load() or instr.is_store():
                duration = 2
            if instr.is_muldiv():
                duration = 2
            if entry.cycles_since_issue == duration:
                self.log_event_on(instr, EventKind.done, cycle)
                entry.done = True

    def try_commit(self, cycle, commit_port):
        """Try to commit an instruction"""
        if len(self.scoreboard) == 0:
            return
        # Commit is in order: only the oldest entry is considered
        entry = self.scoreboard[0]
        can_commit = True
        # Stores can only commit on port 0
        if commit_port > 0:
            if entry.instr.is_store():
                can_commit = False
        if not entry.done:
            can_commit = False
        if can_commit:
            instr = self.scoreboard.pop(0).instr
            self.log_event_on(instr, EventKind.commit, cycle)
            self.retired.append(instr)
            self.commit_manage_last_branch(instr, cycle)

    def run_cycle(self, cycle):
        """Runs a cycle"""
        # Order within a cycle: free units, commit, execute, issue, fetch
        self.fus.cycle()
        for commit_port in range(self.commit_width):
            self.try_commit(cycle, commit_port)
        self.try_execute(cycle)
        for _ in range(self.issue_width):
            self.try_issue(cycle)
        self.iqlen.fetch()

    def load_file(self, path):
        """Fill a model from a trace file"""
        with open(path, "r", encoding="utf8") as file:
            for line in [l.strip() for l in file]:
                # Lines that do not match the pattern are ignored
                found = Model.re_instr.search(line)
                if found:
                    address = found.group(2)
                    hex_code = found.group(3)
                    mnemo = found.group(5)
                    instr = Instruction(line, address, hex_code, mnemo)
                    self.instr_queue.append(instr)

    def run(self, cycles=None):
        """Run until completion

        Stops when both the instruction queue and the scoreboard are empty,
        or once the cycle counter exceeds `cycles` when it is given. Returns
        the number of cycles run.
        """
        cycle = 0
        while len(self.instr_queue) > 0 or len(self.scoreboard) > 0:
            self.run_cycle(cycle)
            if self.debug:
                print(f"Scoreboard @{cycle}")
                for entry in self.scoreboard:
                    print(f" {entry}")
                print(f"iqlen = {self.iqlen.len}")
                print()
            cycle += 1

            if cycles is not None and cycle > cycles:
                break
        return cycle
|
||||||
|
|
||||||
|
def write_trace(output_file, instructions):
    """Write cycle-annotated trace

    For each instruction, the "@ <number>" part of its original trace line is
    replaced with the cycle of its commit event, which must be the last event
    logged on it. The resulting lines are written to `output_file`.
    """
    pattern = re.compile(r"@\s*[0-9]+")

    lines = []
    for instr in instructions:
        commit_event = instr.events[-1]
        assert commit_event.kind == EventKind.commit
        cycle = commit_event.cycle
        annotated = pattern.sub(f"@ {cycle}", instr.line)
        #if EventKind.STRUCT in [e.kind for e in instr.events]:
        #    annotated += " #STRUCT"
        #if EventKind.RAW in [e.kind for e in instr.events]:
        #    annotated += " #RAW"
        lines.append(f"{annotated}\n")

    # Same encoding as the one used to read the trace, whatever the locale
    with open(output_file, 'w', encoding="utf8") as f:
        f.writelines(lines)
|
||||||
|
|
||||||
|
def print_data(name, value, ts=24, sep='='):
    """Print 'name <sep> value' with the separator aligned at column `ts`.

    Names longer than `ts` are printed without padding.
    """
    padding = ' ' * (ts - len(name))
    line = "{}{} {} {}".format(name, padding, sep, value)
    print(line)
|
||||||
|
|
||||||
|
def display_scores(scores):
    """Display a 3D graph of scores against commit/issue-wide

    The bar coordinates are computed from the 2D `scores` table, but the
    plotting calls themselves are currently commented out.
    """
    bars = [
        (row_index, col_index, score)
        for row_index, row in enumerate(scores)
        for col_index, score in enumerate(row)
    ]

    # One bar per score: anchored at height 0, 0.5 wide in both directions
    x = [bar[0] for bar in bars]
    y = [bar[1] for bar in bars]
    z = [0 for _ in bars]
    dx = [.5 for _ in bars]
    dy = [.5 for _ in bars]
    dz = [bar[2] for bar in bars]

    #fig = plt.figure()
    #ax1 = fig.add_subplot(111, projection='3d')
    #ax1.bar3d(x, y, z, dx, dy, dz)
    #ax1.set_xlabel("issue")
    #ax1.set_ylabel("commit")
    #ax1.set_zlabel("CoreMark/MHz")
    #plt.show()
|
||||||
|
|
||||||
|
def issue_commit_graph(input_file, n=3):
    """Plot the issue/commit graph

    When `input_file` is None a stored table of scores is used. Otherwise the
    model is run on the trace for every issue/commit width from 1 to `n`, and
    each score is 1000000 divided by the cycle count of the timed part.
    """
    if input_file is None:
        scores = [
            [0, 0, 0, 0, 0, 0],
            [0, 2.651936045910317, 2.651936045910317, 2.651936045910317,
             2.651936045910317, 2.651936045910317],
            [0, 3.212779150348426, 3.6292766488711137, 3.6292766488711137,
             3.6292766488711137, 3.6292766488711137],
            [0, 3.2550388000624966, 3.900216852056974, 3.914997572701505,
             3.914997572701505, 3.914997572701505],
            [0, 3.2596436557555526, 3.9257869239889134, 3.9420984578510834,
             3.9421606193922765, 3.9421606193922765],
            [0, 3.260695897718491, 3.944757614368385, 3.9623576027736505,
             3.9625460150656, 3.9625460150656],
        ]
    else:
        # Row and column 0 stay at 0: widths start at 1
        scores = [[0 for _ in range(n + 1)] for _ in range(n + 1)]
        widths = range(1, n + 1)
        for issue in widths:
            for commit in widths:
                print("running", issue, commit)
                model = Model(issue=issue, commit=commit)
                model.load_file(input_file)
                model.run()
                timed = filter_timed_part(model.retired)
                scores[issue][commit] = 1000000 / count_cycles(timed)
    # NOTE(review): the original indentation was lost; the print is assumed
    # to run for both branches - confirm against the upstream file.
    print(scores)
    display_scores(scores)
|
||||||
|
|
||||||
|
def filter_timed_part(all_instructions):
    """Keep only timed part from a trace

    Each `csrr <reg>, minstret` instruction toggles the timed region on or
    off. The markers themselves are never kept.
    """
    marker = re.compile(r"^csrr\s+\w\w,\s*minstret$")
    kept = []
    inside = False
    for instr in all_instructions:
        if marker.search(instr.mnemo) is not None:
            inside = not inside
        elif inside:
            kept.append(instr)
    return kept
|
||||||
|
|
||||||
|
def count_cycles(retired):
    """Return the number of cycles spanned by the retired instructions.

    The span goes from the earliest event of the first instruction to the
    latest event of the last one.
    """
    first, last = retired[0], retired[-1]
    earliest = min(event.cycle for event in first.events)
    latest = max(event.cycle for event in last.events)
    return latest - earliest
|
||||||
|
|
||||||
|
def print_stats(instructions):
    """Print the cycle count, score, instruction count and event rates.

    Event rates are the number of events of each kind divided by the number
    of instructions. Nothing is computed for an empty list: a notice is
    printed instead of failing on the first/last instruction lookup.
    """
    if not instructions:
        print("no instruction to compute statistics on")
        return

    ecount = defaultdict(int)

    for instr in instructions:
        for e in instr.events:
            ecount[e.kind] += 1
    n_instr = len(instructions)
    n_cycles = count_cycles(instructions)

    print_data("cycle number", n_cycles)
    print_data("Coremark/MHz", 1000000 / n_cycles)
    print_data("instruction number", n_instr)
    for ek, count in ecount.items():
        print_data(f"{ek}/instr", f"{100 * count / n_instr:.2f}%")
|
||||||
|
|
||||||
|
def main(input_file: str):
    """Entry point

    Runs a debug-enabled model with 2 issue ports and 2 commit ports on the
    trace, writes the annotated trace to 'annotated.log' and prints the
    statistics of the timed part.
    """
    simulation = Model(debug=True, issue=2, commit=2)
    simulation.load_file(input_file)
    simulation.run()

    retired = simulation.retired
    write_trace('annotated.log', retired)
    print_stats(filter_timed_part(retired))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Fail with a usage message rather than an IndexError when the trace
    # path is missing; extra arguments are still ignored.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <trace-file>")
    main(sys.argv[1])
|
Loading…
Add table
Reference in a new issue