mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-17 19:04:48 -04:00
Add CVA6 performance model (#2880)
This commit is contained in:
parent
4a1bffa87a
commit
7b3054156e
5 changed files with 1402 additions and 0 deletions
|
@ -4,6 +4,9 @@ CVA6 is a 6-stage, single-issue, in-order CPU which implements the 64-bit RISC-V
|
|||
|
||||
It has a configurable size, separate TLBs, a hardware PTW and branch-prediction (branch target buffer and branch history table). The primary design goal was on reducing critical path length.
|
||||
|
||||
A performance model of CVA6 is available in the `perf-model/` folder of this repository.
|
||||
It can be used to investigate performance-related micro-architecture changes.
|
||||
|
||||
<img src="docs/03_cva6_design/_static/ariane_overview.drawio.png"/>
|
||||
|
||||
|
||||
|
|
79
perf-model/README.md
Normal file
79
perf-model/README.md
Normal file
|
@ -0,0 +1,79 @@
|
|||
# CVA6 cycle-accurate performance model
|
||||
|
||||
This repository contains a cycle-accurate performance model of the CVA6 control path.
|
||||
|
||||
It was developed to explore microarchitecture changes in CVA6 before implementing them.
|
||||
|
||||
To cite this model, please head to the end of this document.
|
||||
|
||||
|
||||
## Getting started
|
||||
|
||||
### Adapt RVFI trace generation
|
||||
|
||||
The regular expression expects the cycle number to be in the RVFI trace.
|
||||
The value is not used by the model but it is used to compare the model and CVA6.
|
||||
|
||||
To emit cycle number in RVFI trace, modify `corev_apu/tb/rvfi_tracer.sv` in CVA6 repository as below.
|
||||
|
||||
```diff
|
||||
- $fwrite(f, "core 0: 0x%h (0x%h) DASM(%h)\n",
|
||||
- pc64, rvfi_i[i].insn, rvfi_i[i].insn);
|
||||
+ $fwrite(f, "core 0: 0x%h (0x%h) @%d DASM(%h)\n",
|
||||
+ pc64, rvfi_i[i].insn, cycles, rvfi_i[i].insn);
|
||||
```
|
||||
|
||||
|
||||
### Generate an RVFI trace
|
||||
|
||||
To generate an RVFI trace, follow the instructions in the CVA6 repository to run a simulation.
|
||||
The RVFI trace will be in `verif/sim/out_<date>/<simulator>/<test-name>.log`.
|
||||
|
||||
|
||||
### Running the model
|
||||
|
||||
```bash
|
||||
python3 model.py verif/sim/out_<date>/<simulator>/<test-name>.log
|
||||
```
|
||||
|
||||
|
||||
### Exploring design space
|
||||
|
||||
In `model.py`, the `main` function runs the model with arguments which override default values.
|
||||
Generic parameters are available in `Model.__init__`.
|
||||
You can add new parameters to explore here.
|
||||
|
||||
To perform exploration, run the model in a loop, like `issue_commit_graph` does.
|
||||
The `display_scores` function is meant to print a 3D plot if you have `matplotlib`.
|
||||
`issue_commit_graph` prints the scores so that you can store them and display the figure without re-running the model.
|
||||
|
||||
|
||||
## Files
|
||||
|
||||
| Name | Description |
|
||||
| :--- | :--- |
|
||||
| `cycle_diff.py` | Calculates duration of each instruction in an RVFI trace |
|
||||
| `isa.py` | Module to create Python objects from RISC-V instructions |
|
||||
| `model.py` | The CVA6 performance model |
|
||||
|
||||
|
||||
## Citing
|
||||
|
||||
```bibtex
|
||||
@inproceedings{cf24,
|
||||
author = {Allart, C\^{o}me and Coulon, Jean-Roch and Sintzoff, Andr\'{e} and Potin, Olivier and Rigaud, Jean-Baptiste},
|
||||
title = {Using a Performance Model to Implement a Superscalar CVA6},
|
||||
year = {2024},
|
||||
isbn = {9798400704925},
|
||||
publisher = {Association for Computing Machinery},
|
||||
url = {https://doi.org/10.1145/3637543.3652871},
|
||||
doi = {10.1145/3637543.3652871},
|
||||
abstract = {A performance model of CVA6 RISC-V processor is built to evaluate performance-related modifications before implementing them in RTL. Its accuracy is 99.2\% on CoreMark. This model is used to evaluate a superscalar feature for CVA6. During design phase, the model helped detecting and fixing performance bugs. The superscalar feature resulted in a CVA6 performance improvement of 40\% on CoreMark.},
|
||||
booktitle = {Proceedings of the 21st ACM International Conference on Computing Frontiers: Workshops and Special Sessions},
|
||||
pages = {43–46},
|
||||
numpages = {4},
|
||||
keywords = {CVA6, Cycle-Based Model, Multi-Issue, Performance, RISC-V, Superscalar},
|
||||
location = {Ischia, Italy},
|
||||
series = {CF '24 Companion}
|
||||
}
|
||||
```
|
80
perf-model/cycle_diff.py
Normal file
80
perf-model/cycle_diff.py
Normal file
|
@ -0,0 +1,80 @@
|
|||
# Copyright 2024 Thales Silicon Security
|
||||
#
|
||||
# Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||
# You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
#
|
||||
# Original Author: Côme ALLART - Thales
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Matches a mnemonic of the form `csrr <reg>, minstret`.
re_csrr_minstret = re.compile(r"^csrr\s+\w+,\s*minstret$")

# Matches one line of the RVFI trace. Capture groups:
#   1: leading word, 2: address without its 8 leading zeros,
#   3: binary code, 4: non-blank text before `@`, 5: cycle, 6: rest of line.
re_full = re.compile(
    r"([a-z]+)\s+0:\s*0x00000000([0-9a-f]+)\s*\(([0-9a-fx]+)\)\s*(\S*)@\s*([0-9]+)\s*(.*)"
)
|
||||
|
||||
class Trace:
    """One instruction read from the RVFI trace.

    Attributes:
        addr: address text of the instruction.
        cycle: cycle number attached to the instruction.
        mnemo: mnemonic text of the instruction.
        flags: flags text attached to the instruction.
        delta: None at creation, meant to be set by the user of the trace.
    """

    def __init__(self, addr, cycle, mnemo, flags):
        self.addr, self.cycle = addr, cycle
        self.mnemo, self.flags = mnemo, flags
        self.delta = None

    def report(self):
        """Return the one-line text report of this trace"""
        return f"+{self.delta} {self.flags} 0x{self.addr}: {self.mnemo}"
|
||||
|
||||
def print_data(name, value):
    "Prints 'name = data' with the '=' aligned after a 24-character column"
    # A negative repeat count yields an empty string, so long names are kept
    missing = 24 - len(name)
    spaces = ' ' * missing
    print(f"{name}{spaces} = {value}")
|
||||
|
||||
def read_traces(input_file):
    """Collect traces from file

    Only lines matching `re_full` are considered. Each instruction whose
    mnemonic matches `re_csrr_minstret` toggles the recording on or off and
    is not recorded itself; recording is off at the start of the file.
    """
    collected = []
    accepting = False
    with open(input_file, "r", encoding="utf8") as f:
        for raw_line in f:
            found = re_full.search(raw_line.strip())
            if not found:
                continue
            trace = Trace(
                found.group(2),       # addr
                int(found.group(5)),  # cycle
                found.group(6),       # mnemo
                found.group(4),       # flags
            )
            if re_csrr_minstret.search(trace.mnemo):
                accepting = not accepting
            elif accepting:
                collected.append(trace)
    return collected
|
||||
|
||||
def write_traces(outfile, traces):
    "Write the report of every trace to the output file, one per line"
    print("output file:", outfile)
    with open(outfile, "w", encoding="utf8") as f:
        f.writelines(trace.report() + "\n" for trace in traces)
|
||||
|
||||
def main(input_file: str):
    """Report the duration of each instruction of an RVFI trace.

    Reads the traces with `read_traces`, sets the `delta` of each trace to
    the number of cycles since the previous one (0 for the first), prints
    summary figures and writes the detail to `traceout.log`.

    Exits with an error message when no trace was collected.
    """
    traces = read_traces(input_file)
    if not traces:
        # Without this guard, traces[0] fails with an obscure IndexError
        sys.exit(f"{input_file}: no trace collected, nothing to report")
    cycle = traces[0].cycle
    cycle_number = traces[-1].cycle - cycle + 1
    for trace in traces:
        trace.delta = trace.cycle - cycle
        cycle = trace.cycle
    print_data("cycle number", cycle_number)
    print_data("Coremark/MHz", 1000000 / cycle_number)
    print_data("instruction number", len(traces))
    print_data("IPC", len(traces) / cycle_number)
    write_traces("traceout.log", traces)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <rvfi-trace.log>")
    main(sys.argv[1])
|
574
perf-model/isa.py
Normal file
574
perf-model/isa.py
Normal file
|
@ -0,0 +1,574 @@
|
|||
# Copyright 2024 Thales Silicon Security
|
||||
#
|
||||
# Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||
# You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
#
|
||||
# Original Author: Côme ALLART - Thales
|
||||
|
||||
"""
|
||||
Represents the instruction set
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
class Reg:
    """Integer register numbers, by ABI name and by architectural name"""
    # ABI names
    zero, ra, sp, gp, tp = range(5)
    t0, t1, t2 = range(5, 8)
    s0 = fp = 8
    s1 = 9
    a0, a1, a2, a3, a4, a5, a6, a7 = range(10, 18)
    s2, s3, s4, s5, s6, s7, s8, s9, s10, s11 = range(18, 28)
    t3, t4, t5, t6 = range(28, 32)
    # Register names
    (
        x0, x1, x2, x3, x4, x5, x6, x7,
        x8, x9, x10, x11, x12, x13, x14, x15,
        x16, x17, x18, x19, x20, x21, x22, x23,
        x24, x25, x26, x27, x28, x29, x30, x31,
    ) = range(32)
|
||||
|
||||
def sign_ext(imm, index, xlen=32):
    """
    Sign extends a value
    imm: value to sign extend, must not have any bit set above `index`
    index: index of the sign bit of the value
    xlen: target length in bits of the sign extended value
    """
    width = index + 1
    assert (imm >> width) == 0
    # Mask of the bits [xlen-1:width], set in the result when imm is negative
    fill = ((1 << (xlen - width)) - 1) << width
    if imm >> index:
        return fill | imm
    return imm
|
||||
|
||||
@dataclass
class AddrFields:
    """Register and offset used to build a memory address"""
    # Number of the base register
    base_reg: int
    # Offset associated with the base register
    offset: int
|
||||
|
||||
class Rtype:
    """R-type instructions"""

    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the R-type fields"""
        word = instr.bin
        self.funct7 = word >> 25
        self.rs2 = (word >> 20) & 31
        self.rs1 = (word >> 15) & 31
        self.funct3 = (word >> 12) & 7
        self.rd = (word >> 7) & 31
        # The opcode field is inst[6:0]: 7 bits, not 6
        self.opcode = word & 0x7f
|
||||
|
||||
class Itype:
    """I-type instructions"""

    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the I-type fields"""
        word = instr.bin
        self.rs1 = (word >> 15) & 31
        self.funct3 = (word >> 12) & 7
        self.rd = (word >> 7) & 31
        # The opcode field is inst[6:0]: 7 bits, not 6
        self.opcode = word & 0x7f
        # imm[11:0] is inst[31:20], sign bit at index 11
        self.imm = sign_ext(word >> 20, 11)
|
||||
|
||||
class Stype:
    """S-type instructions"""

    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the S-type fields"""
        word = instr.bin
        self.rs2 = (word >> 20) & 31
        self.rs1 = (word >> 15) & 31
        self.funct3 = (word >> 12) & 7
        # The opcode field is inst[6:0]: 7 bits, not 6
        self.opcode = word & 0x7f
        # imm[11:5] is inst[31:25], imm[4:0] is inst[11:7]
        raw_imm = ((word >> 25) << 5) | ((word >> 7) & 31)
        self.imm = sign_ext(raw_imm, 11)
|
||||
|
||||
class Btype:
    """B-type instructions"""

    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the B-type fields"""
        word = instr.bin
        self.rs2 = (word >> 20) & 31
        self.rs1 = (word >> 15) & 31
        self.funct3 = (word >> 12) & 7
        # The opcode field is inst[6:0]: 7 bits, not 6
        # (with a 6-bit mask, BRANCH 0x63 would read as 0x23)
        self.opcode = word & 0x7f
        raw_imm = (
            ((word >> 31) << 12)                # imm[12]   = inst[31]
            | (((word >> 7) & 1) << 11)         # imm[11]   = inst[7]
            | (((word >> 25) & 0x3f) << 5)      # imm[10:5] = inst[30:25]
            | (((word >> 8) & 15) << 1)         # imm[4:1]  = inst[11:8]
        )
        self.imm = sign_ext(raw_imm, 12)
|
||||
|
||||
class Utype:
    """U-type instructions"""

    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the U-type fields"""
        word = instr.bin
        self.imm_31_12 = word >> 12
        # Same bits as `rd`
        self.imm_4_0 = (word >> 7) & 31
        self.rd = (word >> 7) & 31
        # The opcode field is inst[6:0]: 7 bits, not 6
        self.opcode = word & 0x7f
        # The immediate is not sign extended
        self.imm = self.imm_31_12 << 12
|
||||
|
||||
class Jtype:
    """J-type instructions"""

    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the J-type fields"""
        word = instr.bin
        self.rd = (word >> 7) & 31
        # The opcode field is inst[6:0]: 7 bits, not 6
        # (with a 6-bit mask, JAL 0x6f would read as 0x2f)
        self.opcode = word & 0x7f
        raw_imm = (
            ((word >> 31) << 20)                # imm[20]    = inst[31]
            | (((word >> 12) & 0xff) << 12)     # imm[19:12] = inst[19:12]
            | (((word >> 20) & 1) << 11)        # imm[11]    = inst[20]
            | (((word >> 21) & 0x3ff) << 1)     # imm[10:1]  = inst[30:21]
        )
        self.imm = sign_ext(raw_imm, 20)
|
||||
|
||||
class MOItype:
    """Memory ordering instructions"""

    def __init__(self, instr):
        """Split the 32-bit word `instr.bin` into the memory ordering fields"""
        word = instr.bin
        self.fm = word >> 28
        # One flag per bit of inst[27:20], from most to least significant
        self.PI = (word >> 27) & 1
        self.PO = (word >> 26) & 1
        self.PR = (word >> 25) & 1
        self.PW = (word >> 24) & 1
        self.SI = (word >> 23) & 1
        self.SO = (word >> 22) & 1
        self.SR = (word >> 21) & 1
        self.SW = (word >> 20) & 1
        self.rs1 = (word >> 15) & 31
        self.funct3 = (word >> 12) & 7
        self.rd = (word >> 7) & 31
        # The opcode field is inst[6:0]: 7 bits, not 6
        self.opcode = word & 0x7f
|
||||
|
||||
class CRtype:
    """Compressed register"""

    control = ['C.JR', 'C.JALR']
    regreg = ['C.MV', 'C.ADD']

    def __init__(self, instr):
        """Decode the fields and the precise name of a CR-format instruction"""
        word = instr.bin
        self.funct4 = word >> 12
        r = (word >> 7) & 31
        self.rs2 = (word >> 2) & 31
        self.op = word & 3
        self.rs1 = r
        base = instr.base()
        if base == 'C.J[AL]R/C.MV/C.ADD':
            # Disambiguate with the low bit of funct4, rs2 and the register
            low_funct4 = self.funct4 & 1
            if self.rs2 != 0:
                base = 'C.ADD' if low_funct4 else 'C.MV'
            elif not low_funct4:
                base = 'C.JR'
            elif r == 0:
                base = 'C.EBREAK'
            else:
                base = 'C.JALR'
        # Only register-register operations get a destination register
        if base in CRtype.regreg:
            self.rd = r
        self.name = base
|
||||
|
||||
class CItype:
    """Compressed immediate"""

    SPload = ['C.LWSP', 'C.LDSP', 'C.LQSP', 'C.FLWSP', 'C.FLDSP']
    constgen = ['C.LI', 'C.LUI']
    regimm = ['C.ADDI', 'C.ADDIW', 'C.ADDI16SP', 'C.SLLI']

    # Raw (not sign extended) immediates, rebuilt from the scattered bits
    Woffset = lambda i: (((i >> 12) & 1) << 5) | (((i >> 4) & 7) << 2) \
        | (((i >> 2) & 3) << 6)
    Doffset = lambda i: (((i >> 12) & 1) << 5) | (((i >> 5) & 3) << 3) \
        | (((i >> 2) & 7) << 6)
    Qoffset = lambda i: (((i >> 12) & 1) << 5) | (((i >> 6) & 1) << 4) \
        | (((i >> 2) & 15) << 6)
    imm = lambda i: (((i >> 12) & 1) << 5) | ((i >> 2) & 31)
    immsp = lambda i: (((i >> 12) & 1) << 9) | (((i >> 6) & 1) << 4) \
        | (((i >> 5) & 1) << 6) | (((i >> 3) & 3) << 7) \
        | (((i >> 2) & 1) << 5)

    # Offset builder of each stack-pointer-relative load
    offset = {
        'C.LWSP': Woffset,
        'C.LDSP': Doffset,
        'C.LQSP': Qoffset,
        'C.FLWSP': Woffset,
        'C.FLDSP': Doffset,
    }

    def __init__(self, instr):
        """Decode the fields of a CI-format instruction

        Which attributes are set depends on the instruction: `rd`, `rs1`,
        `offset`, `imm`, `nzimm` and `shamt` are only created when relevant.
        """
        word = instr.bin
        self.funct3 = word >> 13
        r = (word >> 7) & 31
        self.op = word & 3
        base = instr.base()
        if base == 'C.LUI/C.ADDI16SP':
            base = 'C.ADDI16SP' if r == Reg.sp else 'C.LUI'

        if base in CItype.SPload + CItype.constgen:
            self.rd = r

        if base in CItype.SPload:
            self.rs1 = Reg.sp
            # zero-extended offset
            self.offset = CItype.offset[base](word)
        if base == 'C.LI':
            self.imm = sign_ext(CItype.imm(word), 5)
        if base == 'C.LUI':
            self.nzimm = sign_ext(CItype.imm(word) << 12, 17)

        if base in CItype.regimm:
            self.rd = r
            self.rs1 = r
        if base == 'C.ADDI':
            self.nzimm = sign_ext(CItype.imm(word), 5)
        if base == 'C.ADDIW':
            self.imm = sign_ext(CItype.imm(word), 5)
        if base == 'C.ADDI16SP':
            self.nzimm = sign_ext(CItype.immsp(word), 9)
        if base == 'C.SLLI':
            self.shamt = CItype.imm(word)
|
||||
|
||||
class CSStype:
    """Compressed stack-relative store"""

    # Raw offsets, rebuilt from the scattered bits of inst[12:7]
    Woffset = lambda i: (((i >> 9) & 15) << 2) | (((i >> 7) & 3) << 6)
    Doffset = lambda i: (((i >> 10) & 7) << 3) | (((i >> 7) & 7) << 6)
    Qoffset = lambda i: (((i >> 11) & 3) << 4) | (((i >> 7) & 15) << 6)

    # Offset builder of each stack-pointer-relative store
    offset = {
        'C.SWSP': Woffset,
        'C.SDSP': Doffset,
        'C.SQSP': Qoffset,
        'C.FSWSP': Woffset,
        'C.FSDSP': Doffset,
    }

    def __init__(self, instr):
        """Decode the fields of a CSS-format instruction"""
        word = instr.bin
        self.funct3 = word >> 13
        self.rs1 = Reg.sp
        self.rs2 = (word >> 2) & 31
        self.op = word & 3
        # zero-extended offset
        build_offset = CSStype.offset[instr.base()]
        self.offset = build_offset(word)
|
||||
|
||||
class CIWtype:
    """Compressed wide immediate"""

    def __init__(self, instr):
        """Decode the fields of a CIW-format instruction"""
        word = instr.bin
        self.funct3 = word >> 13
        # 3-bit register field, mapped to registers 8 to 15
        self.rd = ((word >> 2) & 7) + 8
        self.op = word & 3
        # zero-extended (unsigned) non-zero immediate
        imm_5_4 = ((word >> 11) & 3) << 4
        imm_9_6 = ((word >> 7) & 15) << 6
        imm_2 = ((word >> 6) & 1) << 2
        imm_3 = ((word >> 5) & 1) << 3
        self.nzuimm = imm_5_4 | imm_9_6 | imm_2 | imm_3
        if instr.base() == 'C.ADDI4SPN':
            self.rs1 = Reg.sp
|
||||
|
||||
def CLS_Woffset(i):
    """Raw offset of compressed word loads/stores from the instruction bits"""
    offset_5_3 = ((i >> 10) & 7) << 3
    offset_2 = ((i >> 6) & 1) << 2
    offset_6 = ((i >> 5) & 1) << 6
    return offset_5_3 | offset_2 | offset_6


def CLS_Doffset(i):
    """Raw offset of compressed double loads/stores from the instruction bits"""
    offset_5_3 = ((i >> 10) & 7) << 3
    offset_7_6 = ((i >> 5) & 3) << 6
    return offset_5_3 | offset_7_6


def CLS_Qoffset(i):
    """Raw offset of compressed quad loads/stores from the instruction bits"""
    offset_5_4 = ((i >> 11) & 3) << 4
    offset_8 = ((i >> 10) & 1) << 8
    offset_7_6 = ((i >> 5) & 3) << 6
    return offset_5_4 | offset_8 | offset_7_6
|
||||
|
||||
class CLtype:
    """Compressed load"""

    # Offset builder of each compressed load
    offset = {
        'C.LW': CLS_Woffset,
        'C.LD': CLS_Doffset,
        'C.LQ': CLS_Qoffset,
        'C.FLW': CLS_Woffset,
        'C.FLD': CLS_Doffset,
    }

    def __init__(self, instr):
        """Decode the fields of a CL-format instruction"""
        word = instr.bin
        self.funct3 = word >> 13
        # 3-bit register fields, mapped to registers 8 to 15
        self.rs1 = ((word >> 7) & 7) + 8
        self.rd = ((word >> 2) & 7) + 8
        self.op = word & 3
        # zero-extended offset
        build_offset = CLtype.offset[instr.base()]
        self.offset = build_offset(word)
|
||||
|
||||
class CStype:
    """Compressed store"""

    # Offset builder of each compressed store
    offset = {
        'C.SW': CLS_Woffset,
        'C.SD': CLS_Doffset,
        'C.SQ': CLS_Qoffset,
        'C.FSW': CLS_Woffset,
        'C.FSD': CLS_Doffset,
    }

    def __init__(self, instr):
        """Decode the fields of a CS-format instruction"""
        word = instr.bin
        self.funct3 = word >> 13
        # 3-bit register fields, mapped to registers 8 to 15
        self.rs1 = ((word >> 7) & 7) + 8
        self.rs2 = ((word >> 2) & 7) + 8
        self.op = word & 3
        # zero-extended offset
        build_offset = CStype.offset[instr.base()]
        self.offset = build_offset(word)
|
||||
|
||||
class CAtype:
    """Compressed arithmetic"""

    def __init__(self, instr):
        """Decode the fields of a CA-format instruction"""
        word = instr.bin
        self.funct6 = word >> 10
        # 3-bit register fields, mapped to registers 8 to 15
        dest = ((word >> 7) & 7) + 8
        self.rd = dest
        self.rs1 = dest
        self.funct2 = (word >> 5) & 3
        self.rs2 = ((word >> 2) & 7) + 8
        self.op = word & 3
|
||||
|
||||
class CBtype:
    """Compressed branch"""

    branch = ['C.BEQZ', 'C.BNEZ']
    regimm = ['C.SRLI', 'C.SRAI', 'C.ANDI']

    def __init__(self, instr):
        """Decode the fields of a CB-format instruction"""
        i = instr.bin
        base = instr.base()
        self.funct3 = i >> 13
        # Raw inst[12:10], replaced below for branches
        self.offset = (i >> 10) & 7
        # 3-bit register field, mapped to registers 8 to 15
        self.rs1 = ((i >> 7) & 7) + 8
        self.op = i & 3
        if base in CBtype.branch:
            raw_offset = (
                (((i >> 12) & 1) << 8)
                | (((i >> 10) & 3) << 3)
                | (((i >> 5) & 3) << 6)
                | (((i >> 3) & 3) << 1)
                | (((i >> 2) & 1) << 5)
            )
            self.offset = sign_ext(raw_offset, 8)
        if base in CBtype.regimm:
            # Only C.ANDI has a signed immediate
            raw_imm = CItype.imm(i)
            self.shamt = sign_ext(raw_imm, 5) if base == 'C.ANDI' else raw_imm
            self.rd = self.rs1
|
||||
|
||||
class CJtype:
    """Compressed jump"""

    # Raw jump offset: inst[12:2] holds offset[11|4|9:8|10|6|7|3:1|5]
    offset = lambda i: (((i >> 12) & 1) << 11) | (((i >> 11) & 1) << 4) \
        | (((i >> 9) & 3) << 8) | (((i >> 8) & 1) << 10) \
        | (((i >> 7) & 1) << 6) | (((i >> 6) & 1) << 7) \
        | (((i >> 3) & 7) << 1) | (((i >> 2) & 1) << 5)

    def __init__(self, instr):
        """Decode the fields of a CJ-format instruction (C.J or C.JAL)"""
        self.funct3 = instr.bin >> 13
        assert instr.base() in ['C.J', 'C.JAL']
        # The instance attribute hides the class-level builder of same name
        self.offset = sign_ext(CJtype.offset(instr.bin), 11)
        # Raw, unscrambled inst[12:2]
        self.jump_target = (instr.bin >> 2) & 0x7ff
        self.op = instr.bin & 3
|
||||
|
||||
class Instr:
    """A RISC-V instruction built from its binary code"""

    # Names of the compressed instructions, by inst[1:0] then inst[15:13]
    table_16_4_RV32 = [
        ['C.ADDI4SPN', 'C.FLD', 'C.LW', 'C.FLW',
         'Reserved', 'C.FSD', 'C.SW', 'C.FSW'],
        ['C.ADDI', 'C.JAL', 'C.LI', 'C.LUI/C.ADDI16SP',
         'MISC-ALU', 'C.J', 'C.BEQZ', 'C.BNEZ'],
        ['C.SLLI', 'C.FLDSP', 'C.LWSP', 'C.FLWSP',
         'C.J[AL]R/C.MV/C.ADD', 'C.FSDSP', 'C.SWSP', 'C.FSWSP'],
    ]

    # Names of the other instructions, by inst[6:5] then inst[4:2]
    table_24_1 = [
        ['LOAD', 'LOAD-FP', 'custom-0', 'MISC-MEM', 'OP-IMM', 'AUIPC', 'OP-IMM-32', '48b'],
        ['STORE', 'STORE-FP', 'custom-1', 'AMO', 'OP', 'LUI', 'OP-32', '64b'],
        ['MADD', 'MSUB', 'NMSUB', 'NMADD', 'OP-FP', 'reserved', 'custom-2/rv128', '48b'],
        ['BRANCH', 'JALR', 'reserved', 'JAL', 'SYSTEM', 'reserved', 'custom-3/rv128', '80b'],
    ]

    # Class decoding the fields of each base instruction
    type_of_base = {
        'OP-IMM': Itype,
        'LUI': Utype,
        'AUIPC': Utype,
        'OP': Rtype,
        'OP-32': Rtype,
        'JAL': Jtype,
        'JALR': Itype,
        'BRANCH': Btype,
        'LOAD': Itype,
        'STORE': Stype,
        'SYSTEM': Itype,
        'C.LWSP': CItype,
        'C.LDSP': CItype,
        'C.LQSP': CItype,
        'C.FLWSP': CItype,
        'C.FLDSP': CItype,
        'C.SWSP': CSStype,
        'C.SDSP': CSStype,
        'C.SQSP': CSStype,
        'C.FSWSP': CSStype,
        'C.FSDSP': CSStype,
        'C.LW': CLtype,
        'C.LD': CLtype,
        'C.LQ': CLtype,
        'C.FLW': CLtype,
        'C.FLD': CLtype,
        'C.SW': CStype,
        'C.SD': CStype,
        'C.SQ': CStype,
        'C.FSW': CStype,
        'C.FSD': CStype,
        'C.J': CJtype,
        'C.JAL': CJtype,
        'C.J[AL]R/C.MV/C.ADD': CRtype,
        'C.BEQZ': CBtype,
        'C.BNEZ': CBtype,
        'C.LI': CItype,
        'C.LUI/C.ADDI16SP': CItype,
        'C.ADDI': CItype,
        'C.ADDIW': CItype,
        'C.ADDI4SPN': CIWtype,
        'C.SLLI': CItype,
        'MISC-ALU': CAtype,
    }

    iloads = ['C.LW', 'C.LWSP', 'LOAD']
    floads = ['C.FLD', 'C.FLW', 'C.FLDSP', 'C.FLWSP', 'LOAD-FP']
    istores = ['C.SW', 'C.SWSP', 'STORE']
    fstores = ['C.FSD', 'C.FSW', 'C.FSDSP', 'C.FSWSP', 'STORE-FP']
    loads = iloads + floads
    stores = istores + fstores

    def __init__(self, bincode):
        self.bin = bincode
        self.inst_1_0 = self.bin & 3

    def base(self):
        """Get the name of the base instruction"""
        if self.is_compressed():
            row = Instr.table_16_4_RV32[self.bin & 3]
            return row[(self.bin >> 13) & 7]
        row = Instr.table_24_1[(self.bin >> 5) & 3]
        return row[(self.bin >> 2) & 7]

    def fields(self):
        """Get an object with the fields of the instruction

        Raises KeyError when the base instruction has no entry in
        `type_of_base`.
        """
        decoder = Instr.type_of_base[self.base()]
        return decoder(self)

    def is_compressed(self):
        """Is the instruction from the C extension?"""
        return (self.bin & 3) < 3

    def size(self):
        """Size of the instruction in bytes"""
        if self.is_compressed():
            return 2
        return 4

    def is_load(self):
        """Is the instruction a load?"""
        return self.base() in Instr.loads

    def is_store(self):
        """Is the instruction a store?"""
        return self.base() in Instr.stores

    def is_branch(self):
        """Is it a taken/not taken branch?"""
        return self.base() in ['C.BEQZ', 'C.BNEZ', 'BRANCH']

    def is_regjump(self):
        """Is it a register jump?"""
        base = self.base()
        if base == 'JALR':
            return True
        if base != 'C.J[AL]R/C.MV/C.ADD':
            return False
        # The compressed encoding is shared with C.MV, C.ADD and C.EBREAK
        return self.fields().name in ['C.JALR', 'C.JR']

    def is_jump(self):
        """Is it an immediate jump?"""
        return self.base() in ['JAL', 'C.JAL', 'C.J']

    def is_muldiv(self):
        """Is it a muldiv instruction?"""
        if self.base() not in ['OP', 'OP-32']:
            return False
        return self.fields().funct7 == 1

    def offset(self):
        """Get offset from instr (sometimes it is just 'imm' in RISCV spec)"""
        fields = self.fields()
        # NOTE(review): hasattr also sees class-level `offset` attributes
        # (e.g. the CItype.offset table) - confirm this is only called on
        # instructions whose fields object sets its own `offset` or `imm`
        if hasattr(fields, 'offset'):
            return fields.offset
        return fields.imm

    def addr_fields(self):
        """Get the register and offset to build an address"""
        base_reg = self.fields().rs1
        return AddrFields(base_reg, self.offset())

    def has_WAW_from(self, other):
        """b.has_WAW_from(a) if a.rd == b.rd"""
        a = other.fields()
        b = self.fields()
        if not hasattr(a, 'rd') or not hasattr(b, 'rd'):
            return False
        # Writes to the zero register never conflict
        return a.rd == b.rd and a.rd != Reg.zero

    def has_RAW_from(self, other):
        """b.has_RAW_from(a) if b.rsX == a.rd"""
        a = other.fields()
        b = self.fields()
        if not hasattr(a, 'rd') or a.rd == Reg.zero:
            return False
        for source in ('rs1', 'rs2'):
            if hasattr(b, source) and a.rd == getattr(b, source):
                return True
        return False

    def has_WAR_from(self, other):
        """b.has_WAR_from(a) if b.rd == a.rsX"""
        a = other.fields()
        b = self.fields()
        if not hasattr(b, 'rd') or b.rd == Reg.zero:
            return False
        for source in ('rs1', 'rs2'):
            if hasattr(a, source) and getattr(a, source) == b.rd:
                return True
        return False
|
666
perf-model/model.py
Normal file
666
perf-model/model.py
Normal file
|
@ -0,0 +1,666 @@
|
|||
# Copyright 2024 Thales Silicon Security
|
||||
#
|
||||
# Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0
|
||||
# You may obtain a copy of the License at https://solderpad.org/licenses/
|
||||
#
|
||||
# Original Author: Côme ALLART - Thales
|
||||
|
||||
"""
|
||||
Performance model of the cva6
|
||||
"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from collections import defaultdict
|
||||
|
||||
#from matplotlib import pyplot as plt
|
||||
|
||||
from isa import Instr, Reg
|
||||
|
||||
# Kinds of events which can be attached to an instruction
_EVENT_KIND_NAMES = [
    'WAW', 'WAR', 'RAW',
    'BMISS', 'BHIT',
    'STRUCT',
    'issue', 'done', 'commit',
]
EventKind = Enum('EventKind', _EVENT_KIND_NAMES)
|
||||
|
||||
def to_signed(value, xlen=32):
    """Subtract 2**xlen from `value` when it has a bit set at index xlen-1 or above"""
    if value >> (xlen - 1):
        return value - (1 << xlen)
    return value
|
||||
|
||||
class Event:
    """An event of a given kind, happening to an instruction at a given cycle"""

    def __init__(self, kind, cycle):
        self.kind, self.cycle = kind, cycle

    def __repr__(self):
        return f"@{self.cycle}: {self.kind}"
|
||||
|
||||
class Instruction(Instr):
    """Represents a RISC-V instruction with annotations"""

    # Registers considered as link registers by `is_ret` and `is_call`
    _ret_regs = [Reg.ra, Reg.t0]

    def __init__(self, line, address, hex_code, mnemo):
        """
        line: value stored as is in `self.line`
        address: address of the instruction, as hexadecimal text
        hex_code: binary code of the instruction, as hexadecimal text
        mnemo: mnemonic text of the instruction
        """
        Instr.__init__(self, int(hex_code, base=16))
        self.line = line
        self.address = int(address, base=16)
        self.hex_code = hex_code
        self.mnemo = mnemo
        self.events = []

    def mnemo_name(self):
        """The name of the instruction (first word of the mnemo)"""
        return self.mnemo.split()[0]

    def next_addr(self):
        """Address of next instruction"""
        return self.address + self.size()

    def is_ret(self):
        "Does CVA6 consider this instruction as a ret?"
        # Decode the fields only for register jumps: `fields()` raises
        # KeyError for base instructions without a decoder
        if not self.is_regjump():
            return False
        f = self.fields()
        # Strange conditions, no imm check, no rd-discard check
        return f.rs1 in Instruction._ret_regs \
            and (self.is_compressed() or f.rs1 != f.rd)

    def is_call(self):
        "Does CVA6 consider this instruction as a call?"
        base = self.base()
        if base == 'C.JAL':
            return True
        # Fields are only decoded for bases known to have a decoder
        if base == 'C.J[AL]R/C.MV/C.ADD':
            return self.fields().name == 'C.JALR'
        if base in ['JAL', 'JALR']:
            return self.fields().rd in Instruction._ret_regs
        return False

    def __repr__(self):
        return self.mnemo
|
||||
|
||||
@dataclass
class Entry:
    """A scoreboard entry"""
    # Instruction held by the entry
    instr: Instruction
    # Not annotated: plain class attribute, not a dataclass field
    cycles_since_issue = 0
    done: bool = False

    def __repr__(self):
        if self.done:
            status = "DONE"
        else:
            status = "WIP "
        addr = f"0x{self.instr.address:08X}"
        return f"{status} {addr}:`{self.instr}` for {self.cycles_since_issue}"
|
||||
|
||||
@dataclass
class LastIssue:
    """To store the last issued instruction"""
    # The instruction
    instr: Instruction
    # Cycle number associated with its issue
    issue_cycle: int
|
||||
|
||||
class IqLen:
    """Model of the instruction queue with only a size counter"""

    def __init__(self, fetch_size, debug=False):
        """
        fetch_size: rounded up to a power of two, 4 at least
        debug: print a message on each operation when True
        """
        self.fetch_size = 4
        while self.fetch_size < fetch_size:
            self.fetch_size *= 2
        self.debug = debug
        # Start with one fetch in the queue
        self.len = self.fetch_size
        self.new_fetch = True

    def fetch(self):
        """Fetch bytes"""
        self.len += self.fetch_size
        self._debug(f"fetched {self.fetch_size}, got {self.len}")
        self.new_fetch = True

    def flush(self):
        """Flush instruction queue (bmiss or exception)"""
        self.len = 0
        self._debug(f"flushed, got {self.len}")
        self.new_fetch = False

    def jump(self):
        """Lose a fetch cycle and truncate (jump, branch hit taken)"""
        if self.new_fetch:
            self.len -= self.fetch_size
            self._debug(f"jumping, removed {self.fetch_size}, got {self.len}")
            self.new_fetch = False
        self._truncate()
        self._debug(f"jumped, got {self.len}")

    def has(self, instr):
        """Does the instruction queue have this instruction?"""
        length = self.len
        if self._is_crossword(instr):
            # Discount fetch_size - 2 bytes for an instruction across fetches
            length -= (self.fetch_size - 2)
        self._debug(f"comparing {length} to {instr.size()} ({instr})")
        return length >= instr.size()

    def remove(self, instr):
        """Remove instruction from queue"""
        self.len -= instr.size()
        self._debug(f"removed {instr.size()}, got {self.len}")
        next_index = self._addr_index(instr.next_addr())
        self._truncate(next_index)
        if instr.is_jump():
            self.jump()

    def _addr_index(self, addr):
        # Position within a fetch (fetch_size is a power of two)
        return addr & (self.fetch_size - 1)

    def _is_crossword(self, instr):
        # A non-compressed instruction starting on the last 2 bytes of a fetch
        if instr_index_is_last := (
            self._addr_index(instr.address) == self.fetch_size - 2
        ):
            return not instr.is_compressed()
        return instr_index_is_last

    def _truncate(self, index=0):
        occupancy = self.fetch_size - self._addr_index(self.len)
        to_remove = index - occupancy
        if to_remove < 0:
            to_remove += self.fetch_size
        self.len -= to_remove
        self._debug(f"truncated, removed {to_remove}, got {self.len}")

    def _debug(self, message):
        if not self.debug:
            return
        print(f"iq: {message}")
|
||||
|
||||
class Ras:
    "Return Address Stack"

    def __init__(self, depth=2, debug=False):
        # The stack keeps at most `depth - 1` addresses (see `push`)
        self.depth = depth - 1
        self.stack = []
        self.debug = debug
        self.last_dropped = None

    def push(self, addr):
        "Push an address on the stack, forget oldest entry if full"
        self.stack.append(addr)
        self._debug(f"pushed 0x{addr:08X}")
        if len(self.stack) <= self.depth:
            return
        self.stack.pop(0)
        self._debug("overflown")

    def drop(self):
        "Drop an address from the stack and remember it as the last dropped"
        self._debug("dropping")
        if self.stack:
            self.last_dropped = self.stack.pop()
            return
        self.last_dropped = None
        self._debug("was already empty")

    def read(self):
        "Read the address of the last drop, None if the stack was empty then"
        self._debug("reading")
        addr = self.last_dropped
        if addr is None:
            self._debug("was empty")
            return None
        self._debug(f"read 0x{addr:08X}")
        return addr

    def resolve(self, instr):
        "Drop if the instruction is a ret, then push if it is a call"
        self._debug(f"issuing {instr}")
        if instr.is_ret():
            self._debug("detected ret")
            self.drop()
        if instr.is_call():
            self._debug("detected call")
            self.push(instr.next_addr())

    def _debug(self, message):
        if not self.debug:
            return
        print(f"RAS: {message}")
|
||||
|
||||
class Bht:
    "Branch History Table"

    @dataclass
    class Entry:
        "A BHT entry"
        valid: bool = False
        # Saturating counter, kept between 0 and 3 by `resolve`
        sat_counter: int = 0

    def __init__(self, entries=128):
        self.contents = [Bht.Entry() for _ in range(entries)]

    def predict(self, addr):
        "Is the branch taken? None if don't know"
        entry = self.contents[self._index(addr)]
        if not entry.valid:
            return None
        return entry.sat_counter >= 2

    def resolve(self, addr, taken):
        "Update branch prediction"
        entry = self.contents[self._index(addr)]
        entry.valid = True
        if taken and entry.sat_counter < 3:
            entry.sat_counter += 1
        elif not taken and entry.sat_counter > 0:
            entry.sat_counter -= 1

    def _index(self, addr):
        # Bit 0 of the address is ignored
        return (addr >> 1) % len(self.contents)
|
||||
|
||||
class Fu(Enum):
    """Functional unit an instruction is dispatched to."""
    ALU = 1
    MUL = 2
    BRANCH = 3
    LDU = 4
    STU = 5
|
||||
|
||||
# We have
|
||||
# - FLU gathering ALU + BRANCH (+ CSR, not significant in CoreMark)
|
||||
# - LSU for loads and stores
|
||||
# - FP gathering MUL + second ALU (+ Floating, unused in CoreMark)
|
||||
# This way we do not have more write-back ports than currently with F
|
||||
|
||||
def to_fu(instr):
    """Return the functional unit (`Fu` member) that executes `instr`.

    Predicates are tried in order and the first one that holds wins;
    an instruction matching none of them goes to the ALU.
    """
    checks = (
        (instr.is_branch, Fu.BRANCH),
        (instr.is_regjump, Fu.BRANCH),
        (instr.is_muldiv, Fu.MUL),
        (instr.is_load, Fu.LDU),
        (instr.is_store, Fu.STU),
    )
    for predicate, unit in checks:
        if predicate():
            return unit
    return Fu.ALU
|
||||
|
||||
class FusBusy:
    """Is each functional unit busy"""

    def __init__(self, has_alu2=False):
        """Start with every unit free.

        has_alu2: whether a second ALU is available for ALU instructions.
        """
        self.has_alu2 = has_alu2

        # One busy flag per unit; `cycle` recomputes them all
        self.alu = False
        self.mul = False
        self.branch = False
        self.ldu = False
        self.stu = False
        self.alu2 = False

        # Remembers that a MUL was issued since the last call to `cycle`
        self.issued_mul = False

    def _alu2_ready(self):
        """Tell whether the second ALU exists and is free."""
        return self.has_alu2 and not self.alu2

    def is_ready(self, fu):
        """Tell whether functional unit `fu` can accept an instruction.

        Raises KeyError when `fu` is not one of the known units.
        """
        if fu == Fu.ALU:
            # Either ALU will do
            return self._alu2_ready() or not self.alu
        busy_flag = {
            Fu.MUL: "mul",
            Fu.BRANCH: "branch",
            Fu.LDU: "ldu",
            Fu.STU: "stu",
        }[fu]
        return not getattr(self, busy_flag)

    def is_ready_for(self, instr):
        """Tell whether the unit needed by `instr` is free."""
        return self.is_ready(to_fu(instr))

    def issue(self, instr):
        """Mark busy the units taken by issuing `instr`."""
        handlers = {
            Fu.ALU: FusBusy.issue_alu,
            Fu.MUL: FusBusy.issue_mul,
            Fu.BRANCH: FusBusy.issue_branch,
            Fu.LDU: FusBusy.issue_ldu,
            Fu.STU: FusBusy.issue_stu,
        }
        handler = handlers[to_fu(instr)]
        return handler(self)

    def issue_mul(self):
        """Take the multiplier and record that a MUL was issued."""
        self.issued_mul = True
        self.mul = True

    def issue_alu(self):
        """Take the second ALU when it is free, else the first ALU.

        Taking the first ALU also marks the branch unit busy.
        """
        if self._alu2_ready():
            self.alu2 = True
            return
        assert not self.alu
        self.alu = True
        self.branch = True

    def issue_branch(self):
        """Take the branch unit together with the first ALU."""
        self.alu = True
        self.branch = True
        # Stores are not allowed yet
        self.stu = True

    def issue_ldu(self):
        """Take the load unit; the store unit is marked busy as well."""
        self.ldu = True
        self.stu = True

    def issue_stu(self):
        """Take the store unit; the load unit is marked busy as well."""
        self.stu = True
        self.ldu = True

    def cycle(self):
        """Advance one cycle: release every unit.

        The exception is that ALU and branch stay busy for this cycle
        when a MUL was issued since the previous call.
        """
        mul_was_issued = self.issued_mul
        self.alu = mul_was_issued
        self.branch = mul_was_issued
        self.mul = False
        self.ldu = False
        self.stu = False
        self.alu2 = False
        self.issued_mul = False
|
||||
|
||||
class Model:
    """Models the scheduling of CVA6"""

    # Groups: (1) leading word, (2) address without its 0x00000000 prefix,
    # (3) instruction encoding, (4) cycle number, (5) rest of the line
    re_instr = re.compile(
        r"([a-z]+)\s+0:\s*0x00000000([0-9a-f]+)\s*\(([0-9a-fx]+)\)\s*@\s*([0-9]+)\s*(.*)"
    )

    def __init__(
            self,
            debug=False,
            issue=1,
            commit=2,
            sb_len=8,
            fetch_size=None,
            has_forwarding=True,
            has_renaming=True):
        """Build an empty model.

        debug: print events and the scoreboard while running.
        issue: number of issue attempts per cycle; a second ALU is
            enabled when it is greater than 1.
        commit: number of commit attempts per cycle.
        sb_len: no instruction is issued once the scoreboard holds this
            many entries.
        fetch_size: forwarded to IqLen; defaults to `4 * issue`.
        has_forwarding: a RAW dependency on a done entry is not a hazard.
        has_renaming: WAW dependencies are not hazards.
        """
        self.debug = debug
        self.issue_width = issue
        self.commit_width = commit
        self.sb_len = sb_len
        self.has_forwarding = has_forwarding
        self.has_renaming = has_renaming

        self.ras = Ras(debug=debug)
        self.bht = Bht()
        self.fus = FusBusy(issue > 1)
        self.iqlen = IqLen(fetch_size or 4 * issue, debug)

        self.instr_queue = []
        self.scoreboard = []
        self.retired = []
        self.log = []
        self.last_issued = None
        self.last_committed = None

    def log_event_on(self, instr, kind, cycle):
        """Log an event on the instruction"""
        if self.debug:
            print(f"{instr}: {kind}")
        event = Event(kind, cycle)
        instr.events.append(event)
        self.log.append((event, instr))

    def predict_branch(self, instr):
        """Predict if branch is taken or not"""
        pred = self.bht.predict(instr.address)
        if pred is None:
            # No history for this branch: fall back on the upper bits of
            # the offset (presumably its sign, i.e. backward => taken;
            # confirm against Instruction.offset)
            return instr.offset() >> 31 != 0
        return pred

    def predict_regjump(self, instr):
        """Predict destination address of indirect jump"""
        if not instr.is_ret():
            return 0  # always miss, as there is no btb yet
        return self.ras.read() or 0

    def predict_pc(self, last):
        """Predict next program counter depending on last issued instruction"""
        if last.is_branch():
            if self.predict_branch(last):
                delta = to_signed(last.offset())
            else:
                delta = last.size()
            return last.address + delta
        if last.is_regjump():
            return self.predict_regjump(last)
        return None

    def issue_manage_last_branch(self, instr, cycle):
        """Flush IQ if branch miss, jump if branch hit"""
        if self.last_issued is None:
            return
        last = self.last_issued.instr
        pred = self.predict_pc(last)
        if pred is None:
            return
        bmiss = pred != instr.address
        # NOTE(review): 6 looks like the number of cycles after which the
        # previous instruction counts as resolved; confirm with the design
        resolved = cycle >= self.last_issued.issue_cycle + 6
        if bmiss and not resolved:
            self.iqlen.flush()
        branch = EventKind.BMISS if bmiss else EventKind.BHIT
        # Log the outcome only once per instruction
        if not any(e.kind == branch for e in instr.events):
            self.log_event_on(instr, branch, cycle)
            taken = instr.address != last.next_addr()
            if taken and not bmiss:
                # last (not instr) was like a jump
                self.iqlen.jump()

    def commit_manage_last_branch(self, instr, cycle):
        """Resolve branch prediction"""
        previous = self.last_committed
        if previous is not None and previous.is_branch():
            # The branch was taken if we did not fall through to `instr`
            taken = instr.address != previous.next_addr()
            self.bht.resolve(previous.address, taken)
        self.last_committed = instr

    def find_data_hazards(self, instr, cycle):
        """Detect and log data hazards"""
        found = False
        for entry in self.scoreboard:
            waw = instr.has_WAW_from(entry.instr) and not self.has_renaming
            if waw:
                self.log_event_on(instr, EventKind.WAW, cycle)
                found = True
            can_forward = self.has_forwarding and entry.done
            raw = instr.has_RAW_from(entry.instr) and not can_forward
            if raw:
                self.log_event_on(instr, EventKind.RAW, cycle)
                found = True
        return found

    def find_structural_hazard(self, instr, cycle):
        """Detect and log structural hazards"""
        if self.fus.is_ready_for(instr):
            return False
        self.log_event_on(instr, EventKind.STRUCT, cycle)
        return True

    def try_issue(self, cycle):
        """Try to issue an instruction"""
        if not self.instr_queue or len(self.scoreboard) >= self.sb_len:
            return
        instr = self.instr_queue[0]
        # Every check runs, whatever the others say, because each one
        # logs events or updates the instruction queue length
        data_hazard = self.find_data_hazards(instr, cycle)
        structural_hazard = self.find_structural_hazard(instr, cycle)
        self.issue_manage_last_branch(instr, cycle)
        fetched = self.iqlen.has(instr)
        if data_hazard or structural_hazard or not fetched:
            return
        self.iqlen.remove(instr)
        instr = self.instr_queue.pop(0)
        self.log_event_on(instr, EventKind.issue, cycle)
        entry = Entry(instr)
        self.scoreboard.append(entry)
        self.fus.issue(instr)
        self.last_issued = LastIssue(instr, cycle)
        self.ras.resolve(instr)

    def try_execute(self, cycle):
        """Try to execute instructions"""
        for entry in self.scoreboard:
            entry.cycles_since_issue += 1
            instr = entry.instr
            is_memory = instr.is_load() or instr.is_store()
            is_muldiv = instr.is_muldiv()
            # Loads, stores and mul/div take 2 cycles, the rest take 1
            duration = 2 if is_memory or is_muldiv else 1
            if entry.cycles_since_issue == duration:
                self.log_event_on(instr, EventKind.done, cycle)
                entry.done = True

    def try_commit(self, cycle, commit_port):
        """Try to commit an instruction"""
        if not self.scoreboard:
            return
        entry = self.scoreboard[0]
        # Stores can only be committed through port 0
        if commit_port > 0 and entry.instr.is_store():
            return
        if not entry.done:
            return
        instr = self.scoreboard.pop(0).instr
        self.log_event_on(instr, EventKind.commit, cycle)
        self.retired.append(instr)
        self.commit_manage_last_branch(instr, cycle)

    def run_cycle(self, cycle):
        """Runs a cycle"""
        self.fus.cycle()
        for commit_port in range(self.commit_width):
            self.try_commit(cycle, commit_port)
        self.try_execute(cycle)
        for _ in range(self.issue_width):
            self.try_issue(cycle)
        self.iqlen.fetch()

    def load_file(self, path):
        """Fill a model from a trace file"""
        with open(path, "r", encoding="utf8") as file:
            for raw_line in file:
                line = raw_line.strip()
                found = Model.re_instr.search(line)
                if not found:
                    # Not an instruction line
                    continue
                address, hex_code, mnemo = found.group(2, 3, 5)
                instr = Instruction(line, address, hex_code, mnemo)
                self.instr_queue.append(instr)

    def run(self, cycles=None):
        """Run until completion

        Stops early once the cycle counter exceeds `cycles` (when given).
        Returns the value of the cycle counter on exit.
        """
        cycle = 0
        while self.instr_queue or self.scoreboard:
            self.run_cycle(cycle)
            if self.debug:
                print(f"Scoreboard @{cycle}")
                for entry in self.scoreboard:
                    print(f" {entry}")
                print(f"iqlen = {self.iqlen.len}")
                print()
            cycle += 1

            if cycles is not None and cycle > cycles:
                break
        return cycle
|
||||
|
||||
def write_trace(output_file, instructions):
    """Write cycle-annotated trace

    For each instruction, the "@ <number>" field of its original trace
    line is replaced with the cycle of its last event, which must be the
    commit event. One line is written per instruction.

    output_file: path of the file to create or overwrite.
    instructions: instructions to dump, in output order.
    """
    pattern = re.compile(r"@\s*[0-9]+")

    lines = []
    for instr in instructions:
        commit_event = instr.events[-1]
        assert commit_event.kind == EventKind.commit
        annotated = pattern.sub(f"@ {commit_event.cycle}", instr.line)
        # Optional hazard annotations, currently disabled:
        #if EventKind.STRUCT in [e.kind for e in instr.events]:
        #    annotated += " #STRUCT"
        #if EventKind.RAW in [e.kind for e in instr.events]:
        #    annotated += " #RAW"
        lines.append(f"{annotated}\n")

    # Same encoding as the one used to read the trace, so the result
    # does not depend on the platform's default encoding
    with open(output_file, 'w', encoding="utf8") as f:
        f.writelines(lines)
|
||||
|
||||
def print_data(name, value, ts=24, sep='='):
    """Prints 'name = data' with alignment of the '='

    `name` is padded with spaces up to `ts` characters (longer names are
    left as they are), followed by `sep` and `value`.
    """
    column = f"{name}{' ' * (ts - len(name))}"
    print(f"{column} {sep} {value}")
|
||||
|
||||
def display_scores(scores):
    """Display a 3D graph of scores against commit/issue-wide

    `scores` is a list of rows. The bar coordinates are computed, but
    the plotting calls are currently commented out, so nothing is shown.
    """
    bars = [
        (row_index, col_index, height)
        for row_index, row in enumerate(scores)
        for col_index, height in enumerate(row)
    ]

    # Position (x, y, z) and size (dx, dy, dz) of each bar
    x = [bar[0] for bar in bars]
    y = [bar[1] for bar in bars]
    z = [0] * len(bars)
    dx = [.5] * len(bars)
    dy = [.5] * len(bars)
    dz = [bar[2] for bar in bars]

    #fig = plt.figure()
    #ax1 = fig.add_subplot(111, projection='3d')
    #ax1.bar3d(x, y, z, dx, dy, dz)
    #ax1.set_xlabel("issue")
    #ax1.set_ylabel("commit")
    #ax1.set_zlabel("CoreMark/MHz")
    #plt.show()
|
||||
|
||||
def issue_commit_graph(input_file, n=3):
    """Plot the issue/commit graph

    Runs the model on `input_file` for every issue and commit width from
    1 to `n` and stores `1000000 / cycles` of the timed part at
    `scores[issue][commit]`. Row 0 and column 0 stay at 0.
    When `input_file` is None, a recorded 6x6 table is used instead.
    The table is printed, then handed to `display_scores`.
    """
    if input_file is None:
        scores = [
            [0, 0, 0, 0, 0, 0],
            [0, 2.651936045910317, 2.651936045910317, 2.651936045910317,
             2.651936045910317, 2.651936045910317],
            [0, 3.212779150348426, 3.6292766488711137, 3.6292766488711137,
             3.6292766488711137, 3.6292766488711137],
            [0, 3.2550388000624966, 3.900216852056974, 3.914997572701505,
             3.914997572701505, 3.914997572701505],
            [0, 3.2596436557555526, 3.9257869239889134, 3.9420984578510834,
             3.9421606193922765, 3.9421606193922765],
            [0, 3.260695897718491, 3.944757614368385, 3.9623576027736505,
             3.9625460150656, 3.9625460150656],
        ]
    else:
        scores = [[0] * (n + 1) for _ in range(n + 1)]
        widths = range(1, n + 1)
        for issue in widths:
            for commit in widths:
                print("running", issue, commit)
                model = Model(issue=issue, commit=commit)
                model.load_file(input_file)
                model.run()
                timed = filter_timed_part(model.retired)
                scores[issue][commit] = 1000000 / count_cycles(timed)
    print(scores)
    display_scores(scores)
|
||||
|
||||
def filter_timed_part(all_instructions):
    """Keep only timed part from a trace

    Every instruction whose mnemonic reads `csrr <2-char reg>, minstret`
    toggles the timed state and is itself dropped. Other instructions
    are kept only while the timed state is on.
    """
    marker = re.compile(r"^csrr\s+\w\w,\s*minstret$")
    kept = []
    timed = False
    for instr in all_instructions:
        if marker.search(instr.mnemo):
            timed = not timed
        elif timed:
            kept.append(instr)
    return kept
|
||||
|
||||
def count_cycles(retired):
    """Return the number of cycles spanned by `retired`.

    This is the latest event cycle of the last instruction minus the
    earliest event cycle of the first instruction.
    """
    first_instr, last_instr = retired[0], retired[-1]
    earliest = min(event.cycle for event in first_instr.events)
    return max(event.cycle for event in last_instr.events) - earliest
|
||||
|
||||
def print_stats(instructions):
    """Print statistics about `instructions`.

    Prints the cycle count, `1000000 / cycles` (labelled Coremark/MHz),
    the instruction count and, for each kind of event, how many such
    events occurred per instruction (as a percentage).

    An empty list only prints a notice: there is no first or last
    instruction to measure a cycle count from.
    """
    if not instructions:
        print("no instructions to report")
        return

    ecount = defaultdict(int)
    for instr in instructions:
        for e in instr.events:
            ecount[e.kind] += 1

    n_instr = len(instructions)
    n_cycles = count_cycles(instructions)

    print_data("cycle number", n_cycles)
    print_data("Coremark/MHz", 1000000 / n_cycles)
    print_data("instruction number", n_instr)
    for ek, count in ecount.items():
        print_data(f"{ek}/instr", f"{100 * count / n_instr:.2f}%")
|
||||
|
||||
def main(input_file: str):
    """Entry point

    Runs a dual-issue, dual-commit model in debug mode on the trace
    `input_file`, writes the annotated trace to 'annotated.log' and
    prints statistics about the timed part.
    """
    simulation = Model(debug=True, issue=2, commit=2)
    simulation.load_file(input_file)
    simulation.run()

    retired = simulation.retired
    write_trace('annotated.log', retired)
    timed_part = filter_timed_part(retired)
    print_stats(timed_part)
|
||||
|
||||
if __name__ == "__main__":
    # Fail with a usage message rather than an IndexError traceback
    # when the trace file argument is missing
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <trace-file>")
    main(sys.argv[1])
|
Loading…
Add table
Reference in a new issue