mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-19 03:44:46 -04:00
Merge branch 'dcache' into 'master'
Merge revised Data Cache into Master branch See merge request floce/ariane!9
This commit is contained in:
commit
cecdf9654f
51 changed files with 3546 additions and 778 deletions
|
@ -35,13 +35,6 @@ test_fifo:
|
|||
# - make scoreboard library=scoreboard_lib
|
||||
# - vcover-10.6 report scoreboard.ucdb
|
||||
|
||||
test_dcache_arbiter:
|
||||
stage: test
|
||||
before_script:
|
||||
- make build library=dcache_arbiter_lib
|
||||
script:
|
||||
- make dcache_arbiter library=dcache_arbiter_lib
|
||||
- vcover-10.6 report dcache_arbiter.ucdb
|
||||
|
||||
test_store_queue:
|
||||
stage: test
|
||||
|
@ -94,7 +87,6 @@ test_failed_tests:
|
|||
# paths:
|
||||
# - covhtmlreport
|
||||
|
||||
|
||||
pages:
|
||||
stage: deploy
|
||||
dependencies:
|
||||
|
|
12
.gitmodules
vendored
12
.gitmodules
vendored
|
@ -10,3 +10,15 @@
|
|||
[submodule "tb"]
|
||||
path = tb
|
||||
url = ../uvm-components.git
|
||||
[submodule "src/axi_mem_if"]
|
||||
path = src/axi_mem_if
|
||||
url = git@iis-git.ee.ethz.ch:kerbin/axi_mem_if.git
|
||||
[submodule "src/axi2per"]
|
||||
path = src/axi2per
|
||||
url = git@iis-git.ee.ethz.ch:kerbin/axi2per.git
|
||||
[submodule "src/axi_slice"]
|
||||
path = src/axi_slice
|
||||
url = git@iis-git.ee.ethz.ch:pulp-open/axi_slice.git
|
||||
[submodule "src/axi_node"]
|
||||
path = src/axi_node
|
||||
url = git@iis-git.ee.ethz.ch:kerbin/axi_node.git
|
||||
|
|
16
CHANGELOG.md
16
CHANGELOG.md
|
@ -6,6 +6,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
### 1.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- Non-blocking data cache
|
||||
- Two AXI interfaces on top level, one for bypassing and one for actual cache-able regions
|
||||
- Performance Counters
|
||||
- Hardware multiplication (full M-Extension)
|
||||
- Support for inter processor interrupts (IPI)
|
||||
|
||||
### Changed
|
||||
|
||||
- Testbench: EOC component now listening on store interface only
|
||||
- Store interface has been simplified by removing the `valid` signal; a transaction is now considered finished as soon as the dcache gives the grant signal.
|
||||
- EOC and dcache checkers have been reworked to get rid of absolute paths in the UVM testbench
|
||||
|
||||
### 0.4.0 - 2017-10-13
|
||||
|
||||
Linux booting on FPGA.
|
||||
|
|
45
Makefile
45
Makefile
|
@ -9,9 +9,9 @@ top_level = core_tb
|
|||
test_top_level = core_tb
|
||||
|
||||
# Ariane PKG
|
||||
ariane_pkg = include/ariane_pkg.sv
|
||||
ariane_pkg = include/ariane_pkg.sv include/nbdcache_pkg.sv
|
||||
# utility modules
|
||||
util = $(wildcard src/util/*.sv*)
|
||||
util = $(wildcard src/util/*.svh) src/util/instruction_tracer_pkg.sv src/util/instruction_tracer_if.sv src/util/cluster_clock_gating.sv src/util/behav_sram.sv
|
||||
# test targets
|
||||
tests = alu scoreboard fifo dcache_arbiter store_queue lsu core fetch_fifo
|
||||
# UVM agents
|
||||
|
@ -27,9 +27,11 @@ test_pkg = $(wildcard tb/test/*/*sequence_pkg.sv*) $(wildcard tb/test/*/*_pkg.sv
|
|||
# DPI
|
||||
dpi = $(wildcard tb/dpi/*)
|
||||
# this list contains the standalone components
|
||||
src = $(wildcard src/*.sv) $(wildcard tb/common/*.sv)
|
||||
src = $(wildcard src/*.sv) $(wildcard tb/common/*.sv) $(wildcard src/axi2per/*.sv) $(wildcard src/axi_slice/*.sv) \
|
||||
$(wildcard src/axi_node/*.sv) $(wildcard src/axi_mem_if/*.sv)
|
||||
# look for testbenches
|
||||
tbs = $(wildcard tb/*_tb.sv)
|
||||
tbs = tb/alu_tb.sv tb/core_tb.sv tb/dcache_arbiter_tb.sv tb/store_queue_tb.sv tb/scoreboard_tb.sv tb/fifo_tb.sv
|
||||
|
||||
# RISCV-tests path
|
||||
riscv-test-dir = riscv-tests/isa
|
||||
riscv-tests = rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p-addw rv64ui-p-and rv64ui-p-auipc \
|
||||
|
@ -48,12 +50,11 @@ riscv-tests = rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p-
|
|||
rv64ui-v-xor rv64ui-v-xori rv64ui-v-slliw rv64ui-v-sll rv64ui-v-slli rv64ui-v-slliw \
|
||||
rv64ui-v-slt rv64ui-v-slti rv64ui-v-sltiu rv64ui-v-sltu rv64ui-v-sra rv64ui-v-srai \
|
||||
rv64ui-v-sraiw rv64ui-v-sraw rv64ui-v-srl rv64ui-v-srli rv64ui-v-srliw rv64ui-v-srlw \
|
||||
rv64ui-v-lb rv64ui-v-lbu rv64ui-v-ld rv64ui-v-lh rv64ui-v-lhu rv64ui-v-lui
|
||||
|
||||
# rv64um-p-mul rv64um-p-mulh rv64um-p-mulhsu rv64um-p-mulhu rv64um-p-div rv64um-p-divu rv64um-p-rem \
|
||||
# rv64um-p-remu rv64um-p-mulw rv64um-p-divw rv64um-p-divuw rv64um-p-remw rv64um-p-remuw \
|
||||
# rv64um-v-mul rv64um-v-mulh rv64um-v-mulhsu rv64um-v-mulhu rv64um-v-div rv64um-v-divu rv64um-v-rem \
|
||||
# rv64um-v-remu rv64um-v-mulw rv64um-v-divw rv64um-v-divuw rv64um-v-remw rv64um-v-remuw
|
||||
rv64ui-v-lb rv64ui-v-lbu rv64ui-v-ld rv64ui-v-lh rv64ui-v-lhu rv64ui-v-lui \
|
||||
rv64um-p-mul rv64um-p-mulh rv64um-p-mulhsu rv64um-p-mulhu rv64um-p-div rv64um-p-divu rv64um-p-rem \
|
||||
rv64um-p-remu rv64um-p-mulw rv64um-p-divw rv64um-p-divuw rv64um-p-remw rv64um-p-remuw \
|
||||
rv64um-v-mul rv64um-v-mulh rv64um-v-mulhsu rv64um-v-mulhu rv64um-v-div rv64um-v-divu rv64um-v-rem \
|
||||
rv64um-v-remu rv64um-v-mulw rv64um-v-divw rv64um-v-divuw rv64um-v-remw rv64um-v-remuw
|
||||
|
||||
# failed test directory
|
||||
failed-tests = $(wildcard failedtests/*.S)
|
||||
|
@ -66,7 +67,7 @@ max_cycles = 10000000
|
|||
# Test case to run
|
||||
test_case = core_test
|
||||
# QuestaSim Version
|
||||
questa_version =
|
||||
questa_version = -10.6b
|
||||
compile_flag = +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive
|
||||
# Moore binary
|
||||
moore = ~fschuiki/bin/moore
|
||||
|
@ -74,8 +75,6 @@ uvm-flags = +UVM_NO_RELNOTES
|
|||
# Iterate over all include directories and write them with +incdir+ prefixed
|
||||
# +incdir+ works for Verilator and QuestaSim
|
||||
list_incdir = $(foreach dir, ${incdir}, +incdir+$(dir))
|
||||
# Device Tree Compiler
|
||||
DTC = dtc
|
||||
|
||||
# create library if it doesn't exist
|
||||
|
||||
|
@ -120,21 +119,26 @@ $(library):
|
|||
# Create the library
|
||||
vlib${questa_version} ${library}
|
||||
|
||||
sim: build ariane_tb.dtb
|
||||
sim: build
|
||||
vsim${questa_version} -lib ${library} ${top_level}_optimized +UVM_TESTNAME=${test_case} +BASEDIR=$(riscv-test-dir) \
|
||||
+ASMTEST=$(riscv-test) $(uvm-flags) +UVM_VERBOSITY=HIGH -coverage -classdebug -do "do tb/wave/wave_core.do"
|
||||
|
||||
simc: build ariane_tb.dtb
|
||||
sim_nopt: build
|
||||
vsim${questa_version} -novopt -lib ${library} ${top_level} +UVM_TESTNAME=${test_case} +BASEDIR=$(riscv-test-dir) \
|
||||
+ASMTEST=$(riscv-test) $(uvm-flags) +UVM_VERBOSITY=HIGH -coverage -classdebug -do "do tb/wave/wave_core.do"
|
||||
|
||||
|
||||
simc: build
|
||||
vsim${questa_version} -c -lib ${library} ${top_level}_optimized +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
|
||||
+BASEDIR=$(riscv-test-dir) $(uvm-flags) +ASMTEST=$(riscv-test) -coverage -classdebug -do "do tb/wave/wave_core.do"
|
||||
|
||||
run-asm-tests: build ariane_tb.dtb
|
||||
run-asm-tests: build
|
||||
$(foreach test, $(riscv-tests), vsim$(questa_version) +BASEDIR=$(riscv-test-dir) +max-cycles=$(max_cycles) \
|
||||
+UVM_TESTNAME=$(test_case) $(uvm-flags) +ASMTEST=$(test) +uvm_set_action="*,_ALL_,UVM_ERROR,UVM_DISPLAY|UVM_STOP" -c \
|
||||
-coverage -classdebug -do "coverage save -onexit $@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
|
||||
$(library).$(test_top_level)_optimized;)
|
||||
|
||||
run-failed-tests: build ariane_tb.dtb
|
||||
run-failed-tests: build
|
||||
# make the tests
|
||||
cd failedtests && make
|
||||
# run the RTL simulation
|
||||
|
@ -148,7 +152,7 @@ run-failed-tests: build ariane_tb.dtb
|
|||
$(foreach test, $(failed-tests:.S=), diff $(test).spike.sig $(test).rtlsim.sig;)
|
||||
|
||||
# Run the specified test case
|
||||
$(tests): build ariane_tb.dtb
|
||||
$(tests): build
|
||||
# Optimize top level
|
||||
vopt${questa_version} -work ${library} ${compile_flag} $@_tb -o $@_tb_optimized +acc -check_synthesis
|
||||
# vsim${questa_version} $@_tb_optimized
|
||||
|
@ -166,9 +170,6 @@ build-moore:
|
|||
build-tests:
|
||||
cd riscv-tests && autoconf && ./configure --prefix=/home/zarubaf/riscv && make isa -j8
|
||||
|
||||
# Compile device tree
|
||||
ariane_tb.dtb: ariane_tb.dts
|
||||
$(DTC) -I dts -O dtb ariane_tb.dts > ariane_tb.dtb
|
||||
|
||||
# Use Verilator to lint the target
|
||||
lint:
|
||||
|
@ -183,5 +184,3 @@ clean:
|
|||
|
||||
.PHONY:
|
||||
build lint build-moore
|
||||
|
||||
# make CC=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/gcc CXX=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/g++ -j20
|
||||
|
|
|
@ -3,5 +3,5 @@
|
|||
cd output && make
|
||||
cd ../..
|
||||
# start the simulation
|
||||
vsim-10.6 -c -lib work core_tb_optimized +UVM_TESTNAME=core_test $2 +BASEDIR=riscv-torture $1 +ASMTEST=$3 +UVM_VERBOSITY=LOW -coverage -classdebug -do "run -a"
|
||||
vsim-10.6b -c -lib work core_tb_optimized +UVM_TESTNAME=core_test $2 +BASEDIR=riscv-torture $1 +ASMTEST=$3 +UVM_VERBOSITY=LOW -coverage -classdebug -do "run -a"
|
||||
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
/dts-v1/;
|
||||
|
||||
/ {
|
||||
#address-cells = <2>;
|
||||
#size-cells = <2>;
|
||||
compatible = "ucbbar,spike-bare-dev";
|
||||
model = "ucbbar,spike-bare";
|
||||
cpus {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
timebase-frequency = <10000000>;
|
||||
CPU0: cpu@0 {
|
||||
device_type = "cpu";
|
||||
reg = <0>;
|
||||
status = "okay";
|
||||
compatible = "riscv";
|
||||
riscv,isa = "rv64ic";
|
||||
mmu-type = "riscv,sv39";
|
||||
clock-frequency = <1000000000>;
|
||||
CPU0_intc: interrupt-controller {
|
||||
#interrupt-cells = <1>;
|
||||
interrupt-controller;
|
||||
compatible = "riscv,cpu-intc";
|
||||
};
|
||||
};
|
||||
};
|
||||
memory@80000000 {
|
||||
device_type = "memory";
|
||||
reg = <0x0 0x80000000 0x0 0x1000000>;
|
||||
};
|
||||
soc {
|
||||
#address-cells = <2>;
|
||||
#size-cells = <2>;
|
||||
compatible = "ucbbar,spike-bare-soc", "simple-bus";
|
||||
ranges;
|
||||
clint@2000000 {
|
||||
compatible = "riscv,clint0";
|
||||
interrupts-extended = <&CPU0_intc 3 &CPU0_intc 7 >;
|
||||
reg = <0x0 0x2000000 0x0 0xc0000>;
|
||||
};
|
||||
uart@3000000 {
|
||||
compatible = "sifive,uart0";
|
||||
reg = <0x0 0x3000000 0x0 0x3000000>;
|
||||
};
|
||||
};
|
||||
};
|
|
@ -43,7 +43,7 @@ $(asm_tests_bin): %: %.S $(extra_files)
|
|||
$(RISCV_GCC) $(RISCV_GCC_OPTS) -I../riscv-torture/env/p -T../riscv-torture/env/p/link.ld $< -o $@
|
||||
|
||||
$(asm_tests_hex): %.hex: % $(extra_files)
|
||||
elf2hex 8 16384 $< 2147483648 > $@
|
||||
elf2hex 8 16384 $< 1073741824 > $@
|
||||
|
||||
$(asm_tests_sig): %.sig: %
|
||||
$(RISCV_SIM) +signature=$@ $<
|
||||
|
|
|
@ -12,6 +12,9 @@
|
|||
|
||||
|
||||
package ariane_pkg;
|
||||
timeunit 1ns;
|
||||
timeprecision 1ps;
|
||||
|
||||
// ---------------
|
||||
// Global Config
|
||||
// ---------------
|
||||
|
@ -78,7 +81,7 @@ package ariane_pkg;
|
|||
// ---------------
|
||||
// EX Stage
|
||||
// ---------------
|
||||
typedef enum logic [5:0] { // basic ALU op
|
||||
typedef enum logic [6:0] { // basic ALU op
|
||||
ADD, SUB, ADDW, SUBW,
|
||||
// logic operations
|
||||
XORL, ORL, ANDL,
|
||||
|
@ -94,12 +97,30 @@ package ariane_pkg;
|
|||
MRET, SRET, ECALL, WFI, FENCE, FENCE_I, SFENCE_VMA, CSR_WRITE, CSR_READ, CSR_SET, CSR_CLEAR,
|
||||
// LSU functions
|
||||
LD, SD, LW, LWU, SW, LH, LHU, SH, LB, SB, LBU,
|
||||
// Atomic Memory Operations
|
||||
AMO_LRW, AMO_LRD, AMO_SCW, AMO_SCD,
|
||||
AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW, AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW, AMO_MINWU,
|
||||
AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD, AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND, AMO_MINDU,
|
||||
// Multiplications
|
||||
MUL, MULH, MULHU, MULHSU, MULW,
|
||||
// Divisions
|
||||
DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW
|
||||
} fu_op;
|
||||
|
||||
// ----------------------
|
||||
// Extract Bytes from Op
|
||||
// ----------------------
|
||||
// TODO: Add atomics
|
||||
function automatic logic [1:0] extract_transfer_size (fu_op op);
|
||||
case (op)
|
||||
LD, SD: return 2'b11;
|
||||
LW, LWU, SW: return 2'b10;
|
||||
LH, LHU, SH: return 2'b01;
|
||||
LB, SB, LBU: return 2'b00;
|
||||
default: return 2'b11;
|
||||
endcase
|
||||
endfunction
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [63:0] vaddr;
|
||||
|
@ -202,6 +223,14 @@ package ariane_pkg;
|
|||
localparam OPCODE_JAL = 7'h6f;
|
||||
localparam OPCODE_AUIPC = 7'h17;
|
||||
localparam OPCODE_LUI = 7'h37;
|
||||
localparam OPCODE_AMO = 7'h2F;
|
||||
// --------------------
|
||||
// Atomics
|
||||
// --------------------
|
||||
|
||||
typedef enum logic [3:0] {
|
||||
AMO_NONE, AMO_LR, AMO_SC, AMO_SWAP, AMO_ADD, AMO_AND, AMO_OR, AMO_XOR, AMO_MAX, AMO_MAXU, AMO_MIN, AMO_MINU
|
||||
} amo_t;
|
||||
|
||||
// --------------------
|
||||
// Privilege Spec
|
||||
|
@ -255,43 +284,75 @@ package ariane_pkg;
|
|||
localparam logic [63:0] M_TIMER_INTERRUPT = (1 << 63) | 7;
|
||||
localparam logic [63:0] S_EXT_INTERRUPT = (1 << 63) | 9;
|
||||
localparam logic [63:0] M_EXT_INTERRUPT = (1 << 63) | 11;
|
||||
|
||||
// ----------------------
|
||||
// Performance Counters
|
||||
// ----------------------
|
||||
localparam logic [11:0] PERF_L1_ICACHE_MISS = 12'h0; // L1 Instr Cache Miss
|
||||
localparam logic [11:0] PERF_L1_DCACHE_MISS = 12'h1; // L1 Data Cache Miss
|
||||
localparam logic [11:0] PERF_ITLB_MISS = 12'h2; // ITLB Miss
|
||||
localparam logic [11:0] PERF_DTLB_MISS = 12'h3; // DTLB Miss
|
||||
localparam logic [11:0] PERF_LOAD = 12'h4; // Loads
|
||||
localparam logic [11:0] PERF_STORE = 12'h5; // Stores
|
||||
localparam logic [11:0] PERF_EXCEPTION = 12'h6; // Taken exceptions
|
||||
localparam logic [11:0] PERF_EXCEPTION_RET = 12'h7; // Exception return
|
||||
localparam logic [11:0] PERF_BRANCH_JUMP = 12'h8; // Software change of PC
|
||||
localparam logic [11:0] PERF_CALL = 12'h9; // Procedure call
|
||||
localparam logic [11:0] PERF_RET = 12'hA; // Procedure Return
|
||||
localparam logic [11:0] PERF_MIS_PREDICT = 12'hB; // Branch mis-predicted
|
||||
|
||||
// -----
|
||||
// CSRs
|
||||
// -----
|
||||
typedef enum logic [11:0] {
|
||||
CSR_SSTATUS = 12'h100,
|
||||
CSR_SIE = 12'h104,
|
||||
CSR_STVEC = 12'h105,
|
||||
CSR_SCOUNTEREN = 12'h106,
|
||||
CSR_SSCRATCH = 12'h140,
|
||||
CSR_SEPC = 12'h141,
|
||||
CSR_SCAUSE = 12'h142,
|
||||
CSR_STVAL = 12'h143,
|
||||
CSR_SIP = 12'h144,
|
||||
CSR_SATP = 12'h180,
|
||||
|
||||
CSR_MSTATUS = 12'h300,
|
||||
CSR_MISA = 12'h301,
|
||||
CSR_MEDELEG = 12'h302,
|
||||
CSR_MIDELEG = 12'h303,
|
||||
CSR_MIE = 12'h304,
|
||||
CSR_MTVEC = 12'h305,
|
||||
CSR_MCOUNTEREN = 12'h306,
|
||||
CSR_MSCRATCH = 12'h340,
|
||||
CSR_MEPC = 12'h341,
|
||||
CSR_MCAUSE = 12'h342,
|
||||
CSR_MTVAL = 12'h343,
|
||||
CSR_MIP = 12'h344,
|
||||
CSR_MVENDORID = 12'hF11,
|
||||
CSR_MARCHID = 12'hF12,
|
||||
CSR_MIMPID = 12'hF13,
|
||||
CSR_MHARTID = 12'hF14,
|
||||
CSR_MCYCLE = 12'hB00,
|
||||
CSR_MINSTRET = 12'hB02,
|
||||
// Supervisor Mode CSRs
|
||||
CSR_SSTATUS = 12'h100,
|
||||
CSR_SIE = 12'h104,
|
||||
CSR_STVEC = 12'h105,
|
||||
CSR_SCOUNTEREN = 12'h106,
|
||||
CSR_SSCRATCH = 12'h140,
|
||||
CSR_SEPC = 12'h141,
|
||||
CSR_SCAUSE = 12'h142,
|
||||
CSR_STVAL = 12'h143,
|
||||
CSR_SIP = 12'h144,
|
||||
CSR_SATP = 12'h180,
|
||||
// Machine Mode CSRs
|
||||
CSR_MSTATUS = 12'h300,
|
||||
CSR_MISA = 12'h301,
|
||||
CSR_MEDELEG = 12'h302,
|
||||
CSR_MIDELEG = 12'h303,
|
||||
CSR_MIE = 12'h304,
|
||||
CSR_MTVEC = 12'h305,
|
||||
CSR_MCOUNTEREN = 12'h306,
|
||||
CSR_MSCRATCH = 12'h340,
|
||||
CSR_MEPC = 12'h341,
|
||||
CSR_MCAUSE = 12'h342,
|
||||
CSR_MTVAL = 12'h343,
|
||||
CSR_MIP = 12'h344,
|
||||
CSR_MVENDORID = 12'hF11,
|
||||
CSR_MARCHID = 12'hF12,
|
||||
CSR_MIMPID = 12'hF13,
|
||||
CSR_MHARTID = 12'hF14,
|
||||
CSR_MCYCLE = 12'hB00,
|
||||
CSR_MINSTRET = 12'hB02,
|
||||
CSR_DCACHE = 12'h700,
|
||||
// Counters and Timers
|
||||
CSR_CYCLE = 12'hC00,
|
||||
CSR_TIME = 12'hC01,
|
||||
CSR_INSTRET = 12'hC02
|
||||
CSR_CYCLE = 12'hC00,
|
||||
CSR_TIME = 12'hC01,
|
||||
CSR_INSTRET = 12'hC02,
|
||||
// Performance counters
|
||||
CSR_L1_ICACHE_MISS = PERF_L1_ICACHE_MISS + 12'hC03,
|
||||
CSR_L1_DCACHE_MISS = PERF_L1_DCACHE_MISS + 12'hC03,
|
||||
CSR_ITLB_MISS = PERF_ITLB_MISS + 12'hC03,
|
||||
CSR_DTLB_MISS = PERF_DTLB_MISS + 12'hC03,
|
||||
CSR_LOAD = PERF_LOAD + 12'hC03,
|
||||
CSR_STORE = PERF_STORE + 12'hC03,
|
||||
CSR_EXCEPTION = PERF_EXCEPTION + 12'hC03,
|
||||
CSR_EXCEPTION_RET = PERF_EXCEPTION_RET + 12'hC03,
|
||||
CSR_BRANCH_JUMP = PERF_BRANCH_JUMP + 12'hC03,
|
||||
CSR_CALL = PERF_CALL + 12'hC03,
|
||||
CSR_RET = PERF_RET + 12'hC03,
|
||||
CSR_MIS_PREDICT = PERF_MIS_PREDICT + 12'hC03
|
||||
} csr_reg_t;
|
||||
|
||||
// decoded CSR address
|
||||
|
@ -309,7 +370,6 @@ package ariane_pkg;
|
|||
// ----------------------
|
||||
// Debug Unit
|
||||
// ----------------------
|
||||
|
||||
typedef enum logic [15:0] {
|
||||
DBG_CTRL = 16'h0,
|
||||
DBG_HIT = 16'h8,
|
||||
|
@ -348,4 +408,10 @@ package ariane_pkg;
|
|||
DBG_CSR_M1 = 16'hF???
|
||||
} debug_reg_t;
|
||||
|
||||
// ----------------------
|
||||
// Arithmetic Functions
|
||||
// ----------------------
|
||||
function automatic logic [63:0] sext32 (logic [31:0] operand);
|
||||
return {{32{operand[31]}}, operand[31:0]};
|
||||
endfunction
|
||||
endpackage
|
||||
|
|
|
@ -38,13 +38,11 @@ interface mem_if
|
|||
`ifndef VERILATOR
|
||||
`ifndef SYNTHESIS
|
||||
clocking mck @(posedge clk);
|
||||
default input #1ns output #1ns;
|
||||
input address, data_wdata, data_we, data_req, data_be;
|
||||
output data_rvalid, data_rdata, data_gnt;
|
||||
endclocking
|
||||
// Memory interface configured as slave
|
||||
clocking sck @(posedge clk);
|
||||
default input #1ns output #1ns;
|
||||
output address, data_wdata, data_we, data_req, data_be;
|
||||
input data_rvalid, data_rdata, data_gnt;
|
||||
endclocking
|
||||
|
@ -79,4 +77,4 @@ interface mem_if
|
|||
// modport Passive (clocking pck);
|
||||
|
||||
endinterface
|
||||
`endif
|
||||
`endif
|
||||
|
|
80
include/nbdcache_pkg.sv
Normal file
80
include/nbdcache_pkg.sv
Normal file
|
@ -0,0 +1,80 @@
|
|||
/* File: nbdcache_pkg.sv
|
||||
* Author: Florian Zaruba <zarubaf@ethz.ch>
|
||||
* Date: 13.10.2017
|
||||
*
|
||||
* Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
* All rights reserved.
|
||||
*
|
||||
* Description: Contains all the necessary defines for the non-blocking DCache
|
||||
* of Ariane in one package.
|
||||
*/
|
||||
|
||||
package nbdcache_pkg;
|
||||
|
||||
localparam int unsigned INDEX_WIDTH = 12;
|
||||
localparam int unsigned TAG_WIDTH = 44;
|
||||
localparam int unsigned CACHE_LINE_WIDTH = 128;
|
||||
localparam int unsigned SET_ASSOCIATIVITY = 8;
|
||||
localparam int unsigned NR_MSHR = 1;
|
||||
|
||||
// Calculated parameter
|
||||
localparam BYTE_OFFSET = $clog2(CACHE_LINE_WIDTH/8);
|
||||
localparam NUM_WORDS = 2**(INDEX_WIDTH-BYTE_OFFSET);
|
||||
localparam DIRTY_WIDTH = SET_ASSOCIATIVITY*2;
|
||||
// localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not
|
||||
|
||||
typedef enum logic { SINGLE_REQ, CACHE_LINE_REQ } req_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [1:0] id; // id for which we handle the miss
|
||||
logic valid;
|
||||
logic we;
|
||||
logic [55:0] addr;
|
||||
logic [7:0][7:0] wdata;
|
||||
logic [7:0] be;
|
||||
} mshr_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [63:0] addr;
|
||||
logic [7:0] be;
|
||||
logic [1:0] size;
|
||||
logic we;
|
||||
logic [63:0] wdata;
|
||||
logic bypass;
|
||||
} miss_req_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic [TAG_WIDTH-1:0] tag; // tag array
|
||||
logic [CACHE_LINE_WIDTH-1:0] data; // data array
|
||||
logic valid; // state array
|
||||
logic dirty; // state array
|
||||
} cache_line_t;
|
||||
|
||||
// cache line byte enable
|
||||
typedef struct packed {
|
||||
logic [TAG_WIDTH-1:0] tag; // byte enable into tag array
|
||||
logic [CACHE_LINE_WIDTH-1:0] data; // byte enable into data array
|
||||
logic [DIRTY_WIDTH/2-1:0] dirty; // byte enable into state array
|
||||
logic [DIRTY_WIDTH/2-1:0] valid; // byte enable into state array
|
||||
} cl_be_t;
|
||||
|
||||
// convert one-hot to binary -> needed for cache replacement
|
||||
function automatic logic [$clog2(SET_ASSOCIATIVITY)-1:0] one_hot_to_bin (input logic [SET_ASSOCIATIVITY-1:0] in);
|
||||
for (int unsigned i = 0; i < SET_ASSOCIATIVITY; i++) begin
|
||||
if (in[i])
|
||||
return i;
|
||||
end
|
||||
endfunction
|
||||
// get the first bit set, returns one hot value
|
||||
function automatic logic [SET_ASSOCIATIVITY-1:0] get_victim_cl (input logic [SET_ASSOCIATIVITY-1:0] valid_dirty);
|
||||
// one-hot return vector
|
||||
logic [SET_ASSOCIATIVITY-1:0] oh = '0;
|
||||
for (int unsigned i = 0; i < SET_ASSOCIATIVITY; i++) begin
|
||||
if (valid_dirty[i]) begin
|
||||
oh[i] = 1'b1;
|
||||
return oh;
|
||||
end
|
||||
end
|
||||
endfunction
|
||||
endpackage
|
|
@ -1 +1 @@
|
|||
Subproject commit 120d3d7e50209b617785d2f4637ba75ae603cfe2
|
||||
Subproject commit f32ccd65cab47a024a83fee8f414390e40051677
|
138
src/ariane.sv
138
src/ariane.sv
|
@ -25,23 +25,20 @@ import instruction_tracer_pkg::*;
|
|||
`endif
|
||||
`endif
|
||||
|
||||
|
||||
module ariane
|
||||
#(
|
||||
parameter N_EXT_PERF_COUNTERS = 0
|
||||
)
|
||||
(
|
||||
module ariane #(
|
||||
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000, // address on which to decide whether the request is cache-able or not
|
||||
parameter int unsigned AXI_ID_WIDTH = 10,
|
||||
parameter int unsigned AXI_USER_WIDTH = 1
|
||||
)(
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic test_en_i, // enable all clock gates for testing
|
||||
|
||||
output logic flush_icache_o, // request to flush icache
|
||||
output logic flush_dcache_o, // request to flush the dcache
|
||||
input logic flush_dcache_ack_i, // dcache flushed successfully
|
||||
// CPU Control Signals
|
||||
input logic fetch_enable_i,
|
||||
output logic core_busy_o,
|
||||
input logic [N_EXT_PERF_COUNTERS-1:0] ext_perf_counters_i,
|
||||
input logic l1_icache_miss_i,
|
||||
|
||||
// Core ID, Cluster ID and boot address are considered more or less static
|
||||
input logic [63:0] boot_addr_i,
|
||||
|
@ -55,19 +52,11 @@ module ariane
|
|||
input logic instr_if_data_rvalid_i,
|
||||
input logic [63:0] instr_if_data_rdata_i,
|
||||
// Data memory interface
|
||||
output logic [11:0] data_if_address_index_o,
|
||||
output logic [43:0] data_if_address_tag_o,
|
||||
output logic [63:0] data_if_data_wdata_o,
|
||||
output logic data_if_data_req_o,
|
||||
output logic data_if_data_we_o,
|
||||
output logic [7:0] data_if_data_be_o,
|
||||
output logic data_if_kill_req_o,
|
||||
output logic data_if_tag_valid_o,
|
||||
input logic data_if_data_gnt_i,
|
||||
input logic data_if_data_rvalid_i,
|
||||
input logic [63:0] data_if_data_rdata_i,
|
||||
AXI_BUS.Master data_if,
|
||||
AXI_BUS.Master bypass_if,
|
||||
// Interrupt inputs
|
||||
input logic irq_i, // level sensitive IR lines
|
||||
input logic [1:0] irq_i, // level sensitive IR lines, mip & sip
|
||||
input logic ipi_i, // inter-processor interrupts
|
||||
input logic [4:0] irq_id_i,
|
||||
output logic irq_ack_o,
|
||||
input logic irq_sec_i,
|
||||
|
@ -226,6 +215,17 @@ module ariane
|
|||
logic tvm_csr_id;
|
||||
logic tw_csr_id;
|
||||
logic tsr_csr_id;
|
||||
logic dcache_en_csr_nbdcache;
|
||||
// ----------------------------
|
||||
// Performance Counters <-> *
|
||||
// ----------------------------
|
||||
logic [11:0] addr_csr_perf;
|
||||
logic [63:0] data_csr_perf, data_perf_csr;
|
||||
logic we_csr_perf;
|
||||
|
||||
logic itlb_miss_ex_perf;
|
||||
logic dtlb_miss_ex_perf;
|
||||
logic dcache_miss_ex_perf;
|
||||
// --------------
|
||||
// CTRL <-> *
|
||||
// --------------
|
||||
|
@ -243,6 +243,8 @@ module ariane
|
|||
logic halt_ctrl;
|
||||
logic halt_debug_ctrl;
|
||||
logic halt_csr_ctrl;
|
||||
logic flush_dcache_ctrl_ex;
|
||||
logic flush_dcache_ack_ex_ctrl;
|
||||
// --------------
|
||||
// Debug <-> *
|
||||
// --------------
|
||||
|
@ -394,13 +396,18 @@ module ariane
|
|||
|
||||
.commit_instr_o ( commit_instr_id_commit ),
|
||||
.commit_ack_i ( commit_ack ),
|
||||
|
||||
.*
|
||||
);
|
||||
|
||||
// ---------
|
||||
// EX
|
||||
// ---------
|
||||
ex_stage ex_stage_i (
|
||||
ex_stage #(
|
||||
.CACHE_START_ADDR ( CACHE_START_ADDR ),
|
||||
.AXI_ID_WIDTH ( AXI_ID_WIDTH ),
|
||||
.AXI_USER_WIDTH ( AXI_USER_WIDTH )
|
||||
) ex_stage_i (
|
||||
.flush_i ( flush_ctrl_ex ),
|
||||
.fu_i ( fu_id_ex ),
|
||||
.operator_i ( operator_id_ex ),
|
||||
|
@ -437,7 +444,6 @@ module ariane
|
|||
.lsu_commit_ready_o ( lsu_commit_ready_ex_commit ), // to commit
|
||||
.lsu_exception_o ( lsu_exception_ex_id ),
|
||||
.no_st_pending_o ( no_st_pending_ex_commit ),
|
||||
|
||||
// CSR
|
||||
.csr_ready_o ( csr_ready_ex_id ),
|
||||
.csr_valid_i ( csr_valid_id_ex ),
|
||||
|
@ -446,6 +452,10 @@ module ariane
|
|||
.csr_valid_o ( csr_valid_ex_id ),
|
||||
.csr_addr_o ( csr_addr_ex_csr ),
|
||||
.csr_commit_i ( csr_commit_commit_ex ), // from commit
|
||||
// Performance counters
|
||||
.itlb_miss_o ( itlb_miss_ex_perf ),
|
||||
.dtlb_miss_o ( dtlb_miss_ex_perf ),
|
||||
.dcache_miss_o ( dcache_miss_ex_perf ),
|
||||
// Memory Management
|
||||
.enable_translation_i ( enable_translation_csr_ex ), // from CSR
|
||||
.en_ld_st_translation_i ( en_ld_st_translation_csr_ex ),
|
||||
|
@ -468,6 +478,12 @@ module ariane
|
|||
.mult_trans_id_o ( mult_trans_id_ex_id ),
|
||||
.mult_result_o ( mult_result_ex_id ),
|
||||
.mult_valid_o ( mult_valid_ex_id ),
|
||||
|
||||
.data_if ( data_if ),
|
||||
.dcache_en_i ( dcache_en_csr_nbdcache ),
|
||||
.flush_dcache_i ( flush_dcache_ctrl_ex ),
|
||||
.flush_dcache_ack_o ( flush_dcache_ack_ex_ctrl ),
|
||||
|
||||
.*
|
||||
);
|
||||
|
||||
|
@ -533,9 +549,35 @@ module ariane
|
|||
.tvm_o ( tvm_csr_id ),
|
||||
.tw_o ( tw_csr_id ),
|
||||
.tsr_o ( tsr_csr_id ),
|
||||
.dcache_en_o ( dcache_en_csr_nbdcache ),
|
||||
.perf_addr_o ( addr_csr_perf ),
|
||||
.perf_data_o ( data_csr_perf ),
|
||||
.perf_data_i ( data_perf_csr ),
|
||||
.perf_we_o ( we_csr_perf ),
|
||||
.*
|
||||
);
|
||||
|
||||
|
||||
// ------------------------
|
||||
// Performance Counters
|
||||
// ------------------------
|
||||
perf_counters i_perf_counters (
|
||||
.addr_i ( addr_csr_perf ),
|
||||
.we_i ( we_csr_perf ),
|
||||
.data_i ( data_csr_perf ),
|
||||
.data_o ( data_perf_csr ),
|
||||
.commit_instr_i ( commit_instr_id_commit ),
|
||||
.commit_ack_o ( commit_ack ),
|
||||
|
||||
.l1_dcache_miss_i ( dcache_miss_ex_perf ),
|
||||
.itlb_miss_i ( itlb_miss_ex_perf ),
|
||||
.dtlb_miss_i ( dtlb_miss_ex_perf ),
|
||||
|
||||
.ex_i ( ex_commit ),
|
||||
.eret_i ( eret ),
|
||||
.resolved_branch_i ( resolved_branch ),
|
||||
.*
|
||||
);
|
||||
// ------------
|
||||
// Controller
|
||||
// ------------
|
||||
|
@ -548,6 +590,8 @@ module ariane
|
|||
.flush_id_o ( flush_ctrl_id ),
|
||||
.flush_ex_o ( flush_ctrl_ex ),
|
||||
.flush_tlb_o ( flush_tlb_ctrl_ex ),
|
||||
.flush_dcache_o ( flush_dcache_ctrl_ex ),
|
||||
.flush_dcache_ack_i ( flush_dcache_ack_ex_ctrl ),
|
||||
|
||||
.halt_csr_i ( halt_csr_ctrl ),
|
||||
.halt_debug_i ( halt_debug_ctrl ),
|
||||
|
@ -620,32 +664,18 @@ module ariane
|
|||
assign tracer_if.commit_ack = commit_ack;
|
||||
// address translation
|
||||
// stores
|
||||
assign tracer_if.st_valid = ex_stage_i.lsu_i.store_unit_i.store_buffer_i.valid_i;
|
||||
assign tracer_if.st_paddr = ex_stage_i.lsu_i.store_unit_i.store_buffer_i.paddr_i;
|
||||
assign tracer_if.st_valid = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.valid_i;
|
||||
assign tracer_if.st_paddr = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.paddr_i;
|
||||
// loads
|
||||
assign tracer_if.ld_valid = ex_stage_i.lsu_i.load_unit_i.tag_valid_o;
|
||||
assign tracer_if.ld_kill = ex_stage_i.lsu_i.load_unit_i.kill_req_o;
|
||||
assign tracer_if.ld_paddr = ex_stage_i.lsu_i.load_unit_i.paddr_i;
|
||||
assign tracer_if.ld_valid = ex_stage_i.lsu_i.i_load_unit.tag_valid_o;
|
||||
assign tracer_if.ld_kill = ex_stage_i.lsu_i.i_load_unit.kill_req_o;
|
||||
assign tracer_if.ld_paddr = ex_stage_i.lsu_i.i_load_unit.paddr_i;
|
||||
// exceptions
|
||||
assign tracer_if.exception = commit_stage_i.exception_o;
|
||||
// assign current privilege level
|
||||
assign tracer_if.priv_lvl = priv_lvl;
|
||||
|
||||
program instr_tracer (instruction_tracer_if tracer_if);
|
||||
instruction_tracer it = new (tracer_if, 1'b0);
|
||||
|
||||
initial begin
|
||||
#15ns;
|
||||
it.create_file(cluster_id_i, core_id_i);
|
||||
it.trace();
|
||||
end
|
||||
|
||||
final begin
|
||||
it.close();
|
||||
end
|
||||
endprogram
|
||||
|
||||
instr_tracer instr_tracer_i (tracer_if);
|
||||
instr_tracer instr_tracer_i (tracer_if, cluster_id_i, core_id_i);
|
||||
`endif
|
||||
`endif
|
||||
|
||||
|
@ -658,3 +688,25 @@ module ariane
|
|||
end
|
||||
|
||||
endmodule // ariane
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
program instr_tracer
|
||||
(
|
||||
instruction_tracer_if tracer_if,
|
||||
input logic [5:0] cluster_id_i,
|
||||
input logic [3:0] core_id_i
|
||||
);
|
||||
|
||||
instruction_tracer it = new (tracer_if, 1'b0);
|
||||
|
||||
initial begin
|
||||
#15ns;
|
||||
it.create_file(cluster_id_i, core_id_i);
|
||||
it.trace();
|
||||
end
|
||||
|
||||
final begin
|
||||
it.close();
|
||||
end
|
||||
endprogram
|
||||
`endif
|
||||
|
|
1
src/axi2per
Submodule
1
src/axi2per
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 04753ab7ac05f0c227599749e97bdad24ebdfc4d
|
1
src/axi_mem_if
Submodule
1
src/axi_mem_if
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit dbf1f38dd677614394e8e0722c23463ac77176b5
|
1
src/axi_node
Submodule
1
src/axi_node
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 18d1fe362cac76496e0a2f9447d7a26cb3445efa
|
1
src/axi_slice
Submodule
1
src/axi_slice
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 940ab2b25c0d189a333343641b2e6e82ef227974
|
|
@ -71,7 +71,8 @@ module branch_unit (
|
|||
// here we handle the various possibilities of mis-predicts
|
||||
always_comb begin : mispredict_handler
|
||||
// set the jump base, for JALR we need to look at the register, for all other control flow instructions we can take the current PC
|
||||
automatic logic [63:0] jump_base = (operator_i == JALR) ? operand_a_i : pc_i;
|
||||
automatic logic [63:0] jump_base;
|
||||
jump_base = (operator_i == JALR) ? operand_a_i : pc_i;
|
||||
|
||||
target_address = 64'b0;
|
||||
resolved_branch_o.target_address = 64'b0;
|
||||
|
|
428
src/cache_ctrl.sv
Normal file
428
src/cache_ctrl.sv
Normal file
|
@ -0,0 +1,428 @@
|
|||
/* File: cache_ctrl.sv
|
||||
* Author: Florian Zaruba <zarubaf@ethz.ch>
|
||||
* Date: 14.10.2017
|
||||
*
|
||||
* Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
* All rights reserved.
|
||||
*
|
||||
* Description: Cache controller
|
||||
*/
|
||||
|
||||
import ariane_pkg::*;
|
||||
import nbdcache_pkg::*;
|
||||
|
||||
// Cache controller for a single core request port. It looks requests up in the
// cache arrays (SRAM interface) and forwards misses and bypassed accesses
// through miss_req_o.
module cache_ctrl #(
    parameter int unsigned SET_ASSOCIATIVITY = 8,
    parameter int unsigned INDEX_WIDTH       = 12,
    parameter int unsigned TAG_WIDTH         = 44,
    // The read-data select in this module slices cl_i[127:64] and the
    // elaboration-time check requires 128, so 128 is the only usable default
    // (the previous default of 100 could not elaborate cleanly).
    parameter int unsigned CACHE_LINE_WIDTH  = 128,
    // addresses whose tag lies below this address are treated as uncacheable
    parameter logic [63:0] CACHE_START_ADDR  = 64'h4000_0000
)(
    input  logic                                 clk_i,     // Clock
    input  logic                                 rst_ni,    // Asynchronous reset active low
    input  logic                                 bypass_i,  // bypass the cache: requests are sent out as bypassed miss requests
    output logic                                 busy_o,    // high whenever the FSM is not in IDLE
    // Core request ports
    input  logic [INDEX_WIDTH-1:0]               address_index_i,
    input  logic [TAG_WIDTH-1:0]                 address_tag_i,
    input  logic [63:0]                          data_wdata_i,
    input  logic                                 data_req_i,
    input  logic                                 data_we_i,
    input  logic [7:0]                           data_be_i,
    input  logic [1:0]                           data_size_i,
    input  logic                                 kill_req_i,
    input  logic                                 tag_valid_i,   // not referenced by the logic in this module
    output logic                                 data_gnt_o,
    output logic                                 data_rvalid_o,
    output logic [63:0]                          data_rdata_o,
    input  amo_t                                 amo_op_i,      // not referenced by the logic in this module
    // SRAM interface
    output logic [SET_ASSOCIATIVITY-1:0]         req_o,  // req is valid
    output logic [INDEX_WIDTH-1:0]               addr_o, // address into cache array
    input  logic                                 gnt_i,
    output cache_line_t                          data_o,
    output cl_be_t                               be_o,
    output logic [TAG_WIDTH-1:0]                 tag_o,  // valid one cycle later
    input  cache_line_t [SET_ASSOCIATIVITY-1:0]  data_i,
    output logic                                 we_o,
    input  logic [SET_ASSOCIATIVITY-1:0]         hit_way_i,
    // Miss handling
    output miss_req_t                            miss_req_o,
    // return
    input  logic                                 miss_gnt_i,
    input  logic                                 active_serving_i, // the miss unit is currently active for this unit, serving the miss
    input  logic [63:0]                          critical_word_i,
    input  logic                                 critical_word_valid_i,

    input  logic                                 bypass_gnt_i,
    input  logic                                 bypass_valid_i,
    input  logic [63:0]                          bypass_data_i,
    // check MSHR for aliasing
    // NOTE(review): 56 bits matches TAG_WIDTH + INDEX_WIDTH only for the default widths
    output logic [55:0]                          mshr_addr_o,
    input  logic                                 mshr_addr_matches_i
);
|
||||
|
||||
// FSM state register (current: *_q, next: *_d)
enum logic [3:0] {
    IDLE,
    WAIT_TAG,
    WAIT_TAG_BYPASSED,
    STORE_REQ,
    WAIT_REFILL_VALID,
    WAIT_REFILL_GNT,
    WAIT_TAG_SAVED,
    WAIT_MSHR,
    WAIT_CRITICAL_WORD
} state_d, state_q;

// snapshot of the core request that is currently being processed
typedef struct packed {
    logic [INDEX_WIDTH-1:0] index;
    logic [TAG_WIDTH-1:0]   tag;
    logic [7:0]             be;
    logic [1:0]             size;
    logic                   we;
    logic [63:0]            wdata;
    logic                   bypass;
} mem_req_t;

mem_req_t mem_req_d;
mem_req_t mem_req_q;

// way that hit, remembered for the write-back step of a store
logic [SET_ASSOCIATIVITY-1:0] hit_way_d;
logic [SET_ASSOCIATIVITY-1:0] hit_way_q;

// data of the cache line selected by hit_way_i
logic [CACHE_LINE_WIDTH-1:0] cl_i;

// the controller is busy as long as it is not idle
assign busy_o = (state_q != IDLE);
|
||||
|
||||
always_comb begin : way_select
    // Pick the data of the way flagged in hit_way_i; all-zero when no way hits.
    // If several bits were set, the highest-numbered way would win.
    cl_i = '0;
    for (int unsigned way = 0; way < SET_ASSOCIATIVITY; way++) begin
        if (hit_way_i[way]) begin
            cl_i = data_i[way].data;
        end
    end
end
|
||||
|
||||
// --------------
|
||||
// Cache FSM
|
||||
// --------------
|
||||
// Next-state and output logic of the cache controller.
//
// IDLE accepts a request and speculatively reads all ways. WAIT_TAG /
// WAIT_TAG_SAVED compare the tag and either return data (load hit), go to
// STORE_REQ (store hit) or hand the request out through miss_req_o
// (WAIT_REFILL_GNT). WAIT_MSHR stalls while the MSHR reports a matching
// address. WAIT_CRITICAL_WORD / WAIT_REFILL_VALID wait for the returned data.
always_comb begin : cache_ctrl_fsm
    automatic logic [$clog2(CACHE_LINE_WIDTH)-1:0] cl_offset;
    // cache-line offset -> multiple of 64
    cl_offset = mem_req_q.index[BYTE_OFFSET-1:3] << 6; // shift by 6 to the left

    // default assignments
    state_d   = state_q;
    mem_req_d = mem_req_q;
    hit_way_d = hit_way_q;

    // output assignments
    data_gnt_o    = 1'b0;
    data_rvalid_o = 1'b0;
    data_rdata_o  = '0;
    miss_req_o    = '0;
    mshr_addr_o   = '0;
    // Memory array communication
    req_o  = '0;
    addr_o = address_index_i;
    data_o = '0;
    be_o   = '0;
    tag_o  = '0;
    we_o   = '0;

    case (state_q)

        IDLE: begin
            // a new request arrived
            if (data_req_i) begin
                // request the cache line - we can do this speculatively
                req_o = '1;

                // save index, tag, be, size, we and write data
                mem_req_d.index = address_index_i;
                mem_req_d.tag   = address_tag_i;
                mem_req_d.be    = data_be_i;
                mem_req_d.size  = data_size_i;
                mem_req_d.we    = data_we_i;
                mem_req_d.wdata = data_wdata_i;

                // Bypass mode, check for uncacheable address here as well
                if (bypass_i) begin
                    state_d = WAIT_TAG_BYPASSED;
                    // grant this access
                    data_gnt_o = 1'b1;
                    mem_req_d.bypass = 1'b1;
                // ------------------
                // Cache is enabled
                // ------------------
                end else begin
                    // Wait until we have access to the memory array
                    if (gnt_i) begin
                        state_d = WAIT_TAG;
                        mem_req_d.bypass = 1'b0;
                        // only for a read
                        if (!data_we_i)
                            data_gnt_o = 1'b1;
                    end
                end
            end
        end

        // cache enabled and waiting for tag
        WAIT_TAG, WAIT_TAG_SAVED: begin
            // depending on where we come from
            // For the store case the tag comes in the same cycle
            tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag : address_tag_i;

            // we speculatively request another transfer
            if (data_req_i) begin
                req_o = '1;
            end

            // check that the client really wants to do the request
            if (!kill_req_i) begin
                // ------------
                // HIT CASE
                // ------------
                if (|hit_way_i) begin
                    // we can request another cache-line if this was a load
                    // make another request
                    if (data_req_i && !mem_req_q.we) begin
                        state_d          = WAIT_TAG; // switch back to WAIT_TAG
                        mem_req_d.index  = address_index_i;
                        mem_req_d.be     = data_be_i;
                        mem_req_d.size   = data_size_i;
                        mem_req_d.we     = data_we_i;
                        mem_req_d.wdata  = data_wdata_i;
                        mem_req_d.tag    = address_tag_i;
                        mem_req_d.bypass = 1'b0;
                        data_gnt_o       = gnt_i;

                        if (!gnt_i) begin
                            state_d = IDLE;
                        end

                    end else begin
                        state_d = IDLE;
                    end

                    // this is timing critical
                    // data_rdata_o = cl_i[cl_offset +: 64];
                    case (mem_req_q.index[3])
                        1'b0: data_rdata_o = cl_i[63:0];
                        1'b1: data_rdata_o = cl_i[127:64];
                    endcase

                    // report data for a read
                    if (!mem_req_q.we) begin
                        data_rvalid_o = 1'b1;

                    // else this was a store so we need an extra step to handle it
                    end else begin
                        state_d   = STORE_REQ;
                        hit_way_d = hit_way_i;
                    end
                // ------------
                // MISS CASE
                // ------------
                end else begin
                    // also save the tag. Use tag_o (the tag actually compared this cycle)
                    // so a tag that was already saved (store / WAIT_TAG_SAVED) is not
                    // overwritten by whatever the core currently drives on address_tag_i
                    mem_req_d.tag = tag_o;
                    // make a miss request
                    state_d = WAIT_REFILL_GNT;
                end
                // ---------------
                // Check MSHR
                // ---------------
                // look up the address of the request being processed, i.e. with the same
                // tag that is used for the hit comparison
                mshr_addr_o = {tag_o, mem_req_q.index};
                // we've got a match on MSHR
                if (mshr_addr_matches_i) begin
                    state_d = WAIT_MSHR;
                    // save tag if we didn't already save it e.g.: we are not in the tag saved state
                    if (state_q != WAIT_TAG_SAVED)
                        mem_req_d.tag = tag_o;
                end
                // -------------------------
                // Check for cache-ability
                // -------------------------
                if (tag_o < CACHE_START_ADDR[TAG_WIDTH+INDEX_WIDTH-1:INDEX_WIDTH]) begin
                    // keep the tag the cache-ability decision was made on
                    mem_req_d.tag    = tag_o;
                    mem_req_d.bypass = 1'b1;
                    state_d          = WAIT_REFILL_GNT;
                end
            end else begin
                // we can potentially accept a new request -> I don't know how this works out timing wise
                // as this will chain some paths together...
                // For now this should not happen too frequently and we spare another cycle
                // go back to idle
                state_d       = IDLE;
                data_rvalid_o = 1'b1;
            end
        end

        // ~> we are here as we need a second round of memory access for a store
        STORE_REQ: begin
            // store data, write dirty bit
            req_o  = hit_way_q;
            addr_o = mem_req_q.index;
            we_o   = 1'b1;

            be_o.dirty = hit_way_q;
            be_o.valid = hit_way_q;

            // set the correct byte enable
            for (int unsigned i = 0; i < 8; i++) begin
                if (mem_req_q.be[i])
                    be_o.data[cl_offset + i*8 +: 8] = '1;
            end

            data_o.data[cl_offset +: 64] = mem_req_q.wdata;
            // ~> change the state
            data_o.dirty = 1'b1;
            data_o.valid = 1'b1;

            // got a grant ~> this is finished now
            if (gnt_i) begin
                data_gnt_o = 1'b1;
                state_d    = IDLE;
            end
        end

        // we've got a match on MSHR ~> miss unit is currently serving a request
        WAIT_MSHR: begin
            mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
            // we can start a new request
            if (!mshr_addr_matches_i) begin
                req_o = '1;

                addr_o = mem_req_q.index;

                if (gnt_i)
                    state_d = WAIT_TAG_SAVED;
            end
        end

        // it's for sure a miss
        WAIT_TAG_BYPASSED: begin
            // the request was killed
            if (kill_req_i) begin
                state_d = IDLE;
                // we need to ack the killing
                data_rvalid_o = 1'b1;
            end else begin
                // save tag
                mem_req_d.tag = address_tag_i;
                state_d       = WAIT_REFILL_GNT;
            end
        end

        // ~> wait for grant from miss unit
        WAIT_REFILL_GNT: begin

            mshr_addr_o = {mem_req_q.tag, mem_req_q.index};

            miss_req_o.valid  = 1'b1;
            miss_req_o.bypass = mem_req_q.bypass;
            miss_req_o.addr   = {mem_req_q.tag, mem_req_q.index};
            miss_req_o.be     = mem_req_q.be;
            miss_req_o.size   = mem_req_q.size;
            miss_req_o.we     = mem_req_q.we;
            miss_req_o.wdata  = mem_req_q.wdata;

            // got a grant so go to valid
            if (bypass_gnt_i) begin
                state_d = WAIT_REFILL_VALID;
                // if this was a write we still need to give a grant to the store unit
                if (mem_req_q.we)
                    data_gnt_o = 1'b1;
            end

            if (miss_gnt_i && !mem_req_q.we)
                state_d = WAIT_CRITICAL_WORD;
            else if (miss_gnt_i) begin
                state_d    = IDLE;
                data_gnt_o = 1'b1;
            end

            // it can be the case that the miss unit is currently serving a request which matches ours
            // so we need to check the mshr for matching continuously
            // if the mshr matches we need to go to a different state -> we should never get a matching mshr and a high miss_gnt_i
            if (mshr_addr_matches_i && !active_serving_i) begin
                state_d = WAIT_MSHR;
            end
        end

        // ~> wait for critical word to arrive
        WAIT_CRITICAL_WORD: begin
            // speculatively request another word
            if (data_req_i) begin
                // request the cache line
                req_o = '1;
            end

            if (critical_word_valid_i) begin
                data_rvalid_o = 1'b1;
                data_rdata_o  = critical_word_i;
                // we can make another request
                if (data_req_i) begin
                    // save index, be and we
                    mem_req_d.index = address_index_i;
                    mem_req_d.be    = data_be_i;
                    mem_req_d.size  = data_size_i;
                    mem_req_d.we    = data_we_i;
                    mem_req_d.wdata = data_wdata_i;
                    mem_req_d.tag   = address_tag_i;

                    state_d = IDLE;

                    // Wait until we have access on the memory array
                    if (gnt_i) begin
                        state_d          = WAIT_TAG;
                        mem_req_d.bypass = 1'b0;
                        data_gnt_o       = 1'b1;
                    end

                end else begin
                    state_d = IDLE;
                end
            end
        end

        // ~> wait until the bypass request is valid
        WAIT_REFILL_VALID: begin
            // got a valid answer
            if (bypass_valid_i) begin
                data_rdata_o  = bypass_data_i;
                data_rvalid_o = 1'b1;
                state_d       = IDLE;
            end
        end

        // the state register is 4 bit wide but only 9 encodings are used:
        // recover to IDLE from any unused encoding
        default: state_d = IDLE;

    endcase
end
|
||||
|
||||
// --------------
|
||||
// Registers
|
||||
// --------------
|
||||
// State, saved request and saved hit way; cleared by the asynchronous
// active-low reset, otherwise updated with the next-state values every cycle.
always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
        hit_way_q <= '0;
        mem_req_q <= '0;
        state_q   <= IDLE;
    end else begin
        hit_way_q <= hit_way_d;
        mem_req_q <= mem_req_d;
        state_q   <= state_d;
    end
end
|
||||
|
||||
`ifndef SYNTHESIS
// Simulation-only parameter check: the read-data select is written for
// 128 bit wide cache lines.
initial begin
    assert (CACHE_LINE_WIDTH == 128)
    else begin
        $error ("Cacheline width has to be 128 for the moment. But only small changes required in data select logic");
    end
end
`endif
|
||||
endmodule
|
||||
|
||||
// Placeholder for the atomic memory operation (AMO) ALU. No AMO functionality
// is implemented in this module yet; the outputs are tied to inactive values
// so that they are never left undriven (X in simulation, floating nets in
// synthesis).
module AMO_alu (
    input  logic        clk_i,
    input  logic        rst_ni,
    // AMO interface
    input  logic        amo_commit_i, // commit atomic memory operation
    output logic        amo_valid_o,  // we have a valid AMO result
    output logic [63:0] amo_result_o, // result of atomic memory operation
    input  logic        amo_flush_i   // forget about AMO
);

    // never report a valid result while the ALU is not implemented
    assign amo_valid_o  = 1'b0;
    assign amo_result_o = '0;

endmodule
|
|
@ -113,12 +113,9 @@ module commit_stage (
|
|||
// check if this instruction was a SFENCE_VMA
|
||||
if (commit_instr_i.op == SFENCE_VMA) begin
|
||||
// no store pending so we can flush the TLBs and pipeline
|
||||
if (no_st_pending_i) begin
|
||||
sfence_vma_o = 1'b1;
|
||||
sfence_vma_o = no_st_pending_i;
|
||||
// wait for the store buffer to drain until flushing the pipeline
|
||||
end else begin
|
||||
commit_ack_o = 1'b0;
|
||||
end
|
||||
commit_ack_o = no_st_pending_i;
|
||||
end
|
||||
// ------------------
|
||||
// FENCE.I Logic
|
||||
|
@ -126,17 +123,17 @@ module commit_stage (
|
|||
// Fence synchronizes data and instruction streams. That means that we need to flush the private icache
|
||||
// and the private dcache. This is the most expensive instruction.
|
||||
if (commit_instr_i.op == FENCE_I) begin
|
||||
commit_ack_o = 1'b1;
|
||||
commit_ack_o = no_st_pending_i;
|
||||
// tell the controller to flush the I$
|
||||
fence_i_o = 1'b1;
|
||||
fence_i_o = no_st_pending_i;
|
||||
end
|
||||
// ------------------
|
||||
// FENCE Logic
|
||||
// ------------------
|
||||
if (commit_instr_i.op == FENCE) begin
|
||||
commit_ack_o = 1'b1;
|
||||
commit_ack_o = no_st_pending_i;
|
||||
// tell the controller to flush the D$
|
||||
fence_o = 1'b1;
|
||||
fence_o = no_st_pending_i;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -67,12 +67,19 @@ module csr_regfile #(
|
|||
output logic [43:0] satp_ppn_o,
|
||||
output logic [ASID_WIDTH-1:0] asid_o,
|
||||
// external interrupts
|
||||
input logic irq_i, // external interrupt in
|
||||
input logic [1:0] irq_i, // external interrupt in
|
||||
input logic ipi_i, // inter processor interrupt -> connected to machine mode sw
|
||||
// Virtualization Support
|
||||
output logic tvm_o, // trap virtual memory
|
||||
output logic tw_o, // timeout wait
|
||||
output logic tsr_o // trap sret
|
||||
output logic tsr_o, // trap sret
|
||||
// Caches
|
||||
output logic dcache_en_o, // L1 DCache Enable
|
||||
// Performance Counter
|
||||
output logic [11:0] perf_addr_o, // address to performance counter module
|
||||
output logic [63:0] perf_data_o, // write data to performance counter module
|
||||
input logic [63:0] perf_data_i, // read data from performance counter module
|
||||
output logic perf_we_o
|
||||
);
|
||||
// internal signal to keep track of access exceptions
|
||||
logic read_access_exception, update_access_exception;
|
||||
|
@ -80,7 +87,7 @@ module csr_regfile #(
|
|||
logic [63:0] csr_wdata, csr_rdata;
|
||||
priv_lvl_t trap_to_priv_lvl;
|
||||
// register for enabling load store address translation, this is critical, hence the register
|
||||
logic en_ld_st_translation_n, en_ld_st_translation_q;
|
||||
logic en_ld_st_translation_d, en_ld_st_translation_q;
|
||||
|
||||
logic mret; // return from M-mode exception
|
||||
logic sret; // return from S-mode exception
|
||||
|
@ -98,7 +105,7 @@ module csr_regfile #(
|
|||
// CSR Registers
|
||||
// ----------------
|
||||
// privilege level register
|
||||
priv_lvl_t priv_lvl_n, priv_lvl_q;
|
||||
priv_lvl_t priv_lvl_d, priv_lvl_q;
|
||||
|
||||
typedef struct packed {
|
||||
logic sd; // signal dirty - read-only - hardwired zero
|
||||
|
@ -127,28 +134,29 @@ module csr_regfile #(
|
|||
logic uie; // user interrupts enable - hardwired to zero
|
||||
} status_t;
|
||||
|
||||
status_t mstatus_q, mstatus_n;
|
||||
status_t mstatus_q, mstatus_d;
|
||||
|
||||
logic [63:0] mtvec_q, mtvec_n;
|
||||
logic [63:0] medeleg_q, medeleg_n;
|
||||
logic [63:0] mideleg_q, mideleg_n;
|
||||
logic [63:0] mip_q, mip_n;
|
||||
logic [63:0] mie_q, mie_n;
|
||||
logic [63:0] mscratch_q, mscratch_n;
|
||||
logic [63:0] mepc_q, mepc_n;
|
||||
logic [63:0] mcause_q, mcause_n;
|
||||
logic [63:0] mtval_q, mtval_n;
|
||||
logic [63:0] mtvec_q, mtvec_d;
|
||||
logic [63:0] medeleg_q, medeleg_d;
|
||||
logic [63:0] mideleg_q, mideleg_d;
|
||||
logic [63:0] mip_q, mip_d;
|
||||
logic [63:0] mie_q, mie_d;
|
||||
logic [63:0] mscratch_q, mscratch_d;
|
||||
logic [63:0] mepc_q, mepc_d;
|
||||
logic [63:0] mcause_q, mcause_d;
|
||||
logic [63:0] mtval_q, mtval_d;
|
||||
|
||||
logic [63:0] stvec_q, stvec_n;
|
||||
logic [63:0] sscratch_q, sscratch_n;
|
||||
logic [63:0] sepc_q, sepc_n;
|
||||
logic [63:0] scause_q, scause_n;
|
||||
logic [63:0] stval_q, stval_n;
|
||||
logic [63:0] stvec_q, stvec_d;
|
||||
logic [63:0] sscratch_q, sscratch_d;
|
||||
logic [63:0] sepc_q, sepc_d;
|
||||
logic [63:0] scause_q, scause_d;
|
||||
logic [63:0] stval_q, stval_d;
|
||||
logic [63:0] dcache_q, dcache_d;
|
||||
|
||||
logic wfi_n, wfi_q;
|
||||
logic wfi_d, wfi_q;
|
||||
|
||||
logic [63:0] cycle_q, cycle_n;
|
||||
logic [63:0] instret_q, instret_n;
|
||||
logic [63:0] cycle_q, cycle_d;
|
||||
logic [63:0] instret_q, instret_d;
|
||||
|
||||
typedef struct packed {
|
||||
logic [3:0] mode;
|
||||
|
@ -156,8 +164,7 @@ module csr_regfile #(
|
|||
logic [43:0] ppn;
|
||||
} satp_t;
|
||||
|
||||
satp_t satp_q, satp_n;
|
||||
|
||||
satp_t satp_q, satp_d;
|
||||
|
||||
// ----------------
|
||||
// CSR Read logic
|
||||
|
@ -166,6 +173,9 @@ module csr_regfile #(
|
|||
// a read access exception can only occur if we attempt to read a CSR which does not exist
|
||||
read_access_exception = 1'b0;
|
||||
csr_rdata = 64'b0;
|
||||
// feed through address of performance counter
|
||||
perf_addr_o = csr_addr.address;
|
||||
|
||||
if (csr_read) begin
|
||||
case (csr_addr.address)
|
||||
|
||||
|
@ -204,10 +214,23 @@ module csr_regfile #(
|
|||
CSR_MHARTID: csr_rdata = {53'b0, cluster_id_i[5:0], 1'b0, core_id_i[3:0]};
|
||||
CSR_MCYCLE: csr_rdata = cycle_q;
|
||||
CSR_MINSTRET: csr_rdata = instret_q;
|
||||
CSR_DCACHE: csr_rdata = dcache_q;
|
||||
// Counters and Timers
|
||||
CSR_CYCLE: csr_rdata = cycle_q;
|
||||
CSR_TIME: csr_rdata = time_i;
|
||||
CSR_INSTRET: csr_rdata = instret_q;
|
||||
CSR_L1_ICACHE_MISS,
|
||||
CSR_L1_DCACHE_MISS,
|
||||
CSR_ITLB_MISS,
|
||||
CSR_DTLB_MISS,
|
||||
CSR_LOAD,
|
||||
CSR_STORE,
|
||||
CSR_EXCEPTION,
|
||||
CSR_EXCEPTION_RET,
|
||||
CSR_BRANCH_JUMP,
|
||||
CSR_CALL,
|
||||
CSR_RET,
|
||||
CSR_MIS_PREDICT: csr_rdata = perf_data_i;
|
||||
default: read_access_exception = 1'b1;
|
||||
endcase
|
||||
end
|
||||
|
@ -216,40 +239,46 @@ module csr_regfile #(
|
|||
// CSR Write and update logic
|
||||
// ---------------------------
|
||||
always_comb begin : csr_update
|
||||
automatic satp_t sapt = satp_q;
|
||||
automatic satp_t sapt;
|
||||
automatic logic [63:0] mip;
|
||||
sapt = satp_q;
|
||||
mip = csr_wdata & 64'h33;
|
||||
// only USIP, SSIP, UTIP, STIP are write-able
|
||||
automatic logic [63:0] mip = csr_wdata & 64'h33;
|
||||
|
||||
eret_o = 1'b0;
|
||||
flush_o = 1'b0;
|
||||
update_access_exception = 1'b0;
|
||||
|
||||
priv_lvl_n = priv_lvl_q;
|
||||
mstatus_n = mstatus_q;
|
||||
mtvec_n = mtvec_q;
|
||||
medeleg_n = medeleg_q;
|
||||
mideleg_n = mideleg_q;
|
||||
mip_n = mip_q;
|
||||
mie_n = mie_q;
|
||||
mepc_n = mepc_q;
|
||||
mcause_n = mcause_q;
|
||||
mscratch_n = mscratch_q;
|
||||
mtval_n = mtval_q;
|
||||
perf_we_o = 1'b0;
|
||||
perf_data_o = 'b0;
|
||||
|
||||
sepc_n = sepc_q;
|
||||
scause_n = scause_q;
|
||||
stvec_n = stvec_q;
|
||||
sscratch_n = sscratch_q;
|
||||
stval_n = stval_q;
|
||||
satp_n = satp_q;
|
||||
en_ld_st_translation_n = en_ld_st_translation_q;
|
||||
priv_lvl_d = priv_lvl_q;
|
||||
mstatus_d = mstatus_q;
|
||||
mtvec_d = mtvec_q;
|
||||
medeleg_d = medeleg_q;
|
||||
mideleg_d = mideleg_q;
|
||||
mip_d = mip_q;
|
||||
mie_d = mie_q;
|
||||
mepc_d = mepc_q;
|
||||
mcause_d = mcause_q;
|
||||
mscratch_d = mscratch_q;
|
||||
mtval_d = mtval_q;
|
||||
dcache_d = dcache_q;
|
||||
|
||||
sepc_d = sepc_q;
|
||||
scause_d = scause_q;
|
||||
stvec_d = stvec_q;
|
||||
sscratch_d = sscratch_q;
|
||||
stval_d = stval_q;
|
||||
satp_d = satp_q;
|
||||
en_ld_st_translation_d = en_ld_st_translation_q;
|
||||
|
||||
// check for correct access rights and that we are writing
|
||||
if (csr_we) begin
|
||||
case (csr_addr.address)
|
||||
// sstatus is a subset of mstatus - mask it accordingly
|
||||
CSR_SSTATUS: begin
|
||||
mstatus_n = csr_wdata & 64'h3fffe1fee;
|
||||
mstatus_d = csr_wdata & 64'h3fffe1fee;
|
||||
// this instruction has side-effects
|
||||
flush_o = 1'b1;
|
||||
end
|
||||
|
@ -260,21 +289,21 @@ module csr_regfile #(
|
|||
// are written
|
||||
for (int unsigned i = 0; i < 64; i++)
|
||||
if (mideleg_q[i])
|
||||
mie_n[i] = csr_wdata[i];
|
||||
mie_d[i] = csr_wdata[i];
|
||||
end
|
||||
|
||||
CSR_SIP: begin
|
||||
for (int unsigned i = 0; i < 64; i++)
|
||||
if (mideleg_q[i])
|
||||
mip_n[i] = mip[i];
|
||||
mip_d[i] = mip[i];
|
||||
end
|
||||
|
||||
CSR_SCOUNTEREN:;
|
||||
CSR_STVEC: stvec_n = {csr_wdata[63:2], 1'b0, csr_wdata[0]};
|
||||
CSR_SSCRATCH: sscratch_n = csr_wdata;
|
||||
CSR_SEPC: sepc_n = {csr_wdata[63:1], 1'b0};
|
||||
CSR_SCAUSE: scause_n = csr_wdata;
|
||||
CSR_STVAL: stval_n = csr_wdata;
|
||||
CSR_STVEC: stvec_d = {csr_wdata[63:2], 1'b0, csr_wdata[0]};
|
||||
CSR_SSCRATCH: sscratch_d = csr_wdata;
|
||||
CSR_SEPC: sepc_d = {csr_wdata[63:1], 1'b0};
|
||||
CSR_SCAUSE: scause_d = csr_wdata;
|
||||
CSR_STVAL: stval_d = csr_wdata;
|
||||
// supervisor address translation and protection
|
||||
CSR_SATP: begin
|
||||
// intercept SATP writes if in S-Mode and TVM is enabled
|
||||
|
@ -284,7 +313,7 @@ module csr_regfile #(
|
|||
sapt = satp_t'(csr_wdata);
|
||||
// only make ASID_LEN - 1 bit stick, that way software can figure out how many ASID bits are supported
|
||||
sapt.asid = sapt.asid & {{(16-ASID_WIDTH){1'b0}}, {ASID_WIDTH{1'b1}}};
|
||||
satp_n = sapt;
|
||||
satp_d = sapt;
|
||||
end
|
||||
// changing the mode can have side-effects on address translation (e.g.: other instructions), re-fetch
|
||||
// the next instruction by executing a flush
|
||||
|
@ -292,15 +321,15 @@ module csr_regfile #(
|
|||
end
|
||||
|
||||
CSR_MSTATUS: begin
|
||||
mstatus_n = csr_wdata;
|
||||
mstatus_n.sxl = 2'b10;
|
||||
mstatus_n.uxl = 2'b10;
|
||||
mstatus_d = csr_wdata;
|
||||
mstatus_d.sxl = 2'b10;
|
||||
mstatus_d.uxl = 2'b10;
|
||||
// hardwired zero registers
|
||||
mstatus_n.sd = 1'b0;
|
||||
mstatus_n.xs = 2'b0;
|
||||
mstatus_n.fs = 2'b0;
|
||||
mstatus_n.upie = 1'b0;
|
||||
mstatus_n.uie = 1'b0;
|
||||
mstatus_d.sd = 1'b0;
|
||||
mstatus_d.xs = 2'b0;
|
||||
mstatus_d.fs = 2'b0;
|
||||
mstatus_d.upie = 1'b0;
|
||||
mstatus_d.uie = 1'b0;
|
||||
// this register has side-effects on other registers, flush the pipeline
|
||||
flush_o = 1'b1;
|
||||
end
|
||||
|
@ -308,29 +337,46 @@ module csr_regfile #(
|
|||
CSR_MISA:;
|
||||
// machine exception delegation register
|
||||
// 0 - 15 exceptions supported
|
||||
CSR_MEDELEG: medeleg_n = csr_wdata & 64'hF7FF;
|
||||
CSR_MEDELEG: medeleg_d = csr_wdata & 64'hF7FF;
|
||||
// machine interrupt delegation register
|
||||
// we do not support user interrupt delegation
|
||||
CSR_MIDELEG: mideleg_n = csr_wdata & 64'hBBB;
|
||||
CSR_MIDELEG: mideleg_d = csr_wdata & 64'hBBB;
|
||||
|
||||
// mask the register so that unsupported interrupts can never be set
|
||||
CSR_MIE: mie_n = csr_wdata & 64'hBBB; // we only support supervisor and m-mode interrupts
|
||||
CSR_MIP: mip_n = mip;
|
||||
CSR_MIE: mie_d = csr_wdata & 64'hBBB; // we only support supervisor and m-mode interrupts
|
||||
CSR_MIP: mip_d = mip;
|
||||
|
||||
CSR_MTVEC: begin
|
||||
mtvec_n = {csr_wdata[63:2], 1'b0, csr_wdata[0]};
|
||||
mtvec_d = {csr_wdata[63:2], 1'b0, csr_wdata[0]};
|
||||
// we are in vector mode, this implementation requires the additional
|
||||
// alignment constraint of 64 * 4 bytes
|
||||
if (csr_wdata[0])
|
||||
mtvec_n = {csr_wdata[63:8], 7'b0, csr_wdata[0]};
|
||||
mtvec_d = {csr_wdata[63:8], 7'b0, csr_wdata[0]};
|
||||
end
|
||||
CSR_MCOUNTEREN:;
|
||||
CSR_MSCRATCH: mscratch_n = csr_wdata;
|
||||
CSR_MEPC: mepc_n = {csr_wdata[63:1], 1'b0};
|
||||
CSR_MCAUSE: mcause_n = csr_wdata;
|
||||
CSR_MTVAL: mtval_n = csr_wdata;
|
||||
CSR_MCYCLE: cycle_n = csr_wdata;
|
||||
CSR_MINSTRET: instret_n = csr_wdata;
|
||||
|
||||
CSR_MSCRATCH: mscratch_d = csr_wdata;
|
||||
CSR_MEPC: mepc_d = {csr_wdata[63:1], 1'b0};
|
||||
CSR_MCAUSE: mcause_d = csr_wdata;
|
||||
CSR_MTVAL: mtval_d = csr_wdata;
|
||||
CSR_MCYCLE: cycle_d = csr_wdata;
|
||||
CSR_MINSTRET: instret_d = csr_wdata;
|
||||
CSR_DCACHE: dcache_d = csr_wdata[0]; // enable bit
|
||||
CSR_L1_ICACHE_MISS,
|
||||
CSR_L1_DCACHE_MISS,
|
||||
CSR_ITLB_MISS,
|
||||
CSR_DTLB_MISS,
|
||||
CSR_LOAD,
|
||||
CSR_STORE,
|
||||
CSR_EXCEPTION,
|
||||
CSR_EXCEPTION_RET,
|
||||
CSR_BRANCH_JUMP,
|
||||
CSR_CALL,
|
||||
CSR_RET,
|
||||
CSR_MIS_PREDICT: begin
|
||||
perf_data_o = csr_wdata;
|
||||
perf_we_o = 1'b1;
|
||||
end
|
||||
default: update_access_exception = 1'b1;
|
||||
endcase
|
||||
end
|
||||
|
@ -338,11 +384,12 @@ module csr_regfile #(
|
|||
// External Interrupts
|
||||
// ---------------------
|
||||
// Machine Mode External Interrupt Pending
|
||||
// TODO: this is wrong for sure
|
||||
mip_n[11] = 1'b0;
|
||||
mip_n[9] = mie_q[9] & irq_i;
|
||||
mip_d[11] = mie_q[11] & irq_i[1];
|
||||
mip_d[9] = mie_q[9] & irq_i[0];
|
||||
// Machine software interrupt
|
||||
mip_d[3] = mie_q[3] & ipi_i;
|
||||
// Timer interrupt pending, coming from platform timer
|
||||
mip_n[7] = time_irq_i;
|
||||
mip_d[7] = time_irq_i;
|
||||
|
||||
// -----------------------
|
||||
// Manage Exception Stack
|
||||
|
@ -368,31 +415,31 @@ module csr_regfile #(
|
|||
// trap to supervisor mode
|
||||
if (trap_to_priv_lvl == PRIV_LVL_S) begin
|
||||
// update sstatus
|
||||
mstatus_n.sie = 1'b0;
|
||||
mstatus_n.spie = mstatus_q.sie;
|
||||
mstatus_d.sie = 1'b0;
|
||||
mstatus_d.spie = mstatus_q.sie;
|
||||
// this can either be user or supervisor mode
|
||||
mstatus_n.spp = logic'(priv_lvl_q);
|
||||
mstatus_d.spp = logic'(priv_lvl_q);
|
||||
// set cause
|
||||
scause_n = ex_i.cause;
|
||||
scause_d = ex_i.cause;
|
||||
// set epc
|
||||
sepc_n = pc_i;
|
||||
sepc_d = pc_i;
|
||||
// set mtval or stval
|
||||
stval_n = ex_i.tval;
|
||||
stval_d = ex_i.tval;
|
||||
// trap to machine mode
|
||||
end else begin
|
||||
// update mstatus
|
||||
mstatus_n.mie = 1'b0;
|
||||
mstatus_n.mpie = mstatus_q.mie;
|
||||
mstatus_d.mie = 1'b0;
|
||||
mstatus_d.mpie = mstatus_q.mie;
|
||||
// save the previous privilege mode
|
||||
mstatus_n.mpp = priv_lvl_q;
|
||||
mcause_n = ex_i.cause;
|
||||
mstatus_d.mpp = priv_lvl_q;
|
||||
mcause_d = ex_i.cause;
|
||||
// set epc
|
||||
mepc_n = pc_i;
|
||||
mepc_d = pc_i;
|
||||
// set mtval or stval
|
||||
mtval_n = ex_i.tval;
|
||||
mtval_d = ex_i.tval;
|
||||
end
|
||||
|
||||
priv_lvl_n = trap_to_priv_lvl;
|
||||
priv_lvl_d = trap_to_priv_lvl;
|
||||
end
|
||||
// ------------------------------
|
||||
// MPRV - Modify Privilege Level
|
||||
|
@ -400,9 +447,9 @@ module csr_regfile #(
|
|||
// Set the address translation at which the load and stores should occur
|
||||
// we can use the previous values since changing the address translation will always involve a pipeline flush
|
||||
if (mstatus_q.mprv && satp_q.mode == 4'h8 && (mstatus_q.mpp != PRIV_LVL_M))
|
||||
en_ld_st_translation_n = 1'b1;
|
||||
en_ld_st_translation_d = 1'b1;
|
||||
else // otherwise we go with the regular settings
|
||||
en_ld_st_translation_n = en_translation_o;
|
||||
en_ld_st_translation_d = en_translation_o;
|
||||
|
||||
ld_st_priv_lvl_o = (mstatus_q.mprv) ? mstatus_q.mpp : priv_lvl_o;
|
||||
en_ld_st_translation_o = en_ld_st_translation_q;
|
||||
|
@ -416,37 +463,37 @@ module csr_regfile #(
|
|||
eret_o = 1'b1;
|
||||
// return to the previous privilege level and restore all enable flags
|
||||
// get the previous machine interrupt enable flag
|
||||
mstatus_n.mie = mstatus_q.mpie;
|
||||
mstatus_d.mie = mstatus_q.mpie;
|
||||
// restore the previous privilege level
|
||||
priv_lvl_n = mstatus_q.mpp;
|
||||
priv_lvl_d = mstatus_q.mpp;
|
||||
// set mpp to user mode
|
||||
mstatus_n.mpp = PRIV_LVL_U;
|
||||
mstatus_d.mpp = PRIV_LVL_U;
|
||||
// set mpie to 1
|
||||
mstatus_n.mpie = 1'b1;
|
||||
mstatus_d.mpie = 1'b1;
|
||||
end
|
||||
|
||||
if (sret) begin
|
||||
// return from exception, IF doesn't care from where we are returning
|
||||
eret_o = 1'b1;
|
||||
// return the previous supervisor interrupt enable flag
|
||||
mstatus_n.sie = mstatus_n.spie;
|
||||
mstatus_d.sie = mstatus_d.spie;
|
||||
// restore the previous privilege level
|
||||
priv_lvl_n = priv_lvl_t'({1'b0, mstatus_n.spp});
|
||||
priv_lvl_d = priv_lvl_t'({1'b0, mstatus_d.spp});
|
||||
// set spp to user mode
|
||||
mstatus_n.spp = logic'(PRIV_LVL_U);
|
||||
mstatus_d.spp = logic'(PRIV_LVL_U);
|
||||
// set spie to 1
|
||||
mstatus_n.spie = 1'b1;
|
||||
mstatus_d.spie = 1'b1;
|
||||
end
|
||||
|
||||
// --------------------
|
||||
// Counters
|
||||
// --------------------
|
||||
instret_n = instret_q;
|
||||
instret_d = instret_q;
|
||||
// just increment the cycle count
|
||||
cycle_n = cycle_q + 1'b1;
|
||||
cycle_d = cycle_q + 1'b1;
|
||||
// increase instruction retired counter
|
||||
if (commit_ack_i) begin
|
||||
instret_n = instret_q + 1'b1;
|
||||
instret_d = instret_q + 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -504,9 +551,10 @@ module csr_regfile #(
|
|||
// Exception Control & Interrupt Control
|
||||
// --------------------------------------
|
||||
always_comb begin : exception_ctrl
|
||||
automatic logic [63:0] interrupt_cause = '0;
|
||||
automatic logic [63:0] interrupt_cause;
|
||||
interrupt_cause = '0;
|
||||
// wait for interrupt register
|
||||
wfi_n = wfi_q;
|
||||
wfi_d = wfi_q;
|
||||
|
||||
csr_exception_o = {
|
||||
64'b0, 64'b0, 1'b0
|
||||
|
@ -582,10 +630,10 @@ module csr_regfile #(
|
|||
// -------------------
|
||||
// if there is any interrupt pending un-stall the core
|
||||
if (|mip_q) begin
|
||||
wfi_n = 1'b0;
|
||||
wfi_d = 1'b0;
|
||||
// or alternatively if there is no exception pending, wait here for the interrupt
|
||||
end else if (csr_op_i == WFI && !ex_i.valid) begin
|
||||
wfi_n = 1'b1;
|
||||
wfi_d = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -605,6 +653,7 @@ module csr_regfile #(
|
|||
assign tw_o = mstatus_q.tw;
|
||||
assign tsr_o = mstatus_q.tsr;
|
||||
assign halt_csr_o = wfi_q;
|
||||
assign dcache_en_o = dcache_q[0];
|
||||
|
||||
// output assignments dependent on privilege mode
|
||||
always_comb begin : priv_output
|
||||
|
@ -643,6 +692,7 @@ module csr_regfile #(
|
|||
mcause_q <= 64'b0;
|
||||
mscratch_q <= 64'b0;
|
||||
mtval_q <= 64'b0;
|
||||
dcache_q <= 64'b1;
|
||||
// supervisor mode registers
|
||||
sepc_q <= 64'b0;
|
||||
scause_q <= 64'b0;
|
||||
|
@ -658,32 +708,33 @@ module csr_regfile #(
|
|||
// wait for interrupt
|
||||
wfi_q <= 1'b0;
|
||||
end else begin
|
||||
priv_lvl_q <= priv_lvl_n;
|
||||
priv_lvl_q <= priv_lvl_d;
|
||||
// machine mode registers
|
||||
mstatus_q <= mstatus_n;
|
||||
mtvec_q <= mtvec_n;
|
||||
medeleg_q <= medeleg_n;
|
||||
mideleg_q <= mideleg_n;
|
||||
mip_q <= mip_n;
|
||||
mie_q <= mie_n;
|
||||
mepc_q <= mepc_n;
|
||||
mcause_q <= mcause_n;
|
||||
mscratch_q <= mscratch_n;
|
||||
mtval_q <= mtval_n;
|
||||
mstatus_q <= mstatus_d;
|
||||
mtvec_q <= mtvec_d;
|
||||
medeleg_q <= medeleg_d;
|
||||
mideleg_q <= mideleg_d;
|
||||
mip_q <= mip_d;
|
||||
mie_q <= mie_d;
|
||||
mepc_q <= mepc_d;
|
||||
mcause_q <= mcause_d;
|
||||
mscratch_q <= mscratch_d;
|
||||
mtval_q <= mtval_d;
|
||||
dcache_q <= dcache_d;
|
||||
// supervisor mode registers
|
||||
sepc_q <= sepc_n;
|
||||
scause_q <= scause_n;
|
||||
stvec_q <= stvec_n;
|
||||
sscratch_q <= sscratch_n;
|
||||
stval_q <= stval_n;
|
||||
satp_q <= satp_n;
|
||||
sepc_q <= sepc_d;
|
||||
scause_q <= scause_d;
|
||||
stvec_q <= stvec_d;
|
||||
sscratch_q <= sscratch_d;
|
||||
stval_q <= stval_d;
|
||||
satp_q <= satp_d;
|
||||
// timer and counters
|
||||
cycle_q <= cycle_n;
|
||||
instret_q <= instret_n;
|
||||
cycle_q <= cycle_d;
|
||||
instret_q <= instret_d;
|
||||
// aux registers
|
||||
en_ld_st_translation_q <= en_ld_st_translation_n;
|
||||
en_ld_st_translation_q <= en_ld_st_translation_d;
|
||||
// wait for interrupt
|
||||
wfi_q <= wfi_n;
|
||||
wfi_q <= wfi_d;
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -1,193 +0,0 @@
|
|||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 24.4.2017
|
||||
// Description: Arbitrates the dcache ports
|
||||
//
|
||||
//
|
||||
// Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
// All rights reserved.
|
||||
//
|
||||
// This code is under development and not yet released to the public.
|
||||
// Until it is released, the code is under the copyright of ETH Zurich and
|
||||
// the University of Bologna, and may contain confidential and/or unpublished
|
||||
// work. Any reuse/redistribution is strictly forbidden without written
|
||||
// permission from ETH Zurich.
|
||||
//
|
||||
// Bug fixes and contributions will eventually be released under the
|
||||
// SolderPad open hardware license in the context of the PULP platform
|
||||
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
|
||||
// University of Bologna.
|
||||
//
|
||||
import ariane_pkg::*;
|
||||
|
||||
module dcache_arbiter #(
|
||||
parameter int NR_PORTS = 3
|
||||
)
|
||||
(
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
// slave port
|
||||
output logic [11:0] address_index_o,
|
||||
output logic [43:0] address_tag_o,
|
||||
output logic [63:0] data_wdata_o,
|
||||
output logic data_req_o,
|
||||
output logic data_we_o,
|
||||
output logic [7:0] data_be_o,
|
||||
output logic kill_req_o,
|
||||
output logic tag_valid_o,
|
||||
input logic data_gnt_i,
|
||||
input logic data_rvalid_i,
|
||||
input logic [63:0] data_rdata_i,
|
||||
// master ports
|
||||
input logic [NR_PORTS-1:0][11:0] address_index_i,
|
||||
input logic [NR_PORTS-1:0][43:0] address_tag_i,
|
||||
input logic [NR_PORTS-1:0][63:0] data_wdata_i,
|
||||
input logic [NR_PORTS-1:0] data_req_i,
|
||||
input logic [NR_PORTS-1:0] data_we_i,
|
||||
input logic [NR_PORTS-1:0][7:0] data_be_i,
|
||||
input logic [NR_PORTS-1:0] kill_req_i,
|
||||
input logic [NR_PORTS-1:0] tag_valid_i,
|
||||
output logic [NR_PORTS-1:0] data_gnt_o,
|
||||
output logic [NR_PORTS-1:0] data_rvalid_o,
|
||||
output logic [NR_PORTS-1:0][63:0] data_rdata_o
|
||||
);
|
||||
// one-hot encoded
|
||||
localparam DATA_WIDTH = NR_PORTS;
|
||||
// remember the request port in case of a multi-cycle transaction
|
||||
logic [DATA_WIDTH-1:0] request_port_n, request_port_q;
|
||||
// local ports
|
||||
// FIFO control ports
|
||||
logic full;
|
||||
logic empty;
|
||||
logic single_element;
|
||||
// FIFO input port
|
||||
logic [DATA_WIDTH-1:0] in_data;
|
||||
logic push;
|
||||
// FIFO output port
|
||||
logic [DATA_WIDTH-1:0] out_data;
|
||||
logic pop;
|
||||
|
||||
// FIFO to keep track of the responses
|
||||
fifo #(
|
||||
.dtype ( logic [DATA_WIDTH-1:0] ),
|
||||
.DEPTH ( 4 )
|
||||
) fifo_i (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.single_element_o ( single_element ),
|
||||
// the flush is accomplished implicitly by waiting for the queue to be drained before accepting any new request
|
||||
// it is the responsibility of the attached units to make sure they handle any outstanding responses
|
||||
.flush_i ( 1'b0 ),
|
||||
.full_o ( full ),
|
||||
.empty_o ( empty ),
|
||||
.data_i ( in_data ),
|
||||
.push_i ( push ),
|
||||
.data_o ( out_data ),
|
||||
.pop_i ( pop )
|
||||
);
|
||||
|
||||
// addressing read and full write
|
||||
// Request path of the arbiter.
// - While the response FIFO is not full, the lowest-index master port with an
//   active request is selected (priority select, loop exits on the first hit).
// - The selected port's index, write data, byte enables and write enable are
//   forwarded to the slave port, and the slave grant is routed back to it.
// - On a grant, the one-hot id of the selected port (in_data) is pushed into the
//   FIFO so that the later rvalid can be steered to the right master.
// - Tag, kill and tag-valid are taken from the port stored in request_port_q.
// Note: request_port_n/request_index hold a port *index*, whereas in_data is one-hot.
// If no port requests (or the FIFO is full), request_index keeps the value of request_port_q.
always_comb begin : read_req_write
|
||||
automatic logic [DATA_WIDTH-1:0] request_index = request_port_q;
|
||||
data_req_o = 1'b0;
|
||||
in_data = '{default: 0};
|
||||
push = 1'b0;
|
||||
request_port_n = request_port_q;
|
||||
|
||||
for (int i = 0; i < NR_PORTS; i++)
|
||||
data_gnt_o[i] = 1'b0;
|
||||
|
||||
// ----------------------------
|
||||
// Single-cycle memory requests
|
||||
// ----------------------------
|
||||
// only go for a new request if we can wait for the valid, i.e. we have enough space in the buffer
|
||||
if (~full) begin
|
||||
for (int unsigned i = 0; i < NR_PORTS; i++) begin
|
||||
if (data_req_i[i] == 1'b1) begin
|
||||
data_req_o = data_req_i[i];
|
||||
// save the request port for future states
|
||||
request_port_n = i;
|
||||
request_index = i;
|
||||
// wait for the grant
|
||||
// set the slave on which we are waiting
|
||||
in_data = 1'b1 << i[DATA_WIDTH-1:0];
|
||||
break; // break here as this is a priority select
|
||||
end
|
||||
end
|
||||
// only if we got a grant save it to the queue
|
||||
if (data_gnt_i) begin
|
||||
push = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// pass through all signals from the selected requesting port
|
||||
address_index_o = address_index_i[request_index];
|
||||
data_wdata_o = data_wdata_i[request_index];
|
||||
data_be_o = data_be_i[request_index];
|
||||
data_we_o = data_we_i[request_index];
|
||||
data_gnt_o[request_index] = data_gnt_i;
|
||||
// the following signals are to be passed through one cycle later,
|
||||
// hence they are indexed with the registered port (request_port_q)
|
||||
address_tag_o = address_tag_i[request_port_q];
|
||||
kill_req_o = kill_req_i[request_port_q];
|
||||
tag_valid_o = tag_valid_i[request_port_q];
|
||||
end
|
||||
|
||||
// ------------
|
||||
// Read port
|
||||
// ------------
|
||||
// results, listening on the input signals of the slave port
|
||||
genvar i;
|
||||
// this is very timing sensitive since we can give a new request if we got an rvalid
|
||||
// hence this combines the two most critical paths (from and to memory)
|
||||
// Response routing: rvalid is forwarded only to the port whose bit is set at the
// FIFO output (out_data is one-hot); the read data is broadcast to all ports.
generate
|
||||
// default assignment & one hot decoder
|
||||
for (i = 0; i < NR_PORTS; i++) begin
|
||||
assign data_rvalid_o[i] = out_data[i] & data_rvalid_i;
|
||||
assign data_rdata_o[i] = data_rdata_i;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// Response path: pop one entry from the response FIFO whenever the slave port
// signals rvalid. No check against an empty FIFO is done here.
always_comb begin : slave_read_port
|
||||
pop = 1'b0;
|
||||
// if there is a valid signal the FIFO should not be empty anyway
|
||||
if (data_rvalid_i) begin
|
||||
pop = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// sequential process
|
||||
// Registers the selected request port (request_port_n) every clock cycle;
// cleared to zero by the asynchronous, active-low reset.
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (~rst_ni) begin
|
||||
request_port_q <= 'b0;
|
||||
end else begin
|
||||
request_port_q <= request_port_n;
|
||||
end
|
||||
end
|
||||
|
||||
// ------------
|
||||
// Assertions
|
||||
// ------------
|
||||
|
||||
`ifndef SYNTHESIS
|
||||
`ifndef VERILATOR
|
||||
// make sure that we eventually get an rvalid after we received a grant
|
||||
assert property (@(posedge clk_i) data_gnt_i |-> ##[1:$] data_rvalid_i )
|
||||
else begin $error("There was a grant without a rvalid"); $stop(); end
|
||||
// assert that there is no grant without a request
|
||||
assert property (@(negedge clk_i) data_gnt_i |-> data_req_o)
|
||||
else begin $error("There was a grant without a request."); $stop(); end
|
||||
// assert that the address does not contain X when request is sent
|
||||
assert property ( @(posedge clk_i) (data_req_o) |-> (!$isunknown(address_index_o)) )
|
||||
else begin $error("address contains X when request is set"); $stop(); end
|
||||
|
||||
// there should be no rvalid when we are in IDLE
|
||||
// assert property (
|
||||
// @(posedge clk) (CS == IDLE) |-> (data_rvalid_i == 1'b0) )
|
||||
// else begin $error("Received rvalid while in IDLE state"); $stop(); end
|
||||
|
||||
// assert that errors are only sent at the same time as grant or rvalid
|
||||
// assert property ( @(posedge clk) (data_err_i) |-> (data_gnt_i || data_rvalid_i) )
|
||||
// else begin $error("Error without data grant or rvalid"); $stop(); end
|
||||
|
||||
`endif
|
||||
`endif
|
||||
endmodule
|
|
@ -324,7 +324,6 @@ module debug_unit (
|
|||
HALT_REQ: begin
|
||||
// we've got a valid instruction in the commit stage so we can proceed to the halted state
|
||||
if (commit_instr_i.valid || !fetch_enable_i) begin
|
||||
halt_o = 1'b1;
|
||||
NS = HALTED;
|
||||
end
|
||||
end
|
||||
|
|
|
@ -237,7 +237,6 @@ module decoder (
|
|||
{7'b000_0000, 3'b101}: instruction_o.op = SRL; // Shift Right Logical
|
||||
{7'b010_0000, 3'b101}: instruction_o.op = SRA; // Shift Right Arithmetic
|
||||
// Multiplications
|
||||
`ifdef MULT
|
||||
{7'b000_0001, 3'b000}: instruction_o.op = MUL;
|
||||
{7'b000_0001, 3'b001}: instruction_o.op = MULH;
|
||||
{7'b000_0001, 3'b010}: instruction_o.op = MULHSU;
|
||||
|
@ -246,7 +245,6 @@ module decoder (
|
|||
{7'b000_0001, 3'b101}: instruction_o.op = DIVU;
|
||||
{7'b000_0001, 3'b110}: instruction_o.op = REM;
|
||||
{7'b000_0001, 3'b111}: instruction_o.op = REMU;
|
||||
`endif
|
||||
default: begin
|
||||
illegal_instr = 1'b1;
|
||||
end
|
||||
|
@ -271,13 +269,11 @@ module decoder (
|
|||
{7'b000_0000, 3'b101}: instruction_o.op = SRLW; // srlw
|
||||
{7'b010_0000, 3'b101}: instruction_o.op = SRAW; // sraw
|
||||
// Multiplications
|
||||
`ifdef MULT
|
||||
{7'b000_0001, 3'b000}: instruction_o.op = MULW;
|
||||
{7'b000_0001, 3'b100}: instruction_o.op = DIVW;
|
||||
{7'b000_0001, 3'b101}: instruction_o.op = DIVUW;
|
||||
{7'b000_0001, 3'b110}: instruction_o.op = REMW;
|
||||
{7'b000_0001, 3'b111}: instruction_o.op = REMUW;
|
||||
`endif
|
||||
default: illegal_instr = 1'b1;
|
||||
endcase
|
||||
end
|
||||
|
@ -345,7 +341,9 @@ module decoder (
|
|||
default: illegal_instr = 1'b1;
|
||||
endcase
|
||||
end
|
||||
|
||||
// --------------------------------
|
||||
// LSU
|
||||
// --------------------------------
|
||||
OPCODE_STORE: begin
|
||||
instruction_o.fu = STORE;
|
||||
imm_select = SIMM;
|
||||
|
@ -353,16 +351,11 @@ module decoder (
|
|||
instruction_o.rs2 = instr.stype.rs2;
|
||||
// determine store size
|
||||
unique case (instr.stype.funct3)
|
||||
3'b000:
|
||||
instruction_o.op = SB;
|
||||
3'b001:
|
||||
instruction_o.op = SH;
|
||||
3'b010:
|
||||
instruction_o.op = SW;
|
||||
3'b011:
|
||||
instruction_o.op = SD;
|
||||
default:
|
||||
illegal_instr = 1'b1;
|
||||
3'b000: instruction_o.op = SB;
|
||||
3'b001: instruction_o.op = SH;
|
||||
3'b010: instruction_o.op = SW;
|
||||
3'b011: instruction_o.op = SD;
|
||||
default: illegal_instr = 1'b1;
|
||||
endcase
|
||||
end
|
||||
|
||||
|
@ -373,26 +366,64 @@ module decoder (
|
|||
instruction_o.rd = instr.itype.rd;
|
||||
// determine load size and signed type
|
||||
unique case (instr.itype.funct3)
|
||||
3'b000:
|
||||
instruction_o.op = LB;
|
||||
3'b001:
|
||||
instruction_o.op = LH;
|
||||
3'b010:
|
||||
instruction_o.op = LW;
|
||||
3'b100:
|
||||
instruction_o.op = LBU;
|
||||
3'b101:
|
||||
instruction_o.op = LHU;
|
||||
3'b110:
|
||||
instruction_o.op = LWU;
|
||||
3'b011:
|
||||
instruction_o.op = LD;
|
||||
default:
|
||||
illegal_instr = 1'b1;
|
||||
3'b000: instruction_o.op = LB;
|
||||
3'b001: instruction_o.op = LH;
|
||||
3'b010: instruction_o.op = LW;
|
||||
3'b100: instruction_o.op = LBU;
|
||||
3'b101: instruction_o.op = LHU;
|
||||
3'b110: instruction_o.op = LWU;
|
||||
3'b011: instruction_o.op = LD;
|
||||
default: illegal_instr = 1'b1;
|
||||
endcase
|
||||
|
||||
end
|
||||
|
||||
`ifdef ENABLE_ATOMICS
|
||||
OPCODE_AMO: begin
|
||||
// we are going to use the load unit for AMOs
|
||||
instruction_o.fu = LOAD;
|
||||
instruction_o.rd = instr.stype.imm0;
|
||||
instruction_o.rs1 = instr.itype.rs1;
|
||||
// words
|
||||
if (instr.stype.funct3 == 3'h2) begin
|
||||
unique case (instr.instr[31:27])
|
||||
5'h0: instruction_o.op = AMO_ADDW;
|
||||
5'h1: instruction_o.op = AMO_SWAPW;
|
||||
5'h2: instruction_o.op = AMO_LRW;
|
||||
5'h3: instruction_o.op = AMO_SCW;
|
||||
5'h4: instruction_o.op = AMO_XORW;
|
||||
5'h8: instruction_o.op = AMO_ORW;
|
||||
5'hC: instruction_o.op = AMO_ANDW;
|
||||
5'h10: instruction_o.op = AMO_MINW;
|
||||
5'h14: instruction_o.op = AMO_MAXW;
|
||||
5'h18: instruction_o.op = AMO_MINWU;
|
||||
5'h1C: instruction_o.op = AMO_MAXWU;
|
||||
default: illegal_instr = 1'b1;
|
||||
endcase
|
||||
// double words
|
||||
end else if (instr.stype.funct3 == 3'h3) begin
|
||||
unique case (instr.instr[31:27])
|
||||
5'h0: instruction_o.op = AMO_ADDD;
|
||||
5'h1: instruction_o.op = AMO_SWAPD;
|
||||
5'h2: instruction_o.op = AMO_LRD;
|
||||
5'h3: instruction_o.op = AMO_SCD;
|
||||
5'h4: instruction_o.op = AMO_XORD;
|
||||
5'h8: instruction_o.op = AMO_ORD;
|
||||
5'hC: instruction_o.op = AMO_ANDD;
|
||||
5'h10: instruction_o.op = AMO_MIND;
|
||||
5'h14: instruction_o.op = AMO_MAXD;
|
||||
5'h18: instruction_o.op = AMO_MINDU;
|
||||
5'h1C: instruction_o.op = AMO_MAXDU;
|
||||
default: illegal_instr = 1'b1;
|
||||
endcase
|
||||
end else begin
|
||||
illegal_instr = 1'b1;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
// --------------------------------
|
||||
// Control Flow Instructions
|
||||
// --------------------------------
|
||||
OPCODE_BRANCH: begin
|
||||
imm_select = SBIMM;
|
||||
instruction_o.fu = CTRL_FLOW;
|
||||
|
|
27
src/div.sv
27
src/div.sv
|
@ -1,27 +0,0 @@
|
|||
// Author:
|
||||
//
|
||||
// Date: 25.07.2017
|
||||
// Description: Ariane Divider
|
||||
//
|
||||
//
|
||||
// Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
// All rights reserved.
|
||||
//
|
||||
// This code is under development and not yet released to the public.
|
||||
// Until it is released, the code is under the copyright of ETH Zurich and
|
||||
// the University of Bologna, and may contain confidential and/or unpublished
|
||||
// work. Any reuse/redistribution is strictly forbidden without written
|
||||
// permission from ETH Zurich.
|
||||
//
|
||||
// Bug fixes and contributions will eventually be released under the
|
||||
// SolderPad open hardware license in the context of the PULP platform
|
||||
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
|
||||
// University of Bologna.
|
||||
//
|
||||
// Placeholder divider: declares only the clock and reset ports and contains no logic.
module div (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni // Asynchronous reset active low
|
||||
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -20,7 +20,10 @@
|
|||
import ariane_pkg::*;
|
||||
|
||||
module ex_stage #(
|
||||
parameter int ASID_WIDTH = 1
|
||||
parameter int ASID_WIDTH = 1,
|
||||
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000,
|
||||
parameter int unsigned AXI_ID_WIDTH = 10,
|
||||
parameter int unsigned AXI_USER_WIDTH = 1
|
||||
)(
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
|
@ -95,6 +98,11 @@ module ex_stage #(
|
|||
input logic [43:0] satp_ppn_i,
|
||||
input logic [ASID_WIDTH-1:0] asid_i,
|
||||
|
||||
// Performance counters
|
||||
output logic itlb_miss_o,
|
||||
output logic dtlb_miss_o,
|
||||
output logic dcache_miss_o,
|
||||
|
||||
output logic [63:0] instr_if_address_o,
|
||||
output logic instr_if_data_req_o,
|
||||
output logic [3:0] instr_if_data_be_o,
|
||||
|
@ -102,17 +110,12 @@ module ex_stage #(
|
|||
input logic instr_if_data_rvalid_i,
|
||||
input logic [63:0] instr_if_data_rdata_i,
|
||||
|
||||
output logic [11:0] data_if_address_index_o,
|
||||
output logic [43:0] data_if_address_tag_o,
|
||||
output logic [63:0] data_if_data_wdata_o,
|
||||
output logic data_if_data_req_o,
|
||||
output logic data_if_data_we_o,
|
||||
output logic [7:0] data_if_data_be_o,
|
||||
output logic data_if_kill_req_o,
|
||||
output logic data_if_tag_valid_o,
|
||||
input logic data_if_data_gnt_i,
|
||||
input logic data_if_data_rvalid_i,
|
||||
input logic [63:0] data_if_data_rdata_i
|
||||
// DCache interface
|
||||
input logic dcache_en_i,
|
||||
input logic flush_dcache_i,
|
||||
output logic flush_dcache_ack_o,
|
||||
AXI_BUS.Master data_if,
|
||||
AXI_BUS.Master bypass_if
|
||||
);
|
||||
|
||||
// -----
|
||||
|
@ -134,19 +137,22 @@ module ex_stage #(
|
|||
// ----------------
|
||||
// Multiplication
|
||||
// ----------------
|
||||
`ifdef MULT
|
||||
mult mult_i (
|
||||
mult i_mult (
|
||||
.result_o ( mult_result_o ),
|
||||
.*
|
||||
);
|
||||
`endif
|
||||
|
||||
// ----------------
|
||||
// Load-Store Unit
|
||||
// ----------------
|
||||
lsu lsu_i (
|
||||
lsu #(
|
||||
.CACHE_START_ADDR ( CACHE_START_ADDR ),
|
||||
.AXI_ID_WIDTH ( AXI_ID_WIDTH ),
|
||||
.AXI_USER_WIDTH ( AXI_USER_WIDTH )
|
||||
) lsu_i (
|
||||
.commit_i ( lsu_commit_i ),
|
||||
.commit_ready_o ( lsu_commit_ready_o ),
|
||||
.data_if ( data_if ),
|
||||
.*
|
||||
);
|
||||
|
||||
|
|
|
@ -75,9 +75,12 @@ module fetch_fifo
|
|||
|
||||
always_comb begin : fetch_fifo_logic
|
||||
// counter
|
||||
automatic logic [$clog2(DEPTH)-1:0] status_cnt = status_cnt_q;
|
||||
automatic logic [$clog2(DEPTH)-1:0] write_pointer = write_pointer_q;
|
||||
automatic logic [$clog2(DEPTH)-1:0] read_pointer = read_pointer_q;
|
||||
automatic logic [$clog2(DEPTH)-1:0] status_cnt;
|
||||
automatic logic [$clog2(DEPTH)-1:0] write_pointer;
|
||||
automatic logic [$clog2(DEPTH)-1:0] read_pointer;
|
||||
status_cnt = status_cnt_q;
|
||||
write_pointer = write_pointer_q;
|
||||
read_pointer = read_pointer_q;
|
||||
|
||||
mem_n = mem_q;
|
||||
|
||||
|
|
|
@ -368,7 +368,7 @@ module issue_read_operands (
|
|||
`ifndef SYNTHESIS
|
||||
`ifndef verilator
|
||||
assert property (
|
||||
@(posedge clk_i) (alu_valid_q || lsu_valid_q || csr_valid_q || branch_valid_q || mult_valid_q) |-> (!$isunknown(operand_a_q) && !$isunknown(operand_b_q)))
|
||||
@(posedge clk_i) (branch_valid_q) |-> (!$isunknown(operand_a_q) && !$isunknown(operand_b_q)))
|
||||
else $warning ("Got unknown value in one of the operands");
|
||||
`endif
|
||||
`endif
|
||||
|
|
|
@ -43,10 +43,12 @@ module load_unit (
|
|||
// D$ interface
|
||||
output logic [11:0] address_index_o,
|
||||
output logic [43:0] address_tag_o,
|
||||
output amo_t amo_op_o,
|
||||
output logic [63:0] data_wdata_o,
|
||||
output logic data_req_o,
|
||||
output logic data_we_o,
|
||||
output logic [7:0] data_be_o,
|
||||
output logic [1:0] data_size_o,
|
||||
output logic kill_req_o,
|
||||
output logic tag_valid_o,
|
||||
input logic data_gnt_i,
|
||||
|
@ -91,6 +93,7 @@ module load_unit (
|
|||
kill_req_o = 1'b0;
|
||||
tag_valid_o = 1'b0;
|
||||
data_be_o = lsu_ctrl_i.be;
|
||||
data_size_o = extract_transfer_size(lsu_ctrl_i.operator);
|
||||
pop_ld_o = 1'b0;
|
||||
|
||||
case (CS)
|
||||
|
@ -277,6 +280,41 @@ module load_unit (
|
|||
end
|
||||
end
|
||||
|
||||
// ---------------
|
||||
// AMO Operation
|
||||
// ---------------
|
||||
// Translates the width-specific LSU operator (word "W" / double-word "D" variants)
// into the width-agnostic atomic operation driven on amo_op_o.
// amo_op_o stays AMO_NONE when lsu_ctrl_i is not valid or the operator is not an AMO.
always_comb begin : amo_op_select
|
||||
amo_op_o = AMO_NONE;
|
||||
|
||||
if (lsu_ctrl_i.valid) begin
|
||||
case (lsu_ctrl_i.operator)
|
||||
AMO_LRW: amo_op_o = AMO_LR;
|
||||
AMO_LRD: amo_op_o = AMO_LR;
|
||||
AMO_SCW: amo_op_o = AMO_SC;
|
||||
AMO_SCD: amo_op_o = AMO_SC;
|
||||
AMO_SWAPW: amo_op_o = AMO_SWAP;
|
||||
AMO_ADDW: amo_op_o = AMO_ADD;
|
||||
AMO_ANDW: amo_op_o = AMO_AND;
|
||||
AMO_ORW: amo_op_o = AMO_OR;
|
||||
AMO_XORW: amo_op_o = AMO_XOR;
|
||||
AMO_MAXW: amo_op_o = AMO_MAX;
|
||||
AMO_MAXWU: amo_op_o = AMO_MAXU;
|
||||
AMO_MINW: amo_op_o = AMO_MIN;
|
||||
AMO_MINWU: amo_op_o = AMO_MINU;
|
||||
AMO_SWAPD: amo_op_o = AMO_SWAP;
|
||||
AMO_ADDD: amo_op_o = AMO_ADD;
|
||||
AMO_ANDD: amo_op_o = AMO_AND;
|
||||
AMO_ORD: amo_op_o = AMO_OR;
|
||||
AMO_XORD: amo_op_o = AMO_XOR;
|
||||
AMO_MAXD: amo_op_o = AMO_MAX;
|
||||
AMO_MAXDU: amo_op_o = AMO_MAXU;
|
||||
AMO_MIND: amo_op_o = AMO_MIN;
|
||||
AMO_MINDU: amo_op_o = AMO_MINU;
|
||||
// any non-AMO operator
|
||||
default: amo_op_o = AMO_NONE;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// ---------------
|
||||
// Sign Extend
|
||||
// ---------------
|
||||
|
|
93
src/lsu.sv
93
src/lsu.sv
|
@ -19,7 +19,10 @@
|
|||
import ariane_pkg::*;
|
||||
|
||||
module lsu #(
|
||||
parameter int ASID_WIDTH = 1
|
||||
parameter int ASID_WIDTH = 1,
|
||||
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000,
|
||||
parameter int unsigned AXI_ID_WIDTH = 10,
|
||||
parameter int unsigned AXI_USER_WIDTH = 1
|
||||
)(
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
@ -56,6 +59,10 @@ module lsu #(
|
|||
input logic [43:0] satp_ppn_i, // From CSR register file
|
||||
input logic [ASID_WIDTH-1:0] asid_i, // From CSR register file
|
||||
input logic flush_tlb_i,
|
||||
// Performance counters
|
||||
output logic itlb_miss_o,
|
||||
output logic dtlb_miss_o,
|
||||
output logic dcache_miss_o,
|
||||
// Instruction memory/cache
|
||||
output logic [63:0] instr_if_address_o,
|
||||
output logic instr_if_data_req_o,
|
||||
|
@ -63,18 +70,13 @@ module lsu #(
|
|||
input logic instr_if_data_gnt_i,
|
||||
input logic instr_if_data_rvalid_i,
|
||||
input logic [63:0] instr_if_data_rdata_i,
|
||||
// Data cache
|
||||
output logic [11:0] data_if_address_index_o,
|
||||
output logic [43:0] data_if_address_tag_o,
|
||||
output logic [63:0] data_if_data_wdata_o,
|
||||
output logic data_if_data_req_o,
|
||||
output logic data_if_data_we_o,
|
||||
output logic [7:0] data_if_data_be_o,
|
||||
output logic data_if_kill_req_o,
|
||||
output logic data_if_tag_valid_o,
|
||||
input logic data_if_data_gnt_i,
|
||||
input logic data_if_data_rvalid_i,
|
||||
input logic [63:0] data_if_data_rdata_i,
|
||||
|
||||
input logic dcache_en_i,
|
||||
input logic flush_dcache_i,
|
||||
output logic flush_dcache_ack_o,
|
||||
// Data cache refill port
|
||||
AXI_BUS.Master data_if,
|
||||
AXI_BUS.Master bypass_if,
|
||||
|
||||
output exception_t lsu_exception_o // to WB, signal exception status LD/ST exception
|
||||
|
||||
|
@ -126,38 +128,43 @@ module lsu #(
|
|||
exception_t ld_ex;
|
||||
exception_t st_ex;
|
||||
|
||||
// ---------------
|
||||
// Memory Arbiter
|
||||
// ---------------
|
||||
// ------------
|
||||
// NB Dcache
|
||||
// ------------
|
||||
logic [2:0][11:0] address_index_i;
|
||||
logic [2:0][43:0] address_tag_i;
|
||||
logic [2:0][63:0] data_wdata_i;
|
||||
logic [2:0] data_req_i;
|
||||
logic [2:0] data_we_i;
|
||||
logic [2:0][1:0] data_size_i;
|
||||
|
||||
logic [2:0] kill_req_i;
|
||||
logic [2:0] tag_valid_i;
|
||||
logic [2:0][7:0] data_be_i;
|
||||
logic [2:0] data_gnt_o;
|
||||
logic [2:0] data_rvalid_o;
|
||||
logic [2:0][63:0] data_rdata_o;
|
||||
amo_t [2:0] amo_op_i;
|
||||
|
||||
// AMO operations always go through the load unit
|
||||
assign amo_op_i[0] = AMO_NONE;
|
||||
assign amo_op_i[2] = AMO_NONE;
|
||||
|
||||
// decreasing priority
|
||||
// Port 0: PTW
|
||||
// Port 1: Load Unit
|
||||
// Port 2: Store Unit
|
||||
dcache_arbiter dcache_arbiter_i (
|
||||
nbdcache #(
|
||||
.CACHE_START_ADDR ( CACHE_START_ADDR ),
|
||||
.AXI_ID_WIDTH ( AXI_ID_WIDTH ),
|
||||
.AXI_USER_WIDTH ( AXI_USER_WIDTH )
|
||||
) i_nbdcache (
|
||||
// to D$
|
||||
.address_index_o ( data_if_address_index_o ),
|
||||
.address_tag_o ( data_if_address_tag_o ),
|
||||
.data_wdata_o ( data_if_data_wdata_o ),
|
||||
.data_req_o ( data_if_data_req_o ),
|
||||
.data_we_o ( data_if_data_we_o ),
|
||||
.data_be_o ( data_if_data_be_o ),
|
||||
.kill_req_o ( data_if_kill_req_o ),
|
||||
.tag_valid_o ( data_if_tag_valid_o ),
|
||||
.data_gnt_i ( data_if_data_gnt_i ),
|
||||
.data_rvalid_i ( data_if_data_rvalid_i ),
|
||||
.data_rdata_i ( data_if_data_rdata_i ),
|
||||
.data_if ( data_if ),
|
||||
.bypass_if ( bypass_if ),
|
||||
.enable_i ( dcache_en_i ),
|
||||
.flush_i ( flush_dcache_i ),
|
||||
.flush_ack_o ( flush_dcache_ack_o ),
|
||||
// from PTW, Load Unit and Store Unit
|
||||
.address_index_i ( address_index_i ),
|
||||
.address_tag_i ( address_tag_i ),
|
||||
|
@ -165,11 +172,19 @@ module lsu #(
|
|||
.data_req_i ( data_req_i ),
|
||||
.data_we_i ( data_we_i ),
|
||||
.data_be_i ( data_be_i ),
|
||||
.data_size_i ( data_size_i ),
|
||||
.kill_req_i ( kill_req_i ),
|
||||
.tag_valid_i ( tag_valid_i ),
|
||||
.data_gnt_o ( data_gnt_o ),
|
||||
.data_rvalid_o ( data_rvalid_o ),
|
||||
.data_rdata_o ( data_rdata_o ),
|
||||
.amo_op_i ( amo_op_i ),
|
||||
|
||||
.amo_commit_i ( ),
|
||||
.amo_valid_o ( ),
|
||||
.amo_result_o ( ),
|
||||
.amo_flush_i ( 1'b0 ),
|
||||
.miss_o ( dcache_miss_o ),
|
||||
.*
|
||||
);
|
||||
|
||||
|
@ -180,7 +195,7 @@ module lsu #(
|
|||
.INSTR_TLB_ENTRIES ( 16 ),
|
||||
.DATA_TLB_ENTRIES ( 16 ),
|
||||
.ASID_WIDTH ( ASID_WIDTH )
|
||||
) mmu_i (
|
||||
) i_mmu (
|
||||
// misaligned bypass
|
||||
.misaligned_ex_i ( misaligned_exception ),
|
||||
.lsu_is_store_i ( st_translation_req ),
|
||||
|
@ -197,6 +212,7 @@ module lsu #(
|
|||
.data_req_o ( data_req_i [0] ),
|
||||
.data_we_o ( data_we_i [0] ),
|
||||
.data_be_o ( data_be_i [0] ),
|
||||
.data_size_o ( data_size_i [0] ),
|
||||
.kill_req_o ( kill_req_i [0] ),
|
||||
.tag_valid_o ( tag_valid_i [0] ),
|
||||
.data_gnt_i ( data_gnt_o [0] ),
|
||||
|
@ -207,7 +223,7 @@ module lsu #(
|
|||
// ------------------
|
||||
// Store Unit
|
||||
// ------------------
|
||||
store_unit store_unit_i (
|
||||
store_unit i_store_unit (
|
||||
.valid_i ( st_valid_i ),
|
||||
.lsu_ctrl_i ( lsu_ctrl ),
|
||||
.pop_st_o ( pop_st ),
|
||||
|
@ -232,6 +248,7 @@ module lsu #(
|
|||
.data_req_o ( data_req_i [2] ),
|
||||
.data_we_o ( data_we_i [2] ),
|
||||
.data_be_o ( data_be_i [2] ),
|
||||
.data_size_o ( data_size_i [2] ),
|
||||
.kill_req_o ( kill_req_i [2] ),
|
||||
.tag_valid_o ( tag_valid_i [2] ),
|
||||
.data_gnt_i ( data_gnt_o [2] ),
|
||||
|
@ -242,7 +259,7 @@ module lsu #(
|
|||
// ------------------
|
||||
// Load Unit
|
||||
// ------------------
|
||||
load_unit load_unit_i (
|
||||
load_unit i_load_unit (
|
||||
.valid_i ( ld_valid_i ),
|
||||
.lsu_ctrl_i ( lsu_ctrl ),
|
||||
.pop_ld_o ( pop_ld ),
|
||||
|
@ -264,9 +281,11 @@ module lsu #(
|
|||
.address_index_o ( address_index_i [1] ),
|
||||
.address_tag_o ( address_tag_i [1] ),
|
||||
.data_wdata_o ( data_wdata_i [1] ),
|
||||
.amo_op_o ( amo_op_i [1] ),
|
||||
.data_req_o ( data_req_i [1] ),
|
||||
.data_we_o ( data_we_i [1] ),
|
||||
.data_be_o ( data_be_i [1] ),
|
||||
.data_size_o ( data_size_i [1] ),
|
||||
.kill_req_o ( kill_req_i [1] ),
|
||||
.tag_valid_o ( tag_valid_i [1] ),
|
||||
.data_gnt_i ( data_gnt_o [1] ),
|
||||
|
@ -278,7 +297,7 @@ module lsu #(
|
|||
// ---------------------
|
||||
// Result Sequentialize
|
||||
// ---------------------
|
||||
lsu_arbiter lsu_arbiter_i (
|
||||
lsu_arbiter i_lsu_arbiter (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.flush_i ( flush_i ),
|
||||
|
@ -514,9 +533,13 @@ module lsu_bypass (
|
|||
assign ready_o = empty;
|
||||
|
||||
always_comb begin
|
||||
automatic logic [1:0] status_cnt = status_cnt_q;
|
||||
automatic logic write_pointer = write_pointer_q;
|
||||
automatic logic read_pointer = read_pointer_q;
|
||||
automatic logic [1:0] status_cnt;
|
||||
automatic logic write_pointer;
|
||||
automatic logic read_pointer;
|
||||
|
||||
status_cnt = status_cnt_q;
|
||||
write_pointer = write_pointer_q;
|
||||
read_pointer = read_pointer_q;
|
||||
|
||||
mem_n = mem_q;
|
||||
// we've got a valid LSU request
|
||||
|
|
|
@ -67,8 +67,11 @@ module lsu_arbiter (
|
|||
// Read-Write Process
|
||||
// -------------------
|
||||
always_comb begin : read_write_fifo
|
||||
automatic logic [$clog2(WIDTH)-1:0] status_cnt = status_cnt_q;
|
||||
automatic logic [$clog2(WIDTH)-1:0] write_pointer = write_pointer_q;
|
||||
automatic logic [$clog2(WIDTH)-1:0] status_cnt;
|
||||
automatic logic [$clog2(WIDTH)-1:0] write_pointer;
|
||||
|
||||
status_cnt = status_cnt_q;
|
||||
write_pointer = write_pointer_q;
|
||||
|
||||
// default assignments
|
||||
mem_n = mem_q;
|
||||
|
|
1037
src/miss_handler.sv
Normal file
1037
src/miss_handler.sv
Normal file
File diff suppressed because it is too large
Load diff
|
@ -63,6 +63,9 @@ module mmu #(
|
|||
input logic [43:0] satp_ppn_i,
|
||||
input logic [ASID_WIDTH-1:0] asid_i,
|
||||
input logic flush_tlb_i,
|
||||
// Performance counters
|
||||
output logic itlb_miss_o,
|
||||
output logic dtlb_miss_o,
|
||||
// Memory interfaces
|
||||
// Instruction memory/cache
|
||||
output logic [63:0] instr_if_address_o,
|
||||
|
@ -78,6 +81,7 @@ module mmu #(
|
|||
output logic data_req_o,
|
||||
output logic data_we_o,
|
||||
output logic [7:0] data_be_o,
|
||||
output logic [1:0] data_size_o,
|
||||
output logic kill_req_o,
|
||||
output logic tag_valid_o,
|
||||
input logic data_gnt_i,
|
||||
|
@ -406,7 +410,7 @@ module mmu #(
|
|||
// Registers
|
||||
// ----------
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if(~rst_ni) begin
|
||||
if (~rst_ni) begin
|
||||
lsu_vaddr_q <= '0;
|
||||
lsu_req_q <= '0;
|
||||
misaligned_ex_q <= '0;
|
||||
|
|
612
src/mult.sv
612
src/mult.sv
|
@ -22,8 +22,7 @@
|
|||
|
||||
import ariane_pkg::*;
|
||||
|
||||
module mult
|
||||
(
|
||||
module mult (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic [TRANS_ID_BITS-1:0] trans_id_i,
|
||||
|
@ -36,116 +35,525 @@ module mult
|
|||
output logic mult_ready_o,
|
||||
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
|
||||
);
|
||||
logic mul_valid;
|
||||
logic div_valid;
|
||||
logic div_ready_i; // receiver of division result is able to accept the result
|
||||
logic [TRANS_ID_BITS-1:0] mul_trans_id;
|
||||
logic [TRANS_ID_BITS-1:0] div_trans_id;
|
||||
logic [63:0] mul_result;
|
||||
logic [63:0] div_result;
|
||||
|
||||
// ----------------
|
||||
// Mock Multiplier
|
||||
// ----------------
|
||||
// Sign-extends a 32-bit operand to 64 bits by replicating bit 31 into the upper 32 bits.
function automatic logic [63:0] sign_extend (logic [31:0] operand);
|
||||
return {{32{operand[31]}}, operand[31:0]};
|
||||
endfunction
|
||||
logic div_valid_op;
|
||||
logic mul_valid_op;
|
||||
// Input Arbitration
|
||||
assign mul_valid_op = mult_valid_i && (operator_i inside { MUL, MULH, MULHU, MULHSU, MULW });
|
||||
assign div_valid_op = mult_valid_i && (operator_i inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW });
|
||||
|
||||
assign mult_valid_o = mult_valid_i;
|
||||
assign mult_trans_id_o = trans_id_i;
|
||||
assign mult_ready_o = 1'b1;
|
||||
// ---------------------
|
||||
// Output Arbitration
|
||||
// ---------------------
|
||||
// we give precedence to multiplication as the divider supports stalling and the multiplier is
|
||||
// just a dumb pipelined multiplier
|
||||
assign div_ready_i = (mul_valid) ? 1'b0 : 1'b1;
|
||||
assign mult_trans_id_o = (mul_valid) ? mul_trans_id : div_trans_id;
|
||||
assign result_o = (mul_valid) ? mul_result : div_result;
|
||||
assign mult_valid_o = div_valid | mul_valid;
|
||||
// mult_ready_o = division as the multiplication will unconditionally be ready to accept new requests
|
||||
|
||||
// sign extend operand a and b
|
||||
logic sign_a, sign_b;
|
||||
// ---------------------
|
||||
// Multiplication
|
||||
// ---------------------
|
||||
mul i_mul (
|
||||
.result_o ( mul_result ),
|
||||
.mult_valid_i ( mul_valid_op ),
|
||||
.mult_valid_o ( mul_valid ),
|
||||
.mult_trans_id_o ( mul_trans_id ),
|
||||
.mult_ready_o ( ), // this unit is unconditionally ready
|
||||
.*
|
||||
);
|
||||
|
||||
// ---------------------
|
||||
// Division
|
||||
// ---------------------
|
||||
logic [5:0] ff1_result; // holds the index of the last '1' (as the input operand is reversed)
|
||||
logic ff1_no_one; // no '1' was found by find first one
|
||||
logic [63:0] ff1_input; // input to find first one
|
||||
logic [63:0] operand_b_rev, operand_b_rev_neg, operand_b_shift; // couple of different representations for the dividend
|
||||
logic [6:0] div_shift; // amount by which to shift left
|
||||
logic div_signed; // should this operation be performed as a signed or unsigned division
|
||||
logic div_op_signed; // actual sign signal depends on div_signed and the MSB of the word
|
||||
logic [63:0] operand_b, operand_a; // input operands after input MUX (input silencing, word operations or full inputs)
|
||||
logic [63:0] result; // result before result mux
|
||||
|
||||
logic word_op; // is it a word operation
|
||||
logic rem; // is it a remainder (or not a remainder, i.e. a division)
|
||||
logic word_op_d, word_op_q; // save whether the operation is a word operation
|
||||
|
||||
// is this a signed operation?
|
||||
assign div_signed = (operator_i inside {DIV, DIVW, REM, REMW}) ? 1'b1 : 1'b0;
|
||||
// if this operation is signed look at the actual sign bit to determine whether we should perform signed or unsigned division
|
||||
assign div_op_signed = div_signed & operand_b[63];
|
||||
|
||||
// reverse input operands
|
||||
generate
|
||||
for (genvar k = 0; k < 64; k++)
|
||||
assign operand_b_rev[k] = operand_b[63-k];
|
||||
endgenerate
|
||||
// negated reverse input operand, used for signed divisions
|
||||
assign operand_b_rev_neg = ~operand_b_rev;
|
||||
assign ff1_input = (div_op_signed) ? operand_b_rev_neg : operand_b_rev;
|
||||
|
||||
// Prepare the divider input operands and the control flags.
//
// Outputs of this block:
//   operand_a / operand_b : operands handed on to the divider path; forced to
//                           zero ("silenced") unless a division/remainder
//                           request is presented on the inputs
//   word_op               : current request is a 32-bit (*W) operation
//   rem                   : current request is a remainder operation
//   word_op_d             : next value of the word_op_q flag, which selects the
//                           sext32() path of the divider result multiplexer
always_comb begin
    // silence the inputs
    operand_a = '0;
    operand_b = '0;
    // control signals: keep the stored word flag unless a new request arrives
    word_op_d = word_op_q;
    word_op   = 1'b0;
    rem       = 1'b0;

    // we've got a new division operation
    if (mult_valid_i && operator_i inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin
        // is this a word operation?
        if (operator_i inside {DIVW, DIVUW, REMW, REMUW}) begin
            word_op = 1'b1;
            // sign extend the 32-bit operands for signed operations,
            // zero extend them for unsigned operations
            if (div_signed) begin
                operand_a = sext32(operand_a_i[31:0]);
                operand_b = sext32(operand_b_i[31:0]);
            end else begin
                operand_a = {32'b0, operand_a_i[31:0]};
                operand_b = {32'b0, operand_b_i[31:0]};
            end

            // remember that the result has to be sign extended from 32 bit,
            // this is done for all word operations (signed and unsigned)
            word_op_d = 1'b1;
        // regular (64-bit) operation
        end else begin
            // clear the flag again: without this a single word operation would
            // leave word_op_q set forever and every following 64-bit result
            // would be truncated/sign extended by the result multiplexer
            word_op_d = 1'b0;
            // no sign extending is necessary as we are already using the full 64 bit
            operand_a = operand_a_i;
            operand_b = operand_b_i;
        end

        // is this a modulo?
        if (operator_i inside {REM, REMU, REMW, REMUW}) begin
            rem = 1'b1;
        end
    end
end
|
||||
|
||||
// ---------------------
|
||||
// Find First one
|
||||
// ---------------------
|
||||
// this unit is used to speed up the sequential division by shifting the dividend first
|
||||
alu_ff #(
|
||||
.LEN ( 64 )
|
||||
) i_ff1 (
|
||||
.in_i ( ff1_input ), // signed = operand_b_rev_neg, unsigned operand_b_rev
|
||||
.first_one_o ( ff1_result ),
|
||||
.no_ones_o ( ff1_no_one )
|
||||
);
|
||||
|
||||
// if the dividend is all zero go for the full length
|
||||
assign div_shift = ff1_no_one ? 7'd64 : ff1_result;
|
||||
// prepare dividend by shifting
|
||||
assign operand_b_shift = operand_b <<< div_shift;
|
||||
|
||||
// ---------------------
|
||||
// Serial Divider
|
||||
// ---------------------
|
||||
serial_divider #(
|
||||
.C_WIDTH ( 64 ),
|
||||
.C_LOG_WIDTH ( $clog2(64) + 1 )
|
||||
) i_div (
|
||||
.Clk_CI ( clk_i ),
|
||||
.Rst_RBI ( rst_ni ),
|
||||
.TransId_DI ( trans_id_i ),
|
||||
.OpA_DI ( operand_a ),
|
||||
.OpB_DI ( operand_b_shift ),
|
||||
.OpBShift_DI ( div_shift ),
|
||||
.OpBIsZero_SI ( ~(|operand_b) ),
|
||||
.OpBSign_SI ( div_op_signed ), // gate this to 0 in case of unsigned ops
|
||||
.OpCode_SI ( {rem, div_signed} ), // 00: udiv, 10: urem, 01: div, 11: rem
|
||||
.InVld_SI ( div_valid_op ),
|
||||
.OutRdy_SO ( mult_ready_o ),
|
||||
.OutRdy_SI ( div_ready_i ),
|
||||
.OutVld_SO ( div_valid ),
|
||||
.TransId_DO ( div_trans_id ),
|
||||
.Res_DO ( result )
|
||||
);
|
||||
// Result multiplexer
|
||||
// if it was a word operation (signed or unsigned) the bit will be set and the 32-bit result will be sign extended accordingly
|
||||
assign div_result = (word_op_q) ? sext32(result) : result;
|
||||
|
||||
// ---------------------
|
||||
// Registers
|
||||
// ---------------------
|
||||
// State register for the word-operation flag (word_op_q).
// Asynchronous active-low reset; otherwise the flag follows word_op_d.
always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
        // word_op_q is a single-bit flag: reset it with a plain 1-bit zero
        // instead of the (unrelated) fu_op enumeration literal ADD
        word_op_q <= 1'b0;
    end else begin
        word_op_q <= word_op_d;
    end
end
|
||||
endmodule
|
||||
|
||||
/* File : mult.sv
|
||||
* Ver : 1.0
|
||||
* Date : 15.03.2016
|
||||
*
|
||||
*
|
||||
* Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
*
|
||||
* Description: this is a simple serial divider for signed integers.
|
||||
*
|
||||
*
|
||||
* Authors : Michael Schaffner (schaffner@iis.ee.ethz.ch)
|
||||
* Andreas Traber (atraber@iis.ee.ethz.ch)
|
||||
*
|
||||
*/
|
||||
// Iterative (one comparison/subtraction step per cycle) divider.
//
// Handshake as implemented by the FSM below:
//   * OutRdy_SO is high while the unit is in IDLE and no request is presented.
//   * A request (InVld_SI) seen in IDLE loads the operands, the flags, the
//     transaction id and the iteration counter (OpBShift_DI).
//   * The unit iterates in DIVIDE until the counter has reached zero.
//   * In FINISH, OutVld_SO is asserted until OutRdy_SI is seen.
module serial_divider #(
    parameter int unsigned C_WIDTH     = 32,
    parameter int unsigned C_LOG_WIDTH = 6
)(
    input  logic                     Clk_CI,
    input  logic                     Rst_RBI,
    // request side
    input  logic [TRANS_ID_BITS-1:0] TransId_DI,
    input  logic [C_WIDTH-1:0]       OpA_DI,
    input  logic [C_WIDTH-1:0]       OpB_DI,
    input  logic [C_LOG_WIDTH-1:0]   OpBShift_DI,
    input  logic                     OpBIsZero_SI,
    input  logic                     OpBSign_SI,   // gate this to 0 in case of unsigned ops
    input  logic [1:0]               OpCode_SI,    // 0: udiv, 2: urem, 1: div, 3: rem
    input  logic                     InVld_SI,
    // response side
    output logic                     OutRdy_SO,
    input  logic                     OutRdy_SI,
    output logic                     OutVld_SO,
    output logic [TRANS_ID_BITS-1:0] TransId_DO,
    output logic [C_WIDTH-1:0]       Res_DO
);

    // ----------------------------------
    // Declarations
    // ----------------------------------
    // control FSM
    enum logic [1:0] {IDLE, DIVIDE, FINISH} State_SN, State_SP;
    logic ARegEn_S, BRegEn_S, ResRegEn_S, ABComp_S, PmSel_S, LoadEn_S;

    // iteration counter
    logic [C_LOG_WIDTH-1:0]   Cnt_DP, Cnt_DN;
    logic                     CntZero_S;

    // working registers
    logic [C_WIDTH-1:0]       AReg_DP,   AReg_DN;
    logic [C_WIDTH-1:0]       BReg_DP,   BReg_DN;
    logic [C_WIDTH-1:0]       ResReg_DP, ResReg_DN;
    logic [C_WIDTH-1:0]       ResReg_DP_rev;
    logic [TRANS_ID_BITS-1:0] TransId_DP, TransId_DN;

    // flags captured together with the operands
    logic                     RemSel_SP,    RemSel_SN;
    logic                     CompInv_SP,   CompInv_SN;
    logic                     ResInv_SP,    ResInv_SN;
    logic                     OpBIsZero_SP, OpBIsZero_SN;

    // combinational datapath nodes
    logic [C_WIDTH-1:0]       AddMux_D;
    logic [C_WIDTH-1:0]       AddOut_D;
    logic [C_WIDTH-1:0]       AddTmp_D;
    logic [C_WIDTH-1:0]       BMux_D;
    logic [C_WIDTH-1:0]       OutMux_D;

    // ----------------------------------
    // Counter
    // ----------------------------------
    // loaded with OpBShift_DI on a new request, then counts down and saturates at zero
    assign CntZero_S = ~|Cnt_DP;
    assign Cnt_DN    = (LoadEn_S)   ? OpBShift_DI :
                       (~CntZero_S) ? Cnt_DP - 1  : Cnt_DP;

    // ----------------------------------
    // Datapath
    // ----------------------------------
    // adder operands: {0, OpA_DI} while loading, {AReg_DP, BReg_DP} while iterating
    assign AddTmp_D = (LoadEn_S) ? 0      : AReg_DP;
    assign AddMux_D = (LoadEn_S) ? OpA_DI : BReg_DP;

    // add/subtract select: the adder only adds during the load cycle, and only
    // if the (signed) operand sign condition below does not hold
    assign PmSel_S  = LoadEn_S & ~(OpCode_SI[0] & (OpA_DI[$high(OpA_DI)] ^ OpBSign_SI));
    assign AddOut_D = (PmSel_S) ? AddTmp_D + AddMux_D : AddTmp_D - $signed(AddMux_D);

    // B is shifted right by one position per iteration
    // attention: logical shift in case of negative operand B!
    assign BMux_D   = (LoadEn_S) ? OpB_DI : {CompInv_SP, (BReg_DP[$high(BReg_DP):1])};

    // comparator deciding whether this iteration updates A (and shifts in a '1')
    assign ABComp_S = ((AReg_DP == BReg_DP) | ((AReg_DP > BReg_DP) ^ CompInv_SP)) & ((|AReg_DP) | OpBIsZero_SP);

    // the result bits are shifted in from the top, hence the bit reversal
    assign ResReg_DP_rev = {<<{ResReg_DP}};

    // output select (A register for remainder opcodes, reversed result register
    // otherwise), followed by the optional negation
    assign OutMux_D = (RemSel_SP) ? AReg_DP : ResReg_DP_rev;
    assign Res_DO   = (ResInv_SP) ? -$signed(OutMux_D) : OutMux_D;

    // the transaction id simply travels along with the operation
    assign TransId_DO = TransId_DP;

    // ----------------------------------
    // Control FSM
    // ----------------------------------
    always_comb begin : p_fsm
        // defaults: stay where we are, nothing enabled
        State_SN   = State_SP;
        OutVld_SO  = 1'b0;
        OutRdy_SO  = 1'b0;
        LoadEn_S   = 1'b0;
        ARegEn_S   = 1'b0;
        BRegEn_S   = 1'b0;
        ResRegEn_S = 1'b0;

        case (State_SP)
            IDLE: begin
                if (InVld_SI) begin
                    // accept the request: capture operands and flags
                    LoadEn_S = 1'b1;
                    ARegEn_S = 1'b1;
                    BRegEn_S = 1'b1;
                    State_SN = DIVIDE;
                end else begin
                    OutRdy_SO = 1'b1;
                end
            end

            DIVIDE: begin
                ResRegEn_S = 1'b1;
                BRegEn_S   = 1'b1;
                ARegEn_S   = ABComp_S;
                // the cycle in which the counter reads zero is the last divide cycle
                if (CntZero_S) begin
                    State_SN = FINISH;
                end
            end

            FINISH: begin
                // hold the result until the consumer takes it
                OutVld_SO = 1'b1;
                if (OutRdy_SI) begin
                    State_SN = IDLE;
                end
            end

            default: ;
        endcase
    end

    // ----------------------------------
    // Next-state values of the registers
    // ----------------------------------
    // flags and transaction id are only updated when a request is loaded
    assign TransId_DN   = (LoadEn_S) ? TransId_DI   : TransId_DP;
    assign OpBIsZero_SN = (LoadEn_S) ? OpBIsZero_SI : OpBIsZero_SP;
    assign CompInv_SN   = (LoadEn_S) ? OpBSign_SI   : CompInv_SP;
    assign RemSel_SN    = (LoadEn_S) ? OpCode_SI[1] : RemSel_SP;
    assign ResInv_SN    = (LoadEn_S) ? (~OpBIsZero_SI | OpCode_SI[1]) & OpCode_SI[0] & (OpA_DI[$high(OpA_DI)] ^ OpBSign_SI) : ResInv_SP;

    assign AReg_DN      = (ARegEn_S) ? AddOut_D : AReg_DP;
    assign BReg_DN      = (BRegEn_S) ? BMux_D   : BReg_DP;
    // cleared on load, otherwise the comparator outcome is shifted in at the MSB
    assign ResReg_DN    = (LoadEn_S)   ? '0 :
                          (ResRegEn_S) ? {ABComp_S, ResReg_DP[$high(ResReg_DP):1]} : ResReg_DP;

    // ----------------------------------
    // Registers (asynchronous active-low reset)
    // ----------------------------------
    always_ff @(posedge Clk_CI or negedge Rst_RBI) begin : p_regs
        if (!Rst_RBI) begin
            State_SP     <= IDLE;
            Cnt_DP       <= '0;
            AReg_DP      <= '0;
            BReg_DP      <= '0;
            ResReg_DP    <= '0;
            TransId_DP   <= '0;
            OpBIsZero_SP <= 1'b0;
            CompInv_SP   <= 1'b0;
            RemSel_SP    <= 1'b0;
            ResInv_SP    <= 1'b0;
        end else begin
            State_SP     <= State_SN;
            Cnt_DP       <= Cnt_DN;
            AReg_DP      <= AReg_DN;
            BReg_DP      <= BReg_DN;
            ResReg_DP    <= ResReg_DN;
            TransId_DP   <= TransId_DN;
            OpBIsZero_SP <= OpBIsZero_SN;
            CompInv_SP   <= CompInv_SN;
            RemSel_SP    <= RemSel_SN;
            ResInv_SP    <= ResInv_SN;
        end
    end

    // ----------------------------------
    // Parameter sanity check (simulation only)
    // ----------------------------------
    `ifndef SYNTHESIS
    initial begin : p_assertions
        assert (C_LOG_WIDTH == $clog2(C_WIDTH+1)) else $error("C_LOG_WIDTH must be $clog2(C_WIDTH+1)");
    end
    `endif

endmodule
|
||||
|
||||
// --------------------------------------------------
|
||||
// Multiplication Unit with one pipeline register
|
||||
// --------------------------------------------------
|
||||
module mul (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic [TRANS_ID_BITS-1:0] trans_id_i,
|
||||
input logic mult_valid_i,
|
||||
input fu_op operator_i,
|
||||
input logic [63:0] operand_a_i,
|
||||
input logic [63:0] operand_b_i,
|
||||
output logic [63:0] result_o,
|
||||
output logic mult_valid_o,
|
||||
output logic mult_ready_o,
|
||||
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
|
||||
|
||||
);
|
||||
// Pipeline register
|
||||
logic [TRANS_ID_BITS-1:0] trans_id_q;
|
||||
logic mult_valid_q;
|
||||
logic [63:0] result_q;
|
||||
// control registers
|
||||
logic sign_a, sign_b;
|
||||
logic mult_valid;
|
||||
|
||||
// control signals
|
||||
assign mult_valid_o = mult_valid_q;
|
||||
assign result_o = result_q;
|
||||
assign mult_trans_id_o = trans_id_q;
|
||||
assign mult_ready_o = 1'b1;
|
||||
|
||||
assign mult_valid = mult_valid_i && (operator_i inside {MUL, MULH, MULHU, MULHSU, MULW});
|
||||
// datapath
|
||||
logic [127:0] mult_result;
|
||||
logic [63:0] mult_result_w;
|
||||
|
||||
assign mult_result = $signed({operand_a_i[63] & sign_a, operand_a_i}) * $signed({operand_b_i[63] & sign_b, operand_b_i});
|
||||
assign mult_result_w = $signed({operand_a_i[31] & sign_a, operand_a_i[31:0]}) * $signed({operand_b_i[31] & sign_b, operand_b_i[31:0]});
|
||||
|
||||
always_comb begin : mul_div
|
||||
|
||||
// perform multiplication
|
||||
|
||||
result_o = '0;
|
||||
// Sign Select MUX
|
||||
always_comb begin
|
||||
sign_a = 1'b0;
|
||||
sign_b = 1'b0;
|
||||
|
||||
case (operator_i)
|
||||
// MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register
|
||||
MUL:
|
||||
result_o = mult_result[63:0];
|
||||
// signed multiplication
|
||||
if (operator_i == MULH) begin
|
||||
sign_a = 1'b1;
|
||||
sign_b = 1'b1;
|
||||
// signed - unsigned multiplication
|
||||
end else if (operator_i == MULHSU) begin
|
||||
sign_a = 1'b1;
|
||||
// unsigned multiplication
|
||||
end else begin
|
||||
sign_a = 1'b0;
|
||||
sign_b = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
MULH: begin
|
||||
sign_a = 1'b1;
|
||||
sign_b = 1'b1;
|
||||
result_o = mult_result[127:64];
|
||||
end
|
||||
// -----------------------
|
||||
// Output pipeline register
|
||||
// -----------------------
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (~rst_ni) begin
|
||||
mult_valid_q <= '0;
|
||||
trans_id_q <= '0;
|
||||
result_q <= '0;
|
||||
end else begin
|
||||
// Input silencing
|
||||
trans_id_q <= trans_id_i;
|
||||
// Output Register
|
||||
mult_valid_q <= mult_valid;
|
||||
|
||||
MULHU:
|
||||
result_o = mult_result[127:64];
|
||||
|
||||
MULHSU: begin
|
||||
sign_a = 1'b1;
|
||||
result_o = mult_result[127:64];
|
||||
end
|
||||
|
||||
MULW:
|
||||
result_o = sign_extend(mult_result_w[31:0]);
|
||||
|
||||
// Divisions
|
||||
DIV: begin
|
||||
result_o = $signed(operand_a_i) / $signed(operand_b_i);
|
||||
// division by zero
|
||||
// set all bits
|
||||
if (operand_b_i == '0)
|
||||
result_o = -1;
|
||||
end
|
||||
|
||||
DIVU: begin
|
||||
result_o = operand_a_i / operand_b_i;
|
||||
// division by zero
|
||||
// set all bits
|
||||
if (operand_b_i == '0)
|
||||
result_o = -1;
|
||||
end
|
||||
|
||||
DIVW: begin
|
||||
result_o = sign_extend($signed(operand_a_i[31:0]) / $signed(operand_b_i[31:0]));
|
||||
// division by zero
|
||||
// set all bits
|
||||
if (operand_b_i == '0)
|
||||
result_o = -1;
|
||||
end
|
||||
|
||||
DIVUW: begin
|
||||
result_o = sign_extend(operand_a_i[31:0] / operand_b_i[31:0]);
|
||||
// division by zero
|
||||
// set all bits
|
||||
if (operand_b_i == '0)
|
||||
result_o = -1;
|
||||
end
|
||||
|
||||
REM: begin
|
||||
result_o = $signed(operand_a_i) % $signed(operand_b_i);
|
||||
// division by zero
|
||||
if (operand_b_i == '0)
|
||||
result_o = operand_a_i;
|
||||
end
|
||||
|
||||
REMU: begin
|
||||
result_o = operand_a_i % operand_b_i;
|
||||
// division by zero
|
||||
if (operand_b_i == '0)
|
||||
result_o = operand_a_i;
|
||||
end
|
||||
|
||||
REMW: begin
|
||||
result_o = sign_extend($signed(operand_a_i[31:0]) % $signed(operand_b_i[31:0]));
|
||||
// division by zero
|
||||
if (operand_b_i == '0)
|
||||
result_o = operand_a_i;
|
||||
end
|
||||
|
||||
REMUW: begin
|
||||
result_o = sign_extend(operand_a_i[31:0] % operand_b_i[31:0]);
|
||||
// division by zero
|
||||
if (operand_b_i == '0)
|
||||
result_o = operand_a_i;
|
||||
end
|
||||
endcase
|
||||
case (operator_i)
|
||||
// MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register
|
||||
MUL: result_q <= mult_result[63:0];
|
||||
MULH: result_q <= mult_result[127:64];
|
||||
MULHU: result_q <= mult_result[127:64];
|
||||
MULHSU: result_q <= mult_result[127:64];
|
||||
MULW: result_q <= sext32(mult_result[31:0]);
|
||||
endcase
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
||||
// -----------------
|
||||
// Find First One
|
||||
// -----------------
|
||||
// Find-first-one: returns the lowest index i for which in_i[i] is set.
//
// Parameters:
//   LEN         - width of the input vector (any value > 1; it does not have
//                 to be a power of two)
// Ports:
//   in_i        - vector to search
//   first_one_o - index of the lowest set bit (all zero if there is none)
//   no_ones_o   - high if no bit of in_i is set
//
// Implemented as a binary tree. Node n has the children 2n+1 and 2n+2; every
// node forwards the index of its left child if that child has seen a one and
// the index of its right child otherwise, so lower indices win.
module alu_ff #(
    parameter int unsigned LEN = 32
)(
    input  logic [LEN-1:0]         in_i,
    output logic [$clog2(LEN)-1:0] first_one_o,
    output logic                   no_ones_o
);

    localparam int unsigned NUM_LEVELS = $clog2(LEN);

    logic [LEN-1:0] [NUM_LEVELS-1:0]           index_lut;   // constant: index_lut[j] == j
    logic [2**NUM_LEVELS-1:0]                  sel_nodes;   // "subtree contains a one"
    logic [2**NUM_LEVELS-1:0] [NUM_LEVELS-1:0] index_nodes; // index selected by the subtree

    // ----------------------------
    // Generate Tree Structure
    // ----------------------------
    generate
        for (genvar j = 0; j < LEN; j++) begin
            assign index_lut[j] = $unsigned(j);
        end
    endgenerate

    generate
        for (genvar level = 0; level < NUM_LEVELS; level++) begin

            // inner nodes: combine the two child nodes of the next level
            if (level < NUM_LEVELS-1) begin
                for (genvar l = 0; l < 2**level; l++) begin
                    assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
                    assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
                        index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
                end
            end

            // leaf nodes: each one looks at the input bits k*2 and k*2+1.
            // The range checks compare against LEN-1 (the highest valid index);
            // comparing against LEN would read in_i[LEN] for every LEN that is
            // not a power of two.
            if (level == NUM_LEVELS-1) begin
                for (genvar k = 0; k < 2**level; k++) begin
                    // if two successive indices are still in the vector...
                    if (k * 2 < LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = in_i[k*2] | in_i[k*2+1];
                        assign index_nodes[2**level-1+k] = (in_i[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
                    end
                    // if only the first index is still in the vector...
                    if (k * 2 == LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = in_i[k*2];
                        assign index_nodes[2**level-1+k] = index_lut[k*2];
                    end
                    // if index is out of range
                    if (k * 2 > LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = 1'b0;
                        assign index_nodes[2**level-1+k] = '0;
                    end
                end
            end
        end
    endgenerate

    // --------------------
    // Connect Output
    // --------------------
    // node 0 is the root of the tree
    assign first_one_o = index_nodes[0];
    assign no_ones_o   = ~sel_nodes[0];

endmodule
|
||||
|
|
377
src/nbdcache.sv
Normal file
377
src/nbdcache.sv
Normal file
|
@ -0,0 +1,377 @@
|
|||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 13.10.2017
|
||||
// Description: Nonblocking private L1 dcache
|
||||
//
|
||||
// Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
// All rights reserved.
|
||||
//
|
||||
// This code is under development and not yet released to the public.
|
||||
// Until it is released, the code is under the copyright of ETH Zurich and
|
||||
// the University of Bologna, and may contain confidential and/or unpublished
|
||||
// work. Any reuse/redistribution is strictly forbidden without written
|
||||
// permission from ETH Zurich.
|
||||
//
|
||||
// Bug fixes and contributions will eventually be released under the
|
||||
// SolderPad open hardware license in the context of the PULP platform
|
||||
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
|
||||
// University of Bologna.
|
||||
|
||||
import ariane_pkg::*;
|
||||
import nbdcache_pkg::*;
|
||||
|
||||
// Top level of the non-blocking L1 data cache.
//
// Structure (all of it visible in the instantiations below):
//   * three cache controllers, one per request port
//   * one miss handler shared by the three controllers
//   * per way: one data SRAM and one tag SRAM
//   * one SRAM holding the valid and dirty bits of all ways
//   * tag_cmp, which arbitrates between the miss handler (port 0) and the
//     controllers (ports 1..3) and produces the per-way hit vector
module nbdcache #(
    parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000,
    parameter int unsigned AXI_ID_WIDTH     = 10,
    parameter int unsigned AXI_USER_WIDTH   = 1
)(
    input  logic                           clk_i,       // Clock
    input  logic                           rst_ni,      // Asynchronous reset active low
    // Cache management
    input  logic                           enable_i,    // from CSR
    input  logic                           flush_i,     // high until acknowledged
    output logic                           flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
    output logic                           miss_o,      // we missed on a ld/st
    // Cache AXI refill port
    AXI_BUS.Master                         data_if,
    AXI_BUS.Master                         bypass_if,
    // AMO interface
    input  logic                           amo_commit_i, // commit atomic memory operation
    output logic                           amo_valid_o,  // we have a valid AMO result
    output logic [63:0]                    amo_result_o, // result of atomic memory operation
    input  logic                           amo_flush_i,  // forget about AMO
    // Request ports
    input  logic [2:0][INDEX_WIDTH-1:0]    address_index_i,
    input  logic [2:0][TAG_WIDTH-1:0]      address_tag_i,
    input  logic [2:0][63:0]               data_wdata_i,
    input  logic [2:0]                     data_req_i,
    input  logic [2:0]                     data_we_i,
    input  logic [2:0][7:0]                data_be_i,
    input  logic [2:0][1:0]                data_size_i,
    input  logic [2:0]                     kill_req_i,
    input  logic [2:0]                     tag_valid_i,
    output logic [2:0]                     data_gnt_o,
    output logic [2:0]                     data_rvalid_o,
    output logic [2:0][63:0]               data_rdata_o,
    input  amo_t [2:0]                     amo_op_i
);

    // -------------------------------
    // Requesters <-> arbiter (tag_cmp)
    // -------------------------------
    // index 0: miss handler
    // index 1: PTW
    // index 2: load unit
    // index 3: store unit
    logic        [3:0][SET_ASSOCIATIVITY-1:0] req;
    logic        [3:0][INDEX_WIDTH-1:0]       addr;
    logic        [3:0]                        gnt;
    logic        [3:0][TAG_WIDTH-1:0]         tag;
    logic        [3:0]                        we;
    cache_line_t [3:0]                        wdata;
    cl_be_t      [3:0]                        be;
    cache_line_t [SET_ASSOCIATIVITY-1:0]      rdata;
    logic        [SET_ASSOCIATIVITY-1:0]      hit_way;

    // -------------------------------
    // Controllers <-> miss handler
    // -------------------------------
    logic [2:0]                        busy;
    logic [2:0][55:0]                  mshr_addr;
    logic [2:0]                        mshr_addr_matches;
    logic [63:0]                       critical_word;
    logic                              critical_word_valid;

    logic [2:0][$bits(miss_req_t)-1:0] miss_req;
    logic [2:0]                        miss_gnt;
    logic [2:0]                        active_serving;

    logic [2:0]                        bypass_gnt;
    logic [2:0]                        bypass_valid;
    logic [2:0][63:0]                  bypass_data;

    // -------------------------------
    // Arbiter <-> memory arrays
    // -------------------------------
    logic        [SET_ASSOCIATIVITY-1:0] req_ram;
    logic        [INDEX_WIDTH-1:0]       addr_ram;
    logic                                we_ram;
    cache_line_t                         wdata_ram;
    cl_be_t                              be_ram;
    cache_line_t [SET_ASSOCIATIVITY-1:0] rdata_ram;

    // valid/dirty storage: bits [SET_ASSOCIATIVITY-1:0] carry the dirty bits,
    // the SET_ASSOCIATIVITY bits above them carry the valid bits
    logic [DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;

    // ------------------
    // Cache controllers
    // ------------------
    // controller i talks to the arbiter through port i+1
    generate
        for (genvar i = 0; i < 3; i++) begin : master_ports
            cache_ctrl #(
                .SET_ASSOCIATIVITY     ( SET_ASSOCIATIVITY     ),
                .INDEX_WIDTH           ( INDEX_WIDTH           ),
                .TAG_WIDTH             ( TAG_WIDTH             ),
                .CACHE_LINE_WIDTH      ( CACHE_LINE_WIDTH      ),
                .CACHE_START_ADDR      ( CACHE_START_ADDR      )
            ) i_cache_ctrl (
                // the cache is bypassed whenever it is not enabled
                .bypass_i              ( ~enable_i             ),
                .busy_o                ( busy            [i]   ),
                // core side
                .address_index_i       ( address_index_i [i]   ),
                .address_tag_i         ( address_tag_i   [i]   ),
                .data_wdata_i          ( data_wdata_i    [i]   ),
                .data_req_i            ( data_req_i      [i]   ),
                .data_we_i             ( data_we_i       [i]   ),
                .data_be_i             ( data_be_i       [i]   ),
                .data_size_i           ( data_size_i     [i]   ),
                .kill_req_i            ( kill_req_i      [i]   ),
                .tag_valid_i           ( tag_valid_i     [i]   ),
                .data_gnt_o            ( data_gnt_o      [i]   ),
                .data_rvalid_o         ( data_rvalid_o   [i]   ),
                .data_rdata_o          ( data_rdata_o    [i]   ),
                .amo_op_i              ( amo_op_i        [i]   ),
                // arbiter / memory side
                .req_o                 ( req             [i+1] ),
                .addr_o                ( addr            [i+1] ),
                .gnt_i                 ( gnt             [i+1] ),
                .data_i                ( rdata                 ),
                .tag_o                 ( tag             [i+1] ),
                .data_o                ( wdata           [i+1] ),
                .we_o                  ( we              [i+1] ),
                .be_o                  ( be              [i+1] ),
                .hit_way_i             ( hit_way               ),
                // miss handler side
                .miss_req_o            ( miss_req        [i]   ),
                .miss_gnt_i            ( miss_gnt        [i]   ),
                .active_serving_i      ( active_serving  [i]   ),
                .critical_word_i       ( critical_word         ),
                .critical_word_valid_i ( critical_word_valid   ),
                .bypass_gnt_i          ( bypass_gnt      [i]   ),
                .bypass_valid_i        ( bypass_valid    [i]   ),
                .bypass_data_i         ( bypass_data     [i]   ),

                .mshr_addr_o           ( mshr_addr         [i] ), // TODO
                .mshr_addr_matches_i   ( mshr_addr_matches [i] ), // TODO
                .*
            );
        end
    endgenerate

    // ------------------
    // Miss handling unit
    // ------------------
    miss_handler #(
        .NR_PORTS               ( 3                   )
    ) i_miss_handler (
        .busy_i                 ( |busy               ),
        .miss_req_i             ( miss_req            ),
        .miss_gnt_o             ( miss_gnt            ),
        .bypass_gnt_o           ( bypass_gnt          ),
        .bypass_valid_o         ( bypass_valid        ),
        .bypass_data_o          ( bypass_data         ),
        .critical_word_o        ( critical_word       ),
        .critical_word_valid_o  ( critical_word_valid ),
        .mshr_addr_i            ( mshr_addr           ),
        .mshr_addr_matches_o    ( mshr_addr_matches   ),
        .active_serving_o       ( active_serving      ),
        // the miss handler occupies arbiter port 0
        .req_o                  ( req   [0]           ),
        .addr_o                 ( addr  [0]           ),
        .gnt_i                  ( gnt   [0]           ),
        .data_i                 ( rdata               ),
        .be_o                   ( be    [0]           ),
        .data_o                 ( wdata [0]           ),
        .we_o                   ( we    [0]           ),
        .*
    );

    // port 0 never supplies a tag for comparison
    assign tag[0] = '0;

    // --------------
    // Memory arrays
    // --------------
    // one data and one tag macro per way, both enabled by that way's request bit
    generate
        for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin : sram_block
            sram #(
                .DATA_WIDTH ( CACHE_LINE_WIDTH                    ),
                .NUM_WORDS  ( NUM_WORDS                           )
            ) data_sram (
                .req_i      ( req_ram [i]                         ),
                .we_i       ( we_ram                              ),
                .addr_i     ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
                .wdata_i    ( wdata_ram.data                      ),
                .be_i       ( be_ram.data                         ),
                .rdata_o    ( rdata_ram[i].data                   ),
                .*
            );

            sram #(
                .DATA_WIDTH ( TAG_WIDTH                           ),
                .NUM_WORDS  ( NUM_WORDS                           )
            ) tag_sram (
                .req_i      ( req_ram [i]                         ),
                .we_i       ( we_ram                              ),
                .addr_i     ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
                .wdata_i    ( wdata_ram.tag                       ),
                .be_i       ( be_ram.tag                          ),
                .rdata_o    ( rdata_ram[i].tag                    ),
                .*
            );
        end
    endgenerate

    // ----------------
    // Valid/dirty SRAM
    // ----------------
    // the same valid/dirty write value is offered to every way, the byte
    // enables decide which way is actually updated
    generate
        for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin
            assign dirty_wdata[i]                     = wdata_ram.dirty;
            assign dirty_wdata[SET_ASSOCIATIVITY + i] = wdata_ram.valid;
            assign rdata_ram[i].dirty                 = dirty_rdata[i];
            assign rdata_ram[i].valid                 = dirty_rdata[SET_ASSOCIATIVITY + i];
        end
    endgenerate

    sram #(
        .DATA_WIDTH ( DIRTY_WIDTH                         ),
        .NUM_WORDS  ( NUM_WORDS                           )
    ) dirty_sram (
        .clk_i      ( clk_i                               ),
        // accessed whenever any way is accessed
        .req_i      ( |req_ram                            ),
        .we_i       ( we_ram                              ),
        .addr_i     ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
        .wdata_i    ( dirty_wdata                         ),
        .be_i       ( {be_ram.valid, be_ram.dirty}        ),
        .rdata_o    ( dirty_rdata                         )
    );

    // ------------------------------------------------
    // Tag comparison and memory arbitration
    // ------------------------------------------------
    tag_cmp #(
        .NR_PORTS          ( 4                 ),
        .ADDR_WIDTH        ( INDEX_WIDTH       ),
        .SET_ASSOCIATIVITY ( SET_ASSOCIATIVITY )
    ) i_tag_cmp (
        // requester side
        .req_i     ( req       ),
        .gnt_o     ( gnt       ),
        .addr_i    ( addr      ),
        .wdata_i   ( wdata     ),
        .we_i      ( we        ),
        .be_i      ( be        ),
        .rdata_o   ( rdata     ),
        .tag_i     ( tag       ),
        .hit_way_o ( hit_way   ),
        // memory side
        .req_o     ( req_ram   ),
        .addr_o    ( addr_ram  ),
        .wdata_o   ( wdata_ram ),
        .we_o      ( we_ram    ),
        .be_o      ( be_ram    ),
        .rdata_i   ( rdata_ram ),
        .*
    );

    // ------------------------------------------------
    // Configuration checks (simulation only)
    // ------------------------------------------------
    `ifndef SYNTHESIS
    initial begin
        assert ($bits(data_if.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus");
        assert (CACHE_LINE_WIDTH/64 inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of 64");
    end
    `endif
endmodule
|
||||
|
||||
// --------------
|
||||
// Tag Compare
|
||||
// --------------
|
||||
//
|
||||
// Description: Arbitrates access to cache memories, simplified request grant protocol
|
||||
// checks for hit or miss on cache
|
||||
//
|
||||
// Memory arbiter and tag comparator.
//
// Request side: a fixed-priority scan over the requester ports (port 0 first)
// forwards the first port with a non-zero request vector to the memories.
// The forwarded port is remembered (one-hot, id_q) so that in the following
// cycle its tag input can be compared against the tags read from all ways.
module tag_cmp #(
    parameter int unsigned NR_PORTS          = 3,
    parameter int unsigned ADDR_WIDTH        = 64,
    parameter type data_t                    = cache_line_t,
    parameter type be_t                      = cl_be_t,
    parameter int unsigned SET_ASSOCIATIVITY = 8
)(
    input  logic                                      clk_i,
    input  logic                                      rst_ni,
    // requester side
    input  logic  [NR_PORTS-1:0][SET_ASSOCIATIVITY-1:0] req_i,
    output logic  [NR_PORTS-1:0]                        gnt_o,
    input  logic  [NR_PORTS-1:0][ADDR_WIDTH-1:0]        addr_i,
    input  data_t [NR_PORTS-1:0]                        wdata_i,
    input  logic  [NR_PORTS-1:0]                        we_i,
    input  be_t   [NR_PORTS-1:0]                        be_i,
    output data_t [SET_ASSOCIATIVITY-1:0]               rdata_o,
    input  logic  [NR_PORTS-1:0][TAG_WIDTH-1:0]         tag_i,     // tag in - comes one cycle later
    output logic  [SET_ASSOCIATIVITY-1:0]               hit_way_o, // we've got a hit on the corresponding way
    // memory side
    output logic  [SET_ASSOCIATIVITY-1:0]               req_o,
    output logic  [ADDR_WIDTH-1:0]                      addr_o,
    output data_t                                       wdata_o,
    output logic                                        we_o,
    output be_t                                         be_o,
    input  data_t [SET_ASSOCIATIVITY-1:0]               rdata_i
);

    // port that was forwarded to the memories, one hot encoded
    logic [NR_PORTS-1:0]  id_d, id_q;
    // tag of the port that was forwarded in the previous cycle
    logic [TAG_WIDTH-1:0] sel_tag;

    // read data is passed through unchanged
    assign rdata_o = rdata_i;

    // pick the tag belonging to the registered port id
    always_comb begin : tag_sel
        sel_tag = '0;
        for (int unsigned i = 0; i < NR_PORTS; i++) begin
            if (id_q[i]) begin
                sel_tag = tag_i[i];
            end
        end
    end

    // a way hits if its stored tag equals the selected tag and the line is valid
    generate
        for (genvar j = 0; j < SET_ASSOCIATIVITY; j++) begin : tag_cmp
            assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0;
        end
    endgenerate

    always_comb begin
        // defaults
        req_o   = '0;
        addr_o  = '0;
        wdata_o = '0;
        we_o    = '0;
        be_o    = '0;
        gnt_o   = '0;
        id_d    = '0;

        // Request side: priority select.
        // Every port visited by the scan gets its grant bit set and drives the
        // memory-side outputs; the scan stops at the first requesting port, so
        // that port's values are the ones which remain.
        for (int unsigned i = 0; i < NR_PORTS; i++) begin
            gnt_o[i] = 1'b1;
            id_d     = (1'b1 << i);
            req_o    = req_i[i];
            addr_o   = addr_i[i];
            wdata_o  = wdata_i[i];
            we_o     = we_i[i];
            be_o     = be_i[i];

            if (req_i[i]) begin
                break;
            end
        end

        `ifndef SYNTHESIS
        `ifndef VERILATOR
        // assert that cache only hits on one way
        assert property (
            @(posedge clk_i) $onehot0(hit_way_o)) else begin $error("Hit should be one-hot encoded"); $stop(); end
        `endif
        `endif
    end

    // remember which port was forwarded
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            id_q <= 0;
        end else begin
            id_q <= id_d;
        end
    end

endmodule
|
||||
|
|
@ -79,7 +79,8 @@ module pcgen_stage (
|
|||
// 6. Debug
|
||||
// Mis-predict handling is a little bit different
|
||||
always_comb begin : npc_select
|
||||
automatic logic [63:0] fetch_address = npc_q;
|
||||
automatic logic [63:0] fetch_address;
|
||||
fetch_address = npc_q;
|
||||
|
||||
branch_predict_o = branch_predict_btb;
|
||||
fetch_valid_o = 1'b1;
|
||||
|
|
122
src/perf_counters.sv
Normal file
122
src/perf_counters.sv
Normal file
|
@ -0,0 +1,122 @@
|
|||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 06.10.2017
|
||||
// Description: Performance counters
|
||||
//
|
||||
//
|
||||
// Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
// All rights reserved.
|
||||
//
|
||||
// This code is under development and not yet released to the public.
|
||||
// Until it is released, the code is under the copyright of ETH Zurich and
|
||||
// the University of Bologna, and may contain confidential and/or unpublished
|
||||
// work. Any reuse/redistribution is strictly forbidden without written
|
||||
// permission from ETH Zurich.
|
||||
//
|
||||
// Bug fixes and contributions will eventually be released under the
|
||||
// SolderPad open hardware license in the context of the PULP platform
|
||||
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
|
||||
// University of Bologna.
|
||||
//
|
||||
import ariane_pkg::*;
|
||||
|
||||
// Performance counter bank.
//
// Holds twelve 64-bit event counters, indexed by the PERF_* constants from
// ariane_pkg. Every cycle each counter whose event input is asserted is
// incremented by one. The counters are exposed through a simple SRAM-like
// port: data_o always shows the registered value of the selected counter, and
// a write replaces the selected counter with data_i.
//
// NOTE(review): the counter is selected with addr_i[2:0] only, so at most
// eight of the twelve entries can be reached through this port. Confirm
// against the CSR file which address bits are meant to select the counter.
module perf_counters #(
    int unsigned NR_EXTERNAL_COUNTERS = 1 // not referenced inside this module
)(
    input  logic              clk_i,             // Clock
    input  logic              rst_ni,            // Asynchronous reset active low
    // SRAM like interface
    input  logic [11:0]       addr_i,            // read/write address
    input  logic              we_i,              // write enable
    input  logic [63:0]       data_i,            // data to write
    output logic [63:0]       data_o,            // data to read
    // from commit stage
    input  scoreboard_entry_t commit_instr_i,
    input  logic              commit_ack_o,      // declared as an input despite the _o suffix
    // from L1 caches
    input  logic              l1_icache_miss_i,
    input  logic              l1_dcache_miss_i,
    // from MMU
    input  logic              itlb_miss_i,
    input  logic              dtlb_miss_i,
    // from PC Gen
    input  exception_t        ex_i,
    input  logic              eret_i,
    input  branchpredict_t    resolved_branch_i
);

    // next-state / current-state of all counters
    logic [11:0][63:0] perf_counter_d, perf_counter_q;

    // counter addressed by the SRAM-like port (low three address bits only)
    logic [2:0] counter_sel;
    assign counter_sel = addr_i[2:0];

    always_comb begin : perf_counters
        // by default every counter keeps its value
        perf_counter_d = perf_counter_q;

        // ------------------------------
        // Update Performance Counters
        // ------------------------------
        // cache misses
        if (l1_icache_miss_i) begin
            perf_counter_d[PERF_L1_ICACHE_MISS] = perf_counter_q[PERF_L1_ICACHE_MISS] + 1'b1;
        end
        if (l1_dcache_miss_i) begin
            perf_counter_d[PERF_L1_DCACHE_MISS] = perf_counter_q[PERF_L1_DCACHE_MISS] + 1'b1;
        end

        // TLB misses
        if (itlb_miss_i) begin
            perf_counter_d[PERF_ITLB_MISS] = perf_counter_q[PERF_ITLB_MISS] + 1'b1;
        end
        if (dtlb_miss_i) begin
            perf_counter_d[PERF_DTLB_MISS] = perf_counter_q[PERF_DTLB_MISS] + 1'b1;
        end

        // instruction related counters, only evaluated when an instruction commits
        if (commit_ack_o) begin
            if (commit_instr_i.fu == LOAD) begin
                perf_counter_d[PERF_LOAD] = perf_counter_q[PERF_LOAD] + 1'b1;
            end

            if (commit_instr_i.fu == STORE) begin
                perf_counter_d[PERF_STORE] = perf_counter_q[PERF_STORE] + 1'b1;
            end

            if (commit_instr_i.fu == CTRL_FLOW) begin
                // every committed control flow instruction counts as branch/jump
                perf_counter_d[PERF_BRANCH_JUMP] = perf_counter_q[PERF_BRANCH_JUMP] + 1'b1;

                // The standard software calling convention uses register x1 to hold
                // the return address on a call; the unconditional jump is decoded
                // as ADD op (compared against '0 here).
                if (commit_instr_i.op == '0 && commit_instr_i.rd == 'b1) begin
                    perf_counter_d[PERF_CALL] = perf_counter_q[PERF_CALL] + 1'b1;
                end
            end

            // Return from call: JALR with x1 as source register
            if (commit_instr_i.op == JALR && commit_instr_i.rs1 == 'b1) begin
                perf_counter_d[PERF_RET] = perf_counter_q[PERF_RET] + 1'b1;
            end
        end

        // exceptions and exception returns
        if (ex_i.valid) begin
            perf_counter_d[PERF_EXCEPTION] = perf_counter_q[PERF_EXCEPTION] + 1'b1;
        end
        if (eret_i) begin
            perf_counter_d[PERF_EXCEPTION_RET] = perf_counter_q[PERF_EXCEPTION_RET] + 1'b1;
        end

        // resolved branches that turned out to be mis-predicted
        if (resolved_branch_i.valid && resolved_branch_i.is_mispredict) begin
            perf_counter_d[PERF_MIS_PREDICT] = perf_counter_q[PERF_MIS_PREDICT] + 1'b1;
        end

        // ------------------------------
        // SRAM-like access port
        // ------------------------------
        // The registered value of the selected counter is output on reads and
        // on writes alike.
        data_o = perf_counter_q[counter_sel];

        if (!we_i) begin
            // read access: the counters keep the values computed above
        end else begin
            // write access: assigned last, so it overrides an event increment
            // of the same counter in this cycle
            perf_counter_d[counter_sel] = data_i;
        end
    end

    // ----------------
    // Registers
    // ----------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            // all counters start from zero after reset
            perf_counter_q <= '0;
        end else begin
            perf_counter_q <= perf_counter_d;
        end
    end

endmodule
|
27
src/ptw.sv
27
src/ptw.sv
|
@ -44,6 +44,7 @@ module ptw #(
|
|||
output logic data_req_o,
|
||||
output logic data_we_o,
|
||||
output logic [7:0] data_be_o,
|
||||
output logic [1:0] data_size_o,
|
||||
output logic kill_req_o,
|
||||
output logic tag_valid_o,
|
||||
input logic data_gnt_i,
|
||||
|
@ -70,12 +71,18 @@ module ptw #(
|
|||
input logic [63:0] dtlb_vaddr_i,
|
||||
// from CSR file
|
||||
input logic [43:0] satp_ppn_i, // ppn from satp
|
||||
input logic mxr_i
|
||||
input logic mxr_i,
|
||||
// Performance counters
|
||||
output logic itlb_miss_o,
|
||||
output logic dtlb_miss_o
|
||||
|
||||
);
|
||||
// input registers
|
||||
logic data_rvalid_q;
|
||||
logic [63:0] data_rdata_q;
|
||||
|
||||
pte_t pte;
|
||||
assign pte = pte_t'(data_rdata_i);
|
||||
assign pte = pte_t'(data_rdata_q);
|
||||
|
||||
enum logic[2:0] {
|
||||
IDLE,
|
||||
|
@ -151,6 +158,7 @@ module ptw #(
|
|||
tag_valid_n = 1'b0;
|
||||
data_req_o = 1'b0;
|
||||
data_be_o = 8'hFF;
|
||||
data_size_o = 2'b11;
|
||||
data_we_o = 1'b0;
|
||||
ptw_error_o = 1'b0;
|
||||
itlb_update_o = 1'b0;
|
||||
|
@ -165,6 +173,9 @@ module ptw #(
|
|||
vaddr_n = vaddr_q;
|
||||
faulting_address_o = '0;
|
||||
|
||||
itlb_miss_o = 1'b0;
|
||||
dtlb_miss_o = 1'b0;
|
||||
|
||||
case (CS)
|
||||
|
||||
IDLE: begin
|
||||
|
@ -179,12 +190,14 @@ module ptw #(
|
|||
tlb_update_asid_n = asid_i;
|
||||
vaddr_n = itlb_vaddr_i;
|
||||
NS = WAIT_GRANT;
|
||||
itlb_miss_o = 1'b1;
|
||||
// we got a DTLB miss
|
||||
end else if (en_ld_st_translation_i & dtlb_access_i & dtlb_miss_i) begin
|
||||
ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[38:30], 3'b0};
|
||||
tlb_update_asid_n = asid_i;
|
||||
vaddr_n = dtlb_vaddr_i;
|
||||
NS = WAIT_GRANT;
|
||||
dtlb_miss_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -201,7 +214,7 @@ module ptw #(
|
|||
|
||||
PTE_LOOKUP: begin
|
||||
// we wait for the valid signal
|
||||
if (data_rvalid_i) begin
|
||||
if (data_rvalid_q) begin
|
||||
|
||||
// check if the global mapping bit is set
|
||||
if (pte.g)
|
||||
|
@ -303,7 +316,7 @@ module ptw #(
|
|||
end
|
||||
// wait for the rvalid before going back to IDLE
|
||||
WAIT_RVALID: begin
|
||||
if (data_rvalid_i)
|
||||
if (data_rvalid_q)
|
||||
NS = IDLE;
|
||||
end
|
||||
endcase
|
||||
|
@ -324,7 +337,7 @@ module ptw #(
|
|||
|
||||
// sequential process
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if(~rst_ni) begin
|
||||
if (~rst_ni) begin
|
||||
CS <= IDLE;
|
||||
is_instr_ptw_q <= 1'b0;
|
||||
ptw_lvl_q <= LVL1;
|
||||
|
@ -333,6 +346,8 @@ module ptw #(
|
|||
vaddr_q <= '0;
|
||||
ptw_pptr_q <= '{default: 0};
|
||||
global_mapping_q <= 1'b0;
|
||||
data_rdata_q <= '0;
|
||||
data_rvalid_q <= 1'b0;
|
||||
end else begin
|
||||
CS <= NS;
|
||||
ptw_pptr_q <= ptw_pptr_n;
|
||||
|
@ -342,6 +357,8 @@ module ptw #(
|
|||
tlb_update_asid_q <= tlb_update_asid_n;
|
||||
vaddr_q <= vaddr_n;
|
||||
global_mapping_q <= global_mapping_n;
|
||||
data_rdata_q <= data_rdata_i;
|
||||
data_rvalid_q <= data_rvalid_i;
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ module regfile
|
|||
);
|
||||
|
||||
|
||||
localparam ADDR_WIDTH = 5;;
|
||||
localparam ADDR_WIDTH = 5;
|
||||
localparam NUM_WORDS = 2**ADDR_WIDTH;
|
||||
|
||||
logic [DATA_WIDTH-1:0] mem[NUM_WORDS];
|
||||
|
@ -153,4 +153,4 @@ module regfile
|
|||
end
|
||||
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
|
|
|
@ -95,7 +95,9 @@ module scoreboard #(
|
|||
// maintain a FIFO with issued instructions
|
||||
// keep track of all issued instructions
|
||||
always_comb begin : issue_fifo
|
||||
automatic logic [$clog2(NR_ENTRIES)-1:0] issue_cnt = issue_cnt_q;
|
||||
automatic logic [$clog2(NR_ENTRIES)-1:0] issue_cnt;
|
||||
issue_cnt = issue_cnt_q;
|
||||
|
||||
// default assignment
|
||||
mem_n = mem_q;
|
||||
commit_pointer_n = commit_pointer_q;
|
||||
|
|
|
@ -34,9 +34,12 @@ module store_buffer (
|
|||
// it is only ready if it can unconditionally commit the instruction, e.g.:
|
||||
// the commit buffer needs to be empty
|
||||
input logic valid_i, // this is a valid store
|
||||
input logic valid_without_flush_i, // just tell if the address is valid which we are current putting and do not take any further action
|
||||
|
||||
input logic [63:0] paddr_i, // physical address of store which needs to be placed in the queue
|
||||
input logic [63:0] data_i, // data which is placed in the queue
|
||||
input logic [7:0] be_i, // byte enable in
|
||||
input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write)
|
||||
|
||||
// D$ interface
|
||||
output logic [11:0] address_index_o,
|
||||
|
@ -45,6 +48,7 @@ module store_buffer (
|
|||
output logic data_req_o,
|
||||
output logic data_we_o,
|
||||
output logic [7:0] data_be_o,
|
||||
output logic [1:0] data_size_o,
|
||||
output logic kill_req_o,
|
||||
output logic tag_valid_o,
|
||||
input logic data_gnt_i,
|
||||
|
@ -55,10 +59,6 @@ module store_buffer (
|
|||
// allocate more space for the commit buffer to be on the safe side
|
||||
localparam int unsigned DEPTH_COMMIT = 4;
|
||||
|
||||
// we need to keep the tag portion of the address for a cycle later
|
||||
logic [43:0] address_tag_n, address_tag_q;
|
||||
logic tag_valid_n, tag_valid_q;
|
||||
|
||||
// the store queue has two parts:
|
||||
// 1. Speculative queue
|
||||
// 2. Commit queue which is non-speculative, e.g.: the store will definitely happen.
|
||||
|
@ -67,6 +67,7 @@ module store_buffer (
|
|||
logic [63:0] address;
|
||||
logic [63:0] data;
|
||||
logic [7:0] be;
|
||||
logic [1:0] data_size;
|
||||
logic valid; // this entry is valid, we need this for checking if the address offset matches
|
||||
} speculative_queue_n [DEPTH_SPEC-1:0], speculative_queue_q [DEPTH_SPEC-1:0],
|
||||
commit_queue_n [DEPTH_COMMIT-1:0], commit_queue_q [DEPTH_COMMIT-1:0];
|
||||
|
@ -85,7 +86,8 @@ module store_buffer (
|
|||
// Speculative Queue - Core Interface
|
||||
// ----------------------------------------
|
||||
always_comb begin : core_if
|
||||
automatic logic [DEPTH_SPEC:0] speculative_status_cnt = speculative_status_cnt_q;
|
||||
automatic logic [DEPTH_SPEC:0] speculative_status_cnt;
|
||||
speculative_status_cnt = speculative_status_cnt_q;
|
||||
|
||||
// we are ready if the speculative and the commit queue have a space left
|
||||
ready_o = (speculative_status_cnt_q < (DEPTH_SPEC - 1)) || commit_i;
|
||||
|
@ -97,9 +99,10 @@ module store_buffer (
|
|||
// LSU interface
|
||||
// we are ready to accept a new entry and the input data is valid
|
||||
if (valid_i) begin
|
||||
speculative_queue_n[speculative_write_pointer_q].address = paddr_i;
|
||||
speculative_queue_n[speculative_write_pointer_q].data = data_i;
|
||||
speculative_queue_n[speculative_write_pointer_q].be = be_i;
|
||||
speculative_queue_n[speculative_write_pointer_q].address = paddr_i;
|
||||
speculative_queue_n[speculative_write_pointer_q].data = data_i;
|
||||
speculative_queue_n[speculative_write_pointer_q].be = be_i;
|
||||
speculative_queue_n[speculative_write_pointer_q].data_size = data_size_i;
|
||||
speculative_queue_n[speculative_write_pointer_q].valid = 1'b1;
|
||||
// advance the write pointer
|
||||
speculative_write_pointer_n = speculative_write_pointer_q + 1'b1;
|
||||
|
@ -136,17 +139,20 @@ module store_buffer (
|
|||
// those signals can directly be output to the memory
|
||||
assign address_index_o = commit_queue_q[commit_read_pointer_q].address[11:0];
|
||||
// if we got a new request we already saved the tag from the previous cycle
|
||||
assign address_tag_o = address_tag_q;
|
||||
assign tag_valid_o = tag_valid_q;
|
||||
assign address_tag_o = commit_queue_q[commit_read_pointer_q].address[55:12];
|
||||
assign tag_valid_o = 1'b0;
|
||||
assign data_wdata_o = commit_queue_q[commit_read_pointer_q].data;
|
||||
assign data_be_o = commit_queue_q[commit_read_pointer_q].be;
|
||||
assign data_size_o = commit_queue_q[commit_read_pointer_q].data_size;
|
||||
// we will never kill a request in the store buffer since we already know that the translation is valid
|
||||
// e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
|
||||
assign kill_req_o = 1'b0;
|
||||
assign data_we_o = 1'b1; // we will always write in the store queue
|
||||
|
||||
always_comb begin : store_if
|
||||
automatic logic [DEPTH_COMMIT:0] commit_status_cnt = commit_status_cnt_q;
|
||||
automatic logic [DEPTH_COMMIT:0] commit_status_cnt;
|
||||
commit_status_cnt = commit_status_cnt_q;
|
||||
|
||||
commit_ready_o = (commit_status_cnt_q < DEPTH_COMMIT);
|
||||
// no store is pending if we don't have any element in the commit queue e.g.: it is empty
|
||||
no_st_pending_o = (commit_status_cnt_q == 0);
|
||||
|
@ -154,10 +160,8 @@ module store_buffer (
|
|||
commit_read_pointer_n = commit_read_pointer_q;
|
||||
commit_write_pointer_n = commit_write_pointer_q;
|
||||
|
||||
address_tag_n = address_tag_q;
|
||||
commit_queue_n = commit_queue_q;
|
||||
|
||||
tag_valid_n = 1'b0;
|
||||
data_req_o = 1'b0;
|
||||
|
||||
// there should be no commit when we are flushing
|
||||
|
@ -167,10 +171,6 @@ module store_buffer (
|
|||
if (data_gnt_i) begin
|
||||
// we can evict it from the commit buffer
|
||||
commit_queue_n[commit_read_pointer_q].valid = 1'b0;
|
||||
// save the tag portion
|
||||
address_tag_n = commit_queue_q[commit_read_pointer_q].address[55:12];
|
||||
// signal a valid tag the cycle afterwards
|
||||
tag_valid_n = 1'b1;
|
||||
// advance the read_pointer
|
||||
commit_read_pointer_n = commit_read_pointer_q + 1'b1;
|
||||
commit_status_cnt--;
|
||||
|
@ -222,7 +222,7 @@ module store_buffer (
|
|||
end
|
||||
end
|
||||
// or it matches with the entry we are currently putting into the queue
|
||||
if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_i) begin
|
||||
if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_without_flush_i) begin
|
||||
page_offset_matches_o = 1'b1;
|
||||
end
|
||||
end
|
||||
|
@ -231,9 +231,7 @@ module store_buffer (
|
|||
// registers
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
|
||||
if(~rst_ni) begin
|
||||
address_tag_q <= 'b0;
|
||||
tag_valid_q <= 1'b0;
|
||||
// initialize the queues
|
||||
// initialize the queues
|
||||
speculative_queue_q <= '{default: 0};
|
||||
commit_queue_q <= '{default: 0};
|
||||
commit_read_pointer_q <= '0;
|
||||
|
@ -243,8 +241,6 @@ module store_buffer (
|
|||
speculative_write_pointer_q <= '0;
|
||||
speculative_status_cnt_q <= '0;
|
||||
end else begin
|
||||
address_tag_q <= address_tag_n;
|
||||
tag_valid_q <= tag_valid_n;
|
||||
speculative_queue_q <= speculative_queue_n;
|
||||
commit_queue_q <= commit_queue_n;
|
||||
commit_read_pointer_q <= commit_read_pointer_n;
|
||||
|
|
|
@ -51,6 +51,7 @@ module store_unit (
|
|||
output logic data_req_o,
|
||||
output logic data_we_o,
|
||||
output logic [7:0] data_be_o,
|
||||
output logic [1:0] data_size_o,
|
||||
output logic kill_req_o,
|
||||
output logic tag_valid_o,
|
||||
input logic data_gnt_i,
|
||||
|
@ -63,10 +64,12 @@ module store_unit (
|
|||
// store buffer control signals
|
||||
logic st_ready;
|
||||
logic st_valid;
|
||||
logic st_valid_without_flush;
|
||||
|
||||
// keep the data and the byte enable for the second cycle (after address translation)
|
||||
logic [63:0] st_data_n, st_data_q;
|
||||
logic [7:0] st_be_n, st_be_q;
|
||||
logic [1:0] st_data_size_n, st_data_size_q;
|
||||
logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
|
||||
|
||||
// output assignments
|
||||
|
@ -74,13 +77,14 @@ module store_unit (
|
|||
assign trans_id_o = trans_id_q; // transaction id from previous cycle
|
||||
|
||||
always_comb begin : store_control
|
||||
translation_req_o = 1'b0;
|
||||
valid_o = 1'b0;
|
||||
st_valid = 1'b0;
|
||||
pop_st_o = 1'b0;
|
||||
ex_o = ex_i;
|
||||
trans_id_n = lsu_ctrl_i.trans_id;
|
||||
NS = CS;
|
||||
translation_req_o = 1'b0;
|
||||
valid_o = 1'b0;
|
||||
st_valid = 1'b0;
|
||||
st_valid_without_flush = 1'b0;
|
||||
pop_st_o = 1'b0;
|
||||
ex_o = ex_i;
|
||||
trans_id_n = lsu_ctrl_i.trans_id;
|
||||
NS = CS;
|
||||
|
||||
case (CS)
|
||||
// we got a valid store
|
||||
|
@ -110,6 +114,8 @@ module store_unit (
|
|||
if (!flush_i)
|
||||
st_valid = 1'b1;
|
||||
|
||||
st_valid_without_flush = 1'b1;
|
||||
|
||||
// we have another request
|
||||
if (valid_i) begin
|
||||
|
||||
|
@ -175,8 +181,10 @@ module store_unit (
|
|||
// -----------
|
||||
// re-align the write data to comply with the address offset
|
||||
always_comb begin
|
||||
st_be_n = lsu_ctrl_i.be;
|
||||
st_data_n = lsu_ctrl_i.data;
|
||||
st_be_n = lsu_ctrl_i.be;
|
||||
st_data_n = lsu_ctrl_i.data;
|
||||
st_data_size_n = extract_transfer_size(lsu_ctrl_i.operator);
|
||||
|
||||
case (lsu_ctrl_i.vaddr[2:0])
|
||||
3'b000: st_data_n = lsu_ctrl_i.data;
|
||||
3'b001: st_data_n = {lsu_ctrl_i.data[55:0], lsu_ctrl_i.data[63:56]};
|
||||
|
@ -193,11 +201,15 @@ module store_unit (
|
|||
// ---------------
|
||||
store_buffer store_buffer_i (
|
||||
// store queue write port
|
||||
.valid_i ( st_valid ),
|
||||
.data_i ( st_data_q ),
|
||||
.be_i ( st_be_q ),
|
||||
.valid_i ( st_valid ),
|
||||
.valid_without_flush_i ( st_valid_without_flush ), // the flush signal can be critical and we need this valid
|
||||
// signal to check whether the page_offset matches or not, functionally it doesn't
|
||||
// make a difference whether we use the correct valid signal or not as we are flushing the whole pipeline anyway
|
||||
.data_i ( st_data_q ),
|
||||
.be_i ( st_be_q ),
|
||||
.data_size_i ( st_data_size_q ),
|
||||
// store buffer out
|
||||
.ready_o ( st_ready ),
|
||||
.ready_o ( st_ready ),
|
||||
.*
|
||||
);
|
||||
// ---------------
|
||||
|
@ -205,15 +217,17 @@ module store_unit (
|
|||
// ---------------
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if(~rst_ni) begin
|
||||
CS <= IDLE;
|
||||
st_be_q <= '0;
|
||||
st_data_q <= '0;
|
||||
trans_id_q <= '0;
|
||||
CS <= IDLE;
|
||||
st_be_q <= '0;
|
||||
st_data_q <= '0;
|
||||
st_data_size_q <= '0;
|
||||
trans_id_q <= '0;
|
||||
end else begin
|
||||
CS <= NS;
|
||||
st_be_q <= st_be_n;
|
||||
st_data_q <= st_data_n;
|
||||
trans_id_q <= trans_id_n;
|
||||
CS <= NS;
|
||||
st_be_q <= st_be_n;
|
||||
st_data_q <= st_data_n;
|
||||
trans_id_q <= trans_id_n;
|
||||
st_data_size_q <= st_data_size_n;
|
||||
end
|
||||
end
|
||||
|
||||
|
|
21
src/tlb.sv
21
src/tlb.sv
|
@ -164,15 +164,16 @@ module tlb #(
|
|||
// default: begin /* No hit */ end
|
||||
// endcase
|
||||
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
|
||||
automatic int unsigned idx_base, shift, new_index;
|
||||
// we got a hit so update the pointer as it was least recently used
|
||||
if (lu_hit[i] & lu_access_i) begin
|
||||
// Set the nodes to the values we would expect
|
||||
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
|
||||
automatic int unsigned idx_base = $unsigned((2**lvl)-1);
|
||||
idx_base = $unsigned((2**lvl)-1);
|
||||
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
|
||||
automatic int unsigned shift = $clog2(TLB_ENTRIES) - lvl;
|
||||
shift = $clog2(TLB_ENTRIES) - lvl;
|
||||
// to circumvent the 32 bit integer arithmetic assignment
|
||||
automatic int unsigned new_index = ~((i >> (shift-1)) & 32'b1);
|
||||
new_index = ~((i >> (shift-1)) & 32'b1);
|
||||
plru_tree_n[idx_base + (i >> shift)] = new_index[0];
|
||||
end
|
||||
end
|
||||
|
@ -192,15 +193,17 @@ module tlb #(
|
|||
// the corresponding bit of the entry's index, this is
|
||||
// the next entry to replace.
|
||||
for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
|
||||
automatic logic en = 1'b1;
|
||||
automatic logic en;
|
||||
automatic int unsigned idx_base, shift, new_index;
|
||||
en = 1'b1;
|
||||
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
|
||||
automatic int unsigned idx_base = $unsigned((2**lvl)-1);
|
||||
idx_base = $unsigned((2**lvl)-1);
|
||||
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
|
||||
automatic int unsigned shift = $clog2(TLB_ENTRIES) - lvl;
|
||||
shift = $clog2(TLB_ENTRIES) - lvl;
|
||||
|
||||
// en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
|
||||
automatic int unsigned new_index = (i >> (shift-1)) & 32'b1;
|
||||
if(new_index[0]) begin
|
||||
new_index = (i >> (shift-1)) & 32'b1;
|
||||
if (new_index[0]) begin
|
||||
en &= plru_tree_q[idx_base + (i>>shift)];
|
||||
end else begin
|
||||
en &= ~plru_tree_q[idx_base + (i>>shift)];
|
||||
|
@ -239,7 +242,7 @@ module tlb #(
|
|||
// Just for checking
|
||||
function int countSetBits(logic[TLB_ENTRIES-1:0] vector);
|
||||
automatic int count = 0;
|
||||
foreach(vector[idx]) begin
|
||||
foreach (vector[idx]) begin
|
||||
count += vector[idx];
|
||||
end
|
||||
return count;
|
||||
|
|
52
src/util/behav_sram.sv
Executable file
52
src/util/behav_sram.sv
Executable file
|
@ -0,0 +1,52 @@
|
|||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 13.10.2017
|
||||
// Description: SRAM Behavioral Model
|
||||
//
|
||||
// Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
// All rights reserved.
|
||||
//
|
||||
// This code is under development and not yet released to the public.
|
||||
// Until it is released, the code is under the copyright of ETH Zurich and
|
||||
// the University of Bologna, and may contain confidential and/or unpublished
|
||||
// work. Any reuse/redistribution is strictly forbidden without written
|
||||
// permission from ETH Zurich.
|
||||
//
|
||||
// Bug fixes and contributions will eventually be released under the
|
||||
// SolderPad open hardware license in the context of the PULP platform
|
||||
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
|
||||
// University of Bologna.
|
||||
|
||||
// Behavioral single-port SRAM model.
//
// A request (req_i) either writes (we_i set) or latches the read address
// (we_i clear). Writes are masked per bit by be_i. rdata_o continuously shows
// the word at the most recently latched read address.
module sram #(
    int unsigned DATA_WIDTH = 64,
    int unsigned NUM_WORDS  = 1024
)(
    input  logic                          clk_i,

    input  logic                          req_i,    // access request
    input  logic                          we_i,     // 1: write, 0: read
    input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,   // word address
    input  logic [DATA_WIDTH-1:0]         wdata_i,  // write data
    input  logic [DATA_WIDTH-1:0]         be_i,     // per-bit write mask
    output logic [DATA_WIDTH-1:0]         rdata_o   // read data
);
    localparam ADDR_WIDTH = $clog2(NUM_WORDS);

    // storage array and registered read address
    logic [DATA_WIDTH-1:0] ram [NUM_WORDS-1:0];
    logic [ADDR_WIDTH-1:0] raddr_q;

    // TODO (from the original author):
    // 1. randomize array
    // 2. randomize output when no request is active

    always @(posedge clk_i) begin
        if (req_i) begin
            if (!we_i) begin
                // read: only remember the address, the data is muxed out below
                raddr_q <= addr_i;
            end else begin
                // write: update just the bits enabled in the mask
                for (int bit_idx = 0; bit_idx < DATA_WIDTH; bit_idx++) begin
                    if (be_i[bit_idx]) begin
                        ram[addr_i][bit_idx] <= wdata_i[bit_idx];
                    end
                end
            end
        end
    end

    // read data follows the registered read address
    assign rdata_o = ram[raddr_q];

endmodule
|
98
src/util/gf22_sram.sv
Executable file
98
src/util/gf22_sram.sv
Executable file
|
@ -0,0 +1,98 @@
|
|||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 13.10.2017
|
||||
// Description: SRAM Model for GF22
|
||||
//
|
||||
// Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
// All rights reserved.
|
||||
//
|
||||
// This code is under development and not yet released to the public.
|
||||
// Until it is released, the code is under the copyright of ETH Zurich and
|
||||
// the University of Bologna, and may contain confidential and/or unpublished
|
||||
// work. Any reuse/redistribution is strictly forbidden without written
|
||||
// permission from ETH Zurich.
|
||||
//
|
||||
// Bug fixes and contributions will eventually be released under the
|
||||
// SolderPad open hardware license in the context of the PULP platform
|
||||
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
|
||||
// University of Bologna.
|
||||
|
||||
// SRAM wrapper around the GF22 (IN22FDX) memory macros.
//
// A macro is instantiated only for NUM_WORDS == 256 combined with a
// DATA_WIDTH of 16, 44 or 128. For any other parameter combination nothing is
// generated and rdata_o stays undriven.
module sram #(
    int unsigned DATA_WIDTH = 64,
    int unsigned NUM_WORDS  = 1024
)(
    input  logic                          clk_i,

    input  logic                          req_i,
    input  logic                          we_i,
    input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,
    input  logic [DATA_WIDTH-1:0]         wdata_i,
    input  logic [DATA_WIDTH-1:0]         be_i,
    output logic [DATA_WIDTH-1:0]         rdata_o
);

    // The macros are driven with the inverted request and write-enable
    // (presumably their CEN/RDWEN pins are active low - confirm in the macro
    // datasheet).
    logic macro_cen;
    logic macro_rdwen;

    assign macro_cen   = ~req_i;
    assign macro_rdwen = ~we_i;

    generate
        if (NUM_WORDS == 256) begin

            // 16 bit wide macro
            if (DATA_WIDTH == 16) begin
                IN22FDX_R1PH_NFHN_W00256B016M02C256 dirtyram (
                    .CLK      ( clk_i       ),
                    .CEN      ( macro_cen   ),
                    .RDWEN    ( macro_rdwen ),
                    // the address is split into addr_i[7:1] and addr_i[0]
                    .AW       ( addr_i[7:1] ),
                    .AC       ( addr_i[0]   ),
                    .D        ( wdata_i     ),
                    .BW       ( be_i        ),
                    .Q        ( rdata_o     ),
                    // test and margin inputs are tied off
                    .T_LOGIC  ( 1'b0        ),
                    .MA_SAWL  ( '0          ),
                    .MA_WL    ( '0          ),
                    .MA_WRAS  ( '0          ),
                    .MA_WRASD ( '0          ),
                    .OBSV_CTL (             )
                );
            end

            // 44 bit wide tag memory, mapped onto a 46 bit wide macro
            if (DATA_WIDTH == 44) begin
                logic [45:0] tag_rdata;
                // the two upper macro bits are unused
                assign rdata_o = tag_rdata[43:0];

                IN22FDX_R1PH_NFHN_W00256B046M02C256 TAG_RAM (
                    .CLK      ( clk_i            ),
                    .CEN      ( macro_cen        ),
                    .RDWEN    ( macro_rdwen      ),
                    .AW       ( addr_i[7:1]      ),
                    .AC       ( addr_i[0]        ),
                    // pad data and mask with zeros up to the macro width
                    .D        ( {2'b0, wdata_i}  ),
                    .BW       ( {2'b0, be_i }    ),
                    .Q        ( tag_rdata        ),
                    .T_LOGIC  ( 1'b0             ),
                    .MA_SAWL  ( '0               ),
                    .MA_WL    ( '0               ),
                    .MA_WRAS  ( '0               ),
                    .MA_WRASD ( '0               ),
                    .OBSV_CTL (                  )
                );
            end

            // 128 bit wide data memory
            if (DATA_WIDTH == 128) begin
                IN22FDX_R1PH_NFHN_W00256B128M02C256 DATA_RAM (
                    .CLK      ( clk_i       ),
                    .CEN      ( macro_cen   ),
                    .RDWEN    ( macro_rdwen ),
                    .AW       ( addr_i[7:1] ),
                    .AC       ( addr_i[0]   ),
                    .D        ( wdata_i     ),
                    .BW       ( be_i        ),
                    .Q        ( rdata_o     ),
                    .T_LOGIC  ( 1'b0        ),
                    .MA_SAWL  ( '0          ),
                    .MA_WL    ( '0          ),
                    .MA_WRAS  ( '0          ),
                    .MA_WRASD ( '0          ),
                    .OBSV_CTL (             )
                );
            end
        end
    endgenerate

endmodule
|
|
@ -196,6 +196,12 @@ class instruction_trace_item;
|
|||
instr,
|
||||
s);
|
||||
|
||||
// s = $sformatf("%s %h %h %-36s",
|
||||
// priv_lvl,
|
||||
// sbe.pc,
|
||||
// instr,
|
||||
// s);
|
||||
|
||||
foreach (result_regs[i]) begin
|
||||
if (result_regs[i] != 0)
|
||||
s = $sformatf("%s %-4s:%16x", s, regAddrToStr(result_regs[i]), this.result);
|
||||
|
|
|
@ -155,7 +155,7 @@ class instruction_tracer;
|
|||
// flush all decoded instructions
|
||||
function void flushDecode ();
|
||||
decode_queue = {};
|
||||
endfunction;
|
||||
endfunction
|
||||
|
||||
// flush everything, we took an exception/interrupt
|
||||
function void flush ();
|
||||
|
@ -166,7 +166,7 @@ class instruction_tracer;
|
|||
// also clear mappings
|
||||
store_mapping = {};
|
||||
load_mapping = {};
|
||||
endfunction;
|
||||
endfunction
|
||||
|
||||
function void printInstr(scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl);
|
||||
instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr, priv_lvl);
|
||||
|
@ -174,7 +174,7 @@ class instruction_tracer;
|
|||
string print_instr = iti.printInstr();
|
||||
uvm_report_info( "Tracer", print_instr, UVM_HIGH);
|
||||
$fwrite(this.f, {print_instr, "\n"});
|
||||
endfunction;
|
||||
endfunction
|
||||
|
||||
function void printException(logic [63:0] pc, logic [63:0] cause, logic [63:0] tval);
|
||||
exception_trace_item eti = new (pc, cause, tval);
|
||||
|
|
|
@ -54,11 +54,13 @@ interface instruction_tracer_if (
|
|||
// current privilege level
|
||||
priv_lvl_t priv_lvl;
|
||||
// the tracer just has a passive interface we do not drive anything with it
|
||||
`ifndef SYNTHESIS
|
||||
clocking pck @(posedge clk);
|
||||
input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr,
|
||||
st_valid, st_paddr, ld_valid, ld_kill, ld_paddr,
|
||||
wdata, we, commit_instr, commit_ack, exception, priv_lvl;
|
||||
endclocking
|
||||
`endif
|
||||
|
||||
endinterface
|
||||
`endif
|
||||
|
|
|
@ -17,6 +17,9 @@
|
|||
// University of Bologna.
|
||||
//
|
||||
package instruction_tracer_pkg;
|
||||
timeunit 1ns;
|
||||
timeprecision 1ps;
|
||||
|
||||
import ariane_pkg::*;
|
||||
`ifndef SYNTHESIS
|
||||
import uvm_pkg::*;
|
||||
|
|
87
src/util/xilinx_sram.sv
Executable file
87
src/util/xilinx_sram.sv
Executable file
|
@ -0,0 +1,87 @@
|
|||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 13.11.2017
|
||||
// Description: SRAM Model for Xilinx FPGA
|
||||
//
|
||||
// Copyright (C) 2017 ETH Zurich, University of Bologna
|
||||
// All rights reserved.
|
||||
//
|
||||
// This code is under development and not yet released to the public.
|
||||
// Until it is released, the code is under the copyright of ETH Zurich and
|
||||
// the University of Bologna, and may contain confidential and/or unpublished
|
||||
// work. Any reuse/redistribution is strictly forbidden without written
|
||||
// permission from ETH Zurich.
|
||||
//
|
||||
// Bug fixes and contributions will eventually be released under the
|
||||
// SolderPad open hardware license in the context of the PULP platform
|
||||
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
|
||||
// University of Bologna.
|
||||
|
||||
// SRAM wrapper for Xilinx FPGAs.
//
// A memory is generated only for NUM_WORDS == 256 combined with a DATA_WIDTH
// of 16 (dirty RAM, inferred), 44 (tag RAM, block memory IP) or 128 (data
// RAM, block memory IP). For any other parameter combination nothing is
// generated and rdata_o stays undriven.
//
// be_i is a per-bit write mask (one mask bit per data bit). The dirty RAM
// applies it bit by bit. The block memory IPs take one write enable per byte,
// so the mask is sampled at the lowest bit of each byte, be_i[8*k].
module sram #(
    int unsigned DATA_WIDTH = 64,
    int unsigned NUM_WORDS  = 1024
)(
    input  logic                          clk_i,

    input  logic                          req_i,
    input  logic                          we_i,
    input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,
    input  logic [DATA_WIDTH-1:0]         wdata_i,
    input  logic [DATA_WIDTH-1:0]         be_i,
    output logic [DATA_WIDTH-1:0]         rdata_o
);

    generate
        if (NUM_WORDS == 256) begin

            // Dirty RAM
            if (DATA_WIDTH == 16) begin

                // named separately so it does not shadow the module parameter
                localparam DIRTY_RAM_DEPTH = 2**8;

                logic [DIRTY_RAM_DEPTH-1:0][15:0] mem;

                always_ff @(posedge clk_i) begin
                    if (req_i) begin
                        if (we_i) begin
                            // write: update only the bits enabled in the mask
                            for (int unsigned i = 0; i < 16; i++) begin
                                if (be_i[i])
                                    mem[addr_i][i] <= wdata_i[i];
                            end
                        end else begin
                            // read: registered output
                            rdata_o <= mem[addr_i];
                        end
                    end
                end
            end

            // Tag RAM
            if (DATA_WIDTH == 44) begin
                logic [47:0] tag_word;
                assign rdata_o = tag_word[43:0];

                // this is actually 48 bits wide
                xilinx_dcache_bank_tag_256x46 TAG_RAM (
                    .clka  ( clk_i ),
                    .ena   ( req_i ),
                    // one write enable per byte, taken from the lowest mask bit of that byte
                    .wea   ( {6{we_i}} & {be_i[40], be_i[32], be_i[24], be_i[16], be_i[8], be_i[0]} ),
                    .addra ( addr_i ),
                    // pad the 44 bit tag up to the 48 bit memory width
                    .dina  ( {4'b0, wdata_i} ),
                    .douta ( tag_word )
                );
            end

            // Data RAM
            if (DATA_WIDTH == 128) begin
                // One write enable per byte, sampled at be_i[8*k] like the tag
                // RAM above. The earlier code used be_i[15:0], i.e. the mask
                // bits of bytes 0 and 1 only, which does not match the per-bit
                // meaning of be_i used everywhere else.
                logic [15:0] data_byte_we;
                assign data_byte_we = {16{we_i}} &
                                      {be_i[120], be_i[112], be_i[104], be_i[96],
                                       be_i[88],  be_i[80],  be_i[72],  be_i[64],
                                       be_i[56],  be_i[48],  be_i[40],  be_i[32],
                                       be_i[24],  be_i[16],  be_i[8],   be_i[0]};

                xilinx_dcache_bank_data_256x128 DATA_RAM (
                    .clka  ( clk_i        ),
                    .ena   ( req_i        ),
                    .wea   ( data_byte_we ),
                    .addra ( addr_i       ),
                    .dina  ( wdata_i      ),
                    .douta ( rdata_o      )
                );
            end
        end
    endgenerate

endmodule
|
|
@ -4,6 +4,7 @@ ariane:
|
|||
]
|
||||
files: [
|
||||
include/ariane_pkg.sv,
|
||||
include/nbdcache_pkg.sv,
|
||||
src/util/instruction_tracer_if.sv,
|
||||
src/util/instruction_tracer_pkg.sv,
|
||||
src/ariane.sv,
|
||||
|
@ -15,7 +16,6 @@ ariane:
|
|||
src/controller.sv,
|
||||
src/csr_buffer.sv,
|
||||
src/csr_regfile.sv,
|
||||
src/dcache_arbiter.sv,
|
||||
src/decoder.sv,
|
||||
src/ex_stage.sv,
|
||||
src/fetch_fifo.sv,
|
||||
|
@ -37,6 +37,10 @@ ariane:
|
|||
src/store_unit.sv,
|
||||
src/tlb.sv,
|
||||
src/debug_unit.sv,
|
||||
src/nbdcache.sv,
|
||||
src/miss_handler.sv,
|
||||
src/cache_ctrl.sv,
|
||||
src/perf_counters.sv,
|
||||
]
|
||||
riscv_regfile_rtl:
|
||||
targets: [
|
||||
|
@ -48,6 +52,7 @@ riscv_regfile_rtl:
|
|||
]
|
||||
files: [
|
||||
src/regfile.sv,
|
||||
src/util/gf22_sram.sv,
|
||||
]
|
||||
|
||||
riscv_regfile_fpga:
|
||||
|
@ -59,4 +64,5 @@ riscv_regfile_fpga:
|
|||
]
|
||||
files: [
|
||||
src/regfile_ff.sv,
|
||||
src/util/xilinx_sram.sv,
|
||||
]
|
||||
|
|
2
tb
2
tb
|
@ -1 +1 @@
|
|||
Subproject commit 130fda9a1ea444b8a2ffdc104974e8901fb6f64d
|
||||
Subproject commit e6e142e7593c4387c7f06c919980f86a8cd8e7e5
|
Loading…
Add table
Reference in a new issue