Merge branch 'dcache' into 'master'

Merge revised Data Cache into Master branch

See merge request floce/ariane!9
This commit is contained in:
Florian Zaruba 2017-12-28 17:37:44 +01:00
commit cecdf9654f
51 changed files with 3546 additions and 778 deletions

View file

@ -35,13 +35,6 @@ test_fifo:
# - make scoreboard library=scoreboard_lib
# - vcover-10.6 report scoreboard.ucdb
test_dcache_arbiter:
stage: test
before_script:
- make build library=dcache_arbiter_lib
script:
- make dcache_arbiter library=dcache_arbiter_lib
- vcover-10.6 report dcache_arbiter.ucdb
test_store_queue:
stage: test
@ -94,7 +87,6 @@ test_failed_tests:
# paths:
# - covhtmlreport
pages:
stage: deploy
dependencies:

12
.gitmodules vendored
View file

@ -10,3 +10,15 @@
[submodule "tb"]
path = tb
url = ../uvm-components.git
[submodule "src/axi_mem_if"]
path = src/axi_mem_if
url = git@iis-git.ee.ethz.ch:kerbin/axi_mem_if.git
[submodule "src/axi2per"]
path = src/axi2per
url = git@iis-git.ee.ethz.ch:kerbin/axi2per.git
[submodule "src/axi_slice"]
path = src/axi_slice
url = git@iis-git.ee.ethz.ch:pulp-open/axi_slice.git
[submodule "src/axi_node"]
path = src/axi_node
url = git@iis-git.ee.ethz.ch:kerbin/axi_node.git

View file

@ -6,6 +6,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
## [Unreleased]
### 1.0.0
### Added
- Non-blocking data cache
- Two AXI interfaces on top level, one for bypassing and one for actual cache-able regions
- Performance Counters
- Hardware multiplication (full M-Extension)
- Support for inter processor interrupts (IPI)
### Changed
- Testbench: EOC component now listening on store interface only
- The store interface has been simplified by removing the `valid` signal; a transaction is now considered finished as soon as the dcache gives the grant signal.
- EOC and dcache checker have been reworked to get rid of absolute paths in the UVM testbench
### 0.4.0 - 2017-10-13
Linux booting on FPGA.

View file

@ -9,9 +9,9 @@ top_level = core_tb
test_top_level = core_tb
# Ariane PKG
ariane_pkg = include/ariane_pkg.sv
ariane_pkg = include/ariane_pkg.sv include/nbdcache_pkg.sv
# utility modules
util = $(wildcard src/util/*.sv*)
util = $(wildcard src/util/*.svh) src/util/instruction_tracer_pkg.sv src/util/instruction_tracer_if.sv src/util/cluster_clock_gating.sv src/util/behav_sram.sv
# test targets
tests = alu scoreboard fifo dcache_arbiter store_queue lsu core fetch_fifo
# UVM agents
@ -27,9 +27,11 @@ test_pkg = $(wildcard tb/test/*/*sequence_pkg.sv*) $(wildcard tb/test/*/*_pkg.sv
# DPI
dpi = $(wildcard tb/dpi/*)
# this list contains the standalone components
src = $(wildcard src/*.sv) $(wildcard tb/common/*.sv)
src = $(wildcard src/*.sv) $(wildcard tb/common/*.sv) $(wildcard src/axi2per/*.sv) $(wildcard src/axi_slice/*.sv) \
$(wildcard src/axi_node/*.sv) $(wildcard src/axi_mem_if/*.sv)
# look for testbenches
tbs = $(wildcard tb/*_tb.sv)
tbs = tb/alu_tb.sv tb/core_tb.sv tb/dcache_arbiter_tb.sv tb/store_queue_tb.sv tb/scoreboard_tb.sv tb/fifo_tb.sv
# RISCV-tests path
riscv-test-dir = riscv-tests/isa
riscv-tests = rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p-addw rv64ui-p-and rv64ui-p-auipc \
@ -48,12 +50,11 @@ riscv-tests = rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p-
rv64ui-v-xor rv64ui-v-xori rv64ui-v-slliw rv64ui-v-sll rv64ui-v-slli rv64ui-v-slliw \
rv64ui-v-slt rv64ui-v-slti rv64ui-v-sltiu rv64ui-v-sltu rv64ui-v-sra rv64ui-v-srai \
rv64ui-v-sraiw rv64ui-v-sraw rv64ui-v-srl rv64ui-v-srli rv64ui-v-srliw rv64ui-v-srlw \
rv64ui-v-lb rv64ui-v-lbu rv64ui-v-ld rv64ui-v-lh rv64ui-v-lhu rv64ui-v-lui
# rv64um-p-mul rv64um-p-mulh rv64um-p-mulhsu rv64um-p-mulhu rv64um-p-div rv64um-p-divu rv64um-p-rem \
# rv64um-p-remu rv64um-p-mulw rv64um-p-divw rv64um-p-divuw rv64um-p-remw rv64um-p-remuw \
# rv64um-v-mul rv64um-v-mulh rv64um-v-mulhsu rv64um-v-mulhu rv64um-v-div rv64um-v-divu rv64um-v-rem \
# rv64um-v-remu rv64um-v-mulw rv64um-v-divw rv64um-v-divuw rv64um-v-remw rv64um-v-remuw
rv64ui-v-lb rv64ui-v-lbu rv64ui-v-ld rv64ui-v-lh rv64ui-v-lhu rv64ui-v-lui \
rv64um-p-mul rv64um-p-mulh rv64um-p-mulhsu rv64um-p-mulhu rv64um-p-div rv64um-p-divu rv64um-p-rem \
rv64um-p-remu rv64um-p-mulw rv64um-p-divw rv64um-p-divuw rv64um-p-remw rv64um-p-remuw \
rv64um-v-mul rv64um-v-mulh rv64um-v-mulhsu rv64um-v-mulhu rv64um-v-div rv64um-v-divu rv64um-v-rem \
rv64um-v-remu rv64um-v-mulw rv64um-v-divw rv64um-v-divuw rv64um-v-remw rv64um-v-remuw
# failed test directory
failed-tests = $(wildcard failedtests/*.S)
@ -66,7 +67,7 @@ max_cycles = 10000000
# Test case to run
test_case = core_test
# QuestaSim Version
questa_version =
questa_version = -10.6b
compile_flag = +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive
# Moore binary
moore = ~fschuiki/bin/moore
@ -74,8 +75,6 @@ uvm-flags = +UVM_NO_RELNOTES
# Iterate over all include directories and write them with +incdir+ prefixed
# +incdir+ works for Verilator and QuestaSim
list_incdir = $(foreach dir, ${incdir}, +incdir+$(dir))
# Device Tree Compiler
DTC = dtc
# create library if it doesn't exist
@ -120,21 +119,26 @@ $(library):
# Create the library
vlib${questa_version} ${library}
sim: build ariane_tb.dtb
sim: build
vsim${questa_version} -lib ${library} ${top_level}_optimized +UVM_TESTNAME=${test_case} +BASEDIR=$(riscv-test-dir) \
+ASMTEST=$(riscv-test) $(uvm-flags) +UVM_VERBOSITY=HIGH -coverage -classdebug -do "do tb/wave/wave_core.do"
simc: build ariane_tb.dtb
sim_nopt: build
vsim${questa_version} -novopt -lib ${library} ${top_level} +UVM_TESTNAME=${test_case} +BASEDIR=$(riscv-test-dir) \
+ASMTEST=$(riscv-test) $(uvm-flags) +UVM_VERBOSITY=HIGH -coverage -classdebug -do "do tb/wave/wave_core.do"
simc: build
vsim${questa_version} -c -lib ${library} ${top_level}_optimized +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-test-dir) $(uvm-flags) +ASMTEST=$(riscv-test) -coverage -classdebug -do "do tb/wave/wave_core.do"
run-asm-tests: build ariane_tb.dtb
run-asm-tests: build
$(foreach test, $(riscv-tests), vsim$(questa_version) +BASEDIR=$(riscv-test-dir) +max-cycles=$(max_cycles) \
+UVM_TESTNAME=$(test_case) $(uvm-flags) +ASMTEST=$(test) +uvm_set_action="*,_ALL_,UVM_ERROR,UVM_DISPLAY|UVM_STOP" -c \
-coverage -classdebug -do "coverage save -onexit $@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
$(library).$(test_top_level)_optimized;)
run-failed-tests: build ariane_tb.dtb
run-failed-tests: build
# make the tests
cd failedtests && make
# run the RTL simulation
@ -148,7 +152,7 @@ run-failed-tests: build ariane_tb.dtb
$(foreach test, $(failed-tests:.S=), diff $(test).spike.sig $(test).rtlsim.sig;)
# Run the specified test case
$(tests): build ariane_tb.dtb
$(tests): build
# Optimize top level
vopt${questa_version} -work ${library} ${compile_flag} $@_tb -o $@_tb_optimized +acc -check_synthesis
# vsim${questa_version} $@_tb_optimized
@ -166,9 +170,6 @@ build-moore:
build-tests:
cd riscv-tests && autoconf && ./configure --prefix=/home/zarubaf/riscv && make isa -j8
# Compile device tree
ariane_tb.dtb: ariane_tb.dts
$(DTC) -I dts -O dtb ariane_tb.dts > ariane_tb.dtb
# Use Verilator to lint the target
lint:
@ -183,5 +184,3 @@ clean:
# Targets that name commands rather than files. The names must be prerequisites of
# .PHONY, i.e. on the same line: placed on the following line they are either parsed
# as a recipe of .PHONY (which is never run) or rejected, and no target becomes phony.
.PHONY: build lint build-moore
# make CC=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/gcc CXX=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/g++ -j20

View file

@ -3,5 +3,5 @@
cd output && make
cd ../..
# start the simulation
vsim-10.6 -c -lib work core_tb_optimized +UVM_TESTNAME=core_test $2 +BASEDIR=riscv-torture $1 +ASMTEST=$3 +UVM_VERBOSITY=LOW -coverage -classdebug -do "run -a"
vsim-10.6b -c -lib work core_tb_optimized +UVM_TESTNAME=core_test $2 +BASEDIR=riscv-torture $1 +ASMTEST=$3 +UVM_VERBOSITY=LOW -coverage -classdebug -do "run -a"

View file

@ -1,46 +0,0 @@
/dts-v1/;
/ {
#address-cells = <2>;
#size-cells = <2>;
compatible = "ucbbar,spike-bare-dev";
model = "ucbbar,spike-bare";
cpus {
#address-cells = <1>;
#size-cells = <0>;
timebase-frequency = <10000000>;
CPU0: cpu@0 {
device_type = "cpu";
reg = <0>;
status = "okay";
compatible = "riscv";
riscv,isa = "rv64ic";
mmu-type = "riscv,sv39";
clock-frequency = <1000000000>;
CPU0_intc: interrupt-controller {
#interrupt-cells = <1>;
interrupt-controller;
compatible = "riscv,cpu-intc";
};
};
};
memory@80000000 {
device_type = "memory";
reg = <0x0 0x80000000 0x0 0x1000000>;
};
soc {
#address-cells = <2>;
#size-cells = <2>;
compatible = "ucbbar,spike-bare-soc", "simple-bus";
ranges;
clint@2000000 {
compatible = "riscv,clint0";
interrupts-extended = <&CPU0_intc 3 &CPU0_intc 7 >;
reg = <0x0 0x2000000 0x0 0xc0000>;
};
uart@3000000 {
compatible = "sifive,uart0";
reg = <0x0 0x3000000 0x0 0x3000000>;
};
};
};

View file

@ -43,7 +43,7 @@ $(asm_tests_bin): %: %.S $(extra_files)
$(RISCV_GCC) $(RISCV_GCC_OPTS) -I../riscv-torture/env/p -T../riscv-torture/env/p/link.ld $< -o $@
$(asm_tests_hex): %.hex: % $(extra_files)
elf2hex 8 16384 $< 2147483648 > $@
elf2hex 8 16384 $< 1073741824 > $@
$(asm_tests_sig): %.sig: %
$(RISCV_SIM) +signature=$@ $<

View file

@ -12,6 +12,9 @@
package ariane_pkg;
timeunit 1ns;
timeprecision 1ps;
// ---------------
// Global Config
// ---------------
@ -78,7 +81,7 @@ package ariane_pkg;
// ---------------
// EX Stage
// ---------------
typedef enum logic [5:0] { // basic ALU op
typedef enum logic [6:0] { // basic ALU op
ADD, SUB, ADDW, SUBW,
// logic operations
XORL, ORL, ANDL,
@ -94,12 +97,30 @@ package ariane_pkg;
MRET, SRET, ECALL, WFI, FENCE, FENCE_I, SFENCE_VMA, CSR_WRITE, CSR_READ, CSR_SET, CSR_CLEAR,
// LSU functions
LD, SD, LW, LWU, SW, LH, LHU, SH, LB, SB, LBU,
// Atomic Memory Operations
AMO_LRW, AMO_LRD, AMO_SCW, AMO_SCD,
AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW, AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW, AMO_MINWU,
AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD, AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND, AMO_MINDU,
// Multiplications
MUL, MULH, MULHU, MULHSU, MULW,
// Divisions
DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW
} fu_op;
// ----------------------
// Extract Bytes from Op
// ----------------------
// TODO: Add atomics
// Map a load/store operation to its 2-bit transfer-size code
// (presumably log2 of the access width in bytes - confirm against the consumer):
//   LB/LBU/SB -> 2'b00, LH/LHU/SH -> 2'b01, LW/LWU/SW -> 2'b10, LD/SD -> 2'b11.
// Every other operation (including the AMOs, which are not handled yet) yields 2'b11.
function automatic logic [1:0] extract_transfer_size (fu_op op);
    logic [1:0] size_code;
    // widest encoding is the fall-back for anything not listed below
    size_code = 2'b11;
    case (op)
        LB, SB, LBU: size_code = 2'b00;
        LH, LHU, SH: size_code = 2'b01;
        LW, LWU, SW: size_code = 2'b10;
        LD, SD:      size_code = 2'b11;
        default:     size_code = 2'b11;
    endcase
    return size_code;
endfunction
typedef struct packed {
logic valid;
logic [63:0] vaddr;
@ -202,6 +223,14 @@ package ariane_pkg;
localparam OPCODE_JAL = 7'h6f;
localparam OPCODE_AUIPC = 7'h17;
localparam OPCODE_LUI = 7'h37;
localparam OPCODE_AMO = 7'h2F;
// --------------------
// Atomics
// --------------------
typedef enum logic [3:0] {
AMO_NONE, AMO_LR, AMO_SC, AMO_SWAP, AMO_ADD, AMO_AND, AMO_OR, AMO_XOR, AMO_MAX, AMO_MAXU, AMO_MIN, AMO_MINU
} amo_t;
// --------------------
// Privilege Spec
@ -255,43 +284,75 @@ package ariane_pkg;
localparam logic [63:0] M_TIMER_INTERRUPT = (1 << 63) | 7;
localparam logic [63:0] S_EXT_INTERRUPT = (1 << 63) | 9;
localparam logic [63:0] M_EXT_INTERRUPT = (1 << 63) | 11;
// ----------------------
// Performance Counters
// ----------------------
localparam logic [11:0] PERF_L1_ICACHE_MISS = 12'h0; // L1 Instr Cache Miss
localparam logic [11:0] PERF_L1_DCACHE_MISS = 12'h1; // L1 Data Cache Miss
localparam logic [11:0] PERF_ITLB_MISS = 12'h2; // ITLB Miss
localparam logic [11:0] PERF_DTLB_MISS = 12'h3; // DTLB Miss
localparam logic [11:0] PERF_LOAD = 12'h4; // Loads
localparam logic [11:0] PERF_STORE = 12'h5; // Stores
localparam logic [11:0] PERF_EXCEPTION = 12'h6; // Taken exceptions
localparam logic [11:0] PERF_EXCEPTION_RET = 12'h7; // Exception return
localparam logic [11:0] PERF_BRANCH_JUMP = 12'h8; // Software change of PC
localparam logic [11:0] PERF_CALL = 12'h9; // Procedure call
localparam logic [11:0] PERF_RET = 12'hA; // Procedure Return
localparam logic [11:0] PERF_MIS_PREDICT = 12'hB; // Branch mis-predicted
// -----
// CSRs
// -----
typedef enum logic [11:0] {
CSR_SSTATUS = 12'h100,
CSR_SIE = 12'h104,
CSR_STVEC = 12'h105,
CSR_SCOUNTEREN = 12'h106,
CSR_SSCRATCH = 12'h140,
CSR_SEPC = 12'h141,
CSR_SCAUSE = 12'h142,
CSR_STVAL = 12'h143,
CSR_SIP = 12'h144,
CSR_SATP = 12'h180,
CSR_MSTATUS = 12'h300,
CSR_MISA = 12'h301,
CSR_MEDELEG = 12'h302,
CSR_MIDELEG = 12'h303,
CSR_MIE = 12'h304,
CSR_MTVEC = 12'h305,
CSR_MCOUNTEREN = 12'h306,
CSR_MSCRATCH = 12'h340,
CSR_MEPC = 12'h341,
CSR_MCAUSE = 12'h342,
CSR_MTVAL = 12'h343,
CSR_MIP = 12'h344,
CSR_MVENDORID = 12'hF11,
CSR_MARCHID = 12'hF12,
CSR_MIMPID = 12'hF13,
CSR_MHARTID = 12'hF14,
CSR_MCYCLE = 12'hB00,
CSR_MINSTRET = 12'hB02,
// Supervisor Mode CSRs
CSR_SSTATUS = 12'h100,
CSR_SIE = 12'h104,
CSR_STVEC = 12'h105,
CSR_SCOUNTEREN = 12'h106,
CSR_SSCRATCH = 12'h140,
CSR_SEPC = 12'h141,
CSR_SCAUSE = 12'h142,
CSR_STVAL = 12'h143,
CSR_SIP = 12'h144,
CSR_SATP = 12'h180,
// Machine Mode CSRs
CSR_MSTATUS = 12'h300,
CSR_MISA = 12'h301,
CSR_MEDELEG = 12'h302,
CSR_MIDELEG = 12'h303,
CSR_MIE = 12'h304,
CSR_MTVEC = 12'h305,
CSR_MCOUNTEREN = 12'h306,
CSR_MSCRATCH = 12'h340,
CSR_MEPC = 12'h341,
CSR_MCAUSE = 12'h342,
CSR_MTVAL = 12'h343,
CSR_MIP = 12'h344,
CSR_MVENDORID = 12'hF11,
CSR_MARCHID = 12'hF12,
CSR_MIMPID = 12'hF13,
CSR_MHARTID = 12'hF14,
CSR_MCYCLE = 12'hB00,
CSR_MINSTRET = 12'hB02,
CSR_DCACHE = 12'h700,
// Counters and Timers
CSR_CYCLE = 12'hC00,
CSR_TIME = 12'hC01,
CSR_INSTRET = 12'hC02
CSR_CYCLE = 12'hC00,
CSR_TIME = 12'hC01,
CSR_INSTRET = 12'hC02,
// Performance counters
CSR_L1_ICACHE_MISS = PERF_L1_ICACHE_MISS + 12'hC03,
CSR_L1_DCACHE_MISS = PERF_L1_DCACHE_MISS + 12'hC03,
CSR_ITLB_MISS = PERF_ITLB_MISS + 12'hC03,
CSR_DTLB_MISS = PERF_DTLB_MISS + 12'hC03,
CSR_LOAD = PERF_LOAD + 12'hC03,
CSR_STORE = PERF_STORE + 12'hC03,
CSR_EXCEPTION = PERF_EXCEPTION + 12'hC03,
CSR_EXCEPTION_RET = PERF_EXCEPTION_RET + 12'hC03,
CSR_BRANCH_JUMP = PERF_BRANCH_JUMP + 12'hC03,
CSR_CALL = PERF_CALL + 12'hC03,
CSR_RET = PERF_RET + 12'hC03,
CSR_MIS_PREDICT = PERF_MIS_PREDICT + 12'hC03
} csr_reg_t;
// decoded CSR address
@ -309,7 +370,6 @@ package ariane_pkg;
// ----------------------
// Debug Unit
// ----------------------
typedef enum logic [15:0] {
DBG_CTRL = 16'h0,
DBG_HIT = 16'h8,
@ -348,4 +408,10 @@ package ariane_pkg;
DBG_CSR_M1 = 16'hF???
} debug_reg_t;
// ----------------------
// Arithmetic Functions
// ----------------------
// Sign-extend a 32-bit operand to 64 bits by replicating bit 31 into the upper word.
function automatic logic [63:0] sext32 (logic [31:0] operand);
    logic [31:0] sign_fill;
    // upper half is 32 copies of the operand's most significant bit
    sign_fill = {32{operand[31]}};
    return {sign_fill, operand};
endfunction
endpackage

View file

@ -38,13 +38,11 @@ interface mem_if
`ifndef VERILATOR
`ifndef SYNTHESIS
clocking mck @(posedge clk);
default input #1ns output #1ns;
input address, data_wdata, data_we, data_req, data_be;
output data_rvalid, data_rdata, data_gnt;
endclocking
// Memory interface configured as slave
clocking sck @(posedge clk);
default input #1ns output #1ns;
output address, data_wdata, data_we, data_req, data_be;
input data_rvalid, data_rdata, data_gnt;
endclocking
@ -79,4 +77,4 @@ interface mem_if
// modport Passive (clocking pck);
endinterface
`endif
`endif

80
include/nbdcache_pkg.sv Normal file
View file

@ -0,0 +1,80 @@
/* File: nbdcache_pkg.sv
* Author: Florian Zaruba <zarubaf@ethz.ch>
* Date: 13.10.2017
*
* Copyright (C) 2017 ETH Zurich, University of Bologna
* All rights reserved.
*
* Description: Contains all the necessary defines for the non-blocking DCache
* of Ariane in one package.
*/
// Shared parameters, types and helper functions of the non-blocking data cache.
package nbdcache_pkg;

    // ------------------
    // Cache geometry
    // ------------------
    localparam int unsigned INDEX_WIDTH       = 12;
    localparam int unsigned TAG_WIDTH         = 44;
    localparam int unsigned CACHE_LINE_WIDTH  = 128;
    localparam int unsigned SET_ASSOCIATIVITY = 8;
    localparam int unsigned NR_MSHR           = 1;

    // Calculated parameter
    localparam BYTE_OFFSET = $clog2(CACHE_LINE_WIDTH/8);     // address bits selecting a byte within a line
    localparam NUM_WORDS   = 2**(INDEX_WIDTH-BYTE_OFFSET);
    localparam DIRTY_WIDTH = SET_ASSOCIATIVITY*2;            // cl_be_t splits this in half: dirty + valid
    // localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not

    // kind of request: a single word or a whole cache line
    typedef enum logic { SINGLE_REQ, CACHE_LINE_REQ } req_t;

    // miss status holding register entry
    typedef struct packed {
        logic [1:0]      id;    // id for which we handle the miss
        logic            valid;
        logic            we;
        logic [55:0]     addr;
        logic [7:0][7:0] wdata;
        logic [7:0]      be;
    } mshr_t;

    // miss request as issued by a cache controller
    typedef struct packed {
        logic        valid;
        logic [63:0] addr;
        logic [7:0]  be;
        logic [1:0]  size;
        logic        we;
        logic [63:0] wdata;
        logic        bypass;
    } miss_req_t;

    typedef struct packed {
        logic [TAG_WIDTH-1:0]        tag;   // tag array
        logic [CACHE_LINE_WIDTH-1:0] data;  // data array
        logic                        valid; // state array
        logic                        dirty; // state array
    } cache_line_t;

    // cache line byte enable
    typedef struct packed {
        logic [TAG_WIDTH-1:0]        tag;   // byte enable into tag array
        logic [CACHE_LINE_WIDTH-1:0] data;  // byte enable into data array
        logic [DIRTY_WIDTH/2-1:0]    dirty; // byte enable into state array
        logic [DIRTY_WIDTH/2-1:0]    valid; // byte enable into state array
    } cl_be_t;

    // Convert one hot to bin -> needed for cache replacement.
    // Returns the index of the lowest set bit of `in`. When no bit is set the result is
    // an explicit 0 (previously the return value was left unassigned on that path).
    function automatic logic [$clog2(SET_ASSOCIATIVITY)-1:0] one_hot_to_bin (input logic [SET_ASSOCIATIVITY-1:0] in);
        for (int unsigned i = 0; i < SET_ASSOCIATIVITY; i++) begin
            if (in[i])
                return i;
        end
        // nothing set: return a defined value
        return '0;
    endfunction

    // Get the first bit set, returns one hot value.
    // Scans `valid_dirty` from bit 0 upwards and returns a vector with only the first
    // set position asserted. When no bit is set the result is an explicit all-zero
    // vector (previously the return value was left unassigned on that path).
    function automatic logic [SET_ASSOCIATIVITY-1:0] get_victim_cl (input logic [SET_ASSOCIATIVITY-1:0] valid_dirty);
        // one-hot return vector
        logic [SET_ASSOCIATIVITY-1:0] oh;
        oh = '0;
        for (int unsigned i = 0; i < SET_ASSOCIATIVITY; i++) begin
            if (valid_dirty[i]) begin
                oh[i] = 1'b1;
                return oh;
            end
        end
        // nothing set: oh is still all-zero here
        return oh;
    endfunction

endpackage

@ -1 +1 @@
Subproject commit 120d3d7e50209b617785d2f4637ba75ae603cfe2
Subproject commit f32ccd65cab47a024a83fee8f414390e40051677

View file

@ -25,23 +25,20 @@ import instruction_tracer_pkg::*;
`endif
`endif
module ariane
#(
parameter N_EXT_PERF_COUNTERS = 0
)
(
module ariane #(
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000, // address on which to decide whether the request is cache-able or not
parameter int unsigned AXI_ID_WIDTH = 10,
parameter int unsigned AXI_USER_WIDTH = 1
)(
input logic clk_i,
input logic rst_ni,
input logic test_en_i, // enable all clock gates for testing
output logic flush_icache_o, // request to flush icache
output logic flush_dcache_o, // request to flush the dcache
input logic flush_dcache_ack_i, // dcache flushed successfully
// CPU Control Signals
input logic fetch_enable_i,
output logic core_busy_o,
input logic [N_EXT_PERF_COUNTERS-1:0] ext_perf_counters_i,
input logic l1_icache_miss_i,
// Core ID, Cluster ID and boot address are considered more or less static
input logic [63:0] boot_addr_i,
@ -55,19 +52,11 @@ module ariane
input logic instr_if_data_rvalid_i,
input logic [63:0] instr_if_data_rdata_i,
// Data memory interface
output logic [11:0] data_if_address_index_o,
output logic [43:0] data_if_address_tag_o,
output logic [63:0] data_if_data_wdata_o,
output logic data_if_data_req_o,
output logic data_if_data_we_o,
output logic [7:0] data_if_data_be_o,
output logic data_if_kill_req_o,
output logic data_if_tag_valid_o,
input logic data_if_data_gnt_i,
input logic data_if_data_rvalid_i,
input logic [63:0] data_if_data_rdata_i,
AXI_BUS.Master data_if,
AXI_BUS.Master bypass_if,
// Interrupt inputs
input logic irq_i, // level sensitive IR lines
input logic [1:0] irq_i, // level sensitive IR lines, mip & sip
input logic ipi_i, // inter-processor interrupts
input logic [4:0] irq_id_i,
output logic irq_ack_o,
input logic irq_sec_i,
@ -226,6 +215,17 @@ module ariane
logic tvm_csr_id;
logic tw_csr_id;
logic tsr_csr_id;
logic dcache_en_csr_nbdcache;
// ----------------------------
// Performance Counters <-> *
// ----------------------------
logic [11:0] addr_csr_perf;
logic [63:0] data_csr_perf, data_perf_csr;
logic we_csr_perf;
logic itlb_miss_ex_perf;
logic dtlb_miss_ex_perf;
logic dcache_miss_ex_perf;
// --------------
// CTRL <-> *
// --------------
@ -243,6 +243,8 @@ module ariane
logic halt_ctrl;
logic halt_debug_ctrl;
logic halt_csr_ctrl;
logic flush_dcache_ctrl_ex;
logic flush_dcache_ack_ex_ctrl;
// --------------
// Debug <-> *
// --------------
@ -394,13 +396,18 @@ module ariane
.commit_instr_o ( commit_instr_id_commit ),
.commit_ack_i ( commit_ack ),
.*
);
// ---------
// EX
// ---------
ex_stage ex_stage_i (
ex_stage #(
.CACHE_START_ADDR ( CACHE_START_ADDR ),
.AXI_ID_WIDTH ( AXI_ID_WIDTH ),
.AXI_USER_WIDTH ( AXI_USER_WIDTH )
) ex_stage_i (
.flush_i ( flush_ctrl_ex ),
.fu_i ( fu_id_ex ),
.operator_i ( operator_id_ex ),
@ -437,7 +444,6 @@ module ariane
.lsu_commit_ready_o ( lsu_commit_ready_ex_commit ), // to commit
.lsu_exception_o ( lsu_exception_ex_id ),
.no_st_pending_o ( no_st_pending_ex_commit ),
// CSR
.csr_ready_o ( csr_ready_ex_id ),
.csr_valid_i ( csr_valid_id_ex ),
@ -446,6 +452,10 @@ module ariane
.csr_valid_o ( csr_valid_ex_id ),
.csr_addr_o ( csr_addr_ex_csr ),
.csr_commit_i ( csr_commit_commit_ex ), // from commit
// Performance counters
.itlb_miss_o ( itlb_miss_ex_perf ),
.dtlb_miss_o ( dtlb_miss_ex_perf ),
.dcache_miss_o ( dcache_miss_ex_perf ),
// Memory Management
.enable_translation_i ( enable_translation_csr_ex ), // from CSR
.en_ld_st_translation_i ( en_ld_st_translation_csr_ex ),
@ -468,6 +478,12 @@ module ariane
.mult_trans_id_o ( mult_trans_id_ex_id ),
.mult_result_o ( mult_result_ex_id ),
.mult_valid_o ( mult_valid_ex_id ),
.data_if ( data_if ),
.dcache_en_i ( dcache_en_csr_nbdcache ),
.flush_dcache_i ( flush_dcache_ctrl_ex ),
.flush_dcache_ack_o ( flush_dcache_ack_ex_ctrl ),
.*
);
@ -533,9 +549,35 @@ module ariane
.tvm_o ( tvm_csr_id ),
.tw_o ( tw_csr_id ),
.tsr_o ( tsr_csr_id ),
.dcache_en_o ( dcache_en_csr_nbdcache ),
.perf_addr_o ( addr_csr_perf ),
.perf_data_o ( data_csr_perf ),
.perf_data_i ( data_perf_csr ),
.perf_we_o ( we_csr_perf ),
.*
);
// ------------------------
// Performance Counters
// ------------------------
perf_counters i_perf_counters (
.addr_i ( addr_csr_perf ),
.we_i ( we_csr_perf ),
.data_i ( data_csr_perf ),
.data_o ( data_perf_csr ),
.commit_instr_i ( commit_instr_id_commit ),
.commit_ack_o ( commit_ack ),
.l1_dcache_miss_i ( dcache_miss_ex_perf ),
.itlb_miss_i ( itlb_miss_ex_perf ),
.dtlb_miss_i ( dtlb_miss_ex_perf ),
.ex_i ( ex_commit ),
.eret_i ( eret ),
.resolved_branch_i ( resolved_branch ),
.*
);
// ------------
// Controller
// ------------
@ -548,6 +590,8 @@ module ariane
.flush_id_o ( flush_ctrl_id ),
.flush_ex_o ( flush_ctrl_ex ),
.flush_tlb_o ( flush_tlb_ctrl_ex ),
.flush_dcache_o ( flush_dcache_ctrl_ex ),
.flush_dcache_ack_i ( flush_dcache_ack_ex_ctrl ),
.halt_csr_i ( halt_csr_ctrl ),
.halt_debug_i ( halt_debug_ctrl ),
@ -620,32 +664,18 @@ module ariane
assign tracer_if.commit_ack = commit_ack;
// address translation
// stores
assign tracer_if.st_valid = ex_stage_i.lsu_i.store_unit_i.store_buffer_i.valid_i;
assign tracer_if.st_paddr = ex_stage_i.lsu_i.store_unit_i.store_buffer_i.paddr_i;
assign tracer_if.st_valid = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.valid_i;
assign tracer_if.st_paddr = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.paddr_i;
// loads
assign tracer_if.ld_valid = ex_stage_i.lsu_i.load_unit_i.tag_valid_o;
assign tracer_if.ld_kill = ex_stage_i.lsu_i.load_unit_i.kill_req_o;
assign tracer_if.ld_paddr = ex_stage_i.lsu_i.load_unit_i.paddr_i;
assign tracer_if.ld_valid = ex_stage_i.lsu_i.i_load_unit.tag_valid_o;
assign tracer_if.ld_kill = ex_stage_i.lsu_i.i_load_unit.kill_req_o;
assign tracer_if.ld_paddr = ex_stage_i.lsu_i.i_load_unit.paddr_i;
// exceptions
assign tracer_if.exception = commit_stage_i.exception_o;
// assign current privilege level
assign tracer_if.priv_lvl = priv_lvl;
program instr_tracer (instruction_tracer_if tracer_if);
instruction_tracer it = new (tracer_if, 1'b0);
initial begin
#15ns;
it.create_file(cluster_id_i, core_id_i);
it.trace();
end
final begin
it.close();
end
endprogram
instr_tracer instr_tracer_i (tracer_if);
instr_tracer instr_tracer_i (tracer_if, cluster_id_i, core_id_i);
`endif
`endif
@ -658,3 +688,25 @@ module ariane
end
endmodule // ariane
`ifndef SYNTHESIS
// Wrapper program around the instruction_tracer class; the surrounding
// `ifndef SYNTHESIS guard keeps it out of synthesis.
// Ports:
//   tracer_if    - interface handed to the instruction_tracer constructor
//   cluster_id_i - passed to create_file() as first argument
//   core_id_i    - passed to create_file() as second argument
program instr_tracer
(
instruction_tracer_if tracer_if,
input logic [5:0] cluster_id_i,
input logic [3:0] core_id_i
);
// tracer object; the meaning of the constant 1'b0 second constructor argument
// is not visible here - TODO confirm against instruction_tracer
instruction_tracer it = new (tracer_if, 1'b0);
initial begin
// wait 15ns before touching the IDs (presumably so they have settled - confirm)
#15ns;
it.create_file(cluster_id_i, core_id_i);
it.trace();
end
// at the end of simulation call close() on the tracer
final begin
it.close();
end
endprogram
`endif

1
src/axi2per Submodule

@ -0,0 +1 @@
Subproject commit 04753ab7ac05f0c227599749e97bdad24ebdfc4d

1
src/axi_mem_if Submodule

@ -0,0 +1 @@
Subproject commit dbf1f38dd677614394e8e0722c23463ac77176b5

1
src/axi_node Submodule

@ -0,0 +1 @@
Subproject commit 18d1fe362cac76496e0a2f9447d7a26cb3445efa

1
src/axi_slice Submodule

@ -0,0 +1 @@
Subproject commit 940ab2b25c0d189a333343641b2e6e82ef227974

View file

@ -71,7 +71,8 @@ module branch_unit (
// here we handle the various possibilities of mis-predicts
always_comb begin : mispredict_handler
// set the jump base, for JALR we need to look at the register, for all other control flow instructions we can take the current PC
automatic logic [63:0] jump_base = (operator_i == JALR) ? operand_a_i : pc_i;
automatic logic [63:0] jump_base;
jump_base = (operator_i == JALR) ? operand_a_i : pc_i;
target_address = 64'b0;
resolved_branch_o.target_address = 64'b0;

428
src/cache_ctrl.sv Normal file
View file

@ -0,0 +1,428 @@
/* File: cache_ctrl.sv
* Author: Florian Zaruba <zarubaf@ethz.ch>
* Date: 14.10.2017
*
* Copyright (C) 2017 ETH Zurich, University of Bologna
* All rights reserved.
*
* Description: Cache controller
*/
import ariane_pkg::*;
import nbdcache_pkg::*;
module cache_ctrl #(
parameter int unsigned SET_ASSOCIATIVITY = 8,
parameter int unsigned INDEX_WIDTH = 12,
parameter int unsigned TAG_WIDTH = 44,
parameter int unsigned CACHE_LINE_WIDTH = 100,
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic bypass_i, // enable cache
output logic busy_o,
// Core request ports
input logic [INDEX_WIDTH-1:0] address_index_i,
input logic [TAG_WIDTH-1:0] address_tag_i,
input logic [63:0] data_wdata_i,
input logic data_req_i,
input logic data_we_i,
input logic [7:0] data_be_i,
input logic [1:0] data_size_i,
input logic kill_req_i,
input logic tag_valid_i,
output logic data_gnt_o,
output logic data_rvalid_o,
output logic [63:0] data_rdata_o,
input amo_t amo_op_i,
// SRAM interface
output logic [SET_ASSOCIATIVITY-1:0] req_o, // req is valid
output logic [INDEX_WIDTH-1:0] addr_o, // address into cache array
input logic gnt_i,
output cache_line_t data_o,
output cl_be_t be_o,
output logic [TAG_WIDTH-1:0] tag_o, //valid one cycle later
input cache_line_t [SET_ASSOCIATIVITY-1:0] data_i,
output logic we_o,
input logic [SET_ASSOCIATIVITY-1:0] hit_way_i,
// Miss handling
output miss_req_t miss_req_o,
// return
input logic miss_gnt_i,
input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss
input logic [63:0] critical_word_i,
input logic critical_word_valid_i,
input logic bypass_gnt_i,
input logic bypass_valid_i,
input logic [63:0] bypass_data_i,
// check MSHR for aliasing
output logic [55:0] mshr_addr_o,
input logic mshr_addr_matches_i
);
enum logic [3:0] {
IDLE, WAIT_TAG, WAIT_TAG_BYPASSED, STORE_REQ, WAIT_REFILL_VALID, WAIT_REFILL_GNT, WAIT_TAG_SAVED, WAIT_MSHR, WAIT_CRITICAL_WORD
} state_d, state_q;
typedef struct packed {
logic [INDEX_WIDTH-1:0] index;
logic [TAG_WIDTH-1:0] tag;
logic [7:0] be;
logic [1:0] size;
logic we;
logic [63:0] wdata;
logic bypass;
} mem_req_t;
logic [SET_ASSOCIATIVITY-1:0] hit_way_d, hit_way_q;
assign busy_o = (state_q != IDLE);
mem_req_t mem_req_d, mem_req_q;
logic [CACHE_LINE_WIDTH-1:0] cl_i;
// Select the data field of the way that hit.
// cl_i stays all-zero when no bit of hit_way_i is set; if more than one bit were set
// the highest-indexed way wins, because later loop iterations overwrite earlier ones.
// NOTE(review): cl_i is sized by this module's CACHE_LINE_WIDTH parameter (default 100),
// while data_i[i].data is sized by nbdcache_pkg::CACHE_LINE_WIDTH (128) and the FSM reads
// cl_i[127:64] - confirm that every instantiation overrides the parameter to 128.
always_comb begin : way_select
cl_i = '0;
for (int unsigned i = 0; i < SET_ASSOCIATIVITY; i++)
if (hit_way_i[i])
cl_i = data_i[i].data;
// cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
end
// --------------
// Cache FSM
// --------------
always_comb begin : cache_ctrl_fsm
automatic logic [$clog2(CACHE_LINE_WIDTH)-1:0] cl_offset;
// incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
// cache-line offset -> multiple of 64
cl_offset = mem_req_q.index[BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
// default assignments
state_d = state_q;
mem_req_d = mem_req_q;
hit_way_d = hit_way_q;
// output assignments
data_gnt_o = 1'b0;
data_rvalid_o = 1'b0;
data_rdata_o = '0;
miss_req_o = '0;
mshr_addr_o = '0;
// Memory array communication
req_o = '0;
addr_o = address_index_i;
data_o = '0;
be_o = '0;
tag_o = '0;
we_o = '0;
tag_o = 'b0;
case (state_q)
IDLE: begin
// a new request arrived
if (data_req_i) begin
// request the cache line - we can do this speculatively
req_o = '1;
// save index, be and we
mem_req_d.index = address_index_i;
mem_req_d.tag = address_tag_i;
mem_req_d.be = data_be_i;
mem_req_d.size = data_size_i;
mem_req_d.we = data_we_i;
mem_req_d.wdata = data_wdata_i;
// Bypass mode, check for uncacheable address here as well
if (bypass_i) begin
state_d = WAIT_TAG_BYPASSED;
// grant this access
data_gnt_o = 1'b1;
mem_req_d.bypass = 1'b1;
// ------------------
// Cache is enabled
// ------------------
end else begin
// Wait that we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
// only for a read
if (!data_we_i)
data_gnt_o = 1'b1;
end
end
end
end
// cache enabled and waiting for tag
WAIT_TAG, WAIT_TAG_SAVED: begin
// depending on where we come from
// For the store case the tag comes in the same cycle
tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag : address_tag_i;
// we speculatively request another transfer
if (data_req_i) begin
req_o = '1;
end
// check that the client really wants to do the request
if (!kill_req_i) begin
// ------------
// HIT CASE
// ------------
if (|hit_way_i) begin
// we can request another cache-line if this was a load
// make another request
if (data_req_i && !mem_req_q.we) begin
state_d = WAIT_TAG; // switch back to WAIT_TAG
mem_req_d.index = address_index_i;
mem_req_d.be = data_be_i;
mem_req_d.size = data_size_i;
mem_req_d.we = data_we_i;
mem_req_d.wdata = data_wdata_i;
mem_req_d.tag = address_tag_i;
mem_req_d.bypass = 1'b0;
data_gnt_o = gnt_i;
if (!gnt_i) begin
state_d = IDLE;
end
end else begin
state_d = IDLE;
end
// this is timing critical
// data_rdata_o = cl_i[cl_offset +: 64];
case (mem_req_q.index[3])
1'b0: data_rdata_o = cl_i[63:0];
1'b1: data_rdata_o = cl_i[127:64];
endcase
// report data for a read
if (!mem_req_q.we) begin
data_rvalid_o = 1'b1;
// else this was a store so we need an extra step to handle it
end else begin
state_d = STORE_REQ;
hit_way_d = hit_way_i;
end
// ------------
// MISS CASE
// ------------
end else begin
// also save tag
mem_req_d.tag = address_tag_i;
// make a miss request
state_d = WAIT_REFILL_GNT;
end
// ---------------
// Check MSHR
// ---------------
mshr_addr_o = {address_tag_i, mem_req_q.index};
// we've got a match on MSHR
if (mshr_addr_matches_i) begin
state_d = WAIT_MSHR;
// save tag if we didn't already save it e.g.: we are not in in the Tag saved state
if (state_q != WAIT_TAG_SAVED)
mem_req_d.tag = address_tag_i;
end
// -------------------------
// Check for cache-ability
// -------------------------
if (tag_o < CACHE_START_ADDR[TAG_WIDTH+INDEX_WIDTH-1:INDEX_WIDTH]) begin
mem_req_d.tag = address_tag_i;
mem_req_d.bypass = 1'b1;
state_d = WAIT_REFILL_GNT;
end
end else begin
// we can potentially accept a new request -> I don't know how this works out timing-wise
// as this will chain some paths together...
// For now this should not happen too frequently and we spare another cycle
// go back to idle
state_d = IDLE;
data_rvalid_o = 1'b1;
end
end
// ~> we are here as we need a second round of memory access for a store
STORE_REQ: begin
// store data, write dirty bit
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;
be_o.dirty = hit_way_q;
be_o.valid = hit_way_q;
// set the correct byte enable
for (int unsigned i = 0; i < 8; i++) begin
if (mem_req_q.be[i])
be_o.data[cl_offset + i*8 +: 8] = '1;
end
data_o.data[cl_offset +: 64] = mem_req_q.wdata;
// ~> change the state
data_o.dirty = 1'b1;
data_o.valid = 1'b1;
// got a grant ~> this is finished now
if (gnt_i) begin
data_gnt_o = 1'b1;
state_d = IDLE;
end
end
// we've got a match on MSHR ~> miss unit is currently serving a request
WAIT_MSHR: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// we can start a new request
if (!mshr_addr_matches_i) begin
req_o = '1;
addr_o = mem_req_q.index;
if (gnt_i)
state_d = WAIT_TAG_SAVED;
end
end
// its for sure a miss
WAIT_TAG_BYPASSED: begin
// the request was killed
if (kill_req_i) begin
state_d = IDLE;
// we need to ack the killing
data_rvalid_o = 1'b1;
end else begin
// save tag
mem_req_d.tag = address_tag_i;
state_d = WAIT_REFILL_GNT;
end
end
// ~> wait for grant from miss unit
WAIT_REFILL_GNT: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
miss_req_o.valid = 1'b1;
miss_req_o.bypass = mem_req_q.bypass;
miss_req_o.addr = {mem_req_q.tag, mem_req_q.index};
miss_req_o.be = mem_req_q.be;
miss_req_o.size = mem_req_q.size;
miss_req_o.we = mem_req_q.we;
miss_req_o.wdata = mem_req_q.wdata;
// got a grant so go to valid
if (bypass_gnt_i) begin
state_d = WAIT_REFILL_VALID;
// if this was a write we still need to give a grant to the store unit
if (mem_req_q.we)
data_gnt_o = 1'b1;
end
if (miss_gnt_i && !mem_req_q.we)
state_d = WAIT_CRITICAL_WORD;
else if (miss_gnt_i) begin
state_d = IDLE;
data_gnt_o = 1'b1;
end
// it can be the case that the miss unit is currently serving a request which matches ours
// so we need to check the mshr for matching continuously
// if the mshr matches we need to go to a different state -> we should never get a matching mshr and a high miss_gnt_i
if (mshr_addr_matches_i && !active_serving_i) begin
state_d = WAIT_MSHR;
end
end
// ~> wait for critical word to arrive
WAIT_CRITICAL_WORD: begin
// speculatively request another word
if (data_req_i) begin
// request the cache line
req_o = '1;
end
if (critical_word_valid_i) begin
data_rvalid_o = 1'b1;
data_rdata_o = critical_word_i;
// we can make another request
if (data_req_i) begin
// save index, be and we
mem_req_d.index = address_index_i;
mem_req_d.be = data_be_i;
mem_req_d.size = data_size_i;
mem_req_d.we = data_we_i;
mem_req_d.wdata = data_wdata_i;
mem_req_d.tag = address_tag_i;
state_d = IDLE;
// Wait until we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
data_gnt_o = 1'b1;
end
end else begin
state_d = IDLE;
end
end
end
// ~> wait until the bypass request is valid
WAIT_REFILL_VALID: begin
// got a valid answer
if (bypass_valid_i) begin
data_rdata_o = bypass_data_i;
data_rvalid_o = 1'b1;
state_d = IDLE;
end
end
endcase
end
// --------------
// Registers
// --------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
mem_req_q <= '0;
hit_way_q <= '0;
end else begin
state_q <= state_d;
mem_req_q <= mem_req_d;
hit_way_q <= hit_way_d;
end
end
`ifndef SYNTHESIS
initial begin
assert (CACHE_LINE_WIDTH == 128) else $error ("Cacheline width has to be 128 for the moment. But only small changes required in data select logic");
end
`endif
endmodule
// AMO_alu: port shell for an atomic memory operation (AMO) ALU.
// The module body is empty in this revision: it declares the AMO handshake ports
// only, implements no logic, and leaves amo_valid_o and amo_result_o undriven.
module AMO_alu (
input logic clk_i, // presumably the clock (naming convention) - not used, the body is empty
input logic rst_ni, // presumably an active-low reset (naming convention) - not used, the body is empty
// AMO interface
input logic amo_commit_i, // commit atomic memory operation
output logic amo_valid_o, // we have a valid AMO result
output logic [63:0] amo_result_o, // result of atomic memory operation
input logic amo_flush_i // forget about AMO
);
// NOTE(review): no implementation yet - both outputs are undriven
endmodule

View file

@ -113,12 +113,9 @@ module commit_stage (
// check if this instruction was a SFENCE_VMA
if (commit_instr_i.op == SFENCE_VMA) begin
// no store pending so we can flush the TLBs and pipeline
if (no_st_pending_i) begin
sfence_vma_o = 1'b1;
sfence_vma_o = no_st_pending_i;
// wait for the store buffer to drain until flushing the pipeline
end else begin
commit_ack_o = 1'b0;
end
commit_ack_o = no_st_pending_i;
end
// ------------------
// FENCE.I Logic
@ -126,17 +123,17 @@ module commit_stage (
// Fence synchronizes data and instruction streams. That means that we need to flush the private icache
// and the private dcache. This is the most expensive instruction.
if (commit_instr_i.op == FENCE_I) begin
commit_ack_o = 1'b1;
commit_ack_o = no_st_pending_i;
// tell the controller to flush the I$
fence_i_o = 1'b1;
fence_i_o = no_st_pending_i;
end
// ------------------
// FENCE Logic
// ------------------
if (commit_instr_i.op == FENCE) begin
commit_ack_o = 1'b1;
commit_ack_o = no_st_pending_i;
// tell the controller to flush the D$
fence_o = 1'b1;
fence_o = no_st_pending_i;
end
end
end

View file

@ -67,12 +67,19 @@ module csr_regfile #(
output logic [43:0] satp_ppn_o,
output logic [ASID_WIDTH-1:0] asid_o,
// external interrupts
input logic irq_i, // external interrupt in
input logic [1:0] irq_i, // external interrupt in
input logic ipi_i, // inter processor interrupt -> connected to machine mode sw
// Virtualization Support
output logic tvm_o, // trap virtual memory
output logic tw_o, // timeout wait
output logic tsr_o // trap sret
output logic tsr_o, // trap sret
// Caches
output logic dcache_en_o, // L1 DCache Enable
// Performance Counter
output logic [11:0] perf_addr_o, // address to performance counter module
output logic [63:0] perf_data_o, // write data to performance counter module
input logic [63:0] perf_data_i, // read data from performance counter module
output logic perf_we_o
);
// internal signal to keep track of access exceptions
logic read_access_exception, update_access_exception;
@ -80,7 +87,7 @@ module csr_regfile #(
logic [63:0] csr_wdata, csr_rdata;
priv_lvl_t trap_to_priv_lvl;
// register for enabling load store address translation, this is critical, hence the register
logic en_ld_st_translation_n, en_ld_st_translation_q;
logic en_ld_st_translation_d, en_ld_st_translation_q;
logic mret; // return from M-mode exception
logic sret; // return from S-mode exception
@ -98,7 +105,7 @@ module csr_regfile #(
// CSR Registers
// ----------------
// privilege level register
priv_lvl_t priv_lvl_n, priv_lvl_q;
priv_lvl_t priv_lvl_d, priv_lvl_q;
typedef struct packed {
logic sd; // signal dirty - read-only - hardwired zero
@ -127,28 +134,29 @@ module csr_regfile #(
logic uie; // user interrupts enable - hardwired to zero
} status_t;
status_t mstatus_q, mstatus_n;
status_t mstatus_q, mstatus_d;
logic [63:0] mtvec_q, mtvec_n;
logic [63:0] medeleg_q, medeleg_n;
logic [63:0] mideleg_q, mideleg_n;
logic [63:0] mip_q, mip_n;
logic [63:0] mie_q, mie_n;
logic [63:0] mscratch_q, mscratch_n;
logic [63:0] mepc_q, mepc_n;
logic [63:0] mcause_q, mcause_n;
logic [63:0] mtval_q, mtval_n;
logic [63:0] mtvec_q, mtvec_d;
logic [63:0] medeleg_q, medeleg_d;
logic [63:0] mideleg_q, mideleg_d;
logic [63:0] mip_q, mip_d;
logic [63:0] mie_q, mie_d;
logic [63:0] mscratch_q, mscratch_d;
logic [63:0] mepc_q, mepc_d;
logic [63:0] mcause_q, mcause_d;
logic [63:0] mtval_q, mtval_d;
logic [63:0] stvec_q, stvec_n;
logic [63:0] sscratch_q, sscratch_n;
logic [63:0] sepc_q, sepc_n;
logic [63:0] scause_q, scause_n;
logic [63:0] stval_q, stval_n;
logic [63:0] stvec_q, stvec_d;
logic [63:0] sscratch_q, sscratch_d;
logic [63:0] sepc_q, sepc_d;
logic [63:0] scause_q, scause_d;
logic [63:0] stval_q, stval_d;
logic [63:0] dcache_q, dcache_d;
logic wfi_n, wfi_q;
logic wfi_d, wfi_q;
logic [63:0] cycle_q, cycle_n;
logic [63:0] instret_q, instret_n;
logic [63:0] cycle_q, cycle_d;
logic [63:0] instret_q, instret_d;
typedef struct packed {
logic [3:0] mode;
@ -156,8 +164,7 @@ module csr_regfile #(
logic [43:0] ppn;
} satp_t;
satp_t satp_q, satp_n;
satp_t satp_q, satp_d;
// ----------------
// CSR Read logic
@ -166,6 +173,9 @@ module csr_regfile #(
// a read access exception can only occur if we attempt to read a CSR which does not exist
read_access_exception = 1'b0;
csr_rdata = 64'b0;
// feed through address of performance counter
perf_addr_o = csr_addr.address;
if (csr_read) begin
case (csr_addr.address)
@ -204,10 +214,23 @@ module csr_regfile #(
CSR_MHARTID: csr_rdata = {53'b0, cluster_id_i[5:0], 1'b0, core_id_i[3:0]};
CSR_MCYCLE: csr_rdata = cycle_q;
CSR_MINSTRET: csr_rdata = instret_q;
CSR_DCACHE: csr_rdata = dcache_q;
// Counters and Timers
CSR_CYCLE: csr_rdata = cycle_q;
CSR_TIME: csr_rdata = time_i;
CSR_INSTRET: csr_rdata = instret_q;
CSR_L1_ICACHE_MISS,
CSR_L1_DCACHE_MISS,
CSR_ITLB_MISS,
CSR_DTLB_MISS,
CSR_LOAD,
CSR_STORE,
CSR_EXCEPTION,
CSR_EXCEPTION_RET,
CSR_BRANCH_JUMP,
CSR_CALL,
CSR_RET,
CSR_MIS_PREDICT: csr_rdata = perf_data_i;
default: read_access_exception = 1'b1;
endcase
end
@ -216,40 +239,46 @@ module csr_regfile #(
// CSR Write and update logic
// ---------------------------
always_comb begin : csr_update
automatic satp_t sapt = satp_q;
automatic satp_t sapt;
automatic logic [63:0] mip;
sapt = satp_q;
mip = csr_wdata & 64'h33;
// only USIP, SSIP, UTIP, STIP are write-able
automatic logic [63:0] mip = csr_wdata & 64'h33;
eret_o = 1'b0;
flush_o = 1'b0;
update_access_exception = 1'b0;
priv_lvl_n = priv_lvl_q;
mstatus_n = mstatus_q;
mtvec_n = mtvec_q;
medeleg_n = medeleg_q;
mideleg_n = mideleg_q;
mip_n = mip_q;
mie_n = mie_q;
mepc_n = mepc_q;
mcause_n = mcause_q;
mscratch_n = mscratch_q;
mtval_n = mtval_q;
perf_we_o = 1'b0;
perf_data_o = 'b0;
sepc_n = sepc_q;
scause_n = scause_q;
stvec_n = stvec_q;
sscratch_n = sscratch_q;
stval_n = stval_q;
satp_n = satp_q;
en_ld_st_translation_n = en_ld_st_translation_q;
priv_lvl_d = priv_lvl_q;
mstatus_d = mstatus_q;
mtvec_d = mtvec_q;
medeleg_d = medeleg_q;
mideleg_d = mideleg_q;
mip_d = mip_q;
mie_d = mie_q;
mepc_d = mepc_q;
mcause_d = mcause_q;
mscratch_d = mscratch_q;
mtval_d = mtval_q;
dcache_d = dcache_q;
sepc_d = sepc_q;
scause_d = scause_q;
stvec_d = stvec_q;
sscratch_d = sscratch_q;
stval_d = stval_q;
satp_d = satp_q;
en_ld_st_translation_d = en_ld_st_translation_q;
// check for correct access rights and that we are writing
if (csr_we) begin
case (csr_addr.address)
// sstatus is a subset of mstatus - mask it accordingly
CSR_SSTATUS: begin
mstatus_n = csr_wdata & 64'h3fffe1fee;
mstatus_d = csr_wdata & 64'h3fffe1fee;
// this instruction has side-effects
flush_o = 1'b1;
end
@ -260,21 +289,21 @@ module csr_regfile #(
// are written
for (int unsigned i = 0; i < 64; i++)
if (mideleg_q[i])
mie_n[i] = csr_wdata[i];
mie_d[i] = csr_wdata[i];
end
CSR_SIP: begin
for (int unsigned i = 0; i < 64; i++)
if (mideleg_q[i])
mip_n[i] = mip[i];
mip_d[i] = mip[i];
end
CSR_SCOUNTEREN:;
CSR_STVEC: stvec_n = {csr_wdata[63:2], 1'b0, csr_wdata[0]};
CSR_SSCRATCH: sscratch_n = csr_wdata;
CSR_SEPC: sepc_n = {csr_wdata[63:1], 1'b0};
CSR_SCAUSE: scause_n = csr_wdata;
CSR_STVAL: stval_n = csr_wdata;
CSR_STVEC: stvec_d = {csr_wdata[63:2], 1'b0, csr_wdata[0]};
CSR_SSCRATCH: sscratch_d = csr_wdata;
CSR_SEPC: sepc_d = {csr_wdata[63:1], 1'b0};
CSR_SCAUSE: scause_d = csr_wdata;
CSR_STVAL: stval_d = csr_wdata;
// supervisor address translation and protection
CSR_SATP: begin
// intercept SATP writes if in S-Mode and TVM is enabled
@ -284,7 +313,7 @@ module csr_regfile #(
sapt = satp_t'(csr_wdata);
// only make ASID_LEN - 1 bit stick, that way software can figure out how many ASID bits are supported
sapt.asid = sapt.asid & {{(16-ASID_WIDTH){1'b0}}, {ASID_WIDTH{1'b1}}};
satp_n = sapt;
satp_d = sapt;
end
// changing the mode can have side-effects on address translation (e.g.: other instructions), re-fetch
// the next instruction by executing a flush
@ -292,15 +321,15 @@ module csr_regfile #(
end
CSR_MSTATUS: begin
mstatus_n = csr_wdata;
mstatus_n.sxl = 2'b10;
mstatus_n.uxl = 2'b10;
mstatus_d = csr_wdata;
mstatus_d.sxl = 2'b10;
mstatus_d.uxl = 2'b10;
// hardwired zero registers
mstatus_n.sd = 1'b0;
mstatus_n.xs = 2'b0;
mstatus_n.fs = 2'b0;
mstatus_n.upie = 1'b0;
mstatus_n.uie = 1'b0;
mstatus_d.sd = 1'b0;
mstatus_d.xs = 2'b0;
mstatus_d.fs = 2'b0;
mstatus_d.upie = 1'b0;
mstatus_d.uie = 1'b0;
// this register has side-effects on other registers, flush the pipeline
flush_o = 1'b1;
end
@ -308,29 +337,46 @@ module csr_regfile #(
CSR_MISA:;
// machine exception delegation register
// 0 - 15 exceptions supported
CSR_MEDELEG: medeleg_n = csr_wdata & 64'hF7FF;
CSR_MEDELEG: medeleg_d = csr_wdata & 64'hF7FF;
// machine interrupt delegation register
// we do not support user interrupt delegation
CSR_MIDELEG: mideleg_n = csr_wdata & 64'hBBB;
CSR_MIDELEG: mideleg_d = csr_wdata & 64'hBBB;
// mask the register so that unsupported interrupts can never be set
CSR_MIE: mie_n = csr_wdata & 64'hBBB; // we only support supervisor and m-mode interrupts
CSR_MIP: mip_n = mip;
CSR_MIE: mie_d = csr_wdata & 64'hBBB; // we only support supervisor and m-mode interrupts
CSR_MIP: mip_d = mip;
CSR_MTVEC: begin
mtvec_n = {csr_wdata[63:2], 1'b0, csr_wdata[0]};
mtvec_d = {csr_wdata[63:2], 1'b0, csr_wdata[0]};
// we are in vector mode, this implementation requires the additional
// alignment constraint of 64 * 4 bytes
if (csr_wdata[0])
mtvec_n = {csr_wdata[63:8], 7'b0, csr_wdata[0]};
mtvec_d = {csr_wdata[63:8], 7'b0, csr_wdata[0]};
end
CSR_MCOUNTEREN:;
CSR_MSCRATCH: mscratch_n = csr_wdata;
CSR_MEPC: mepc_n = {csr_wdata[63:1], 1'b0};
CSR_MCAUSE: mcause_n = csr_wdata;
CSR_MTVAL: mtval_n = csr_wdata;
CSR_MCYCLE: cycle_n = csr_wdata;
CSR_MINSTRET: instret_n = csr_wdata;
CSR_MSCRATCH: mscratch_d = csr_wdata;
CSR_MEPC: mepc_d = {csr_wdata[63:1], 1'b0};
CSR_MCAUSE: mcause_d = csr_wdata;
CSR_MTVAL: mtval_d = csr_wdata;
CSR_MCYCLE: cycle_d = csr_wdata;
CSR_MINSTRET: instret_d = csr_wdata;
CSR_DCACHE: dcache_d = csr_wdata[0]; // enable bit
CSR_L1_ICACHE_MISS,
CSR_L1_DCACHE_MISS,
CSR_ITLB_MISS,
CSR_DTLB_MISS,
CSR_LOAD,
CSR_STORE,
CSR_EXCEPTION,
CSR_EXCEPTION_RET,
CSR_BRANCH_JUMP,
CSR_CALL,
CSR_RET,
CSR_MIS_PREDICT: begin
perf_data_o = csr_wdata;
perf_we_o = 1'b1;
end
default: update_access_exception = 1'b1;
endcase
end
@ -338,11 +384,12 @@ module csr_regfile #(
// External Interrupts
// ---------------------
// Machine Mode External Interrupt Pending
// TODO: this is wrong for sure
mip_n[11] = 1'b0;
mip_n[9] = mie_q[9] & irq_i;
mip_d[11] = mie_q[11] & irq_i[1];
mip_d[9] = mie_q[9] & irq_i[0];
// Machine software interrupt
mip_d[3] = mie_q[3] & ipi_i;
// Timer interrupt pending, coming from platform timer
mip_n[7] = time_irq_i;
mip_d[7] = time_irq_i;
// -----------------------
// Manage Exception Stack
@ -368,31 +415,31 @@ module csr_regfile #(
// trap to supervisor mode
if (trap_to_priv_lvl == PRIV_LVL_S) begin
// update sstatus
mstatus_n.sie = 1'b0;
mstatus_n.spie = mstatus_q.sie;
mstatus_d.sie = 1'b0;
mstatus_d.spie = mstatus_q.sie;
// this can either be user or supervisor mode
mstatus_n.spp = logic'(priv_lvl_q);
mstatus_d.spp = logic'(priv_lvl_q);
// set cause
scause_n = ex_i.cause;
scause_d = ex_i.cause;
// set epc
sepc_n = pc_i;
sepc_d = pc_i;
// set mtval or stval
stval_n = ex_i.tval;
stval_d = ex_i.tval;
// trap to machine mode
end else begin
// update mstatus
mstatus_n.mie = 1'b0;
mstatus_n.mpie = mstatus_q.mie;
mstatus_d.mie = 1'b0;
mstatus_d.mpie = mstatus_q.mie;
// save the previous privilege mode
mstatus_n.mpp = priv_lvl_q;
mcause_n = ex_i.cause;
mstatus_d.mpp = priv_lvl_q;
mcause_d = ex_i.cause;
// set epc
mepc_n = pc_i;
mepc_d = pc_i;
// set mtval or stval
mtval_n = ex_i.tval;
mtval_d = ex_i.tval;
end
priv_lvl_n = trap_to_priv_lvl;
priv_lvl_d = trap_to_priv_lvl;
end
// ------------------------------
// MPRV - Modify Privilege Level
@ -400,9 +447,9 @@ module csr_regfile #(
// Set the address translation at which the load and stores should occur
// we can use the previous values since changing the address translation will always involve a pipeline flush
if (mstatus_q.mprv && satp_q.mode == 4'h8 && (mstatus_q.mpp != PRIV_LVL_M))
en_ld_st_translation_n = 1'b1;
en_ld_st_translation_d = 1'b1;
else // otherwise we go with the regular settings
en_ld_st_translation_n = en_translation_o;
en_ld_st_translation_d = en_translation_o;
ld_st_priv_lvl_o = (mstatus_q.mprv) ? mstatus_q.mpp : priv_lvl_o;
en_ld_st_translation_o = en_ld_st_translation_q;
@ -416,37 +463,37 @@ module csr_regfile #(
eret_o = 1'b1;
// return to the previous privilege level and restore all enable flags
// get the previous machine interrupt enable flag
mstatus_n.mie = mstatus_q.mpie;
mstatus_d.mie = mstatus_q.mpie;
// restore the previous privilege level
priv_lvl_n = mstatus_q.mpp;
priv_lvl_d = mstatus_q.mpp;
// set mpp to user mode
mstatus_n.mpp = PRIV_LVL_U;
mstatus_d.mpp = PRIV_LVL_U;
// set mpie to 1
mstatus_n.mpie = 1'b1;
mstatus_d.mpie = 1'b1;
end
if (sret) begin
// return from exception, IF doesn't care from where we are returning
eret_o = 1'b1;
// return the previous supervisor interrupt enable flag
mstatus_n.sie = mstatus_n.spie;
mstatus_d.sie = mstatus_d.spie;
// restore the previous privilege level
priv_lvl_n = priv_lvl_t'({1'b0, mstatus_n.spp});
priv_lvl_d = priv_lvl_t'({1'b0, mstatus_d.spp});
// set spp to user mode
mstatus_n.spp = logic'(PRIV_LVL_U);
mstatus_d.spp = logic'(PRIV_LVL_U);
// set spie to 1
mstatus_n.spie = 1'b1;
mstatus_d.spie = 1'b1;
end
// --------------------
// Counters
// --------------------
instret_n = instret_q;
instret_d = instret_q;
// just increment the cycle count
cycle_n = cycle_q + 1'b1;
cycle_d = cycle_q + 1'b1;
// increase instruction retired counter
if (commit_ack_i) begin
instret_n = instret_q + 1'b1;
instret_d = instret_q + 1'b1;
end
end
@ -504,9 +551,10 @@ module csr_regfile #(
// Exception Control & Interrupt Control
// --------------------------------------
always_comb begin : exception_ctrl
automatic logic [63:0] interrupt_cause = '0;
automatic logic [63:0] interrupt_cause;
interrupt_cause = '0;
// wait for interrupt register
wfi_n = wfi_q;
wfi_d = wfi_q;
csr_exception_o = {
64'b0, 64'b0, 1'b0
@ -582,10 +630,10 @@ module csr_regfile #(
// -------------------
// if there is any interrupt pending un-stall the core
if (|mip_q) begin
wfi_n = 1'b0;
wfi_d = 1'b0;
// or alternatively if there is no exception pending, wait here for the interrupt
end else if (csr_op_i == WFI && !ex_i.valid) begin
wfi_n = 1'b1;
wfi_d = 1'b1;
end
end
@ -605,6 +653,7 @@ module csr_regfile #(
assign tw_o = mstatus_q.tw;
assign tsr_o = mstatus_q.tsr;
assign halt_csr_o = wfi_q;
assign dcache_en_o = dcache_q[0];
// output assignments dependent on privilege mode
always_comb begin : priv_output
@ -643,6 +692,7 @@ module csr_regfile #(
mcause_q <= 64'b0;
mscratch_q <= 64'b0;
mtval_q <= 64'b0;
dcache_q <= 64'b1;
// supervisor mode registers
sepc_q <= 64'b0;
scause_q <= 64'b0;
@ -658,32 +708,33 @@ module csr_regfile #(
// wait for interrupt
wfi_q <= 1'b0;
end else begin
priv_lvl_q <= priv_lvl_n;
priv_lvl_q <= priv_lvl_d;
// machine mode registers
mstatus_q <= mstatus_n;
mtvec_q <= mtvec_n;
medeleg_q <= medeleg_n;
mideleg_q <= mideleg_n;
mip_q <= mip_n;
mie_q <= mie_n;
mepc_q <= mepc_n;
mcause_q <= mcause_n;
mscratch_q <= mscratch_n;
mtval_q <= mtval_n;
mstatus_q <= mstatus_d;
mtvec_q <= mtvec_d;
medeleg_q <= medeleg_d;
mideleg_q <= mideleg_d;
mip_q <= mip_d;
mie_q <= mie_d;
mepc_q <= mepc_d;
mcause_q <= mcause_d;
mscratch_q <= mscratch_d;
mtval_q <= mtval_d;
dcache_q <= dcache_d;
// supervisor mode registers
sepc_q <= sepc_n;
scause_q <= scause_n;
stvec_q <= stvec_n;
sscratch_q <= sscratch_n;
stval_q <= stval_n;
satp_q <= satp_n;
sepc_q <= sepc_d;
scause_q <= scause_d;
stvec_q <= stvec_d;
sscratch_q <= sscratch_d;
stval_q <= stval_d;
satp_q <= satp_d;
// timer and counters
cycle_q <= cycle_n;
instret_q <= instret_n;
cycle_q <= cycle_d;
instret_q <= instret_d;
// aux registers
en_ld_st_translation_q <= en_ld_st_translation_n;
en_ld_st_translation_q <= en_ld_st_translation_d;
// wait for interrupt
wfi_q <= wfi_n;
wfi_q <= wfi_d;
end
end

View file

@ -1,193 +0,0 @@
// Author: Florian Zaruba, ETH Zurich
// Date: 24.4.2017
// Description: Arbitrates the dcache ports
//
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
import ariane_pkg::*;
// Arbitrates NR_PORTS master ports onto a single dcache slave port.
//
// - Request selection is a fixed-priority select: the lowest-numbered master port
//   with data_req_i set wins. No new request is forwarded while the response
//   FIFO is full.
// - For every grant from the slave (data_gnt_i) the one-hot encoded selected port
//   is pushed into a FIFO. Every data_rvalid_i pops the FIFO, and the FIFO output
//   decides which master port sees data_rvalid_o.
// - Index, write data, byte enable and write enable are passed through from the
//   port selected in the current cycle; tag, kill and tag-valid are passed
//   through from the registered port selection (request_port_q).
module dcache_arbiter #(
        parameter int NR_PORTS = 3
    )
    (
        input  logic                       clk_i,          // Clock
        input  logic                       rst_ni,         // Asynchronous reset active low
        // slave port
        output logic [11:0]                address_index_o,
        output logic [43:0]                address_tag_o,
        output logic [63:0]                data_wdata_o,
        output logic                       data_req_o,
        output logic                       data_we_o,
        output logic [7:0]                 data_be_o,
        output logic                       kill_req_o,
        output logic                       tag_valid_o,
        input  logic                       data_gnt_i,
        input  logic                       data_rvalid_i,
        input  logic [63:0]                data_rdata_i,
        // master ports
        input  logic [NR_PORTS-1:0][11:0]  address_index_i,
        input  logic [NR_PORTS-1:0][43:0]  address_tag_i,
        input  logic [NR_PORTS-1:0][63:0]  data_wdata_i,
        input  logic [NR_PORTS-1:0]        data_req_i,
        input  logic [NR_PORTS-1:0]        data_we_i,
        input  logic [NR_PORTS-1:0][7:0]   data_be_i,
        input  logic [NR_PORTS-1:0]        kill_req_i,
        input  logic [NR_PORTS-1:0]        tag_valid_i,
        output logic [NR_PORTS-1:0]        data_gnt_o,
        output logic [NR_PORTS-1:0]        data_rvalid_o,
        output logic [NR_PORTS-1:0][63:0]  data_rdata_o
    );
    // one-hot encoded
    localparam DATA_WIDTH = NR_PORTS;
    // remember the request port in case of a multi-cycle transaction
    logic [DATA_WIDTH-1:0] request_port_n, request_port_q;
    // local ports
    // FIFO control ports
    logic                  full;
    logic                  empty;
    logic                  single_element;
    // FIFO input port
    logic [DATA_WIDTH-1:0] in_data;
    logic                  push;
    // FIFO output port
    logic [DATA_WIDTH-1:0] out_data;
    logic                  pop;

    // FIFO to keep track of the responses
    fifo #(
        .dtype            ( logic [DATA_WIDTH-1:0] ),
        .DEPTH            ( 4                      )
    ) fifo_i (
        .clk_i            ( clk_i                  ),
        .rst_ni           ( rst_ni                 ),
        .single_element_o ( single_element         ),
        // the flush is accomplished implicitly by waiting for the queue to be drained before accepting any new request
        // it is the responsibility of the attached units to make sure it handles any outstanding responses
        .flush_i          ( 1'b0                   ),
        .full_o           ( full                   ),
        .empty_o          ( empty                  ),
        .data_i           ( in_data                ),
        .push_i           ( push                   ),
        .data_o           ( out_data               ),
        .pop_i            ( pop                    )
    );

    // addressing read and full write
    always_comb begin : read_req_write
        // Declaration and assignment are deliberately kept separate: a declaration
        // initializer on an automatic variable inside always_comb is not handled
        // uniformly by all tools, whereas a plain procedural assignment is
        // unambiguously part of the combinational logic.
        automatic logic [DATA_WIDTH-1:0] request_index;
        request_index  = request_port_q;

        // default assignments
        data_req_o     = 1'b0;
        in_data        = '0;
        push           = 1'b0;
        request_port_n = request_port_q;
        data_gnt_o     = '0;

        // ----------------------------
        // Single-cycle memory requests
        // ----------------------------
        // only go for a new request if we can wait for the valid e.g.: we have enough space in the buffer
        if (~full) begin
            for (int unsigned i = 0; i < NR_PORTS; i++) begin
                if (data_req_i[i] == 1'b1) begin
                    data_req_o     = data_req_i[i];
                    // save the request port for future states
                    // (explicit part-select: the 32-bit loop index is narrowed on purpose)
                    request_port_n = i[DATA_WIDTH-1:0];
                    request_index  = i[DATA_WIDTH-1:0];
                    // wait for the grant
                    // set the slave on which we are waiting
                    in_data        = 1'b1 << i[DATA_WIDTH-1:0];
                    break; // break here as this is a priority select
                end
            end
            // only if we got a grant save it to the queue
            if (data_gnt_i) begin
                push = 1'b1;
            end
        end

        // pass through all signals from the correct slave port
        address_index_o           = address_index_i[request_index];
        data_wdata_o              = data_wdata_i[request_index];
        data_be_o                 = data_be_i[request_index];
        data_we_o                 = data_we_i[request_index];
        data_gnt_o[request_index] = data_gnt_i;
        // the following signals are to be passed through one-cycle later
        address_tag_o             = address_tag_i[request_port_q];
        kill_req_o                = kill_req_i[request_port_q];
        tag_valid_o               = tag_valid_i[request_port_q];
    end

    // ------------
    // Read port
    // ------------
    // results, listening on the input signals of the slave port
    genvar i;
    // this is very timing sensitive since we can give a new request if we got an rvalid
    // hence this combines the two most critical paths (from and to memory)
    generate
        // default assignment & one hot decoder
        for (i = 0; i < NR_PORTS; i++) begin
            assign data_rvalid_o[i] = out_data[i] & data_rvalid_i;
            assign data_rdata_o[i]  = data_rdata_i;
        end
    endgenerate

    always_comb begin : slave_read_port
        pop = 1'b0;
        // if there is a valid signal the FIFO should not be empty anyway
        if (data_rvalid_i) begin
            pop = 1'b1;
        end
    end

    // sequential process
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            request_port_q <= 'b0;
        end else begin
            request_port_q <= request_port_n;
        end
    end

    // ------------
    // Assertions
    // ------------
    `ifndef SYNTHESIS
    `ifndef VERILATOR
    // make sure that we eventually get an rvalid after we received a grant
    assert property (@(posedge clk_i) data_gnt_i |-> ##[1:$] data_rvalid_i )
        else begin $error("There was a grant without a rvalid"); $stop(); end
    // assert that there is no grant without a request
    assert property (@(negedge clk_i) data_gnt_i |-> data_req_o)
        else begin $error("There was a grant without a request."); $stop(); end
    // assert that the address does not contain X when request is sent
    assert property ( @(posedge clk_i) (data_req_o) |-> (!$isunknown(address_index_o)) )
        else begin $error("address contains X when request is set"); $stop(); end
    // there should be no rvalid when we are in IDLE
    // assert property (
    // @(posedge clk) (CS == IDLE) |-> (data_rvalid_i == 1'b0) )
    // else begin $error("Received rvalid while in IDLE state"); $stop(); end
    // assert that errors are only sent at the same time as grant or rvalid
    // assert property ( @(posedge clk) (data_err_i) |-> (data_gnt_i || data_rvalid_i) )
    // else begin $error("Error without data grant or rvalid"); $stop(); end
    `endif
    `endif
endmodule

View file

@ -324,7 +324,6 @@ module debug_unit (
HALT_REQ: begin
// we've got a valid instruction in the commit stage so we can proceed to the halted state
if (commit_instr_i.valid || !fetch_enable_i) begin
halt_o = 1'b1;
NS = HALTED;
end
end

View file

@ -237,7 +237,6 @@ module decoder (
{7'b000_0000, 3'b101}: instruction_o.op = SRL; // Shift Right Logical
{7'b010_0000, 3'b101}: instruction_o.op = SRA; // Shift Right Arithmetic
// Multiplications
`ifdef MULT
{7'b000_0001, 3'b000}: instruction_o.op = MUL;
{7'b000_0001, 3'b001}: instruction_o.op = MULH;
{7'b000_0001, 3'b010}: instruction_o.op = MULHSU;
@ -246,7 +245,6 @@ module decoder (
{7'b000_0001, 3'b101}: instruction_o.op = DIVU;
{7'b000_0001, 3'b110}: instruction_o.op = REM;
{7'b000_0001, 3'b111}: instruction_o.op = REMU;
`endif
default: begin
illegal_instr = 1'b1;
end
@ -271,13 +269,11 @@ module decoder (
{7'b000_0000, 3'b101}: instruction_o.op = SRLW; // srlw
{7'b010_0000, 3'b101}: instruction_o.op = SRAW; // sraw
// Multiplications
`ifdef MULT
{7'b000_0001, 3'b000}: instruction_o.op = MULW;
{7'b000_0001, 3'b100}: instruction_o.op = DIVW;
{7'b000_0001, 3'b101}: instruction_o.op = DIVUW;
{7'b000_0001, 3'b110}: instruction_o.op = REMW;
{7'b000_0001, 3'b111}: instruction_o.op = REMUW;
`endif
default: illegal_instr = 1'b1;
endcase
end
@ -345,7 +341,9 @@ module decoder (
default: illegal_instr = 1'b1;
endcase
end
// --------------------------------
// LSU
// --------------------------------
OPCODE_STORE: begin
instruction_o.fu = STORE;
imm_select = SIMM;
@ -353,16 +351,11 @@ module decoder (
instruction_o.rs2 = instr.stype.rs2;
// determine store size
unique case (instr.stype.funct3)
3'b000:
instruction_o.op = SB;
3'b001:
instruction_o.op = SH;
3'b010:
instruction_o.op = SW;
3'b011:
instruction_o.op = SD;
default:
illegal_instr = 1'b1;
3'b000: instruction_o.op = SB;
3'b001: instruction_o.op = SH;
3'b010: instruction_o.op = SW;
3'b011: instruction_o.op = SD;
default: illegal_instr = 1'b1;
endcase
end
@ -373,26 +366,64 @@ module decoder (
instruction_o.rd = instr.itype.rd;
// determine load size and signed type
unique case (instr.itype.funct3)
3'b000:
instruction_o.op = LB;
3'b001:
instruction_o.op = LH;
3'b010:
instruction_o.op = LW;
3'b100:
instruction_o.op = LBU;
3'b101:
instruction_o.op = LHU;
3'b110:
instruction_o.op = LWU;
3'b011:
instruction_o.op = LD;
default:
illegal_instr = 1'b1;
3'b000: instruction_o.op = LB;
3'b001: instruction_o.op = LH;
3'b010: instruction_o.op = LW;
3'b100: instruction_o.op = LBU;
3'b101: instruction_o.op = LHU;
3'b110: instruction_o.op = LWU;
3'b011: instruction_o.op = LD;
default: illegal_instr = 1'b1;
endcase
end
`ifdef ENABLE_ATOMICS
OPCODE_AMO: begin
// we are going to use the load unit for AMOs
instruction_o.fu = LOAD;
instruction_o.rd = instr.stype.imm0;
instruction_o.rs1 = instr.itype.rs1;
// words
if (instr.stype.funct3 == 3'h2) begin
unique case (instr.instr[31:27])
5'h0: instruction_o.op = AMO_ADDW;
5'h1: instruction_o.op = AMO_SWAPW;
5'h2: instruction_o.op = AMO_LRW;
5'h3: instruction_o.op = AMO_SCW;
5'h4: instruction_o.op = AMO_XORW;
5'h8: instruction_o.op = AMO_ORW;
5'hC: instruction_o.op = AMO_ANDW;
5'h10: instruction_o.op = AMO_MINW;
5'h14: instruction_o.op = AMO_MAXW;
5'h18: instruction_o.op = AMO_MINWU;
5'h1C: instruction_o.op = AMO_MAXWU;
default: illegal_instr = 1'b1;
endcase
// double words
end else if (instr.stype.funct3 == 3'h3) begin
unique case (instr.instr[31:27])
5'h0: instruction_o.op = AMO_ADDD;
5'h1: instruction_o.op = AMO_SWAPD;
5'h2: instruction_o.op = AMO_LRD;
5'h3: instruction_o.op = AMO_SCD;
5'h4: instruction_o.op = AMO_XORD;
5'h8: instruction_o.op = AMO_ORD;
5'hC: instruction_o.op = AMO_ANDD;
5'h10: instruction_o.op = AMO_MIND;
5'h14: instruction_o.op = AMO_MAXD;
5'h18: instruction_o.op = AMO_MINDU;
5'h1C: instruction_o.op = AMO_MAXDU;
default: illegal_instr = 1'b1;
endcase
end else begin
illegal_instr = 1'b1;
end
end
`endif
// --------------------------------
// Control Flow Instructions
// --------------------------------
OPCODE_BRANCH: begin
imm_select = SBIMM;
instruction_o.fu = CTRL_FLOW;

View file

@ -1,27 +0,0 @@
// Author:
//
// Date: 25.07.2017
// Description: Ariane Divider
//
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Divider placeholder: declares only the clock and reset ports and has an empty body.
module div (
input logic clk_i, // Clock
input logic rst_ni // Asynchronous reset active low
);
// empty body - no divider logic is implemented in this module
endmodule

View file

@ -20,7 +20,10 @@
import ariane_pkg::*;
module ex_stage #(
parameter int ASID_WIDTH = 1
parameter int ASID_WIDTH = 1,
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000,
parameter int unsigned AXI_ID_WIDTH = 10,
parameter int unsigned AXI_USER_WIDTH = 1
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
@ -95,6 +98,11 @@ module ex_stage #(
input logic [43:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i,
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic dcache_miss_o,
output logic [63:0] instr_if_address_o,
output logic instr_if_data_req_o,
output logic [3:0] instr_if_data_be_o,
@ -102,17 +110,12 @@ module ex_stage #(
input logic instr_if_data_rvalid_i,
input logic [63:0] instr_if_data_rdata_i,
output logic [11:0] data_if_address_index_o,
output logic [43:0] data_if_address_tag_o,
output logic [63:0] data_if_data_wdata_o,
output logic data_if_data_req_o,
output logic data_if_data_we_o,
output logic [7:0] data_if_data_be_o,
output logic data_if_kill_req_o,
output logic data_if_tag_valid_o,
input logic data_if_data_gnt_i,
input logic data_if_data_rvalid_i,
input logic [63:0] data_if_data_rdata_i
// DCache interface
input logic dcache_en_i,
input logic flush_dcache_i,
output logic flush_dcache_ack_o,
AXI_BUS.Master data_if,
AXI_BUS.Master bypass_if
);
// -----
@ -134,19 +137,22 @@ module ex_stage #(
// ----------------
// Multiplication
// ----------------
`ifdef MULT
mult mult_i (
mult i_mult (
.result_o ( mult_result_o ),
.*
);
`endif
// ----------------
// Load-Store Unit
// ----------------
lsu lsu_i (
lsu #(
.CACHE_START_ADDR ( CACHE_START_ADDR ),
.AXI_ID_WIDTH ( AXI_ID_WIDTH ),
.AXI_USER_WIDTH ( AXI_USER_WIDTH )
) lsu_i (
.commit_i ( lsu_commit_i ),
.commit_ready_o ( lsu_commit_ready_o ),
.data_if ( data_if ),
.*
);

View file

@ -75,9 +75,12 @@ module fetch_fifo
always_comb begin : fetch_fifo_logic
// counter
automatic logic [$clog2(DEPTH)-1:0] status_cnt = status_cnt_q;
automatic logic [$clog2(DEPTH)-1:0] write_pointer = write_pointer_q;
automatic logic [$clog2(DEPTH)-1:0] read_pointer = read_pointer_q;
automatic logic [$clog2(DEPTH)-1:0] status_cnt;
automatic logic [$clog2(DEPTH)-1:0] write_pointer;
automatic logic [$clog2(DEPTH)-1:0] read_pointer;
status_cnt = status_cnt_q;
write_pointer = write_pointer_q;
read_pointer = read_pointer_q;
mem_n = mem_q;

View file

@ -368,7 +368,7 @@ module issue_read_operands (
`ifndef SYNTHESIS
`ifndef verilator
assert property (
@(posedge clk_i) (alu_valid_q || lsu_valid_q || csr_valid_q || branch_valid_q || mult_valid_q) |-> (!$isunknown(operand_a_q) && !$isunknown(operand_b_q)))
@(posedge clk_i) (branch_valid_q) |-> (!$isunknown(operand_a_q) && !$isunknown(operand_b_q)))
else $warning ("Got unknown value in one of the operands");
`endif
`endif

View file

@ -43,10 +43,12 @@ module load_unit (
// D$ interface
output logic [11:0] address_index_o,
output logic [43:0] address_tag_o,
output amo_t amo_op_o,
output logic [63:0] data_wdata_o,
output logic data_req_o,
output logic data_we_o,
output logic [7:0] data_be_o,
output logic [1:0] data_size_o,
output logic kill_req_o,
output logic tag_valid_o,
input logic data_gnt_i,
@ -91,6 +93,7 @@ module load_unit (
kill_req_o = 1'b0;
tag_valid_o = 1'b0;
data_be_o = lsu_ctrl_i.be;
data_size_o = extract_transfer_size(lsu_ctrl_i.operator);
pop_ld_o = 1'b0;
case (CS)
@ -277,6 +280,41 @@ module load_unit (
end
end
// ---------------
// AMO Operation
// ---------------
always_comb begin : amo_op_select
    // Without a valid LSU request (or for any non-AMO operator) no atomic
    // operation is requested from the data cache.
    amo_op_o = AMO_NONE;
    if (lsu_ctrl_i.valid) begin
        // The *W and *D operator variants select the same amo_t operation.
        case (lsu_ctrl_i.operator)
            AMO_LRW,   AMO_LRD:   amo_op_o = AMO_LR;
            AMO_SCW,   AMO_SCD:   amo_op_o = AMO_SC;
            AMO_SWAPW, AMO_SWAPD: amo_op_o = AMO_SWAP;
            AMO_ADDW,  AMO_ADDD:  amo_op_o = AMO_ADD;
            AMO_ANDW,  AMO_ANDD:  amo_op_o = AMO_AND;
            AMO_ORW,   AMO_ORD:   amo_op_o = AMO_OR;
            AMO_XORW,  AMO_XORD:  amo_op_o = AMO_XOR;
            AMO_MAXW,  AMO_MAXD:  amo_op_o = AMO_MAX;
            AMO_MAXWU, AMO_MAXDU: amo_op_o = AMO_MAXU;
            AMO_MINW,  AMO_MIND:  amo_op_o = AMO_MIN;
            AMO_MINWU, AMO_MINDU: amo_op_o = AMO_MINU;
            default:              amo_op_o = AMO_NONE;
        endcase
    end
end
// ---------------
// Sign Extend
// ---------------

View file

@ -19,7 +19,10 @@
import ariane_pkg::*;
module lsu #(
parameter int ASID_WIDTH = 1
parameter int ASID_WIDTH = 1,
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000,
parameter int unsigned AXI_ID_WIDTH = 10,
parameter int unsigned AXI_USER_WIDTH = 1
)(
input logic clk_i,
input logic rst_ni,
@ -56,6 +59,10 @@ module lsu #(
input logic [43:0] satp_ppn_i, // From CSR register file
input logic [ASID_WIDTH-1:0] asid_i, // From CSR register file
input logic flush_tlb_i,
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic dcache_miss_o,
// Instruction memory/cache
output logic [63:0] instr_if_address_o,
output logic instr_if_data_req_o,
@ -63,18 +70,13 @@ module lsu #(
input logic instr_if_data_gnt_i,
input logic instr_if_data_rvalid_i,
input logic [63:0] instr_if_data_rdata_i,
// Data cache
output logic [11:0] data_if_address_index_o,
output logic [43:0] data_if_address_tag_o,
output logic [63:0] data_if_data_wdata_o,
output logic data_if_data_req_o,
output logic data_if_data_we_o,
output logic [7:0] data_if_data_be_o,
output logic data_if_kill_req_o,
output logic data_if_tag_valid_o,
input logic data_if_data_gnt_i,
input logic data_if_data_rvalid_i,
input logic [63:0] data_if_data_rdata_i,
input logic dcache_en_i,
input logic flush_dcache_i,
output logic flush_dcache_ack_o,
// Data cache refill port
AXI_BUS.Master data_if,
AXI_BUS.Master bypass_if,
output exception_t lsu_exception_o // to WB, signal exception status LD/ST exception
@ -126,38 +128,43 @@ module lsu #(
exception_t ld_ex;
exception_t st_ex;
// ---------------
// Memory Arbiter
// ---------------
// ------------
// NB Dcache
// ------------
logic [2:0][11:0] address_index_i;
logic [2:0][43:0] address_tag_i;
logic [2:0][63:0] data_wdata_i;
logic [2:0] data_req_i;
logic [2:0] data_we_i;
logic [2:0][1:0] data_size_i;
logic [2:0] kill_req_i;
logic [2:0] tag_valid_i;
logic [2:0][7:0] data_be_i;
logic [2:0] data_gnt_o;
logic [2:0] data_rvalid_o;
logic [2:0][63:0] data_rdata_o;
amo_t [2:0] amo_op_i;
// AMO operations always go through the load unit
assign amo_op_i[0] = AMO_NONE;
assign amo_op_i[2] = AMO_NONE;
// decreasing priority
// Port 0: PTW
// Port 1: Load Unit
// Port 2: Store Unit
dcache_arbiter dcache_arbiter_i (
nbdcache #(
.CACHE_START_ADDR ( CACHE_START_ADDR ),
.AXI_ID_WIDTH ( AXI_ID_WIDTH ),
.AXI_USER_WIDTH ( AXI_USER_WIDTH )
) i_nbdcache (
// to D$
.address_index_o ( data_if_address_index_o ),
.address_tag_o ( data_if_address_tag_o ),
.data_wdata_o ( data_if_data_wdata_o ),
.data_req_o ( data_if_data_req_o ),
.data_we_o ( data_if_data_we_o ),
.data_be_o ( data_if_data_be_o ),
.kill_req_o ( data_if_kill_req_o ),
.tag_valid_o ( data_if_tag_valid_o ),
.data_gnt_i ( data_if_data_gnt_i ),
.data_rvalid_i ( data_if_data_rvalid_i ),
.data_rdata_i ( data_if_data_rdata_i ),
.data_if ( data_if ),
.bypass_if ( bypass_if ),
.enable_i ( dcache_en_i ),
.flush_i ( flush_dcache_i ),
.flush_ack_o ( flush_dcache_ack_o ),
// from PTW, Load Unit and Store Unit
.address_index_i ( address_index_i ),
.address_tag_i ( address_tag_i ),
@ -165,11 +172,19 @@ module lsu #(
.data_req_i ( data_req_i ),
.data_we_i ( data_we_i ),
.data_be_i ( data_be_i ),
.data_size_i ( data_size_i ),
.kill_req_i ( kill_req_i ),
.tag_valid_i ( tag_valid_i ),
.data_gnt_o ( data_gnt_o ),
.data_rvalid_o ( data_rvalid_o ),
.data_rdata_o ( data_rdata_o ),
.amo_op_i ( amo_op_i ),
.amo_commit_i ( ),
.amo_valid_o ( ),
.amo_result_o ( ),
.amo_flush_i ( 1'b0 ),
.miss_o ( dcache_miss_o ),
.*
);
@ -180,7 +195,7 @@ module lsu #(
.INSTR_TLB_ENTRIES ( 16 ),
.DATA_TLB_ENTRIES ( 16 ),
.ASID_WIDTH ( ASID_WIDTH )
) mmu_i (
) i_mmu (
// misaligned bypass
.misaligned_ex_i ( misaligned_exception ),
.lsu_is_store_i ( st_translation_req ),
@ -197,6 +212,7 @@ module lsu #(
.data_req_o ( data_req_i [0] ),
.data_we_o ( data_we_i [0] ),
.data_be_o ( data_be_i [0] ),
.data_size_o ( data_size_i [0] ),
.kill_req_o ( kill_req_i [0] ),
.tag_valid_o ( tag_valid_i [0] ),
.data_gnt_i ( data_gnt_o [0] ),
@ -207,7 +223,7 @@ module lsu #(
// ------------------
// Store Unit
// ------------------
store_unit store_unit_i (
store_unit i_store_unit (
.valid_i ( st_valid_i ),
.lsu_ctrl_i ( lsu_ctrl ),
.pop_st_o ( pop_st ),
@ -232,6 +248,7 @@ module lsu #(
.data_req_o ( data_req_i [2] ),
.data_we_o ( data_we_i [2] ),
.data_be_o ( data_be_i [2] ),
.data_size_o ( data_size_i [2] ),
.kill_req_o ( kill_req_i [2] ),
.tag_valid_o ( tag_valid_i [2] ),
.data_gnt_i ( data_gnt_o [2] ),
@ -242,7 +259,7 @@ module lsu #(
// ------------------
// Load Unit
// ------------------
load_unit load_unit_i (
load_unit i_load_unit (
.valid_i ( ld_valid_i ),
.lsu_ctrl_i ( lsu_ctrl ),
.pop_ld_o ( pop_ld ),
@ -264,9 +281,11 @@ module lsu #(
.address_index_o ( address_index_i [1] ),
.address_tag_o ( address_tag_i [1] ),
.data_wdata_o ( data_wdata_i [1] ),
.amo_op_o ( amo_op_i [1] ),
.data_req_o ( data_req_i [1] ),
.data_we_o ( data_we_i [1] ),
.data_be_o ( data_be_i [1] ),
.data_size_o ( data_size_i [1] ),
.kill_req_o ( kill_req_i [1] ),
.tag_valid_o ( tag_valid_i [1] ),
.data_gnt_i ( data_gnt_o [1] ),
@ -278,7 +297,7 @@ module lsu #(
// ---------------------
// Result Sequentialize
// ---------------------
lsu_arbiter lsu_arbiter_i (
lsu_arbiter i_lsu_arbiter (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
@ -514,9 +533,13 @@ module lsu_bypass (
assign ready_o = empty;
always_comb begin
automatic logic [1:0] status_cnt = status_cnt_q;
automatic logic write_pointer = write_pointer_q;
automatic logic read_pointer = read_pointer_q;
automatic logic [1:0] status_cnt;
automatic logic write_pointer;
automatic logic read_pointer;
status_cnt = status_cnt_q;
write_pointer = write_pointer_q;
read_pointer = read_pointer_q;
mem_n = mem_q;
// we've got a valid LSU request

View file

@ -67,8 +67,11 @@ module lsu_arbiter (
// Read-Write Process
// -------------------
always_comb begin : read_write_fifo
automatic logic [$clog2(WIDTH)-1:0] status_cnt = status_cnt_q;
automatic logic [$clog2(WIDTH)-1:0] write_pointer = write_pointer_q;
automatic logic [$clog2(WIDTH)-1:0] status_cnt;
automatic logic [$clog2(WIDTH)-1:0] write_pointer;
status_cnt = status_cnt_q;
write_pointer = write_pointer_q;
// default assignments
mem_n = mem_q;

1037
src/miss_handler.sv Normal file

File diff suppressed because it is too large Load diff

View file

@ -63,6 +63,9 @@ module mmu #(
input logic [43:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i,
input logic flush_tlb_i,
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
// Memory interfaces
// Instruction memory/cache
output logic [63:0] instr_if_address_o,
@ -78,6 +81,7 @@ module mmu #(
output logic data_req_o,
output logic data_we_o,
output logic [7:0] data_be_o,
output logic [1:0] data_size_o,
output logic kill_req_o,
output logic tag_valid_o,
input logic data_gnt_i,
@ -406,7 +410,7 @@ module mmu #(
// Registers
// ----------
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
if (~rst_ni) begin
lsu_vaddr_q <= '0;
lsu_req_q <= '0;
misaligned_ex_q <= '0;

View file

@ -22,8 +22,7 @@
import ariane_pkg::*;
module mult
(
module mult (
input logic clk_i,
input logic rst_ni,
input logic [TRANS_ID_BITS-1:0] trans_id_i,
@ -36,116 +35,525 @@ module mult
output logic mult_ready_o,
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
);
logic mul_valid;
logic div_valid;
logic div_ready_i; // receiver of division result is able to accept the result
logic [TRANS_ID_BITS-1:0] mul_trans_id;
logic [TRANS_ID_BITS-1:0] div_trans_id;
logic [63:0] mul_result;
logic [63:0] div_result;
// ----------------
// Mock Multiplier
// ----------------
function automatic logic [63:0] sign_extend (logic [31:0] operand);
return {{32{operand[31]}}, operand[31:0]};
endfunction
logic div_valid_op;
logic mul_valid_op;
// Input Arbitration
assign mul_valid_op = mult_valid_i && (operator_i inside { MUL, MULH, MULHU, MULHSU, MULW });
assign div_valid_op = mult_valid_i && (operator_i inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW });
assign mult_valid_o = mult_valid_i;
assign mult_trans_id_o = trans_id_i;
assign mult_ready_o = 1'b1;
// ---------------------
// Output Arbitration
// ---------------------
// we give precedence to multiplication as the divider supports stalling and the multiplier is
// just a dumb pipelined multiplier
assign div_ready_i = (mul_valid) ? 1'b0 : 1'b1;
assign mult_trans_id_o = (mul_valid) ? mul_trans_id : div_trans_id;
assign result_o = (mul_valid) ? mul_result : div_result;
assign mult_valid_o = div_valid | mul_valid;
// mult_ready_o = division as the multiplication will unconditionally be ready to accept new requests
// sign extend operand a and b
logic sign_a, sign_b;
// ---------------------
// Multiplication
// ---------------------
mul i_mul (
.result_o ( mul_result ),
.mult_valid_i ( mul_valid_op ),
.mult_valid_o ( mul_valid ),
.mult_trans_id_o ( mul_trans_id ),
.mult_ready_o ( ), // this unit is unconditionally ready
.*
);
// ---------------------
// Division
// ---------------------
logic [5:0] ff1_result; // holds the index of the last '1' (as the input operand is reversed)
logic ff1_no_one; // no one was found by find first one
logic [63:0] ff1_input; // input to find first one
logic [63:0] operand_b_rev, operand_b_rev_neg, operand_b_shift; // couple of different representations for the dividend
logic [6:0] div_shift; // amount of which to shift to left
logic div_signed; // should this operation be performed as a signed or unsigned division
logic div_op_signed; // actual sign signal depends on div_signed and the MSB of the word
logic [63:0] operand_b, operand_a; // input operands after input MUX (input silencing, word operations or full inputs)
logic [63:0] result; // result before result mux
logic word_op; // is it a word operation
logic rem; // is it a reminder (or not a reminder e.g.: a division)
logic word_op_d, word_op_q; // save whether the operation was signed or not
// is this a signed operation?
assign div_signed = (operator_i inside {DIV, DIVW, REM, REMW}) ? 1'b1 : 1'b0;
// if this operation is signed look at the actual sign bit to determine whether we should perform signed or unsigned division
assign div_op_signed = div_signed & operand_b[63];
// reverse input operands
generate
for (genvar k = 0; k < 64; k++)
assign operand_b_rev[k] = operand_b[63-k];
endgenerate
// negated reverse input operand, used for signed divisions
assign operand_b_rev_neg = ~operand_b_rev;
assign ff1_input = (div_op_signed) ? operand_b_rev_neg : operand_b_rev;
// Prepare the divider input operands and the division control flags.
//
// Outputs driven here:
//   operand_a / operand_b : divider operands; forced to zero unless a division
//                           or remainder operator is presented with mult_valid_i
//   word_op               : current operator is a 32-bit (W) operation
//   rem                   : current operator is a remainder operation
//   word_op_d             : next value of word_op_q, which selects whether the
//                           divider result is passed through sext32()
always_comb begin
    // silence the inputs
    operand_a = '0;
    operand_b = '0;
    // control signals: hold the stored flag while no new division is presented
    word_op_d = word_op_q;
    word_op   = 1'b0;
    rem       = 1'b0;
    // we've got a new division operation
    if (mult_valid_i && operator_i inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin
        // is this a word operation?
        if (operator_i inside {DIVW, DIVUW, REMW, REMUW}) begin
            word_op = 1'b1;
            // yes, so extend the lower 32 bit: sign extension for signed
            // operations, zero extension for unsigned ones
            if (div_signed) begin
                operand_a = sext32(operand_a_i[31:0]);
                operand_b = sext32(operand_b_i[31:0]);
            end else begin
                operand_a = {32'b0, operand_a_i[31:0]};
                operand_b = {32'b0, operand_b_i[31:0]};
            end
            // remember that the result must be sign extended from 32 bit,
            // this is done for all word operations
            word_op_d = 1'b1;
        // regular operation
        end else begin
            // no extension is necessary as we are already using the full 64 bit
            operand_a = operand_a_i;
            operand_b = operand_b_i;
            // A 64-bit operation must clear the flag again. Without this the
            // flag stays set after the first word operation and every later
            // 64-bit result would be cut down to a sign-extended 32-bit value.
            word_op_d = 1'b0;
        end
        // is this a modulo?
        if (operator_i inside {REM, REMU, REMW, REMUW}) begin
            rem = 1'b1;
        end
    end
end
// ---------------------
// Find First one
// ---------------------
// this unit is used to speed up the sequential division by shifting the divisor (operand_b) first
alu_ff #(
.LEN ( 64 )
) i_ff1 (
.in_i ( ff1_input ), // signed = operand_b_rev_neg, unsigned operand_b_rev
.first_one_o ( ff1_result ),
.no_ones_o ( ff1_no_one )
);
// if the find-first-one input contains no set bit, shift by the full length (64)
assign div_shift = ff1_no_one ? 7'd64 : ff1_result;
// prepare the divisor (operand_b) by shifting it left by div_shift
assign operand_b_shift = operand_b <<< div_shift;
// ---------------------
// Serial Divider
// ---------------------
serial_divider #(
.C_WIDTH ( 64 ),
.C_LOG_WIDTH ( $clog2(64) + 1 )
) i_div (
.Clk_CI ( clk_i ),
.Rst_RBI ( rst_ni ),
.TransId_DI ( trans_id_i ),
.OpA_DI ( operand_a ),
.OpB_DI ( operand_b_shift ),
.OpBShift_DI ( div_shift ),
.OpBIsZero_SI ( ~(|operand_b) ),
.OpBSign_SI ( div_op_signed ), // gate this to 0 in case of unsigned ops
.OpCode_SI ( {rem, div_signed} ), // 00: udiv, 10: urem, 01: div, 11: rem
.InVld_SI ( div_valid_op ),
.OutRdy_SO ( mult_ready_o ),
.OutRdy_SI ( div_ready_i ),
.OutVld_SO ( div_valid ),
.TransId_DO ( div_trans_id ),
.Res_DO ( result )
);
// Result multiplexer
// if it was a signed word operation the bit will be set and the result will be sign extended accordingly
assign div_result = (word_op_q) ? sext32(result) : result;
// ---------------------
// Registers
// ---------------------
// State register for the word-operation flag that selects sext32() on the
// divider result. word_op_q is a single-bit flag, so it is reset with a plain
// bit literal; the previous reset value was the unrelated fu_op enum literal
// ADD (presumably encoded as zero -- confirm against ariane_pkg).
always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
        word_op_q <= 1'b0;
    end else begin
        word_op_q <= word_op_d;
    end
end
endmodule
/* File : mult.sv
* Ver : 1.0
* Date : 15.03.2016
*
*
* Copyright (C) 2017 ETH Zurich, University of Bologna
*
* Description: this is a simple serial divider for signed integers.
*
*
* Authors : Michael Schaffner (schaffner@iis.ee.ethz.ch)
* Andreas Traber (atraber@iis.ee.ethz.ch)
*
*/
// Sequential divider controlled by a three-state FSM (IDLE, DIVIDE, FINISH).
//
// Operation as implemented below:
//   IDLE   : OutRdy_SO is high. When InVld_SI is seen the operands, flags,
//            transaction id and iteration counter are loaded (LoadEn_S).
//   DIVIDE : one iteration per cycle until the counter Cnt_DP reads zero.
//   FINISH : OutVld_SO is high and the result is held until OutRdy_SI is seen.
//
// Parameters:
//   C_WIDTH     : width of the operands and of the result
//   C_LOG_WIDTH : width of the iteration counter / OpBShift_DI; the assertion
//                 at the end of the module requires $clog2(C_WIDTH+1)
module serial_divider #(
parameter int unsigned C_WIDTH = 32,
parameter int unsigned C_LOG_WIDTH = 6
)(
input logic Clk_CI,
input logic Rst_RBI,
// input IF
input logic [TRANS_ID_BITS-1:0] TransId_DI,
input logic [C_WIDTH-1:0] OpA_DI,
input logic [C_WIDTH-1:0] OpB_DI,
// value loaded into the iteration counter when a new operation is accepted
input logic [C_LOG_WIDTH-1:0] OpBShift_DI,
input logic OpBIsZero_SI,
//
input logic OpBSign_SI, // gate this to 0 in case of unsigned ops
input logic [1:0] OpCode_SI, // 0: udiv, 2: urem, 1: div, 3: rem
// handshake
input logic InVld_SI,
// output IF
// OutRdy_SO: high only in IDLE while no request is presented (see FSM)
output logic OutRdy_SO,
// OutRdy_SI: result consumer is ready, releases the FINISH state
input logic OutRdy_SI,
output logic OutVld_SO,
output logic [TRANS_ID_BITS-1:0] TransId_DO,
output logic [C_WIDTH-1:0] Res_DO
);
// ----------------------------------
// Signal Declarations
// ----------------------------------
// naming: *_DP / *_SP are register outputs, *_DN / *_SN the matching next-state values
logic [C_WIDTH-1:0] ResReg_DP, ResReg_DN;
logic [C_WIDTH-1:0] ResReg_DP_rev;
logic [C_WIDTH-1:0] AReg_DP, AReg_DN;
logic [C_WIDTH-1:0] BReg_DP, BReg_DN;
logic OpBIsZero_SP, OpBIsZero_SN;
logic [TRANS_ID_BITS-1:0] TransId_DP, TransId_DN;
logic RemSel_SN, RemSel_SP;
logic CompInv_SN, CompInv_SP;
logic ResInv_SN, ResInv_SP;
logic [C_WIDTH-1:0] AddMux_D;
logic [C_WIDTH-1:0] AddOut_D;
logic [C_WIDTH-1:0] AddTmp_D;
logic [C_WIDTH-1:0] BMux_D;
logic [C_WIDTH-1:0] OutMux_D;
logic [C_LOG_WIDTH-1:0] Cnt_DP, Cnt_DN;
logic CntZero_S;
logic ARegEn_S, BRegEn_S, ResRegEn_S, ABComp_S, PmSel_S, LoadEn_S;
enum logic [1:0] {IDLE, DIVIDE, FINISH} State_SN, State_SP;
// -----------------
// Datapath
// -----------------
// adder mode: add only while loading, and then only if the operation is not a
// signed one (OpCode_SI[0]) whose operand A sign differs from OpBSign_SI;
// in every other case the adder subtracts
assign PmSel_S = LoadEn_S & ~(OpCode_SI[0] & (OpA_DI[$high(OpA_DI)] ^ OpBSign_SI));
// muxes
assign AddMux_D = (LoadEn_S) ? OpA_DI : BReg_DP;
// attention: logical shift in case of negative operand B!
// while iterating, B is shifted right by one and CompInv_SP is shifted in at the MSB
assign BMux_D = (LoadEn_S) ? OpB_DI : {CompInv_SP, (BReg_DP[$high(BReg_DP):1])};
// result bits are collected MSB-first (see ResReg_DN), so the register is bit-reversed for output
assign ResReg_DP_rev = {<<{ResReg_DP}};
// remainder operations return the A register, divisions the reversed result register
assign OutMux_D = (RemSel_SP) ? AReg_DP : ResReg_DP_rev;
// invert if necessary
assign Res_DO = (ResInv_SP) ? -$signed(OutMux_D) : OutMux_D;
// main comparator
assign ABComp_S = ((AReg_DP == BReg_DP) | ((AReg_DP > BReg_DP) ^ CompInv_SP)) & ((|AReg_DP) | OpBIsZero_SP);
// main adder
// while loading the first addend is zero, so A is loaded with +OpA_DI or -OpA_DI
assign AddTmp_D = (LoadEn_S) ? 0 : AReg_DP;
assign AddOut_D = (PmSel_S) ? AddTmp_D + AddMux_D : AddTmp_D - $signed(AddMux_D);
// -----------------
// Counter
// -----------------
// loaded with OpBShift_DI, then counts down and saturates at zero
assign Cnt_DN = (LoadEn_S) ? OpBShift_DI :
(~CntZero_S) ? Cnt_DP - 1 : Cnt_DP;
assign CntZero_S = ~(|Cnt_DP);
// -----------------
// FSM
// -----------------
always_comb begin : p_fsm
// default
State_SN = State_SP;
OutVld_SO = 1'b0;
OutRdy_SO = 1'b0;
LoadEn_S = 1'b0;
ARegEn_S = 1'b0;
BRegEn_S = 1'b0;
ResRegEn_S = 1'b0;
case (State_SP)
// wait for a request; ready is withdrawn in the same cycle a request is accepted
IDLE: begin
OutRdy_SO = 1'b1;
if(InVld_SI) begin
OutRdy_SO = 1'b0;
OutVld_SO = 1'b0;
ARegEn_S = 1'b1;
BRegEn_S = 1'b1;
LoadEn_S = 1'b1;
State_SN = DIVIDE;
end
end
// one iteration per cycle: A is updated only when the comparator fires,
// B and the result register shift every cycle
DIVIDE: begin
ARegEn_S = ABComp_S;
BRegEn_S = 1'b1;
ResRegEn_S = 1'b1;
// calculation finished
// one more divide cycle (C_WIDTH th divide cycle)
if (CntZero_S) begin
State_SN = FINISH;
end
end
// hold the result valid until the consumer signals ready
FINISH: begin
OutVld_SO = 1'b1;
if(OutRdy_SI) begin
State_SN = IDLE;
end
end
default : /* default */ ;
endcase
end
// -----------------
// Registers
// -----------------
// get flags
// all flags are sampled when a new operation is loaded and held otherwise
assign RemSel_SN = (LoadEn_S) ? OpCode_SI[1] : RemSel_SP;
assign CompInv_SN = (LoadEn_S) ? OpBSign_SI : CompInv_SP;
assign OpBIsZero_SN = (LoadEn_S) ? OpBIsZero_SI : OpBIsZero_SP;
// negate the output for signed operations with differing operand signs,
// except for a division (not remainder) whose operand B is zero
assign ResInv_SN = (LoadEn_S) ? (~OpBIsZero_SI | OpCode_SI[1]) & OpCode_SI[0] & (OpA_DI[$high(OpA_DI)] ^ OpBSign_SI) : ResInv_SP;
// transaction id
assign TransId_DN = (LoadEn_S) ? TransId_DI : TransId_DP;
assign TransId_DO = TransId_DP;
assign AReg_DN = (ARegEn_S) ? AddOut_D : AReg_DP;
assign BReg_DN = (BRegEn_S) ? BMux_D : BReg_DP;
// cleared on load; during DIVIDE the comparator result is shifted in at the MSB
assign ResReg_DN = (LoadEn_S) ? '0 :
(ResRegEn_S) ? {ABComp_S, ResReg_DP[$high(ResReg_DP):1]} : ResReg_DP;
always_ff @(posedge Clk_CI or negedge Rst_RBI) begin : p_regs
if (~Rst_RBI) begin
State_SP <= IDLE;
AReg_DP <= '0;
BReg_DP <= '0;
ResReg_DP <= '0;
Cnt_DP <= '0;
TransId_DP <= '0;
RemSel_SP <= 1'b0;
CompInv_SP <= 1'b0;
ResInv_SP <= 1'b0;
OpBIsZero_SP <= 1'b0;
end else begin
State_SP <= State_SN;
AReg_DP <= AReg_DN;
BReg_DP <= BReg_DN;
ResReg_DP <= ResReg_DN;
Cnt_DP <= Cnt_DN;
TransId_DP <= TransId_DN;
RemSel_SP <= RemSel_SN;
CompInv_SP <= CompInv_SN;
ResInv_SP <= ResInv_SN;
OpBIsZero_SP <= OpBIsZero_SN;
end
end
// ------------
// Assertions
// ------------
// simulation-only check that the counter is wide enough to hold the value C_WIDTH
`ifndef SYNTHESIS
initial begin : p_assertions
assert (C_LOG_WIDTH == $clog2(C_WIDTH+1)) else $error("C_LOG_WIDTH must be $clog2(C_WIDTH+1)");
end
`endif
endmodule
// --------------------------------------------------
// Multiplication Unit with one pipeline register
// --------------------------------------------------
module mul (
input logic clk_i,
input logic rst_ni,
input logic [TRANS_ID_BITS-1:0] trans_id_i,
input logic mult_valid_i,
input fu_op operator_i,
input logic [63:0] operand_a_i,
input logic [63:0] operand_b_i,
output logic [63:0] result_o,
output logic mult_valid_o,
output logic mult_ready_o,
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
);
// Pipeline register
logic [TRANS_ID_BITS-1:0] trans_id_q;
logic mult_valid_q;
logic [63:0] result_q;
// control registers
logic sign_a, sign_b;
logic mult_valid;
// control signals
assign mult_valid_o = mult_valid_q;
assign result_o = result_q;
assign mult_trans_id_o = trans_id_q;
assign mult_ready_o = 1'b1;
assign mult_valid = mult_valid_i && (operator_i inside {MUL, MULH, MULHU, MULHSU, MULW});
// datapath
logic [127:0] mult_result;
logic [63:0] mult_result_w;
assign mult_result = $signed({operand_a_i[63] & sign_a, operand_a_i}) * $signed({operand_b_i[63] & sign_b, operand_b_i});
assign mult_result_w = $signed({operand_a_i[31] & sign_a, operand_a_i[31:0]}) * $signed({operand_b_i[31] & sign_b, operand_b_i[31:0]});
always_comb begin : mul_div
// perform multiplication
result_o = '0;
// Sign Select MUX
always_comb begin
sign_a = 1'b0;
sign_b = 1'b0;
case (operator_i)
// MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register
MUL:
result_o = mult_result[63:0];
// signed multiplication
if (operator_i == MULH) begin
sign_a = 1'b1;
sign_b = 1'b1;
// signed - unsigned multiplication
end else if (operator_i == MULHSU) begin
sign_a = 1'b1;
// unsigned multiplication
end else begin
sign_a = 1'b0;
sign_b = 1'b0;
end
end
MULH: begin
sign_a = 1'b1;
sign_b = 1'b1;
result_o = mult_result[127:64];
end
// -----------------------
// Output pipeline register
// -----------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mult_valid_q <= '0;
trans_id_q <= '0;
result_q <= '0;
end else begin
// Input silencing
trans_id_q <= trans_id_i;
// Output Register
mult_valid_q <= mult_valid;
MULHU:
result_o = mult_result[127:64];
MULHSU: begin
sign_a = 1'b1;
result_o = mult_result[127:64];
end
MULW:
result_o = sign_extend(mult_result_w[31:0]);
// Divisions
DIV: begin
result_o = $signed(operand_a_i) / $signed(operand_b_i);
// division by zero
// set all bits
if (operand_b_i == '0)
result_o = -1;
end
DIVU: begin
result_o = operand_a_i / operand_b_i;
// division by zero
// set all bits
if (operand_b_i == '0)
result_o = -1;
end
DIVW: begin
result_o = sign_extend($signed(operand_a_i[31:0]) / $signed(operand_b_i[31:0]));
// division by zero
// set all bits
if (operand_b_i == '0)
result_o = -1;
end
DIVUW: begin
result_o = sign_extend(operand_a_i[31:0] / operand_b_i[31:0]);
// division by zero
// set all bits
if (operand_b_i == '0)
result_o = -1;
end
REM: begin
result_o = $signed(operand_a_i) % $signed(operand_b_i);
// division by zero
if (operand_b_i == '0)
result_o = operand_a_i;
end
REMU: begin
result_o = operand_a_i % operand_b_i;
// division by zero
if (operand_b_i == '0)
result_o = operand_a_i;
end
REMW: begin
result_o = sign_extend($signed(operand_a_i[31:0]) % $signed(operand_b_i[31:0]));
// division by zero
if (operand_b_i == '0)
result_o = operand_a_i;
end
REMUW: begin
result_o = sign_extend(operand_a_i[31:0] % operand_b_i[31:0]);
// division by zero
if (operand_b_i == '0)
result_o = operand_a_i;
end
endcase
case (operator_i)
// MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register
MUL: result_q <= mult_result[63:0];
MULH: result_q <= mult_result[127:64];
MULHU: result_q <= mult_result[127:64];
MULHSU: result_q <= mult_result[127:64];
MULW: result_q <= sext32(mult_result[31:0]);
endcase
end
end
endmodule
// -----------------
// Find First One
// -----------------
// Combinational find-first-one built as a binary tree.
//
// Parameters:
//   LEN         : width of the input vector (expected to be at least 2)
// Ports:
//   in_i        : vector to search
//   first_one_o : index of the lowest-indexed set bit of in_i; not meaningful
//                 while no_ones_o is high
//   no_ones_o   : high when no bit of in_i is set
//
// The leaf level handles two input bits per node. The range checks compare
// against LEN-1 (the highest valid bit index), so widths that are not a power
// of two never read in_i out of range. For power-of-two widths the generated
// logic is the same as with the previous comparison against LEN.
module alu_ff #(
    parameter int unsigned LEN = 32
)(
    input  logic [LEN-1:0]         in_i,
    output logic [$clog2(LEN)-1:0] first_one_o,
    output logic                   no_ones_o
);
    localparam int unsigned NUM_LEVELS = $clog2(LEN);

    logic [LEN-1:0] [NUM_LEVELS-1:0]           index_lut;   // index_lut[j] == j
    logic [2**NUM_LEVELS-1:0]                  sel_nodes;   // node covers at least one set bit
    logic [2**NUM_LEVELS-1:0] [NUM_LEVELS-1:0] index_nodes; // index selected by each node

    // ----------------------------
    // Generate Tree Structure
    // ----------------------------
    generate
        for (genvar j = 0; j < LEN; j++) begin
            assign index_lut[j] = $unsigned(j);
        end
    endgenerate

    generate
        for (genvar level = 0; level < NUM_LEVELS; level++) begin
            // inner nodes: prefer the first (lower-index) child if it saw a one
            if (level < NUM_LEVELS-1) begin
                for (genvar l = 0; l < 2**level; l++) begin
                    assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
                    assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
                        index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
                end
            end
            // leaf nodes: each one looks at input bits k*2 and k*2+1
            if (level == NUM_LEVELS-1) begin
                for (genvar k = 0; k < 2**level; k++) begin
                    // if two successive indices are still in the vector...
                    if (k * 2 < LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = in_i[k*2] | in_i[k*2+1];
                        assign index_nodes[2**level-1+k] = (in_i[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
                    end
                    // if only the first index is still in the vector...
                    if (k * 2 == LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = in_i[k*2];
                        assign index_nodes[2**level-1+k] = index_lut[k*2];
                    end
                    // if index is out of range
                    if (k * 2 > LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = 1'b0;
                        assign index_nodes[2**level-1+k] = '0;
                    end
                end
            end
        end
    endgenerate

    // --------------------
    // Connect Output
    // --------------------
    assign first_one_o = index_nodes[0];
    assign no_ones_o   = ~sel_nodes[0];
endmodule

377
src/nbdcache.sv Normal file
View file

@ -0,0 +1,377 @@
// Author: Florian Zaruba, ETH Zurich
// Date: 13.10.2017
// Description: Nonblocking private L1 dcache
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
import ariane_pkg::*;
import nbdcache_pkg::*;
// Top level of the non-blocking L1 data cache.
//
// Structure as instantiated below:
//   - three cache_ctrl instances, one per request port (index 0..2)
//   - one miss_handler shared by the three controllers
//   - per way: one data SRAM and one tag SRAM
//   - one SRAM holding the dirty and valid bits of all ways
//   - tag_cmp, which arbitrates the four memory requesters (miss handler and
//     the three controllers) and reports the hit way
module nbdcache #(
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000,
parameter int unsigned AXI_ID_WIDTH = 10,
parameter int unsigned AXI_USER_WIDTH = 1
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// Cache management
input logic enable_i, // from CSR
input logic flush_i, // high until acknowledged
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st
// Cache AXI refill port
AXI_BUS.Master data_if,
AXI_BUS.Master bypass_if,
// AMO interface
input logic amo_commit_i, // commit atomic memory operation
output logic amo_valid_o, // we have a valid AMO result
output logic [63:0] amo_result_o, // result of atomic memory operation
input logic amo_flush_i, // forget about AMO
// Request ports
// every signal below carries one entry per request port (three ports)
input logic [2:0][INDEX_WIDTH-1:0] address_index_i,
input logic [2:0][TAG_WIDTH-1:0] address_tag_i,
input logic [2:0][63:0] data_wdata_i,
input logic [2:0] data_req_i,
input logic [2:0] data_we_i,
input logic [2:0][7:0] data_be_i,
input logic [2:0][1:0] data_size_i,
input logic [2:0] kill_req_i,
input logic [2:0] tag_valid_i,
output logic [2:0] data_gnt_o,
output logic [2:0] data_rvalid_o,
output logic [2:0][63:0] data_rdata_o,
input amo_t [2:0] amo_op_i
);
// -------------------------------
// Controller <-> Arbiter
// -------------------------------
// 1. Miss handler
// 2. PTW
// 3. Load Unit
// 4. Store unit
// index 0 of the arrays below belongs to the miss handler,
// index i+1 to the cache controller of request port i
logic [3:0][SET_ASSOCIATIVITY-1:0] req;
logic [3:0][INDEX_WIDTH-1:0] addr;
logic [3:0] gnt;
cache_line_t [SET_ASSOCIATIVITY-1:0] rdata;
logic [3:0][TAG_WIDTH-1:0] tag;
cache_line_t [3:0] wdata;
logic [3:0] we;
cl_be_t [3:0] be;
logic [SET_ASSOCIATIVITY-1:0] hit_way;
// -------------------------------
// Controller <-> Miss unit
// -------------------------------
logic [2:0] busy;
logic [2:0][55:0] mshr_addr;
logic [2:0] mshr_addr_matches;
logic [63:0] critical_word;
logic critical_word_valid;
logic [2:0][$bits(miss_req_t)-1:0] miss_req;
logic [2:0] miss_gnt;
logic [2:0] active_serving;
logic [2:0] bypass_gnt;
logic [2:0] bypass_valid;
logic [2:0][63:0] bypass_data;
// -------------------------------
// Arbiter <-> Data RAM
// -------------------------------
logic [SET_ASSOCIATIVITY-1:0] req_ram;
logic [INDEX_WIDTH-1:0] addr_ram;
logic we_ram;
cache_line_t wdata_ram;
cache_line_t [SET_ASSOCIATIVITY-1:0] rdata_ram;
cl_be_t be_ram;
// ------------------
// Cache Controller
// ------------------
// one controller per request port; all of them are put into bypass mode
// while the cache is disabled (enable_i low)
generate
for (genvar i = 0; i < 3; i++) begin : master_ports
cache_ctrl #(
.SET_ASSOCIATIVITY ( SET_ASSOCIATIVITY ),
.INDEX_WIDTH ( INDEX_WIDTH ),
.TAG_WIDTH ( TAG_WIDTH ),
.CACHE_LINE_WIDTH ( CACHE_LINE_WIDTH ),
.CACHE_START_ADDR ( CACHE_START_ADDR )
) i_cache_ctrl (
.bypass_i ( ~enable_i ),
.busy_o ( busy [i] ),
.address_index_i ( address_index_i [i] ),
.address_tag_i ( address_tag_i [i] ),
.data_wdata_i ( data_wdata_i [i] ),
.data_req_i ( data_req_i [i] ),
.data_we_i ( data_we_i [i] ),
.data_be_i ( data_be_i [i] ),
.data_size_i ( data_size_i [i] ),
.kill_req_i ( kill_req_i [i] ),
.tag_valid_i ( tag_valid_i [i] ),
.data_gnt_o ( data_gnt_o [i] ),
.data_rvalid_o ( data_rvalid_o [i] ),
.data_rdata_o ( data_rdata_o [i] ),
.amo_op_i ( amo_op_i [i] ),
.req_o ( req [i+1] ),
.addr_o ( addr [i+1] ),
.gnt_i ( gnt [i+1] ),
.data_i ( rdata ),
.tag_o ( tag [i+1] ),
.data_o ( wdata [i+1] ),
.we_o ( we [i+1] ),
.be_o ( be [i+1] ),
.hit_way_i ( hit_way ),
.miss_req_o ( miss_req [i] ),
.miss_gnt_i ( miss_gnt [i] ),
.active_serving_i ( active_serving [i] ),
.critical_word_i ( critical_word ),
.critical_word_valid_i ( critical_word_valid ),
.bypass_gnt_i ( bypass_gnt [i] ),
.bypass_valid_i ( bypass_valid [i] ),
.bypass_data_i ( bypass_data [i] ),
.mshr_addr_o ( mshr_addr [i] ), // TODO
.mshr_addr_matches_i ( mshr_addr_matches [i] ), // TODO
.*
);
end
endgenerate
// ------------------
// Miss Handling Unit
// ------------------
// receives the miss requests of all three controllers and is itself
// requester 0 on the memory arbiter; remaining ports are bound via .*
miss_handler #(
.NR_PORTS ( 3 )
) i_miss_handler (
.busy_i ( |busy ),
.miss_req_i ( miss_req ),
.miss_gnt_o ( miss_gnt ),
.bypass_gnt_o ( bypass_gnt ),
.bypass_valid_o ( bypass_valid ),
.bypass_data_o ( bypass_data ),
.critical_word_o ( critical_word ),
.critical_word_valid_o ( critical_word_valid ),
.mshr_addr_i ( mshr_addr ),
.mshr_addr_matches_o ( mshr_addr_matches ),
.active_serving_o ( active_serving ),
.req_o ( req [0] ),
.addr_o ( addr [0] ),
.gnt_i ( gnt [0] ),
.data_i ( rdata ),
.be_o ( be [0] ),
.data_o ( wdata [0] ),
.we_o ( we [0] ),
.*
);
// the miss handler has no tag output connected, so its tag entry is tied to zero
assign tag[0] = '0;
// --------------
// Memory Arrays
// --------------
// one data SRAM and one tag SRAM per way; both share the request, write
// enable and address signals and are addressed without the byte-offset bits
generate
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin : sram_block
sram #(
.DATA_WIDTH ( CACHE_LINE_WIDTH ),
.NUM_WORDS ( NUM_WORDS )
) data_sram (
.req_i ( req_ram [i] ),
.we_i ( we_ram ),
.addr_i ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
.wdata_i ( wdata_ram.data ),
.be_i ( be_ram.data ),
.rdata_o ( rdata_ram[i].data ),
.*
);
sram #(
.DATA_WIDTH ( TAG_WIDTH ),
.NUM_WORDS ( NUM_WORDS )
) tag_sram (
.req_i ( req_ram [i] ),
.we_i ( we_ram ),
.addr_i ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
.wdata_i ( wdata_ram.tag ),
.be_i ( be_ram.tag ),
.rdata_o ( rdata_ram[i].tag ),
.*
);
end
endgenerate
// ----------------
// Dirty SRAM
// ----------------
// a single SRAM word holds the state bits of all ways:
// bits [SET_ASSOCIATIVITY-1:0] are the dirty bits,
// bits [2*SET_ASSOCIATIVITY-1:SET_ASSOCIATIVITY] are the valid bits
logic [DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
generate
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin
// the same dirty/valid write value is offered to every way,
// the enable vector built below selects which bits are written
assign dirty_wdata[i] = wdata_ram.dirty;
assign dirty_wdata[SET_ASSOCIATIVITY + i] = wdata_ram.valid;
assign rdata_ram[i].valid = dirty_rdata[SET_ASSOCIATIVITY + i];
assign rdata_ram[i].dirty = dirty_rdata[i];
end
endgenerate
// accessed whenever any way is requested
sram #(
.DATA_WIDTH ( DIRTY_WIDTH ),
.NUM_WORDS ( NUM_WORDS )
) dirty_sram (
.clk_i ( clk_i ),
.req_i ( |req_ram ),
.we_i ( we_ram ),
.addr_i ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
.wdata_i ( dirty_wdata ),
.be_i ( {be_ram.valid, be_ram.dirty} ),
.rdata_o ( dirty_rdata )
);
// ------------------------------------------------
// Tag Comparison and memory arbitration
// ------------------------------------------------
// four requesters: the miss handler (index 0) and the three cache controllers
tag_cmp #(
.NR_PORTS ( 4 ),
.ADDR_WIDTH ( INDEX_WIDTH ),
.SET_ASSOCIATIVITY ( SET_ASSOCIATIVITY )
) i_tag_cmp (
.req_i ( req ),
.gnt_o ( gnt ),
.addr_i ( addr ),
.wdata_i ( wdata ),
.we_i ( we ),
.be_i ( be ),
.rdata_o ( rdata ),
.tag_i ( tag ),
.hit_way_o ( hit_way ),
.req_o ( req_ram ),
.addr_o ( addr_ram ),
.wdata_o ( wdata_ram ),
.we_o ( we_ram ),
.be_o ( be_ram ),
.rdata_i ( rdata_ram ),
.*
);
// simulation-only sanity checks on the bus width and the cache line size
`ifndef SYNTHESIS
initial begin
assert ($bits(data_if.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus");
assert (CACHE_LINE_WIDTH/64 inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of 64");
end
`endif
endmodule
// --------------
// Tag Compare
// --------------
//
// Description: Arbitrates access to cache memories, simplified request grant protocol
// checks for hit or miss on cache
//
module tag_cmp #(
    parameter int unsigned NR_PORTS          = 3,
    parameter int unsigned ADDR_WIDTH        = 64,
    parameter type         data_t            = cache_line_t,
    parameter type         be_t              = cl_be_t,
    parameter int unsigned SET_ASSOCIATIVITY = 8
)(
    input  logic                                        clk_i,
    input  logic                                        rst_ni,
    // ---- requester side (one slot per port, port 0 has the highest priority) ----
    input  logic  [NR_PORTS-1:0][SET_ASSOCIATIVITY-1:0] req_i,     // per-way request of every port
    output logic  [NR_PORTS-1:0]                        gnt_o,     // grant per port
    input  logic  [NR_PORTS-1:0][ADDR_WIDTH-1:0]        addr_i,
    input  data_t [NR_PORTS-1:0]                        wdata_i,
    input  logic  [NR_PORTS-1:0]                        we_i,
    input  be_t   [NR_PORTS-1:0]                        be_i,
    output data_t [SET_ASSOCIATIVITY-1:0]               rdata_o,   // read data of all ways
    input  logic  [NR_PORTS-1:0][TAG_WIDTH-1:0]         tag_i,     // tag to compare, supplied the cycle after the request
    output logic  [SET_ASSOCIATIVITY-1:0]               hit_way_o, // way(s) whose stored tag matches and is valid
    // ---- memory side ----
    output logic  [SET_ASSOCIATIVITY-1:0]               req_o,
    output logic  [ADDR_WIDTH-1:0]                      addr_o,
    output data_t                                       wdata_o,
    output logic                                        we_o,
    output be_t                                         be_o,
    input  data_t [SET_ASSOCIATIVITY-1:0]               rdata_i
);

    // the memory read data is passed through to the requesters untouched
    assign rdata_o = rdata_i;

    // one-hot identifier of the port selected by the arbiter:
    // id_d is the current selection, id_q the selection of the previous cycle
    logic [NR_PORTS-1:0]  id_d, id_q;
    // tag of the port that was selected in the previous cycle
    logic [TAG_WIDTH-1:0] sel_tag;
    // goes high once the arbiter has reached the first requesting port
    logic                 arb_done;

    // pick the tag belonging to the port selected one cycle ago
    always_comb begin : tag_sel
        sel_tag = '0;
        for (int unsigned port = 0; port < NR_PORTS; port++) begin
            if (id_q[port]) begin
                sel_tag = tag_i[port];
            end
        end
    end

    // a way hits when its stored tag equals the selected tag and its valid bit is set
    generate
        for (genvar j = 0; j < SET_ASSOCIATIVITY; j++) begin : tag_cmp
            assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0;
        end
    endgenerate

    always_comb begin
        // defaults
        gnt_o    = '0;
        id_d     = '0;
        wdata_o  = '0;
        req_o    = '0;
        addr_o   = '0;
        be_o     = '0;
        we_o     = '0;
        arb_done = 1'b0;

        // Request side - fixed priority, lowest port index wins.
        // Ports are visited in ascending order until the first one with a
        // non-zero request has been forwarded. Every visited port (including
        // the winner) sees its grant asserted; if nobody requests, the signals
        // of the last port are forwarded and all grants are set.
        for (int unsigned port = 0; port < NR_PORTS; port++) begin
            if (!arb_done) begin
                gnt_o[port] = 1'b1;
                id_d        = (1'b1 << port);
                req_o       = req_i[port];
                addr_o      = addr_i[port];
                wdata_o     = wdata_i[port];
                we_o        = we_i[port];
                be_o        = be_i[port];

                if (req_i[port]) begin
                    arb_done = 1'b1;
                end
            end
        end

        `ifndef SYNTHESIS
        `ifndef VERILATOR
        // at most one way may report a hit
        assert property (
          @(posedge clk_i) $onehot0(hit_way_o)) else begin $error("Hit should be one-hot encoded"); $stop(); end
        `endif
        `endif
    end

    // remember which port was selected so its tag can be used one cycle later
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            id_q <= '0;
        end else begin
            id_q <= id_d;
        end
    end

endmodule

View file

@ -79,7 +79,8 @@ module pcgen_stage (
// 6. Debug
// Mis-predict handling is a little bit different
always_comb begin : npc_select
automatic logic [63:0] fetch_address = npc_q;
automatic logic [63:0] fetch_address;
fetch_address = npc_q;
branch_predict_o = branch_predict_btb;
fetch_valid_o = 1'b1;

122
src/perf_counters.sv Normal file
View file

@ -0,0 +1,122 @@
// Author: Florian Zaruba, ETH Zurich
// Date: 06.10.2017
// Description: Performance counters
//
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
import ariane_pkg::*;
// Performance counter bank.
//
// Holds one 64-bit event counter per PERF_* index. Events are counted
// every cycle their strobe is high; counters can additionally be read and
// overwritten through a simple SRAM-like port. A write through that port
// takes precedence over an increment of the same counter in the same cycle.
module perf_counters #(
    int unsigned NR_EXTERNAL_COUNTERS = 1 // not used inside this module
)(
    input  logic              clk_i,    // Clock
    input  logic              rst_ni,   // Asynchronous reset active low
    // SRAM like interface
    input  logic [11:0]       addr_i,   // read/write address, only the low index bits are decoded
    input  logic              we_i,     // write enable
    input  logic [63:0]       data_i,   // data to write
    output logic [63:0]       data_o,   // data to read (combinational, current counter value)
    // from commit stage
    input  scoreboard_entry_t commit_instr_i,
    input  logic              commit_ack_o, // commit acknowledge (an input despite the _o suffix)
    // from L1 caches
    input  logic              l1_icache_miss_i,
    input  logic              l1_dcache_miss_i,
    // from MMU
    input  logic              itlb_miss_i,
    input  logic              dtlb_miss_i,
    // from PC Gen
    input  exception_t        ex_i,
    input  logic              eret_i,
    input  branchpredict_t    resolved_branch_i
);

    // number of implemented counters and the address bits needed to select one
    localparam int unsigned NR_COUNTERS = 12;
    localparam int unsigned IDX_WIDTH   = $clog2(NR_COUNTERS);

    logic [NR_COUNTERS-1:0][63:0] perf_counter_d, perf_counter_q;

    // Counter selected by the SRAM-like port. All IDX_WIDTH (= 4) low address
    // bits are decoded so that every one of the NR_COUNTERS counters is
    // reachable; decoding only three bits would alias counters 8..11 onto 0..3.
    // NOTE(review): assumes addr_i[IDX_WIDTH-1:0] carries the PERF_* index and
    // that all PERF_* constants are < NR_COUNTERS - confirm against the CSR file
    // and ariane_pkg.
    logic [IDX_WIDTH-1:0] counter_idx;
    assign counter_idx = addr_i[IDX_WIDTH-1:0];

    always_comb begin : perf_counters
        perf_counter_d = perf_counter_q;
        data_o = 'b0;

        // ------------------------------
        // Update Performance Counters
        // ------------------------------
        if (l1_icache_miss_i)
            perf_counter_d[PERF_L1_ICACHE_MISS] = perf_counter_q[PERF_L1_ICACHE_MISS] + 1'b1;

        if (l1_dcache_miss_i)
            perf_counter_d[PERF_L1_DCACHE_MISS] = perf_counter_q[PERF_L1_DCACHE_MISS] + 1'b1;

        if (itlb_miss_i)
            perf_counter_d[PERF_ITLB_MISS] = perf_counter_q[PERF_ITLB_MISS] + 1'b1;

        if (dtlb_miss_i)
            perf_counter_d[PERF_DTLB_MISS] = perf_counter_q[PERF_DTLB_MISS] + 1'b1;

        // instruction related perf counters, only counted on an acknowledged commit
        if (commit_ack_o) begin
            if (commit_instr_i.fu == LOAD)
                perf_counter_d[PERF_LOAD] = perf_counter_q[PERF_LOAD] + 1'b1;

            if (commit_instr_i.fu == STORE)
                perf_counter_d[PERF_STORE] = perf_counter_q[PERF_STORE] + 1'b1;

            if (commit_instr_i.fu == CTRL_FLOW)
                perf_counter_d[PERF_BRANCH_JUMP] = perf_counter_q[PERF_BRANCH_JUMP] + 1'b1;

            // The standard software calling convention uses register x1 to hold the return address on a call
            // the unconditional jump is decoded as ADD op
            if (commit_instr_i.fu == CTRL_FLOW && commit_instr_i.op == '0 && commit_instr_i.rd == 'b1)
                perf_counter_d[PERF_CALL] = perf_counter_q[PERF_CALL] + 1'b1;

            // Return from call: JALR with x1 as source register
            if (commit_instr_i.op == JALR && commit_instr_i.rs1 == 'b1)
                perf_counter_d[PERF_RET] = perf_counter_q[PERF_RET] + 1'b1;
        end

        if (ex_i.valid)
            perf_counter_d[PERF_EXCEPTION] = perf_counter_q[PERF_EXCEPTION] + 1'b1;

        if (eret_i)
            perf_counter_d[PERF_EXCEPTION_RET] = perf_counter_q[PERF_EXCEPTION_RET] + 1'b1;

        if (resolved_branch_i.valid && resolved_branch_i.is_mispredict)
            perf_counter_d[PERF_MIS_PREDICT] = perf_counter_q[PERF_MIS_PREDICT] + 1'b1;

        // ------------------------------
        // Read / Write Port
        // ------------------------------
        // The current (registered) counter value is always presented on data_o,
        // also during a write. Index encodings that do not map to an implemented
        // counter read as zero and ignore writes instead of indexing out of range.
        if (counter_idx < NR_COUNTERS) begin
            data_o = perf_counter_q[counter_idx];
            // placed last so that a software write overrides an increment
            if (we_i)
                perf_counter_d[counter_idx] = data_i;
        end
    end

    // ----------------
    // Registers
    // ----------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            perf_counter_q <= '0;
        end else begin
            perf_counter_q <= perf_counter_d;
        end
    end

endmodule

View file

@ -44,6 +44,7 @@ module ptw #(
output logic data_req_o,
output logic data_we_o,
output logic [7:0] data_be_o,
output logic [1:0] data_size_o,
output logic kill_req_o,
output logic tag_valid_o,
input logic data_gnt_i,
@ -70,12 +71,18 @@ module ptw #(
input logic [63:0] dtlb_vaddr_i,
// from CSR file
input logic [43:0] satp_ppn_i, // ppn from satp
input logic mxr_i
input logic mxr_i,
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o
);
// input registers
logic data_rvalid_q;
logic [63:0] data_rdata_q;
pte_t pte;
assign pte = pte_t'(data_rdata_i);
assign pte = pte_t'(data_rdata_q);
enum logic[2:0] {
IDLE,
@ -151,6 +158,7 @@ module ptw #(
tag_valid_n = 1'b0;
data_req_o = 1'b0;
data_be_o = 8'hFF;
data_size_o = 2'b11;
data_we_o = 1'b0;
ptw_error_o = 1'b0;
itlb_update_o = 1'b0;
@ -165,6 +173,9 @@ module ptw #(
vaddr_n = vaddr_q;
faulting_address_o = '0;
itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0;
case (CS)
IDLE: begin
@ -179,12 +190,14 @@ module ptw #(
tlb_update_asid_n = asid_i;
vaddr_n = itlb_vaddr_i;
NS = WAIT_GRANT;
itlb_miss_o = 1'b1;
// we got an DTLB miss
end else if (en_ld_st_translation_i & dtlb_access_i & dtlb_miss_i) begin
ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[38:30], 3'b0};
tlb_update_asid_n = asid_i;
vaddr_n = dtlb_vaddr_i;
NS = WAIT_GRANT;
dtlb_miss_o = 1'b1;
end
end
@ -201,7 +214,7 @@ module ptw #(
PTE_LOOKUP: begin
// we wait for the valid signal
if (data_rvalid_i) begin
if (data_rvalid_q) begin
// check if the global mapping bit is set
if (pte.g)
@ -303,7 +316,7 @@ module ptw #(
end
// wait for the rvalid before going back to IDLE
WAIT_RVALID: begin
if (data_rvalid_i)
if (data_rvalid_q)
NS = IDLE;
end
endcase
@ -324,7 +337,7 @@ module ptw #(
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
if (~rst_ni) begin
CS <= IDLE;
is_instr_ptw_q <= 1'b0;
ptw_lvl_q <= LVL1;
@ -333,6 +346,8 @@ module ptw #(
vaddr_q <= '0;
ptw_pptr_q <= '{default: 0};
global_mapping_q <= 1'b0;
data_rdata_q <= '0;
data_rvalid_q <= 1'b0;
end else begin
CS <= NS;
ptw_pptr_q <= ptw_pptr_n;
@ -342,6 +357,8 @@ module ptw #(
tlb_update_asid_q <= tlb_update_asid_n;
vaddr_q <= vaddr_n;
global_mapping_q <= global_mapping_n;
data_rdata_q <= data_rdata_i;
data_rvalid_q <= data_rvalid_i;
end
end

View file

@ -55,7 +55,7 @@ module regfile
);
localparam ADDR_WIDTH = 5;;
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH;
logic [DATA_WIDTH-1:0] mem[NUM_WORDS];
@ -153,4 +153,4 @@ module regfile
end
endmodule
endmodule

View file

@ -95,7 +95,9 @@ module scoreboard #(
// maintain a FIFO with issued instructions
// keep track of all issued instructions
always_comb begin : issue_fifo
automatic logic [$clog2(NR_ENTRIES)-1:0] issue_cnt = issue_cnt_q;
automatic logic [$clog2(NR_ENTRIES)-1:0] issue_cnt;
issue_cnt = issue_cnt_q;
// default assignment
mem_n = mem_q;
commit_pointer_n = commit_pointer_q;

View file

@ -34,9 +34,12 @@ module store_buffer (
// it is only ready if it can unconditionally commit the instruction, e.g.:
// the commit buffer needs to be empty
input logic valid_i, // this is a valid store
input logic valid_without_flush_i, // just tell if the address is valid which we are current putting and do not take any further action
input logic [63:0] paddr_i, // physical address of store which needs to be placed in the queue
input logic [63:0] data_i, // data which is placed in the queue
input logic [7:0] be_i, // byte enable in
input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write)
// D$ interface
output logic [11:0] address_index_o,
@ -45,6 +48,7 @@ module store_buffer (
output logic data_req_o,
output logic data_we_o,
output logic [7:0] data_be_o,
output logic [1:0] data_size_o,
output logic kill_req_o,
output logic tag_valid_o,
input logic data_gnt_i,
@ -55,10 +59,6 @@ module store_buffer (
// allocate more space for the commit buffer to be on the safe side
localparam int unsigned DEPTH_COMMIT = 4;
// we need to keep the tag portion of the address for a cycle later
logic [43:0] address_tag_n, address_tag_q;
logic tag_valid_n, tag_valid_q;
// the store queue has two parts:
// 1. Speculative queue
// 2. Commit queue which is non-speculative, e.g.: the store will definitely happen.
@ -67,6 +67,7 @@ module store_buffer (
logic [63:0] address;
logic [63:0] data;
logic [7:0] be;
logic [1:0] data_size;
logic valid; // this entry is valid, we need this for checking if the address offset matches
} speculative_queue_n [DEPTH_SPEC-1:0], speculative_queue_q [DEPTH_SPEC-1:0],
commit_queue_n [DEPTH_COMMIT-1:0], commit_queue_q [DEPTH_COMMIT-1:0];
@ -85,7 +86,8 @@ module store_buffer (
// Speculative Queue - Core Interface
// ----------------------------------------
always_comb begin : core_if
automatic logic [DEPTH_SPEC:0] speculative_status_cnt = speculative_status_cnt_q;
automatic logic [DEPTH_SPEC:0] speculative_status_cnt;
speculative_status_cnt = speculative_status_cnt_q;
// we are ready if the speculative and the commit queue have a space left
ready_o = (speculative_status_cnt_q < (DEPTH_SPEC - 1)) || commit_i;
@ -97,9 +99,10 @@ module store_buffer (
// LSU interface
// we are ready to accept a new entry and the input data is valid
if (valid_i) begin
speculative_queue_n[speculative_write_pointer_q].address = paddr_i;
speculative_queue_n[speculative_write_pointer_q].data = data_i;
speculative_queue_n[speculative_write_pointer_q].be = be_i;
speculative_queue_n[speculative_write_pointer_q].address = paddr_i;
speculative_queue_n[speculative_write_pointer_q].data = data_i;
speculative_queue_n[speculative_write_pointer_q].be = be_i;
speculative_queue_n[speculative_write_pointer_q].data_size = data_size_i;
speculative_queue_n[speculative_write_pointer_q].valid = 1'b1;
// advance the write pointer
speculative_write_pointer_n = speculative_write_pointer_q + 1'b1;
@ -136,17 +139,20 @@ module store_buffer (
// those signals can directly be output to the memory
assign address_index_o = commit_queue_q[commit_read_pointer_q].address[11:0];
// if we got a new request we already saved the tag from the previous cycle
assign address_tag_o = address_tag_q;
assign tag_valid_o = tag_valid_q;
assign address_tag_o = commit_queue_q[commit_read_pointer_q].address[55:12];
assign tag_valid_o = 1'b0;
assign data_wdata_o = commit_queue_q[commit_read_pointer_q].data;
assign data_be_o = commit_queue_q[commit_read_pointer_q].be;
assign data_size_o = commit_queue_q[commit_read_pointer_q].data_size;
// we will never kill a request in the store buffer since we already know that the translation is valid
// e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
assign kill_req_o = 1'b0;
assign data_we_o = 1'b1; // we will always write in the store queue
always_comb begin : store_if
automatic logic [DEPTH_COMMIT:0] commit_status_cnt = commit_status_cnt_q;
automatic logic [DEPTH_COMMIT:0] commit_status_cnt;
commit_status_cnt = commit_status_cnt_q;
commit_ready_o = (commit_status_cnt_q < DEPTH_COMMIT);
// no store is pending if we don't have any element in the commit queue e.g.: it is empty
no_st_pending_o = (commit_status_cnt_q == 0);
@ -154,10 +160,8 @@ module store_buffer (
commit_read_pointer_n = commit_read_pointer_q;
commit_write_pointer_n = commit_write_pointer_q;
address_tag_n = address_tag_q;
commit_queue_n = commit_queue_q;
tag_valid_n = 1'b0;
data_req_o = 1'b0;
// there should be no commit when we are flushing
@ -167,10 +171,6 @@ module store_buffer (
if (data_gnt_i) begin
// we can evict it from the commit buffer
commit_queue_n[commit_read_pointer_q].valid = 1'b0;
// save the tag portion
address_tag_n = commit_queue_q[commit_read_pointer_q].address[55:12];
// signal a valid tag the cycle afterwards
tag_valid_n = 1'b1;
// advance the read_pointer
commit_read_pointer_n = commit_read_pointer_q + 1'b1;
commit_status_cnt--;
@ -222,7 +222,7 @@ module store_buffer (
end
end
// or it matches with the entry we are currently putting into the queue
if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_i) begin
if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_without_flush_i) begin
page_offset_matches_o = 1'b1;
end
end
@ -231,9 +231,7 @@ module store_buffer (
// registers
always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
if(~rst_ni) begin
address_tag_q <= 'b0;
tag_valid_q <= 1'b0;
// initialize the queues
// initialize the queues
speculative_queue_q <= '{default: 0};
commit_queue_q <= '{default: 0};
commit_read_pointer_q <= '0;
@ -243,8 +241,6 @@ module store_buffer (
speculative_write_pointer_q <= '0;
speculative_status_cnt_q <= '0;
end else begin
address_tag_q <= address_tag_n;
tag_valid_q <= tag_valid_n;
speculative_queue_q <= speculative_queue_n;
commit_queue_q <= commit_queue_n;
commit_read_pointer_q <= commit_read_pointer_n;

View file

@ -51,6 +51,7 @@ module store_unit (
output logic data_req_o,
output logic data_we_o,
output logic [7:0] data_be_o,
output logic [1:0] data_size_o,
output logic kill_req_o,
output logic tag_valid_o,
input logic data_gnt_i,
@ -63,10 +64,12 @@ module store_unit (
// store buffer control signals
logic st_ready;
logic st_valid;
logic st_valid_without_flush;
// keep the data and the byte enable for the second cycle (after address translation)
logic [63:0] st_data_n, st_data_q;
logic [7:0] st_be_n, st_be_q;
logic [1:0] st_data_size_n, st_data_size_q;
logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
// output assignments
@ -74,13 +77,14 @@ module store_unit (
assign trans_id_o = trans_id_q; // transaction id from previous cycle
always_comb begin : store_control
translation_req_o = 1'b0;
valid_o = 1'b0;
st_valid = 1'b0;
pop_st_o = 1'b0;
ex_o = ex_i;
trans_id_n = lsu_ctrl_i.trans_id;
NS = CS;
translation_req_o = 1'b0;
valid_o = 1'b0;
st_valid = 1'b0;
st_valid_without_flush = 1'b0;
pop_st_o = 1'b0;
ex_o = ex_i;
trans_id_n = lsu_ctrl_i.trans_id;
NS = CS;
case (CS)
// we got a valid store
@ -110,6 +114,8 @@ module store_unit (
if (!flush_i)
st_valid = 1'b1;
st_valid_without_flush = 1'b1;
// we have another request
if (valid_i) begin
@ -175,8 +181,10 @@ module store_unit (
// -----------
// re-align the write data to comply with the address offset
always_comb begin
st_be_n = lsu_ctrl_i.be;
st_data_n = lsu_ctrl_i.data;
st_be_n = lsu_ctrl_i.be;
st_data_n = lsu_ctrl_i.data;
st_data_size_n = extract_transfer_size(lsu_ctrl_i.operator);
case (lsu_ctrl_i.vaddr[2:0])
3'b000: st_data_n = lsu_ctrl_i.data;
3'b001: st_data_n = {lsu_ctrl_i.data[55:0], lsu_ctrl_i.data[63:56]};
@ -193,11 +201,15 @@ module store_unit (
// ---------------
store_buffer store_buffer_i (
// store queue write port
.valid_i ( st_valid ),
.data_i ( st_data_q ),
.be_i ( st_be_q ),
.valid_i ( st_valid ),
.valid_without_flush_i ( st_valid_without_flush ), // the flush signal can be critical and we need this valid
// signal to check whether the page_offset matches or not, functionally it doesn't
// make a difference whether we use the correct valid signal or not as we are flushing the whole pipeline anyway
.data_i ( st_data_q ),
.be_i ( st_be_q ),
.data_size_i ( st_data_size_q ),
// store buffer out
.ready_o ( st_ready ),
.ready_o ( st_ready ),
.*
);
// ---------------
@ -205,15 +217,17 @@ module store_unit (
// ---------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
CS <= IDLE;
st_be_q <= '0;
st_data_q <= '0;
trans_id_q <= '0;
CS <= IDLE;
st_be_q <= '0;
st_data_q <= '0;
st_data_size_q <= '0;
trans_id_q <= '0;
end else begin
CS <= NS;
st_be_q <= st_be_n;
st_data_q <= st_data_n;
trans_id_q <= trans_id_n;
CS <= NS;
st_be_q <= st_be_n;
st_data_q <= st_data_n;
trans_id_q <= trans_id_n;
st_data_size_q <= st_data_size_n;
end
end

View file

@ -164,15 +164,16 @@ module tlb #(
// default: begin /* No hit */ end
// endcase
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
automatic int unsigned idx_base, shift, new_index;
// we got a hit so update the pointer as it was least recently used
if (lu_hit[i] & lu_access_i) begin
// Set the nodes to the values we would expect
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
automatic int unsigned idx_base = $unsigned((2**lvl)-1);
idx_base = $unsigned((2**lvl)-1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
automatic int unsigned shift = $clog2(TLB_ENTRIES) - lvl;
shift = $clog2(TLB_ENTRIES) - lvl;
// to circumvent the 32 bit integer arithmetic assignment
automatic int unsigned new_index = ~((i >> (shift-1)) & 32'b1);
new_index = ~((i >> (shift-1)) & 32'b1);
plru_tree_n[idx_base + (i >> shift)] = new_index[0];
end
end
@ -192,15 +193,17 @@ module tlb #(
// the corresponding bit of the entry's index, this is
// the next entry to replace.
for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
automatic logic en = 1'b1;
automatic logic en;
automatic int unsigned idx_base, shift, new_index;
en = 1'b1;
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
automatic int unsigned idx_base = $unsigned((2**lvl)-1);
idx_base = $unsigned((2**lvl)-1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
automatic int unsigned shift = $clog2(TLB_ENTRIES) - lvl;
shift = $clog2(TLB_ENTRIES) - lvl;
// en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
automatic int unsigned new_index = (i >> (shift-1)) & 32'b1;
if(new_index[0]) begin
new_index = (i >> (shift-1)) & 32'b1;
if (new_index[0]) begin
en &= plru_tree_q[idx_base + (i>>shift)];
end else begin
en &= ~plru_tree_q[idx_base + (i>>shift)];
@ -239,7 +242,7 @@ module tlb #(
// Just for checking
function int countSetBits(logic[TLB_ENTRIES-1:0] vector);
automatic int count = 0;
foreach(vector[idx]) begin
foreach (vector[idx]) begin
count += vector[idx];
end
return count;

52
src/util/behav_sram.sv Executable file
View file

@ -0,0 +1,52 @@
// Author: Florian Zaruba, ETH Zurich
// Date: 13.10.2017
// Description: SRAM Behavioral Model
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
// Behavioural single-port SRAM model.
//
// A request (req_i) is either a write (we_i set) or a read (we_i clear).
// Writes are masked per bit by be_i. A read only captures the address;
// rdata_o continuously shows the word stored at the last captured read address.
module sram #(
    int unsigned DATA_WIDTH = 64,
    int unsigned NUM_WORDS  = 1024
)(
    input  logic                         clk_i,
    input  logic                         req_i,   // access request
    input  logic                         we_i,    // 1: write, 0: read
    input  logic [$clog2(NUM_WORDS)-1:0] addr_i,  // word address
    input  logic [DATA_WIDTH-1:0]        wdata_i, // write data
    input  logic [DATA_WIDTH-1:0]        be_i,    // per-bit write enable
    output logic [DATA_WIDTH-1:0]        rdata_o  // read data
);

    localparam ADDR_WIDTH = $clog2(NUM_WORDS);

    // storage array and the registered read address
    logic [DATA_WIDTH-1:0] ram [NUM_WORDS-1:0];
    logic [ADDR_WIDTH-1:0] raddr_q;

    // Open items noted for this model (not implemented):
    //   1. randomize array
    //   2. randomize output when no request is active
    always @(posedge clk_i) begin
        if (req_i) begin
            if (!we_i) begin
                // read access: remember which word to present on rdata_o
                raddr_q <= addr_i;
            end else begin
                // write access: update only the bits whose enable is set
                for (int bit_idx = 0; bit_idx < DATA_WIDTH; bit_idx++) begin
                    if (be_i[bit_idx]) begin
                        ram[addr_i][bit_idx] <= wdata_i[bit_idx];
                    end
                end
            end
        end
    end

    // the array is read combinationally through the registered address
    assign rdata_o = ram[raddr_q];

endmodule

98
src/util/gf22_sram.sv Executable file
View file

@ -0,0 +1,98 @@
// Author: Florian Zaruba, ETH Zurich
// Date: 13.10.2017
// Description: SRAM Model for GF22
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
// Wrapper that maps the generic sram interface onto memory macro instances.
// A macro is only instantiated for NUM_WORDS == 256 combined with a
// DATA_WIDTH of 16, 44 or 128; for any other parameter combination nothing
// is generated and rdata_o is left undriven.
module sram #(
int unsigned DATA_WIDTH = 64,
int unsigned NUM_WORDS = 1024
)(
input logic clk_i,
input logic req_i,
input logic we_i,
input logic [$clog2(NUM_WORDS)-1:0] addr_i,
input logic [DATA_WIDTH-1:0] wdata_i,
input logic [DATA_WIDTH-1:0] be_i,
output logic [DATA_WIDTH-1:0] rdata_o
);
// Connections common to all three macros:
//  - CEN and RDWEN receive the inverted req_i / we_i
//    (presumably active-low enables - confirm against the macro datasheet)
//  - the 8-bit address is split into AW = addr_i[7:1] and AC = addr_i[0]
//  - T_LOGIC and the MA_* inputs are tied to zero, OBSV_CTL is left open
//    (presumably test / margin-adjust pins - confirm against the macro datasheet)
generate
if (NUM_WORDS == 256) begin
// 256 x 16: be_i is passed unchanged to the macro's BW input
if (DATA_WIDTH == 16) begin
IN22FDX_R1PH_NFHN_W00256B016M02C256 dirtyram (
.CLK ( clk_i ),
.CEN ( ~req_i ),
.RDWEN ( ~we_i ),
.AW ( addr_i[7:1] ),
.AC ( addr_i[0] ),
.D ( wdata_i ),
.BW ( be_i ),
.T_LOGIC ( 1'b0 ),
.MA_SAWL ( '0 ),
.MA_WL ( '0 ),
.MA_WRAS ( '0 ),
.MA_WRASD ( '0 ),
.Q ( rdata_o ),
.OBSV_CTL ( )
);
end
// 256 x 44: mapped onto a 46-bit connection; the two extra data and BW
// inputs are tied to zero and the two extra read bits are dropped
if (DATA_WIDTH == 44) begin
logic [45:0] rdata;
assign rdata_o = rdata[43:0];
IN22FDX_R1PH_NFHN_W00256B046M02C256 TAG_RAM (
.CLK ( clk_i ),
.CEN ( ~req_i ),
.RDWEN ( ~we_i ),
.AW ( addr_i[7:1] ),
.AC ( addr_i[0] ),
.D ( {2'b0, wdata_i} ),
.BW ( {2'b0, be_i } ),
.T_LOGIC ( 1'b0 ),
.MA_SAWL ( '0 ),
.MA_WL ( '0 ),
.MA_WRAS ( '0 ),
.MA_WRASD ( '0 ),
.Q ( rdata ),
.OBSV_CTL ( )
);
end
// 256 x 128: be_i is passed unchanged to the macro's BW input
if (DATA_WIDTH == 128) begin
IN22FDX_R1PH_NFHN_W00256B128M02C256 DATA_RAM
(
.CLK ( clk_i ),
.CEN ( ~req_i ),
.RDWEN ( ~we_i ),
.AW ( addr_i[7:1] ),
.AC ( addr_i[0] ),
.D ( wdata_i ),
.BW ( be_i ),
.T_LOGIC ( 1'b0 ),
.MA_SAWL ( '0 ),
.MA_WL ( '0 ),
.MA_WRAS ( '0 ),
.MA_WRASD ( '0 ),
.Q ( rdata_o ),
.OBSV_CTL ( )
);
end
end
endgenerate
endmodule

View file

@ -196,6 +196,12 @@ class instruction_trace_item;
instr,
s);
// s = $sformatf("%s %h %h %-36s",
// priv_lvl,
// sbe.pc,
// instr,
// s);
foreach (result_regs[i]) begin
if (result_regs[i] != 0)
s = $sformatf("%s %-4s:%16x", s, regAddrToStr(result_regs[i]), this.result);

View file

@ -155,7 +155,7 @@ class instruction_tracer;
// flush all decoded instructions
function void flushDecode ();
decode_queue = {};
endfunction;
endfunction
// flush everything, we took an exception/interrupt
function void flush ();
@ -166,7 +166,7 @@ class instruction_tracer;
// also clear mappings
store_mapping = {};
load_mapping = {};
endfunction;
endfunction
function void printInstr(scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl);
instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr, priv_lvl);
@ -174,7 +174,7 @@ class instruction_tracer;
string print_instr = iti.printInstr();
uvm_report_info( "Tracer", print_instr, UVM_HIGH);
$fwrite(this.f, {print_instr, "\n"});
endfunction;
endfunction
function void printException(logic [63:0] pc, logic [63:0] cause, logic [63:0] tval);
exception_trace_item eti = new (pc, cause, tval);

View file

@ -54,11 +54,13 @@ interface instruction_tracer_if (
// current privilege level
priv_lvl_t priv_lvl;
// the tracer just has a passive interface we do not drive anything with it
`ifndef SYNTHESIS
clocking pck @(posedge clk);
input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr,
st_valid, st_paddr, ld_valid, ld_kill, ld_paddr,
wdata, we, commit_instr, commit_ack, exception, priv_lvl;
endclocking
`endif
endinterface
`endif

View file

@ -17,6 +17,9 @@
// University of Bologna.
//
package instruction_tracer_pkg;
timeunit 1ns;
timeprecision 1ps;
import ariane_pkg::*;
`ifndef SYNTHESIS
import uvm_pkg::*;

87
src/util/xilinx_sram.sv Executable file
View file

@ -0,0 +1,87 @@
// Author: Florian Zaruba, ETH Zurich
// Date: 13.11.2017
// Description: SRAM Model for Xilinx FPGA
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
// Wrapper that maps the generic sram interface onto FPGA memories.
// Memory is only generated for NUM_WORDS == 256 combined with a DATA_WIDTH
// of 16 (inferred array), 44 or 128 (block memory instances); for any other
// parameter combination nothing is generated and rdata_o is left undriven.
module sram #(
int unsigned DATA_WIDTH = 64,
int unsigned NUM_WORDS = 1024
)(
input logic clk_i,
input logic req_i,
input logic we_i,
input logic [$clog2(NUM_WORDS)-1:0] addr_i,
input logic [DATA_WIDTH-1:0] wdata_i,
input logic [DATA_WIDTH-1:0] be_i,
output logic [DATA_WIDTH-1:0] rdata_o
);
generate
if (NUM_WORDS == 256) begin
// Dirty RAM
// Inferred 256 x 16 memory with a per-bit write enable. rdata_o is
// registered: it is only updated by a read request (req_i without we_i)
// and keeps its value otherwise.
if (DATA_WIDTH == 16) begin
// NOTE(review): this localparam shadows the module parameter of the same
// name; inside this branch both have the value 256
localparam NUM_WORDS = 2**8;
logic [NUM_WORDS-1:0][15:0] mem;
always_ff @(posedge clk_i) begin
// write
if (req_i && we_i) begin
for (int unsigned i = 0; i < 16; i++) begin
if (be_i[i])
mem[addr_i][i] <= wdata_i[i];
end
// read
end else if (req_i) begin
rdata_o <= mem[addr_i];
end
end
end
// Tag RAM
// The 44-bit tag is zero-extended to 48 bits on the write side and the upper
// four read bits are dropped. wea is built from one be_i bit per byte
// (bits 0, 8, 16, 24, 32 and 40), each gated with we_i.
if (DATA_WIDTH == 44) begin
logic [47:0] data_o;
assign rdata_o = data_o[43:0];
// this is actually 48 bits wide
// NOTE(review): the instance type is named 256x46 while 48-bit vectors are
// connected - confirm the port width of the generated memory
xilinx_dcache_bank_tag_256x46 TAG_RAM (
.clka ( clk_i ),
.ena ( req_i ),
.wea ( {{be_i[40] & we_i}, {be_i[32] & we_i}, {be_i[24] & we_i}, {be_i[16] & we_i}, {be_i[8] & we_i}, {be_i[0] & we_i}} ),
.addra ( addr_i ),
.dina ( {4'b0, wdata_i} ),
.douta ( data_o )
);
end
// Data RAM
// wea is built from be_i[15:0], each bit gated with we_i.
// NOTE(review): the tag RAM above samples every 8th bit of be_i whereas this
// branch uses the 16 lowest bits directly - confirm which be_i encoding the
// cache drives on the data array
if (DATA_WIDTH == 128) begin
xilinx_dcache_bank_data_256x128 DATA_RAM (
.clka ( clk_i ),
.ena ( req_i ),
.wea ( {{be_i[15] & we_i}, {be_i[14] & we_i}, {be_i[13] & we_i}, {be_i[12] & we_i}, {be_i[11] & we_i}, {be_i[10] & we_i}, {be_i[9] & we_i}, {be_i[8] & we_i}, {be_i[7] & we_i}, {be_i[6] & we_i}, {be_i[5] & we_i}, {be_i[4] & we_i}, {be_i[3] & we_i}, {be_i[2] & we_i}, {be_i[1] & we_i}, {be_i[0] & we_i}}),
.addra ( addr_i ),
.dina ( wdata_i ),
.douta ( rdata_o )
);
end
end
endgenerate
endmodule

View file

@ -4,6 +4,7 @@ ariane:
]
files: [
include/ariane_pkg.sv,
include/nbdcache_pkg.sv,
src/util/instruction_tracer_if.sv,
src/util/instruction_tracer_pkg.sv,
src/ariane.sv,
@ -15,7 +16,6 @@ ariane:
src/controller.sv,
src/csr_buffer.sv,
src/csr_regfile.sv,
src/dcache_arbiter.sv,
src/decoder.sv,
src/ex_stage.sv,
src/fetch_fifo.sv,
@ -37,6 +37,10 @@ ariane:
src/store_unit.sv,
src/tlb.sv,
src/debug_unit.sv,
src/nbdcache.sv,
src/miss_handler.sv,
src/cache_ctrl.sv,
src/perf_counters.sv,
]
riscv_regfile_rtl:
targets: [
@ -48,6 +52,7 @@ riscv_regfile_rtl:
]
files: [
src/regfile.sv,
src/util/gf22_sram.sv,
]
riscv_regfile_fpga:
@ -59,4 +64,5 @@ riscv_regfile_fpga:
]
files: [
src/regfile_ff.sv,
src/util/xilinx_sram.sv,
]

2
tb

@ -1 +1 @@
Subproject commit 130fda9a1ea444b8a2ffdc104974e8901fb6f64d
Subproject commit e6e142e7593c4387c7f06c919980f86a8cd8e7e5