[rtl] Instantiate instruction cache

- Add parameters and actual instantiation of icache
- Add a custom CSR in the M-mode custom RW range to enable the cache
- Wire up the cache invalidation signal to trigger on fence.i

Signed-off-by: Tom Roberts <tomroberts@lowrisc.org>
This commit is contained in:
Tom Roberts 2020-03-19 16:29:14 +00:00 committed by Tom Roberts
parent e9171001c3
commit c054a63c3d
16 changed files with 235 additions and 35 deletions

View file

@ -66,6 +66,8 @@ Ibex implements all the Control and Status Registers (CSRs) listed in the follow
+---------+--------------------+--------+-----------------------------------------------+
| 0x7B3 | ``dscratch1`` | RW | Debug Scratch Register 1 |
+---------+--------------------+--------+-----------------------------------------------+
| 0x7C0 | ``cpuctrl`` | RW | CPU Control Register (Custom CSR) |
+---------+--------------------+--------+-----------------------------------------------+
| 0xB00 | ``mcycle`` | RW | Machine Cycle Counter |
+---------+--------------------+--------+-----------------------------------------------+
| 0xB02 | ``minstret`` | RW | Machine Instructions-Retired Counter |
@ -483,6 +485,26 @@ Reset Value: ``0x0000_0000``
Scratch register to be used by the debug module.
Accessible in Debug Mode only.
CPU Control Register (cpuctrl)
------------------------------
CSR Address: ``0x7C0``
Reset Value: ``0x0000_0000``
Custom CSR to control runtime configuration of CPU components.
Accessible in Machine Mode only.
Ibex implements the following bit fields.
Other bit fields read as zero.
+-------+------+------------------------------------------------------------------+
| Bit# | R/W | Description |
+-------+------+------------------------------------------------------------------+
| 0 | WARL | **icache_enable:** Enable (1) or disable (0) the instruction |
| | | cache. If the instruction cache has not been configured (ICache |
| | | parameter == 0), this field will always read as zero. |
+-------+------+------------------------------------------------------------------+
Time Registers (time(h))
------------------------

View file

@ -21,6 +21,11 @@ A localparam ``DEPTH`` gives a configurable depth which is set to 3 by default.
The top-level of the instruction fetch controls the prefetch buffer (in particular flushing it on branches/jumps/exception and beginning prefetching from the appropriate new PC) and supplies new instructions to the ID/EX stage along with their PC.
Compressed instructions are expanded by the IF stage so the decoder can always deal with uncompressed instructions (the ID stage still receives the compressed instruction for placing into ``mtval`` on an illegal instruction exception).
If Ibex has been configured with an instruction cache (parameter ICache == 1), then the prefetch buffer is replaced by the icache module (:ref:`icache`).
The interfaces of the icache module are the same as the prefetch buffer with two additions.
Firstly, a signal to enable the cache which is driven from a custom CSR.
Secondly, a signal to flush the cache, which is set every time a ``fence.i`` instruction is executed.
Instruction-Side Memory Interface
---------------------------------

View file

@ -20,6 +20,8 @@ Instantiation Template
.RV32E ( 0 ),
.RV32M ( 1 ),
.MultiplierImplementation ( "fast" ),
.ICache ( 0 ),
.ICacheECC ( 0 ),
.DbgTriggerEn ( 0 ),
.DmHaltAddr ( 32'h1A110800 ),
.DmExceptionAddr ( 32'h1A110808 )
@ -98,6 +100,12 @@ Parameters
| | | | "fast": multi-cycle fast, |
| | | | "single-cycle": single-cycle |
+------------------------------+-------------+------------+-----------------------------------------------------------------+
| ``ICache`` | bit | 0 | *EXPERIMENTAL* Enable instruction cache instead of prefetch |
| | | | buffer |
+------------------------------+-------------+------------+-----------------------------------------------------------------+
| ``ICacheECC`` | bit | 0 | *EXPERIMENTAL* Enable SECDED ECC protection in ICache (if |
| | | | ICache == 1) |
+------------------------------+-------------+------------+-----------------------------------------------------------------+
| ``DbgTriggerEn`` | bit | 0 | Enable debug trigger support (one trigger only) |
+------------------------------+-------------+------------+-----------------------------------------------------------------+
| ``DmHaltAddr`` | int | 0x1A110800 | Address to jump to when entering Debug Mode |

View file

@ -4,6 +4,7 @@
module tb_cs_registers #(
parameter bit DbgTriggerEn = 0,
parameter bit ICache = 0,
parameter int unsigned MHPMCounterNum = 8,
parameter int unsigned MHPMCounterWidth = 40,
parameter bit PMPEnable = 0,
@ -70,6 +71,8 @@ module tb_cs_registers #(
logic [31:0] pc_id_i;
logic [31:0] pc_wb_i;
logic icache_enable_o;
logic csr_save_if_i;
logic csr_save_id_i;
logic csr_save_wb_i;

View file

@ -14,6 +14,11 @@ ${PRJ_DIR}/ibex/shared/rtl/prim_clock_gating.sv
// ibex CORE RTL files
+incdir+${PRJ_DIR}/ibex/rtl
${PRJ_DIR}/ibex/shared/rtl/prim_assert.sv
${PRJ_DIR}/ibex/shared/rtl/prim_generic_ram_1p.sv
${PRJ_DIR}/ibex/shared/rtl/prim_secded_28_22_enc.sv
${PRJ_DIR}/ibex/shared/rtl/prim_secded_28_22_dec.sv
${PRJ_DIR}/ibex/shared/rtl/prim_secded_72_64_enc.sv
${PRJ_DIR}/ibex/shared/rtl/prim_secded_72_64_dec.sv
${PRJ_DIR}/ibex/rtl/ibex_pkg.sv
${PRJ_DIR}/ibex/rtl/ibex_tracer_pkg.sv
${PRJ_DIR}/ibex/rtl/ibex_tracer.sv
@ -26,6 +31,7 @@ ${PRJ_DIR}/ibex/rtl/ibex_decoder.sv
${PRJ_DIR}/ibex/rtl/ibex_ex_block.sv
${PRJ_DIR}/ibex/rtl/ibex_wb_stage.sv
${PRJ_DIR}/ibex/rtl/ibex_id_stage.sv
${PRJ_DIR}/ibex/rtl/ibex_icache.sv
${PRJ_DIR}/ibex/rtl/ibex_if_stage.sv
${PRJ_DIR}/ibex/rtl/ibex_load_store_unit.sv
${PRJ_DIR}/ibex/rtl/ibex_multdiv_slow.sv

View file

@ -87,4 +87,7 @@ targets:
# XXX: Cleanup all warnings and remove this option
# (or make it more fine-grained at least)
- "-Wno-fatal"
# RAM primitives wider than 64bit (required for ECC) fail to build in
# Verilator without increasing the unroll count (see Verilator#1266)
- "--unroll-count 72"

View file

@ -19,6 +19,7 @@ filesets:
- rtl/ibex_decoder.sv
- rtl/ibex_ex_block.sv
- rtl/ibex_fetch_fifo.sv
- rtl/ibex_icache.sv
- rtl/ibex_id_stage.sv
- rtl/ibex_if_stage.sv
- rtl/ibex_load_store_unit.sv
@ -70,6 +71,18 @@ parameters:
description: "Multiplier implementation. Valid values: fast, slow"
default: fast
ICache:
datatype: bool
paramtype: vlogparam
description: "Enable instruction cache"
default: false
ICacheECC:
datatype: bool
paramtype: vlogparam
description: "Enable ECC protection in instruction cache"
default: false
BranchTargetALU:
datatype: int
paramtype: vlogparam
@ -104,6 +117,9 @@ targets:
mode: lint-only
verilator_options:
- "-Wall"
# RAM primitives wider than 64bit (required for ECC) fail to build in
# Verilator without increasing the unroll count (see Verilator#1266)
- "--unroll-count 72"
veriblelint:
ruleset: default
rules:

View file

@ -46,6 +46,18 @@ parameters:
description: "Multiplier implementation. Valid values: fast, slow"
default: fast
ICache:
datatype: bool
paramtype: vlogparam
description: "Enable instruction cache"
default: false
ICacheECC:
datatype: bool
paramtype: vlogparam
description: "Enable ECC protection in instruction cache"
default: false
BranchTargetALU:
datatype: int
paramtype: vlogparam
@ -79,6 +91,9 @@ targets:
mode: lint-only
verilator_options:
- "-Wall"
# RAM primitives wider than 64bit (required for ECC) fail to build in
# Verilator without increasing the unroll count (see Verilator#1266)
- "--unroll-count 72"
veriblelint:
ruleset: default
rules:

View file

@ -21,6 +21,8 @@ module ibex_core #(
parameter bit BranchTargetALU = 1'b0,
parameter bit WritebackStage = 1'b0,
parameter MultiplierImplementation = "fast",
parameter bit ICache = 1'b0,
parameter bit ICacheECC = 1'b0,
parameter bit DbgTriggerEn = 1'b0,
parameter int unsigned DmHaltAddr = 32'h1A110800,
parameter int unsigned DmExceptionAddr = 32'h1A110808
@ -113,6 +115,9 @@ module ibex_core #(
logic [31:0] pc_id; // Program counter in ID stage
logic [31:0] pc_wb; // Program counter in WB stage
logic icache_enable;
logic icache_inval;
logic instr_first_cycle_id;
logic instr_valid_clear;
logic pc_set;
@ -339,8 +344,10 @@ module ibex_core #(
//////////////
ibex_if_stage #(
.DmHaltAddr ( DmHaltAddr ),
.DmExceptionAddr ( DmExceptionAddr )
.DmHaltAddr ( DmHaltAddr ),
.DmExceptionAddr ( DmExceptionAddr ),
.ICache ( ICache ),
.ICacheECC ( ICacheECC )
) if_stage_i (
.clk_i ( clk ),
.rst_ni ( rst_ni ),
@ -376,6 +383,8 @@ module ibex_core #(
.pc_mux_i ( pc_mux_id ),
.exc_pc_mux_i ( exc_pc_mux_id ),
.exc_cause ( exc_cause ),
.icache_enable_i ( icache_enable ),
.icache_inval_i ( icache_inval ),
// jump targets
.jump_target_ex_i ( jump_target_ex ),
@ -436,6 +445,7 @@ module ibex_core #(
.pc_mux_o ( pc_mux_id ),
.exc_pc_mux_o ( exc_pc_mux_id ),
.exc_cause_o ( exc_cause ),
.icache_inval_o ( icache_inval ),
.instr_fetch_err_i ( instr_fetch_err ),
.instr_fetch_err_plus2_i ( instr_fetch_err_plus2 ),
@ -756,6 +766,7 @@ module ibex_core #(
ibex_cs_registers #(
.DbgTriggerEn ( DbgTriggerEn ),
.ICache ( ICache ),
.MHPMCounterNum ( MHPMCounterNum ),
.MHPMCounterWidth ( MHPMCounterWidth ),
.PMPEnable ( PMPEnable ),
@ -816,6 +827,8 @@ module ibex_core #(
.pc_id_i ( pc_id ),
.pc_wb_i ( pc_wb ),
.icache_enable_o ( icache_enable ),
.csr_save_if_i ( csr_save_if ),
.csr_save_id_i ( csr_save_id ),
.csr_save_wb_i ( csr_save_wb ),

View file

@ -16,6 +16,8 @@ module ibex_core_tracing #(
parameter bit RV32M = 1'b1,
parameter bit BranchTargetALU = 1'b0,
parameter MultiplierImplementation = "fast",
parameter bit ICache = 1'b0,
parameter bit ICacheECC = 1'b0,
parameter bit DbgTriggerEn = 1'b0,
parameter bit WritebackStage = 1'b0,
parameter int unsigned DmHaltAddr = 32'h1A110800,
@ -103,6 +105,8 @@ module ibex_core_tracing #(
.RV32M ( RV32M ),
.BranchTargetALU ( BranchTargetALU ),
.MultiplierImplementation ( MultiplierImplementation ),
.ICache ( ICache ),
.ICacheECC ( ICacheECC ),
.DbgTriggerEn ( DbgTriggerEn ),
.WritebackStage ( WritebackStage ),
.DmHaltAddr ( DmHaltAddr ),

View file

@ -14,6 +14,7 @@
module ibex_cs_registers #(
parameter bit DbgTriggerEn = 0,
parameter bit ICache = 1'b0,
parameter int unsigned MHPMCounterNum = 10,
parameter int unsigned MHPMCounterWidth = 40,
parameter bit PMPEnable = 0,
@ -77,6 +78,10 @@ module ibex_cs_registers #(
input logic [31:0] pc_id_i,
input logic [31:0] pc_wb_i,
// CPU control bits
output logic icache_enable_o,
// Exception save/restore
input logic csr_save_if_i,
input logic csr_save_id_i,
input logic csr_save_wb_i,
@ -151,6 +156,12 @@ module ibex_cs_registers #(
priv_lvl_e prv;
} Dcsr_t;
// CPU control register (cpuctrl) fields.
// This is a packed struct, so the first member occupies the most significant
// bits: 31 + 1 = 32 bits in total.
typedef struct packed {
logic [30:0] unused_ctrl; // bits [31:1]: no fields implemented here yet
logic icache_enable; // bit [0]: instruction cache enable
} CpuCtrl_t;
// Interrupt and exception control signals
logic [31:0] exception_pc;
@ -200,6 +211,9 @@ module ibex_cs_registers #(
logic [31:0] tmatch_control_rdata;
logic [31:0] tmatch_value_rdata;
// CPU control bits
CpuCtrl_t cpuctrl_rdata, cpuctrl_wdata;
// CSR update logic
logic [31:0] csr_wdata_int;
logic [31:0] csr_rdata_int;
@ -398,6 +412,11 @@ module ibex_cs_registers #(
illegal_csr = ~DbgTriggerEn;
end
// Custom CSR for controlling CPU features
CSR_CPUCTRL: begin
csr_rdata_int = {cpuctrl_rdata};
end
default: begin
illegal_csr = 1'b1;
end
@ -1016,6 +1035,44 @@ module ibex_cs_registers #(
assign trigger_match_o = 'b0;
end
// CPU control fields
// Only the icache_enable bit can hold state; every other bit of the read
// value is hard-wired to zero.
assign cpuctrl_rdata.unused_ctrl = '0;
// Cast register write data
// Reinterprets the 32-bit CSR write data as CpuCtrl_t so individual fields
// can be picked out by name.
assign cpuctrl_wdata = CpuCtrl_t'(csr_wdata_int);
// Generate icache enable bit
// The storage flop only exists when the ICache parameter is set; otherwise
// the field is tied to zero in the else branch.
if (ICache) begin : gen_icache_enable
logic icache_enable_d, icache_enable_q;
// Update the value when cpuctrl register is written
// Take the new bit from the write data on a CSR write to CSR_CPUCTRL,
// otherwise hold the current value.
assign icache_enable_d = (csr_we_int & (csr_addr == CSR_CPUCTRL)) ?
cpuctrl_wdata.icache_enable : icache_enable_q;
// Enable bit register with asynchronous active-low reset
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
icache_enable_q <= 1'b0; // disabled on reset
end else begin
icache_enable_q <= icache_enable_d;
end
end
// Reads of the field return the registered value
assign cpuctrl_rdata.icache_enable = icache_enable_q;
end else begin : gen_no_icache
// tieoff for the unused icen bit
// Gives the otherwise unread write-data bit a sink (presumably to keep
// lint tools quiet about an unused signal).
logic unused_icen;
assign unused_icen = cpuctrl_wdata.icache_enable;
// icen field will always read as zero if ICache not configured
assign cpuctrl_rdata.icache_enable = 1'b0;
end
// tieoff for the currently unused bits of cpuctrl
// Bits [31:1] of the write data are never stored, so they are sunk here.
logic [31:1] unused_cpuctrl;
assign unused_cpuctrl = {cpuctrl_wdata[31:1]};
// Export the enable bit: the registered value when ICache is set, constant
// zero otherwise.
assign icache_enable_o = cpuctrl_rdata.icache_enable;
////////////////
// Assertions //
////////////////

View file

@ -34,6 +34,7 @@ module ibex_decoder #(
output logic ecall_insn_o, // syscall instr encountered
output logic wfi_insn_o, // wait for interrupt instr encountered
output logic jump_set_o, // jump taken set signal
output logic icache_inval_o,
// from IF-ID pipeline register
input logic instr_first_cycle_i, // instruction read is in its first cycle
@ -171,6 +172,7 @@ module ibex_decoder #(
jump_in_dec_o = 1'b0;
jump_set_o = 1'b0;
branch_in_dec_o = 1'b0;
icache_inval_o = 1'b0;
mult_en_o = 1'b0;
div_en_o = 1'b0;
@ -431,7 +433,7 @@ module ibex_decoder #(
OPCODE_MISC_MEM: begin
// For now, treat the FENCE (funct3 == 000) instruction as a NOP. This may not be correct
// in a system with caches and should be revisited.
// FENCE.I will flush the IF stage and prefetch buffer but nothing else.
// FENCE.I will flush the IF stage and prefetch buffer (or ICache) but nothing else.
unique case (instr[14:12])
3'b000: begin
rf_we = 1'b0;
@ -440,12 +442,14 @@ module ibex_decoder #(
// FENCE.I is implemented as a jump to the next PC, this gives the required flushing
// behaviour (iside prefetch buffer flushed and response to any outstanding iside
// requests will be ignored).
// If present, the ICache will also be flushed.
jump_in_dec_o = 1'b1;
rf_we = 1'b0;
if (instr_first_cycle_i) begin
jump_set_o = 1'b1;
icache_inval_o = 1'b1;
end
end
default: begin

View file

@ -14,7 +14,7 @@ module ibex_icache #(
// Cache arrangement parameters
parameter int unsigned BusWidth = 32,
parameter int unsigned CacheSizeBytes = 4*1024,
parameter bit CacheECC = 1'b0,
parameter bit ICacheECC = 1'b0,
parameter int unsigned LineSize = 64,
parameter int unsigned NumWays = 2,
// Always make speculative bus requests in parallel with lookups
@ -65,7 +65,7 @@ module ibex_icache #(
// Request throttling threshold
localparam int unsigned FB_THRESHOLD = NUM_FB - 2;
// Derived parameters
localparam int unsigned LINE_SIZE_ECC = CacheECC ? (LineSize + 8) : LineSize;
localparam int unsigned LINE_SIZE_ECC = ICacheECC ? (LineSize + 8) : LineSize;
localparam int unsigned LINE_SIZE_BYTES = LineSize/8;
localparam int unsigned LINE_W = $clog2(LINE_SIZE_BYTES);
localparam int unsigned BUS_BYTES = BusWidth/8;
@ -76,7 +76,7 @@ module ibex_icache #(
localparam int unsigned INDEX_W = $clog2(NUM_LINES);
localparam int unsigned INDEX_HI = INDEX_W + LINE_W - 1;
localparam int unsigned TAG_SIZE = ADDR_W - INDEX_W - LINE_W + 1; // 1 valid bit
localparam int unsigned TAG_SIZE_ECC = CacheECC ? (TAG_SIZE + 6) : TAG_SIZE;
localparam int unsigned TAG_SIZE_ECC = ICacheECC ? (TAG_SIZE + 6) : TAG_SIZE;
localparam int unsigned OUTPUT_BEATS = (BUS_BYTES / 2); // number of halfwords
// Prefetch signals
@ -257,7 +257,7 @@ module ibex_icache #(
assign data_write_ic0 = tag_write_ic0;
// Append ECC checkbits to write data if required
if (CacheECC) begin : gen_ecc_wdata
if (ICacheECC) begin : gen_ecc_wdata
// Tagram ECC
// Reuse the same ecc encoding module for larger cache sizes by padding with zeros
@ -383,7 +383,7 @@ module ibex_icache #(
round_robin_way_q;
// ECC checking logic
if (CacheECC) begin : gen_data_ecc_checking
if (ICacheECC) begin : gen_data_ecc_checking
logic [NumWays-1:0] tag_err_ic1;
logic [1:0] data_err_ic1;
logic ecc_correction_write_d, ecc_correction_write_q;

View file

@ -39,6 +39,7 @@ module ibex_id_stage #(
output logic instr_first_cycle_id_o,
output logic instr_valid_clear_o, // kill instr in IF-ID reg
output logic id_in_ready_o, // ID stage is ready for next instr
output logic icache_inval_o,
// Jumps and branches
input logic branch_decision_i,
@ -331,6 +332,7 @@ module ibex_id_stage #(
.ecall_insn_o ( ecall_insn_dec ),
.wfi_insn_o ( wfi_insn_dec ),
.jump_set_o ( jump_set ),
.icache_inval_o ( icache_inval_o ),
// from IF-ID pipeline register
.instr_first_cycle_i ( instr_first_cycle ),

View file

@ -13,8 +13,10 @@
`include "prim_assert.sv"
module ibex_if_stage #(
parameter int unsigned DmHaltAddr = 32'h1A110800,
parameter int unsigned DmExceptionAddr = 32'h1A110808
parameter int unsigned DmHaltAddr = 32'h1A110800,
parameter int unsigned DmExceptionAddr = 32'h1A110808,
parameter bit ICache = 1'b0,
parameter bit ICacheECC = 1'b0
) (
input logic clk_i,
input logic rst_ni,
@ -56,6 +58,9 @@ module ibex_if_stage #(
input ibex_pkg::exc_pc_sel_e exc_pc_mux_i, // selects ISR address
input ibex_pkg::exc_cause_e exc_cause, // selects ISR address for
// vectorized interrupt lines
input logic icache_enable_i,
input logic icache_inval_i,
// jump and branch target
input logic [31:0] jump_target_ex_i, // jump target address
@ -134,35 +139,71 @@ module ibex_if_stage #(
// tell CS register file to initialize mtvec on boot
assign csr_mtvec_init_o = (pc_mux_i == PC_BOOT) & pc_set_i;
// prefetch buffer, caches a fixed number of instructions
ibex_prefetch_buffer prefetch_buffer_i (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
if (ICache) begin : gen_icache
// Full I-Cache option
ibex_icache #(
.ICacheECC (ICacheECC)
) icache_i (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( req_i ),
.req_i ( req_i ),
.branch_i ( branch_req ),
.addr_i ( {fetch_addr_n[31:1], 1'b0} ),
.branch_i ( branch_req ),
.addr_i ( {fetch_addr_n[31:1], 1'b0} ),
.ready_i ( fetch_ready ),
.valid_o ( fetch_valid ),
.rdata_o ( fetch_rdata ),
.addr_o ( fetch_addr ),
.err_o ( fetch_err ),
.err_plus2_o ( fetch_err_plus2 ),
.ready_i ( fetch_ready ),
.valid_o ( fetch_valid ),
.rdata_o ( fetch_rdata ),
.addr_o ( fetch_addr ),
.err_o ( fetch_err ),
.err_plus2_o ( fetch_err_plus2 ),
// goes to instruction memory / instruction cache
.instr_req_o ( instr_req_o ),
.instr_addr_o ( instr_addr_o ),
.instr_gnt_i ( instr_gnt_i ),
.instr_rvalid_i ( instr_rvalid_i ),
.instr_rdata_i ( instr_rdata_i ),
.instr_err_i ( instr_err_i ),
.instr_pmp_err_i ( instr_pmp_err_i ),
.instr_req_o ( instr_req_o ),
.instr_addr_o ( instr_addr_o ),
.instr_gnt_i ( instr_gnt_i ),
.instr_rvalid_i ( instr_rvalid_i ),
.instr_rdata_i ( instr_rdata_i ),
.instr_err_i ( instr_err_i ),
.instr_pmp_err_i ( instr_pmp_err_i ),
// Prefetch Buffer Status
.busy_o ( prefetch_busy )
);
.icache_enable_i ( icache_enable_i ),
.icache_inval_i ( icache_inval_i ),
.busy_o ( prefetch_busy )
);
end else begin : gen_prefetch_buffer
// prefetch buffer, caches a fixed number of instructions
ibex_prefetch_buffer prefetch_buffer_i (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( req_i ),
.branch_i ( branch_req ),
.addr_i ( {fetch_addr_n[31:1], 1'b0} ),
.ready_i ( fetch_ready ),
.valid_o ( fetch_valid ),
.rdata_o ( fetch_rdata ),
.addr_o ( fetch_addr ),
.err_o ( fetch_err ),
.err_plus2_o ( fetch_err_plus2 ),
.instr_req_o ( instr_req_o ),
.instr_addr_o ( instr_addr_o ),
.instr_gnt_i ( instr_gnt_i ),
.instr_rvalid_i ( instr_rvalid_i ),
.instr_rdata_i ( instr_rdata_i ),
.instr_err_i ( instr_err_i ),
.instr_pmp_err_i ( instr_pmp_err_i ),
.busy_o ( prefetch_busy )
);
// ICache tieoffs
logic unused_icen, unused_icinv;
assign unused_icen = icache_enable_i;
assign unused_icinv = icache_inval_i;
end
assign branch_req = pc_set_i;
assign fetch_ready = id_in_ready_i;

View file

@ -389,7 +389,8 @@ typedef enum logic[11:0] {
CSR_MHPMCOUNTER28H = 12'hB9C,
CSR_MHPMCOUNTER29H = 12'hB9D,
CSR_MHPMCOUNTER30H = 12'hB9E,
CSR_MHPMCOUNTER31H = 12'hB9F
CSR_MHPMCOUNTER31H = 12'hB9F,
CSR_CPUCTRL = 12'h7C0
} csr_num_e;
// CSR pmp-related offsets