mirror of
https://github.com/lowRISC/ibex.git
synced 2025-06-28 09:17:17 -04:00
This refactors the invalidation control logic into an explicit state machine. The top-level icache_invalid_o signal is also removed, replaced with an explicit scramble key request instead. This has all been done to better deal with corner cases around a new invalidation being requested whilst another is still going on. Previously there was a bug where an invalidation request in the final cycle of an ongoing invalidation didn't restart the invalidation but did rotate the scrambling key, producing an ECC failure and an alert.
1204 lines
49 KiB
SystemVerilog
1204 lines
49 KiB
SystemVerilog
// Copyright lowRISC contributors.
|
|
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
/**
|
|
* Instruction cache
|
|
*
|
|
* Provides an instruction cache along with cache management, instruction buffering and prefetching
|
|
*/
|
|
|
|
`include "prim_assert.sv"
|
|
|
|
module ibex_icache import ibex_pkg::*; #(
  // Enable SECDED ECC protection of the tag and data RAMs
  parameter bit          ICacheECC   = 1'b0,
  // Reset all registers, not only those required for correct operation
  parameter bit          ResetAll    = 1'b0,
  // RAM word widths; when ECC is enabled these include the checkbits
  parameter int unsigned BusSizeECC  = BUS_SIZE,
  parameter int unsigned TagSizeECC  = IC_TAG_SIZE,
  parameter int unsigned LineSizeECC = IC_LINE_SIZE,
  // Only cache branch targets
  parameter bit          BranchCache = 1'b0
) (
  // Clock and reset
  input  logic                       clk_i,
  input  logic                       rst_ni,

  // Signal that the core would like instructions
  input  logic                       req_i,

  // Set the cache's address counter
  input  logic                       branch_i,
  input  logic [31:0]                addr_i,

  // IF stage interface: Pass fetched instructions to the core
  input  logic                       ready_i,
  output logic                       valid_o,
  output logic [31:0]                rdata_o,
  output logic [31:0]                addr_o,
  output logic                       err_o,
  output logic                       err_plus2_o,

  // Instruction memory / interconnect interface: Fetch instruction data from memory
  output logic                       instr_req_o,
  input  logic                       instr_gnt_i,
  output logic [31:0]                instr_addr_o,
  input  logic [BUS_SIZE-1:0]        instr_rdata_i,
  input  logic                       instr_err_i,
  input  logic                       instr_rvalid_i,

  // RAM IO (one tag RAM and one data RAM bank per way)
  output logic [IC_NUM_WAYS-1:0]     ic_tag_req_o,
  output logic                       ic_tag_write_o,
  output logic [IC_INDEX_W-1:0]      ic_tag_addr_o,
  output logic [TagSizeECC-1:0]      ic_tag_wdata_o,
  input  logic [TagSizeECC-1:0]      ic_tag_rdata_i [IC_NUM_WAYS],
  output logic [IC_NUM_WAYS-1:0]     ic_data_req_o,
  output logic                       ic_data_write_o,
  output logic [IC_INDEX_W-1:0]      ic_data_addr_o,
  output logic [LineSizeECC-1:0]     ic_data_wdata_o,
  input  logic [LineSizeECC-1:0]     ic_data_rdata_i [IC_NUM_WAYS],
  // Scramble key handshake (a new key is requested as part of invalidation
  // rather than via a separate icache_invalid_o signal)
  input  logic                       ic_scr_key_valid_i,
  output logic                       ic_scr_key_req_o,

  // Cache status
  input  logic                       icache_enable_i,
  input  logic                       icache_inval_i,
  output logic                       busy_o,
  output logic                       ecc_error_o
);
|
|
|
|
  // Number of fill buffers (must be >= 2)
  localparam int unsigned NUM_FB = 4;
  // Request throttling threshold: new (non-branch) lookups are throttled once
  // more than FB_THRESHOLD fill buffers are busy (see lookup_throttle)
  localparam int unsigned FB_THRESHOLD = NUM_FB - 2;

  // Prefetch signals
  logic [ADDR_W-1:0] lookup_addr_aligned;
  logic [ADDR_W-1:0] prefetch_addr_d, prefetch_addr_q;
  logic prefetch_addr_en;
  // Cache pipeline IC0 signals
  logic lookup_throttle;
  logic lookup_req_ic0;
  logic [ADDR_W-1:0] lookup_addr_ic0;
  logic [IC_INDEX_W-1:0] lookup_index_ic0;
  logic fill_req_ic0;
  logic [IC_INDEX_W-1:0] fill_index_ic0;
  logic [IC_TAG_SIZE-1:0] fill_tag_ic0;
  logic [IC_LINE_SIZE-1:0] fill_wdata_ic0;
  logic lookup_grant_ic0;
  logic lookup_actual_ic0;
  logic fill_grant_ic0;
  logic tag_req_ic0;
  logic [IC_INDEX_W-1:0] tag_index_ic0;
  logic [IC_NUM_WAYS-1:0] tag_banks_ic0;
  logic tag_write_ic0;
  logic [TagSizeECC-1:0] tag_wdata_ic0;
  logic data_req_ic0;
  logic [IC_INDEX_W-1:0] data_index_ic0;
  logic [IC_NUM_WAYS-1:0] data_banks_ic0;
  logic data_write_ic0;
  logic [LineSizeECC-1:0] data_wdata_ic0;
  // Cache pipeline IC1 signals
  logic [TagSizeECC-1:0] tag_rdata_ic1 [IC_NUM_WAYS];
  logic [LineSizeECC-1:0] data_rdata_ic1 [IC_NUM_WAYS];
  logic [LineSizeECC-1:0] hit_data_ecc_ic1;
  logic [IC_LINE_SIZE-1:0] hit_data_ic1;
  logic lookup_valid_ic1;
  logic [ADDR_W-1:IC_INDEX_HI+1] lookup_addr_ic1;
  logic [IC_NUM_WAYS-1:0] tag_match_ic1;
  logic tag_hit_ic1;
  logic [IC_NUM_WAYS-1:0] tag_invalid_ic1;
  logic [IC_NUM_WAYS-1:0] lowest_invalid_way_ic1;
  logic [IC_NUM_WAYS-1:0] round_robin_way_ic1, round_robin_way_q;
  logic [IC_NUM_WAYS-1:0] sel_way_ic1;
  logic ecc_err_ic1;
  logic ecc_write_req;
  logic [IC_NUM_WAYS-1:0] ecc_write_ways;
  logic [IC_INDEX_W-1:0] ecc_write_index;
  // Fill buffer signals
  logic [$clog2(NUM_FB)-1:0] fb_fill_level;
  logic fill_cache_new;
  logic fill_new_alloc;
  logic fill_spec_req, fill_spec_done, fill_spec_hold;
  logic [NUM_FB-1:0][NUM_FB-1:0] fill_older_d, fill_older_q;
  logic [NUM_FB-1:0] fill_alloc_sel, fill_alloc;
  logic [NUM_FB-1:0] fill_busy_d, fill_busy_q;
  logic [NUM_FB-1:0] fill_done;
  logic [NUM_FB-1:0] fill_in_ic1;
  logic [NUM_FB-1:0] fill_stale_d, fill_stale_q;
  logic [NUM_FB-1:0] fill_cache_d, fill_cache_q;
  logic [NUM_FB-1:0] fill_hit_ic1, fill_hit_d, fill_hit_q;
  logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_ext_cnt_d, fill_ext_cnt_q;
  logic [NUM_FB-1:0] fill_ext_hold_d, fill_ext_hold_q;
  logic [NUM_FB-1:0] fill_ext_done_d, fill_ext_done_q;
  logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_rvd_cnt_d, fill_rvd_cnt_q;
  logic [NUM_FB-1:0] fill_rvd_done;
  logic [NUM_FB-1:0] fill_ram_done_d, fill_ram_done_q;
  logic [NUM_FB-1:0] fill_out_grant;
  logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_out_cnt_d, fill_out_cnt_q;
  logic [NUM_FB-1:0] fill_out_done;
  logic [NUM_FB-1:0] fill_ext_req, fill_rvd_exp, fill_ram_req, fill_out_req;
  logic [NUM_FB-1:0] fill_data_sel, fill_data_reg;
  logic [NUM_FB-1:0] fill_data_hit, fill_data_rvd;
  logic [NUM_FB-1:0][IC_LINE_BEATS_W-1:0] fill_ext_off, fill_rvd_off;
  logic [NUM_FB-1:0][IC_LINE_BEATS_W:0] fill_ext_beat, fill_rvd_beat;
  logic [NUM_FB-1:0] fill_ext_arb, fill_ram_arb, fill_out_arb;
  logic [NUM_FB-1:0] fill_rvd_arb;
  logic [NUM_FB-1:0] fill_entry_en;
  logic [NUM_FB-1:0] fill_addr_en;
  logic [NUM_FB-1:0] fill_way_en;
  logic [NUM_FB-1:0][IC_LINE_BEATS-1:0] fill_data_en;
  logic [NUM_FB-1:0][IC_LINE_BEATS-1:0] fill_err_d, fill_err_q;
  logic [ADDR_W-1:0] fill_addr_q [NUM_FB];
  logic [IC_NUM_WAYS-1:0] fill_way_q [NUM_FB];
  logic [IC_LINE_SIZE-1:0] fill_data_d [NUM_FB];
  logic [IC_LINE_SIZE-1:0] fill_data_q [NUM_FB];
  logic [ADDR_W-1:BUS_W] fill_ext_req_addr;
  logic [ADDR_W-1:0] fill_ram_req_addr;
  logic [IC_NUM_WAYS-1:0] fill_ram_req_way;
  logic [IC_LINE_SIZE-1:0] fill_ram_req_data;
  logic [IC_LINE_SIZE-1:0] fill_out_data;
  logic [IC_LINE_BEATS-1:0] fill_out_err;
  // External req signals
  logic instr_req;
  logic [ADDR_W-1:BUS_W] instr_addr;
  // Data output signals
  logic skid_complete_instr;
  logic skid_ready;
  logic output_compressed;
  logic skid_valid_d, skid_valid_q, skid_en;
  logic [15:0] skid_data_d, skid_data_q;
  logic skid_err_q;
  logic output_valid;
  logic addr_incr_two;
  logic output_addr_en;
  logic [ADDR_W-1:1] output_addr_incr;
  logic [ADDR_W-1:1] output_addr_d, output_addr_q;
  logic [15:0] output_data_lo, output_data_hi;
  logic data_valid, output_ready;
  logic [IC_LINE_SIZE-1:0] line_data;
  logic [IC_LINE_BEATS-1:0] line_err;
  logic [31:0] line_data_muxed;
  logic line_err_muxed;
  logic [31:0] output_data;
  logic output_err;

  // Invalidations: states of the invalidation control FSM.
  // Names suggest: OUT_OF_RESET performs the post-reset invalidation,
  // AWAIT_SCRAMBLE_KEY waits for a fresh scramble key, INVAL_CACHE walks the
  // tag RAM writing invalid tags, IDLE is the quiescent state.
  // NOTE(review): the FSM body is outside this chunk - confirm against it.
  typedef enum logic [1:0] {
    OUT_OF_RESET,
    AWAIT_SCRAMBLE_KEY,
    INVAL_CACHE,
    IDLE
  } inval_state_e;

  inval_state_e inval_state_q, inval_state_d;
  logic inval_write_req;
  logic inval_block_cache;
  logic [IC_INDEX_W-1:0] inval_index_d, inval_index_q;
  logic inval_index_en;
  logic inval_active;
|
|
|
|
  //////////////////////////
  // Instruction prefetch //
  //////////////////////////

  // Align the current lookup address down to its cache line boundary
  assign lookup_addr_aligned = {lookup_addr_ic0[ADDR_W-1:IC_LINE_W], {IC_LINE_W{1'b0}}};

  // The prefetch address increments by one cache line for each granted request.
  // This address is also updated if there is a branch that is not granted, since the target
  // address (addr_i) is only valid for one cycle while branch_i is high.
  // The captured branch target address is not forced to be aligned since the offset in the cache
  // line must also be recorded for later use by the fill buffers.
  assign prefetch_addr_d =
      lookup_grant_ic0 ? (lookup_addr_aligned +
                          {{ADDR_W-IC_LINE_W-1{1'b0}}, 1'b1, {IC_LINE_W{1'b0}}}) :
                         addr_i;

  assign prefetch_addr_en = branch_i | lookup_grant_ic0;

  // ResetAll selects whether this register carries an asynchronous reset
  if (ResetAll) begin : g_prefetch_addr_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        prefetch_addr_q <= '0;
      end else if (prefetch_addr_en) begin
        prefetch_addr_q <= prefetch_addr_d;
      end
    end
  end else begin : g_prefetch_addr_nr
    always_ff @(posedge clk_i) begin
      if (prefetch_addr_en) begin
        prefetch_addr_q <= prefetch_addr_d;
      end
    end
  end
|
|
|
|
  ////////////////////////
  // Pipeline stage IC0 //
  ////////////////////////

  // Cache lookup
  // Throttle new lookups once most fill buffers are in use
  assign lookup_throttle = (fb_fill_level > FB_THRESHOLD[$clog2(NUM_FB)-1:0]);

  // A lookup needs the core requesting, a free fill buffer, and either a branch or no
  // throttling; pending ECC correction writes take priority over lookups
  assign lookup_req_ic0 = req_i & ~&fill_busy_q & (branch_i | ~lookup_throttle) &
                          ~ecc_write_req;
  assign lookup_addr_ic0 = branch_i ? addr_i : prefetch_addr_q;
  assign lookup_index_ic0 = lookup_addr_ic0[IC_INDEX_HI:IC_LINE_W];

  // Cache write
  assign fill_req_ic0 = (|fill_ram_req);
  assign fill_index_ic0 = fill_ram_req_addr[IC_INDEX_HI:IC_LINE_W];
  // MSB of the tag is the valid bit; it is cleared for invalidation and ECC correction writes
  assign fill_tag_ic0 = {(~inval_write_req & ~ecc_write_req),
                         fill_ram_req_addr[ADDR_W-1:IC_INDEX_HI+1]};
  assign fill_wdata_ic0 = fill_ram_req_data;

  // Arbitrated signals - lookups have highest priority
  assign lookup_grant_ic0 = lookup_req_ic0;
  assign fill_grant_ic0 = fill_req_ic0 & ~lookup_req_ic0 & ~inval_write_req &
                          ~ecc_write_req;
  // Qualified lookup grant to mask ram signals in IC1 if access was not made
  assign lookup_actual_ic0 = lookup_grant_ic0 & icache_enable_i & ~inval_block_cache;

  // Tagram
  assign tag_req_ic0 = lookup_req_ic0 | fill_req_ic0 | inval_write_req | ecc_write_req;
  // Address priority: invalidation, then ECC correction, then fill, then lookup
  assign tag_index_ic0 = inval_write_req ? inval_index_q :
                         ecc_write_req ? ecc_write_index :
                         fill_grant_ic0 ? fill_index_ic0 :
                         lookup_index_ic0;
  // Lookups and invalidations access all ways; fills/corrections only the selected way(s)
  assign tag_banks_ic0 = ecc_write_req ? ecc_write_ways :
                         fill_grant_ic0 ? fill_ram_req_way :
                         {IC_NUM_WAYS{1'b1}};
  assign tag_write_ic0 = fill_grant_ic0 | inval_write_req | ecc_write_req;

  // Dataram - shares the arbitrated index/banks/write controls with the tagram
  assign data_req_ic0 = lookup_req_ic0 | fill_req_ic0;
  assign data_index_ic0 = tag_index_ic0;
  assign data_banks_ic0 = tag_banks_ic0;
  assign data_write_ic0 = tag_write_ic0;

  // Append ECC checkbits to write data if required
  if (ICacheECC) begin : gen_ecc_wdata
    // SEC_CM: ICACHE.MEM.INTEGRITY

    // Tagram ECC
    // Reuse the same ecc encoding module for larger cache sizes by padding with zeros
    logic [21:0] tag_ecc_input_padded;
    logic [27:0] tag_ecc_output_padded;
    logic [22-IC_TAG_SIZE:0] unused_tag_ecc_output;

    assign tag_ecc_input_padded = {{22-IC_TAG_SIZE{1'b0}},fill_tag_ic0};
    assign unused_tag_ecc_output = tag_ecc_output_padded[21:IC_TAG_SIZE-1];

    prim_secded_inv_28_22_enc tag_ecc_enc (
      .data_i (tag_ecc_input_padded),
      .data_o (tag_ecc_output_padded)
    );

    // Keep the six checkbits plus the used portion of the encoded tag
    assign tag_wdata_ic0 = {tag_ecc_output_padded[27:22],tag_ecc_output_padded[IC_TAG_SIZE-1:0]};

    // Dataram ECC - one encoder per bus-width beat of the line
    for (genvar bank = 0; bank < IC_LINE_BEATS; bank++) begin : gen_ecc_banks
      prim_secded_inv_39_32_enc data_ecc_enc (
        .data_i (fill_wdata_ic0[bank*BUS_SIZE+:BUS_SIZE]),
        .data_o (data_wdata_ic0[bank*BusSizeECC+:BusSizeECC])
      );
    end

  end else begin : gen_noecc_wdata
    // No ECC configured: write data passes through unchanged
    assign tag_wdata_ic0 = fill_tag_ic0;
    assign data_wdata_ic0 = fill_wdata_ic0;
  end
|
|
|
|
  ////////////////
  // IC0 -> IC1 //
  ////////////////

  // Tag RAMs outputs
  assign ic_tag_req_o = {IC_NUM_WAYS{tag_req_ic0}} & tag_banks_ic0;
  assign ic_tag_write_o = tag_write_ic0;
  assign ic_tag_addr_o = tag_index_ic0;
  assign ic_tag_wdata_o = tag_wdata_ic0;

  // Tag RAMs inputs
  assign tag_rdata_ic1 = ic_tag_rdata_i;

  // Data RAMs outputs
  assign ic_data_req_o = {IC_NUM_WAYS{data_req_ic0}} & data_banks_ic0;
  assign ic_data_write_o = data_write_ic0;
  assign ic_data_addr_o = data_index_ic0;
  assign ic_data_wdata_o = data_wdata_ic0;

  // Data RAMs inputs
  assign data_rdata_ic1 = ic_data_rdata_i;

  // Register whether the lookup actually accessed the RAMs in IC0
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      lookup_valid_ic1 <= 1'b0;
    end else begin
      lookup_valid_ic1 <= lookup_actual_ic0;
    end
  end

  // Capture the tag portion of the lookup address and the fill buffer allocated for it
  if (ResetAll) begin : g_lookup_addr_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        lookup_addr_ic1 <= '0;
        fill_in_ic1 <= '0;
      end else if (lookup_grant_ic0) begin
        lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:IC_INDEX_HI+1];
        fill_in_ic1 <= fill_alloc_sel;
      end
    end
  end else begin : g_lookup_addr_nr
    always_ff @(posedge clk_i) begin
      if (lookup_grant_ic0) begin
        lookup_addr_ic1 <= lookup_addr_ic0[ADDR_W-1:IC_INDEX_HI+1];
        fill_in_ic1 <= fill_alloc_sel;
      end
    end
  end
|
|
|
|
  ////////////////////////
  // Pipeline stage IC1 //
  ////////////////////////

  // Tag matching: a way hits when its stored tag is valid (MSB set) and equals the
  // tag bits of the looked-up address
  for (genvar way = 0; way < IC_NUM_WAYS; way++) begin : gen_tag_match
    assign tag_match_ic1[way] = (tag_rdata_ic1[way][IC_TAG_SIZE-1:0] ==
                                 {1'b1,lookup_addr_ic1[ADDR_W-1:IC_INDEX_HI+1]});
    assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][IC_TAG_SIZE-1];
  end

  assign tag_hit_ic1 = |tag_match_ic1;

  // Hit data mux: one-hot OR-mux across the matching ways
  always_comb begin
    hit_data_ecc_ic1 = 'b0;
    for (int way = 0; way < IC_NUM_WAYS; way++) begin
      if (tag_match_ic1[way]) begin
        hit_data_ecc_ic1 |= data_rdata_ic1[way];
      end
    end
  end

  // Way selection for allocations to the cache (onehot signals)
  // 1 first invalid way
  // 2 global round-robin (pseudorandom) way
  assign lowest_invalid_way_ic1[0] = tag_invalid_ic1[0];
  assign round_robin_way_ic1[0] = round_robin_way_q[IC_NUM_WAYS-1];
  for (genvar way = 1; way < IC_NUM_WAYS; way++) begin : gen_lowest_way
    assign lowest_invalid_way_ic1[way] = tag_invalid_ic1[way] & ~|tag_invalid_ic1[way-1:0];
    assign round_robin_way_ic1[way] = round_robin_way_q[way-1];
  end

  // Rotate the one-hot round-robin pointer on every valid lookup
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      round_robin_way_q <= {{IC_NUM_WAYS-1{1'b0}}, 1'b1};
    end else if (lookup_valid_ic1) begin
      round_robin_way_q <= round_robin_way_ic1;
    end
  end

  // Prefer the lowest invalid way, otherwise fall back to round-robin
  assign sel_way_ic1 = |tag_invalid_ic1 ? lowest_invalid_way_ic1 :
                       round_robin_way_q;
|
|
|
|
  // ECC checking logic
  if (ICacheECC) begin : gen_data_ecc_checking
    // SEC_CM: ICACHE.MEM.INTEGRITY
    logic [IC_NUM_WAYS-1:0] tag_err_ic1;
    logic [IC_LINE_BEATS*2-1:0] data_err_ic1;
    logic ecc_correction_write_d, ecc_correction_write_q;
    logic [IC_NUM_WAYS-1:0] ecc_correction_ways_d, ecc_correction_ways_q;
    logic [IC_INDEX_W-1:0] lookup_index_ic1, ecc_correction_index_q;

    // Tag ECC checking
    for (genvar way = 0; way < IC_NUM_WAYS; way++) begin : gen_tag_ecc
      logic [1:0] tag_err_bank_ic1;
      logic [27:0] tag_rdata_padded_ic1;

      // Expand the tag rdata with extra padding if the tag size is less than the maximum
      assign tag_rdata_padded_ic1 = {tag_rdata_ic1[way][TagSizeECC-1-:6],
                                     {22-IC_TAG_SIZE{1'b0}},
                                     tag_rdata_ic1[way][IC_TAG_SIZE-1:0]};

      prim_secded_inv_28_22_dec data_ecc_dec (
        .data_i (tag_rdata_padded_ic1),
        .data_o (),
        .syndrome_o (),
        .err_o (tag_err_bank_ic1)
      );
      // Flag either decoder error output (single- or double-bit) for this way
      assign tag_err_ic1[way] = |tag_err_bank_ic1;
    end

    // Data ECC checking
    // Note - could generate for all ways and mux after
    for (genvar bank = 0; bank < IC_LINE_BEATS; bank++) begin : gen_ecc_banks
      prim_secded_inv_39_32_dec data_ecc_dec (
        .data_i (hit_data_ecc_ic1[bank*BusSizeECC+:BusSizeECC]),
        .data_o (),
        .syndrome_o (),
        .err_o (data_err_ic1[bank*2+:2])
      );

      // Strip the checkbits from the hit data for downstream use
      assign hit_data_ic1[bank*BUS_SIZE+:BUS_SIZE] =
          hit_data_ecc_ic1[bank*BusSizeECC+:BUS_SIZE];

    end

    // Tag ECC across all ways is always expected to be correct so the check does not need to be
    // qualified by hit or tag valid. Initial (invalid with correct ECC) tags are written on reset
    // and all further tag writes produce correct ECC. For data ECC no initialisation is done on
    // reset so unused data (in particular those ways that don't have a valid tag) may have
    // incorrect ECC. We only check data ECC where tags indicate it is valid and we have hit on it.
    assign ecc_err_ic1 = lookup_valid_ic1 & (((|data_err_ic1) & tag_hit_ic1) | (|tag_err_ic1));

    // Error correction
    // All ways will be invalidated on a tag error to prevent X-propagation from data_err_ic1 on
    // spurious hits. Also prevents the same line being allocated twice when there was a true
    // hit and a spurious hit.
    assign ecc_correction_ways_d = {IC_NUM_WAYS{|tag_err_ic1}} |
                                   (tag_match_ic1 & {IC_NUM_WAYS{|data_err_ic1}});
    assign ecc_correction_write_d = ecc_err_ic1;

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        ecc_correction_write_q <= 1'b0;
      end else begin
        ecc_correction_write_q <= ecc_correction_write_d;
      end
    end

    // The index is required in IC1 only when ECC is configured so is registered here
    if (ResetAll) begin : g_lookup_ind_ra
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          lookup_index_ic1 <= '0;
        end else if (lookup_grant_ic0) begin
          lookup_index_ic1 <= lookup_addr_ic0[IC_INDEX_HI-:IC_INDEX_W];
        end
      end
    end else begin : g_lookup_ind_nr
      always_ff @(posedge clk_i) begin
        if (lookup_grant_ic0) begin
          lookup_index_ic1 <= lookup_addr_ic0[IC_INDEX_HI-:IC_INDEX_W];
        end
      end
    end

    // Store the ways with errors to be invalidated
    if (ResetAll) begin : g_ecc_correction_ra
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          ecc_correction_ways_q <= '0;
          ecc_correction_index_q <= '0;
        end else if (ecc_err_ic1) begin
          ecc_correction_ways_q <= ecc_correction_ways_d;
          ecc_correction_index_q <= lookup_index_ic1;
        end
      end
    end else begin : g_ecc_correction_nr
      always_ff @(posedge clk_i) begin
        if (ecc_err_ic1) begin
          ecc_correction_ways_q <= ecc_correction_ways_d;
          ecc_correction_index_q <= lookup_index_ic1;
        end
      end
    end

    // The registered correction drives a tag write (via IC0 arbitration) in the next cycle
    assign ecc_write_req = ecc_correction_write_q;
    assign ecc_write_ways = ecc_correction_ways_q;
    assign ecc_write_index = ecc_correction_index_q;

    assign ecc_error_o = ecc_err_ic1;
  end else begin : gen_no_data_ecc
    // No ECC configured: tie off correction signals and pass hit data straight through
    assign ecc_err_ic1 = 1'b0;
    assign ecc_write_req = 1'b0;
    assign ecc_write_ways = '0;
    assign ecc_write_index = '0;
    assign hit_data_ic1 = hit_data_ecc_ic1;

    assign ecc_error_o = 1'b0;
  end
|
|
|
|
  ///////////////////////////////
  // Cache allocation decision //
  ///////////////////////////////

  if (BranchCache) begin : gen_caching_logic

    // Cache branch target + a number of subsequent lines
    localparam int unsigned CACHE_AHEAD = 2;
    localparam int unsigned CACHE_CNT_W = (CACHE_AHEAD == 1) ? 1 : $clog2(CACHE_AHEAD) + 1;
    logic cache_cnt_dec;
    logic [CACHE_CNT_W-1:0] cache_cnt_d, cache_cnt_q;

    // Counter reloads on a branch and decrements once per granted lookup until zero
    assign cache_cnt_dec = lookup_grant_ic0 & (|cache_cnt_q);
    assign cache_cnt_d = branch_i ? CACHE_AHEAD[CACHE_CNT_W-1:0] :
                                    (cache_cnt_q - {{CACHE_CNT_W-1{1'b0}},cache_cnt_dec});

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        cache_cnt_q <= '0;
      end else begin
        cache_cnt_q <= cache_cnt_d;
      end
    end

    // Allocate only while within CACHE_AHEAD lines of a branch target
    assign fill_cache_new = (branch_i | (|cache_cnt_q)) & icache_enable_i & ~inval_block_cache;
  end else begin : gen_cache_all

    // Cache all missing fetches
    assign fill_cache_new = icache_enable_i & ~inval_block_cache;
  end
|
|
|
|
  //////////////////////////
  // Fill buffer tracking //
  //////////////////////////

  // Count the fill buffers that are busy and not stale (used for lookup throttling)
  always_comb begin
    fb_fill_level = '0;
    for (int i = 0; i < NUM_FB; i++) begin
      if (fill_busy_q[i] & ~fill_stale_q[i]) begin
        fb_fill_level += {{$clog2(NUM_FB) - 1{1'b0}}, 1'b1};
      end
    end
  end

  // Allocate a new buffer for every granted lookup
  assign fill_new_alloc = lookup_grant_ic0;
  // Track whether a speculative external request was made from IC0, and whether it was granted
  // Speculative requests are only made for branches, or if the cache is disabled
  assign fill_spec_req = (~icache_enable_i | branch_i) & ~|fill_ext_req;
  assign fill_spec_done = fill_spec_req & instr_gnt_i;
  assign fill_spec_hold = fill_spec_req & ~instr_gnt_i;
|
|
|
|
  // One tracking slice per fill buffer: allocation, status, external request, output
  // and RAM-write tracking, arbitration and data storage.
  for (genvar fb = 0; fb < NUM_FB; fb++) begin : gen_fbs

    /////////////////////////////
    // Fill buffer allocations //
    /////////////////////////////

    // Allocate the lowest available buffer
    if (fb == 0) begin : gen_fb_zero
      assign fill_alloc_sel[fb] = ~fill_busy_q[fb];
    end else begin : gen_fb_rest
      assign fill_alloc_sel[fb] = ~fill_busy_q[fb] & (&fill_busy_q[fb-1:0]);
    end

    assign fill_alloc[fb] = fill_alloc_sel[fb] & fill_new_alloc;
    assign fill_busy_d[fb] = fill_alloc[fb] | (fill_busy_q[fb] & ~fill_done[fb]);

    // Track which other fill buffers are older than this one (for age-based arbitration)
    // TODO sparsify
    assign fill_older_d[fb] = (fill_alloc[fb] ? fill_busy_q : fill_older_q[fb]) & ~fill_done;

    // A fill buffer can release once all its actions are completed
    // all data written to the cache (unless hit or error)
    assign fill_done[fb] = (fill_ram_done_q[fb] | fill_hit_q[fb] | ~fill_cache_q[fb] |
                            (|fill_err_q[fb])) &
                           // all data output unless stale due to intervening branch
                           (fill_out_done[fb] | fill_stale_q[fb] | branch_i) &
                           // all external requests completed
                           fill_rvd_done[fb];

    /////////////////////////////////
    // Fill buffer status tracking //
    /////////////////////////////////

    // Track staleness (requests become stale when a branch intervenes)
    assign fill_stale_d[fb] = fill_busy_q[fb] & (branch_i | fill_stale_q[fb]);
    // Track whether or not this request should allocate to the cache
    // Any invalidation or disabling of the cache while the buffer is busy will stop allocation
    assign fill_cache_d[fb] = (fill_alloc[fb] & fill_cache_new) |
                              (fill_cache_q[fb] & fill_busy_q[fb] &
                               icache_enable_i & ~icache_inval_i);
    // Record whether the request hit in the cache (an ECC error cancels the hit)
    assign fill_hit_ic1[fb] = lookup_valid_ic1 & fill_in_ic1[fb] & tag_hit_ic1 & ~ecc_err_ic1;
    assign fill_hit_d[fb] = fill_hit_ic1[fb] | (fill_hit_q[fb] & fill_busy_q[fb]);

    ///////////////////////////////////////////
    // Fill buffer external request tracking //
    ///////////////////////////////////////////

    // Make an external request
    assign fill_ext_req[fb] = fill_busy_q[fb] & ~fill_ext_done_d[fb];

    // Count the number of completed external requests (each line requires IC_LINE_BEATS requests)
    assign fill_ext_cnt_d[fb] = fill_alloc[fb] ?
                                {{IC_LINE_BEATS_W{1'b0}},fill_spec_done} :
                                (fill_ext_cnt_q[fb] + {{IC_LINE_BEATS_W{1'b0}},
                                                       fill_ext_arb[fb] & instr_gnt_i});
    // External request must be held until granted
    assign fill_ext_hold_d[fb] = (fill_alloc[fb] & fill_spec_hold) |
                                 (fill_ext_arb[fb] & ~instr_gnt_i);
    // External requests are completed when the counter is filled or when the request is cancelled
    assign fill_ext_done_d[fb] = (fill_ext_cnt_q[fb][IC_LINE_BEATS_W] |
                                  // external requests are considered complete if the request hit
                                  fill_hit_ic1[fb] | fill_hit_q[fb] |
                                  // cancel if the line won't be cached and it is stale
                                  (~fill_cache_q[fb] & (branch_i | fill_stale_q[fb] |
                                   // or we're already at the end of the line
                                   fill_ext_beat[fb][IC_LINE_BEATS_W]))) &
                                 // can't cancel while we are waiting for a grant on the bus
                                 ~fill_ext_hold_q[fb] & fill_busy_q[fb];
    // Track whether this fill buffer expects to receive beats of data
    assign fill_rvd_exp[fb] = fill_busy_q[fb] & ~fill_rvd_done[fb];
    // Count the number of rvalid beats received
    assign fill_rvd_cnt_d[fb] = fill_alloc[fb] ? '0 :
                                (fill_rvd_cnt_q[fb] +
                                 {{IC_LINE_BEATS_W{1'b0}},fill_rvd_arb[fb]});
    // External data is complete when all issued external requests have received their data
    assign fill_rvd_done[fb] = (fill_ext_done_q[fb] & ~fill_ext_hold_q[fb]) &
                               (fill_rvd_cnt_q[fb] == fill_ext_cnt_q[fb]);

    //////////////////////////////////////
    // Fill buffer data output tracking //
    //////////////////////////////////////

    // Send data to the IF stage for requests that are not stale, have not completed their
    // data output, and have data available to send.
    // Data is available if:
    // - The request hit in the cache
    // - Buffered data is available (fill_rvd_cnt_q is ahead of fill_out_cnt_q)
    // - Data is available from the bus this cycle (fill_rvd_arb)
    assign fill_out_req[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
                              (fill_hit_ic1[fb] | fill_hit_q[fb] |
                               (fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_rvd_arb[fb]);

    // Calculate when a beat of data is output. Any ECC error squashes the output that cycle.
    assign fill_out_grant[fb] = fill_out_arb[fb] & output_ready;

    // Count the beats of data output to the IF stage; branches into the middle of a
    // line start the counter at the branch offset rather than zero
    assign fill_out_cnt_d[fb] = fill_alloc[fb] ? {1'b0,lookup_addr_ic0[IC_LINE_W-1:BUS_W]} :
                                (fill_out_cnt_q[fb] +
                                 {{IC_LINE_BEATS_W{1'b0}},fill_out_grant[fb]});
    // Data output complete when the counter fills
    assign fill_out_done[fb] = fill_out_cnt_q[fb][IC_LINE_BEATS_W];

    //////////////////////////////////////
    // Fill buffer ram request tracking //
    //////////////////////////////////////

    // make a fill request once all data beats received
    assign fill_ram_req[fb] = fill_busy_q[fb] & fill_rvd_cnt_q[fb][IC_LINE_BEATS_W] &
                              // unless the request hit, was non-allocating or got an error
                              ~fill_hit_q[fb] & fill_cache_q[fb] & ~|fill_err_q[fb] &
                              // or the request was already completed
                              ~fill_ram_done_q[fb];

    // Record when a cache allocation request has been completed
    assign fill_ram_done_d[fb] = fill_ram_arb[fb] | (fill_ram_done_q[fb] & fill_busy_q[fb]);

    //////////////////////////////
    // Fill buffer line offsets //
    //////////////////////////////

    // When we branch into the middle of a line, the output count will not start from zero. This
    // beat count is used to know which incoming rdata beats are relevant.
    assign fill_ext_beat[fb] = {1'b0,fill_addr_q[fb][IC_LINE_W-1:BUS_W]} +
                               fill_ext_cnt_q[fb][IC_LINE_BEATS_W:0];
    assign fill_ext_off[fb] = fill_ext_beat[fb][IC_LINE_BEATS_W-1:0];
    assign fill_rvd_beat[fb] = {1'b0,fill_addr_q[fb][IC_LINE_W-1:BUS_W]} +
                               fill_rvd_cnt_q[fb][IC_LINE_BEATS_W:0];
    assign fill_rvd_off[fb] = fill_rvd_beat[fb][IC_LINE_BEATS_W-1:0];

    /////////////////////////////
    // Fill buffer arbitration //
    /////////////////////////////

    // Age based arbitration - all these signals are one-hot
    assign fill_ext_arb[fb] = fill_ext_req[fb] & ~|(fill_ext_req & fill_older_q[fb]);
    assign fill_ram_arb[fb] = fill_ram_req[fb] & fill_grant_ic0 &
                              ~|(fill_ram_req & fill_older_q[fb]);
    // Calculate which fill buffer is the oldest one which still needs to output data to IF
    assign fill_data_sel[fb] = ~|(fill_busy_q & ~fill_out_done & ~fill_stale_q &
                                  fill_older_q[fb]);
    // Arbitrate the request which has data available to send, and is the oldest outstanding
    assign fill_out_arb[fb] = fill_out_req[fb] & fill_data_sel[fb];
    // Assign incoming rvalid data to the oldest fill buffer expecting it
    assign fill_rvd_arb[fb] = instr_rvalid_i & fill_rvd_exp[fb] &
                              ~|(fill_rvd_exp & fill_older_q[fb]);

    /////////////////////////////
    // Fill buffer data muxing //
    /////////////////////////////

    // Output data muxing controls
    // 1. Select data from the fill buffer data register
    assign fill_data_reg[fb] = fill_busy_q[fb] & ~fill_stale_q[fb] &
                               ~fill_out_done[fb] & fill_data_sel[fb] &
                               // The incoming data is already ahead of the output count
                               ((fill_rvd_beat[fb] > fill_out_cnt_q[fb]) | fill_hit_q[fb] |
                                (|fill_err_q[fb]));
    // 2. Select IC1 hit data
    assign fill_data_hit[fb] = fill_busy_q[fb] & fill_hit_ic1[fb] & fill_data_sel[fb];
    // 3. Select incoming instr_rdata_i
    assign fill_data_rvd[fb] = fill_busy_q[fb] & fill_rvd_arb[fb] & ~fill_hit_q[fb] &
                               ~fill_hit_ic1[fb] & ~fill_stale_q[fb] & ~fill_out_done[fb] &
                               // The incoming data lines up with the output count
                               (fill_rvd_beat[fb] == fill_out_cnt_q[fb]) & fill_data_sel[fb];

    ///////////////////////////
    // Fill buffer registers //
    ///////////////////////////

    // Fill buffer general enable
    assign fill_entry_en[fb] = fill_alloc[fb] | fill_busy_q[fb];

    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        fill_busy_q[fb]     <= 1'b0;
        fill_older_q[fb]    <= '0;
        fill_stale_q[fb]    <= 1'b0;
        fill_cache_q[fb]    <= 1'b0;
        fill_hit_q[fb]      <= 1'b0;
        fill_ext_cnt_q[fb]  <= '0;
        fill_ext_hold_q[fb] <= 1'b0;
        fill_ext_done_q[fb] <= 1'b0;
        fill_rvd_cnt_q[fb]  <= '0;
        fill_ram_done_q[fb] <= 1'b0;
        fill_out_cnt_q[fb]  <= '0;
      end else if (fill_entry_en[fb]) begin
        fill_busy_q[fb]     <= fill_busy_d[fb];
        fill_older_q[fb]    <= fill_older_d[fb];
        fill_stale_q[fb]    <= fill_stale_d[fb];
        fill_cache_q[fb]    <= fill_cache_d[fb];
        fill_hit_q[fb]      <= fill_hit_d[fb];
        fill_ext_cnt_q[fb]  <= fill_ext_cnt_d[fb];
        fill_ext_hold_q[fb] <= fill_ext_hold_d[fb];
        fill_ext_done_q[fb] <= fill_ext_done_d[fb];
        fill_rvd_cnt_q[fb]  <= fill_rvd_cnt_d[fb];
        fill_ram_done_q[fb] <= fill_ram_done_d[fb];
        fill_out_cnt_q[fb]  <= fill_out_cnt_d[fb];
      end
    end

    ////////////////////////////////////////
    // Fill buffer address / data storage //
    ////////////////////////////////////////

    assign fill_addr_en[fb] = fill_alloc[fb];
    assign fill_way_en[fb] = (lookup_valid_ic1 & fill_in_ic1[fb]);

    if (ResetAll) begin : g_fill_addr_ra
      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          fill_addr_q[fb] <= '0;
        end else if (fill_addr_en[fb]) begin
          fill_addr_q[fb] <= lookup_addr_ic0;
        end
      end
    end else begin : g_fill_addr_nr
      always_ff @(posedge clk_i) begin
        if (fill_addr_en[fb]) begin
          fill_addr_q[fb] <= lookup_addr_ic0;
        end
      end
    end

    // Capture the allocation way chosen in IC1 for this buffer's lookup
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        fill_way_q[fb] <= '0;
      end else if (fill_way_en[fb]) begin
        fill_way_q[fb] <= sel_way_ic1;
      end
    end

    // Data either comes from the cache or the bus. If there was an ECC error, we must take
    // the incoming bus data since the cache hit data is corrupted.
    assign fill_data_d[fb] = fill_hit_ic1[fb] ? hit_data_ic1 :
                             {IC_LINE_BEATS{instr_rdata_i}};

    for (genvar b = 0; b < IC_LINE_BEATS; b++) begin : gen_data_buf
      // Error tracking (per beat)
      assign fill_err_d[fb][b] = (fill_rvd_arb[fb] & instr_err_i &
                                  (fill_rvd_off[fb] == b[IC_LINE_BEATS_W-1:0])) |
                                 // Hold the error once recorded
                                 (fill_busy_q[fb] & fill_err_q[fb][b]);

      always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
          fill_err_q[fb][b] <= '0;
        end else if (fill_entry_en[fb]) begin
          fill_err_q[fb][b] <= fill_err_d[fb][b];
        end
      end

      // Enable the relevant part of the data register (or all for cache hits)
      // Ignore incoming rvalid data when we already have cache hit data
      assign fill_data_en[fb][b] = fill_hit_ic1[fb] |
                                   (fill_rvd_arb[fb] & ~fill_hit_q[fb] &
                                    (fill_rvd_off[fb] == b[IC_LINE_BEATS_W-1:0]));

      if (ResetAll) begin : g_fill_data_ra
        always_ff @(posedge clk_i or negedge rst_ni) begin
          if (!rst_ni) begin
            fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= '0;
          end else if (fill_data_en[fb][b]) begin
            fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= fill_data_d[fb][b*BUS_SIZE+:BUS_SIZE];
          end
        end
      end else begin : g_fill_data_nr
        always_ff @(posedge clk_i) begin
          if (fill_data_en[fb][b]) begin
            fill_data_q[fb][b*BUS_SIZE+:BUS_SIZE] <= fill_data_d[fb][b*BUS_SIZE+:BUS_SIZE];
          end
        end
      end

    end
  end
|
|
|
|
////////////////////////////////
|
|
// Fill buffer one-hot muxing //
|
|
////////////////////////////////
|
|
|
|
// External req info
|
|
always_comb begin
|
|
fill_ext_req_addr = '0;
|
|
for (int i = 0; i < NUM_FB; i++) begin
|
|
if (fill_ext_arb[i]) begin
|
|
fill_ext_req_addr |= {fill_addr_q[i][ADDR_W-1:IC_LINE_W], fill_ext_off[i]};
|
|
end
|
|
end
|
|
end
|
|
|
|
// Cache req info
|
|
always_comb begin
|
|
fill_ram_req_addr = '0;
|
|
fill_ram_req_way = '0;
|
|
fill_ram_req_data = '0;
|
|
for (int i = 0; i < NUM_FB; i++) begin
|
|
if (fill_ram_arb[i]) begin
|
|
fill_ram_req_addr |= fill_addr_q[i];
|
|
fill_ram_req_way |= fill_way_q[i];
|
|
fill_ram_req_data |= fill_data_q[i];
|
|
end
|
|
end
|
|
end
|
|
|
|
// IF stage output data
|
|
always_comb begin
|
|
fill_out_data = '0;
|
|
fill_out_err = '0;
|
|
for (int i = 0; i < NUM_FB; i++) begin
|
|
if (fill_data_reg[i]) begin
|
|
fill_out_data |= fill_data_q[i];
|
|
// Ignore any speculative errors accumulated on cache hits
|
|
fill_out_err |= (fill_err_q[i] & ~{IC_LINE_BEATS{fill_hit_q[i]}});
|
|
end
|
|
end
|
|
end
|
|
|
|
  ///////////////////////
  // External requests //
  ///////////////////////

  // Request the bus either to bypass the cache for a granted lookup (cache
  // disabled, or a branch) or on behalf of a fill buffer needing more beats.
  assign instr_req = ((~icache_enable_i | branch_i) & lookup_grant_ic0) |
                     (|fill_ext_req);

  // Fill buffer requests take priority over a new lookup's bypass fetch
  assign instr_addr = |fill_ext_req ? fill_ext_req_addr :
                      lookup_addr_ic0[ADDR_W-1:BUS_W];

  assign instr_req_o = instr_req;
  // External requests are always bus-width aligned
  assign instr_addr_o = {instr_addr[ADDR_W-1:BUS_W],{BUS_W{1'b0}}};
|
|
|
|
  ////////////////////////
  // Output data muxing //
  ////////////////////////

  // Mux between line-width data sources: fresh IC1 hit data or a fill buffer's
  // registered line. Hit data carries no (non-speculative) errors.
  assign line_data = |fill_data_hit ? hit_data_ic1 : fill_out_data;
  assign line_err = |fill_data_hit ? {IC_LINE_BEATS{1'b0}} : fill_out_err;

  // Mux the relevant beat of line data, based on the output address
  always_comb begin
    line_data_muxed = '0;
    line_err_muxed = 1'b0;
    for (int unsigned i = 0; i < IC_LINE_BEATS; i++) begin
      // When data has been skidded, the output address is behind by one
      if ((output_addr_q[IC_LINE_W-1:BUS_W] + {{IC_LINE_BEATS_W-1{1'b0}},skid_valid_q}) ==
          i[IC_LINE_BEATS_W-1:0]) begin
        line_data_muxed |= line_data[i*32+:32];
        line_err_muxed |= line_err[i];
      end
    end
  end

  // Mux between incoming rdata (forwarded straight off the bus) and the muxed line data
  assign output_data = |fill_data_rvd ? instr_rdata_i : line_data_muxed;
  assign output_err = |fill_data_rvd ? instr_err_i : line_err_muxed;

  // Output data is valid (from any of the three possible sources). Note that fill_out_arb
  // must be used here rather than fill_out_req because data can become valid out of order
  // (e.g. cache hit data can become available ahead of an older outstanding miss).
  assign data_valid = |fill_out_arb;
|
|
|
|
  // Skid buffer data
  // The skid buffer captures the upper halfword of the output word so that an
  // uncompressed instruction split across two fetch words can be reassembled.
  assign skid_data_d = output_data[31:16];

  // Load the skid buffer whenever output data is being consumed or can be skidded
  assign skid_en = data_valid & (ready_i | skid_ready);

  if (ResetAll) begin : g_skid_data_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        skid_data_q <= '0;
        skid_err_q <= '0;
      end else if (skid_en) begin
        skid_data_q <= skid_data_d;
        skid_err_q <= output_err;
      end
    end
  end else begin : g_skid_data_nr
    // No reset to save area; skid_valid_q (reset below) qualifies this data
    always_ff @(posedge clk_i) begin
      if (skid_en) begin
        skid_data_q <= skid_data_d;
        skid_err_q <= output_err;
      end
    end
  end
|
|
|
|
  // The data in the skid buffer is ready if it's a complete compressed instruction or if there's
  // an error (no need to wait for the second half)
  assign skid_complete_instr = skid_valid_q & ((skid_data_q[1:0] != 2'b11) | skid_err_q);

  // Data can be loaded into the skid buffer for an unaligned uncompressed instruction
  assign skid_ready = output_addr_q[1] & ~skid_valid_q & (~output_compressed | output_err);

  // Pop output data when the IF stage takes it or it can be skidded, but not while a
  // complete instruction is already being served from the skid buffer
  assign output_ready = (ready_i | skid_ready) & ~skid_complete_instr;

  // RISC-V compressed encoding: any instruction whose two LSBs are not 2'b11
  assign output_compressed = (rdata_o[1:0] != 2'b11);

  assign skid_valid_d =
      // Branches invalidate the skid buffer
      branch_i ? 1'b0 :
      // Once valid, the skid buffer stays valid until a compressed instruction realigns the stream
      (skid_valid_q ? ~(ready_i & ((skid_data_q[1:0] != 2'b11) | skid_err_q)) :
      // The skid buffer becomes valid when:
      // - we branch to an unaligned uncompressed instruction
       (data_valid &
        (((output_addr_q[1] & (~output_compressed | output_err)) |
      // - a compressed instruction misaligns the stream
          (~output_addr_q[1] & output_compressed & ~output_err & ready_i)))));

  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      skid_valid_q <= 1'b0;
    end else begin
      skid_valid_q <= skid_valid_d;
    end
  end
|
|
|
|
  // Signal that valid data is available to the IF stage
  // Note that if the first half of an unaligned instruction reports an error, we do not need
  // to wait for the second half
  // Compressed instruction completely satisfied by skid buffer
  assign output_valid = skid_complete_instr |
      // Output data available and, output stream aligned, or skid data available,
      (data_valid & (~output_addr_q[1] | skid_valid_q |
      // or this is an error or an unaligned compressed instruction
                     output_err | (output_data[17:16] != 2'b11)));

  // Update the address on branches and every time an instruction is driven
  assign output_addr_en = branch_i | (ready_i & valid_o);

  // Increment the address by two every time a compressed instruction is popped
  // (errors always advance by 4 so the faulting PC is unambiguous)
  assign addr_incr_two = output_compressed & ~err_o;

  // Next IF stage PC (halfword-address arithmetic, bit 0 implied zero)
  assign output_addr_incr = (output_addr_q[31:1] +
      // Increment address by 4 or 2
                             {29'd0, ~addr_incr_two, addr_incr_two});

  // Redirect the address on branches
  assign output_addr_d = branch_i ? addr_i[31:1] : output_addr_incr;
|
|
|
|
  // Output address register: halfword-granule PC tracked by the output stage
  if (ResetAll) begin : g_output_addr_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        output_addr_q <= '0;
      end else if (output_addr_en) begin
        output_addr_q <= output_addr_d;
      end
    end
  end else begin : g_output_addr_nr
    // No reset to save area; a branch (output_addr_en) initialises it before use
    always_ff @(posedge clk_i) begin
      if (output_addr_en) begin
        output_addr_q <= output_addr_d;
      end
    end
  end
|
|
|
|
  // Mux the data from BUS_SIZE to halfword
  // This muxing realigns data when instruction words are split across BUS_W e.g.
  // word 1 |----|*h1*|
  // word 0 |*h0*|----| --> |*h1*|*h0*|
  //        31  15   0      31  15   0

  // Low halfword: select the halfword addressed by output_addr_q within the bus word
  always_comb begin
    output_data_lo = '0;
    for (int unsigned i = 0; i < IC_OUTPUT_BEATS; i++) begin
      if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
        output_data_lo |= output_data[i*16+:16];
      end
    end
  end

  // High halfword: the next halfword up from the one addressed
  always_comb begin
    output_data_hi = '0;
    for (int unsigned i = 0; i < IC_OUTPUT_BEATS - 1; i++) begin
      if (output_addr_q[BUS_W-1:1] == i[BUS_W-2:0]) begin
        output_data_hi |= output_data[(i+1)*16+:16];
      end
    end
    // Topmost halfword wraps: its successor is the bottom halfword of the next
    // bus word, which appears at output_data[15:0] on the following beat
    if (&output_addr_q[BUS_W-1:1]) begin
      output_data_hi |= output_data[15:0];
    end
  end
|
|
|
|
  assign valid_o = output_valid;
  // Low halfword comes from the skid buffer when it holds the first half of an
  // unaligned instruction
  assign rdata_o = {output_data_hi, (skid_valid_q ? skid_data_q : output_data_lo)};
  assign addr_o = {output_addr_q, 1'b0};
  // An error from the skid buffer, or a fresh output error not masked by a
  // completed skid-buffer instruction
  assign err_o = (skid_valid_q & skid_err_q) | (~skid_complete_instr & output_err);
  // Error caused by the second half of a misaligned uncompressed instruction
  // (only relevant when err_o is set)
  assign err_plus2_o = skid_valid_q & ~skid_err_q;
|
|
|
|
  ///////////////////
  // Invalidations //
  ///////////////////

  // Invalidation (writing all entries in the tag RAM with an invalid tag) occurs straight out of
  // reset and after any invalidation request (signalled via icache_inval_i). An invalidation
  // request coming whilst another is writing tags causes the invalidation to start again. This
  // ensures a new scramble key is requested where a previous one is in use.
  // TODO: Ditch this behaviour for non-secure ibex?
  always_comb begin
    inval_state_d = inval_state_q;
    inval_index_d = inval_index_q;
    inval_index_en = 1'b0;
    inval_write_req = 1'b0;
    ic_scr_key_req_o = 1'b0;

    // Prevent other cache activity (cache lookups and cache allocations) whilst an invalidation is
    // in progress. Set to 1 by default as the only time we don't block is when the state machine is
    // IDLE.
    inval_block_cache = 1'b1;

    case (inval_state_q)
      OUT_OF_RESET: begin
        // Initial state, this initialises the tag RAMs out of reset before the icache can be used
        inval_state_d = AWAIT_SCRAMBLE_KEY;

        // Only request a fresh scramble key if one isn't already valid out of reset
        if (~ic_scr_key_valid_i) begin
          ic_scr_key_req_o = 1'b1;
        end
      end
      AWAIT_SCRAMBLE_KEY: begin
        // When invalidating a new scrambling key is requested on all invalidation requests. Wait
        // for that new key to be available before beginning with the actual invalidation (cannot
        // write to the tag RAM until we have the new scrambling key that will be used). Ignore any
        // requests in this phase (once a scramble key request has started we cannot request a new
        // one until the on-going request is done).
        if (ic_scr_key_valid_i) begin
          inval_state_d = INVAL_CACHE;
          inval_index_d = '0;
          inval_index_en = 1'b1;
        end
      end
      INVAL_CACHE: begin
        // Actually invalidate the cache. Write every entry in the tag RAM with an invalid tag. Once
        // all are written we're done.
        inval_write_req = 1'b1;
        inval_index_d = (inval_index_q + {{IC_INDEX_W-1{1'b0}},1'b1});
        inval_index_en = 1'b1;

        if (icache_inval_i) begin
          // If a new invalidation request comes in go back to the beginning with a new scramble
          // key. Checked before the final-index test so a request arriving in the last cycle of
          // an ongoing invalidation restarts it rather than being lost.
          ic_scr_key_req_o = 1'b1;
          inval_state_d = AWAIT_SCRAMBLE_KEY;
        end else if (&inval_index_q) begin
          // When the final index is written we're done
          inval_state_d = IDLE;
        end
      end
      IDLE: begin
        // Usual running state
        if (icache_inval_i) begin
          ic_scr_key_req_o = 1'b1;
          inval_state_d = AWAIT_SCRAMBLE_KEY;
        end else begin
          // Allow other cache activities whilst in IDLE and no invalidation has been requested
          inval_block_cache = 1'b0;
        end
      end
      default: ;
    endcase
  end
|
|
|
|
  // An invalidation is considered in progress in every state except IDLE
  assign inval_active = inval_state_q != IDLE;

  // Invalidation state register; starts in OUT_OF_RESET to initialise the tag RAMs
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
      inval_state_q <= OUT_OF_RESET;
    end else begin
      inval_state_q <= inval_state_d;
    end
  end

  // Tag RAM index counter used while writing invalid tags
  if (ResetAll) begin : g_inval_index_ra
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        inval_index_q <= '0;
      end else if (inval_index_en) begin
        inval_index_q <= inval_index_d;
      end
    end
  end else begin : g_inval_index_nr
    // No reset to save area; the FSM zeroes the index before INVAL_CACHE uses it
    always_ff @(posedge clk_i) begin
      if (inval_index_en) begin
        inval_index_q <= inval_index_d;
      end
    end
  end
|
|
|
|
  /////////////////
  // Busy status //
  /////////////////

  // Only busy (for WFI purposes) while an invalidation is in-progress, or external requests are
  // outstanding.
  assign busy_o = inval_active | (|(fill_busy_q & ~fill_rvd_done));
|
|
|
|
  ////////////////
  // Assertions //
  ////////////////

  // The cache line must be wider than the bus for the beat-counting logic to work
  `ASSERT_INIT(size_param_legal, (IC_LINE_SIZE > 32))

  // ECC primitives will need to be changed for different sizes
  `ASSERT_INIT(ecc_tag_param_legal, (IC_TAG_SIZE <= 27))
  `ASSERT_INIT(ecc_data_param_legal, !ICacheECC || (BUS_SIZE == 32))

  // Lookups in the tag ram should always give a known result
  `ASSERT_KNOWN(TagHitKnown, lookup_valid_ic1 & tag_hit_ic1)
  `ASSERT_KNOWN(TagInvalidKnown, lookup_valid_ic1 & tag_invalid_ic1)
|
|
|
|
// This is only used for the Yosys-based formal flow. Once we have working bind support, we can
|
|
// get rid of it.
|
|
`ifdef FORMAL
  `ifdef YOSYS
  // Unfortunately, Yosys doesn't support passing unpacked arrays as ports. Explicitly pack up the
  // signals we need.
  logic [NUM_FB-1:0][ADDR_W-1:0] packed_fill_addr_q;
  always_comb begin
    for (int i = 0; i < NUM_FB; i++) begin
      packed_fill_addr_q[i][ADDR_W-1:0] = fill_addr_q[i];
    end
  end

  `include "formal_tb_frag.svh"
  `endif
`endif
|
|
|
|
|
|
endmodule
|