Serpent dcache WIP

This commit is contained in:
Michael Schaffner 2018-09-14 20:49:37 +02:00
parent e8d18c648a
commit 91b270adf1
No known key found for this signature in database
GPG key ID: 7AA09AE049819C2C
4 changed files with 562 additions and 295 deletions

View file

@ -37,13 +37,16 @@ package serpent_cache_pkg;
// Calculated parameter
// localparam DCACHE_BYTE_OFFSET = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8);
// localparam DCACHE_NUM_WORDS = 2**(ariane_pkg::DCACHE_INDEX_WIDTH-DCACHE_BYTE_OFFSET);
// localparam DCACHE_DIRTY_WIDTH = ariane_pkg::DCACHE_SET_ASSOC*2;
// Derived cache geometry, computed from the ariane_pkg cache parameters.
// NOTE(review): the /8 and /64 divisions below assume *_LINE_WIDTH is given in bits - confirm in ariane_pkg.

// number of address bits covered by one instruction cache line (clog2 of line width / 8)
localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH/8);
// number of entries addressed by the index bits that remain after removing the line offset
localparam ICACHE_NUM_WORDS = 2**(ariane_pkg::ICACHE_INDEX_WIDTH-ICACHE_OFFSET_WIDTH);
// width of the cache line index, i.e. clog2(ICACHE_NUM_WORDS)
localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS);// excluding byte offset
// same quantities for the data cache
localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8);
localparam DCACHE_NUM_WORDS = 2**(ariane_pkg::DCACHE_INDEX_WIDTH-DCACHE_OFFSET_WIDTH);
localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS);// excluding byte offset
// number of 64-wide slices per data cache line (line width / 64)
localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH/64;
// local interfaces between caches and L15 adapter
typedef enum logic [1:0] {
@ -195,7 +198,16 @@ package serpent_cache_pkg;
return out;
endfunction
function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] bin2onehot (
// Converts a binary-encoded instruction cache way index into a vector with one
// bit per way (ICACHE_SET_ASSOC bits). Only the bit addressed by `in` is set,
// all other bits are cleared.
function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh (
  input logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] in
);
  // build the result directly in the implicit return variable of the function:
  // clear everything first, then raise the selected bit
  icache_way_bin2oh     = '0;
  icache_way_bin2oh[in] = 1'b1;
endfunction
function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh (
input logic [$clog2(ariane_pkg::DCACHE_SET_ASSOC)-1:0] in
);
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] out;
@ -204,6 +216,16 @@ package serpent_cache_pkg;
return out;
endfunction
// Converts a binary-encoded bank index into a vector with one bit per data
// cache bank (DCACHE_NUM_BANKS bits). Only the bit addressed by `in` is set,
// all other bits are cleared.
function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh (
  input logic [$clog2(DCACHE_NUM_BANKS)-1:0] in
);
  // build the result directly in the implicit return variable of the function:
  // clear everything first, then raise the selected bit
  dcache_cl_bin2oh     = '0;
  dcache_cl_bin2oh[in] = 1'b1;
endfunction
function automatic logic [5:0] popcnt64 (
input logic [63:0] in
);

View file

@ -8,9 +8,9 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 13.10.2017
// Description: Nonblocking private L1 dcache
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: Data cache that is compatible with openpiton.
import ariane_pkg::*;
import serpent_cache_pkg::*;
@ -40,310 +40,215 @@ module serpent_dcache #(
input logic mem_data_ack_i,
output dcache_req_t mem_data_o
);
/*
// -------------------------------
// Controller <-> Arbiter
// -------------------------------
// 1. Miss handler
// 2. PTW
// 3. Load Unit
// 4. Store unit
logic [3:0][DCACHE_SET_ASSOC-1:0] req;
logic [3:0][DCACHE_INDEX_WIDTH-1:0]addr;
logic [3:0] gnt;
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata;
logic [3:0][DCACHE_TAG_WIDTH-1:0] tag;
cache_line_t [3:0] wdata;
logic [3:0] we;
cl_be_t [3:0] be;
logic [DCACHE_SET_ASSOC-1:0] hit_way;
// -------------------------------
// Controller <-> Miss unit
// -------------------------------
logic [2:0] busy;
logic [2:0][55:0] mshr_addr;
logic [2:0] mshr_addr_matches;
logic [2:0] mshr_index_matches;
logic [63:0] critical_word;
logic critical_word_valid;
// // -------------------------------
// // Controller <-> Arbiter
// // -------------------------------
// // 1. Miss handler
// // 2. PTW
// // 3. Load Unit
// // 4. Store unit
// logic [3:0][DCACHE_SET_ASSOC-1:0] req;
// logic [3:0][DCACHE_INDEX_WIDTH-1:0]addr;
// logic [3:0] gnt;
// cache_line_t [DCACHE_SET_ASSOC-1:0] rdata;
// logic [3:0][DCACHE_TAG_WIDTH-1:0] tag;
logic [2:0][$bits(miss_req_t)-1:0] miss_req;
logic [2:0] miss_gnt;
logic [2:0] active_serving;
// cache_line_t [3:0] wdata;
// logic [3:0] we;
// cl_be_t [3:0] be;
// logic [DCACHE_SET_ASSOC-1:0] hit_way;
// // -------------------------------
// // Controller <-> Miss unit
// // -------------------------------
// logic [2:0] busy;
// logic [2:0][55:0] mshr_addr;
// logic [2:0] mshr_addr_matches;
// logic [2:0] mshr_index_matches;
// logic [63:0] critical_word;
// logic critical_word_valid;
logic [2:0] bypass_gnt;
logic [2:0] bypass_valid;
logic [2:0][63:0] bypass_data;
// -------------------------------
// Arbiter <-> Datram,
// -------------------------------
logic [DCACHE_SET_ASSOC-1:0] req_ram;
logic [DCACHE_INDEX_WIDTH-1:0] addr_ram;
logic we_ram;
cache_line_t wdata_ram;
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata_ram;
cl_be_t be_ram;
// logic [2:0][$bits(miss_req_t)-1:0] miss_req;
// logic [2:0] miss_gnt;
// logic [2:0] active_serving;
// ------------------
// Cache Controller
// ------------------
generate
for (genvar i = 0; i < 3; i++) begin : master_ports
cache_ctrl #(
.CACHE_START_ADDR ( CACHE_START_ADDR )
) i_cache_ctrl (
.bypass_i ( ~enable_i ),
// logic [2:0] bypass_gnt;
// logic [2:0] bypass_valid;
// logic [2:0][63:0] bypass_data;
// // -------------------------------
// // Arbiter <-> Datram,
// // -------------------------------
// logic [DCACHE_SET_ASSOC-1:0] req_ram;
// logic [DCACHE_INDEX_WIDTH-1:0] addr_ram;
// logic we_ram;
// cache_line_t wdata_ram;
// cache_line_t [DCACHE_SET_ASSOC-1:0] rdata_ram;
// cl_be_t be_ram;
.busy_o ( busy [i] ),
// // ------------------
// // Cache Controller
// // ------------------
// generate
// for (genvar i = 0; i < 3; i++) begin : master_ports
// cache_ctrl #(
// .CACHE_START_ADDR ( CACHE_START_ADDR )
// ) i_cache_ctrl (
// .bypass_i ( ~enable_i ),
.req_port_i ( req_ports_i [i] ),
.req_port_o ( req_ports_o [i] ),
// .busy_o ( busy [i] ),
// .req_port_i ( req_ports_i [i] ),
// .req_port_o ( req_ports_o [i] ),
.req_o ( req [i+1] ),
.addr_o ( addr [i+1] ),
.gnt_i ( gnt [i+1] ),
.data_i ( rdata ),
.tag_o ( tag [i+1] ),
.data_o ( wdata [i+1] ),
.we_o ( we [i+1] ),
.be_o ( be [i+1] ),
.hit_way_i ( hit_way ),
// .req_o ( req [i+1] ),
// .addr_o ( addr [i+1] ),
// .gnt_i ( gnt [i+1] ),
// .data_i ( rdata ),
// .tag_o ( tag [i+1] ),
// .data_o ( wdata [i+1] ),
// .we_o ( we [i+1] ),
// .be_o ( be [i+1] ),
// .hit_way_i ( hit_way ),
.miss_req_o ( miss_req [i] ),
.miss_gnt_i ( miss_gnt [i] ),
.active_serving_i ( active_serving [i] ),
.critical_word_i ( critical_word ),
.critical_word_valid_i ( critical_word_valid ),
.bypass_gnt_i ( bypass_gnt [i] ),
.bypass_valid_i ( bypass_valid [i] ),
.bypass_data_i ( bypass_data [i] ),
// .miss_req_o ( miss_req [i] ),
// .miss_gnt_i ( miss_gnt [i] ),
// .active_serving_i ( active_serving [i] ),
// .critical_word_i ( critical_word ),
// .critical_word_valid_i ( critical_word_valid ),
// .bypass_gnt_i ( bypass_gnt [i] ),
// .bypass_valid_i ( bypass_valid [i] ),
// .bypass_data_i ( bypass_data [i] ),
.mshr_addr_o ( mshr_addr [i] ), // TODO
.mshr_addr_matches_i ( mshr_addr_matches [i] ), // TODO
.mshr_index_matches_i ( mshr_index_matches[i] ), // TODO
.*
);
end
endgenerate
// .mshr_addr_o ( mshr_addr [i] ), // TODO
// .mshr_addr_matches_i ( mshr_addr_matches [i] ), // TODO
// .mshr_index_matches_i ( mshr_index_matches[i] ), // TODO
// .*
// );
// end
// endgenerate
// ------------------
// Miss Handling Unit
// ------------------
miss_handler #(
.NR_PORTS ( 3 )
) i_miss_handler (
.busy_i ( |busy ),
.miss_req_i ( miss_req ),
.miss_gnt_o ( miss_gnt ),
.bypass_gnt_o ( bypass_gnt ),
.bypass_valid_o ( bypass_valid ),
.bypass_data_o ( bypass_data ),
.critical_word_o ( critical_word ),
.critical_word_valid_o ( critical_word_valid ),
.mshr_addr_i ( mshr_addr ),
.mshr_addr_matches_o ( mshr_addr_matches ),
.mshr_index_matches_o ( mshr_index_matches ),
.active_serving_o ( active_serving ),
.req_o ( req [0] ),
.addr_o ( addr [0] ),
.data_i ( rdata ),
.be_o ( be [0] ),
.data_o ( wdata [0] ),
.we_o ( we [0] ),
.*
);
// // ------------------
// // Miss Handling Unit
// // ------------------
// miss_handler #(
// .NR_PORTS ( 3 )
// ) i_miss_handler (
// .busy_i ( |busy ),
// .miss_req_i ( miss_req ),
// .miss_gnt_o ( miss_gnt ),
// .bypass_gnt_o ( bypass_gnt ),
// .bypass_valid_o ( bypass_valid ),
// .bypass_data_o ( bypass_data ),
// .critical_word_o ( critical_word ),
// .critical_word_valid_o ( critical_word_valid ),
// .mshr_addr_i ( mshr_addr ),
// .mshr_addr_matches_o ( mshr_addr_matches ),
// .mshr_index_matches_o ( mshr_index_matches ),
// .active_serving_o ( active_serving ),
// .req_o ( req [0] ),
// .addr_o ( addr [0] ),
// .data_i ( rdata ),
// .be_o ( be [0] ),
// .data_o ( wdata [0] ),
// .we_o ( we [0] ),
// .*
// );
assign tag[0] = '0;
// assign tag[0] = '0;
// --------------
// Memory Arrays
// --------------
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
sram #(
.DATA_WIDTH ( DCACHE_LINE_WIDTH ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) data_sram (
.req_i ( req_ram [i] ),
.rst_ni ( rst_ni ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wdata_i ( wdata_ram.data ),
.be_i ( be_ram.data ),
.rdata_o ( rdata_ram[i].data ),
.*
);
// // --------------
// // Memory Arrays
// // --------------
// for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
// sram #(
// .DATA_WIDTH ( DCACHE_LINE_WIDTH ),
// .NUM_WORDS ( DCACHE_NUM_WORDS )
// ) data_sram (
// .req_i ( req_ram [i] ),
// .rst_ni ( rst_ni ),
// .we_i ( we_ram ),
// .addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
// .wdata_i ( wdata_ram.data ),
// .be_i ( be_ram.data ),
// .rdata_o ( rdata_ram[i].data ),
// .*
// );
sram #(
.DATA_WIDTH ( DCACHE_TAG_WIDTH ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) tag_sram (
.req_i ( req_ram [i] ),
.rst_ni ( rst_ni ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wdata_i ( wdata_ram.tag ),
.be_i ( be_ram.tag ),
.rdata_o ( rdata_ram[i].tag ),
.*
);
// sram #(
// .DATA_WIDTH ( DCACHE_TAG_WIDTH ),
// .NUM_WORDS ( DCACHE_NUM_WORDS )
// ) tag_sram (
// .req_i ( req_ram [i] ),
// .rst_ni ( rst_ni ),
// .we_i ( we_ram ),
// .addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
// .wdata_i ( wdata_ram.tag ),
// .be_i ( be_ram.tag ),
// .rdata_o ( rdata_ram[i].tag ),
// .*
// );
end
// end
// ----------------
// Valid/Dirty Regs
// ----------------
logic [DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
// // ----------------
// // Valid/Dirty Regs
// // ----------------
// logic [DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign dirty_wdata[i] = wdata_ram.dirty;
assign dirty_wdata[DCACHE_SET_ASSOC + i] = wdata_ram.valid;
assign rdata_ram[i].valid = dirty_rdata[DCACHE_SET_ASSOC + i];
assign rdata_ram[i].dirty = dirty_rdata[i];
end
// for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
// assign dirty_wdata[i] = wdata_ram.dirty;
// assign dirty_wdata[DCACHE_SET_ASSOC + i] = wdata_ram.valid;
// assign rdata_ram[i].valid = dirty_rdata[DCACHE_SET_ASSOC + i];
// assign rdata_ram[i].dirty = dirty_rdata[i];
// end
vdregs #(
.DATA_WIDTH ( DCACHE_DIRTY_WIDTH ),
.DATA_DEPTH ( DCACHE_NUM_WORDS )
) i_vdregs (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( |req_ram ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wdata_i ( dirty_wdata ),
.biten_i ( {be_ram.valid, be_ram.dirty} ),
.rdata_o ( dirty_rdata )
);
// vdregs #(
// .DATA_WIDTH ( DCACHE_DIRTY_WIDTH ),
// .DATA_DEPTH ( DCACHE_NUM_WORDS )
// ) i_vdregs (
// .clk_i ( clk_i ),
// .rst_ni ( rst_ni ),
// .req_i ( |req_ram ),
// .we_i ( we_ram ),
// .addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
// .wdata_i ( dirty_wdata ),
// .biten_i ( {be_ram.valid, be_ram.dirty} ),
// .rdata_o ( dirty_rdata )
// );
// ------------------------------------------------
// Tag Comparison and memory arbitration
// ------------------------------------------------
tag_cmp #(
.NR_PORTS ( 4 ),
.ADDR_WIDTH ( DCACHE_INDEX_WIDTH ),
.DCACHE_SET_ASSOC ( DCACHE_SET_ASSOC )
) i_tag_cmp (
.req_i ( req ),
.gnt_o ( gnt ),
.addr_i ( addr ),
.wdata_i ( wdata ),
.we_i ( we ),
.be_i ( be ),
.rdata_o ( rdata ),
.tag_i ( tag ),
.hit_way_o ( hit_way ),
// // ------------------------------------------------
// // Tag Comparison and memory arbitration
// // ------------------------------------------------
// tag_cmp #(
// .NR_PORTS ( 4 ),
// .ADDR_WIDTH ( DCACHE_INDEX_WIDTH ),
// .DCACHE_SET_ASSOC ( DCACHE_SET_ASSOC )
// ) i_tag_cmp (
// .req_i ( req ),
// .gnt_o ( gnt ),
// .addr_i ( addr ),
// .wdata_i ( wdata ),
// .we_i ( we ),
// .be_i ( be ),
// .rdata_o ( rdata ),
// .tag_i ( tag ),
// .hit_way_o ( hit_way ),
.req_o ( req_ram ),
.addr_o ( addr_ram ),
.wdata_o ( wdata_ram ),
.we_o ( we_ram ),
.be_o ( be_ram ),
.rdata_i ( rdata_ram ),
.*
);
// .req_o ( req_ram ),
// .addr_o ( addr_ram ),
// .wdata_o ( wdata_ram ),
// .we_o ( we_ram ),
// .be_o ( be_ram ),
// .rdata_i ( rdata_ram ),
// .*
// );
`ifndef SYNTHESIS
initial begin
assert ($bits(data_if.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus");
assert (DCACHE_LINE_WIDTH/64 inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of 64");
end
`endif
endmodule
// --------------
// Tag Compare
// --------------
//
// Description: Arbitrates access to cache memories, simplified request grant protocol
// checks for hit or miss on cache
//
module tag_cmp #(
parameter int unsigned NR_PORTS = 3,
parameter int unsigned ADDR_WIDTH = 64,
parameter type data_t = cache_line_t,
parameter type be_t = cl_be_t,
parameter int unsigned DCACHE_SET_ASSOC = 8
)(
input logic clk_i,
input logic rst_ni,
input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i,
output logic [NR_PORTS-1:0] gnt_o,
input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i,
input data_t [NR_PORTS-1:0] wdata_i,
input logic [NR_PORTS-1:0] we_i,
input be_t [NR_PORTS-1:0] be_i,
output data_t [DCACHE_SET_ASSOC-1:0] rdata_o,
input logic [NR_PORTS-1:0][DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later
output logic [DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way
output logic [DCACHE_SET_ASSOC-1:0] req_o,
output logic [ADDR_WIDTH-1:0] addr_o,
output data_t wdata_o,
output logic we_o,
output be_t be_o,
input data_t [DCACHE_SET_ASSOC-1:0] rdata_i
);
assign rdata_o = rdata_i;
// one hot encoded
logic [NR_PORTS-1:0] id_d, id_q;
logic [DCACHE_TAG_WIDTH-1:0] sel_tag;
always_comb begin : tag_sel
sel_tag = '0;
for (int unsigned i = 0; i < NR_PORTS; i++)
if (id_q[i])
sel_tag = tag_i[i];
end
for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp
assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0;
end
always_comb begin
gnt_o = '0;
id_d = '0;
wdata_o = '0;
req_o = '0;
addr_o = '0;
be_o = '0;
we_o = '0;
// Request Side
// priority select
for (int unsigned i = 0; i < NR_PORTS; i++) begin
req_o = req_i[i];
id_d = (1'b1 << i);
gnt_o[i] = 1'b1;
addr_o = addr_i[i];
be_o = be_i[i];
we_o = we_i[i];
wdata_o = wdata_i[i];
if (req_i[i])
break;
end
`ifndef SYNTHESIS
`ifndef VERILATOR
// assert that cache only hits on one way
assert property (
@(posedge clk_i) $onehot0(hit_way_o)) else begin $error("Hit should be one-hot encoded"); $stop(); end
`endif
`endif
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
id_q <= 0;
end else begin
id_q <= id_d;
end
end*/
// `ifndef SYNTHESIS
// initial begin
// assert ($bits(data_if.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus");
// assert (DCACHE_LINE_WIDTH/64 inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of 64");
// end
// `endif
endmodule // serpent_dcache

View file

@ -0,0 +1,340 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: Memory arrays, arbiter and tag comparison for serpent dcache.
//
//
// Notes: 1) reads trigger a readout of all ways, and the way where the tag hits is selected
// writes typically go to a single way, and can either be full cacheline access (refills), or single word accesses (writes)
// the cache is multi-banked and hence writes and reads can occur simultaneously if they go to different offsets.
//
// 2) port 0 is special in the sense that it is the only port that has write access to the cache
import ariane_pkg::*;
import serpent_cache_pkg::*;
// Memory arrays, bank arbiter and tag comparison of the serpent data cache.
//
// NOTE(review): in this WIP revision the whole implementation below is commented out,
// so the module only declares its interface and none of its outputs (gnt_o, rd_data_o,
// rd_vld_data_o, rd_hit_oh_o, rd_paddr_is_nc_o) are driven. The NOTE(review) comments
// inside the disabled code list issues to resolve before it is re-enabled.
//
// Parameters:
//   NUM_PORTS     - number of request ports (dimension of tag_i/idx_i/off_i/rd_req_i/gnt_o)
//   NC_ADDR_BEGIN - start address of the noncacheable I/O region
//   NC_ADDR_GE_LT - selects the comparison against NC_ADDR_BEGIN; in the disabled code,
//                   1 flags tags >= the boundary as noncacheable, 0 flags tags < the boundary
module serpent_dcache_mem #(
parameter int unsigned NUM_PORTS = 3,
parameter NC_ADDR_BEGIN = 40'h8000000000, // start address of noncacheable I/O region
parameter bit NC_ADDR_GE_LT = 1'b1 // determines how the physical address is compared with NC_ADDR_BEGIN
)(
input logic clk_i,
input logic rst_ni,
input logic cache_en_i, // make sure this is registered
// per-port request interface: tag, cache line index and offset within the cache line
input logic [NUM_PORTS-1:0][DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later
input logic [NUM_PORTS-1:0][DCACHE_CL_IDX_WIDTH-1:0] idx_i,
input logic [NUM_PORTS-1:0][DCACHE_OFFSET_WIDTH-1:0] off_i,
input logic [NUM_PORTS-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways
// one grant bit per port
output logic [NUM_PORTS-1:0] gnt_o,
// only available on port 0
// the write request vectors carry one bit per way
input logic [DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3]
input logic [DCACHE_SET_ASSOC-1:0] wr_cl_req_i, // writes a full cacheline
input logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_i,
input logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i,
input logic wr_cl_data_is_nc_i, // used to bypass data read from memory
input logic [DCACHE_SET_ASSOC-1:0] wr_vld_data_i, // valid bits
// single word write, no access to tags and valid bits
input logic [63:0] wr_data_i,
input logic [7:0] wr_data_be_i,
// shared by all ports
output logic [63:0] rd_data_o,
output logic [DCACHE_SET_ASSOC-1:0] rd_vld_data_o,
output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o,
output logic rd_paddr_is_nc_o
);
// ///////////////////////////////////////////////////////
// // arbiter
// ///////////////////////////////////////////////////////
// // Priority is highest for lowest index in port array
// // Bank mapping:
// //
// // Bank 0 Bank 2
// // [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] ..
// logic [NUM_PORTS-1:0][DCACHE_NUM_BANKS-1:0] port_bank_req;
// logic [NUM_PORTS-1:0][DCACHE_NUM_BANKS-1:0] port_bank_gnt;
// logic [DCACHE_NUM_BANKS-1:0] bank_gnt;
// generate
// // first port has write access
// // full cl write request will block read requests of lower prio ports
// // only single word writes can interleave with single word reads
// NOTE(review): off_i is a [NUM_PORTS][DCACHE_OFFSET_WIDTH] array, so off_i[DCACHE_OFFSET_WIDTH-1:3]
// slices the port dimension, not the offset bits; presumably off_i[0][...] / off_i[k][...] is meant.
// NOTE(review): |wr_req_i[0] reduces a single bit (way 0 only); presumably |wr_req_i is meant.
// assign port_bank_req[0] = (wr_cl_req_i) ? '1 :
// (rd_req_i[0] | |wr_req_i[0]) ? dcache_cl_bin2oh(off_i[DCACHE_OFFSET_WIDTH-1:3]) : '0;
// for (genvar k=1;k<NUM_PORTS;k++) begin : g_req
// assign port_bank_req[k] = (rd_req_i[k]) ? dcache_cl_bin2oh(off_i[DCACHE_OFFSET_WIDTH-1:3]) : '0;
// end
// // check whether the request matches with the grant
// NOTE(review): a port without any request has port_bank_req == port_bank_gnt == '0, so this
// comparison asserts gnt_o for idle ports as well - confirm that this is intended.
// for (genvar k=0;k<NUM_PORTS;k++) begin : g_gnt
// assign gnt_o[k] = (port_bank_gnt[k] == port_bank_req[k]);
// end
// endgenerate
// // priority arbitration for each bank separately
// always_comb begin : p_prio_arb
// automatic logic tmp;
// port_bank_gnt = '0;
// bank_gnt = '0;
// // loop over banks
// for(int j=0;j<DCACHE_NUM_BANKS;j++) begin
// tmp = 1'b0;
// // loop over ports
// for (int k=0;k<NUM_PORTS;k++) begin
// if(port_bank_req[k][j]) begin
// port_bank_gnt[k][j] = 1'b1;
// tmp = 1'b1;
// break;
// end
// // can only have one read request at the moment
// // due to contentions at the valid/tag memory
// // note: single word writes do NOT need access to the valid/tag memory
// // at the moment
// if(rd_req_i[k]) begin
// break;
// end
// end
// bank_gnt[j] = tmp;
// end
// end
// ///////////////////////////////////////////////////////
// // address and data muxes
// ///////////////////////////////////////////////////////
// logic [DCACHE_NUM_BANKS-1:0] bank_req;
// logic [DCACHE_NUM_BANKS-1:0] bank_we;
// logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][7:0] bank_be;
// logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0] bank_idx;
// logic [DCACHE_NUM_BANKS-1:0][DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q;
// logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][63:0] bank_wdata; //
// logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][63:0] bank_rdata; //
// logic [DCACHE_SET_ASSOC-1:0][63:0] bank_sel; // selected word from each cacheline
// logic [DCACHE_TAG_WIDTH-1:0] tag;
// logic [DCACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
// logic vld_we; // valid bits write enable
// logic [DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
// logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0] tag_rdata; // these are the tags coming from the tagmem
// logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
// NOTE(review): $log2 is not a SystemVerilog system function; $clog2 is presumably meant.
// NOTE(review): port_bank_sel is indexed per bank below and holds a port number, so its dimensions
// look swapped ([DCACHE_NUM_BANKS][$clog2(NUM_PORTS)] expected); vld_sel_d/vld_sel_q are used as a
// single port index and presumably should not be arrays.
// logic [NUM_PORTS-1:0][$log2(DCACHE_NUM_BANKS)-1:0] port_bank_sel, vld_sel_d, vld_sel_q;
// logic byp_en_d, byp_en_q;
// assign bank_req = bank_gnt;
// assign bank_we = (((|wr_req_i) | (|wr_cl_req_i)) & ~wr_cl_data_is_nc_i) ? bank_gnt : '0;
// generate
// for (genvar k=0;k<DCACHE_NUM_BANKS;k++) begin : g_bank
// for (genvar j=0;j<DCACHE_SET_ASSOC;j++) begin : g_bank_way
// assign bank_be[k][j] = (wr_cl_req_i[j]) ? wr_cl_data_be_i[k*8 +: 8] :
// (wr_req_i [j]) ? wr_data_be_i :
// '0;
// assign bank_wdata[k][j] = (|wr_cl_req_i) ? wr_cl_data_i[k*64 +: 64] :
// wr_data_i;
// end
// NOTE(review): port_bank_req is declared [NUM_PORTS][DCACHE_NUM_BANKS], so port_bank_req[k] with a
// bank index k selects the bank vector of port k rather than the per-port requests for bank k.
// lzc #(
// .WIDTH ( NUM_PORTS )
// ) i_lzc (
// .in_i ( port_bank_req[k] ),// use req signals here for better timing
// .cnt_o ( port_bank_sel[k] ),
// .empty_o ( )
// );
// assign bank_idx[k] = idx_i[port_bank_sel[k]];
// assign bank_off_d[k] = off_i[port_bank_sel[k]];
// end
// endgenerate
// // only reads and full cl writes access the tag array
// NOTE(review): wr_cl_req_i is per way (DCACHE_SET_ASSOC bits) while rd_req_i is per port
// (NUM_PORTS bits); OR-ing them mixes two different index spaces.
// lzc #(
// .WIDTH ( NUM_PORTS )
// ) i_lzc (
// .in_i ( wr_cl_req_i | rd_req_i ),
// .cnt_o ( vld_sel_d ),
// .empty_o ( )
// );
// assign vld_addr = idx_i[vld_sel_d];
// assign tag = tag_i[vld_sel_q];// delayed by one cycle
// assign vld_req = (|wr_cl_req_i) ? wr_cl_req_i :
// (|rd_req_i) ? '1 :
// '0;
// NOTE(review): vld_we is declared as a single bit but is assigned the per-way vector wr_cl_req_i here.
// assign vld_we = ((|bank_gnt) & ~wr_cl_data_is_nc_i) ? wr_cl_req_i : '0;
// assign vld_wdata = wr_vld_data_i;
// assign byp_en_d = |wr_cl_req_i;
// always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
// if(~rst_ni) begin
// bank_off_q <= '0;
// vld_sel_q <= '0;
// byp_en_q <= '0;
// end else begin
// bank_off_q <= bank_off_d;
// vld_sel_q <= vld_sel_d;
// byp_en_q <= byp_en_d;
// end
// end
// ///////////////////////////////////////////////////////
// // tag comparison, hit generation
// ///////////////////////////////////////////////////////
// // tag comparison of way 0
// assign rd_hit_oh_o[0] = (tag == tag_rdata[0]) & rd_vld_data_o[0];
// // use way 0 to bypass read data in case we missed on the cache or in case the req is NC
// NOTE(review): the following three lines are not a valid expression: the first arm ends in ';'
// without a ':' branch and the last line is a dangling '? ... :'. In addition, indexing
// wr_cl_data_i with a single index selects one bit, not a 64 bit word.
// assign bank_sel[0] = (byp_en_q) ? wr_cl_data_i[bank_off_q[DCACHE_OFFSET_WIDTH-1:3]];
// (rd_hit_oh_o[0]) ? bank_rdata[0][bank_off_q[DCACHE_OFFSET_WIDTH-1:3]]:
// ? '0 :
// generate
// for (genvar i=1;i<DCACHE_SET_ASSOC;i++) begin : g_tag_cmpsel
// // tag comparison of ways >0
// assign rd_hit_oh_o[i] = (tag == tag_rdata[i]) & rd_vld_data_o[i];
// // byte offset mux of ways >0
// NOTE(review): cl_sel is not declared anywhere in this module; bank_sel (declared above and
// assigned for way 0) is presumably the intended signal here and in p_reduction below.
// assign cl_sel[i] = (rd_hit_oh_o[i] & ~byp_en_q) ? bank_rdata[i][bank_off_q[DCACHE_OFFSET_WIDTH-1:3]] : '0;
// end
// endgenerate
// // OR reduction of selected cachelines
// always_comb begin : p_reduction
// rd_data_o = cl_sel[0];
// for(int i=1; i<DCACHE_SET_ASSOC;i++)
// rd_data_o |= cl_sel[i];
// end
// NOTE(review): the noncacheable check shifts by ICACHE_INDEX_WIDTH although this is the data
// cache; presumably DCACHE_INDEX_WIDTH is meant - confirm.
// generate
// if (NC_ADDR_GE_LT) begin : g_nc_addr_high
// assign rd_paddr_is_nc_o = (tag >= (NC_ADDR_BEGIN>>ICACHE_INDEX_WIDTH)) | ~cache_en_i;
// end
// if (~NC_ADDR_GE_LT) begin : g_nc_addr_low
// assign rd_paddr_is_nc_o = (tag < (NC_ADDR_BEGIN>>ICACHE_INDEX_WIDTH)) | ~cache_en_i;
// end
// endgenerate
// ///////////////////////////////////////////////////////
// // memory arrays and regs
// ///////////////////////////////////////////////////////
// NOTE(review): the tag memory section below still uses ICACHE_TAG_WIDTH, ICACHE_SET_ASSOC and
// ICACHE_NUM_WORDS, while the rest of the module is sized with the DCACHE_* constants.
// logic [ICACHE_TAG_WIDTH:0] vld_tag_rdata [ICACHE_SET_ASSOC-1:0];
// generate
// for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : g_data_banks
// // Data RAM
// sram #(
// .DATA_WIDTH ( 64*DCACHE_SET_ASSOC ),
// .NUM_WORDS ( DCACHE_NUM_WORDS )
// ) data_sram (
// .clk_i ( clk_i ),
// .rst_ni ( rst_ni ),
// .req_i ( bank_req [k] ),
// .we_i ( bank_we [k] ),
// .addr_i ( bank_idx [k] ),
// .wdata_i ( bank_wdata [k] ),
// .be_i ( bank_be [k] ),
// .rdata_o ( bank_rdata [k] )
// );
// end
// for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : g_sram
// assign tag_rdata[i] = vld_tag_rdata[i][ICACHE_TAG_WIDTH-1:0];
// NOTE(review): vld_rdata is not declared in this module; the valid bits are read through
// rd_vld_data_o in the tag comparison above, so that port is presumably the intended target.
// assign vld_rdata[i] = vld_tag_rdata[i][ICACHE_TAG_WIDTH];
// // Tag RAM
// sram #(
// // tag + valid bit
// .DATA_WIDTH ( ICACHE_TAG_WIDTH+1 ),
// .NUM_WORDS ( ICACHE_NUM_WORDS )
// ) tag_sram (
// .clk_i ( clk_i ),
// .rst_ni ( rst_ni ),
// .req_i ( vld_req[i] ),
// .we_i ( vld_we ),
// .addr_i ( vld_addr ),
// .wdata_i ( {vld_wdata[i], tag} ),
// .be_i ( '1 ),
// .rdata_o ( vld_tag_rdata[i] )
// );
// end
// endgenerate
// ///////////////////////////////////////////////////////
// // assertions
// ///////////////////////////////////////////////////////
// NOTE(review): the assertions below refer to icache signals and messages (mem_rtrn_i, cache_wren,
// state_q, "[l1 icache]") that do not exist in this module; they need to be rewritten for the dcache.
// //pragma translate_off
// `ifndef VERILATOR
// // //needs to be one-hot
// // wr_req_i
// // // hot one per bank
// // port_bank_gnt[ports][banks]
// // noncacheable0: assert property (
// // @(posedge clk_i) disable iff (~rst_ni) paddr_is_nc |-> mem_rtrn_vld_i && (mem_rtrn_i.rtype == ICACHE_IFILL_ACK) |-> mem_rtrn_i.nc)
// // else $fatal("[l1 icache] NC paddr implies nc ifill");
// // noncacheable1: assert property (
// // @(posedge clk_i) disable iff (~rst_ni) mem_rtrn_vld_i |-> mem_rtrn_i.f4b |-> mem_rtrn_i.nc)
// // else $fatal(1,"[l1 icache] 4b ifill implies NC");
// // noncacheable2: assert property (
// // @(posedge clk_i) disable iff (~rst_ni) mem_rtrn_vld_i |-> mem_rtrn_i.nc |-> mem_rtrn_i.f4b)
// // else $fatal(1,"[l1 icache] NC implies 4b ifill");
// // repl_inval0: assert property (
// // @(posedge clk_i) disable iff (~rst_ni) cache_wren |-> ~(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
// // else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
// // repl_inval1: assert property (
// // @(posedge clk_i) disable iff (~rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> ~cache_wren)
// // else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
// // invalid_state: assert property (
// // @(posedge clk_i) disable iff (~rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, TLB_MISS, KILL_ATRANS, KILL_MISS}))
// // else $fatal(1,"[l1 icache] fsm reached an invalid state");
// // hot1: assert property (
// // @(posedge clk_i) disable iff (~rst_ni) (~inv_en) |=> cmp_en_q |-> $onehot0(cl_hit))
// // else $fatal(1,"[l1 icache] cl_hit signal must be hot1");
// // initial begin
// // // assert wrong parameterizations
// // assert (ICACHE_INDEX_WIDTH<=12)
// // else $fatal(1,"[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
// // end
// `endif
// //pragma translate_on
endmodule // serpent_dcache_mem

View file

@ -379,9 +379,9 @@ module serpent_icache #(
(inv_en) ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] :
cl_index;
assign vld_req = (flush_en | cache_rden) ? '1 :
(mem_rtrn_i.inv.all & inv_en) ? '1 :
(mem_rtrn_i.inv.vld & inv_en) ? bin2onehot(mem_rtrn_i.inv.way) :
assign vld_req = (flush_en | cache_rden) ? '1 :
(mem_rtrn_i.inv.all & inv_en) ? '1 :
(mem_rtrn_i.inv.vld & inv_en) ? icache_way_bin2oh(mem_rtrn_i.inv.way) :
repl_way_oh_q;
assign vld_wdata = (cache_wren) ? '1 : '0;
@ -393,7 +393,7 @@ module serpent_icache #(
// chose random replacement if all are valid
assign update_lfsr = cache_wren & all_ways_valid;
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
assign repl_way_oh_d = (cmp_en_q) ? bin2onehot(repl_way) : repl_way_oh_q;
assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q;
// enable signals for memory arrays
assign cl_req = (cache_rden) ? '1 :