Implemented serpent DCache and the corresponding TB

This commit is contained in:
Michael Schaffner 2018-09-25 13:07:32 +02:00
parent 5f188375c7
commit ff7bba23aa
No known key found for this signature in database
GPG key ID: 7AA09AE049819C2C
22 changed files with 3609 additions and 557 deletions

View file

@ -138,6 +138,17 @@ serp-icache-quest:
dependencies:
- build
serp-dcache-quest:
stage: test_serpent
script:
- cd tb/tb_serpent_dcache/
- make simc
- "grep 'CI: PASSED' RD0_summary.rep"
- "grep 'CI: PASSED' RD1_summary.rep"
- "grep 'CI: PASSED' TB_MEM_summary.rep"
dependencies:
- build
serp-torture:
stage: test_serpent
script:

View file

@ -162,16 +162,16 @@ package ariane_pkg;
// ---------------
// I$
localparam int unsigned ICACHE_INDEX_WIDTH = 12; // in bit
localparam int unsigned ICACHE_TAG_WIDTH = 44; // in bit
localparam int unsigned ICACHE_SET_ASSOC = 4;
localparam int unsigned ICACHE_LINE_WIDTH = 128; // in bit
localparam int unsigned ICACHE_INDEX_WIDTH = 12; // in bit
localparam int unsigned ICACHE_TAG_WIDTH = 44; // in bit
localparam int unsigned ICACHE_LINE_WIDTH = 128; // in bit
localparam int unsigned ICACHE_SET_ASSOC = 4;
// D$
localparam int unsigned DCACHE_INDEX_WIDTH = 12;
localparam int unsigned DCACHE_TAG_WIDTH = 44;
localparam int unsigned DCACHE_LINE_WIDTH = 128;
localparam int unsigned DCACHE_SET_ASSOC = 8;
localparam int unsigned DCACHE_INDEX_WIDTH = 12;
localparam int unsigned DCACHE_TAG_WIDTH = 44;
localparam int unsigned DCACHE_LINE_WIDTH = 128;
localparam int unsigned DCACHE_SET_ASSOC = 8;
// ---------------
// EX Stage

View file

@ -18,16 +18,16 @@
package serpent_cache_pkg;
localparam L15_SET_ASSOC = 4;
localparam L15_SET_ASSOC = 4;
// these parameters need to coincide with the current L1.5 parameterization
// do not change
localparam L15_TID_WIDTH = 2;
localparam L15_TLB_CSM_WIDTH = 33;
localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC);
localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC);
localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC);
localparam L15_TID_WIDTH = 2;
localparam L15_TLB_CSM_WIDTH = 33;
localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC);
localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC);
localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC);
// FIFO depths of L15 adapter
localparam ADAPTER_REQ_FIFO_DEPTH = 2;
@ -37,31 +37,40 @@ package serpent_cache_pkg;
// Calculated parameter
localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH/8);
localparam ICACHE_NUM_WORDS = 2**(ariane_pkg::ICACHE_INDEX_WIDTH-ICACHE_OFFSET_WIDTH);
localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS);// excluding byte offset
localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8);
localparam DCACHE_NUM_WORDS = 2**(ariane_pkg::DCACHE_INDEX_WIDTH-DCACHE_OFFSET_WIDTH);
localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS);// excluding byte offset
localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH/64;
// write buffer parameterization
localparam DCACHE_WBUF_DEPTH = 8;
localparam DCACHE_MAX_TX = 4;// TODO: set to number of threads supported in
localparam DCACHE_ID_WIDTH = $clog2(DCACHE_MAX_TX);// TODO: set to number of threads supported in
localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH/8);
localparam ICACHE_NUM_WORDS = 2**(ariane_pkg::ICACHE_INDEX_WIDTH-ICACHE_OFFSET_WIDTH);
localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS);// excluding byte offset
localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8);
localparam DCACHE_NUM_WORDS = 2**(ariane_pkg::DCACHE_INDEX_WIDTH-DCACHE_OFFSET_WIDTH);
localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS);// excluding byte offset
localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH/64;
// write buffer parameterization
localparam DCACHE_WBUF_DEPTH = 8;
localparam DCACHE_MAX_TX = 4;// TODO: set to number of threads supported in
localparam DCACHE_ID_WIDTH = $clog2(DCACHE_MAX_TX);// TODO: set to number of threads supported in
typedef struct packed {
logic [ariane_pkg::DCACHE_INDEX_WIDTH+ariane_pkg::DCACHE_TAG_WIDTH-1:0] wtag;
logic [63:0] data;
logic [7:0] dirty; // byte is dirty (not yet sent to memory)
logic [7:0] valid; // byte is valid
logic [63:0] data;
logic [7:0] dirty; // byte is dirty
logic [7:0] valid; // byte is valid
logic [7:0] txblock; // byte is part of transaction in-flight
logic checked; // if cache state of this word has been checked
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] hit_oh; // valid way in the cache
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] hit_oh; // valid way in the cache
} wbuffer_t;
// TX status registers are indexed with the transaction ID
// they basically store which bytes from which buffer entry are part
// of that transaction
typedef struct packed {
logic vld;
logic [7:0] be;
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] ptr;
} tx_stat_t;
// local interfaces between caches and L15 adapter
typedef enum logic [1:0] {
@ -127,7 +136,6 @@ package serpent_cache_pkg;
// taken from iop.h in openpiton
// this is a work around, need to include files properly
// to l1.5 (only marked subset is used)
typedef enum logic [4:0] {LOAD_RQ = 5'b00000, // load request
IMISS_RQ = 5'b10000, // instruction fill request
@ -165,7 +173,6 @@ package serpent_cache_pkg;
} l15_rtrntypes_t;
// l15 interface uses reg for compatibility with verilog
typedef struct packed {
l15_reqtypes_t l15_rqtype; // see below for encoding
logic l15_nc; // non-cacheable bit

View file

@ -34,8 +34,8 @@ module serpent_dcache #(
output amo_resp_t amo_ack_o,
// Request ports
input dcache_req_i_t [2:0] req_ports_i, // request ports
output dcache_req_o_t [2:0] req_ports_o, // request ports
input dcache_req_i_t [2:0] req_ports_i,
output dcache_req_o_t [2:0] req_ports_o,
input logic mem_rtrn_vld_i,
input dcache_rtrn_t mem_rtrn_i,
@ -45,21 +45,20 @@ module serpent_dcache #(
);
// LD unit and PTW
localparam NUM_RD_PORTS = 3;
localparam NUM_PORTS = 3;
// miss unit <-> read controllers
logic cache_en, flush_en;
// miss unit <-> memory
logic bypass_en;
logic [DCACHE_SET_ASSOC-1:0] wr_cl_vld;
logic wr_cl_vld;
logic wr_cl_nc;
logic [DCACHE_SET_ASSOC-1:0] wr_cl_we;
logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag;
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx;
logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off;
logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data;
logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be;
logic wr_cl_data_is_nc;
logic wr_cl_byp_en;
logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits;
logic [DCACHE_SET_ASSOC-1:0] wr_req;
logic wr_ack;
@ -69,34 +68,37 @@ module serpent_dcache #(
logic [7:0] wr_data_be;
// miss unit <-> controllers/wbuffer
logic [NUM_RD_PORTS-1:0] miss_req;
logic [NUM_RD_PORTS-1:0] miss_ack;
logic [NUM_RD_PORTS-1:0] miss_nc;
logic [NUM_RD_PORTS-1:0] miss_we;
logic [NUM_RD_PORTS-1:0][63:0] miss_wdata;
logic [NUM_RD_PORTS-1:0][63:0] miss_paddr;
logic [NUM_RD_PORTS-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits;
logic [NUM_RD_PORTS-1:0][2:0] miss_size;
logic [NUM_RD_PORTS-1:0][DCACHE_ID_WIDTH-1:0] miss_id;
logic miss_rtrn;
logic [DCACHE_ID_WIDTH-1:0] miss_rtrn_id;
logic [NUM_PORTS-1:0] miss_req;
logic [NUM_PORTS-1:0] miss_ack;
logic [NUM_PORTS-1:0] miss_nc;
logic [NUM_PORTS-1:0] miss_we;
logic [NUM_PORTS-1:0][63:0] miss_wdata;
logic [NUM_PORTS-1:0][63:0] miss_paddr;
logic [NUM_PORTS-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits;
logic [NUM_PORTS-1:0][2:0] miss_size;
logic [NUM_PORTS-1:0][DCACHE_ID_WIDTH-1:0] miss_wr_id;
logic [NUM_PORTS-1:0] miss_replay;
logic [NUM_PORTS-1:0] miss_rtrn_vld;
logic [DCACHE_ID_WIDTH-1:0] miss_rtrn_id;
// memory <-> read controllers/miss unit
logic [NUM_RD_PORTS:0] rd_req;
logic [NUM_RD_PORTS:0] rd_ack;
logic [NUM_RD_PORTS-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag;
logic [NUM_RD_PORTS-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx;
logic [NUM_RD_PORTS-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off;
logic [63:0] rd_data;
logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits;
logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh;
logic [NUM_PORTS-1:0] rd_prio;
logic [NUM_PORTS-1:0] rd_tag_only;
logic [NUM_PORTS-1:0] rd_req;
logic [NUM_PORTS-1:0] rd_ack;
logic [NUM_PORTS-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag;
logic [NUM_PORTS-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx;
logic [NUM_PORTS-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off;
logic [63:0] rd_data;
logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits;
logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh;
// miss unit <-> wbuffer
logic inval_vld;
logic [DCACHE_CL_IDX_WIDTH-1:0] inval_cl_idx;
// wbuffer <-> memory
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data;
logic [DCACHE_MAX_TX-1:0][63:0] tx_paddr;
logic [DCACHE_MAX_TX-1:0] tx_vld;
// wbuffer <-> memory
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data;
///////////////////////////////////////////////////////
@ -104,7 +106,7 @@ module serpent_dcache #(
///////////////////////////////////////////////////////
serpent_dcache_missunit #(
.NUM_PORTS(NUM_RD_PORTS)
.NUM_PORTS(NUM_PORTS)
) i_serpent_dcache_missunit (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
@ -127,21 +129,23 @@ module serpent_dcache #(
.miss_paddr_i ( miss_paddr ),
.miss_vld_bits_i ( miss_vld_bits ),
.miss_size_i ( miss_size ),
.miss_id_i ( miss_id ),
// to writebuffer
.miss_rtrn_o ( miss_rtrn ),
.miss_wr_id_i ( miss_wr_id ),
.miss_replay_o ( miss_replay ),
.miss_rtrn_vld_o ( miss_rtrn_vld ),
.miss_rtrn_id_o ( miss_rtrn_id ),
.inval_vld_o ( inval_vld ),
.inval_cl_idx_o ( inval_cl_idx ),
// from writebuffer
.tx_paddr_i ( tx_paddr ),
.tx_vld_i ( tx_vld ),
// cache memory interface
.wr_cl_vld_o ( wr_cl_vld ),
.wr_cl_nc_o ( wr_cl_nc ),
.wr_cl_we_o ( wr_cl_we ),
.wr_cl_tag_o ( wr_cl_tag ),
.wr_cl_idx_o ( wr_cl_idx ),
.wr_cl_off_o ( wr_cl_off ),
.wr_cl_data_o ( wr_cl_data ),
.wr_cl_data_be_o ( wr_cl_data_be ),
.wr_vld_bits_o ( wr_vld_bits ),
.wr_cl_byp_en_o ( wr_cl_byp_en ),
// memory interface
.mem_rtrn_vld_i ( mem_rtrn_vld_i ),
.mem_rtrn_i ( mem_rtrn_i ),
@ -156,37 +160,45 @@ module serpent_dcache #(
generate
// note: last read port is used by the write buffer
for(genvar k=0; k<NUM_RD_PORTS-1; k++) begin
for(genvar k=0; k<NUM_PORTS-1; k++) begin
// set these to high prio ports
assign rd_prio[k] = 1'b1;
serpent_dcache_ctrl #(
.NC_ADDR_BEGIN(NC_ADDR_BEGIN),
.NC_ADDR_GE_LT(NC_ADDR_GE_LT))
i_serpent_dcache_ctrl (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_en ),
.cache_en_i ( cache_en ),
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_en ),
.cache_en_i ( cache_en ),
// reqs from core
.req_port_i ( req_ports_i [k] ),
.req_port_o ( req_ports_o [k] ),
// miss interface
.miss_req_o ( miss_req [k] ),
.miss_ack_i ( miss_ack [k] ),
.miss_we_o ( miss_we [k] ),
.miss_wdata_o ( miss_wdata [k] ),
.miss_vld_bits_o ( miss_vld_bits[k] ),
.miss_paddr_o ( miss_paddr [k] ),
.miss_nc_o ( miss_nc [k] ),
.miss_size_o ( miss_size [k] ),
.miss_id_o ( miss_id [k] ),
// cache mem interface
.rd_tag_o ( rd_tag [k] ),
.rd_idx_o ( rd_idx [k] ),
.rd_off_o ( rd_off [k] ),
.rd_req_o ( rd_req [k] ),
.rd_ack_i ( rd_ack [k] ),
.rd_data_i ( rd_data ),
.rd_vld_data_i ( rd_vld_data ),
.rd_hit_oh_i ( rd_hit_oh )
.req_port_i ( req_ports_i [k] ),
.req_port_o ( req_ports_o [k] ),
// miss interface
.miss_req_o ( miss_req [k] ),
.miss_ack_i ( miss_ack [k] ),
.miss_we_o ( miss_we [k] ),
.miss_wdata_o ( miss_wdata [k] ),
.miss_vld_bits_o ( miss_vld_bits [k] ),
.miss_paddr_o ( miss_paddr [k] ),
.miss_nc_o ( miss_nc [k] ),
.miss_size_o ( miss_size [k] ),
.miss_wr_id_o ( miss_wr_id [k] ),
.miss_replay_i ( miss_replay [k] ),
.miss_rtrn_vld_i ( miss_rtrn_vld [k] ),
// used to detect readout mux collisions
.wr_cl_vld_i ( wr_cl_vld ),
// cache mem interface
.rd_tag_o ( rd_tag [k] ),
.rd_idx_o ( rd_idx [k] ),
.rd_off_o ( rd_off [k] ),
.rd_req_o ( rd_req [k] ),
.rd_tag_only_o ( rd_tag_only [k] ),
.rd_ack_i ( rd_ack [k] ),
.rd_data_i ( rd_data ),
.rd_vld_bits_i ( rd_vld_bits ),
.rd_hit_oh_i ( rd_hit_oh )
);
end
endgenerate
@ -194,91 +206,98 @@ module serpent_dcache #(
///////////////////////////////////////////////////////
// store unit controller
///////////////////////////////////////////////////////
serpent_dcache_wbuffer #(
.NUM_WORDS ( DCACHE_WBUF_DEPTH ),
.MAX_TX ( DCACHE_MAX_TX ),
.NC_ADDR_BEGIN ( NC_ADDR_BEGIN ),
.NC_ADDR_GE_LT ( NC_ADDR_GE_LT ))
i_serpent_dcache_wbuffer (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.empty_o ( wbuffer_empty_o ),
.cache_en_i ( cache_en ),
// request ports from core (store unit)
.req_port_i ( req_ports_i [2] ),
.req_port_o ( req_ports_o [2] ),
// miss unit interface
.miss_req_o ( miss_req [2] ),
.miss_ack_i ( miss_ack [2] ),
.miss_we_o ( miss_we [2] ),
.miss_wdata_o ( miss_wdata [2] ),
.miss_vld_bits_o ( miss_vld_bits[2] ),
.miss_paddr_o ( miss_paddr [2] ),
.miss_nc_o ( miss_nc [2] ),
.miss_size_o ( miss_size [2] ),
.miss_id_o ( miss_id [2] ),
.miss_rtrn_i ( miss_rtrn ),
.miss_rtrn_id_i ( miss_rtrn_id ),
// cache read interface
.rd_tag_o ( rd_tag [2] ),
.rd_idx_o ( rd_idx [2] ),
.rd_off_o ( rd_off [2] ),
.rd_req_o ( rd_req [2] ),
.rd_ack_i ( rd_ack [2] ),
.rd_data_i ( rd_data ),
.rd_vld_data_i ( rd_vld_bits ),
.rd_hit_oh_i ( rd_hit_oh ),
// incoming invalidations
.inval_vld_i ( inval_vld ),
.inval_cl_idx_i ( inval_cl_idx ),
// single word write interface
.wr_req_o ( wr_req ),
.wr_ack_i ( wr_ack ),
.wr_idx_o ( wr_idx ),
.wr_off_o ( wr_off ),
.wr_data_o ( wr_data ),
.wr_data_be ( wr_data_be ),
// write buffer forwarding
.wbuffer_data_o ( wbuffer_data )
);
// set read port to low priority
assign rd_prio[2] = 1'b0;
serpent_dcache_wbuffer #(
.NC_ADDR_BEGIN ( NC_ADDR_BEGIN ),
.NC_ADDR_GE_LT ( NC_ADDR_GE_LT ))
i_serpent_dcache_wbuffer (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.empty_o ( wbuffer_empty_o ),
.cache_en_i ( cache_en ),
// request ports from core (store unit)
.req_port_i ( req_ports_i [2] ),
.req_port_o ( req_ports_o [2] ),
// miss unit interface
.miss_req_o ( miss_req [2] ),
.miss_ack_i ( miss_ack [2] ),
.miss_we_o ( miss_we [2] ),
.miss_wdata_o ( miss_wdata [2] ),
.miss_vld_bits_o ( miss_vld_bits [2] ),
.miss_paddr_o ( miss_paddr [2] ),
.miss_nc_o ( miss_nc [2] ),
.miss_size_o ( miss_size [2] ),
.miss_wr_id_o ( miss_wr_id [2] ),
.miss_rtrn_vld_i ( miss_rtrn_vld [2] ),
.miss_rtrn_id_i ( miss_rtrn_id ),
// cache read interface
.rd_tag_o ( rd_tag [2] ),
.rd_idx_o ( rd_idx [2] ),
.rd_off_o ( rd_off [2] ),
.rd_req_o ( rd_req [2] ),
.rd_tag_only_o ( rd_tag_only [2] ),
.rd_ack_i ( rd_ack [2] ),
.rd_data_i ( rd_data ),
.rd_vld_bits_i ( rd_vld_bits ),
.rd_hit_oh_i ( rd_hit_oh ),
// incoming invalidations/cache refills
.wr_cl_vld_i ( wr_cl_vld ),
.wr_cl_idx_i ( wr_cl_idx ),
// single word write interface
.wr_req_o ( wr_req ),
.wr_ack_i ( wr_ack ),
.wr_idx_o ( wr_idx ),
.wr_off_o ( wr_off ),
.wr_data_o ( wr_data ),
.wr_data_be_o ( wr_data_be ),
// write buffer forwarding
.wbuffer_data_o ( wbuffer_data ),
.tx_paddr_o ( tx_paddr ),
.tx_vld_o ( tx_vld )
);
///////////////////////////////////////////////////////
// memory arrays, arbitration and tag comparison
///////////////////////////////////////////////////////
serpent_dcache_mem #(
.NUM_RD_PORTS(NUM_RD_PORTS)
.NUM_PORTS(NUM_PORTS)
) i_serpent_dcache_mem (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
// read ports
.rd_tag_i ( rd_tag ),
.rd_idx_i ( rd_idx ),
.rd_off_i ( rd_off ),
.rd_req_i ( rd_req ),
.rd_ack_o ( rd_ack ),
.rd_vld_bits_o ( rd_vld_bits ),
.rd_hit_oh_o ( rd_hit_oh ),
.rd_data_o ( rd_data ),
// cacheline write port
.wr_cl_vld_i ( wr_cl_vld ),
.wr_cl_tag_i ( wr_cl_tag ),
.wr_cl_idx_i ( wr_cl_idx ),
.wr_cl_off_i ( wr_cl_off ),
.wr_cl_data_i ( wr_cl_data ),
.wr_cl_data_be_i ( wr_cl_data_be ),
.wr_vld_bits_i ( wr_vld_bits ),
.wr_cl_byp_en_i ( wr_cl_byp_en ),
// single word write port
.wr_req_i ( wr_req ),
.wr_ack_o ( wr_ack ),
.wr_idx_i ( wr_idx ),
.wr_off_i ( wr_off ),
.wr_data_i ( wr_data ),
.wr_data_be_i ( wr_data_be ),
// write buffer forwarding
.wbuffer_data_i ( wbuffer_data )
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
// read ports
.rd_prio_i ( rd_prio ),
.rd_tag_i ( rd_tag ),
.rd_idx_i ( rd_idx ),
.rd_off_i ( rd_off ),
.rd_req_i ( rd_req ),
.rd_tag_only_i ( rd_tag_only ),
.rd_ack_o ( rd_ack ),
.rd_vld_bits_o ( rd_vld_bits ),
.rd_hit_oh_o ( rd_hit_oh ),
.rd_data_o ( rd_data ),
// cacheline write port
.wr_cl_vld_i ( wr_cl_vld ),
.wr_cl_nc_i ( wr_cl_nc ),
.wr_cl_we_i ( wr_cl_we ),
.wr_cl_tag_i ( wr_cl_tag ),
.wr_cl_idx_i ( wr_cl_idx ),
.wr_cl_off_i ( wr_cl_off ),
.wr_cl_data_i ( wr_cl_data ),
.wr_cl_data_be_i ( wr_cl_data_be ),
.wr_vld_bits_i ( wr_vld_bits ),
// single word write port
.wr_req_i ( wr_req ),
.wr_ack_o ( wr_ack ),
.wr_idx_i ( wr_idx ),
.wr_off_i ( wr_off ),
.wr_data_i ( wr_data ),
.wr_data_be_i ( wr_data_be ),
// write buffer forwarding
.wbuffer_data_i ( wbuffer_data )
);
///////////////////////////////////////////////////////
@ -290,57 +309,16 @@ serpent_dcache_wbuffer #(
//pragma translate_off
`ifndef VERILATOR
flush: assert property (
@(posedge clk_i) disable iff (~rst_ni) flush_i |-> flush_ack_o |-> wbuffer_empty_o)
else $fatal(1,"[l1 dcache] flushed cache implies flushed wbuffer");
// //needs to be hot one
// wr_req_i
// // hot one per bank
// port_bank_gnt[ports][banks]
// hot1: assert property (
// @(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(rd_hit_oh_o))
// else $fatal(1,"[l1 dcache] rd_hit_oh_o signal must be hot1");
// noncacheable0: assert property (
// @(posedge clk_i) disable iff (~rst_ni) paddr_is_nc |-> mem_rtrn_vld_i && (mem_rtrn_i.rtype == DCACHE_IFILL_ACK) |-> mem_rtrn_i.nc)
// else $fatal("[l1 icache] NC paddr implies nc ifill");
// noncacheable1: assert property (
// @(posedge clk_i) disable iff (~rst_ni) mem_rtrn_vld_i |-> mem_rtrn_i.f4b |-> mem_rtrn_i.nc)
// else $fatal(1,"[l1 icache] 4b ifill implies NC");
// noncacheable2: assert property (
// @(posedge clk_i) disable iff (~rst_ni) mem_rtrn_vld_i |-> mem_rtrn_i.nc |-> mem_rtrn_i.f4b)
// else $fatal(1,"[l1 icache] NC implies 4b ifill");
// repl_inval0: assert property (
// @(posedge clk_i) disable iff (~rst_ni) cache_wren |-> ~(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
// else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
// repl_inval1: assert property (
// @(posedge clk_i) disable iff (~rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> ~cache_wren)
// else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
// invalid_state: assert property (
// @(posedge clk_i) disable iff (~rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, TLB_MISS, KILL_ATRANS, KILL_MISS}))
// else $fatal(1,"[l1 icache] fsm reached an invalid state");
// hot1: assert property (
// @(posedge clk_i) disable iff (~rst_ni) (~inv_en) |=> cmp_en_q |-> $onehot0(cl_hit))
// else $fatal(1,"[l1 icache] cl_hit signal must be hot1");
// initial begin
// // assert wrong parameterizations
// assert (DCACHE_INDEX_WIDTH<=12)
// else $fatal(1,"[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages");
// end
initial begin
// assert wrong parameterizations
assert (DCACHE_INDEX_WIDTH<=12)
else $fatal(1,"[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages");
end
`endif
//pragma translate_on
// `ifndef SYNTHESIS
// initial begin
// assert ($bits(data_if.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus");
// assert (DCACHE_LINE_WIDTH/64 inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of 64");
// end
// `endif
endmodule // serpent_dcache

View file

@ -29,65 +29,78 @@ module serpent_dcache_ctrl #(
output dcache_req_o_t req_port_o,
// interface to miss handler
output logic miss_req_o,
input logic miss_ack_i, // asserted in the same cycle as when the data returns from memory
input logic miss_ack_i,
output logic miss_we_o, // unused (set to 0)
output logic [63:0] miss_wdata_o, // unused (set to 0)
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index
output logic [63:0] miss_paddr_o,
output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline
output logic [DCACHE_ID_WIDTH-1:0] miss_id_o, // unused (set to 0)
output logic [DCACHE_ID_WIDTH-1:0] miss_wr_id_o, // unused (set to 0)
input logic miss_replay_i, // request collided with pending miss - have to replay the request
input logic miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory
// used to detect readout mux collisions
input logic wr_cl_vld_i,
// cache memory interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to zero here
input logic rd_ack_i,
input logic [63:0] rd_data_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_data_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i
input logic [63:0] rd_data_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i
);
// cpmtroller FSM
typedef enum logic[1:0] {IDLE, READ, MISS, KILL_MISS} state_t;
// controller FSM
typedef enum logic[2:0] {IDLE, READ, MISS_REQ, MISS_WAIT, KILL_MISS, REPLAY_REQ, REPLAY_READ} state_t;
state_t state_d, state_q;
logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q;
logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q;
logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q;
logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q;
logic save_tag;
logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q;
logic [1:0] data_size_d, data_size_q;
///////////////////////////////////////////////////////
// misc
///////////////////////////////////////////////////////
// map address to tag/idx/offset and save
assign vld_data_d = (save_tag) ? rd_vld_data_i : vld_data_q;
assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q;
assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q;
assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q;
assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0] : address_off_q;
assign tag_o = address_tag_d;
assign off_o = address_off_d;
assign data_size_d = (req_port_o.data_gnt) ? req_port_i.data_size : data_size_q;
assign rd_tag_o = address_tag_d;
assign rd_idx_o = address_idx_d;
assign rd_off_o = address_off_d;
assign req_port_o.data_rdata = rd_data_i;
// to miss unit
assign miss_vld_bits_o = vld_data_d;
assign miss_paddr_o = {address_tag_d, address_idx_q, address_off_q};
assign miss_size_o = (miss_nc_o) ? req_port_i.data_size : 3'b111;
assign miss_vld_bits_o = vld_data_q;
assign miss_paddr_o = {address_tag_q, address_idx_q, address_off_q};
assign miss_size_o = (miss_nc_o) ? data_size_q : 3'b111;
generate
if (NC_ADDR_GE_LT) begin : g_nc_addr_high
assign miss_nc_o = (address_tag_d >= (NC_ADDR_BEGIN>>DCACHE_INDEX_WIDTH)) | ~cache_en_i;
assign miss_nc_o = (address_tag_q >= (NC_ADDR_BEGIN>>DCACHE_INDEX_WIDTH)) | ~cache_en_i;
end
if (~NC_ADDR_GE_LT) begin : g_nc_addr_low
assign miss_nc_o = (address_tag_d < (NC_ADDR_BEGIN>>DCACHE_INDEX_WIDTH)) | ~cache_en_i;
assign miss_nc_o = (address_tag_q < (NC_ADDR_BEGIN>>DCACHE_INDEX_WIDTH)) | ~cache_en_i;
end
endgenerate
assign miss_we_o = '0;
assign miss_wdata_o = '0;
assign miss_id_o = '0;
assign miss_wr_id_o = '0;
assign rd_req_d = rd_req_o;
assign rd_ack_d = rd_ack_i;
assign rd_tag_only_o = '0;
///////////////////////////////////////////////////////
// main control logic
///////////////////////////////////////////////////////
@ -122,51 +135,81 @@ module serpent_dcache_ctrl #(
// or in case the address is NC, we
// reuse the miss mechanism to handle
// the request
READ: begin
READ, REPLAY_READ: begin
// speculatively request cache line
rd_req_o = 1'b1;
save_tag = 1'b1;
// flush or kill -> go back to IDLE
if(flush_i || req_port_i.kill_req) begin
state_d = IDLE;
end else if(req_port_i.tag_valid) begin
end else if(req_port_i.tag_valid | state_q==REPLAY_READ) begin
save_tag = (state_q!=REPLAY_READ);
if(wr_cl_vld_i | ~rd_ack_q) begin
state_d = REPLAY_REQ;
// we've got a hit
if((|rd_hit_oh_i) & cache_en_i) begin
end else if((|rd_hit_oh_i) & cache_en_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
// we can handle another request
if (rd_ack_i) begin
if (rd_ack_i && req_port_i.data_req) begin
state_d = READ;
req_port_o.data_gnt = 1'b1;
end
// we've got a miss
end else begin
miss_req_o = 1'b1;
state_d = MISS;
state_d = MISS_REQ;
end
end
end
//////////////////////////////////
// wait until the memory transaction
// returns.
MISS: begin
// issue request
MISS_REQ: begin
miss_req_o = 1'b1;
if (flush_i || req_port_i.kill_req) begin
if(miss_ack_i) begin
state_d = KILL_MISS;
end else begin
state_d = IDLE;
end
end else if(miss_replay_i) begin
state_d = REPLAY_REQ;
end else if(miss_ack_i) begin
state_d = MISS_WAIT;
end
end
//////////////////////////////////
// wait until the memory transaction
// returns.
MISS_WAIT: begin
if(flush_i || req_port_i.kill_req) begin
state_d = KILL_MISS;
end else if(miss_ack_i) begin
if(miss_rtrn_vld_i) begin
state_d = IDLE;
end else begin
state_d = KILL_MISS;
end
end else if(miss_rtrn_vld_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end
end
//////////////////////////////////
// replay read request
REPLAY_REQ: begin
rd_req_o = 1'b1;
if (flush_i || req_port_i.kill_req) begin
state_d = IDLE;
end else if(rd_ack_i) begin
state_d = REPLAY_READ;
end
end
//////////////////////////////////
// killed miss,
// wait until miss unit responds and
// go back to idle
KILL_MISS: begin
if (miss_ack_i) begin
state_d = IDLE;
if (miss_rtrn_vld_i) begin
state_d = IDLE;
end
end
default: begin
@ -182,15 +225,23 @@ module serpent_dcache_ctrl #(
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if(~rst_ni) begin
state_q <= IDLE;
address_tag_q <= '0;
address_idx_q <= '0;
address_off_q <= '0;
state_q <= IDLE;
address_tag_q <= '0;
address_idx_q <= '0;
address_off_q <= '0;
vld_data_q <= '0;
data_size_q <= '0;
rd_req_q <= '0;
rd_ack_q <= '0;
end else begin
state_q <= state_d;
address_tag_q <= address_tag_d;
address_idx_q <= address_idx_d;
address_off_q <= address_off_d;
state_q <= state_d;
address_tag_q <= address_tag_d;
address_idx_q <= address_idx_d;
address_off_q <= address_off_d;
vld_data_q <= vld_data_d;
data_size_q <= data_size_d;
rd_req_q <= rd_req_d;
rd_ack_q <= rd_ack_d;
end
end

View file

@ -22,36 +22,40 @@
// these single word writes can interleave with read operations if they go to different
// cacheline offsets, since each word offset is placed into a different SRAM bank.
//
// 4) Access priority is port 0 > port 1 > port 2... > single word write port
// 4) Read ports with the same priority are RR arbitrated, but high prio ports (rd_prio_i[port_nr] = 1'b1) will stall
// low prio ports (rd_prio_i[port_nr] = 1'b0)
import ariane_pkg::*;
import serpent_cache_pkg::*;
module serpent_dcache_mem #(
parameter int unsigned NUM_RD_PORTS = 3
parameter int unsigned NUM_PORTS = 3
)(
input logic clk_i,
input logic rst_ni,
// ports
input logic [NUM_RD_PORTS-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later
input logic [NUM_RD_PORTS-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i,
input logic [NUM_RD_PORTS-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i,
input logic [NUM_RD_PORTS-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways
output logic [NUM_RD_PORTS-1:0] rd_ack_o,
input logic [NUM_PORTS-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later
input logic [NUM_PORTS-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i,
input logic [NUM_PORTS-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i,
input logic [NUM_PORTS-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways
input logic [NUM_PORTS-1:0] rd_tag_only_i, // only do a tag/valid lookup, no access to data arrays
input logic [NUM_PORTS-1:0] rd_prio_i, // 0: low prio, 1: high prio
output logic [NUM_PORTS-1:0] rd_ack_o,
output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o,
output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o,
output logic [63:0] rd_data_o,
// only available on port 0, uses address signals of port 0
input logic [DCACHE_SET_ASSOC-1:0] wr_cl_vld_i, // writes a full cacheline
input logic wr_cl_vld_i,
input logic wr_cl_nc_i, // noncacheable access
input logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_i, // writes a full cacheline
input logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
input logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i,
input logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_i,
input logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i,
input logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_i,
input logic wr_cl_byp_en_i,
// separate port for single word write, no tag access
input logic [DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3]
@ -77,22 +81,23 @@ module serpent_dcache_mem #(
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][63:0] bank_rdata; //
logic [DCACHE_SET_ASSOC-1:0][63:0] rdata_cl; // selected word from each cacheline
logic [DCACHE_TAG_WIDTH-1:0] tag;
logic [DCACHE_TAG_WIDTH-1:0] rd_tag;
logic [DCACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable
logic [DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0] tag_rdata; // these are the tags coming from the tagmem
logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
logic [NUM_RD_PORTS-1:0][$clog2(DCACHE_NUM_BANKS)-1:0] vld_sel_d, vld_sel_q;
logic [$clog2(NUM_PORTS)-1:0] vld_sel_d, vld_sel_q;
logic [DCACHE_SET_ASSOC-1:0] wr_idx_oh;
logic [DCACHE_SET_ASSOC-1:0][63:0] rdata_sel;
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh;
logic [7:0] wbuffer_be;
logic [63:0] wbuffer_rdata, rdata;
logic [63:0] wbuffer_cmp_addr;
logic cmp_en_d, cmp_en_q;
logic rd_acked;
logic [NUM_PORTS-1:0] bank_collision, rd_req_masked, rd_req_prio;
///////////////////////////////////////////////////////
// arbiter
@ -109,139 +114,138 @@ module serpent_dcache_mem #(
generate
for (genvar k=0;k<DCACHE_NUM_BANKS;k++) begin : g_bank
for (genvar j=0;j<DCACHE_SET_ASSOC;j++) begin : g_bank_way
assign bank_be[k][j] = (wr_cl_vld_i[j]) ? wr_cl_data_be_i[k*8 +: 8] :
(wr_ack_o) ? wr_data_be_i :
'0;
assign bank_be[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*8 +: 8] :
(wr_req_i[j] & wr_ack_o) ? wr_data_be_i :
'0;
assign bank_wdata[k][j] = (|wr_cl_vld_i) ? wr_cl_data_i[k*64 +: 64] :
wr_data_i;
assign bank_wdata[k][j] = (wr_cl_vld_i) ? wr_cl_data_i[k*64 +: 64] :
wr_data_i;
end
end
endgenerate
assign vld_wdata = wr_vld_bits_i;
assign vld_addr = (|wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
assign tag = (|wr_cl_vld_i) ? wr_cl_tag_i : rd_tag_i[vld_sel_q];// delayed by one cycle
assign idx = (|wr_cl_vld_i) ? wr_cl_tag_i : rd_tag_i[vld_sel_q];// delayed by one cycle
assign bank_off_d = (|wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d];
assign wr_idx_oh = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:3]);
assign vld_addr = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
assign rd_tag = rd_tag_i[vld_sel_q]; //delayed by one cycle
assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d];
assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
assign vld_req = (wr_cl_vld_i) ? wr_cl_we_i : (rd_acked) ? '1 : '0;
// priority masking
// disable low prio requests when any of the high prio reqs is present
assign rd_req_prio = rd_req_i & rd_prio_i;
assign rd_req_masked = (|rd_req_prio) ? rd_req_prio : rd_req_i;
// priority arbitration for each bank separately
// full cl writes always block the cache
// tag readouts / cl readouts may interleave with single word writes in case
// there is no conflict between the indexes
always_comb begin : p_prio_arb
// interface
wr_ack_o = 1'b0;
rd_ack_o = '0;
// mem arrays
bank_req = '0;
bank_we = '0;
vld_req = '0;
vld_we = 1'b0;
vld_sel_d = 0;
bank_idx = '{default:wr_cl_idx_i};
bank_idx_d = wr_cl_idx_i;
if(|wr_cl_vld_i) begin
bank_req = '1;
bank_we = '1;
vld_req = wr_cl_vld_i;
vld_we = 1'b1;
end else begin
// loop over ports
for (int k=0;k<NUM_RD_PORTS;k++) begin
if(rd_req_i[k]) begin
rd_ack_o[k] = 1'b1;
vld_req = 1'b1;
vld_sel_d = k;
bank_req = dcache_cl_bin2oh(rd_off_i[k][DCACHE_OFFSET_WIDTH-1:3]);
bank_idx = '{default:rd_idx_i[k]};
bank_idx_d = rd_idx_i[k];
break;
end
end
// check whether we can interleave a single word write
if(~(|(bank_req & wr_idx_oh))) begin
if(|wr_req_i) begin
wr_ack_o = 1'b1;
bank_req |= wr_idx_oh;
bank_we = wr_idx_oh;
bank_idx[wr_off_i[DCACHE_OFFSET_WIDTH-1:3]] = wr_idx_i;
end
end
end
end
///////////////////////////////////////////////////////
// tag comparison, hit generation
///////////////////////////////////////////////////////
logic [DCACHE_WBUF_DEPTH-1:0][7:0] wbuffer_bvalid;
logic [DCACHE_WBUF_DEPTH-1:0][63:0] wbuffer_data;
// word tag comparison in write buffer
assign wbuffer_cmp_addr = (wr_cl_byp_en_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} :
{tag, bank_idx_q, bank_off_q};
// tag comparison of way 0
assign rd_hit_oh_o[0] = (tag == tag_rdata[0]) & rd_vld_bits_o[0];
// use way 0 to bypass read data in case we missed on the cache or in case the req is NC
assign rdata_cl[0] = (wr_cl_byp_en_i) ? wr_cl_data_i[wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3]] :
(rd_hit_oh_o[0]) ? bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:3]][0] :
'0 ;
// read port arbiter
rrarbiter #(
.NUM_REQ(NUM_PORTS)
) i_rrarbiter (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i( 1'b0 ),
.en_i ( ~wr_cl_vld_i ),
.req_i ( rd_req_masked ),
.ack_o ( rd_ack_o ),
.vld_o ( rd_acked ),
.idx_o ( vld_sel_d )
);
generate
for (genvar i=1;i<DCACHE_SET_ASSOC;i++) begin : g_tag_cmpsel
// tag comparison of ways >0
assign rd_hit_oh_o[i] = (tag == tag_rdata[i]) & rd_vld_bits_o[i];
// byte offset mux of ways >0
assign rdata_cl[i] = (rd_hit_oh_o[i] & ~wr_cl_byp_en_i) ? bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:3]][i] : '0;
end
always_comb begin : p_bank_req
vld_we = wr_cl_vld_i;
bank_req = '0;
wr_ack_o = '0;
bank_we = '0;
bank_idx = '{default:wr_idx_i};
for(genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin
for(genvar j=0; j<8; j++) begin
assign wbuffer_bvalid[k][j] = wbuffer_data_i[k].valid[j];
end
assign wbuffer_data[k] = wbuffer_data_i[k].data;
assign wbuffer_hit_oh[k] = (|wbuffer_bvalid[k]) & (wbuffer_data_i[k].wtag == (wbuffer_cmp_addr >> 3));
for(int k=0; k<NUM_PORTS; k++) begin
bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:3] == wr_off_i[DCACHE_OFFSET_WIDTH-1:3];
end
// overlay bytes that hit in the write buffer
for(genvar k=0; k<8; k++) begin
assign rd_data_o[8*k +: 8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k +: 8] : rdata[8*k +: 8];
if(wr_cl_vld_i) begin
bank_req = '1;
bank_we = '1;
bank_idx = '{default:wr_cl_idx_i};
end else begin
if(rd_acked) begin
if(~rd_tag_only_i[vld_sel_d]) begin
bank_req = dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:3]);
bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:3]] = rd_idx_i[vld_sel_d];
end
end
if(|wr_req_i) begin
if(rd_tag_only_i[vld_sel_d] | ~(rd_ack_o[vld_sel_d] & bank_collision[vld_sel_d])) begin
wr_ack_o = 1'b1;
bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:3]);
bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:3]);
end
end
end
end
///////////////////////////////////////////////////////
// tag comparison, hit generation, readout muxes
///////////////////////////////////////////////////////
logic [DCACHE_WBUF_DEPTH-1:0][7:0] wbuffer_bvalid;
logic [DCACHE_WBUF_DEPTH-1:0][63:0] wbuffer_data;
logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off;
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx;
logic [$clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx;
assign cmp_en_d = (|vld_req) & ~vld_we;
// word tag comparison in write buffer
assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} :
{rd_tag, bank_idx_q, bank_off_q};
// hit generation
generate
for (genvar i=0;i<DCACHE_SET_ASSOC;i++) begin : g_tag_cmpsel
// tag comparison of ways >0
assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q;
// byte offset mux of ways >0
assign rdata_cl[i] = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:3]][i];
end
for(genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin : g_wbuffer_hit
assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & (wbuffer_data_i[k].wtag == (wbuffer_cmp_addr >> 3));
end
endgenerate
// OR reduction of writebuffer byte enables
always_comb begin : p_wbuf_be_red
for(int j=0; j<8;j++) begin
wbuffer_be[j] = (wbuffer_hit_oh[0]) ? wbuffer_bvalid[0][j] : '0;
for(int k=1; k<DCACHE_WBUF_DEPTH;k++)
wbuffer_be[j] |= (wbuffer_hit_oh[k]) ? wbuffer_bvalid[k][j] : '0;
lzc #(
.WIDTH ( DCACHE_WBUF_DEPTH )
) i_lzc_wbuffer_hit (
.in_i ( wbuffer_hit_oh ),
.cnt_o ( wbuffer_hit_idx ),
.empty_o ( )
);
lzc #(
.WIDTH ( DCACHE_SET_ASSOC )
) i_lzc_rd_hit (
.in_i ( rd_hit_oh_o ),
.cnt_o ( rd_hit_idx ),
.empty_o ( )
);
assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data;
assign wbuffer_be = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0;
assign wr_cl_off = (wr_cl_nc_i) ? '0 : wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3];
assign rdata = (wr_cl_vld_i) ? wr_cl_data_i[wr_cl_off*64 +: 64] :
rdata_cl[rd_hit_idx];
// overlay bytes that hit in the write buffer
generate
for(genvar k=0; k<8; k++) begin : g_rd_data
assign rd_data_o[8*k +: 8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k +: 8] : rdata[8*k +: 8];
end
end
endgenerate
// OR reduction of writebuffer data
always_comb begin : p_wbuf_data_red
wbuffer_rdata = (wbuffer_hit_oh[0]) ? wbuffer_data[0] : '0;
for(int k=1; k<DCACHE_WBUF_DEPTH;k++)
wbuffer_rdata |= (wbuffer_hit_oh[k]) ? wbuffer_data[k] : '0;
end
// OR reduction of selected cachelines
always_comb begin : p_data_red
rdata = rdata_cl[0];
for(int k=1; k<DCACHE_SET_ASSOC;k++)
rdata |= rdata_cl[k];
end
///////////////////////////////////////////////////////
// memory arrays and regs
@ -255,7 +259,7 @@ module serpent_dcache_mem #(
sram #(
.DATA_WIDTH ( 64*DCACHE_SET_ASSOC ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) data_sram (
) i_data_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( bank_req [k] ),
@ -267,7 +271,7 @@ module serpent_dcache_mem #(
);
end
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : g_sram
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : g_tag_srams
assign tag_rdata[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH-1:0];
assign rd_vld_bits_o[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH];
@ -277,13 +281,13 @@ module serpent_dcache_mem #(
// tag + valid bit
.DATA_WIDTH ( DCACHE_TAG_WIDTH+1 ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) tag_sram (
) i_tag_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( vld_req[i] ),
.we_i ( vld_we ),
.addr_i ( vld_addr ),
.wdata_i ( {vld_wdata[i], tag} ),
.wdata_i ( {vld_wdata[i], wr_cl_tag_i} ),
.be_i ( '1 ),
.rdata_o ( vld_tag_rdata[i] )
);
@ -296,10 +300,12 @@ module serpent_dcache_mem #(
bank_idx_q <= '0;
bank_off_q <= '0;
vld_sel_q <= '0;
cmp_en_q <= '0;
end else begin
bank_idx_q <= bank_idx_d;
bank_off_q <= bank_off_d;
vld_sel_q <= vld_sel_d ;
cmp_en_q <= cmp_en_d;
end
end
@ -309,29 +315,61 @@ module serpent_dcache_mem #(
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
//pragma translate_off
`ifndef VERILATOR
hit_hot1: assert property (
@(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(rd_hit_oh_o))
else $fatal(1,"[l1 dcache] rd_hit_oh_o signal must be hot1");
hit_hot1: assert property (
@(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(rd_hit_oh_o))
else $fatal(1,"[l1 dcache] rd_hit_oh_o signal must be hot1");
word_write_hot1: assert property (
@(posedge clk_i) disable iff (~rst_ni) wr_ack_o |-> $onehot0(wr_req_i))
else $fatal(1,"[l1 dcache] wr_req_i signal must be hot1");
word_write_hot1: assert property (
@(posedge clk_i) disable iff (~rst_ni) wr_ack_o |-> $onehot0(wr_req_i))
else $fatal(1,"[l1 dcache] wr_req_i signal must be hot1");
wbuffer_hit_hot1: assert property (
@(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(wbuffer_hit_oh))
else $fatal(1,"[l1 dcache] wbuffer_hit_oh signal must be hot1");
wbuffer_hit_hot1: assert property (
@(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(wbuffer_hit_oh))
else $fatal(1,"[l1 dcache] wbuffer_hit_oh signal must be hot1");
// this is only used for verification!
logic vld_mirror[DCACHE_NUM_WORDS-1:0][DCACHE_SET_ASSOC-1:0];
logic [DCACHE_TAG_WIDTH-1:0] tag_mirror[DCACHE_NUM_WORDS-1:0][DCACHE_SET_ASSOC-1:0];
logic [DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
if(~rst_ni) begin
vld_mirror <= '{default:'0};
tag_mirror <= '{default:'0};
end else begin
for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin
if(vld_req[i] & vld_we) begin
vld_mirror[vld_addr][i] <= vld_wdata[i];
tag_mirror[vld_addr][i] <= wr_cl_tag_i;
end
end
end
end
// initial begin
// // assert wrong parameterizations
// assert (DCACHE_INDEX_WIDTH<=12)
// else $fatal(1,"[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages");
// end
`endif
//pragma translate_on
generate
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata);
end
endgenerate
tag_write_duplicate: assert property (
@(posedge clk_i) disable iff (~rst_ni) |vld_req |-> vld_we |-> ~(|tag_write_duplicate_test))
else $fatal(1,"[l1 dcache] cannot allocate a CL that is already present in the cache");
// logic tst;
// always_comb begin : p_test
// tst = tag == 44'h13;
// // for (int k=0; k<DCACHE_SET_ASSOC;k++) begin
// // tst |= tag_rdata[k] == 44'h96;
// // end
// tst &= bank_idx_d == 64'h0C;
// tst &= |wr_cl_we_i;
// end
`endif
//pragma translate_on
endmodule // serpent_dcache_mem

View file

@ -0,0 +1,480 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: miss controller for serpent dcache. Note that the current assumption
// is that the port with the highest index issues writes instead of reads.
import ariane_pkg::*;
import serpent_cache_pkg::*;
// Miss controller of the serpent L1 data cache.
//
// - arbitrates the miss requests of NUM_PORTS ports (lower index = higher priority).
//   the assertions at the end of this module require that only the port with
//   index NUM_PORTS-1 issues writes, all other ports issue reads.
// - tracks one outstanding read in a single MSHR and detects address collisions
//   between pending reads, the MSHR and in-flight write transactions.
// - forwards load / store / AMO requests to the memory interface.
// - turns memory responses (load acks, invalidations) and flushes into
//   cacheline writes towards the cache memory.
module serpent_dcache_missunit #(
  parameter NUM_PORTS = 3
)(
  input  logic                                       clk_i,       // Clock
  input  logic                                       rst_ni,      // Asynchronous reset active low
  // cache management, signals from/to core
  input  logic                                       enable_i,    // from CSR
  input  logic                                       flush_i,     // high until acknowledged
  output logic                                       flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
  output logic                                       miss_o,      // we missed on a ld/st
  // local cache management signals
  input  logic                                       wbuffer_empty_i,
  output logic                                       cache_en_o,  // local cache enable signal
  output logic                                       flush_en_o,  // local flush enable signal
  // AMO interface
  input  amo_req_t                                   amo_req_i,
  output amo_resp_t                                  amo_ack_o,
  // miss handling interface (ld, ptw, wbuffer)
  input  logic [NUM_PORTS-1:0]                       miss_req_i,
  output logic [NUM_PORTS-1:0]                       miss_ack_o,
  input  logic [NUM_PORTS-1:0]                       miss_nc_i,
  input  logic [NUM_PORTS-1:0]                       miss_we_i,
  input  logic [NUM_PORTS-1:0][63:0]                 miss_wdata_i,
  input  logic [NUM_PORTS-1:0][63:0]                 miss_paddr_i,
  input  logic [NUM_PORTS-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,
  input  logic [NUM_PORTS-1:0][2:0]                  miss_size_i,
  input  logic [NUM_PORTS-1:0][DCACHE_ID_WIDTH-1:0]  miss_wr_id_i, // only used for writes, set to zero for reads
  // signals that the request collided with a pending read
  output logic [NUM_PORTS-1:0]                       miss_replay_o,
  // signals response from memory
  output logic [NUM_PORTS-1:0]                       miss_rtrn_vld_o,
  output logic [DCACHE_ID_WIDTH-1:0]                 miss_rtrn_id_o, // only used for writes, set to zero for reads
  // from writebuffer
  input  logic [DCACHE_MAX_TX-1:0][63:0]             tx_paddr_i,  // used to check for address collisions with read operations
  input  logic [DCACHE_MAX_TX-1:0]                   tx_vld_i,    // used to check for address collisions with read operations
  // write interface to cache memory
  output logic                                       wr_cl_vld_o,     // cacheline write port is active (refill, invalidation or flush)
  output logic                                       wr_cl_nc_o,      // NC flag of the read stored in the MSHR
  output logic [DCACHE_SET_ASSOC-1:0]                wr_cl_we_o,      // per-way write enable
  output logic [DCACHE_TAG_WIDTH-1:0]                wr_cl_tag_o,
  output logic [DCACHE_CL_IDX_WIDTH-1:0]             wr_cl_idx_o,
  output logic [DCACHE_OFFSET_WIDTH-1:0]             wr_cl_off_o,
  output logic [DCACHE_LINE_WIDTH-1:0]               wr_cl_data_o,
  output logic [DCACHE_LINE_WIDTH/8-1:0]             wr_cl_data_be_o,
  output logic [DCACHE_SET_ASSOC-1:0]                wr_vld_bits_o,   // valid bits to be written (all zero for flush / invalidation)
  // memory interface
  input  logic                                       mem_rtrn_vld_i,
  input  dcache_rtrn_t                               mem_rtrn_i,
  output logic                                       mem_data_req_o,
  input  logic                                       mem_data_ack_i,
  output dcache_req_t                                mem_data_o
);

  // controller FSM
  typedef enum logic[2:0] {IDLE, DRAIN, AMO, AMO_WAIT, FLUSH} state_t;
  state_t state_d, state_q;

  // MSHR for reads
  typedef struct packed {
    logic [63:0]                         paddr   ;
    logic [2:0]                          size    ;
    logic [DCACHE_SET_ASSOC-1:0]         vld_bits;
    logic [DCACHE_ID_WIDTH-1:0]          id      ;
    logic                                nc      ;
    logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way;
    logic [$clog2(NUM_PORTS)-1:0]        miss_port_idx;
  } mshr_t;

  mshr_t mshr_d, mshr_q;
  logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way, inv_way, rnd_way;
  logic mshr_vld_d, mshr_vld_q, mshr_vld_q1;
  logic mshr_allocate;
  logic update_lfsr, all_ways_valid;

  logic enable_d, enable_q;
  logic flush_ack_d, flush_ack_q;
  logic flush_en, flush_done;
  logic amo_sel, mask_reads, miss_is_write;
  logic [63:0] amo_data;
  logic [$clog2(NUM_PORTS)-1:0] miss_port_idx;
  logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q;
  logic [NUM_PORTS-1:0] miss_req;

  logic inv_vld, inv_vld_all, cl_write_en;
  logic load_ack, store_ack, amo_ack;

  logic [NUM_PORTS-1:0] mshr_rdrd_collision_d, mshr_rdrd_collision_q;
  logic [NUM_PORTS-1:0] mshr_rdrd_collision;
  logic tx_rdwr_collision, mshr_rdwr_collision;

///////////////////////////////////////////////////////
// input arbitration and general control sigs
///////////////////////////////////////////////////////

  assign cache_en_o    = enable_q;
  // the flush counter walks over all cache indices while flush_en is asserted
  assign cnt_d         = (flush_en) ? cnt_q + 1 : '0;
  assign flush_done    = (cnt_q == DCACHE_NUM_WORDS-1);

  // when reads are masked, only write requests are considered by the arbiter
  assign miss_req      = (mask_reads) ? miss_we_i & miss_req_i : miss_req_i;
  assign miss_is_write = miss_we_i[miss_port_idx];

  // determine which port to serve (lower indices have higher prio)
  lzc #(
    .WIDTH ( NUM_PORTS )
  ) i_port (
    .in_i    ( miss_req      ),
    .cnt_o   ( miss_port_idx ),
    .empty_o (               )
  );

///////////////////////////////////////////////////////
// MSHR and way replacement logic (only for read ops)
///////////////////////////////////////////////////////

  // find invalid cache line
  lzc #(
    .WIDTH ( DCACHE_SET_ASSOC )
  ) i_lzc (
    .in_i    ( ~miss_vld_bits_i[miss_port_idx] ),
    .cnt_o   ( inv_way                         ),
    .empty_o ( all_ways_valid                  )
  );

  // generate random cacheline index
  lfsr_8bit #(
    .WIDTH ( DCACHE_SET_ASSOC )
  ) i_lfsr (
    .clk_i          ( clk_i       ),
    .rst_ni         ( rst_ni      ),
    .en_i           ( update_lfsr ),
    .refill_way_oh  (             ),
    .refill_way_bin ( rnd_way     )
  );

  // prefer an invalid way, fall back to a random way if all ways are valid
  assign repl_way             = (all_ways_valid) ? rnd_way : inv_way;

  // the MSHR captures the selected request upon allocation and holds it otherwise
  assign mshr_d.size          = (mshr_allocate)  ? miss_size_i    [miss_port_idx] : mshr_q.size;
  assign mshr_d.paddr         = (mshr_allocate)  ? miss_paddr_i   [miss_port_idx] : mshr_q.paddr;
  assign mshr_d.vld_bits      = (mshr_allocate)  ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits;
  assign mshr_d.id            = (mshr_allocate)  ? miss_wr_id_i   [miss_port_idx] : mshr_q.id;
  assign mshr_d.nc            = (mshr_allocate)  ? miss_nc_i      [miss_port_idx] : mshr_q.nc;
  assign mshr_d.repl_way      = (mshr_allocate)  ? repl_way                       : mshr_q.repl_way;
  assign mshr_d.miss_port_idx = (mshr_allocate)  ? miss_port_idx                  : mshr_q.miss_port_idx;

  // currently we only have one outstanding read TX, hence an incoming load response clears the MSHR
  assign mshr_vld_d = (mshr_allocate) ? 1'b1 :
                      (load_ack)      ? 1'b0 :
                                        mshr_vld_q;

  // a miss is only signalled for cacheable reads that allocate the MSHR
  assign miss_o     = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0;

  // read/read collision: the CL address of a pending request matches the MSHR address while
  // the MSHR is valid (or was valid in the previous cycle). the collision flag is sticky for
  // as long as the request of that port stays asserted.
  generate
    for(genvar k=0; k<NUM_PORTS; k++) begin : g_rdrd_collision
      assign mshr_rdrd_collision[k]   = (mshr_q.paddr[63:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][63:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1);
      assign mshr_rdrd_collision_d[k] = (~miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k];
    end
  endgenerate

  // read/write collision, stalls the corresponding request
  // write collides with MSHR
  assign mshr_rdwr_collision = (mshr_q.paddr[63:DCACHE_OFFSET_WIDTH] == miss_paddr_i[NUM_PORTS-1][63:DCACHE_OFFSET_WIDTH]) && mshr_vld_q;

  // read collides with inflight TX
  always_comb begin : p_tx_coll
    tx_rdwr_collision = 1'b0;
    for(int k=0; k<DCACHE_MAX_TX; k++) begin
      tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][63:DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][63:DCACHE_OFFSET_WIDTH]) && tx_vld_i[k];
    end
  end

///////////////////////////////////////////////////////
// to memory
///////////////////////////////////////////////////////

  // if size = 32bit word, select appropriate offset, replicate for openpiton...
  assign amo_data = (amo_req_i.size==2'b10) ? {amo_req_i.operand_b[amo_req_i.operand_a[2]*32 +: 32],
                                               amo_req_i.operand_b[amo_req_i.operand_a[2]*32 +: 32]} :
                                              amo_req_i.operand_b;

  // always sign extend 32bit values.
  // note: the sign bit is replicated explicitly. wrapping the 32bit slice into $signed() does
  // not work here, since the other operand of the conditional operator is unsigned, which makes
  // the whole expression unsigned and hence leads to zero extension of the 32bit value.
  assign amo_ack_o.result = (amo_req_i.size==2'b10) ? {{32{mem_rtrn_i.data[amo_req_i.operand_a[2]*32 + 31]}},
                                                       mem_rtrn_i.data[amo_req_i.operand_a[2]*32 +: 32]} :
                                                      mem_rtrn_i.data[63:0];

  // outgoing memory requests
  assign mem_data_o.tid    = (amo_sel) ? '0               : miss_wr_id_i[miss_port_idx];
  assign mem_data_o.nc     = (amo_sel) ? 1'b0             : miss_nc_i[miss_port_idx];
  assign mem_data_o.way    = (amo_sel) ? '0               : repl_way;
  assign mem_data_o.data   = (amo_sel) ? amo_data         : miss_wdata_i[miss_port_idx];
  assign mem_data_o.size   = (amo_sel) ? amo_req_i.size   : miss_size_i [miss_port_idx];
  assign mem_data_o.amo_op = (amo_sel) ? amo_req_i.amo_op : AMO_NONE;

  // align address depending on transfer size
  always_comb begin : p_align
    mem_data_o.paddr = (amo_sel) ? amo_req_i.operand_a : miss_paddr_i[miss_port_idx];
    unique case (mem_data_o.size)
      3'b001: mem_data_o.paddr[0:0]                     = '0;
      3'b010: mem_data_o.paddr[1:0]                     = '0;
      3'b011: mem_data_o.paddr[2:0]                     = '0;
      3'b111: mem_data_o.paddr[DCACHE_OFFSET_WIDTH-1:0] = '0;
      default: ; // all other sizes: address is passed through unmodified
    endcase
  end

///////////////////////////////////////////////////////
// responses from memory
///////////////////////////////////////////////////////

  // incoming responses: decode the return type into the individual ack / invalidation strobes
  always_comb begin : p_rtrn_logic
    load_ack        = 1'b0;
    store_ack       = 1'b0;
    amo_ack         = 1'b0;
    inv_vld         = 1'b0;
    inv_vld_all     = 1'b0;
    miss_rtrn_vld_o ='0;
    if(mem_rtrn_vld_i) begin
      unique case (mem_rtrn_i.rtype)
        DCACHE_LOAD_ACK: begin
          load_ack = 1'b1;
          // the response goes back to the port that allocated the MSHR
          miss_rtrn_vld_o[mshr_q.miss_port_idx] = 1'b1;
        end
        DCACHE_STORE_ACK: begin
          store_ack = 1'b1;
          // store responses always go back to the last port
          miss_rtrn_vld_o[NUM_PORTS-1] = 1'b1;
        end
        DCACHE_ATOMIC_ACK: begin
          amo_ack = 1'b1;
        end
        DCACHE_INV_REQ: begin
          inv_vld     = mem_rtrn_i.inv.vld | mem_rtrn_i.inv.all;
          inv_vld_all = mem_rtrn_i.inv.all;
        end
        // TODO:
        // DCACHE_INT_REQ: begin
        // end
        default : begin
        end
      endcase
    end
  end

  // to write buffer
  assign miss_rtrn_id_o = mem_rtrn_i.tid;

///////////////////////////////////////////////////////
// writes to cache memory
///////////////////////////////////////////////////////

  // cacheline write port
  assign wr_cl_nc_o      = mshr_q.nc;
  assign wr_cl_vld_o     = load_ack | (|wr_cl_we_o);

  // priority: flush > invalidate all ways > invalidate single way > refill
  assign wr_cl_we_o      = ( flush_en    ) ? '1                                    :
                           ( inv_vld_all ) ? '1                                    :
                           ( inv_vld     ) ? dcache_way_bin2oh(mem_rtrn_i.inv.way) :
                           ( cl_write_en ) ? dcache_way_bin2oh(mshr_q.repl_way)    :
                                             '0;

  // flushes and invalidations clear the valid bits, refills set the bit of the replaced way
  assign wr_vld_bits_o   = ( flush_en    ) ? '0                                    :
                           ( inv_vld     ) ? '0                                    :
                           ( cl_write_en ) ? dcache_way_bin2oh(mshr_q.repl_way)    :
                                             '0;

  assign wr_cl_idx_o     = ( flush_en    ) ? cnt_q                                                        :
                           ( inv_vld     ) ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] :
                                             mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];

  assign wr_cl_tag_o     = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
  assign wr_cl_off_o     = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0];
  assign wr_cl_data_o    = mem_rtrn_i.data;
  assign wr_cl_data_be_o = ( cl_write_en ) ? '1 : '0;// we only write complete cachelines into the memory

  // only responses to cacheable (i.e., non-NC) loads are written into the cache
  assign cl_write_en     = load_ack & ~mshr_q.nc;

///////////////////////////////////////////////////////
// main control logic for generating tx
///////////////////////////////////////////////////////

  always_comb begin : p_fsm
    // default assignment
    state_d          = state_q;

    // disabling cache is possible anytime, enabling goes via flush
    enable_d         = enable_q & enable_i;
    flush_ack_d      = flush_ack_q;
    flush_ack_o      = 1'b0;
    flush_en_o       = 1'b0;
    flush_en         = 1'b0;
    mem_data_o.rtype = DCACHE_LOAD_REQ;
    mem_data_req_o   = 1'b0;
    amo_sel          = 1'b0;
    amo_ack_o.ack    = 1'b0;
    update_lfsr      = 1'b0;
    mshr_allocate    = 1'b0;
    miss_ack_o       = '0;
    miss_replay_o    = '0;
    // no new reads are accepted while the MSHR is occupied
    mask_reads       = mshr_vld_q;

    // interfaces
    unique case (state_q)
      //////////////////////////////////
      // wait for misses / amo ops
      IDLE: begin
        if(flush_i | (enable_i & ~enable_q)) begin
          // only start flushing if there are neither pending stores in the write buffer
          // nor an outstanding load in the MSHR, otherwise drain first
          if(wbuffer_empty_i && ~mshr_vld_q) begin
            flush_ack_d = flush_i;
            state_d     = FLUSH;
          end else begin
            state_d     = DRAIN;
          end
        end else if(amo_req_i.req) begin
          // same condition as for flushes: AMOs are only issued once everything has drained
          if(wbuffer_empty_i && ~mshr_vld_q) begin
            state_d     = AMO;
          end else begin
            state_d     = DRAIN;
          end
        // we've got a miss
        end else if(|miss_req) begin
          // this is a write miss, just pass through (but check whether write collides with MSHR)
          if(miss_is_write) begin
            // stall in case this write collides with the MSHR address
            if(~mshr_rdwr_collision) begin
              mem_data_req_o            = 1'b1;
              mem_data_o.rtype          = DCACHE_STORE_REQ;
              miss_ack_o[miss_port_idx] = mem_data_ack_i;
            end
          // this is a read miss, can only allocate 1 MSHR
          // in case of a load_ack we can accept a new miss, since the MSHR is being cleared
          end else if(~mshr_vld_q | load_ack) begin
            // replay the read request in case the address has collided with MSHR during the time the request was pending
            // i.e., the cache state may have been updated in the mean time due to a refill at the same CL address
            if(mshr_rdrd_collision_d[miss_port_idx]) begin
              miss_replay_o[miss_port_idx] = 1'b1;
            // stall in case this CL address overlaps with a write TX that is in flight
            end else if(~tx_rdwr_collision) begin
              mem_data_req_o            = 1'b1;
              mem_data_o.rtype          = DCACHE_LOAD_REQ;
              miss_ack_o[miss_port_idx] = mem_data_ack_i;
              mshr_allocate             = mem_data_ack_i;
              update_lfsr               = all_ways_valid & mem_data_ack_i;// need to evict a random way
            end
          end
        end
      end
      //////////////////////////////////
      // only handle stores, do not accept new read requests
      // wait until MSHR is cleared and wbuffer is empty
      DRAIN: begin
        mask_reads = 1'b1;
        // these are writes, check whether they collide with MSHR
        if(|miss_req && ~mshr_rdwr_collision) begin
          mem_data_req_o            = 1'b1;
          mem_data_o.rtype          = DCACHE_STORE_REQ;
          miss_ack_o[miss_port_idx] = mem_data_ack_i;
        end

        if(wbuffer_empty_i && ~mshr_vld_q) begin
          state_d = IDLE;
        end
      end
      //////////////////////////////////
      // flush the cache
      FLUSH: begin
        // internal flush signal
        flush_en   = 1'b1;
        // only flush port controllers if this is a real flush
        // (i.e., not only a "clear" upon enabling the cache)
        flush_en_o = flush_ack_q;
        if(flush_done) begin
          state_d     = IDLE;
          flush_ack_o = flush_ack_q;
          flush_ack_d = 1'b0;
          enable_d    = enable_i;
        end
      end
      //////////////////////////////////
      // send out amo op request
      AMO: begin
        mem_data_o.rtype = DCACHE_ATOMIC_REQ;
        mem_data_req_o   = 1'b1;
        amo_sel          = 1'b1;
        if(mem_data_ack_i) begin
          state_d = AMO_WAIT;
        end
      end
      //////////////////////////////////
      // block and wait until AMO OP returns
      AMO_WAIT: begin
        amo_sel = 1'b1;
        if(amo_ack) begin
          amo_ack_o.ack = 1'b1;
          state_d       = IDLE;
        end
      end
      //////////////////////////////////
      default: begin
        // we should never get here
        state_d = IDLE;
      end
    endcase // state_q
  end

///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////

  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
    if(~rst_ni) begin
      // the FSM comes out of reset in the FLUSH state
      state_q               <= FLUSH;
      cnt_q                 <= '0;
      enable_q              <= '0;
      flush_ack_q           <= '0;
      mshr_vld_q            <= '0;
      mshr_vld_q1           <= '0;
      mshr_q                <= '0;
      mshr_rdrd_collision_q <= '0;
    end else begin
      state_q               <= state_d;
      cnt_q                 <= cnt_d;
      enable_q              <= enable_d;
      flush_ack_q           <= flush_ack_d;
      mshr_vld_q            <= mshr_vld_d;
      mshr_vld_q1           <= mshr_vld_q;
      mshr_q                <= mshr_d;
      mshr_rdrd_collision_q <= mshr_rdrd_collision_d;
    end
  end

///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////

//pragma translate_off
`ifndef VERILATOR

  // a load response to an NC read in the MSHR must be flagged NC as well
  nc_response : assert property (
    @(posedge clk_i) disable iff (~rst_ni) mshr_vld_q |-> mshr_q.nc |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.nc)
      else $fatal(1,"[l1 dcache missunit] NC load request implies NC load response");

  // the transaction ID of a load response must match the ID stored in the MSHR
  read_tid : assert property (
    @(posedge clk_i) disable iff (~rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id)
      else $fatal(1,"[l1 dcache missunit] TID of load response doesn't match");

  read_ports : assert property (
    @(posedge clk_i) disable iff (~rst_ni) |miss_req_i[NUM_PORTS-2:0] |-> miss_we_i[NUM_PORTS-2:0] == 0)
      else $fatal(1,"[l1 dcache missunit] only last port can issue write requests");

  write_port : assert property (
    @(posedge clk_i) disable iff (~rst_ni) miss_req_i[NUM_PORTS-1] |-> miss_we_i[NUM_PORTS-1])
      else $fatal(1,"[l1 dcache missunit] last port can only issue write requests");

  initial begin
    // assert wrong parameterizations
    assert (NUM_PORTS>=2)
      else $fatal(1,"[l1 dcache missunit] at least two ports are required (one read port, one write port)");
  end
`endif
//pragma translate_on

endmodule // serpent_dcache_missunit

View file

@ -17,11 +17,16 @@
// 1) the write buffer behaves as a fully-associative cache, and is therefore coalescing.
// this cache is used by the cache readout logic to forward data to the load unit.
//
// each byte can be in 3 states (valid/dirty):
// each byte can be in the following states (valid/dirty/txblock):
//
// 0/x: invalid -> free entry in the buffer that can be written
// 1/1: written and not part of TX in-flight
// 1/0: Byte is part of a TX in-flight
// 0/0/0: invalid -> free entry in the buffer
// 1/1/0: valid and dirty, Byte is hence not part of TX in-flight
// 1/0/1: valid and not dirty, Byte is part of a TX in-flight
// 1/1/1: valid, dirty and part of a TX in-flight. this means that the byte has been
// overwritten while in TX and needs to be retransmitted once the write of that byte returns.
// 1/0/0: this would represent a clean state, but is never reached in the wbuffer in the current implementation.
// this is because when a TX returns, and the byte is in state [1/0/1], it is written to cache if needed and
// its state is immediately cleared to 0/x/x.
//
// this state is used to distinguish between bytes that have been written and not
// yet sent to the memory subsystem, and bytes that are part of a transaction.
@ -49,8 +54,6 @@ import ariane_pkg::*;
import serpent_cache_pkg::*;
module serpent_dcache_wbuffer #(
parameter NUM_WORDS = 8,
parameter MAX_TX = 4, // determines the number of unique TX IDs
parameter NC_ADDR_BEGIN = 40'h8000000000, // start address of noncacheable I/O region
parameter bit NC_ADDR_GE_LT = 1'b1 // determines how the physical address is compared with NC_ADDR_BEGIN
)(
@ -64,73 +67,78 @@ module serpent_dcache_wbuffer #(
output dcache_req_o_t req_port_o,
// interface to miss handler
input logic miss_ack_i,
output logic miss_paddr_o,
output logic [63:0] miss_paddr_o,
output logic miss_req_o,
output logic miss_we_o, // always 1 here
output logic [63:0] miss_wdata_o,
output logic miss_vld_bits_o, // unused here (set to 0)
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0)
output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, //
output logic [$clog2(MAX_TX)-1:0] miss_id_o, // id of this transaction
output logic [DCACHE_ID_WIDTH-1:0] miss_wr_id_o, // id of this transaction
// write responses from memory
input logic miss_rtrn_i,
input logic [$clog2(MAX_TX)-1:0] miss_rtrn_id_i, // transaction id to clear
input logic miss_rtrn_vld_i,
input logic [DCACHE_ID_WIDTH-1:0] miss_rtrn_id_i, // transaction id to clear
// cache read interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to 1 here as we do not have to read the data arrays
input logic rd_ack_i,
input logic [63:0] rd_data_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_data_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i,
// incoming invalidations
input logic inval_vld_i,
input logic [DCACHE_CL_IDX_WIDTH-1:0] inval_cl_idx_i,
// cacheline writes
input logic wr_cl_vld_i,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
// cache word write interface
output logic [DCACHE_SET_ASSOC-1:0] wr_req_o,
logic wr_ack_i,
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o,
logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o,
logic [63:0] wr_data_o,
logic [7:0] wr_data_be,
// to forwarding logic
output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o
input logic wr_ack_i,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o,
output logic [63:0] wr_data_o,
output logic [7:0] wr_data_be_o,
// to forwarding logic and miss unit
output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o,
output logic [DCACHE_MAX_TX-1:0][63:0] tx_paddr_o, // used to check for address collisions with read operations
output logic [DCACHE_MAX_TX-1:0] tx_vld_o
);
// TX status registers are indexed with the transaction ID
// they basically store which bytes from which buffer entry are part
// of that transaction
typedef struct packed {
logic vld;
logic [7:0] be;
logic [$clog2(NUM_WORDS)-1:0] ptr;
} tx_stat_t;
tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q;
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q;
logic [DCACHE_WBUF_DEPTH-1:0] valid;
logic [DCACHE_WBUF_DEPTH-1:0] dirty;
logic [DCACHE_WBUF_DEPTH-1:0] tocheck;
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit;
logic [DCACHE_WBUF_DEPTH-1:0][7:0] bdirty;
tx_stat_t [MAX_TX-1:0] tx_stat_d, tx_stat_q;
wbuffer_t [NUM_WORDS-1:0] wbuffer_q, wbuffer_d;
logic [NUM_WORDS-1:0] valid;
logic [NUM_WORDS-1:0] dirty;
logic [NUM_WORDS-1:0] tx;
logic [NUM_WORDS-1:0] tocheck;
logic [NUM_WORDS-1:0] wbuffer_hit_oh, inval_hit;
logic [NUM_WORDS-1:0][7:0] bdirty;
logic [$clog2(NUM_WORDS)-1:0] next_ptr, dirty_ptr, hit_ptr, wr_ptr, inval_ptr, check_ptr_d, check_ptr_q, rtrn_ptr;
logic [$clog2(MAX_TX)-1:0] tx_cnt_q, tx_cnt_d, tx_id_q, tx_id_d, rtrn_id;
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr;
logic [DCACHE_ID_WIDTH-1:0] tx_id_q, tx_id_d, rtrn_id;
logic [DCACHE_ID_WIDTH:0] tx_cnt_q, tx_cnt_d;
logic tx_id_wrap;
logic [2:0] bdirty_off;
logic [7:0] tx_be;
logic [63:0] wr_paddr, rd_paddr;
logic check_en_d, check_en_q;
logic [DCACHE_TAG_WIDTH-1:0] rd_tag_d, rd_tag_q;
logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_d, rd_hit_oh_q;
logic check_en_d, check_en_q, check_en_q1;
logic full, dirty_rd_en, rdy;
logic rtrn_empty, evict;
logic nc_pending_d, nc_pending_q, addr_is_nc;
logic wbuffer_wren;
logic wr_cl_vld_q, wr_cl_vld_d;
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d;
logic [63:0] debug_paddr [DCACHE_WBUF_DEPTH-1:0];
///////////////////////////////////////////////////////
// misc
///////////////////////////////////////////////////////
assign miss_nc_o = nc_pending_q;
generate
if (NC_ADDR_GE_LT) begin : g_nc_addr_high
assign addr_is_nc = (req_port_i.address_tag >= (NC_ADDR_BEGIN>>DCACHE_INDEX_WIDTH)) | ~cache_en_i;
@ -144,6 +152,13 @@ assign miss_we_o = 1'b1;
assign miss_vld_bits_o = '0;
assign wbuffer_data_o = wbuffer_q;
generate
for(genvar k=0; k<DCACHE_MAX_TX;k++) begin
assign tx_vld_o[k] = tx_stat_q[k].vld;
assign tx_paddr_o[k] = wbuffer_q[tx_stat_q[k].ptr].wtag<<3;
end
endgenerate
///////////////////////////////////////////////////////
// openpiton does not understand byte enable sigs
// need to convert to the four cases:
@ -156,7 +171,7 @@ assign wbuffer_data_o = wbuffer_q;
// get byte offset
lzc #(
.WIDTH ( NUM_WORDS )
.WIDTH ( DCACHE_WBUF_DEPTH )
) i_vld_bdirty (
.in_i ( bdirty[dirty_ptr] ),
.cnt_o ( bdirty_off ),
@ -165,8 +180,8 @@ lzc #(
// add the offset to the physical base address of this buffer entry
assign miss_paddr_o = {wbuffer_q[dirty_ptr].wtag, bdirty_off};
assign miss_id_o = tx_id_q;
assign miss_req_o = (|dirty) && (tx_cnt_q < MAX_TX);
assign miss_wr_id_o = tx_id_q;
assign miss_req_o = (|dirty) && (tx_cnt_q < DCACHE_MAX_TX);
always_comb begin : p_be_to_size
unique case(bdirty[dirty_ptr])
@ -190,29 +205,29 @@ always_comb begin : p_offset_mux
miss_wdata_o = '0;
tx_be = '0;
unique case(miss_size_o)
2'b00: begin // byte
for(int k=0; k<8; k++) begin
miss_wdata_o[k*8 +: 8] = wbuffer_q[dirty_ptr][bdirty_off*8 +: 8];
end
tx_be[bdirty_off] = '1;
end
2'b01: begin // hword
3'b001: begin // hword
for(int k=0; k<4; k++) begin
miss_wdata_o[k*16 +: 16] = wbuffer_q[dirty_ptr][bdirty_off*16 +: 16];
miss_wdata_o[k*16 +: 16] = wbuffer_q[dirty_ptr].data[bdirty_off*8 +: 16];
end
tx_be[bdirty_off +:2 ] = '1;
end
2'b10: begin // word
3'b010: begin // word
for(int k=0; k<2; k++) begin
miss_wdata_o[k*32 +: 32] = wbuffer_q[dirty_ptr][bdirty_off*32 +: 32];
miss_wdata_o[k*32 +: 32] = wbuffer_q[dirty_ptr].data[bdirty_off*8 +: 32];
end
tx_be[bdirty_off +:4 ] = '1;
end
2'b11: begin // dword
miss_wdata_o = wbuffer_q[dirty_ptr];
tx_be = '1;
3'b011: begin // dword
miss_wdata_o = wbuffer_q[dirty_ptr].data;
tx_be = '1;
end
endcase // miss_size_o
default: begin // byte
for(int k=0; k<8; k++) begin
miss_wdata_o[k*8 +: 8] = wbuffer_q[dirty_ptr].data[bdirty_off*8 +: 8];
end
tx_be[bdirty_off] = '1;
end
endcase // miss_size_o
end
///////////////////////////////////////////////////////
@ -221,9 +236,9 @@ end
// TODO: todo: make this fall through!
fifo_v2 #(
.FALL_THROUGH ( 1'b0 ),
.DATA_WIDTH ( $clog2(MAX_TX) ),
.DEPTH ( MAX_TX )
.FALL_THROUGH ( 1'b0 ),
.DATA_WIDTH ( $clog2(DCACHE_MAX_TX) ),
.DEPTH ( DCACHE_MAX_TX )
) i_rtrn_id_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
@ -234,7 +249,7 @@ fifo_v2 #(
.alm_full_o ( ),
.alm_empty_o( ),
.data_i ( miss_rtrn_id_i ),
.push_i ( miss_rtrn_i ),
.push_i ( miss_rtrn_vld_i ),
.data_o ( rtrn_id ),
.pop_i ( evict )
);
@ -248,35 +263,23 @@ always_comb begin : p_tx_stat
if(dirty_rd_en) begin
tx_stat_d[tx_id_q].vld = 1'b1;
tx_stat_d[tx_id_q].ptr = dirty_ptr;
tx_stat_d[tx_id_q].be = bdirty[dirty_ptr];
tx_stat_d[tx_id_q].be = tx_be;
end
// clear entry if it is clear whether it can be pushed to the cache or not
if((~rtrn_empty) && wbuffer_q[rtrn_ptr].checked) begin
wr_req_o = wbuffer_q[rtrn_ptr].hit_oh;
if(|wbuffer_q[rtrn_ptr].hit_oh) begin
// check if data is clean and can be written, otherwise skip
// check if CL is present, otherwise skip
if((|wr_data_be_o) && (|wbuffer_q[rtrn_ptr].hit_oh)) begin
wr_req_o = wbuffer_q[rtrn_ptr].hit_oh;
if(wr_ack_i) begin
evict = 1'b1;
tx_stat_d[rtrn_id].vld = 1'b0;
end
end
end else begin
evict = 1'b1;
tx_stat_d[rtrn_id].vld = 1'b0;
end
end
for(int k=0; k<NUM_WORDS;k++) begin
// if we write into a word that is currently being transmitted,
// we have to clear the corresponding byte enable signals in these transactions
// this ensures that the byte flags of the dirty bytes are not changed to CLEAN
// when the write response comes back from memory
if(req_port_i.data_req & rdy & (wr_ptr==tx_stat_q[k].ptr) & tx_stat_q[k].vld) begin
for(int j=0; j<8; j++) begin
if(req_port_i.data_be[j]) begin
tx_stat_d[k].be[j] = 1'b0;
end
end
end
end
end
end
@ -285,48 +288,66 @@ assign tx_cnt_d = (dirty_rd_en & evict) ? tx_cnt_q :
(evict) ? tx_cnt_q - 1 :
tx_cnt_q;
// wrapping counter
assign tx_id_d = (dirty_rd_en) ? tx_id_q + 1 :
tx_id_q;
assign tx_id_d = (dirty_rd_en & tx_id_wrap) ? '0 :
(dirty_rd_en) ? tx_id_q + 1 :
tx_id_q;
assign tx_id_wrap = tx_id_q == (DCACHE_MAX_TX-1);
///////////////////////////////////////////////////////
// cache readout & update
///////////////////////////////////////////////////////
assign rd_tag_d = rd_paddr>>DCACHE_INDEX_WIDTH;
// trigger TAG readout in cache
assign rd_tag_only_o = 1'b1;
assign rd_paddr = wbuffer_q[check_ptr_d].wtag<<3;
assign rd_req_o = |tocheck;
assign rd_tag_o = rd_paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
assign rd_tag_o = rd_tag_q;//delay by one cycle
assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign check_en_d = rd_req_o & rd_ack_i;
// cache update port
assign rtrn_ptr = tx_stat_q[rtrn_id].ptr;
assign wr_data_be = tx_stat_q[rtrn_id].be;
assign wr_paddr = wbuffer_q[rtrn_ptr].wtag<<3;
assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_data_o = wbuffer_q[rtrn_ptr].data;
assign rtrn_ptr = tx_stat_q[rtrn_id].ptr;
// if we wrote into a word while it was in-flight, we cannot write the dirty bytes to the cache
// when the TX returns
assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty);
assign wr_paddr = wbuffer_q[rtrn_ptr].wtag<<3;
assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_data_o = wbuffer_q[rtrn_ptr].data;
///////////////////////////////////////////////////////
// readout of status bits, index calculation
///////////////////////////////////////////////////////
assign wr_cl_vld_d = wr_cl_vld_i;
assign wr_cl_idx_d = wr_cl_idx_i;
generate
for(genvar k=0; k<NUM_WORDS-1; k++) begin
for(genvar j=0; j<8; j++) begin
assign bdirty[k][j] = wbuffer_q[k].dirty[j] & wbuffer_q[k].valid[j];
end
for(genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin
// only for debug, will be pruned
assign debug_paddr[k] = wbuffer_q[k].wtag << 3;
// dirty bytes that are ready for transmission.
// note that we cannot retransmit a byte that is already in-flight
// since the multiple transactions might overtake each other in the memory system!
assign bdirty[k] = wbuffer_q[k].dirty & wbuffer_q[k].valid & (~wbuffer_q[k].txblock);
assign dirty[k] = |bdirty[k];
assign valid[k] = |wbuffer_q[k].valid;
assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:3]});
// checks if an invalidation/cache refill hits a particular word
// note: an invalidation can hit multiple words!
assign inval_hit[k] = inval_vld_i & valid[k] & (wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH-3] == inval_cl_idx_i);
// need to respect previous cycle, too, since we add a cycle of latency to the rd_hit_oh_i signal...
assign inval_hit[k] = (wr_cl_vld_d & valid[k] & (wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH-3] == wr_cl_idx_d)) |
(wr_cl_vld_q & valid[k] & (wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH-3] == wr_cl_idx_q));
// these words have to be looked up in the cache
assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k];
end
@ -339,7 +360,7 @@ assign rdy = (|wbuffer_hit_oh) | (~full);
// next free entry in the buffer
lzc #(
.WIDTH ( NUM_WORDS )
.WIDTH ( DCACHE_WBUF_DEPTH )
) i_vld_lzc (
.in_i ( ~valid ),
.cnt_o ( next_ptr ),
@ -348,25 +369,16 @@ lzc #(
// index of the buffer entry that is hit by the incoming write address
lzc #(
.WIDTH ( NUM_WORDS )
.WIDTH ( DCACHE_WBUF_DEPTH )
) i_hit_lzc (
.in_i ( wbuffer_hit_oh ),
.cnt_o ( hit_ptr ),
.empty_o ( )
);
// convert invalidation to index
lzc #(
.WIDTH ( NUM_WORDS )
) i_inval_lzc (
.in_i ( inval_hit ),
.cnt_o ( inval_ptr ),
.empty_o ( )
);
// next dirty word to serve
rrarbiter #(
.NUM_REQ ( NUM_WORDS )
.NUM_REQ ( DCACHE_WBUF_DEPTH )
) i_dirty_rr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
@ -380,7 +392,7 @@ rrarbiter #(
// next word to lookup in the cache
rrarbiter #(
.NUM_REQ ( NUM_WORDS )
.NUM_REQ ( DCACHE_WBUF_DEPTH )
) i_clean_rr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
@ -397,36 +409,54 @@ rrarbiter #(
// update logic
///////////////////////////////////////////////////////
assign req_port_o.data_rvalid = '0;
assign req_port_o.data_rdata = '0;
assign rd_hit_oh_d = rd_hit_oh_i;
// TODO: rewrite and separate into MUXES and write strobe logic
always_comb begin : p_buffer
wbuffer_d = wbuffer_q;
nc_pending_d = nc_pending_q;
dirty_rd_en = 1'b0;
req_port_o.data_gnt = 1'b0;
miss_nc_o = 1'b0;
wbuffer_wren = 1'b0;
// TAG lookup returns, mark corresponding word
if(check_en_q) begin
wbuffer_d[check_ptr_q].checked = 1'b1;
wbuffer_d[check_ptr_q].hit_oh = rd_hit_oh_i;
if(check_en_q1) begin
if(wbuffer_q[check_ptr_q1].valid) begin
wbuffer_d[check_ptr_q1].checked = 1'b1;
wbuffer_d[check_ptr_q1].hit_oh = rd_hit_oh_q;
end
end
// if an invalidation or cache line refill comes in and hits on the write buffer,
// we have to discard our knowledge of the corresponding cacheline state
for(int k=0; k<NUM_WORDS; k++) begin
for(int k=0; k<DCACHE_WBUF_DEPTH; k++) begin
if(inval_hit[k]) begin
wbuffer_d[k].checked = 1'b0;
end
end
// once TX write response came back and word is written to cache, change to INV
// once TX write response came back, we can clear the TX block. if it was not dirty, we
// can completely evict it - otherwise we have to leave it there for retransmission
if(evict) begin
for(int k=0; k<8; k++) begin
if(tx_stat_q[rtrn_id].be[k]) begin
wbuffer_d[rtrn_ptr].dirty[k] = 1'b0;
wbuffer_d[rtrn_ptr].valid[k] = 1'b0;
wbuffer_d[rtrn_ptr].txblock[k] = 1'b0;
if(~wbuffer_q[rtrn_ptr].dirty[k]) begin
wbuffer_d[rtrn_ptr].valid[k] = 1'b0;
// NOTE: uncomment only for debugging.
// this is not strictly needed, but makes it much easier to debug, since no invalid data remains in the buffer
wbuffer_d[rtrn_ptr].data[k*8 +:8] = '0;
end
end
end
// if all bytes are evicted, clear the cache status flag
if(wbuffer_d[rtrn_ptr].valid == 0) begin
wbuffer_d[rtrn_ptr].checked = 1'b0;
end
end
// mark bytes sent out to the memory system
@ -434,7 +464,8 @@ always_comb begin : p_buffer
dirty_rd_en = 1'b1;
for(int k=0; k<8; k++) begin
if(tx_be[k]) begin
wbuffer_d[dirty_ptr].dirty[k] = 1'b0;
wbuffer_d[dirty_ptr].dirty[k] = 1'b0;
wbuffer_d[dirty_ptr].txblock[k] = 1'b1;
end
end
end
@ -443,26 +474,25 @@ always_comb begin : p_buffer
if(nc_pending_q) begin
if(empty_o) begin
nc_pending_d = 1'b0;
req_port_o.data_gnt = 1'b1;
miss_nc_o = 1'b1;
end
end else begin
// write new word into the buffer
if(req_port_i.data_req & rdy) begin
// in case we have an NC address, need to drain the buffer first
if(empty_o | ~addr_is_nc) begin
// leave in the core fifo if NC
req_port_o.data_gnt = ~addr_is_nc;
nc_pending_d = addr_is_nc;
wbuffer_wren = 1'b1;
// leave in the core fifo if it is NC
req_port_o.data_gnt = 1'b1;
nc_pending_d = addr_is_nc;
wbuffer_d[wr_ptr].checked = 1'b0;
wbuffer_d[wr_ptr].wtag = {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:3]};
// mark bytes as dirty
for(int k=0; k<8; k++) begin
if(req_port_i.data_be[k]) begin
wbuffer_d[wr_ptr].valid = 1'b1;
wbuffer_d[wr_ptr].valid[k] = 1'b1;
wbuffer_d[wr_ptr].dirty[k] = 1'b1;
wbuffer_d[wr_ptr].data[k*8 +: 8] = req_port_i.data_wdata[k*8 +: 8];
end
@ -485,7 +515,13 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
tx_cnt_q <= '0;
tx_id_q <= '0;
check_ptr_q <= '0;
check_ptr_q1 <= '0;
check_en_q <= '0;
check_en_q1 <= '0;
rd_tag_q <= '0;
rd_hit_oh_q <= '0;
wr_cl_vld_q <= '0;
wr_cl_idx_q <= '0;
end else begin
wbuffer_q <= wbuffer_d;
tx_stat_q <= tx_stat_d;
@ -493,10 +529,17 @@ always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
tx_id_q <= tx_id_d;
nc_pending_q <= nc_pending_d;
check_ptr_q <= check_ptr_d;
check_ptr_q1 <= check_ptr_q;
check_en_q <= check_en_d;
check_en_q1 <= check_en_q;
rd_tag_q <= rd_tag_d;
rd_hit_oh_q <= rd_hit_oh_d;
wr_cl_vld_q <= wr_cl_vld_d;
wr_cl_idx_q <= wr_cl_idx_d;
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
@ -512,6 +555,14 @@ end
@(posedge clk_i) disable iff (~rst_ni) evict & miss_ack_i & miss_req_o |-> (tx_id_q != rtrn_id))
else $fatal(1,"[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle");
tx_valid0: assert property (
@(posedge clk_i) disable iff (~rst_ni) evict |-> tx_stat_q[rtrn_id].vld)
else $fatal(1,"[l1 dcache wbuffer] evicting invalid transaction slot");
tx_valid1: assert property (
@(posedge clk_i) disable iff (~rst_ni) evict |-> |wbuffer_q[rtrn_ptr].valid)
else $fatal(1,"[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid");
write_full: assert property (
@(posedge clk_i) disable iff (~rst_ni) req_port_i.data_req |-> req_port_o.data_gnt |-> ((~full) | (|wbuffer_hit_oh)))
else $fatal(1,"[l1 dcache wbuffer] cannot write if full or no hit");
@ -524,10 +575,19 @@ end
@(posedge clk_i) disable iff (~rst_ni) ~req_port_i.kill_req)
else $fatal(1,"[l1 dcache wbuffer] req_port_i.kill_req should not be asserted");
unused2: assert property (
@(posedge clk_i) disable iff (~rst_ni) ~req_port_i.data_we)
else $fatal(1,"[l1 dcache wbuffer] req_port_i.data_we should not be asserted");
generate
for(genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin
for(genvar j=0; j<8; j++) begin
byteStates: assert property (
@(posedge clk_i) disable iff (~rst_ni) {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]} inside {3'b000, 3'b110, 3'b101, 3'b111} )
else $fatal(1,$psprintf("[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b",
j,k,
wbuffer_q[k].valid[j],
wbuffer_q[k].dirty[j],
wbuffer_q[k].txblock[j]));
end
end
endgenerate
// initial begin
// // assert wrong parameterizations
// assert (DCACHE_INDEX_WIDTH<=12)

View file

@ -57,7 +57,7 @@ import serpent_cache_pkg::*;
module serpent_l15_adapter #(
) (
)(
input logic clk_i,
input logic rst_ni,

@ -1 +1 @@
Subproject commit 9278bc769f3efd006864a7ef7721f2796ed968e6
Subproject commit 21a060d2c2c75173312b82cc72db96a2c62e66c5

3
tb/tb_serpent_dcache/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
work
modelsim.ini
*.rep

27
tb/tb_serpent_dcache/Makefile Executable file
View file

@ -0,0 +1,27 @@
# Questa/ModelSim compile and simulation flow for this testbench.
# library and toplevel may be overridden on the command line.
library ?= work
toplevel ?= tb
# file list containing the sources to be compiled
src-list := tb.list
inc-path := $(shell pwd)/hdl/
# expand the entries of $(src-list) into the list of source files
# NOTE(review): 'cut -b 1-' selects all bytes of each line, so it looks like a no-op - confirm
src := $(shell xargs printf '\n%s' < $(src-list) | cut -b 1-)
compile_flag += +cover+i_dut -incr -64 -nologo
sim_opts += -64 -coverage -classdebug -voptargs="+acc"
# optional suffix that is appended to the vlib/vlog/vsim command names
questa_version ?= ${QUESTASIM_VERSION}
# create the library from scratch (depends on clean) and compile all sources
build: clean
vlib${questa_version} $(library)
vlog${questa_version} -work $(library) -pedanticerrors $(src) $(compile_flag) +incdir+$(inc-path)
touch $(library)/.build
# simulation that executes wave.do at startup
sim: build
vsim${questa_version} -lib $(library) $(toplevel) -do "do wave.do" $(sim_opts)
# command line simulation (-c): run until completion, then exit
simc: build
vsim${questa_version} -lib $(library) $(toplevel) -c -do "run -all; exit" $(sim_opts)
# remove the compiled library
clean:
rm -rf $(library)
.PHONY: clean simc sim build

View file

@ -0,0 +1,606 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: testbench for serpent_dcache. includes the following tests:
//
// 0) random accesses with disabled cache
// 1) random accesses with enabled cache to cacheable and noncacheable memory
// 2) linear, wrapping sweep with enabled cache
// 3) 1) with random stalls on the memory side and TLB side
// 4) nr 3) with random invalidations
//
// note that we use a simplified address translation scheme to emulate the TLB.
// (random offsets).
`include "tb.svh"
import ariane_pkg::*;
import serpent_cache_pkg::*;
import tb_pkg::*;
module tb;
// leave this
timeunit 1ps;
timeprecision 1ps;
// memory configuration (64bit words)
parameter MEM_BYTES = 2**DCACHE_INDEX_WIDTH * 4 * 32;
parameter MEM_WORDS = MEM_BYTES>>3;
// noncacheable portion
parameter NC_ADDR_BEGIN = MEM_BYTES>>3;//1/8th of the memory is NC
parameter NC_ADDR_GE_LT = 0;
// contention and invalidation rates (in %)
parameter MEM_RAND_HIT_RATE = 75;
parameter MEM_RAND_INV_RATE = 10;
parameter TLB_HIT_RATE = 95;
// parameters for random read sequences (in %)
parameter KILL_RATE = 5;
parameter VERBOSE = 0;
///////////////////////////////////////////////////////////////////////////////
// MUT signal declarations
///////////////////////////////////////////////////////////////////////////////
logic enable_i;
logic flush_i;
logic flush_ack_o;
logic miss_o;
logic wbuffer_empty_o;
amo_req_t amo_req_i;
amo_resp_t amo_ack_o;
dcache_req_i_t [2:0] req_ports_i;
dcache_req_o_t [2:0] req_ports_o;
logic mem_rtrn_vld_i;
dcache_rtrn_t mem_rtrn_i;
logic mem_data_req_o;
logic mem_data_ack_i;
dcache_req_t mem_data_o;
///////////////////////////////////////////////////////////////////////////////
// TB signal declarations
///////////////////////////////////////////////////////////////////////////////
logic [63:0] mem_array[MEM_WORDS-1:0];
string test_name;
logic clk_i, rst_ni;
logic [31:0] seq_num_resp, seq_num_write;
seq_t [2:0] seq_type;
logic [2:0] seq_done;
logic [6:0] req_rate[2:0];
logic seq_run, seq_last;
logic end_of_sim;
logic mem_rand_en;
logic inv_rand_en;
logic amo_rand_en;
logic tlb_rand_en;
logic write_en;
logic [63:0] write_paddr, write_data;
logic [7:0] write_be;
logic check_en;
logic [7:0] commit_be;
logic [63:0] commit_paddr;
logic commit_en;
typedef struct packed {
logic [1:0] size;
logic [63:0] paddr;
} resp_fifo_t;
logic [63:0] act_paddr[1:0];
logic [63:0] exp_rdata[1:0];
logic [63:0] exp_paddr[1:0];
resp_fifo_t fifo_data_in[1:0];
resp_fifo_t fifo_data[1:0];
logic [1:0] fifo_push, fifo_pop, fifo_flush;
///////////////////////////////////////////////////////////////////////////////
// helper tasks
///////////////////////////////////////////////////////////////////////////////
// Starts one test sequence on all port agents and blocks until all of them are done.
//   nReadVectors  : value driven onto seq_num_resp  (connected to the read port agents)
//   nWriteVectors : value driven onto seq_num_write (connected to the write port agent)
//   last          : value driven onto seq_last
task automatic runSeq(input int nReadVectors, input int nWriteVectors = 0, input logic last =1'b0);
seq_last = last;
// pulse seq_run for one clock cycle
seq_run = 1'b1;
seq_num_resp = nReadVectors;
seq_num_write = nWriteVectors;
`APPL_WAIT_CYC(clk_i,1)
seq_run = 1'b0;
// wait until all bits of seq_done are set
`APPL_WAIT_SIG(clk_i, &seq_done)
`APPL_WAIT_CYC(clk_i,1)
endtask : runSeq
// Requests a cache flush: asserts flush_i, waits until the DUT raises
// flush_ack_o, then deasserts flush_i and waits for one more clock cycle.
task automatic flushCache();
  flush_i = 1'b1;
  `APPL_WAIT_SIG(clk_i, flush_ack_o)
  // a sized literal must have a non-zero width (was 0'b0)
  flush_i = 1'b0;
  `APPL_WAIT_CYC(clk_i,1)
endtask : flushCache
// Pulses check_en (connected to check_en_i of the memory emulation) for
// one clock cycle and then waits for one more cycle.
task automatic memCheck();
  check_en = 1'b1;
  `APPL_WAIT_CYC(clk_i,1)
  // a sized literal must have a non-zero width (was 0'b0)
  check_en = 1'b0;
  `APPL_WAIT_CYC(clk_i,1)
endtask : memCheck
///////////////////////////////////////////////////////////////////////////////
// Clock Process
///////////////////////////////////////////////////////////////////////////////
// Free-running clock generator. The clock toggles until end_of_sim is
// asserted, followed by 100 additional cycles.
//
// This is an initial process on purpose: an "always @*" containing delays
// only starts once an event on end_of_sim is observed, which races with the
// initialization of end_of_sim in the stimuli process at time 0.
initial begin : p_clk
  do begin
    clk_i = 1;#(CLK_HI);
    clk_i = 0;#(CLK_LO);
  end while (end_of_sim == 1'b0);
  repeat (100) begin
    // generate a few extra cycle to allow
    // response acquisition to complete
    clk_i = 1;#(CLK_HI);
    clk_i = 0;#(CLK_LO);
  end
end
///////////////////////////////////////////////////////////////////////////////
// memory emulation
///////////////////////////////////////////////////////////////////////////////
// memory emulation instance. it is connected to the memory request/return
// interface of the DUT and exposes its memory array (mem_array) to the testbench.
tb_mem #(
.MEM_RAND_HIT_RATE ( MEM_RAND_HIT_RATE ),
.MEM_RAND_INV_RATE ( MEM_RAND_INV_RATE ),
.MEM_WORDS ( MEM_WORDS ),
.NC_ADDR_BEGIN ( NC_ADDR_BEGIN ),
.NC_ADDR_GE_LT ( NC_ADDR_GE_LT )
) i_tb_mem (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
// randomization enables, set by the stimuli process
.mem_rand_en_i ( mem_rand_en ),
.inv_rand_en_i ( inv_rand_en ),
.amo_rand_en_i ( amo_rand_en ),
// memory interface towards the DUT
.mem_data_req_i ( mem_data_req_o ),
.mem_data_ack_o ( mem_data_ack_i ),
.mem_data_i ( mem_data_o ),
.mem_rtrn_vld_o ( mem_rtrn_vld_i ),
.mem_rtrn_o ( mem_rtrn_i ),
// for verification
.seq_last_i ( seq_last ),
.check_en_i ( check_en ),
// commit_* are derived from the write buffer of the DUT,
// write_* are derived from the write port (index 2) of the DUT
.commit_en_i ( commit_en ),
.commit_be_i ( commit_be ),
.commit_paddr_i ( commit_paddr ),
.write_en_i ( write_en ),
.write_be_i ( write_be ),
.write_data_i ( write_data ),
.write_paddr_i ( write_paddr ),
.mem_array_o ( mem_array )
);
///////////////////////////////////////////////////////////////////////////////
// MUT
///////////////////////////////////////////////////////////////////////////////
// module under test. the instance name i_dut is referenced by the hierarchical
// probes in this testbench and by the +cover+i_dut compile flag in the Makefile.
serpent_dcache #(
.NC_ADDR_BEGIN ( NC_ADDR_BEGIN ),
.NC_ADDR_GE_LT ( NC_ADDR_GE_LT )
) i_dut (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
// cache control, driven by the stimuli process
.flush_i ( flush_i ),
.flush_ack_o ( flush_ack_o ),
.enable_i ( enable_i ),
.miss_o ( miss_o ),
.wbuffer_empty_o ( wbuffer_empty_o ),
// AMO interface, tied off in this testbench
.amo_req_i ( amo_req_i ),
.amo_ack_o ( amo_ack_o ),
// request ports: [0] and [1] are driven by the read port agents, [2] by the write port agent
.req_ports_i ( req_ports_i ),
.req_ports_o ( req_ports_o ),
// memory interface, connected to i_tb_mem
.mem_rtrn_vld_i ( mem_rtrn_vld_i ),
.mem_rtrn_i ( mem_rtrn_i ),
.mem_data_req_o ( mem_data_req_o ),
.mem_data_ack_i ( mem_data_ack_i ),
.mem_data_o ( mem_data_o )
);
///////////////////////////////////////////////////////////////////////////////
// port emulation programs
///////////////////////////////////////////////////////////////////////////////
// get actual paddr from read controllers
// the address is assembled from the tag, index and offset signals that are probed
// via hierarchical references inside the two read controllers of the DUT.
// note that these paths depend on the generate block name (genblk1) and on the
// internal signal names of the controllers.
assign act_paddr[0] = {i_dut.genblk1[0].i_serpent_dcache_ctrl.address_tag_d,
i_dut.genblk1[0].i_serpent_dcache_ctrl.address_idx_q,
i_dut.genblk1[0].i_serpent_dcache_ctrl.address_off_q};
assign act_paddr[1] = {i_dut.genblk1[1].i_serpent_dcache_ctrl.address_tag_d,
i_dut.genblk1[1].i_serpent_dcache_ctrl.address_idx_q,
i_dut.genblk1[1].i_serpent_dcache_ctrl.address_off_q};
// generate fifo queues for expected responses
// one queue per read port: an entry (size, paddr) is pushed for each granted
// request and popped again when the DUT returns the corresponding read response.
generate
for(genvar k=0; k<2;k++) begin
assign fifo_data_in[k] = {req_ports_i[k].data_size,
exp_paddr[k]};
// expected read data is looked up in the memory array (64bit word index)
assign exp_rdata[k] = mem_array[fifo_data[k].paddr>>3];
// push when a request is granted
assign fifo_push[k] = req_ports_i[k].data_req & req_ports_o[k].data_gnt;
// drop all queued entries on a kill request or a cache flush
assign fifo_flush[k] = req_ports_i[k].kill_req | flush_i;
// pop when the read response is valid
assign fifo_pop[k] = req_ports_o[k].data_rvalid;
fifo_v2 #(
.dtype(resp_fifo_t)
) i_resp_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( fifo_flush[k] ),
.testmode_i ( '0 ),
.full_o ( ),
.empty_o ( ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( fifo_data_in[k] ),
.push_i ( fifo_push[k] ),
.data_o ( fifo_data[k] ),
.pop_i ( fifo_pop[k] )
);
end
endgenerate
// read port agent connected to request port 0 of the DUT.
// the expected size/paddr/rdata inputs come from the response fifo of port 0.
tb_readport #(
.PORT_NAME ( "RD0" ),
.KILL_RATE ( KILL_RATE ),
.TLB_HIT_RATE ( TLB_HIT_RATE ),
.MEM_WORDS ( MEM_WORDS ),
.NC_ADDR_BEGIN ( NC_ADDR_BEGIN ),
.RND_SEED ( 5555555 ),
.VERBOSE ( VERBOSE )
) i_tb_readport0 (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.test_name_i ( test_name ),
.req_rate_i ( req_rate[0] ),
.seq_type_i ( seq_type[0] ),
.tlb_rand_en_i ( tlb_rand_en ),
.seq_run_i ( seq_run ),
.seq_num_resp_i ( seq_num_resp ),
.seq_last_i ( seq_last ),
.seq_done_o ( seq_done[0] ),
.exp_paddr_o ( exp_paddr[0] ),
.exp_size_i ( fifo_data[0].size ),
.exp_paddr_i ( fifo_data[0].paddr ),
.exp_rdata_i ( exp_rdata[0] ),
.act_paddr_i ( act_paddr[0] ),
.dut_req_port_o ( req_ports_i[0] ),
.dut_req_port_i ( req_ports_o[0] )
);
// read port agent connected to request port 1 of the DUT.
// same configuration as the agent above, except for the port name and the random seed.
tb_readport #(
.PORT_NAME ( "RD1" ),
.KILL_RATE ( KILL_RATE ),
.TLB_HIT_RATE ( TLB_HIT_RATE ),
.MEM_WORDS ( MEM_WORDS ),
.NC_ADDR_BEGIN ( NC_ADDR_BEGIN ),
.RND_SEED ( 3333333 ),
.VERBOSE ( VERBOSE )
) i_tb_readport1 (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.test_name_i ( test_name ),
.req_rate_i ( req_rate[1] ),
.seq_type_i ( seq_type[1] ),
.tlb_rand_en_i ( tlb_rand_en ),
.seq_run_i ( seq_run ),
.seq_num_resp_i ( seq_num_resp ),
.seq_last_i ( seq_last ),
.exp_paddr_o ( exp_paddr[1] ),
.exp_size_i ( fifo_data[1].size ),
.exp_paddr_i ( fifo_data[1].paddr ),
.exp_rdata_i ( exp_rdata[1] ),
.act_paddr_i ( act_paddr[1] ),
.seq_done_o ( seq_done[1] ),
.dut_req_port_o ( req_ports_i[1] ),
.dut_req_port_i ( req_ports_o[1] )
);
// write port agent connected to request port 2 of the DUT.
// the number of vectors to apply is taken from seq_num_write.
tb_writeport #(
.PORT_NAME ( "WR0" ),
.MEM_WORDS ( MEM_WORDS ),
.NC_ADDR_BEGIN ( NC_ADDR_BEGIN ),
.RND_SEED ( 7777777 ),
.VERBOSE ( VERBOSE )
) i_tb_writeport (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.test_name_i ( test_name ),
.req_rate_i ( req_rate[2] ),
.seq_type_i ( seq_type[2] ),
.seq_run_i ( seq_run ),
.seq_num_vect_i ( seq_num_write ),
.seq_last_i ( seq_last ),
.seq_done_o ( seq_done[2] ),
.dut_req_port_o ( req_ports_i[2] ),
.dut_req_port_i ( req_ports_o[2] )
);
// write signals for the memory emulation, derived from the write port (index 2):
// a write is flagged when a request with data_we set is granted
assign write_en = req_ports_i[2].data_req & req_ports_o[2].data_gnt & req_ports_i[2].data_we;
assign write_paddr = {req_ports_i[2].address_tag, req_ports_i[2].address_index};
assign write_data = req_ports_i[2].data_wdata;
assign write_be = req_ports_i[2].data_be;
// generate write buffer commit signals based on internal eviction status
// (hierarchical probes into the write buffer of the DUT)
assign commit_be = i_dut.i_serpent_dcache_wbuffer.wr_data_be_o;
assign commit_paddr = i_dut.i_serpent_dcache_wbuffer.wr_paddr;
assign commit_en = i_dut.i_serpent_dcache_wbuffer.evict;
// TODO: implement AMO agent
// the AMO request interface is tied off for now
assign amo_req_i.req = '0;
assign amo_req_i.amo_op = AMO_NONE;
assign amo_req_i.size = '0;
assign amo_req_i.operand_a = '0;
assign amo_req_i.operand_b = '0;
// amo_ack_o
///////////////////////////////////////////////////////////////////////////////
// simulation coordinator process
///////////////////////////////////////////////////////////////////////////////
// TODO: implement CSR / controller
// flush_i, flush_ack_o, enable_i, miss_o, wbuffer_empty_o
// stimuli process: initializes all testbench control signals, applies the reset
// and then runs the test sequences one after the other. each test configures
// the cache enable, the randomization switches and the per-port sequence
// type / request rate, runs the sequence and then flushes the cache and
// triggers a memory check.
//
// note on the positional assignment patterns used below: seq_type and req_rate
// are declared with the range [2:0], hence a pattern '{a, b, c} assigns
// a to index 2 (write port), b to index 1 (RD1) and c to index 0 (RD0).
initial begin : p_stim
test_name = "";
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd75};
seq_run = 1'b0;
seq_last = 1'b0;
seq_num_resp = '0;
seq_num_write = '0;
check_en = '0;
// seq_done
end_of_sim = 0;
rst_ni = 0;
// randomization settings
mem_rand_en = 0;
tlb_rand_en = 0;
inv_rand_en = 0;
amo_rand_en = 0;
// cache ctrl
flush_i = 0;
// flush_ack_o
// wbuffer_empty_o
enable_i = 0;
// miss_o
// print some info
$display("TB> current configuration:");
$display("TB> MEM_WORDS %d", MEM_WORDS);
$display("TB> NC_ADDR_BEGIN %16X", NC_ADDR_BEGIN);
$display("TB> MEM_RAND_HIT_RATE %d", MEM_RAND_HIT_RATE);
$display("TB> MEM_RAND_INV_RATE %d", MEM_RAND_INV_RATE);
// reset cycles
`APPL_WAIT_CYC(clk_i,100)
rst_ni = 1'b1;
`APPL_WAIT_CYC(clk_i,100)
$display("TB> start with test sequences");
// apply each test until seq_num_resp memory
// requests have successfully completed
///////////////////////////////////////////////
test_name = "TEST 0 -- random read -- disabled cache";
// config
enable_i = 0;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 1 -- sequential read -- disabled cache";
// config
enable_i = 0;
seq_type = '{default: LINEAR_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 2 -- random read -- enabled cache";
// config
enable_i = 1;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 3 -- linear read -- enabled cache";
// config
enable_i = 1;
seq_type = '{default: LINEAR_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 4 -- random read -- enabled cache + tlb, mem contentions";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 5 -- linear read -- enabled cache + tlb, mem contentions";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
seq_type = '{default: LINEAR_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 6 -- random read -- enabled cache + tlb, mem contentions + invalidations";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
inv_rand_en = 1;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 7 -- random read/write -- disabled cache";
// config
enable_i = 0;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd25};
runSeq(10000,10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 8 -- random read/write -- enabled cache";
// config
enable_i = 1;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd25};
runSeq(10000,20000);// NOTE(review): the 'last' argument is not passed here and defaults to 1'b0
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 9 -- random read/write -- enabled cache + tlb, mem contentions + invalidations";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
inv_rand_en = 1;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd25};
runSeq(10000,20000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 10 -- linear burst write -- enabled cache";
// config
enable_i = 1;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
seq_type = '{LINEAR_SEQ, IDLE_SEQ, IDLE_SEQ};
req_rate = '{100, 0, 0};
runSeq(0,5000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 11 -- linear burst write with hot cache";
// config
enable_i = 1;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
// first run a linear read sequence on RD0, then a linear write sequence of the same length
seq_type = '{IDLE_SEQ, IDLE_SEQ, LINEAR_SEQ};
req_rate = '{default:100};
runSeq((NC_ADDR_BEGIN>>3)+(2**(DCACHE_INDEX_WIDTH-3))*DCACHE_SET_ASSOC,0);
seq_type = '{LINEAR_SEQ, IDLE_SEQ, IDLE_SEQ};
// NOTE(review): this call passes last=1 although more tests follow - confirm that this is intended
runSeq(0,(NC_ADDR_BEGIN>>3)+(2**(DCACHE_INDEX_WIDTH-3))*DCACHE_SET_ASSOC,1);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 12 -- random write bursts -- enabled cache";
// config
enable_i = 1;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
seq_type = '{BURST_SEQ, RANDOM_SEQ, RANDOM_SEQ};
req_rate = '{75, 0, 0};
runSeq(0,5000,0);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 13 -- random write bursts -- enabled cache + tlb, mem contentions + invalidations";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
inv_rand_en = 1;
seq_type = '{BURST_SEQ, IDLE_SEQ, IDLE_SEQ};
req_rate = '{75, 0, 0};
runSeq(0,5000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 14 -- random write/read-- enabled cache + tlb, mem contentions + invalidations";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
inv_rand_en = 1;
seq_type = '{RANDOM_SEQ, RANDOM_SEQ, RANDOM_SEQ};
req_rate = '{default:25};
runSeq(5000,5000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 15 -- short wrapping sequences to provoke writebuffer hits";
// config
enable_i = 1;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
seq_type = '{WRAP_SEQ, IDLE_SEQ, WRAP_SEQ};
req_rate = '{100,0,20};
runSeq(5000,5000,1);// last sequence flag, terminates agents
flushCache();
memCheck();
///////////////////////////////////////////////
// signal the clock process that the simulation is done
end_of_sim = 1;
$display("TB> end test sequences");
end
endmodule

View file

@ -0,0 +1,66 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description:
//
//////////////////////////////////////////////////////////////////////////////
// use to ensure proper ATI timing
///////////////////////////////////////////////////////////////////////////////
// Delay by the difference between ACQ_DEL and APPL_DEL.
// NOTE: the macro text already ends with a semicolon.
`define APPL_ACQ_WAIT #(ACQ_DEL-APPL_DEL);
// Wait for N rising edges of CLK.
`define WAIT_CYC(CLK, N) \
repeat(N) @(posedge(CLK));
// Wait for at least one rising edge of CLK and keep waiting for further
// edges as long as (SIG == 1'b0) evaluates true.
`define WAIT(CLK, SIG) \
do begin \
@(posedge(CLK)); \
end while(SIG == 1'b0);
// Same expansion as `WAIT.
`define WAIT_SIG(CLK,SIG) \
do begin \
@(posedge(CLK)); \
end while(SIG == 1'b0);
// First apply `APPL_ACQ_WAIT, then - only while (SIG == 1'b0) - wait for a
// rising edge of CLK followed by ACQ_DEL. In contrast to the do/while macros,
// no clock edge is consumed if SIG is already nonzero after the initial delay.
// NOTE: the expansion consists of two statements and has no trailing semicolon.
`define APPL_WAIT_COMB_SIG(CLK,SIG) \
`APPL_ACQ_WAIT \
while(SIG == 1'b0) begin \
@(posedge(CLK)); \
#(ACQ_DEL); \
end
// Wait for at least one rising edge of CLK plus APPL_DEL, repeating as long
// as (SIG == 1'b0) evaluates true after the delay.
`define APPL_WAIT_SIG(CLK,SIG) \
do begin \
@(posedge(CLK)); \
#(APPL_DEL); \
end while(SIG == 1'b0);
// Wait for at least one rising edge of CLK plus ACQ_DEL, repeating as long
// as (SIG == 1'b0) evaluates true after the delay.
`define ACQ_WAIT_SIG(CLK,SIG) \
do begin \
@(posedge(CLK)); \
#(ACQ_DEL); \
end while(SIG == 1'b0);
// Wait for N rising edges of CLK, then delay by tb_pkg::APPL_DEL.
// NOTE: expands to two statements (repeat + delay), so only the repeat is
// governed by an enclosing if/else that has no begin/end.
`define APPL_WAIT_CYC(CLK, N) \
repeat(N) @(posedge(CLK)); \
#(tb_pkg::APPL_DEL);
// Wait for N rising edges of CLK, then delay by tb_pkg::ACQ_DEL.
// NOTE: expands to two statements (repeat + delay), see `APPL_WAIT_CYC.
`define ACQ_WAIT_CYC(CLK, N) \
repeat(N) @(posedge(CLK)); \
#(tb_pkg::ACQ_DEL);

View file

@ -0,0 +1,398 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: simple emulation layer for the memory subsystem.
//
`include "tb.svh"
import ariane_pkg::*;
import serpent_cache_pkg::*;
import tb_pkg::*;
// Simple emulation layer for the memory subsystem behind the data cache.
//
// Three arrays with MEM_WORDS 64bit entries are maintained:
//   mem_array_q        : "real" memory. Written by store requests that are popped from the
//                        request FIFO and read out for load requests.
//   mem_array_shadow_q : view that is updated directly by the TB agent (write_*_i) and that is
//                        exported on mem_array_o.
//   mem_array_dirty_q  : per-byte flags, set by write_en_i and cleared by commit_en_i.
//
// Requests coming from the DUT are buffered in a 2-entry FIFO (i_outfifo); responses and
// randomly generated invalidations are returned through a second 2-entry FIFO (i_infifo).
//
// Parameters:
//   MEM_NAME           name used as prefix in log messages and for the summary report file
//   MEM_RAND_HIT_RATE  threshold (percent) compared against a random draw to set mem_ready_q
//   MEM_RAND_INV_RATE  threshold (percent) compared against a random draw to issue invalidations
//   MEM_WORDS          number of 64bit words in the emulated memory
//   NC_ADDR_BEGIN      address boundary used by the nc_region assertion
//   NC_ADDR_GE_LT      nonzero: addresses >= NC_ADDR_BEGIN must be accessed noncached,
//                      zero:    addresses <  NC_ADDR_BEGIN must be accessed noncached
module tb_mem #(
  parameter string MEM_NAME          = "TB_MEM",
  parameter        MEM_RAND_HIT_RATE = 10, //in percent
  parameter        MEM_RAND_INV_RATE = 5,  //in percent
  parameter        MEM_WORDS         = 1024*1024,// in 64bit words
  parameter        NC_ADDR_BEGIN     = MEM_WORDS/2,
  parameter        NC_ADDR_GE_LT     = 1'b1
)(
  input logic           clk_i,
  input logic           rst_ni,
  // randomization settings
  input logic           mem_rand_en_i,
  input logic           inv_rand_en_i,
  input logic           amo_rand_en_i,  // not used inside this module at the moment
  // dcache interface
  output logic          mem_rtrn_vld_o,
  output dcache_rtrn_t  mem_rtrn_o,
  input logic           mem_data_req_i,
  output logic          mem_data_ack_o,
  input dcache_req_t    mem_data_i,
  // expected response interface
  input logic           seq_last_i,
  input logic           check_en_i,
  input logic           commit_en_i,
  input logic [7:0]     commit_be_i,
  input logic [63:0]    commit_paddr_i,
  input logic           write_en_i,
  input logic [7:0]     write_be_i,
  input logic [63:0]    write_data_i,
  input logic [63:0]    write_paddr_i,
  output logic [63:0]   mem_array_o[MEM_WORDS-1:0]
);

  // leave this
  timeunit 1ps;
  timeprecision 1ps;

  logic         mem_ready_q, mem_inv_q;
  logic [63:0]  rand_addr_q;

  dcache_req_t  outfifo_data;
  logic         outfifo_pop, outfifo_push, outfifo_full, outfifo_empty;
  dcache_rtrn_t infifo_data;
  logic         infifo_pop, infifo_push, infifo_full, infifo_empty;

  logic         initialized_q;
  logic         write_en;

  logic [63:0]  mem_array_q[MEM_WORDS-1:0];

  // this shadow memory provides a view that is consistent with the one from the core
  // i.e., pending writes are present in this view, and invalidations will not overwrite
  // the corresponding bytes until they have been committed to the normal memory.
  logic [63:0]  mem_array_shadow_q[MEM_WORDS-1:0];
  logic [7:0]   mem_array_dirty_q[MEM_WORDS-1:0];

  assign mem_array_o = mem_array_shadow_q;

  // sequential process holding the state of the memory readout process
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_tlb_rand
    automatic int          rnd = 0;
    automatic logic [63:0] val;
    automatic logic [63:0] lval;

    if(~rst_ni) begin
      mem_ready_q   <= '0;
      mem_inv_q     <= '0;
      rand_addr_q   <= '0;
      initialized_q <= '0;
    end else begin

      // fill the memory once with random data
      if (initialized_q) begin
        // commit "virtual" writes (i.e., clear the dirty flags)
        if(commit_en_i) begin
          for(int k=0; k<8; k++) begin
            if(commit_be_i[k]) begin
              mem_array_dirty_q[commit_paddr_i>>3][k] <= 1'b0;
            end
          end
        end

        // "virtual" writes coming from TB agent, used to generate expected responses
        if(write_en_i) begin
          for(int k=0; k<8; k++) begin
            if(write_be_i[k]) begin
              mem_array_shadow_q[write_paddr_i>>3][k*8 +: 8] <= write_data_i[k*8 +: 8];
              mem_array_dirty_q[write_paddr_i>>3][k]         <= 1'b1;
            end
          end
        end

        // "real" writes coming via the miss controller.
        // nonblocking assignments are used here such that mem_array_q is written with one
        // assignment type only (the initialization and the invalidations below also use <=).
        // since the invalidation assignments are scheduled later in this process, they still
        // take precedence over a store to the same bytes in the same cycle.
        if(write_en) begin
          unique case(outfifo_data.size)
            3'b000: mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2:0]*8  +: 8]  <= outfifo_data.data[outfifo_data.paddr[2:0]*8  +: 8];
            3'b001: mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2:1]*16 +: 16] <= outfifo_data.data[outfifo_data.paddr[2:1]*16 +: 16];
            3'b010: mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2:2]*32 +: 32] <= outfifo_data.data[outfifo_data.paddr[2:2]*32 +: 32];
            3'b011: mem_array_q[outfifo_data.paddr>>3]                                   <= outfifo_data.data[0 +: 64];
            default: begin
              $fatal(1,"unsupported transfer size for write");
            end
          endcase // outfifo_data.size
        end
      // initialization with random data
      end else begin
        mem_array_dirty_q <= '{default:'0};

        for (int k=0; k<MEM_WORDS; k++) begin
          void'(randomize(val));
          mem_array_q[k]        <= val;
          mem_array_shadow_q[k] <= val;
        end
        initialized_q <= 1;
      end

      // generate random contentions
      if (mem_rand_en_i) begin
        void'(randomize(rnd) with {rnd > 0; rnd <= 100;});
        if(rnd < MEM_RAND_HIT_RATE) begin
          mem_ready_q <= '1;
        end else begin
          mem_ready_q <= '0;
        end
      end else begin
        mem_ready_q <= '1;
      end

      // generate random invalidations
      if (inv_rand_en_i) begin
        void'(randomize(rnd) with {rnd > 0; rnd <= 100;});
        if(rnd < MEM_RAND_INV_RATE) begin
          mem_inv_q <= '1;
          // NOTE(review): lval is used as a word index below, but it is constrained to
          // MEM_WORDS>>3, i.e., only the lowest eighth of the array is hit - confirm intent.
          void'(randomize(lval) with {lval>=0; lval<(MEM_WORDS>>3);});
          void'(randomize(val));
          rand_addr_q <= lval<<3;

          // with the current TB setup, we cannot invalidate a memory location if a write response to the same address is
          // in flight, since this could lead to an inconsistent state between the real memory and the shadow memory view.
          // the workaround is not to overwrite shadow memory regions that are still pending in the write buffer
          // this can be improved.
          for(int k=0; k<8; k++) begin
            if(~mem_array_dirty_q[lval][k]) begin
              mem_array_q       [lval][k*8 +: 8] <= val[k*8 +: 8];
              mem_array_shadow_q[lval][k*8 +: 8] <= val[k*8 +: 8];
            end
          end
        end else begin
          mem_inv_q <= '0;
        end
      end else begin
        mem_inv_q <= '0;
      end
    end
  end

  // readout process: pops requests from the request FIFO and pushes the
  // corresponding responses (or pending invalidations) into the return FIFO
  always_comb begin : proc_mem
    infifo_push       = 0;
    infifo_data       = '0;
    outfifo_pop       = 0;
    infifo_data.rtype = DCACHE_LOAD_ACK;
    infifo_data.data  = 'x;
    write_en          = '0;

    // TODO: atomic request
    // DCACHE_ATOMIC_REQ
    // DCACHE_ATOMIC_ACK

    // TODO: stores
    // DCACHE_STORE_REQ
    // DCACHE_STORE_ACK

    // TODO: interrupts
    // DCACHE_INT_REQ
    // DCACHE_INT_ACK

    // generate random invalidation (pushed without checking infifo_full)
    if (mem_inv_q) begin

      infifo_data.rtype = DCACHE_INV_REQ;

      // since we do not keep a mirror tag table here,
      // we always invalidate all ways of the aliased index.
      // this is not entirely correct and will produce
      // too many invalidations
      infifo_data.inv.idx = rand_addr_q[DCACHE_INDEX_WIDTH-1:0];
      infifo_data.inv.all = '1;
      infifo_push         = 1'b1;

    end else if ((~outfifo_empty) && (~infifo_full) && mem_ready_q) begin
      outfifo_pop = 1'b1;
      infifo_push = 1'b1;

      unique case (outfifo_data.rtype)
        DCACHE_LOAD_REQ: begin
          infifo_data.tid = outfifo_data.tid;
          infifo_data.nc  = outfifo_data.nc;
          // openpiton replicates the data if size < dword
          unique case(outfifo_data.size)
            3'b000: begin
              for(int k=0;k<64;k+=8) infifo_data.data[k+:8] = mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2:0]*8 +: 8];
            end
            3'b001: begin
              for(int k=0;k<64;k+=16) infifo_data.data[k+:16] = mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2:1]*16+:16];
            end
            3'b010: begin
              for(int k=0;k<64;k+=32) infifo_data.data[k+:32] = mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2] *32+:32];
            end
            3'b011: infifo_data.data[0+:64] = mem_array_q[outfifo_data.paddr>>3];
            3'b111: begin// full cacheline
              for (int k=0; k<DCACHE_LINE_WIDTH/64; k++) begin
                infifo_data.data[k*64 +:64] = mem_array_q[(outfifo_data.paddr>>3) + k];
              end
            end
            default: begin
              $fatal(1,"unsupported transfer size for read");
            end
          endcase // outfifo_data.size
        end
        DCACHE_STORE_REQ:  begin
          infifo_data.tid   = outfifo_data.tid;
          infifo_data.rtype = DCACHE_STORE_ACK;
          infifo_data.nc    = outfifo_data.nc;
          write_en          = 1'b1;
        end
        // DCACHE_ATOMIC_REQ: $fatal(1, "DCACHE_ATOMIC_REQ not implemented yet");
        // DCACHE_INT_REQ:    $fatal(1, "DCACHE_INT_REQ not implemented yet");
        default: begin
          // $fatal(1, "unsupported request type");
        end
      endcase // outfifo_data.rtype
    end
  end

  // request FIFO (DUT -> memory)
  fifo_v2 #(
    .dtype(dcache_req_t),
    .DEPTH(2)
  ) i_outfifo (
    .clk_i       ( clk_i          ),
    .rst_ni      ( rst_ni         ),
    .flush_i     ( 1'b0           ),
    .testmode_i  ( 1'b0           ),
    .full_o      ( outfifo_full   ),
    .empty_o     ( outfifo_empty  ),
    .alm_full_o  (                ),
    .alm_empty_o (                ),
    .data_i      ( mem_data_i     ),
    .push_i      ( outfifo_push   ),
    .data_o      ( outfifo_data   ),
    .pop_i       ( outfifo_pop    )
  );

  // a request is acknowledged in the cycle it is pushed into the request FIFO
  assign outfifo_push   = mem_data_req_i & (~outfifo_full);
  assign mem_data_ack_o = outfifo_push;

  // return FIFO (memory -> DUT)
  fifo_v2 #(
    .dtype(dcache_rtrn_t),
    .DEPTH(2)
  ) i_infifo (
    .clk_i       ( clk_i         ),
    .rst_ni      ( rst_ni        ),
    .flush_i     ( 1'b0          ),
    .testmode_i  ( 1'b0          ),
    .full_o      ( infifo_full   ),
    .empty_o     ( infifo_empty  ),
    .alm_full_o  (               ),
    .alm_empty_o (               ),
    .data_i      ( infifo_data   ),
    .push_i      ( infifo_push   ),
    .data_o      ( mem_rtrn_o    ),
    .pop_i       ( infifo_pop    )
  );

  // responses are handed to the DUT as soon as they are available
  assign infifo_pop     = ~infifo_empty;
  assign mem_rtrn_vld_o = infifo_pop;

  ///////////////////////////////////////////////////////
  // checker process
  ///////////////////////////////////////////////////////

  initial begin
    bit ok;
    progress status;
    status = new(MEM_NAME);

    `ACQ_WAIT_CYC(clk_i,10)
    `ACQ_WAIT_SIG(clk_i,~rst_ni)

    while(~seq_last_i) begin
      `ACQ_WAIT_SIG(clk_i,check_en_i)
      status.reset(MEM_WORDS);
      // crosscheck whether shadow and real memory arrays still match
      for(int k=0; k<MEM_WORDS; k++) begin
        ok = (mem_array_q[k] == mem_array_shadow_q[k]) && !(|mem_array_dirty_q[k]);
        if(!ok) begin
          $display("%s> dirty bytes at k=%016X: real[k>>3]=%016X, shadow[k>>3]=%016X, dirty[k>>3]=%02X",
            MEM_NAME, k<<3, mem_array_q[k], mem_array_shadow_q[k], mem_array_dirty_q[k]);
        end
        status.addRes(!ok);
        status.print();
      end
    end

    status.printToFile({MEM_NAME, "_summary.rep"}, 1);

    if(status.totErrCnt == 0) begin
      $display("%s> ----------------------------------------------------------------------", MEM_NAME);
      $display("%s> PASSED %0d VECTORS", MEM_NAME, status.totAcqCnt);
      $display("%s> ----------------------------------------------------------------------\n", MEM_NAME);
    end else begin
      $display("%s> ----------------------------------------------------------------------\n", MEM_NAME);
      $display("%s> FAILED %0d OF %0d VECTORS\n", MEM_NAME , status.totErrCnt, status.totAcqCnt);
      $display("%s> ----------------------------------------------------------------------\n", MEM_NAME);
    end
  end

  ///////////////////////////////////////////////////////
  // assertions
  ///////////////////////////////////////////////////////

  //pragma translate_off
  `ifndef verilator

  // exactly one of the two region checks is elaborated. note that a logical test is used on
  // the parameter (if/else) instead of a bitwise negation, since ~NC_ADDR_GE_LT is nonzero for
  // any multi-bit nonzero override (e.g. an integer 1), which would enable both checks.
  generate
    if (NC_ADDR_GE_LT) begin : g_nc_addr_high
      nc_region: assert property (
          @(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.paddr >= NC_ADDR_BEGIN |-> mem_data_i.nc)
        else $fatal(1, "cached access into noncached region");
    end else begin : g_nc_addr_low
      nc_region: assert property (
          @(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.paddr < NC_ADDR_BEGIN |-> mem_data_i.nc)
        else $fatal(1, "cached access into noncached region");
    end
  endgenerate

  cached_reads: assert property (
      @(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.rtype==DCACHE_LOAD_REQ |-> ~mem_data_i.nc |-> mem_data_i.size == 3'b111)
    else $fatal(1, "cached read accesses always have to be one CL wide");

  nc_reads: assert property (
      @(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.rtype==DCACHE_LOAD_REQ |-> mem_data_i.nc |-> mem_data_i.size inside {3'b000, 3'b001, 3'b010, 3'b011})
    else $fatal(1, "nc read size can only be one of the following: byte, halfword, word, dword");

  write_size: assert property (
      @(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.rtype==DCACHE_STORE_REQ |-> mem_data_i.size inside {3'b000, 3'b001, 3'b010, 3'b011})
    else $fatal(1, "write size can only be one of the following: byte, halfword, word, dword");

  // both loads and stores have to stay within the emulated memory array
  addr_range: assert property (
      @(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.rtype inside {DCACHE_LOAD_REQ, DCACHE_STORE_REQ} |-> mem_data_i.paddr < (MEM_WORDS<<3))
    else $fatal(1, "address is out of bounds");

  `endif
  //pragma translate_on

  // align0: assert property (
  //   @(posedge clk_i) disable iff (~rst_ni) ~exp_empty |-> stim_addr[1:0] == 0)
  //      else $fatal(1,"stim_addr is not 32bit word aligned");

  // align1: assert property (
  //   @(posedge clk_i) disable iff (~rst_ni) ~outfifo_empty |-> outfifo_data.paddr[1:0] == 0)
  //      else $fatal(1,"paddr is not 32bit word aligned");

endmodule // tb_mem

View file

@ -0,0 +1,150 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: testbench package with some helper functions.
// Testbench helper package: timing parameters, the sequence type enum used by
// the port agents, and the 'progress' class for result bookkeeping/reporting.
package tb_pkg;

  // // for abs(double) function
  // import mti_cstdlib::*;

  // for timestamps
  import "DPI-C" \time = function int _time (inout int tloc[4]);
  import "DPI-C" function string ctime(inout int tloc[4]);

///////////////////////////////////////////////////////////////////////////////
// parameters
///////////////////////////////////////////////////////////////////////////////

  // creates a 10ns ATI timing cycle
  time CLK_HI               = 5ns;     // set clock high time
  time CLK_LO               = 5ns;     // set clock low time
  time CLK_PERIOD           = CLK_HI+CLK_LO;
  time APPL_DEL             = 2ns;     // set stimuli application delay
  time ACQ_DEL              = 8ns;     // set response acquisition delay

  parameter ERROR_CNT_STOP_LEVEL = 1; // use 1 for debugging. 0 runs the complete simulation...

  // tb_readport sequences
  typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ } seq_t;

///////////////////////////////////////////////////////////////////////////////
// progress
///////////////////////////////////////////////////////////////////////////////

  // Keeps track of acquired responses and errors, both for the current test
  // (acqCnt/errCnt, cleared by reset()) and accumulated over all tests
  // (totAcqCnt/totErrCnt), and prints progress and summary reports.
  class progress;
    real newState, oldState;
    longint numResp, acqCnt, errCnt, totAcqCnt, totErrCnt;
    string name;

    // name: prefix used in all messages of this instance
    function new(string name);
      begin
        this.name       = name;
        this.acqCnt     = 0;
        this.errCnt     = 0;
        this.newState   = 0.0;
        this.oldState   = 0.0;
        this.numResp    = 1;
        this.totAcqCnt  = 0;
        this.totErrCnt  = 0;
      end
    endfunction : new

    // start a new test with numResp_ expected responses; the total counters are kept
    function void reset(longint numResp_);
      begin
        this.acqCnt     = 0;
        this.errCnt     = 0;
        this.newState   = 0.0;
        this.oldState   = 0.0;
        this.numResp    = numResp_;
      end
    endfunction : reset

    // error ratio in percent; returns 0.0 if no vectors have been counted yet,
    // such that reports never contain the result of a division by zero
    local function real percent(longint err, longint tot);
      if (tot == 0) begin
        return 0.0;
      end
      return $itor(err) / $itor(tot) * 100.0;
    endfunction : percent

    // register one response; isError is added to the error counters.
    // stops the simulation once ERROR_CNT_STOP_LEVEL errors have been seen
    // in the current test (disabled if ERROR_CNT_STOP_LEVEL is 0)
    function void addRes(int isError);
      begin
        this.acqCnt++;
        this.totAcqCnt++;
        this.errCnt    += isError;
        this.totErrCnt += isError;

        if(ERROR_CNT_STOP_LEVEL <= this.errCnt && ERROR_CNT_STOP_LEVEL > 0) begin
          $error("%s> simulation stopped (ERROR_CNT_STOP_LEVEL = %d reached).", this.name, ERROR_CNT_STOP_LEVEL);
          $stop();
        end
      end
    endfunction : addRes

    // print a progress line whenever at least one more percent of the
    // expected responses has been acquired since the last printout
    function void print();
      begin
        this.newState = $itor(this.acqCnt) / $itor(this.numResp);
        if(this.newState - this.oldState >= 0.01) begin
          $display("%s> validated %03d%% -- %01d failed (%03.3f%%) ",
                  this.name,
                  $rtoi(this.newState*100.0),
                  this.errCnt,
                  percent(this.errCnt, this.acqCnt));
          // $fflush();
          this.oldState = this.newState;
        end
      end
    endfunction : print

    // write a report to 'file' (spaces, slashes and backslashes in the name
    // are replaced by underscores). summary=1 writes the totals and the
    // "CI: PASSED"/"CI: FAILED" marker, summary=0 writes the per-test numbers.
    function void printToFile(string file, bit summary = 0);
      begin
        int fptr;

        // sanitize string
        for(int i=0; i<file.len(); i++) begin
          if(file[i] == " " || file[i] == "/" || file[i] == "\\") begin
            file[i] = "_";
          end
        end

        fptr = $fopen(file,"w");
        if(fptr == 0) begin
          // $fopen returns 0 on failure; do not write to an invalid descriptor
          $error("%s> could not open report file %s", this.name, file);
          return;
        end

        if(summary) begin
          $fdisplay(fptr, "Simulation Summary of %s", this.name);
          $fdisplay(fptr, "total: %01d of %01d vectors failed (%03.3f%%) ",
                    this.totErrCnt,
                    this.totAcqCnt,
                    percent(this.totErrCnt, this.totAcqCnt));
          if(this.totErrCnt == 0) begin
            $fdisplay(fptr, "CI: PASSED");
          end else begin
            $fdisplay(fptr, "CI: FAILED");
          end
        end else begin
          $fdisplay(fptr, "test name: %s", file);
          $fdisplay(fptr, "this test: %01d of %01d vectors failed (%03.3f%%) ",
                    this.errCnt,
                    this.acqCnt,
                    percent(this.errCnt, this.acqCnt));
          $fdisplay(fptr, "total so far: %01d of %01d vectors failed (%03.3f%%) ",
                    this.totErrCnt,
                    this.totAcqCnt,
                    percent(this.totErrCnt, this.totAcqCnt));
        end
        $fclose(fptr);
      end
    endfunction : printToFile

  endclass : progress

endpackage : tb_pkg

View file

@ -0,0 +1,383 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: program that emulates a cache readport. the program can generate
// randomized or linear read sequences, and it checks the returned responses against
// the expected responses coming directly from the emulated memory (tb_mem).
//
`include "tb.svh"
import ariane_pkg::*;
import serpent_cache_pkg::*;
import tb_pkg::*;
// Read port agent for the dcache testbench.
//
// The program contains three concurrent processes:
//   p_tag_delay : supplies the address tag (and tag_valid) for a request, at the earliest in the
//                 cycle after the request was observed; if tlb_rand_en_i is set, the tag is
//                 randomly held back for up to 50 additional cycles.
//   p_stim      : applies the request sequence selected by seq_type_i while seq_run_i is set.
//   p_acq       : acquires seq_num_resp_i responses per sequence and compares read data and
//                 address against exp_rdata_i / exp_paddr_i, then writes a summary report.
//
// Parameters:
//   PORT_NAME      prefix for log messages and name of the summary report file
//   KILL_RATE      threshold (percent) compared against a random draw to issue kill_req
//   TLB_HIT_RATE   threshold (percent); draws >= this value insert a random tag delay
//   MEM_WORDS      size of the address space in 64bit words
//   NC_ADDR_BEGIN  base address of the wrapping sequence
//   RND_SEED       seed passed to $urandom at the start of a random sequence
//   VERBOSE        if nonzero, all response vectors are printed (not only failing ones)
program tb_readport  #(
  parameter string PORT_NAME     = "read port 0",
  parameter        KILL_RATE     = 5,
  parameter        TLB_HIT_RATE  = 95,
  parameter        MEM_WORDS     = 1024*1024,// in 64bit words
  parameter        NC_ADDR_BEGIN = 0,
  parameter        RND_SEED      = 1110,
  parameter        VERBOSE       = 0
)(
  input logic           clk_i,
  input logic           rst_ni,

  // to testbench master
  ref   string          test_name_i,
  input  logic [6:0]    req_rate_i, //a rate between 0 and 100 percent
  input  seq_t          seq_type_i,
  input  logic          tlb_rand_en_i,
  input  logic          seq_run_i,
  input  logic [31:0]   seq_num_resp_i,
  input  logic          seq_last_i,
  output logic          seq_done_o,

  // expresp interface
  output logic [63:0]   exp_paddr_o,
  input  logic [1:0]    exp_size_i,
  input  logic [63:0]   exp_rdata_i,
  input  logic [63:0]   exp_paddr_i,
  input  logic [63:0]   act_paddr_i,

  // interface to DUT
  output dcache_req_i_t dut_req_port_o,
  input  dcache_req_o_t dut_req_port_i
  );

  // leave this
  timeunit 1ps;
  timeprecision 1ps;

  logic [63:0] paddr;
  logic seq_end_req, seq_end_ack, prog_end;
  logic [DCACHE_TAG_WIDTH-1:0] tag_q;
  // single-bit valid flag for tag_q (drives dut_req_port_o.tag_valid)
  logic tag_vld_q;


///////////////////////////////////////////////////////////////////////////////
// Randomly delay the tag by at least one cycle
///////////////////////////////////////////////////////////////////////////////

  // // TODO: add randomization
  initial begin : p_tag_delay
    logic [63:0] tmp_paddr, val;
    int unsigned cnt;
    logic tmp_vld;

    tag_q     <= '0;
    tag_vld_q <= 1'b0;

    `APPL_WAIT_CYC(clk_i, 10)
    `APPL_WAIT_SIG(clk_i,~rst_ni)
    `APPL_WAIT_CYC(clk_i,1)

    tmp_vld = 0;
    cnt     = 0;
    forever begin
      `APPL_WAIT_CYC(clk_i,1)

      if(cnt==0) begin
        // no delay pending: output the tag of a previously captured request
        if(tmp_vld) begin
          tmp_vld   = 0;
          tag_q     <= tmp_paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
          tag_vld_q <= 1'b1;
        end else begin
          tag_vld_q <= 1'b0;
        end

        `APPL_ACQ_WAIT;

        // capture the address of a new request and optionally draw a random delay
        if(dut_req_port_o.data_req) begin
          tmp_paddr = paddr;
          tmp_vld   = 1;

          if(tlb_rand_en_i) begin
            void'(randomize(val) with {val>0; val<=100;});
            if(val>=TLB_HIT_RATE) begin
              void'(randomize(cnt) with {cnt>0; cnt<=50;});
            end
          end
        end
      end else begin
        // delay pending: keep the tag invalid and count down
        tag_vld_q <= 1'b0;
        cnt -= 1;
        `APPL_ACQ_WAIT;
      end

      // a kill request drops the captured request and any pending delay
      if(dut_req_port_o.kill_req) begin
        tmp_vld = 0;
        cnt     = 0;
      end
    end
  end

  assign dut_req_port_o.address_tag   = tag_q;
  assign dut_req_port_o.tag_valid     = tag_vld_q;
  assign dut_req_port_o.address_index = paddr[DCACHE_INDEX_WIDTH-1:0];
  assign exp_paddr_o                  = paddr;

///////////////////////////////////////////////////////////////////////////////
// Helper tasks
///////////////////////////////////////////////////////////////////////////////

  // random reads with random size (addresses are aligned to the access size),
  // interleaved with random kill requests. runs until seq_end_req is set.
  task automatic genRandReq();
    automatic logic [63:0] val;
    automatic logic [1:0] size;

    void'($urandom(RND_SEED));

    paddr                    = '0;
    dut_req_port_o.data_req  = '0;
    dut_req_port_o.data_size = '0;
    dut_req_port_o.kill_req  = '0;

    while(~seq_end_req) begin
      // randomize request
      dut_req_port_o.data_req = '0;
      // generate random control events
      void'(randomize(val) with {val > 0; val <= 100;});
      if(val < KILL_RATE) begin
        dut_req_port_o.kill_req = 1'b1;
        `APPL_WAIT_CYC(clk_i,1)
        dut_req_port_o.kill_req = 1'b0;
      end else begin
        void'(randomize(val) with {val > 0; val <= 100;});
        if(val < req_rate_i) begin
          dut_req_port_o.data_req = 1'b1;
          // generate random address
          void'(randomize(val) with {val >= 0; val < (MEM_WORDS<<3);});
          void'(randomize(size));

          dut_req_port_o.data_size = size;
          paddr = val;

          // align to size
          unique case(size)
            2'b01: paddr[0]   = 1'b0;
            2'b10: paddr[1:0] = 2'b00;
            2'b11: paddr[2:0] = 3'b000;
            default: ;
          endcase

          `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
        end
        `APPL_WAIT_CYC(clk_i,1)
      end
    end

    dut_req_port_o.data_req  = '0;
    dut_req_port_o.data_size = '0;
    dut_req_port_o.kill_req  = '0;

  endtask : genRandReq

  // back-to-back 64bit reads with linearly increasing addresses (wrapping at
  // the end of the memory). runs until seq_end_req is set.
  task automatic genSeqRead();
    automatic logic [63:0] val;
    paddr                    = '0;
    dut_req_port_o.data_req  = '0;
    dut_req_port_o.data_size = '0;
    dut_req_port_o.kill_req  = '0;
    val                      = '0;
    while(~seq_end_req) begin
      dut_req_port_o.data_req  = 1'b1;
      dut_req_port_o.data_size = 2'b11;
      paddr = val;
      // generate linear read
      val = (val + 8) % (MEM_WORDS<<3);
      `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
      `APPL_WAIT_CYC(clk_i,1)
    end
    dut_req_port_o.data_req  = '0;
    dut_req_port_o.data_size = '0;
    dut_req_port_o.kill_req  = '0;
  endtask : genSeqRead

  // back-to-back 64bit reads that wrap within one cacheline starting at
  // NC_ADDR_BEGIN. runs until seq_end_req is set.
  task automatic genWrapSeq();
    automatic logic [63:0] val;
    paddr                    = NC_ADDR_BEGIN;
    dut_req_port_o.data_req  = '0;
    dut_req_port_o.data_size = '0;
    dut_req_port_o.kill_req  = '0;
    val                      = '0;
    while(~seq_end_req) begin
      dut_req_port_o.data_req  = 1'b1;
      dut_req_port_o.data_size = 2'b11;
      // generate wrapping read of 1 cachelines
      paddr = NC_ADDR_BEGIN + val;
      val = (val + 8) % (1*(DCACHE_LINE_WIDTH/64)*8);
      `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
      `APPL_WAIT_CYC(clk_i,1)
    end
    dut_req_port_o.data_req  = '0;
    dut_req_port_o.data_size = '0;
    dut_req_port_o.kill_req  = '0;
  endtask : genWrapSeq


///////////////////////////////////////////////////////////////////////////////
// Sequence application
///////////////////////////////////////////////////////////////////////////////

  initial begin : p_stim
    paddr                        = '0;

    dut_req_port_o.data_wdata    = '0;
    dut_req_port_o.data_req      = '0;
    dut_req_port_o.data_we       = '0;
    dut_req_port_o.data_be       = '0;
    dut_req_port_o.data_size     = '0;
    dut_req_port_o.kill_req      = '0;

    seq_end_ack                  = '0;

    // print some info
    $display("%s> current configuration:",  PORT_NAME);
    $display("%s> KILL_RATE     %d",        PORT_NAME, KILL_RATE);
    $display("%s> TLB_HIT_RATE  %d",        PORT_NAME, TLB_HIT_RATE);
    $display("%s> RND_SEED      %d",        PORT_NAME, RND_SEED);

    `APPL_WAIT_CYC(clk_i,1)
    `APPL_WAIT_SIG(clk_i,~rst_ni)

    $display("%s> starting application", PORT_NAME);
    while(~seq_last_i) begin
      `APPL_WAIT_SIG(clk_i,seq_run_i)
      unique case(seq_type_i)
        RANDOM_SEQ: begin
          $display("%s> start random sequence with %04d responses and req_rate %03d", PORT_NAME, seq_num_resp_i, req_rate_i);
          genRandReq();
        end
        LINEAR_SEQ: begin
          $display("%s> start linear sequence with %04d responses and req_rate %03d", PORT_NAME, seq_num_resp_i, req_rate_i);
          genSeqRead();
        end
        WRAP_SEQ: begin
          $display("%s> start wrapping sequence with %04d responses and req_rate %03d", PORT_NAME, seq_num_resp_i, req_rate_i);
          genWrapSeq();
        end
        IDLE_SEQ: begin
          `APPL_WAIT_SIG(clk_i,seq_end_req)
        end
        BURST_SEQ: begin
          $fatal(1, "Burst sequence not implemented for read port agent");
        end
      endcase // seq_type_i
      // handshake with the acquisition process
      seq_end_ack = 1'b1;
      $display("%s> stop sequence", PORT_NAME);
      `APPL_WAIT_CYC(clk_i,1)
      seq_end_ack = 1'b0;
    end
    $display("%s> ending application", PORT_NAME);
  end


///////////////////////////////////////////////////////////////////////////////
// Response acquisition
///////////////////////////////////////////////////////////////////////////////

  initial begin : p_acq
    bit ok;
    progress status;
    string failingTests, tmpstr1, tmpstr2;
    int    n;
    logic [63:0] exp_rdata, exp_paddr;
    logic [1:0] exp_size;

    status       = new(PORT_NAME);
    failingTests = "";
    seq_done_o   = 1'b0;
    seq_end_req  = 1'b0;
    prog_end     = 1'b0;

    `ACQ_WAIT_CYC(clk_i,1)
    `ACQ_WAIT_SIG(clk_i,~rst_ni)

    ///////////////////////////////////////////////
    // loop over tests
    n=0;
    while(~seq_last_i) begin
      `ACQ_WAIT_SIG(clk_i,seq_run_i)
      seq_done_o = 1'b0;

      $display("%s> %s", PORT_NAME, test_name_i);
      status.reset(seq_num_resp_i);
      for (int k=0;k<seq_num_resp_i && seq_type_i != IDLE_SEQ;k++) begin
        `ACQ_WAIT_SIG(clk_i, dut_req_port_i.data_rvalid)

        // only the bytes covered by the access are compared, the rest stays x
        exp_rdata = 'x;
        unique case(exp_size_i)
          2'b00: exp_rdata[exp_paddr_i[2:0]*8  +: 8]  = exp_rdata_i[exp_paddr_i[2:0]*8  +: 8];
          2'b01: exp_rdata[exp_paddr_i[2:1]*16 +: 16] = exp_rdata_i[exp_paddr_i[2:1]*16 +: 16];
          2'b10: exp_rdata[exp_paddr_i[2]  *32 +: 32] = exp_rdata_i[exp_paddr_i[2]  *32 +: 32];
          2'b11: exp_rdata                            = exp_rdata_i;
        endcase // exp_size

        // note: wildcard as defined in right operand!
        ok=(dut_req_port_i.data_rdata ==? exp_rdata) && (exp_paddr_i == act_paddr_i);

        if(VERBOSE | !ok) begin
          tmpstr1 =  $sformatf("vector: %02d - %06d -- exp_paddr: %16X -- exp_data: %16X -- access size: %01d Byte",
                      n, k, exp_paddr_i, exp_rdata, 2**exp_size_i);
          tmpstr2 =  $sformatf("vector: %02d - %06d -- act_paddr: %16X -- act_data: %16X -- access size: %01d Byte",
                      n, k, act_paddr_i, dut_req_port_i.data_rdata, 2**exp_size_i);
          $display("%s> %s", PORT_NAME, tmpstr1);
          $display("%s> %s", PORT_NAME, tmpstr2);
        end

        if(!ok) begin
          failingTests = $sformatf("%s%s> %s\n%s> %s\n", failingTests, PORT_NAME, tmpstr1, PORT_NAME, tmpstr2);
        end
        status.addRes(!ok);
        status.print();
      end
      // tell the stimuli process to stop and wait for its acknowledge
      seq_end_req = 1'b1;
      `ACQ_WAIT_SIG(clk_i, seq_end_ack)
      seq_end_req = 1'b0;

      `ACQ_WAIT_CYC(clk_i,1)
      seq_done_o = 1'b1;
      n++;
    end
    ///////////////////////////////////////////////

    status.printToFile({PORT_NAME, "_summary.rep"}, 1);

    if(status.totErrCnt == 0) begin
      $display("%s> ----------------------------------------------------------------------", PORT_NAME);
      $display("%s> PASSED %0d VECTORS", PORT_NAME, status.totAcqCnt);
      $display("%s> ----------------------------------------------------------------------\n", PORT_NAME);
    end else begin
      $display("%s> ----------------------------------------------------------------------\n", PORT_NAME);
      $display("%s> FAILED %0d OF %0d VECTORS\n", PORT_NAME , status.totErrCnt, status.totAcqCnt);
      $display("%s> failing tests:", PORT_NAME);
      $display("%s", failingTests);
      $display("%s> ----------------------------------------------------------------------\n", PORT_NAME);
    end
    prog_end = 1'b1;
  end

///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////

//pragma translate_off
// `ifndef VERILATOR

// `endif
//pragma translate_on

endprogram // tb_readport

View file

@ -0,0 +1,299 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: program that emulates a cache write port. the program can generate
// randomized, linear, wrapping and burst write sequences.
//
`include "tb.svh"
import ariane_pkg::*;
import serpent_cache_pkg::*;
import tb_pkg::*;
program tb_writeport #(
parameter string PORT_NAME = "write port 0",
parameter MEM_WORDS = 1024*1024,// in 64bit words
parameter NC_ADDR_BEGIN = 0,
parameter RND_SEED = 1110,
parameter VERBOSE = 0
)(
input logic clk_i,
input logic rst_ni,
// to testbench master
ref string test_name_i,
input logic [6:0] req_rate_i,
input seq_t seq_type_i,
input logic seq_run_i,
input logic [31:0] seq_num_vect_i,
input logic seq_last_i,
output logic seq_done_o,
// interface to DUT
output dcache_req_i_t dut_req_port_o,
input dcache_req_o_t dut_req_port_i
);
// leave this
timeunit 1ps;
timeprecision 1ps;
logic [63:0] paddr;
assign dut_req_port_o.address_tag = paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
assign dut_req_port_o.address_index = paddr[DCACHE_INDEX_WIDTH-1:0];
assign dut_req_port_o.data_we = dut_req_port_o.data_req;
///////////////////////////////////////////////////////////////////////////////
// Helper tasks
///////////////////////////////////////////////////////////////////////////////
task automatic applyRandData();
automatic logic [63:0] val;
automatic logic [7:0] be;
automatic logic [1:0] size;
void'(randomize(size));
// align to size, set correct byte enables
be = '0;
unique case(size)
2'b00: be[paddr[2:0] +: 1] = '1;
2'b01: be[paddr[2:1]<<1 +: 2] = '1;
2'b10: be[paddr[2:2]<<2 +: 4] = '1;
2'b11: be = '1;
default: ;
endcase
paddr[2:0] = '0;
void'(randomize(val));
for(int k=0; k<8; k++) begin
if( be[k] ) begin
dut_req_port_o.data_wdata[k*8 +: 8] = val[k*8 +: 8];
end
end
dut_req_port_o.data_be = be;
dut_req_port_o.data_size = size;
endtask : applyRandData
task automatic genRandReq();
automatic logic [63:0] val;
void'($urandom(RND_SEED));
paddr = '0;
dut_req_port_o.data_req = '0;
dut_req_port_o.data_size = '0;
dut_req_port_o.data_be = '0;
dut_req_port_o.data_wdata = 'x;
repeat(seq_num_vect_i) begin
// randomize request
dut_req_port_o.data_req = '0;
dut_req_port_o.data_be = '0;
dut_req_port_o.data_wdata = 'x;
void'(randomize(val) with {val > 0; val <= 100;});
if(val < req_rate_i) begin
dut_req_port_o.data_req = 1'b1;
// generate random address
void'(randomize(paddr) with {paddr >= 0; paddr < (MEM_WORDS<<3);});
applyRandData();
`APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
end
`APPL_WAIT_CYC(clk_i,1)
end
paddr = '0;
dut_req_port_o.data_req = '0;
dut_req_port_o.data_size = '0;
dut_req_port_o.data_be = '0;
dut_req_port_o.data_wdata = 'x;
endtask : genRandReq
task automatic genSeqWrite();
automatic logic [63:0] val;
paddr = '0;
dut_req_port_o.data_req = '0;
dut_req_port_o.data_size = '0;
dut_req_port_o.data_be = '0;
dut_req_port_o.data_wdata = 'x;
val = '0;
repeat(seq_num_vect_i) begin
dut_req_port_o.data_req = 1'b1;
dut_req_port_o.data_size = 2'b11;
dut_req_port_o.data_be = '1;
dut_req_port_o.data_wdata = val;
paddr = val;
// generate linear read
val = (val + 8) % (MEM_WORDS<<3);
`APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
`APPL_WAIT_CYC(clk_i,1)
end
paddr = '0;
dut_req_port_o.data_req = '0;
dut_req_port_o.data_size = '0;
dut_req_port_o.data_be = '0;
dut_req_port_o.data_wdata = 'x;
endtask : genSeqWrite
// Wrapping write sequence.
// Issues seq_num_vect_i back-to-back writes whose addresses cycle through
// the 8-byte words of a single cache line located at NC_ADDR_BEGIN. Data,
// byte enables and size of every request come from applyRandData().
task automatic genWrapSeq();
  automatic logic [63:0] offset;

  // seed the random number generator with the port specific seed
  void'($urandom(RND_SEED));

  // park the port before the sequence starts
  dut_req_port_o.data_wdata = 'x;
  dut_req_port_o.data_be    = '0;
  dut_req_port_o.data_size  = '0;
  dut_req_port_o.data_req   = '0;
  paddr                     = NC_ADDR_BEGIN;
  offset                    = '0;

  repeat(seq_num_vect_i) begin
    dut_req_port_o.data_req = 1'b1;
    // note: applyRandData() is deliberately called before paddr is updated,
    // as in the original ordering
    applyRandData();
    // wrapping write within one cache line
    paddr  = NC_ADDR_BEGIN + offset;
    // byte offset of the next 64-bit word, wrapping at the line size in bytes
    offset = (offset + 8) % ((DCACHE_LINE_WIDTH/64)*8);
    // presumably blocks until the DUT grants the request (macro is defined
    // outside this file section), then moves on to the next cycle
    `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
    `APPL_WAIT_CYC(clk_i,1)
  end

  // park the port again once the sequence is over
  dut_req_port_o.data_wdata = 'x;
  dut_req_port_o.data_be    = '0;
  dut_req_port_o.data_size  = '0;
  dut_req_port_o.data_req   = '0;
  paddr                     = '0;
endtask : genWrapSeq
// Random burst write sequence.
// Keeps issuing bursts until seq_num_vect_i requests have been counted.
// In each outer iteration a number in [1,100] is drawn; a burst is started
// only when that number is below req_rate_i. A burst begins at a random byte
// address in [0, MEM_WORDS<<3), has a random length in [0,100) and walks
// upwards in 8-byte steps. It ends early when the request budget is used up
// or when paddr reaches (MEM_WORDS-1)<<3. Data, byte enables and size of
// every request come from applyRandData().
//
// The request is only asserted while a burst beat is actually being applied
// and is withdrawn as soon as the burst is over. Previously data_req stayed
// high during the trailing wait cycle, which presented an additional,
// uncounted request with stale (or all-zero) byte enables - also for bursts
// of length zero.
task automatic genSeqBurst();
  automatic logic [63:0] val;
  automatic int cnt, burst_len;

  // seed the random number generator with the port specific seed
  void'($urandom(RND_SEED));

  // park the port before the sequence starts
  paddr                     = '0;
  dut_req_port_o.data_req   = '0;
  dut_req_port_o.data_size  = '0;
  dut_req_port_o.data_be    = '0;
  dut_req_port_o.data_wdata = 'x;
  cnt = 0;

  while(cnt < seq_num_vect_i) begin
    // default for this iteration: no request pending
    dut_req_port_o.data_req   = '0;
    dut_req_port_o.data_be    = '0;
    dut_req_port_o.data_wdata = 'x;

    // decide whether a burst is started in this iteration
    void'(randomize(val) with {val > 0; val <= 100;});
    if(val < req_rate_i) begin
      // generate random address base
      void'(randomize(paddr) with {paddr >= 0; paddr < (MEM_WORDS<<3);});
      // do a random burst
      void'(randomize(burst_len) with {burst_len >= 0; burst_len < 100;});
      for(int k=0; k<burst_len && cnt < seq_num_vect_i && paddr < ((MEM_WORDS-1)<<3); k++) begin
        // raise the request only when there really is a beat to apply
        dut_req_port_o.data_req = 1'b1;
        applyRandData();
        // presumably blocks until the DUT grants the request (macro is
        // defined outside this file section), then moves on to the next cycle
        `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
        `APPL_WAIT_CYC(clk_i,1)
        paddr += 8;
        cnt ++;
      end
      // burst is over: withdraw the request so that the idle cycle below
      // cannot hand an uncounted request to the DUT
      dut_req_port_o.data_req   = '0;
      dut_req_port_o.data_be    = '0;
      dut_req_port_o.data_wdata = 'x;
    end
    `APPL_WAIT_CYC(clk_i,1)
  end

  // park the port again once the sequence is over
  paddr                     = '0;
  dut_req_port_o.data_req   = '0;
  dut_req_port_o.data_size  = '0;
  dut_req_port_o.data_be    = '0;
  dut_req_port_o.data_wdata = 'x;
endtask : genSeqBurst
///////////////////////////////////////////////////////////////////////////////
// Sequence application
///////////////////////////////////////////////////////////////////////////////
// Main stimulus process of this port.
// Drives idle defaults, waits for reset, and then executes one sequence per
// seq_run_i request until seq_last_i is set. The sequence kind is selected
// by seq_type_i; seq_done_o is raised after each sequence has finished.
initial begin : p_stim
  // idle defaults on all driven signals
  seq_done_o                = 1'b0;
  paddr                     = '0;
  dut_req_port_o.kill_req   = '0;
  dut_req_port_o.tag_valid  = '0;
  dut_req_port_o.data_wdata = '0;
  dut_req_port_o.data_be    = '0;
  dut_req_port_o.data_size  = '0;
  dut_req_port_o.data_req   = '0;

  // print some info
  $display("%s> current configuration:", PORT_NAME);
  $display("%s> RND_SEED %d", PORT_NAME, RND_SEED);

  // wait until reset is asserted (rst_ni low) before applying anything
  `APPL_WAIT_CYC(clk_i,1)
  `APPL_WAIT_SIG(clk_i,~rst_ni)

  $display("%s> starting application", PORT_NAME);

  while(~seq_last_i) begin
    // wait for the testbench to kick off the next sequence
    `APPL_WAIT_SIG(clk_i,seq_run_i)
    seq_done_o = 1'b0;

    unique case(seq_type_i)
      BURST_SEQ: begin
        $display("%s> start burst sequence with %04d vectors and req_rate %03d", PORT_NAME, seq_num_vect_i, req_rate_i);
        genSeqBurst();
      end
      WRAP_SEQ: begin
        $display("%s> start wrapping sequence with %04d vectors and req_rate %03d", PORT_NAME, seq_num_vect_i, req_rate_i);
        genWrapSeq();
      end
      LINEAR_SEQ: begin
        $display("%s> start linear sequence with %04d vectors and req_rate %03d", PORT_NAME, seq_num_vect_i, req_rate_i);
        genSeqWrite();
      end
      RANDOM_SEQ: begin
        $display("%s> start random sequence with %04d vectors and req_rate %03d", PORT_NAME, seq_num_vect_i, req_rate_i);
        genRandReq();
      end
      IDLE_SEQ: ; // port stays idle for this sequence
    endcase // seq_type_i

    // signal completion of the current sequence
    seq_done_o = 1'b1;
    $display("%s> stop sequence", PORT_NAME);
    `APPL_WAIT_CYC(clk_i,1)
  end

  $display("%s> ending application", PORT_NAME);
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
// `ifndef verilator
// exp_resp_vld: assert property (
// @(posedge clk_i) disable iff (~rst_ni) dut_req_port_i.data_rvalid |-> exp_rdata_queue.size()>0 && exp_size_queue.size()>0 && exp_paddr_queue.size()>0)
// else $fatal(1, "expected response must be in the queue when DUT response returns");
// `endif
//pragma translate_on
endprogram // tb_writeport

View file

@ -0,0 +1,20 @@
../../include/riscv_pkg.sv
../../src/debug/dm_pkg.sv
../../include/ariane_pkg.sv
../../include/serpent_cache_pkg.sv
../../src/fpga-support/rtl/SyncSpRamBeNx64.sv
../../src/cache_subsystem/serpent_dcache_ctrl.sv
../../src/cache_subsystem/serpent_dcache_mem.sv
../../src/cache_subsystem/serpent_dcache_missunit.sv
../../src/cache_subsystem/serpent_dcache_wbuffer.sv
../../src/cache_subsystem/serpent_dcache.sv
../../src/common_cells/src/lfsr_8bit.sv
../../src/common_cells/src/fifo_v2.sv
../../src/common_cells/src/lzc.sv
../../src/common_cells/src/rrarbiter.sv
../../src/util/sram.sv
hdl/tb_pkg.sv
hdl/tb_mem.sv
hdl/tb_readport.sv
hdl/tb_writeport.sv
hdl/tb.sv

View file

@ -0,0 +1,474 @@
onerror {resume}
quietly WaveActivateNextPane {} 0
add wave -noupdate /tb/KILL_RATE
add wave -noupdate /tb/MEM_BYTES
add wave -noupdate /tb/MEM_RAND_HIT_RATE
add wave -noupdate /tb/MEM_RAND_INV_RATE
add wave -noupdate /tb/MEM_WORDS
add wave -noupdate /tb/NC_ADDR_BEGIN
add wave -noupdate /tb/amo_ack_o
add wave -noupdate /tb/amo_rand_en
add wave -noupdate /tb/amo_req_i
add wave -noupdate /tb/clk_i
add wave -noupdate /tb/enable_i
add wave -noupdate /tb/end_of_sim
add wave -noupdate /tb/flush_ack_o
add wave -noupdate /tb/flush_i
add wave -noupdate /tb/inv_rand_en
add wave -noupdate /tb/mem_array
add wave -noupdate /tb/mem_data_ack_i
add wave -noupdate /tb/mem_data_o
add wave -noupdate /tb/mem_data_req_o
add wave -noupdate /tb/mem_rand_en
add wave -noupdate -expand /tb/mem_rtrn_i
add wave -noupdate /tb/mem_rtrn_vld_i
add wave -noupdate /tb/miss_o
add wave -noupdate /tb/req_ports_i
add wave -noupdate /tb/req_ports_o
add wave -noupdate /tb/rst_ni
add wave -noupdate /tb/seq_done
add wave -noupdate /tb/seq_last
add wave -noupdate /tb/seq_num_resp
add wave -noupdate /tb/seq_run
add wave -noupdate /tb/seq_type
add wave -noupdate /tb/test_name
add wave -noupdate /tb/wbuffer_empty_o
add wave -noupdate -divider Programs
add wave -noupdate -group Writeport /tb/i_tb_writeport/clk_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/rst_ni
add wave -noupdate -group Writeport /tb/i_tb_writeport/req_rate_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/seq_type_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/seq_run_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/seq_num_vect_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/seq_last_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/dut_req_port_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/MEM_WORDS
add wave -noupdate -group Writeport /tb/i_tb_writeport/RND_SEED
add wave -noupdate -group Writeport /tb/i_tb_writeport/VERBOSE
add wave -noupdate -group Writeport /tb/i_tb_writeport/test_name_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/paddr
add wave -noupdate -group Writeport /tb/i_tb_writeport/seq_done_o
add wave -noupdate -group Writeport /tb/i_tb_writeport/dut_req_port_o
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/clk_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/rst_ni
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_type_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_run_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_num_resp_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_last_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_done_o
add wave -noupdate -group {Readport 0} -expand /tb/i_tb_readport0/dut_req_port_o
add wave -noupdate -group {Readport 0} -expand /tb/i_tb_readport0/dut_req_port_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/paddr
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_end_req
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_end_ack
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/tag_q
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/tag_vld_q
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/clk_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/rst_ni
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_type_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_run_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_num_resp_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_last_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_done_o
add wave -noupdate -group {Readport 1} -expand /tb/i_tb_readport1/dut_req_port_o
add wave -noupdate -group {Readport 1} -expand /tb/i_tb_readport1/dut_req_port_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/paddr
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_end_req
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_end_ack
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/tag_q
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/tag_vld_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/clk_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/rst_ni
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_rand_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/inv_rand_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/amo_rand_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_data_req_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_data_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/seq_last_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/check_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/commit_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/commit_be_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/commit_paddr_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/write_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/write_be_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/write_data_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/write_paddr_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/MEM_RAND_HIT_RATE
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/MEM_RAND_INV_RATE
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/MEM_WORDS
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/NC_ADDR_BEGIN
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/NC_ADDR_GE_LT
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_ready_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_inv_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/rand_addr_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/outfifo_data
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/outfifo_pop
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/outfifo_push
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/outfifo_full
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/outfifo_empty
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/infifo_data
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/infifo_pop
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/infifo_push
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/infifo_full
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/infifo_empty
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/initialized_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/write_en
add wave -noupdate -group i_tb_mem -color Magenta /tb/i_tb_mem/mem_array_q
add wave -noupdate -group i_tb_mem -color Magenta /tb/i_tb_mem/mem_array_shadow_q
add wave -noupdate -group i_tb_mem -color Magenta /tb/i_tb_mem/mem_array_dirty_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_rtrn_vld_o
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_rtrn_o
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_data_ack_o
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_array_o
add wave -noupdate -divider Modules
add wave -noupdate -group i_dut /tb/i_dut/clk_i
add wave -noupdate -group i_dut /tb/i_dut/rst_ni
add wave -noupdate -group i_dut /tb/i_dut/enable_i
add wave -noupdate -group i_dut /tb/i_dut/flush_i
add wave -noupdate -group i_dut /tb/i_dut/amo_req_i
add wave -noupdate -group i_dut /tb/i_dut/req_ports_i
add wave -noupdate -group i_dut /tb/i_dut/mem_rtrn_vld_i
add wave -noupdate -group i_dut /tb/i_dut/mem_rtrn_i
add wave -noupdate -group i_dut /tb/i_dut/mem_data_ack_i
add wave -noupdate -group i_dut /tb/i_dut/NC_ADDR_BEGIN
add wave -noupdate -group i_dut /tb/i_dut/NC_ADDR_GE_LT
add wave -noupdate -group i_dut /tb/i_dut/NUM_PORTS
add wave -noupdate -group i_dut /tb/i_dut/cache_en
add wave -noupdate -group i_dut /tb/i_dut/flush_en
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_vld
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_tag
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_idx
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_off
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_data
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_data_be
add wave -noupdate -group i_dut /tb/i_dut/wr_vld_bits
add wave -noupdate -group i_dut /tb/i_dut/wr_req
add wave -noupdate -group i_dut /tb/i_dut/wr_ack
add wave -noupdate -group i_dut /tb/i_dut/wr_idx
add wave -noupdate -group i_dut /tb/i_dut/wr_off
add wave -noupdate -group i_dut /tb/i_dut/wr_data
add wave -noupdate -group i_dut /tb/i_dut/wr_data_be
add wave -noupdate -group i_dut /tb/i_dut/miss_req
add wave -noupdate -group i_dut /tb/i_dut/miss_ack
add wave -noupdate -group i_dut /tb/i_dut/miss_nc
add wave -noupdate -group i_dut /tb/i_dut/miss_we
add wave -noupdate -group i_dut /tb/i_dut/miss_wdata
add wave -noupdate -group i_dut /tb/i_dut/miss_paddr
add wave -noupdate -group i_dut /tb/i_dut/miss_vld_bits
add wave -noupdate -group i_dut /tb/i_dut/miss_size
add wave -noupdate -group i_dut /tb/i_dut/miss_wr_id
add wave -noupdate -group i_dut /tb/i_dut/miss_rtrn_vld
add wave -noupdate -group i_dut /tb/i_dut/miss_rtrn_id
add wave -noupdate -group i_dut /tb/i_dut/rd_req
add wave -noupdate -group i_dut /tb/i_dut/rd_ack
add wave -noupdate -group i_dut /tb/i_dut/rd_tag
add wave -noupdate -group i_dut /tb/i_dut/rd_idx
add wave -noupdate -group i_dut /tb/i_dut/rd_off
add wave -noupdate -group i_dut /tb/i_dut/rd_data
add wave -noupdate -group i_dut /tb/i_dut/rd_vld_bits
add wave -noupdate -group i_dut /tb/i_dut/rd_hit_oh
add wave -noupdate -group i_dut /tb/i_dut/wbuffer_data
add wave -noupdate -group i_dut /tb/i_dut/flush_ack_o
add wave -noupdate -group i_dut /tb/i_dut/miss_o
add wave -noupdate -group i_dut /tb/i_dut/wbuffer_empty_o
add wave -noupdate -group i_dut /tb/i_dut/amo_ack_o
add wave -noupdate -group i_dut /tb/i_dut/req_ports_o
add wave -noupdate -group i_dut /tb/i_dut/mem_data_req_o
add wave -noupdate -group i_dut /tb/i_dut/mem_data_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/clk_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rst_ni
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/cache_en_i
add wave -noupdate -group i_wbuffer -color Magenta /tb/i_dut/i_serpent_dcache_wbuffer/req_port_i
add wave -noupdate -group i_wbuffer -color Magenta /tb/i_dut/i_serpent_dcache_wbuffer/req_port_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_ack_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_rtrn_vld_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_rtrn_id_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_ack_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_data_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_vld_bits_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_hit_oh_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_ack_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/empty_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_paddr_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_req_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_we_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_wdata_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_vld_bits_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_nc_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_size_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_wr_id_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_tag_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_idx_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_off_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_req_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_req_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_idx_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_off_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_data_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_data_be_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wbuffer_data_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/NC_ADDR_BEGIN
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/NC_ADDR_GE_LT
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_stat_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_stat_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wbuffer_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wbuffer_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/valid
add wave -noupdate -group i_wbuffer -color Magenta /tb/i_dut/i_serpent_dcache_wbuffer/debug_paddr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/dirty
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tocheck
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wbuffer_hit_oh
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/inval_hit
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/bdirty
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/next_ptr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/dirty_ptr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/hit_ptr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_ptr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/check_ptr_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/check_ptr_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rtrn_ptr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_cnt_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_cnt_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_id_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_id_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rtrn_id
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/bdirty_off
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_be
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_paddr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_paddr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/check_en_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/check_en_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/full
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/dirty_rd_en
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rdy
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rtrn_empty
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/evict
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/nc_pending_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/nc_pending_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/addr_is_nc
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/clk_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/rst_ni
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/enable_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wbuffer_empty_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/amo_req_i
add wave -noupdate -group i_missunit -expand /tb/i_dut/i_serpent_dcache_missunit/miss_req_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_nc_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_we_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_wdata_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_paddr_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_vld_bits_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_size_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_wr_id_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mem_rtrn_vld_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mem_rtrn_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mem_data_ack_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_ack_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/cache_en_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_en_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/amo_ack_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_ack_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_replay_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_rtrn_vld_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_rtrn_id_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_vld_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_nc_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_we_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_tag_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_idx_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_off_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_data_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_data_be_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_vld_bits_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mem_data_req_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mem_data_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/NUM_PORTS
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/state_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/state_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mshr_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mshr_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/repl_way
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/inv_way
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/rnd_way
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mshr_vld_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mshr_vld_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mshr_allocate
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/update_lfsr
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/all_ways_valid
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/enable_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/enable_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_ack_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_ack_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/amo_sel
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_done
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mask_reads
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_is_write
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/amo_data
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_port_idx
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/cnt_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/cnt_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_req
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/inv_vld
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/cl_write_en
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/load_ack
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/store_ack
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/amo_ack
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/clk_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rst_ni
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_tag_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_idx_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_off_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_req_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_vld_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_tag_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_idx_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_off_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_data_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_data_be_i
add wave -noupdate -expand -group i_mem -expand /tb/i_dut/i_serpent_dcache_mem/wr_vld_bits_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_req_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_idx_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_off_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_data_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_data_be_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_data_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_ack_o
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_vld_bits_o
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_hit_oh_o
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_data_o
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_ack_o
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/NUM_PORTS
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_req
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_we
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_be
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_idx
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_idx_d
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_idx_q
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_off_d
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_off_q
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_wdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_rdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rdata_cl
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_req
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_we
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_wdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/tag_rdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_addr
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_sel_d
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_sel_q
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_hit_oh
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_be
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_rdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_cmp_addr
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_bvalid
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_data
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_tag_rdata
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/clk_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rst_ni}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/flush_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/cache_en_i}
# Wave group i_ctrl0 (continued): signals of /tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl.
# -noupdate keeps the wave window from redrawing after each individual add;
# req_port_i is added with -expand so the compound signal is shown expanded.
add wave -noupdate -group i_ctrl0 -expand {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/req_port_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_ack_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_replay_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_rtrn_vld_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_ack_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_data_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_vld_bits_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_hit_oh_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/req_port_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_req_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_we_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_wdata_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_vld_bits_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_paddr_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_nc_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_size_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_wr_id_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_tag_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_idx_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_off_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_req_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/NC_ADDR_BEGIN}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/NC_ADDR_GE_LT}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/state_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/state_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_tag_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_tag_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_idx_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_idx_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_off_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_off_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/vld_data_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/vld_data_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/save_tag}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_req_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_req_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/data_size_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/data_size_q}
# Wave group i_ctrl1: all probed signals of the i_serpent_dcache_ctrl instance
# under genblk1[1]. The leaf names are listed once and appended to the common
# hierarchy prefix; the order below is the display order in the wave window.
set ctrl1_scope {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl}
foreach ctrl1_sig {
    clk_i rst_ni flush_i cache_en_i
    req_port_i miss_ack_i miss_replay_i miss_rtrn_vld_i
    rd_ack_i rd_data_i rd_vld_bits_i rd_hit_oh_i
    req_port_o miss_req_o miss_we_o miss_wdata_o miss_vld_bits_o
    miss_paddr_o miss_nc_o miss_size_o miss_wr_id_o
    rd_tag_o rd_idx_o rd_off_o rd_req_o
    NC_ADDR_BEGIN NC_ADDR_GE_LT
    state_d state_q
    address_tag_d address_tag_q
    address_idx_d address_idx_q
    address_off_d address_off_q
    vld_data_d vld_data_q
    save_tag
    rd_req_d rd_req_q
    data_size_d data_size_q
} {
    # Substituted values are not re-parsed by Tcl, so the literal index
    # brackets in the prefix reach the simulator unchanged.
    add wave -noupdate -group i_ctrl1 "${ctrl1_scope}/${ctrl1_sig}"
}
# Drop the helper variables so the script leaves no extra state behind.
unset ctrl1_scope ctrl1_sig
# Tool-generated tree refresh emitted after the grouped signals.
TreeUpdate [SetDefaultTree]
# Switch to the next wave pane before adding the memory probes
# (presumably a second pane of the same window; confirm in the GUI).
quietly WaveActivateNextPane
# Element 6741 of the three arrays inside the testbench memory instance.
foreach mem_probe {mem_array_q mem_array_shadow_q mem_array_dirty_q} {
    add wave -noupdate "/tb/i_tb_mem/${mem_probe}\[6741\]"
}
TreeUpdate [SetDefaultTree]
# Restore the three saved cursors and make cursor 2 the active one.
WaveRestoreCursors \
    {{Cursor 1} {31432807547 ps} 0} \
    {{Cursor 2} {29040000 ps} 0} \
    {{Cursor 3} {1027790000 ps} 0}
quietly wave cursor active 2
# Wave window display options, applied one at a time in the saved order.
foreach {wave_opt wave_val} {
    -namecolwidth    375
    -valuecolwidth   224
    -justifyvalue    left
    -signalnamewidth 1
    -snapdistance    10
    -datasetprefix   0
    -rowmargin       4
    -childrowmargin  2
    -gridoffset      0
    -gridperiod      1
    -griddelta       40
    -timeline        0
    -timelineunits   ps
} {
    configure wave $wave_opt $wave_val
}
# Drop the helper variables so the script leaves no extra state behind.
unset mem_probe wave_opt wave_val
# Redraw now that all -noupdate additions are done, then restore the zoom range.
update
WaveRestoreZoom {0 ps} {103267500 ps}

View file

@ -4,6 +4,7 @@ src-list := tb.list
src := $(shell xargs printf '\n%s' < $(src-list) | cut -b 1-)
compile_flag += +cover+/dut -incr -64 -nologo
sim_opts += -64 -coverage -classdebug -voptargs="+acc"
questa_version ?= ${QUESTASIM_VERSION}
build: clean
vlib${questa_version} $(library)

View file

@ -38,7 +38,7 @@ package tb_pkg;
parameter ERROR_CNT_STOP_LEVEL = 1; // use 1 for debugging. 0 runs the complete simulation...
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
// use to ensure proper ATI timing
///////////////////////////////////////////////////////////////////////////////