rtl cache redesign to support xilinx bram types

This commit is contained in:
Blaise Tine 2024-10-13 03:40:45 -07:00
parent a5381fd788
commit 28bf27e951
10 changed files with 419 additions and 384 deletions

View file

@ -155,6 +155,7 @@ cache()
# test writeback
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_DIRTYBYTES=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress

View file

@ -98,7 +98,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
.TAG_WIDTH (L2_TAG_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`L2_WRITEBACK),
.DIRTY_BYTES (`L2_WRITEBACK),
.DIRTY_BYTES (`L2_DIRTYBYTES),
.UUID_WIDTH (`UUID_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.CORE_OUT_BUF (3),

View file

@ -542,6 +542,11 @@
`define DCACHE_WRITEBACK 0
`endif
// Enable Cache Dirty bytes
`ifndef DCACHE_DIRTYBYTES
`define DCACHE_DIRTYBYTES 0
`endif
// LMEM Configurable Knobs ////////////////////////////////////////////////////
`ifndef LMEM_DISABLE
@ -602,6 +607,11 @@
`define L2_WRITEBACK 0
`endif
// Enable Cache Dirty bytes
`ifndef L2_DIRTYBYTES
`define L2_DIRTYBYTES 0
`endif
// L3cache Configurable Knobs /////////////////////////////////////////////////
// Cache Size
@ -644,6 +654,11 @@
`define L3_WRITEBACK 0
`endif
// Enable Cache Dirty bytes
`ifndef L3_DIRTYBYTES
`define L3_DIRTYBYTES 0
`endif
`ifndef MEMORY_BANKS
`define MEMORY_BANKS 2
`endif

View file

@ -150,7 +150,7 @@ module VX_socket import VX_gpu_pkg::*; #(
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`DCACHE_WRITEBACK),
.DIRTY_BYTES (`DCACHE_WRITEBACK),
.DIRTY_BYTES (`DCACHE_DIRTYBYTES),
.NC_ENABLE (1),
.CORE_OUT_BUF (3),
.MEM_OUT_BUF (2)

View file

@ -84,7 +84,7 @@ module Vortex import VX_gpu_pkg::*; (
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`L3_WRITEBACK),
.DIRTY_BYTES (`L3_WRITEBACK),
.DIRTY_BYTES (`L3_DIRTYBYTES),
.UUID_WIDTH (`UUID_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.CORE_OUT_BUF (3),

View file

@ -48,20 +48,20 @@ module VX_bank_flush #(
localparam STATE_WAIT2 = 4;
localparam STATE_DONE = 5;
reg [2:0] state_r, state_n;
reg [2:0] state, state_n;
reg [CTR_WIDTH-1:0] counter_r;
reg [CTR_WIDTH-1:0] counter;
always @(*) begin
state_n = state_r;
case (state_r)
state_n = state;
case (state)
STATE_IDLE: begin
if (flush_begin) begin
state_n = STATE_WAIT1;
end
end
STATE_INIT: begin
if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
if (counter == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
state_n = STATE_IDLE;
end
end
@ -72,7 +72,7 @@ module VX_bank_flush #(
end
end
STATE_FLUSH: begin
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
if (counter == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
state_n = (BANK_ID == 0) ? STATE_DONE : STATE_WAIT2;
end
end
@ -93,32 +93,32 @@ module VX_bank_flush #(
always @(posedge clk) begin
if (reset) begin
state_r <= STATE_INIT;
counter_r <= '0;
state <= STATE_INIT;
counter <= '0;
end else begin
state_r <= state_n;
if (state_r != STATE_IDLE) begin
if ((state_r == STATE_INIT)
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
counter_r <= counter_r + CTR_WIDTH'(1);
state <= state_n;
if (state != STATE_IDLE) begin
if ((state == STATE_INIT)
|| ((state == STATE_FLUSH) && flush_ready)) begin
counter <= counter + CTR_WIDTH'(1);
end
end else begin
counter_r <= '0;
counter <= '0;
end
end
end
assign flush_end = (state_r == STATE_DONE);
assign flush_init = (state_r == STATE_INIT);
assign flush_valid = (state_r == STATE_FLUSH);
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
assign flush_end = (state == STATE_DONE);
assign flush_init = (state == STATE_INIT);
assign flush_valid = (state == STATE_FLUSH);
assign flush_line = counter[`CS_LINE_SEL_BITS-1:0];
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way
VX_decoder #(
.N (`CS_WAY_SEL_BITS),
.D (NUM_WAYS)
) ctr_decoder (
.sel_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
.sel_in (counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
.data_in (1'b1),
.data_out (flush_way)
);

View file

@ -141,13 +141,18 @@ module VX_cache_bank #(
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
wire replay_ready;
wire is_init_st0, is_init_st1;
wire valid_sel, valid_st0, valid_st1;
wire is_init_st0;
wire is_creq_st0, is_creq_st1;
wire is_fill_st0, is_fill_st1;
wire is_flush_st0, is_flush_st1;
wire [NUM_WAYS-1:0] flush_way_st0;
wire [NUM_WAYS-1:0] evict_way_st0, evict_way_st1;
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1;
wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1;
wire [`CS_TAG_SEL_BITS-1:0] line_tag_st1;
wire rw_sel, rw_st0, rw_st1;
wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1;
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
@ -158,16 +163,10 @@ module VX_cache_bank #(
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0;
wire valid_sel, valid_st0, valid_st1;
wire is_creq_st0, is_creq_st1;
wire is_fill_st0, is_fill_st1;
wire is_replay_st0, is_replay_st1;
wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1;
wire evict_dirty_st0, evict_dirty_st1;
wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1;
wire [NUM_WAYS-1:0] tag_matches_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
wire mshr_pending_st0, mshr_pending_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_id_st0, mshr_prev_id_st1;
wire mshr_empty;
wire flush_valid;
@ -201,11 +200,9 @@ module VX_cache_bank #(
.bank_empty (no_pending_req)
);
wire rdw_hazard1_sel;
wire rdw_hazard2_sel;
reg rdw_hazard3_st1;
logic rdw_hazard, post_hazard;
wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1;
wire pipe_stall = crsp_queue_stall || rdw_hazard;
// inputs arbitration:
// mshr replay has highest priority to maximize utilization since there is no miss.
@ -224,17 +221,14 @@ module VX_cache_bank #(
wire creq_enable = creq_grant && core_req_valid;
assign replay_ready = replay_grant
&& ~rdw_hazard1_sel
&& ~pipe_stall;
assign mem_rsp_ready = fill_grant
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
&& ~rdw_hazard2_sel
&& ~pipe_stall;
assign flush_ready = flush_grant
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
&& ~rdw_hazard2_sel
&& ~pipe_stall;
assign core_req_ready = creq_grant
@ -298,6 +292,12 @@ module VX_cache_bank #(
assign req_uuid_sel = '0;
end
wire is_init_sel = init_valid;
wire is_creq_sel = creq_enable || replay_enable;
wire is_fill_sel = fill_enable;
wire is_flush_sel = flush_enable;
wire is_replay_sel = replay_enable;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.RESETW (1)
@ -305,8 +305,8 @@ module VX_cache_bank #(
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}),
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0})
.data_in ({valid_sel, is_init_sel, is_fill_sel, is_flush_sel, is_creq_sel, is_replay_sel, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}),
.data_out ({valid_st0, is_init_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0})
);
if (UUID_WIDTH != 0) begin : g_req_uuid_st0
@ -315,82 +315,67 @@ module VX_cache_bank #(
assign req_uuid_st0 = '0;
end
wire do_init_st0 = valid_st0 && is_init_st0;
wire do_flush_st0 = valid_st0 && is_flush_st0;
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
wire do_creq_wr_st0 = valid_st0 && is_creq_st0 && rw_st0;
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
wire do_replay_wr_st0 = valid_st0 && is_replay_st0 && rw_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0;
wire do_cache_rd_st0 = do_creq_rd_st0 || do_replay_rd_st0;
wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0;
wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0;
wire is_read_st0 = is_creq_st0 && ~rw_st0;
wire is_write_st0 = is_creq_st0 && rw_st0;
wire do_init_st0 = valid_st0 && is_init_st0;
wire do_flush_st0 = valid_st0 && is_flush_st0;
wire do_read_st0 = valid_st0 && is_read_st0;
wire do_write_st0 = valid_st0 && is_write_st0;
wire do_fill_st0 = valid_st0 && is_fill_st0;
assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0);
wire [NUM_WAYS-1:0] evict_way_st0;
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1;
wire [NUM_WAYS-1:0] tag_matches_st1;
wire do_lookup_st0 = do_read_st0 || do_write_st0;
VX_cache_tags #(
.INSTANCE_ID($sformatf("%s-tags", INSTANCE_ID)),
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS),
.WORD_SIZE (WORD_SIZE),
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH)
.WRITEBACK (WRITEBACK)
) cache_tags (
.clk (clk),
.reset (reset),
.req_uuid (req_uuid_st0),
// init/flush/fill/write/lookup
.stall (pipe_stall),
// inputs
.init (do_init_st0),
.flush (do_flush_st0 && ~pipe_stall),
.fill (do_fill_st0 && ~pipe_stall),
.write (do_cache_wr_st0 && ~pipe_stall),
.lookup (do_lookup_st0 && ~pipe_stall),
.line_addr (addr_st0),
.way_idx (flush_way_st0),
// tag matches
.tag_matches(tag_matches_st0),
// replacement
.evict_dirty(evict_dirty_st0),
.flush_way (flush_way_st0),
// outputs
.tag_matches_r(tag_matches_st1),
.line_tag_r (line_tag_st1),
.evict_tag_r(evict_tag_st1),
.evict_way (evict_way_st0),
.evict_tag (evict_tag_st0)
.evict_way_r(evict_way_st1)
);
wire [`CS_TAG_SEL_BITS-1:0] line_tag2_st0;
wire is_flush2_st0 = WRITEBACK && is_flush_st0;
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
assign way_idx_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? evict_tag_st0 : line_tag_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
assign mshr_id_st0 = is_replay_st0 ? replay_id_st0 : mshr_alloc_id_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1})
.data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_id_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_id_st1, mshr_pending_st1})
);
// we have a tag hit
wire is_hit_st1 = (| way_idx_st1);
wire is_hit_st1 = (| tag_matches_st1);
if (UUID_WIDTH != 0) begin : g_req_uuid_st1
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
@ -398,51 +383,71 @@ module VX_cache_bank #(
assign req_uuid_st1 = '0;
end
wire is_read_st1 = is_creq_st1 && ~rw_st1;
wire is_write_st1 = is_creq_st1 && rw_st1;
wire is_read_st1 = is_creq_st1 && ~rw_st1;
wire is_write_st1 = is_creq_st1 && rw_st1;
wire do_init_st1 = valid_st1 && is_init_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_flush_st1 = valid_st1 && is_flush_st1;
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
wire do_read_hit_st1 = do_creq_rd_st1 && is_hit_st1;
wire do_read_miss_st1 = do_creq_rd_st1 && ~is_hit_st1;
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
wire do_read_st1 = valid_st1 && is_read_st1;
wire do_write_st1 = valid_st1 && is_write_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK;
assign addr_st1 = {line_tag_st1, line_idx_st1};
// ensure mshr replay always get a hit
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time))
// both tag and data stores use BRAM with no read-during-write protection.
// we need to stall the pipeline to prevent read-after-write hazards.
assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
always @(posedge clk) begin
// stall reads following writes to same line address
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_idx_st0 == line_idx_st1)
&& ~rdw_hazard3_st1; // release pipeline stall
if (WRITE_ENABLE) begin : g_rdw_hazard
// This implementation uses single-port BRAMs for the tags and data stores.
// Using different stages for read and write operations requires a pipeline stall in between due to address port sharing.
// Tags fill/flush can perform read and write in the same stage, since there is no dependency between them.
// Data fill/flush can perform read and write in the same stage, since way_idx is available in st0.
// A data read should happen in st0 for its result to be available in st1.
// A data write should happen in st1 when the tag hit status is available.
wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0];
wire is_read_sel = is_creq_sel && !rw_sel;
wire is_write_sel = is_creq_sel && rw_sel;
wire is_same_read_sel = is_read_sel && (line_idx_sel == line_idx_st0);
always @(posedge clk) begin
if (reset) begin
post_hazard <= 0;
rdw_hazard <= 0;
end else begin
if (!crsp_queue_stall) begin
post_hazard <= rdw_hazard;
rdw_hazard <= do_write_st0 && valid_sel && !(is_write_sel || is_same_read_sel || (is_flush_sel && !WRITEBACK));
end
end
end
end else begin : g_rdw_hazard_ro
assign rdw_hazard = 0;
assign post_hazard = 0;
end
assign write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0];
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
`UNUSED_VAR (data_st1)
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
wire [LINE_SIZE-1:0] dirty_byteen_st1;
wire [`CS_LINE_WIDTH-1:0] evict_data_st1;
wire [LINE_SIZE-1:0] evict_byteen_st1;
wire line_dirty_st1;
wire data_write;
wire [`CS_LINE_SEL_BITS-1:0] data_line_idx;
if (WRITE_ENABLE) begin : g_data_ctrl
// by default all data accesses happen in st0 and use line_idx_st0.
// data writes should happen in st1 when the tag hit is available,
// and use line_idx_st1 to ensure the correct line is updated.
// if a rdw hazard is active due to conflict, ensure we don't write twice.
assign data_write = do_write_st1 && !post_hazard && ~crsp_queue_stall;
assign data_line_idx = data_write ? line_idx_st1 : line_idx_st0;
end else begin : g_data_ctrl_ro
`UNUSED_VAR (post_hazard)
`UNUSED_VAR (do_write_st1)
assign data_write = 0;
assign data_line_idx = line_idx_st0;
end
VX_cache_data #(
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
@ -450,50 +455,58 @@ module VX_cache_bank #(
.WORD_SIZE (WORD_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.WRITEBACK (WRITEBACK),
.DIRTY_BYTES (DIRTY_BYTES),
.UUID_WIDTH (UUID_WIDTH)
.DIRTY_BYTES (DIRTY_BYTES)
) cache_data (
.clk (clk),
.reset (reset),
.req_uuid (req_uuid_st1),
.init (do_init_st1),
.fill (do_fill_st1 && ~pipe_stall),
.flush (do_flush_st1 && ~pipe_stall),
.write (do_cache_wr_st1 && ~pipe_stall),
.read (do_cache_rd_st1 && ~pipe_stall),
.way_idx (way_idx_st1),
.line_addr (addr_st1),
.word_idx (word_idx_st1),
.fill_data (fill_data_st1),
.stall (pipe_stall),
// inputs
.init (do_init_st0),
.fill (do_fill_st0 && ~pipe_stall),
.flush (do_flush_st0 && ~pipe_stall),
.read (do_read_st0 && ~pipe_stall),
.write (data_write),
.evict_way (evict_way_st0),
.tag_matches(tag_matches_st1),
.line_idx (data_line_idx),
.fill_data (data_st0),
.write_data (write_data_st1),
.word_idx (word_idx_st1),
.write_byteen(byteen_st1),
// outputs
.read_data (read_data_st1),
.dirty_data (dirty_data_st1),
.dirty_byteen(dirty_byteen_st1)
.line_dirty (line_dirty_st1),
.evict_data (evict_data_st1),
.evict_byteen(evict_byteen_st1)
);
wire mshr_allocate_st0 = valid_st0 && is_creq_st0;
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~is_replay_st0;
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~is_replay_st1;
// release allocated mshr entry if we had a hit
wire mshr_release_st1;
if (WRITEBACK) begin : g_mshr_release
assign mshr_release_st1 = valid_st1 && is_creq_st1 && is_hit_st1;
assign mshr_release_st1 = is_hit_st1;
end else begin : g_mshr_release_ro
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
// this can happen when writes are sent late, when the fill was already in flight.
assign mshr_release_st1 = valid_st1 && is_creq_st1 && (is_hit_st1 || (rw_st1 && ~mshr_pending_st1));
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address.
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content.
// this can happen when writes are sent to memory late, when a related fill was already in flight.
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
end
wire mshr_dequeue = mshr_release_st1 && ~pipe_stall;
wire mshr_release_fire = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall;
wire [1:0] mshr_dequeue;
`POP_COUNT(mshr_dequeue, {replay_fire, mshr_release_fire});
VX_pending_size #(
.SIZE (MSHR_SIZE)
.SIZE (MSHR_SIZE),
.DECRW (2)
) mshr_pending_size (
.clk (clk),
.reset (reset),
.incr (core_req_fire),
.decr (replay_fire || mshr_dequeue),
.decr (mshr_dequeue),
.empty (mshr_empty),
`UNUSED_PIN (alm_empty),
.full (mshr_alm_full),
@ -508,7 +521,6 @@ module VX_cache_bank #(
.NUM_BANKS (NUM_BANKS),
.MSHR_SIZE (MSHR_SIZE),
.WRITEBACK (WRITEBACK),
.RDW_STALL (1),
.UUID_WIDTH (UUID_WIDTH),
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
) cache_mshr (
@ -517,7 +529,7 @@ module VX_cache_bank #(
.deq_req_uuid (req_uuid_sel),
.alc_req_uuid (req_uuid_st0),
.rel_req_uuid (req_uuid_st1),
.fin_req_uuid (req_uuid_st1),
// memory fill
.fill_valid (mem_rsp_fire),
@ -539,11 +551,15 @@ module VX_cache_bank #(
.allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
.allocate_id (mshr_alloc_id_st0),
.allocate_pending(mshr_pending_st0),
.allocate_previd(mshr_prev_id_st0),
`UNUSED_PIN (allocate_ready),
// release
.release_valid (mshr_release_st1 && ~pipe_stall),
.release_id (mshr_id_st1)
// finalize
.finalize_valid (mshr_finalize_st1 && ~pipe_stall),
.finalize_is_release(mshr_release_st1),
.finalize_is_pending(mshr_pending_st1),
.finalize_id (mshr_id_st1),
.finalize_previd(mshr_prev_id_st1)
);
// schedule core response
@ -553,7 +569,7 @@ module VX_cache_bank #(
wire [REQ_SEL_WIDTH-1:0] crsp_queue_idx;
wire [TAG_WIDTH-1:0] crsp_queue_tag;
assign crsp_queue_valid = do_cache_rd_st1;
assign crsp_queue_valid = do_read_st1 && is_hit_st1;
assign crsp_queue_idx = req_idx_st1;
assign crsp_queue_data = read_data_st1;
assign crsp_queue_tag = tag_st1;
@ -565,7 +581,7 @@ module VX_cache_bank #(
) core_rsp_queue (
.clk (clk),
.reset (reset),
.valid_in (crsp_queue_valid && ~rdw_hazard3_st1),
.valid_in (crsp_queue_valid && ~rdw_hazard),
.ready_in (crsp_queue_ready),
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
@ -585,37 +601,26 @@ module VX_cache_bank #(
wire mreq_queue_rw;
wire [`UP(FLAGS_WIDTH)-1:0] mreq_queue_flags;
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK);
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
if (WRITEBACK) begin : g_mreq_queue_push
if (DIRTY_BYTES) begin : g_dirty_bytes
// ensure dirty bytes match the tag info
wire has_dirty_bytes = (| dirty_byteen_st1);
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
end
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|| do_writeback_st1)
&& ~rdw_hazard3_st1;
end else begin : g_mreq_queue_push_ro
`UNUSED_VAR (do_write_miss_st1)
`UNUSED_VAR (do_writeback_st1)
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|| do_creq_wr_st1)
&& ~rdw_hazard3_st1;
end
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
assign mreq_queue_addr = addr_st1;
assign mreq_queue_flags = flags_st1;
wire do_writeback_st1 = do_fill_or_flush_st1 && line_dirty_st1;
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1};
if (WRITE_ENABLE) begin : g_mreq_queue
if (WRITEBACK) begin : g_writeback
if (WRITEBACK) begin : g_wb
if (DIRTY_BYTES) begin : g_dirty_bytes
// ensure dirty bytes match the tag info
wire has_dirty_bytes = (| evict_byteen_st1);
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (line_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, line_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
end
assign mreq_queue_push = (((do_read_st1 || do_write_st1) && ~is_hit_st1 && ~mshr_pending_st1)
|| do_writeback_st1)
&& ~pipe_stall;
assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1;
assign mreq_queue_rw = is_fill_or_flush_st1;
assign mreq_queue_data = dirty_data_st1;
assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1;
end else begin : g_writethrough
assign mreq_queue_data = evict_data_st1;
assign mreq_queue_byteen = is_fill_or_flush_st1 ? evict_byteen_st1 : '1;
end else begin : g_wt
wire [LINE_SIZE-1:0] line_byteen;
VX_decoder #(
.N (`CS_WORD_SEL_BITS),
@ -625,19 +630,30 @@ module VX_cache_bank #(
.data_in (byteen_st1),
.data_out (line_byteen)
);
assign mreq_queue_push = ((do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1)
|| do_write_st1)
&& ~pipe_stall;
assign mreq_queue_addr = addr_st1;
assign mreq_queue_rw = rw_st1;
assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_data_st1}};
assign mreq_queue_byteen = rw_st1 ? line_byteen : '1;
`UNUSED_VAR (is_fill_or_flush_st1)
`UNUSED_VAR (dirty_data_st1)
`UNUSED_VAR (dirty_byteen_st1)
`UNUSED_VAR (do_writeback_st1)
`UNUSED_VAR (evict_addr_st1)
`UNUSED_VAR (evict_data_st1)
`UNUSED_VAR (evict_byteen_st1)
end
end else begin : g_mreq_queue_ro
assign mreq_queue_push = (do_read_st1 && ~is_hit_st1 && ~mshr_pending_st1)
&& ~pipe_stall;
assign mreq_queue_addr = addr_st1;
assign mreq_queue_rw = 0;
assign mreq_queue_data = '0;
assign mreq_queue_byteen = '1;
`UNUSED_VAR (dirty_data_st1)
`UNUSED_VAR (dirty_byteen_st1)
`UNUSED_VAR (do_writeback_st1)
`UNUSED_VAR (evict_addr_st1)
`UNUSED_VAR (evict_data_st1)
`UNUSED_VAR (evict_byteen_st1)
end
if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid
@ -646,6 +662,9 @@ module VX_cache_bank #(
assign mreq_queue_tag = mshr_id_st1;
end
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
assign mreq_queue_flags = flags_st1;
VX_fifo_queue #(
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
.DEPTH (MREQ_SIZE),
@ -667,6 +686,10 @@ module VX_cache_bank #(
assign mem_req_valid = ~mreq_queue_empty;
`UNUSED_VAR (do_fill_st1)
`UNUSED_VAR (do_flush_st1)
`UNUSED_VAR (evict_way_st1)
///////////////////////////////////////////////////////////////////////////////
`ifdef PERF_ENABLE
@ -681,7 +704,7 @@ module VX_cache_bank #(
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
always @(posedge clk) begin
if (input_stall || pipe_stall) begin
`TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1))
`TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard))
end
if (mem_rsp_fire) begin
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel))
@ -696,13 +719,54 @@ module VX_cache_bank #(
`TRACE(2, ("%t: %s core-rd-req: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(core_req_addr, BANK_ID), core_req_tag, core_req_idx, req_uuid_sel))
end
end
if (do_init_st0) begin
`TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0))
end
if (do_fill_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0))
end
if (do_flush_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0))
end
if (do_read_st1 && ~pipe_stall) begin
if (is_hit_st1) begin
`TRACE(3, ("%t: %s tags-rd-hit: addr=0x%0h, way=%b, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, req_uuid_st1))
end else begin
`TRACE(3, ("%t: %s tags-rd-miss: addr=0x%0h, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), line_idx_st1, line_tag_st1, req_uuid_st1))
end
end
if (do_write_st1 && ~pipe_stall) begin
if (is_hit_st1) begin
`TRACE(3, ("%t: %s tags-wr-hit: addr=0x%0h, way=%b, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, line_tag_st1, req_uuid_st1))
end else begin
`TRACE(3, ("%t: %s tags-wr-miss: addr=0x%0h, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), line_idx_st1, line_tag_st1, req_uuid_st1))
end
end
if (do_fill_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0))
end
if (do_flush_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0))
end
if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1))
end
if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_data_st1, req_uuid_st1))
end
if (crsp_queue_fire) begin
`TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1))
end
if (mreq_queue_push) begin
if (do_creq_wr_st1 && !WRITEBACK) begin
if (!WRITEBACK && do_write_st1) begin
`TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
end else if (do_writeback_st1) begin
end else if (WRITEBACK && do_writeback_st1) begin
`TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
end else begin
`TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1))

View file

@ -14,8 +14,6 @@
`include "VX_cache_define.vh"
module VX_cache_data #(
parameter `STRING INSTANCE_ID= "",
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
@ -31,94 +29,105 @@ module VX_cache_data #(
// Enable cache writeback
parameter WRITEBACK = 0,
// Enable dirty bytes on writeback
parameter DIRTY_BYTES = 0,
// Request debug identifier
parameter UUID_WIDTH = 0
parameter DIRTY_BYTES = 0
) (
input wire clk,
input wire reset,
`IGNORE_UNUSED_BEGIN
input wire[`UP(UUID_WIDTH)-1:0] req_uuid,
`IGNORE_UNUSED_END
input wire stall,
// inputs
input wire init,
input wire fill,
input wire flush,
input wire write,
input wire read,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx,
input wire write,
input wire [`CS_LINE_SEL_BITS-1:0] line_idx,
input wire [NUM_WAYS-1:0] evict_way,
input wire [NUM_WAYS-1:0] tag_matches,
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
input wire [`CS_WORD_WIDTH-1:0] write_data,
input wire [WORD_SIZE-1:0] write_byteen,
input wire [NUM_WAYS-1:0] way_idx,
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx,
// outputs
output wire [`CS_WORD_WIDTH-1:0] read_data,
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
output wire [LINE_SIZE-1:0] dirty_byteen
output wire line_dirty,
output wire [`CS_LINE_WIDTH-1:0] evict_data,
output wire [LINE_SIZE-1:0] evict_byteen
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (line_addr)
`UNUSED_VAR (init)
`UNUSED_VAR (read)
`UNUSED_VAR (flush)
`UNUSED_VAR (stall)
localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1;
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx_bin;
wire [`CS_LINE_SEL_BITS-1:0] line_idx;
assign line_idx = line_addr[`CS_LINE_SEL_BITS-1:0];
if (WRITEBACK != 0) begin : g_writeback
localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? LINE_SIZE : 0);
wire [`LOG2UP(NUM_WAYS)-1:0] evict_way_idx, evict_way_idx_r;
VX_onehot_encoder #(
.N (NUM_WAYS)
) way_idx_enc (
.data_in (way_idx),
.data_out (way_idx_bin),
`UNUSED_PIN (valid_out)
);
VX_onehot_encoder #(
.N (NUM_WAYS)
) fill_way_enc (
.data_in (evict_way),
.data_out (evict_way_idx),
`UNUSED_PIN (valid_out)
);
if (WRITEBACK) begin : g_dirty_data
assign dirty_data = line_rdata[way_idx_bin];
end else begin : g_dirty_data_0
assign dirty_data = '0;
end
`BUFFER_EX(evict_way_idx_r, evict_way_idx, ~stall, 1);
if (DIRTY_BYTES) begin : g_dirty_byteen
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_rdata;
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_wdata;
wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata;
wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata;
wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wren;
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j
wire [WORD_SIZE-1:0] word_mask = {WORD_SIZE{(WORD_SIZE == 1) || (word_idx == j)}};
wire [WORD_SIZE-1:0] wdata = write ? (bs_rdata[i][j] | (write_byteen & word_mask)) : ((fill || flush) ? '0 : bs_rdata[i][j]);
assign bs_wdata[i][j] = init ? '0 : (way_idx[i] ? wdata : bs_rdata[i][j]);
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata
wire dirty_data = write; // only asserted on writes
wire dirty_wren = init || (write ? tag_matches[i] : evict_way[i]);
if (DIRTY_BYTES != 0) begin : g_dirty_bytes
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_data;
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bytes_wren;
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j
wire word_sel = tag_matches[i] && ((WORD_SIZE == 1) || (word_idx == j));
wire [WORD_SIZE-1:0] word_en = write_byteen & {WORD_SIZE{word_sel}};
assign bytes_data[j] = {WORD_SIZE{write}}; // only asserted on writes
assign bytes_wren[j] = {WORD_SIZE{init}} | (write ? word_en : {WORD_SIZE{evict_way[i]}});
end
assign byteen_wdata[i] = {dirty_data, bytes_data};
assign byteen_wren[i] = {dirty_wren, bytes_wren};
assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r];
end else begin : g_no_dirty_bytes
assign byteen_wdata[i] = dirty_data;
assign byteen_wren[i] = dirty_wren;
assign line_dirty = byteen_rdata[evict_way_idx_r];
assign evict_byteen = '1;
end
end
wire bs_read = write || fill || flush;
wire bs_write = init || write || fill || flush;
wire byteen_read = fill || flush;
wire byteen_write = init || write || fill || flush;
VX_sp_ram #(
.DATAW (LINE_SIZE * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK)
.DATAW (BYTEEN_DATAW * NUM_WAYS),
.WRENW (BYTEEN_DATAW * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (1)
) byteen_store (
.clk (clk),
.reset (reset),
.read (bs_read),
.write (bs_write),
.wren (1'b1),
.read (byteen_read),
.write (byteen_write),
.wren (byteen_wren),
.addr (line_idx),
.wdata (bs_wdata),
.rdata (bs_rdata)
.wdata (byteen_wdata),
.rdata (byteen_rdata)
);
assign dirty_byteen = bs_rdata[way_idx_bin];
end else begin : g_dirty_byteen_0
assign dirty_byteen = '1;
assign evict_data = line_rdata[evict_way_idx_r];
end else begin : g_no_writeback
`UNUSED_VAR (init)
assign line_dirty = 0;
assign evict_data = '0;
assign evict_byteen = '0;
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store
@ -128,29 +137,26 @@ module VX_cache_data #(
wire line_write;
wire line_read;
wire way_en = (NUM_WAYS == 1) || way_idx[i];
if (WRITE_ENABLE != 0) begin : g_line_data
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w;
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j
wire word_en = (WORD_SIZE == 1) || (word_idx == j);
assign line_wdata[j] = fill ? fill_data[j] : write_data;
assign wren_w[j] = {WORD_SIZE{fill}} | (write_byteen & {WORD_SIZE{word_en}});
assign line_wdata[j] = write ? write_data : fill_data[j];
assign wren_w[j] = write ? (write_byteen & {WORD_SIZE{word_en}}) : {WORD_SIZE{1'b1}};
end
assign line_wren = wren_w;
assign line_write = (fill || write) && way_en;
if (WRITEBACK) begin : g_line_read_wb
assign line_read = (read || fill || flush);
end else begin : g_line_read_wt
assign line_read = read;
end
assign line_write = (fill && ((NUM_WAYS == 1) || evict_way[i]))
|| (write && tag_matches[i]);
assign line_read = read || ((fill || flush) && WRITEBACK);
end else begin : g_line_data_ro
`UNUSED_VAR (write)
`UNUSED_VAR (flush)
`UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_data)
`UNUSED_VAR (word_idx)
assign line_wdata = fill_data;
assign line_wren = 1'b1;
assign line_write = fill && way_en;
assign line_write = fill && ((NUM_WAYS == 1) || evict_way[i]);
assign line_read = read;
end
@ -158,8 +164,7 @@ module VX_cache_data #(
.DATAW (`CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (BYTEENW),
.NO_RWCHECK (1),
.RW_ASSERT (1)
.OUT_REG (1)
) data_store (
.clk (clk),
.reset (reset),
@ -172,9 +177,18 @@ module VX_cache_data #(
);
end
wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx;
VX_onehot_encoder #(
.N (NUM_WAYS)
) hit_idx_enc (
.data_in (tag_matches),
.data_out (hit_way_idx),
`UNUSED_PIN (valid_out)
);
if (`CS_WORDS_PER_LINE > 1) begin : g_read_data
// order the data layout to perform ways multiplexing last.
// this allows converting way index to binary in parallel with BRAM read access and way selection.
// this allows converting way index to binary in parallel with BRAM read and word indexing.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
VX_transpose #(
.DATAW (`CS_WORD_WIDTH),
@ -184,27 +198,10 @@ module VX_cache_data #(
.data_in (line_rdata),
.data_out (transposed_rdata)
);
assign read_data = transposed_rdata[word_idx][way_idx_bin];
assign read_data = transposed_rdata[word_idx][hit_way_idx];
end else begin : g_read_data_1w
`UNUSED_VAR (word_idx)
assign read_data = line_rdata[way_idx_bin];
assign read_data = line_rdata[hit_way_idx];
end
`ifdef DBG_TRACE_CACHE
always @(posedge clk) begin
if (fill) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data))
end
if (flush) begin
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data))
end
if (read) begin
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid))
end
if (write) begin
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid))
end
end
`endif
endmodule

View file

@ -24,7 +24,7 @@
// arrival and are dequeued in the same order.
// Each entry has a next pointer to the next entry pending for the same cache line.
//
// During the fill request, the MSHR will release the MSHR entry at fill_id
// During the fill request, the MSHR will dequeue the MSHR entry at the fill_id location
// which represents the first request in the pending list that initiated the memory fill.
//
// The dequeue response directly follows the fill request and will release
@ -35,7 +35,8 @@
// the slot id of the previous entry for the same cache line. This is used to
// link the new entry to the pending list.
//
// The release request is used to invalidate the allocated MSHR entry if we had a cache hit.
// The finalize request is used to persist or release the currently allocated MSHR entry
// if we had a cache miss or a hit, respectively.
//
// Warning: This MSHR implementation is strongly coupled with the bank pipeline
// and as such changes to either module requires careful evaluation.
@ -56,8 +57,6 @@ module VX_cache_mshr #(
parameter DATA_WIDTH = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Cache stall on read during write
RDW_STALL = 0,
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE)
) (
@ -67,7 +66,7 @@ module VX_cache_mshr #(
`IGNORE_UNUSED_BEGIN
input wire[`UP(UUID_WIDTH)-1:0] deq_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] alc_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] rel_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid,
`IGNORE_UNUSED_END
// memory fill
@ -90,11 +89,15 @@ module VX_cache_mshr #(
input wire [DATA_WIDTH-1:0] allocate_data,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
output wire allocate_pending,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_previd,
output wire allocate_ready,
// release
input wire release_valid,
input wire [MSHR_ADDR_WIDTH-1:0] release_id
// finalize
input wire finalize_valid,
input wire finalize_is_release,
input wire finalize_is_pending,
input wire [MSHR_ADDR_WIDTH-1:0] finalize_previd,
input wire [MSHR_ADDR_WIDTH-1:0] finalize_id
);
`UNUSED_PARAM (BANK_ID)
@ -112,8 +115,6 @@ module VX_cache_mshr #(
reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n;
wire [MSHR_ADDR_WIDTH-1:0] prev_idx;
reg [MSHR_ADDR_WIDTH-1:0] post_alloc_id, post_alloc_previd;
reg post_alloc_val;
wire allocate_fire = allocate_valid && allocate_ready;
wire dequeue_fire = dequeue_valid && dequeue_ready;
@ -157,19 +158,20 @@ module VX_cache_mshr #(
valid_table_n[dequeue_id] = 0;
if (next_table[dequeue_id]) begin
dequeue_id_n = next_index[dequeue_id];
end else if (!RDW_STALL && post_alloc_val && (post_alloc_previd == dequeue_id)) begin
dequeue_id_n = post_alloc_id;
end else if (finalize_valid && finalize_is_pending && (finalize_previd == dequeue_id)) begin
dequeue_id_n = finalize_id;
end else begin
dequeue_val_n = 0;
end
end
if (release_valid) begin
valid_table_n[release_id] = 0;
end
if (post_alloc_val) begin
next_table_x[post_alloc_previd] = 1;
if (finalize_valid) begin
if (finalize_is_release) begin
valid_table_n[finalize_id] = 0;
end
if (finalize_is_pending) begin
next_table_x[finalize_previd] = 1;
end
end
next_table_n = next_table_x;
@ -184,12 +186,10 @@ module VX_cache_mshr #(
valid_table <= '0;
allocate_rdy <= 0;
dequeue_val <= 0;
post_alloc_val <= 0;
end else begin
valid_table <= valid_table_n;
allocate_rdy <= allocate_rdy_n;
dequeue_val <= dequeue_val_n;
post_alloc_val <= allocate_fire && allocate_pending;
end
if (allocate_fire) begin
@ -197,22 +197,20 @@ module VX_cache_mshr #(
write_table[allocate_id] <= allocate_rw;
end
if (post_alloc_val) begin
next_index[post_alloc_previd] <= post_alloc_id;
if (finalize_valid && finalize_is_pending) begin
next_index[finalize_previd] <= finalize_id;
end
dequeue_id_r <= dequeue_id_n;
allocate_id_r <= allocate_id_n;
next_table <= next_table_n;
post_alloc_id <= allocate_id;
post_alloc_previd <= prev_idx;
end
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`RUNTIME_ASSERT(~(allocate_fire && valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid))
`RUNTIME_ASSERT((~release_valid || valid_table[release_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[release_id], BANK_ID), release_id, rel_req_uuid))
`RUNTIME_ASSERT(~(finalize_valid && ~valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
@ -220,7 +218,7 @@ module VX_cache_mshr #(
VX_dp_ram #(
.DATAW (DATA_WIDTH),
.SIZE (MSHR_SIZE),
.RADDR_REG (1)
.OUT_REG (1)
) entries (
.clk (clk),
.reset (reset),
@ -236,7 +234,9 @@ module VX_cache_mshr #(
assign fill_addr = addr_table[fill_id];
assign allocate_ready = allocate_rdy;
assign allocate_id = allocate_id_r;
assign allocate_id = allocate_id_r;
assign allocate_previd = prev_idx;
if (WRITEBACK) begin : g_pending_wb
assign allocate_pending = |addr_matches;
end else begin : g_pending_wt
@ -255,14 +255,17 @@ module VX_cache_mshr #(
if (reset) begin
show_table <= 0;
end else begin
show_table <= allocate_fire || post_alloc_val || release_valid || fill_valid || dequeue_fire;
show_table <= allocate_fire || finalize_valid || fill_valid || dequeue_fire;
end
if (allocate_fire) begin
`TRACE(3, ("%t: %s allocate: addr=0x%0h, id=%0d, pending=%b, prev=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid))
end
if (release_valid) begin
`TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, release_id, rel_req_uuid))
if (finalize_valid && finalize_is_release) begin
`TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid))
end
if (finalize_valid && finalize_is_pending) begin
`TRACE(3, ("%t: %s finalize: id=%0d (#%0d)\n", $time, INSTANCE_ID, finalize_id, fin_req_uuid))
end
if (fill_valid) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,

View file

@ -14,8 +14,6 @@
`include "VX_cache_define.vh"
module VX_cache_tags #(
parameter `STRING INSTANCE_ID = "",
parameter BANK_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
@ -27,99 +25,86 @@ module VX_cache_tags #(
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Request debug identifier
parameter UUID_WIDTH = 0
parameter WRITEBACK = 0
) (
input wire clk,
input wire reset,
input wire stall,
`IGNORE_UNUSED_BEGIN
input wire [`UP(UUID_WIDTH)-1:0] req_uuid,
`IGNORE_UNUSED_END
// init/fill/lookup
// inputs
input wire init,
input wire flush,
input wire fill,
input wire write,
input wire lookup,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [NUM_WAYS-1:0] way_idx,
output wire [NUM_WAYS-1:0] tag_matches,
input wire [NUM_WAYS-1:0] flush_way,
// eviction
output wire evict_dirty,
// outputs
output wire [NUM_WAYS-1:0] tag_matches_r,
output wire [`CS_TAG_SEL_BITS-1:0] line_tag_r,
output wire [NUM_WAYS-1:0] evict_way,
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag
output wire [NUM_WAYS-1:0] evict_way_r,
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag_r
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (BANK_ID)
`UNUSED_VAR (lookup)
// valid, dirty, tag
localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS;
// valid, tag
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
wire [NUM_WAYS-1:0] read_valid;
wire [NUM_WAYS-1:0] read_dirty;
if (NUM_WAYS > 1) begin : g_evict_way
reg [NUM_WAYS-1:0] evict_way_r;
if (NUM_WAYS > 1) begin : g_evict_way
reg [NUM_WAYS-1:0] victim_way;
// cyclic assignment of replacement way
always @(posedge clk) begin
if (reset) begin
evict_way_r <= 1;
end else if (lookup) begin
evict_way_r <= {evict_way_r[NUM_WAYS-2:0], evict_way_r[NUM_WAYS-1]};
victim_way <= 1;
end else if (~stall) begin
victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]};
end
end
assign evict_way = fill ? victim_way : flush_way;
`BUFFER_EX(evict_way_r, evict_way, ~stall, 1);
end else begin : g_evict_way_0
`UNUSED_VAR (flush_way)
assign evict_way = 1'b1;
assign evict_way_r = 1'b1;
end
assign evict_way = fill ? evict_way_r : way_idx;
if (WRITEBACK) begin : g_evict_tag_wb
VX_onehot_mux #(
.DATAW (`CS_TAG_SEL_BITS),
.N (NUM_WAYS)
) evict_tag_sel (
.data_in (read_tag),
.sel_in (evict_way),
.data_out (evict_tag)
.sel_in (evict_way_r),
.data_out (evict_tag_r)
);
end else begin : g_evict_way_0
assign evict_way = 1'b1;
assign evict_tag = read_tag;
end else begin : g_evict_tag_wt
assign evict_tag_r = '0;
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store
wire do_fill = fill && evict_way[i];
wire do_flush = flush && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode
wire do_write = WRITEBACK && write && tag_matches[i];
wire do_fill = fill && evict_way[i];
wire do_flush = flush && (!WRITEBACK || evict_way[i]); // flush the whole line in writethrough mode
wire line_read = (WRITEBACK && (fill || flush));
wire line_write = init || do_fill || do_flush || do_write;
wire line_valid = ~(init || flush);
wire line_read = lookup || (WRITEBACK && (fill || flush));
wire line_write = init || do_fill || do_flush;
wire line_valid = fill;
wire [TAG_WIDTH-1:0] line_wdata;
wire [TAG_WIDTH-1:0] line_rdata;
if (WRITEBACK) begin : g_writeback
assign line_wdata = {line_valid, write, line_tag};
assign {read_valid[i], read_dirty[i], read_tag[i]} = line_rdata;
end else begin : g_writethrough
assign line_wdata = {line_valid, line_tag};
assign {read_valid[i], read_tag[i]} = line_rdata;
assign read_dirty[i] = 1'b0;
end
assign line_wdata = {line_valid, line_tag};
assign {read_valid[i], read_tag[i]} = line_rdata;
VX_sp_ram #(
.DATAW (TAG_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.NO_RWCHECK (1),
.RW_ASSERT (1)
.OUT_REG (1)
) tag_store (
.clk (clk),
.reset (reset),
@ -132,40 +117,10 @@ module VX_cache_tags #(
);
end
`BUFFER_EX(line_tag_r, line_tag, ~stall, 1);
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
assign tag_matches_r[i] = read_valid[i] && (line_tag_r == read_tag[i]);
end
assign evict_dirty = | (read_dirty & evict_way);
`ifdef DBG_TRACE_CACHE
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx};
always @(posedge clk) begin
if (fill) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
end
if (init) begin
`TRACE(3, ("%t: %s init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx))
end
if (flush) begin
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty))
end
if (lookup) begin
if (tag_matches != 0) begin
if (write) begin
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
end else begin
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
end
end else begin
if (write) begin
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
end else begin
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
end
end
end
end
`endif
endmodule