mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
cache hit timing optimization
This commit is contained in:
parent
8f29ad58ae
commit
b6bd6467ef
9 changed files with 257 additions and 350 deletions
15
hw/rtl/cache/VX_bank_flush.sv
vendored
15
hw/rtl/cache/VX_bank_flush.sv
vendored
|
@ -33,7 +33,7 @@ module VX_bank_flush #(
|
|||
output wire flush_init,
|
||||
output wire flush_valid,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
|
||||
output wire [NUM_WAYS-1:0] flush_way,
|
||||
output wire [`CS_WAY_SEL_WIDTH-1:0] flush_way,
|
||||
input wire flush_ready,
|
||||
input wire mshr_empty,
|
||||
input wire bank_empty
|
||||
|
@ -113,17 +113,10 @@ module VX_bank_flush #(
|
|||
assign flush_valid = (state == STATE_FLUSH);
|
||||
assign flush_line = counter[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way
|
||||
VX_decoder #(
|
||||
.N (`CS_WAY_SEL_BITS),
|
||||
.D (NUM_WAYS)
|
||||
) ctr_decoder (
|
||||
.sel_in (counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
|
||||
.data_in (1'b1),
|
||||
.data_out (flush_way)
|
||||
);
|
||||
if (WRITEBACK && (NUM_WAYS > 1)) begin : g_flush_way
|
||||
assign flush_way = counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS];
|
||||
end else begin : g_flush_way_all
|
||||
assign flush_way = {NUM_WAYS{1'b1}};
|
||||
assign flush_way = '0;
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
179
hw/rtl/cache/VX_cache_bank.sv
vendored
179
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -150,19 +150,19 @@ module VX_cache_bank #(
|
|||
wire is_creq_st0, is_creq_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_flush_st0, is_flush_st1;
|
||||
wire [NUM_WAYS-1:0] flush_way_st0;
|
||||
wire [NUM_WAYS-1:0] evict_way_st0, evict_way_st1;
|
||||
wire [`CS_WAY_SEL_WIDTH-1:0] flush_way_st0, evict_way_st0;
|
||||
wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st1;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag_st1;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel, line_idx_st0, line_idx_st1;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0, evict_tag_st1;
|
||||
wire rw_sel, rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] write_word_st0, write_word_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
|
||||
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0;
|
||||
|
@ -170,18 +170,18 @@ module VX_cache_bank #(
|
|||
wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1;
|
||||
wire is_hit_st0, is_hit_st1;
|
||||
wire mshr_empty;
|
||||
|
||||
wire flush_valid;
|
||||
wire init_valid;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
|
||||
wire [NUM_WAYS-1:0] flush_way;
|
||||
wire [`CS_WAY_SEL_WIDTH-1:0] flush_way;
|
||||
wire flush_ready;
|
||||
|
||||
// ensure we have no pending memory request in the bank
|
||||
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty;
|
||||
|
||||
// flush unit
|
||||
VX_bank_flush #(
|
||||
.BANK_ID (BANK_ID),
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -203,9 +203,7 @@ module VX_cache_bank #(
|
|||
.bank_empty (no_pending_req)
|
||||
);
|
||||
|
||||
logic rdw_hazard, post_hazard;
|
||||
|
||||
wire pipe_stall = crsp_queue_stall || rdw_hazard;
|
||||
wire pipe_stall = crsp_queue_stall;
|
||||
|
||||
// inputs arbitration:
|
||||
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||
|
@ -295,8 +293,6 @@ module VX_cache_bank #(
|
|||
assign req_uuid_sel = '0;
|
||||
end
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
wire is_init_sel = init_valid;
|
||||
wire is_creq_sel = creq_enable || replay_enable;
|
||||
wire is_fill_sel = fill_enable;
|
||||
|
@ -304,7 +300,7 @@ module VX_cache_bank #(
|
|||
wire is_replay_sel = replay_enable;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
|
@ -334,22 +330,18 @@ module VX_cache_bank #(
|
|||
|
||||
wire do_read_st1 = valid_st1 && is_read_st1;
|
||||
wire do_write_st1 = valid_st1 && is_write_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK;
|
||||
|
||||
assign line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0];
|
||||
assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||
assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0);
|
||||
|
||||
assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st1;
|
||||
|
||||
wire is_hit_st1 = (| tag_matches_st1);
|
||||
|
||||
wire do_lookup_st0 = do_read_st0 || do_write_st0;
|
||||
|
||||
wire do_lookup_st1 = do_read_st1 || do_write_st1;
|
||||
|
||||
reg [NUM_WAYS-1:0] victim_way_st0;
|
||||
wire [`CS_WAY_SEL_WIDTH-1:0] victim_way_st0;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0;
|
||||
|
||||
VX_cache_repl #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -363,10 +355,10 @@ module VX_cache_bank #(
|
|||
.stall (pipe_stall),
|
||||
.hit_valid (do_lookup_st1 && is_hit_st1 && ~pipe_stall),
|
||||
.hit_line (line_idx_st1),
|
||||
.hit_way (tag_matches_st1),
|
||||
.hit_way (way_idx_st1),
|
||||
.repl_valid (do_fill_st0 && ~pipe_stall),
|
||||
.repl_line (line_idx_st0),
|
||||
.repl_line_n(line_idx_sel),
|
||||
.repl_line (line_idx_st0),
|
||||
.repl_way (victim_way_st0)
|
||||
);
|
||||
|
||||
|
@ -388,27 +380,29 @@ module VX_cache_bank #(
|
|||
.flush (do_flush_st0 && ~pipe_stall),
|
||||
.fill (do_fill_st0 && ~pipe_stall),
|
||||
.lookup (do_lookup_st0 && ~pipe_stall),
|
||||
.line_addr (addr_st0),
|
||||
.line_idx_n (line_idx_sel),
|
||||
.line_idx (line_idx_st0),
|
||||
.line_tag (line_tag_st0),
|
||||
.evict_way (evict_way_st0),
|
||||
// outputs
|
||||
.tag_matches_r(tag_matches_st1),
|
||||
.line_tag_r (line_tag_st1),
|
||||
.evict_tag_r(evict_tag_st1),
|
||||
.evict_way_r(evict_way_st1)
|
||||
.tag_matches(tag_matches_st0),
|
||||
.evict_tag (evict_tag_st0)
|
||||
);
|
||||
|
||||
assign is_hit_st0 = (| tag_matches_st0);
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
assign mshr_id_st0 = is_replay_st0 ? replay_id_st0 : mshr_alloc_id_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1})
|
||||
.data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_hit_st0, rw_st0, flags_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_hit_st1, rw_st1, flags_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_st1
|
||||
|
@ -422,58 +416,12 @@ module VX_cache_bank #(
|
|||
// ensure mshr replay always get a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1 && ~is_hit_st1), ("%t: missed mshr replay", $time))
|
||||
|
||||
if (WRITE_ENABLE) begin : g_rdw_hazard
|
||||
// This implementation uses single-port BRAMs for the tags and data stores.
|
||||
// Using different stages for read and write operations requires a pipeline stall in between due to address port sharing.
|
||||
// Tags fill/flush can perform read and write in the same stage, since no dependency between.
|
||||
// Data fill/flush can perform read and write in the same stage, since way_idx is available in st0.
|
||||
// A data read should happen in st0 for its result to be available in st1.
|
||||
// A data write should happen in st1 when the tag hit status is available.
|
||||
// The r/w hazard is not needed for consecutive writes since they both only write in st1.
|
||||
// The r/w hazard is also not needed for next writethrough fill/flush to the same line.
|
||||
// For reads or writeback fill/flush to the same line, we still need the hazard
|
||||
// because the data written in st1 cannot be read at the same time in st0 without extra forwarding logic.
|
||||
wire is_write_sel = is_creq_sel && rw_sel;
|
||||
wire is_same_line = (line_idx_sel == line_idx_st0);
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
post_hazard <= 0;
|
||||
rdw_hazard <= 0;
|
||||
end else begin
|
||||
if (~crsp_queue_stall) begin
|
||||
post_hazard <= rdw_hazard;
|
||||
rdw_hazard <= do_write_st0 && valid_sel && ~(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel)));
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin : g_rdw_hazard_ro
|
||||
assign rdw_hazard = 0;
|
||||
assign post_hazard = 0;
|
||||
end
|
||||
|
||||
assign write_word_st1 = data_st1[`CS_WORD_WIDTH-1:0];
|
||||
`UNUSED_VAR (data_st1)
|
||||
|
||||
wire [`CS_LINE_WIDTH-1:0] evict_data_st1;
|
||||
wire[`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] read_data_st1;
|
||||
wire [LINE_SIZE-1:0] evict_byteen_st1;
|
||||
wire line_dirty_st1;
|
||||
|
||||
wire data_write;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] data_line_idx;
|
||||
|
||||
if (WRITE_ENABLE) begin : g_data_ctrl
|
||||
// by default all data accesses happen in st0 and use line_idx_st0.
|
||||
// data writes should happen in st1 when the tag hit is available,
|
||||
// and use line_idx_st1 to ensure the correct line is updated.
|
||||
// if a rdw hazard is active due to conflict, ensure we don't write twice.
|
||||
assign data_write = do_write_st1 && ~post_hazard && ~crsp_queue_stall;
|
||||
assign data_line_idx = data_write ? line_idx_st1 : line_idx_st0;
|
||||
end else begin : g_data_ctrl_ro
|
||||
`UNUSED_VAR (post_hazard)
|
||||
`UNUSED_VAR (do_write_st1)
|
||||
assign data_write = 0;
|
||||
assign data_line_idx = line_idx_st0;
|
||||
end
|
||||
wire evict_dirty_st1;
|
||||
|
||||
VX_cache_data #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
|
@ -493,18 +441,18 @@ module VX_cache_bank #(
|
|||
.fill (do_fill_st0 && ~pipe_stall),
|
||||
.flush (do_flush_st0 && ~pipe_stall),
|
||||
.read (do_read_st0 && ~pipe_stall),
|
||||
.write (data_write),
|
||||
.write (do_write_st0 && ~pipe_stall),
|
||||
.evict_way (evict_way_st0),
|
||||
.tag_matches(tag_matches_st1),
|
||||
.line_idx (data_line_idx),
|
||||
.tag_matches(tag_matches_st0),
|
||||
.line_idx (line_idx_st0),
|
||||
.fill_data (data_st0),
|
||||
.write_word (write_word_st1),
|
||||
.word_idx (word_idx_st1),
|
||||
.write_byteen(byteen_st1),
|
||||
.write_word (write_word_st0),
|
||||
.word_idx (word_idx_st0),
|
||||
.write_byteen(byteen_st0),
|
||||
// outputs
|
||||
.way_idx (way_idx_st1),
|
||||
.read_data (read_data_st1),
|
||||
.line_dirty (line_dirty_st1),
|
||||
.evict_data (evict_data_st1),
|
||||
.evict_dirty(evict_dirty_st1),
|
||||
.evict_byteen(evict_byteen_st1)
|
||||
);
|
||||
|
||||
|
@ -600,7 +548,7 @@ module VX_cache_bank #(
|
|||
|
||||
assign crsp_queue_valid = do_read_st1 && is_hit_st1;
|
||||
assign crsp_queue_idx = req_idx_st1;
|
||||
assign crsp_queue_data = read_data_st1;
|
||||
assign crsp_queue_data = read_data_st1[word_idx_st1];
|
||||
assign crsp_queue_tag = tag_st1;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
|
@ -610,7 +558,7 @@ module VX_cache_bank #(
|
|||
) core_rsp_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (crsp_queue_valid && ~rdw_hazard),
|
||||
.valid_in (crsp_queue_valid),
|
||||
.ready_in (crsp_queue_ready),
|
||||
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
|
||||
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
|
||||
|
@ -618,9 +566,7 @@ module VX_cache_bank #(
|
|||
.ready_out (core_rsp_ready)
|
||||
);
|
||||
|
||||
// we use 'do_read_st1' instead 'crsp_queue_valid'
|
||||
// to remove costly 'is_hit_st1' signal from critical paths.
|
||||
assign crsp_queue_stall = do_read_st1 && ~crsp_queue_ready;
|
||||
assign crsp_queue_stall = crsp_queue_valid && ~crsp_queue_ready;
|
||||
|
||||
// schedule memory request
|
||||
|
||||
|
@ -634,7 +580,7 @@ module VX_cache_bank #(
|
|||
|
||||
wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK);
|
||||
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
|
||||
wire do_writeback_st1 = do_fill_or_flush_st1 && line_dirty_st1;
|
||||
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1};
|
||||
|
||||
if (WRITE_ENABLE) begin : g_mreq_queue
|
||||
|
@ -642,7 +588,7 @@ module VX_cache_bank #(
|
|||
if (DIRTY_BYTES) begin : g_dirty_bytes
|
||||
// ensure dirty bytes match the tag info
|
||||
wire has_dirty_bytes = (| evict_byteen_st1);
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (line_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, line_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
|
||||
end
|
||||
// issue a fill request on a read/write miss
|
||||
// issue a writeback on a dirty line eviction
|
||||
|
@ -651,8 +597,10 @@ module VX_cache_bank #(
|
|||
&& ~pipe_stall;
|
||||
assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1;
|
||||
assign mreq_queue_rw = is_fill_or_flush_st1;
|
||||
assign mreq_queue_data = evict_data_st1;
|
||||
assign mreq_queue_data = read_data_st1;
|
||||
assign mreq_queue_byteen = is_fill_or_flush_st1 ? evict_byteen_st1 : '1;
|
||||
`UNUSED_VAR (write_word_st1)
|
||||
`UNUSED_VAR (byteen_st1)
|
||||
end else begin : g_wt
|
||||
wire [LINE_SIZE-1:0] line_byteen;
|
||||
VX_decoder #(
|
||||
|
@ -675,7 +623,6 @@ module VX_cache_bank #(
|
|||
`UNUSED_VAR (is_fill_or_flush_st1)
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
`UNUSED_VAR (evict_addr_st1)
|
||||
`UNUSED_VAR (evict_data_st1)
|
||||
`UNUSED_VAR (evict_byteen_st1)
|
||||
end
|
||||
end else begin : g_mreq_queue_ro
|
||||
|
@ -688,8 +635,9 @@ module VX_cache_bank #(
|
|||
assign mreq_queue_byteen = '1;
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
`UNUSED_VAR (evict_addr_st1)
|
||||
`UNUSED_VAR (evict_data_st1)
|
||||
`UNUSED_VAR (evict_byteen_st1)
|
||||
`UNUSED_VAR (write_word_st1)
|
||||
`UNUSED_VAR (byteen_st1)
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid
|
||||
|
@ -722,10 +670,6 @@ module VX_cache_bank #(
|
|||
|
||||
assign mem_req_valid = ~mreq_queue_empty;
|
||||
|
||||
`UNUSED_VAR (do_fill_st1)
|
||||
`UNUSED_VAR (do_flush_st1)
|
||||
`UNUSED_VAR (evict_way_st1)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -740,8 +684,8 @@ module VX_cache_bank #(
|
|||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
|
||||
always @(posedge clk) begin
|
||||
if (input_stall || pipe_stall) begin
|
||||
`TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID,
|
||||
crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard))
|
||||
`TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID,
|
||||
crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full))
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
|
@ -764,32 +708,37 @@ module VX_cache_bank #(
|
|||
`TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0))
|
||||
end
|
||||
if (do_fill_st0 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0))
|
||||
end
|
||||
if (do_flush_st0 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0))
|
||||
end
|
||||
if (do_lookup_st1 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s tags-Lookup: addr=0x%0h, rw=%b, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1))
|
||||
if (is_hit_st1) begin
|
||||
`TRACE(3, ("%t: %s tags-hit: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s tags-miss: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1))
|
||||
end
|
||||
end
|
||||
if (do_fill_st0 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%0d, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0))
|
||||
end
|
||||
if (do_flush_st0 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0))
|
||||
end
|
||||
if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1))
|
||||
`TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, crsp_queue_data, req_uuid_st1))
|
||||
end
|
||||
if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin
|
||||
`TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1))
|
||||
`TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1))
|
||||
end
|
||||
if (crsp_queue_fire) begin
|
||||
`TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
|
||||
|
|
123
hw/rtl/cache/VX_cache_data.sv
vendored
123
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -41,38 +41,23 @@ module VX_cache_data #(
|
|||
input wire read,
|
||||
input wire write,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] line_idx,
|
||||
input wire [NUM_WAYS-1:0] evict_way,
|
||||
input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way,
|
||||
input wire [NUM_WAYS-1:0] tag_matches,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
|
||||
input wire [`CS_WORD_WIDTH-1:0] write_word,
|
||||
input wire [WORD_SIZE-1:0] write_byteen,
|
||||
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx,
|
||||
// outputs
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data,
|
||||
output wire line_dirty,
|
||||
output wire [`CS_LINE_WIDTH-1:0] evict_data,
|
||||
output wire [`CS_WAY_SEL_WIDTH-1:0] way_idx,
|
||||
output wire [`CS_LINE_WIDTH-1:0] read_data,
|
||||
output wire evict_dirty,
|
||||
output wire [LINE_SIZE-1:0] evict_byteen
|
||||
);
|
||||
`UNUSED_PARAM (WORD_SIZE)
|
||||
`UNUSED_VAR (stall)
|
||||
|
||||
localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1;
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
|
||||
if (WRITEBACK != 0) begin : g_writeback
|
||||
localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? LINE_SIZE : 0);
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] evict_way_idx, evict_way_idx_r;
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
) fill_way_enc (
|
||||
.data_in (evict_way),
|
||||
.data_out (evict_way_idx),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
`BUFFER_EX(evict_way_idx_r, evict_way_idx, ~stall, 1);
|
||||
|
||||
wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata;
|
||||
wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata;
|
||||
|
@ -80,7 +65,7 @@ module VX_cache_data #(
|
|||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata
|
||||
wire evict = fill || flush;
|
||||
wire evict_way_en = (NUM_WAYS == 1) || evict_way[i];
|
||||
wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i);
|
||||
wire dirty_data = write; // only asserted on writes
|
||||
wire dirty_wren = init || (evict && evict_way_en) || (write && tag_matches[i]);
|
||||
if (DIRTY_BYTES != 0) begin : g_dirty_bytes
|
||||
|
@ -121,54 +106,47 @@ module VX_cache_data #(
|
|||
);
|
||||
|
||||
if (DIRTY_BYTES != 0) begin : g_line_dirty_and_byteen
|
||||
assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r];
|
||||
assign {evict_dirty, evict_byteen} = byteen_rdata[way_idx];
|
||||
end else begin : g_line_dirty
|
||||
assign line_dirty = byteen_rdata[evict_way_idx_r];
|
||||
assign evict_dirty = byteen_rdata[way_idx];
|
||||
assign evict_byteen = '1;
|
||||
end
|
||||
|
||||
assign evict_data = line_rdata[evict_way_idx_r];
|
||||
|
||||
end else begin : g_no_writeback
|
||||
`UNUSED_VAR (init)
|
||||
`UNUSED_VAR (flush)
|
||||
assign line_dirty = 0;
|
||||
assign evict_data = '0;
|
||||
assign evict_dirty = 0;
|
||||
assign evict_byteen = '0;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store
|
||||
wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
|
||||
wire fill_way_en = (NUM_WAYS == 1) || evict_way[i];
|
||||
if (WRITE_ENABLE) begin : g_data_store
|
||||
// create a single write-enable block ram to reduce area overhead
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren;
|
||||
wire line_write;
|
||||
wire line_read;
|
||||
|
||||
if (WRITE_ENABLE != 0) begin : g_wdata
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_wdata
|
||||
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
|
||||
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
|
||||
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
|
||||
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
|
||||
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
|
||||
end
|
||||
assign line_wdata = (fill && fill_way_en) ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
|
||||
assign line_wren = {LINE_SIZE{fill && fill_way_en}}
|
||||
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
|
||||
|
||||
end else begin : g_ro_wdata
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_word)
|
||||
`UNUSED_VAR (word_idx)
|
||||
assign line_wdata = fill_data;
|
||||
assign line_wren = fill_way_en;
|
||||
assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
|
||||
assign line_wren[i] = {LINE_SIZE{fill && fill_way_en}}
|
||||
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
|
||||
end
|
||||
|
||||
wire line_write = fill || (write && WRITE_ENABLE);
|
||||
wire line_read = read || ((fill || flush) && WRITEBACK);
|
||||
assign line_write = fill || (write && WRITE_ENABLE);
|
||||
assign line_read = read || ((fill || flush) && WRITEBACK);
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH),
|
||||
.DATAW (NUM_WAYS * `CS_LINE_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (BYTEENW),
|
||||
.WRENW (NUM_WAYS * LINE_SIZE),
|
||||
.OUT_REG (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
|
@ -178,35 +156,46 @@ module VX_cache_data #(
|
|||
.wren (line_wren),
|
||||
.addr (line_idx),
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata[i])
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
end else begin : g_data_store
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_word)
|
||||
`UNUSED_VAR (word_idx)
|
||||
|
||||
// we don't merge the ways into a single block ram due to WREN overhead
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways
|
||||
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.OUT_REG (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (read),
|
||||
.write (fill && fill_way_en),
|
||||
.wren (1'b1),
|
||||
.addr (line_idx),
|
||||
.wdata (fill_data),
|
||||
.rdata (line_rdata[i])
|
||||
);
|
||||
end
|
||||
end
|
||||
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx;
|
||||
wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx;
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
) hit_idx_enc (
|
||||
) way_idx_enc (
|
||||
.data_in (tag_matches),
|
||||
.data_out (hit_way_idx),
|
||||
.data_out (hit_idx),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin : g_read_data
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting way index to binary in parallel with BRAM read and word indexing.
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
|
||||
VX_transpose #(
|
||||
.DATAW (`CS_WORD_WIDTH),
|
||||
.N (NUM_WAYS),
|
||||
.M (`CS_WORDS_PER_LINE)
|
||||
) transpose (
|
||||
.data_in (line_rdata),
|
||||
.data_out (transposed_rdata)
|
||||
);
|
||||
assign read_data = transposed_rdata[word_idx][hit_way_idx];
|
||||
end else begin : g_read_data_1w
|
||||
`UNUSED_VAR (word_idx)
|
||||
assign read_data = line_rdata[hit_way_idx];
|
||||
end
|
||||
`BUFFER_EX(way_idx, (read ? hit_idx : evict_way), ~stall, 1);
|
||||
|
||||
assign read_data = line_rdata[way_idx];
|
||||
|
||||
endmodule
|
||||
|
|
1
hw/rtl/cache/VX_cache_define.vh
vendored
1
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -22,6 +22,7 @@
|
|||
`define CS_LINE_WIDTH (8 * LINE_SIZE)
|
||||
`define CS_BANK_SIZE (CACHE_SIZE / NUM_BANKS)
|
||||
`define CS_WAY_SEL_BITS `CLOG2(NUM_WAYS)
|
||||
`define CS_WAY_SEL_WIDTH `UP(`CS_WAY_SEL_BITS)
|
||||
|
||||
`define CS_LINES_PER_BANK (`CS_BANK_SIZE / (LINE_SIZE * NUM_WAYS))
|
||||
`define CS_WORDS_PER_LINE (LINE_SIZE / WORD_SIZE)
|
||||
|
|
195
hw/rtl/cache/VX_cache_repl.sv
vendored
195
hw/rtl/cache/VX_cache_repl.sv
vendored
|
@ -97,135 +97,114 @@ module VX_cache_repl #(
|
|||
input wire stall,
|
||||
input wire hit_valid,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] hit_line,
|
||||
input wire [NUM_WAYS-1:0] hit_way,
|
||||
input wire [`CS_WAY_SEL_WIDTH-1:0] hit_way,
|
||||
input wire repl_valid,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] repl_line_n,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] repl_line,
|
||||
output wire [NUM_WAYS-1:0] repl_way
|
||||
output wire [`CS_WAY_SEL_WIDTH-1:0] repl_way
|
||||
);
|
||||
localparam WAY_SEL_WIDTH = `CS_WAY_SEL_WIDTH;
|
||||
`UNUSED_VAR (stall)
|
||||
|
||||
localparam WAY_IDX_BITS = $clog2(NUM_WAYS);
|
||||
localparam WAY_IDX_WIDTH = `UP(WAY_IDX_BITS);
|
||||
if (NUM_WAYS > 1) begin : g_enable
|
||||
if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru
|
||||
// Pseudo Least Recently Used replacement policy
|
||||
localparam LRU_WIDTH = `UP(NUM_WAYS-1);
|
||||
localparam USE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM;
|
||||
|
||||
if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru
|
||||
// Pseudo Least Recently Used replacement policy
|
||||
localparam LRU_WIDTH = `UP(NUM_WAYS-1);
|
||||
localparam FORCE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= 1024;
|
||||
wire [LRU_WIDTH-1:0] plru_rdata;
|
||||
wire [LRU_WIDTH-1:0] plru_wdata;
|
||||
wire [LRU_WIDTH-1:0] plru_wmask;
|
||||
|
||||
wire [WAY_IDX_WIDTH-1:0] repl_way_idx;
|
||||
wire [WAY_IDX_WIDTH-1:0] hit_way_idx;
|
||||
wire [LRU_WIDTH-1:0] plru_rdata;
|
||||
wire [LRU_WIDTH-1:0] plru_wdata;
|
||||
wire [LRU_WIDTH-1:0] plru_wmask;
|
||||
VX_dp_ram #(
|
||||
.DATAW (LRU_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (LRU_WIDTH),
|
||||
.OUT_REG (USE_BRAM)
|
||||
) plru_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (USE_BRAM ? ~stall : repl_valid),
|
||||
.write (hit_valid),
|
||||
.wren (plru_wmask),
|
||||
.waddr (hit_line),
|
||||
.raddr (USE_BRAM ? repl_line_n : repl_line),
|
||||
.wdata (plru_wdata),
|
||||
.rdata (plru_rdata)
|
||||
);
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (LRU_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (LRU_WIDTH),
|
||||
.OUT_REG (FORCE_BRAM)
|
||||
) plru_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (FORCE_BRAM ? ~stall : repl_valid),
|
||||
.write (hit_valid),
|
||||
.wren (plru_wmask),
|
||||
.waddr (hit_line),
|
||||
.raddr (FORCE_BRAM ? repl_line_n : repl_line),
|
||||
.wdata (plru_wdata),
|
||||
.rdata (plru_rdata)
|
||||
);
|
||||
plru_decoder #(
|
||||
.NUM_WAYS (NUM_WAYS)
|
||||
) plru_dec (
|
||||
.way_idx (hit_way),
|
||||
.lru_data (plru_wdata),
|
||||
.lru_mask (plru_wmask)
|
||||
);
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
) hit_way_enc (
|
||||
.data_in (hit_way),
|
||||
.data_out (hit_way_idx),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
plru_encoder #(
|
||||
.NUM_WAYS (NUM_WAYS)
|
||||
) plru_enc (
|
||||
.lru_in (plru_rdata),
|
||||
.way_idx (repl_way)
|
||||
);
|
||||
|
||||
plru_decoder #(
|
||||
.NUM_WAYS (NUM_WAYS)
|
||||
) plru_dec (
|
||||
.way_idx (hit_way_idx),
|
||||
.lru_data (plru_wdata),
|
||||
.lru_mask (plru_wmask)
|
||||
);
|
||||
end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic
|
||||
// Cyclic replacement policy
|
||||
localparam USE_BRAM = (WAY_SEL_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM;
|
||||
|
||||
plru_encoder #(
|
||||
.NUM_WAYS (NUM_WAYS)
|
||||
) plru_enc (
|
||||
.lru_in (plru_rdata),
|
||||
.way_idx (repl_way_idx)
|
||||
);
|
||||
`UNUSED_VAR (hit_valid)
|
||||
`UNUSED_VAR (hit_line)
|
||||
`UNUSED_VAR (hit_way)
|
||||
`UNUSED_VAR (repl_valid)
|
||||
|
||||
VX_decoder #(
|
||||
.N (WAY_IDX_BITS)
|
||||
) repl_way_dec (
|
||||
.sel_in (repl_way_idx),
|
||||
.data_in (1'b1),
|
||||
.data_out (repl_way)
|
||||
);
|
||||
wire [WAY_SEL_WIDTH-1:0] ctr_rdata;
|
||||
wire [WAY_SEL_WIDTH-1:0] ctr_wdata = ctr_rdata + 1;
|
||||
|
||||
end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic
|
||||
// Cyclic replacement policy
|
||||
localparam CTR_WIDTH = $clog2(NUM_WAYS);
|
||||
localparam FORCE_BRAM = (CTR_WIDTH * `CS_LINES_PER_BANK) >= 1024;
|
||||
VX_dp_ram #(
|
||||
.DATAW (WAY_SEL_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.OUT_REG (USE_BRAM)
|
||||
) ctr_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (USE_BRAM ? ~stall : repl_valid),
|
||||
.write (repl_valid),
|
||||
.wren (1'b1),
|
||||
.raddr (USE_BRAM ? repl_line_n : repl_line),
|
||||
.waddr (repl_line),
|
||||
.wdata (ctr_wdata),
|
||||
.rdata (ctr_rdata)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (hit_valid)
|
||||
`UNUSED_VAR (hit_line)
|
||||
`UNUSED_VAR (hit_way)
|
||||
`UNUSED_VAR (repl_valid)
|
||||
|
||||
wire [`UP(CTR_WIDTH)-1:0] ctr_rdata;
|
||||
wire [`UP(CTR_WIDTH)-1:0] ctr_wdata = ctr_rdata + 1;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (`UP(CTR_WIDTH)),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.OUT_REG (FORCE_BRAM)
|
||||
) ctr_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (FORCE_BRAM ? ~stall : repl_valid),
|
||||
.write (repl_valid),
|
||||
.wren (1'b1),
|
||||
.raddr (FORCE_BRAM ? repl_line_n : repl_line),
|
||||
.waddr (repl_line),
|
||||
.wdata (ctr_wdata),
|
||||
.rdata (ctr_rdata)
|
||||
);
|
||||
|
||||
VX_decoder #(
|
||||
.N (WAY_IDX_BITS)
|
||||
) ctr_decoder (
|
||||
.sel_in (ctr_rdata),
|
||||
.data_in (1'b1),
|
||||
.data_out (repl_way)
|
||||
);
|
||||
end else begin : g_random
|
||||
// Random replacement policy
|
||||
assign repl_way = ctr_rdata;
|
||||
end else begin : g_random
|
||||
// Random replacement policy
|
||||
`UNUSED_VAR (hit_valid)
|
||||
`UNUSED_VAR (hit_line)
|
||||
`UNUSED_VAR (hit_way)
|
||||
`UNUSED_VAR (repl_valid)
|
||||
`UNUSED_VAR (repl_line)
|
||||
`UNUSED_VAR (repl_line_n)
|
||||
reg [WAY_SEL_WIDTH-1:0] victim_idx;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
victim_idx <= 0;
|
||||
end else if (~stall) begin
|
||||
victim_idx <= victim_idx + 1;
|
||||
end
|
||||
end
|
||||
assign repl_way = victim_idx;
|
||||
end
|
||||
end else begin : g_disable
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (hit_valid)
|
||||
`UNUSED_VAR (hit_line)
|
||||
`UNUSED_VAR (hit_way)
|
||||
`UNUSED_VAR (repl_valid)
|
||||
`UNUSED_VAR (repl_line)
|
||||
`UNUSED_VAR (repl_line_n)
|
||||
if (NUM_WAYS > 1) begin : g_repl_way
|
||||
reg [NUM_WAYS-1:0] victim_way;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
victim_way <= 1;
|
||||
end else if (~stall) begin
|
||||
victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]};
|
||||
end
|
||||
end
|
||||
assign repl_way = victim_way;
|
||||
end else begin : g_repl_way_1
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
assign repl_way = 1'b1;
|
||||
end
|
||||
assign repl_way = 1'b0;
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
55
hw/rtl/cache/VX_cache_tags.sv
vendored
55
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -36,50 +36,35 @@ module VX_cache_tags #(
|
|||
input wire flush,
|
||||
input wire fill,
|
||||
input wire lookup,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire [NUM_WAYS-1:0] evict_way,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] line_idx_n,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] line_idx,
|
||||
input wire [`CS_TAG_SEL_BITS-1:0] line_tag,
|
||||
input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way,
|
||||
|
||||
// outputs
|
||||
output wire [NUM_WAYS-1:0] tag_matches_r,
|
||||
output wire [`CS_TAG_SEL_BITS-1:0] line_tag_r,
|
||||
output wire [NUM_WAYS-1:0] evict_way_r,
|
||||
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag_r
|
||||
output wire [NUM_WAYS-1:0] tag_matches,
|
||||
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag
|
||||
);
|
||||
// valid, tag
|
||||
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire [NUM_WAYS-1:0] read_valid;
|
||||
|
||||
if (NUM_WAYS > 1) begin : g_evict_way
|
||||
`BUFFER_EX(evict_way_r, evict_way, ~stall, 1);
|
||||
end else begin : g_evict_way_0
|
||||
`UNUSED_VAR (evict_way)
|
||||
assign evict_way_r = 1'b1;
|
||||
end
|
||||
`UNUSED_VAR (lookup)
|
||||
|
||||
if (WRITEBACK) begin : g_evict_tag_wb
|
||||
VX_onehot_mux #(
|
||||
.DATAW (`CS_TAG_SEL_BITS),
|
||||
.N (NUM_WAYS)
|
||||
) evict_tag_sel (
|
||||
.data_in (read_tag),
|
||||
.sel_in (evict_way_r),
|
||||
.data_out (evict_tag_r)
|
||||
);
|
||||
assign evict_tag = read_tag[evict_way];
|
||||
end else begin : g_evict_tag_wt
|
||||
assign evict_tag_r = '0;
|
||||
assign evict_tag = '0;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store
|
||||
|
||||
wire do_fill = fill && evict_way[i];
|
||||
wire do_flush = flush && (!WRITEBACK || evict_way[i]); // flush the whole line in writethrough mode
|
||||
wire way_en = (NUM_WAYS == 1) || (evict_way == i);
|
||||
wire do_fill = fill && way_en;
|
||||
wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode
|
||||
|
||||
wire line_read = lookup || (WRITEBACK && (fill || flush));
|
||||
//wire line_read = lookup || (WRITEBACK && (fill || flush));
|
||||
wire line_write = init || do_fill || do_flush;
|
||||
wire line_valid = fill;
|
||||
|
||||
|
@ -89,26 +74,26 @@ module VX_cache_tags #(
|
|||
assign line_wdata = {line_valid, line_tag};
|
||||
assign {read_valid[i], read_tag[i]} = line_rdata;
|
||||
|
||||
VX_sp_ram #(
|
||||
VX_dp_ram #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.OUT_REG (1)
|
||||
.OUT_REG (1),
|
||||
.WRITE_MODE ("W")
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.read (~stall),
|
||||
.write (line_write),
|
||||
.wren (1'b1),
|
||||
.addr (line_idx),
|
||||
.waddr (line_idx),
|
||||
.raddr (line_idx_n),
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
end
|
||||
|
||||
`BUFFER_EX(line_tag_r, line_tag, ~stall, 1);
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches
|
||||
assign tag_matches_r[i] = read_valid[i] && (line_tag_r == read_tag[i]);
|
||||
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -61,7 +61,7 @@ module VX_dp_ram #(
|
|||
|
||||
`ifdef SYNTHESIS
|
||||
`ifdef QUARTUS
|
||||
localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : "");
|
||||
localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : "auto");
|
||||
localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "-name add_pass_through_logic_to_inferred_rams off" : "";
|
||||
`define RAM_ARRAY (* ramstyle = RAM_STYLE_VALUE *) reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
|
||||
`define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \
|
||||
|
@ -70,9 +70,9 @@ module VX_dp_ram #(
|
|||
end \
|
||||
end
|
||||
`define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *)
|
||||
`else
|
||||
localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "");
|
||||
localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : "";
|
||||
`elif VIVADO
|
||||
localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "auto");
|
||||
localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : "auto";
|
||||
`define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
|
@ -80,6 +80,14 @@ module VX_dp_ram #(
|
|||
end \
|
||||
end
|
||||
`define RAM_NO_RWCHECK (* rw_addr_collision = RAM_NO_RWCHECK_VALUE *)
|
||||
`else
|
||||
`define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
|
||||
end \
|
||||
end
|
||||
`define RAM_NO_RWCHECK
|
||||
`endif
|
||||
if (OUT_REG) begin : g_out_reg
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
|
@ -122,7 +130,7 @@ module VX_dp_ram #(
|
|||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
end end else if (WRITE_MODE == "U") begin : g_undefined
|
||||
end else if (WRITE_MODE == "U") begin : g_undefined
|
||||
`RAM_NO_RWCHECK `RAM_ARRAY
|
||||
`RAM_INITIALIZATION
|
||||
always @(posedge clk) begin
|
||||
|
@ -138,7 +146,8 @@ module VX_dp_ram #(
|
|||
end else begin
|
||||
`STATIC_ASSERT(0, ("invalid write mode: %s", WRITE_MODE))
|
||||
end
|
||||
else begin : g_no_out_reg
|
||||
assign rdata = rdata_r;
|
||||
end else begin : g_no_out_reg
|
||||
`UNUSED_VAR (read)
|
||||
`RAM_NO_RWCHECK `RAM_ARRAY
|
||||
`RAM_INITIALIZATION
|
||||
|
|
|
@ -20,7 +20,7 @@ module VX_fifo_queue #(
|
|||
parameter ALM_FULL = (DEPTH - 1),
|
||||
parameter ALM_EMPTY = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter LUTRAM = ((DATAW * DEPTH) < `MAX_LUTRAM),
|
||||
parameter LUTRAM = 0,
|
||||
parameter SIZEW = `CLOG2(DEPTH+1)
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -42,9 +42,6 @@ module VX_fifo_queue #(
|
|||
`STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!"))
|
||||
`STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!"))
|
||||
|
||||
`UNUSED_PARAM (OUT_REG)
|
||||
`UNUSED_PARAM (LUTRAM)
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (DEPTH),
|
||||
.ALM_EMPTY (ALM_EMPTY),
|
||||
|
@ -62,6 +59,8 @@ module VX_fifo_queue #(
|
|||
);
|
||||
|
||||
if (DEPTH == 1) begin : g_depth_1
|
||||
`UNUSED_PARAM (OUT_REG)
|
||||
`UNUSED_PARAM (LUTRAM)
|
||||
|
||||
reg [DATAW-1:0] head_r;
|
||||
|
||||
|
@ -75,6 +74,7 @@ module VX_fifo_queue #(
|
|||
|
||||
end else begin : g_depth_n
|
||||
|
||||
localparam USE_BRAM = !LUTRAM && ((DATAW * DEPTH) >= `MAX_LUTRAM);
|
||||
localparam ADDRW = `CLOG2(DEPTH);
|
||||
|
||||
wire [DATAW-1:0] data_out_w;
|
||||
|
@ -95,17 +95,17 @@ module VX_fifo_queue #(
|
|||
end
|
||||
end
|
||||
|
||||
wire [ADDRW-1:0] rd_ptr_w = LUTRAM ? rd_ptr_r : rd_ptr_n;
|
||||
wire [ADDRW-1:0] rd_ptr_w = USE_BRAM ? rd_ptr_n : rd_ptr_r;
|
||||
|
||||
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
|
||||
wire bypass = push && (empty || (going_empty && pop));
|
||||
wire read = ((OUT_REG != 0) || !LUTRAM) ? ~bypass : pop;
|
||||
wire read = ((OUT_REG != 0) || USE_BRAM) ? ~bypass : pop;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (DEPTH),
|
||||
.LUTRAM (LUTRAM),
|
||||
.OUT_REG(!LUTRAM),
|
||||
.LUTRAM (!USE_BRAM),
|
||||
.OUT_REG(USE_BRAM),
|
||||
.WRITE_MODE("W")
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
|
|
|
@ -21,4 +21,6 @@ RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
|
|||
|
||||
TOP := VX_fifo_queue
|
||||
|
||||
PARAMS := -GDATAW=32 -GDEPTH=8
|
||||
|
||||
include ../common.mk
|
Loading…
Add table
Add a link
Reference in a new issue