cache hit timing optimization

This commit is contained in:
Blaise Tine 2024-10-19 20:04:51 -07:00
parent 8f29ad58ae
commit b6bd6467ef
9 changed files with 257 additions and 350 deletions

View file

@ -33,7 +33,7 @@ module VX_bank_flush #(
output wire flush_init,
output wire flush_valid,
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
output wire [NUM_WAYS-1:0] flush_way,
output wire [`CS_WAY_SEL_WIDTH-1:0] flush_way,
input wire flush_ready,
input wire mshr_empty,
input wire bank_empty
@ -113,17 +113,10 @@ module VX_bank_flush #(
assign flush_valid = (state == STATE_FLUSH);
assign flush_line = counter[`CS_LINE_SEL_BITS-1:0];
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way
VX_decoder #(
.N (`CS_WAY_SEL_BITS),
.D (NUM_WAYS)
) ctr_decoder (
.sel_in (counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
.data_in (1'b1),
.data_out (flush_way)
);
if (WRITEBACK && (NUM_WAYS > 1)) begin : g_flush_way
assign flush_way = counter[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS];
end else begin : g_flush_way_all
assign flush_way = {NUM_WAYS{1'b1}};
assign flush_way = '0;
end
endmodule

View file

@ -150,19 +150,19 @@ module VX_cache_bank #(
wire is_creq_st0, is_creq_st1;
wire is_fill_st0, is_fill_st1;
wire is_flush_st0, is_flush_st1;
wire [NUM_WAYS-1:0] flush_way_st0;
wire [NUM_WAYS-1:0] evict_way_st0, evict_way_st1;
wire [`CS_WAY_SEL_WIDTH-1:0] flush_way_st0, evict_way_st0;
wire [`CS_WAY_SEL_WIDTH-1:0] way_idx_st1;
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1;
wire [`CS_TAG_SEL_BITS-1:0] line_tag_st1;
wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel, line_idx_st0, line_idx_st1;
wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1;
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0, evict_tag_st1;
wire rw_sel, rw_st0, rw_st1;
wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1;
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
wire [`CS_WORD_WIDTH-1:0] write_word_st0, write_word_st1;
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0;
@ -170,18 +170,18 @@ module VX_cache_bank #(
wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1;
wire mshr_pending_st0, mshr_pending_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_previd_st0, mshr_previd_st1;
wire is_hit_st0, is_hit_st1;
wire mshr_empty;
wire flush_valid;
wire init_valid;
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
wire [NUM_WAYS-1:0] flush_way;
wire [`CS_WAY_SEL_WIDTH-1:0] flush_way;
wire flush_ready;
// ensure we have no pending memory request in the bank
wire no_pending_req = ~valid_st0 && ~valid_st1 && mreq_queue_empty;
// flush unit
VX_bank_flush #(
.BANK_ID (BANK_ID),
.CACHE_SIZE (CACHE_SIZE),
@ -203,9 +203,7 @@ module VX_cache_bank #(
.bank_empty (no_pending_req)
);
logic rdw_hazard, post_hazard;
wire pipe_stall = crsp_queue_stall || rdw_hazard;
wire pipe_stall = crsp_queue_stall;
// inputs arbitration:
// mshr replay has highest priority to maximize utilization since there is no miss.
@ -295,8 +293,6 @@ module VX_cache_bank #(
assign req_uuid_sel = '0;
end
wire [`CS_LINE_SEL_BITS-1:0] line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0];
wire is_init_sel = init_valid;
wire is_creq_sel = creq_enable || replay_enable;
wire is_fill_sel = fill_enable;
@ -304,7 +300,7 @@ module VX_cache_bank #(
wire is_replay_sel = replay_enable;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_WAY_SEL_WIDTH + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
@ -334,22 +330,18 @@ module VX_cache_bank #(
wire do_read_st1 = valid_st1 && is_read_st1;
wire do_write_st1 = valid_st1 && is_write_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_flush_st1 = valid_st1 && is_flush_st1 && WRITEBACK;
assign line_idx_sel = addr_sel[`CS_LINE_SEL_BITS-1:0];
assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0);
assign write_word_st0 = data_st0[`CS_WORD_WIDTH-1:0];
assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st1;
wire [NUM_WAYS-1:0] tag_matches_st1;
wire is_hit_st1 = (| tag_matches_st1);
wire do_lookup_st0 = do_read_st0 || do_write_st0;
wire do_lookup_st1 = do_read_st1 || do_write_st1;
reg [NUM_WAYS-1:0] victim_way_st0;
wire [`CS_WAY_SEL_WIDTH-1:0] victim_way_st0;
wire [NUM_WAYS-1:0] tag_matches_st0;
VX_cache_repl #(
.CACHE_SIZE (CACHE_SIZE),
@ -363,10 +355,10 @@ module VX_cache_bank #(
.stall (pipe_stall),
.hit_valid (do_lookup_st1 && is_hit_st1 && ~pipe_stall),
.hit_line (line_idx_st1),
.hit_way (tag_matches_st1),
.hit_way (way_idx_st1),
.repl_valid (do_fill_st0 && ~pipe_stall),
.repl_line (line_idx_st0),
.repl_line_n(line_idx_sel),
.repl_line (line_idx_st0),
.repl_way (victim_way_st0)
);
@ -388,27 +380,29 @@ module VX_cache_bank #(
.flush (do_flush_st0 && ~pipe_stall),
.fill (do_fill_st0 && ~pipe_stall),
.lookup (do_lookup_st0 && ~pipe_stall),
.line_addr (addr_st0),
.line_idx_n (line_idx_sel),
.line_idx (line_idx_st0),
.line_tag (line_tag_st0),
.evict_way (evict_way_st0),
// outputs
.tag_matches_r(tag_matches_st1),
.line_tag_r (line_tag_st1),
.evict_tag_r(evict_tag_st1),
.evict_way_r(evict_way_st1)
.tag_matches(tag_matches_st0),
.evict_tag (evict_tag_st0)
);
assign is_hit_st0 = (| tag_matches_st0);
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
assign mshr_id_st0 = is_replay_st0 ? replay_id_st0 : mshr_alloc_id_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, rw_st0, flags_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, rw_st1, flags_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1})
.data_in ({valid_st0, is_fill_st0, is_flush_st0, is_creq_st0, is_replay_st0, is_hit_st0, rw_st0, flags_st0, evict_tag_st0, line_tag_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_previd_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_fill_st1, is_flush_st1, is_creq_st1, is_replay_st1, is_hit_st1, rw_st1, flags_st1, evict_tag_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_previd_st1, mshr_pending_st1})
);
if (UUID_WIDTH != 0) begin : g_req_uuid_st1
@ -422,58 +416,12 @@ module VX_cache_bank #(
// ensure mshr replay always gets a hit
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1 && ~is_hit_st1), ("%t: missed mshr replay", $time))
if (WRITE_ENABLE) begin : g_rdw_hazard
// This implementation uses single-port BRAMs for the tags and data stores.
// Using different stages for read and write operations requires a pipeline stall in between due to address port sharing.
// Tags fill/flush can perform read and write in the same stage, since no dependency between.
// Data fill/flush can perform read and write in the same stage, since way_idx is available in st0.
// A data read should happen in st0 for its result to be available in st1.
// A data write should happen in st1 when the tag hit status is available.
// The r/w hazard is not needed for consecutive writes since they both only write in st1.
// The r/w hazard is also not needed for next writethrough fill/flush to the same line.
// For reads or writeback fill/flush to the same line, we still need the hazard
// because the data written in st1 cannot be read at the same time in st0 without extra forwarding logic.
wire is_write_sel = is_creq_sel && rw_sel;
wire is_same_line = (line_idx_sel == line_idx_st0);
always @(posedge clk) begin
if (reset) begin
post_hazard <= 0;
rdw_hazard <= 0;
end else begin
if (~crsp_queue_stall) begin
post_hazard <= rdw_hazard;
rdw_hazard <= do_write_st0 && valid_sel && ~(is_write_sel || (is_same_line && !WRITEBACK && (is_fill_sel || is_flush_sel)));
end
end
end
end else begin : g_rdw_hazard_ro
assign rdw_hazard = 0;
assign post_hazard = 0;
end
assign write_word_st1 = data_st1[`CS_WORD_WIDTH-1:0];
`UNUSED_VAR (data_st1)
wire [`CS_LINE_WIDTH-1:0] evict_data_st1;
wire[`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] read_data_st1;
wire [LINE_SIZE-1:0] evict_byteen_st1;
wire line_dirty_st1;
wire data_write;
wire [`CS_LINE_SEL_BITS-1:0] data_line_idx;
if (WRITE_ENABLE) begin : g_data_ctrl
// by default all data accesses happen in st0 and use line_idx_st0.
// data writes should happen in st1 when the tag hit is available,
// and use line_idx_st1 to ensure the correct line is updated.
// if a rdw hazard is active due to conflict, ensure we don't write twice.
assign data_write = do_write_st1 && ~post_hazard && ~crsp_queue_stall;
assign data_line_idx = data_write ? line_idx_st1 : line_idx_st0;
end else begin : g_data_ctrl_ro
`UNUSED_VAR (post_hazard)
`UNUSED_VAR (do_write_st1)
assign data_write = 0;
assign data_line_idx = line_idx_st0;
end
wire evict_dirty_st1;
VX_cache_data #(
.CACHE_SIZE (CACHE_SIZE),
@ -493,18 +441,18 @@ module VX_cache_bank #(
.fill (do_fill_st0 && ~pipe_stall),
.flush (do_flush_st0 && ~pipe_stall),
.read (do_read_st0 && ~pipe_stall),
.write (data_write),
.write (do_write_st0 && ~pipe_stall),
.evict_way (evict_way_st0),
.tag_matches(tag_matches_st1),
.line_idx (data_line_idx),
.tag_matches(tag_matches_st0),
.line_idx (line_idx_st0),
.fill_data (data_st0),
.write_word (write_word_st1),
.word_idx (word_idx_st1),
.write_byteen(byteen_st1),
.write_word (write_word_st0),
.word_idx (word_idx_st0),
.write_byteen(byteen_st0),
// outputs
.way_idx (way_idx_st1),
.read_data (read_data_st1),
.line_dirty (line_dirty_st1),
.evict_data (evict_data_st1),
.evict_dirty(evict_dirty_st1),
.evict_byteen(evict_byteen_st1)
);
@ -600,7 +548,7 @@ module VX_cache_bank #(
assign crsp_queue_valid = do_read_st1 && is_hit_st1;
assign crsp_queue_idx = req_idx_st1;
assign crsp_queue_data = read_data_st1;
assign crsp_queue_data = read_data_st1[word_idx_st1];
assign crsp_queue_tag = tag_st1;
VX_elastic_buffer #(
@ -610,7 +558,7 @@ module VX_cache_bank #(
) core_rsp_queue (
.clk (clk),
.reset (reset),
.valid_in (crsp_queue_valid && ~rdw_hazard),
.valid_in (crsp_queue_valid),
.ready_in (crsp_queue_ready),
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
@ -618,9 +566,7 @@ module VX_cache_bank #(
.ready_out (core_rsp_ready)
);
// we use 'do_read_st1' instead of 'crsp_queue_valid'
// to remove costly 'is_hit_st1' signal from critical paths.
assign crsp_queue_stall = do_read_st1 && ~crsp_queue_ready;
assign crsp_queue_stall = crsp_queue_valid && ~crsp_queue_ready;
// schedule memory request
@ -634,7 +580,7 @@ module VX_cache_bank #(
wire is_fill_or_flush_st1 = is_fill_st1 || (is_flush_st1 && WRITEBACK);
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
wire do_writeback_st1 = do_fill_or_flush_st1 && line_dirty_st1;
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_addr_st1 = {evict_tag_st1, line_idx_st1};
if (WRITE_ENABLE) begin : g_mreq_queue
@ -642,7 +588,7 @@ module VX_cache_bank #(
if (DIRTY_BYTES) begin : g_dirty_bytes
// ensure dirty bytes match the tag info
wire has_dirty_bytes = (| evict_byteen_st1);
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (line_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, line_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("%t: missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", $time, evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)))
end
// issue a fill request on a read/write miss
// issue a writeback on a dirty line eviction
@ -651,8 +597,10 @@ module VX_cache_bank #(
&& ~pipe_stall;
assign mreq_queue_addr = is_fill_or_flush_st1 ? evict_addr_st1 : addr_st1;
assign mreq_queue_rw = is_fill_or_flush_st1;
assign mreq_queue_data = evict_data_st1;
assign mreq_queue_data = read_data_st1;
assign mreq_queue_byteen = is_fill_or_flush_st1 ? evict_byteen_st1 : '1;
`UNUSED_VAR (write_word_st1)
`UNUSED_VAR (byteen_st1)
end else begin : g_wt
wire [LINE_SIZE-1:0] line_byteen;
VX_decoder #(
@ -675,7 +623,6 @@ module VX_cache_bank #(
`UNUSED_VAR (is_fill_or_flush_st1)
`UNUSED_VAR (do_writeback_st1)
`UNUSED_VAR (evict_addr_st1)
`UNUSED_VAR (evict_data_st1)
`UNUSED_VAR (evict_byteen_st1)
end
end else begin : g_mreq_queue_ro
@ -688,8 +635,9 @@ module VX_cache_bank #(
assign mreq_queue_byteen = '1;
`UNUSED_VAR (do_writeback_st1)
`UNUSED_VAR (evict_addr_st1)
`UNUSED_VAR (evict_data_st1)
`UNUSED_VAR (evict_byteen_st1)
`UNUSED_VAR (write_word_st1)
`UNUSED_VAR (byteen_st1)
end
if (UUID_WIDTH != 0) begin : g_mreq_queue_tag_uuid
@ -722,10 +670,6 @@ module VX_cache_bank #(
assign mem_req_valid = ~mreq_queue_empty;
`UNUSED_VAR (do_fill_st1)
`UNUSED_VAR (do_flush_st1)
`UNUSED_VAR (evict_way_st1)
///////////////////////////////////////////////////////////////////////////////
`ifdef PERF_ENABLE
@ -740,8 +684,8 @@ module VX_cache_bank #(
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
always @(posedge clk) begin
if (input_stall || pipe_stall) begin
`TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw=%b\n", $time, INSTANCE_ID,
crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard))
`TRACE(4, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID,
crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full))
end
if (mem_rsp_fire) begin
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
@ -764,32 +708,37 @@ module VX_cache_bank #(
`TRACE(3, ("%t: %s tags-init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), line_idx_st0))
end
if (do_fill_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID,
`TRACE(3, ("%t: %s tags-fill: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0))
end
if (do_flush_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID,
`TRACE(3, ("%t: %s tags-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0))
end
if (do_lookup_st1 && ~pipe_stall) begin
`TRACE(3, ("%t: %s tags-Lookup: addr=0x%0h, rw=%b, way=%b, line=%0d, tag=0x%0h, hit=%b (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, tag_matches_st1, line_idx_st1, line_tag_st1, is_hit_st1, req_uuid_st1))
if (is_hit_st1) begin
`TRACE(3, ("%t: %s tags-hit: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1))
end else begin
`TRACE(3, ("%t: %s tags-miss: addr=0x%0h, rw=%b, way=%0d, line=%0d, tag=0x%0h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), rw_st1, way_idx_st1, line_idx_st1, line_tag_st1, req_uuid_st1))
end
end
if (do_fill_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%b, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`TRACE(3, ("%t: %s data-fill: addr=0x%0h, way=%0d, line=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, data_st0, req_uuid_st0))
end
if (do_flush_st0 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%b, line=%0d (#%0d)\n", $time, INSTANCE_ID,
`TRACE(3, ("%t: %s data-flush: addr=0x%0h, way=%0d, line=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st0, BANK_ID), evict_way_st0, line_idx_st0, req_uuid_st0))
end
if (do_read_st1 && is_hit_st1 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, read_data_st1, req_uuid_st1))
`TRACE(3, ("%t: %s data-read: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, crsp_queue_data, req_uuid_st1))
end
if (do_write_st1 && is_hit_st1 && ~pipe_stall) begin
`TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), tag_matches_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1))
`TRACE(3, ("%t: %s data-write: addr=0x%0h, way=%0d, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), way_idx_st1, line_idx_st1, word_idx_st1, byteen_st1, write_word_st1, req_uuid_st1))
end
if (crsp_queue_fire) begin
`TRACE(2, ("%t: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID,

View file

@ -41,38 +41,23 @@ module VX_cache_data #(
input wire read,
input wire write,
input wire [`CS_LINE_SEL_BITS-1:0] line_idx,
input wire [NUM_WAYS-1:0] evict_way,
input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way,
input wire [NUM_WAYS-1:0] tag_matches,
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
input wire [`CS_WORD_WIDTH-1:0] write_word,
input wire [WORD_SIZE-1:0] write_byteen,
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx,
// outputs
output wire [`CS_WORD_WIDTH-1:0] read_data,
output wire line_dirty,
output wire [`CS_LINE_WIDTH-1:0] evict_data,
output wire [`CS_WAY_SEL_WIDTH-1:0] way_idx,
output wire [`CS_LINE_WIDTH-1:0] read_data,
output wire evict_dirty,
output wire [LINE_SIZE-1:0] evict_byteen
);
`UNUSED_PARAM (WORD_SIZE)
`UNUSED_VAR (stall)
localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1;
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
if (WRITEBACK != 0) begin : g_writeback
localparam BYTEEN_DATAW = 1 + ((DIRTY_BYTES != 0) ? LINE_SIZE : 0);
wire [`LOG2UP(NUM_WAYS)-1:0] evict_way_idx, evict_way_idx_r;
VX_onehot_encoder #(
.N (NUM_WAYS)
) fill_way_enc (
.data_in (evict_way),
.data_out (evict_way_idx),
`UNUSED_PIN (valid_out)
);
`BUFFER_EX(evict_way_idx_r, evict_way_idx, ~stall, 1);
wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_rdata;
wire [NUM_WAYS-1:0][BYTEEN_DATAW-1:0] byteen_wdata;
@ -80,7 +65,7 @@ module VX_cache_data #(
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_byteen_wdata
wire evict = fill || flush;
wire evict_way_en = (NUM_WAYS == 1) || evict_way[i];
wire evict_way_en = (NUM_WAYS == 1) || (evict_way == i);
wire dirty_data = write; // only asserted on writes
wire dirty_wren = init || (evict && evict_way_en) || (write && tag_matches[i]);
if (DIRTY_BYTES != 0) begin : g_dirty_bytes
@ -121,54 +106,47 @@ module VX_cache_data #(
);
if (DIRTY_BYTES != 0) begin : g_line_dirty_and_byteen
assign {line_dirty, evict_byteen} = byteen_rdata[evict_way_idx_r];
assign {evict_dirty, evict_byteen} = byteen_rdata[way_idx];
end else begin : g_line_dirty
assign line_dirty = byteen_rdata[evict_way_idx_r];
assign evict_dirty = byteen_rdata[way_idx];
assign evict_byteen = '1;
end
assign evict_data = line_rdata[evict_way_idx_r];
end else begin : g_no_writeback
`UNUSED_VAR (init)
`UNUSED_VAR (flush)
assign line_dirty = 0;
assign evict_data = '0;
assign evict_dirty = 0;
assign evict_byteen = '0;
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store
wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [BYTEENW-1:0] line_wren;
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
wire fill_way_en = (NUM_WAYS == 1) || evict_way[i];
if (WRITE_ENABLE) begin : g_data_store
// create a single write-enable block ram to reduce area overhead
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] line_wren;
wire line_write;
wire line_read;
if (WRITE_ENABLE != 0) begin : g_wdata
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_wdata
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_mask;
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_write_mask
wire word_en = (`CS_WORDS_PER_LINE == 1) || (word_idx == j);
assign write_mask[j] = write_byteen & {WORD_SIZE{word_en}};
end
assign line_wdata = (fill && fill_way_en) ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
assign line_wren = {LINE_SIZE{fill && fill_way_en}}
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
end else begin : g_ro_wdata
`UNUSED_VAR (write)
`UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_word)
`UNUSED_VAR (word_idx)
assign line_wdata = fill_data;
assign line_wren = fill_way_en;
assign line_wdata[i] = fill ? fill_data : {`CS_WORDS_PER_LINE{write_word}};
assign line_wren[i] = {LINE_SIZE{fill && fill_way_en}}
| ({LINE_SIZE{write && tag_matches[i]}} & write_mask);
end
wire line_write = fill || (write && WRITE_ENABLE);
wire line_read = read || ((fill || flush) && WRITEBACK);
assign line_write = fill || (write && WRITE_ENABLE);
assign line_read = read || ((fill || flush) && WRITEBACK);
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH),
.DATAW (NUM_WAYS * `CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (BYTEENW),
.WRENW (NUM_WAYS * LINE_SIZE),
.OUT_REG (1)
) data_store (
.clk (clk),
@ -178,35 +156,46 @@ module VX_cache_data #(
.wren (line_wren),
.addr (line_idx),
.wdata (line_wdata),
.rdata (line_rdata[i])
.rdata (line_rdata)
);
end else begin : g_data_store
`UNUSED_VAR (write)
`UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_word)
`UNUSED_VAR (word_idx)
// we don't merge the ways into a single block ram due to WREN overhead
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_ways
wire fill_way_en = (NUM_WAYS == 1) || (evict_way == i);
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (1)
) data_store (
.clk (clk),
.reset (reset),
.read (read),
.write (fill && fill_way_en),
.wren (1'b1),
.addr (line_idx),
.wdata (fill_data),
.rdata (line_rdata[i])
);
end
end
wire [`LOG2UP(NUM_WAYS)-1:0] hit_way_idx;
wire [`CS_WAY_SEL_WIDTH-1:0] hit_idx;
VX_onehot_encoder #(
.N (NUM_WAYS)
) hit_idx_enc (
) way_idx_enc (
.data_in (tag_matches),
.data_out (hit_way_idx),
.data_out (hit_idx),
`UNUSED_PIN (valid_out)
);
if (`CS_WORDS_PER_LINE > 1) begin : g_read_data
// order the data layout to perform ways multiplexing last.
// this allows converting way index to binary in parallel with BRAM read and word indexing.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
VX_transpose #(
.DATAW (`CS_WORD_WIDTH),
.N (NUM_WAYS),
.M (`CS_WORDS_PER_LINE)
) transpose (
.data_in (line_rdata),
.data_out (transposed_rdata)
);
assign read_data = transposed_rdata[word_idx][hit_way_idx];
end else begin : g_read_data_1w
`UNUSED_VAR (word_idx)
assign read_data = line_rdata[hit_way_idx];
end
`BUFFER_EX(way_idx, (read ? hit_idx : evict_way), ~stall, 1);
assign read_data = line_rdata[way_idx];
endmodule

View file

@ -22,6 +22,7 @@
`define CS_LINE_WIDTH (8 * LINE_SIZE)
`define CS_BANK_SIZE (CACHE_SIZE / NUM_BANKS)
`define CS_WAY_SEL_BITS `CLOG2(NUM_WAYS)
`define CS_WAY_SEL_WIDTH `UP(`CS_WAY_SEL_BITS)
`define CS_LINES_PER_BANK (`CS_BANK_SIZE / (LINE_SIZE * NUM_WAYS))
`define CS_WORDS_PER_LINE (LINE_SIZE / WORD_SIZE)

View file

@ -97,135 +97,114 @@ module VX_cache_repl #(
input wire stall,
input wire hit_valid,
input wire [`CS_LINE_SEL_BITS-1:0] hit_line,
input wire [NUM_WAYS-1:0] hit_way,
input wire [`CS_WAY_SEL_WIDTH-1:0] hit_way,
input wire repl_valid,
input wire [`CS_LINE_SEL_BITS-1:0] repl_line_n,
input wire [`CS_LINE_SEL_BITS-1:0] repl_line,
output wire [NUM_WAYS-1:0] repl_way
output wire [`CS_WAY_SEL_WIDTH-1:0] repl_way
);
localparam WAY_SEL_WIDTH = `CS_WAY_SEL_WIDTH;
`UNUSED_VAR (stall)
localparam WAY_IDX_BITS = $clog2(NUM_WAYS);
localparam WAY_IDX_WIDTH = `UP(WAY_IDX_BITS);
if (NUM_WAYS > 1) begin : g_enable
if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru
// Pseudo Least Recently Used replacement policy
localparam LRU_WIDTH = `UP(NUM_WAYS-1);
localparam USE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM;
if (REPL_POLICY == `CS_REPL_PLRU) begin : g_plru
// Pseudo Least Recently Used replacement policy
localparam LRU_WIDTH = `UP(NUM_WAYS-1);
localparam FORCE_BRAM = (LRU_WIDTH * `CS_LINES_PER_BANK) >= 1024;
wire [LRU_WIDTH-1:0] plru_rdata;
wire [LRU_WIDTH-1:0] plru_wdata;
wire [LRU_WIDTH-1:0] plru_wmask;
wire [WAY_IDX_WIDTH-1:0] repl_way_idx;
wire [WAY_IDX_WIDTH-1:0] hit_way_idx;
wire [LRU_WIDTH-1:0] plru_rdata;
wire [LRU_WIDTH-1:0] plru_wdata;
wire [LRU_WIDTH-1:0] plru_wmask;
VX_dp_ram #(
.DATAW (LRU_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (LRU_WIDTH),
.OUT_REG (USE_BRAM)
) plru_store (
.clk (clk),
.reset (reset),
.read (USE_BRAM ? ~stall : repl_valid),
.write (hit_valid),
.wren (plru_wmask),
.waddr (hit_line),
.raddr (USE_BRAM ? repl_line_n : repl_line),
.wdata (plru_wdata),
.rdata (plru_rdata)
);
VX_dp_ram #(
.DATAW (LRU_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (LRU_WIDTH),
.OUT_REG (FORCE_BRAM)
) plru_store (
.clk (clk),
.reset (reset),
.read (FORCE_BRAM ? ~stall : repl_valid),
.write (hit_valid),
.wren (plru_wmask),
.waddr (hit_line),
.raddr (FORCE_BRAM ? repl_line_n : repl_line),
.wdata (plru_wdata),
.rdata (plru_rdata)
);
plru_decoder #(
.NUM_WAYS (NUM_WAYS)
) plru_dec (
.way_idx (hit_way),
.lru_data (plru_wdata),
.lru_mask (plru_wmask)
);
VX_onehot_encoder #(
.N (NUM_WAYS)
) hit_way_enc (
.data_in (hit_way),
.data_out (hit_way_idx),
`UNUSED_PIN (valid_out)
);
plru_encoder #(
.NUM_WAYS (NUM_WAYS)
) plru_enc (
.lru_in (plru_rdata),
.way_idx (repl_way)
);
plru_decoder #(
.NUM_WAYS (NUM_WAYS)
) plru_dec (
.way_idx (hit_way_idx),
.lru_data (plru_wdata),
.lru_mask (plru_wmask)
);
end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic
// Cyclic replacement policy
localparam USE_BRAM = (WAY_SEL_WIDTH * `CS_LINES_PER_BANK) >= `MAX_LUTRAM;
plru_encoder #(
.NUM_WAYS (NUM_WAYS)
) plru_enc (
.lru_in (plru_rdata),
.way_idx (repl_way_idx)
);
`UNUSED_VAR (hit_valid)
`UNUSED_VAR (hit_line)
`UNUSED_VAR (hit_way)
`UNUSED_VAR (repl_valid)
VX_decoder #(
.N (WAY_IDX_BITS)
) repl_way_dec (
.sel_in (repl_way_idx),
.data_in (1'b1),
.data_out (repl_way)
);
wire [WAY_SEL_WIDTH-1:0] ctr_rdata;
wire [WAY_SEL_WIDTH-1:0] ctr_wdata = ctr_rdata + 1;
end else if (REPL_POLICY == `CS_REPL_CYCLIC) begin : g_cyclic
// Cyclic replacement policy
localparam CTR_WIDTH = $clog2(NUM_WAYS);
localparam FORCE_BRAM = (CTR_WIDTH * `CS_LINES_PER_BANK) >= 1024;
VX_dp_ram #(
.DATAW (WAY_SEL_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (USE_BRAM)
) ctr_store (
.clk (clk),
.reset (reset),
.read (USE_BRAM ? ~stall : repl_valid),
.write (repl_valid),
.wren (1'b1),
.raddr (USE_BRAM ? repl_line_n : repl_line),
.waddr (repl_line),
.wdata (ctr_wdata),
.rdata (ctr_rdata)
);
`UNUSED_VAR (hit_valid)
`UNUSED_VAR (hit_line)
`UNUSED_VAR (hit_way)
`UNUSED_VAR (repl_valid)
wire [`UP(CTR_WIDTH)-1:0] ctr_rdata;
wire [`UP(CTR_WIDTH)-1:0] ctr_wdata = ctr_rdata + 1;
VX_dp_ram #(
.DATAW (`UP(CTR_WIDTH)),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (FORCE_BRAM)
) ctr_store (
.clk (clk),
.reset (reset),
.read (FORCE_BRAM ? ~stall : repl_valid),
.write (repl_valid),
.wren (1'b1),
.raddr (FORCE_BRAM ? repl_line_n : repl_line),
.waddr (repl_line),
.wdata (ctr_wdata),
.rdata (ctr_rdata)
);
VX_decoder #(
.N (WAY_IDX_BITS)
) ctr_decoder (
.sel_in (ctr_rdata),
.data_in (1'b1),
.data_out (repl_way)
);
end else begin : g_random
// Random replacement policy
assign repl_way = ctr_rdata;
end else begin : g_random
// Random replacement policy
`UNUSED_VAR (hit_valid)
`UNUSED_VAR (hit_line)
`UNUSED_VAR (hit_way)
`UNUSED_VAR (repl_valid)
`UNUSED_VAR (repl_line)
`UNUSED_VAR (repl_line_n)
reg [WAY_SEL_WIDTH-1:0] victim_idx;
always @(posedge clk) begin
if (reset) begin
victim_idx <= 0;
end else if (~stall) begin
victim_idx <= victim_idx + 1;
end
end
assign repl_way = victim_idx;
end
end else begin : g_disable
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (hit_valid)
`UNUSED_VAR (hit_line)
`UNUSED_VAR (hit_way)
`UNUSED_VAR (repl_valid)
`UNUSED_VAR (repl_line)
`UNUSED_VAR (repl_line_n)
if (NUM_WAYS > 1) begin : g_repl_way
reg [NUM_WAYS-1:0] victim_way;
always @(posedge clk) begin
if (reset) begin
victim_way <= 1;
end else if (~stall) begin
victim_way <= {victim_way[NUM_WAYS-2:0], victim_way[NUM_WAYS-1]};
end
end
assign repl_way = victim_way;
end else begin : g_repl_way_1
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign repl_way = 1'b1;
end
assign repl_way = 1'b0;
end
endmodule

View file

@ -36,50 +36,35 @@ module VX_cache_tags #(
input wire flush,
input wire fill,
input wire lookup,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [NUM_WAYS-1:0] evict_way,
input wire [`CS_LINE_SEL_BITS-1:0] line_idx_n,
input wire [`CS_LINE_SEL_BITS-1:0] line_idx,
input wire [`CS_TAG_SEL_BITS-1:0] line_tag,
input wire [`CS_WAY_SEL_WIDTH-1:0] evict_way,
// outputs
output wire [NUM_WAYS-1:0] tag_matches_r,
output wire [`CS_TAG_SEL_BITS-1:0] line_tag_r,
output wire [NUM_WAYS-1:0] evict_way_r,
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag_r
output wire [NUM_WAYS-1:0] tag_matches,
output wire [`CS_TAG_SEL_BITS-1:0] evict_tag
);
// valid, tag
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
wire [NUM_WAYS-1:0] read_valid;
if (NUM_WAYS > 1) begin : g_evict_way
`BUFFER_EX(evict_way_r, evict_way, ~stall, 1);
end else begin : g_evict_way_0
`UNUSED_VAR (evict_way)
assign evict_way_r = 1'b1;
end
`UNUSED_VAR (lookup)
if (WRITEBACK) begin : g_evict_tag_wb
VX_onehot_mux #(
.DATAW (`CS_TAG_SEL_BITS),
.N (NUM_WAYS)
) evict_tag_sel (
.data_in (read_tag),
.sel_in (evict_way_r),
.data_out (evict_tag_r)
);
assign evict_tag = read_tag[evict_way];
end else begin : g_evict_tag_wt
assign evict_tag_r = '0;
assign evict_tag = '0;
end
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store
wire do_fill = fill && evict_way[i];
wire do_flush = flush && (!WRITEBACK || evict_way[i]); // flush the whole line in writethrough mode
wire way_en = (NUM_WAYS == 1) || (evict_way == i);
wire do_fill = fill && way_en;
wire do_flush = flush && (!WRITEBACK || way_en); // flush the whole line in writethrough mode
wire line_read = lookup || (WRITEBACK && (fill || flush));
//wire line_read = lookup || (WRITEBACK && (fill || flush));
wire line_write = init || do_fill || do_flush;
wire line_valid = fill;
@ -89,26 +74,26 @@ module VX_cache_tags #(
assign line_wdata = {line_valid, line_tag};
assign {read_valid[i], read_tag[i]} = line_rdata;
VX_sp_ram #(
VX_dp_ram #(
.DATAW (TAG_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.OUT_REG (1)
.OUT_REG (1),
.WRITE_MODE ("W")
) tag_store (
.clk (clk),
.reset (reset),
.read (line_read),
.read (~stall),
.write (line_write),
.wren (1'b1),
.addr (line_idx),
.waddr (line_idx),
.raddr (line_idx_n),
.wdata (line_wdata),
.rdata (line_rdata)
);
end
`BUFFER_EX(line_tag_r, line_tag, ~stall, 1);
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_matches
assign tag_matches_r[i] = read_valid[i] && (line_tag_r == read_tag[i]);
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
end
endmodule

View file

@ -61,7 +61,7 @@ module VX_dp_ram #(
`ifdef SYNTHESIS
`ifdef QUARTUS
localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : "");
localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "MLAB, no_rw_check" : "auto");
localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "-name add_pass_through_logic_to_inferred_rams off" : "";
`define RAM_ARRAY (* ramstyle = RAM_STYLE_VALUE *) reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
`define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \
@ -70,9 +70,9 @@ module VX_dp_ram #(
end \
end
`define RAM_NO_RWCHECK (* altera_attribute = RAM_NO_RWCHECK_VALUE *)
`else
localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "");
localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : "";
`elif VIVADO
localparam `STRING RAM_STYLE_VALUE = USE_BRAM ? "block" : (LUTRAM ? "distributed" : "auto");
localparam `STRING RAM_NO_RWCHECK_VALUE = NO_RWCHECK ? "no" : "auto";
`define RAM_ARRAY (* ram_style = RAM_STYLE_VALUE *) reg [DATAW-1:0] ram [0:SIZE-1];
`define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
@ -80,6 +80,14 @@ module VX_dp_ram #(
end \
end
`define RAM_NO_RWCHECK (* rw_addr_collision = RAM_NO_RWCHECK_VALUE *)
`else
`define RAM_ARRAY reg [DATAW-1:0] ram [0:SIZE-1];
`define RAM_WRITE for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
ram[waddr][i * WSELW +: WSELW] <= wdata[i * WSELW +: WSELW]; \
end \
end
`define RAM_NO_RWCHECK
`endif
if (OUT_REG) begin : g_out_reg
reg [DATAW-1:0] rdata_r;
@ -122,7 +130,7 @@ module VX_dp_ram #(
rdata_r <= ram[raddr];
end
end
end end else if (WRITE_MODE == "U") begin : g_undefined
end else if (WRITE_MODE == "U") begin : g_undefined
`RAM_NO_RWCHECK `RAM_ARRAY
`RAM_INITIALIZATION
always @(posedge clk) begin
@ -138,7 +146,8 @@ module VX_dp_ram #(
end else begin
`STATIC_ASSERT(0, ("invalid write mode: %s", WRITE_MODE))
end
else begin : g_no_out_reg
assign rdata = rdata_r;
end else begin : g_no_out_reg
`UNUSED_VAR (read)
`RAM_NO_RWCHECK `RAM_ARRAY
`RAM_INITIALIZATION

View file

@ -20,7 +20,7 @@ module VX_fifo_queue #(
parameter ALM_FULL = (DEPTH - 1),
parameter ALM_EMPTY = 1,
parameter OUT_REG = 0,
parameter LUTRAM = ((DATAW * DEPTH) < `MAX_LUTRAM),
parameter LUTRAM = 0,
parameter SIZEW = `CLOG2(DEPTH+1)
) (
input wire clk,
@ -42,9 +42,6 @@ module VX_fifo_queue #(
`STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!"))
`STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!"))
`UNUSED_PARAM (OUT_REG)
`UNUSED_PARAM (LUTRAM)
VX_pending_size #(
.SIZE (DEPTH),
.ALM_EMPTY (ALM_EMPTY),
@ -62,6 +59,8 @@ module VX_fifo_queue #(
);
if (DEPTH == 1) begin : g_depth_1
`UNUSED_PARAM (OUT_REG)
`UNUSED_PARAM (LUTRAM)
reg [DATAW-1:0] head_r;
@ -75,6 +74,7 @@ module VX_fifo_queue #(
end else begin : g_depth_n
localparam USE_BRAM = !LUTRAM && ((DATAW * DEPTH) >= `MAX_LUTRAM);
localparam ADDRW = `CLOG2(DEPTH);
wire [DATAW-1:0] data_out_w;
@ -95,17 +95,17 @@ module VX_fifo_queue #(
end
end
wire [ADDRW-1:0] rd_ptr_w = LUTRAM ? rd_ptr_r : rd_ptr_n;
wire [ADDRW-1:0] rd_ptr_w = USE_BRAM ? rd_ptr_n : rd_ptr_r;
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
wire bypass = push && (empty || (going_empty && pop));
wire read = ((OUT_REG != 0) || !LUTRAM) ? ~bypass : pop;
wire read = ((OUT_REG != 0) || USE_BRAM) ? ~bypass : pop;
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (DEPTH),
.LUTRAM (LUTRAM),
.OUT_REG(!LUTRAM),
.LUTRAM (!USE_BRAM),
.OUT_REG(USE_BRAM),
.WRITE_MODE("W")
) dp_ram (
.clk (clk),

View file

@ -21,4 +21,6 @@ RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
TOP := VX_fifo_queue
PARAMS := -GDATAW=32 -GDEPTH=8
include ../common.mk