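cache elastic buffer optimization: replace VX_fifo_queue (push/pop/empty/full) with VX_elastic_buffer (valid/ready handshake) in VX_bank, VX_cache and VX_shared_mem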

This commit is contained in:
Blaise Tine 2021-07-15 11:59:49 -07:00
parent 8678150ce0
commit d9425cc484
3 changed files with 108 additions and 116 deletions

View file

@ -50,7 +50,7 @@ module VX_bank #(
`endif
// Core Request
input wire [NUM_PORTS-1:0] core_req_valid,
input wire core_req_valid,
input wire [NUM_PORTS-1:0] core_req_pmask,
input wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] core_req_wsel,
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen,
@ -97,10 +97,7 @@ module VX_bank #(
`IGNORE_WARNINGS_END
`endif
wire creq_pop;
wire creq_full;
wire creq_empty;
wire [NUM_PORTS-1:0] creq_pmask;
wire [NUM_PORTS-1:0] creq_pmask;
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] creq_wsel;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data;
@ -108,26 +105,22 @@ module VX_bank #(
wire creq_rw;
wire [`LINE_ADDR_WIDTH-1:0] creq_addr;
wire [CORE_TAG_WIDTH-1:0] creq_tag;
wire creq_push = core_req_valid && core_req_ready;
assign core_req_ready = !creq_full;
VX_fifo_queue #(
.DATAW (CORE_TAG_WIDTH + 1 + `LINE_ADDR_WIDTH + (1 + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
wire creq_out_valid, creq_out_ready;
VX_elastic_buffer #(
.DATAW (CORE_TAG_WIDTH + 1 + `LINE_ADDR_WIDTH + (1 + `UP(`WORD_SELECT_BITS) + WORD_SIZE + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.SIZE (CREQ_SIZE),
.BUFFERED (1)
) core_req_queue (
.clk (clk),
.reset (reset),
.push (creq_push),
.pop (creq_pop),
.ready_in (core_req_ready),
.valid_in (core_req_valid),
.data_in ({core_req_tag, core_req_rw, core_req_addr, core_req_pmask, core_req_wsel, core_req_byteen, core_req_data, core_req_tid}),
.data_out ({creq_tag, creq_rw, creq_addr, creq_pmask, creq_wsel, creq_byteen, creq_data, creq_tid}),
.empty (creq_empty),
.full (creq_full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
.ready_out (creq_out_ready),
.valid_out (creq_out_valid)
);
wire mshr_alm_full;
@ -166,8 +159,8 @@ module VX_bank #(
wire crsq_in_valid, crsq_in_ready, crsq_in_stall;
wire mreq_alm_full;
wire mrsq_pop;
wire creq_out_fire = creq_out_valid && creq_out_ready;
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
VX_pending_size #(
@ -175,7 +168,7 @@ module VX_bank #(
) mshr_pending_size (
.clk (clk),
.reset (reset),
.push (creq_pop && !creq_rw),
.push (creq_out_fire && !creq_rw),
.pop (crsq_in_fire),
.full (mshr_alm_full),
`UNUSED_PIN (empty),
@ -183,26 +176,29 @@ module VX_bank #(
);
// determine which queue to pop next in priority order
wire mshr_pop_unqual = mshr_valid
&& !mreq_alm_full; // ensure memory request queue not full (deadlock prevention)
wire mrsq_pop_unqual = !mshr_pop_unqual && mem_rsp_valid;
wire creq_pop_unqual = !mshr_pop_unqual && !mrsq_pop_unqual && !creq_empty && !flush_enable;
wire mshr_grant = !mreq_alm_full; // ensure memory request queue not full (deadlock prevention)
wire mshr_enable = mshr_grant && mshr_valid;
wire mrsq_grant = !mshr_enable;
wire mrsq_enable = mrsq_grant && mem_rsp_valid;
wire creq_grant = !mshr_enable && !mrsq_enable && !flush_enable;
wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1);
assign mshr_pop = mshr_pop_unqual
assign mshr_pop = mshr_enable
&& !(is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed
&& !crsq_in_stall; // ensure core response ready
&& !crsq_in_stall; // ensure core response ready
assign mrsq_pop = mrsq_pop_unqual
&& !crsq_in_stall; // ensure core response ready
assign creq_out_ready = creq_grant
&& !mreq_alm_full // ensure memory request ready
&& !mshr_alm_full // ensure mshr enqueue ready
&& !crsq_in_stall; // ensure core response ready
assign creq_pop = creq_pop_unqual
&& !mreq_alm_full // ensure memory request ready
&& !mshr_alm_full // ensure mshr enqueue ready
&& !crsq_in_stall; // ensure core response ready
assign mem_rsp_ready = mrsq_grant
&& !crsq_in_stall; // ensure core response ready
assign mem_rsp_ready = mrsq_pop;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
// we have a miss in mshr or entering it for the current address
wire mshr_pending_sel = mshr_pending
@ -210,7 +206,7 @@ module VX_bank #(
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_sel, debug_wid_sel} = mshr_pop_unqual ? mshr_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS] : creq_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
assign {debug_pc_sel, debug_wid_sel} = mshr_enable ? mshr_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS] : creq_tag[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
end else begin
assign {debug_pc_sel, debug_wid_sel} = 0;
end
@ -245,18 +241,18 @@ module VX_bank #(
.reset (reset),
.enable (!crsq_in_stall),
.data_in ({
flush_enable || mshr_pop || mrsq_pop || creq_pop,
flush_enable || mshr_pop || mem_rsp_fire || creq_out_fire,
flush_enable,
mshr_pop_unqual,
mrsq_pop_unqual || flush_enable,
mshr_pop_unqual ? 1'b0 : creq_rw,
mshr_pop_unqual ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)),
mem_rsp_valid ? mem_rsp_data : creq_line_data,
mshr_pop_unqual ? mshr_wsel : creq_wsel,
mshr_pop_unqual ? mshr_byteen : creq_byteen,
mshr_pop_unqual ? mshr_tid : creq_tid,
mshr_pop_unqual ? mshr_pmask : creq_pmask,
mshr_pop_unqual ? mshr_tag : creq_tag,
mshr_enable,
mrsq_enable || flush_enable,
mshr_enable ? 1'b0 : creq_rw,
mshr_enable ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)),
mem_rsp_valid ? mem_rsp_data : creq_line_data,
mshr_enable ? mshr_wsel : creq_wsel,
mshr_enable ? mshr_byteen : creq_byteen,
mshr_enable ? mshr_tid : creq_tid,
mshr_enable ? mshr_pmask : creq_pmask,
mshr_enable ? mshr_tag : creq_tag,
mshr_pending_sel
}),
.data_out ({valid_st0, is_flush_st0, is_mshr_st0, is_fill_st0, mem_rw_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_tid_st0, pmask_st0, tag_st0, mshr_pending_st0})
@ -444,7 +440,7 @@ module VX_bank #(
.lookup_match (mshr_pending),
// fill update
.fill_update (mrsq_pop),
.fill_update (mem_rsp_fire),
// schedule
.schedule (mshr_pop),
@ -562,13 +558,13 @@ module VX_bank #(
if (flush_enable) begin
$display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID));
end
if (mrsq_pop) begin
if (mem_rsp_fire) begin
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_data);
end
if (mshr_pop) begin
$display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, mshr_byteen, debug_wid_sel, debug_pc_sel);
end
if (creq_pop) begin
if (creq_out_fire) begin
if (creq_rw)
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel);
else

View file

@ -241,32 +241,25 @@ module VX_cache #(
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual;
wire [`MEM_ADDR_WIDTH-1:0] mem_rsp_tag_nc_a, mem_rsp_tag_qual;
wire mrsq_full, mrsq_empty;
wire mrsq_push, mrsq_pop;
assign mrsq_push = mem_rsp_valid_nc && mem_rsp_ready_nc;
assign mem_rsp_ready_nc = !mrsq_full;
wire mrsq_out_valid, mrsq_out_ready;
// trim out shared memory and non-cacheable flags
assign mem_rsp_tag_nc_a = mem_rsp_tag_nc[NC_ENABLE +: `MEM_ADDR_WIDTH];
VX_fifo_queue #(
VX_elastic_buffer #(
.DATAW (`MEM_ADDR_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (MRSQ_SIZE),
.BUFFERED (1)
) mem_rsp_queue (
.clk (clk),
.reset (reset),
.push (mrsq_push),
.pop (mrsq_pop),
.ready_in (mem_rsp_ready_nc),
.valid_in (mem_rsp_valid_nc),
.data_in ({mem_rsp_tag_nc_a, mem_rsp_data_nc}),
.data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}),
.empty (mrsq_empty),
.full (mrsq_full),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (size)
.data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}),
.ready_out (mrsq_out_ready),
.valid_out (mrsq_out_valid)
);
`UNUSED_VAR (mem_rsp_tag_nc)
@ -289,7 +282,7 @@ module VX_cache #(
///////////////////////////////////////////////////////////////////////////
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
@ -318,9 +311,9 @@ module VX_cache #(
if (NUM_BANKS == 1) begin
`UNUSED_VAR (mem_rsp_tag_qual)
assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready;
assign mrsq_out_ready = per_bank_mem_rsp_ready;
end else begin
assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)];
assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)];
end
VX_core_req_bank_sel #(
@ -360,7 +353,7 @@ module VX_cache #(
///////////////////////////////////////////////////////////////////////////
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire [NUM_PORTS-1:0] curr_bank_core_req_valid;
wire curr_bank_core_req_valid;
wire [NUM_PORTS-1:0] curr_bank_core_req_pmask;
wire [NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] curr_bank_core_req_wsel;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
@ -424,10 +417,10 @@ module VX_cache #(
// Memory response
if (NUM_BANKS == 1) begin
assign curr_bank_mem_rsp_valid = !mrsq_empty;
assign curr_bank_mem_rsp_valid = mrsq_out_valid;
assign curr_bank_mem_rsp_addr = mem_rsp_tag_qual;
end else begin
assign curr_bank_mem_rsp_valid = !mrsq_empty && (`MEM_ADDR_BANK(mem_rsp_tag_qual) == i);
assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_ADDR_BANK(mem_rsp_tag_qual) == i);
assign curr_bank_mem_rsp_addr = `MEM_TO_LINE_ADDR(mem_rsp_tag_qual);
end
assign curr_bank_mem_rsp_data = mem_rsp_data_qual;
@ -464,7 +457,7 @@ module VX_cache #(
// Core request
.core_req_valid (curr_bank_core_req_valid),
.core_req_pmask (curr_bank_core_req_pmask),
.core_req_pmask (curr_bank_core_req_pmask),
.core_req_rw (curr_bank_core_req_rw),
.core_req_byteen (curr_bank_core_req_byteen),
.core_req_addr (curr_bank_core_req_addr),

View file

@ -107,19 +107,24 @@ module VX_shared_mem #(
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
wire creq_push, creq_pop, creq_empty, creq_full;
wire creq_in_ready;
wire creq_out_valid;
wire crsq_in_fire_last;
wire [NUM_BANKS-1:0] per_bank_rsp_valid = per_bank_core_req_valid & ~per_bank_core_req_rw;
wire [NUM_BANKS-1:0] per_bank_req_reads = per_bank_core_req_valid & ~per_bank_core_req_rw;
wire core_req_has_read = (| per_bank_rsp_valid);
assign creq_push = (| core_req_valid) && ~creq_full;
wire per_bank_req_has_reads = (| per_bank_req_reads);
assign creq_pop = (~creq_empty && ~core_req_has_read)
|| crsq_in_fire_last;
wire creq_in_valid = (| core_req_valid);
wire creq_out_ready = ~per_bank_req_has_reads // is write only
|| crsq_in_fire_last; // is sending last read response
assign per_bank_core_req_ready_unqual = ~creq_full;
assign per_bank_core_req_ready_unqual = creq_in_ready;
wire creq_in_fire = creq_in_valid && creq_in_ready;
wire creq_out_fire = creq_out_valid && creq_out_ready;
wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual;
`UNUSED_VAR (per_bank_core_req_addr_unqual)
@ -127,35 +132,33 @@ module VX_shared_mem #(
assign per_bank_core_req_addr_qual[i] = per_bank_core_req_addr_unqual[i][`LINE_SELECT_BITS-1:0];
end
VX_fifo_queue #(
VX_elastic_buffer #(
.DATAW (NUM_BANKS * (1 + 1 + `LINE_SELECT_BITS + WORD_SIZE + `WORD_WIDTH + CORE_TAG_WIDTH + `REQS_BITS)),
.SIZE (CREQ_SIZE),
.BUFFERED (1)
.BUFFERED (1) // output should be registered for the data_store addr port
) core_req_queue (
.clk (clk),
.reset (reset),
.push (creq_push),
.pop (creq_pop),
.data_in ({per_bank_core_req_valid_unqual,
per_bank_core_req_rw_unqual,
per_bank_core_req_addr_qual,
per_bank_core_req_byteen_unqual,
per_bank_core_req_data_unqual,
per_bank_core_req_tag_unqual,
per_bank_core_req_tid_unqual}),
.data_out({per_bank_core_req_valid,
per_bank_core_req_rw,
per_bank_core_req_addr,
per_bank_core_req_byteen,
per_bank_core_req_data,
per_bank_core_req_tag,
per_bank_core_req_tid}),
.empty (creq_empty),
.full (creq_full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
.clk (clk),
.reset (reset),
.ready_in (creq_in_ready),
.valid_in (creq_in_valid),
.data_in ({per_bank_core_req_valid_unqual,
per_bank_core_req_rw_unqual,
per_bank_core_req_addr_qual,
per_bank_core_req_byteen_unqual,
per_bank_core_req_data_unqual,
per_bank_core_req_tag_unqual,
per_bank_core_req_tid_unqual}),
.data_out ({per_bank_core_req_valid,
per_bank_core_req_rw,
per_bank_core_req_addr,
per_bank_core_req_byteen,
per_bank_core_req_data,
per_bank_core_req_tag,
per_bank_core_req_tid}),
.ready_out (creq_out_ready),
.valid_out (creq_out_valid)
);
`UNUSED_VAR (creq_in_fire)
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
@ -163,14 +166,14 @@ module VX_shared_mem #(
wire wren = per_bank_core_req_rw[i]
&& per_bank_core_req_valid[i]
&& creq_pop;
&& creq_out_fire;
VX_sp_ram #(
.DATAW (`WORD_WIDTH),
.SIZE (`LINES_PER_BANK),
.BYTEENW (WORD_SIZE),
.RWCHECK (1)
) data (
) data_store (
.clk (clk),
.addr (per_bank_core_req_addr[i]),
.wren (wren),
@ -187,23 +190,23 @@ module VX_shared_mem #(
wire crsq_in_valid, crsq_in_ready;
reg [NUM_BANKS-1:0] bank_rsp_sel, bank_rsp_sel_r;
reg [NUM_BANKS-1:0] bank_rsp_sel_prv, bank_rsp_sel_cur;
wire [NUM_BANKS-1:0] bank_rsp_sel_n = bank_rsp_sel | bank_rsp_sel_r;
wire [NUM_BANKS-1:0] bank_rsp_sel_n = bank_rsp_sel_prv | bank_rsp_sel_cur;
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
assign crsq_in_fire_last = crsq_in_fire && (bank_rsp_sel_n == per_bank_rsp_valid);
assign crsq_in_fire_last = crsq_in_fire && (bank_rsp_sel_n == per_bank_req_reads);
always @(posedge clk) begin
if (reset) begin
bank_rsp_sel <= 0;
bank_rsp_sel_prv <= 0;
end else begin
if (crsq_in_fire) begin
if (bank_rsp_sel_n == per_bank_rsp_valid) begin
bank_rsp_sel <= 0;
if (bank_rsp_sel_n == per_bank_req_reads) begin
bank_rsp_sel_prv <= 0;
end else begin
bank_rsp_sel <= bank_rsp_sel_n;
bank_rsp_sel_prv <= bank_rsp_sel_n;
end
end
end
@ -217,10 +220,10 @@ module VX_shared_mem #(
core_rsp_valids_in = 0;
core_rsp_data_in = 'x;
core_rsp_tag_in = 'x;
bank_rsp_sel_r = 0;
bank_rsp_sel_cur = 0;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_rsp_valid[i] && ~bank_rsp_sel[i]) begin
if (per_bank_req_reads[i] && ~bank_rsp_sel_prv[i]) begin
core_rsp_tag_in = per_bank_core_req_tag[i];
end
end
@ -230,12 +233,12 @@ module VX_shared_mem #(
&& (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
bank_rsp_sel_r[i] = 1;
bank_rsp_sel_cur[i] = 1;
end
end
end
assign crsq_in_valid = ~creq_empty && core_req_has_read;
assign crsq_in_valid = creq_out_valid && per_bank_req_has_reads;
VX_skid_buffer #(
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH)
@ -297,7 +300,7 @@ module VX_shared_mem #(
if (is_multi_tag_req) begin
$display("%t: *** cache%0d multi-tag request!", $time, CACHE_ID);
end
if (creq_push) begin
if (creq_in_fire) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid_unqual[i]) begin
if (per_bank_core_req_rw_unqual[i]) begin
@ -312,7 +315,7 @@ module VX_shared_mem #(
end
end
end
if (creq_pop) begin
if (creq_out_fire) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]) begin
if (per_bank_core_req_rw[i]) begin