fixed shared memory multi-tag requests bug

This commit is contained in:
Blaise Tine 2021-05-26 15:03:48 -07:00
parent d8517d4d08
commit 4c5104e96a

View file

@ -4,25 +4,25 @@ module VX_shared_mem #(
parameter CACHE_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
parameter CACHE_SIZE = (1024*16),
// Number of banks
parameter NUM_BANKS = 4,
parameter NUM_BANKS = 2,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Number of Word requests per cycle
parameter NUM_REQS = NUM_BANKS,
parameter NUM_REQS = 4,
// Core Request Queue Size
parameter CREQ_SIZE = 4,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
parameter CREQ_SIZE = 8,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
parameter CORE_TAG_ID_BITS = 8,
// core request tag size
parameter CORE_TAG_WIDTH = (2 + CORE_TAG_ID_BITS),
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0
parameter BANK_ADDR_OFFSET = `CLOG2(256)
) (
input wire clk,
input wire reset,
@ -54,13 +54,6 @@ module VX_shared_mem #(
localparam CACHE_LINE_SIZE = WORD_SIZE;
`ifdef DBG_CACHE_REQ_INFO
/* verilator lint_off UNUSED */
wire [31:0] debug_pc_st0;
wire [`NW_BITS-1:0] debug_wid_st0;
/* verilator lint_on UNUSED */
`endif
wire [NUM_BANKS-1:0] per_bank_core_req_valid_unqual;
wire [NUM_BANKS-1:0] per_bank_core_req_rw_unqual;
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_unqual;
@ -109,20 +102,26 @@ module VX_shared_mem #(
wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data;
wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
wire creq_push, creq_pop, creq_empty, creq_full;
wire crsq_in_ready;
wire crsq_in_fire_last;
wire [NUM_BANKS-1:0] per_bank_rsp_valid = per_bank_core_req_valid & ~per_bank_core_req_rw;
wire core_req_has_read = (| per_bank_rsp_valid);
assign creq_push = (| core_req_valid) && !creq_full;
assign creq_pop = ~creq_empty && crsq_in_ready;
assign creq_push = (| core_req_valid) && ~creq_full;
assign creq_pop = (~creq_empty && ~core_req_has_read)
|| crsq_in_fire_last;
assign per_bank_core_req_ready_unqual = ~creq_full;
wire [NUM_REQS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual;
wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual;
`UNUSED_VAR (per_bank_core_req_addr_unqual)
for (genvar i = 0; i < NUM_REQS; i++) begin
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_req_addr_qual[i] = per_bank_core_req_addr_unqual[i][`LINE_SELECT_BITS-1:0];
end
@ -179,6 +178,34 @@ module VX_shared_mem #(
.dout (per_bank_core_rsp_data[i])
);
end
// The core response bus handles a single tag at the time
// We first need to select the current tag to process,
// then send all bank responses for that tag as a batch
wire crsq_in_valid, crsq_in_ready;
reg [NUM_BANKS-1:0] bank_rsp_sel, bank_rsp_sel_r;
wire [NUM_BANKS-1:0] bank_rsp_sel_n = bank_rsp_sel | bank_rsp_sel_r;
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
assign crsq_in_fire_last = crsq_in_fire && (bank_rsp_sel_n == per_bank_rsp_valid);
always @(posedge clk) begin
if (reset) begin
bank_rsp_sel <= 0;
end else begin
if (crsq_in_fire) begin
if (bank_rsp_sel_n == per_bank_rsp_valid) begin
bank_rsp_sel <= 0;
end else begin
bank_rsp_sel <= bank_rsp_sel_n;
end
end
end
end
reg [NUM_REQS-1:0] core_rsp_valids_in;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
@ -186,31 +213,30 @@ module VX_shared_mem #(
always @(*) begin
core_rsp_valids_in = 0;
core_rsp_data_in = 'x;
core_rsp_data_in = 'x;
core_rsp_tag_in = 'x;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_req_valid[i]) begin
core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
bank_rsp_sel_r = 0;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_rsp_valid[i] && ~bank_rsp_sel[i]) begin
core_rsp_tag_in = per_bank_core_req_tag[i];
end
end
end
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st0, debug_wid_st0} = core_rsp_tag_in[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
end else begin
assign {debug_pc_st0, debug_wid_st0} = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_req_valid[i]
&& (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
bank_rsp_sel_r[i] = 1;
end
end
end
`endif
wire [NUM_REQS-1:0] core_rsp_valids_out;
wire core_rsp_valid_out;
wire core_rsp_rw = | (per_bank_core_req_valid & per_bank_core_req_rw);
wire crsq_in_valid = ~creq_empty && ~core_rsp_rw;
assign crsq_in_valid = ~creq_empty && core_req_has_read;
VX_skid_buffer #(
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH)
@ -227,25 +253,82 @@ module VX_shared_mem #(
assign core_rsp_valid = core_rsp_valids_out & {NUM_REQS{core_rsp_valid_out}};
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
wire [NUM_BANKS-1:0][31:0] debug_pc_st0, debug_pc_st1;
wire [NUM_BANKS-1:0][`NW_BITS-1:0] debug_wid_st0, debug_wid_st1;
`IGNORE_WARNINGS_END
for (genvar i = 0; i < NUM_BANKS; ++i) begin
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st0[i], debug_wid_st0[i]} = per_bank_core_req_tag_unqual[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
assign {debug_pc_st1[i], debug_wid_st1[i]} = per_bank_core_req_tag[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
end else begin
assign {debug_pc_st0[i], debug_wid_st0[i]} = 0;
assign {debug_pc_st1[i], debug_wid_st1[i]} = 0;
end
end
`endif
`ifdef DBG_PRINT_CACHE_BANK
reg is_multi_tag_req;
`IGNORE_WARNINGS_BEGIN
reg [CORE_TAG_WIDTH-1:0] core_req_tag_sel;
`IGNORE_WARNINGS_END
always @(*) begin
core_req_tag_sel ='x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_req_valid[i]) begin
core_req_tag_sel = per_bank_core_req_tag[i];
end
end
is_multi_tag_req = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]
&& (core_req_tag_sel[CORE_TAG_ID_BITS-1:0] != per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
is_multi_tag_req = !creq_empty;
end
end
end
always @(posedge clk) begin
if (!crsq_in_ready) begin
$display("%t: cache%0d pipeline-stall", $time, CACHE_ID);
$display("%t: *** cache%0d pipeline-stall", $time, CACHE_ID);
end
if (is_multi_tag_req) begin
$display("%t: *** cache%0d multi-tag request!", $time, CACHE_ID);
end
if (creq_push) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid_unqual[i]) begin
if (per_bank_core_req_rw_unqual[i]) begin
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i],
debug_wid_st0[i], debug_pc_st0[i]);
end else begin
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i],
debug_wid_st0[i], debug_pc_st0[i]);
end
end
end
end
if (creq_pop) begin
if (core_rsp_rw) begin
$write("%t: cache%0d core-wr-req: tmask=%0b, addr=", $time, CACHE_ID, per_bank_core_req_valid);
end else begin
$write("%t: cache%0d core-rd-req: tmask=%0b, addr=", $time, CACHE_ID, per_bank_core_req_valid);
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]) begin
if (per_bank_core_req_rw[i]) begin
$display("%t: cache%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_req_data[i],
debug_wid_st1[i], debug_pc_st1[i]);
end else begin
$display("%t: cache%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i],
debug_wid_st1[i], debug_pc_st1[i]);
end
end
end
`PRINT_ARRAY1D(per_bank_core_req_addr, `NUM_THREADS);
$write(", tag=%0h, byteen=%b, data=", per_bank_core_req_tag, per_bank_core_req_byteen);
if (core_rsp_rw) begin
`PRINT_ARRAY1D(per_bank_core_req_data, `NUM_THREADS);
end else begin
`PRINT_ARRAY1D(per_bank_core_rsp_data, `NUM_THREADS);
end
$write(", wid=%0d, PC=%0h\n", debug_wid_st0, debug_pc_st0);
end
end
`endif