writeback cache fixes

This commit is contained in:
Blaise Tine 2024-07-30 00:06:44 -07:00
parent 5600a8dd42
commit 22b0525c51
5 changed files with 55 additions and 45 deletions

View file

@ -537,7 +537,7 @@
// Enable Cache Writeback
`ifndef DCACHE_WRITEBACK
`define DCACHE_WRITEBACK 0
`define DCACHE_WRITEBACK 1
`endif
// LMEM Configurable Knobs ////////////////////////////////////////////////////
@ -601,7 +601,7 @@
// Enable Cache Writeback
`ifndef L2_WRITEBACK
`define L2_WRITEBACK 0
`define L2_WRITEBACK 1
`endif
// L3cache Configurable Knobs /////////////////////////////////////////////////
@ -647,7 +647,7 @@
// Enable Cache Writeback
`ifndef L3_WRITEBACK
`define L3_WRITEBACK 0
`define L3_WRITEBACK 1
`endif
// ISA Extensions /////////////////////////////////////////////////////////////

View file

@ -140,6 +140,7 @@ module VX_cache_bank #(
wire [NUM_WAYS-1:0] flush_way_st0;
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
wire rw_sel, rw_st0, rw_st1;
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
@ -291,6 +292,8 @@ module VX_cache_bank #(
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
wire [NUM_WAYS-1:0] evict_way_st0;
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
@ -338,7 +341,7 @@ module VX_cache_bank #(
assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, addr_st0[`CS_LINE_SEL_BITS-1:0]} : addr_st0;
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
@ -381,16 +384,20 @@ module VX_cache_bank #(
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
`UNUSED_VAR (do_write_miss_st1)
// ensure mshr replay always gets a hit
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay"));
// detect BRAM's read-during-write hazard
// both tag and data stores use BRAM with no read-during-write protection.
// we need to stall the pipeline to prevent read-after-write hazards.
assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
always @(posedge clk) begin // stall reads following writes to same address
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1)
always @(posedge clk) begin
// stall reads following writes to same line address
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
&& ~rdw_hazard3_st1; // release pipeline stall
end
@ -588,20 +595,21 @@ module VX_cache_bank #(
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
`UNUSED_VAR (do_writeback_st1)
if (WRITEBACK) begin
if (DIRTY_BYTES) begin
// ensure dirty bytes are valid
// ensure dirty bytes match the tag info
wire has_dirty_bytes = (| dirty_byteen_st1);
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes"));
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)));
end
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|| do_writeback_st1);
|| do_writeback_st1)
&& ~rdw_hazard3_st1;
end else begin
`UNUSED_VAR (evict_dirty_st1)
`UNUSED_VAR (do_writeback_st1)
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|| do_creq_wr_st1);
|| do_creq_wr_st1)
&& ~rdw_hazard3_st1;
end
assign mreq_queue_pop = mem_req_valid && mem_req_ready;

View file

@ -73,16 +73,17 @@ module VX_cache_data #(
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
if (WRITEBACK) begin
if (DIRTY_BYTES) begin
wire [`CLOG2(`CS_LINES_PER_BANK * NUM_WAYS)-1:0] way_addr;
if (NUM_WAYS > 1) begin
assign way_addr = {line_sel, way_idx};
end else begin
assign way_addr = line_sel;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
for (genvar i = 0; i < NUM_WAYS; ++i) begin
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
end
VX_sp_ram #(
@ -93,21 +94,23 @@ module VX_cache_data #(
.read (write || fill || flush),
.write (init || write || fill || flush),
`UNUSED_PIN (wren),
.addr (way_addr),
.wdata (write ? (dirty_byteen | write_byteen) : ((init || fill || flush) ? '0 : dirty_byteen)),
.rdata (dirty_byteen)
.addr (line_sel),
.wdata (bs_wdata),
.rdata (bs_rdata)
);
assign dirty_byteen = bs_rdata[way_idx];
end else begin
assign dirty_byteen = {LINE_SIZE{1'b1}};
end
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] dirty_data_w;
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
for (genvar j = 0; j < NUM_WAYS; ++j) begin
assign dirty_data_w[j][i] = rdata[i][j];
assign flipped_rdata[j][i] = line_rdata[i][j];
end
end
assign dirty_data = dirty_data_w[way_idx];
assign dirty_data = flipped_rdata[way_idx];
end else begin
assign dirty_byteen = '0;
assign dirty_data = '0;
@ -116,28 +119,25 @@ module VX_cache_data #(
// order the data layout to perform ways multiplexing last.
// this allows converting way index to binary in parallel with BRAM read.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
wire [BYTEENW-1:0] wren;
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [BYTEENW-1:0] line_wren;
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
assign wdata[i] = (fill || !WRITE_ENABLE) ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{write_data[i]}};
end
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
for (genvar j = 0; j < NUM_WAYS; ++j) begin
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
end
end
assign wren = wren_w;
assign line_wren = wren_w;
end else begin
`UNUSED_VAR (write)
`UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_data)
assign wdata = fill_data;
assign wren = fill;
assign line_wdata = fill_data;
assign line_wren = fill;
end
VX_onehot_encoder #(
@ -151,6 +151,8 @@ module VX_cache_data #(
wire line_read = (read && ~stall)
|| (WRITEBACK && (fill || flush));
wire line_write = write || fill;
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK),
@ -160,19 +162,19 @@ module VX_cache_data #(
) data_store (
.clk (clk),
.read (line_read),
.write (write || fill),
.wren (wren),
.write (line_write),
.wren (line_wren),
.addr (line_sel),
.wdata (wdata),
.rdata (rdata)
.wdata (line_wdata),
.rdata (line_rdata)
);
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
if (`CS_WORDS_PER_LINE > 1) begin
assign per_way_rdata = rdata[wsel];
assign per_way_rdata = line_rdata[wsel];
end else begin
`UNUSED_VAR (wsel)
assign per_way_rdata = rdata;
assign per_way_rdata = line_rdata;
end
assign read_data = per_way_rdata[way_idx];

View file

@ -143,7 +143,7 @@ module VX_cache_tags #(
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
end
assign evict_dirty = (| read_dirty);
assign evict_dirty = | (read_dirty & evict_way);
`ifdef DBG_TRACE_CACHE
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
@ -162,12 +162,12 @@ module VX_cache_tags #(
if (write)
`TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
else
`TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
`TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
end else begin
if (write)
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
`TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
else
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
end
end
end

View file

@ -314,7 +314,7 @@ module VX_dp_ram #(
end else begin
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
if (RW_ASSERT) begin
`RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("read after write mismatch"));
`RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("read after write hazard"));
end
end
end