mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
writeback cache fixes
This commit is contained in:
parent
5600a8dd42
commit
22b0525c51
5 changed files with 55 additions and 45 deletions
|
@ -537,7 +537,7 @@
|
|||
|
||||
// Enable Cache Writeback
|
||||
`ifndef DCACHE_WRITEBACK
|
||||
`define DCACHE_WRITEBACK 0
|
||||
`define DCACHE_WRITEBACK 1
|
||||
`endif
|
||||
|
||||
// LMEM Configurable Knobs ////////////////////////////////////////////////////
|
||||
|
@ -601,7 +601,7 @@
|
|||
|
||||
// Enable Cache Writeback
|
||||
`ifndef L2_WRITEBACK
|
||||
`define L2_WRITEBACK 0
|
||||
`define L2_WRITEBACK 1
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
@ -647,7 +647,7 @@
|
|||
|
||||
// Enable Cache Writeback
|
||||
`ifndef L3_WRITEBACK
|
||||
`define L3_WRITEBACK 0
|
||||
`define L3_WRITEBACK 1
|
||||
`endif
|
||||
|
||||
// ISA Extensions /////////////////////////////////////////////////////////////
|
||||
|
|
28
hw/rtl/cache/VX_cache_bank.sv
vendored
28
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -140,6 +140,7 @@ module VX_cache_bank #(
|
|||
wire [NUM_WAYS-1:0] flush_way_st0;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
|
||||
wire rw_sel, rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
||||
|
@ -291,6 +292,8 @@ module VX_cache_bank #(
|
|||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
|
||||
assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
wire [NUM_WAYS-1:0] evict_way_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
|
||||
|
||||
|
@ -338,7 +341,7 @@ module VX_cache_bank #(
|
|||
|
||||
assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
|
||||
|
||||
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, addr_st0[`CS_LINE_SEL_BITS-1:0]} : addr_st0;
|
||||
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||
|
@ -381,16 +384,20 @@ module VX_cache_bank #(
|
|||
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
|
||||
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
||||
|
||||
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
`UNUSED_VAR (do_write_miss_st1)
|
||||
|
||||
// ensure mshr replay always get a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay"));
|
||||
|
||||
// detect BRAM's read-during-write hazard
|
||||
// both tag and data stores use BRAM with no read-during-write protection.
|
||||
// we ned to stall the pipeline to prevent read-after-write hazards.
|
||||
assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
|
||||
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceeding write
|
||||
always @(posedge clk) begin // stall reads following writes to same address
|
||||
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1)
|
||||
always @(posedge clk) begin
|
||||
// stall reads following writes to same line address
|
||||
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
|
||||
&& ~rdw_hazard3_st1; // release pipeline stall
|
||||
end
|
||||
|
||||
|
@ -588,20 +595,21 @@ module VX_cache_bank #(
|
|||
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
|
||||
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
|
||||
wire do_writeback_st1 = do_fill_or_flush_st1 && evict_dirty_st1;
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
|
||||
if (WRITEBACK) begin
|
||||
if (DIRTY_BYTES) begin
|
||||
// ensure dirty bytes are valid
|
||||
// ensure dirty bytes match the tag info
|
||||
wire has_dirty_bytes = (| dirty_byteen_st1);
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes"));
|
||||
`RUNTIME_ASSERT (~do_fill_or_flush_st1 || (evict_dirty_st1 == has_dirty_bytes), ("missmatch dirty bytes: dirty_line=%b, dirty_bytes=%b, addr=0x%0h", evict_dirty_st1, has_dirty_bytes, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID)));
|
||||
end
|
||||
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|
||||
|| do_writeback_st1);
|
||||
|| do_writeback_st1)
|
||||
&& ~rdw_hazard3_st1;
|
||||
end else begin
|
||||
`UNUSED_VAR (evict_dirty_st1)
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1);
|
||||
|| do_creq_wr_st1)
|
||||
&& ~rdw_hazard3_st1;
|
||||
end
|
||||
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
|
|
56
hw/rtl/cache/VX_cache_data.sv
vendored
56
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -73,16 +73,17 @@ module VX_cache_data #(
|
|||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||
|
||||
if (WRITEBACK) begin
|
||||
if (DIRTY_BYTES) begin
|
||||
wire [`CLOG2(`CS_LINES_PER_BANK * NUM_WAYS)-1:0] way_addr;
|
||||
if (NUM_WAYS > 1) begin
|
||||
assign way_addr = {line_sel, way_idx};
|
||||
end else begin
|
||||
assign way_addr = line_sel;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
|
||||
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
|
||||
end
|
||||
|
||||
VX_sp_ram #(
|
||||
|
@ -93,21 +94,23 @@ module VX_cache_data #(
|
|||
.read (write || fill || flush),
|
||||
.write (init || write || fill || flush),
|
||||
`UNUSED_PIN (wren),
|
||||
.addr (way_addr),
|
||||
.wdata (write ? (dirty_byteen | write_byteen) : ((init || fill || flush) ? '0 : dirty_byteen)),
|
||||
.rdata (dirty_byteen)
|
||||
.addr (line_sel),
|
||||
.wdata (bs_wdata),
|
||||
.rdata (bs_rdata)
|
||||
);
|
||||
|
||||
assign dirty_byteen = bs_rdata[way_idx];
|
||||
end else begin
|
||||
assign dirty_byteen = {LINE_SIZE{1'b1}};
|
||||
end
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] dirty_data_w;
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] flipped_rdata;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign dirty_data_w[j][i] = rdata[i][j];
|
||||
assign flipped_rdata[j][i] = line_rdata[i][j];
|
||||
end
|
||||
end
|
||||
assign dirty_data = dirty_data_w[way_idx];
|
||||
assign dirty_data = flipped_rdata[way_idx];
|
||||
end else begin
|
||||
assign dirty_byteen = '0;
|
||||
assign dirty_data = '0;
|
||||
|
@ -116,28 +119,25 @@ module VX_cache_data #(
|
|||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting way index to binary in parallel with BRAM read.
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
|
||||
wire [BYTEENW-1:0] wren;
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
assign wdata[i] = (fill || !WRITE_ENABLE) ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{write_data[i]}};
|
||||
end
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
|
||||
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
||||
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
||||
end
|
||||
end
|
||||
assign wren = wren_w;
|
||||
assign line_wren = wren_w;
|
||||
end else begin
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign wdata = fill_data;
|
||||
assign wren = fill;
|
||||
assign line_wdata = fill_data;
|
||||
assign line_wren = fill;
|
||||
end
|
||||
|
||||
VX_onehot_encoder #(
|
||||
|
@ -151,6 +151,8 @@ module VX_cache_data #(
|
|||
wire line_read = (read && ~stall)
|
||||
|| (WRITEBACK && (fill || flush));
|
||||
|
||||
wire line_write = write || fill;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
|
@ -160,19 +162,19 @@ module VX_cache_data #(
|
|||
) data_store (
|
||||
.clk (clk),
|
||||
.read (line_read),
|
||||
.write (write || fill),
|
||||
.wren (wren),
|
||||
.write (line_write),
|
||||
.wren (line_wren),
|
||||
.addr (line_sel),
|
||||
.wdata (wdata),
|
||||
.rdata (rdata)
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
assign per_way_rdata = rdata[wsel];
|
||||
assign per_way_rdata = line_rdata[wsel];
|
||||
end else begin
|
||||
`UNUSED_VAR (wsel)
|
||||
assign per_way_rdata = rdata;
|
||||
assign per_way_rdata = line_rdata;
|
||||
end
|
||||
assign read_data = per_way_rdata[way_idx];
|
||||
|
||||
|
|
8
hw/rtl/cache/VX_cache_tags.sv
vendored
8
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -143,7 +143,7 @@ module VX_cache_tags #(
|
|||
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
||||
end
|
||||
|
||||
assign evict_dirty = (| read_dirty);
|
||||
assign evict_dirty = | (read_dirty & evict_way);
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
|
||||
|
@ -162,12 +162,12 @@ module VX_cache_tags #(
|
|||
if (write)
|
||||
`TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
else
|
||||
`TRACE(3, ("%d: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
`TRACE(3, ("%d: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
end else begin
|
||||
if (write)
|
||||
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
`TRACE(3, ("%d: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
else
|
||||
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
`TRACE(3, ("%d: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -314,7 +314,7 @@ module VX_dp_ram #(
|
|||
end else begin
|
||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
if (RW_ASSERT) begin
|
||||
`RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("read after write mismatch"));
|
||||
`RUNTIME_ASSERT(~read || (rdata == ram[raddr]), ("read after write hazard"));
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue