writeback cache fixes

This commit is contained in:
Blaise Tine 2024-07-29 13:32:35 -07:00
parent e34f824bf9
commit 0709d656ca
7 changed files with 53 additions and 39 deletions

View file

@ -184,10 +184,11 @@ module VX_cache_bank #(
.mshr_empty (mshr_empty)
);
wire rdw_hazard_st0;
reg rdw_hazard_st1;
wire rdw_hazard1_sel;
wire rdw_hazard2_sel;
reg rdw_hazard3_st1;
wire pipe_stall = crsp_queue_stall || rdw_hazard_st1;
wire pipe_stall = crsp_queue_stall || rdw_hazard3_st1;
// inputs arbitration:
// mshr replay has highest priority to maximize utilization since there is no miss.
@ -206,14 +207,16 @@ module VX_cache_bank #(
wire creq_enable = creq_grant && core_req_valid;
assign replay_ready = replay_grant
&& ~rdw_hazard_st0
&& ~rdw_hazard1_sel
&& ~pipe_stall;
assign mem_rsp_ready = fill_grant
&& ~rdw_hazard2_sel
&& ~pipe_stall;
assign line_flush_ready = flush_grant
&& ~mreq_queue_alm_full
&& ~rdw_hazard2_sel
&& ~pipe_stall;
assign core_req_ready = creq_grant
@ -376,15 +379,14 @@ module VX_cache_bank #(
`UNUSED_VAR (do_write_miss_st1)
// ensure mshr replay always gets a hit
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: missed mshr replay"));
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("missed mshr replay"));
// detect BRAM's read-during-write hazard
assign rdw_hazard_st0 = do_fill_st0; // stall cycle after a fill
wire rdw_case1 = do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1); // standard cache access
wire rdw_case2 = WRITEBACK && (do_flush_st0 || do_fill_st0) && do_cache_wr_st1; // a writeback can evict a preceding write
always @(posedge clk) begin // after a write to same address
rdw_hazard_st1 <= (rdw_case1 || rdw_case2)
&& ~rdw_hazard_st1; // invalidate if pipeline stalled to avoid repeats
assign rdw_hazard1_sel = do_fill_st0; // stall first replay following a fill
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
always @(posedge clk) begin // stall reads following writes to same address
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (addr_st0 == addr_st1)
&& ~rdw_hazard3_st1; // release pipeline stall
end
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
@ -427,8 +429,8 @@ module VX_cache_bank #(
.stall (pipe_stall),
.read (do_cache_rd_st1),
.fill (do_fill_st1 && ~rdw_hazard_st1),
.flush (do_flush_st1 && ~rdw_hazard_st1),
.fill (do_fill_st1),
.flush (do_flush_st1),
.write (do_cache_wr_st1),
.way_sel (way_sel_st1),
.line_addr (addr_st1),
@ -556,7 +558,7 @@ module VX_cache_bank #(
) core_rsp_queue (
.clk (clk),
.reset (crsp_queue_reset),
.valid_in (crsp_queue_valid && ~rdw_hazard_st1),
.valid_in (crsp_queue_valid && ~rdw_hazard3_st1),
.ready_in (crsp_queue_ready),
.data_in ({crsp_queue_tag, crsp_queue_data, crsp_queue_idx}),
.data_out ({core_rsp_tag, core_rsp_data, core_rsp_idx}),
@ -582,13 +584,11 @@ module VX_cache_bank #(
if (WRITEBACK) begin
assign mreq_queue_push = (((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|| do_writeback_st1)
&& ~rdw_hazard_st1;
|| do_writeback_st1);
end else begin
`UNUSED_VAR (evict_dirty_st1)
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|| do_creq_wr_st1)
&& ~rdw_hazard_st1;
|| do_creq_wr_st1);
end
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
@ -636,7 +636,7 @@ module VX_cache_bank #(
&& ~(replay_fire || mem_rsp_fire || core_req_fire || line_flush_fire);
always @(posedge clk) begin
if (input_stall || pipe_stall) begin
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw_st0=%b, rdw_st1=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard_st0, rdw_hazard_st1));
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1));
end
if (mem_rsp_fire) begin
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));

View file

@ -82,10 +82,12 @@ module VX_cache_data #(
VX_sp_ram #(
.DATAW (LINE_SIZE * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK)
.SIZE (`CS_LINES_PER_BANK),
.NO_RWCHECK (1),
.RW_ASSERT (1)
) byteen_store (
.clk (clk),
.read (1'b1),
.read (write || fill || flush),
.write (write || fill || flush),
`UNUSED_PIN (wren),
.addr (way_addr),
@ -140,14 +142,18 @@ module VX_cache_data #(
`UNUSED_PIN (valid_out)
);
wire line_read = (read && ~stall)
|| (WRITEBACK && (fill || flush));
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (BYTEENW),
.NO_RWCHECK (1)
.NO_RWCHECK (1),
.RW_ASSERT (1)
) data_store (
.clk (clk),
.read (1'b1),
.read (line_read),
.write (write || fill),
.wren (wren),
.addr (line_sel),

View file

@ -97,12 +97,17 @@ module VX_cache_tags #(
assign evict_tag = read_tag;
end
// fill and flush need to also read in writeback mode
wire fill_s = fill && (!WRITEBACK || ~stall);
wire flush_s = flush && (!WRITEBACK || ~stall);
for (genvar i = 0; i < NUM_WAYS; ++i) begin
wire do_fill = fill && evict_way[i];
wire do_flush = flush && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
wire do_fill = fill_s && evict_way[i];
wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
wire do_write = WRITEBACK && write && tag_matches[i];
wire line_read = (lookup && ~stall) || (WRITEBACK && (fill_s || flush_s));
wire line_write = init || do_fill || do_flush || do_write;
wire line_valid = ~(init || flush);
@ -121,10 +126,11 @@ module VX_cache_tags #(
VX_sp_ram #(
.DATAW (TAG_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.NO_RWCHECK (1)
.NO_RWCHECK (1),
.RW_ASSERT (1)
) tag_store (
.clk (clk),
.read (1'b1),
.read (line_read),
.write (line_write),
`UNUSED_PIN (wren),
.addr (line_sel),
@ -139,20 +145,16 @@ module VX_cache_tags #(
assign evict_dirty = (| read_dirty);
// ensure fills and flushes do not stall
`RUNTIME_ASSERT (~fill || ~stall, ("runtime error: stalled fill"));
`RUNTIME_ASSERT (~flush || ~stall, ("runtime error: stalled fill"));
`ifdef DBG_TRACE_CACHE
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
always @(posedge clk) begin
if (fill) begin
if (fill && ~stall) begin
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)));
end
if (init) begin
`TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
end
if (flush) begin
if (flush && ~stall) begin
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty));
end
if (lookup && ~stall) begin

View file

@ -108,7 +108,7 @@ module VX_alu_int #(
2'b00: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; // AND
2'b01: msc_result[i] = alu_in1[i] | alu_in2_imm[i]; // OR
2'b10: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i]; // XOR
2'b11: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS-1:0]; // SLL
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS-1:0]; // SLL
endcase
end
assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0])); // SLLW
@ -126,7 +126,7 @@ module VX_alu_int #(
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
3'b110: alu_result[i] = shr_result_w[i]; // SRLW, SRAW, SRLIW, SRAIW
3'b111: alu_result[i] = msc_result_w[i]; // SLLW
default: alu_result[i] = msc_result_w[i]; // SLLW
endcase
end
end

View file

@ -99,7 +99,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
3'h4: r_type = `INST_ALU_XOR;
3'h5: r_type = func7[5] ? `INST_ALU_SRA : `INST_ALU_SRL;
3'h6: r_type = `INST_ALU_OR;
3'h7: r_type = `INST_ALU_AND;
default: r_type = `INST_ALU_AND;
endcase
end
@ -111,8 +111,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
3'h4: b_type = `INST_BR_LT;
3'h5: b_type = `INST_BR_GE;
3'h6: b_type = `INST_BR_LTU;
3'h7: b_type = `INST_BR_GEU;
default: b_type = 'x;
default: b_type = `INST_BR_GEU;
endcase
end
@ -139,7 +138,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
3'h4: m_type = `INST_M_DIV;
3'h5: m_type = `INST_M_DIVU;
3'h6: m_type = `INST_M_REM;
3'h7: m_type = `INST_M_REMU;
default: m_type = `INST_M_REMU;
endcase
end
`endif

View file

@ -22,6 +22,7 @@ module VX_dp_ram #(
parameter OUT_REG = 0,
parameter NO_RWCHECK = 0,
parameter LUTRAM = 0,
parameter RW_ASSERT = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0,
@ -50,6 +51,7 @@ module VX_dp_ram #(
end \
end
`UNUSED_PARAM (RW_ASSERT)
`UNUSED_VAR (read)
`ifdef SYNTHESIS
@ -307,6 +309,9 @@ module VX_dp_ram #(
assign rdata = ram[raddr];
end else begin
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
if (RW_ASSERT) begin
`RUNTIME_ASSERT (~read || (rdata == ram[raddr]), ("read after write mismatch"));
end
end
end
`endif

View file

@ -21,6 +21,7 @@ module VX_sp_ram #(
parameter WRENW = 1,
parameter OUT_REG = 0,
parameter NO_RWCHECK = 0,
parameter RW_ASSERT = 0,
parameter LUTRAM = 0,
parameter INIT_ENABLE = 0,
parameter INIT_FILE = "",
@ -42,6 +43,7 @@ module VX_sp_ram #(
.WRENW (WRENW),
.OUT_REG (OUT_REG),
.NO_RWCHECK (NO_RWCHECK),
.RW_ASSERT (RW_ASSERT),
.LUTRAM (LUTRAM),
.INIT_ENABLE (INIT_ENABLE),
.INIT_FILE (INIT_FILE),