mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
writeback cache fixes
This commit is contained in:
parent
fc50b66819
commit
3fe8f963aa
5 changed files with 92 additions and 76 deletions
60
hw/rtl/cache/VX_bank_flush.sv
vendored
60
hw/rtl/cache/VX_bank_flush.sv
vendored
|
@ -27,32 +27,34 @@ module VX_bank_flush #(
|
|||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire flush_in_valid,
|
||||
output wire flush_in_ready,
|
||||
output wire flush_out_init,
|
||||
output wire flush_out_valid,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line,
|
||||
output wire [NUM_WAYS-1:0] flush_out_way,
|
||||
input wire flush_out_ready,
|
||||
input wire flush_begin,
|
||||
output wire flush_end,
|
||||
output wire flush_init,
|
||||
output wire flush_valid,
|
||||
output wire [`CS_LINE_SEL_BITS-1:0] flush_line,
|
||||
output wire [NUM_WAYS-1:0] flush_way,
|
||||
input wire flush_ready,
|
||||
input wire mshr_empty
|
||||
);
|
||||
// ways iteration is only needed when eviction is enabled
|
||||
localparam CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);
|
||||
|
||||
localparam STATE_IDLE = 2'd0;
|
||||
localparam STATE_INIT = 2'd1;
|
||||
localparam STATE_FLUSH = 2'd2;
|
||||
localparam STATE_DONE = 2'd3;
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_INIT = 1;
|
||||
localparam STATE_WAIT = 2;
|
||||
localparam STATE_FLUSH = 3;
|
||||
localparam STATE_DONE = 4;
|
||||
|
||||
reg [2:0] state_r, state_n;
|
||||
|
||||
reg [CTR_WIDTH-1:0] counter_r;
|
||||
reg [1:0] state_r, state_n;
|
||||
|
||||
always @(*) begin
|
||||
state_n = state_r;
|
||||
case (state_r)
|
||||
STATE_IDLE: begin
|
||||
if (flush_in_valid && mshr_empty) begin
|
||||
state_n = STATE_FLUSH;
|
||||
if (flush_begin) begin
|
||||
state_n = STATE_WAIT;
|
||||
end
|
||||
end
|
||||
STATE_INIT: begin
|
||||
|
@ -60,8 +62,14 @@ module VX_bank_flush #(
|
|||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_WAIT: begin
|
||||
// wait for pending requests to complete
|
||||
if (mshr_empty) begin
|
||||
state_n = STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_out_ready) begin
|
||||
if (counter_r == ((2 ** CTR_WIDTH)-1) && flush_ready) begin
|
||||
state_n = STATE_DONE;
|
||||
end
|
||||
end
|
||||
|
@ -79,7 +87,8 @@ module VX_bank_flush #(
|
|||
end else begin
|
||||
state_r <= state_n;
|
||||
if (state_r != STATE_IDLE) begin
|
||||
if ((state_r == STATE_INIT) || flush_out_ready) begin
|
||||
if ((state_r == STATE_INIT)
|
||||
|| ((state_r == STATE_FLUSH) && flush_ready)) begin
|
||||
counter_r <= counter_r + CTR_WIDTH'(1);
|
||||
end
|
||||
end else begin
|
||||
|
@ -88,21 +97,20 @@ module VX_bank_flush #(
|
|||
end
|
||||
end
|
||||
|
||||
assign flush_in_ready = (state_r == STATE_DONE);
|
||||
|
||||
assign flush_out_init = (state_r == STATE_INIT);
|
||||
assign flush_out_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_out_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
assign flush_end = (state_r == STATE_DONE);
|
||||
assign flush_init = (state_r == STATE_INIT);
|
||||
assign flush_valid = (state_r == STATE_FLUSH);
|
||||
assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin
|
||||
reg [NUM_WAYS-1:0] flush_out_way_r;
|
||||
reg [NUM_WAYS-1:0] flush_way_r;
|
||||
always @(*) begin
|
||||
flush_out_way_r = '0;
|
||||
flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
||||
flush_way_r = '0;
|
||||
flush_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
|
||||
end
|
||||
assign flush_out_way = flush_out_way_r;
|
||||
assign flush_way = flush_way_r;
|
||||
end else begin
|
||||
assign flush_out_way = {NUM_WAYS{1'b1}};
|
||||
assign flush_way = {NUM_WAYS{1'b1}};
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
16
hw/rtl/cache/VX_cache.sv
vendored
16
hw/rtl/cache/VX_cache.sv
vendored
|
@ -109,8 +109,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus2_if[NUM_REQS]();
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_ready;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_begin;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_end;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||
|
||||
|
@ -127,8 +127,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if (core_bus2_if),
|
||||
.bank_req_fire (per_bank_core_req_fire),
|
||||
.flush_valid (per_bank_flush_valid),
|
||||
.flush_ready (per_bank_flush_ready)
|
||||
.flush_begin (per_bank_flush_begin),
|
||||
.flush_end (per_bank_flush_end)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
@ -324,6 +324,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (CORE_REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.ARBITER ("F"),
|
||||
.OUT_BUF (REQ_XBAR_BUF)
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
|
@ -432,8 +433,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||
|
||||
.flush_valid (per_bank_flush_valid[bank_id]),
|
||||
.flush_ready (per_bank_flush_ready[bank_id])
|
||||
.flush_begin (per_bank_flush_begin[bank_id]),
|
||||
.flush_end (per_bank_flush_end[bank_id])
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
|
@ -457,7 +458,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.NUM_OUTPUTS (NUM_REQS),
|
||||
.DATAW (CORE_RSP_DATAW)
|
||||
.DATAW (CORE_RSP_DATAW),
|
||||
.ARBITER ("F")
|
||||
) rsp_xbar (
|
||||
.clk (clk),
|
||||
.reset (rsp_xbar_reset),
|
||||
|
|
66
hw/rtl/cache/VX_cache_bank.sv
vendored
66
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -108,8 +108,8 @@ module VX_cache_bank #(
|
|||
output wire mem_rsp_ready,
|
||||
|
||||
// flush
|
||||
input wire flush_valid,
|
||||
output wire flush_ready
|
||||
input wire flush_begin,
|
||||
output wire flush_end
|
||||
);
|
||||
|
||||
localparam PIPELINE_STAGES = 2;
|
||||
|
@ -162,11 +162,11 @@ module VX_cache_bank #(
|
|||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
wire mshr_empty;
|
||||
|
||||
wire line_flush_valid;
|
||||
wire line_flush_init;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel;
|
||||
wire [NUM_WAYS-1:0] line_flush_way;
|
||||
wire line_flush_ready;
|
||||
wire flush_valid;
|
||||
wire init_valid;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] flush_sel;
|
||||
wire [NUM_WAYS-1:0] flush_way;
|
||||
wire flush_ready;
|
||||
|
||||
// flush unit
|
||||
VX_bank_flush #(
|
||||
|
@ -176,16 +176,16 @@ module VX_cache_bank #(
|
|||
.NUM_WAYS (NUM_WAYS),
|
||||
.WRITEBACK (WRITEBACK)
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush_in_valid (flush_valid),
|
||||
.flush_in_ready (flush_ready),
|
||||
.flush_out_init (line_flush_init),
|
||||
.flush_out_valid (line_flush_valid),
|
||||
.flush_out_line (line_flush_sel),
|
||||
.flush_out_way (line_flush_way),
|
||||
.flush_out_ready (line_flush_ready),
|
||||
.mshr_empty (mshr_empty)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush_begin (flush_begin),
|
||||
.flush_end (flush_end),
|
||||
.flush_init (init_valid),
|
||||
.flush_valid (flush_valid),
|
||||
.flush_line (flush_sel),
|
||||
.flush_way (flush_way),
|
||||
.flush_ready (flush_ready),
|
||||
.mshr_empty (mshr_empty)
|
||||
);
|
||||
|
||||
wire rdw_hazard1_sel;
|
||||
|
@ -198,16 +198,16 @@ module VX_cache_bank #(
|
|||
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||
// handle memory responses next to prevent deadlock with potential memory request from a miss.
|
||||
// flush has precedence over core requests to ensure that the cache is in a consistent state.
|
||||
wire replay_grant = ~line_flush_init;
|
||||
wire replay_grant = ~init_valid;
|
||||
wire replay_enable = replay_grant && replay_valid;
|
||||
|
||||
wire fill_grant = ~line_flush_init && ~replay_enable;
|
||||
wire fill_grant = ~init_valid && ~replay_enable;
|
||||
wire fill_enable = fill_grant && mem_rsp_valid;
|
||||
|
||||
wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable;
|
||||
wire flush_enable = flush_grant && line_flush_valid;
|
||||
wire flush_grant = ~init_valid && ~replay_enable && ~fill_enable;
|
||||
wire flush_enable = flush_grant && flush_valid;
|
||||
|
||||
wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||
wire creq_grant = ~init_valid && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||
wire creq_enable = creq_grant && core_req_valid;
|
||||
|
||||
assign replay_ready = replay_grant
|
||||
|
@ -219,23 +219,23 @@ module VX_cache_bank #(
|
|||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign line_flush_ready = flush_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
assign flush_ready = flush_grant
|
||||
&& (!WRITEBACK || ~mreq_queue_alm_full) // needed for evictions
|
||||
&& ~rdw_hazard2_sel
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = line_flush_init;
|
||||
wire init_fire = init_valid;
|
||||
wire replay_fire = replay_valid && replay_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire line_flush_fire = line_flush_valid && line_flush_ready;
|
||||
wire flush_fire = flush_valid && flush_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || line_flush_fire || core_req_fire;
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
|
||||
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
|
||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
|
@ -243,7 +243,7 @@ module VX_cache_bank #(
|
|||
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
|
||||
assign creq_flush_sel = core_req_valid && core_req_flush;
|
||||
|
||||
assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) :
|
||||
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
|
@ -270,7 +270,7 @@ module VX_cache_bank #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
|
@ -663,8 +663,8 @@ module VX_cache_bank #(
|
|||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
|
||||
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || line_flush_fire);
|
||||
wire input_stall = (replay_valid || mem_rsp_valid || core_req_valid || flush_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
|
||||
always @(posedge clk) begin
|
||||
if (input_stall || pipe_stall) begin
|
||||
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1));
|
||||
|
|
2
hw/rtl/cache/VX_cache_data.sv
vendored
2
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -117,7 +117,7 @@ module VX_cache_data #(
|
|||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting way index to binary in parallel with BRAM read.
|
||||
// this allows converting way index to binary in parallel with BRAM read access and way selection.
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
|
|
24
hw/rtl/cache/VX_cache_flush.sv
vendored
24
hw/rtl/cache/VX_cache_flush.sv
vendored
|
@ -26,13 +26,16 @@ module VX_cache_flush #(
|
|||
VX_mem_bus_if.slave core_bus_in_if [NUM_REQS],
|
||||
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
||||
input wire [NUM_BANKS-1:0] bank_req_fire,
|
||||
output wire [NUM_BANKS-1:0] flush_valid,
|
||||
input wire [NUM_BANKS-1:0] flush_ready
|
||||
output wire [NUM_BANKS-1:0] flush_begin,
|
||||
input wire [NUM_BANKS-1:0] flush_end
|
||||
);
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_WAIT = 1;
|
||||
localparam STATE_WAIT1 = 1;
|
||||
localparam STATE_FLUSH = 2;
|
||||
localparam STATE_DONE = 3;
|
||||
localparam STATE_WAIT2 = 3;
|
||||
localparam STATE_DONE = 4;
|
||||
|
||||
reg [2:0] state, state_n;
|
||||
|
||||
// track in-flight core requests
|
||||
|
||||
|
@ -76,7 +79,6 @@ module VX_cache_flush #(
|
|||
`UNUSED_VAR (bank_req_fire)
|
||||
end
|
||||
|
||||
reg [1:0] state, state_n;
|
||||
reg [NUM_BANKS-1:0] flush_done, flush_done_n;
|
||||
|
||||
wire [NUM_REQS-1:0] flush_req_mask;
|
||||
|
@ -112,17 +114,21 @@ module VX_cache_flush #(
|
|||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (flush_req_enable) begin
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT : STATE_FLUSH;
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_WAIT: begin
|
||||
STATE_WAIT1: begin
|
||||
if (no_inflight_reqs) begin
|
||||
state_n = STATE_FLUSH;
|
||||
end
|
||||
end
|
||||
STATE_FLUSH: begin
|
||||
// generate a flush request pulse
|
||||
state_n = STATE_WAIT2;
|
||||
end
|
||||
STATE_WAIT2: begin
|
||||
// wait for all banks to finish flushing
|
||||
flush_done_n = flush_done | flush_ready;
|
||||
flush_done_n = flush_done | flush_end;
|
||||
if (flush_done_n == {NUM_BANKS{1'b1}}) begin
|
||||
state_n = STATE_DONE;
|
||||
flush_done_n = '0;
|
||||
|
@ -154,6 +160,6 @@ module VX_cache_flush #(
|
|||
end
|
||||
end
|
||||
|
||||
assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}};
|
||||
assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}};
|
||||
|
||||
endmodule
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue