mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
fixed cache mshr critical path
This commit is contained in:
parent
9a077b97f3
commit
762b8e2e3e
8 changed files with 210 additions and 1103 deletions
|
@ -11,7 +11,7 @@
|
|||
#define RESET_DELAY 2
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 24
|
||||
#define DRAM_LATENCY 300
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
|
|
|
@ -37,6 +37,8 @@ module vortex_afu #(
|
|||
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
|
||||
);
|
||||
|
||||
localparam RESET_DELAY = 2;
|
||||
|
||||
localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
|
||||
localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data);
|
||||
localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH);
|
||||
|
@ -324,6 +326,15 @@ wire cmd_write_done;
|
|||
wire cmd_csr_done;
|
||||
wire cmd_run_done;
|
||||
|
||||
reg [$clog2(RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
always @(posedge clk) begin
|
||||
if (state == STATE_IDLE) begin
|
||||
vx_reset_ctr <= 0;
|
||||
end else if (state == STATE_START) begin
|
||||
vx_reset_ctr <= vx_reset_ctr + 1;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
|
@ -392,8 +403,10 @@ always @(posedge clk) begin
|
|||
end
|
||||
end
|
||||
|
||||
STATE_START: begin // vortex reset cycle
|
||||
state <= STATE_RUN;
|
||||
STATE_START: begin
|
||||
// vortex reset cycles
|
||||
if (vx_reset_ctr == $bits(vx_reset_ctr)'(RESET_DELAY))
|
||||
state <= STATE_RUN;
|
||||
end
|
||||
|
||||
STATE_RUN: begin
|
||||
|
|
File diff suppressed because it is too large
Load diff
108
hw/rtl/cache/VX_bank.v
vendored
108
hw/rtl/cache/VX_bank.v
vendored
|
@ -99,8 +99,8 @@ module VX_bank #(
|
|||
|
||||
wire drsq_pop;
|
||||
wire drsq_empty;
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata_st0;
|
||||
|
||||
wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata;
|
||||
|
||||
wire drsq_push = dram_rsp_valid && dram_rsp_ready;
|
||||
|
||||
|
@ -119,7 +119,7 @@ module VX_bank #(
|
|||
.push (drsq_push),
|
||||
.pop (drsq_pop),
|
||||
.data_in (dram_rsp_data),
|
||||
.data_out(drsq_filldata_st0),
|
||||
.data_out(drsq_filldata),
|
||||
.empty (drsq_empty),
|
||||
.full (drsq_full),
|
||||
`UNUSED_PIN (size)
|
||||
|
@ -127,9 +127,9 @@ module VX_bank #(
|
|||
end else begin
|
||||
`UNUSED_VAR (dram_rsp_valid)
|
||||
`UNUSED_VAR (dram_rsp_data)
|
||||
assign drsq_empty = 1;
|
||||
assign drsq_filldata_st0 = 0;
|
||||
assign dram_rsp_ready = 0;
|
||||
assign drsq_empty = 1;
|
||||
assign drsq_filldata = 0;
|
||||
assign dram_rsp_ready = 0;
|
||||
end
|
||||
|
||||
wire creq_pop;
|
||||
|
@ -194,10 +194,9 @@ module VX_bank #(
|
|||
wire is_mshr_st0, is_mshr_st1;
|
||||
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
|
||||
wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1;
|
||||
wire [`WORD_WIDTH-1:0] readword_st0, readword_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] readdata_st0, readdata_st1;
|
||||
wire [`WORD_WIDTH-1:0] writeword_st0, writeword_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] writedata_st0, writedata_st1;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] filldata_st0, filldata_st1;
|
||||
wire [`TAG_SELECT_BITS-1:0] readtag_st0, readtag_st1;
|
||||
wire miss_st0, miss_st1;
|
||||
wire force_miss_st0, force_miss_st1;
|
||||
|
@ -259,14 +258,14 @@ module VX_bank #(
|
|||
assign is_mshr_st0 = mshr_pop_unqual;
|
||||
assign is_fill_st0 = drsq_pop_unqual;
|
||||
|
||||
assign valid_st0 = drsq_pop || mshr_pop || creq_pop;
|
||||
assign valid_st0 = mshr_pop || drsq_pop || creq_pop;
|
||||
assign addr_st0 = creq_pop_unqual ? creq_addr_st0 : mshr_addr_st0;
|
||||
assign tag_st0 = creq_pop_unqual ? `REQ_TAG_WIDTH'(creq_tag_st0) : `REQ_TAG_WIDTH'(mshr_tag_st0);
|
||||
assign mem_rw_st0 = creq_pop_unqual ? creq_rw_st0 : mshr_rw_st0;
|
||||
assign byteen_st0 = creq_pop_unqual ? creq_byteen_st0 : mshr_byteen_st0;
|
||||
assign req_tid_st0 = creq_pop_unqual ? creq_tid_st0 : mshr_tid_st0;
|
||||
assign writeword_st0 = creq_pop_unqual ? creq_writeword_st0 : mshr_writeword_st0;
|
||||
assign writedata_st0 = drsq_filldata_st0;
|
||||
assign filldata_st0 = drsq_filldata;
|
||||
|
||||
if (`WORD_SELECT_BITS != 0) begin
|
||||
assign wsel_st0 = creq_pop_unqual ? creq_wsel_st0 : mshr_wsel_st0;
|
||||
|
@ -307,9 +306,9 @@ if (DRAM_ENABLE) begin
|
|||
.stall (pipeline_stall),
|
||||
|
||||
// read/Fill
|
||||
.lookup_in (valid_st0 && !is_fill_st0),
|
||||
.lookup_in (creq_pop || mshr_pop),
|
||||
.raddr_in (addr_st0),
|
||||
.do_fill_in (valid_st0 && is_fill_st0),
|
||||
.do_fill_in (drsq_pop),
|
||||
.miss_out (miss_st0),
|
||||
.readtag_out (readtag_st0),
|
||||
.dirty_out (dirty_st0),
|
||||
|
@ -388,36 +387,16 @@ end else begin
|
|||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `WORD_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg2 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!pipeline_stall),
|
||||
.data_in ({valid_st0, mshr_push_st0, crsq_push_st0, dreq_push_st0, do_writeback_st0, core_req_hit_st0, is_mshr_st0, writeen_st0, force_miss_st0, is_fill_st0, addr_st0, wsel_st0, readword_st0, writeword_st0, readtag_st0, miss_st0, writedata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}),
|
||||
.data_out ({valid_st1, mshr_push_st1, crsq_push_st1, dreq_push_st1, do_writeback_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_fill_st1, addr_st1, wsel_st1, readword_st1, writeword_st1, readtag_st1, miss_st1, writedata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1})
|
||||
.data_in ({valid_st0, mshr_push_st0, crsq_push_st0, dreq_push_st0, do_writeback_st0, core_req_hit_st0, is_mshr_st0, writeen_st0, force_miss_st0, is_fill_st0, addr_st0, wsel_st0, dirtyb_st0, readdata_st0, writeword_st0, readtag_st0, miss_st0, filldata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}),
|
||||
.data_out ({valid_st1, mshr_push_st1, crsq_push_st1, dreq_push_st1, do_writeback_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_fill_st1, addr_st1, wsel_st1, dirtyb_st1, readdata_st1, writeword_st1, readtag_st1, miss_st1, filldata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1})
|
||||
);
|
||||
|
||||
if (WRITE_THROUGH) begin
|
||||
|
||||
assign dirtyb_st1 = dirtyb_st0;
|
||||
assign readdata_st1 = readdata_st0;
|
||||
|
||||
end else begin
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
|
||||
.RESETW (0)
|
||||
) pipe_reg2b (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!pipeline_stall),
|
||||
.data_in ({dirtyb_st0, readdata_st0}),
|
||||
.data_out ({dirtyb_st1, readdata_st1})
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
|
||||
assign {debug_pc_st01, debug_wid_st01} = tag_st01[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
|
||||
|
@ -452,10 +431,7 @@ end
|
|||
|
||||
// reading
|
||||
.readen_in (valid_st0 && !mem_rw_st0 && !is_fill_st0),
|
||||
.raddr_in (addr_st0),
|
||||
.rwsel_in (wsel_st0),
|
||||
.rbyteen_in (byteen_st0),
|
||||
.readword_out (readword_st0),
|
||||
.raddr_in (addr_st0),
|
||||
.readdata_out (readdata_st0),
|
||||
.dirtyb_out (dirtyb_st0),
|
||||
|
||||
|
@ -466,7 +442,8 @@ end
|
|||
.wwsel_in (wsel_st01),
|
||||
.wbyteen_in (byteen_st01),
|
||||
.writeword_in (writeword_st01),
|
||||
.writedata_in (writedata_st1)
|
||||
.readdata_in (readdata_st1),
|
||||
.filldata_in (filldata_st1)
|
||||
);
|
||||
|
||||
`ifdef DBG_CACHE_REQ_INFO
|
||||
|
@ -490,7 +467,7 @@ end
|
|||
|
||||
wire mshr_dequeue_st1 = valid_st1 && is_mshr_st1 && !mshr_push_unqual && !pipeline_stall;
|
||||
|
||||
// push missed requests as 'ready' if it was a forced miss that actually had a hit
|
||||
// push a missed request as 'ready' if it was a forced miss that actually had a hit
|
||||
// or the fill request for this block is coming
|
||||
wire mshr_init_ready_state_st1 = !miss_st1 || incoming_fill_st1;
|
||||
|
||||
|
@ -521,7 +498,6 @@ end
|
|||
.enqueue_data ({writeword_st1, req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}),
|
||||
.enqueue_is_mshr (is_mshr_st1),
|
||||
.enqueue_ready (mshr_init_ready_state_st1),
|
||||
`UNUSED_PIN (enqueue_full),
|
||||
|
||||
// lookup
|
||||
.lookup_ready (drsq_pop),
|
||||
|
@ -570,9 +546,20 @@ end
|
|||
|
||||
wire crsq_pop = core_rsp_valid && core_rsp_ready;
|
||||
|
||||
wire [`REQS_BITS-1:0] crsq_tid_st1 = req_tid_st1;
|
||||
wire [CORE_TAG_WIDTH-1:0] crsq_tag_st1 = CORE_TAG_WIDTH'(tag_st1);
|
||||
wire [`WORD_WIDTH-1:0] crsq_data_st1 = readword_st1;
|
||||
wire [`REQS_BITS-1:0] crsq_tid_st1 = req_tid_st1;
|
||||
wire [CORE_TAG_WIDTH-1:0] crsq_tag_st1 = CORE_TAG_WIDTH'(tag_st1);
|
||||
wire [`WORD_WIDTH-1:0] crsq_data_st1;
|
||||
|
||||
if (`WORD_SELECT_BITS != 0) begin
|
||||
wire [`WORD_WIDTH-1:0] readword = readdata_st1[wsel_st1 * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
for (genvar i = 0; i < WORD_SIZE; i++) begin
|
||||
assign crsq_data_st1[i * 8 +: 8] = readword[i * 8 +: 8] & {8{byteen_st1[i]}};
|
||||
end
|
||||
end else begin
|
||||
for (genvar i = 0; i < WORD_SIZE; i++) begin
|
||||
assign crsq_data_st1[i * 8 +: 8] = readdata_st1[i * 8 +: 8] & {8{byteen_st1[i]}};
|
||||
end
|
||||
end
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH),
|
||||
|
@ -612,13 +599,33 @@ end
|
|||
wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = (WRITE_THROUGH || !writeback) ? addr_st1 :
|
||||
{readtag_st1, addr_st1[`LINE_SELECT_BITS-1:0]};
|
||||
|
||||
wire [CACHE_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st1 : {CACHE_LINE_SIZE{1'b1}};
|
||||
wire [`CACHE_LINE_WIDTH-1:0] dreq_data;
|
||||
wire [CACHE_LINE_SIZE-1:0] dreq_byteen, dreq_byteen_unqual;
|
||||
|
||||
if (WRITE_THROUGH) begin
|
||||
`UNUSED_VAR (dirtyb_st1)
|
||||
if (`WORD_SELECT_BITS != 0) begin
|
||||
for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin
|
||||
assign dreq_byteen_unqual[i * WORD_SIZE +: WORD_SIZE] = (wsel_st1 == `WORD_SELECT_BITS'(i)) ? byteen_st1 : {WORD_SIZE{1'b0}};
|
||||
assign dreq_data[i * `WORD_WIDTH +: `WORD_WIDTH] = writeword_st1;
|
||||
end
|
||||
end else begin
|
||||
assign dreq_byteen_unqual = byteen_st1;
|
||||
assign dreq_data = writeword_st1;
|
||||
end
|
||||
end else begin
|
||||
assign dreq_byteen_unqual = dirtyb_st1;
|
||||
assign dreq_data = readdata_st1;
|
||||
end
|
||||
|
||||
assign dreq_byteen = writeback ? dreq_byteen_unqual : {CACHE_LINE_SIZE{1'b1}};
|
||||
|
||||
if (DRAM_ENABLE) begin
|
||||
always @(posedge clk) begin
|
||||
assert (!(dreq_push && !do_writeback_st1 && incoming_fill_st1))
|
||||
else $error("%t: incoming fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
|
||||
end
|
||||
end
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH),
|
||||
.SIZE (DREQ_SIZE),
|
||||
|
@ -629,7 +636,7 @@ end
|
|||
.reset (reset),
|
||||
.push (dreq_push),
|
||||
.pop (dreq_pop),
|
||||
.data_in ({writeback, dreq_byteen, dreq_addr, readdata_st1}),
|
||||
.data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}),
|
||||
.data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
|
||||
.empty (dreq_empty),
|
||||
.full (dreq_full),
|
||||
|
@ -639,6 +646,7 @@ end
|
|||
`UNUSED_VAR (dreq_push)
|
||||
`UNUSED_VAR (dreq_pop)
|
||||
`UNUSED_VAR (dreq_addr)
|
||||
`UNUSED_VAR (dreq_data)
|
||||
`UNUSED_VAR (dreq_byteen)
|
||||
`UNUSED_VAR (readtag_st1)
|
||||
`UNUSED_VAR (dirtyb_st1)
|
||||
|
@ -685,7 +693,7 @@ end
|
|||
$display("%t: cache%0d:%0d pipeline-stall: mshr=%b, cwbq=%b, dwbq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, crsq_push_stall, dreq_push_stall);
|
||||
end
|
||||
if (drsq_pop) begin
|
||||
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), drsq_filldata_st0);
|
||||
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), drsq_filldata);
|
||||
end
|
||||
if (creq_pop) begin
|
||||
if (creq_rw_st0)
|
||||
|
@ -698,7 +706,7 @@ end
|
|||
end
|
||||
if (dreq_push) begin
|
||||
if (do_writeback_st1)
|
||||
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), readdata_st1, dreq_byteen, debug_wid_st1, debug_pc_st1);
|
||||
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), dreq_data, dreq_byteen, debug_wid_st1, debug_pc_st1);
|
||||
else
|
||||
$display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st1, debug_pc_st1);
|
||||
end
|
||||
|
|
50
hw/rtl/cache/VX_data_access.v
vendored
50
hw/rtl/cache/VX_data_access.v
vendored
|
@ -44,9 +44,6 @@ module VX_data_access #(
|
|||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[`LINE_ADDR_WIDTH-1:0] raddr_in,
|
||||
`IGNORE_WARNINGS_END
|
||||
input wire [`UP(`WORD_SELECT_BITS)-1:0] rwsel_in,
|
||||
input wire [WORD_SIZE-1:0] rbyteen_in,
|
||||
output wire[`WORD_WIDTH-1:0] readword_out,
|
||||
output wire [`CACHE_LINE_WIDTH-1:0] readdata_out,
|
||||
output wire [CACHE_LINE_SIZE-1:0] dirtyb_out,
|
||||
|
||||
|
@ -59,11 +56,12 @@ module VX_data_access #(
|
|||
input wire [WORD_SIZE-1:0] wbyteen_in,
|
||||
input wire wfill_in,
|
||||
input wire [`WORD_WIDTH-1:0] writeword_in,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] writedata_in
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] readdata_in,
|
||||
input wire [`CACHE_LINE_WIDTH-1:0] filldata_in
|
||||
);
|
||||
|
||||
wire [CACHE_LINE_SIZE-1:0] read_dirtyb, dirtyb_qual;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] read_data, readdata_qual;
|
||||
wire [CACHE_LINE_SIZE-1:0] read_dirtyb;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] read_data;
|
||||
|
||||
wire [CACHE_LINE_SIZE-1:0] byte_enable;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] write_data;
|
||||
|
@ -96,49 +94,29 @@ module VX_data_access #(
|
|||
);
|
||||
|
||||
wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wbyteen_qual;
|
||||
wire [`CACHE_LINE_WIDTH-1:0] writeword_qual;
|
||||
wire [`WORDS_PER_LINE-1:0][`WORD_WIDTH-1:0] writedata_qual;
|
||||
|
||||
if (`WORD_SELECT_BITS != 0) begin
|
||||
for (genvar i = 0; i < `WORDS_PER_LINE; i++) begin
|
||||
assign wbyteen_qual[i] = (wwsel_in == `WORD_SELECT_BITS'(i)) ? wbyteen_in : {WORD_SIZE{1'b0}};
|
||||
assign writeword_qual[i * `WORD_WIDTH +: `WORD_WIDTH] = writeword_in;
|
||||
assign wbyteen_qual[i] = (wwsel_in == `WORD_SELECT_BITS'(i)) ? wbyteen_in : {WORD_SIZE{1'b0}};
|
||||
assign writedata_qual[i] = (wwsel_in == `WORD_SELECT_BITS'(i)) ? writeword_in : readdata_in[i * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (wwsel_in)
|
||||
`UNUSED_VAR (readdata_in)
|
||||
assign wbyteen_qual = wbyteen_in;
|
||||
assign writeword_qual = writeword_in;
|
||||
assign writedata_qual = writeword_in;
|
||||
end
|
||||
|
||||
assign byte_enable = wfill_in ? {CACHE_LINE_SIZE{1'b1}} : wbyteen_qual;
|
||||
assign write_data = wfill_in ? writedata_in : writeword_qual;
|
||||
assign write_data = wfill_in ? filldata_in : writedata_qual;
|
||||
|
||||
assign write_enable = writeen_in && !stall;
|
||||
assign write_enable = writeen_in && !stall;
|
||||
|
||||
wire rw_hazard = DRAM_ENABLE && (raddr == waddr) && writeen_in;
|
||||
for (genvar i = 0; i < CACHE_LINE_SIZE; i++) begin
|
||||
assign dirtyb_qual[i] = rw_hazard ? byte_enable[i] : read_dirtyb[i];
|
||||
assign readdata_qual[i * 8 +: 8] = (rw_hazard && byte_enable[i]) ? write_data[i * 8 +: 8] : read_data[i * 8 +: 8];
|
||||
end
|
||||
|
||||
if (WRITE_THROUGH) begin
|
||||
`UNUSED_VAR (dirtyb_qual)
|
||||
assign dirtyb_out = wbyteen_qual;
|
||||
assign readdata_out = writeword_qual;
|
||||
end else begin
|
||||
assign dirtyb_out = dirtyb_qual;
|
||||
assign readdata_out = readdata_qual;
|
||||
end
|
||||
|
||||
if (`WORD_SELECT_BITS != 0) begin
|
||||
wire [`WORD_WIDTH-1:0] readword = readdata_qual[rwsel_in * `WORD_WIDTH +: `WORD_WIDTH];
|
||||
for (genvar i = 0; i < WORD_SIZE; i++) begin
|
||||
assign readword_out[i * 8 +: 8] = readword[i * 8 +: 8] & {8{rbyteen_in[i]}};
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (rwsel_in)
|
||||
for (genvar i = 0; i < WORD_SIZE; i++) begin
|
||||
assign readword_out[i * 8 +: 8] = readdata_qual[i * 8 +: 8] & {8{rbyteen_in[i]}};
|
||||
end
|
||||
assign dirtyb_out[i] = rw_hazard ? byte_enable[i] : read_dirtyb[i];
|
||||
assign readdata_out[i * 8 +: 8] = (rw_hazard && byte_enable[i]) ? write_data[i * 8 +: 8] : read_data[i * 8 +: 8];
|
||||
end
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_DATA
|
||||
|
@ -152,7 +130,7 @@ module VX_data_access #(
|
|||
end
|
||||
end
|
||||
if (readen_in) begin
|
||||
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), rdebug_wid, rdebug_pc, dirtyb_out, raddr, rwsel_in, read_data);
|
||||
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(raddr_in, BANK_ID), rdebug_wid, rdebug_pc, dirtyb_out, raddr, read_data);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
3
hw/rtl/cache/VX_data_store.v
vendored
3
hw/rtl/cache/VX_data_store.v
vendored
|
@ -45,14 +45,13 @@ module VX_data_store #(
|
|||
VX_dp_ram #(
|
||||
.DATAW(CACHE_LINE_SIZE * 8),
|
||||
.SIZE(`LINES_PER_BANK),
|
||||
.BYTEENW(CACHE_LINE_SIZE),
|
||||
.RWCHECK(1)
|
||||
) data (
|
||||
.clk(clk),
|
||||
.waddr(write_addr),
|
||||
.raddr(read_addr),
|
||||
.wren(write_enable),
|
||||
.byteen(byte_enable),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
.din(write_data),
|
||||
.dout(read_data)
|
||||
|
|
123
hw/rtl/cache/VX_miss_resrv.v
vendored
123
hw/rtl/cache/VX_miss_resrv.v
vendored
|
@ -37,7 +37,6 @@ module VX_miss_resrv #(
|
|||
input wire [`MSHR_DATA_WIDTH-1:0] enqueue_data,
|
||||
input wire enqueue_is_mshr,
|
||||
input wire enqueue_ready,
|
||||
output wire enqueue_full,
|
||||
|
||||
// lookup
|
||||
input wire lookup_ready,
|
||||
|
@ -55,80 +54,89 @@ module VX_miss_resrv #(
|
|||
);
|
||||
`USE_FAST_BRAM reg [MSHR_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
|
||||
|
||||
reg [MSHR_SIZE-1:0] valid_table;
|
||||
reg [MSHR_SIZE-1:0] ready_table;
|
||||
reg [`LOG2UP(MSHR_SIZE)-1:0] schedule_ptr, restore_ptr;
|
||||
reg [`LOG2UP(MSHR_SIZE)-1:0] head_ptr, tail_ptr;
|
||||
reg [`LOG2UP(MSHR_SIZE+1)-1:0] size;
|
||||
|
||||
assign enqueue_full = (size == $bits(size)'(MSHR_SIZE));
|
||||
reg [MSHR_SIZE-1:0] valid_table;
|
||||
reg [MSHR_SIZE-1:0] ready_table;
|
||||
reg [`LOG2UP(MSHR_SIZE)-1:0] schedule_ptr, schedule_n_ptr, restore_ptr;
|
||||
reg [`LOG2UP(MSHR_SIZE)-1:0] head_ptr, tail_ptr;
|
||||
reg [`LOG2UP(MSHR_SIZE)-1:0] used_r;
|
||||
reg full_r;
|
||||
|
||||
reg [`MSHR_DATA_WIDTH-1:0] dout_r;
|
||||
reg [`LINE_ADDR_WIDTH-1:0] schedule_addr_r;
|
||||
reg schedule_valid_r;
|
||||
|
||||
wire [MSHR_SIZE-1:0] valid_address_match;
|
||||
for (genvar i = 0; i < MSHR_SIZE; i++) begin
|
||||
assign valid_address_match[i] = valid_table[i] && (addr_table[i] == lookup_addr);
|
||||
end
|
||||
|
||||
assign lookup_match = (| valid_address_match);
|
||||
|
||||
wire dequeue_ready = ready_table[schedule_ptr];
|
||||
|
||||
assign schedule_valid = dequeue_ready;
|
||||
assign schedule_addr = addr_table[schedule_ptr];
|
||||
|
||||
wire mshr_push = enqueue && !enqueue_is_mshr;
|
||||
wire push_new = enqueue && !enqueue_is_mshr;
|
||||
wire restore = enqueue && enqueue_is_mshr;
|
||||
|
||||
wire [`LOG2UP(MSHR_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_table <= 0;
|
||||
ready_table <= 0;
|
||||
schedule_ptr <= 0;
|
||||
restore_ptr <= 0;
|
||||
head_ptr <= 0;
|
||||
tail_ptr <= 0;
|
||||
size <= 0;
|
||||
valid_table <= 0;
|
||||
ready_table <= 0;
|
||||
schedule_ptr <= 0;
|
||||
schedule_n_ptr <= 1;
|
||||
restore_ptr <= 0;
|
||||
head_ptr <= 0;
|
||||
tail_ptr <= 0;
|
||||
end else begin
|
||||
if (lookup_ready) begin
|
||||
ready_table <= ready_table | valid_address_match;
|
||||
end
|
||||
|
||||
if (enqueue) begin
|
||||
assert(!enqueue_full);
|
||||
if (enqueue) begin
|
||||
if (enqueue_is_mshr) begin
|
||||
// returning missed msrq entry, restore schedule
|
||||
// restore schedule, returning missed msrq entry
|
||||
valid_table[restore_ptr] <= 1;
|
||||
ready_table[restore_ptr] <= enqueue_ready;
|
||||
restore_ptr <= restore_ptr + $bits(restore_ptr)'(1);
|
||||
schedule_ptr <= head_ptr;
|
||||
restore_ptr <= restore_ptr + $bits(restore_ptr)'(1);
|
||||
schedule_ptr <= head_ptr;
|
||||
schedule_n_ptr <= head_ptr_n;
|
||||
end else begin
|
||||
// push new entry
|
||||
assert(!full_r);
|
||||
valid_table[tail_ptr] <= 1;
|
||||
ready_table[tail_ptr] <= enqueue_ready;
|
||||
tail_ptr <= tail_ptr + $bits(tail_ptr)'(1);
|
||||
size <= size + $bits(size)'(1);
|
||||
end
|
||||
end else if (dequeue) begin
|
||||
head_ptr <= head_ptr_n;
|
||||
// remove scheduled entry from buffer
|
||||
head_ptr <= head_ptr_n;
|
||||
restore_ptr <= head_ptr_n;
|
||||
valid_table[head_ptr] <= 0;
|
||||
size <= size - $bits(size)'(1);
|
||||
end
|
||||
|
||||
if (lookup_ready) begin
|
||||
ready_table <= ready_table | valid_address_match;
|
||||
end
|
||||
|
||||
if (schedule) begin
|
||||
// schedule next entry
|
||||
assert(schedule_valid);
|
||||
valid_table[schedule_ptr] <= 0;
|
||||
ready_table[schedule_ptr] <= 0;
|
||||
schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1);
|
||||
ready_table[schedule_ptr] <= 0;
|
||||
|
||||
schedule_ptr <= schedule_n_ptr;
|
||||
if (MSHR_SIZE > 2) begin
|
||||
schedule_n_ptr <= schedule_ptr + $bits(schedule_ptr)'(2);
|
||||
end else begin // (SIZE == 2);
|
||||
schedule_n_ptr <= ~schedule_n_ptr;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (enqueue && !enqueue_is_mshr) begin
|
||||
if (push_new) begin
|
||||
addr_table[tail_ptr] <= enqueue_addr;
|
||||
end
|
||||
end
|
||||
|
||||
wire [`MSHR_DATA_WIDTH-1:0] dout;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW(`MSHR_DATA_WIDTH),
|
||||
.SIZE(MSHR_SIZE),
|
||||
|
@ -137,14 +145,51 @@ module VX_miss_resrv #(
|
|||
) entries (
|
||||
.clk(clk),
|
||||
.waddr(tail_ptr),
|
||||
.raddr(schedule_ptr),
|
||||
.wren(mshr_push),
|
||||
.raddr(schedule_n_ptr),
|
||||
.wren(push_new),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
.din(enqueue_data),
|
||||
.dout(schedule_data)
|
||||
.dout(dout)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
used_r <= 0;
|
||||
full_r <= 0;
|
||||
end else begin
|
||||
used_r <= used_r + $bits(used_r)'($signed(2'(enqueue) - 2'(schedule)));
|
||||
full_r <= (used_r == $bits(used_r)'(MSHR_SIZE-1)) && enqueue;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
schedule_valid_r <= 0;
|
||||
end else begin
|
||||
if (lookup_ready) begin
|
||||
schedule_valid_r <= 1;
|
||||
end else if (schedule) begin
|
||||
schedule_valid_r <= ready_table[schedule_n_ptr];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if ((push_new && (used_r == 0 || (used_r == 1 && schedule)))
|
||||
|| restore) begin
|
||||
schedule_addr_r <= enqueue_addr;
|
||||
dout_r <= enqueue_data;
|
||||
end else if (schedule) begin
|
||||
schedule_addr_r <= addr_table[schedule_n_ptr];
|
||||
dout_r <= dout;
|
||||
end
|
||||
end
|
||||
|
||||
assign schedule_valid = schedule_valid_r;
|
||||
assign schedule_addr = schedule_addr_r;
|
||||
assign schedule_data = dout_r;
|
||||
|
||||
`ifdef DBG_PRINT_CACHE_MSHR
|
||||
always @(posedge clk) begin
|
||||
if (lookup_ready || schedule || enqueue || dequeue) begin
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#define RESET_DELAY 2
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 24
|
||||
#define DRAM_LATENCY 300
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue