fixed DRAM response backpressure inside Cache

This commit is contained in:
Blaise Tine 2020-11-10 05:24:57 -08:00
parent ed69be4027
commit 725322807e
19 changed files with 69 additions and 36 deletions

View file

@ -20,11 +20,11 @@ DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CORE_REQ_INFO
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
DEBUG=1
#DEBUG=1
#SCOPE=1
CFLAGS += -fPIC

View file

@ -263,7 +263,16 @@ void opae_sim::avs_bus() {
if (dram_rd_it != dram_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
uint32_t tag = dram_rd_it->tag;
dram_reads_.erase(dram_rd_it);
/*printf("%0ld: VLSIM: DRAM rsp: addr=%x, pending={", timestamp, tag);
for (auto& req : dram_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.tag);
else
printf(" %0x", req.tag);
}
printf("}\n");*/
}
// handle DRAM stalls
@ -293,10 +302,19 @@ void opae_sim::avs_bus() {
if (vortex_afu_->avs_read) {
assert(0 == vortex_afu_->mem_bank_select);
dram_rd_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
dram_req.cycles_left = DRAM_LATENCY;
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_req.tag = base_addr;
dram_reads_.emplace_back(dram_req);
/*printf("%0ld: VLSIM: DRAM req: addr=%x, pending={", timestamp, base_addr);
for (auto& req : dram_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.tag);
else
printf(" %0x", req.tag);
}
printf("}\n");*/
}
}

View file

@ -275,7 +275,7 @@ module VX_cluster #(
);
assign busy = (| per_core_busy);
assign ebreak = (& per_core_ebreak);
assign ebreak = (| per_core_ebreak);
if (`L2_ENABLE) begin

View file

@ -8,7 +8,7 @@
`endif
`ifndef NUM_CORES
`define NUM_CORES 2
`define NUM_CORES 4
`endif
`ifndef NUM_WARPS
@ -223,16 +223,16 @@
`define DDREQ_SIZE 8
`endif
// Snoop Response Queue Size
`ifndef DSNPQ_SIZE
`define DSNPQ_SIZE 8
`endif
// DRAM Response Queue Size
`ifndef DDRFQ_SIZE
`define DDRFQ_SIZE 8
`endif
// Snoop Response Queue Size
`ifndef DSNPQ_SIZE
`define DSNPQ_SIZE 8
`endif
// Snoop Request Queue Size
`ifndef DSNRQ_SIZE
`define DSNRQ_SIZE 8
@ -359,7 +359,7 @@
`define L2DRFQ_SIZE 8
`endif
// Snoop Req Queue Size
// Snoop Request Queue Size
`ifndef L2SNRQ_SIZE
`define L2SNRQ_SIZE 8
`endif
@ -416,7 +416,7 @@
`define L3DRFQ_SIZE 8
`endif
// Snoop Req Queue Size
// Snoop Request Queue Size
`ifndef L3SNRQ_SIZE
`define L3SNRQ_SIZE 8
`endif

View file

@ -276,7 +276,7 @@ module VX_core #(
);
// select io bus
wire is_io_addr = ({core_dcache_req_if.addr[0], 2'b0} >= `IO_BUS_BASE_ADDR);
wire is_io_addr = ({core_dcache_req_if.addr[0], 2'b0} >= `IO_BUS_BASE_ADDR);
wire io_req_select = (| core_dcache_req_if.valid) ? is_io_addr : 0;
wire io_rsp_select = (| arb_io_rsp_if.valid);

View file

@ -60,7 +60,7 @@ module VX_csr_io_arb #(
.grant_onehot (rsp_1hot)
);
wire stall = csr_io_rsp_valid_out && ~csr_io_rsp_ready_out;
wire stall = ~csr_io_rsp_ready_out && csr_io_rsp_valid_out;
VX_generic_register #(
.N(1 + 32),

View file

@ -1,5 +1,6 @@
`include "VX_define.vh"
`TRACING_OFF
module VX_gpr_ram (
input wire clk,
input wire [`NUM_THREADS-1:0] we,
@ -30,4 +31,5 @@ module VX_gpr_ram (
assign rs1_data = q1;
assign rs2_data = q2;
endmodule
endmodule
`TRACING_ON

View file

@ -64,7 +64,7 @@ module VX_io_arb #(
.grant_onehot (req_1hot)
);
wire stall = (| io_req_valid_out) && ~io_req_ready_out;
wire stall = ~io_req_ready_out && (| io_req_valid_out);
VX_generic_register #(
.N(`NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH)),
@ -91,6 +91,7 @@ module VX_io_arb #(
assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
assign io_rsp_data_in[i] = io_rsp_data_out;
end
assign io_rsp_ready_out = io_rsp_ready_in[rsp_sel];
end else begin

View file

@ -59,7 +59,7 @@ module VX_mem_arb #(
.grant_onehot (req_1hot)
);
wire stall = mem_req_valid_out && ~mem_req_ready_out;
wire stall = ~mem_req_ready_out && mem_req_valid_out;
VX_generic_register #(
.N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + WORD_SIZE + WORD_WIDTH),
@ -86,6 +86,7 @@ module VX_mem_arb #(
assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
assign mem_rsp_data_in[i] = mem_rsp_data_out;
end
assign mem_rsp_ready_out = mem_rsp_ready_in[rsp_sel];
end else begin

View file

@ -46,8 +46,8 @@
if (!(cond)) $error msg; \
endgenerate
`define ENABLE_TRACING /* verilator tracing_on */
`define DISABLE_TRACING /* verilator tracing_off */
`define TRACING_ON /* verilator tracing_on */
`define TRACING_OFF /* verilator tracing_off */
///////////////////////////////////////////////////////////////////////////////

View file

@ -64,13 +64,23 @@ module VX_scoreboard #(
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
`ifdef DBG_PRINT_PIPELINE
reg [31:0] stall_ctr;
always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
if (reset) begin
stall_ctr <= 0;
end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
stall_ctr <= stall_ctr + 1;
if (stall_ctr >= 2000) begin
$fflush();
assert(0);
end
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
stall_ctr <= 0;
end
end
end
`endif
endmodule

View file

@ -73,7 +73,7 @@ module VX_writeback #(
0;
always @(*) assert(writeback_if.ready);
wire stall = 0/*~writeback_if.ready && writeback_if.valid*/;
wire stall =~writeback_if.ready && writeback_if.valid;
VX_generic_register #(
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))

View file

@ -316,7 +316,7 @@ module Vortex (
);
assign busy = (| per_cluster_busy);
assign ebreak = (& per_cluster_ebreak);
assign ebreak = (| per_cluster_ebreak);
// L3 Cache ///////////////////////////////////////////////////////////

View file

@ -561,7 +561,7 @@ module VX_bank #(
wire[WORD_SIZE-1:0] req_byteen_st3;
wire msrq_push_unqual = miss_st3 || force_miss_st3;
assign msrq_push_stall = (miss_st3 || force_miss_st3) && msrq_full;
assign msrq_push_stall = msrq_push_unqual && msrq_full;
wire msrq_push = msrq_push_unqual
&& !msrq_full

View file

@ -221,7 +221,7 @@ module VX_cache #(
);
assign dram_req_tag = dram_req_addr;
assign dram_rsp_ready = (| per_bank_dram_rsp_ready);
assign dram_rsp_ready = (& per_bank_dram_rsp_ready);
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;

View file

@ -178,7 +178,7 @@ module VX_cache_miss_resrv #(
`ifdef DBG_PRINT_CACHE_MSRQ
always @(posedge clk) begin
if (schedule_st0 || enqueue_st3 || dequeue_st3) begin
if (update_ready_st0 || schedule_st0 || enqueue_st3 || dequeue_st3) begin
if (schedule_st0)
$display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(dequeue_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0);
if (enqueue_st3) begin

View file

@ -109,7 +109,7 @@ module VX_snp_forwarder #(
.grant_onehot (sel_1hot)
);
wire stall = fwdin_valid && ~fwdin_ready;
wire stall = ~fwdin_ready && fwdin_valid;
VX_generic_register #(
.N(1 + `LOG2UP(SNRQ_SIZE)),

View file

@ -2,7 +2,9 @@
`include "fpnew_pkg.sv"
`include "defs_div_sqrt_mvp.sv"
module VX_fpnew #(
`TRACING_OFF
module VX_fpnew
#(
parameter TAGW = 1,
parameter FMULADD = 1,
parameter FDIVSQRT = 1,
@ -130,8 +132,6 @@ module VX_fpnew #(
default:;
endcase
end
`DISABLE_TRACING
for (genvar i = 0; i < `NUM_THREADS; i++) begin
if (0 == i) begin
@ -191,8 +191,6 @@ module VX_fpnew #(
end
end
`ENABLE_TRACING
assign fpu_valid_in = valid_in;
assign ready_in = fpu_ready_in;
@ -207,4 +205,5 @@ module VX_fpnew #(
assign valid_out = fpu_valid_out;
assign fpu_ready_out = ready_out;
endmodule
endmodule
`TRACING_ON

View file

@ -1,5 +1,6 @@
`include "VX_platform.vh"
`TRACING_OFF
module VX_dp_ram #(
parameter DATAW = 1,
parameter SIZE = 1,
@ -284,4 +285,5 @@ module VX_dp_ram #(
end
end
endmodule
endmodule
`TRACING_ON