mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
fixed DRAM response backpressure inside Cache
This commit is contained in:
parent
ed69be4027
commit
725322807e
19 changed files with 69 additions and 36 deletions
|
@ -20,11 +20,11 @@ DBG_FLAGS += $(DBG_PRINT_FLAGS)
|
|||
DBG_FLAGS += -DDBG_CORE_REQ_INFO
|
||||
|
||||
#CONFIGS += -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1
|
||||
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
#CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
DEBUG=1
|
||||
#DEBUG=1
|
||||
#SCOPE=1
|
||||
|
||||
CFLAGS += -fPIC
|
||||
|
|
|
@ -263,7 +263,16 @@ void opae_sim::avs_bus() {
|
|||
if (dram_rd_it != dram_reads_.end()) {
|
||||
vortex_afu_->avs_readdatavalid = 1;
|
||||
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
|
||||
uint32_t tag = dram_rd_it->tag;
|
||||
dram_reads_.erase(dram_rd_it);
|
||||
/*printf("%0ld: VLSIM: DRAM rsp: addr=%x, pending={", timestamp, tag);
|
||||
for (auto& req : dram_reads_) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.tag);
|
||||
else
|
||||
printf(" %0x", req.tag);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
|
||||
// handle DRAM stalls
|
||||
|
@ -293,10 +302,19 @@ void opae_sim::avs_bus() {
|
|||
if (vortex_afu_->avs_read) {
|
||||
assert(0 == vortex_afu_->mem_bank_select);
|
||||
dram_rd_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
|
||||
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
|
||||
dram_req.tag = base_addr;
|
||||
dram_reads_.emplace_back(dram_req);
|
||||
/*printf("%0ld: VLSIM: DRAM req: addr=%x, pending={", timestamp, base_addr);
|
||||
for (auto& req : dram_reads_) {
|
||||
if (req.cycles_left != 0)
|
||||
printf(" !%0x", req.tag);
|
||||
else
|
||||
printf(" %0x", req.tag);
|
||||
}
|
||||
printf("}\n");*/
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -275,7 +275,7 @@ module VX_cluster #(
|
|||
);
|
||||
|
||||
assign busy = (| per_core_busy);
|
||||
assign ebreak = (& per_core_ebreak);
|
||||
assign ebreak = (| per_core_ebreak);
|
||||
|
||||
if (`L2_ENABLE) begin
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
`endif
|
||||
|
||||
`ifndef NUM_CORES
|
||||
`define NUM_CORES 2
|
||||
`define NUM_CORES 4
|
||||
`endif
|
||||
|
||||
`ifndef NUM_WARPS
|
||||
|
@ -223,16 +223,16 @@
|
|||
`define DDREQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Response Queue Size
|
||||
`ifndef DSNPQ_SIZE
|
||||
`define DSNPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// DRAM Response Queue Size
|
||||
`ifndef DDRFQ_SIZE
|
||||
`define DDRFQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Response Queue Size
|
||||
`ifndef DSNPQ_SIZE
|
||||
`define DSNPQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
`ifndef DSNRQ_SIZE
|
||||
`define DSNRQ_SIZE 8
|
||||
|
@ -359,7 +359,7 @@
|
|||
`define L2DRFQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
// Snoop Request Queue Size
|
||||
`ifndef L2SNRQ_SIZE
|
||||
`define L2SNRQ_SIZE 8
|
||||
`endif
|
||||
|
@ -416,7 +416,7 @@
|
|||
`define L3DRFQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Snoop Req Queue Size
|
||||
// Snoop Request Queue Size
|
||||
`ifndef L3SNRQ_SIZE
|
||||
`define L3SNRQ_SIZE 8
|
||||
`endif
|
||||
|
|
|
@ -276,7 +276,7 @@ module VX_core #(
|
|||
);
|
||||
|
||||
// select io bus
|
||||
wire is_io_addr = ({core_dcache_req_if.addr[0], 2'b0} >= `IO_BUS_BASE_ADDR);
|
||||
wire is_io_addr = ({core_dcache_req_if.addr[0], 2'b0} >= `IO_BUS_BASE_ADDR);
|
||||
wire io_req_select = (| core_dcache_req_if.valid) ? is_io_addr : 0;
|
||||
wire io_rsp_select = (| arb_io_rsp_if.valid);
|
||||
|
||||
|
|
|
@ -60,7 +60,7 @@ module VX_csr_io_arb #(
|
|||
.grant_onehot (rsp_1hot)
|
||||
);
|
||||
|
||||
wire stall = csr_io_rsp_valid_out && ~csr_io_rsp_ready_out;
|
||||
wire stall = ~csr_io_rsp_ready_out && csr_io_rsp_valid_out;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + 32),
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_gpr_ram (
|
||||
input wire clk,
|
||||
input wire [`NUM_THREADS-1:0] we,
|
||||
|
@ -30,4 +31,5 @@ module VX_gpr_ram (
|
|||
assign rs1_data = q1;
|
||||
assign rs2_data = q2;
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -64,7 +64,7 @@ module VX_io_arb #(
|
|||
.grant_onehot (req_1hot)
|
||||
);
|
||||
|
||||
wire stall = (| io_req_valid_out) && ~io_req_ready_out;
|
||||
wire stall = ~io_req_ready_out && (| io_req_valid_out);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH)),
|
||||
|
@ -91,6 +91,7 @@ module VX_io_arb #(
|
|||
assign io_rsp_tag_in[i] = io_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
|
||||
assign io_rsp_data_in[i] = io_rsp_data_out;
|
||||
end
|
||||
|
||||
assign io_rsp_ready_out = io_rsp_ready_in[rsp_sel];
|
||||
|
||||
end else begin
|
||||
|
|
|
@ -59,7 +59,7 @@ module VX_mem_arb #(
|
|||
.grant_onehot (req_1hot)
|
||||
);
|
||||
|
||||
wire stall = mem_req_valid_out && ~mem_req_ready_out;
|
||||
wire stall = ~mem_req_ready_out && mem_req_valid_out;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + TAG_OUT_WIDTH + ADDR_WIDTH + 1 + WORD_SIZE + WORD_WIDTH),
|
||||
|
@ -86,6 +86,7 @@ module VX_mem_arb #(
|
|||
assign mem_rsp_tag_in[i] = mem_rsp_tag_out[REQS_BITS +: TAG_IN_WIDTH];
|
||||
assign mem_rsp_data_in[i] = mem_rsp_data_out;
|
||||
end
|
||||
|
||||
assign mem_rsp_ready_out = mem_rsp_ready_in[rsp_sel];
|
||||
|
||||
end else begin
|
||||
|
|
|
@ -46,8 +46,8 @@
|
|||
if (!(cond)) $error msg; \
|
||||
endgenerate
|
||||
|
||||
`define ENABLE_TRACING /* verilator tracing_on */
|
||||
`define DISABLE_TRACING /* verilator tracing_off */
|
||||
`define TRACING_ON /* verilator tracing_on */
|
||||
`define TRACING_OFF /* verilator tracing_off */
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
|
@ -64,13 +64,23 @@ module VX_scoreboard #(
|
|||
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
reg [31:0] stall_ctr;
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
if (reset) begin
|
||||
stall_ctr <= 0;
|
||||
end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
|
||||
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
|
||||
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
|
||||
stall_ctr <= stall_ctr + 1;
|
||||
if (stall_ctr >= 2000) begin
|
||||
$fflush();
|
||||
assert(0);
|
||||
end
|
||||
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
|
||||
stall_ctr <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
|
@ -73,7 +73,7 @@ module VX_writeback #(
|
|||
0;
|
||||
|
||||
always @(*) assert(writeback_if.ready);
|
||||
wire stall = 0/*~writeback_if.ready && writeback_if.valid*/;
|
||||
wire stall =~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
|
||||
|
|
|
@ -316,7 +316,7 @@ module Vortex (
|
|||
);
|
||||
|
||||
assign busy = (| per_cluster_busy);
|
||||
assign ebreak = (& per_cluster_ebreak);
|
||||
assign ebreak = (| per_cluster_ebreak);
|
||||
|
||||
// L3 Cache ///////////////////////////////////////////////////////////
|
||||
|
||||
|
|
2
hw/rtl/cache/VX_bank.v
vendored
2
hw/rtl/cache/VX_bank.v
vendored
|
@ -561,7 +561,7 @@ module VX_bank #(
|
|||
wire[WORD_SIZE-1:0] req_byteen_st3;
|
||||
|
||||
wire msrq_push_unqual = miss_st3 || force_miss_st3;
|
||||
assign msrq_push_stall = (miss_st3 || force_miss_st3) && msrq_full;
|
||||
assign msrq_push_stall = msrq_push_unqual && msrq_full;
|
||||
|
||||
wire msrq_push = msrq_push_unqual
|
||||
&& !msrq_full
|
||||
|
|
2
hw/rtl/cache/VX_cache.v
vendored
2
hw/rtl/cache/VX_cache.v
vendored
|
@ -221,7 +221,7 @@ module VX_cache #(
|
|||
);
|
||||
|
||||
assign dram_req_tag = dram_req_addr;
|
||||
assign dram_rsp_ready = (| per_bank_dram_rsp_ready);
|
||||
assign dram_rsp_ready = (& per_bank_dram_rsp_ready);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
|
||||
|
|
2
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
2
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
|
@ -178,7 +178,7 @@ module VX_cache_miss_resrv #(
|
|||
|
||||
`ifdef DBG_PRINT_CACHE_MSRQ
|
||||
always @(posedge clk) begin
|
||||
if (schedule_st0 || enqueue_st3 || dequeue_st3) begin
|
||||
if (update_ready_st0 || schedule_st0 || enqueue_st3 || dequeue_st3) begin
|
||||
if (schedule_st0)
|
||||
$display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(dequeue_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0);
|
||||
if (enqueue_st3) begin
|
||||
|
|
2
hw/rtl/cache/VX_snp_forwarder.v
vendored
2
hw/rtl/cache/VX_snp_forwarder.v
vendored
|
@ -109,7 +109,7 @@ module VX_snp_forwarder #(
|
|||
.grant_onehot (sel_1hot)
|
||||
);
|
||||
|
||||
wire stall = fwdin_valid && ~fwdin_ready;
|
||||
wire stall = ~fwdin_ready && fwdin_valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `LOG2UP(SNRQ_SIZE)),
|
||||
|
|
|
@ -2,7 +2,9 @@
|
|||
`include "fpnew_pkg.sv"
|
||||
`include "defs_div_sqrt_mvp.sv"
|
||||
|
||||
module VX_fpnew #(
|
||||
`TRACING_OFF
|
||||
module VX_fpnew
|
||||
#(
|
||||
parameter TAGW = 1,
|
||||
parameter FMULADD = 1,
|
||||
parameter FDIVSQRT = 1,
|
||||
|
@ -130,8 +132,6 @@ module VX_fpnew #(
|
|||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
`DISABLE_TRACING
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (0 == i) begin
|
||||
|
@ -191,8 +191,6 @@ module VX_fpnew #(
|
|||
end
|
||||
end
|
||||
|
||||
`ENABLE_TRACING
|
||||
|
||||
assign fpu_valid_in = valid_in;
|
||||
assign ready_in = fpu_ready_in;
|
||||
|
||||
|
@ -207,4 +205,5 @@ module VX_fpnew #(
|
|||
assign valid_out = fpu_valid_out;
|
||||
assign fpu_ready_out = ready_out;
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -1,5 +1,6 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_dp_ram #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
|
@ -284,4 +285,5 @@ module VX_dp_ram #(
|
|||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
`TRACING_ON
|
Loading…
Add table
Add a link
Reference in a new issue