fixed bank_core_req_arb critical path

This commit is contained in:
Blaise Tine 2020-11-08 18:25:32 -08:00
parent 10505caae1
commit 203a184008
7 changed files with 167 additions and 120 deletions

View file

@ -938,7 +938,7 @@ always @(posedge clk) begin
vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next);
snp_req_ctr <= snp_req_ctr_next;
`ifdef DBG_PRINT_OPAE
$display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next));
$display("%t: AFU Snp Req: addr=%0h, tag=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next));
`endif
end
@ -947,7 +947,7 @@ always @(posedge clk) begin
assert(snp_rsp_ctr != 0);
snp_rsp_ctr <= snp_rsp_ctr_next;
`ifdef DBG_PRINT_OPAE
$display("%t: AFU Snp Rsp: tag=%0d, rem=%0d", $time, vx_snp_rsp_tag, snp_rsp_ctr_next);
$display("%t: AFU Snp Rsp: tag=%0h, rem=%0d", $time, vx_snp_rsp_tag, snp_rsp_ctr_next);
`endif
end
end

View file

@ -91,6 +91,10 @@ module VX_lsu_unit #(
wire [1:0] rsp_sext;
reg [`NUM_THREADS-1:0][31:0] rsp_data;
`DEBUG_BLOCK(
reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags;
)
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] mem_rsp_mask;
wire [`DCORE_TAG_ID_BITS-1:0] req_tag, rsp_tag;
@ -113,7 +117,7 @@ module VX_lsu_unit #(
) lsu_cam (
.clk (clk),
.reset (reset),
.write_addr (req_tag),
.write_addr (req_tag),
.acquire_slot (lsuq_push),
.read_addr (rsp_tag),
.write_data ({req_wid, req_pc, req_rd, req_wb, req_offset, req_sext}),
@ -126,6 +130,7 @@ module VX_lsu_unit #(
always @(posedge clk) begin
if (lsuq_push) begin
mem_rsp_mask[req_tag] <= req_tmask;
pending_tags[req_tag] <= dcache_req_if.tag;
end
if (lsuq_pop_part) begin
mem_rsp_mask[rsp_tag] <= mem_rsp_mask_n;
@ -215,6 +220,13 @@ module VX_lsu_unit #(
$display("%t: D$%0d rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data);
end
if (lsuq_full) begin
$write("%t: D$%0d queue-full:", $time, CORE_ID);
for (integer j = 0; j < `LSUQ_SIZE; j++) begin
$write(" tag%0d=%0h", j, pending_tags[j]);
end
$write("\n");
end
end
`endif

View file

@ -833,25 +833,25 @@ module VX_bank #(
$display("%t: cache%0d:%0d pipeline-stall: msrq=%b, cwbq=%b, dwbq=%b, snpq=%b", $time, CACHE_ID, BANK_ID, msrq_push_stall, cwbq_push_stall, dwbq_push_stall, snpq_push_stall);
end
if (dfpq_pop) begin
$display("%t: cache%0d:%0d dram-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0);
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0);
end
if (reqq_pop) begin
$display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, debug_wid_st0, debug_pc_st0);
$display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0h, tid=%0d, rw=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_rw_st0, debug_wid_st0, debug_pc_st0);
end
if (snrq_pop) begin
$display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0d, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_invalidate_st0);
$display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_invalidate_st0);
end
if (cwbq_push) begin
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), cwbq_tag_st3, cwbq_data_st3, debug_wid_st3, debug_pc_st3);
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), cwbq_tag_st3, cwbq_tid_st3, cwbq_data_st3, debug_wid_st3, debug_pc_st3);
end
if (dwbq_push) begin
if (dwbq_is_dwb_in)
$display("%t: cache%0d:%0d dram-wb: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_req_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3);
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_req_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3);
else
$display("%t: cache%0d:%0d dram-fill: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_req_addr, BANK_ID), debug_wid_st3, debug_pc_st3);
$display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_req_addr, BANK_ID), debug_wid_st3, debug_pc_st3);
end
if (snpq_push) begin
$display("%t: cache%0d:%0d snp-rsp: addr=%0h, tag=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), snpq_tag_st3);
$display("%t: cache%0d:%0d snp-rsp: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), snpq_tag_st3);
end
end
`endif

View file

@ -15,125 +15,168 @@ module VX_bank_core_req_arb #(
input wire clk,
input wire reset,
// Enqueue Data
// Enqueue
input wire push,
input wire [NUM_REQUESTS-1:0] valids_in,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag_in,
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in,
input wire [`CORE_REQ_TAG_COUNT-1:0] rw_in,
input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] byteen_in,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] writedata_in,
input wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag_in,
// Dequeue Data
input wire pop,
output wire [`REQS_BITS-1:0] tid_out,
output wire rw_out,
output wire [WORD_SIZE-1:0] byteen_out,
output wire [`WORD_ADDR_WIDTH-1:0] addr_out,
output wire [`WORD_WIDTH-1:0] writedata_out,
output wire [CORE_TAG_WIDTH-1:0] tag_out,
// Dequeue
input wire pop,
output wire [CORE_TAG_WIDTH-1:0] tag_out,
output wire [`WORD_ADDR_WIDTH-1:0] addr_out,
output wire rw_out,
output wire [WORD_SIZE-1:0] byteen_out,
output wire [`WORD_WIDTH-1:0] writedata_out,
output wire [`REQS_BITS-1:0] tid_out,
// State Data
output wire empty,
output wire full
// States
output wire empty,
output wire full
);
wire [NUM_REQUESTS-1:0] out_per_valids;
wire [`CORE_REQ_TAG_COUNT-1:0] out_per_rw;
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] out_per_byteen;
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] out_per_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] out_per_writedata;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] out_per_tag;
reg [NUM_REQUESTS-1:0] use_per_valids;
reg [`CORE_REQ_TAG_COUNT-1:0] use_per_rw;
reg [NUM_REQUESTS-1:0][WORD_SIZE-1:0] use_per_byteen;
reg [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] use_per_addr;
reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] use_per_writedata;
reg [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] use_per_tag;
wire [NUM_REQUESTS-1:0] qual_valids;
wire [`CORE_REQ_TAG_COUNT-1:0] qual_rw;
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] qual_byteen;
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] qual_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] qual_writedata;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] qual_tag;
wire o_empty;
wire use_empty = !(| use_per_valids);
wire out_empty = !(| out_per_valids) || o_empty;
wire push_qual = push && !full;
wire pop_qual = !out_empty && use_empty;
wire [NUM_REQUESTS-1:0] q_valids;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] q_tag;
wire [`CORE_REQ_TAG_COUNT-1:0] q_rw;
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] q_byteen;
wire [NUM_REQUESTS-1:0][`WORD_ADDR_WIDTH-1:0] q_addr;
wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] q_writedata;
wire q_push;
wire q_pop;
wire q_empty;
wire q_full;
// Sanity checks on the enqueue/dequeue handshake signals.
always @(*) begin
// A push must carry at least one valid request.
assert(!push || (| valids_in));
// A push must not be issued while the arbiter reports full.
assert(!push || !full);
// A pop must not be issued while the arbiter reports empty.
assert(!pop || !empty);
end
VX_generic_queue #(
.DATAW($bits(valids_in) + $bits(addr_in) + $bits(writedata_in) + $bits(tag_in) + $bits(rw_in) + $bits(byteen_in)),
.DATAW($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(writedata_in)),
.SIZE(CREQ_SIZE)
) reqq_queue (
) req_queue (
.clk (clk),
.reset (reset),
.push (push_qual),
.data_in ({valids_in, rw_in, byteen_in, addr_in, writedata_in, tag_in}),
.pop (pop_qual),
.data_out ({out_per_valids, out_per_rw, out_per_byteen, out_per_addr, out_per_writedata, out_per_tag}),
.empty (o_empty),
.full (full),
.push (q_push),
.pop (q_pop),
.data_in ({valids_in, tag_in, addr_in, rw_in, byteen_in, writedata_in}),
.data_out ({q_valids, q_tag, q_addr, q_rw, q_byteen, q_writedata}),
.empty (q_empty),
.full (q_full),
`UNUSED_PIN (size)
);
wire[NUM_REQUESTS-1:0] real_out_per_valids = out_per_valids & {NUM_REQUESTS{~out_empty}};
if (NUM_REQUESTS > 1) begin
assign qual_valids = use_per_valids;
assign qual_addr = use_per_addr;
assign qual_writedata = use_per_writedata;
assign qual_tag = use_per_tag;
assign qual_rw = use_per_rw;
assign qual_byteen = use_per_byteen;
reg [CORE_TAG_WIDTH-1:0] sel_tag;
reg [`REQS_BITS-1:0] sel_tid;
reg [`WORD_ADDR_WIDTH-1:0] sel_addr;
reg sel_rw;
reg [WORD_SIZE-1:0] sel_byteen;
reg [`WORD_WIDTH-1:0] sel_writedata;
reg [$clog2(NUM_REQUESTS+1)-1:0] q_valids_cnt_r;
wire [$clog2(NUM_REQUESTS+1)-1:0] q_valids_cnt;
reg [NUM_REQUESTS-1:0] pop_mask;
reg fast_track;
wire sel_valid;
wire[`REQS_BITS-1:0] sel_idx;
VX_fixed_arbiter #(
.N(NUM_REQUESTS)
) sel_bank (
.clk (clk),
.reset (reset),
.requests (qual_valids),
.grant_valid (sel_valid),
.grant_index (sel_idx),
`UNUSED_PIN (grant_onehot)
);
assign q_push = push;
assign q_pop = pop && (q_valids_cnt_r == 1 || q_valids_cnt_r == 2) && !fast_track;
assign empty = !sel_valid;
assign tid_out = sel_idx;
assign byteen_out = qual_byteen[sel_idx];
assign addr_out = qual_addr[sel_idx];
assign writedata_out = qual_writedata[sel_idx];
if (CORE_TAG_ID_BITS != 0) begin
assign tag_out = qual_tag;
assign rw_out = qual_rw;
end else begin
assign tag_out = qual_tag[sel_idx];
assign rw_out = qual_rw[sel_idx];
end
wire [`REQS_BITS-1:0] sel_idx;
VX_fixed_arbiter #(
.N(NUM_REQUESTS)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (q_valids & ~pop_mask),
`UNUSED_PIN (grant_valid),
.grant_index (sel_idx),
`UNUSED_PIN (grant_onehot)
);
always @(posedge clk) begin
if (reset) begin
use_per_valids <= 0;
end else begin
if (pop_qual) begin
use_per_valids <= real_out_per_valids;
use_per_rw <= out_per_rw;
use_per_byteen <= out_per_byteen;
use_per_addr <= out_per_addr;
use_per_writedata <= out_per_writedata;
use_per_tag <= out_per_tag;
end else if (pop) begin
use_per_valids[sel_idx] <= 0;
VX_countones #(
.N(NUM_REQUESTS)
) counter (
.valids (q_valids),
.count (q_valids_cnt)
);
always @(posedge clk) begin
if (reset) begin
pop_mask <= 0;
fast_track <= 0;
q_valids_cnt_r <= 0;
end else begin
if (!q_empty
&& ((0 == q_valids_cnt_r) || (pop && fast_track))) begin
q_valids_cnt_r <= q_valids_cnt;
pop_mask <= (NUM_REQUESTS'(1) << sel_idx);
fast_track <= 0;
end else if (pop) begin
q_valids_cnt_r <= q_valids_cnt_r - 1;
fast_track <= (q_valids_cnt_r == 2);
if (q_valids_cnt_r == 1 || q_valids_cnt_r == 2) begin
pop_mask <= 0;
end else begin
pop_mask[sel_idx] <= 1;
end
end
if ((0 == q_valids_cnt_r) || pop) begin
sel_tid <= sel_idx;
sel_byteen <= q_byteen[sel_idx];
sel_addr <= q_addr[sel_idx];
sel_writedata <= q_writedata[sel_idx];
end
end
end
end
if (CORE_TAG_ID_BITS != 0) begin
always @(posedge clk) begin
if ((0 == q_valids_cnt_r) || pop) begin
sel_tag <= q_tag;
sel_rw <= q_rw;
end
end
end else begin
always @(posedge clk) begin
if ((0 == q_valids_cnt_r) || pop) begin
sel_tag <= q_tag[sel_idx];
sel_rw <= q_rw[sel_idx];
end
end
end
assign tag_out = sel_tag;
assign addr_out = sel_addr;
assign rw_out = sel_rw;
assign byteen_out = sel_byteen;
assign writedata_out = sel_writedata;
assign tid_out = sel_tid;
assign empty = (0 == q_valids_cnt_r);
assign full = q_full;
end else begin
`UNUSED_VAR (q_valids)
assign q_push = push;
assign q_pop = pop;
assign tag_out = q_tag;
assign addr_out = q_addr;
assign rw_out = q_rw;
assign byteen_out = q_byteen;
assign writedata_out = q_writedata;
assign tid_out = 0;
assign empty = q_empty;
assign full = q_full;
end
endmodule

View file

@ -178,15 +178,15 @@ module VX_cache_miss_resrv #(
`ifdef DBG_PRINT_CACHE_MSRQ
always @(posedge clk) begin
if (enqueue_st3 || schedule_st0 || dequeue_st3) begin
if (schedule_st0 || enqueue_st3 || dequeue_st3) begin
if (schedule_st0)
$display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(dequeue_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0);
if (enqueue_st3) begin
if (enqueue_msrq_st3)
$display("%t: cache%0d:%0d msrq-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3);
else
$display("%t: cache%0d:%0d msrq-enq: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr_st3, BANK_ID), enqueue_ready_st3, debug_wid_st3, debug_pc_st3);
end
if (schedule_st0)
$display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(dequeue_addr_st0, BANK_ID), debug_wid_st0, debug_pc_st0);
if (dequeue_st3)
$display("%t: cache%0d:%0d msrq-deq addr%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, debug_wid_st3, debug_pc_st3);
$write("%t: cache%0d:%0d msrq-table", $time, CACHE_ID, BANK_ID);
@ -197,11 +197,6 @@ module VX_cache_miss_resrv #(
if (~ready_table[j]) $write("!");
$write("addr%0d=%0h", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
end
else if (schedule_ptr == $bits(schedule_ptr)'(j)) begin
$write(" *");
if (~ready_table[j]) $write("!");
$write("[addr%0d=%0h]", j, `LINE_TO_BYTE_ADDR(addr_table[j], BANK_ID));
end
end
$write("\n");
end

View file

@ -109,9 +109,6 @@ module VX_snp_forwarder #(
.grant_onehot (sel_1hot)
);
assign fwdin_valid = snp_fwdin_valid[sel_idx];
assign fwdin_tag = snp_fwdin_tag[sel_idx];
wire stall = fwdin_valid && ~fwdin_ready;
VX_generic_register #(

View file

@ -222,7 +222,7 @@ void Simulator::eval_snp_bus() {
--snp_req_size_;
++pending_snp_reqs_;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
#endif
} else {
vortex_->snp_req_valid = 0;
@ -296,7 +296,7 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
snp_req_active_ = true;
#ifdef DBG_PRINT_CACHE_SNP
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << std::dec << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
std::cout << timestamp << ": [sim] snp req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << snp_req_size_ << std::endl;
#endif
}