round robin arbiter + auto buffered queue + fixed dcache arbiter

This commit is contained in:
Blaise Tine 2020-06-20 17:56:04 -04:00
parent 9c157e4929
commit d3440de403
30 changed files with 339 additions and 209 deletions

View file

@ -10,7 +10,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSRQ \
-DDBG_PRINT_DRAM \
-DDBG_PRINT_WB \
-DDBG_PRINT_PIPELINE \
-DDBG_PRINT_OPAE
#DBG_PRINT=$(DBG_PRINT_FLAGS)

View file

@ -10,7 +10,7 @@ QI:vortex_afu.qsf
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_WB
#+define+DBG_PRINT_PIPELINE
#+define+DBG_PRINT_OPAE
#+define+DBG_PRINT_SCOPE
@ -27,13 +27,13 @@ QI:vortex_afu.qsf
../rtl/cache/VX_cache_config.vh
../rtl/cache/VX_cache.v
../rtl/cache/VX_bank.v
../rtl/cache/VX_cache_core_rsp_merge.v
../rtl/cache/VX_cache_core_req_bank_sel.v
../rtl/cache/VX_cache_dram_req_arb.v
../rtl/cache/VX_cache_dfq_queue.v
../rtl/cache/VX_cache_req_queue.v
../rtl/cache/VX_cache_dram_fill_arb.v
../rtl/cache/VX_cache_miss_resrv.v
../rtl/cache/VX_bank.v
../rtl/cache/VX_bank_core_req_arb.v
../rtl/cache/VX_snp_rsp_arb.v
../rtl/cache/VX_tag_data_access.v
../rtl/cache/VX_tag_data_structure.v
@ -66,10 +66,11 @@ QI:vortex_afu.qsf
../rtl/libs/VX_mult.v
../rtl/libs/VX_divide.v
../rtl/libs/VX_generic_stack.v
../rtl/libs/VX_generic_priority_encoder.v
../rtl/libs/VX_priority_encoder.v
../rtl/libs/VX_generic_queue.v
../rtl/libs/VX_indexable_queue.v
../rtl/libs/VX_fixed_arbiter.v
../rtl/libs/VX_rr_arbiter.v
../rtl/libs/VX_countones.v
../rtl/libs/VX_scope.v

View file

@ -1,48 +1,48 @@
`include "VX_define.vh"
module VX_dcache_arb (
input wire io_select,
input wire req_select,
// Core request
VX_cache_core_req_if core_req_if,
// input request
VX_cache_core_req_if in_core_req_if,
// Dcache request
VX_cache_core_req_if core_dcache_req_if,
// output 0 request
VX_cache_core_req_if out0_core_req_if,
// I/O request
VX_cache_core_req_if core_io_req_if,
// output 1 request
VX_cache_core_req_if out1_core_req_if,
// Dcache response
VX_cache_core_rsp_if core_dcache_rsp_if,
// input 0 response
VX_cache_core_rsp_if in0_core_rsp_if,
// I/O response
VX_cache_core_rsp_if core_io_rsp_if,
// input 1 response
VX_cache_core_rsp_if in1_core_rsp_if,
// Core response
VX_cache_core_rsp_if core_rsp_if
// output response
VX_cache_core_rsp_if out_core_rsp_if
);
assign core_dcache_req_if.core_req_valid = core_req_if.core_req_valid & {`NUM_THREADS{~io_select}};
assign core_dcache_req_if.core_req_rw = core_req_if.core_req_rw;
assign core_dcache_req_if.core_req_byteen = core_req_if.core_req_byteen;
assign core_dcache_req_if.core_req_addr = core_req_if.core_req_addr;
assign core_dcache_req_if.core_req_data = core_req_if.core_req_data;
assign core_dcache_req_if.core_req_tag = core_req_if.core_req_tag;
assign out0_core_req_if.core_req_valid = in_core_req_if.core_req_valid & {`NUM_THREADS{~req_select}};
assign out0_core_req_if.core_req_rw = in_core_req_if.core_req_rw;
assign out0_core_req_if.core_req_byteen = in_core_req_if.core_req_byteen;
assign out0_core_req_if.core_req_addr = in_core_req_if.core_req_addr;
assign out0_core_req_if.core_req_data = in_core_req_if.core_req_data;
assign out0_core_req_if.core_req_tag = in_core_req_if.core_req_tag;
assign core_io_req_if.core_req_valid = core_req_if.core_req_valid & {`NUM_THREADS{io_select}};
assign core_io_req_if.core_req_rw = core_req_if.core_req_rw;
assign core_io_req_if.core_req_byteen = core_req_if.core_req_byteen;
assign core_io_req_if.core_req_addr = core_req_if.core_req_addr;
assign core_io_req_if.core_req_data = core_req_if.core_req_data;
assign core_io_req_if.core_req_tag = core_req_if.core_req_tag;
assign out1_core_req_if.core_req_valid = in_core_req_if.core_req_valid & {`NUM_THREADS{req_select}};
assign out1_core_req_if.core_req_rw = in_core_req_if.core_req_rw;
assign out1_core_req_if.core_req_byteen = in_core_req_if.core_req_byteen;
assign out1_core_req_if.core_req_addr = in_core_req_if.core_req_addr;
assign out1_core_req_if.core_req_data = in_core_req_if.core_req_data;
assign out1_core_req_if.core_req_tag = in_core_req_if.core_req_tag;
assign core_req_if.core_req_ready = io_select ? core_io_req_if.core_req_ready : core_dcache_req_if.core_req_ready;
assign in_core_req_if.core_req_ready = req_select ? out1_core_req_if.core_req_ready : out0_core_req_if.core_req_ready;
wire dcache_rsp_valid = (| core_dcache_rsp_if.core_rsp_valid);
wire rsp_select0 = (| in0_core_rsp_if.core_rsp_valid);
assign core_rsp_if.core_rsp_valid = dcache_rsp_valid ? core_dcache_rsp_if.core_rsp_valid : core_io_rsp_if.core_rsp_valid;
assign core_rsp_if.core_rsp_data = dcache_rsp_valid ? core_dcache_rsp_if.core_rsp_data : core_io_rsp_if.core_rsp_data;
assign core_rsp_if.core_rsp_tag = dcache_rsp_valid ? core_dcache_rsp_if.core_rsp_tag : core_io_rsp_if.core_rsp_tag;
assign core_dcache_rsp_if.core_rsp_ready = core_rsp_if.core_rsp_ready;
assign core_io_rsp_if.core_rsp_ready = core_rsp_if.core_rsp_ready && ~dcache_rsp_valid;
assign out_core_rsp_if.core_rsp_valid = rsp_select0 ? in0_core_rsp_if.core_rsp_valid : in1_core_rsp_if.core_rsp_valid;
assign out_core_rsp_if.core_rsp_data = rsp_select0 ? in0_core_rsp_if.core_rsp_data : in1_core_rsp_if.core_rsp_data;
assign out_core_rsp_if.core_rsp_tag = rsp_select0 ? in0_core_rsp_if.core_rsp_tag : in1_core_rsp_if.core_rsp_tag;
assign in0_core_rsp_if.core_rsp_ready = out_core_rsp_if.core_rsp_ready && rsp_select0;
assign in1_core_rsp_if.core_rsp_ready = out_core_rsp_if.core_rsp_ready && ~rsp_select0;
endmodule

View file

@ -210,7 +210,7 @@
////////////////////////// SM Configurable Knobs //////////////////////////////
// Cache ID
`define SCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 3)
`define SCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 2)
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define SNUM_REQUESTS `NUM_THREADS

View file

@ -71,12 +71,12 @@ module VX_exec_unit (
wire jal_branch_found_valid;
`DEBUG_END
VX_generic_priority_encoder #(
VX_priority_encoder #(
.N(`NUM_THREADS)
) choose_alu_result (
.valids(exec_unit_req_if.valid),
.index (jal_branch_use_index),
.found (jal_branch_found_valid)
.data_in (exec_unit_req_if.valid),
.data_out (jal_branch_use_index),
.valid_out (jal_branch_found_valid)
);
wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index];

View file

@ -97,10 +97,10 @@ module VX_icache_stage #(
`ifdef DBG_PRINT_CORE_ICACHE
always @(posedge clk) begin
if (icache_req_if.core_req_valid && icache_req_if.core_req_ready) begin
$display("%t: I%01d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
$display("%t: I%0d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, mrq_write_addr, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
end
if (icache_rsp_if.core_rsp_valid && icache_rsp_if.core_rsp_ready) begin
$display("%t: I%01d$ rsp: tag=%0h, pc=%0h, warp=%0d, instr=%0h", $time, CORE_ID, mrq_read_addr, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
$display("%t: I%0d$ rsp: tag=%0h, pc=%0h, warp=%0d, instr=%0h", $time, CORE_ID, mrq_read_addr, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
end
end
`endif

View file

@ -181,11 +181,11 @@ module VX_lsu_unit #(
`ifdef DBG_PRINT_CORE_DCACHE
always @(posedge clk) begin
if ((| dcache_req_if.core_req_valid) && dcache_req_if.core_req_ready) begin
$display("%t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h",
$display("%t: D%0d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, byteen=%0h, data=%0h",
$time, CORE_ID, use_valid, use_address, mrq_write_addr, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, mem_req_byteen, mem_req_data);
end
if ((| dcache_rsp_if.core_rsp_valid) && dcache_rsp_if.core_rsp_ready) begin
$display("%t: D%01d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h",
$display("%t: D%0d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h",
$time, CORE_ID, mem_wb_if.valid, mrq_read_addr, mem_wb_if.curr_PC, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
end
end

View file

@ -65,13 +65,16 @@ module VX_mem_arb #(
reg [REQS_BITS-1:0] bus_req_sel;
always @(posedge clk) begin
if (reset) begin
bus_req_sel <= 0;
end else begin
bus_req_sel <= bus_req_sel + 1;
end
end
VX_rr_arbiter #(
.N(NUM_REQUESTS)
) arbiter (
.clk (clk),
.reset (reset),
.requests (in_mem_req_valid),
.grant_index (bus_req_sel),
`UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_onehot)
);
assign out_mem_req_valid = in_mem_req_valid [bus_req_sel];
assign out_mem_req_rw = in_mem_req_rw [bus_req_sel];
@ -93,7 +96,7 @@ module VX_mem_arb #(
assign in_mem_rsp_data[i] = out_mem_rsp_data;
assign in_mem_rsp_tag[i] = out_mem_rsp_tag[REQS_BITS +: TAG_IN_WIDTH];
end
assign out_mem_rsp_ready = out_mem_rsp_valid ? in_mem_rsp_ready[bus_rsp_sel] : 0;
assign out_mem_rsp_ready = in_mem_rsp_ready[bus_rsp_sel];
end

View file

@ -40,17 +40,18 @@ module VX_mem_unit # (
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS)
) core_dcache_rsp_qual_if(), core_smem_rsp_if();
// use "case equality" to handle uninitialized entry
wire smem_select = (({core_dcache_req_if.core_req_addr[0], 2'b0} >= `SHARED_MEM_BASE_ADDR) === 1'b1);
// select shared memory address
wire is_smem_addr = (({core_dcache_req_if.core_req_addr[0], 2'b0} - `SHARED_MEM_BASE_ADDR) <= `SCACHE_SIZE);
wire smem_select = (| core_dcache_req_if.core_req_valid) ? is_smem_addr : 0;
VX_dcache_arb dcache_smem_arb (
.io_select (smem_select),
.core_req_if (core_dcache_req_if),
.core_dcache_req_if (core_dcache_req_qual_if),
.core_io_req_if (core_smem_req_if),
.core_dcache_rsp_if (core_dcache_rsp_qual_if),
.core_io_rsp_if (core_smem_rsp_if),
.core_rsp_if (core_dcache_rsp_if)
.req_select (smem_select),
.in_core_req_if (core_dcache_req_if),
.out0_core_req_if (core_dcache_req_qual_if),
.out1_core_req_if (core_smem_req_if),
.in0_core_rsp_if (core_dcache_rsp_qual_if),
.in1_core_rsp_if (core_smem_rsp_if),
.out_core_rsp_if (core_dcache_rsp_if)
);
VX_cache #(

View file

@ -180,10 +180,13 @@ module VX_pipeline #(
`SCOPE_ASSIGN(scope_exec_delay, exec_delay);
`SCOPE_ASSIGN(scope_gpr_stage_delay, gpr_stage_delay);
`ifdef DBG_PRINT_WB
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if ((| writeback_if.valid) && (writeback_if.wb != 0)) begin
$display("%t: Writeback: wid=%0d, rd=%0d, data=%0h", $time, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
$display("%t: Core%0d-WB: warp=%0d, rd=%0d, data=%0h", $time, CORE_ID, writeback_if.warp_num, writeback_if.rd, writeback_if.data);
end
if (schedule_delay || memory_delay || exec_delay || gpr_stage_delay) begin
$display("%t: Core%0d-Delay: sched=%b, mem=%b, exec=%b, gpr=%b ", $time, CORE_ID, schedule_delay, memory_delay, exec_delay, gpr_stage_delay);
end
end
`endif

View file

@ -308,12 +308,15 @@ module VX_warp_sched (
assign use_active = (count_visible_active != 0) ? visible_active : (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock));
// Choosing a warp to schedule
VX_priority_encoder #(
VX_rr_arbiter #(
.N(`NUM_WARPS)
) choose_schedule (
.valids (use_active),
.index (warp_to_schedule),
.found (schedule)
.clk (clk),
.reset (reset),
.requests (use_active),
.grant_index (warp_to_schedule),
.grant_valid (schedule),
`UNUSED_PIN (grant_onehot)
);
// always @(*) begin

View file

@ -261,17 +261,18 @@ module Vortex #(
.icache_dram_rsp_if (icache_dram_rsp_if)
);
// use "case equality" to handle uninitialized address value
wire io_select = (({core_dcache_req_if.core_req_addr[0], 2'b0} >= `IO_BUS_BASE_ADDR) === 1'b1);
// select io address
wire is_io_addr = ({core_dcache_req_if.core_req_addr[0], 2'b0} >= `IO_BUS_BASE_ADDR);
wire io_select = (| core_dcache_req_if.core_req_valid) ? is_io_addr : 0;
VX_dcache_arb dcache_io_arb (
.io_select (io_select),
.core_req_if (core_dcache_req_if),
.core_dcache_req_if (arb_dcache_req_if),
.core_io_req_if (arb_io_req_if),
.core_dcache_rsp_if (arb_dcache_rsp_if),
.core_io_rsp_if (arb_io_rsp_if),
.core_rsp_if (core_dcache_rsp_if)
.req_select (io_select),
.in_core_req_if (core_dcache_req_if),
.out0_core_req_if (arb_dcache_req_if),
.out1_core_req_if (arb_io_req_if),
.in0_core_rsp_if (arb_dcache_rsp_if),
.in1_core_rsp_if (arb_io_rsp_if),
.out_core_rsp_if (core_dcache_rsp_if)
);
endmodule // Vortex

View file

@ -192,13 +192,13 @@ module VX_bank #(
wire [`WORD_WIDTH-1:0] reqq_req_writeword_st0;
wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0;
VX_cache_req_queue #(
VX_bank_core_req_arb #(
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.CREQ_SIZE (CREQ_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) req_queue (
) core_req_arb (
.clk (clk),
.reset (reset),
// Enqueue
@ -722,25 +722,25 @@ module VX_bank #(
`ifdef DBG_PRINT_CACHE_BANK
always @(posedge clk) begin
if ((|core_req_valid) && core_req_ready) begin
$display("%t: bank%0d-%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr, BANK_ID), core_req_tag);
$display("%t: bank%0d:%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr[0], BANK_ID), core_req_tag);
end
if (core_rsp_valid && core_rsp_ready) begin
$display("%t: bank%0d-%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
$display("%t: bank%0d:%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
end
if (dram_fill_req_valid && dram_fill_req_ready) begin
$display("%t: bank%0d-%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
$display("%t: bank%0d:%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
end
if (dram_wb_req_firevalid && dram_wb_req_ready) begin
$display("%t: bank%0d-%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
if (dram_wb_req_valid && dram_wb_req_ready) begin
$display("%t: bank%0d:%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
end
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
$display("%t: bank%0d-%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
$display("%t: bank%0d:%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
end
if (snp_req_valid && snp_req_ready) begin
$display("%t: bank%0d-%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
$display("%t: bank%0d:%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: bank%0d-%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
$display("%t: bank%0d:%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
end
end
`endif

View file

@ -1,6 +1,6 @@
`include "VX_cache_config.vh"
module VX_cache_req_queue #(
module VX_bank_core_req_arb #(
// Size of a word in bytes
parameter WORD_SIZE = 0,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
@ -95,12 +95,15 @@ module VX_cache_req_queue #(
wire[`REQS_BITS-1:0] qual_request_index;
wire qual_has_request;
VX_generic_priority_encoder #(
VX_fixed_arbiter #(
.N(NUM_REQUESTS)
) sel_bank (
.valids(qual_valids),
.index (qual_request_index),
.found (qual_has_request)
.clk (clk),
.reset (reset),
.requests (qual_valids),
.grant_index (qual_request_index),
.grant_valid (qual_has_request),
`UNUSED_PIN (grant_onehot)
);
assign reqq_empty = !qual_has_request;

View file

@ -458,22 +458,26 @@ module VX_cache #(
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) cache_core_rsp_merge (
.clk (clk),
.reset (reset),
.per_bank_core_rsp_tid (per_bank_core_rsp_tid),
.per_bank_core_rsp_valid (per_bank_core_rsp_valid),
.per_bank_core_rsp_data (per_bank_core_rsp_data),
.per_bank_core_rsp_tag (per_bank_core_rsp_tag),
.per_bank_core_rsp_ready (per_bank_core_rsp_ready),
.core_rsp_valid (core_rsp_valid),
.core_rsp_data (core_rsp_data),
.core_rsp_tag (core_rsp_tag),
.core_rsp_ready (core_rsp_ready)
.core_rsp_valid (core_rsp_valid),
.core_rsp_data (core_rsp_data),
.core_rsp_tag (core_rsp_tag),
.core_rsp_ready (core_rsp_ready)
);
VX_snp_rsp_arb #(
.NUM_BANKS (NUM_BANKS),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) snp_rsp_arb (
) snp_rsp_arb (
.clk (clk),
.reset (reset),
.per_bank_snp_rsp_valid (per_bank_snp_rsp_valid),
.per_bank_snp_rsp_tag (per_bank_snp_rsp_tag),
.per_bank_snp_rsp_ready (per_bank_snp_rsp_ready),
@ -486,6 +490,5 @@ module VX_cache #(
`SCOPE_ASSIGN(scope_idram_req_ready, dram_fill_req_ready);
`SCOPE_ASSIGN(scope_idram_rsp_valid, per_bank_core_rsp_valid[0]);
`SCOPE_ASSIGN(scope_idram_rsp_ready, per_bank_core_rsp_ready[0]);
endmodule

View file

@ -72,6 +72,6 @@
`define LINE_TO_DRAM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
`define LINE_TO_BYTE_ADDR(x, i) {x, (((`BANK_SELECT_BITS + `BASE_ADDR_BITS)'(i)) << `BASE_ADDR_BITS)}
`define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))}
`endif

View file

@ -12,6 +12,9 @@ module VX_cache_core_rsp_merge #(
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) (
input wire clk,
input wire reset,
// Per Bank WB
input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
@ -32,12 +35,15 @@ module VX_cache_core_rsp_merge #(
wire [`BANK_BITS-1:0] main_bank_index;
VX_generic_priority_encoder #(
VX_fixed_arbiter #(
.N(NUM_BANKS)
) sel_bank (
.valids(per_bank_core_rsp_valid),
.index (main_bank_index),
`UNUSED_PIN (found)
.clk (clk),
.reset (reset),
.requests (per_bank_core_rsp_valid),
.grant_index (main_bank_index),
`UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_onehot)
);
integer i;

View file

@ -1,6 +1,6 @@
`include "VX_cache_config.vh"
module VX_cache_dfq_queue #(
module VX_cache_dram_fill_arb #(
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
@ -60,12 +60,15 @@ module VX_cache_dfq_queue #(
wire[`BANK_BITS-1:0] qual_request_index;
wire qual_has_request;
VX_generic_priority_encoder #(
VX_fixed_arbiter #(
.N(NUM_BANKS)
) sel_bank (
.valids(use_per_bqual_bank_dram_fill_req_valid),
.index (qual_request_index),
.found (qual_has_request)
.clk (clk),
.reset (reset),
.requests (use_per_bqual_bank_dram_fill_req_valid),
.grant_index (qual_request_index),
.grant_valid (qual_has_request),
`UNUSED_PIN (grant_onehot)
);
assign dfqq_empty = !qual_has_request;

View file

@ -74,11 +74,11 @@ module VX_cache_dram_req_arb #(
wire dfqq_push = (| per_bank_dram_fill_req_valid);
wire dfqq_full;
VX_cache_dfq_queue #(
VX_cache_dram_fill_arb #(
.BANK_LINE_SIZE(BANK_LINE_SIZE),
.NUM_BANKS(NUM_BANKS),
.DFQQ_SIZE(DFQQ_SIZE)
) cache_dfq_queue (
) dram_fill_arb (
.clk (clk),
.reset (reset),
.dfqq_push (dfqq_push),
@ -95,12 +95,15 @@ module VX_cache_dram_req_arb #(
wire [`BANK_BITS-1:0] dwb_bank;
VX_generic_priority_encoder #(
VX_fixed_arbiter #(
.N(NUM_BANKS)
) sel_dwb (
.valids(per_bank_dram_wb_req_valid),
.index (dwb_bank),
.found (dwb_valid)
.clk (clk),
.reset (reset),
.requests (per_bank_dram_wb_req_valid),
.grant_index (dwb_bank),
.grant_valid (dwb_valid),
`UNUSED_PIN (grant_onehot)
);
genvar i;

View file

@ -81,7 +81,7 @@ module VX_cache_miss_resrv #(
genvar i;
generate
for (i = 0; i < MRVQ_SIZE; i++) begin
assign valid_address_match[i] = valid_table[i] && (addr_table[i] === fill_addr_st1);
assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0;
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
end
endgenerate
@ -155,7 +155,7 @@ module VX_cache_miss_resrv #(
integer j;
always @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
$write("%t: bank%0d-%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
$write("%t: bank%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
for (j = 0; j < MRVQ_SIZE; j++) begin
if (valid_table[j]) begin
$write(" ");

View file

@ -40,8 +40,7 @@ module VX_snp_forwarder #(
`STATIC_ASSERT(NUM_REQUESTS > 1, "invalid value");
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];
reg [`REQS_BITS-1:0] fwdin_sel;
wire [`LOG2UP(SNRQ_SIZE)-1:0] sfq_write_addr, sfq_read_addr, dbg_sfq_write_addr;
wire sfq_push, sfq_pop, sfq_full;
@ -100,13 +99,18 @@ module VX_snp_forwarder #(
assign snp_req_ready = !sfq_full && fwdout_ready;
always @(posedge clk) begin
if (reset) begin
fwdin_sel <= 0;
end else if (NUM_REQUESTS > 1) begin
fwdin_sel <= fwdin_sel + 1;
end
end
reg [`REQS_BITS-1:0] fwdin_sel;
VX_fixed_arbiter #(
.N(NUM_REQUESTS)
) arbiter (
.clk (clk),
.reset (reset),
.requests (snp_fwdin_valid),
.grant_index (fwdin_sel),
`UNUSED_PIN (grant_valid),
`UNUSED_PIN (grant_onehot)
);
assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
@ -124,7 +128,7 @@ module VX_snp_forwarder #(
$display("%t: cache%0d snp fwd_out: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `DRAM_TO_BYTE_ADDR(snp_fwdout_addr[0]), snp_fwdout_invalidate[0], snp_fwdout_tag[0]);
end
if (fwdin_valid && fwdin_ready) begin
$display("%t: cache%0d snp fwd_in[%01d]: tag=%0h", $time, CACHE_ID, fwdin_sel, fwdin_tag);
$display("%t: cache%0d snp fwd_in[%0d]: tag=%0h", $time, CACHE_ID, fwdin_sel, fwdin_tag);
end
if (snp_rsp_valid && snp_rsp_ready) begin
$display("%t: cache%0d snp rsp: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag);

View file

@ -5,6 +5,9 @@ module VX_snp_rsp_arb #(
parameter BANK_LINE_SIZE = 0,
parameter SNP_REQ_TAG_WIDTH = 0
) (
input wire clk,
input wire reset,
input wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid,
input wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag,
output wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready,
@ -17,12 +20,15 @@ module VX_snp_rsp_arb #(
wire [`BANK_BITS-1:0] fsq_bank;
wire fsq_valid;
VX_generic_priority_encoder #(
VX_fixed_arbiter #(
.N(NUM_BANKS)
) sel_ffsq (
.valids (per_bank_snp_rsp_valid),
.index (fsq_bank),
.found (fsq_valid)
.clk (clk),
.reset (reset),
.requests (per_bank_snp_rsp_valid),
.grant_index (fsq_bank),
.grant_valid (fsq_valid),
`UNUSED_PIN (grant_onehot)
);
assign snp_rsp_valid = fsq_valid;

View file

@ -77,9 +77,8 @@ module VX_tag_data_access #(
wire invalidate_line;
wire tags_match;
wire real_writefill = writefill_st1e
&& ((valid_req_st1e && !use_read_valid_st1e)
|| (valid_req_st1e && use_read_valid_st1e && !tags_match));
wire real_writefill = valid_req_st1e && writefill_st1e
&& ((!use_read_valid_st1e) || (use_read_valid_st1e && !tags_match));
wire[`TAG_SELECT_BITS-1:0] writetag_st1e = writeaddr_st1e[`TAG_LINE_ADDR_RNG];
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0];
@ -173,7 +172,7 @@ module VX_tag_data_access #(
assign use_write_data = data_write;
// use "case equality" to handle uninitialized tag when block entry is not valid
assign tags_match = ((writetag_st1e == use_read_tag_st1e) === 1'b1);
assign tags_match = (writetag_st1e === use_read_tag_st1e);
wire snoop_hit_no_pending = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && (use_read_dirty_st1e || snp_invalidate_st1e) && !force_request_miss_st1e;
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;

View file

@ -0,0 +1,43 @@
`include "VX_define.vh"
// Fixed-priority arbiter.
//
// Picks one requester out of `requests` using VX_priority_encoder, so the
// selection order is whatever that encoder implements. The arbiter keeps no
// state: clk/reset exist only so that it is pin-compatible with the other
// arbiters and are explicitly marked unused.
//
// Parameters:
//   N            - number of request lines.
// Ports:
//   requests     - one bit per requester, 1 = requesting.
//   grant_index  - binary index of the selected requester.
//   grant_onehot - one-hot form of grant_index; all zeros when nothing is
//                  requesting.
//   grant_valid  - 1 when at least one request is asserted.
module VX_fixed_arbiter #(
    parameter N = 0
) (
    input  wire                  clk,
    input  wire                  reset,
    input  wire [N-1:0]          requests,
    output wire [`LOG2UP(N)-1:0] grant_index,
    output wire [N-1:0]          grant_onehot,
    output wire                  grant_valid
);
    // Purely combinational: no sequential logic in either branch.
    `UNUSED_VAR (clk)
    `UNUSED_VAR (reset)

    if (N == 1) begin

        // Single requester: the grant simply mirrors the request.
        assign grant_index  = 0;
        assign grant_onehot = requests;
        assign grant_valid  = requests[0];

    end else begin

        reg [N-1:0] grant_onehot_r;

        VX_priority_encoder # (
            .N(N)
        ) priority_encoder (
            .data_in   (requests),
            .data_out  (grant_index),
            .valid_out (grant_valid)
        );

        always @(*) begin
            grant_onehot_r = N'(0);
            // Only flag a winner when there really is one. Without the
            // grant_valid gate the bit at grant_index would be set even with
            // no request pending, which disagrees with the N == 1 branch
            // (grant_onehot == requests == 0 when idle).
            grant_onehot_r[grant_index] = grant_valid;
        end

        assign grant_onehot = grant_onehot_r;

    end

endmodule

View file

@ -1,22 +0,0 @@
`include "VX_define.vh"
// Combinational priority encoder.
//
// Reports the position of the lowest-numbered asserted bit of `valids`.
//   index - position of that bit, 0 when no bit is asserted.
//   found - 1 when at least one bit of `valids` is asserted.
module VX_generic_priority_encoder #(
    parameter N = 1
) (
    input  wire [N-1:0]             valids,
    output reg  [(`LOG2UP(N))-1:0]  index,
    output reg                      found
);
    integer k;

    always @(*) begin
        // Defaults cover the "nothing asserted" case.
        found = 0;
        index = 0;
        // Scan upwards and latch only the first hit, so the lowest set bit
        // is the one reported.
        for (k = 0; k < N; k = k + 1) begin
            if (valids[k] && !found) begin
                found = 1;
                index = k[(`LOG2UP(N))-1:0];
            end
        end
    end

endmodule

View file

@ -3,7 +3,7 @@
module VX_generic_queue #(
parameter DATAW,
parameter SIZE = 16,
parameter BUFFERED_OUTPUT = 1
parameter BUFFERED_OUTPUT = (SIZE > 8)
) (
input wire clk,
input wire reset,

View file

@ -5,49 +5,67 @@ module VX_matrix_arbiter #(
) (
input wire clk,
input wire reset,
input wire [N-1:0] requests,
output wire grant_valid,
input wire [N-1:0] requests,
output wire [`LOG2UP(N)-1:0] grant_index,
output wire [N-1:0] grant_onehot,
output wire [`LOG2UP(N)-1:0] grant_index
output wire grant_valid
);
reg [N-1:0] state [0:N-1];
wire [N-1:0] dis [0:N-1];
if (N == 1) begin
genvar i, j;
for (i = 0; i < N; ++i) begin
for (j = i + 1; j < N; ++j) begin
always @(posedge clk) begin
if (reset) begin
state[i][j] <= 0;
end else begin
state[i][j] <= (state[i][j] || grant_onehot[j]) && ~grant_onehot[i];
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign grant_index = 0;
assign grant_onehot = requests;
assign grant_valid = requests[0];
end else begin
reg [N-1:1] state [0:N-1];
wire [N-1:0] pri [0:N-1];
genvar i, j;
for (i = 0; i < N; ++i) begin
for (j = 0; j < N; ++j) begin
if (j > i) begin
assign pri[j][i] = requests[i] & state[i][j];
end
else if (j < i) begin
assign pri[j][i] = requests[i] & ~state[j][i];
end
else begin
assign pri[j][i] = 0;
end
end
assign grant_onehot[i] = requests[i] & ~(| pri[i]);
end
for (i = 0; i < N; ++i) begin
for (j = i + 1; j < N; ++j) begin
always @(posedge clk) begin
if (reset) begin
state[i][j] <= 0;
end
else begin
state[i][j] <= (state[i][j] || grant_onehot[j]) && ~grant_onehot[i];
end
end
end
end
end
for (i = 0; i < N; ++i) begin
for (j = 0; j < N; ++j) begin
if (j > i) begin
assign dis[j][i] = requests[i] & state[i][j];
end else if (j < i) begin
assign dis[j][i] = requests[i] & ~state[j][i];
end else begin
assign dis[j][i] = 0;
end
end
VX_encoder_onehot #(
.N(N)
) encoder (
.onehot (grant_onehot),
`UNUSED_PIN (valid),
.value (grant_index)
);
assign grant_valid = (| requests);
assign grant_onehot[i] = requests[i] & ~(| dis[i]);
end
VX_encoder_onehot #(
.N(N)
) encoder (
.onehot(grant_onehot),
.valid(grant_valid),
.value(grant_index)
);
endmodule

View file

@ -3,26 +3,20 @@
module VX_priority_encoder #(
parameter N
) (
input wire [N-1:0] valids,
output wire [`LOG2UP(N)-1:0] index,
output wire found
input wire [N-1:0] data_in,
output reg [`LOG2UP(N)-1:0] data_out,
output reg valid_out
);
reg [`LOG2UP(N)-1:0] index_r;
reg found_r;
integer i;
always @(*) begin
index_r = 0;
found_r = 0;
for (i = `NUM_WARPS-1; i >= 0; i = i - 1) begin
if (valids[i]) begin
index_r = `NW_BITS'(i);
found_r = 1;
data_out = 0;
valid_out = 0;
for (i = N-1; i >= 0; i = i - 1) begin
if (data_in[i]) begin
data_out = `LOG2UP(N)'(i);
valid_out = 1;
end
end
end
assign index = index_r;
assign found = found_r;
endmodule

View file

@ -0,0 +1,58 @@
`include "VX_define.vh"
// Round-robin arbiter.
//
// `state` registers the index granted in the previous cycle (0 after reset).
// For every possible value s of `state`, grant_table[s] holds the requester
// that would win. The inner loop lets later offsets override earlier ones,
// so the search order is s-1, s-2, ..., s+1 (mod N) and s itself comes last:
// the most recently granted requester has the lowest priority. When nothing
// is requesting, grant_table[s] == s, so the pointer holds its position.
//
// Parameters:
//   N            - number of request lines.
// Ports:
//   requests     - one bit per requester, 1 = requesting.
//   grant_index  - binary index of the selected requester.
//   grant_onehot - one-hot form of grant_index; all zeros when nothing is
//                  requesting.
//   grant_valid  - 1 when at least one request is asserted.
module VX_rr_arbiter #(
    parameter N = 0
) (
    input  wire                  clk,
    input  wire                  reset,
    input  wire [N-1:0]          requests,
    output wire [`LOG2UP(N)-1:0] grant_index,
    output wire [N-1:0]          grant_onehot,
    output wire                  grant_valid
);
    if (N == 1) begin

        // Single requester: nothing to rotate, the grant mirrors the request.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)
        assign grant_index  = 0;
        assign grant_onehot = requests;
        assign grant_valid  = requests[0];

    end else begin

        reg [`CLOG2(N)-1:0] grant_table [0:N-1];
        reg [`CLOG2(N)-1:0] state;
        reg [N-1:0]         grant_onehot_r;

        integer i, j;

        // Winner lookup table, one entry per possible pointer value.
        // This block depends on `requests` only.
        always @(*) begin
            for (i = 0; i < N; ++i) begin
                // Default: stay on the current pointer when nobody requests.
                grant_table[i] = `CLOG2(N)'(i);
                for (j = 0; j < N; ++j) begin
                    if (requests[(i+j) % N]) begin
                        grant_table[i] = `CLOG2(N)'((i+j) % N);
                    end
                end
            end
        end

        // One-hot grant, kept in its own block so that the table logic above
        // does not read a signal (grant_index) derived from its own outputs.
        always @(*) begin
            grant_onehot_r = N'(0);
            // Gate with grant_valid: an idle arbiter must not flag a winner,
            // matching the N == 1 branch where grant_onehot == requests.
            grant_onehot_r[grant_index] = grant_valid;
        end

        // Remember the last granted index; it becomes the lowest-priority
        // requester in the next cycle.
        always @(posedge clk) begin
            if (reset) begin
                state <= 0;
            end
            else begin
                state <= grant_index;
            end
        end

        assign grant_index  = grant_table[state];
        assign grant_onehot = grant_onehot_r;
        assign grant_valid  = (| requests);

    end

endmodule

View file

@ -9,7 +9,7 @@ DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSRQ \
-DDBG_PRINT_DRAM \
-DDBG_PRINT_WB \
-DDBG_PRINT_PIPELINE \
-DDBG_PRINT_OPAE
#DBG_PRINT=$(DBG_PRINT_FLAGS)