snooping response handling

This commit is contained in:
Blaise Tine 2020-05-11 22:55:44 -04:00
parent b6c4aa0baa
commit c49f01b769
36 changed files with 848 additions and 456 deletions

View file

@ -5,9 +5,9 @@ CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#DEBUG = 1
DEBUG = 1
CFLAGS += -fPIC

View file

@ -3,6 +3,6 @@
#define DEV_MEM_SRC_ADDR 0x10000000
#define DEV_MEM_DST_ADDR 0x20000000
#define NUM_BLOCKS 16
#define NUM_BLOCKS 1
#endif

Binary file not shown.

View file

@ -111,7 +111,7 @@
`define DDFPQ_SIZE 32
`endif
// Snoop Req Queue
// Snoop Req Queue Size
`ifndef DSNRQ_SIZE
`define DSNRQ_SIZE 32
`endif
@ -136,9 +136,9 @@
`define DLLVQ_SIZE 0
`endif
// Fill Forward SNP Queue
`ifndef DFFSQ_SIZE
`define DFFSQ_SIZE 32
// Snoop Rsp Queue Size
`ifndef DSRPQ_SIZE
`define DSRPQ_SIZE 32
`endif
// Prefetcher
@ -197,7 +197,7 @@
`define IDFPQ_SIZE 32
`endif
// Snoop Req Queue
// Snoop Req Queue Size
`ifndef ISNRQ_SIZE
`define ISNRQ_SIZE 32
`endif
@ -222,9 +222,9 @@
`define ILLVQ_SIZE 16
`endif
// Fill Forward SNP Queue
`ifndef IFFSQ_SIZE
`define IFFSQ_SIZE 8
// Snoop Rsp Queue Size
`ifndef ISRPQ_SIZE
`define ISRPQ_SIZE 8
`endif
// Prefetcher
@ -283,7 +283,7 @@
`define SDFPQ_SIZE 0
`endif
// Snoop Req Queue
// Snoop Req Queue Size
`ifndef SSNRQ_SIZE
`define SSNRQ_SIZE 16
`endif
@ -308,9 +308,9 @@
`define SLLVQ_SIZE 16
`endif
// Fill Forward SNP Queue
`ifndef SFFSQ_SIZE
`define SFFSQ_SIZE 16
// Snoop Rsp Queue Size
`ifndef SSRPQ_SIZE
`define SSRPQ_SIZE 16
`endif
// Prefetcher
@ -369,7 +369,7 @@
`define L2DFPQ_SIZE 32
`endif
// Snoop Req Queue
// Snoop Req Queue Size
`ifndef L2SNRQ_SIZE
`define L2SNRQ_SIZE 32
`endif
@ -394,9 +394,9 @@
`define L2LLVQ_SIZE 32
`endif
// Fill Forward SNP Queue
`ifndef L2FFSQ_SIZE
`define L2FFSQ_SIZE 32
// Snoop Rsp Queue Size
`ifndef L2SRPQ_SIZE
`define L2SRPQ_SIZE 32
`endif
// Prefetcher
@ -455,7 +455,7 @@
`define L3DFPQ_SIZE 32
`endif
// Snoop Req Queue
// Snoop Req Queue Size
`ifndef L3SNRQ_SIZE
`define L3SNRQ_SIZE 32
`endif
@ -480,9 +480,9 @@
`define L3LLVQ_SIZE 0
`endif
// Fill Forward SNP Queue
`ifndef L3FFSQ_SIZE
`define L3FFSQ_SIZE 8
// Snoop Rsp Queue Size
`ifndef L3SRPQ_SIZE
`define L3SRPQ_SIZE 8
`endif
// Prefetcher

View file

@ -68,7 +68,7 @@ module VX_csr_pipe #(
assign csr_wb_if.wb = wb_s2;
genvar i;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
for (i = 0; i < `NUM_THREADS; i++) begin
assign csr_wb_if.data[i] = (csr_address_s2 == `CSR_LTID) ? i :
(csr_address_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
csr_read_data_s2;

View file

@ -11,11 +11,11 @@ module VX_csr_wrapper (
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : thread_ids_init
for (i = 0; i < `NUM_THREADS; i++) begin : thread_ids_init
assign thread_ids[i] = i;
end
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : warp_ids_init
for (i = 0; i < `NUM_THREADS; i++) begin : warp_ids_init
assign warp_ids[i] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
end
endgenerate

View file

@ -29,6 +29,11 @@
if (!(cond)) $error(msg); \
endgenerate
// UNUSED(x): expands to an empty conditional that reads x and does nothing else,
// bracketed by the IGNORE_WARNINGS_BEGIN / IGNORE_WARNINGS_END macros.
// NOTE(review): presumably meant to mark x as deliberately unused so lint tools
// stay quiet; confirm against the macro's call sites.
`define UNUSED(x) \
`IGNORE_WARNINGS_BEGIN \
if (x != 0) begin end \
`IGNORE_WARNINGS_END
`define CLOG2(x) $clog2(x)
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > x) ? 1 : 0))
`define LOG2UP(x) ((x > 1) ? $clog2(x) : 1)
@ -138,6 +143,9 @@
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define DNUM_REQUESTS `NUM_THREADS
// Snoop request tag bits
`define DSNP_TAG_WIDTH `LOG2UP(`L2SNRQ_SIZE)
////////////////////////// Icache Configurable Knobs //////////////////////////
// DRAM request data bits
@ -177,6 +185,9 @@
// DRAM request tag bits
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
// Snoop request tag bits
`define L2SNP_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `LOG2UP(`L3SNRQ_SIZE) : 1)
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define L2NUM_REQUESTS (2*`NUM_CORES)
@ -191,6 +202,9 @@
// DRAM request tag bits
`define L3DRAM_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
// Snoop request tag bits
`define L3SNP_TAG_WIDTH 1
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define L3NUM_REQUESTS `NUM_CLUSTERS

View file

@ -12,6 +12,7 @@ module VX_dmem_ctrl (
VX_cache_dram_req_if dcache_dram_req_if,
VX_cache_dram_rsp_if dcache_dram_rsp_if,
VX_cache_snp_req_if dcache_snp_req_if,
VX_cache_snp_rsp_if dcache_snp_rsp_if,
// Core <-> Icache
VX_cache_core_req_if icache_core_req_if,
@ -63,7 +64,7 @@ module VX_dmem_ctrl (
.DWBQ_SIZE (`SDWBQ_SIZE),
.DFQQ_SIZE (`SDFQQ_SIZE),
.LLVQ_SIZE (`SLLVQ_SIZE),
.FFSQ_SIZE (`SFFSQ_SIZE),
.SRPQ_SIZE (`SSRPQ_SIZE),
.PRFQ_SIZE (`SPRFQ_SIZE),
.PRFQ_STRIDE (`SPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
@ -110,12 +111,24 @@ module VX_dmem_ctrl (
// Snoop request
.snp_req_valid (0),
.snp_req_addr (0),
.snp_req_tag (0),
.snp_req_ready (),
// Snoop forwarding
.snp_fwd_valid (),
.snp_fwd_addr (),
.snp_fwd_ready (0)
// Snoop response
.snp_rsp_valid (),
.snp_rsp_tag (),
.snp_rsp_ready (0),
// Snoop forward out
.snp_fwdout_valid (),
.snp_fwdout_addr (),
.snp_fwdout_tag (),
.snp_fwdout_ready (0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_tag (0),
.snp_fwdin_ready ()
`IGNORE_WARNINGS_END
);
@ -134,7 +147,7 @@ module VX_dmem_ctrl (
.DWBQ_SIZE (`DDWBQ_SIZE),
.DFQQ_SIZE (`DDFQQ_SIZE),
.LLVQ_SIZE (`DLLVQ_SIZE),
.FFSQ_SIZE (`DFFSQ_SIZE),
.SRPQ_SIZE (`DSRPQ_SIZE),
.PRFQ_SIZE (`DPRFQ_SIZE),
.PRFQ_STRIDE (`DPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
@ -143,7 +156,8 @@ module VX_dmem_ctrl (
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
.CORE_TAG_ID_BITS (`CORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH)
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
) gpu_dcache (
.clk (clk),
.reset (reset),
@ -177,16 +191,28 @@ module VX_dmem_ctrl (
.dram_rsp_tag (dcache_dram_rsp_if.dram_rsp_tag),
.dram_rsp_ready (dcache_dram_rsp_if.dram_rsp_ready),
// Snoop Request
// Snoop request
.snp_req_valid (dcache_snp_req_if.snp_req_valid),
.snp_req_addr (dcache_snp_req_if.snp_req_addr),
.snp_req_tag (dcache_snp_req_if.snp_req_tag),
.snp_req_ready (dcache_snp_req_if.snp_req_ready),
// Snoop response
.snp_rsp_valid (dcache_snp_rsp_if.snp_rsp_valid),
.snp_rsp_tag (dcache_snp_rsp_if.snp_rsp_tag),
.snp_rsp_ready (dcache_snp_rsp_if.snp_rsp_ready),
`IGNORE_WARNINGS_BEGIN
// Snoop Forward
.snp_fwd_valid (),
.snp_fwd_addr (),
.snp_fwd_ready (0)
// Snoop forward out
.snp_fwdout_valid (),
.snp_fwdout_addr (),
.snp_fwdout_tag (),
.snp_fwdout_ready (0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_tag (0),
.snp_fwdin_ready ()
`IGNORE_WARNINGS_END
);
@ -205,7 +231,7 @@ module VX_dmem_ctrl (
.DWBQ_SIZE (`IDWBQ_SIZE),
.DFQQ_SIZE (`IDFQQ_SIZE),
.LLVQ_SIZE (`ILLVQ_SIZE),
.FFSQ_SIZE (`IFFSQ_SIZE),
.SRPQ_SIZE (`ISRPQ_SIZE),
.PRFQ_SIZE (`IPRFQ_SIZE),
.PRFQ_STRIDE (`IPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
@ -249,15 +275,27 @@ module VX_dmem_ctrl (
.dram_rsp_ready (icache_dram_rsp_if.dram_rsp_ready),
`IGNORE_WARNINGS_BEGIN
// Snoop Request
// Snoop request
.snp_req_valid (0),
.snp_req_addr (0),
.snp_req_tag (0),
.snp_req_ready (),
// Snoop Forward
.snp_fwd_valid (),
.snp_fwd_addr (),
.snp_fwd_ready (0)
// Snoop response
.snp_rsp_valid (),
.snp_rsp_tag (),
.snp_rsp_ready (0),
// Snoop forward out
.snp_fwdout_valid (),
.snp_fwdout_addr (),
.snp_fwdout_tag (),
.snp_fwdout_ready (0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_tag (0),
.snp_fwdin_ready ()
`IGNORE_WARNINGS_END
);

View file

@ -15,7 +15,7 @@ module VX_dram_arb #(
input wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`BANK_LINE_WIDTH-1:0] core_req_data,
input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output reg [NUM_REQUESTS-1:0] core_req_ready,
output wire [NUM_REQUESTS-1:0] core_req_ready,
// Core response
output wire [NUM_REQUESTS-1:0] core_rsp_valid,
@ -24,11 +24,11 @@ module VX_dram_arb #(
input wire [NUM_REQUESTS-1:0] core_rsp_ready,
// DRAM request
output reg dram_req_read,
output reg dram_req_write,
output reg [`DRAM_ADDR_WIDTH-1:0] dram_req_addr,
output reg [`BANK_LINE_WIDTH-1:0] dram_req_data,
output reg [DRAM_TAG_WIDTH-1:0] dram_req_tag,
output wire dram_req_read,
output wire dram_req_write,
output wire [`DRAM_ADDR_WIDTH-1:0] dram_req_addr,
output wire [`BANK_LINE_WIDTH-1:0] dram_req_data,
output wire [DRAM_TAG_WIDTH-1:0] dram_req_tag,
input wire dram_req_ready,
// DRAM response
@ -37,47 +37,34 @@ module VX_dram_arb #(
input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready
);
reg [`REQS_BITS-1:0] bus_req_idx;
reg [`REQS_BITS-1:0] bus_req_sel;
always @(posedge clk) begin
if (reset) begin
bus_req_idx <= 0;
bus_req_sel <= 0;
end else begin
bus_req_idx <= bus_req_idx + 1;
bus_req_sel <= bus_req_sel + 1;
end
end
integer i;
generate
always @(*) begin
dram_req_read = 0;
dram_req_write = 0;
dram_req_addr = 'z;
dram_req_data = 'z;
dram_req_tag = 'z;
assign dram_req_read = core_req_read [bus_req_sel];
assign dram_req_write = core_req_write [bus_req_sel];
assign dram_req_addr = core_req_addr [bus_req_sel];
assign dram_req_data = core_req_data [bus_req_sel];
assign dram_req_tag = {core_req_tag [bus_req_sel], (`REQS_BITS)'(bus_req_sel)};
for (i = 0; i < NUM_REQUESTS; i++) begin
if (bus_req_idx == (`REQS_BITS)'(i)) begin
dram_req_read = core_req_read[i];
dram_req_write = core_req_write[i];
dram_req_addr = core_req_addr[i];
dram_req_data = core_req_data[i];
dram_req_tag = {core_req_tag[i], (`REQS_BITS)'(i)};
core_req_ready[i] = dram_req_ready;
end else begin
core_req_ready[i] = 0;
end
end
end
endgenerate
genvar j;
wire [`REQS_BITS-1:0] bus_rsp_idx = dram_rsp_tag[`REQS_BITS-1:0];
for (j = 0; j < NUM_REQUESTS; j++) begin
assign core_rsp_valid[j] = dram_rsp_valid && (bus_rsp_idx == (`REQS_BITS)'(j));
assign core_rsp_data[j] = dram_rsp_data;
assign core_rsp_tag[j] = dram_rsp_tag[`REQS_BITS +: CORE_TAG_WIDTH];
// Ready fan-out: only the requester whose index equals bus_req_sel sees
// dram_req_ready; the ready output of every other requester is held low.
for (i = 0; i < NUM_REQUESTS; i++) begin
assign core_req_ready[i] = dram_req_ready && (bus_req_sel == `REQS_BITS'(i));
end
assign dram_rsp_ready = core_rsp_ready[bus_rsp_idx];
// Response routing. The low REQS_BITS bits of the DRAM response tag select
// which requester the response belongs to.
wire [`REQS_BITS-1:0] bus_rsp_sel = dram_rsp_tag[`REQS_BITS-1:0];
genvar i;
for (i = 0; i < NUM_REQUESTS; i++) begin
// valid is raised only on the selected requester's port
assign core_rsp_valid[i] = dram_rsp_valid && (bus_rsp_sel == `REQS_BITS'(i));
// data is driven to every port; valid qualifies which one may consume it
assign core_rsp_data[i] = dram_rsp_data;
// the requester-visible tag is the CORE_TAG_WIDTH bits above the selector field
assign core_rsp_tag[i] = dram_rsp_tag[`REQS_BITS +: CORE_TAG_WIDTH];
end
// DRAM-side ready follows the ready of the requester the response is addressed to
assign dram_rsp_ready = core_rsp_ready[bus_rsp_sel];
endmodule

View file

@ -47,7 +47,7 @@ module VX_exec_unit (
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : alu_defs
for (i = 0; i < `NUM_THREADS; i++) begin : alu_defs
VX_alu_unit alu_unit (
.clk (clk),
.reset (reset),
@ -102,7 +102,7 @@ module VX_exec_unit (
wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data;
generate
for (i = 0; i < `NUM_THREADS; i=i+1) begin
for (i = 0; i < `NUM_THREADS; i++) begin
assign duplicate_PC_data[i] = exec_unit_req_if.PC_next;
end
endgenerate

View file

@ -39,7 +39,7 @@ module VX_gpr (
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
genvar i;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
for (i = 0; i < `NUM_THREADS; i++) begin
wire local_write = write_enable & writeback_if.wb_valid[i];
assign write_bit_mask[i] = {`NUM_GPRS{~local_write}};
end
@ -57,8 +57,8 @@ module VX_gpr (
`ifndef SYN
genvar j;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
for (j = 0; j < `NUM_GPRS; j = j + 1) begin
for (i = 0; i < `NUM_THREADS; i++) begin
for (j = 0; j < `NUM_GPRS; j++) begin
assign a_reg_data_uqual[i][j] = ((temp_a[i][j] === 1'dx) || cena_1 )? 1'b0 : temp_a[i][j];
assign b_reg_data_uqual[i][j] = ((temp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : temp_b[i][j];
end

View file

@ -21,7 +21,7 @@ module VX_gpr_ram (
end else begin
if (we) begin
integer i;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
for (i = 0; i < `NUM_THREADS; i++) begin
if (be[i]) begin
ram[waddr][i][0] <= wdata[i][7:0];
ram[waddr][i][1] <= wdata[i][15:8];

View file

@ -17,7 +17,7 @@ module VX_gpr_wrapper (
wire[`NUM_THREADS-1:0][31:0] jal_data;
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : jal_data_assign
for (i = 0; i < `NUM_THREADS; i++) begin : jal_data_assign
assign jal_data[i] = gpr_jal_if.curr_PC;
end
endgenerate
@ -47,7 +47,7 @@ module VX_gpr_wrapper (
`endif
generate
for (i = 0; i < `NUM_WARPS; i = i + 1) begin : warp_gprs
for (i = 0; i < `NUM_WARPS; i++) begin : warp_gprs
wire valid_write_request = i == writeback_if.warp_num;
VX_gpr gpr(
.clk (clk),

View file

@ -15,7 +15,7 @@ module VX_gpu_inst (
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i=i+1) begin : tmc_new_mask_init
for (i = 0; i < `NUM_THREADS; i++) begin : tmc_new_mask_init
assign tmc_new_mask[i] = all_threads ? 1 : i < gpu_inst_req_if.a_reg_data[0];
end
endgenerate
@ -34,7 +34,7 @@ module VX_gpu_inst (
wire[`NUM_WARPS-1:0] wspawn_new_active;
generate
for (i = 0; i < `NUM_WARPS; i=i+1) begin : wspawn_new_active_init
for (i = 0; i < `NUM_WARPS; i++) begin : wspawn_new_active_init
assign wspawn_new_active[i] = all_active ? 1 : i < gpu_inst_req_if.a_reg_data[0];
end
endgenerate
@ -56,7 +56,7 @@ module VX_gpu_inst (
wire[`NUM_THREADS-1:0] split_new_later_mask;
generate
for (i = 0; i < `NUM_THREADS; i=i+1) begin : masks_init
for (i = 0; i < `NUM_THREADS; i++) begin : masks_init
wire curr_bool = (gpu_inst_req_if.a_reg_data[i] == 32'b1);
assign split_new_use_mask[i] = curr_valids[i] & (curr_bool);
assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool);

View file

@ -23,7 +23,7 @@ module VX_inst_multiplex (
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : mask_init
for (i = 0; i < `NUM_THREADS; i++) begin : mask_init
assign is_mem_mask[i] = is_mem;
assign is_gpu_mask[i] = is_gpu;
assign is_csr_mask[i] = is_csr;

View file

@ -8,7 +8,7 @@ module VX_lsu_addr_gen (
);
genvar i;
generate
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : addresses
for (i = 0; i < `NUM_THREADS; i++) begin : addresses
assign address[i] = base_address[i] + offset;
end
endgenerate

View file

@ -52,7 +52,7 @@ module VX_scheduler (
always @(posedge clk) begin
if (reset) begin
for (w = 0; w < `NUM_WARPS; w=w+1) begin
for (i = 0; i < 32; i = i + 1) begin
for (i = 0; i < 32; i++) begin
rename_table[w][i] <= 0;
end
end

View file

@ -260,7 +260,7 @@ module VX_warp_sched (
genvar i;
generate
for (i = 0; i < `NUM_WARPS; i = i + 1) begin : stacks
for (i = 0; i < `NUM_WARPS; i++) begin : stacks
wire correct_warp_s = (i == split_warp_num);
wire correct_warp_j = (i == join_warp_num);

View file

@ -8,7 +8,7 @@ module Vortex #(
input wire clk,
input wire reset,
// DRAM Dcache Req
// DRAM Dcache request
output wire D_dram_req_read,
output wire D_dram_req_write,
output wire [`DDRAM_ADDR_WIDTH-1:0] D_dram_req_addr,
@ -16,13 +16,13 @@ module Vortex #(
output wire [`DDRAM_TAG_WIDTH-1:0] D_dram_req_tag,
input wire D_dram_req_ready,
// DRAM Dcache Rsp
// DRAM Dcache response
input wire D_dram_rsp_valid,
input wire [`DDRAM_LINE_WIDTH-1:0] D_dram_rsp_data,
input wire [`DDRAM_TAG_WIDTH-1:0] D_dram_rsp_tag,
output wire D_dram_rsp_ready,
// DRAM Icache Req
// DRAM Icache request
output wire I_dram_req_read,
output wire I_dram_req_write,
output wire [`IDRAM_ADDR_WIDTH-1:0] I_dram_req_addr,
@ -30,17 +30,22 @@ module Vortex #(
output wire [`IDRAM_TAG_WIDTH-1:0] I_dram_req_tag,
input wire I_dram_req_ready,
// DRAM Icache Rsp
// DRAM Icache response
input wire I_dram_rsp_valid,
input wire [`IDRAM_LINE_WIDTH-1:0] I_dram_rsp_data,
input wire [`IDRAM_TAG_WIDTH-1:0] I_dram_rsp_tag,
output wire I_dram_rsp_ready,
// Cache Snooping
// Snoop request
input wire snp_req_valid,
input wire [`DDRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire [`DSNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
output wire snp_rsp_valid,
output wire [`DSNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// I/O request
output wire io_req_read,
output wire io_req_write,
@ -172,12 +177,24 @@ module Vortex #(
VX_warp_ctl_if warp_ctl_if();
// Cache snooping
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if();
VX_cache_snp_req_if #(
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
.SNP_TAG_WIDTH(`DSNP_TAG_WIDTH)
) dcache_snp_req_if();
VX_cache_snp_rsp_if #(
.SNP_TAG_WIDTH(`DSNP_TAG_WIDTH)
) dcache_snp_rsp_if();
assign dcache_snp_req_if.snp_req_valid = snp_req_valid;
assign dcache_snp_req_if.snp_req_addr = snp_req_addr;
assign dcache_snp_req_if.snp_req_tag = snp_req_tag;
assign snp_req_ready = dcache_snp_req_if.snp_req_ready;
assign snp_rsp_valid = dcache_snp_rsp_if.snp_rsp_valid;
assign snp_rsp_tag = dcache_snp_rsp_if.snp_rsp_tag;
assign dcache_snp_rsp_if.snp_rsp_ready = snp_rsp_ready;
VX_front_end #(
.CORE_ID(CORE_ID)
) front_end (
@ -236,6 +253,7 @@ module Vortex #(
.dcache_dram_req_if (dcache_dram_req_if),
.dcache_dram_rsp_if (dcache_dram_rsp_if),
.dcache_snp_req_if (dcache_snp_req_if),
.dcache_snp_rsp_if (dcache_snp_rsp_if),
// Core <-> Icache
.icache_core_req_if (icache_core_req_if),

View file

@ -8,7 +8,7 @@ module Vortex_Cluster #(
input wire clk,
input wire reset,
// DRAM Req
// DRAM request
output wire dram_req_read,
output wire dram_req_write,
output wire[`L2DRAM_ADDR_WIDTH-1:0] dram_req_addr,
@ -16,16 +16,22 @@ module Vortex_Cluster #(
output wire[`L2DRAM_TAG_WIDTH-1:0] dram_req_tag,
input wire dram_req_ready,
// DRAM Rsp
// DRAM response
input wire dram_rsp_valid,
input wire[`L2DRAM_LINE_WIDTH-1:0] dram_rsp_data,
input wire[`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// Cache Snooping
input wire snp_req_valid,
input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr,
output wire snp_req_ready,
// Snoop request
input wire snp_req_valid,
input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire[`L2SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
// Snoop response
output wire snp_rsp_valid,
output wire[`L2SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// I/O request
output wire io_req_read,
@ -69,9 +75,14 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
wire[`NUM_CORES-1:0] per_core_snp_fwd_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_fwd_addr;
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready;
wire[`NUM_CORES-1:0] per_core_snp_req_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_req_addr;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_req_tag;
wire[`NUM_CORES-1:0] per_core_snp_req_ready;
wire[`NUM_CORES-1:0] per_core_snp_rsp_valid;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_rsp_tag;
wire[`NUM_CORES-1:0] per_core_snp_rsp_ready;
`IGNORE_WARNINGS_BEGIN
wire[`NUM_CORES-1:0] per_core_io_req_read;
@ -88,7 +99,7 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0] per_core_ebreak;
genvar i;
for (i = 0; i < `NUM_CORES; i = i + 1) begin
for (i = 0; i < `NUM_CORES; i++) begin
Vortex #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) vortex_core (
@ -118,9 +129,14 @@ module Vortex_Cluster #(
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.snp_req_valid (per_core_snp_fwd_valid [i]),
.snp_req_addr (per_core_snp_fwd_addr [i]),
.snp_req_ready (per_core_snp_fwd_ready [i]),
.snp_req_valid (per_core_snp_req_valid [i]),
.snp_req_addr (per_core_snp_req_addr [i]),
.snp_req_tag (per_core_snp_req_tag [i]),
.snp_req_ready (per_core_snp_req_ready [i]),
.snp_rsp_valid (per_core_snp_rsp_valid [i]),
.snp_rsp_tag (per_core_snp_rsp_tag [i]),
.snp_rsp_ready (per_core_snp_rsp_ready [i]),
.io_req_read (per_core_io_req_read [i]),
.io_req_write (per_core_io_req_write [i]),
@ -169,9 +185,14 @@ module Vortex_Cluster #(
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag;
wire l2_core_rsp_ready;
wire l2_snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] l2_snp_fwd_addr;
wire l2_snp_fwd_ready;
wire[`NUM_CORES-1:0] l2_snp_fwdout_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] l2_snp_fwdout_addr;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdout_tag;
wire[`NUM_CORES-1:0] l2_snp_fwdout_ready;
wire[`NUM_CORES-1:0] l2_snp_fwdin_valid;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdin_tag;
wire[`NUM_CORES-1:0] l2_snp_fwdin_ready;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
@ -204,12 +225,17 @@ module Vortex_Cluster #(
assign per_core_D_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i];
assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1];
assign per_core_snp_fwd_valid [(i/2)] = l2_snp_fwd_valid && l2_snp_fwd_ready;
assign per_core_snp_fwd_addr [(i/2)] = l2_snp_fwd_addr;
assign per_core_snp_req_valid [(i/2)] = l2_snp_fwdout_valid [(i/2)];
assign per_core_snp_req_addr [(i/2)] = l2_snp_fwdout_addr [(i/2)];
assign per_core_snp_req_tag [(i/2)] = l2_snp_fwdout_tag [(i/2)];
assign l2_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
assign l2_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)];
assign l2_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)];
assign per_core_snp_rsp_ready [(i/2)] = l2_snp_fwdin_ready [(i/2)];
end
assign l2_core_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready);
assign l2_snp_fwd_ready = (& per_core_snp_fwd_ready);
VX_cache #(
.CACHE_SIZE (`L2CACHE_SIZE),
@ -226,7 +252,7 @@ module Vortex_Cluster #(
.DWBQ_SIZE (`L2DWBQ_SIZE),
.DFQQ_SIZE (`L2DFQQ_SIZE),
.LLVQ_SIZE (`L2LLVQ_SIZE),
.FFSQ_SIZE (`L2FFSQ_SIZE),
.SRPQ_SIZE (`L2SRPQ_SIZE),
.PRFQ_SIZE (`L2PRFQ_SIZE),
.PRFQ_STRIDE (`L2PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
@ -235,7 +261,10 @@ module Vortex_Cluster #(
.SNOOP_FORWARDING (1),
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.NUM_SNP_REQUESTS (`NUM_CORES),
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH)
) gpu_l2cache (
.clk (clk),
.reset (reset),
@ -267,17 +296,29 @@ module Vortex_Cluster #(
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_ready (dram_rsp_ready),
.dram_rsp_ready (dram_rsp_ready),
// Snoop request
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
// Snoop forwarding
.snp_fwd_valid (l2_snp_fwd_valid),
.snp_fwd_addr (l2_snp_fwd_addr),
.snp_fwd_ready (l2_snp_fwd_ready)
// Snoop response
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
// Snoop forwarding out
.snp_fwdout_valid (l2_snp_fwdout_valid),
.snp_fwdout_addr (l2_snp_fwdout_addr),
.snp_fwdout_tag (l2_snp_fwdout_tag),
.snp_fwdout_ready (l2_snp_fwdout_ready),
// Snoop forwarding in
.snp_fwdin_valid (l2_snp_fwdin_valid),
.snp_fwdin_tag (l2_snp_fwdin_tag),
.snp_fwdin_ready (l2_snp_fwdin_ready)
);
end else begin
@ -294,9 +335,14 @@ module Vortex_Cluster #(
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_core_rsp_tag;
wire[`L2NUM_REQUESTS-1:0] arb_core_rsp_ready;
wire arb_snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] arb_snp_fwd_addr;
wire arb_snp_fwd_ready;
wire[`NUM_CORES-1:0] arb_snp_fwdout_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_snp_fwdout_addr;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdout_tag;
wire[`NUM_CORES-1:0] arb_snp_fwdout_ready;
wire[`NUM_CORES-1:0] arb_snp_fwdin_valid;
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdin_tag;
wire[`NUM_CORES-1:0] arb_snp_fwdin_ready;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign arb_core_req_read [i] = per_core_D_dram_req_read[(i/2)];
@ -329,15 +375,47 @@ module Vortex_Cluster #(
assign arb_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)];
assign arb_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
assign per_core_snp_fwd_valid [(i/2)] = arb_snp_fwd_valid && arb_snp_fwd_ready;
assign per_core_snp_fwd_addr [(i/2)] = arb_snp_fwd_addr;
end
assign arb_snp_fwd_valid = snp_req_valid;
assign arb_snp_fwd_addr = snp_req_addr;
assign arb_snp_fwd_ready = (& per_core_snp_fwd_ready);
assign per_core_snp_req_valid [(i/2)] = arb_snp_fwdout_valid [(i/2)];
assign per_core_snp_req_addr [(i/2)] = arb_snp_fwdout_addr [(i/2)];
assign per_core_snp_req_tag [(i/2)] = arb_snp_fwdout_tag [(i/2)];
assign arb_snp_fwdout_ready [(i/2)] = per_core_snp_req_ready[(i/2)];
assign snp_req_ready = arb_snp_fwd_ready;
assign arb_snp_fwdin_valid [(i/2)] = per_core_snp_rsp_valid [(i/2)];
assign arb_snp_fwdin_tag [(i/2)] = per_core_snp_rsp_tag [(i/2)];
assign per_core_snp_rsp_ready [(i/2)] = arb_snp_fwdin_ready [(i/2)];
end
VX_snp_forwarder #(
.BANK_LINE_SIZE(`L2BANK_LINE_SIZE),
.NUM_REQUESTS(`NUM_CORES),
.SNRQ_SIZE(`L2SNRQ_SIZE),
.SNP_REQ_TAG_WIDTH(`L2SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH(`DSNP_TAG_WIDTH)
) snp_forwarder (
.clk (clk),
.reset (reset),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
`IGNORE_WARNINGS_BEGIN
.snp_rsp_addr (),
`IGNORE_WARNINGS_END
.snp_rsp_ready (snp_rsp_ready),
.snp_fwdout_valid (arb_snp_fwdout_valid),
.snp_fwdout_addr (arb_snp_fwdout_addr),
.snp_fwdout_tag (arb_snp_fwdout_tag),
.snp_fwdout_ready (arb_snp_fwdout_ready),
.snp_fwdin_valid (arb_snp_fwdin_valid),
.snp_fwdin_tag (arb_snp_fwdin_tag),
.snp_fwdin_ready (arb_snp_fwdin_ready)
);
VX_dram_arb #(
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),

View file

@ -15,16 +15,22 @@ module Vortex_Socket (
input wire dram_req_ready,
// DRAM response
input wire dram_rsp_valid,
input wire[`L3DRAM_LINE_WIDTH-1:0] dram_rsp_data,
input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
input wire dram_rsp_valid,
input wire[`L3DRAM_LINE_WIDTH-1:0] dram_rsp_data,
input wire[`L3DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// Cache snooping
input wire snp_req_valid,
input wire[`L3DRAM_ADDR_WIDTH-1:0] snp_req_addr,
// Snoop request
input wire snp_req_valid,
input wire[`L3DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire[`L3SNP_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
// Snoop response
output wire snp_rsp_valid,
output wire[`L3SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// I/O request
output wire io_req_read,
output wire io_req_write,
@ -66,8 +72,13 @@ module Vortex_Socket (
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
.io_req_read (io_req_read),
.io_req_write (io_req_write),
.io_req_addr (io_req_addr),
@ -99,9 +110,14 @@ module Vortex_Socket (
wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_valid;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_ADDR_WIDTH-1:0] per_cluster_snp_fwd_addr;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr;
wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid;
wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
`IGNORE_WARNINGS_BEGIN
wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_read;
@ -118,7 +134,7 @@ module Vortex_Socket (
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
genvar i;
for (i = 0; i < `NUM_CLUSTERS; i=i+1) begin
for (i = 0; i < `NUM_CLUSTERS; i++) begin
Vortex_Cluster #(
.CLUSTER_ID(i)
) Vortex_Cluster (
@ -137,9 +153,14 @@ module Vortex_Socket (
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
.snp_req_valid (per_cluster_snp_fwd_valid [i]),
.snp_req_addr (per_cluster_snp_fwd_addr [i]),
.snp_req_ready (per_cluster_snp_fwd_ready [i]),
.snp_req_valid (per_cluster_snp_req_valid [i]),
.snp_req_addr (per_cluster_snp_req_addr [i]),
.snp_req_tag (per_cluster_snp_req_tag [i]),
.snp_req_ready (per_cluster_snp_req_ready [i]),
.snp_rsp_valid (per_cluster_snp_rsp_valid [i]),
.snp_rsp_tag (per_cluster_snp_rsp_tag [i]),
.snp_rsp_ready (per_cluster_snp_rsp_ready [i]),
.io_req_read (per_cluster_io_req_read [i]),
.io_req_write (per_cluster_io_req_write [i]),
@ -185,11 +206,16 @@ module Vortex_Socket (
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
wire l3_core_rsp_ready;
wire l3_snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] l3_snp_fwd_addr;
wire l3_snp_fwd_ready;
wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid;
wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr;
wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag;
wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready;
for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin
wire[`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid;
wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
wire[`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
for (i = 0; i < `L3NUM_REQUESTS; i++) begin
// Core Request
assign l3_core_req_valid [i] = (per_cluster_dram_req_read [i] | per_cluster_dram_req_write [i]);
assign l3_core_req_read [i] = per_cluster_dram_req_read [i] ? `BYTE_EN_LW : `BYTE_EN_NO;
@ -203,13 +229,19 @@ module Vortex_Socket (
assign per_cluster_dram_rsp_data [i] = l3_core_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = l3_core_rsp_tag [i];
// Snoop Forwarding
assign per_cluster_snp_fwd_valid [i] = l3_snp_fwd_valid && l3_snp_fwd_ready;
assign per_cluster_snp_fwd_addr [i] = l3_snp_fwd_addr;
// Snoop Forwarding out
assign per_cluster_snp_req_valid [i] = l3_snp_fwdout_valid[i];
assign per_cluster_snp_req_addr [i] = l3_snp_fwdout_addr[i];
assign per_cluster_snp_req_tag [i] = l3_snp_fwdout_tag[i];
assign l3_snp_fwdout_ready [i] = per_cluster_snp_req_ready[i];
// Snoop Forwarding in
assign l3_snp_fwdin_valid [i] = per_cluster_snp_rsp_valid [i];
assign l3_snp_fwdin_tag [i] = per_cluster_snp_rsp_tag [i];
assign per_cluster_snp_rsp_ready [i] = l3_snp_fwdin_ready [i];
end
assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready);
assign l3_snp_fwd_ready = (& per_cluster_snp_fwd_ready);
VX_cache #(
.CACHE_SIZE (`L3CACHE_SIZE),
@ -226,7 +258,7 @@ module Vortex_Socket (
.DWBQ_SIZE (`L3DWBQ_SIZE),
.DFQQ_SIZE (`L3DFQQ_SIZE),
.LLVQ_SIZE (`L3LLVQ_SIZE),
.FFSQ_SIZE (`L3FFSQ_SIZE),
.SRPQ_SIZE (`L3SRPQ_SIZE),
.PRFQ_SIZE (`L3PRFQ_SIZE),
.PRFQ_STRIDE (`L3PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
@ -235,7 +267,10 @@ module Vortex_Socket (
.SNOOP_FORWARDING (1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH)
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH),
.NUM_SNP_REQUESTS (`NUM_CLUSTERS),
.SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH)
) gpu_l3cache (
.clk (clk),
.reset (reset),
@ -272,12 +307,24 @@ module Vortex_Socket (
// Snoop request
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
// Snoop forwarding
.snp_fwd_valid (l3_snp_fwd_valid),
.snp_fwd_addr (l3_snp_fwd_addr),
.snp_fwd_ready (l3_snp_fwd_ready)
// Snoop response
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready),
// Snoop forwarding out
.snp_fwdout_valid (l3_snp_fwdout_valid),
.snp_fwdout_addr (l3_snp_fwdout_addr),
.snp_fwdout_tag (l3_snp_fwdout_tag),
.snp_fwdout_ready (l3_snp_fwdout_ready),
// Snoop forwarding in
.snp_fwdin_valid (l3_snp_fwdin_valid),
.snp_fwdin_tag (l3_snp_fwdin_tag),
.snp_fwdin_ready (l3_snp_fwdin_ready)
);
end

203
hw/rtl/cache/VX_bank.v vendored
View file

@ -21,7 +21,7 @@ module VX_bank #(
parameter MRVQ_SIZE = 0,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 0,
// Snoop Req Queue
// Snoop Req Queue Size
parameter SNRQ_SIZE = 0,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
@ -33,8 +33,8 @@ module VX_bank #(
parameter DFQQ_SIZE = 0,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 0,
// Fill Forward SNP Queue
parameter FFSQ_SIZE = 0,
// Snoop Rsp Queue Size
parameter SRPQ_SIZE = 0,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 0,
@ -52,33 +52,34 @@ module VX_bank #(
parameter CORE_TAG_WIDTH = 0,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
parameter CORE_TAG_ID_BITS = 0,
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 0
) (
input wire clk,
input wire reset,
// Core Request
input wire core_req_ready,
input wire [NUM_REQUESTS-1:0] core_req_valids,
input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_read,
input wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] core_req_write,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
input wire [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_full,
output wire core_req_ready,
// Core Response
output wire core_rsp_valid,
output wire [`REQS_BITS-1:0] core_rsp_tid,
output wire [`WORD_WIDTH-1:0] core_rsp_data,
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_pop,
input wire core_rsp_ready,
// Dram Fill Requests
output wire dram_fill_req_valid,
output wire[`LINE_ADDR_WIDTH-1:0] dram_fill_req_addr,
output wire dram_fill_req_is_snp,
input wire dram_fill_req_full,
input wire dram_fill_req_ready,
// Dram Fill Response
input wire dram_fill_rsp_valid,
@ -90,57 +91,47 @@ module VX_bank #(
output wire dram_wb_req_valid,
output wire [`LINE_ADDR_WIDTH-1:0] dram_wb_req_addr,
output wire [`BANK_LINE_WIDTH-1:0] dram_wb_req_data,
input wire dram_wb_req_pop,
input wire dram_wb_req_ready,
// Snp Request
input wire snp_req_valid,
input wire [`LINE_ADDR_WIDTH-1:0] snp_req_addr,
output wire snp_req_full,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
output wire snp_fwd_valid,
output wire [`LINE_ADDR_WIDTH-1:0] snp_fwd_addr,
input wire snp_fwd_pop
output wire snp_rsp_valid,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready
);
reg snoop_state = 0;
always @(posedge clk) begin
if (reset) begin
snoop_state <= 0;
end else begin
snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING;
end
end
wire snrq_pop;
wire snrq_empty;
wire snrq_valid_st0;
wire[`LINE_ADDR_WIDTH-1:0] snrq_addr_st0;
assign snrq_valid_st0 = !snrq_empty;
wire snrq_pop;
wire snrq_empty;
wire snrq_full;
wire [`LINE_ADDR_WIDTH-1:0] snrq_addr_st0;
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st0;
VX_generic_queue #(
.DATAW(`LINE_ADDR_WIDTH),
.DATAW(`LINE_ADDR_WIDTH + SNP_REQ_TAG_WIDTH),
.SIZE(SNRQ_SIZE)
) snr_queue (
) snp_req_queue (
.clk (clk),
.reset (reset),
.push (snp_req_valid),
.data_in (snp_req_addr),
.data_in ({snp_req_addr, snp_req_tag}),
.pop (snrq_pop),
.data_out(snrq_addr_st0),
.data_out({snrq_addr_st0, snrq_tag_st0}),
.empty (snrq_empty),
.full (snp_req_full)
.full (snrq_full)
);
assign snp_req_ready = ~snrq_full;
wire dfpq_pop;
wire dfpq_empty;
wire dfpq_full;
wire [`LINE_ADDR_WIDTH-1:0] dfpq_addr_st0;
wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0;
assign dram_fill_rsp_ready = !dfpq_full;
wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0;
VX_generic_queue #(
.DATAW(`LINE_ADDR_WIDTH + $bits(dram_fill_rsp_data)),
@ -156,9 +147,12 @@ module VX_bank #(
.full (dfpq_full)
);
assign dram_fill_rsp_ready = !dfpq_full;
wire reqq_pop;
wire reqq_push;
wire reqq_empty;
wire reqq_full;
wire reqq_req_st0;
wire[`REQS_BITS-1:0] reqq_req_tid_st0;
`IGNORE_WARNINGS_BEGIN
@ -169,14 +163,12 @@ module VX_bank #(
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0;
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0;
assign reqq_push = core_req_ready && (| core_req_valids);
VX_cache_req_queue #(
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.REQQ_SIZE (REQQ_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.REQQ_SIZE (REQQ_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS(CORE_TAG_ID_BITS)
) req_queue (
.clk (clk),
.reset (reset),
@ -199,8 +191,11 @@ module VX_bank #(
.reqq_req_mem_read_st0 (reqq_req_mem_read_st0),
.reqq_req_mem_write_st0(reqq_req_mem_write_st0),
.reqq_empty (reqq_empty),
.reqq_full (core_req_full)
);
.reqq_full (reqq_full)
);
assign core_req_ready = ~reqq_full;
assign reqq_push = (| core_req_valids) && core_req_ready;
wire mrvq_pop;
wire mrvq_full;
@ -237,7 +232,7 @@ module VX_bank #(
integer j;
always @(*) begin
is_fill_in_pipe = 0;
for (j = 0; j < STAGE_1_CYCLES; j=j+1) begin
for (j = 0; j < STAGE_1_CYCLES; j++) begin
if (is_fill_st1[j]) begin
is_fill_in_pipe = 1;
end
@ -251,7 +246,7 @@ module VX_bank #(
assign mrvq_pop = mrvq_valid_st0 && !stall_bank_pipe;
assign dfpq_pop = !mrvq_pop && !dfpq_empty && !stall_bank_pipe;
assign reqq_pop = !mrvq_stop && !mrvq_pop && !dfpq_pop && !reqq_empty && reqq_req_st0 && !stall_bank_pipe && !is_fill_st1[0] && !is_fill_in_pipe;
assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && snrq_valid_st0 && !stall_bank_pipe;
assign snrq_pop = !reqq_pop && !reqq_pop && !mrvq_pop && !dfpq_pop && !snrq_empty && !stall_bank_pipe;
wire qual_is_fill_st0;
wire qual_valid_st0;
@ -262,7 +257,7 @@ module VX_bank #(
wire [`BANK_LINE_WIDTH-1:0] qual_writedata_st0;
wire [`REQ_INST_META_WIDTH-1:0] qual_inst_meta_st0;
wire qual_going_to_write_st0;
wire qual_is_snp;
wire qual_is_snp_st0;
wire valid_st1 [STAGE_1_CYCLES-1:0];
wire [`LINE_ADDR_WIDTH-1:0] addr_st1 [STAGE_1_CYCLES-1:0];
@ -270,6 +265,7 @@ module VX_bank #(
wire [`WORD_WIDTH-1:0] writeword_st1 [STAGE_1_CYCLES-1:0];
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0];
wire [`BANK_LINE_WIDTH-1:0] writedata_st1 [STAGE_1_CYCLES-1:0];
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st1 [STAGE_1_CYCLES-1:0];
wire is_snp_st1 [STAGE_1_CYCLES-1:0];
assign qual_is_fill_st0 = dfpq_pop;
@ -298,34 +294,34 @@ module VX_bank #(
(snrq_pop) ? 1 :
0;
assign qual_is_snp = snrq_pop ? 1 : 0;
assign qual_is_snp_st0 = snrq_pop ? 1 : 0;
assign qual_writeword_st0 = mrvq_pop ? mrvq_writeword_st0 :
reqq_pop ? reqq_req_writeword_st0 :
0;
VX_generic_register #(
.N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
.N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH)
) s0_1_c0 (
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (0),
.in ({qual_is_snp, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({is_snp_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
.in ({qual_is_snp_st0, snrq_tag_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({is_snp_st1[0], snrq_tag_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
);
genvar i;
for (i = 1; i < STAGE_1_CYCLES; i = i + 1) begin
for (i = 1; i < STAGE_1_CYCLES; i++) begin
VX_generic_register #(
.N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
.N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH + SNP_REQ_TAG_WIDTH)
) s0_1_cc (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
.flush(0),
.in ({is_snp_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}),
.out ({is_snp_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]})
.in ({is_snp_st1[i-1], snrq_tag_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}),
.out ({is_snp_st1[i], snrq_tag_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]})
);
end
@ -355,10 +351,10 @@ module VX_bank #(
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE)
) tag_data_access (
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.stall_bank_pipe(stall_bank_pipe),
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.stall_bank_pipe (stall_bank_pipe),
// Initial Read
.readaddr_st10 (addr_st1[0][`LINE_SELECT_BITS-1:0]),
@ -397,17 +393,18 @@ module VX_bank #(
wire [`REQ_INST_META_WIDTH-1:0] inst_meta_st2;
wire [`TAG_SELECT_BITS-1:0] readtag_st2;
wire fill_saw_dirty_st2;
wire [SNP_REQ_TAG_WIDTH-1:0] snrq_tag_st2;
wire is_snp_st2;
VX_generic_register #(
.N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `REQ_INST_META_WIDTH)
.N(1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `WORD_WIDTH + `BANK_LINE_WIDTH + `TAG_SELECT_BITS + 1 + 1 + `REQ_INST_META_WIDTH + SNP_REQ_TAG_WIDTH)
) st_1e_2 (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
.flush(0),
.in ({is_snp_st1e, fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
.out ({is_snp_st2 , fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 })
.in ({is_snp_st1e, snrq_tag_st1[STAGE_1_CYCLES-1], fill_saw_dirty_st1e, is_fill_st1[STAGE_1_CYCLES-1] , qual_valid_st1e_2, addr_st1[STAGE_1_CYCLES-1], wsel_st1[STAGE_1_CYCLES-1], writeword_st1[STAGE_1_CYCLES-1], readword_st1e, readdata_st1e, readtag_st1e, miss_st1e, dirty_st1e, inst_meta_st1[STAGE_1_CYCLES-1]}),
.out ({is_snp_st2 , snrq_tag_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2 , wsel_st2, writeword_st2 , readword_st2 , readdata_st2 , readtag_st2 , miss_st2 , dirty_st2 , inst_meta_st2 })
);
wire should_flush;
@ -415,7 +412,7 @@ module VX_bank #(
wire cwbq_full;
wire dwbq_full;
wire ffsq_full;
wire srpq_full;
wire invalidate_fill;
// Enqueue to miss reserv if it's a valid miss
@ -424,11 +421,11 @@ module VX_bank #(
&& miss_st2
&& !mrvq_full
&& !(should_flush && dwbq_push)
&& !((is_snp_st2 && valid_st2 && ffsq_full)
&& !((is_snp_st2 && valid_st2 && srpq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
|| (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready));
assign miss_add_addr = addr_st2;
assign miss_add_wsel = wsel_st2;
@ -474,21 +471,23 @@ module VX_bank #(
);
// Enqueue to CWB Queue
// TODO: should investigate the need for "SNOOP_FORWARDING" here
wire cwbq_push = (valid_st2 && !miss_st2)
&& !cwbq_full
&& (miss_add_mem_write == `BYTE_EN_NO)
&& !((is_snp_st2 && valid_st2 && ffsq_full)
&& !((is_snp_st2 && valid_st2 && srpq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
|| (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready));
wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2;
wire [`REQS_BITS-1:0] cwbq_tid = miss_add_tid;
wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag;
wire cwbq_empty;
wire cwbq_pop;
assign core_rsp_valid = !cwbq_empty;
assign cwbq_pop = core_rsp_valid && core_rsp_ready;
VX_generic_queue #(
.DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH),
@ -500,29 +499,28 @@ module VX_bank #(
.push (cwbq_push),
.data_in ({cwbq_tid, cwbq_tag, cwbq_data}),
.pop (core_rsp_pop),
.pop (cwbq_pop),
.data_out({core_rsp_tid, core_rsp_tag, core_rsp_data}),
.empty (cwbq_empty),
.full (cwbq_full)
);
assign should_flush = snoop_state
&& valid_st2
assign should_flush = valid_st2
&& (miss_add_mem_write != `BYTE_EN_NO)
&& !is_snp_st2 && !is_fill_st2;
&& !is_snp_st2
&& !is_fill_st2;
// Enqueue to DWB Queue
assign dwbq_push = ((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2 || should_flush)
&& !dwbq_full
&& !((is_snp_st2 && valid_st2 && ffsq_full)
&& !((is_snp_st2 && valid_st2 && srpq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
|| (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready));
wire[`LINE_ADDR_WIDTH-1:0] dwbq_req_addr;
wire dwbq_empty;
wire[`BANK_LINE_WIDTH-1:0] dwbq_req_data;
wire dwbq_empty;
if (SNOOP_FORWARDING) begin
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
@ -532,7 +530,7 @@ module VX_bank #(
assign dwbq_req_addr = {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
end
wire possible_fill = valid_st2 && miss_st2 && !dram_fill_req_full && !is_snp_st2;
wire possible_fill = valid_st2 && miss_st2 && dram_fill_req_ready && ~is_snp_st2;
wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2;
VX_fill_invalidator #(
@ -549,9 +547,8 @@ module VX_bank #(
);
// Enqueue in dram_fill_req
assign dram_fill_req_valid = possible_fill && !invalidate_fill;
assign dram_fill_req_is_snp = is_snp_st2 && valid_st2 && miss_st2;
assign dram_fill_req_addr = addr_st2;
assign dram_fill_req_valid = possible_fill && !invalidate_fill;
assign dram_fill_req_addr = addr_st2;
assign dram_wb_req_valid = !dwbq_empty;
@ -565,43 +562,43 @@ module VX_bank #(
.push (dwbq_push),
.data_in ({dwbq_req_addr, dwbq_req_data}),
.pop (dram_wb_req_pop),
.pop (dram_wb_req_ready),
.data_out({dram_wb_req_addr, dram_wb_req_data}),
.empty (dwbq_empty),
.full (dwbq_full)
);
wire snp_fwd_push;
wire ffsq_empty;
wire snp_rsp_push;
wire srpq_empty;
assign snp_fwd_push = is_snp_st2
assign snp_rsp_push = is_snp_st2
&& valid_st2
&& !ffsq_full
&& !srpq_full
&& !(((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
|| (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready));
assign snp_fwd_valid = !ffsq_empty;
assign snp_rsp_valid = !srpq_empty;
VX_generic_queue #(
.DATAW(`LINE_ADDR_WIDTH),
.SIZE(FFSQ_SIZE)
) ffs_queue (
.DATAW(SNP_REQ_TAG_WIDTH),
.SIZE(SRPQ_SIZE)
) snp_rsp_queue (
.clk (clk),
.reset (reset),
.push (snp_fwd_push),
.data_in (addr_st2),
.pop (snp_fwd_pop),
.data_out(snp_fwd_addr),
.empty (ffsq_empty),
.full (ffsq_full)
.push (snp_rsp_push),
.data_in (snrq_tag_st2),
.pop (snp_rsp_ready),
.data_out(snp_rsp_tag),
.empty (srpq_empty),
.full (srpq_full)
);
assign stall_bank_pipe = (is_snp_st2 && valid_st2 && ffsq_full)
assign stall_bank_pipe = (is_snp_st2 && valid_st2 && srpq_full)
|| ((valid_st2 && !miss_st2) && cwbq_full)
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full);
|| (valid_st2 && miss_st2 && !invalidate_fill && ~dram_fill_req_ready);
endmodule : VX_bank

View file

@ -22,7 +22,7 @@ module VX_cache #(
parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
// Snoop Req Queue
// Snoop Req Queue Size
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
@ -34,8 +34,8 @@ module VX_cache #(
parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
// Fill Forward SNP Queue
parameter FFSQ_SIZE = 8,
// Snoop Rsp Queue Size
parameter SRPQ_SIZE = 8,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
@ -60,7 +60,16 @@ module VX_cache #(
parameter CORE_TAG_ID_BITS = 0,
// dram request tag size
parameter DRAM_TAG_WIDTH = 1
parameter DRAM_TAG_WIDTH = 1,
// Number of snoop forwarding requests
parameter NUM_SNP_REQUESTS = 2,
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 1,
// Snooping forward tag width
parameter SNP_FWD_TAG_WIDTH = 1
) (
input wire clk,
input wire reset,
@ -94,56 +103,117 @@ module VX_cache #(
input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// Snoop Req
// Snoop request
input wire snp_req_valid,
input wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr,
input wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag,
output wire snp_req_ready,
// Snoop Forward
output wire snp_fwd_valid,
output wire [`DRAM_ADDR_WIDTH-1:0] snp_fwd_addr,
input wire snp_fwd_ready
// Snoop response
output wire snp_rsp_valid,
output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// Snoop Forwarding out
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_valid,
output wire [NUM_SNP_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0] snp_fwdout_addr,
output wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdout_tag,
`IGNORE_WARNINGS_BEGIN
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdout_ready,
// Snoop forwarding in
input wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_valid,
input wire [NUM_SNP_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0] snp_fwdin_tag,
`IGNORE_WARNINGS_END
output wire [NUM_SNP_REQUESTS-1:0] snp_fwdin_ready
);
wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valids;
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_pop;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid;
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
wire dfqq_full;
wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid;
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr;
wire dram_fill_req_ready;
wire [NUM_BANKS-1:0] per_bank_dram_fill_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_dram_wb_queue_pop;
wire [NUM_BANKS-1:0] per_bank_dram_wb_req_ready;
wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid;
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr;
wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data;
wire [NUM_BANKS-1:0] per_bank_reqq_full;
wire [NUM_BANKS-1:0] per_bank_snp_req_full;
wire [NUM_BANKS-1:0] per_bank_snp_req_ready;
wire [NUM_BANKS-1:0] per_bank_snp_fwd_valid;
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_snp_fwd_addr;
wire [NUM_BANKS-1:0] per_bank_snp_fwd_pop;
wire [NUM_BANKS-1:0] per_bank_snp_rsp_valid;
wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_snp_rsp_ready;
`DEBUG_BEGIN
wire [NUM_BANKS-1:0] per_bank_dram_fill_req_is_snp;
`DEBUG_END
wire snp_req_valid_qual;
wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual;
wire [SNP_REQ_TAG_WIDTH-1:0] snp_req_tag_qual;
wire snp_req_ready_qual;
assign dram_req_tag = dram_req_addr;
assign core_req_ready = ~(| per_bank_reqq_full);
assign snp_req_ready = ~(| per_bank_snp_req_full);
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
// Snoop request front-end, selected at elaboration time by SNOOP_FORWARDING.
// Either way, the banks consume the "qualified" request signals
// (snp_req_valid_qual / snp_req_addr_qual / snp_req_tag_qual) and report
// back-pressure through snp_req_ready_qual.
if (SNOOP_FORWARDING) begin
// Forwarding enabled: a VX_snp_forwarder sits between the external snoop
// request port and the banks, and owns the snp_fwdout_* / snp_fwdin_* ports.
// NOTE(review): presumably it broadcasts each request to NUM_SNP_REQUESTS
// downstream caches and releases it to the banks once they have answered;
// confirm against VX_snp_forwarder.
VX_snp_forwarder #(
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_REQUESTS (NUM_SNP_REQUESTS),
.SNRQ_SIZE (SNRQ_SIZE),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (SNP_FWD_TAG_WIDTH)
) snp_forwarder (
.clk (clk),
.reset (reset),
// External snoop request coming into this cache
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_tag (snp_req_tag),
.snp_req_ready (snp_req_ready),
// Forwarder output: drives the qualified snoop request handed to the banks
.snp_rsp_valid (snp_req_valid_qual),
.snp_rsp_addr (snp_req_addr_qual),
.snp_rsp_tag (snp_req_tag_qual),
.snp_rsp_ready (snp_req_ready_qual),
// Forwarded requests going out (one lane per NUM_SNP_REQUESTS)
.snp_fwdout_valid (snp_fwdout_valid),
.snp_fwdout_addr (snp_fwdout_addr),
.snp_fwdout_tag (snp_fwdout_tag),
.snp_fwdout_ready (snp_fwdout_ready),
// Responses to the forwarded requests coming back in
.snp_fwdin_valid (snp_fwdin_valid),
.snp_fwdin_tag (snp_fwdin_tag),
.snp_fwdin_ready (snp_fwdin_ready)
);
end else begin
// Forwarding disabled: tie off the forwarding ports (never valid, never ready)
assign snp_fwdout_valid = 0;
assign snp_fwdout_addr = 0;
assign snp_fwdout_tag = 0;
assign snp_fwdin_ready = 0;
// ...and pass the external snoop request straight through to the banks
assign snp_req_valid_qual = snp_req_valid;
assign snp_req_addr_qual = snp_req_addr;
assign snp_req_tag_qual = snp_req_tag;
assign snp_req_ready = snp_req_ready_qual;
end
assign dram_req_tag = dram_req_addr;
assign core_req_ready = (& per_bank_core_req_ready);
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
assign snp_req_ready_qual = (& per_bank_snp_req_ready);
VX_cache_core_req_bank_sel #(
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS)
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS)
) cache_core_req_bank_sell (
.core_req_valid (core_req_valid),
.core_req_addr (core_req_addr),
@ -152,7 +222,7 @@ module VX_cache #(
genvar i;
generate
for (i = 0; i < NUM_BANKS; i = i + 1) begin
for (i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valids;
wire [NUM_REQUESTS-1:0][31:0] curr_bank_core_req_addr;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
@ -160,58 +230,57 @@ module VX_cache #(
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_read;
wire [NUM_REQUESTS-1:0][`BYTE_EN_BITS-1:0] curr_bank_core_req_write;
wire curr_bank_core_rsp_pop;
wire curr_bank_core_rsp_valid;
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
wire curr_bank_core_rsp_ready;
wire curr_bank_dram_fill_rsp_valid;
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_fill_rsp_data;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_rsp_addr;
wire curr_bank_dram_fill_rsp_ready;
wire curr_bank_dram_fill_req_full;
wire curr_bank_dram_fill_req_valid;
wire curr_bank_dram_fill_req_is_snp;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_fill_req_addr;
wire curr_bank_dram_fill_req_ready;
wire curr_bank_dram_wb_req_pop;
wire curr_bank_dram_wb_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_wb_req_addr;
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_wb_req_data;
wire curr_bank_dram_wb_req_ready;
wire curr_bank_snp_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
wire curr_bank_snp_req_full;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire curr_bank_snp_req_ready;
wire curr_bank_snp_fwd_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_fwd_addr;
wire curr_bank_snp_fwd_pop;
wire curr_bank_snp_rsp_valid;
wire [SNP_REQ_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
wire curr_bank_snp_rsp_ready;
wire curr_bank_reqq_full;
wire curr_bank_core_req_ready;
// Core Req
assign curr_bank_core_req_valids = per_bank_valids[i];
assign curr_bank_core_req_valids = per_bank_valids[i] & {NUM_REQUESTS{core_req_ready}};
assign curr_bank_core_req_addr = core_req_addr;
assign curr_bank_core_req_data = core_req_data;
assign curr_bank_core_req_tag = core_req_tag;
assign curr_bank_core_req_read = core_req_read;
assign curr_bank_core_req_write = core_req_write;
assign per_bank_reqq_full[i] = curr_bank_reqq_full;
assign per_bank_core_req_ready[i] = curr_bank_core_req_ready;
// Core WB
assign curr_bank_core_rsp_pop = per_bank_core_rsp_pop[i];
assign curr_bank_core_rsp_ready = per_bank_core_rsp_ready[i];
assign per_bank_core_rsp_valid [i] = curr_bank_core_rsp_valid;
assign per_bank_core_rsp_tid [i] = curr_bank_core_rsp_tid;
assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag;
assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;
// Dram fill request
assign curr_bank_dram_fill_req_full = dfqq_full;
// Dram fill request
assign per_bank_dram_fill_req_valid[i] = curr_bank_dram_fill_req_valid;
assign per_bank_dram_fill_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_fill_req_addr, i);
assign per_bank_dram_fill_req_is_snp[i] = curr_bank_dram_fill_req_is_snp;
assign curr_bank_dram_fill_req_ready = dram_fill_req_ready;
// Dram fill response
assign curr_bank_dram_fill_rsp_valid = dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i);
@ -219,44 +288,46 @@ module VX_cache #(
assign curr_bank_dram_fill_rsp_data = dram_rsp_data;
assign per_bank_dram_fill_rsp_ready[i] = curr_bank_dram_fill_rsp_ready;
// Dram writeback request
assign curr_bank_dram_wb_req_pop = per_bank_dram_wb_queue_pop[i];
// Dram writeback request
assign per_bank_dram_wb_req_valid[i] = curr_bank_dram_wb_req_valid;
assign per_bank_dram_wb_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_wb_req_addr, i);
assign per_bank_dram_wb_req_data[i] = curr_bank_dram_wb_req_data;
assign curr_bank_dram_wb_req_ready = per_bank_dram_wb_req_ready[i];
// Snoop Request
assign curr_bank_snp_req_valid = snp_req_valid && (`DRAM_ADDR_BANK(snp_req_addr) == i);
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr);
assign per_bank_snp_req_full[i] = curr_bank_snp_req_full;
// Snoop request
assign curr_bank_snp_req_valid = snp_req_valid_qual && (`DRAM_ADDR_BANK(snp_req_addr_qual) == i);
assign curr_bank_snp_req_addr = `DRAM_TO_LINE_ADDR(snp_req_addr_qual);
assign curr_bank_snp_req_tag = snp_req_tag_qual;
assign per_bank_snp_req_ready[i] = curr_bank_snp_req_ready;
// Snoop Fwd
assign per_bank_snp_fwd_valid[i] = curr_bank_snp_fwd_valid;
assign per_bank_snp_fwd_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_snp_fwd_addr, i);
assign curr_bank_snp_fwd_pop = per_bank_snp_fwd_pop[i];
// Snoop response
assign per_bank_snp_rsp_valid[i] = curr_bank_snp_rsp_valid;
assign per_bank_snp_rsp_tag[i] = curr_bank_snp_rsp_tag;
assign curr_bank_snp_rsp_ready = per_bank_snp_rsp_ready[i];
VX_bank #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FFSQ_SIZE (FFSQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE),
.SNOOP_FORWARDING (SNOOP_FORWARDING),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.SRPQ_SIZE (SRPQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE),
.SNOOP_FORWARDING (SNOOP_FORWARDING),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) bank (
.clk (clk),
.reset (reset),
@ -267,21 +338,19 @@ module VX_cache #(
.core_req_addr (curr_bank_core_req_addr),
.core_req_data (curr_bank_core_req_data),
.core_req_tag (curr_bank_core_req_tag),
.core_req_full (curr_bank_reqq_full),
.core_req_ready (core_req_ready),
.core_req_ready (curr_bank_core_req_ready),
// Core response
.core_rsp_valid (curr_bank_core_rsp_valid),
.core_rsp_tid (curr_bank_core_rsp_tid),
.core_rsp_data (curr_bank_core_rsp_data),
.core_rsp_tag (curr_bank_core_rsp_tag),
.core_rsp_pop (curr_bank_core_rsp_pop),
.core_rsp_ready (curr_bank_core_rsp_ready),
// Dram fill request
.dram_fill_req_valid (curr_bank_dram_fill_req_valid),
.dram_fill_req_addr (curr_bank_dram_fill_req_addr),
.dram_fill_req_is_snp (curr_bank_dram_fill_req_is_snp),
.dram_fill_req_full (curr_bank_dram_fill_req_full),
.dram_fill_req_ready (curr_bank_dram_fill_req_ready),
// Dram fill response
.dram_fill_rsp_valid (curr_bank_dram_fill_rsp_valid),
@ -293,20 +362,45 @@ module VX_cache #(
.dram_wb_req_valid (curr_bank_dram_wb_req_valid),
.dram_wb_req_addr (curr_bank_dram_wb_req_addr),
.dram_wb_req_data (curr_bank_dram_wb_req_data),
.dram_wb_req_pop (curr_bank_dram_wb_req_pop),
.dram_wb_req_ready (curr_bank_dram_wb_req_ready),
// Snoop request
.snp_req_valid (curr_bank_snp_req_valid),
.snp_req_addr (curr_bank_snp_req_addr),
.snp_req_full (curr_bank_snp_req_full),
.snp_req_tag (curr_bank_snp_req_tag),
.snp_req_ready (curr_bank_snp_req_ready),
// Snoop forwarding
.snp_fwd_valid (curr_bank_snp_fwd_valid),
.snp_fwd_addr (curr_bank_snp_fwd_addr),
.snp_fwd_pop (curr_bank_snp_fwd_pop)
// Snoop response
.snp_rsp_valid (curr_bank_snp_rsp_valid),
.snp_rsp_tag (curr_bank_snp_rsp_tag),
.snp_rsp_ready (curr_bank_snp_rsp_ready)
);
end
endgenerate
endgenerate
// DRAM request arbiter: takes the per-bank fill and writeback requests and
// drives the cache's single dram_req_* port.
VX_cache_dram_req_arb #(
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.PRFQ_SIZE (PRFQ_SIZE),
.PRFQ_STRIDE (PRFQ_STRIDE)
) cache_dram_req_arb (
.clk (clk),
.reset (reset),
// Fill requests: one valid/addr per bank, a single shared ready for all banks
.per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid),
.per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr),
.dram_fill_req_ready (dram_fill_req_ready),
// Writeback requests: one valid/addr/data per bank, with a per-bank ready
.per_bank_dram_wb_req_valid (per_bank_dram_wb_req_valid),
.per_bank_dram_wb_req_addr (per_bank_dram_wb_req_addr),
.per_bank_dram_wb_req_data (per_bank_dram_wb_req_data),
.per_bank_dram_wb_req_ready (per_bank_dram_wb_req_ready),
// Merged request towards DRAM
.dram_req_read (dram_req_read),
.dram_req_write (dram_req_write),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_ready (dram_req_ready)
);
VX_cache_core_rsp_merge #(
.NUM_BANKS (NUM_BANKS),
@ -319,48 +413,24 @@ module VX_cache #(
.per_bank_core_rsp_valid (per_bank_core_rsp_valid),
.per_bank_core_rsp_data (per_bank_core_rsp_data),
.per_bank_core_rsp_tag (per_bank_core_rsp_tag),
.per_bank_core_rsp_pop (per_bank_core_rsp_pop),
.per_bank_core_rsp_ready (per_bank_core_rsp_ready),
.core_rsp_valid (core_rsp_valid),
.core_rsp_data (core_rsp_data),
.core_rsp_tag (core_rsp_tag),
.core_rsp_ready (core_rsp_ready)
);
);
VX_cache_dram_req_arb #(
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.PRFQ_SIZE (PRFQ_SIZE),
.PRFQ_STRIDE (PRFQ_STRIDE)
) cache_dram_req_arb (
.clk (clk),
.reset (reset),
.dfqq_full (dfqq_full),
.per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid),
.per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr),
.per_bank_dram_wb_queue_pop (per_bank_dram_wb_queue_pop),
.per_bank_dram_wb_req_valid (per_bank_dram_wb_req_valid),
.per_bank_dram_wb_req_addr (per_bank_dram_wb_req_addr),
.per_bank_dram_wb_req_data (per_bank_dram_wb_req_data),
.dram_req_read (dram_req_read),
.dram_req_write (dram_req_write),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_ready (dram_req_ready)
);
VX_snp_fwd_arb #(
.NUM_BANKS(NUM_BANKS),
.BANK_LINE_SIZE(BANK_LINE_SIZE)
) snp_fwd_arb (
.per_bank_snp_fwd_valid (per_bank_snp_fwd_valid),
.per_bank_snp_fwd_addr (per_bank_snp_fwd_addr),
.per_bank_snp_fwd_pop (per_bank_snp_fwd_pop),
.snp_fwd_valid (snp_fwd_valid),
.snp_fwd_addr (snp_fwd_addr),
.snp_fwd_ready (snp_fwd_ready)
VX_snp_rsp_arb #(
.NUM_BANKS (NUM_BANKS),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) snp_rsp_arb (
.per_bank_snp_rsp_valid (per_bank_snp_rsp_valid),
.per_bank_snp_rsp_tag (per_bank_snp_rsp_tag),
.per_bank_snp_rsp_ready (per_bank_snp_rsp_ready),
.snp_rsp_valid (snp_rsp_valid),
.snp_rsp_tag (snp_rsp_tag),
.snp_rsp_ready (snp_rsp_ready)
);
endmodule

View file

@ -21,7 +21,7 @@ module VX_cache_core_req_bank_sel #(
integer i;
always @(*) begin
per_bank_valids = 0;
for (i = 0; i < NUM_REQUESTS; i = i + 1) begin
for (i = 0; i < NUM_REQUESTS; i++) begin
if (NUM_BANKS == 1) begin
// If there is only one bank, then only map requests to that bank
per_bank_valids[0][i] = core_req_valid[i];

View file

@ -17,7 +17,7 @@ module VX_cache_core_rsp_merge #(
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
output wire [NUM_BANKS-1:0] per_bank_core_rsp_pop,
output wire [NUM_BANKS-1:0] per_bank_core_rsp_ready,
// Core Writeback
output reg [NUM_REQUESTS-1:0] core_rsp_valid,
@ -28,7 +28,7 @@ module VX_cache_core_rsp_merge #(
reg [NUM_BANKS-1:0] per_bank_core_rsp_pop_unqual;
assign per_bank_core_rsp_pop = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}};
assign per_bank_core_rsp_ready = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}};
wire [`BANK_BITS-1:0] main_bank_index;
wire found_bank;
@ -48,7 +48,7 @@ module VX_cache_core_rsp_merge #(
always @(*) begin
core_rsp_valid = 0;
core_rsp_data = 0;
for (i = 0; i < NUM_BANKS; i = i + 1) begin
for (i = 0; i < NUM_BANKS; i++) begin
if (found_bank
&& per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
@ -68,7 +68,7 @@ module VX_cache_core_rsp_merge #(
core_rsp_valid = 0;
core_rsp_data = 0;
core_rsp_tag = 0;
for (i = 0; i < NUM_BANKS; i = i + 1) begin
for (i = 0; i < NUM_BANKS; i++) begin
if (found_bank
&& per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]

View file

@ -19,13 +19,13 @@ module VX_cache_dram_req_arb #(
// Fill Request
input wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid,
input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_fill_req_addr,
output wire dfqq_full,
output wire dram_fill_req_ready,
// Writeback Request
input wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid,
input wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_wb_req_addr,
input wire [NUM_BANKS-1:0][`BANK_LINE_WIDTH-1:0] per_bank_dram_wb_req_data,
output wire [NUM_BANKS-1:0] per_bank_dram_wb_queue_pop,
output wire [NUM_BANKS-1:0] per_bank_dram_wb_req_ready,
// Merged Request
output wire dram_req_read,
@ -70,6 +70,7 @@ module VX_cache_dram_req_arb #(
wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop
wire dfqq_push = (| per_bank_dram_fill_req_valid);
wire dfqq_full;
VX_cache_dfq_queue #(
.BANK_LINE_SIZE(BANK_LINE_SIZE),
@ -100,7 +101,9 @@ module VX_cache_dram_req_arb #(
.found (dwb_valid)
);
assign per_bank_dram_wb_queue_pop = dram_req_ready ? (use_wb_valid & ((1 << dwb_bank))) : 0;
assign dram_fill_req_ready = ~dfqq_full;
assign per_bank_dram_wb_req_ready = dram_req_ready ? (use_wb_valid & ((1 << dwb_bank))) : 0;
wire dram_req_valid = dwb_valid || dfqq_req || pref_pop;

View file

@ -66,7 +66,7 @@ module VX_cache_miss_resrv #(
reg [MRVQ_SIZE-1:0] make_ready;
genvar i;
generate
for (i = 0; i < MRVQ_SIZE; i=i+1) begin
for (i = 0; i < MRVQ_SIZE; i++) begin
assign make_ready[i] = is_fill_st1 && valid_table[i] && (addr_table[i] == fill_addr_st1);
end
endgenerate

116
hw/rtl/cache/VX_snp_forwarder.v vendored Normal file
View file

@ -0,0 +1,116 @@
`include "VX_define.vh"

// Snoop forwarder.
//
// Accepts snoop requests, broadcasts each accepted request to all
// NUM_REQUESTS forwarding ports (tagged with the pending-table slot it was
// stored in) and collects the per-port replies one port at a time. A snoop
// response carrying the stored address/tag is emitted when the last
// outstanding reply for a slot is taken.
//
// Parameters:
//   BANK_LINE_SIZE     - not referenced directly in this module body
//                        (may be consumed by included macros - TODO confirm)
//   NUM_REQUESTS       - number of forwarding ports
//   SNRQ_SIZE          - number of slots in the pending-request table
//   SNP_REQ_TAG_WIDTH  - width of the tag on the request/response side
//   SNP_FWD_TAG_WIDTH  - width of the tag on the forwarding side
module VX_snp_forwarder #(
    parameter BANK_LINE_SIZE    = 0,
    parameter NUM_REQUESTS      = 0,
    parameter SNRQ_SIZE         = 0,
    parameter SNP_REQ_TAG_WIDTH = 0,
    parameter SNP_FWD_TAG_WIDTH = 0
) (
    input wire clk,
    input wire reset,

    // Snoop request
    input wire                          snp_req_valid,
    input wire [`DRAM_ADDR_WIDTH-1:0]   snp_req_addr,
    input wire [SNP_REQ_TAG_WIDTH-1:0]  snp_req_tag,
    output wire                         snp_req_ready,

    // Snoop response
    output wire                         snp_rsp_valid,
    output wire [`DRAM_ADDR_WIDTH-1:0]  snp_rsp_addr,
    output wire [SNP_REQ_TAG_WIDTH-1:0] snp_rsp_tag,
    input wire                          snp_rsp_ready,

    // Snoop Forwarding out
    output wire [NUM_REQUESTS-1:0]                          snp_fwdout_valid,
    output wire [NUM_REQUESTS-1:0][`DRAM_ADDR_WIDTH-1:0]    snp_fwdout_addr,
    output wire [NUM_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0]   snp_fwdout_tag,
    input wire [NUM_REQUESTS-1:0]                           snp_fwdout_ready,

    // Snoop forwarding in
    input wire [NUM_REQUESTS-1:0]                           snp_fwdin_valid,
    input wire [NUM_REQUESTS-1:0][SNP_FWD_TAG_WIDTH-1:0]    snp_fwdin_tag,
    output wire [NUM_REQUESTS-1:0]                          snp_fwdin_ready
);
    // Pending table: original {addr, tag} of each in-flight request and the
    // number of forwarding replies still expected for it.
    reg [`DRAM_ADDR_WIDTH+SNP_REQ_TAG_WIDTH-1:0] pending_reqs [SNRQ_SIZE-1:0];
    reg [`REQS_BITS-1:0] pending_cntrs [SNRQ_SIZE-1:0];

    reg [`LOG2UP(SNRQ_SIZE)-1:0] rd_ptr, wr_ptr;
    reg [`LOG2UP(SNRQ_SIZE)-1:0] pending_size;

    // Forwarding-in port currently being serviced (advances every cycle)
    reg [`REQS_BITS-1:0] fwdin_sel;

    wire enqueue, dequeue;

    wire fwdout_ready;

    wire fwdin_valid;
    wire [SNP_FWD_TAG_WIDTH-1:0] fwdin_tag;
    wire fwdin_ready;
    wire fwdin_taken;

    // A request is broadcast atomically, so every forwarding port must be ready
    assign fwdout_ready = (& snp_fwdout_ready);

    assign snp_req_ready = (pending_size != `LOG2UP(SNRQ_SIZE)'(SNRQ_SIZE-1)) // not full
                        && fwdout_ready;

    genvar i;

    for (i = 0; i < NUM_REQUESTS; i++) begin
        assign snp_fwdout_valid[i] = enqueue && fwdout_ready;
        assign snp_fwdout_addr[i]  = snp_req_addr;
        assign snp_fwdout_tag[i]   = wr_ptr; // slot index travels as the forwarding tag
    end

    // Replies are only consumed while the response side can accept a response
    assign fwdin_ready = snp_rsp_ready;

    assign fwdin_taken = fwdin_valid && fwdin_ready;

    // The response fires when the reply being taken is the last one expected
    assign snp_rsp_valid = fwdin_taken && (1 == pending_cntrs[fwdin_tag]); // send response
    assign {snp_rsp_addr, snp_rsp_tag} = pending_reqs[fwdin_tag];

    assign enqueue = snp_req_valid && snp_req_ready;

    // NOTE(review): rd_ptr/pending_size only advance when the completing slot
    // is the oldest one; this presumably relies on slots completing in
    // allocation order - confirm.
    assign dequeue = snp_rsp_valid && (rd_ptr == fwdin_tag);

    // Pending-table bookkeeping. fwdin_sel is intentionally NOT touched here:
    // it is owned by the round-robin block below, and assigning it from two
    // always blocks would give the register multiple drivers.
    always @(posedge clk) begin
        if (reset) begin
            rd_ptr       <= 0;
            wr_ptr       <= 0;
            pending_size <= 0;
        end else begin
            if (enqueue) begin
                pending_reqs[wr_ptr]  <= {snp_req_addr, snp_req_tag};
                // NOTE(review): if `REQS_BITS is LOG2UP(NUM_REQUESTS) this cast
                // truncates for power-of-two NUM_REQUESTS; the count-down to 1
                // would then only work through wrap-around - confirm width.
                pending_cntrs[wr_ptr] <= `REQS_BITS'(NUM_REQUESTS);
                wr_ptr <= wr_ptr + 1;
                if (!dequeue) begin
                    pending_size <= pending_size + 1;
                end
            end

            if (dequeue) begin
                rd_ptr <= rd_ptr + 1;
                if (!enqueue) begin
                    pending_size <= pending_size - 1;
                end
            end

            if (fwdin_taken) begin
                pending_cntrs[fwdin_tag] <= pending_cntrs[fwdin_tag] - 1;
            end
        end
    end

    // Round-robin selection of the forwarding-in port (sole driver of fwdin_sel).
    // Wrap explicitly at the last port so the selector never points past the
    // port vectors when NUM_REQUESTS is not a power of two.
    always @(posedge clk) begin
        if (reset) begin
            fwdin_sel <= 0;
        end else if (fwdin_sel == `REQS_BITS'(NUM_REQUESTS-1)) begin
            fwdin_sel <= 0;
        end else begin
            fwdin_sel <= fwdin_sel + 1;
        end
    end

    assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
    assign fwdin_tag   = snp_fwdin_tag[fwdin_sel];

    // Only the currently selected port sees ready
    for (i = 0; i < NUM_REQUESTS; i++) begin
        assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
    end

endmodule

View file

@ -1,39 +0,0 @@
`include "VX_cache_config.vh"

// Snoop-forward arbiter: picks one bank among those presenting a valid
// snoop-forward entry (via VX_generic_priority_encoder), drives its address
// on the merged output and pulses the matching per-bank pop bit.
// Banks are only considered while snp_fwd_ready is asserted.
module VX_snp_fwd_arb #(
    parameter NUM_BANKS      = 1,
    parameter BANK_LINE_SIZE = 1
) (
    input  wire [NUM_BANKS-1:0]                        per_bank_snp_fwd_valid,
    input  wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0]  per_bank_snp_fwd_addr,
    output reg  [NUM_BANKS-1:0]                        per_bank_snp_fwd_pop,

    output wire                                        snp_fwd_valid,
    output wire [`DRAM_ADDR_WIDTH-1:0]                 snp_fwd_addr,
    input  wire                                        snp_fwd_ready
);
    // Mask out every bank when the consumer cannot accept a forward
    wire [NUM_BANKS-1:0] bank_candidates = {NUM_BANKS{snp_fwd_ready}} & per_bank_snp_fwd_valid;

    wire [`BANK_BITS-1:0] grant_bank;
    wire                  grant_valid;

    VX_generic_priority_encoder #(
        .N (NUM_BANKS)
    ) sel_ffsq (
        .valids (bank_candidates),
        .index  (grant_bank),
        .found  (grant_valid)
    );

    assign snp_fwd_addr  = per_bank_snp_fwd_addr[grant_bank];
    assign snp_fwd_valid = grant_valid;

    // Pop bit is set for the granted bank only; all others stay low
    always @(*) begin
        per_bank_snp_fwd_pop = 0;
        if (grant_valid) begin
            per_bank_snp_fwd_pop[grant_bank] = 1;
        end
    end

endmodule

38
hw/rtl/cache/VX_snp_rsp_arb.v vendored Normal file
View file

@ -0,0 +1,38 @@
`include "VX_cache_config.vh"

// Snoop-response arbiter: selects one bank among those presenting a valid
// snoop response (via VX_generic_priority_encoder), forwards its tag on the
// merged output and asserts ready towards that bank only.
// Banks are only considered while snp_rsp_ready is asserted.
module VX_snp_rsp_arb #(
    parameter NUM_BANKS         = 0,
    parameter BANK_LINE_SIZE    = 0,
    parameter SNP_REQ_TAG_WIDTH = 0
) (
    input  wire [NUM_BANKS-1:0]                        per_bank_snp_rsp_valid,
    input  wire [NUM_BANKS-1:0][SNP_REQ_TAG_WIDTH-1:0] per_bank_snp_rsp_tag,
    output wire [NUM_BANKS-1:0]                        per_bank_snp_rsp_ready,

    output wire                                        snp_rsp_valid,
    output wire [SNP_REQ_TAG_WIDTH-1:0]                snp_rsp_tag,
    input  wire                                        snp_rsp_ready
);
    // Mask out every bank when the consumer cannot accept a response
    wire [NUM_BANKS-1:0] bank_candidates = {NUM_BANKS{snp_rsp_ready}} & per_bank_snp_rsp_valid;

    wire [`BANK_BITS-1:0] grant_bank;
    wire                  grant_valid;

    VX_generic_priority_encoder #(
        .N (NUM_BANKS)
    ) sel_ffsq (
        .valids (bank_candidates),
        .index  (grant_bank),
        .found  (grant_valid)
    );

    assign snp_rsp_tag   = per_bank_snp_rsp_tag[grant_bank];
    assign snp_rsp_valid = grant_valid;

    // Ready goes high for the granted bank only
    genvar b;
    for (b = 0; b < NUM_BANKS; b++) begin
        assign per_bank_snp_rsp_ready[b] = (grant_bank == `BANK_BITS'(b)) && grant_valid;
    end

endmodule

View file

@ -110,7 +110,7 @@ module VX_tag_data_access #(
);
genvar i;
for (i = 1; i < STAGE_1_CYCLES-1; i = i + 1) begin
for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
VX_generic_register #(
.N( 1 + 1 + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
) s0_1_cc (
@ -127,7 +127,7 @@ module VX_tag_data_access #(
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && DRAM_ENABLE; // Dirty only applies in Dcache
assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writeaddr_st1e[`TAG_LINE_ADDR_RNG]; // Tag is always the same in SM
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
assign use_read_data_st1e[i * `WORD_WIDTH +: `WORD_WIDTH] = read_data_st1c[STAGE_1_CYCLES-1][i * `WORD_WIDTH +: `WORD_WIDTH];
end
@ -144,7 +144,7 @@ module VX_tag_data_access #(
&& !miss_st1e
&& !is_snp_st1e;
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
assign we[i] = (force_write || (should_write && !real_writefill)) ? 4'b1111 : 4'b0000;
end
@ -199,7 +199,7 @@ module VX_tag_data_access #(
assign readword_st1e = data_Qual;
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
wire normal_write = (block_offset == i[`WORD_SELECT_BITS-1:0]) && should_write && !real_writefill;
assign we[i] = (force_write) ? 4'b1111 :

View file

@ -44,7 +44,7 @@ module VX_tag_data_structure #(
integer i;
always @(posedge clk) begin
if (reset) begin
for (i = 0; i < `BANK_LINE_COUNT; i = i + 1) begin
for (i = 0; i < `BANK_LINE_COUNT; i++) begin
valid[i] <= 0;
dirty[i] <= 0;
end
@ -65,7 +65,7 @@ module VX_tag_data_structure #(
valid[write_addr] <= 0;
end
for (i = 0; i < `BANK_LINE_WORDS; i = i + 1) begin
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
if (write_enable[i][0]) data[write_addr][i][0] <= write_data[i * `WORD_WIDTH + 0 * `BYTE_WIDTH +: `BYTE_WIDTH];
if (write_enable[i][1]) data[write_addr][i][1] <= write_data[i * `WORD_WIDTH + 1 * `BYTE_WIDTH +: `BYTE_WIDTH];
if (write_enable[i][2]) data[write_addr][i][2] <= write_data[i * `WORD_WIDTH + 2 * `BYTE_WIDTH +: `BYTE_WIDTH];

View file

@ -4,11 +4,13 @@
`include "../cache/VX_cache_config.vh"
interface VX_cache_snp_req_if #(
parameter DRAM_ADDR_WIDTH = 1
parameter DRAM_ADDR_WIDTH = 0,
parameter SNP_TAG_WIDTH = 0
) ();
wire snp_req_valid;
wire [DRAM_ADDR_WIDTH-1:0] snp_req_addr;
wire [SNP_TAG_WIDTH-1:0] snp_req_tag;
wire snp_req_ready;
endinterface

View file

@ -0,0 +1,16 @@
// Include guard: the interface is declared once per compilation unit.
`ifndef VX_CACHE_SNP_RSP_IF
`define VX_CACHE_SNP_RSP_IF
`include "../cache/VX_cache_config.vh"
// Signal bundle for cache snoop responses.
// SNP_TAG_WIDTH sets the width of snp_rsp_tag; the default of 0 has to be
// overridden by the instantiating module to get a usable tag vector.
// NOTE(review): presumably a valid/ready handshake where the responder drives
// valid+tag and the receiver drives ready - confirm against the users.
interface VX_cache_snp_rsp_if #(
parameter SNP_TAG_WIDTH = 0
) ();
// Response-valid flag
wire snp_rsp_valid;
// Tag accompanying the response
wire [SNP_TAG_WIDTH-1:0] snp_rsp_tag;
// Response-ready flag
wire snp_rsp_ready;
endinterface
`endif

View file

@ -160,21 +160,29 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
// align address to LLC block boundaries
auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE;
auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
int outstanding_snp_reqs = 0;
// submit snoop requests for the needed blocks
vortex_->snp_req_addr = aligned_addr_start;
vortex_->snp_req_addr = aligned_addr_start;
vortex_->snp_req_valid = true;
vortex_->snp_rsp_ready = true;
for (;;) {
this->step();
if (vortex_->snp_rsp_valid) {
--outstanding_snp_reqs;
}
if (vortex_->snp_req_valid && vortex_->snp_req_ready) {
++outstanding_snp_reqs;
vortex_->snp_req_addr += 1;
if (vortex_->snp_req_addr >= aligned_addr_end) {
vortex_->snp_req_valid = false;
break;
vortex_->snp_req_valid = false;
}
}
}
this->wait(PIPELINE_FLUSH_LATENCY);
if (!vortex_->snp_req_valid
&& 0 == outstanding_snp_reqs) {
break;
}
}
}
bool Simulator::run() {

View file

@ -18,7 +18,6 @@
#define DRAM_LATENCY 100
#define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16
#define PIPELINE_FLUSH_LATENCY 1000
typedef struct {
int cycles_left;