rtl gpr multicore fix

This commit is contained in:
Blaise Tine 2020-05-06 09:05:10 -04:00
parent b1fdf0a947
commit 330bbc4f56
18 changed files with 210 additions and 200 deletions

View file

@ -4,6 +4,9 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
DEBUG = 1
CFLAGS += -fPIC

View file

@ -116,9 +116,9 @@ int main(int argc, char *argv[]) {
// parse command-line arguments
parse_args(argc, argv);
uint32_t block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
uint32_t block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
if (data_stride == 0xffffffff) {
@ -204,9 +204,9 @@ int main(int argc, char *argv[]) {
// upload kernel argument
std::cout << "upload kernel argument" << std::endl;
{
kernel_arg.num_warps = max_warps;
kernel_arg.num_warps = max_warps;
kernel_arg.num_threads = max_threads;
kernel_arg.stride = data_stride;
kernel_arg.stride = data_stride;
auto buf_ptr = (int*)vx_host_ptr(buffer);
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));

View file

@ -87,11 +87,6 @@
`define DWORD_SIZE 4
`endif
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`ifndef DNUM_REQUESTS
`define DNUM_REQUESTS `NUM_THREADS
`endif
// Number of cycles to complete stage 1 (read from memory)
`ifndef DSTAGE_1_CYCLES
`define DSTAGE_1_CYCLES 1
@ -178,11 +173,6 @@
`define IWORD_SIZE 4
`endif
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`ifndef INUM_REQUESTS
`define INUM_REQUESTS 1
`endif
// Number of cycles to complete stage 1 (read from memory)
`ifndef ISTAGE_1_CYCLES
`define ISTAGE_1_CYCLES 1
@ -269,11 +259,6 @@
`define SWORD_SIZE 4
`endif
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`ifndef SNUM_REQUESTS
`define SNUM_REQUESTS `NUM_THREADS
`endif
// Number of cycles to complete stage 1 (read from memory)
`ifndef SSTAGE_1_CYCLES
`define SSTAGE_1_CYCLES 1
@ -360,11 +345,6 @@
`define L2WORD_SIZE `L2BANK_LINE_SIZE
`endif
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`ifndef L2NUM_REQUESTS
`define L2NUM_REQUESTS (2*`NUM_CORES)
`endif
// Number of cycles to complete stage 1 (read from memory)
`ifndef L2STAGE_1_CYCLES
`define L2STAGE_1_CYCLES 1
@ -451,11 +431,6 @@
`define L3WORD_SIZE `L3BANK_LINE_SIZE
`endif
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`ifndef L3NUM_REQUESTS
`define L3NUM_REQUESTS `NUM_CLUSTERS
`endif
// Number of cycles to complete stage 1 (read from memory)
`ifndef L3STAGE_1_CYCLES
`define L3STAGE_1_CYCLES 1

View file

@ -125,6 +125,9 @@
// DRAM request tag bits
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define DNUM_REQUESTS `NUM_THREADS
////////////////////////// Icache Configurable Knobs //////////////////////////
// DRAM request data bits
@ -136,6 +139,9 @@
// DRAM request tag bits
`define IDRAM_TAG_WIDTH `IDRAM_ADDR_WIDTH
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define INUM_REQUESTS 1
////////////////////////// SM Configurable Knobs //////////////////////////////
// DRAM request data bits
@ -147,6 +153,9 @@
// DRAM request tag bits
`define SDRAM_TAG_WIDTH `SDRAM_ADDR_WIDTH
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define SNUM_REQUESTS `NUM_THREADS
////////////////////////// L2cache Configurable Knobs /////////////////////////
// DRAM request data bits
@ -158,6 +167,9 @@
// DRAM request tag bits
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`L2DRAM_ADDR_WIDTH+`CLOG2(`NUM_CORES*2)))
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define L2NUM_REQUESTS (2*`NUM_CORES)
////////////////////////// L3cache Configurable Knobs /////////////////////////
// DRAM request data bits
@ -169,5 +181,8 @@
// DRAM request tag bits
`define L3DRAM_TAG_WIDTH ((`NUM_CLUSTERS > 1) ? `L3DRAM_ADDR_WIDTH : `L2DRAM_TAG_WIDTH)
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define L3NUM_REQUESTS `NUM_CLUSTERS
// VX_DEFINE
`endif

View file

@ -37,33 +37,32 @@ module VX_dram_arb #(
input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready
);
reg [`LOG2UP(NUM_REQUESTS)-1:0] bus_sel;
reg [`REQS_BITS-1:0] bus_req_idx;
always @(posedge clk) begin
if (reset) begin
bus_sel <= 0;
bus_req_idx <= 0;
end else begin
bus_sel <= bus_sel + 1;
bus_req_idx <= bus_req_idx + 1;
end
end
integer i;
generate
always @(*) begin
dram_req_read = 'z;
dram_req_write = 'z;
dram_req_read = 0;
dram_req_write = 0;
dram_req_addr = 'z;
dram_req_data = 'z;
dram_req_tag = 'z;
for (i = 0; i < NUM_REQUESTS; i++) begin
if (bus_sel == (`LOG2UP(NUM_REQUESTS))'(i)) begin
if (bus_req_idx == (`REQS_BITS)'(i)) begin
dram_req_read = core_req_read[i];
dram_req_write = core_req_write[i];
dram_req_addr = core_req_addr[i];
dram_req_data = core_req_data[i];
dram_req_tag = {core_req_tag[i], (`LOG2UP(NUM_REQUESTS))'(i)};
dram_req_tag = {core_req_tag[i], (`REQS_BITS)'(i)};
core_req_ready[i] = dram_req_ready;
end else begin
core_req_ready[i] = 0;
@ -72,24 +71,13 @@ module VX_dram_arb #(
end
endgenerate
reg is_valid;
generate
always @(*) begin
dram_rsp_ready = 0;
for (i = 0; i < NUM_REQUESTS; i++) begin
is_valid = (dram_rsp_tag[`LOG2UP(NUM_REQUESTS)-1:0] == (`LOG2UP(NUM_REQUESTS))'(i));
core_rsp_valid[i] = dram_rsp_valid & is_valid;
core_rsp_data[i] = dram_rsp_data;
core_rsp_tag[i] = dram_rsp_tag[`LOG2UP(NUM_REQUESTS) +: CORE_TAG_WIDTH];
if (is_valid) begin
dram_rsp_ready = core_rsp_ready[i];
end
end
end
endgenerate
// Response routing: steer the single DRAM response to the requester that issued it.
genvar j;
// The low REQS_BITS bits of the response tag carry the requester index
// (the request path appends the index below the core tag when forming dram_req_tag).
wire [`REQS_BITS-1:0] bus_rsp_idx = dram_rsp_tag[`REQS_BITS-1:0];
for (j = 0; j < NUM_REQUESTS; j++) begin
// Only the requester whose index matches the tag sees a valid response.
assign core_rsp_valid[j] = dram_rsp_valid && (bus_rsp_idx == (`REQS_BITS)'(j));
// Data is broadcast to every requester; core_rsp_valid qualifies it.
assign core_rsp_data[j] = dram_rsp_data;
// Drop the index bits and hand back the CORE_TAG_WIDTH bits above them.
assign core_rsp_tag[j] = dram_rsp_tag[`REQS_BITS +: CORE_TAG_WIDTH];
end
// Back-pressure comes from the requester selected by the response tag.
assign dram_rsp_ready = core_rsp_ready[bus_rsp_idx];
endmodule

View file

@ -37,9 +37,9 @@ module Vortex #(
output wire I_dram_rsp_ready,
// Cache Snooping
input wire llc_snp_req_valid,
input wire [`DDRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
input wire snp_req_valid,
input wire [`DDRAM_ADDR_WIDTH-1:0] snp_req_addr,
output wire snp_req_ready,
// I/O request
output wire io_req_read,
@ -174,9 +174,9 @@ module Vortex #(
// Cache snooping
VX_cache_snp_req_if #(.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH)) dcache_snp_req_if();
assign dcache_snp_req_if.snp_req_valid = llc_snp_req_valid;
assign dcache_snp_req_if.snp_req_addr = llc_snp_req_addr;
assign llc_snp_req_ready = dcache_snp_req_if.snp_req_ready;
assign dcache_snp_req_if.snp_req_valid = snp_req_valid;
assign dcache_snp_req_if.snp_req_addr = snp_req_addr;
assign snp_req_ready = dcache_snp_req_if.snp_req_ready;
VX_front_end front_end (
.clk (clk),

View file

@ -23,9 +23,9 @@ module Vortex_Cluster #(
output wire dram_rsp_ready,
// Cache Snooping
input wire llc_snp_req_valid,
input wire[`L2DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
input wire snp_req_valid,
input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr,
output wire snp_req_ready,
// I/O request
output wire io_req_read,
@ -69,8 +69,8 @@ module Vortex_Cluster #(
wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag;
wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready;
wire snp_fwd_valid;
wire[`DDRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CORES-1:0] per_core_snp_fwd_valid;
wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_fwd_addr;
wire[`NUM_CORES-1:0] per_core_snp_fwd_ready;
`IGNORE_WARNINGS_BEGIN
@ -118,9 +118,9 @@ module Vortex_Cluster #(
.I_dram_rsp_data (per_core_I_dram_rsp_data [i]),
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_core_snp_fwd_ready [i]),
.snp_req_valid (per_core_snp_fwd_valid [i]),
.snp_req_addr (per_core_snp_fwd_addr [i]),
.snp_req_ready (per_core_snp_fwd_ready [i]),
.io_req_read (per_core_io_req_read [i]),
.io_req_write (per_core_io_req_write [i]),
@ -167,7 +167,11 @@ module Vortex_Cluster #(
wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_valid;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] l2_core_rsp_data;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] l2_core_rsp_tag;
wire[`L2NUM_REQUESTS-1:0] l2_core_rsp_ready;
wire l2_core_rsp_ready;
wire l2_snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] l2_snp_fwd_addr;
wire l2_snp_fwd_ready;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign l2_core_req_valid [i] = (per_core_D_dram_req_read[(i/2)] | per_core_D_dram_req_write[(i/2)]);
@ -191,19 +195,22 @@ module Vortex_Cluster #(
assign per_core_D_dram_req_ready [(i/2)] = l2_core_req_ready;
assign per_core_I_dram_req_ready [(i/2)] = l2_core_req_ready;
assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i];
assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1];
assign per_core_D_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i] && l2_core_rsp_ready;
assign per_core_I_dram_rsp_valid [(i/2)] = l2_core_rsp_valid[i+1] && l2_core_rsp_ready;
assign per_core_D_dram_rsp_data [(i/2)] = l2_core_rsp_data[i];
assign per_core_I_dram_rsp_data [(i/2)] = l2_core_rsp_data[i+1];
assign per_core_D_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i];
assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1];
assign per_core_I_dram_rsp_tag [(i/2)] = l2_core_rsp_tag[i+1];
assign l2_core_rsp_ready [i] = per_core_D_dram_rsp_ready [(i/2)];
assign l2_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
assign per_core_snp_fwd_valid [(i/2)] = l2_snp_fwd_valid && l2_snp_fwd_ready;
assign per_core_snp_fwd_addr [(i/2)] = l2_snp_fwd_addr;
end
assign l2_core_rsp_ready = (& per_core_D_dram_rsp_ready) && (& per_core_I_dram_rsp_ready);
assign l2_snp_fwd_ready = (& per_core_snp_fwd_ready);
VX_cache #(
.CACHE_SIZE (`L2CACHE_SIZE),
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
@ -246,7 +253,7 @@ module Vortex_Cluster #(
.core_rsp_valid (l2_core_rsp_valid),
.core_rsp_data (l2_core_rsp_data),
.core_rsp_tag (l2_core_rsp_tag),
.core_rsp_ready (& l2_core_rsp_ready),
.core_rsp_ready (l2_core_rsp_ready),
// DRAM request
.dram_req_read (dram_req_read),
@ -263,61 +270,74 @@ module Vortex_Cluster #(
.dram_rsp_ready (dram_rsp_ready),
// Snoop request
.snp_req_valid (llc_snp_req_valid),
.snp_req_addr (llc_snp_req_addr),
.snp_req_ready (llc_snp_req_ready),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_ready (snp_req_ready),
// Snoop forwarding
.snp_fwd_valid (snp_fwd_valid),
.snp_fwd_addr (snp_fwd_addr),
.snp_fwd_ready (& per_core_snp_fwd_ready)
.snp_fwd_valid (l2_snp_fwd_valid),
.snp_fwd_addr (l2_snp_fwd_addr),
.snp_fwd_ready (l2_snp_fwd_ready)
);
end else begin
wire[`L2NUM_REQUESTS-1:0] per_core_req_read;
wire[`L2NUM_REQUESTS-1:0] per_core_req_write;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_req_addr;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_req_tag;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_req_data;
wire[`L2NUM_REQUESTS-1:0] per_core_req_ready;
wire[`L2NUM_REQUESTS-1:0] arb_core_req_read;
wire[`L2NUM_REQUESTS-1:0] arb_core_req_write;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_core_req_addr;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_core_req_tag;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_core_req_data;
wire[`L2NUM_REQUESTS-1:0] arb_core_req_ready;
wire[`L2NUM_REQUESTS-1:0] per_core_rsp_valid;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_rsp_data;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_rsp_tag;
wire[`L2NUM_REQUESTS-1:0] per_core_rsp_ready;
wire[`L2NUM_REQUESTS-1:0] arb_core_rsp_valid;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_core_rsp_data;
wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_core_rsp_tag;
wire[`L2NUM_REQUESTS-1:0] arb_core_rsp_ready;
wire arb_snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] arb_snp_fwd_addr;
wire arb_snp_fwd_ready;
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
assign per_core_req_read [i] = per_core_D_dram_req_read[(i/2)];
assign per_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)];
assign arb_core_req_read [i] = per_core_D_dram_req_read[(i/2)];
assign arb_core_req_read [i+1] = per_core_I_dram_req_read[(i/2)];
assign per_core_req_write [i] = per_core_D_dram_req_write[(i/2)];
assign per_core_req_write [i+1] = 0;
assign arb_core_req_write [i] = per_core_D_dram_req_write[(i/2)];
assign arb_core_req_write [i+1] = 0;
assign per_core_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
assign per_core_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
assign arb_core_req_addr [i] = per_core_D_dram_req_addr[(i/2)];
assign arb_core_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)];
assign per_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
assign per_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
assign arb_core_req_data [i] = per_core_D_dram_req_data[(i/2)];
assign arb_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)];
assign per_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign per_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign arb_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)];
assign arb_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)];
assign per_core_D_dram_req_ready [(i/2)] = per_core_req_ready[i];
assign per_core_I_dram_req_ready [(i/2)] = per_core_req_ready[i+1];
assign per_core_D_dram_req_ready [(i/2)] = arb_core_req_ready[i];
assign per_core_I_dram_req_ready [(i/2)] = arb_core_req_ready[i+1];
assign per_core_D_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i];
assign per_core_I_dram_rsp_valid [(i/2)] = per_core_rsp_valid[i+1];
assign per_core_D_dram_rsp_valid [(i/2)] = arb_core_rsp_valid[i];
assign per_core_I_dram_rsp_valid [(i/2)] = arb_core_rsp_valid[i+1];
assign per_core_D_dram_rsp_data [(i/2)] = per_core_rsp_data[i];
assign per_core_I_dram_rsp_data [(i/2)] = per_core_rsp_data[i+1];
assign per_core_D_dram_rsp_data [(i/2)] = arb_core_rsp_data[i];
assign per_core_I_dram_rsp_data [(i/2)] = arb_core_rsp_data[i+1];
assign per_core_D_dram_rsp_tag [(i/2)] = per_core_rsp_tag[i];
assign per_core_I_dram_rsp_tag [(i/2)] = per_core_rsp_tag[i+1];
assign per_core_D_dram_rsp_tag [(i/2)] = arb_core_rsp_tag[i];
assign per_core_I_dram_rsp_tag [(i/2)] = arb_core_rsp_tag[i+1];
assign per_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)];
assign per_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
assign arb_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)];
assign arb_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)];
assign per_core_snp_fwd_valid [(i/2)] = arb_snp_fwd_valid && arb_snp_fwd_ready;
assign per_core_snp_fwd_addr [(i/2)] = arb_snp_fwd_addr;
end
assign arb_snp_fwd_valid = snp_req_valid;
assign arb_snp_fwd_addr = snp_req_addr;
assign arb_snp_fwd_ready = (& per_core_snp_fwd_ready);
assign snp_req_ready = arb_snp_fwd_ready;
VX_dram_arb #(
.BANK_LINE_SIZE (`L2BANK_LINE_SIZE),
@ -329,18 +349,18 @@ module Vortex_Cluster #(
.reset (reset),
// Core request
.core_req_read (per_core_req_read),
.core_req_write (per_core_req_write),
.core_req_addr (per_core_req_addr),
.core_req_data (per_core_req_data),
.core_req_tag (per_core_req_tag),
.core_req_ready (per_core_req_ready),
.core_req_read (arb_core_req_read),
.core_req_write (arb_core_req_write),
.core_req_addr (arb_core_req_addr),
.core_req_data (arb_core_req_data),
.core_req_tag (arb_core_req_tag),
.core_req_ready (arb_core_req_ready),
// Core response
.core_rsp_valid (per_core_rsp_valid),
.core_rsp_data (per_core_rsp_data),
.core_rsp_tag (per_core_rsp_tag),
.core_rsp_ready (per_core_rsp_ready),
.core_rsp_valid (arb_core_rsp_valid),
.core_rsp_data (arb_core_rsp_data),
.core_rsp_tag (arb_core_rsp_tag),
.core_rsp_ready (arb_core_rsp_ready),
// DRAM request
.dram_req_read (dram_req_read),
@ -356,11 +376,7 @@ module Vortex_Cluster #(
.dram_rsp_data (dram_rsp_data),
.dram_rsp_ready (dram_rsp_ready)
);
// Cache snooping
assign snp_fwd_valid = llc_snp_req_valid;
assign snp_fwd_addr = llc_snp_req_addr;
assign llc_snp_req_ready = & per_core_snp_fwd_ready;
end
endmodule

View file

@ -21,9 +21,9 @@ module Vortex_Socket (
output wire dram_rsp_ready,
// Cache snooping
input wire llc_snp_req_valid,
input wire[`L3DRAM_ADDR_WIDTH-1:0] llc_snp_req_addr,
output wire llc_snp_req_ready,
input wire snp_req_valid,
input wire[`L3DRAM_ADDR_WIDTH-1:0] snp_req_addr,
output wire snp_req_ready,
// I/O request
output wire io_req_read,
@ -64,9 +64,9 @@ module Vortex_Socket (
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_ready (dram_rsp_ready),
.llc_snp_req_valid (llc_snp_req_valid),
.llc_snp_req_addr (llc_snp_req_addr),
.llc_snp_req_ready (llc_snp_req_ready),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_ready (snp_req_ready),
.io_req_read (io_req_read),
.io_req_write (io_req_write),
@ -99,8 +99,8 @@ module Vortex_Socket (
wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
wire snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] snp_fwd_addr;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_valid;
wire[`NUM_CLUSTERS-1:0][`L3DRAM_ADDR_WIDTH-1:0] per_cluster_snp_fwd_addr;
wire[`NUM_CLUSTERS-1:0] per_cluster_snp_fwd_ready;
`IGNORE_WARNINGS_BEGIN
@ -137,9 +137,9 @@ module Vortex_Socket (
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
.llc_snp_req_valid (snp_fwd_valid),
.llc_snp_req_addr (snp_fwd_addr),
.llc_snp_req_ready (per_cluster_snp_fwd_ready [i]),
.snp_req_valid (per_cluster_snp_fwd_valid [i]),
.snp_req_addr (per_cluster_snp_fwd_addr [i]),
.snp_req_ready (per_cluster_snp_fwd_ready [i]),
.io_req_read (per_cluster_io_req_read [i]),
.io_req_write (per_cluster_io_req_write [i]),
@ -183,7 +183,11 @@ module Vortex_Socket (
wire[`L3NUM_REQUESTS-1:0] l3_core_rsp_valid;
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data;
wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag;
wire[`L3NUM_REQUESTS-1:0] l3_core_rsp_ready;
wire l3_core_rsp_ready;
wire l3_snp_fwd_valid;
wire[`L3DRAM_ADDR_WIDTH-1:0] l3_snp_fwd_addr;
wire l3_snp_fwd_ready;
for (i = 0; i < `L3NUM_REQUESTS; i=i+1) begin
// Core Request
@ -192,17 +196,21 @@ module Vortex_Socket (
assign l3_core_req_write [i] = per_cluster_dram_req_write [i] ? `BYTE_EN_LW : `BYTE_EN_NO;
assign l3_core_req_addr [i] = {per_cluster_dram_req_addr [i], {`LOG2UP(`L2BANK_LINE_SIZE){1'b0}}};
assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i];
assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
assign l3_core_req_data [i] = per_cluster_dram_req_data [i];
// Core Response
assign l3_core_rsp_ready [i] = per_cluster_dram_rsp_ready[i];
// Cache Fill Response
assign per_cluster_dram_rsp_valid [i] = l3_core_rsp_valid [i];
// Core Response
assign per_cluster_dram_rsp_valid [i] = l3_core_rsp_valid [i] && l3_core_rsp_ready;
assign per_cluster_dram_rsp_data [i] = l3_core_rsp_data [i];
assign per_cluster_dram_rsp_tag [i] = l3_core_rsp_tag [i];
// Snoop Forwarding
assign per_cluster_snp_fwd_valid [i] = l3_snp_fwd_valid && l3_snp_fwd_ready;
assign per_cluster_snp_fwd_addr [i] = l3_snp_fwd_addr;
end
assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready);
assign l3_snp_fwd_ready = (& per_cluster_snp_fwd_ready);
VX_cache #(
.CACHE_SIZE (`L3CACHE_SIZE),
.BANK_LINE_SIZE (`L3BANK_LINE_SIZE),
@ -245,7 +253,7 @@ module Vortex_Socket (
.core_rsp_valid (l3_core_rsp_valid),
.core_rsp_data (l3_core_rsp_data),
.core_rsp_tag (l3_core_rsp_tag),
.core_rsp_ready (& l3_core_rsp_ready),
.core_rsp_ready (l3_core_rsp_ready),
// DRAM request
.dram_req_write (dram_req_write),
@ -262,14 +270,14 @@ module Vortex_Socket (
.dram_rsp_ready (dram_rsp_ready),
// Snoop request
.snp_req_valid (llc_snp_req_valid),
.snp_req_addr (llc_snp_req_addr),
.snp_req_ready (llc_snp_req_ready),
.snp_req_valid (snp_req_valid),
.snp_req_addr (snp_req_addr),
.snp_req_ready (snp_req_ready),
// Snoop forwarding
.snp_fwd_valid (snp_fwd_valid),
.snp_fwd_addr (snp_fwd_addr),
.snp_fwd_ready (& per_cluster_snp_fwd_ready)
.snp_fwd_valid (l3_snp_fwd_valid),
.snp_fwd_addr (l3_snp_fwd_addr),
.snp_fwd_ready (l3_snp_fwd_ready)
);
end

View file

@ -69,7 +69,7 @@ module VX_bank #(
// Core Response
output wire core_rsp_valid,
output wire [`LOG2UP(NUM_REQUESTS)-1:0] core_rsp_tid,
output wire [`REQS_BITS-1:0] core_rsp_tid,
output wire [`WORD_WIDTH-1:0] core_rsp_data,
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_pop,
@ -156,18 +156,18 @@ module VX_bank #(
.full (dfpq_full)
);
wire reqq_pop;
wire reqq_push;
wire reqq_empty;
wire reqq_req_st0;
wire[`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0;
wire reqq_pop;
wire reqq_push;
wire reqq_empty;
wire reqq_req_st0;
wire[`REQS_BITS-1:0] reqq_req_tid_st0;
`IGNORE_WARNINGS_BEGIN
wire [31:0] reqq_req_addr_st0;
wire [31:0] reqq_req_addr_st0;
`IGNORE_WARNINGS_END
wire [`WORD_WIDTH-1:0] reqq_req_writeword_st0;
wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0;
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0;
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0;
wire [`WORD_WIDTH-1:0] reqq_req_writeword_st0;
wire [CORE_TAG_WIDTH-1:0] reqq_req_tag_st0;
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0;
wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0;
assign reqq_push = core_req_ready && (| core_req_valids);
@ -218,7 +218,7 @@ module VX_bank #(
wire mrvq_full;
wire mrvq_stop;
wire mrvq_valid_st0;
wire[`LOG2UP(NUM_REQUESTS)-1:0] mrvq_tid_st0;
wire[`REQS_BITS-1:0] mrvq_tid_st0;
wire [`LINE_ADDR_WIDTH-1:0] mrvq_addr_st0;
wire [`BASE_ADDR_BITS-1:0] mrvq_wsel_st0;
wire [`WORD_WIDTH-1:0] mrvq_writeword_st0;
@ -230,7 +230,7 @@ module VX_bank #(
wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr;
wire[`BASE_ADDR_BITS-1:0] miss_add_wsel;
wire[`WORD_WIDTH-1:0] miss_add_data;
wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid;
wire[`REQS_BITS-1:0] miss_add_tid;
wire[CORE_TAG_WIDTH-1:0] miss_add_tag;
wire[`BYTE_EN_BITS-1:0] miss_add_mem_read;
wire[`BYTE_EN_BITS-1:0] miss_add_mem_write;
@ -348,7 +348,7 @@ module VX_bank #(
wire dirty_st1e;
`DEBUG_BEGIN
wire [CORE_TAG_WIDTH-1:0] tag_st1e;
wire [`LOG2UP(NUM_REQUESTS)-1:0] tid_st1e;
wire [`REQS_BITS-1:0] tid_st1e;
`DEBUG_END
wire [`BYTE_EN_BITS-1:0] mem_read_st1e;
wire [`BYTE_EN_BITS-1:0] mem_write_st1e;
@ -515,14 +515,15 @@ module VX_bank #(
|| (valid_st2 && miss_st2 && mrvq_full)
|| (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_full));
wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2;
wire [`LOG2UP(NUM_REQUESTS)-1:0] cwbq_tid = miss_add_tid;
wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag;
wire [`WORD_WIDTH-1:0] cwbq_data = readword_st2;
wire [`REQS_BITS-1:0] cwbq_tid = miss_add_tid;
wire [CORE_TAG_WIDTH-1:0] cwbq_tag = miss_add_tag;
wire cwbq_empty;
wire cwbq_empty;
assign core_rsp_valid = !cwbq_empty;
VX_generic_queue #(
.DATAW(`LOG2UP(NUM_REQUESTS) + CORE_TAG_WIDTH + `WORD_WIDTH),
.DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH),
.SIZE(CWBQ_SIZE)
) cwb_queue (
.clk (clk),

View file

@ -109,7 +109,7 @@ module VX_cache #(
wire [NUM_BANKS-1:0] per_bank_core_rsp_pop;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
wire [NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_core_rsp_tid;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid;
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag;
@ -173,7 +173,7 @@ module VX_cache #(
wire curr_bank_core_rsp_pop;
wire curr_bank_core_rsp_valid;
wire [`LOG2UP(NUM_REQUESTS)-1:0] curr_bank_core_rsp_tid;
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;

View file

@ -12,10 +12,14 @@
`define BYTE_EN_BITS 3
// field order: data, tid, tag, read, write, base addr
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `LOG2UP(NUM_REQUESTS) + CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `BASE_ADDR_BITS)
`define MRVQ_METADATA_WIDTH (`WORD_WIDTH + `REQS_BITS + CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `BASE_ADDR_BITS)
// field order: tag, read, write, reqs
`define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `LOG2UP(NUM_REQUESTS))
`define REQ_INST_META_WIDTH (CORE_TAG_WIDTH + `BYTE_EN_BITS + `BYTE_EN_BITS + `REQS_BITS)
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
`define BANK_BITS `LOG2UP(NUM_BANKS)
`define WORD_WIDTH (8 * WORD_SIZE)
`define BYTE_WIDTH (`WORD_WIDTH / 4)

View file

@ -46,7 +46,7 @@ module VX_cache_core_rsp_merge #(
parameter DRAM_TAG_WIDTH = 1
) (
// Per Bank WB
input wire [NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_core_rsp_tid,
input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,
input wire [NUM_BANKS-1:0] per_bank_core_rsp_valid,
input wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data,
input wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag,
@ -63,8 +63,8 @@ module VX_cache_core_rsp_merge #(
assign per_bank_core_rsp_pop = per_bank_core_rsp_pop_unqual & {NUM_BANKS{core_rsp_ready}};
wire [`LOG2UP(NUM_BANKS)-1:0] main_bank_index;
wire found_bank;
wire [`BANK_BITS-1:0] main_bank_index;
wire found_bank;
VX_generic_priority_encoder #(
.N(NUM_BANKS)
@ -86,7 +86,7 @@ module VX_cache_core_rsp_merge #(
if (found_bank
&& per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
&& ((main_bank_index == `BANK_BITS'(i))
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
@ -106,7 +106,7 @@ module VX_cache_core_rsp_merge #(
if (found_bank
&& per_bank_core_rsp_valid[i]
&& !core_rsp_valid[per_bank_core_rsp_tid[i]]
&& ((main_bank_index == `LOG2UP(NUM_BANKS)'(i))
&& ((main_bank_index == `BANK_BITS'(i))
|| (per_bank_core_rsp_tid[i] != per_bank_core_rsp_tid[main_bank_index]))) begin
core_rsp_valid[per_bank_core_rsp_tid[i]] = 1;
core_rsp_data[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i];

View file

@ -86,8 +86,8 @@ module VX_cache_dfq_queue #(
assign use_per_bqual_bank_dram_fill_req_valid = use_empty ? (out_per_bank_dram_fill_req_valid & {NUM_BANKS{!o_empty}}) : (use_per_bank_dram_fill_req_valid & {NUM_BANKS{!use_empty}});
assign qual_bank_dram_fill_req_addr = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr;
wire[`LOG2UP(NUM_BANKS)-1:0] qual_request_index;
wire qual_has_request;
wire[`BANK_BITS-1:0] qual_request_index;
wire qual_has_request;
VX_generic_priority_encoder #(
.N(NUM_BANKS)

View file

@ -113,7 +113,7 @@ module VX_cache_dram_req_arb #(
.dfqq_full (dfqq_full)
);
wire [`LOG2UP(NUM_BANKS)-1:0] dwb_bank;
wire [`BANK_BITS-1:0] dwb_bank;
wire [NUM_BANKS-1:0] use_wb_valid = per_bank_dram_wb_req_valid;

View file

@ -48,7 +48,7 @@ module VX_cache_miss_resrv #(
input wire[`LINE_ADDR_WIDTH-1:0] miss_add_addr,
input wire[`BASE_ADDR_BITS-1:0] miss_add_wsel,
input wire[`WORD_WIDTH-1:0] miss_add_data,
input wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid,
input wire[`REQS_BITS-1:0] miss_add_tid,
input wire[CORE_TAG_WIDTH-1:0] miss_add_tag,
input wire[`BYTE_EN_BITS-1:0] miss_add_mem_read,
input wire[`BYTE_EN_BITS-1:0] miss_add_mem_write,
@ -69,7 +69,7 @@ module VX_cache_miss_resrv #(
output wire[`LINE_ADDR_WIDTH-1:0] miss_resrv_addr_st0,
output wire[`BASE_ADDR_BITS-1:0] miss_resrv_wsel_st0,
output wire[`WORD_WIDTH-1:0] miss_resrv_data_st0,
output wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_resrv_tid_st0,
output wire[`REQS_BITS-1:0] miss_resrv_tid_st0,
output wire[CORE_TAG_WIDTH-1:0] miss_resrv_tag_st0,
output wire[`BYTE_EN_BITS-1:0] miss_resrv_mem_read_st0,
output wire[`BYTE_EN_BITS-1:0] miss_resrv_mem_write_st0

View file

@ -58,7 +58,7 @@ module VX_cache_req_queue #(
// Dequeue Data
input wire reqq_pop,
output wire reqq_req_st0,
output wire [`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0,
output wire [`REQS_BITS-1:0] reqq_req_tid_st0,
output wire [`BYTE_EN_BITS-1:0] reqq_req_mem_read_st0,
output wire [`BYTE_EN_BITS-1:0] reqq_req_mem_write_st0,
output wire [`WORD_WIDTH-1:0] reqq_req_writedata_st0,
@ -126,8 +126,8 @@ module VX_cache_req_queue #(
assign qual_mem_read = use_per_mem_read;
assign qual_mem_write = use_per_mem_write;
wire[`LOG2UP(NUM_REQUESTS)-1:0] qual_request_index;
wire qual_has_request;
wire[`REQS_BITS-1:0] qual_request_index;
wire qual_has_request;
VX_generic_priority_encoder #(
.N(NUM_REQUESTS)

View file

@ -15,8 +15,8 @@ module VX_snp_fwd_arb #(
wire [NUM_BANKS-1:0] qual_per_bank_snp_fwd = per_bank_snp_fwd_valid & {NUM_BANKS{snp_fwd_ready}};
wire [`LOG2UP(NUM_BANKS)-1:0] fsq_bank;
wire fsq_valid;
wire [`BANK_BITS-1:0] fsq_bank;
wire fsq_valid;
VX_generic_priority_encoder #(
.N(NUM_BANKS)

View file

@ -157,18 +157,18 @@ void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;
// submit snoop requests for the needed blocks
vortex_->llc_snp_req_addr = aligned_addr_start;
vortex_->llc_snp_req_valid = false;
vortex_->snp_req_addr = aligned_addr_start;
vortex_->snp_req_valid = false;
for (;;) {
this->step();
if (vortex_->llc_snp_req_valid) {
vortex_->llc_snp_req_valid = false;
if (vortex_->llc_snp_req_addr >= aligned_addr_end)
if (vortex_->snp_req_valid) {
vortex_->snp_req_valid = false;
if (vortex_->snp_req_addr >= aligned_addr_end)
break;
vortex_->llc_snp_req_addr += 1;
vortex_->snp_req_addr += 1;
}
if (vortex_->llc_snp_req_ready) {
vortex_->llc_snp_req_valid = true;
if (vortex_->snp_req_ready) {
vortex_->snp_req_valid = true;
}
}
this->wait(PIPELINE_FLUSH_LATENCY);