mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
trace INSTANCE_ID refactoring
This commit is contained in:
parent
1322499c3f
commit
3efced37c5
32 changed files with 299 additions and 263 deletions
|
@ -96,7 +96,8 @@ def append_value(text, reg, value, tmask_arr, sep):
|
|||
return text, sep
|
||||
|
||||
def parse_rtlsim(log_filename):
|
||||
line_pattern = r"\d+: core(\d+)-(decode|issue|commit)"
|
||||
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=(\d+), num_barriers=(\d+)"
|
||||
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
|
||||
ex_pattern = r"ex=([a-zA-Z]+)"
|
||||
|
@ -118,21 +119,42 @@ def parse_rtlsim(log_filename):
|
|||
entries = []
|
||||
with open(log_filename, 'r') as log_file:
|
||||
instr_data = {}
|
||||
num_threads = 0
|
||||
num_warps = 0
|
||||
num_cores = 0
|
||||
num_clusters = 0
|
||||
socket_size = 0
|
||||
local_mem_base = 0
|
||||
num_barriers = 0
|
||||
num_sockets = 0
|
||||
for lineno, line in enumerate(log_file, start=1):
|
||||
try:
|
||||
config_match = re.search(config_pattern, line)
|
||||
if config_match:
|
||||
num_threads = int(config_match.group(1))
|
||||
num_warps = int(config_match.group(2))
|
||||
num_cores = int(config_match.group(3))
|
||||
num_clusters = int(config_match.group(4))
|
||||
socket_size = int(config_match.group(5))
|
||||
local_mem_base = int(config_match.group(6))
|
||||
num_barriers = int(config_match.group(7))
|
||||
num_sockets = (num_cores + socket_size - 1) // socket_size
|
||||
continue
|
||||
line_match = re.search(line_pattern, line)
|
||||
if line_match:
|
||||
PC = re.search(pc_pattern, line).group(1)
|
||||
warp_id = re.search(warp_id_pattern, line).group(1)
|
||||
tmask = re.search(tmask_pattern, line).group(1)
|
||||
uuid = re.search(uuid_pattern, line).group(1)
|
||||
core_id = line_match.group(1)
|
||||
stage = line_match.group(2)
|
||||
cluster_id = line_match.group(1)
|
||||
socket_id = line_match.group(2)
|
||||
core_id = line_match.group(3)
|
||||
stage = line_match.group(4)
|
||||
if stage == "decode":
|
||||
trace = {}
|
||||
trace["uuid"] = uuid
|
||||
trace["PC"] = PC
|
||||
trace["core_id"] = core_id
|
||||
# The regex groups captured above are strings; convert them to int so the flat core index is computed arithmetically (cluster-major, then socket, then core) rather than via string repetition/concatenation.
trace["core_id"] = (((int(cluster_id) * num_sockets) + int(socket_id)) * socket_size) + int(core_id)
|
||||
trace["warp_id"] = warp_id
|
||||
trace["tmask"] = reverse_binary(tmask)
|
||||
trace["instr"] = re.search(instr_pattern, line).group(1)
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_cluster import VX_gpu_pkg::*; #(
|
||||
parameter CLUSTER_ID = 0
|
||||
parameter CLUSTER_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -85,7 +86,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (l2_reset, reset);
|
||||
|
||||
VX_cache_wrap #(
|
||||
.INSTANCE_ID ("l2cache"),
|
||||
.INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)),
|
||||
.CACHE_SIZE (`L2_CACHE_SIZE),
|
||||
.LINE_SIZE (`L2_LINE_SIZE),
|
||||
.NUM_BANKS (`L2_NUM_BANKS),
|
||||
|
@ -122,17 +123,19 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [`NUM_SOCKETS-1:0] per_socket_busy;
|
||||
|
||||
VX_dcr_bus_if socket_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1));
|
||||
|
||||
// Generate all sockets
|
||||
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin
|
||||
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets
|
||||
|
||||
`RESET_RELAY (socket_reset, reset);
|
||||
|
||||
VX_socket #(
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i)
|
||||
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
|
||||
.INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id))
|
||||
) socket (
|
||||
`SCOPE_IO_BIND (scope_socket+i)
|
||||
`SCOPE_IO_BIND (scope_socket+socket_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (socket_reset),
|
||||
|
@ -143,13 +146,13 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
|
||||
.dcr_bus_if (socket_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_socket_mem_bus_if[i]),
|
||||
.mem_bus_if (per_socket_mem_bus_if[socket_id]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_socket_gbar_bus_if[i]),
|
||||
.gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
|
||||
`endif
|
||||
|
||||
.busy (per_socket_busy[i])
|
||||
.busy (per_socket_busy[socket_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_socket import VX_gpu_pkg::*; #(
|
||||
parameter SOCKET_ID = 0
|
||||
parameter SOCKET_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -40,6 +41,11 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_core = 0;
|
||||
`SCOPE_IO_SWITCH (`SOCKET_SIZE);
|
||||
`endif
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE]();
|
||||
|
||||
|
@ -81,7 +87,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (icache_reset, reset);
|
||||
|
||||
VX_cache_cluster #(
|
||||
.INSTANCE_ID ($sformatf("socket%0d-icache", SOCKET_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)),
|
||||
.NUM_UNITS (`NUM_ICACHES),
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.TAG_SEL_IDX (0),
|
||||
|
@ -126,7 +132,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (dcache_reset, reset);
|
||||
|
||||
VX_cache_cluster #(
|
||||
.INSTANCE_ID ($sformatf("socket%0d-dcache", SOCKET_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)),
|
||||
.NUM_UNITS (`NUM_DCACHES),
|
||||
.NUM_INPUTS (`SOCKET_SIZE),
|
||||
.TAG_SEL_IDX (0),
|
||||
|
@ -144,7 +150,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.NC_ENABLE (1),
|
||||
.CORE_OUT_BUF (`LMEM_ENABLED ? 2 : 1),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2)
|
||||
) dcache (
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -194,19 +200,19 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [`SOCKET_SIZE-1:0] per_core_busy;
|
||||
|
||||
VX_dcr_bus_if core_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1));
|
||||
|
||||
`SCOPE_IO_SWITCH (`SOCKET_SIZE)
|
||||
|
||||
// Generate all cores
|
||||
for (genvar i = 0; i < `SOCKET_SIZE; ++i) begin
|
||||
for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores
|
||||
|
||||
`RESET_RELAY (core_reset, reset);
|
||||
|
||||
VX_core #(
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i)
|
||||
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id),
|
||||
.INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id))
|
||||
) core (
|
||||
`SCOPE_IO_BIND (i)
|
||||
`SCOPE_IO_BIND (scope_core + core_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (core_reset),
|
||||
|
@ -217,15 +223,15 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
|
||||
.dcr_bus_if (core_dcr_bus_if),
|
||||
|
||||
.dcache_bus_if (per_core_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
|
||||
.dcache_bus_if (per_core_dcache_bus_if[core_id * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
|
||||
|
||||
.icache_bus_if (per_core_icache_bus_if[i]),
|
||||
.icache_bus_if (per_core_icache_bus_if[core_id]),
|
||||
|
||||
`ifdef GBAR_ENABLE
|
||||
.gbar_bus_if (per_core_gbar_bus_if[i]),
|
||||
.gbar_bus_if (per_core_gbar_bus_if[core_id]),
|
||||
`endif
|
||||
|
||||
.busy (per_core_busy[i])
|
||||
.busy (per_core_busy[core_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -44,6 +44,11 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
output wire busy
|
||||
);
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_cluster = 0;
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS);
|
||||
`endif
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_mem_perf_if mem_perf_if();
|
||||
assign mem_perf_if.icache = 'x;
|
||||
|
@ -121,19 +126,19 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
|
||||
|
||||
// Generate all clusters
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
|
||||
for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters
|
||||
|
||||
`RESET_RELAY (cluster_reset, reset);
|
||||
|
||||
VX_dcr_bus_if cluster_dcr_bus_if();
|
||||
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
|
||||
|
||||
VX_cluster #(
|
||||
.CLUSTER_ID (i)
|
||||
.CLUSTER_ID (cluster_id),
|
||||
.INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
|
||||
) cluster (
|
||||
`SCOPE_IO_BIND (i)
|
||||
`SCOPE_IO_BIND (scope_cluster + cluster_id)
|
||||
|
||||
.clk (clk),
|
||||
.reset (cluster_reset),
|
||||
|
@ -144,9 +149,9 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
|
||||
.dcr_bus_if (cluster_dcr_bus_if),
|
||||
|
||||
.mem_bus_if (per_cluster_mem_bus_if[i]),
|
||||
.mem_bus_if (per_cluster_mem_bus_if[cluster_id]),
|
||||
|
||||
.busy (per_cluster_busy[i])
|
||||
.busy (per_cluster_busy[cluster_id])
|
||||
);
|
||||
end
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_int #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter BLOCK_IDX = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
|
@ -29,7 +29,7 @@ module VX_alu_int #(
|
|||
VX_branch_ctl_if.master branch_ctl_if
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam LANE_WIDTH = `UP(LANE_BITS);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -121,7 +121,7 @@ module VX_alu_int #(
|
|||
case ({is_alu_w, op_class})
|
||||
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
|
||||
3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR*
|
||||
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
|
||||
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
|
||||
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
|
||||
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
|
||||
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
|
||||
|
@ -194,8 +194,8 @@ module VX_alu_int #(
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (branch_ctl_if.valid) begin
|
||||
`TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, CORE_ID, branch_ctl_if.wid, {commit_if.data.PC, 1'b0}, branch_ctl_if.taken, {branch_ctl_if.dest, 1'b0}, commit_if.data.uuid));
|
||||
`TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, branch_ctl_if.wid, {commit_if.data.PC, 1'b0}, branch_ctl_if.taken, {branch_ctl_if.dest, 1'b0}, commit_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_muldiv #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -26,7 +26,7 @@ module VX_alu_muldiv #(
|
|||
// Outputs
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + PID_WIDTH + 1 + 1;
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -27,7 +27,7 @@ module VX_alu_unit #(
|
|||
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS]
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_ALU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -75,7 +75,7 @@ module VX_alu_unit #(
|
|||
`RESET_RELAY (int_reset, block_reset);
|
||||
|
||||
VX_alu_int #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
|
||||
.BLOCK_IDX (block_idx),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) alu_int (
|
||||
|
@ -90,32 +90,32 @@ module VX_alu_unit #(
|
|||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_execute_if();
|
||||
) muldiv_execute_if();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_commit_if();
|
||||
) muldiv_commit_if();
|
||||
|
||||
assign mdv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign mdv_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
|
||||
`RESET_RELAY (mdv_reset, block_reset);
|
||||
`RESET_RELAY (muldiv_reset, block_reset);
|
||||
|
||||
VX_alu_muldiv #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_unit (
|
||||
) muldiv_unit (
|
||||
.clk (clk),
|
||||
.reset (mdv_reset),
|
||||
.execute_if (mdv_execute_if),
|
||||
.commit_if (mdv_commit_if)
|
||||
.reset (muldiv_reset),
|
||||
.execute_if (muldiv_execute_if),
|
||||
.commit_if (muldiv_commit_if)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
assign per_block_execute_if[block_idx].ready =
|
||||
`ifdef EXT_M_ENABLE
|
||||
is_muldiv_op ? mdv_execute_if.ready :
|
||||
is_muldiv_op ? muldiv_execute_if.ready :
|
||||
`endif
|
||||
int_execute_if.ready;
|
||||
|
||||
|
@ -130,19 +130,19 @@ module VX_alu_unit #(
|
|||
.reset (block_reset),
|
||||
.valid_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.valid,
|
||||
muldiv_commit_if.valid,
|
||||
`endif
|
||||
int_commit_if.valid
|
||||
}),
|
||||
.ready_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.ready,
|
||||
muldiv_commit_if.ready,
|
||||
`endif
|
||||
int_commit_if.ready
|
||||
}),
|
||||
.data_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.data,
|
||||
muldiv_commit_if.data,
|
||||
`endif
|
||||
int_commit_if.data
|
||||
}),
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -27,7 +27,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
VX_commit_csr_if.master commit_csr_if,
|
||||
VX_commit_sched_if.master commit_sched_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
|
||||
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
|
||||
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
|
||||
|
@ -171,7 +171,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
|
||||
always @(posedge clk) begin
|
||||
if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin
|
||||
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
|
||||
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
|
||||
trace_ex_type(1, j);
|
||||
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS);
|
||||
|
|
|
@ -18,7 +18,8 @@
|
|||
`endif
|
||||
|
||||
module VX_core import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -94,13 +95,14 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_SWITCH (3)
|
||||
|
||||
VX_schedule #(
|
||||
.INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) schedule (
|
||||
.clk (clk),
|
||||
.reset (schedule_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_schedule_if (pipeline_perf_if.schedule),
|
||||
.sched_perf (pipeline_perf_if.sched),
|
||||
`endif
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
|
@ -121,7 +123,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_fetch #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID))
|
||||
) fetch (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
|
@ -132,7 +134,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_decode #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID))
|
||||
) decode (
|
||||
.clk (clk),
|
||||
.reset (decode_reset),
|
||||
|
@ -142,7 +144,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_issue #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID))
|
||||
) issue (
|
||||
`SCOPE_IO_BIND (1)
|
||||
|
||||
|
@ -150,7 +152,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
.reset (issue_reset),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.perf_issue_if (pipeline_perf_if.issue),
|
||||
.issue_perf (pipeline_perf_if.issue),
|
||||
`endif
|
||||
|
||||
.decode_if (decode_if),
|
||||
|
@ -159,6 +161,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_execute #(
|
||||
.INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) execute (
|
||||
`SCOPE_IO_BIND (2)
|
||||
|
@ -186,7 +189,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_commit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID))
|
||||
) commit (
|
||||
.clk (clk),
|
||||
.reset (commit_reset),
|
||||
|
@ -210,7 +213,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (lmem_unit_reset, reset);
|
||||
|
||||
VX_lmem_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-lmem", INSTANCE_ID))
|
||||
) lmem_unit (
|
||||
.clk (clk),
|
||||
.reset (lmem_unit_reset),
|
||||
|
@ -242,7 +245,7 @@ module VX_core import VX_gpu_pkg::*; #(
|
|||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-coalescer", CORE_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
|
||||
.NUM_REQS (`NUM_LSU_LANES),
|
||||
.DATA_IN_SIZE (LSU_WORD_SIZE),
|
||||
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
|
||||
|
|
|
@ -144,6 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_core #(
|
||||
.INSTANCE_ID ($sformatf("core")),
|
||||
.CORE_ID (CORE_ID)
|
||||
) core (
|
||||
`SCOPE_IO_BIND (0)
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0,
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
|
@ -36,7 +37,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
VX_execute_if.slave execute_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
`endif
|
||||
|
||||
module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -43,7 +43,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
|
||||
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + (`NR_BITS * 4);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
|
@ -144,6 +144,12 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
end
|
||||
`endif
|
||||
|
||||
`STATIC_ASSERT($bits(alu_args_t) == $bits(op_args_t), ("alu_args_t size mismatch: current=%0d, expected=%0d", $bits(alu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(fpu_args_t) == $bits(op_args_t), ("fpu_args_t size mismatch: current=%0d, expected=%0d", $bits(fpu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(lsu_args_t) == $bits(op_args_t), ("lsu_args_t size mismatch: current=%0d, expected=%0d", $bits(lsu_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(csr_args_t) == $bits(op_args_t), ("csr_args_t size mismatch: current=%0d, expected=%0d", $bits(csr_args_t), $bits(op_args_t)));
|
||||
`STATIC_ASSERT($bits(wctl_args_t) == $bits(op_args_t), ("wctl_args_t size mismatch: current=%0d, expected=%0d", $bits(wctl_args_t), $bits(op_args_t)));
|
||||
|
||||
always @(*) begin
|
||||
|
||||
ex_type = '0;
|
||||
|
@ -551,7 +557,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
`TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
|
||||
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
|
||||
trace_ex_type(1, decode_if.data.ex_type);
|
||||
`TRACE(1, (", op="));
|
||||
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_execute import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
@ -55,7 +56,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (sfu_reset, reset);
|
||||
|
||||
VX_alu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID))
|
||||
) alu_unit (
|
||||
.clk (clk),
|
||||
.reset (alu_reset),
|
||||
|
@ -67,7 +68,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`SCOPE_IO_SWITCH (1)
|
||||
|
||||
VX_lsu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID))
|
||||
) lsu_unit (
|
||||
`SCOPE_IO_BIND (0)
|
||||
.clk (clk),
|
||||
|
@ -81,7 +82,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (fpu_reset, reset);
|
||||
|
||||
VX_fpu_unit #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID))
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (fpu_reset),
|
||||
|
@ -92,6 +93,7 @@ module VX_execute import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_sfu_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID)
|
||||
) sfu_unit (
|
||||
.clk (clk),
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fetch import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -30,7 +30,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
// outputs
|
||||
VX_fetch_if.master fetch_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire icache_req_valid;
|
||||
|
@ -129,45 +129,33 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
assign icache_bus_if.rsp_ready = fetch_if.ready;
|
||||
|
||||
`ifdef DBG_SCOPE_FETCH
|
||||
if (CORE_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
|
||||
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
|
||||
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({
|
||||
reset,
|
||||
schedule_fire,
|
||||
icache_req_fire,
|
||||
icache_rsp_fire
|
||||
}),
|
||||
.probes({
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
|
||||
}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
ila_fetch ila_fetch_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
|
||||
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
|
||||
.probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
wire schedule_fire = schedule_if.valid && schedule_if.ready;
|
||||
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (1),
|
||||
.TRIGGERW (4),
|
||||
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
|
||||
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
|
||||
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers ({
|
||||
reset,
|
||||
schedule_fire,
|
||||
icache_req_fire,
|
||||
icache_rsp_fire
|
||||
}),
|
||||
.probes ({
|
||||
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
|
||||
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
|
||||
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
|
||||
}),
|
||||
.bus_in (scope_bus_in),
|
||||
.bus_out (scope_bus_out)
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
@ -177,10 +165,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
wire fetch_fire = fetch_if.valid && fetch_if.ready;
|
||||
always @(posedge clk) begin
|
||||
if (schedule_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
`TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
|
||||
end
|
||||
if (fetch_fire) begin
|
||||
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
`TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_unit import VX_fpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -26,7 +26,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
|
|||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_fpu_csr_if.master fpu_csr_if[`NUM_FPU_BLOCKS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_FPU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_FPU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,11 +14,11 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_lmem_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output cache_perf_t cache_perf,
|
||||
`endif
|
||||
|
@ -42,14 +42,14 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (req_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
|
||||
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
|
||||
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
|
||||
assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL];
|
||||
end
|
||||
|
||||
|
||||
wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask);
|
||||
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
|
||||
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
|
||||
|
||||
wire req_global_ready;
|
||||
wire req_local_ready;
|
||||
|
@ -61,7 +61,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
) req_global_buf (
|
||||
.clk (clk),
|
||||
.reset (req_reset),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
|
||||
.data_in ({
|
||||
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
|
||||
lsu_mem_in_if[i].req_data.rw,
|
||||
|
@ -81,7 +81,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
lsu_mem_out_if[i].req_data.atype,
|
||||
lsu_mem_out_if[i].req_data.data,
|
||||
lsu_mem_out_if[i].req_data.tag
|
||||
}),
|
||||
}),
|
||||
.ready_out (lsu_mem_out_if[i].req_ready)
|
||||
);
|
||||
|
||||
|
@ -92,7 +92,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
) req_local_buf (
|
||||
.clk (clk),
|
||||
.reset (req_reset),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
|
||||
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
|
||||
.data_in ({
|
||||
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
|
||||
lsu_mem_in_if[i].req_data.rw,
|
||||
|
@ -112,18 +112,18 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
lmem_lsu_if[i].req_data.atype,
|
||||
lmem_lsu_if[i].req_data.data,
|
||||
lmem_lsu_if[i].req_data.tag
|
||||
}),
|
||||
}),
|
||||
.ready_out (lmem_lsu_if[i].req_ready)
|
||||
);
|
||||
|
||||
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
|
||||
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
|
||||
|| (req_local_ready && is_addr_local);
|
||||
end
|
||||
|
||||
`RESET_RELAY (rsp_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
|
||||
|
||||
|
||||
wire rsp_arb_valid;
|
||||
wire rsp_arb_index;
|
||||
wire rsp_arb_ready;
|
||||
|
@ -135,7 +135,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
) arbiter (
|
||||
.clk (clk),
|
||||
.reset (rsp_reset),
|
||||
.requests ({
|
||||
.requests ({
|
||||
lmem_lsu_if[i].rsp_valid,
|
||||
lsu_mem_out_if[i].rsp_valid
|
||||
}),
|
||||
|
@ -144,7 +144,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN (grant_onehot),
|
||||
.grant_unlock(rsp_arb_ready)
|
||||
);
|
||||
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (RSP_DATAW),
|
||||
.SIZE (2),
|
||||
|
@ -152,7 +152,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
) rsp_buf (
|
||||
.clk (clk),
|
||||
.reset (rsp_reset),
|
||||
.valid_in (rsp_arb_valid),
|
||||
.valid_in (rsp_arb_valid),
|
||||
.data_in ({
|
||||
rsp_arb_index ? lmem_lsu_if[i].rsp_data.mask : lsu_mem_out_if[i].rsp_data.mask,
|
||||
rsp_arb_index ? lmem_lsu_if[i].rsp_data.data : lsu_mem_out_if[i].rsp_data.data,
|
||||
|
@ -161,10 +161,10 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
.ready_in (rsp_arb_ready),
|
||||
.valid_out (lsu_mem_in_if[i].rsp_valid),
|
||||
.data_out ({
|
||||
lsu_mem_in_if[i].rsp_data.mask,
|
||||
lsu_mem_in_if[i].rsp_data.data,
|
||||
lsu_mem_in_if[i].rsp_data.mask,
|
||||
lsu_mem_in_if[i].rsp_data.data,
|
||||
lsu_mem_in_if[i].rsp_data.tag
|
||||
}),
|
||||
}),
|
||||
.ready_out (lsu_mem_in_if[i].rsp_ready)
|
||||
);
|
||||
|
||||
|
@ -187,7 +187,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_lsu_adapter #(
|
||||
.NUM_LANES (`NUM_LSU_LANES),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.DATA_SIZE (LSU_WORD_SIZE),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
|
||||
.REQ_OUT_BUF (2),
|
||||
|
@ -205,17 +205,17 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
`RESET_RELAY (lmem_reset, reset);
|
||||
|
||||
|
||||
VX_local_mem #(
|
||||
.INSTANCE_ID($sformatf("core%0d-lmem", CORE_ID)),
|
||||
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
|
||||
.SIZE (1 << `LMEM_LOG_SIZE),
|
||||
.NUM_REQS (LSU_NUM_REQS),
|
||||
.NUM_BANKS (`LMEM_NUM_BANKS),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.TAG_WIDTH (LSU_TAG_WIDTH)
|
||||
) local_mem (
|
||||
) local_mem (
|
||||
.clk (clk),
|
||||
.reset (lmem_reset),
|
||||
`ifdef PERF_ENABLE
|
||||
|
|
|
@ -13,9 +13,8 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_slice import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter BLOCK_ID = 0
|
||||
module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -312,7 +311,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (mem_scheduler_reset, reset);
|
||||
|
||||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched%0d", CORE_ID, BLOCK_ID)),
|
||||
.INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)),
|
||||
.CORE_REQS (NUM_LANES),
|
||||
.MEM_CHANNELS(NUM_LANES),
|
||||
.WORD_SIZE (LSU_WORD_SIZE),
|
||||
|
@ -504,11 +503,11 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`ifdef DBG_TRACE_MEM
|
||||
always @(posedge clk) begin
|
||||
if (execute_if.valid && fence_lock) begin
|
||||
`TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID));
|
||||
`TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID));
|
||||
end
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
|
@ -516,7 +515,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
|
@ -524,8 +523,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, CORE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid));
|
||||
end
|
||||
|
@ -533,36 +532,20 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
`ifdef DBG_SCOPE_LSU
|
||||
if (CORE_ID == 0 && BLOCK_ID == 0) begin
|
||||
`ifdef SCOPE
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
.TRIGGERW (3),
|
||||
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
|
||||
) scope_tap (
|
||||
.clk(clk),
|
||||
.reset(scope_reset),
|
||||
.start(1'b0),
|
||||
.stop(1'b0),
|
||||
.triggers({reset, mem_req_fire, mem_rsp_fire}),
|
||||
.probes({execute_if.data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
|
||||
.bus_in(scope_bus_in),
|
||||
.bus_out(scope_bus_out)
|
||||
);
|
||||
`endif
|
||||
`ifdef CHIPSCOPE
|
||||
wire [31:0] full_addr_0 = full_addr[0];
|
||||
wire [31:0] mem_req_data_0 = mem_req_data[0];
|
||||
wire [31:0] rsp_data_0 = rsp_data[0];
|
||||
ila_lsu ila_lsu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({mem_req_data_0, execute_if.data.uuid, execute_if.data.wid, execute_if.data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}),
|
||||
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, mem_rsp_mask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
|
||||
.probe2 ({lsu_mem_if.req_data.data, lsu_mem_if.req_data.tag, lsu_mem_if.req_data.byteen, lsu_mem_if.req_data.addr, lsu_mem_if.req_data.rw, lsu_mem_if.req_ready, lsu_mem_if.req_valid}),
|
||||
.probe3 ({lsu_mem_if.rsp_data.data, lsu_mem_if.rsp_data.tag, lsu_mem_if.rsp_ready, lsu_mem_if.rsp_valid})
|
||||
);
|
||||
`endif
|
||||
end
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
.TRIGGERW (3),
|
||||
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
|
||||
) scope_tap (
|
||||
.clk (clk),
|
||||
.reset (scope_reset),
|
||||
.start (1'b0),
|
||||
.stop (1'b0),
|
||||
.triggers ({reset, mem_req_fire, mem_rsp_fire}),
|
||||
.probes ({execute_if.data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
|
||||
.bus_in (scope_bus_in),
|
||||
.bus_out (scope_bus_out)
|
||||
);
|
||||
`else
|
||||
`SCOPE_IO_UNUSED()
|
||||
`endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -14,8 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
input wire clk,
|
||||
|
@ -24,7 +24,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
// Inputs
|
||||
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
|
||||
|
||||
// Outputs
|
||||
// Outputs
|
||||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS]
|
||||
);
|
||||
|
@ -32,10 +32,9 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
|
||||
`ifdef SCOPE
|
||||
localparam scope_lsu = 0;
|
||||
`SCOPE_IO_SWITCH (BLOCK_SIZE);
|
||||
`endif
|
||||
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
@ -55,15 +54,14 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices
|
||||
|
||||
`RESET_RELAY (block_reset, reset);
|
||||
|
||||
VX_lsu_slice #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.BLOCK_ID (block_idx)
|
||||
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx))
|
||||
) lsu_slice(
|
||||
`SCOPE_IO_BIND (scope_lsu+block_idx)
|
||||
`SCOPE_IO_BIND (block_idx)
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.execute_if (per_block_execute_if[block_idx]),
|
||||
|
@ -82,5 +80,5 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.commit_in_if (per_block_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -14,13 +14,14 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_schedule import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_pipeline_perf_if.schedule perf_schedule_if,
|
||||
output sched_perf_t sched_perf,
|
||||
`endif
|
||||
|
||||
// configuration
|
||||
|
@ -42,6 +43,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
// status
|
||||
output wire busy
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // updated when a warp is activated or disabled
|
||||
|
@ -290,7 +292,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (split_join_reset, reset);
|
||||
|
||||
VX_split_join #(
|
||||
.CORE_ID (CORE_ID)
|
||||
.INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID))
|
||||
) split_join (
|
||||
.clk (clk),
|
||||
.reset (split_join_reset),
|
||||
|
@ -412,7 +414,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
end
|
||||
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps))
|
||||
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** %s-scheduler-timeout: stalled_warps=%b", $time, INSTANCE_ID, stalled_warps))
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`PERF_CTR_BITS-1:0] perf_sched_idles;
|
||||
|
@ -431,8 +433,8 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
assign perf_schedule_if.sched_idles = perf_sched_idles;
|
||||
assign perf_schedule_if.sched_stalls = perf_sched_stalls;
|
||||
assign sched_perf.idles = perf_sched_idles;
|
||||
assign sched_perf.stalls = perf_sched_stalls;
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_sfu_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -39,7 +40,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
VX_commit_if.master commit_if [`ISSUE_WIDTH],
|
||||
VX_warp_ctl_if.master warp_ctl_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
|
@ -83,7 +84,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (wctl_reset, reset);
|
||||
|
||||
VX_wctl_unit #(
|
||||
.CORE_ID (CORE_ID),
|
||||
.INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_unit (
|
||||
.clk (clk),
|
||||
|
@ -111,6 +112,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (csr_reset, reset);
|
||||
|
||||
VX_csr_unit #(
|
||||
.INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)),
|
||||
.CORE_ID (CORE_ID),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_unit (
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_split_join import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -31,7 +31,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
input wire [`NW_WIDTH-1:0] stack_wid,
|
||||
output wire [`DV_STACK_SIZEW-1:0] stack_ptr
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_data [`NUM_WARPS-1:0];
|
||||
wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0];
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_wctl_unit import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -27,7 +27,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
VX_warp_ctl_if.master warp_ctl_if,
|
||||
VX_commit_if.master commit_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -13,39 +13,29 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_pipeline_perf_if ();
|
||||
wire [`PERF_CTR_BITS-1:0] sched_idles;
|
||||
wire [`PERF_CTR_BITS-1:0] sched_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] scb_stalls;
|
||||
wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
|
||||
wire [`PERF_CTR_BITS-1:0] sfu_uses [`NUM_SFU_UNITS];
|
||||
interface VX_pipeline_perf_if import VX_gpu_pkg::*; ();
|
||||
sched_perf_t sched;
|
||||
issue_perf_t issue;
|
||||
|
||||
wire [`PERF_CTR_BITS-1:0] ifetches;
|
||||
wire [`PERF_CTR_BITS-1:0] loads;
|
||||
wire [`PERF_CTR_BITS-1:0] stores;
|
||||
wire [`PERF_CTR_BITS-1:0] stores;
|
||||
wire [`PERF_CTR_BITS-1:0] ifetch_latency;
|
||||
wire [`PERF_CTR_BITS-1:0] load_latency;
|
||||
|
||||
modport schedule (
|
||||
output sched_idles,
|
||||
output sched_stalls
|
||||
);
|
||||
|
||||
modport issue (
|
||||
output ibf_stalls,
|
||||
output scb_stalls,
|
||||
output units_uses,
|
||||
output sfu_uses
|
||||
modport master (
|
||||
output sched,
|
||||
output issue,
|
||||
output ifetches,
|
||||
output loads,
|
||||
output stores,
|
||||
output ifetch_latency,
|
||||
output load_latency
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input sched_idles,
|
||||
input sched_stalls,
|
||||
input ibf_stalls,
|
||||
input scb_stalls,
|
||||
input units_uses,
|
||||
input sfu_uses,
|
||||
input sched,
|
||||
input issue,
|
||||
input ifetches,
|
||||
input loads,
|
||||
input stores,
|
||||
|
|
|
@ -175,6 +175,17 @@ public:
|
|||
ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
|
||||
Stats::statlist.output("ramulator.ddr4.log");
|
||||
|
||||
// dump device configuration
|
||||
std::cout << "CONFIGS:"
|
||||
<< " num_threads=" << NUM_THREADS
|
||||
<< ", num_warps=" << NUM_WARPS
|
||||
<< ", num_cores=" << NUM_CORES
|
||||
<< ", num_clusters=" << NUM_CLUSTERS
|
||||
<< ", socket_size=" << SOCKET_SIZE
|
||||
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
|
||||
<< ", num_barriers=" << NUM_BARRIERS
|
||||
<< std::endl;
|
||||
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
|
|
|
@ -145,6 +145,17 @@ public:
|
|||
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
|
||||
Stats::statlist.output("ramulator.ddr4.log");
|
||||
|
||||
// dump device configuration
|
||||
std::cout << "CONFIGS:"
|
||||
<< " num_threads=" << NUM_THREADS
|
||||
<< ", num_warps=" << NUM_WARPS
|
||||
<< ", num_cores=" << NUM_CORES
|
||||
<< ", num_clusters=" << NUM_CLUSTERS
|
||||
<< ", socket_size=" << SOCKET_SIZE
|
||||
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
|
||||
<< ", num_barriers=" << NUM_BARRIERS
|
||||
<< std::endl;
|
||||
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
|
|
|
@ -29,11 +29,7 @@ private:
|
|||
uint16_t num_cores_;
|
||||
uint16_t num_clusters_;
|
||||
uint16_t socket_size_;
|
||||
uint16_t vsize_;
|
||||
uint16_t num_regs_;
|
||||
uint16_t num_csrs_;
|
||||
uint16_t num_barriers_;
|
||||
uint16_t ipdom_size_;
|
||||
uint64_t local_mem_base_;
|
||||
|
||||
public:
|
||||
|
@ -43,26 +39,10 @@ public:
|
|||
, num_cores_(num_cores)
|
||||
, num_clusters_(NUM_CLUSTERS)
|
||||
, socket_size_(SOCKET_SIZE)
|
||||
, vsize_(16)
|
||||
, num_regs_(32)
|
||||
, num_csrs_(4096)
|
||||
, num_barriers_(NUM_BARRIERS)
|
||||
, ipdom_size_((num_threads-1) * 2)
|
||||
, local_mem_base_(LMEM_BASE_ADDR)
|
||||
{}
|
||||
|
||||
uint16_t vsize() const {
|
||||
return vsize_;
|
||||
}
|
||||
|
||||
uint16_t num_regs() const {
|
||||
return num_regs_;
|
||||
}
|
||||
|
||||
uint16_t num_csrs() const {
|
||||
return num_csrs_;
|
||||
}
|
||||
|
||||
uint16_t num_barriers() const {
|
||||
return num_barriers_;
|
||||
}
|
||||
|
@ -71,10 +51,6 @@ public:
|
|||
return local_mem_base_;
|
||||
}
|
||||
|
||||
uint16_t ipdom_size() const {
|
||||
return ipdom_size_;
|
||||
}
|
||||
|
||||
uint16_t num_threads() const {
|
||||
return num_threads_;
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -31,4 +31,6 @@
|
|||
|
||||
#define DCACHE_WORD_SIZE LSU_LINE_SIZE
|
||||
#define DCACHE_CHANNELS UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE)
|
||||
#define DCACHE_NUM_REQS (NUM_LSU_BLOCKS * DCACHE_CHANNELS)
|
||||
#define DCACHE_NUM_REQS (NUM_LSU_BLOCKS * DCACHE_CHANNELS)
|
||||
|
||||
#define NUM_SOCKETS UP(NUM_CORES / SOCKET_SIZE)
|
|
@ -42,8 +42,8 @@ Emulator::ipdom_entry_t::ipdom_entry_t(const ThreadMask &tmask)
|
|||
{}
|
||||
|
||||
Emulator::warp_t::warp_t(const Arch& arch)
|
||||
: ireg_file(arch.num_threads(), std::vector<Word>(arch.num_regs()))
|
||||
, freg_file(arch.num_threads(), std::vector<uint64_t>(arch.num_regs()))
|
||||
: ireg_file(arch.num_threads(), std::vector<Word>(MAX_NUM_REGS))
|
||||
, freg_file(arch.num_threads(), std::vector<uint64_t>(MAX_NUM_REGS))
|
||||
, uuid(0)
|
||||
{}
|
||||
|
||||
|
@ -74,6 +74,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core)
|
|||
, core_(core)
|
||||
, warps_(arch.num_warps(), arch)
|
||||
, barriers_(arch.num_barriers(), 0)
|
||||
, ipdom_size_((arch.num_threads()-1) * 2)
|
||||
{
|
||||
this->clear();
|
||||
}
|
||||
|
@ -186,7 +187,7 @@ instr_trace_t* Emulator::step() {
|
|||
this->execute(*instr, scheduled_warp, trace);
|
||||
|
||||
DP(5, "Register state:");
|
||||
for (uint32_t i = 0; i < arch_.num_regs(); ++i) {
|
||||
for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
|
||||
DPN(5, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
// Integer register file
|
||||
for (uint32_t j = 0; j < arch_.num_threads(); ++j) {
|
||||
|
|
|
@ -119,6 +119,7 @@ private:
|
|||
std::vector<WarpMask> barriers_;
|
||||
std::unordered_map<int, std::stringstream> print_bufs_;
|
||||
MemoryUnit mmu_;
|
||||
uint32_t ipdom_size_;
|
||||
Word csr_mscratch_;
|
||||
wspawn_t wspawn_;
|
||||
};
|
||||
|
|
|
@ -1336,7 +1336,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
|
||||
bool is_divergent = then_tmask.any() && else_tmask.any();
|
||||
if (is_divergent) {
|
||||
if (stack_size == arch_.ipdom_size()) {
|
||||
if (stack_size == ipdom_size_) {
|
||||
std::cout << "IPDOM stack is full! size=" << std::dec << stack_size << ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << trace->uuid << ")\n" << std::flush;
|
||||
std::abort();
|
||||
}
|
||||
|
|
|
@ -70,6 +70,17 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
|
|||
--perf_mem_pending_reads_;
|
||||
});
|
||||
|
||||
// dump device configuration
|
||||
std::cout << "CONFIGS:"
|
||||
<< " num_threads=" << arch.num_threads()
|
||||
<< ", num_warps=" << arch.num_warps()
|
||||
<< ", num_cores=" << arch.num_cores()
|
||||
<< ", num_clusters=" << arch.num_clusters()
|
||||
<< ", socket_size=" << arch.socket_size()
|
||||
<< ", local_mem_base=0x" << std::hex << arch.local_mem_base() << std::dec
|
||||
<< ", num_barriers=" << arch.num_barriers()
|
||||
<< std::endl;
|
||||
|
||||
this->reset();
|
||||
}
|
||||
|
||||
|
|
|
@ -164,6 +164,17 @@ public:
|
|||
ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
|
||||
Stats::statlist.output("ramulator.ddr4.log");
|
||||
|
||||
// dump device configuration
|
||||
std::cout << "CONFIGS:"
|
||||
<< " num_threads=" << NUM_THREADS
|
||||
<< ", num_warps=" << NUM_WARPS
|
||||
<< ", num_cores=" << NUM_CORES
|
||||
<< ", num_clusters=" << NUM_CLUSTERS
|
||||
<< ", socket_size=" << SOCKET_SIZE
|
||||
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
|
||||
<< ", num_barriers=" << NUM_BARRIERS
|
||||
<< std::endl;
|
||||
|
||||
// reset the device
|
||||
this->reset();
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue