trace INSTANCE_ID refactoring

This commit is contained in:
Blaise Tine 2024-07-09 13:33:17 -07:00
parent 1322499c3f
commit 3efced37c5
32 changed files with 299 additions and 263 deletions

View file

@ -96,7 +96,8 @@ def append_value(text, reg, value, tmask_arr, sep):
return text, sep
def parse_rtlsim(log_filename):
line_pattern = r"\d+: core(\d+)-(decode|issue|commit)"
config_pattern = r"CONFIGS: num_threads=(\d+), num_warps=(\d+), num_cores=(\d+), num_clusters=(\d+), socket_size=(\d+), local_mem_base=(\d+), num_barriers=(\d+)"
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
ex_pattern = r"ex=([a-zA-Z]+)"
@ -118,21 +119,42 @@ def parse_rtlsim(log_filename):
entries = []
with open(log_filename, 'r') as log_file:
instr_data = {}
num_threads = 0
num_warps = 0
num_cores = 0
num_clusters = 0
socket_size = 0
local_mem_base = 0
num_barriers = 0
num_sockets = 0
for lineno, line in enumerate(log_file, start=1):
try:
config_match = re.search(config_pattern, line)
if config_match:
num_threads = int(config_match.group(1))
num_warps = int(config_match.group(2))
num_cores = int(config_match.group(3))
num_clusters = int(config_match.group(4))
socket_size = int(config_match.group(5))
local_mem_base = int(config_match.group(6))
num_barriers = int(config_match.group(7))
num_sockets = (num_cores + socket_size - 1) // socket_size
continue
line_match = re.search(line_pattern, line)
if line_match:
PC = re.search(pc_pattern, line).group(1)
warp_id = re.search(warp_id_pattern, line).group(1)
tmask = re.search(tmask_pattern, line).group(1)
uuid = re.search(uuid_pattern, line).group(1)
core_id = line_match.group(1)
stage = line_match.group(2)
cluster_id = line_match.group(1)
socket_id = line_match.group(2)
core_id = line_match.group(3)
stage = line_match.group(4)
if stage == "decode":
trace = {}
trace["uuid"] = uuid
trace["PC"] = PC
trace["core_id"] = core_id
trace["core_id"] = ((((cluster_id * num_sockets) + socket_id) * socket_size) + core_id)
trace["warp_id"] = warp_id
trace["tmask"] = reverse_binary(tmask)
trace["instr"] = re.search(instr_pattern, line).group(1)

View file

@ -14,7 +14,8 @@
`include "VX_define.vh"
module VX_cluster import VX_gpu_pkg::*; #(
parameter CLUSTER_ID = 0
parameter CLUSTER_ID = 0,
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -85,7 +86,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
`RESET_RELAY (l2_reset, reset);
VX_cache_wrap #(
.INSTANCE_ID ("l2cache"),
.INSTANCE_ID ($sformatf("%s-l2cache", INSTANCE_ID)),
.CACHE_SIZE (`L2_CACHE_SIZE),
.LINE_SIZE (`L2_LINE_SIZE),
.NUM_BANKS (`L2_NUM_BANKS),
@ -122,17 +123,19 @@ module VX_cluster import VX_gpu_pkg::*; #(
wire [`NUM_SOCKETS-1:0] per_socket_busy;
VX_dcr_bus_if socket_dcr_bus_if();
`BUFFER_DCR_BUS_IF (socket_dcr_bus_if, socket_dcr_bus_tmp_if, (`NUM_SOCKETS > 1));
// Generate all sockets
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin
for (genvar socket_id = 0; socket_id < `NUM_SOCKETS; ++socket_id) begin : sockets
`RESET_RELAY (socket_reset, reset);
VX_socket #(
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i)
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + socket_id),
.INSTANCE_ID ($sformatf("%s-socket%0d", INSTANCE_ID, socket_id))
) socket (
`SCOPE_IO_BIND (scope_socket+i)
`SCOPE_IO_BIND (scope_socket+socket_id)
.clk (clk),
.reset (socket_reset),
@ -143,13 +146,13 @@ module VX_cluster import VX_gpu_pkg::*; #(
.dcr_bus_if (socket_dcr_bus_if),
.mem_bus_if (per_socket_mem_bus_if[i]),
.mem_bus_if (per_socket_mem_bus_if[socket_id]),
`ifdef GBAR_ENABLE
.gbar_bus_if (per_socket_gbar_bus_if[i]),
.gbar_bus_if (per_socket_gbar_bus_if[socket_id]),
`endif
.busy (per_socket_busy[i])
.busy (per_socket_busy[socket_id])
);
end

View file

@ -14,7 +14,8 @@
`include "VX_define.vh"
module VX_socket import VX_gpu_pkg::*; #(
parameter SOCKET_ID = 0
parameter SOCKET_ID = 0,
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -40,6 +41,11 @@ module VX_socket import VX_gpu_pkg::*; #(
output wire busy
);
`ifdef SCOPE
localparam scope_core = 0;
`SCOPE_IO_SWITCH (`SOCKET_SIZE);
`endif
`ifdef GBAR_ENABLE
VX_gbar_bus_if per_core_gbar_bus_if[`SOCKET_SIZE]();
@ -81,7 +87,7 @@ module VX_socket import VX_gpu_pkg::*; #(
`RESET_RELAY (icache_reset, reset);
VX_cache_cluster #(
.INSTANCE_ID ($sformatf("socket%0d-icache", SOCKET_ID)),
.INSTANCE_ID ($sformatf("%s-icache", INSTANCE_ID)),
.NUM_UNITS (`NUM_ICACHES),
.NUM_INPUTS (`SOCKET_SIZE),
.TAG_SEL_IDX (0),
@ -126,7 +132,7 @@ module VX_socket import VX_gpu_pkg::*; #(
`RESET_RELAY (dcache_reset, reset);
VX_cache_cluster #(
.INSTANCE_ID ($sformatf("socket%0d-dcache", SOCKET_ID)),
.INSTANCE_ID ($sformatf("%s-dcache", INSTANCE_ID)),
.NUM_UNITS (`NUM_DCACHES),
.NUM_INPUTS (`SOCKET_SIZE),
.TAG_SEL_IDX (0),
@ -144,7 +150,7 @@ module VX_socket import VX_gpu_pkg::*; #(
.UUID_WIDTH (`UUID_WIDTH),
.WRITE_ENABLE (1),
.NC_ENABLE (1),
.CORE_OUT_BUF (`LMEM_ENABLED ? 2 : 1),
.CORE_OUT_BUF (2),
.MEM_OUT_BUF (2)
) dcache (
`ifdef PERF_ENABLE
@ -194,19 +200,19 @@ module VX_socket import VX_gpu_pkg::*; #(
wire [`SOCKET_SIZE-1:0] per_core_busy;
VX_dcr_bus_if core_dcr_bus_if();
`BUFFER_DCR_BUS_IF (core_dcr_bus_if, dcr_bus_if, (`SOCKET_SIZE > 1));
`SCOPE_IO_SWITCH (`SOCKET_SIZE)
// Generate all cores
for (genvar i = 0; i < `SOCKET_SIZE; ++i) begin
for (genvar core_id = 0; core_id < `SOCKET_SIZE; ++core_id) begin : cores
`RESET_RELAY (core_reset, reset);
VX_core #(
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i)
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + core_id),
.INSTANCE_ID ($sformatf("%s-core%0d", INSTANCE_ID, core_id))
) core (
`SCOPE_IO_BIND (i)
`SCOPE_IO_BIND (scope_core + core_id)
.clk (clk),
.reset (core_reset),
@ -217,15 +223,15 @@ module VX_socket import VX_gpu_pkg::*; #(
.dcr_bus_if (core_dcr_bus_if),
.dcache_bus_if (per_core_dcache_bus_if[i * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
.dcache_bus_if (per_core_dcache_bus_if[core_id * DCACHE_NUM_REQS +: DCACHE_NUM_REQS]),
.icache_bus_if (per_core_icache_bus_if[i]),
.icache_bus_if (per_core_icache_bus_if[core_id]),
`ifdef GBAR_ENABLE
.gbar_bus_if (per_core_gbar_bus_if[i]),
.gbar_bus_if (per_core_gbar_bus_if[core_id]),
`endif
.busy (per_core_busy[i])
.busy (per_core_busy[core_id])
);
end

View file

@ -44,6 +44,11 @@ module Vortex import VX_gpu_pkg::*; (
output wire busy
);
`ifdef SCOPE
localparam scope_cluster = 0;
`SCOPE_IO_SWITCH (`NUM_CLUSTERS);
`endif
`ifdef PERF_ENABLE
VX_mem_perf_if mem_perf_if();
assign mem_perf_if.icache = 'x;
@ -121,19 +126,19 @@ module Vortex import VX_gpu_pkg::*; (
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
`SCOPE_IO_SWITCH (`NUM_CLUSTERS)
// Generate all clusters
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters
`RESET_RELAY (cluster_reset, reset);
VX_dcr_bus_if cluster_dcr_bus_if();
`BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));
VX_cluster #(
.CLUSTER_ID (i)
.CLUSTER_ID (cluster_id),
.INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
) cluster (
`SCOPE_IO_BIND (i)
`SCOPE_IO_BIND (scope_cluster + cluster_id)
.clk (clk),
.reset (cluster_reset),
@ -144,9 +149,9 @@ module Vortex import VX_gpu_pkg::*; (
.dcr_bus_if (cluster_dcr_bus_if),
.mem_bus_if (per_cluster_mem_bus_if[i]),
.mem_bus_if (per_cluster_mem_bus_if[cluster_id]),
.busy (per_cluster_busy[i])
.busy (per_cluster_busy[cluster_id])
);
end

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_alu_int #(
parameter CORE_ID = 0,
parameter `STRING INSTANCE_ID = "",
parameter BLOCK_IDX = 0,
parameter NUM_LANES = 1
) (
@ -29,7 +29,7 @@ module VX_alu_int #(
VX_branch_ctl_if.master branch_ctl_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam LANE_BITS = `CLOG2(NUM_LANES);
localparam LANE_WIDTH = `UP(LANE_BITS);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -121,7 +121,7 @@ module VX_alu_int #(
case ({is_alu_w, op_class})
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
3'b001: alu_result[i] = sub_slt_br_result; // SUB, SLTU, SLTI, BR*
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
3'b010: alu_result[i] = shr_zic_result[i]; // SRL, SRA, SRLI, SRAI, CZERO*
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
@ -194,8 +194,8 @@ module VX_alu_int #(
`ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin
if (branch_ctl_if.valid) begin
`TRACE(1, ("%d: core%0d-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
$time, CORE_ID, branch_ctl_if.wid, {commit_if.data.PC, 1'b0}, branch_ctl_if.taken, {branch_ctl_if.dest, 1'b0}, commit_if.data.uuid));
`TRACE(1, ("%d: %s-branch: wid=%0d, PC=0x%0h, taken=%b, dest=0x%0h (#%0d)\n",
$time, INSTANCE_ID, branch_ctl_if.wid, {commit_if.data.PC, 1'b0}, branch_ctl_if.taken, {branch_ctl_if.dest, 1'b0}, commit_if.data.uuid));
end
end
`endif

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_alu_muldiv #(
parameter CORE_ID = 0,
parameter `STRING INSTANCE_ID = "",
parameter NUM_LANES = 1
) (
input wire clk,
@ -26,7 +26,7 @@ module VX_alu_muldiv #(
// Outputs
VX_commit_if.master commit_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam TAG_WIDTH = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + PID_WIDTH + 1 + 1;

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_alu_unit #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -27,7 +27,7 @@ module VX_alu_unit #(
VX_branch_ctl_if.master branch_ctl_if [`NUM_ALU_BLOCKS]
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
localparam NUM_LANES = `NUM_ALU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -75,7 +75,7 @@ module VX_alu_unit #(
`RESET_RELAY (int_reset, block_reset);
VX_alu_int #(
.CORE_ID (CORE_ID),
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
.BLOCK_IDX (block_idx),
.NUM_LANES (NUM_LANES)
) alu_int (
@ -90,32 +90,32 @@ module VX_alu_unit #(
VX_execute_if #(
.NUM_LANES (NUM_LANES)
) mdv_execute_if();
) muldiv_execute_if();
VX_commit_if #(
.NUM_LANES (NUM_LANES)
) mdv_commit_if();
) muldiv_commit_if();
assign mdv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
assign mdv_execute_if.data = per_block_execute_if[block_idx].data;
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
`RESET_RELAY (mdv_reset, block_reset);
`RESET_RELAY (muldiv_reset, block_reset);
VX_alu_muldiv #(
.CORE_ID (CORE_ID),
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
.NUM_LANES (NUM_LANES)
) mdv_unit (
) muldiv_unit (
.clk (clk),
.reset (mdv_reset),
.execute_if (mdv_execute_if),
.commit_if (mdv_commit_if)
.reset (muldiv_reset),
.execute_if (muldiv_execute_if),
.commit_if (muldiv_commit_if)
);
`endif
assign per_block_execute_if[block_idx].ready =
`ifdef EXT_M_ENABLE
is_muldiv_op ? mdv_execute_if.ready :
is_muldiv_op ? muldiv_execute_if.ready :
`endif
int_execute_if.ready;
@ -130,19 +130,19 @@ module VX_alu_unit #(
.reset (block_reset),
.valid_in ({
`ifdef EXT_M_ENABLE
mdv_commit_if.valid,
muldiv_commit_if.valid,
`endif
int_commit_if.valid
}),
.ready_in ({
`ifdef EXT_M_ENABLE
mdv_commit_if.ready,
muldiv_commit_if.ready,
`endif
int_commit_if.ready
}),
.data_in ({
`ifdef EXT_M_ENABLE
mdv_commit_if.data,
muldiv_commit_if.data,
`endif
int_commit_if.data
}),

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -27,7 +27,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
VX_commit_csr_if.master commit_csr_if,
VX_commit_sched_if.master commit_sched_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + 1 + `NR_BITS + `NUM_THREADS * `XLEN + 1 + 1 + 1;
localparam COMMIT_SIZEW = `CLOG2(`NUM_THREADS + 1);
localparam COMMIT_ALL_SIZEW = COMMIT_SIZEW + `ISSUE_WIDTH - 1;
@ -171,7 +171,7 @@ module VX_commit import VX_gpu_pkg::*, VX_trace_pkg::*; #(
for (genvar j = 0; j < `NUM_EX_UNITS; ++j) begin
always @(posedge clk) begin
if (commit_if[j * `ISSUE_WIDTH + i].valid && commit_if[j * `ISSUE_WIDTH + i].ready) begin
`TRACE(1, ("%d: core%0d-commit: wid=%0d, PC=0x%0h, ex=", $time, CORE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, commit_if[j * `ISSUE_WIDTH + i].data.wid, {commit_if[j * `ISSUE_WIDTH + i].data.PC, 1'b0}));
trace_ex_type(1, j);
`TRACE(1, (", tmask=%b, wb=%0d, rd=%0d, sop=%b, eop=%b, data=", commit_if[j * `ISSUE_WIDTH + i].data.tmask, commit_if[j * `ISSUE_WIDTH + i].data.wb, commit_if[j * `ISSUE_WIDTH + i].data.rd, commit_if[j * `ISSUE_WIDTH + i].data.sop, commit_if[j * `ISSUE_WIDTH + i].data.eop));
`TRACE_ARRAY1D(1, "0x%0h", commit_if[j * `ISSUE_WIDTH + i].data.data, `NUM_THREADS);

View file

@ -18,7 +18,8 @@
`endif
module VX_core import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter CORE_ID = 0,
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -94,13 +95,14 @@ module VX_core import VX_gpu_pkg::*; #(
`SCOPE_IO_SWITCH (3)
VX_schedule #(
.INSTANCE_ID ($sformatf("%s-schedule", INSTANCE_ID)),
.CORE_ID (CORE_ID)
) schedule (
.clk (clk),
.reset (schedule_reset),
`ifdef PERF_ENABLE
.perf_schedule_if (pipeline_perf_if.schedule),
.sched_perf (pipeline_perf_if.sched),
`endif
.base_dcrs (base_dcrs),
@ -121,7 +123,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_fetch #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-fetch", INSTANCE_ID))
) fetch (
`SCOPE_IO_BIND (0)
.clk (clk),
@ -132,7 +134,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_decode #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-decode", INSTANCE_ID))
) decode (
.clk (clk),
.reset (decode_reset),
@ -142,7 +144,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_issue #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-issue", INSTANCE_ID))
) issue (
`SCOPE_IO_BIND (1)
@ -150,7 +152,7 @@ module VX_core import VX_gpu_pkg::*; #(
.reset (issue_reset),
`ifdef PERF_ENABLE
.perf_issue_if (pipeline_perf_if.issue),
.issue_perf (pipeline_perf_if.issue),
`endif
.decode_if (decode_if),
@ -159,6 +161,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_execute #(
.INSTANCE_ID ($sformatf("%s-execute", INSTANCE_ID)),
.CORE_ID (CORE_ID)
) execute (
`SCOPE_IO_BIND (2)
@ -186,7 +189,7 @@ module VX_core import VX_gpu_pkg::*; #(
);
VX_commit #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-commit", INSTANCE_ID))
) commit (
.clk (clk),
.reset (commit_reset),
@ -210,7 +213,7 @@ module VX_core import VX_gpu_pkg::*; #(
`RESET_RELAY (lmem_unit_reset, reset);
VX_lmem_unit #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-lmem", INSTANCE_ID))
) lmem_unit (
.clk (clk),
.reset (lmem_unit_reset),
@ -242,7 +245,7 @@ module VX_core import VX_gpu_pkg::*; #(
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
VX_mem_coalescer #(
.INSTANCE_ID ($sformatf("core%0d-coalescer", CORE_ID)),
.INSTANCE_ID ($sformatf("%s-coalescer%0d", INSTANCE_ID, i)),
.NUM_REQS (`NUM_LSU_LANES),
.DATA_IN_SIZE (LSU_WORD_SIZE),
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),

View file

@ -144,6 +144,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
`endif
VX_core #(
.INSTANCE_ID ($sformatf("core")),
.CORE_ID (CORE_ID)
) core (
`SCOPE_IO_BIND (0)

View file

@ -14,6 +14,7 @@
`include "VX_define.vh"
module VX_csr_unit import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0,
parameter NUM_LANES = 1
) (
@ -36,7 +37,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
VX_execute_if.slave execute_if,
VX_commit_if.master commit_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;

View file

@ -28,7 +28,7 @@
`endif
module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -43,7 +43,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
localparam DATAW = `UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + 1 + (`NR_BITS * 4);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
@ -144,6 +144,12 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
end
`endif
`STATIC_ASSERT($bits(alu_args_t) == $bits(op_args_t), ("alu_args_t size mismatch: current=%0d, expected=%0d", $bits(alu_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(fpu_args_t) == $bits(op_args_t), ("fpu_args_t size mismatch: current=%0d, expected=%0d", $bits(fpu_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(lsu_args_t) == $bits(op_args_t), ("lsu_args_t size mismatch: current=%0d, expected=%0d", $bits(lsu_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(csr_args_t) == $bits(op_args_t), ("csr_args_t size mismatch: current=%0d, expected=%0d", $bits(csr_args_t), $bits(op_args_t)));
`STATIC_ASSERT($bits(wctl_args_t) == $bits(op_args_t), ("wctl_args_t size mismatch: current=%0d, expected=%0d", $bits(wctl_args_t), $bits(op_args_t)));
always @(*) begin
ex_type = '0;
@ -551,7 +557,7 @@ module VX_decode import VX_gpu_pkg::*, VX_trace_pkg::*; #(
`ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin
if (decode_if.valid && decode_if.ready) begin
`TRACE(1, ("%d: core%0d-decode: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, CORE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
`TRACE(1, ("%d: %s: wid=%0d, PC=0x%0h, instr=0x%0h, ex=", $time, INSTANCE_ID, decode_if.data.wid, {decode_if.data.PC, 1'd0}, instr));
trace_ex_type(1, decode_if.data.ex_type);
`TRACE(1, (", op="));
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);

View file

@ -14,6 +14,7 @@
`include "VX_define.vh"
module VX_execute import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0
) (
`SCOPE_IO_DECL
@ -55,7 +56,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`RESET_RELAY (sfu_reset, reset);
VX_alu_unit #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-alu", INSTANCE_ID))
) alu_unit (
.clk (clk),
.reset (alu_reset),
@ -67,7 +68,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`SCOPE_IO_SWITCH (1)
VX_lsu_unit #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-lsu", INSTANCE_ID))
) lsu_unit (
`SCOPE_IO_BIND (0)
.clk (clk),
@ -81,7 +82,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`RESET_RELAY (fpu_reset, reset);
VX_fpu_unit #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-fpu", INSTANCE_ID))
) fpu_unit (
.clk (clk),
.reset (fpu_reset),
@ -92,6 +93,7 @@ module VX_execute import VX_gpu_pkg::*; #(
`endif
VX_sfu_unit #(
.INSTANCE_ID ($sformatf("%s-sfu", INSTANCE_ID)),
.CORE_ID (CORE_ID)
) sfu_unit (
.clk (clk),

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_fetch import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -30,7 +30,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
// outputs
VX_fetch_if.master fetch_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_VAR (reset)
wire icache_req_valid;
@ -129,45 +129,33 @@ module VX_fetch import VX_gpu_pkg::*; #(
assign icache_bus_if.rsp_ready = fetch_if.ready;
`ifdef DBG_SCOPE_FETCH
if (CORE_ID == 0) begin
`ifdef SCOPE
wire schedule_fire = schedule_if.valid && schedule_if.ready;
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
VX_scope_tap #(
.SCOPE_ID (1),
.TRIGGERW (4),
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({
reset,
schedule_fire,
icache_req_fire,
icache_rsp_fire
}),
.probes({
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
`ifdef CHIPSCOPE
ila_fetch ila_fetch_inst (
.clk (clk),
.probe0 ({reset, schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC, schedule_if.ready, schedule_if.valid}),
.probe1 ({icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
.probe2 ({icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
);
`endif
end
wire schedule_fire = schedule_if.valid && schedule_if.ready;
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
VX_scope_tap #(
.SCOPE_ID (1),
.TRIGGERW (4),
.PROBEW (`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS +
ICACHE_TAG_WIDTH + ICACHE_WORD_SIZE + ICACHE_ADDR_WIDTH +
(ICACHE_WORD_SIZE*8) + ICACHE_TAG_WIDTH)
) scope_tap (
.clk (clk),
.reset (scope_reset),
.start (1'b0),
.stop (1'b0),
.triggers ({
reset,
schedule_fire,
icache_req_fire,
icache_rsp_fire
}),
.probes ({
schedule_if.data.uuid, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.PC,
icache_bus_if.req_data.tag, icache_bus_if.req_data.byteen, icache_bus_if.req_data.addr,
icache_bus_if.rsp_data.data, icache_bus_if.rsp_data.tag
}),
.bus_in (scope_bus_in),
.bus_out (scope_bus_out)
);
`else
`SCOPE_IO_UNUSED()
`endif
@ -177,10 +165,10 @@ module VX_fetch import VX_gpu_pkg::*; #(
wire fetch_fire = fetch_if.valid && fetch_if.ready;
always @(posedge clk) begin
if (schedule_fire) begin
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
`TRACE(1, ("%d: %s req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, INSTANCE_ID, schedule_if.data.wid, {schedule_if.data.PC, 1'b0}, schedule_if.data.tmask, schedule_if.data.uuid));
end
if (fetch_fire) begin
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
`TRACE(1, ("%d: %s rsp: wid=%0d, PC=0x%0h, tmask=%b, instr=0x%0h (#%0d)\n", $time, INSTANCE_ID, fetch_if.data.wid, {fetch_if.data.PC, 1'b0}, fetch_if.data.tmask, fetch_if.data.instr, fetch_if.data.uuid));
end
end
`endif

View file

@ -14,7 +14,7 @@
`include "VX_fpu_define.vh"
module VX_fpu_unit import VX_fpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -26,7 +26,7 @@ module VX_fpu_unit import VX_fpu_pkg::*; #(
VX_commit_if.master commit_if [`ISSUE_WIDTH],
VX_fpu_csr_if.master fpu_csr_if[`NUM_FPU_BLOCKS]
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = `NUM_FPU_BLOCKS;
localparam NUM_LANES = `NUM_FPU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,11 +14,11 @@
`include "VX_define.vh"
module VX_lmem_unit import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
@ -42,14 +42,14 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
`RESET_RELAY (req_reset, reset);
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL];
end
wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask);
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
wire is_addr_local = | (lsu_mem_in_if[i].req_data.mask & is_addr_local_mask);
wire req_global_ready;
wire req_local_ready;
@ -61,7 +61,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
) req_global_buf (
.clk (clk),
.reset (req_reset),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_global),
.data_in ({
lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask,
lsu_mem_in_if[i].req_data.rw,
@ -81,7 +81,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
lsu_mem_out_if[i].req_data.atype,
lsu_mem_out_if[i].req_data.data,
lsu_mem_out_if[i].req_data.tag
}),
}),
.ready_out (lsu_mem_out_if[i].req_ready)
);
@ -92,7 +92,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
) req_local_buf (
.clk (clk),
.reset (req_reset),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
.valid_in (lsu_mem_in_if[i].req_valid && is_addr_local),
.data_in ({
lsu_mem_in_if[i].req_data.mask & is_addr_local_mask,
lsu_mem_in_if[i].req_data.rw,
@ -112,18 +112,18 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
lmem_lsu_if[i].req_data.atype,
lmem_lsu_if[i].req_data.data,
lmem_lsu_if[i].req_data.tag
}),
}),
.ready_out (lmem_lsu_if[i].req_ready)
);
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
assign lsu_mem_in_if[i].req_ready = (req_global_ready && is_addr_global)
|| (req_local_ready && is_addr_local);
end
`RESET_RELAY (rsp_reset, reset);
for (genvar i = 0; i < `NUM_LSU_BLOCKS; ++i) begin
wire rsp_arb_valid;
wire rsp_arb_index;
wire rsp_arb_ready;
@ -135,7 +135,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
) arbiter (
.clk (clk),
.reset (rsp_reset),
.requests ({
.requests ({
lmem_lsu_if[i].rsp_valid,
lsu_mem_out_if[i].rsp_valid
}),
@ -144,7 +144,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
`UNUSED_PIN (grant_onehot),
.grant_unlock(rsp_arb_ready)
);
VX_elastic_buffer #(
.DATAW (RSP_DATAW),
.SIZE (2),
@ -152,7 +152,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
) rsp_buf (
.clk (clk),
.reset (rsp_reset),
.valid_in (rsp_arb_valid),
.valid_in (rsp_arb_valid),
.data_in ({
rsp_arb_index ? lmem_lsu_if[i].rsp_data.mask : lsu_mem_out_if[i].rsp_data.mask,
rsp_arb_index ? lmem_lsu_if[i].rsp_data.data : lsu_mem_out_if[i].rsp_data.data,
@ -161,10 +161,10 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
.ready_in (rsp_arb_ready),
.valid_out (lsu_mem_in_if[i].rsp_valid),
.data_out ({
lsu_mem_in_if[i].rsp_data.mask,
lsu_mem_in_if[i].rsp_data.data,
lsu_mem_in_if[i].rsp_data.mask,
lsu_mem_in_if[i].rsp_data.data,
lsu_mem_in_if[i].rsp_data.tag
}),
}),
.ready_out (lsu_mem_in_if[i].rsp_ready)
);
@ -187,7 +187,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
VX_lsu_adapter #(
.NUM_LANES (`NUM_LSU_LANES),
.DATA_SIZE (LSU_WORD_SIZE),
.DATA_SIZE (LSU_WORD_SIZE),
.TAG_WIDTH (LSU_TAG_WIDTH),
.TAG_SEL_BITS (LSU_TAG_WIDTH - `UUID_WIDTH),
.REQ_OUT_BUF (2),
@ -205,17 +205,17 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
end
`RESET_RELAY (lmem_reset, reset);
VX_local_mem #(
.INSTANCE_ID($sformatf("core%0d-lmem", CORE_ID)),
.INSTANCE_ID($sformatf("%s-lmem", INSTANCE_ID)),
.SIZE (1 << `LMEM_LOG_SIZE),
.NUM_REQS (LSU_NUM_REQS),
.NUM_BANKS (`LMEM_NUM_BANKS),
.WORD_SIZE (LSU_WORD_SIZE),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.TAG_WIDTH (LSU_TAG_WIDTH)
) local_mem (
) local_mem (
.clk (clk),
.reset (lmem_reset),
`ifdef PERF_ENABLE

View file

@ -13,9 +13,8 @@
`include "VX_define.vh"
module VX_lsu_slice import VX_gpu_pkg::*; #(
parameter CORE_ID = 0,
parameter BLOCK_ID = 0
module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
@ -312,7 +311,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`RESET_RELAY (mem_scheduler_reset, reset);
VX_mem_scheduler #(
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched%0d", CORE_ID, BLOCK_ID)),
.INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)),
.CORE_REQS (NUM_LANES),
.MEM_CHANNELS(NUM_LANES),
.WORD_SIZE (LSU_WORD_SIZE),
@ -504,11 +503,11 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`ifdef DBG_TRACE_MEM
always @(posedge clk) begin
if (execute_if.valid && fence_lock) begin
`TRACE(1, ("%d: *** D$%0d fence wait\n", $time, CORE_ID));
`TRACE(1, ("%d: *** %s fence wait\n", $time, INSTANCE_ID));
end
if (mem_req_fire) begin
if (mem_req_rw) begin
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
@ -516,7 +515,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid));
end else begin
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
@ -524,8 +523,8 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
end
end
if (mem_rsp_fire) begin
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
$time, CORE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
`TRACE(1, ("%d: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid));
end
@ -533,36 +532,20 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
`endif
`ifdef DBG_SCOPE_LSU
if (CORE_ID == 0 && BLOCK_ID == 0) begin
`ifdef SCOPE
VX_scope_tap #(
.SCOPE_ID (3),
.TRIGGERW (3),
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({reset, mem_req_fire, mem_rsp_fire}),
.probes({execute_if.data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
`ifdef CHIPSCOPE
wire [31:0] full_addr_0 = full_addr[0];
wire [31:0] mem_req_data_0 = mem_req_data[0];
wire [31:0] rsp_data_0 = rsp_data[0];
ila_lsu ila_lsu_inst (
.clk (clk),
.probe0 ({mem_req_data_0, execute_if.data.uuid, execute_if.data.wid, execute_if.data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}),
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, mem_rsp_mask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
.probe2 ({lsu_mem_if.req_data.data, lsu_mem_if.req_data.tag, lsu_mem_if.req_data.byteen, lsu_mem_if.req_data.addr, lsu_mem_if.req_data.rw, lsu_mem_if.req_ready, lsu_mem_if.req_valid}),
.probe3 ({lsu_mem_if.rsp_data.data, lsu_mem_if.rsp_data.tag, lsu_mem_if.rsp_ready, lsu_mem_if.rsp_valid})
);
`endif
end
VX_scope_tap #(
.SCOPE_ID (3),
.TRIGGERW (3),
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
) scope_tap (
.clk (clk),
.reset (scope_reset),
.start (1'b0),
.stop (1'b0),
.triggers ({reset, mem_req_fire, mem_rsp_fire}),
.probes ({execute_if.data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
.bus_in (scope_bus_in),
.bus_out (scope_bus_out)
);
`else
`SCOPE_IO_UNUSED()
`endif

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,8 +14,8 @@
`include "VX_define.vh"
module VX_lsu_unit import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
) (
parameter `STRING INSTANCE_ID = ""
) (
`SCOPE_IO_DECL
input wire clk,
@ -24,7 +24,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
// Inputs
VX_dispatch_if.slave dispatch_if [`ISSUE_WIDTH],
// Outputs
// Outputs
VX_commit_if.master commit_if [`ISSUE_WIDTH],
VX_lsu_mem_if.master lsu_mem_if [`NUM_LSU_BLOCKS]
);
@ -32,10 +32,9 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
localparam NUM_LANES = `NUM_LSU_LANES;
`ifdef SCOPE
localparam scope_lsu = 0;
`SCOPE_IO_SWITCH (BLOCK_SIZE);
`endif
VX_execute_if #(
.NUM_LANES (NUM_LANES)
) per_block_execute_if[BLOCK_SIZE]();
@ -55,15 +54,14 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
.NUM_LANES (NUM_LANES)
) per_block_commit_if[BLOCK_SIZE]();
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : lsu_slices
`RESET_RELAY (block_reset, reset);
VX_lsu_slice #(
.CORE_ID (CORE_ID),
.BLOCK_ID (block_idx)
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, block_idx))
) lsu_slice(
`SCOPE_IO_BIND (scope_lsu+block_idx)
`SCOPE_IO_BIND (block_idx)
.clk (clk),
.reset (block_reset),
.execute_if (per_block_execute_if[block_idx]),
@ -82,5 +80,5 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
.commit_in_if (per_block_commit_if),
.commit_out_if (commit_if)
);
endmodule

View file

@ -14,13 +14,14 @@
`include "VX_define.vh"
module VX_schedule import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
`ifdef PERF_ENABLE
VX_pipeline_perf_if.schedule perf_schedule_if,
output sched_perf_t sched_perf,
`endif
// configuration
@ -42,6 +43,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
// status
output wire busy
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (CORE_ID)
reg [`NUM_WARPS-1:0] active_warps, active_warps_n; // updated when a warp is activated or disabled
@ -290,7 +292,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
`RESET_RELAY (split_join_reset, reset);
VX_split_join #(
.CORE_ID (CORE_ID)
.INSTANCE_ID ($sformatf("%s-splitjoin", INSTANCE_ID))
) split_join (
.clk (clk),
.reset (split_join_reset),
@ -412,7 +414,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
end
end
end
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps))
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** %s-scheduler-timeout: stalled_warps=%b", $time, INSTANCE_ID, stalled_warps))
`ifdef PERF_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_sched_idles;
@ -431,8 +433,8 @@ module VX_schedule import VX_gpu_pkg::*; #(
end
end
assign perf_schedule_if.sched_idles = perf_sched_idles;
assign perf_schedule_if.sched_stalls = perf_sched_stalls;
assign sched_perf.idles = perf_sched_idles;
assign sched_perf.stalls = perf_sched_stalls;
`endif
endmodule

View file

@ -14,6 +14,7 @@
`include "VX_define.vh"
module VX_sfu_unit import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter CORE_ID = 0
) (
input wire clk,
@ -39,7 +40,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
VX_commit_if.master commit_if [`ISSUE_WIDTH],
VX_warp_ctl_if.master warp_ctl_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = 1;
localparam NUM_LANES = `NUM_SFU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
@ -83,7 +84,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
`RESET_RELAY (wctl_reset, reset);
VX_wctl_unit #(
.CORE_ID (CORE_ID),
.INSTANCE_ID ($sformatf("%s-wctl", INSTANCE_ID)),
.NUM_LANES (NUM_LANES)
) wctl_unit (
.clk (clk),
@ -111,6 +112,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
`RESET_RELAY (csr_reset, reset);
VX_csr_unit #(
.INSTANCE_ID ($sformatf("%s-csr", INSTANCE_ID)),
.CORE_ID (CORE_ID),
.NUM_LANES (NUM_LANES)
) csr_unit (

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_split_join import VX_gpu_pkg::*; #(
parameter CORE_ID = 0
parameter `STRING INSTANCE_ID = ""
) (
input wire clk,
input wire reset,
@ -31,7 +31,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
input wire [`NW_WIDTH-1:0] stack_wid,
output wire [`DV_STACK_SIZEW-1:0] stack_ptr
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
wire [(`NUM_THREADS+`PC_BITS)-1:0] ipdom_data [`NUM_WARPS-1:0];
wire [`DV_STACK_SIZEW-1:0] ipdom_q_ptr [`NUM_WARPS-1:0];

View file

@ -14,7 +14,7 @@
`include "VX_define.vh"
module VX_wctl_unit import VX_gpu_pkg::*; #(
parameter CORE_ID = 0,
parameter `STRING INSTANCE_ID = "",
parameter NUM_LANES = 1
) (
input wire clk,
@ -27,7 +27,7 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
VX_warp_ctl_if.master warp_ctl_if,
VX_commit_if.master commit_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_SPARAM (INSTANCE_ID)
localparam LANE_BITS = `CLOG2(NUM_LANES);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -13,39 +13,29 @@
`include "VX_define.vh"
interface VX_pipeline_perf_if ();
wire [`PERF_CTR_BITS-1:0] sched_idles;
wire [`PERF_CTR_BITS-1:0] sched_stalls;
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
wire [`PERF_CTR_BITS-1:0] scb_stalls;
wire [`PERF_CTR_BITS-1:0] units_uses [`NUM_EX_UNITS];
wire [`PERF_CTR_BITS-1:0] sfu_uses [`NUM_SFU_UNITS];
interface VX_pipeline_perf_if import VX_gpu_pkg::*; ();
sched_perf_t sched;
issue_perf_t issue;
wire [`PERF_CTR_BITS-1:0] ifetches;
wire [`PERF_CTR_BITS-1:0] loads;
wire [`PERF_CTR_BITS-1:0] stores;
wire [`PERF_CTR_BITS-1:0] stores;
wire [`PERF_CTR_BITS-1:0] ifetch_latency;
wire [`PERF_CTR_BITS-1:0] load_latency;
modport schedule (
output sched_idles,
output sched_stalls
);
modport issue (
output ibf_stalls,
output scb_stalls,
output units_uses,
output sfu_uses
modport master (
output sched,
output issue,
output ifetches,
output loads,
output stores,
output ifetch_latency,
output load_latency
);
modport slave (
input sched_idles,
input sched_stalls,
input ibf_stalls,
input scb_stalls,
input units_uses,
input sfu_uses,
input sched,
input issue,
input ifetches,
input loads,
input stores,

View file

@ -175,6 +175,17 @@ public:
ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
Stats::statlist.output("ramulator.ddr4.log");
// dump device configuration
std::cout << "CONFIGS:"
<< " num_threads=" << NUM_THREADS
<< ", num_warps=" << NUM_WARPS
<< ", num_cores=" << NUM_CORES
<< ", num_clusters=" << NUM_CLUSTERS
<< ", socket_size=" << SOCKET_SIZE
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
<< ", num_barriers=" << NUM_BARRIERS
<< std::endl;
// reset the device
this->reset();

View file

@ -145,6 +145,17 @@ public:
dram_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
Stats::statlist.output("ramulator.ddr4.log");
// dump device configuration
std::cout << "CONFIGS:"
<< " num_threads=" << NUM_THREADS
<< ", num_warps=" << NUM_WARPS
<< ", num_cores=" << NUM_CORES
<< ", num_clusters=" << NUM_CLUSTERS
<< ", socket_size=" << SOCKET_SIZE
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
<< ", num_barriers=" << NUM_BARRIERS
<< std::endl;
// reset the device
this->reset();

View file

@ -29,11 +29,7 @@ private:
uint16_t num_cores_;
uint16_t num_clusters_;
uint16_t socket_size_;
uint16_t vsize_;
uint16_t num_regs_;
uint16_t num_csrs_;
uint16_t num_barriers_;
uint16_t ipdom_size_;
uint64_t local_mem_base_;
public:
@ -43,26 +39,10 @@ public:
, num_cores_(num_cores)
, num_clusters_(NUM_CLUSTERS)
, socket_size_(SOCKET_SIZE)
, vsize_(16)
, num_regs_(32)
, num_csrs_(4096)
, num_barriers_(NUM_BARRIERS)
, ipdom_size_((num_threads-1) * 2)
, local_mem_base_(LMEM_BASE_ADDR)
{}
uint16_t vsize() const {
return vsize_;
}
uint16_t num_regs() const {
return num_regs_;
}
uint16_t num_csrs() const {
return num_csrs_;
}
uint16_t num_barriers() const {
return num_barriers_;
}
@ -71,10 +51,6 @@ public:
return local_mem_base_;
}
uint16_t ipdom_size() const {
return ipdom_size_;
}
uint16_t num_threads() const {
return num_threads_;
}

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -31,4 +31,6 @@
#define DCACHE_WORD_SIZE LSU_LINE_SIZE
#define DCACHE_CHANNELS UP((NUM_LSU_LANES * (XLEN / 8)) / DCACHE_WORD_SIZE)
#define DCACHE_NUM_REQS (NUM_LSU_BLOCKS * DCACHE_CHANNELS)
#define DCACHE_NUM_REQS (NUM_LSU_BLOCKS * DCACHE_CHANNELS)
#define NUM_SOCKETS UP(NUM_CORES / SOCKET_SIZE)

View file

@ -42,8 +42,8 @@ Emulator::ipdom_entry_t::ipdom_entry_t(const ThreadMask &tmask)
{}
Emulator::warp_t::warp_t(const Arch& arch)
: ireg_file(arch.num_threads(), std::vector<Word>(arch.num_regs()))
, freg_file(arch.num_threads(), std::vector<uint64_t>(arch.num_regs()))
: ireg_file(arch.num_threads(), std::vector<Word>(MAX_NUM_REGS))
, freg_file(arch.num_threads(), std::vector<uint64_t>(MAX_NUM_REGS))
, uuid(0)
{}
@ -74,6 +74,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core)
, core_(core)
, warps_(arch.num_warps(), arch)
, barriers_(arch.num_barriers(), 0)
, ipdom_size_((arch.num_threads()-1) * 2)
{
this->clear();
}
@ -186,7 +187,7 @@ instr_trace_t* Emulator::step() {
this->execute(*instr, scheduled_warp, trace);
DP(5, "Register state:");
for (uint32_t i = 0; i < arch_.num_regs(); ++i) {
for (uint32_t i = 0; i < MAX_NUM_REGS; ++i) {
DPN(5, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
// Integer register file
for (uint32_t j = 0; j < arch_.num_threads(); ++j) {

View file

@ -119,6 +119,7 @@ private:
std::vector<WarpMask> barriers_;
std::unordered_map<int, std::stringstream> print_bufs_;
MemoryUnit mmu_;
uint32_t ipdom_size_;
Word csr_mscratch_;
wspawn_t wspawn_;
};

View file

@ -1336,7 +1336,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
bool is_divergent = then_tmask.any() && else_tmask.any();
if (is_divergent) {
if (stack_size == arch_.ipdom_size()) {
if (stack_size == ipdom_size_) {
std::cout << "IPDOM stack is full! size=" << std::dec << stack_size << ", PC=0x" << std::hex << warp.PC << " (#" << std::dec << trace->uuid << ")\n" << std::flush;
std::abort();
}

View file

@ -70,6 +70,17 @@ ProcessorImpl::ProcessorImpl(const Arch& arch)
--perf_mem_pending_reads_;
});
// dump device configuration
std::cout << "CONFIGS:"
<< " num_threads=" << arch.num_threads()
<< ", num_warps=" << arch.num_warps()
<< ", num_cores=" << arch.num_cores()
<< ", num_clusters=" << arch.num_clusters()
<< ", socket_size=" << arch.socket_size()
<< ", local_mem_base=0x" << std::hex << arch.local_mem_base() << std::dec
<< ", num_barriers=" << arch.num_barriers()
<< std::endl;
this->reset();
}

View file

@ -164,6 +164,17 @@ public:
ramulator_ = new ramulator::Gem5Wrapper(ram_config, MEM_BLOCK_SIZE);
Stats::statlist.output("ramulator.ddr4.log");
// dump device configuration
std::cout << "CONFIGS:"
<< " num_threads=" << NUM_THREADS
<< ", num_warps=" << NUM_WARPS
<< ", num_cores=" << NUM_CORES
<< ", num_clusters=" << NUM_CLUSTERS
<< ", socket_size=" << SOCKET_SIZE
<< ", local_mem_base=0x" << std::hex << LMEM_BASE_ADDR << std::dec
<< ", num_barriers=" << NUM_BARRIERS
<< std::endl;
// reset the device
this->reset();