mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 13:57:17 -04:00
bug fixes
This commit is contained in:
parent
dccf5937ff
commit
92d0092e39
25 changed files with 438 additions and 234 deletions
|
@ -80,7 +80,7 @@ def parse_simx(log_lines):
|
|||
elif line.startswith("DEBUG Dest"):
|
||||
instr_data["destination"] = re.search(destination_pattern, line).group(1)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
print("Error: {}; {}".format(e, line))
|
||||
instr_data = None
|
||||
if instr_data:
|
||||
entries.append(instr_data)
|
||||
|
@ -116,9 +116,28 @@ def append_value(text, reg, value, tmask_arr, sep):
|
|||
text += "}"
|
||||
return text, sep
|
||||
|
||||
def simd_data(sub_array, index, count, default=0):
    """Expand one SIMD slice into a full-width list padded with ``default``.

    A list of ``count`` entries is created, every entry set to ``default``.
    ``sub_array`` is then copied into the window that starts at
    ``index * len(sub_array)``, provided the whole window lies inside the
    list.

    Args:
        sub_array: values belonging to a single slice.
        index: position of the slice, counted in units of ``len(sub_array)``.
        count: length of the returned list.
        default: filler used for every entry outside the slice.

    Returns:
        A new list of length ``count``. When the window does not fit
        (negative ``index`` or one past the end), the list contains only
        ``default`` values.
    """
    size = len(sub_array)
    new_array = [default] * count
    start_index = index * size
    # The lower bound matters: a negative start would turn the slice
    # assignment below into an insertion and grow the list beyond `count`.
    if 0 <= start_index and start_index + size <= count:
        new_array[start_index:start_index + size] = sub_array
    return new_array
|
||||
|
||||
def merge_data(trace, key, new_data, mask):
    """Merge per-lane values into ``trace[key]`` under control of ``mask``.

    If ``key`` is not yet present, ``new_data`` is stored as-is (the mask is
    not consulted). Otherwise the list already stored under ``key`` is
    updated in place: every position whose mask entry equals ``1`` takes the
    value from ``new_data``; all other positions keep their current value.

    Args:
        trace: dictionary that receives the merged list.
        key: entry of ``trace`` to create or update.
        new_data: list of candidate values, indexed like ``mask``.
        mask: sequence of flags; only entries equal to ``1`` select a value.

    Raises:
        IndexError: if ``new_data`` or the stored list is shorter than the
            position of an enabled mask entry.
    """
    if key not in trace:
        trace[key] = new_data
        return
    merged_data = trace[key]
    for i, enabled in enumerate(mask):
        if enabled == 1:
            merged_data[i] = new_data[i]
|
||||
|
||||
def parse_rtlsim(log_lines):
|
||||
global configs
|
||||
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
|
||||
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue\d+|commit):"
|
||||
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
|
||||
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
|
||||
ex_pattern = r"ex=([a-zA-Z]+)"
|
||||
|
@ -126,7 +145,8 @@ def parse_rtlsim(log_lines):
|
|||
warp_id_pattern = r"wid=(\d+)"
|
||||
tmask_pattern = r"tmask=(\d+)"
|
||||
wb_pattern = r"wb=(\d)"
|
||||
opds_pattern = r"opds=(\d+)"
|
||||
used_rs_pattern = r"used_rs=(\d+)"
|
||||
sid_pattern = r"sid=(\d+)"
|
||||
rd_pattern = r"rd=(\d+)"
|
||||
rs1_pattern = r"rs1=(\d+)"
|
||||
rs2_pattern = r"rs2=(\d+)"
|
||||
|
@ -141,6 +161,7 @@ def parse_rtlsim(log_lines):
|
|||
instr_data = {}
|
||||
num_cores = configs['num_cores']
|
||||
socket_size = configs['socket_size']
|
||||
num_threads = configs['num_threads']
|
||||
num_sockets = (num_cores + socket_size - 1) // socket_size
|
||||
for lineno, line in enumerate(log_lines, start=1):
|
||||
try:
|
||||
|
@ -163,42 +184,37 @@ def parse_rtlsim(log_lines):
|
|||
trace["tmask"] = reverse_binary(tmask)
|
||||
trace["instr"] = re.search(instr_pattern, line).group(1)
|
||||
trace["opcode"] = re.search(op_pattern, line).group(1)
|
||||
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
|
||||
trace["used_rs"] = bin_to_array(reverse_binary(re.search(used_rs_pattern, line).group(1)))
|
||||
trace["rd"] = re.search(rd_pattern, line).group(1)
|
||||
trace["rs1"] = re.search(rs1_pattern, line).group(1)
|
||||
trace["rs2"] = re.search(rs2_pattern, line).group(1)
|
||||
trace["rs3"] = re.search(rs3_pattern, line).group(1)
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "issue":
|
||||
elif re.match(r"issue\d+", stage):
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
sid = int(re.search(sid_pattern, line).group(1))
|
||||
src_tmask_arr = simd_data(bin_to_array(tmask)[::-1], sid, num_threads, 0)
|
||||
trace["lineno"] = lineno
|
||||
opds = trace["opds"]
|
||||
if opds[1]:
|
||||
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if opds[2]:
|
||||
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if opds[3]:
|
||||
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
|
||||
used_rs = trace["used_rs"]
|
||||
if used_rs[0]:
|
||||
merge_data(trace, 'rs1_data', simd_data(re.search(rs1_data_pattern, line).group(1).split(', ')[::-1], sid, num_threads, '0x0'), src_tmask_arr)
|
||||
if used_rs[1]:
|
||||
merge_data(trace, 'rs2_data', simd_data(re.search(rs2_data_pattern, line).group(1).split(', ')[::-1], sid, num_threads, '0x0'), src_tmask_arr)
|
||||
if used_rs[2]:
|
||||
merge_data(trace, 'rs3_data', simd_data(re.search(rs3_data_pattern, line).group(1).split(', ')[::-1], sid, num_threads, '0x0'), src_tmask_arr)
|
||||
trace["issued"] = True
|
||||
instr_data[uuid] = trace
|
||||
elif stage == "commit":
|
||||
if uuid in instr_data:
|
||||
trace = instr_data[uuid]
|
||||
if "issued" in trace:
|
||||
opds = trace["opds"]
|
||||
dst_tmask_arr = bin_to_array(tmask)[::-1]
|
||||
sid = int(re.search(sid_pattern, line).group(1))
|
||||
used_rs = trace["used_rs"]
|
||||
dst_tmask_arr = simd_data(bin_to_array(tmask)[::-1], sid, num_threads, 0)
|
||||
wb = re.search(wb_pattern, line).group(1) == "1"
|
||||
if wb:
|
||||
rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
|
||||
if 'rd_data' in trace:
|
||||
merged_rd_data = trace['rd_data']
|
||||
for i in range(len(dst_tmask_arr)):
|
||||
if dst_tmask_arr[i] == 1:
|
||||
merged_rd_data[i] = rd_data[i]
|
||||
trace['rd_data'] = merged_rd_data
|
||||
else:
|
||||
trace['rd_data'] = rd_data
|
||||
merge_data(trace, 'rd_data', simd_data(re.search(rd_data_pattern, line).group(1).split(', ')[::-1], sid, num_threads, '0x0'), dst_tmask_arr)
|
||||
instr_data[uuid] = trace
|
||||
eop = re.search(eop_pattern, line).group(1) == "1"
|
||||
if eop:
|
||||
|
@ -210,17 +226,17 @@ def parse_rtlsim(log_lines):
|
|||
trace["destination"] = destination
|
||||
operands = ''
|
||||
sep = False
|
||||
if opds[1]:
|
||||
if used_rs[0]:
|
||||
operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
|
||||
del trace["rs1_data"]
|
||||
if opds[2]:
|
||||
if used_rs[1]:
|
||||
operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
|
||||
del trace["rs2_data"]
|
||||
if opds[3]:
|
||||
if used_rs[2]:
|
||||
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
|
||||
del trace["rs3_data"]
|
||||
trace["operands"] = operands
|
||||
del trace["opds"]
|
||||
del trace["used_rs"]
|
||||
del trace["rd"]
|
||||
del trace["rs1"]
|
||||
del trace["rs2"]
|
||||
|
@ -229,7 +245,7 @@ def parse_rtlsim(log_lines):
|
|||
del instr_data[uuid]
|
||||
entries.append(trace)
|
||||
except Exception as e:
|
||||
print("Error at line {}: {}".format(lineno, e))
|
||||
print("Error: {}; {}".format(e, line))
|
||||
return entries
|
||||
|
||||
def write_csv(sublogs, csv_filename, log_type):
|
||||
|
|
14
hw/rtl/cache/VX_cache_mshr.sv
vendored
14
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -122,13 +122,13 @@ module VX_cache_mshr import VX_gpu_pkg::*; #(
|
|||
assign addr_matches[i] = valid_table[i] && (addr_table[i] == allocate_addr);
|
||||
end
|
||||
|
||||
VX_lzc #(
|
||||
.N (MSHR_SIZE),
|
||||
.REVERSE (1)
|
||||
VX_priority_encoder #(
|
||||
.N (MSHR_SIZE)
|
||||
) allocate_sel (
|
||||
.data_in (~valid_table_n),
|
||||
.data_out (allocate_id_n),
|
||||
.valid_out (allocate_rdy_n)
|
||||
.index_out (allocate_id_n),
|
||||
.valid_out (allocate_rdy_n),
|
||||
`UNUSED_PIN (onehot_out)
|
||||
);
|
||||
|
||||
// find matching tail-entry
|
||||
|
@ -137,8 +137,8 @@ module VX_cache_mshr import VX_gpu_pkg::*; #(
|
|||
) prev_sel (
|
||||
.data_in (addr_matches & ~next_table_x),
|
||||
.index_out (prev_idx),
|
||||
`UNUSED_PIN (onehot_out),
|
||||
`UNUSED_PIN (valid_out)
|
||||
`UNUSED_PIN (valid_out),
|
||||
`UNUSED_PIN (onehot_out)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
|
|
|
@ -136,15 +136,23 @@ module VX_alu_int import VX_gpu_pkg::*; #(
|
|||
wire [PC_BITS-1:0] PC_r;
|
||||
wire [INST_BR_BITS-1:0] br_op_r;
|
||||
wire [PC_BITS-1:0] cbr_dest, cbr_dest_r;
|
||||
wire [LANE_WIDTH-1:0] tid, tid_r;
|
||||
wire [LANE_WIDTH-1:0] last_tid, last_tid_r;
|
||||
wire is_br_op_r;
|
||||
|
||||
assign cbr_dest = add_result[0][1 +: PC_BITS];
|
||||
|
||||
if (LANE_BITS != 0) begin : g_tid
|
||||
assign tid = execute_if.data.tid[0 +: LANE_BITS];
|
||||
if (LANE_BITS != 0) begin : g_last_tid
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_LANES),
|
||||
.REVERSE (1)
|
||||
) last_tid_sel (
|
||||
.data_in (execute_if.data.tmask),
|
||||
.index_out (last_tid),
|
||||
`UNUSED_PIN (onehot_out),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
end else begin : g_tid_0
|
||||
assign tid = 0;
|
||||
assign last_tid = 0;
|
||||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
|
@ -154,8 +162,8 @@ module VX_alu_int import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (execute_if.valid),
|
||||
.ready_in (execute_if.ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, cbr_dest, is_br_op, br_op, tid}),
|
||||
.data_out ({result_if.data.uuid, result_if.data.wid, result_if.data.tmask, result_if.data.rd, result_if.data.wb, result_if.data.pid, result_if.data.sop, result_if.data.eop, alu_result_r, PC_r, cbr_dest_r, is_br_op_r, br_op_r, tid_r}),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, cbr_dest, is_br_op, br_op, last_tid}),
|
||||
.data_out ({result_if.data.uuid, result_if.data.wid, result_if.data.tmask, result_if.data.rd, result_if.data.wb, result_if.data.pid, result_if.data.sop, result_if.data.eop, alu_result_r, PC_r, cbr_dest_r, is_br_op_r, br_op_r, last_tid_r}),
|
||||
.valid_out (result_if.valid),
|
||||
.ready_out (result_if.ready)
|
||||
);
|
||||
|
@ -165,7 +173,7 @@ module VX_alu_int import VX_gpu_pkg::*; #(
|
|||
wire is_br_less = inst_br_is_less(br_op_r);
|
||||
wire is_br_static = inst_br_is_static(br_op_r);
|
||||
|
||||
wire [`XLEN-1:0] br_result = alu_result_r[tid_r];
|
||||
wire [`XLEN-1:0] br_result = alu_result_r[last_tid_r];
|
||||
wire is_less = br_result[0];
|
||||
wire is_equal = br_result[1];
|
||||
|
||||
|
|
|
@ -573,8 +573,8 @@ module VX_decode import VX_gpu_pkg::*; #(
|
|||
trace_ex_type(1, decode_if.data.ex_type);
|
||||
`TRACE(1, (", op="))
|
||||
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, opds=%b%b%b%b",
|
||||
decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3))
|
||||
`TRACE(1, (", tmask=%b, wb=%b, used_rs=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d",
|
||||
decode_if.data.tmask, decode_if.data.wb, decode_if.data.used_rs, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3))
|
||||
trace_op_args(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", decode_if.data.uuid))
|
||||
end
|
||||
|
|
|
@ -32,25 +32,7 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (ISSUE_ID)
|
||||
|
||||
localparam DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + NT_WIDTH;
|
||||
|
||||
wire [`SIMD_WIDTH-1:0][NT_WIDTH-1:0] tids;
|
||||
for (genvar i = 0; i < `SIMD_WIDTH; ++i) begin : g_tids
|
||||
assign tids[i] = NT_WIDTH'(i);
|
||||
end
|
||||
|
||||
wire [NT_WIDTH-1:0] last_active_tid;
|
||||
|
||||
VX_find_first #(
|
||||
.N (`SIMD_WIDTH),
|
||||
.DATAW (NT_WIDTH),
|
||||
.REVERSE (1)
|
||||
) last_tid_select (
|
||||
.valid_in (operands_if.data.tmask),
|
||||
.data_in (tids),
|
||||
.data_out (last_active_tid),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
localparam DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + 1 + 1;
|
||||
|
||||
wire [NUM_EX_UNITS-1:0] operands_ready_in;
|
||||
assign operands_if.ready = operands_ready_in[operands_if.data.ex_type];
|
||||
|
@ -75,10 +57,11 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
operands_if.data.op_args,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.rd,
|
||||
last_active_tid,
|
||||
operands_if.data.rs1_data,
|
||||
operands_if.data.rs2_data,
|
||||
operands_if.data.rs3_data
|
||||
operands_if.data.rs3_data,
|
||||
operands_if.data.sop,
|
||||
operands_if.data.eop
|
||||
}),
|
||||
.data_out (dispatch_if[i].data),
|
||||
.valid_out (dispatch_if[i].valid),
|
||||
|
|
|
@ -39,12 +39,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
localparam BATCH_COUNT = `ISSUE_WIDTH / BLOCK_SIZE;
|
||||
localparam BATCH_COUNT_W= `LOG2UP(BATCH_COUNT);
|
||||
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam IN_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + INST_OP_BITS + INST_ARGS_BITS + 1 + PC_BITS + NR_BITS + NT_WIDTH + (3 * `SIMD_WIDTH * `XLEN);
|
||||
localparam OUT_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + INST_OP_BITS + INST_ARGS_BITS + 1 + PC_BITS + NR_BITS + NT_WIDTH + (3 * NUM_LANES * `XLEN) + GPID_WIDTH + 1 + 1;
|
||||
localparam IN_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + INST_OP_BITS + INST_ARGS_BITS + 1 + PC_BITS + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + 1 + 1;
|
||||
localparam OUT_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + INST_OP_BITS + INST_ARGS_BITS + 1 + PC_BITS + NR_BITS + (NUM_SRC_OPDS * NUM_LANES * `XLEN) + GPID_WIDTH + 1 + 1;
|
||||
localparam FANOUT_ENABLE= (`SIMD_WIDTH > (MAX_FANOUT + MAX_FANOUT /2));
|
||||
|
||||
localparam DATA_TMASK_OFF = IN_DATAW - (UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH);
|
||||
localparam DATA_REGS_OFF = 0;
|
||||
localparam DATA_REGS_OFF = 1 + 1;
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] dispatch_valid;
|
||||
wire [`ISSUE_WIDTH-1:0][IN_DATAW-1:0] dispatch_data;
|
||||
|
@ -112,6 +112,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx];
|
||||
wire [ISSUE_WIS_W-1:0] dispatch_wis = dispatch_data[issue_idx][DATA_TMASK_OFF + `SIMD_WIDTH + SIMD_IDX_W +: ISSUE_WIS_W];
|
||||
wire [SIMD_IDX_W-1:0] dispatch_sid = dispatch_data[issue_idx][DATA_TMASK_OFF + `SIMD_WIDTH +: SIMD_IDX_W];
|
||||
wire dispatch_sop = dispatch_data[issue_idx][1];
|
||||
wire dispatch_eop = dispatch_data[issue_idx][0];
|
||||
|
||||
wire [`SIMD_WIDTH-1:0] dispatch_tmask = dispatch_data[issue_idx][DATA_TMASK_OFF +: `SIMD_WIDTH];
|
||||
wire [`SIMD_WIDTH-1:0][`XLEN-1:0] dispatch_rs1_data = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `SIMD_WIDTH * `XLEN +: `SIMD_WIDTH * `XLEN];
|
||||
|
@ -245,8 +247,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_wis, isw);
|
||||
wire [GPID_WIDTH-1:0] warp_pid = GPID_WIDTH'(block_pid[block_idx]) + GPID_WIDTH'(dispatch_sid * NUM_PACKETS);
|
||||
wire warp_sop = block_sop[block_idx] && (dispatch_sid == 0);
|
||||
wire warp_eop = block_eop[block_idx] && (dispatch_sid == SIMD_IDX_W'(SIMD_COUNT-1));
|
||||
wire warp_sop = block_sop[block_idx] && dispatch_sop;
|
||||
wire warp_eop = block_eop[block_idx] && dispatch_eop;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (OUT_DATAW),
|
||||
|
@ -261,7 +263,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
dispatch_data[issue_idx][IN_DATAW-1 -: UUID_WIDTH],
|
||||
block_wid,
|
||||
block_tmask[block_idx],
|
||||
dispatch_data[issue_idx][DATA_TMASK_OFF-1 : DATA_REGS_OFF + 3 * `SIMD_WIDTH * `XLEN],
|
||||
dispatch_data[issue_idx][DATA_TMASK_OFF-1 : (DATA_REGS_OFF + NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN)],
|
||||
block_regs[block_idx][0],
|
||||
block_regs[block_idx][1],
|
||||
block_regs[block_idx][2],
|
||||
|
|
|
@ -55,7 +55,6 @@ module VX_fpu_unit import VX_gpu_pkg::*, VX_fpu_pkg::*; #(
|
|||
) per_block_result_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_fpus
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.tid)
|
||||
`UNUSED_VAR (per_block_execute_if[block_idx].data.wb)
|
||||
|
||||
// Store request info
|
||||
|
|
|
@ -175,7 +175,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
`TRACE(1, (", rs3_data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
|
||||
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
|
||||
`TRACE(1, (", sop=%b, eop=%b (#%0d)\n", operands_if.data.sop, operands_if.data.eop, operands_if.data.uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -52,7 +52,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
) result_no_rsp_if();
|
||||
|
||||
`UNUSED_VAR (execute_if.data.rs3_data)
|
||||
`UNUSED_VAR (execute_if.data.tid)
|
||||
|
||||
// full address calculation
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
|
||||
localparam SCB_DATAW = UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (NUM_OPDS * REG_IDX_BITS);
|
||||
localparam OUT_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN);
|
||||
localparam OUT_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + 1 + 1;
|
||||
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_FETCH = 1;
|
||||
|
@ -50,9 +50,13 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
reg [NUM_SRC_OPDS-1:0] opds_needed, opds_needed_n;
|
||||
reg [NUM_SRC_OPDS-1:0] opds_busy, opds_busy_n;
|
||||
reg [2:0] state, state_n;
|
||||
reg [SIMD_IDX_W-1:0] simd_index, simd_index_n;
|
||||
|
||||
wire scboard_fire = scoreboard_if.valid && scoreboard_if.ready;
|
||||
wire [`SIMD_WIDTH-1:0] simd_out;
|
||||
wire [SIMD_IDX_W-1:0] simd_pid;
|
||||
wire simd_sop;
|
||||
wire simd_eop;
|
||||
|
||||
wire staging_fire = staging_if.valid && staging_if.ready;
|
||||
wire gpr_req_fire = gpr_if.req_valid && gpr_if.req_ready;
|
||||
wire gpr_rsp_fire = gpr_if.rsp_valid;
|
||||
|
||||
|
@ -71,9 +75,8 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire output_ready;
|
||||
wire dispatched = (state == STATE_DISPATCH) && output_ready;
|
||||
wire is_last_simd = (simd_index == SIMD_IDX_W'(SIMD_COUNT-1));
|
||||
|
||||
assign staging_if.ready = dispatched && is_last_simd;
|
||||
assign staging_if.ready = dispatched && simd_eop;
|
||||
|
||||
wire [NR_BITS-1:0] rs1 = to_reg_number(staging_if.data.rs1);
|
||||
wire [NR_BITS-1:0] rs2 = to_reg_number(staging_if.data.rs2);
|
||||
|
@ -86,12 +89,11 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
state_n = state;
|
||||
opds_needed_n = opds_needed;
|
||||
opds_busy_n = opds_busy;
|
||||
simd_index_n = simd_index;
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (scboard_fire) begin
|
||||
opds_needed_n = scoreboard_if.data.used_rs;
|
||||
opds_busy_n = scoreboard_if.data.used_rs;
|
||||
if (staging_if.valid) begin
|
||||
opds_needed_n = staging_if.data.used_rs;
|
||||
opds_busy_n = staging_if.data.used_rs;
|
||||
if (opds_busy_n == 0) begin
|
||||
state_n = STATE_DISPATCH;
|
||||
end else begin
|
||||
|
@ -112,12 +114,11 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
STATE_DISPATCH: begin
|
||||
if (output_ready) begin
|
||||
if (is_last_simd) begin
|
||||
if (simd_eop) begin
|
||||
state_n = STATE_IDLE;
|
||||
end else begin
|
||||
opds_needed_n = staging_if.data.used_rs;
|
||||
opds_busy_n = staging_if.data.used_rs;
|
||||
simd_index_n = simd_index + 1;
|
||||
state_n = STATE_FETCH;
|
||||
end
|
||||
end
|
||||
|
@ -130,12 +131,10 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
state <= STATE_IDLE;
|
||||
opds_needed <= '0;
|
||||
opds_busy <= '0;
|
||||
simd_index <= 0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
opds_needed <= opds_needed_n;
|
||||
opds_busy <= opds_busy_n;
|
||||
simd_index <= simd_index_n;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -145,16 +144,16 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
VX_priority_encoder #(
|
||||
.N (NUM_SRC_OPDS)
|
||||
) opd_id_sel (
|
||||
.data_in (opds_needed),
|
||||
.data_in (opds_needed),
|
||||
.index_out (opd_id),
|
||||
`UNUSED_PIN (onehot_out),
|
||||
.valid_out (opd_fetch_valid)
|
||||
.valid_out (opd_fetch_valid),
|
||||
`UNUSED_PIN (onehot_out)
|
||||
);
|
||||
|
||||
// operands fetch request
|
||||
assign gpr_if.req_valid = opd_fetch_valid;
|
||||
assign gpr_if.req_data.opd_id = opd_id;
|
||||
assign gpr_if.req_data.sid = simd_index;
|
||||
assign gpr_if.req_data.sid = simd_pid;
|
||||
assign gpr_if.req_data.wis = staging_if.data.wis;
|
||||
assign gpr_if.req_data.reg_id = src_regs[opd_id];
|
||||
|
||||
|
@ -173,7 +172,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
// output scheduler info
|
||||
assign pending_sid = simd_index;
|
||||
assign pending_sid = simd_pid;
|
||||
assign pending_wis = staging_if.data.wis;
|
||||
always @(*) begin
|
||||
pending_regs = '0;
|
||||
|
@ -184,6 +183,23 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
VX_nz_iterator #(
|
||||
.DATAW (`SIMD_WIDTH),
|
||||
.N (SIMD_COUNT),
|
||||
.OUT_REG (1)
|
||||
) valid_iter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in(staging_if.valid),
|
||||
.data_in (staging_if.data.tmask),
|
||||
.next (staging_fire),
|
||||
`UNUSED_PIN (valid_out),
|
||||
.data_out(simd_out),
|
||||
.pid (simd_pid),
|
||||
.sop (simd_sop),
|
||||
.eop (simd_eop)
|
||||
);
|
||||
|
||||
// instruction dispatch
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (OUT_DATAW),
|
||||
|
@ -196,8 +212,8 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
.data_in ({
|
||||
staging_if.data.uuid,
|
||||
staging_if.data.wis,
|
||||
simd_index,
|
||||
staging_if.data.tmask[simd_index * `SIMD_WIDTH +: `SIMD_WIDTH],
|
||||
simd_pid,
|
||||
simd_out,
|
||||
staging_if.data.PC,
|
||||
staging_if.data.ex_type,
|
||||
staging_if.data.op_type,
|
||||
|
@ -206,7 +222,9 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
to_reg_number(staging_if.data.rd),
|
||||
opd_values[0],
|
||||
opd_values[1],
|
||||
opd_values[2]
|
||||
opd_values[2],
|
||||
simd_sop,
|
||||
simd_eop
|
||||
}),
|
||||
.ready_in (output_ready),
|
||||
.valid_out(operands_if.valid),
|
||||
|
@ -217,7 +235,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
`ifdef DBG_TRACE_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (scoreboard_if.valid && scoreboard_if.ready) begin
|
||||
`TRACE(1, ("%t: %s-input: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(scoreboard_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0}))
|
||||
`TRACE(1, ("%t: %s-input: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(scoreboard_if.data.wis, ISSUE_ID), {scoreboard_if.data.PC, 1'b0}))
|
||||
trace_ex_type(1, scoreboard_if.data.ex_type);
|
||||
`TRACE(1, (", op="))
|
||||
trace_ex_op(1, scoreboard_if.data.ex_type, scoreboard_if.data.op_type, scoreboard_if.data.op_args);
|
||||
|
@ -243,7 +261,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
`TRACE(1, (", rs3_data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
|
||||
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
|
||||
`TRACE(1, (", sop=%b, eop=%b (#%0d)\n", operands_if.data.sop, operands_if.data.eop, operands_if.data.uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -37,7 +37,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
);
|
||||
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
|
||||
localparam SCB_DATAW = UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS);
|
||||
localparam OPD_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN);
|
||||
localparam OPD_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + 1 + 1;
|
||||
|
||||
VX_gpr_if per_opc_gpr_if[`NUM_OPCS]();
|
||||
VX_scoreboard_if per_opc_scoreboard_if[`NUM_OPCS]();
|
||||
|
@ -52,7 +52,8 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (1),
|
||||
.NUM_OUTPUTS (`NUM_OPCS),
|
||||
.DATAW (SCB_DATAW)
|
||||
.DATAW (SCB_DATAW),
|
||||
.OUT_BUF (0)
|
||||
) scboard_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -124,7 +125,8 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
VX_stream_arb #(
|
||||
.NUM_INPUTS (`NUM_OPCS),
|
||||
.NUM_OUTPUTS (1),
|
||||
.DATAW (OPD_DATAW)
|
||||
.DATAW (OPD_DATAW),
|
||||
.OUT_BUF (3)
|
||||
) operands_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -31,7 +31,7 @@ module VX_pe_switch import VX_gpu_pkg::*; #(
|
|||
);
|
||||
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam REQ_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + INST_ALU_BITS + $bits(op_args_t) + 1 + NR_BITS + NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam REQ_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + INST_ALU_BITS + $bits(op_args_t) + 1 + NR_BITS + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
|
||||
wire [PE_COUNT-1:0] pe_req_valid;
|
||||
|
|
|
@ -306,13 +306,13 @@ module VX_schedule import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~stalled_warps;
|
||||
|
||||
VX_lzc #(
|
||||
.N (`NUM_WARPS),
|
||||
.REVERSE (1)
|
||||
VX_priority_encoder #(
|
||||
.N (`NUM_WARPS)
|
||||
) wid_select (
|
||||
.data_in (ready_warps),
|
||||
.data_out (schedule_wid),
|
||||
.valid_out (schedule_valid)
|
||||
.index_out (schedule_wid),
|
||||
.valid_out (schedule_valid),
|
||||
`UNUSED_PIN (onehot_out)
|
||||
);
|
||||
|
||||
wire [`NUM_WARPS-1:0][(`NUM_THREADS + PC_BITS)-1:0] schedule_data;
|
||||
|
|
|
@ -49,15 +49,23 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
|
|||
wire is_join = (execute_if.data.op_type == INST_SFU_JOIN);
|
||||
wire is_bar = (execute_if.data.op_type == INST_SFU_BAR);
|
||||
|
||||
wire [`UP(LANE_BITS)-1:0] tid;
|
||||
if (LANE_BITS != 0) begin : g_tid
|
||||
assign tid = execute_if.data.tid[0 +: LANE_BITS];
|
||||
wire [`UP(LANE_BITS)-1:0] last_tid;
|
||||
if (LANE_BITS != 0) begin : g_last_tid
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_LANES),
|
||||
.REVERSE (1)
|
||||
) last_tid_select (
|
||||
.data_in (execute_if.data.tmask),
|
||||
.index_out (last_tid),
|
||||
`UNUSED_PIN (onehot_out),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
end else begin : g_no_tid
|
||||
assign tid = 0;
|
||||
assign last_tid = 0;
|
||||
end
|
||||
|
||||
wire [`XLEN-1:0] rs1_data = execute_if.data.rs1_data[tid];
|
||||
wire [`XLEN-1:0] rs2_data = execute_if.data.rs2_data[tid];
|
||||
wire [`XLEN-1:0] rs1_data = execute_if.data.rs1_data[last_tid];
|
||||
wire [`XLEN-1:0] rs2_data = execute_if.data.rs2_data[last_tid];
|
||||
`UNUSED_VAR (rs1_data)
|
||||
|
||||
wire not_pred = execute_if.data.op_args.wctl.is_neg;
|
||||
|
|
|
@ -25,10 +25,11 @@ interface VX_dispatch_if import VX_gpu_pkg::*; ();
|
|||
op_args_t op_args;
|
||||
logic wb;
|
||||
logic [NR_BITS-1:0] rd;
|
||||
logic [NT_WIDTH-1:0] tid;
|
||||
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs1_data;
|
||||
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs2_data;
|
||||
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs3_data;
|
||||
logic sop;
|
||||
logic eop;
|
||||
} data_t;
|
||||
|
||||
logic valid;
|
||||
|
|
|
@ -26,7 +26,6 @@ interface VX_execute_if import VX_gpu_pkg::*; #(
|
|||
op_args_t op_args;
|
||||
logic wb;
|
||||
logic [NR_BITS-1:0] rd;
|
||||
logic [NT_WIDTH-1:0] tid;
|
||||
logic [NUM_LANES-1:0][`XLEN-1:0] rs1_data;
|
||||
logic [NUM_LANES-1:0][`XLEN-1:0] rs2_data;
|
||||
logic [NUM_LANES-1:0][`XLEN-1:0] rs3_data;
|
||||
|
|
|
@ -29,6 +29,8 @@ interface VX_operands_if import VX_gpu_pkg::*; ();
|
|||
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs1_data;
|
||||
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs2_data;
|
||||
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs3_data;
|
||||
logic sop;
|
||||
logic eop;
|
||||
} data_t;
|
||||
|
||||
logic valid;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -21,67 +21,67 @@ module VX_allocator #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire acquire_en,
|
||||
output wire [ADDRW-1:0] acquire_addr,
|
||||
|
||||
input wire acquire_en,
|
||||
output wire [ADDRW-1:0] acquire_addr,
|
||||
|
||||
input wire release_en,
|
||||
input wire [ADDRW-1:0] release_addr,
|
||||
|
||||
input wire [ADDRW-1:0] release_addr,
|
||||
|
||||
output wire empty,
|
||||
output wire full
|
||||
output wire full
|
||||
);
|
||||
reg [SIZE-1:0] free_slots, free_slots_n;
|
||||
reg [ADDRW-1:0] acquire_addr_r;
|
||||
reg empty_r, full_r;
|
||||
reg empty_r, full_r;
|
||||
wire [ADDRW-1:0] free_index;
|
||||
wire free_valid;
|
||||
|
||||
always @(*) begin
|
||||
free_slots_n = free_slots;
|
||||
if (release_en) begin
|
||||
free_slots_n[release_addr] = 1;
|
||||
free_slots_n[release_addr] = 1;
|
||||
end
|
||||
if (acquire_en) begin
|
||||
free_slots_n[acquire_addr_r] = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_lzc #(
|
||||
.N (SIZE),
|
||||
.REVERSE (1)
|
||||
VX_priority_encoder #(
|
||||
.N (SIZE)
|
||||
) free_slots_sel (
|
||||
.data_in (free_slots_n),
|
||||
.data_out (free_index),
|
||||
.valid_out (free_valid)
|
||||
);
|
||||
.index_out (free_index),
|
||||
.valid_out (free_valid),
|
||||
`UNUSED_PIN (onehot_out)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
acquire_addr_r <= ADDRW'(1'b0);
|
||||
free_slots <= {SIZE{1'b1}};
|
||||
empty_r <= 1'b1;
|
||||
full_r <= 1'b0;
|
||||
full_r <= 1'b0;
|
||||
end else begin
|
||||
if (release_en) begin
|
||||
`ASSERT(0 == free_slots[release_addr], ("%t: releasing invalid addr %d", $time, release_addr));
|
||||
end
|
||||
if (acquire_en) begin
|
||||
if (acquire_en) begin
|
||||
`ASSERT(~full_r, ("%t: allocator is full", $time));
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
if (acquire_en || (release_en && full_r)) begin
|
||||
acquire_addr_r <= free_index;
|
||||
end
|
||||
|
||||
free_slots <= free_slots_n;
|
||||
free_slots <= free_slots_n;
|
||||
empty_r <= (& free_slots_n);
|
||||
full_r <= ~free_valid;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
assign acquire_addr = acquire_addr_r;
|
||||
assign empty = empty_r;
|
||||
assign full = full_r;
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -58,7 +58,7 @@ module VX_cyclic_arbiter #(
|
|||
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) priority_encoder (
|
||||
) grant_sel (
|
||||
.data_in (requests),
|
||||
.onehot_out (grant_onehot_um),
|
||||
.index_out (grant_index_um),
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
module VX_find_first #(
|
||||
parameter N = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter REVERSE = 0
|
||||
parameter REVERSE = 0 // 0 -> first valid, 1 -> last valid
|
||||
) (
|
||||
input wire [N-1:0][DATAW-1:0] data_in,
|
||||
input wire [N-1:0] valid_in,
|
||||
|
@ -33,12 +33,12 @@ module VX_find_first #(
|
|||
wire [DATAW-1:0] d_n [TN];
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
|
||||
for (genvar i = 0; i < N; ++i) begin : g_reverse
|
||||
for (genvar i = 0; i < N; ++i) begin : g_fill
|
||||
assign s_n[TL+i] = REVERSE ? valid_in[N-1-i] : valid_in[i];
|
||||
assign d_n[TL+i] = REVERSE ? data_in[N-1-i] : data_in[i];
|
||||
end
|
||||
|
||||
if (TL < (TN-N)) begin : g_fill
|
||||
if (TL < (TN-N)) begin : g_padding
|
||||
for (genvar i = TL+N; i < TN; ++i) begin : g_i
|
||||
assign s_n[i] = 0;
|
||||
assign d_n[i] = '0;
|
||||
|
@ -49,7 +49,7 @@ module VX_find_first #(
|
|||
localparam I = 1 << j;
|
||||
for (genvar i = 0; i < I; ++i) begin : g_i
|
||||
localparam K = I+i-1;
|
||||
assign s_n[K] = s_n[2*K+1] | s_n[2*K+2];
|
||||
assign s_n[K] = s_n[2*K+2] | s_n[2*K+1];
|
||||
assign d_n[K] = s_n[2*K+1] ? d_n[2*K+1] : d_n[2*K+2];
|
||||
end
|
||||
end
|
||||
|
|
|
@ -33,7 +33,6 @@ module VX_lzc #(
|
|||
end else begin : g_lzc
|
||||
|
||||
wire [N-1:0][LOGN-1:0] indices;
|
||||
|
||||
for (genvar i = 0; i < N; ++i) begin : g_indices
|
||||
assign indices[i] = REVERSE ? LOGN'(i) : LOGN'(N-1-i);
|
||||
end
|
||||
|
@ -43,8 +42,8 @@ module VX_lzc #(
|
|||
.DATAW (LOGN),
|
||||
.REVERSE (!REVERSE)
|
||||
) find_first (
|
||||
.data_in (indices),
|
||||
.valid_in (data_in),
|
||||
.data_in (indices),
|
||||
.data_out (data_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
|
|
|
@ -131,11 +131,11 @@ module VX_mem_coalescer #(
|
|||
|
||||
VX_priority_encoder #(
|
||||
.N (DATA_RATIO)
|
||||
) priority_encoder (
|
||||
) batch_sel (
|
||||
.data_in (batch_mask),
|
||||
.index_out (batch_idx),
|
||||
`UNUSED_PIN (onehot_out),
|
||||
.valid_out (batch_valid_n[i])
|
||||
.valid_out (batch_valid_n[i]),
|
||||
`UNUSED_PIN (onehot_out)
|
||||
);
|
||||
|
||||
wire [DATA_RATIO-1:0][OUT_ADDR_WIDTH-1:0] addr_base;
|
||||
|
|
114
hw/rtl/libs/VX_nz_iterator.sv
Normal file
114
hw/rtl/libs/VX_nz_iterator.sv
Normal file
|
@ -0,0 +1,114 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
// VX_nz_iterator: steps through the non-zero elements of `data_in`.
//
// An element i is considered part of the stream when any of its bits is set
// (packet_valids[i] = |data_in[i]). Each `next` pulse marks the element
// currently being presented as sent and the selection moves on to the next
// unsent non-zero element. When `next` fires on the last non-zero element
// the sent mask is cleared and the iteration restarts.
//
// Outputs:
//   valid_out - `valid_in` after the optional OUT_REG pipe stage (N > 1),
//               or `valid_in` directly (N == 1)
//   data_out  - data_in[pid]
//   pid       - index of the element currently selected
//   sop       - high until the first `next` after reset / end of stream
//   eop       - high when the selected element is the last non-zero one
module VX_nz_iterator #(
    parameter DATAW      = 8,   // Bit-width of each data element
    parameter N          = 4,   // Number of elements in the stream
    parameter OUT_REG    = 0,   // Output register
    parameter LPID_WIDTH = `LOG2UP(N)
) (
    input wire                    clk,
    input wire                    reset,
    input wire                    valid_in,   // Stream input valid
    input wire [N-1:0][DATAW-1:0] data_in,    // Stream input data
    input wire                    next,       // Advances iterator
    output wire                   valid_out,  // Current output valid
    output reg [DATAW-1:0]        data_out,   // Current output data
    output reg [LPID_WIDTH-1:0]   pid,        // Index of the current element
    output reg                    sop,        // Start of valid stream
    output reg                    eop         // End of valid stream
);
    if (N > 1) begin : g_iterator

        // One bit per element: set once that element has been consumed.
        reg [N-1:0] sent_mask_p;
        wire [LPID_WIDTH-1:0] start_p_n, start_p, end_p;
        wire valid_in_r;

        // An element takes part in the iteration iff it is non-zero.
        wire [N-1:0] packet_valids;
        for (genvar i = 0; i < N; ++i) begin : g_packet_valids
            assign packet_valids[i] = (| data_in[i]);
        end

        // Constant table of element indices fed to the selectors below.
        wire [N-1:0][LPID_WIDTH-1:0] packet_ids;
        for (genvar i = 0; i < N; ++i) begin : g_packet_ids
            assign packet_ids[i] = LPID_WIDTH'(i);
        end

        // Index of the first non-zero element that has not been sent yet.
        VX_find_first #(
            .N       (N),
            .DATAW   (LPID_WIDTH),
            .REVERSE (0)
        ) find_first (
            .valid_in  (packet_valids & ~sent_mask_p),
            .data_in   (packet_ids),
            .data_out  (start_p_n),
            `UNUSED_PIN (valid_out)
        );

        // Index of the last non-zero element (independent of the sent mask).
        VX_find_first #(
            .N       (N),
            .DATAW   (LPID_WIDTH),
            .REVERSE (1)
        ) find_last (
            .valid_in  (packet_valids),
            .data_in   (packet_ids),
            .data_out  (end_p),
            `UNUSED_PIN (valid_out)
        );

        // Optional register stage on {valid, selected index}.
        VX_pipe_register #(
            .DATAW  (1 + LPID_WIDTH),
            .RESETW (1),
            .DEPTH  (OUT_REG)
        ) pipe_reg (
            .clk      (clk),
            .reset    (reset || next), // should flush on fire
            .enable   (1'b1),
            .data_in  ({valid_in, start_p_n}),
            .data_out ({valid_in_r, start_p})
        );

        reg is_first_p;
        wire is_last_p = (start_p == end_p);
        wire fire_eop = next && is_last_p;

        always @(posedge clk) begin
            if (reset || fire_eop) begin
                // Restart: nothing sent yet, next element is the first one.
                sent_mask_p <= '0;
                is_first_p  <= 1;
            end else if (next) begin
                // Retire the element that was just consumed.
                sent_mask_p[start_p] <= 1;
                is_first_p  <= 0;
            end
        end

        assign valid_out = valid_in_r;
        assign data_out  = data_in[start_p];
        assign pid       = start_p;
        assign sop       = is_first_p;
        assign eop       = is_last_p;

    end else begin : g_passthru

        // Single-element stream: there is nothing to iterate over, so the
        // only element is both the start and the end of the stream.
        // valid_out must still be driven here, otherwise an N == 1 instance
        // leaves the port floating. valid_in is forwarded combinationally;
        // OUT_REG is not applied in this branch, and clk/reset/next are
        // intentionally unused.
        assign valid_out = valid_in;
        assign data_out  = data_in[0];
        assign pid       = 0;
        assign sop       = 1;
        assign eop       = 1;

    end

endmodule
|
||||
`TRACING_ON
|
|
@ -33,7 +33,7 @@ module VX_priority_arbiter #(
|
|||
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) priority_encoder (
|
||||
) grant_sel (
|
||||
.data_in (requests),
|
||||
.index_out (grant_index),
|
||||
.onehot_out (grant_onehot),
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
`TRACING_OFF
|
||||
module VX_priority_encoder #(
|
||||
parameter N = 1,
|
||||
parameter REVERSE = 0,
|
||||
parameter REVERSE = 0, // 0 -> LSB, 1 -> MSB
|
||||
parameter MODEL = 1,
|
||||
parameter LN = `LOG2UP(N)
|
||||
) (
|
||||
|
@ -25,105 +25,159 @@ module VX_priority_encoder #(
|
|||
output wire [LN-1:0] index_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire [N-1:0] reversed;
|
||||
if (REVERSE) begin : g_msb
|
||||
|
||||
if (REVERSE != 0) begin : g_reverse
|
||||
for (genvar i = 0; i < N; ++i) begin : g_i
|
||||
assign reversed[N-i-1] = data_in[i];
|
||||
end
|
||||
end else begin : g_no_reverse
|
||||
assign reversed = data_in;
|
||||
end
|
||||
if (N == 1) begin : g_n1
|
||||
|
||||
if (N == 1) begin : g_n1
|
||||
assign onehot_out = data_in;
|
||||
assign index_out = '0;
|
||||
assign valid_out = data_in;
|
||||
|
||||
assign onehot_out = reversed;
|
||||
assign index_out = '0;
|
||||
assign valid_out = reversed;
|
||||
end else if (N == 2) begin : g_n2
|
||||
|
||||
end else if (N == 2) begin : g_n2
|
||||
assign onehot_out = {data_in[1], data_in[0] & ~data_in[1]};
|
||||
assign index_out = data_in[1];
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
assign onehot_out = {reversed[1] && ~reversed[0], reversed[0]};
|
||||
assign index_out = ~reversed[0];
|
||||
assign valid_out = (| reversed);
|
||||
end else if (MODEL != 0) begin : g_model1
|
||||
|
||||
end else if (MODEL == 1) begin : g_model1
|
||||
wire [N-1:0] higher_pri_regs;
|
||||
assign higher_pri_regs[N-1] = 1'b0;
|
||||
for (genvar i = N-2; i >= 0; --i) begin : g_higher_pri_regs
|
||||
assign higher_pri_regs[i] = higher_pri_regs[i+1] | data_in[i+1];
|
||||
end
|
||||
assign onehot_out = data_in & ~higher_pri_regs;
|
||||
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
wire [N-1:0] higher_pri_regs;
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
wire [N-1:0][LN-1:0] indices;
|
||||
for (genvar i = 0; i < N; ++i) begin : g_indices
|
||||
assign indices[i] = LN'(i);
|
||||
end
|
||||
|
||||
assign higher_pri_regs[0] = 1'b0;
|
||||
for (genvar i = 1; i < N; ++i) begin : g_higher_pri_regs
|
||||
assign higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1];
|
||||
end
|
||||
assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0];
|
||||
VX_find_first #(
|
||||
.N (N),
|
||||
.DATAW (LN),
|
||||
.REVERSE (1)
|
||||
) find_first (
|
||||
.valid_in (data_in),
|
||||
.data_in (indices),
|
||||
.data_out (index_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
|
||||
VX_lzc #(
|
||||
.N (N),
|
||||
.REVERSE (1)
|
||||
) lzc (
|
||||
.data_in (reversed),
|
||||
.data_out (index_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
end else begin : g_model0
|
||||
|
||||
end else if (MODEL == 2) begin : g_model2
|
||||
reg [LN-1:0] index_w;
|
||||
reg [N-1:0] onehot_w;
|
||||
|
||||
wire [N-1:0] scan_lo;
|
||||
|
||||
VX_scan #(
|
||||
.N (N),
|
||||
.OP ("|")
|
||||
) scan (
|
||||
.data_in (reversed),
|
||||
.data_out (scan_lo)
|
||||
);
|
||||
|
||||
VX_lzc #(
|
||||
.N (N),
|
||||
.REVERSE (1)
|
||||
) lzc (
|
||||
.data_in (reversed),
|
||||
.data_out (index_out),
|
||||
.valid_out(valid_out)
|
||||
);
|
||||
|
||||
assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
|
||||
|
||||
end else if (MODEL == 3) begin : g_model3
|
||||
|
||||
assign onehot_out = reversed & -reversed;
|
||||
|
||||
VX_lzc #(
|
||||
.N (N),
|
||||
.REVERSE (1)
|
||||
) lzc (
|
||||
.data_in (reversed),
|
||||
.data_out (index_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
|
||||
end else begin : g_model0
|
||||
|
||||
reg [LN-1:0] index_w;
|
||||
reg [N-1:0] onehot_w;
|
||||
|
||||
always @(*) begin
|
||||
index_w = 'x;
|
||||
onehot_w = 'x;
|
||||
for (integer i = N-1; i >= 0; --i) begin
|
||||
if (reversed[i]) begin
|
||||
index_w = LN'(i);
|
||||
onehot_w = N'(1) << i;
|
||||
always @(*) begin
|
||||
index_w = 'x;
|
||||
onehot_w = 'x;
|
||||
for (integer i = 0; i < N-1; ++i) begin
|
||||
if (data_in[i]) begin
|
||||
index_w = LN'(i);
|
||||
onehot_w = N'(1) << i;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign index_out = index_w;
|
||||
assign onehot_out = onehot_w;
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
end
|
||||
|
||||
assign index_out = index_w;
|
||||
assign onehot_out = onehot_w;
|
||||
assign valid_out = (| reversed);
|
||||
end else begin: g_lsb
|
||||
|
||||
if (N == 1) begin : g_n1
|
||||
|
||||
assign onehot_out = data_in;
|
||||
assign index_out = '0;
|
||||
assign valid_out = data_in;
|
||||
|
||||
end else if (N == 2) begin : g_n2
|
||||
|
||||
assign onehot_out = {data_in[1] && ~data_in[0], data_in[0]};
|
||||
assign index_out = ~data_in[0];
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
end else if (MODEL == 1) begin : g_model1
|
||||
|
||||
`IGNORE_UNOPTFLAT_BEGIN
|
||||
wire [N-1:0] higher_pri_regs;
|
||||
`IGNORE_UNOPTFLAT_END
|
||||
|
||||
assign higher_pri_regs[0] = 1'b0;
|
||||
for (genvar i = 1; i < N; ++i) begin : g_higher_pri_regs
|
||||
assign higher_pri_regs[i] = higher_pri_regs[i-1] | data_in[i-1];
|
||||
end
|
||||
assign onehot_out[N-1:0] = data_in[N-1:0] & ~higher_pri_regs[N-1:0];
|
||||
|
||||
VX_lzc #(
|
||||
.N (N),
|
||||
.REVERSE (1)
|
||||
) lzc (
|
||||
.data_in (data_in),
|
||||
.data_out (index_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
|
||||
end else if (MODEL == 2) begin : g_model2
|
||||
|
||||
wire [N-1:0] scan_lo;
|
||||
|
||||
VX_scan #(
|
||||
.N (N),
|
||||
.OP ("|")
|
||||
) scan (
|
||||
.data_in (data_in),
|
||||
.data_out (scan_lo)
|
||||
);
|
||||
|
||||
assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
|
||||
|
||||
VX_lzc #(
|
||||
.N (N),
|
||||
.REVERSE (1)
|
||||
) lzc (
|
||||
.data_in (data_in),
|
||||
.data_out (index_out),
|
||||
.valid_out(valid_out)
|
||||
);
|
||||
|
||||
end else if (MODEL == 3) begin : g_model3
|
||||
|
||||
assign onehot_out = data_in & -data_in;
|
||||
|
||||
VX_lzc #(
|
||||
.N (N),
|
||||
.REVERSE (1)
|
||||
) lzc (
|
||||
.data_in (data_in),
|
||||
.data_out (index_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
|
||||
end else begin : g_model0
|
||||
|
||||
reg [LN-1:0] index_w;
|
||||
reg [N-1:0] onehot_w;
|
||||
|
||||
always @(*) begin
|
||||
index_w = 'x;
|
||||
onehot_w = 'x;
|
||||
for (integer i = N-1; i >= 0; --i) begin
|
||||
if (data_in[i]) begin
|
||||
index_w = LN'(i);
|
||||
onehot_w = N'(1) << i;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign index_out = index_w;
|
||||
assign onehot_out = onehot_w;
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue