bug fixes

This commit is contained in:
tinebp 2025-02-22 22:44:08 -08:00
parent dccf5937ff
commit 92d0092e39
25 changed files with 438 additions and 234 deletions

View file

@ -80,7 +80,7 @@ def parse_simx(log_lines):
elif line.startswith("DEBUG Dest"):
instr_data["destination"] = re.search(destination_pattern, line).group(1)
except Exception as e:
print("Error at line {}: {}".format(lineno, e))
print("Error: {}; {}".format(e, line))
instr_data = None
if instr_data:
entries.append(instr_data)
@ -116,9 +116,28 @@ def append_value(text, reg, value, tmask_arr, sep):
text += "}"
return text, sep
def simd_data(sub_array, index, count, default=0):
    """Expand one SIMD slice into a full-width, default-padded list.

    The returned list has exactly ``count`` entries. Entries
    ``[index * len(sub_array), (index + 1) * len(sub_array))`` are taken from
    ``sub_array``; every other entry is ``default``.

    Args:
        sub_array: Values of a single SIMD slice.
        index: Zero-based position of the slice inside the full-width list.
        count: Length of the full-width list to produce.
        default: Filler used for every entry outside the slice.

    Returns:
        A new list of length ``count``. If the slice does not fit entirely
        inside ``[0, count)``, the list contains only ``default`` values.
    """
    size = len(sub_array)
    new_array = [default] * count
    start_index = index * size
    # The lower bound matters: with a negative start, slice assignment would
    # insert the values instead of overwriting, and the result would grow
    # past ``count`` entries.
    if 0 <= start_index and start_index + size <= count:
        new_array[start_index:start_index + size] = sub_array
    return new_array
def merge_data(trace, key, new_data, mask):
    """Merge masked entries of ``new_data`` into ``trace[key]``.

    When ``key`` is not yet present in ``trace``, ``new_data`` is stored
    as-is. Otherwise the list already stored under ``key`` is updated in
    place: every position whose ``mask`` entry equals 1 is overwritten with
    the value at the same position in ``new_data``.

    Args:
        trace: Mapping that accumulates per-instruction data; modified.
        key: Entry of ``trace`` to create or update.
        new_data: Sequence of candidate values, indexed like ``mask``.
        mask: Sequence of flags; a value of 1 selects that position.
    """
    if key not in trace:
        trace[key] = new_data
        return
    existing = trace[key]
    for lane, enabled in enumerate(mask):
        if enabled == 1:
            existing[lane] = new_data[lane]
    trace[key] = existing
def parse_rtlsim(log_lines):
global configs
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue|commit)"
line_pattern = r"\d+: cluster(\d+)-socket(\d+)-core(\d+)-(decode|issue\d+|commit):"
pc_pattern = r"PC=(0x[0-9a-fA-F]+)"
instr_pattern = r"instr=(0x[0-9a-fA-F]+)"
ex_pattern = r"ex=([a-zA-Z]+)"
@ -126,7 +145,8 @@ def parse_rtlsim(log_lines):
warp_id_pattern = r"wid=(\d+)"
tmask_pattern = r"tmask=(\d+)"
wb_pattern = r"wb=(\d)"
opds_pattern = r"opds=(\d+)"
used_rs_pattern = r"used_rs=(\d+)"
sid_pattern = r"sid=(\d+)"
rd_pattern = r"rd=(\d+)"
rs1_pattern = r"rs1=(\d+)"
rs2_pattern = r"rs2=(\d+)"
@ -141,6 +161,7 @@ def parse_rtlsim(log_lines):
instr_data = {}
num_cores = configs['num_cores']
socket_size = configs['socket_size']
num_threads = configs['num_threads']
num_sockets = (num_cores + socket_size - 1) // socket_size
for lineno, line in enumerate(log_lines, start=1):
try:
@ -163,42 +184,37 @@ def parse_rtlsim(log_lines):
trace["tmask"] = reverse_binary(tmask)
trace["instr"] = re.search(instr_pattern, line).group(1)
trace["opcode"] = re.search(op_pattern, line).group(1)
trace["opds"] = bin_to_array(re.search(opds_pattern, line).group(1))
trace["used_rs"] = bin_to_array(reverse_binary(re.search(used_rs_pattern, line).group(1)))
trace["rd"] = re.search(rd_pattern, line).group(1)
trace["rs1"] = re.search(rs1_pattern, line).group(1)
trace["rs2"] = re.search(rs2_pattern, line).group(1)
trace["rs3"] = re.search(rs3_pattern, line).group(1)
instr_data[uuid] = trace
elif stage == "issue":
elif re.match(r"issue\d+", stage):
if uuid in instr_data:
trace = instr_data[uuid]
sid = int(re.search(sid_pattern, line).group(1))
src_tmask_arr = simd_data(bin_to_array(tmask)[::-1], sid, num_threads, 0)
trace["lineno"] = lineno
opds = trace["opds"]
if opds[1]:
trace["rs1_data"] = re.search(rs1_data_pattern, line).group(1).split(', ')[::-1]
if opds[2]:
trace["rs2_data"] = re.search(rs2_data_pattern, line).group(1).split(', ')[::-1]
if opds[3]:
trace["rs3_data"] = re.search(rs3_data_pattern, line).group(1).split(', ')[::-1]
used_rs = trace["used_rs"]
if used_rs[0]:
merge_data(trace, 'rs1_data', simd_data(re.search(rs1_data_pattern, line).group(1).split(', ')[::-1], sid, num_threads, '0x0'), src_tmask_arr)
if used_rs[1]:
merge_data(trace, 'rs2_data', simd_data(re.search(rs2_data_pattern, line).group(1).split(', ')[::-1], sid, num_threads, '0x0'), src_tmask_arr)
if used_rs[2]:
merge_data(trace, 'rs3_data', simd_data(re.search(rs3_data_pattern, line).group(1).split(', ')[::-1], sid, num_threads, '0x0'), src_tmask_arr)
trace["issued"] = True
instr_data[uuid] = trace
elif stage == "commit":
if uuid in instr_data:
trace = instr_data[uuid]
if "issued" in trace:
opds = trace["opds"]
dst_tmask_arr = bin_to_array(tmask)[::-1]
sid = int(re.search(sid_pattern, line).group(1))
used_rs = trace["used_rs"]
dst_tmask_arr = simd_data(bin_to_array(tmask)[::-1], sid, num_threads, 0)
wb = re.search(wb_pattern, line).group(1) == "1"
if wb:
rd_data = re.search(rd_data_pattern, line).group(1).split(', ')[::-1]
if 'rd_data' in trace:
merged_rd_data = trace['rd_data']
for i in range(len(dst_tmask_arr)):
if dst_tmask_arr[i] == 1:
merged_rd_data[i] = rd_data[i]
trace['rd_data'] = merged_rd_data
else:
trace['rd_data'] = rd_data
merge_data(trace, 'rd_data', simd_data(re.search(rd_data_pattern, line).group(1).split(', ')[::-1], sid, num_threads, '0x0'), dst_tmask_arr)
instr_data[uuid] = trace
eop = re.search(eop_pattern, line).group(1) == "1"
if eop:
@ -210,17 +226,17 @@ def parse_rtlsim(log_lines):
trace["destination"] = destination
operands = ''
sep = False
if opds[1]:
if used_rs[0]:
operands, sep = append_value(operands, trace["rs1"], trace["rs1_data"], tmask_arr, sep)
del trace["rs1_data"]
if opds[2]:
if used_rs[1]:
operands, sep = append_value(operands, trace["rs2"], trace["rs2_data"], tmask_arr, sep)
del trace["rs2_data"]
if opds[3]:
if used_rs[2]:
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
del trace["rs3_data"]
trace["operands"] = operands
del trace["opds"]
del trace["used_rs"]
del trace["rd"]
del trace["rs1"]
del trace["rs2"]
@ -229,7 +245,7 @@ def parse_rtlsim(log_lines):
del instr_data[uuid]
entries.append(trace)
except Exception as e:
print("Error at line {}: {}".format(lineno, e))
print("Error: {}; {}".format(e, line))
return entries
def write_csv(sublogs, csv_filename, log_type):

View file

@ -122,13 +122,13 @@ module VX_cache_mshr import VX_gpu_pkg::*; #(
assign addr_matches[i] = valid_table[i] && (addr_table[i] == allocate_addr);
end
VX_lzc #(
.N (MSHR_SIZE),
.REVERSE (1)
VX_priority_encoder #(
.N (MSHR_SIZE)
) allocate_sel (
.data_in (~valid_table_n),
.data_out (allocate_id_n),
.valid_out (allocate_rdy_n)
.index_out (allocate_id_n),
.valid_out (allocate_rdy_n),
`UNUSED_PIN (onehot_out)
);
// find matching tail-entry
@ -137,8 +137,8 @@ module VX_cache_mshr import VX_gpu_pkg::*; #(
) prev_sel (
.data_in (addr_matches & ~next_table_x),
.index_out (prev_idx),
`UNUSED_PIN (onehot_out),
`UNUSED_PIN (valid_out)
`UNUSED_PIN (valid_out),
`UNUSED_PIN (onehot_out)
);
always @(*) begin

View file

@ -136,15 +136,23 @@ module VX_alu_int import VX_gpu_pkg::*; #(
wire [PC_BITS-1:0] PC_r;
wire [INST_BR_BITS-1:0] br_op_r;
wire [PC_BITS-1:0] cbr_dest, cbr_dest_r;
wire [LANE_WIDTH-1:0] tid, tid_r;
wire [LANE_WIDTH-1:0] last_tid, last_tid_r;
wire is_br_op_r;
assign cbr_dest = add_result[0][1 +: PC_BITS];
if (LANE_BITS != 0) begin : g_tid
assign tid = execute_if.data.tid[0 +: LANE_BITS];
if (LANE_BITS != 0) begin : g_last_tid
VX_priority_encoder #(
.N (NUM_LANES),
.REVERSE (1)
) last_tid_sel (
.data_in (execute_if.data.tmask),
.index_out (last_tid),
`UNUSED_PIN (onehot_out),
`UNUSED_PIN (valid_out)
);
end else begin : g_tid_0
assign tid = 0;
assign last_tid = 0;
end
VX_elastic_buffer #(
@ -154,8 +162,8 @@ module VX_alu_int import VX_gpu_pkg::*; #(
.reset (reset),
.valid_in (execute_if.valid),
.ready_in (execute_if.ready),
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, cbr_dest, is_br_op, br_op, tid}),
.data_out ({result_if.data.uuid, result_if.data.wid, result_if.data.tmask, result_if.data.rd, result_if.data.wb, result_if.data.pid, result_if.data.sop, result_if.data.eop, alu_result_r, PC_r, cbr_dest_r, is_br_op_r, br_op_r, tid_r}),
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, alu_result, execute_if.data.PC, cbr_dest, is_br_op, br_op, last_tid}),
.data_out ({result_if.data.uuid, result_if.data.wid, result_if.data.tmask, result_if.data.rd, result_if.data.wb, result_if.data.pid, result_if.data.sop, result_if.data.eop, alu_result_r, PC_r, cbr_dest_r, is_br_op_r, br_op_r, last_tid_r}),
.valid_out (result_if.valid),
.ready_out (result_if.ready)
);
@ -165,7 +173,7 @@ module VX_alu_int import VX_gpu_pkg::*; #(
wire is_br_less = inst_br_is_less(br_op_r);
wire is_br_static = inst_br_is_static(br_op_r);
wire [`XLEN-1:0] br_result = alu_result_r[tid_r];
wire [`XLEN-1:0] br_result = alu_result_r[last_tid_r];
wire is_less = br_result[0];
wire is_equal = br_result[1];

View file

@ -573,8 +573,8 @@ module VX_decode import VX_gpu_pkg::*; #(
trace_ex_type(1, decode_if.data.ex_type);
`TRACE(1, (", op="))
trace_ex_op(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, opds=%b%b%b%b",
decode_if.data.tmask, decode_if.data.wb, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, use_rd, use_rs1, use_rs2, use_rs3))
`TRACE(1, (", tmask=%b, wb=%b, used_rs=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d",
decode_if.data.tmask, decode_if.data.wb, decode_if.data.used_rs, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3))
trace_op_args(1, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args);
`TRACE(1, (" (#%0d)\n", decode_if.data.uuid))
end

View file

@ -32,25 +32,7 @@ module VX_dispatch import VX_gpu_pkg::*; #(
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (ISSUE_ID)
localparam DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + NT_WIDTH;
wire [`SIMD_WIDTH-1:0][NT_WIDTH-1:0] tids;
for (genvar i = 0; i < `SIMD_WIDTH; ++i) begin : g_tids
assign tids[i] = NT_WIDTH'(i);
end
wire [NT_WIDTH-1:0] last_active_tid;
VX_find_first #(
.N (`SIMD_WIDTH),
.DATAW (NT_WIDTH),
.REVERSE (1)
) last_tid_select (
.valid_in (operands_if.data.tmask),
.data_in (tids),
.data_out (last_active_tid),
`UNUSED_PIN (valid_out)
);
localparam DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + 1 + 1;
wire [NUM_EX_UNITS-1:0] operands_ready_in;
assign operands_if.ready = operands_ready_in[operands_if.data.ex_type];
@ -75,10 +57,11 @@ module VX_dispatch import VX_gpu_pkg::*; #(
operands_if.data.op_args,
operands_if.data.wb,
operands_if.data.rd,
last_active_tid,
operands_if.data.rs1_data,
operands_if.data.rs2_data,
operands_if.data.rs3_data
operands_if.data.rs3_data,
operands_if.data.sop,
operands_if.data.eop
}),
.data_out (dispatch_if[i].data),
.valid_out (dispatch_if[i].valid),

View file

@ -39,12 +39,12 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
localparam BATCH_COUNT = `ISSUE_WIDTH / BLOCK_SIZE;
localparam BATCH_COUNT_W= `LOG2UP(BATCH_COUNT);
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
localparam IN_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + INST_OP_BITS + INST_ARGS_BITS + 1 + PC_BITS + NR_BITS + NT_WIDTH + (3 * `SIMD_WIDTH * `XLEN);
localparam OUT_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + INST_OP_BITS + INST_ARGS_BITS + 1 + PC_BITS + NR_BITS + NT_WIDTH + (3 * NUM_LANES * `XLEN) + GPID_WIDTH + 1 + 1;
localparam IN_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + INST_OP_BITS + INST_ARGS_BITS + 1 + PC_BITS + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + 1 + 1;
localparam OUT_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + INST_OP_BITS + INST_ARGS_BITS + 1 + PC_BITS + NR_BITS + (NUM_SRC_OPDS * NUM_LANES * `XLEN) + GPID_WIDTH + 1 + 1;
localparam FANOUT_ENABLE= (`SIMD_WIDTH > (MAX_FANOUT + MAX_FANOUT /2));
localparam DATA_TMASK_OFF = IN_DATAW - (UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH);
localparam DATA_REGS_OFF = 0;
localparam DATA_REGS_OFF = 1 + 1;
wire [`ISSUE_WIDTH-1:0] dispatch_valid;
wire [`ISSUE_WIDTH-1:0][IN_DATAW-1:0] dispatch_data;
@ -112,6 +112,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
wire [ISSUE_W-1:0] issue_idx = issue_indices[block_idx];
wire [ISSUE_WIS_W-1:0] dispatch_wis = dispatch_data[issue_idx][DATA_TMASK_OFF + `SIMD_WIDTH + SIMD_IDX_W +: ISSUE_WIS_W];
wire [SIMD_IDX_W-1:0] dispatch_sid = dispatch_data[issue_idx][DATA_TMASK_OFF + `SIMD_WIDTH +: SIMD_IDX_W];
wire dispatch_sop = dispatch_data[issue_idx][1];
wire dispatch_eop = dispatch_data[issue_idx][0];
wire [`SIMD_WIDTH-1:0] dispatch_tmask = dispatch_data[issue_idx][DATA_TMASK_OFF +: `SIMD_WIDTH];
wire [`SIMD_WIDTH-1:0][`XLEN-1:0] dispatch_rs1_data = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `SIMD_WIDTH * `XLEN +: `SIMD_WIDTH * `XLEN];
@ -245,8 +247,8 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
wire [NW_WIDTH-1:0] block_wid = wis_to_wid(dispatch_wis, isw);
wire [GPID_WIDTH-1:0] warp_pid = GPID_WIDTH'(block_pid[block_idx]) + GPID_WIDTH'(dispatch_sid * NUM_PACKETS);
wire warp_sop = block_sop[block_idx] && (dispatch_sid == 0);
wire warp_eop = block_eop[block_idx] && (dispatch_sid == SIMD_IDX_W'(SIMD_COUNT-1));
wire warp_sop = block_sop[block_idx] && dispatch_sop;
wire warp_eop = block_eop[block_idx] && dispatch_eop;
VX_elastic_buffer #(
.DATAW (OUT_DATAW),
@ -261,7 +263,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
dispatch_data[issue_idx][IN_DATAW-1 -: UUID_WIDTH],
block_wid,
block_tmask[block_idx],
dispatch_data[issue_idx][DATA_TMASK_OFF-1 : DATA_REGS_OFF + 3 * `SIMD_WIDTH * `XLEN],
dispatch_data[issue_idx][DATA_TMASK_OFF-1 : (DATA_REGS_OFF + NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN)],
block_regs[block_idx][0],
block_regs[block_idx][1],
block_regs[block_idx][2],

View file

@ -55,7 +55,6 @@ module VX_fpu_unit import VX_gpu_pkg::*, VX_fpu_pkg::*; #(
) per_block_result_if[BLOCK_SIZE]();
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : g_fpus
`UNUSED_VAR (per_block_execute_if[block_idx].data.tid)
`UNUSED_VAR (per_block_execute_if[block_idx].data.wb)
// Store request info

View file

@ -175,7 +175,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
`TRACE(1, (", rs3_data="))
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
`TRACE(1, (", sop=%b, eop=%b (#%0d)\n", operands_if.data.sop, operands_if.data.eop, operands_if.data.uuid))
end
end
`endif

View file

@ -52,7 +52,6 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
) result_no_rsp_if();
`UNUSED_VAR (execute_if.data.rs3_data)
`UNUSED_VAR (execute_if.data.tid)
// full address calculation

View file

@ -39,7 +39,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
localparam SCB_DATAW = UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (NUM_OPDS * REG_IDX_BITS);
localparam OUT_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN);
localparam OUT_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + 1 + 1;
localparam STATE_IDLE = 0;
localparam STATE_FETCH = 1;
@ -50,9 +50,13 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
reg [NUM_SRC_OPDS-1:0] opds_needed, opds_needed_n;
reg [NUM_SRC_OPDS-1:0] opds_busy, opds_busy_n;
reg [2:0] state, state_n;
reg [SIMD_IDX_W-1:0] simd_index, simd_index_n;
wire scboard_fire = scoreboard_if.valid && scoreboard_if.ready;
wire [`SIMD_WIDTH-1:0] simd_out;
wire [SIMD_IDX_W-1:0] simd_pid;
wire simd_sop;
wire simd_eop;
wire staging_fire = staging_if.valid && staging_if.ready;
wire gpr_req_fire = gpr_if.req_valid && gpr_if.req_ready;
wire gpr_rsp_fire = gpr_if.rsp_valid;
@ -71,9 +75,8 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
wire output_ready;
wire dispatched = (state == STATE_DISPATCH) && output_ready;
wire is_last_simd = (simd_index == SIMD_IDX_W'(SIMD_COUNT-1));
assign staging_if.ready = dispatched && is_last_simd;
assign staging_if.ready = dispatched && simd_eop;
wire [NR_BITS-1:0] rs1 = to_reg_number(staging_if.data.rs1);
wire [NR_BITS-1:0] rs2 = to_reg_number(staging_if.data.rs2);
@ -86,12 +89,11 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
state_n = state;
opds_needed_n = opds_needed;
opds_busy_n = opds_busy;
simd_index_n = simd_index;
case (state)
STATE_IDLE: begin
if (scboard_fire) begin
opds_needed_n = scoreboard_if.data.used_rs;
opds_busy_n = scoreboard_if.data.used_rs;
if (staging_if.valid) begin
opds_needed_n = staging_if.data.used_rs;
opds_busy_n = staging_if.data.used_rs;
if (opds_busy_n == 0) begin
state_n = STATE_DISPATCH;
end else begin
@ -112,12 +114,11 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
end
STATE_DISPATCH: begin
if (output_ready) begin
if (is_last_simd) begin
if (simd_eop) begin
state_n = STATE_IDLE;
end else begin
opds_needed_n = staging_if.data.used_rs;
opds_busy_n = staging_if.data.used_rs;
simd_index_n = simd_index + 1;
state_n = STATE_FETCH;
end
end
@ -130,12 +131,10 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
state <= STATE_IDLE;
opds_needed <= '0;
opds_busy <= '0;
simd_index <= 0;
end else begin
state <= state_n;
opds_needed <= opds_needed_n;
opds_busy <= opds_busy_n;
simd_index <= simd_index_n;
end
end
@ -145,16 +144,16 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
VX_priority_encoder #(
.N (NUM_SRC_OPDS)
) opd_id_sel (
.data_in (opds_needed),
.data_in (opds_needed),
.index_out (opd_id),
`UNUSED_PIN (onehot_out),
.valid_out (opd_fetch_valid)
.valid_out (opd_fetch_valid),
`UNUSED_PIN (onehot_out)
);
// operands fetch request
assign gpr_if.req_valid = opd_fetch_valid;
assign gpr_if.req_data.opd_id = opd_id;
assign gpr_if.req_data.sid = simd_index;
assign gpr_if.req_data.sid = simd_pid;
assign gpr_if.req_data.wis = staging_if.data.wis;
assign gpr_if.req_data.reg_id = src_regs[opd_id];
@ -173,7 +172,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
end
// output scheduler info
assign pending_sid = simd_index;
assign pending_sid = simd_pid;
assign pending_wis = staging_if.data.wis;
always @(*) begin
pending_regs = '0;
@ -184,6 +183,23 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
end
end
VX_nz_iterator #(
.DATAW (`SIMD_WIDTH),
.N (SIMD_COUNT),
.OUT_REG (1)
) valid_iter (
.clk (clk),
.reset (reset),
.valid_in(staging_if.valid),
.data_in (staging_if.data.tmask),
.next (staging_fire),
`UNUSED_PIN (valid_out),
.data_out(simd_out),
.pid (simd_pid),
.sop (simd_sop),
.eop (simd_eop)
);
// instruction dispatch
VX_elastic_buffer #(
.DATAW (OUT_DATAW),
@ -196,8 +212,8 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
.data_in ({
staging_if.data.uuid,
staging_if.data.wis,
simd_index,
staging_if.data.tmask[simd_index * `SIMD_WIDTH +: `SIMD_WIDTH],
simd_pid,
simd_out,
staging_if.data.PC,
staging_if.data.ex_type,
staging_if.data.op_type,
@ -206,7 +222,9 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
to_reg_number(staging_if.data.rd),
opd_values[0],
opd_values[1],
opd_values[2]
opd_values[2],
simd_sop,
simd_eop
}),
.ready_in (output_ready),
.valid_out(operands_if.valid),
@ -217,7 +235,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
`ifdef DBG_TRACE_PIPELINE
always @(posedge clk) begin
if (scoreboard_if.valid && scoreboard_if.ready) begin
`TRACE(1, ("%t: %s-input: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(scoreboard_if.data.wis, ISSUE_ID), {operands_if.data.PC, 1'b0}))
`TRACE(1, ("%t: %s-input: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(scoreboard_if.data.wis, ISSUE_ID), {scoreboard_if.data.PC, 1'b0}))
trace_ex_type(1, scoreboard_if.data.ex_type);
`TRACE(1, (", op="))
trace_ex_op(1, scoreboard_if.data.ex_type, scoreboard_if.data.op_type, scoreboard_if.data.op_args);
@ -243,7 +261,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
`TRACE(1, (", rs3_data="))
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
`TRACE(1, (", sop=%b, eop=%b (#%0d)\n", operands_if.data.sop, operands_if.data.eop, operands_if.data.uuid))
end
end
`endif

View file

@ -37,7 +37,7 @@ module VX_operands import VX_gpu_pkg::*; #(
);
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
localparam SCB_DATAW = UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS);
localparam OPD_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN);
localparam OPD_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + 1 + 1;
VX_gpr_if per_opc_gpr_if[`NUM_OPCS]();
VX_scoreboard_if per_opc_scoreboard_if[`NUM_OPCS]();
@ -52,7 +52,8 @@ module VX_operands import VX_gpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (1),
.NUM_OUTPUTS (`NUM_OPCS),
.DATAW (SCB_DATAW)
.DATAW (SCB_DATAW),
.OUT_BUF (0)
) scboard_arb (
.clk (clk),
.reset (reset),
@ -124,7 +125,8 @@ module VX_operands import VX_gpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (`NUM_OPCS),
.NUM_OUTPUTS (1),
.DATAW (OPD_DATAW)
.DATAW (OPD_DATAW),
.OUT_BUF (3)
) operands_arb (
.clk (clk),
.reset (reset),

View file

@ -31,7 +31,7 @@ module VX_pe_switch import VX_gpu_pkg::*; #(
);
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam REQ_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + INST_ALU_BITS + $bits(op_args_t) + 1 + NR_BITS + NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
localparam REQ_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + INST_ALU_BITS + $bits(op_args_t) + 1 + NR_BITS + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
localparam RSP_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
wire [PE_COUNT-1:0] pe_req_valid;

View file

@ -306,13 +306,13 @@ module VX_schedule import VX_gpu_pkg::*; #(
wire [`NUM_WARPS-1:0] ready_warps = active_warps & ~stalled_warps;
VX_lzc #(
.N (`NUM_WARPS),
.REVERSE (1)
VX_priority_encoder #(
.N (`NUM_WARPS)
) wid_select (
.data_in (ready_warps),
.data_out (schedule_wid),
.valid_out (schedule_valid)
.index_out (schedule_wid),
.valid_out (schedule_valid),
`UNUSED_PIN (onehot_out)
);
wire [`NUM_WARPS-1:0][(`NUM_THREADS + PC_BITS)-1:0] schedule_data;

View file

@ -49,15 +49,23 @@ module VX_wctl_unit import VX_gpu_pkg::*; #(
wire is_join = (execute_if.data.op_type == INST_SFU_JOIN);
wire is_bar = (execute_if.data.op_type == INST_SFU_BAR);
wire [`UP(LANE_BITS)-1:0] tid;
if (LANE_BITS != 0) begin : g_tid
assign tid = execute_if.data.tid[0 +: LANE_BITS];
wire [`UP(LANE_BITS)-1:0] last_tid;
if (LANE_BITS != 0) begin : g_last_tid
VX_priority_encoder #(
.N (NUM_LANES),
.REVERSE (1)
) last_tid_select (
.data_in (execute_if.data.tmask),
.index_out (last_tid),
`UNUSED_PIN (onehot_out),
`UNUSED_PIN (valid_out)
);
end else begin : g_no_tid
assign tid = 0;
assign last_tid = 0;
end
wire [`XLEN-1:0] rs1_data = execute_if.data.rs1_data[tid];
wire [`XLEN-1:0] rs2_data = execute_if.data.rs2_data[tid];
wire [`XLEN-1:0] rs1_data = execute_if.data.rs1_data[last_tid];
wire [`XLEN-1:0] rs2_data = execute_if.data.rs2_data[last_tid];
`UNUSED_VAR (rs1_data)
wire not_pred = execute_if.data.op_args.wctl.is_neg;

View file

@ -25,10 +25,11 @@ interface VX_dispatch_if import VX_gpu_pkg::*; ();
op_args_t op_args;
logic wb;
logic [NR_BITS-1:0] rd;
logic [NT_WIDTH-1:0] tid;
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs1_data;
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs2_data;
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs3_data;
logic sop;
logic eop;
} data_t;
logic valid;

View file

@ -26,7 +26,6 @@ interface VX_execute_if import VX_gpu_pkg::*; #(
op_args_t op_args;
logic wb;
logic [NR_BITS-1:0] rd;
logic [NT_WIDTH-1:0] tid;
logic [NUM_LANES-1:0][`XLEN-1:0] rs1_data;
logic [NUM_LANES-1:0][`XLEN-1:0] rs2_data;
logic [NUM_LANES-1:0][`XLEN-1:0] rs3_data;

View file

@ -29,6 +29,8 @@ interface VX_operands_if import VX_gpu_pkg::*; ();
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs1_data;
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs2_data;
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs3_data;
logic sop;
logic eop;
} data_t;
logic valid;

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,67 +21,67 @@ module VX_allocator #(
input wire clk,
input wire reset,
input wire acquire_en,
output wire [ADDRW-1:0] acquire_addr,
input wire acquire_en,
output wire [ADDRW-1:0] acquire_addr,
input wire release_en,
input wire [ADDRW-1:0] release_addr,
input wire [ADDRW-1:0] release_addr,
output wire empty,
output wire full
output wire full
);
reg [SIZE-1:0] free_slots, free_slots_n;
reg [ADDRW-1:0] acquire_addr_r;
reg empty_r, full_r;
reg empty_r, full_r;
wire [ADDRW-1:0] free_index;
wire free_valid;
always @(*) begin
free_slots_n = free_slots;
if (release_en) begin
free_slots_n[release_addr] = 1;
free_slots_n[release_addr] = 1;
end
if (acquire_en) begin
free_slots_n[acquire_addr_r] = 0;
end
end
end
VX_lzc #(
.N (SIZE),
.REVERSE (1)
VX_priority_encoder #(
.N (SIZE)
) free_slots_sel (
.data_in (free_slots_n),
.data_out (free_index),
.valid_out (free_valid)
);
.index_out (free_index),
.valid_out (free_valid),
`UNUSED_PIN (onehot_out)
);
always @(posedge clk) begin
if (reset) begin
acquire_addr_r <= ADDRW'(1'b0);
free_slots <= {SIZE{1'b1}};
empty_r <= 1'b1;
full_r <= 1'b0;
full_r <= 1'b0;
end else begin
if (release_en) begin
`ASSERT(0 == free_slots[release_addr], ("%t: releasing invalid addr %d", $time, release_addr));
end
if (acquire_en) begin
if (acquire_en) begin
`ASSERT(~full_r, ("%t: allocator is full", $time));
end
end
if (acquire_en || (release_en && full_r)) begin
acquire_addr_r <= free_index;
end
free_slots <= free_slots_n;
free_slots <= free_slots_n;
empty_r <= (& free_slots_n);
full_r <= ~free_valid;
end
end
end
assign acquire_addr = acquire_addr_r;
assign empty = empty_r;
assign full = full_r;
endmodule
`TRACING_ON

View file

@ -58,7 +58,7 @@ module VX_cyclic_arbiter #(
VX_priority_encoder #(
.N (NUM_REQS)
) priority_encoder (
) grant_sel (
.data_in (requests),
.onehot_out (grant_onehot_um),
.index_out (grant_index_um),

View file

@ -17,7 +17,7 @@
module VX_find_first #(
parameter N = 1,
parameter DATAW = 1,
parameter REVERSE = 0
parameter REVERSE = 0 // 0 -> first valid, 1 -> last valid
) (
input wire [N-1:0][DATAW-1:0] data_in,
input wire [N-1:0] valid_in,
@ -33,12 +33,12 @@ module VX_find_first #(
wire [DATAW-1:0] d_n [TN];
`IGNORE_UNOPTFLAT_END
for (genvar i = 0; i < N; ++i) begin : g_reverse
for (genvar i = 0; i < N; ++i) begin : g_fill
assign s_n[TL+i] = REVERSE ? valid_in[N-1-i] : valid_in[i];
assign d_n[TL+i] = REVERSE ? data_in[N-1-i] : data_in[i];
end
if (TL < (TN-N)) begin : g_fill
if (TL < (TN-N)) begin : g_padding
for (genvar i = TL+N; i < TN; ++i) begin : g_i
assign s_n[i] = 0;
assign d_n[i] = '0;
@ -49,7 +49,7 @@ module VX_find_first #(
localparam I = 1 << j;
for (genvar i = 0; i < I; ++i) begin : g_i
localparam K = I+i-1;
assign s_n[K] = s_n[2*K+1] | s_n[2*K+2];
assign s_n[K] = s_n[2*K+2] | s_n[2*K+1];
assign d_n[K] = s_n[2*K+1] ? d_n[2*K+1] : d_n[2*K+2];
end
end

View file

@ -33,7 +33,6 @@ module VX_lzc #(
end else begin : g_lzc
wire [N-1:0][LOGN-1:0] indices;
for (genvar i = 0; i < N; ++i) begin : g_indices
assign indices[i] = REVERSE ? LOGN'(i) : LOGN'(N-1-i);
end
@ -43,8 +42,8 @@ module VX_lzc #(
.DATAW (LOGN),
.REVERSE (!REVERSE)
) find_first (
.data_in (indices),
.valid_in (data_in),
.data_in (indices),
.data_out (data_out),
.valid_out (valid_out)
);

View file

@ -131,11 +131,11 @@ module VX_mem_coalescer #(
VX_priority_encoder #(
.N (DATA_RATIO)
) priority_encoder (
) batch_sel (
.data_in (batch_mask),
.index_out (batch_idx),
`UNUSED_PIN (onehot_out),
.valid_out (batch_valid_n[i])
.valid_out (batch_valid_n[i]),
`UNUSED_PIN (onehot_out)
);
wire [DATA_RATIO-1:0][OUT_ADDR_WIDTH-1:0] addr_base;

View file

@ -0,0 +1,114 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_platform.vh"
`TRACING_OFF
// Iterates over the non-zero elements of an N-element input vector.
//
// An element is treated as valid when any of its bits is set. The module
// presents one valid element at a time on data_out together with its index
// (pid) and start/end markers (sop/eop). Pulsing `next` marks the current
// element as sent and moves on to the next valid element; when `next` fires
// on the last valid element the internal state is cleared for a new stream.
//
// NOTE(review): with OUT_REG != 0 the selected index is registered, so `pid`
// and `data_out` appear to lag the selection logic by OUT_REG cycles after a
// `next` pulse - confirm that consumers wait accordingly.
module VX_nz_iterator #(
    parameter DATAW      = 8, // Bit-width of each data element
    parameter N          = 4, // Number of elements in the stream
    parameter OUT_REG    = 0, // Output register
    parameter LPID_WIDTH = `LOG2UP(N)
) (
    input wire                     clk,
    input wire                     reset,
    input wire                     valid_in,  // Stream input valid
    input wire [N-1:0][DATAW-1:0]  data_in,   // Stream input data
    input wire                     next,      // Advances iterator
    output wire                    valid_out, // Current output valid
    output reg [DATAW-1:0]         data_out,  // Current output data
    output reg [LPID_WIDTH-1:0]    pid,       // Index of the current element
    output reg                     sop,       // Start of valid stream
    output reg                     eop        // End of valid stream
);
    if (N > 1) begin : g_iterator

        // One bit per element: set once that element has been emitted.
        reg [N-1:0] sent_mask_p;
        wire [LPID_WIDTH-1:0] start_p_n, start_p, end_p;
        wire valid_in_r;

        // An element is valid when at least one of its bits is set.
        wire [N-1:0] packet_valids;
        for (genvar i = 0; i < N; ++i) begin : g_packet_valids
            assign packet_valids[i] = (| data_in[i]);
        end

        // Constant index table fed to the selectors below.
        wire [N-1:0][LPID_WIDTH-1:0] packet_ids;
        for (genvar i = 0; i < N; ++i) begin : g_packet_ids
            assign packet_ids[i] = LPID_WIDTH'(i);
        end

        // Index of the first valid element that has not been sent yet.
        VX_find_first #(
            .N       (N),
            .DATAW   (LPID_WIDTH),
            .REVERSE (0)
        ) find_first (
            .valid_in  (packet_valids & ~sent_mask_p),
            .data_in   (packet_ids),
            .data_out  (start_p_n),
            `UNUSED_PIN (valid_out)
        );

        // Index of the last valid element; used to detect end of stream.
        VX_find_first #(
            .N       (N),
            .DATAW   (LPID_WIDTH),
            .REVERSE (1)
        ) find_last (
            .valid_in  (packet_valids),
            .data_in   (packet_ids),
            .data_out  (end_p),
            `UNUSED_PIN (valid_out)
        );

        // Optional output stage for {valid, current index}.
        VX_pipe_register #(
            .DATAW  (1 + LPID_WIDTH),
            .RESETW (1),
            .DEPTH  (OUT_REG)
        ) pipe_reg (
            .clk      (clk),
            .reset    (reset || next), // should flush on fire
            .enable   (1'b1),
            .data_in  ({valid_in, start_p_n}),
            .data_out ({valid_in_r, start_p})
        );

        reg is_first_p;

        wire is_last_p = (start_p == end_p);
        wire fire_eop  = next && is_last_p;

        always @(posedge clk) begin
            if (reset || fire_eop) begin
                // Stream finished (or reset): forget what has been sent.
                sent_mask_p <= '0;
                is_first_p  <= 1;
            end else if (next) begin
                // Mark the current element as sent and leave the "first" state.
                sent_mask_p[start_p] <= 1;
                is_first_p <= 0;
            end
        end

        assign valid_out = valid_in_r;
        assign data_out  = data_in[start_p];
        assign pid       = start_p;
        assign sop       = is_first_p;
        assign eop       = is_last_p;

    end else begin : g_passthru

        // Single-element stream: nothing to iterate, so no state is needed.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)
        `UNUSED_VAR (next)

        // Drive valid_out here as well; it was left undriven in this branch.
        assign valid_out = valid_in;
        assign data_out  = data_in[0];
        assign pid       = 0;
        assign sop       = 1;
        assign eop       = 1;

    end

endmodule
`TRACING_ON

View file

@ -33,7 +33,7 @@ module VX_priority_arbiter #(
VX_priority_encoder #(
.N (NUM_REQS)
) priority_encoder (
) grant_sel (
.data_in (requests),
.index_out (grant_index),
.onehot_out (grant_onehot),

View file

@ -16,7 +16,7 @@
`TRACING_OFF
module VX_priority_encoder #(
parameter N = 1,
parameter REVERSE = 0,
parameter REVERSE = 0, // 0 -> LSB, 1 -> MSB
parameter MODEL = 1,
parameter LN = `LOG2UP(N)
) (
@ -25,105 +25,159 @@ module VX_priority_encoder #(
output wire [LN-1:0] index_out,
output wire valid_out
);
wire [N-1:0] reversed;
if (REVERSE) begin : g_msb
if (REVERSE != 0) begin : g_reverse
for (genvar i = 0; i < N; ++i) begin : g_i
assign reversed[N-i-1] = data_in[i];
end
end else begin : g_no_reverse
assign reversed = data_in;
end
if (N == 1) begin : g_n1
if (N == 1) begin : g_n1
assign onehot_out = data_in;
assign index_out = '0;
assign valid_out = data_in;
assign onehot_out = reversed;
assign index_out = '0;
assign valid_out = reversed;
end else if (N == 2) begin : g_n2
end else if (N == 2) begin : g_n2
assign onehot_out = {data_in[1], data_in[0] & ~data_in[1]};
assign index_out = data_in[1];
assign valid_out = (| data_in);
assign onehot_out = {reversed[1] && ~reversed[0], reversed[0]};
assign index_out = ~reversed[0];
assign valid_out = (| reversed);
end else if (MODEL != 0) begin : g_model1
end else if (MODEL == 1) begin : g_model1
wire [N-1:0] higher_pri_regs;
assign higher_pri_regs[N-1] = 1'b0;
for (genvar i = N-2; i >= 0; --i) begin : g_higher_pri_regs
assign higher_pri_regs[i] = higher_pri_regs[i+1] | data_in[i+1];
end
assign onehot_out = data_in & ~higher_pri_regs;
`IGNORE_UNOPTFLAT_BEGIN
wire [N-1:0] higher_pri_regs;
`IGNORE_UNOPTFLAT_END
wire [N-1:0][LN-1:0] indices;
for (genvar i = 0; i < N; ++i) begin : g_indices
assign indices[i] = LN'(i);
end
assign higher_pri_regs[0] = 1'b0;
for (genvar i = 1; i < N; ++i) begin : g_higher_pri_regs
assign higher_pri_regs[i] = higher_pri_regs[i-1] | reversed[i-1];
end
assign onehot_out[N-1:0] = reversed[N-1:0] & ~higher_pri_regs[N-1:0];
VX_find_first #(
.N (N),
.DATAW (LN),
.REVERSE (1)
) find_first (
.valid_in (data_in),
.data_in (indices),
.data_out (index_out),
.valid_out (valid_out)
);
VX_lzc #(
.N (N),
.REVERSE (1)
) lzc (
.data_in (reversed),
.data_out (index_out),
.valid_out (valid_out)
);
end else begin : g_model0
end else if (MODEL == 2) begin : g_model2
reg [LN-1:0] index_w;
reg [N-1:0] onehot_w;
wire [N-1:0] scan_lo;
VX_scan #(
.N (N),
.OP ("|")
) scan (
.data_in (reversed),
.data_out (scan_lo)
);
VX_lzc #(
.N (N),
.REVERSE (1)
) lzc (
.data_in (reversed),
.data_out (index_out),
.valid_out(valid_out)
);
assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
end else if (MODEL == 3) begin : g_model3
assign onehot_out = reversed & -reversed;
VX_lzc #(
.N (N),
.REVERSE (1)
) lzc (
.data_in (reversed),
.data_out (index_out),
.valid_out (valid_out)
);
end else begin : g_model0
reg [LN-1:0] index_w;
reg [N-1:0] onehot_w;
always @(*) begin
index_w = 'x;
onehot_w = 'x;
for (integer i = N-1; i >= 0; --i) begin
if (reversed[i]) begin
index_w = LN'(i);
onehot_w = N'(1) << i;
// Behavioral MSB-priority scan over data_in.
// Bits are visited from index 0 upward and every set bit overwrites the
// previous result, so the highest-indexed set bit wins.
// index_w / onehot_w stay 'x when no bit of data_in is set.
always @(*) begin
    index_w  = 'x;
    onehot_w = 'x;
    // The bound must be N (not N-1): bit N-1 is the highest-priority input
    // and was previously never examined, yielding a stale lower index or 'x
    // whenever data_in[N-1] was set.
    for (integer i = 0; i < N; ++i) begin
        if (data_in[i]) begin
            index_w  = LN'(i);
            onehot_w = N'(1) << i;
        end
    end
end
assign index_out = index_w;
assign onehot_out = onehot_w;
assign valid_out = (| data_in);
end
assign index_out = index_w;
assign onehot_out = onehot_w;
assign valid_out = (| reversed);
end else begin: g_lsb
// LSB-priority encoder variants. Exactly one branch is elaborated, chosen by
// N and MODEL; every branch drives onehot_out, index_out and valid_out from
// data_in.
if (N == 1) begin : g_n1
// Single input: the bit is its own one-hot grant and valid flag; index is 0.
assign onehot_out = data_in;
assign index_out = '0;
assign valid_out = data_in;
end else if (N == 2) begin : g_n2
// Two inputs: bit 0 wins whenever set; bit 1 is granted only when bit 0 is clear.
assign onehot_out = {data_in[1] && ~data_in[0], data_in[0]};
// Index is 0 when bit 0 is set, 1 otherwise (also 1 when no bit is set).
assign index_out = ~data_in[0];
assign valid_out = (| data_in);
end else if (MODEL == 1) begin : g_model1
// Ripple mask: higher_pri_regs[i] is set when any lower-indexed bit
// data_in[i-1:0] is set.
// NOTE(review): the IGNORE_UNOPTFLAT macros presumably silence a lint warning
// on this bit-to-bit chain - confirm against the macro definition.
`IGNORE_UNOPTFLAT_BEGIN
wire [N-1:0] higher_pri_regs;
`IGNORE_UNOPTFLAT_END
assign higher_pri_regs[0] = 1'b0;
for (genvar i = 1; i < N; ++i) begin : g_higher_pri_regs
assign higher_pri_regs[i] = higher_pri_regs[i-1] | data_in[i-1];
end
// Keep only the set bit that has no set bit below it.
assign onehot_out[N-1:0] = data_in[N-1:0] & ~higher_pri_regs[N-1:0];
// index_out / valid_out are produced by VX_lzc.
// NOTE(review): presumably REVERSE=1 makes VX_lzc report the index of the
// lowest set bit - confirm against VX_lzc.
VX_lzc #(
.N (N),
.REVERSE (1)
) lzc (
.data_in (data_in),
.data_out (index_out),
.valid_out (valid_out)
);
end else if (MODEL == 2) begin : g_model2
// scan_lo is produced by VX_scan configured with the "|" operator.
// NOTE(review): presumably an inclusive prefix-OR starting at bit 0 -
// confirm against VX_scan.
wire [N-1:0] scan_lo;
VX_scan #(
.N (N),
.OP ("|")
) scan (
.data_in (data_in),
.data_out (scan_lo)
);
// Bit i is kept when scan_lo[i] is set and scan_lo[i-1] is clear; bit 0 is
// kept whenever scan_lo[0] is set.
assign onehot_out = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
// index_out / valid_out are produced by VX_lzc (same configuration as model 1).
VX_lzc #(
.N (N),
.REVERSE (1)
) lzc (
.data_in (data_in),
.data_out (index_out),
.valid_out(valid_out)
);
end else if (MODEL == 3) begin : g_model3
// x & -x (two's-complement negate) isolates the lowest set bit of data_in.
assign onehot_out = data_in & -data_in;
// index_out / valid_out are produced by VX_lzc (same configuration as model 1).
VX_lzc #(
.N (N),
.REVERSE (1)
) lzc (
.data_in (data_in),
.data_out (index_out),
.valid_out (valid_out)
);
end else begin : g_model0
// Behavioral fallback for any other MODEL value.
reg [LN-1:0] index_w;
reg [N-1:0] onehot_w;
always @(*) begin
// Outputs are left as 'x when no bit of data_in is set.
index_w = 'x;
onehot_w = 'x;
// Walk from bit N-1 down to bit 0; each set bit overwrites the previous
// result, so the lowest-indexed set bit wins.
for (integer i = N-1; i >= 0; --i) begin
if (data_in[i]) begin
index_w = LN'(i);
onehot_w = N'(1) << i;
end
end
end
assign index_out = index_w;
assign onehot_out = onehot_w;
assign valid_out = (| data_in);
end
end
endmodule