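Bug fixes: derive PID_BITS from NUM_THREADS instead of SIMD_WIDTH, advance the VX_opc_unit SIMD iterator on dequeue, and register the VX_nz_iterator outputs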
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions

This commit is contained in:
tinebp 2025-02-23 04:43:44 -08:00
parent 92d0092e39
commit fcd81b291f
8 changed files with 46 additions and 52 deletions

View file

@ -32,7 +32,7 @@ module VX_alu_int import VX_gpu_pkg::*; #(
`UNUSED_SPARAM (INSTANCE_ID)
localparam LANE_BITS = `CLOG2(NUM_LANES);
localparam LANE_WIDTH = `UP(LANE_BITS);
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam SHIFT_IMM_BITS = `CLOG2(`XLEN);

View file

@ -27,7 +27,7 @@ module VX_alu_muldiv import VX_gpu_pkg::*; #(
VX_result_if.master result_if
);
`UNUSED_SPARAM (INSTANCE_ID)
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam TAG_WIDTH = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + PID_WIDTH + 1 + 1;

View file

@ -38,7 +38,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
VX_result_if.master result_if
);
`UNUSED_SPARAM (INSTANCE_ID)
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;

View file

@ -29,7 +29,7 @@ module VX_fpu_unit import VX_gpu_pkg::*, VX_fpu_pkg::*; #(
`UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = `NUM_FPU_BLOCKS;
localparam NUM_LANES = `NUM_FPU_LANES;
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam TAG_WIDTH = `LOG2UP(`FPUQ_SIZE);
localparam IBUF_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + PID_WIDTH + 1 + 1;

View file

@ -29,7 +29,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
VX_lsu_mem_if.master lsu_mem_if
);
localparam NUM_LANES = `NUM_LSU_LANES;
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam RSP_ARB_DATAW= UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_IN_SIZE);

View file

@ -50,15 +50,11 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
reg [NUM_SRC_OPDS-1:0] opds_needed, opds_needed_n;
reg [NUM_SRC_OPDS-1:0] opds_busy, opds_busy_n;
reg [2:0] state, state_n;
wire output_ready;
wire [`SIMD_WIDTH-1:0] simd_out;
wire [SIMD_IDX_W-1:0] simd_pid;
wire simd_sop;
wire simd_eop;
wire staging_fire = staging_if.valid && staging_if.ready;
wire gpr_req_fire = gpr_if.req_valid && gpr_if.req_ready;
wire gpr_rsp_fire = gpr_if.rsp_valid;
wire simd_sop, simd_eop;
VX_pipe_buffer #(
.DATAW (SCB_DATAW)
@ -73,10 +69,13 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
.ready_out(staging_if.ready)
);
wire output_ready;
wire dispatched = (state == STATE_DISPATCH) && output_ready;
//wire enqueue = (state == STATE_IDLE) && staging_if.valid;
wire dequeue = (state == STATE_DISPATCH) && output_ready;
assign staging_if.ready = dispatched && simd_eop;
assign staging_if.ready = dequeue && simd_eop;
wire gpr_req_fire = gpr_if.req_valid && gpr_if.req_ready;
wire gpr_rsp_fire = gpr_if.rsp_valid;
wire [NR_BITS-1:0] rs1 = to_reg_number(staging_if.data.rs1);
wire [NR_BITS-1:0] rs2 = to_reg_number(staging_if.data.rs2);
@ -94,7 +93,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
if (staging_if.valid) begin
opds_needed_n = staging_if.data.used_rs;
opds_busy_n = staging_if.data.used_rs;
if (opds_busy_n == 0) begin
if (staging_if.data.used_rs == 0) begin
state_n = STATE_DISPATCH;
end else begin
state_n = STATE_FETCH;
@ -116,7 +115,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
if (output_ready) begin
if (simd_eop) begin
state_n = STATE_IDLE;
end else begin
end else if (staging_if.data.used_rs != 0) begin
opds_needed_n = staging_if.data.used_rs;
opds_busy_n = staging_if.data.used_rs;
state_n = STATE_FETCH;
@ -160,7 +159,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
// operands fetch response
reg [NUM_SRC_OPDS-1:0][`SIMD_WIDTH-1:0][`XLEN-1:0] opd_values;
always @(posedge clk) begin
if (reset || dispatched) begin
if (reset || dequeue) begin
for (integer i = 0; i < NUM_SRC_OPDS; ++i) begin
opd_values[i] <= '0;
end
@ -171,7 +170,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
end
end
// output scheduler info
// output pending info
assign pending_sid = simd_pid;
assign pending_wis = staging_if.data.wis;
always @(*) begin
@ -183,16 +182,17 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
end
end
// simd iterator
VX_nz_iterator #(
.DATAW (`SIMD_WIDTH),
.N (SIMD_COUNT),
.OUT_REG (1)
) valid_iter (
) simd_iter (
.clk (clk),
.reset (reset),
.valid_in(staging_if.valid),
.data_in (staging_if.data.tmask),
.next (staging_fire),
.next (dequeue),
`UNUSED_PIN (valid_out),
.data_out(simd_out),
.pid (simd_pid),
@ -239,7 +239,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
trace_ex_type(1, scoreboard_if.data.ex_type);
`TRACE(1, (", op="))
trace_ex_op(1, scoreboard_if.data.ex_type, scoreboard_if.data.op_type, scoreboard_if.data.op_args);
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d (#%0d)\n", scoreboard_if.data.tmask, scoreboard_if.data.wb, scoreboard_if.data.rd, scoreboard_if.data.rs1, scoreboard_if.data.rs2, scoreboard_if.data.rs3, scoreboard_if.data.uuid))
`TRACE(1, (", tmask=%b, wb=%b, used_rs=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d (#%0d)\n", scoreboard_if.data.tmask, scoreboard_if.data.wb, scoreboard_if.data.used_rs, scoreboard_if.data.rd, scoreboard_if.data.rs1, scoreboard_if.data.rs2, scoreboard_if.data.rs3, scoreboard_if.data.uuid))
end
if (gpr_if.req_valid && gpr_if.req_ready) begin
`TRACE(1, ("%t: %s-gpr-req: opd=%0d, wis=%0d, sid=%0d, reg=%0d\n", $time, INSTANCE_ID, gpr_if.req_data.opd_id, wis_to_wid(gpr_if.req_data.wis, ISSUE_ID), gpr_if.req_data.sid, gpr_if.req_data.reg_id))

View file

@ -29,7 +29,7 @@ module VX_pe_switch import VX_gpu_pkg::*; #(
VX_execute_if.master execute_out_if[PE_COUNT],
VX_result_if .slave result_in_if[PE_COUNT]
);
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam REQ_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + INST_ALU_BITS + $bits(op_args_t) + 1 + NR_BITS + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
localparam RSP_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;

View file

@ -24,7 +24,7 @@ module VX_nz_iterator #(
input wire reset,
input wire valid_in, // Stream input valid
input wire [N-1:0][DATAW-1:0] data_in, // Stream input data
input wire next, // Advances iterator
input wire next, // Advance iterator
output wire valid_out, // Current output valid
output reg [DATAW-1:0] data_out, // Current output data
output reg [LPID_WIDTH-1:0] pid, // Index of the current element
@ -34,8 +34,7 @@ module VX_nz_iterator #(
if (N > 1) begin : g_iterator
reg [N-1:0] sent_mask_p;
wire [LPID_WIDTH-1:0] start_p_n, start_p, end_p;
wire valid_in_r;
wire [LPID_WIDTH-1:0] start_p, end_p;
wire [N-1:0] packet_valids;
for (genvar i = 0; i < N; ++i) begin : g_packet_valids
@ -52,9 +51,9 @@ module VX_nz_iterator #(
.DATAW (LPID_WIDTH),
.REVERSE (0)
) find_first (
.valid_in (packet_valids & ~sent_mask_p),
.data_in (packet_ids),
.data_out (start_p_n),
.valid_in (packet_valids & ~sent_mask_p),
.data_in (packet_ids),
.data_out (start_p),
`UNUSED_PIN (valid_out)
);
@ -63,43 +62,38 @@ module VX_nz_iterator #(
.DATAW (LPID_WIDTH),
.REVERSE (1)
) find_last (
.valid_in (packet_valids),
.data_in (packet_ids),
.data_out (end_p),
.valid_in (packet_valids),
.data_in (packet_ids),
.data_out (end_p),
`UNUSED_PIN (valid_out)
);
VX_pipe_register #(
.DATAW (1 + LPID_WIDTH),
.RESETW (1),
.DEPTH (OUT_REG)
) pipe_reg (
.clk (clk),
.reset (reset || next), // should flush on fire
.enable (1'b1),
.data_in ({valid_in, start_p_n}),
.data_out ({valid_in_r, start_p})
);
reg is_first_p;
wire is_last_p = (start_p == end_p);
wire fire_eop = next && is_last_p;
wire enable = valid_in && (~valid_out || next);
always @(posedge clk) begin
if (reset || fire_eop) begin
if (reset || (enable && (is_last_p || eop))) begin
sent_mask_p <= '0;
is_first_p <= 1;
end else if (next) begin
is_first_p <= 1;
end else if (enable) begin
sent_mask_p[start_p] <= 1;
is_first_p <= 0;
end
end
assign valid_out = valid_in_r;
assign data_out = data_in[start_p];
assign pid = start_p;
assign sop = is_first_p;
assign eop = is_last_p;
VX_pipe_register #(
.DATAW (1 + DATAW + LPID_WIDTH + 1 + 1),
.RESETW (1),
.DEPTH (OUT_REG)
) pipe_reg (
.clk (clk),
.reset (reset || (enable && eop)),
.enable (enable),
.data_in ({valid_in, data_in[start_p], start_p, is_first_p, is_last_p}),
.data_out ({valid_out, data_out, pid, sop, eop})
);
end else begin : g_passthru