minor update
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions

This commit is contained in:
tinebp 2025-02-24 04:36:34 -08:00
parent 07d447b2ca
commit 9b93e6dba3
4 changed files with 52 additions and 30 deletions

View file

@ -208,11 +208,11 @@
`define CONCAT(out, left_in, right_in, L, R) \ `define CONCAT(out, left_in, right_in, L, R) \
/* verilator lint_off GENUNNAMED */ \ /* verilator lint_off GENUNNAMED */ \
if (L == 0) begin \ if ((L) != 0 && (R) == 0) begin \
assign out = right_in; \
end else if (R == 0) begin \
assign out = left_in; \ assign out = left_in; \
end else begin \ end else if ((L) == 0 && (R) != 0) begin \
assign out = right_in; \
end else if ((L) != 0 && (R) != 0) begin \
assign out = {left_in, right_in}; \ assign out = {left_in, right_in}; \
end \ end \
/* verilator lint_off GENUNNAMED */ /* verilator lint_off GENUNNAMED */

View file

@ -50,7 +50,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
localparam PER_BANK_REG_BITS = NR_BITS - BANKID_REG_BITS; localparam PER_BANK_REG_BITS = NR_BITS - BANKID_REG_BITS;
localparam PER_BANK_WIS_WIDTH = `UP(PER_BANK_WIS_BITS); localparam PER_BANK_WIS_WIDTH = `UP(PER_BANK_WIS_BITS);
localparam PER_BANK_REG_WIDTH = `UP(PER_BANK_REG_BITS); localparam PER_BANK_REG_WIDTH = `UP(PER_BANK_REG_BITS);
localparam GPR_REQ_DATAW = SRC_OPD_WIDTH + SIMD_IDX_W + PER_BANK_WIS_WIDTH + PER_BANK_REG_BITS; localparam GPR_REQ_DATAW = SRC_OPD_WIDTH + SIMD_IDX_BITS + PER_BANK_WIS_BITS + PER_BANK_REG_BITS;
localparam GPR_RSP_DATAW = SRC_OPD_WIDTH + `SIMD_WIDTH * `XLEN; localparam GPR_RSP_DATAW = SRC_OPD_WIDTH + `SIMD_WIDTH * `XLEN;
localparam BYTEENW = GPR_BANK_DATAW / 8; localparam BYTEENW = GPR_BANK_DATAW / 8;
@ -60,7 +60,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0] bank_req_valid; wire [NUM_BANKS-1:0] bank_req_valid;
wire [NUM_BANKS-1:0][GPR_REQ_DATAW-1:0] bank_req_data; wire [NUM_BANKS-1:0][GPR_REQ_DATAW-1:0] bank_req_data;
wire [NUM_BANKS-1:0][1:0] bank_req_opd_id; wire [NUM_BANKS-1:0][SRC_OPD_WIDTH-1:0] bank_req_opd_id;
wire [NUM_BANKS-1:0][SIMD_IDX_W-1:0] bank_req_sid; wire [NUM_BANKS-1:0][SIMD_IDX_W-1:0] bank_req_sid;
wire [NUM_BANKS-1:0][PER_BANK_WIS_WIDTH-1:0] bank_req_wis; wire [NUM_BANKS-1:0][PER_BANK_WIS_WIDTH-1:0] bank_req_wis;
wire [NUM_BANKS-1:0][PER_BANK_REG_WIDTH-1:0] bank_req_id; wire [NUM_BANKS-1:0][PER_BANK_REG_WIDTH-1:0] bank_req_id;
@ -70,7 +70,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0] bank_rsp_valid; wire [NUM_BANKS-1:0] bank_rsp_valid;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] bank_rsp_idx; wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] bank_rsp_idx;
wire [NUM_BANKS-1:0][1:0] bank_rsp_opd_id; wire [NUM_BANKS-1:0][SRC_OPD_WIDTH-1:0] bank_rsp_opd_id;
wire [NUM_BANKS-1:0][GPR_RSP_DATAW-1:0] bank_rsp_data; wire [NUM_BANKS-1:0][GPR_RSP_DATAW-1:0] bank_rsp_data;
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
@ -82,13 +82,16 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_gpr_req for (genvar i = 0; i < NUM_REQS; ++i) begin : g_gpr_req
assign gpr_req_valid[i] = gpr_if[i].req_valid; assign gpr_req_valid[i] = gpr_if[i].req_valid;
assign gpr_req_data[i] = { if (SIMD_IDX_BITS != 0 || PER_BANK_WIS_BITS != 0) begin : g_simd_wis
gpr_if[i].req_data.opd_id, wire [SIMD_IDX_BITS + PER_BANK_WIS_BITS-1:0] tmp;
gpr_if[i].req_data.sid, `CONCAT(tmp, gpr_if[i].req_data.sid, gpr_if[i].req_data.wis[ISSUE_WIS_W-1:BANKID_WIS_BITS], SIMD_IDX_BITS, PER_BANK_WIS_BITS);
gpr_if[i].req_data.wis[ISSUE_WIS_W-1:BANKID_WIS_BITS], assign gpr_req_data[i] = {gpr_if[i].req_data.opd_id, tmp, gpr_if[i].req_data.reg_id[NR_BITS-1:BANKID_REG_BITS]};
gpr_if[i].req_data.reg_id[NR_BITS-1:BANKID_REG_BITS] end else begin : g_no_simd_wis
}; assign gpr_req_data[i] = {gpr_if[i].req_data.opd_id, gpr_if[i].req_data.reg_id[NR_BITS-1:BANKID_REG_BITS]};
end
`CONCAT(gpr_req_bank_idx[i], gpr_if[i].req_data.wis[BANKID_WIS_BITS-1:0], gpr_if[i].req_data.reg_id[BANKID_REG_BITS-1:0], BANKID_WIS_BITS, BANKID_REG_BITS) `CONCAT(gpr_req_bank_idx[i], gpr_if[i].req_data.wis[BANKID_WIS_BITS-1:0], gpr_if[i].req_data.reg_id[BANKID_REG_BITS-1:0], BANKID_WIS_BITS, BANKID_REG_BITS)
`UNUSED_VAR (gpr_if[i].req_data.sid)
`UNUSED_VAR (gpr_if[i].req_data.wis)
assign gpr_if[i].req_ready = gpr_req_ready[i]; assign gpr_if[i].req_ready = gpr_req_ready[i];
end end
@ -139,7 +142,18 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
end end
for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_bank_req_data for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_bank_req_data
assign {bank_req_opd_id[b], bank_req_sid[b], bank_req_wis[b], bank_req_id[b]} = bank_req_data[b]; assign bank_req_opd_id[b] = bank_req_data[b][GPR_REQ_DATAW-1 -: SRC_OPD_WIDTH];
if (SIMD_IDX_BITS != 0) begin : g_simd
assign bank_req_sid[b] = bank_req_data[b][GPR_REQ_DATAW-SRC_OPD_WIDTH-1 -: SIMD_IDX_BITS];
end else begin : g_no_simd
assign bank_req_sid[b] = '0;
end
if (PER_BANK_WIS_BITS != 0) begin : g_wis
assign bank_req_wis[b] = bank_req_data[b][GPR_REQ_DATAW-SRC_OPD_WIDTH-SIMD_IDX_BITS-1 -: PER_BANK_WIS_BITS];
end else begin : g_no_wis
assign bank_req_wis[b] = '0;
end
assign bank_req_id[b] = bank_req_data[b][PER_BANK_REG_WIDTH-1:0];
end end
for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_banks for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_banks
@ -181,7 +195,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
); );
VX_pipe_buffer #( VX_pipe_buffer #(
.DATAW (REQ_SEL_WIDTH + 2) .DATAW (REQ_SEL_WIDTH + SRC_OPD_WIDTH)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View file

@ -29,35 +29,39 @@ module VX_issue_top import VX_gpu_pkg::*; #(
input wire [INST_OP_BITS-1:0] decode_op_type, input wire [INST_OP_BITS-1:0] decode_op_type,
input op_args_t decode_op_args, input op_args_t decode_op_args,
input wire decode_wb, input wire decode_wb,
input wire [NR_BITS-1:0] decode_rd, input logic [NUM_SRC_OPDS-1:0] decode_used_rs,
input wire [NR_BITS-1:0] decode_rs1, input reg_idx_t decode_rd,
input wire [NR_BITS-1:0] decode_rs2, input reg_idx_t decode_rs1,
input wire [NR_BITS-1:0] decode_rs3, input reg_idx_t decode_rs2,
input reg_idx_t decode_rs3,
output wire decode_ready, output wire decode_ready,
input wire writeback_valid[`ISSUE_WIDTH], input wire writeback_valid[`ISSUE_WIDTH],
input wire [UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH], input wire [UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH],
input wire [ISSUE_WIS_W-1:0] writeback_wis[`ISSUE_WIDTH], input wire [ISSUE_WIS_W-1:0] writeback_wis[`ISSUE_WIDTH],
input wire [`NUM_THREADS-1:0] writeback_tmask[`ISSUE_WIDTH], input wire [SIMD_IDX_W-1:0] writeback_sid[`ISSUE_WIDTH],
input wire [`SIMD_WIDTH-1:0] writeback_tmask[`ISSUE_WIDTH],
input wire [PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH], input wire [PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH],
input wire [NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH], input wire [NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH],
input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH], input wire [`SIMD_WIDTH-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH],
input wire writeback_sop[`ISSUE_WIDTH], input wire writeback_sop[`ISSUE_WIDTH],
input wire writeback_eop[`ISSUE_WIDTH], input wire writeback_eop[`ISSUE_WIDTH],
output wire dispatch_valid[NUM_EX_UNITS * `ISSUE_WIDTH], output wire dispatch_valid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [UUID_WIDTH-1:0] dispatch_uuid[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [UUID_WIDTH-1:0] dispatch_uuid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [ISSUE_WIS_W-1:0] dispatch_wis[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [ISSUE_WIS_W-1:0] dispatch_wis[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0] dispatch_tmask[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [SIMD_IDX_W-1:0] dispatch_sid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`SIMD_WIDTH-1:0] dispatch_tmask[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [PC_BITS-1:0] dispatch_PC[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [PC_BITS-1:0] dispatch_PC[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [INST_ALU_BITS-1:0] dispatch_op_type[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [INST_ALU_BITS-1:0] dispatch_op_type[NUM_EX_UNITS * `ISSUE_WIDTH],
output op_args_t dispatch_op_args[NUM_EX_UNITS * `ISSUE_WIDTH], output op_args_t dispatch_op_args[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire dispatch_wb[NUM_EX_UNITS * `ISSUE_WIDTH], output wire dispatch_wb[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [NR_BITS-1:0] dispatch_rd[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [NR_BITS-1:0] dispatch_rd[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [NT_WIDTH-1:0] dispatch_tid[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [`SIMD_WIDTH-1:0][`XLEN-1:0] dispatch_rs1_data[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [`SIMD_WIDTH-1:0][`XLEN-1:0] dispatch_rs2_data[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [`SIMD_WIDTH-1:0][`XLEN-1:0] dispatch_rs3_data[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[NUM_EX_UNITS * `ISSUE_WIDTH], output wire dispatch_sop[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire dispatch_eop[NUM_EX_UNITS * `ISSUE_WIDTH],
input wire dispatch_ready[NUM_EX_UNITS * `ISSUE_WIDTH] input wire dispatch_ready[NUM_EX_UNITS * `ISSUE_WIDTH]
); );
VX_decode_if decode_if(); VX_decode_if decode_if();
@ -73,6 +77,7 @@ module VX_issue_top import VX_gpu_pkg::*; #(
assign decode_if.data.op_type = decode_op_type; assign decode_if.data.op_type = decode_op_type;
assign decode_if.data.op_args = decode_op_args; assign decode_if.data.op_args = decode_op_args;
assign decode_if.data.wb = decode_wb; assign decode_if.data.wb = decode_wb;
assign decode_if.data.used_rs = decode_used_rs;
assign decode_if.data.rd = decode_rd; assign decode_if.data.rd = decode_rd;
assign decode_if.data.rs1 = decode_rs1; assign decode_if.data.rs1 = decode_rs1;
assign decode_if.data.rs2 = decode_rs2; assign decode_if.data.rs2 = decode_rs2;
@ -83,6 +88,7 @@ module VX_issue_top import VX_gpu_pkg::*; #(
assign writeback_if[i].valid = writeback_valid[i]; assign writeback_if[i].valid = writeback_valid[i];
assign writeback_if[i].data.uuid = writeback_uuid[i]; assign writeback_if[i].data.uuid = writeback_uuid[i];
assign writeback_if[i].data.wis = writeback_wis[i]; assign writeback_if[i].data.wis = writeback_wis[i];
assign writeback_if[i].data.sid = writeback_sid[i];
assign writeback_if[i].data.tmask = writeback_tmask[i]; assign writeback_if[i].data.tmask = writeback_tmask[i];
assign writeback_if[i].data.PC = writeback_PC[i]; assign writeback_if[i].data.PC = writeback_PC[i];
assign writeback_if[i].data.rd = writeback_rd[i]; assign writeback_if[i].data.rd = writeback_rd[i];
@ -95,16 +101,18 @@ module VX_issue_top import VX_gpu_pkg::*; #(
assign dispatch_valid[i] = dispatch_if[i].valid; assign dispatch_valid[i] = dispatch_if[i].valid;
assign dispatch_uuid[i] = dispatch_if[i].data.uuid; assign dispatch_uuid[i] = dispatch_if[i].data.uuid;
assign dispatch_wis[i] = dispatch_if[i].data.wis; assign dispatch_wis[i] = dispatch_if[i].data.wis;
assign dispatch_sid[i] = dispatch_if[i].data.sid;
assign dispatch_tmask[i] = dispatch_if[i].data.tmask; assign dispatch_tmask[i] = dispatch_if[i].data.tmask;
assign dispatch_PC[i] = dispatch_if[i].data.PC; assign dispatch_PC[i] = dispatch_if[i].data.PC;
assign dispatch_op_type[i] = dispatch_if[i].data.op_type; assign dispatch_op_type[i] = dispatch_if[i].data.op_type;
assign dispatch_op_args[i] = dispatch_if[i].data.op_args; assign dispatch_op_args[i] = dispatch_if[i].data.op_args;
assign dispatch_wb[i] = dispatch_if[i].data.wb; assign dispatch_wb[i] = dispatch_if[i].data.wb;
assign dispatch_rd[i] = dispatch_if[i].data.rd; assign dispatch_rd[i] = dispatch_if[i].data.rd;
assign dispatch_tid[i] = dispatch_if[i].data.tid;
assign dispatch_rs1_data[i] = dispatch_if[i].data.rs1_data; assign dispatch_rs1_data[i] = dispatch_if[i].data.rs1_data;
assign dispatch_rs2_data[i] = dispatch_if[i].data.rs2_data; assign dispatch_rs2_data[i] = dispatch_if[i].data.rs2_data;
assign dispatch_rs3_data[i] = dispatch_if[i].data.rs3_data; assign dispatch_rs3_data[i] = dispatch_if[i].data.rs3_data;
assign dispatch_sop[i] = dispatch_if[i].data.sop;
assign dispatch_eop[i] = dispatch_if[i].data.eop;
assign dispatch_if[i].ready = dispatch_ready[i]; assign dispatch_if[i].ready = dispatch_ready[i];
end end

View file

@ -233,9 +233,9 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end end
`RUNTIME_ASSERT((timeout_ctr < STALL_TIMEOUT), `RUNTIME_ASSERT((timeout_ctr < STALL_TIMEOUT),
("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)", ("%t: *** %s timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
$time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr, $time, INSTANCE_ID, w, {staging_if[w].data.PC, 1'b0}, staging_if[w].data.tmask, timeout_ctr,
operands_busy, staging_if[w].data.uuid)) operands_busy, staging_if[w].data.uuid))
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0, `RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if.data.rd] != 0,
("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)", ("%t: *** %s invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",