mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
bug fixes
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
This commit is contained in:
parent
92d0092e39
commit
fcd81b291f
8 changed files with 46 additions and 52 deletions
|
@ -32,7 +32,7 @@ module VX_alu_int import VX_gpu_pkg::*; #(
|
|||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam LANE_BITS = `CLOG2(NUM_LANES);
|
||||
localparam LANE_WIDTH = `UP(LANE_BITS);
|
||||
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam SHIFT_IMM_BITS = `CLOG2(`XLEN);
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ module VX_alu_muldiv import VX_gpu_pkg::*; #(
|
|||
VX_result_if.master result_if
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam TAG_WIDTH = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + PID_WIDTH + 1 + 1;
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ module VX_csr_unit import VX_gpu_pkg::*; #(
|
|||
VX_result_if.master result_if
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ module VX_fpu_unit import VX_gpu_pkg::*, VX_fpu_pkg::*; #(
|
|||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_FPU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_FPU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam TAG_WIDTH = `LOG2UP(`FPUQ_SIZE);
|
||||
localparam IBUF_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + PID_WIDTH + 1 + 1;
|
||||
|
|
|
@ -29,7 +29,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
VX_lsu_mem_if.master lsu_mem_if
|
||||
);
|
||||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_IN_SIZE);
|
||||
|
|
|
@ -50,15 +50,11 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
reg [NUM_SRC_OPDS-1:0] opds_needed, opds_needed_n;
|
||||
reg [NUM_SRC_OPDS-1:0] opds_busy, opds_busy_n;
|
||||
reg [2:0] state, state_n;
|
||||
wire output_ready;
|
||||
|
||||
wire [`SIMD_WIDTH-1:0] simd_out;
|
||||
wire [SIMD_IDX_W-1:0] simd_pid;
|
||||
wire simd_sop;
|
||||
wire simd_eop;
|
||||
|
||||
wire staging_fire = staging_if.valid && staging_if.ready;
|
||||
wire gpr_req_fire = gpr_if.req_valid && gpr_if.req_ready;
|
||||
wire gpr_rsp_fire = gpr_if.rsp_valid;
|
||||
wire simd_sop, simd_eop;
|
||||
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (SCB_DATAW)
|
||||
|
@ -73,10 +69,13 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
.ready_out(staging_if.ready)
|
||||
);
|
||||
|
||||
wire output_ready;
|
||||
wire dispatched = (state == STATE_DISPATCH) && output_ready;
|
||||
//wire enqueue = (state == STATE_IDLE) && staging_if.valid;
|
||||
wire dequeue = (state == STATE_DISPATCH) && output_ready;
|
||||
|
||||
assign staging_if.ready = dispatched && simd_eop;
|
||||
assign staging_if.ready = dequeue && simd_eop;
|
||||
|
||||
wire gpr_req_fire = gpr_if.req_valid && gpr_if.req_ready;
|
||||
wire gpr_rsp_fire = gpr_if.rsp_valid;
|
||||
|
||||
wire [NR_BITS-1:0] rs1 = to_reg_number(staging_if.data.rs1);
|
||||
wire [NR_BITS-1:0] rs2 = to_reg_number(staging_if.data.rs2);
|
||||
|
@ -94,7 +93,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
if (staging_if.valid) begin
|
||||
opds_needed_n = staging_if.data.used_rs;
|
||||
opds_busy_n = staging_if.data.used_rs;
|
||||
if (opds_busy_n == 0) begin
|
||||
if (staging_if.data.used_rs == 0) begin
|
||||
state_n = STATE_DISPATCH;
|
||||
end else begin
|
||||
state_n = STATE_FETCH;
|
||||
|
@ -116,7 +115,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
if (output_ready) begin
|
||||
if (simd_eop) begin
|
||||
state_n = STATE_IDLE;
|
||||
end else begin
|
||||
end else if (staging_if.data.used_rs != 0) begin
|
||||
opds_needed_n = staging_if.data.used_rs;
|
||||
opds_busy_n = staging_if.data.used_rs;
|
||||
state_n = STATE_FETCH;
|
||||
|
@ -160,7 +159,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
// operands fetch response
|
||||
reg [NUM_SRC_OPDS-1:0][`SIMD_WIDTH-1:0][`XLEN-1:0] opd_values;
|
||||
always @(posedge clk) begin
|
||||
if (reset || dispatched) begin
|
||||
if (reset || dequeue) begin
|
||||
for (integer i = 0; i < NUM_SRC_OPDS; ++i) begin
|
||||
opd_values[i] <= '0;
|
||||
end
|
||||
|
@ -171,7 +170,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
// output scheduler info
|
||||
// output pending info
|
||||
assign pending_sid = simd_pid;
|
||||
assign pending_wis = staging_if.data.wis;
|
||||
always @(*) begin
|
||||
|
@ -183,16 +182,17 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
// simd iterator
|
||||
VX_nz_iterator #(
|
||||
.DATAW (`SIMD_WIDTH),
|
||||
.N (SIMD_COUNT),
|
||||
.OUT_REG (1)
|
||||
) valid_iter (
|
||||
) simd_iter (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in(staging_if.valid),
|
||||
.data_in (staging_if.data.tmask),
|
||||
.next (staging_fire),
|
||||
.next (dequeue),
|
||||
`UNUSED_PIN (valid_out),
|
||||
.data_out(simd_out),
|
||||
.pid (simd_pid),
|
||||
|
@ -239,7 +239,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
trace_ex_type(1, scoreboard_if.data.ex_type);
|
||||
`TRACE(1, (", op="))
|
||||
trace_ex_op(1, scoreboard_if.data.ex_type, scoreboard_if.data.op_type, scoreboard_if.data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d (#%0d)\n", scoreboard_if.data.tmask, scoreboard_if.data.wb, scoreboard_if.data.rd, scoreboard_if.data.rs1, scoreboard_if.data.rs2, scoreboard_if.data.rs3, scoreboard_if.data.uuid))
|
||||
`TRACE(1, (", tmask=%b, wb=%b, used_rs=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d (#%0d)\n", scoreboard_if.data.tmask, scoreboard_if.data.wb, scoreboard_if.data.used_rs, scoreboard_if.data.rd, scoreboard_if.data.rs1, scoreboard_if.data.rs2, scoreboard_if.data.rs3, scoreboard_if.data.uuid))
|
||||
end
|
||||
if (gpr_if.req_valid && gpr_if.req_ready) begin
|
||||
`TRACE(1, ("%t: %s-gpr-req: opd=%0d, wis=%0d, sid=%0d, reg=%0d\n", $time, INSTANCE_ID, gpr_if.req_data.opd_id, wis_to_wid(gpr_if.req_data.wis, ISSUE_ID), gpr_if.req_data.sid, gpr_if.req_data.reg_id))
|
||||
|
|
|
@ -29,7 +29,7 @@ module VX_pe_switch import VX_gpu_pkg::*; #(
|
|||
VX_execute_if.master execute_out_if[PE_COUNT],
|
||||
VX_result_if .slave result_in_if[PE_COUNT]
|
||||
);
|
||||
localparam PID_BITS = `CLOG2(`SIMD_WIDTH / NUM_LANES);
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam REQ_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + INST_ALU_BITS + $bits(op_args_t) + 1 + NR_BITS + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
|
|
|
@ -24,7 +24,7 @@ module VX_nz_iterator #(
|
|||
input wire reset,
|
||||
input wire valid_in, // Stream input valid
|
||||
input wire [N-1:0][DATAW-1:0] data_in, // Stream input data
|
||||
input wire next, // Advances iterator
|
||||
input wire next, // Advance iterator
|
||||
output wire valid_out, // Current output valid
|
||||
output reg [DATAW-1:0] data_out, // Current output data
|
||||
output reg [LPID_WIDTH-1:0] pid, // Index of the current element
|
||||
|
@ -34,8 +34,7 @@ module VX_nz_iterator #(
|
|||
if (N > 1) begin : g_iterator
|
||||
|
||||
reg [N-1:0] sent_mask_p;
|
||||
wire [LPID_WIDTH-1:0] start_p_n, start_p, end_p;
|
||||
wire valid_in_r;
|
||||
wire [LPID_WIDTH-1:0] start_p, end_p;
|
||||
|
||||
wire [N-1:0] packet_valids;
|
||||
for (genvar i = 0; i < N; ++i) begin : g_packet_valids
|
||||
|
@ -52,9 +51,9 @@ module VX_nz_iterator #(
|
|||
.DATAW (LPID_WIDTH),
|
||||
.REVERSE (0)
|
||||
) find_first (
|
||||
.valid_in (packet_valids & ~sent_mask_p),
|
||||
.data_in (packet_ids),
|
||||
.data_out (start_p_n),
|
||||
.valid_in (packet_valids & ~sent_mask_p),
|
||||
.data_in (packet_ids),
|
||||
.data_out (start_p),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
|
@ -63,43 +62,38 @@ module VX_nz_iterator #(
|
|||
.DATAW (LPID_WIDTH),
|
||||
.REVERSE (1)
|
||||
) find_last (
|
||||
.valid_in (packet_valids),
|
||||
.data_in (packet_ids),
|
||||
.data_out (end_p),
|
||||
.valid_in (packet_valids),
|
||||
.data_in (packet_ids),
|
||||
.data_out (end_p),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + LPID_WIDTH),
|
||||
.RESETW (1),
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset || next), // should flush on fire
|
||||
.enable (1'b1),
|
||||
.data_in ({valid_in, start_p_n}),
|
||||
.data_out ({valid_in_r, start_p})
|
||||
);
|
||||
|
||||
reg is_first_p;
|
||||
wire is_last_p = (start_p == end_p);
|
||||
wire fire_eop = next && is_last_p;
|
||||
|
||||
wire enable = valid_in && (~valid_out || next);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset || fire_eop) begin
|
||||
if (reset || (enable && (is_last_p || eop))) begin
|
||||
sent_mask_p <= '0;
|
||||
is_first_p <= 1;
|
||||
end else if (next) begin
|
||||
is_first_p <= 1;
|
||||
end else if (enable) begin
|
||||
sent_mask_p[start_p] <= 1;
|
||||
is_first_p <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
assign valid_out = valid_in_r;
|
||||
assign data_out = data_in[start_p];
|
||||
assign pid = start_p;
|
||||
assign sop = is_first_p;
|
||||
assign eop = is_last_p;
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + DATAW + LPID_WIDTH + 1 + 1),
|
||||
.RESETW (1),
|
||||
.DEPTH (OUT_REG)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset || (enable && eop)),
|
||||
.enable (enable),
|
||||
.data_in ({valid_in, data_in[start_p], start_p, is_first_p, is_last_p}),
|
||||
.data_out ({valid_out, data_out, pid, sop, eop})
|
||||
);
|
||||
|
||||
end else begin : g_passthru
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue