mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-19 11:34:59 -04:00
bug fixes
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
This commit is contained in:
parent
7c7ec029fc
commit
d23bca3a3d
9 changed files with 97 additions and 72 deletions
|
@ -171,7 +171,7 @@ module VX_commit import VX_gpu_pkg::*; #(
|
|||
assign writeback_if[i].data.data = commit_arb_if[i].data.data;
|
||||
assign writeback_if[i].data.sop = commit_arb_if[i].data.sop;
|
||||
assign writeback_if[i].data.eop = commit_arb_if[i].data.eop;
|
||||
assign commit_arb_if[i].ready = writeback_if[i].ready;
|
||||
assign commit_arb_if[i].ready = 1;
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
|
|
|
@ -61,6 +61,7 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
.perf_sfu_uses (issue_perf.sfu_uses),
|
||||
`endif
|
||||
.writeback_if (writeback_if),
|
||||
.operands_if (operands_if),
|
||||
.ibuffer_if (ibuffer_if),
|
||||
.scoreboard_if (scoreboard_if)
|
||||
);
|
||||
|
|
|
@ -31,7 +31,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
localparam NUM_LANES = `NUM_LSU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_DATAW= UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + 1 + NR_BITS + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_IN_SIZE);
|
||||
localparam REQ_ASHIFT = `CLOG2(LSU_WORD_SIZE);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
|
@ -105,7 +105,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
wire mem_rsp_sop_pkt, mem_rsp_eop_pkt;
|
||||
wire no_rsp_buf_valid, no_rsp_buf_ready;
|
||||
|
||||
wire [LSUQ_SIZEW-1:0] reqq_waddr, reqq_raddr;
|
||||
wire [LSUQ_SIZEW-1:0] pkt_waddr, pkt_raddr;
|
||||
|
||||
// fence handling
|
||||
|
||||
|
@ -215,52 +215,69 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
|
||||
if (PID_BITS != 0) begin : g_pid
|
||||
reg [`LSUQ_IN_SIZE-1:0][PID_BITS:0] pkt_ctr;
|
||||
reg [`LSUQ_IN_SIZE-1:0] pkt_sop;
|
||||
reg [`LSUQ_IN_SIZE-1:0] pkt_eop;
|
||||
reg [`LSUQ_IN_SIZE-1:0] pkt_sop, pkt_eop;
|
||||
|
||||
wire mem_req_rd_fire = mem_req_fire && ~mem_req_rw;
|
||||
wire mem_req_rd_fire = mem_req_fire && ~mem_req_rw;
|
||||
wire mem_req_rd_sop_fire = mem_req_rd_fire && execute_if.data.sop;
|
||||
wire mem_req_rd_eop_fire = mem_req_rd_fire && execute_if.data.eop;
|
||||
wire mem_rsp_sop_fire = mem_rsp_fire && mem_rsp_sop;
|
||||
wire mem_rsp_eop_fire = mem_rsp_fire && mem_rsp_eop;
|
||||
wire mem_rsp_eop_fire = mem_rsp_fire && mem_rsp_eop;
|
||||
wire mem_rsp_eop_pkt_fire= mem_rsp_fire && mem_rsp_eop_pkt;
|
||||
wire full;
|
||||
|
||||
assign mem_rsp_sop_pkt = pkt_sop[reqq_raddr];
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop && pkt_eop[reqq_raddr] && (pkt_ctr[reqq_raddr] == 1);
|
||||
VX_allocator #(
|
||||
.SIZE (`LSUQ_IN_SIZE)
|
||||
) pkt_allocator (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_en (mem_req_rd_eop_fire),
|
||||
.acquire_addr(pkt_waddr),
|
||||
.release_en (mem_rsp_eop_pkt_fire),
|
||||
.release_addr(pkt_raddr),
|
||||
`UNUSED_PIN (empty),
|
||||
.full (full)
|
||||
);
|
||||
|
||||
wire rw_collision = mem_req_rd_fire && mem_rsp_eop_fire && (pkt_raddr == pkt_waddr);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < `LSUQ_IN_SIZE; ++i) begin
|
||||
pkt_ctr[i] <= '0;
|
||||
pkt_sop[i] <= 1;
|
||||
pkt_eop[i] <= 0;
|
||||
end
|
||||
pkt_ctr <= '0;
|
||||
pkt_sop <= '0;
|
||||
pkt_eop <= '0;
|
||||
end else begin
|
||||
if (mem_req_rd_eop_fire) begin
|
||||
pkt_eop[reqq_waddr] <= 1;
|
||||
if (mem_req_rd_sop_fire) begin
|
||||
pkt_sop[pkt_waddr] <= 1;
|
||||
end
|
||||
if (~(mem_req_rd_fire && mem_rsp_eop_fire && (reqq_raddr == reqq_waddr))) begin
|
||||
if (mem_req_rd_eop_fire) begin
|
||||
pkt_eop[pkt_waddr] <= 1;
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
pkt_sop[pkt_raddr] <= 0;
|
||||
end
|
||||
if (mem_rsp_eop_pkt_fire) begin
|
||||
pkt_eop[pkt_raddr] <= 0;
|
||||
end
|
||||
if (~rw_collision) begin
|
||||
if (mem_req_rd_fire) begin
|
||||
pkt_ctr[reqq_waddr] <= pkt_ctr[reqq_waddr] + PID_BITS'(1);
|
||||
pkt_ctr[pkt_waddr] <= pkt_ctr[pkt_waddr] + PID_BITS'(1);
|
||||
end
|
||||
if (mem_rsp_eop_fire) begin
|
||||
pkt_ctr[reqq_raddr] <= pkt_ctr[reqq_raddr] - PID_BITS'(1);
|
||||
pkt_ctr[pkt_raddr] <= pkt_ctr[pkt_raddr] - PID_BITS'(1);
|
||||
end
|
||||
end
|
||||
if (mem_rsp_sop_fire) begin
|
||||
pkt_sop[reqq_raddr] <= 0;
|
||||
end
|
||||
if (mem_rsp_eop_fire && mem_rsp_eop_pkt) begin
|
||||
pkt_sop[reqq_raddr] <= 1;
|
||||
pkt_eop[reqq_raddr] <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && pkt_eop[reqq_waddr]), ("%t: oops! broken eop request! (#%0d)", $time, execute_if.data.uuid))
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && (2**PID_BITS-1) == pkt_ctr[reqq_waddr]), ("%t: oops! broken ctr request! (#%0d)", $time, execute_if.data.uuid))
|
||||
`RUNTIME_ASSERT(~(mem_rsp_fire && 0 == pkt_ctr[reqq_raddr]), ("%t: oops! broken ctr response! (#%0d)", $time, rsp_uuid))
|
||||
|
||||
assign mem_rsp_sop_pkt = pkt_sop[pkt_raddr];
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop && pkt_eop[pkt_raddr] && (pkt_ctr[pkt_raddr] == 1);
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_fire && full), ("%t: allocator full!", $time))
|
||||
`RUNTIME_ASSERT(~(mem_req_rd_sop_fire && pkt_ctr[pkt_waddr] != 0), ("%t: oops! broken sop request!", $time))
|
||||
`UNUSED_VAR (mem_rsp_sop)
|
||||
end else begin : g_no_pid
|
||||
assign pkt_waddr = 0;
|
||||
assign mem_rsp_sop_pkt = mem_rsp_sop;
|
||||
assign mem_rsp_eop_pkt = mem_rsp_eop;
|
||||
`UNUSED_VAR (reqq_raddr)
|
||||
`UNUSED_VAR (pkt_raddr)
|
||||
end
|
||||
|
||||
// pack memory request tag
|
||||
|
@ -273,7 +290,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
execute_if.data.op_type,
|
||||
req_align,
|
||||
execute_if.data.pid,
|
||||
reqq_waddr,
|
||||
pkt_waddr,
|
||||
req_is_fence
|
||||
};
|
||||
|
||||
|
@ -322,12 +339,10 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.core_req_data (mem_req_data),
|
||||
.core_req_tag (mem_req_tag),
|
||||
.core_req_ready (mem_req_ready),
|
||||
.core_req_queue_id (reqq_waddr),
|
||||
|
||||
// request queue info
|
||||
`UNUSED_PIN (req_queue_empty),
|
||||
`UNUSED_PIN (req_queue_pop),
|
||||
`UNUSED_PIN (req_queue_id),
|
||||
`UNUSED_PIN (req_queue_rw_notify),
|
||||
|
||||
// Output response
|
||||
.core_rsp_valid (mem_rsp_valid),
|
||||
|
@ -393,7 +408,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
rsp_op_type,
|
||||
rsp_align,
|
||||
rsp_pid,
|
||||
reqq_raddr,
|
||||
pkt_raddr,
|
||||
rsp_is_fence
|
||||
} = mem_rsp_tag;
|
||||
|
||||
|
@ -448,7 +463,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (mem_rsp_valid),
|
||||
.ready_in (mem_rsp_ready),
|
||||
.data_in ({rsp_uuid, rsp_wid, mem_rsp_mask, rsp_pc, rsp_wb, rsp_rd, rsp_data, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}),
|
||||
.data_in ({rsp_uuid, rsp_wid, mem_rsp_mask, rsp_pc, rsp_wb, rsp_rd, rsp_data, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}),
|
||||
.data_out ({result_rsp_if.data.uuid, result_rsp_if.data.wid, result_rsp_if.data.tmask, result_rsp_if.data.PC, result_rsp_if.data.wb, result_rsp_if.data.rd, result_rsp_if.data.data, result_rsp_if.data.pid, result_rsp_if.data.sop, result_rsp_if.data.eop}),
|
||||
.valid_out (result_rsp_if.valid),
|
||||
.ready_out (result_rsp_if.ready)
|
||||
|
@ -462,7 +477,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (no_rsp_buf_valid),
|
||||
.ready_in (no_rsp_buf_ready),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
|
||||
.data_in ({execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
|
||||
.data_out ({result_no_rsp_if.data.uuid, result_no_rsp_if.data.wid, result_no_rsp_if.data.tmask, result_no_rsp_if.data.PC, result_no_rsp_if.data.pid, result_no_rsp_if.data.sop, result_no_rsp_if.data.eop}),
|
||||
.valid_out (result_no_rsp_if.valid),
|
||||
.ready_out (result_no_rsp_if.ready)
|
||||
|
@ -513,7 +528,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
|
|||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%t: %s Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, pid=%0d, sop=%b, eop=%b, data=",
|
||||
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, rsp_pid, mem_rsp_sop, mem_rsp_eop))
|
||||
$time, INSTANCE_ID, rsp_wid, {rsp_pc, 1'b0}, mem_rsp_mask, rsp_rd, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt))
|
||||
`TRACE_ARRAY1D(2, "0x%0h", mem_rsp_data, NUM_LANES)
|
||||
`TRACE(2, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid))
|
||||
end
|
||||
|
|
|
@ -51,9 +51,9 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
|
||||
// collector selection
|
||||
|
||||
reg [`NUM_OPCS-1:0] ready_opcs;
|
||||
reg [`NUM_OPCS-1:0] select_opcs;
|
||||
always @(*) begin
|
||||
ready_opcs = per_opc_scoreboard_ready;
|
||||
select_opcs = '1;
|
||||
if (`NUM_OPCS > 1 && SIMD_COUNT > 1) begin
|
||||
// SFU cannot handle multiple inflight WCTL instructions, always assign them same collector
|
||||
// LD/ST instructions should also be ordered via the same collector
|
||||
|
@ -61,12 +61,12 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
&& inst_sfu_is_wctl(scoreboard_if.data.op_type)) begin
|
||||
// select collector 0
|
||||
for (int i = 0; i < `NUM_OPCS; ++i) begin
|
||||
if (i != 0) ready_opcs[i] = 0;
|
||||
if (i != 0) select_opcs[i] = 0;
|
||||
end
|
||||
end else if (scoreboard_if.data.ex_type == EX_LSU) begin
|
||||
// select collector 1
|
||||
for (int i = 0; i < `NUM_OPCS; ++i) begin
|
||||
if (i != 1) ready_opcs[i] = 0;
|
||||
if (i != 1) select_opcs[i] = 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -75,6 +75,8 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
wire opc_sel_valid;
|
||||
wire [`NUM_OPCS-1:0] opc_sel_mask;
|
||||
|
||||
wire [`NUM_OPCS-1:0] ready_opcs = select_opcs & per_opc_scoreboard_ready;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.N (`NUM_OPCS)
|
||||
) opc_sel (
|
||||
|
@ -123,12 +125,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
wire war_dp_check = (opc_pending_regs[writeback_if.data.rd] == 0);
|
||||
|
||||
VX_writeback_if writeback_if_s();
|
||||
assign writeback_if_s.valid = writeback_if.valid && war_dp_check;
|
||||
assign writeback_if_s.data = writeback_if.data;
|
||||
assign writeback_if.ready = war_dp_check;
|
||||
`UNUSED_VAR (writeback_if_s.ready)
|
||||
`UNUSED_VAR (war_dp_check)
|
||||
|
||||
VX_gpr_unit #(
|
||||
.INSTANCE_ID (`SFORMATF(("%s-gpr", INSTANCE_ID))),
|
||||
|
@ -140,7 +137,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
`ifdef PERF_ENABLE
|
||||
.perf_stalls (perf_stalls),
|
||||
`endif
|
||||
.writeback_if (writeback_if_s),
|
||||
.writeback_if (writeback_if),
|
||||
.gpr_if (per_opc_gpr_if)
|
||||
);
|
||||
|
||||
|
@ -170,13 +167,13 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
if (reset) begin
|
||||
timeout_ctr <= '0;
|
||||
end else begin
|
||||
if (writeback_if.valid && ~writeback_if.ready) begin
|
||||
if (writeback_if.valid) begin
|
||||
`ifdef DBG_TRACE_PIPELINE
|
||||
`TRACE(4, ("%t: *** %s-stall: wid=%0d, sid=%0d, tmask=%b, PC=0x%0h, cycles=%0d (#%0d)\n",
|
||||
$time, INSTANCE_ID, wis_to_wid(writeback_if.data.wis, ISSUE_ID), writeback_if.data.sid, writeback_if.data.tmask, {writeback_if.data.PC, 1'b0}, timeout_ctr, writeback_if.data.uuid))
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
end else if (writeback_if.valid && writeback_if.ready) begin
|
||||
end else if (writeback_if.valid) begin
|
||||
timeout_ctr <= '0;
|
||||
end
|
||||
end
|
||||
|
|
|
@ -27,6 +27,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_operands_if.slave operands_if,
|
||||
VX_ibuffer_if.slave ibuffer_if [PER_ISSUE_WARPS],
|
||||
VX_scoreboard_if.master scoreboard_if
|
||||
);
|
||||
|
@ -38,7 +39,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
localparam DATAW = UUID_WIDTH + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS);
|
||||
|
||||
VX_ibuffer_if staging_if [PER_ISSUE_WARPS]();
|
||||
reg [PER_ISSUE_WARPS-1:0] operands_ready;
|
||||
wire [PER_ISSUE_WARPS-1:0] operands_ready;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [PER_ISSUE_WARPS-1:0][NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
|
||||
|
@ -122,14 +123,19 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
for (genvar w = 0; w < PER_ISSUE_WARPS; ++w) begin : g_scoreboard
|
||||
reg [NUM_REGS-1:0] inuse_regs, inuse_regs_n;
|
||||
wire [NUM_OPDS-1:0] operands_busy;
|
||||
reg in_use_warp, in_use_warp_n;
|
||||
|
||||
wire ibuffer_fire = ibuffer_if[w].valid && ibuffer_if[w].ready;
|
||||
wire staging_fire = staging_if[w].valid && staging_if[w].ready;
|
||||
|
||||
wire writeback_fire = writeback_if.valid && writeback_if.ready
|
||||
wire writeback_fire = writeback_if.valid
|
||||
&& (writeback_if.data.wis == ISSUE_WIS_W'(w))
|
||||
&& writeback_if.data.eop;
|
||||
|
||||
wire operands_fire = operands_if.valid && operands_if.ready
|
||||
&& (operands_if.data.wis == ISSUE_WIS_W'(w))
|
||||
&& operands_if.data.eop;
|
||||
|
||||
reg_idx_t [NUM_OPDS-1:0] ibf_opds, stg_opds;
|
||||
assign ibf_opds = {ibuffer_if[w].data.rs3, ibuffer_if[w].data.rs2, ibuffer_if[w].data.rs1, ibuffer_if[w].data.rd};
|
||||
assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd};
|
||||
|
@ -174,6 +180,15 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
in_use_warp_n = in_use_warp;
|
||||
if (operands_fire) begin
|
||||
in_use_warp_n = 0;
|
||||
end else if (staging_fire) begin
|
||||
in_use_warp_n = 1;
|
||||
end
|
||||
end
|
||||
|
||||
wire [REG_TYPES-1:0][31:0] in_use_mask;
|
||||
for (genvar i = 0; i < REG_TYPES; ++i) begin : g_in_use_mask
|
||||
wire [31:0] ibf_reg_mask = ibf_opd_mask[0][i] | ibf_opd_mask[1][i] | ibf_opd_mask[2][i] | ibf_opd_mask[3][i];
|
||||
|
@ -192,15 +207,21 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
assign operands_busy[i] = (in_use_mask[rtype] & stg_opd_mask[i][rtype]) != 0;
|
||||
end
|
||||
|
||||
reg operands_ready_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
inuse_regs <= '0;
|
||||
in_use_warp <= 0;
|
||||
end else begin
|
||||
inuse_regs <= inuse_regs_n;
|
||||
in_use_warp <= in_use_warp_n;
|
||||
end
|
||||
operands_ready[w] <= ~(| regs_busy);
|
||||
operands_ready_r <= ~(| regs_busy);
|
||||
end
|
||||
|
||||
assign operands_ready[w] = operands_ready_r && ~in_use_warp;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
always @(posedge clk) begin
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
|
|
|
@ -29,18 +29,15 @@ interface VX_writeback_if import VX_gpu_pkg::*; ();
|
|||
|
||||
logic valid;
|
||||
data_t data;
|
||||
logic ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output data,
|
||||
input ready
|
||||
output data
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input data,
|
||||
output ready
|
||||
input data
|
||||
);
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -55,12 +55,10 @@ module VX_mem_scheduler #(
|
|||
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire core_req_ready,
|
||||
output wire [CORE_QUEUE_ADDRW-1:0] core_req_queue_id,
|
||||
|
||||
// Core request queue
|
||||
output wire req_queue_empty,
|
||||
output wire req_queue_pop,
|
||||
output wire [CORE_QUEUE_ADDRW-1:0] req_queue_id,
|
||||
output wire req_queue_rw_notify,
|
||||
|
||||
// Core response
|
||||
output wire core_rsp_valid,
|
||||
|
@ -189,13 +187,9 @@ module VX_mem_scheduler #(
|
|||
// can accept another request?
|
||||
assign core_req_ready = reqq_ready_in && ibuf_ready;
|
||||
|
||||
// return core queue id
|
||||
assign core_req_queue_id = ibuf_waddr;
|
||||
|
||||
// request queue info
|
||||
assign req_queue_pop = reqq_valid && reqq_ready;
|
||||
assign req_queue_rw_notify = reqq_valid && reqq_ready && reqq_rw;
|
||||
assign req_queue_empty = !reqq_valid && ibuf_empty;
|
||||
assign req_queue_id = reqq_tag[CORE_QUEUE_ADDRW-1:0];
|
||||
|
||||
// Index buffer ///////////////////////////////////////////////////////////
|
||||
|
||||
|
|
|
@ -107,7 +107,7 @@ module VX_lsu_mem_arb import VX_gpu_pkg::*; #(
|
|||
wire [NUM_OUTPUTS-1:0][RSP_DATAW-1:0] rsp_data_in;
|
||||
wire [NUM_OUTPUTS-1:0] rsp_ready_in;
|
||||
|
||||
if (NUM_INPUTS > NUM_OUTPUTS) begin : g_rsp_enabled
|
||||
if (NUM_INPUTS > NUM_OUTPUTS) begin : g_rsp_select
|
||||
|
||||
wire [NUM_OUTPUTS-1:0][LOG_NUM_REQS-1:0] rsp_sel_in;
|
||||
|
||||
|
@ -148,7 +148,7 @@ module VX_lsu_mem_arb import VX_gpu_pkg::*; #(
|
|||
.ready_out (rsp_ready_out)
|
||||
);
|
||||
|
||||
end else begin : g_passthru
|
||||
end else begin : g_rsp_arb
|
||||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_rsp_data_in
|
||||
assign rsp_valid_in[i] = bus_out_if[i].rsp_valid;
|
||||
|
|
|
@ -105,7 +105,7 @@ module VX_mem_arb import VX_gpu_pkg::*; #(
|
|||
wire [NUM_OUTPUTS-1:0][RSP_DATAW-1:0] rsp_data_in;
|
||||
wire [NUM_OUTPUTS-1:0] rsp_ready_in;
|
||||
|
||||
if (NUM_INPUTS > NUM_OUTPUTS) begin : g_rsp_enabled
|
||||
if (NUM_INPUTS > NUM_OUTPUTS) begin : g_rsp_select
|
||||
|
||||
wire [NUM_OUTPUTS-1:0][LOG_NUM_REQS-1:0] rsp_sel_in;
|
||||
|
||||
|
@ -142,7 +142,7 @@ module VX_mem_arb import VX_gpu_pkg::*; #(
|
|||
.ready_out (rsp_ready_out)
|
||||
);
|
||||
|
||||
end else begin : g_passthru
|
||||
end else begin : g_rsp_arb
|
||||
|
||||
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin : g_rsp_data_in
|
||||
assign rsp_valid_in[i] = bus_out_if[i].rsp_valid;
|
||||
|
|
Loading…
Add table
Reference in a new issue