timing optimizations

This commit is contained in:
tinebp 2025-01-11 03:19:55 -08:00
parent 84b1c8a43c
commit 083cf04afd
2 changed files with 18 additions and 22 deletions

View file

@ -68,8 +68,6 @@ module VX_schedule import VX_gpu_pkg::*; #(
reg [`PERF_CTR_BITS-1:0] cycles; reg [`PERF_CTR_BITS-1:0] cycles;
reg [`NUM_WARPS-1:0][`UUID_WIDTH-1:0] issued_instrs;
wire schedule_fire = schedule_valid && schedule_ready; wire schedule_fire = schedule_valid && schedule_ready;
wire schedule_if_fire = schedule_if.valid && schedule_if.ready; wire schedule_if_fire = schedule_if.valid && schedule_if.ready;
@ -113,6 +111,16 @@ module VX_schedule import VX_gpu_pkg::*; #(
barrier_stalls_n= barrier_stalls; barrier_stalls_n= barrier_stalls;
warp_pcs_n = warp_pcs; warp_pcs_n = warp_pcs;
// decode unlock
if (decode_sched_if.valid && decode_sched_if.unlock) begin
stalled_warps_n[decode_sched_if.wid] = 0;
end
// CSR unlock
if (sched_csr_if.unlock_warp) begin
stalled_warps_n[sched_csr_if.unlock_wid] = 0;
end
// wspawn handling // wspawn handling
if (wspawn.valid && is_single_warp) begin if (wspawn.valid && is_single_warp) begin
active_warps_n |= wspawn.wmask; active_warps_n |= wspawn.wmask;
@ -170,6 +178,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
end end
end end
`ifdef GBAR_ENABLE `ifdef GBAR_ENABLE
if (gbar_bus_if.rsp_valid && (gbar_req_id == gbar_bus_if.rsp_data.id)) begin if (gbar_bus_if.rsp_valid && (gbar_req_id == gbar_bus_if.rsp_data.id)) begin
barrier_ctrs_n[warp_ctl_if.barrier.id] = '0; // reset barrier counter barrier_ctrs_n[warp_ctl_if.barrier.id] = '0; // reset barrier counter
@ -188,16 +197,6 @@ module VX_schedule import VX_gpu_pkg::*; #(
end end
end end
// decode unlock
if (decode_sched_if.valid && decode_sched_if.unlock) begin
stalled_warps_n[decode_sched_if.wid] = 0;
end
// CSR unlock
if (sched_csr_if.unlock_warp) begin
stalled_warps_n[sched_csr_if.unlock_wid] = 0;
end
// stall the warp until decode stage // stall the warp until decode stage
if (schedule_fire) begin if (schedule_fire) begin
stalled_warps_n[schedule_wid] = 1; stalled_warps_n[schedule_wid] = 1;
@ -223,7 +222,6 @@ module VX_schedule import VX_gpu_pkg::*; #(
active_warps <= '0; active_warps <= '0;
thread_masks <= '0; thread_masks <= '0;
barrier_stalls <= '0; barrier_stalls <= '0;
issued_instrs <= '0;
cycles <= '0; cycles <= '0;
wspawn.valid <= 0; wspawn.valid <= 0;
@ -268,10 +266,6 @@ module VX_schedule import VX_gpu_pkg::*; #(
end end
`endif `endif
if (schedule_if_fire) begin
issued_instrs[schedule_if.data.wid] <= issued_instrs[schedule_if.data.wid] + `UUID_WIDTH'(1);
end
if (busy) begin if (busy) begin
cycles <= cycles + 1; cycles <= cycles + 1;
end end

View file

@ -151,11 +151,14 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end end
`endif `endif
always @(*) begin for (genvar i = 0; i < NUM_OPDS; ++i) begin : g_operands_busy_n
for (integer i = 0; i < NUM_OPDS; ++i) begin always @(*) begin
operands_busy_n[i] = operands_busy[i]; operands_busy_n[i] = operands_busy[i];
if (ibuffer_fire) begin if (ibuffer_fire) begin
operands_busy_n[i] = inuse_regs[ibuf_opds[i]]; operands_busy_n[i] = inuse_regs[ibuf_opds[i]];
if (staging_fire && staging_if[w].data.wb && staging_if[w].data.rd == ibuf_opds[i]) begin
operands_busy_n[i] = 1;
end
end end
if (writeback_fire) begin if (writeback_fire) begin
if (ibuffer_fire) begin if (ibuffer_fire) begin
@ -168,9 +171,6 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end end
end end
end end
if (staging_fire && staging_if[w].data.wb && staging_if[w].data.rd == ibuf_opds[i]) begin
operands_busy_n[i] = 1;
end
end end
end end
@ -185,8 +185,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
inuse_regs[staging_if[w].data.rd] <= 1; inuse_regs[staging_if[w].data.rd] <= 1;
end end
end end
operands_busy <= operands_busy_n; operands_busy <= operands_busy_n;
operands_ready[w] <= ~(| operands_busy_n); operands_ready[w] <= ~(| operands_busy_n);
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
if (staging_fire && staging_if[w].data.wb) begin if (staging_fire && staging_if[w].data.wb) begin
inuse_units[staging_if[w].data.rd] <= staging_if[w].data.ex_type; inuse_units[staging_if[w].data.rd] <= staging_if[w].data.ex_type;