timing optimizations

This commit is contained in:
tinebp 2025-01-11 03:19:55 -08:00
parent 84b1c8a43c
commit 083cf04afd
2 changed files with 18 additions and 22 deletions

View file

@ -68,8 +68,6 @@ module VX_schedule import VX_gpu_pkg::*; #(
reg [`PERF_CTR_BITS-1:0] cycles;
reg [`NUM_WARPS-1:0][`UUID_WIDTH-1:0] issued_instrs;
wire schedule_fire = schedule_valid && schedule_ready;
wire schedule_if_fire = schedule_if.valid && schedule_if.ready;
@ -113,6 +111,16 @@ module VX_schedule import VX_gpu_pkg::*; #(
barrier_stalls_n= barrier_stalls;
warp_pcs_n = warp_pcs;
// decode unlock
if (decode_sched_if.valid && decode_sched_if.unlock) begin
stalled_warps_n[decode_sched_if.wid] = 0;
end
// CSR unlock
if (sched_csr_if.unlock_warp) begin
stalled_warps_n[sched_csr_if.unlock_wid] = 0;
end
// wspawn handling
if (wspawn.valid && is_single_warp) begin
active_warps_n |= wspawn.wmask;
@ -170,6 +178,7 @@ module VX_schedule import VX_gpu_pkg::*; #(
stalled_warps_n[warp_ctl_if.wid] = 0; // unlock warp
end
end
`ifdef GBAR_ENABLE
if (gbar_bus_if.rsp_valid && (gbar_req_id == gbar_bus_if.rsp_data.id)) begin
barrier_ctrs_n[warp_ctl_if.barrier.id] = '0; // reset barrier counter
@ -188,16 +197,6 @@ module VX_schedule import VX_gpu_pkg::*; #(
end
end
// decode unlock
if (decode_sched_if.valid && decode_sched_if.unlock) begin
stalled_warps_n[decode_sched_if.wid] = 0;
end
// CSR unlock
if (sched_csr_if.unlock_warp) begin
stalled_warps_n[sched_csr_if.unlock_wid] = 0;
end
// stall the warp until decode stage
if (schedule_fire) begin
stalled_warps_n[schedule_wid] = 1;
@ -223,7 +222,6 @@ module VX_schedule import VX_gpu_pkg::*; #(
active_warps <= '0;
thread_masks <= '0;
barrier_stalls <= '0;
issued_instrs <= '0;
cycles <= '0;
wspawn.valid <= 0;
@ -268,10 +266,6 @@ module VX_schedule import VX_gpu_pkg::*; #(
end
`endif
if (schedule_if_fire) begin
issued_instrs[schedule_if.data.wid] <= issued_instrs[schedule_if.data.wid] + `UUID_WIDTH'(1);
end
if (busy) begin
cycles <= cycles + 1;
end

View file

@ -151,11 +151,14 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end
`endif
always @(*) begin
for (integer i = 0; i < NUM_OPDS; ++i) begin
for (genvar i = 0; i < NUM_OPDS; ++i) begin : g_operands_busy_n
always @(*) begin
operands_busy_n[i] = operands_busy[i];
if (ibuffer_fire) begin
operands_busy_n[i] = inuse_regs[ibuf_opds[i]];
if (staging_fire && staging_if[w].data.wb && staging_if[w].data.rd == ibuf_opds[i]) begin
operands_busy_n[i] = 1;
end
end
if (writeback_fire) begin
if (ibuffer_fire) begin
@ -168,9 +171,6 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end
end
end
if (staging_fire && staging_if[w].data.wb && staging_if[w].data.rd == ibuf_opds[i]) begin
operands_busy_n[i] = 1;
end
end
end
@ -185,8 +185,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
inuse_regs[staging_if[w].data.rd] <= 1;
end
end
operands_busy <= operands_busy_n;
operands_ready[w] <= ~(| operands_busy_n);
`ifdef PERF_ENABLE
if (staging_fire && staging_if[w].data.wb) begin
inuse_units[staging_if[w].data.rd] <= staging_if[w].data.ex_type;