mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
per-warp ibuffer scoreboard scheduling optimization
This commit is contained in:
parent
a78ac7a246
commit
bb4c150aaf
10 changed files with 173 additions and 92 deletions
|
@ -269,7 +269,7 @@
|
|||
|
||||
// Size of Instruction Buffer
|
||||
`ifndef IBUF_SIZE
|
||||
`define IBUF_SIZE (2 * (`NUM_WARPS / `ISSUE_WIDTH))
|
||||
`define IBUF_SIZE 4
|
||||
`endif
|
||||
|
||||
// Size of LSU Request Queue
|
||||
|
|
|
@ -67,25 +67,23 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
|
||||
`ifndef L1_ENABLE
|
||||
// Ensure that the ibuffer doesn't fill up.
|
||||
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
|
||||
// This issue is particularly prevalent when the icache and dcache is disabled and both requests share the same bus.
|
||||
wire [ISSUE_ISW-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache requests.
|
||||
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
|
||||
wire [`NUM_WARPS-1:0] pending_ibuf_full;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
VX_pending_size #(
|
||||
.SIZE (`IBUF_SIZE)
|
||||
) pending_reads (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.incr (icache_req_fire && schedule_isw == i),
|
||||
.incr (icache_req_fire && schedule_if.data.wid == i),
|
||||
.decr (fetch_if.ibuf_pop[i]),
|
||||
.full (pending_ibuf_full[i]),
|
||||
`UNUSED_PIN (size),
|
||||
`UNUSED_PIN (empty)
|
||||
);
|
||||
end
|
||||
wire ibuf_ready = ~pending_ibuf_full[schedule_isw];
|
||||
wire ibuf_ready = ~pending_ibuf_full[schedule_if.data.wid];
|
||||
`else
|
||||
wire ibuf_ready = 1'b1;
|
||||
`endif
|
||||
|
|
|
@ -23,20 +23,16 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||
VX_decode_if.slave decode_if,
|
||||
|
||||
// outputs
|
||||
VX_ibuffer_if.master ibuffer_if [`ISSUE_WIDTH]
|
||||
VX_ibuffer_if.master ibuffer_if [`NUM_WARPS]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4);
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `XLEN + 1 + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4);
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] ibuf_ready_in;
|
||||
|
||||
wire [ISW_WIDTH-1:0] decode_isw = wid_to_isw(decode_if.data.wid);
|
||||
wire [ISSUE_WIS_W-1:0] decode_wis = wid_to_wis(decode_if.data.wid);
|
||||
wire [`NUM_WARPS-1:0] ibuf_ready_in;
|
||||
|
||||
assign decode_if.ready = ibuf_ready_in[decode_isw];
|
||||
assign decode_if.ready = ibuf_ready_in[decode_if.data.wid];
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (`IBUF_SIZE),
|
||||
|
@ -44,11 +40,10 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||
) instr_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (decode_if.valid && decode_isw == i),
|
||||
.valid_in (decode_if.valid && decode_if.data.wid == i),
|
||||
.ready_in (ibuf_ready_in[i]),
|
||||
.data_in ({
|
||||
decode_if.data.uuid,
|
||||
decode_wis,
|
||||
decode_if.data.tmask,
|
||||
decode_if.data.ex_type,
|
||||
decode_if.data.op_type,
|
||||
|
@ -63,7 +58,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||
decode_if.data.rs2,
|
||||
decode_if.data.rs3}),
|
||||
.data_out(ibuffer_if[i].data),
|
||||
.valid_out (ibuffer_if[i].valid),
|
||||
.valid_out(ibuffer_if[i].valid),
|
||||
.ready_out(ibuffer_if[i].ready)
|
||||
);
|
||||
`ifndef L1_ENABLE
|
||||
|
|
|
@ -36,8 +36,8 @@ module VX_issue #(
|
|||
`endif
|
||||
VX_dispatch_if.master sfu_dispatch_if [`ISSUE_WIDTH]
|
||||
);
|
||||
VX_ibuffer_if ibuffer_if [`ISSUE_WIDTH]();
|
||||
VX_ibuffer_if scoreboard_if [`ISSUE_WIDTH]();
|
||||
VX_ibuffer_if ibuffer_if [`NUM_WARPS]();
|
||||
VX_scoreboard_if scoreboard_if [`ISSUE_WIDTH]();
|
||||
VX_operands_if operands_if [`ISSUE_WIDTH]();
|
||||
|
||||
`RESET_RELAY (ibuf_reset, reset);
|
||||
|
|
|
@ -21,7 +21,7 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.slave scoreboard_if [`ISSUE_WIDTH],
|
||||
VX_scoreboard_if.slave scoreboard_if [`ISSUE_WIDTH],
|
||||
VX_operands_if.master operands_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
|
|
@ -26,27 +26,27 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
|
||||
VX_writeback_if.slave writeback_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.slave ibuffer_if [`ISSUE_WIDTH],
|
||||
VX_ibuffer_if.master scoreboard_if [`ISSUE_WIDTH]
|
||||
VX_ibuffer_if.slave ibuffer_if [`NUM_WARPS],
|
||||
VX_scoreboard_if.master scoreboard_if [`ISSUE_WIDTH]
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4) + 1;
|
||||
|
||||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4) + 1;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`ISSUE_WIDTH-1:0][`NUM_EX_UNITS-1:0] perf_issue_units_per_cycle;
|
||||
reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_issue_units_per_cycle;
|
||||
wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r;
|
||||
|
||||
reg [`ISSUE_WIDTH-1:0][`NUM_SFU_UNITS-1:0] perf_issue_sfu_per_cycle;
|
||||
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_issue_sfu_per_cycle;
|
||||
wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r;
|
||||
|
||||
wire [`ISSUE_WIDTH-1:0] perf_issue_stalls_per_cycle;
|
||||
wire [`CLOG2(`ISSUE_WIDTH+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
|
||||
wire [`NUM_WARPS-1:0] perf_issue_stalls_per_cycle;
|
||||
wire [`CLOG2(`NUM_WARPS+1)-1:0] perf_stalls_per_cycle, perf_stalls_per_cycle_r;
|
||||
|
||||
`POP_COUNT(perf_stalls_per_cycle, perf_issue_stalls_per_cycle);
|
||||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_EX_UNITS),
|
||||
.N (`ISSUE_WIDTH),
|
||||
.N (`NUM_WARPS),
|
||||
.OP ("|")
|
||||
) perf_units_reduce (
|
||||
.data_in (perf_issue_units_per_cycle),
|
||||
|
@ -55,7 +55,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_reduce #(
|
||||
.DATAW_IN (`NUM_SFU_UNITS),
|
||||
.N (`ISSUE_WIDTH),
|
||||
.N (`NUM_WARPS),
|
||||
.OP ("|")
|
||||
) perf_sfu_reduce (
|
||||
.data_in (perf_issue_sfu_per_cycle),
|
||||
|
@ -93,21 +93,34 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0] inuse_regs;
|
||||
`RESET_RELAY (arb_reset, reset);
|
||||
|
||||
wire writeback_fire = writeback_if[i].valid && writeback_if[i].data.eop;
|
||||
reg [`NUM_WARPS-1:0] operands_ready;
|
||||
|
||||
wire inuse_rd = inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd];
|
||||
wire inuse_rs1 = inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1];
|
||||
wire inuse_rs2 = inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2];
|
||||
wire inuse_rs3 = inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3];
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
reg [`NUM_REGS-1:0] inuse_regs;
|
||||
reg [3:0] operands_busy;
|
||||
|
||||
localparam iw = i % `ISSUE_WIDTH;
|
||||
localparam wis = i / `ISSUE_WIDTH;
|
||||
|
||||
wire writeback_fire = writeback_if[iw].valid
|
||||
&& (writeback_if[iw].data.wis == ISSUE_WIS_W'(wis))
|
||||
&& writeback_if[iw].data.eop;
|
||||
|
||||
wire inuse_rd = inuse_regs[ibuffer_if[i].data.rd];
|
||||
wire inuse_rs1 = inuse_regs[ibuffer_if[i].data.rs1];
|
||||
wire inuse_rs2 = inuse_regs[ibuffer_if[i].data.rs2];
|
||||
wire inuse_rs3 = inuse_regs[ibuffer_if[i].data.rs3];
|
||||
|
||||
wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||
assign operands_ready[i] = ~(| operands_busy);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
|
||||
reg [`UP(ISSUE_RATIO)-1:0][`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
|
||||
reg [`NUM_REGS-1:0][`EX_WIDTH-1:0] inuse_units;
|
||||
reg [`NUM_REGS-1:0][`SFU_WIDTH-1:0] inuse_sfu;
|
||||
|
||||
reg [`SFU_WIDTH-1:0] sfu_type;
|
||||
always @(*) begin
|
||||
|
@ -119,32 +132,32 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
endcase
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
always @(*) begin
|
||||
perf_issue_units_per_cycle[i] = '0;
|
||||
perf_issue_sfu_per_cycle[i] = '0;
|
||||
if (ibuffer_if[i].valid) begin
|
||||
if (inuse_rd) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd]] = 1;
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rd]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.rd] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rd]] = 1;
|
||||
end
|
||||
end
|
||||
if (inuse_rs1) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs1]] = 1;
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs1]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.rs1] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs1]] = 1;
|
||||
end
|
||||
end
|
||||
if (inuse_rs2) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs2]] = 1;
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs2]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.rs2] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs2]] = 1;
|
||||
end
|
||||
end
|
||||
if (inuse_rs3) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rs3]] = 1;
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs3]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.rs3] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs3]] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -152,43 +165,23 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
assign perf_issue_stalls_per_cycle[i] = ibuffer_if[i].valid && ~ibuffer_if[i].ready;
|
||||
`endif
|
||||
|
||||
wire [3:0] operands_busy = {inuse_rd, inuse_rs1, inuse_rs2, inuse_rs3};
|
||||
wire operands_ready = ~(| operands_busy);
|
||||
|
||||
wire stg_valid_in, stg_ready_in;
|
||||
assign stg_valid_in = ibuffer_if[i].valid && operands_ready;
|
||||
assign ibuffer_if[i].ready = stg_ready_in && operands_ready;
|
||||
|
||||
VX_stream_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) staging_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (stg_valid_in),
|
||||
.data_in (ibuffer_if[i].data),
|
||||
.ready_in (stg_ready_in),
|
||||
.valid_out (scoreboard_if[i].valid),
|
||||
.data_out (scoreboard_if[i].data),
|
||||
.ready_out (scoreboard_if[i].ready)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
inuse_regs <= '0;
|
||||
end else begin
|
||||
end else begin
|
||||
if (writeback_fire) begin
|
||||
inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] <= 0;
|
||||
inuse_regs[writeback_if[iw].data.rd] <= 0;
|
||||
end
|
||||
if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin
|
||||
inuse_regs[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= 1;
|
||||
inuse_regs[ibuffer_if[i].data.rd] <= 1;
|
||||
end
|
||||
end
|
||||
`ifdef PERF_ENABLE
|
||||
if (ibuffer_if[i].valid && ibuffer_if[i].ready && ibuffer_if[i].data.wb) begin
|
||||
inuse_units[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= ibuffer_if[i].data.ex_type;
|
||||
inuse_units[ibuffer_if[i].data.rd] <= ibuffer_if[i].data.ex_type;
|
||||
if (ibuffer_if[i].data.ex_type == `EX_SFU) begin
|
||||
inuse_sfu[ibuffer_if[i].data.wis][ibuffer_if[i].data.rd] <= sfu_type;
|
||||
end
|
||||
inuse_sfu[ibuffer_if[i].data.rd] <= sfu_type;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
@ -203,7 +196,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
if (ibuffer_if[i].valid && ~ibuffer_if[i].ready) begin
|
||||
`ifdef DBG_TRACE_CORE_PIPELINE
|
||||
`TRACE(3, ("%d: *** core%0d-scoreboard-stall: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)\n",
|
||||
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||
$time, CORE_ID, i, ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||
operands_busy, ibuffer_if[i].data.uuid));
|
||||
`endif
|
||||
timeout_ctr <= timeout_ctr + 1;
|
||||
|
@ -215,14 +208,58 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
|
||||
`RUNTIME_ASSERT((timeout_ctr < `STALL_TIMEOUT),
|
||||
("%t: *** core%0d-scoreboard-timeout: wid=%0d, PC=0x%0h, tmask=%b, cycles=%0d, inuse=%b (#%0d)",
|
||||
$time, CORE_ID, wis_to_wid(ibuffer_if[i].data.wis, i), ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||
$time, CORE_ID, i, ibuffer_if[i].data.PC, ibuffer_if[i].data.tmask, timeout_ctr,
|
||||
operands_busy, ibuffer_if[i].data.uuid));
|
||||
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[i].data.wis][writeback_if[i].data.rd] != 0,
|
||||
`RUNTIME_ASSERT(~writeback_fire || inuse_regs[writeback_if[iw].data.rd] != 0,
|
||||
("%t: *** core%0d: invalid writeback register: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d (#%0d)",
|
||||
$time, CORE_ID, wis_to_wid(writeback_if[i].data.wis, i), writeback_if[i].data.PC, writeback_if[i].data.tmask, writeback_if[i].data.rd, writeback_if[i].data.uuid));
|
||||
$time, CORE_ID, i, writeback_if[iw].data.PC, writeback_if[iw].data.tmask, writeback_if[iw].data.rd, writeback_if[iw].data.uuid));
|
||||
`endif
|
||||
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
|
||||
wire [ISSUE_RATIO-1:0] valid_in;
|
||||
wire [ISSUE_RATIO-1:0][DATAW-1:0] data_in;
|
||||
wire [ISSUE_RATIO-1:0] ready_in;
|
||||
|
||||
for (genvar j = 0; j < ISSUE_RATIO; ++j) begin
|
||||
assign valid_in[j] = ibuffer_if[j * `ISSUE_WIDTH + i].valid && operands_ready[j * `ISSUE_WIDTH + i];
|
||||
assign data_in[j] = ibuffer_if[j * `ISSUE_WIDTH + i].data;
|
||||
assign ibuffer_if[j * `ISSUE_WIDTH + i].ready = ready_in[j] && operands_ready[j * `ISSUE_WIDTH + i];
|
||||
end
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (ISSUE_RATIO),
|
||||
.DATAW (DATAW),
|
||||
.ARBITER ("F"),
|
||||
.OUT_REG (2)
|
||||
) out_arb (
|
||||
.clk (clk),
|
||||
.reset (arb_reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.data_in (data_in),
|
||||
.data_out ({
|
||||
scoreboard_if[i].data.uuid,
|
||||
scoreboard_if[i].data.tmask,
|
||||
scoreboard_if[i].data.ex_type,
|
||||
scoreboard_if[i].data.op_type,
|
||||
scoreboard_if[i].data.op_mod,
|
||||
scoreboard_if[i].data.wb,
|
||||
scoreboard_if[i].data.use_PC,
|
||||
scoreboard_if[i].data.use_imm,
|
||||
scoreboard_if[i].data.PC,
|
||||
scoreboard_if[i].data.imm,
|
||||
scoreboard_if[i].data.rd,
|
||||
scoreboard_if[i].data.rs1,
|
||||
scoreboard_if[i].data.rs2,
|
||||
scoreboard_if[i].data.rs3
|
||||
}),
|
||||
.valid_out (scoreboard_if[i].valid),
|
||||
.ready_out (scoreboard_if[i].ready),
|
||||
.sel_out (scoreboard_if[i].data.wis)
|
||||
);
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -36,8 +36,8 @@ interface VX_decode_if ();
|
|||
logic valid;
|
||||
data_t data;
|
||||
logic ready;
|
||||
`ifndef L1_ENABLE
|
||||
logic [`ISSUE_WIDTH-1:0] ibuf_pop;
|
||||
`ifndef L1_ENABLE
|
||||
wire [`NUM_WARPS-1:0] ibuf_pop;
|
||||
`endif
|
||||
|
||||
modport master (
|
||||
|
|
|
@ -26,8 +26,8 @@ interface VX_fetch_if ();
|
|||
logic valid;
|
||||
data_t data;
|
||||
logic ready;
|
||||
`ifndef L1_ENABLE
|
||||
logic [`ISSUE_WIDTH-1:0] ibuf_pop;
|
||||
`ifndef L1_ENABLE
|
||||
logic [`NUM_WARPS-1:0] ibuf_pop;
|
||||
`endif
|
||||
|
||||
modport master (
|
||||
|
|
|
@ -17,7 +17,6 @@ interface VX_ibuffer_if import VX_gpu_pkg::*; ();
|
|||
|
||||
typedef struct packed {
|
||||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [ISSUE_WIS_W-1:0] wis;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`EX_BITS-1:0] ex_type;
|
||||
logic [`INST_OP_BITS-1:0] op_type;
|
||||
|
|
52
hw/rtl/interfaces/VX_scoreboard_if.sv
Normal file
52
hw/rtl/interfaces/VX_scoreboard_if.sv
Normal file
|
@ -0,0 +1,52 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_scoreboard_if import VX_gpu_pkg::*; ();
|
||||
|
||||
typedef struct packed {
|
||||
logic [`UUID_WIDTH-1:0] uuid;
|
||||
logic [ISSUE_WIS_W-1:0] wis;
|
||||
logic [`NUM_THREADS-1:0] tmask;
|
||||
logic [`EX_BITS-1:0] ex_type;
|
||||
logic [`INST_OP_BITS-1:0] op_type;
|
||||
logic [`INST_MOD_BITS-1:0] op_mod;
|
||||
logic wb;
|
||||
logic use_PC;
|
||||
logic use_imm;
|
||||
logic [`XLEN-1:0] PC;
|
||||
logic [`XLEN-1:0] imm;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic [`NR_BITS-1:0] rs1;
|
||||
logic [`NR_BITS-1:0] rs2;
|
||||
logic [`NR_BITS-1:0] rs3;
|
||||
} data_t;
|
||||
|
||||
logic valid;
|
||||
data_t data;
|
||||
logic ready;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
output data,
|
||||
input ready
|
||||
);
|
||||
|
||||
modport slave (
|
||||
input valid,
|
||||
input data,
|
||||
output ready
|
||||
);
|
||||
|
||||
endinterface
|
Loading…
Add table
Add a link
Reference in a new issue