Update MAX_FANOUT default from 4 to 8 and simplify fanout thresholds

This commit is contained in:
Blaise Tine 2024-03-12 01:46:42 -07:00
parent 840ced22a9
commit a8e892593e
5 changed files with 27 additions and 25 deletions

View file

@ -140,21 +140,21 @@
///////////////////////////////////////////////////////////////////////////////
`ifdef QUARTUS
`define MAX_FANOUT 4
`define MAX_FANOUT 8
`define IF_DATA_SIZE(x) $bits(x.data)
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
`define DISABLE_BRAM (* ramstyle = "logic" *)
`define PRESERVE_NET (* preserve *)
`elsif VIVADO
`define MAX_FANOUT 4
`define MAX_FANOUT 8
`define IF_DATA_SIZE(x) $bits(x.data)
`define USE_FAST_BRAM (* ram_style = "distributed" *)
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
`define DISABLE_BRAM (* ram_style = "registers" *)
`define PRESERVE_NET (* keep = "true" *)
`else
`define MAX_FANOUT 4
`define MAX_FANOUT 8
`define IF_DATA_SIZE(x) x.DATA_WIDTH
`define USE_FAST_BRAM
`define NO_RW_RAM_CHECK
@ -186,6 +186,8 @@
// UP(x): returns x, or 1 when x is 0 (keeps derived sizes at least 1).
`define UP(x) (((x) != 0) ? (x) : 1)
// CDIV(n,d): integer ceiling of n / d. Every argument use is parenthesized so
// compound expressions (e.g. "a >> 1" or "c ? a : b") are evaluated as a unit.
`define CDIV(n,d) (((n) + (d) - 1) / (d))
// RTRIM(x, s): selects the s most-significant bits of x. The width argument is
// parenthesized so an expression such as "a + b" is subtracted as a whole.
`define RTRIM(x, s) x[$bits(x)-1:($bits(x)-(s))]

View file

@ -39,7 +39,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
localparam FANOUT_ENABLE= (`NUM_THREADS > (MAX_FANOUT + MAX_FANOUT/2));
localparam FANOUT_ENABLE= (`NUM_THREADS > MAX_FANOUT);
localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
localparam DATA_REGS_OFF = 0;

View file

@ -33,10 +33,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4) + 1;
`ifdef PERF_ENABLE
reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_issue_units_per_cycle;
reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r;
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_issue_sfu_per_cycle;
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r;
wire [`NUM_WARPS-1:0] perf_issue_stalls_per_cycle;
@ -49,7 +49,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
.N (`NUM_WARPS),
.OP ("|")
) perf_units_reduce (
.data_in (perf_issue_units_per_cycle),
.data_in (perf_inuse_units_per_cycle),
.data_out (perf_units_per_cycle)
);
@ -58,13 +58,13 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
.N (`NUM_WARPS),
.OP ("|")
) perf_sfu_reduce (
.data_in (perf_issue_sfu_per_cycle),
.data_in (perf_inuse_sfu_per_cycle),
.data_out (perf_sfu_per_cycle)
);
`BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle);
`BUFFER(perf_units_per_cycle_r, perf_units_per_cycle);
`BUFFER(perf_sfu_per_cycle_r, perf_sfu_per_cycle);
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
always @(posedge clk) begin
if (reset) begin
@ -132,31 +132,31 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
end
always @(*) begin
perf_issue_units_per_cycle[i] = '0;
perf_issue_sfu_per_cycle[i] = '0;
perf_inuse_units_per_cycle[i] = '0;
perf_inuse_sfu_per_cycle[i] = '0;
if (ibuffer_if[i].valid) begin
if (inuse_rd) begin
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rd]] = 1;
perf_inuse_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rd]] = 1;
if (inuse_units[ibuffer_if[i].data.rd] == `EX_SFU) begin
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rd]] = 1;
perf_inuse_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rd]] = 1;
end
end
if (inuse_rs1) begin
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs1]] = 1;
perf_inuse_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs1]] = 1;
if (inuse_units[ibuffer_if[i].data.rs1] == `EX_SFU) begin
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs1]] = 1;
perf_inuse_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs1]] = 1;
end
end
if (inuse_rs2) begin
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs2]] = 1;
perf_inuse_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs2]] = 1;
if (inuse_units[ibuffer_if[i].data.rs2] == `EX_SFU) begin
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs2]] = 1;
perf_inuse_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs2]] = 1;
end
end
if (inuse_rs3) begin
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs3]] = 1;
perf_inuse_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs3]] = 1;
if (inuse_units[ibuffer_if[i].data.rs3] == `EX_SFU) begin
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs3]] = 1;
perf_inuse_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs3]] = 1;
end
end
end

View file

@ -22,7 +22,7 @@ module VX_reset_relay #(
input wire reset,
output wire [N-1:0] reset_o
);
if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin
if (MAX_FANOUT >= 0 && N > MAX_FANOUT) begin
localparam F = `UP(MAX_FANOUT);
localparam R = N / F;
`PRESERVE_NET reg [R-1:0] reset_r;

View file

@ -71,11 +71,11 @@ module VX_stream_arb #(
);
end
end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT/2))) begin
end else if (MAX_FANOUT != 0 && (NUM_INPUTS > MAX_FANOUT)) begin
// (#inputs > max_fanout) and (#outputs == 1)
localparam NUM_BATCHES = (NUM_INPUTS + MAX_FANOUT - 1) / MAX_FANOUT;
localparam NUM_BATCHES = `CDIV(NUM_INPUTS, MAX_FANOUT);
localparam LOG_NUM_REQS2 = `CLOG2(MAX_FANOUT);
localparam LOG_NUM_REQS3 = `CLOG2(NUM_BATCHES);
@ -232,11 +232,11 @@ module VX_stream_arb #(
end
end
end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT/2))) begin
end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > MAX_FANOUT)) begin
// (#inputs == 1) and (#outputs > max_fanout)
localparam NUM_BATCHES = (NUM_OUTPUTS + MAX_FANOUT - 1) / MAX_FANOUT;
localparam NUM_BATCHES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
wire [NUM_BATCHES-1:0] valid_tmp;
wire [NUM_BATCHES-1:0][DATAW-1:0] data_tmp;