mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
maxfanout update
This commit is contained in:
parent
840ced22a9
commit
a8e892593e
5 changed files with 27 additions and 25 deletions
|
@ -140,21 +140,21 @@
|
|||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef QUARTUS
|
||||
`define MAX_FANOUT 4
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) $bits(x.data)
|
||||
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
|
||||
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
|
||||
`define DISABLE_BRAM (* ramstyle = "logic" *)
|
||||
`define PRESERVE_NET (* preserve *)
|
||||
`elsif VIVADO
|
||||
`define MAX_FANOUT 4
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) $bits(x.data)
|
||||
`define USE_FAST_BRAM (* ram_style = "distributed" *)
|
||||
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
|
||||
`define DISABLE_BRAM (* ram_style = "registers" *)
|
||||
`define PRESERVE_NET (* keep = "true" *)
|
||||
`else
|
||||
`define MAX_FANOUT 4
|
||||
`define MAX_FANOUT 8
|
||||
`define IF_DATA_SIZE(x) x.DATA_WIDTH
|
||||
`define USE_FAST_BRAM
|
||||
`define NO_RW_RAM_CHECK
|
||||
|
@ -186,6 +186,8 @@
|
|||
|
||||
`define UP(x) (((x) != 0) ? (x) : 1)
|
||||
|
||||
`define CDIV(n,d) ((n + d - 1) / (d))
|
||||
|
||||
|
||||
`define RTRIM(x, s) x[$bits(x)-1:($bits(x)-s)]
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
|
|||
localparam ISSUE_W = `LOG2UP(`ISSUE_WIDTH);
|
||||
localparam IN_DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * `NUM_THREADS * `XLEN);
|
||||
localparam OUT_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + 1 + `XLEN + `XLEN + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
|
||||
localparam FANOUT_ENABLE= (`NUM_THREADS > (MAX_FANOUT + MAX_FANOUT/2));
|
||||
localparam FANOUT_ENABLE= (`NUM_THREADS > MAX_FANOUT);
|
||||
|
||||
localparam DATA_TMASK_OFF = IN_DATAW - (`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS);
|
||||
localparam DATA_REGS_OFF = 0;
|
||||
|
|
|
@ -33,10 +33,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
localparam DATAW = `UUID_WIDTH + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + 1 + `XLEN + (`NR_BITS * 4) + 1;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_issue_units_per_cycle;
|
||||
reg [`NUM_WARPS-1:0][`NUM_EX_UNITS-1:0] perf_inuse_units_per_cycle;
|
||||
wire [`NUM_EX_UNITS-1:0] perf_units_per_cycle, perf_units_per_cycle_r;
|
||||
|
||||
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_issue_sfu_per_cycle;
|
||||
reg [`NUM_WARPS-1:0][`NUM_SFU_UNITS-1:0] perf_inuse_sfu_per_cycle;
|
||||
wire [`NUM_SFU_UNITS-1:0] perf_sfu_per_cycle, perf_sfu_per_cycle_r;
|
||||
|
||||
wire [`NUM_WARPS-1:0] perf_issue_stalls_per_cycle;
|
||||
|
@ -49,7 +49,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
.N (`NUM_WARPS),
|
||||
.OP ("|")
|
||||
) perf_units_reduce (
|
||||
.data_in (perf_issue_units_per_cycle),
|
||||
.data_in (perf_inuse_units_per_cycle),
|
||||
.data_out (perf_units_per_cycle)
|
||||
);
|
||||
|
||||
|
@ -58,13 +58,13 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
.N (`NUM_WARPS),
|
||||
.OP ("|")
|
||||
) perf_sfu_reduce (
|
||||
.data_in (perf_issue_sfu_per_cycle),
|
||||
.data_in (perf_inuse_sfu_per_cycle),
|
||||
.data_out (perf_sfu_per_cycle)
|
||||
);
|
||||
|
||||
`BUFFER(perf_stalls_per_cycle_r, perf_stalls_per_cycle);
|
||||
`BUFFER(perf_units_per_cycle_r, perf_units_per_cycle);
|
||||
`BUFFER(perf_sfu_per_cycle_r, perf_sfu_per_cycle);
|
||||
`BUFFER_EX(perf_units_per_cycle_r, perf_units_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
|
||||
`BUFFER_EX(perf_sfu_per_cycle_r, perf_sfu_per_cycle, 1'b1, `CDIV(`NUM_WARPS, `MAX_FANOUT));
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -132,31 +132,31 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
end
|
||||
|
||||
always @(*) begin
|
||||
perf_issue_units_per_cycle[i] = '0;
|
||||
perf_issue_sfu_per_cycle[i] = '0;
|
||||
perf_inuse_units_per_cycle[i] = '0;
|
||||
perf_inuse_sfu_per_cycle[i] = '0;
|
||||
if (ibuffer_if[i].valid) begin
|
||||
if (inuse_rd) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rd]] = 1;
|
||||
perf_inuse_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rd]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.rd] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rd]] = 1;
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rd]] = 1;
|
||||
end
|
||||
end
|
||||
if (inuse_rs1) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs1]] = 1;
|
||||
perf_inuse_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs1]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.rs1] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs1]] = 1;
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs1]] = 1;
|
||||
end
|
||||
end
|
||||
if (inuse_rs2) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs2]] = 1;
|
||||
perf_inuse_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs2]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.rs2] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs2]] = 1;
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs2]] = 1;
|
||||
end
|
||||
end
|
||||
if (inuse_rs3) begin
|
||||
perf_issue_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs3]] = 1;
|
||||
perf_inuse_units_per_cycle[i][inuse_units[ibuffer_if[i].data.rs3]] = 1;
|
||||
if (inuse_units[ibuffer_if[i].data.rs3] == `EX_SFU) begin
|
||||
perf_issue_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs3]] = 1;
|
||||
perf_inuse_sfu_per_cycle[i][inuse_sfu[ibuffer_if[i].data.rs3]] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -22,7 +22,7 @@ module VX_reset_relay #(
|
|||
input wire reset,
|
||||
output wire [N-1:0] reset_o
|
||||
);
|
||||
if (MAX_FANOUT >= 0 && N > (MAX_FANOUT + MAX_FANOUT/2)) begin
|
||||
if (MAX_FANOUT >= 0 && N > MAX_FANOUT) begin
|
||||
localparam F = `UP(MAX_FANOUT);
|
||||
localparam R = N / F;
|
||||
`PRESERVE_NET reg [R-1:0] reset_r;
|
||||
|
|
|
@ -71,11 +71,11 @@ module VX_stream_arb #(
|
|||
);
|
||||
end
|
||||
|
||||
end else if (MAX_FANOUT != 0 && (NUM_INPUTS > (MAX_FANOUT + MAX_FANOUT/2))) begin
|
||||
end else if (MAX_FANOUT != 0 && (NUM_INPUTS > MAX_FANOUT)) begin
|
||||
|
||||
// (#inputs > max_fanout) and (#outputs == 1)
|
||||
|
||||
localparam NUM_BATCHES = (NUM_INPUTS + MAX_FANOUT - 1) / MAX_FANOUT;
|
||||
localparam NUM_BATCHES = `CDIV(NUM_INPUTS, MAX_FANOUT);
|
||||
localparam LOG_NUM_REQS2 = `CLOG2(MAX_FANOUT);
|
||||
localparam LOG_NUM_REQS3 = `CLOG2(NUM_BATCHES);
|
||||
|
||||
|
@ -232,11 +232,11 @@ module VX_stream_arb #(
|
|||
end
|
||||
end
|
||||
|
||||
end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > (MAX_FANOUT + MAX_FANOUT/2))) begin
|
||||
end else if (MAX_FANOUT != 0 && (NUM_OUTPUTS > MAX_FANOUT)) begin
|
||||
|
||||
// (#inputs == 1) and (#outputs > max_fanout)
|
||||
|
||||
localparam NUM_BATCHES = (NUM_OUTPUTS + MAX_FANOUT - 1) / MAX_FANOUT;
|
||||
localparam NUM_BATCHES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
|
||||
|
||||
wire [NUM_BATCHES-1:0] valid_tmp;
|
||||
wire [NUM_BATCHES-1:0][DATAW-1:0] data_tmp;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue