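minor update: fold the UUID into the VX_operands metadata bundle (METADATAW -> META_DATAW, drop pipe_out_uuid); rename BATCH_* to SLICE_* in VX_stream_arb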

This commit is contained in:
Blaise Tine 2024-07-30 17:55:21 -07:00
parent 029609b3fd
commit e1c5b5277e
2 changed files with 49 additions and 51 deletions

View file

@ -43,8 +43,8 @@ module VX_operands import VX_gpu_pkg::*; #(
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam PER_BANK_REGS = `NUM_REGS / NUM_BANKS;
localparam METADATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS;
localparam DATAW = `UUID_WIDTH + METADATAW + 3 * `NUM_THREADS * `XLEN;
localparam META_DATAW = ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS + `UUID_WIDTH;
localparam DATAW = META_DATAW + 3 * `NUM_THREADS * `XLEN;
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * PER_ISSUE_WARPS);
localparam PER_BANK_ADDRW = RAM_ADDRW - BANK_SEL_BITS;
localparam XLEN_SIZE = `XLEN / 8;
@ -69,8 +69,7 @@ module VX_operands import VX_gpu_pkg::*; #(
wire pipe_in_ready;
reg pipe_out_valid;
wire pipe_out_ready;
reg [`UUID_WIDTH-1:0] pipe_out_uuid;
reg [METADATAW-1:0] pipe_out_data;
reg [META_DATAW-1:0] pipe_out_data;
reg [NUM_SRC_REGS-1:0][`NUM_THREADS-1:0][`XLEN-1:0] src_data, src_data_n;
reg [NUM_SRC_REGS-1:0] data_fetched;
@ -174,7 +173,6 @@ module VX_operands import VX_gpu_pkg::*; #(
end
end
if (~pipe_stall) begin
pipe_out_uuid <= scoreboard_if.data.uuid;
pipe_out_data <= {
scoreboard_if.data.wis,
scoreboard_if.data.tmask,
@ -183,7 +181,8 @@ module VX_operands import VX_gpu_pkg::*; #(
scoreboard_if.data.ex_type,
scoreboard_if.data.op_type,
scoreboard_if.data.op_args,
scoreboard_if.data.rd
scoreboard_if.data.rd,
scoreboard_if.data.uuid
};
has_collision <= has_collision_n;
gpr_rd_addr <= gpr_rd_addr_n;
@ -205,14 +204,12 @@ module VX_operands import VX_gpu_pkg::*; #(
.valid_in (stg_in_valid),
.ready_in (stg_in_ready),
.data_in ({
pipe_out_uuid,
pipe_out_data,
src_data_n[0],
src_data_n[1],
src_data_n[2]
}),
.data_out ({
operands_if.data.uuid,
operands_if.data.wis,
operands_if.data.tmask,
operands_if.data.PC,
@ -221,6 +218,7 @@ module VX_operands import VX_gpu_pkg::*; #(
operands_if.data.op_type,
operands_if.data.op_args,
operands_if.data.rd,
operands_if.data.uuid,
operands_if.data.rs1_data,
operands_if.data.rs2_data,
operands_if.data.rs3_data

View file

@ -46,14 +46,14 @@ module VX_stream_arb #(
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
localparam BATCH_BEGIN = i * NUM_REQS;
localparam BATCH_END = `MIN(BATCH_BEGIN + NUM_REQS, NUM_INPUTS);
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
localparam SLICE_BEGIN = i * NUM_REQS;
localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_INPUTS);
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
`RESET_RELAY (slice_reset, reset);
VX_stream_arb #(
.NUM_INPUTS (BATCH_SIZE),
.NUM_INPUTS (SLICE_SIZE),
.NUM_OUTPUTS (1),
.DATAW (DATAW),
.ARBITER (ARBITER),
@ -63,9 +63,9 @@ module VX_stream_arb #(
) arb_slice (
.clk (clk),
.reset (slice_reset),
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
.valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]),
.ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]),
.data_in (data_in[SLICE_END-1: SLICE_BEGIN]),
.data_out (data_out[i]),
.sel_out (sel_out[i]),
.valid_out (valid_out[i]),
@ -77,28 +77,28 @@ module VX_stream_arb #(
// (#inputs > max_fanout) and (#outputs == 1)
localparam NUM_BATCHES = `CDIV(NUM_INPUTS, MAX_FANOUT);
localparam NUM_SLICES = `CDIV(NUM_INPUTS, MAX_FANOUT);
localparam LOG_NUM_REQS2 = `CLOG2(MAX_FANOUT);
localparam LOG_NUM_REQS3 = `CLOG2(NUM_BATCHES);
localparam LOG_NUM_REQS3 = `CLOG2(NUM_SLICES);
wire [NUM_BATCHES-1:0] valid_tmp;
wire [NUM_BATCHES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
wire [NUM_BATCHES-1:0] ready_tmp;
wire [NUM_SLICES-1:0] valid_tmp;
wire [NUM_SLICES-1:0][DATAW+LOG_NUM_REQS2-1:0] data_tmp;
wire [NUM_SLICES-1:0] ready_tmp;
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
for (genvar i = 0; i < NUM_SLICES; ++i) begin
localparam BATCH_BEGIN = i * MAX_FANOUT;
localparam BATCH_END = `MIN(BATCH_BEGIN + MAX_FANOUT, NUM_INPUTS);
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
localparam SLICE_BEGIN = i * MAX_FANOUT;
localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_INPUTS);
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
wire [DATAW-1:0] data_tmp_u;
wire [`LOG2UP(BATCH_SIZE)-1:0] sel_tmp_u;
wire [`LOG2UP(SLICE_SIZE)-1:0] sel_tmp_u;
`RESET_RELAY (slice_reset, reset);
if (MAX_FANOUT != 1) begin
VX_stream_arb #(
.NUM_INPUTS (BATCH_SIZE),
.NUM_INPUTS (SLICE_SIZE),
.NUM_OUTPUTS (1),
.DATAW (DATAW),
.ARBITER (ARBITER),
@ -108,9 +108,9 @@ module VX_stream_arb #(
) fanout_slice_arb (
.clk (clk),
.reset (slice_reset),
.valid_in (valid_in[BATCH_END-1: BATCH_BEGIN]),
.data_in (data_in[BATCH_END-1: BATCH_BEGIN]),
.ready_in (ready_in[BATCH_END-1: BATCH_BEGIN]),
.valid_in (valid_in[SLICE_END-1: SLICE_BEGIN]),
.data_in (data_in[SLICE_END-1: SLICE_BEGIN]),
.ready_in (ready_in[SLICE_END-1: SLICE_BEGIN]),
.valid_out (valid_tmp[i]),
.data_out (data_tmp_u),
.sel_out (sel_tmp_u),
@ -125,7 +125,7 @@ module VX_stream_arb #(
wire [LOG_NUM_REQS3-1:0] sel_out_u;
VX_stream_arb #(
.NUM_INPUTS (NUM_BATCHES),
.NUM_INPUTS (NUM_SLICES),
.NUM_OUTPUTS (1),
.DATAW (DATAW + LOG_NUM_REQS2),
.ARBITER (ARBITER),
@ -214,15 +214,15 @@ module VX_stream_arb #(
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
localparam BATCH_BEGIN = i * NUM_REQS;
localparam BATCH_END = `MIN(BATCH_BEGIN + NUM_REQS, NUM_OUTPUTS);
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
localparam SLICE_BEGIN = i * NUM_REQS;
localparam SLICE_END = `MIN(SLICE_BEGIN + NUM_REQS, NUM_OUTPUTS);
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
`RESET_RELAY (slice_reset, reset);
VX_stream_arb #(
.NUM_INPUTS (1),
.NUM_OUTPUTS (BATCH_SIZE),
.NUM_OUTPUTS (SLICE_SIZE),
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
@ -234,13 +234,13 @@ module VX_stream_arb #(
.valid_in (valid_in[i]),
.ready_in (ready_in[i]),
.data_in (data_in[i]),
.data_out (data_out[BATCH_END-1: BATCH_BEGIN]),
.valid_out (valid_out[BATCH_END-1: BATCH_BEGIN]),
.ready_out (ready_out[BATCH_END-1: BATCH_BEGIN]),
.data_out (data_out[SLICE_END-1: SLICE_BEGIN]),
.valid_out (valid_out[SLICE_END-1: SLICE_BEGIN]),
.ready_out (ready_out[SLICE_END-1: SLICE_BEGIN]),
`UNUSED_PIN (sel_out)
);
for (genvar j = BATCH_BEGIN; j < BATCH_END; ++j) begin
for (genvar j = SLICE_BEGIN; j < SLICE_END; ++j) begin
assign sel_out[j] = i;
end
end
@ -249,15 +249,15 @@ module VX_stream_arb #(
// (#inputs == 1) and (#outputs > max_fanout)
localparam NUM_BATCHES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
localparam NUM_SLICES = `CDIV(NUM_OUTPUTS, MAX_FANOUT);
wire [NUM_BATCHES-1:0] valid_tmp;
wire [NUM_BATCHES-1:0][DATAW-1:0] data_tmp;
wire [NUM_BATCHES-1:0] ready_tmp;
wire [NUM_SLICES-1:0] valid_tmp;
wire [NUM_SLICES-1:0][DATAW-1:0] data_tmp;
wire [NUM_SLICES-1:0] ready_tmp;
VX_stream_arb #(
.NUM_INPUTS (1),
.NUM_OUTPUTS (NUM_BATCHES),
.NUM_OUTPUTS (NUM_SLICES),
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
@ -275,17 +275,17 @@ module VX_stream_arb #(
`UNUSED_PIN (sel_out)
);
for (genvar i = 0; i < NUM_BATCHES; ++i) begin
for (genvar i = 0; i < NUM_SLICES; ++i) begin
localparam BATCH_BEGIN = i * MAX_FANOUT;
localparam BATCH_END = `MIN(BATCH_BEGIN + MAX_FANOUT, NUM_OUTPUTS);
localparam BATCH_SIZE = BATCH_END - BATCH_BEGIN;
localparam SLICE_BEGIN = i * MAX_FANOUT;
localparam SLICE_END = `MIN(SLICE_BEGIN + MAX_FANOUT, NUM_OUTPUTS);
localparam SLICE_SIZE = SLICE_END - SLICE_BEGIN;
`RESET_RELAY (slice_reset, reset);
VX_stream_arb #(
.NUM_INPUTS (1),
.NUM_OUTPUTS (BATCH_SIZE),
.NUM_OUTPUTS (SLICE_SIZE),
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
@ -297,9 +297,9 @@ module VX_stream_arb #(
.valid_in (valid_tmp[i]),
.ready_in (ready_tmp[i]),
.data_in (data_tmp[i]),
.data_out (data_out[BATCH_END-1: BATCH_BEGIN]),
.valid_out (valid_out[BATCH_END-1: BATCH_BEGIN]),
.ready_out (ready_out[BATCH_END-1: BATCH_BEGIN]),
.data_out (data_out[SLICE_END-1: SLICE_BEGIN]),
.valid_out (valid_out[SLICE_END-1: SLICE_BEGIN]),
.ready_out (ready_out[SLICE_END-1: SLICE_BEGIN]),
`UNUSED_PIN (sel_out)
);
end