adding PE switch

This commit is contained in:
Blaise Tine 2024-09-02 02:34:08 -07:00
parent d7eae0c886
commit 40e04a409e
3 changed files with 173 additions and 153 deletions

View file

@ -30,16 +30,20 @@ module VX_alu_unit #(
`UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
localparam NUM_LANES = `NUM_ALU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
localparam PE_COUNT = 1 + `EXT_M_ENABLED;
localparam PE_SEL_BITS = `CLOG2(PE_COUNT);
localparam PE_IDX_INT = 0;
localparam PE_IDX_MDV = PE_IDX_INT + `EXT_M_ENABLED;
VX_execute_if #(
.NUM_LANES (NUM_LANES)
) per_block_execute_if[BLOCK_SIZE]();
VX_commit_if #(
.NUM_LANES (NUM_LANES)
) per_block_commit_if[BLOCK_SIZE]();
VX_dispatch_unit #(
.BLOCK_SIZE (BLOCK_SIZE),
.NUM_LANES (NUM_LANES),
@ -51,26 +55,41 @@ module VX_alu_unit #(
.execute_if (per_block_execute_if)
);
VX_commit_if #(
.NUM_LANES (NUM_LANES)
) per_block_commit_if[BLOCK_SIZE]();
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alus
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV);
VX_execute_if #(
.NUM_LANES (NUM_LANES)
) int_execute_if();
) pe_execute_if[PE_COUNT]();
VX_commit_if #(
VX_commit_if#(
.NUM_LANES (NUM_LANES)
) int_commit_if();
) pe_commit_if[PE_COUNT]();
assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op;
assign int_execute_if.data = per_block_execute_if[block_idx].data;
reg [PE_SEL_BITS-1:0] pe_select;
always @(*) begin
if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV))
pe_select = PE_IDX_MDV;
else
pe_select = PE_IDX_INT;
end
VX_pe_switch #(
.PE_COUNT (PE_COUNT),
.NUM_LANES (NUM_LANES),
.ARBITER ("R"),
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (PARTIAL_BW ? 1 : 3)
) pe_switch (
.clk (clk),
.reset (block_reset),
.pe_sel (pe_select),
.execute_in_if (per_block_execute_if[block_idx]),
.commit_out_if (per_block_commit_if[block_idx]),
.execute_out_if (pe_execute_if),
.commit_in_if (pe_commit_if)
);
VX_alu_int #(
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
@ -79,76 +98,22 @@ module VX_alu_unit #(
) alu_int (
.clk (clk),
.reset (block_reset),
.execute_if (int_execute_if),
.execute_if (pe_execute_if[PE_IDX_INT]),
.branch_ctl_if (branch_ctl_if[block_idx]),
.commit_if (int_commit_if)
.commit_if (pe_commit_if[PE_IDX_INT])
);
`ifdef EXT_M_ENABLE
VX_execute_if #(
.NUM_LANES (NUM_LANES)
) muldiv_execute_if();
VX_commit_if #(
.NUM_LANES (NUM_LANES)
) muldiv_commit_if();
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
VX_alu_muldiv #(
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
.NUM_LANES (NUM_LANES)
) muldiv_unit (
.clk (clk),
.reset (block_reset),
.execute_if (muldiv_execute_if),
.commit_if (muldiv_commit_if)
.execute_if (pe_execute_if[PE_IDX_MDV]),
.commit_if (pe_commit_if[PE_IDX_MDV])
);
`endif
// can accept new request?
assign per_block_execute_if[block_idx].ready =
`ifdef EXT_M_ENABLE
is_muldiv_op ? muldiv_execute_if.ready :
`endif
int_execute_if.ready;
// send response
VX_stream_arb #(
.NUM_INPUTS (RSP_ARB_SIZE),
.DATAW (RSP_ARB_DATAW),
.OUT_BUF (PARTIAL_BW ? 1 : 3),
.ARBITER ("R")
) rsp_arb (
.clk (clk),
.reset (block_reset),
.valid_in ({
`ifdef EXT_M_ENABLE
muldiv_commit_if.valid,
`endif
int_commit_if.valid
}),
.ready_in ({
`ifdef EXT_M_ENABLE
muldiv_commit_if.ready,
`endif
int_commit_if.ready
}),
.data_in ({
`ifdef EXT_M_ENABLE
muldiv_commit_if.data,
`endif
int_commit_if.data
}),
.data_out (per_block_commit_if[block_idx].data),
.valid_out (per_block_commit_if[block_idx].valid),
.ready_out (per_block_commit_if[block_idx].ready),
`UNUSED_PIN (sel_out)
);
end
VX_gather_unit #(

View file

@ -0,0 +1,92 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
// VX_pe_switch: fans one execute stream out to PE_COUNT processing elements
// (PEs) and merges their commit streams back into a single commit stream.
//
// Request path : execute_in_if -> VX_stream_switch -> execute_out_if[pe_sel]
// Response path: commit_in_if[*] -> VX_stream_arb  -> commit_out_if
//
// Parameters:
//   PE_COUNT    - number of processing elements attached to the switch.
//   NUM_LANES   - lane count; used only to size the flattened payloads.
//   REQ_OUT_BUF - forwarded to VX_stream_switch as OUT_BUF.
//   RSP_OUT_BUF - forwarded to VX_stream_arb as OUT_BUF.
//   ARBITER     - arbitration policy string forwarded to VX_stream_arb.
//   PE_SEL_BITS - derived from PE_COUNT; declared in the parameter list so it
//                 is visible to the port declarations. Do not override.
module VX_pe_switch import VX_gpu_pkg::*; #(
    parameter PE_COUNT        = 0,
    parameter NUM_LANES       = 0,
    parameter REQ_OUT_BUF     = 0,
    parameter RSP_OUT_BUF     = 0,
    parameter `STRING ARBITER = "R",
    parameter PE_SEL_BITS     = `CLOG2(PE_COUNT)
) (
    input wire clk,
    input wire reset,

    // Index of the PE that should receive the current request.
    // `UP keeps the vector at least one bit wide when PE_COUNT == 1
    // (PE_SEL_BITS == 0), where [PE_SEL_BITS-1:0] would otherwise be [-1:0].
    input wire [`UP(PE_SEL_BITS)-1:0] pe_sel,

    VX_execute_if.slave  execute_in_if,
    VX_commit_if.master  commit_out_if,

    VX_execute_if.master execute_out_if[PE_COUNT],
    VX_commit_if .slave  commit_in_if[PE_COUNT]
);
    localparam PID_BITS  = `CLOG2(`NUM_THREADS / NUM_LANES);
    localparam PID_WIDTH = `UP(PID_BITS);

    // Flattened payload widths of the request and response streams.
    // NOTE(review): these must match the packed widths of VX_execute_if.data
    // and VX_commit_if.data for the same NUM_LANES - confirm against the
    // interface definitions if either struct changes.
    localparam REQ_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `INST_ALU_BITS + $bits(op_args_t) + 1 + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
    localparam RSP_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;

    ///////////////////////////////////////////////////////////////////////////
    // Request path
    ///////////////////////////////////////////////////////////////////////////

    wire [PE_COUNT-1:0]                pe_req_valid;
    wire [PE_COUNT-1:0][REQ_DATAW-1:0] pe_req_data;
    wire [PE_COUNT-1:0]                pe_req_ready;

    VX_stream_switch #(
        .DATAW       (REQ_DATAW),
        .NUM_OUTPUTS (PE_COUNT),
        .OUT_BUF     (REQ_OUT_BUF)
    ) req_switch (
        .clk       (clk),
        .reset     (reset),
        .sel_in    (pe_sel),
        .valid_in  (execute_in_if.valid),
        .ready_in  (execute_in_if.ready),
        .data_in   (execute_in_if.data),
        .data_out  (pe_req_data),
        .valid_out (pe_req_valid),
        .ready_out (pe_req_ready)
    );

    // Unpack the switch's flat output vectors onto the per-PE interfaces.
    for (genvar i = 0; i < PE_COUNT; ++i) begin : g_execute_out_if
        assign execute_out_if[i].valid = pe_req_valid[i];
        assign execute_out_if[i].data  = pe_req_data[i];
        assign pe_req_ready[i]         = execute_out_if[i].ready;
    end

    ///////////////////////////////////////////////////////////////////////////
    // Response path
    ///////////////////////////////////////////////////////////////////////////

    wire [PE_COUNT-1:0]                pe_rsp_valid;
    wire [PE_COUNT-1:0][RSP_DATAW-1:0] pe_rsp_data;
    wire [PE_COUNT-1:0]                pe_rsp_ready;

    // Pack the per-PE commit interfaces into flat vectors for the arbiter.
    for (genvar i = 0; i < PE_COUNT; ++i) begin : g_commit_in_if
        assign pe_rsp_valid[i]       = commit_in_if[i].valid;
        assign pe_rsp_data[i]        = commit_in_if[i].data;
        assign commit_in_if[i].ready = pe_rsp_ready[i];
    end

    VX_stream_arb #(
        .NUM_INPUTS (PE_COUNT),
        .DATAW      (RSP_DATAW),
        .ARBITER    (ARBITER),
        .OUT_BUF    (RSP_OUT_BUF)
    ) rsp_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (pe_rsp_valid),
        .ready_in  (pe_rsp_ready),
        .data_in   (pe_rsp_data),
        .data_out  (commit_out_if.data),
        .valid_out (commit_out_if.valid),
        .ready_out (commit_out_if.ready),
        `UNUSED_PIN (sel_out)
    );

endmodule

View file

@ -41,20 +41,21 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
VX_warp_ctl_if.master warp_ctl_if
);
`UNUSED_SPARAM (INSTANCE_ID)
localparam BLOCK_SIZE = 1;
localparam NUM_LANES = `NUM_SFU_LANES;
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
localparam PID_WIDTH = `UP(PID_BITS);
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `PC_BITS + PID_WIDTH + 1 + 1;
localparam RSP_ARB_SIZE = 1 + 1;
localparam RSP_ARB_IDX_WCTL = 0;
localparam RSP_ARB_IDX_CSRS = 1;
localparam BLOCK_SIZE = 1;
localparam NUM_LANES = `NUM_SFU_LANES;
localparam PE_COUNT = 2;
localparam PE_SEL_BITS = `CLOG2(PE_COUNT);
localparam PE_IDX_WCTL = 0;
localparam PE_IDX_CSRS = 1;
VX_execute_if #(
.NUM_LANES (NUM_LANES)
) per_block_execute_if[BLOCK_SIZE]();
VX_commit_if #(
.NUM_LANES (NUM_LANES)
) per_block_commit_if[BLOCK_SIZE]();
VX_dispatch_unit #(
.BLOCK_SIZE (BLOCK_SIZE),
.NUM_LANES (NUM_LANES),
@ -66,20 +67,37 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
.execute_if (per_block_execute_if)
);
wire [RSP_ARB_SIZE-1:0] rsp_arb_valid_in;
wire [RSP_ARB_SIZE-1:0] rsp_arb_ready_in;
wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in;
// Warp control block
VX_execute_if #(
.NUM_LANES (NUM_LANES)
) wctl_execute_if();
) pe_execute_if[PE_COUNT]();
VX_commit_if#(
.NUM_LANES (NUM_LANES)
) wctl_commit_if();
) pe_commit_if[PE_COUNT]();
assign wctl_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_WCTL(per_block_execute_if[0].data.op_type);
assign wctl_execute_if.data = per_block_execute_if[0].data;
reg [PE_SEL_BITS-1:0] pe_select;
always @(*) begin
if (`INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type))
pe_select = PE_IDX_CSRS;
else
pe_select = PE_IDX_WCTL;
end
VX_pe_switch #(
.PE_COUNT (PE_COUNT),
.NUM_LANES (NUM_LANES),
.ARBITER ("R"),
.REQ_OUT_BUF(0),
.RSP_OUT_BUF(3)
) pe_switch (
.clk (clk),
.reset (reset),
.pe_sel (pe_select),
.execute_in_if (per_block_execute_if[0]),
.commit_out_if (per_block_commit_if[0]),
.execute_out_if (pe_execute_if),
.commit_in_if (pe_commit_if)
);
`RESET_RELAY (wctl_reset, reset);
@ -89,26 +107,11 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
) wctl_unit (
.clk (clk),
.reset (wctl_reset),
.execute_if (wctl_execute_if),
.execute_if (pe_execute_if[PE_IDX_WCTL]),
.warp_ctl_if(warp_ctl_if),
.commit_if (wctl_commit_if)
.commit_if (pe_commit_if[PE_IDX_WCTL])
);
assign rsp_arb_valid_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.valid;
assign rsp_arb_data_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.data;
assign wctl_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_WCTL];
// CSR unit
VX_execute_if #(
.NUM_LANES (NUM_LANES)
) csr_execute_if();
VX_commit_if #(
.NUM_LANES (NUM_LANES)
) csr_commit_if();
assign csr_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type);
assign csr_execute_if.data = per_block_execute_if[0].data;
`RESET_RELAY (csr_reset, reset);
VX_csr_unit #(
@ -120,7 +123,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
.reset (csr_reset),
.base_dcrs (base_dcrs),
.execute_if (csr_execute_if),
.execute_if (pe_execute_if[PE_IDX_CSRS]),
`ifdef PERF_ENABLE
.mem_perf_if (mem_perf_if),
@ -133,47 +136,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
.sched_csr_if (sched_csr_if),
.commit_csr_if (commit_csr_if),
.commit_if (csr_commit_if)
);
assign rsp_arb_valid_in[RSP_ARB_IDX_CSRS] = csr_commit_if.valid;
assign rsp_arb_data_in[RSP_ARB_IDX_CSRS] = csr_commit_if.data;
assign csr_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_CSRS];
// can accept new request?
reg sfu_req_ready;
always @(*) begin
case (per_block_execute_if[0].data.op_type)
`INST_SFU_CSRRW,
`INST_SFU_CSRRS,
`INST_SFU_CSRRC: sfu_req_ready = csr_execute_if.ready;
default: sfu_req_ready = wctl_execute_if.ready;
endcase
end
assign per_block_execute_if[0].ready = sfu_req_ready;
// response arbitration
VX_commit_if #(
.NUM_LANES (NUM_LANES)
) arb_commit_if[BLOCK_SIZE]();
VX_stream_arb #(
.NUM_INPUTS (RSP_ARB_SIZE),
.DATAW (RSP_ARB_DATAW),
.ARBITER ("R"),
.OUT_BUF (3)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_arb_valid_in),
.ready_in (rsp_arb_ready_in),
.data_in (rsp_arb_data_in),
.data_out (arb_commit_if[0].data),
.valid_out (arb_commit_if[0].valid),
.ready_out (arb_commit_if[0].ready),
`UNUSED_PIN (sel_out)
.commit_if (pe_commit_if[PE_IDX_CSRS])
);
VX_gather_unit #(
@ -181,9 +144,9 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
.NUM_LANES (NUM_LANES),
.OUT_BUF (3)
) gather_unit (
.clk (clk),
.reset (reset),
.commit_in_if (arb_commit_if),
.clk (clk),
.reset (reset),
.commit_in_if (per_block_commit_if),
.commit_out_if (commit_if)
);