mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
adding PE switch
This commit is contained in:
parent
d7eae0c886
commit
40e04a409e
3 changed files with 173 additions and 153 deletions
|
@ -30,16 +30,20 @@ module VX_alu_unit #(
|
|||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = `NUM_ALU_BLOCKS;
|
||||
localparam NUM_LANES = `NUM_ALU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
|
||||
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
||||
localparam PE_COUNT = 1 + `EXT_M_ENABLED;
|
||||
localparam PE_SEL_BITS = `CLOG2(PE_COUNT);
|
||||
localparam PE_IDX_INT = 0;
|
||||
localparam PE_IDX_MDV = PE_IDX_INT + `EXT_M_ENABLED;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
|
@ -51,26 +55,41 @@ module VX_alu_unit #(
|
|||
.execute_if (per_block_execute_if)
|
||||
);
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin : alus
|
||||
|
||||
`RESET_RELAY_EN (block_reset, reset, (BLOCK_SIZE > 1));
|
||||
|
||||
wire is_muldiv_op = `EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV);
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_execute_if();
|
||||
) pe_execute_if[PE_COUNT]();
|
||||
|
||||
VX_commit_if #(
|
||||
VX_commit_if#(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_commit_if();
|
||||
) pe_commit_if[PE_COUNT]();
|
||||
|
||||
assign int_execute_if.valid = per_block_execute_if[block_idx].valid && ~is_muldiv_op;
|
||||
assign int_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
reg [PE_SEL_BITS-1:0] pe_select;
|
||||
always @(*) begin
|
||||
if (`EXT_M_ENABLED && (per_block_execute_if[block_idx].data.op_args.alu.xtype == `ALU_TYPE_MULDIV))
|
||||
pe_select = PE_IDX_MDV;
|
||||
else
|
||||
pe_select = PE_IDX_INT;
|
||||
end
|
||||
|
||||
VX_pe_switch #(
|
||||
.PE_COUNT (PE_COUNT),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF (0),
|
||||
.RSP_OUT_BUF (PARTIAL_BW ? 1 : 3)
|
||||
) pe_switch (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.pe_sel (pe_select),
|
||||
.execute_in_if (per_block_execute_if[block_idx]),
|
||||
.commit_out_if (per_block_commit_if[block_idx]),
|
||||
.execute_out_if (pe_execute_if),
|
||||
.commit_in_if (pe_commit_if)
|
||||
);
|
||||
|
||||
VX_alu_int #(
|
||||
.INSTANCE_ID ($sformatf("%s-int%0d", INSTANCE_ID, block_idx)),
|
||||
|
@ -79,76 +98,22 @@ module VX_alu_unit #(
|
|||
) alu_int (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.execute_if (int_execute_if),
|
||||
.execute_if (pe_execute_if[PE_IDX_INT]),
|
||||
.branch_ctl_if (branch_ctl_if[block_idx]),
|
||||
.commit_if (int_commit_if)
|
||||
.commit_if (pe_commit_if[PE_IDX_INT])
|
||||
);
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) muldiv_execute_if();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) muldiv_commit_if();
|
||||
|
||||
assign muldiv_execute_if.valid = per_block_execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign muldiv_execute_if.data = per_block_execute_if[block_idx].data;
|
||||
|
||||
VX_alu_muldiv #(
|
||||
.INSTANCE_ID ($sformatf("%s-muldiv%0d", INSTANCE_ID, block_idx)),
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) muldiv_unit (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.execute_if (muldiv_execute_if),
|
||||
.commit_if (muldiv_commit_if)
|
||||
.execute_if (pe_execute_if[PE_IDX_MDV]),
|
||||
.commit_if (pe_commit_if[PE_IDX_MDV])
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
// can accept new request?
|
||||
assign per_block_execute_if[block_idx].ready =
|
||||
`ifdef EXT_M_ENABLE
|
||||
is_muldiv_op ? muldiv_execute_if.ready :
|
||||
`endif
|
||||
int_execute_if.ready;
|
||||
|
||||
// send response
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.OUT_BUF (PARTIAL_BW ? 1 : 3),
|
||||
.ARBITER ("R")
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (block_reset),
|
||||
.valid_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
muldiv_commit_if.valid,
|
||||
`endif
|
||||
int_commit_if.valid
|
||||
}),
|
||||
.ready_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
muldiv_commit_if.ready,
|
||||
`endif
|
||||
int_commit_if.ready
|
||||
}),
|
||||
.data_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
muldiv_commit_if.data,
|
||||
`endif
|
||||
int_commit_if.data
|
||||
}),
|
||||
.data_out (per_block_commit_if[block_idx].data),
|
||||
.valid_out (per_block_commit_if[block_idx].valid),
|
||||
.ready_out (per_block_commit_if[block_idx].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
);
|
||||
end
|
||||
|
||||
VX_gather_unit #(
|
||||
|
|
92
hw/rtl/core/VX_pe_switch.sv
Normal file
92
hw/rtl/core/VX_pe_switch.sv
Normal file
|
@ -0,0 +1,92 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// VX_pe_switch
//
// Connects one execute/commit interface pair to PE_COUNT processing-element
// (PE) interface pairs:
//   - request path : execute_in_if is forwarded through a VX_stream_switch
//                    instance with PE_COUNT outputs, using pe_sel as the
//                    switch select input.
//   - response path: the PE_COUNT commit_in_if streams are merged onto
//                    commit_out_if through a VX_stream_arb instance.
//
// Parameters:
//   PE_COUNT    - number of PE interface pairs.
//   NUM_LANES   - lane count used to size the request/response payloads.
//   REQ_OUT_BUF - forwarded as OUT_BUF to the request switch.
//   RSP_OUT_BUF - forwarded as OUT_BUF to the response arbiter.
//   ARBITER     - forwarded as ARBITER to the response arbiter
//                 (policy encoding is defined by VX_stream_arb).
//
// Ports:
//   pe_sel         - PE index for the request currently on execute_in_if.
//                    Width is `UP(`CLOG2(PE_COUNT)), i.e. at least 1 bit.
//   execute_in_if  - incoming request stream.
//   commit_out_if  - outgoing response stream.
//   execute_out_if - per-PE request streams.
//   commit_in_if   - per-PE response streams.
module VX_pe_switch import VX_gpu_pkg::*; #(
    parameter PE_COUNT    = 0,
    parameter NUM_LANES   = 0,
    parameter REQ_OUT_BUF = 0,
    parameter RSP_OUT_BUF = 0,
    parameter `STRING ARBITER = "R"
) (
    input wire              clk,
    input wire              reset,
    // The select width is computed inline: a localparam declared in the module
    // body is not yet visible in the port list, and `UP keeps the vector at
    // least one bit wide when PE_COUNT == 1 (where `CLOG2 yields 0).
    input wire [`UP(`CLOG2(PE_COUNT))-1:0] pe_sel,
    VX_execute_if.slave     execute_in_if,
    VX_commit_if.master     commit_out_if,
    VX_execute_if.master    execute_out_if[PE_COUNT],
    VX_commit_if .slave     commit_in_if[PE_COUNT]
);
    localparam PID_BITS   = `CLOG2(`NUM_THREADS / NUM_LANES);
    localparam PID_WIDTH  = `UP(PID_BITS);
    // Flattened payload widths of the request and response streams.
    // NOTE(review): these are expected to match $bits() of the .data members of
    // VX_execute_if / VX_commit_if for this NUM_LANES - confirm against the
    // interface definitions.
    localparam REQ_DATAW  = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `INST_ALU_BITS + $bits(op_args_t) + 1 + `NR_BITS + `NT_WIDTH + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
    localparam RSP_DATAW  = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `PC_BITS + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;

    ///////////////////////////////////////////////////////////////////////////
    // Request path: execute_in_if -> execute_out_if[pe_sel]

    wire [PE_COUNT-1:0]                pe_req_valid;
    wire [PE_COUNT-1:0][REQ_DATAW-1:0] pe_req_data;
    wire [PE_COUNT-1:0]                pe_req_ready;

    VX_stream_switch #(
        .DATAW       (REQ_DATAW),
        .NUM_OUTPUTS (PE_COUNT),
        .OUT_BUF     (REQ_OUT_BUF)
    ) req_switch (
        .clk       (clk),
        .reset     (reset),
        .sel_in    (pe_sel),
        .valid_in  (execute_in_if.valid),
        .ready_in  (execute_in_if.ready),
        .data_in   (execute_in_if.data),
        .data_out  (pe_req_data),
        .valid_out (pe_req_valid),
        .ready_out (pe_req_ready)
    );

    // Unpack the switch outputs onto the per-PE execute interfaces.
    for (genvar i = 0; i < PE_COUNT; ++i) begin : g_execute_out_if
        assign execute_out_if[i].valid = pe_req_valid[i];
        assign execute_out_if[i].data  = pe_req_data[i];
        assign pe_req_ready[i] = execute_out_if[i].ready;
    end

    ///////////////////////////////////////////////////////////////////////////
    // Response path: commit_in_if[*] -> commit_out_if

    wire [PE_COUNT-1:0]                pe_rsp_valid;
    wire [PE_COUNT-1:0][RSP_DATAW-1:0] pe_rsp_data;
    wire [PE_COUNT-1:0]                pe_rsp_ready;

    // Pack the per-PE commit interfaces into vectors for the arbiter.
    for (genvar i = 0; i < PE_COUNT; ++i) begin : g_commit_in_if
        assign pe_rsp_valid[i] = commit_in_if[i].valid;
        assign pe_rsp_data[i]  = commit_in_if[i].data;
        assign commit_in_if[i].ready = pe_rsp_ready[i];
    end

    VX_stream_arb #(
        .NUM_INPUTS (PE_COUNT),
        .DATAW      (RSP_DATAW),
        .ARBITER    (ARBITER),
        .OUT_BUF    (RSP_OUT_BUF)
    ) rsp_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (pe_rsp_valid),
        .ready_in  (pe_rsp_ready),
        .data_in   (pe_rsp_data),
        .data_out  (commit_out_if.data),
        .valid_out (commit_out_if.valid),
        .ready_out (commit_out_if.ready),
        `UNUSED_PIN (sel_out)
    );

endmodule
|
|
@ -41,20 +41,21 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
VX_warp_ctl_if.master warp_ctl_if
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
|
||||
localparam RSP_ARB_DATAW = `UUID_WIDTH + `NW_WIDTH + NUM_LANES + (NUM_LANES * `XLEN) + `NR_BITS + 1 + `PC_BITS + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + 1;
|
||||
localparam RSP_ARB_IDX_WCTL = 0;
|
||||
localparam RSP_ARB_IDX_CSRS = 1;
|
||||
localparam BLOCK_SIZE = 1;
|
||||
localparam NUM_LANES = `NUM_SFU_LANES;
|
||||
localparam PE_COUNT = 2;
|
||||
localparam PE_SEL_BITS = `CLOG2(PE_COUNT);
|
||||
localparam PE_IDX_WCTL = 0;
|
||||
localparam PE_IDX_CSRS = 1;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_execute_if[BLOCK_SIZE]();
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) per_block_commit_if[BLOCK_SIZE]();
|
||||
|
||||
VX_dispatch_unit #(
|
||||
.BLOCK_SIZE (BLOCK_SIZE),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
|
@ -66,20 +67,37 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
.execute_if (per_block_execute_if)
|
||||
);
|
||||
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_valid_in;
|
||||
wire [RSP_ARB_SIZE-1:0] rsp_arb_ready_in;
|
||||
wire [RSP_ARB_SIZE-1:0][RSP_ARB_DATAW-1:0] rsp_arb_data_in;
|
||||
|
||||
// Warp control block
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_execute_if();
|
||||
) pe_execute_if[PE_COUNT]();
|
||||
|
||||
VX_commit_if#(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) wctl_commit_if();
|
||||
) pe_commit_if[PE_COUNT]();
|
||||
|
||||
assign wctl_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_WCTL(per_block_execute_if[0].data.op_type);
|
||||
assign wctl_execute_if.data = per_block_execute_if[0].data;
|
||||
reg [PE_SEL_BITS-1:0] pe_select;
|
||||
always @(*) begin
|
||||
if (`INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type))
|
||||
pe_select = PE_IDX_CSRS;
|
||||
else
|
||||
pe_select = PE_IDX_WCTL;
|
||||
end
|
||||
|
||||
VX_pe_switch #(
|
||||
.PE_COUNT (PE_COUNT),
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.ARBITER ("R"),
|
||||
.REQ_OUT_BUF(0),
|
||||
.RSP_OUT_BUF(3)
|
||||
) pe_switch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.pe_sel (pe_select),
|
||||
.execute_in_if (per_block_execute_if[0]),
|
||||
.commit_out_if (per_block_commit_if[0]),
|
||||
.execute_out_if (pe_execute_if),
|
||||
.commit_in_if (pe_commit_if)
|
||||
);
|
||||
|
||||
`RESET_RELAY (wctl_reset, reset);
|
||||
|
||||
|
@ -89,26 +107,11 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
) wctl_unit (
|
||||
.clk (clk),
|
||||
.reset (wctl_reset),
|
||||
.execute_if (wctl_execute_if),
|
||||
.execute_if (pe_execute_if[PE_IDX_WCTL]),
|
||||
.warp_ctl_if(warp_ctl_if),
|
||||
.commit_if (wctl_commit_if)
|
||||
.commit_if (pe_commit_if[PE_IDX_WCTL])
|
||||
);
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.valid;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_WCTL] = wctl_commit_if.data;
|
||||
assign wctl_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_WCTL];
|
||||
|
||||
// CSR unit
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_execute_if();
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) csr_commit_if();
|
||||
|
||||
assign csr_execute_if.valid = per_block_execute_if[0].valid && `INST_SFU_IS_CSR(per_block_execute_if[0].data.op_type);
|
||||
assign csr_execute_if.data = per_block_execute_if[0].data;
|
||||
|
||||
`RESET_RELAY (csr_reset, reset);
|
||||
|
||||
VX_csr_unit #(
|
||||
|
@ -120,7 +123,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
.reset (csr_reset),
|
||||
|
||||
.base_dcrs (base_dcrs),
|
||||
.execute_if (csr_execute_if),
|
||||
.execute_if (pe_execute_if[PE_IDX_CSRS]),
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
.mem_perf_if (mem_perf_if),
|
||||
|
@ -133,47 +136,7 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
.sched_csr_if (sched_csr_if),
|
||||
.commit_csr_if (commit_csr_if),
|
||||
.commit_if (csr_commit_if)
|
||||
);
|
||||
|
||||
assign rsp_arb_valid_in[RSP_ARB_IDX_CSRS] = csr_commit_if.valid;
|
||||
assign rsp_arb_data_in[RSP_ARB_IDX_CSRS] = csr_commit_if.data;
|
||||
assign csr_commit_if.ready = rsp_arb_ready_in[RSP_ARB_IDX_CSRS];
|
||||
|
||||
// can accept new request?
|
||||
|
||||
reg sfu_req_ready;
|
||||
always @(*) begin
|
||||
case (per_block_execute_if[0].data.op_type)
|
||||
`INST_SFU_CSRRW,
|
||||
`INST_SFU_CSRRS,
|
||||
`INST_SFU_CSRRC: sfu_req_ready = csr_execute_if.ready;
|
||||
default: sfu_req_ready = wctl_execute_if.ready;
|
||||
endcase
|
||||
end
|
||||
assign per_block_execute_if[0].ready = sfu_req_ready;
|
||||
|
||||
// response arbitration
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) arb_commit_if[BLOCK_SIZE]();
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (RSP_ARB_SIZE),
|
||||
.DATAW (RSP_ARB_DATAW),
|
||||
.ARBITER ("R"),
|
||||
.OUT_BUF (3)
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_arb_valid_in),
|
||||
.ready_in (rsp_arb_ready_in),
|
||||
.data_in (rsp_arb_data_in),
|
||||
.data_out (arb_commit_if[0].data),
|
||||
.valid_out (arb_commit_if[0].valid),
|
||||
.ready_out (arb_commit_if[0].ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
.commit_if (pe_commit_if[PE_IDX_CSRS])
|
||||
);
|
||||
|
||||
VX_gather_unit #(
|
||||
|
@ -181,9 +144,9 @@ module VX_sfu_unit import VX_gpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES),
|
||||
.OUT_BUF (3)
|
||||
) gather_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.commit_in_if (arb_commit_if),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.commit_in_if (per_block_commit_if),
|
||||
.commit_out_if (commit_if)
|
||||
);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue