mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
ALU unit critical path optimization
This commit is contained in:
parent
c06efbf480
commit
aeeb3ca616
6 changed files with 107 additions and 88 deletions
|
@ -22,12 +22,12 @@ module VX_alu_unit #(
|
|||
wire [`NUM_THREADS-1:0][31:0] shr_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] msc_result;
|
||||
|
||||
wire stall_in, stall_out;
|
||||
wire ready_in;
|
||||
|
||||
`UNUSED_VAR (alu_req_if.op_mod)
|
||||
wire is_br_op = `INST_ALU_IS_BR(alu_req_if.op_mod);
|
||||
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_OP(alu_req_if.op_type);
|
||||
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_OP(alu_req_if.op_type);
|
||||
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_BITS'(alu_req_if.op_type);
|
||||
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_BITS'(alu_req_if.op_type);
|
||||
wire alu_signed = `INST_ALU_SIGNED(alu_op);
|
||||
wire [1:0] alu_op_class = `INST_ALU_OP_CLASS(alu_op);
|
||||
wire is_sub = (alu_op == `INST_ALU_SUB);
|
||||
|
@ -92,17 +92,49 @@ module VX_alu_unit #(
|
|||
|
||||
// output
|
||||
|
||||
wire result_valid;
|
||||
wire [`NW_BITS-1:0] result_wid;
|
||||
wire [`NUM_THREADS-1:0] result_tmask;
|
||||
wire [31:0] result_PC;
|
||||
wire [`NR_BITS-1:0] result_rd;
|
||||
wire result_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] result_data;
|
||||
wire result_is_br;
|
||||
wire alu_valid_in;
|
||||
wire alu_ready_in;
|
||||
wire alu_valid_out;
|
||||
wire alu_ready_out;
|
||||
wire [`NW_BITS-1:0] alu_wid;
|
||||
wire [`NUM_THREADS-1:0] alu_tmask;
|
||||
wire [31:0] alu_PC;
|
||||
wire [`NR_BITS-1:0] alu_rd;
|
||||
wire alu_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_data;
|
||||
|
||||
wire [`INST_BR_BITS-1:0] br_op_r;
|
||||
wire [31:0] br_dest_r;
|
||||
wire is_less_r;
|
||||
wire is_equal_r;
|
||||
wire is_br_op_r;
|
||||
|
||||
assign alu_ready_in = alu_ready_out || ~alu_valid_out;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (alu_ready_in),
|
||||
.data_in ({alu_valid_in, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_op, is_less, is_equal, br_dest}),
|
||||
.data_out ({alu_valid_out, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, alu_data, is_br_op_r, br_op_r, is_less_r, is_equal_r, br_dest_r})
|
||||
);
|
||||
|
||||
`UNUSED_VAR (br_op_r)
|
||||
wire br_neg = `INST_BR_NEG(br_op_r);
|
||||
wire br_less = `INST_BR_LESS(br_op_r);
|
||||
wire br_static = `INST_BR_STATIC(br_op_r);
|
||||
|
||||
assign branch_ctl_if.valid = alu_valid_out && alu_ready_out && is_br_op_r;
|
||||
assign branch_ctl_if.taken = ((br_less ? is_less_r : is_equal_r) ^ br_neg) | br_static;
|
||||
assign branch_ctl_if.wid = alu_wid;
|
||||
assign branch_ctl_if.dest = br_dest_r;
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
wire mul_valid_in;
|
||||
wire mul_ready_in;
|
||||
wire mul_valid_out;
|
||||
wire mul_ready_out;
|
||||
|
@ -113,14 +145,14 @@ module VX_alu_unit #(
|
|||
wire mul_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_data;
|
||||
|
||||
wire is_mul_op = `INST_ALU_IS_MUL(alu_req_if.op_mod);
|
||||
|
||||
wire [`INST_MUL_BITS-1:0] mul_op = `INST_MUL_BITS'(alu_req_if.op_type);
|
||||
|
||||
VX_muldiv muldiv (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Inputs
|
||||
.alu_op (`INST_MUL_OP(alu_req_if.op_type)),
|
||||
.alu_op (mul_op),
|
||||
.wid_in (alu_req_if.wid),
|
||||
.tmask_in (alu_req_if.tmask),
|
||||
.PC_in (alu_req_if.PC),
|
||||
|
@ -138,72 +170,52 @@ module VX_alu_unit #(
|
|||
.data_out (mul_data),
|
||||
|
||||
// handshake
|
||||
.valid_in (alu_req_if.valid && is_mul_op),
|
||||
.valid_in (mul_valid_in),
|
||||
.ready_in (mul_ready_in),
|
||||
.valid_out (mul_valid_out),
|
||||
.ready_out (mul_ready_out)
|
||||
);
|
||||
|
||||
assign stall_in = (is_mul_op && ~mul_ready_in)
|
||||
|| (~is_mul_op && (mul_valid_out || stall_out));
|
||||
|
||||
assign mul_ready_out = ~stall_out;
|
||||
wire is_mul_op = `INST_ALU_IS_MUL(alu_req_if.op_mod);
|
||||
|
||||
assign result_valid = mul_valid_out || (alu_req_if.valid && ~is_mul_op);
|
||||
assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid;
|
||||
assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask;
|
||||
assign result_PC = mul_valid_out ? mul_PC : alu_req_if.PC;
|
||||
assign result_rd = mul_valid_out ? mul_rd : alu_req_if.rd;
|
||||
assign result_wb = mul_valid_out ? mul_wb : alu_req_if.wb;
|
||||
assign result_data = mul_valid_out ? mul_data : alu_jal_result;
|
||||
assign result_is_br = ~mul_valid_out && is_br_op;
|
||||
assign ready_in = is_mul_op ? mul_ready_in : alu_ready_in;
|
||||
|
||||
assign alu_valid_in = alu_req_if.valid && ~is_mul_op;
|
||||
assign mul_valid_in = alu_req_if.valid && is_mul_op;
|
||||
|
||||
assign alu_commit_if.valid = alu_valid_out || mul_valid_out;
|
||||
assign alu_commit_if.wid = alu_valid_out ? alu_wid : mul_wid;
|
||||
assign alu_commit_if.tmask = alu_valid_out ? alu_tmask : mul_tmask;
|
||||
assign alu_commit_if.PC = alu_valid_out ? alu_PC : mul_PC;
|
||||
assign alu_commit_if.rd = alu_valid_out ? alu_rd : mul_rd;
|
||||
assign alu_commit_if.wb = alu_valid_out ? alu_wb : mul_wb;
|
||||
assign alu_commit_if.data = alu_valid_out ? alu_data : mul_data;
|
||||
|
||||
assign alu_ready_out = alu_commit_if.ready;
|
||||
assign mul_ready_out = alu_commit_if.ready & ~alu_valid_out; // ALU takes priority
|
||||
|
||||
`else
|
||||
|
||||
assign stall_in = stall_out;
|
||||
assign ready_in = alu_ready_in;
|
||||
|
||||
assign result_valid = alu_req_if.valid;
|
||||
assign result_wid = alu_req_if.wid;
|
||||
assign result_tmask = alu_req_if.tmask;
|
||||
assign result_PC = alu_req_if.PC;
|
||||
assign result_rd = alu_req_if.rd;
|
||||
assign result_wb = alu_req_if.wb;
|
||||
assign result_data = alu_jal_result;
|
||||
assign result_is_br = is_br_op;
|
||||
assign alu_valid_in = alu_req_if.valid;
|
||||
|
||||
assign alu_commit_if.valid = alu_valid_out;
|
||||
assign alu_commit_if.wid = alu_wid;
|
||||
assign alu_commit_if.tmask = alu_tmask;
|
||||
assign alu_commit_if.PC = alu_PC;
|
||||
assign alu_commit_if.rd = alu_rd;
|
||||
assign alu_commit_if.wb = alu_wb;
|
||||
assign alu_commit_if.data = alu_data;
|
||||
|
||||
assign alu_ready_out = alu_commit_if.ready;
|
||||
|
||||
`endif
|
||||
|
||||
wire [`INST_BR_BITS-1:0] br_op_r;
|
||||
wire is_less_r;
|
||||
wire is_equal_r;
|
||||
wire is_br_op_r;
|
||||
|
||||
assign stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({result_valid, result_wid, result_tmask, result_PC, result_rd, result_wb, result_data, result_is_br, br_op, is_less, is_equal, br_dest}),
|
||||
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, br_op_r, is_less_r, is_equal_r, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
assign alu_commit_if.eop = 1'b1;
|
||||
|
||||
`UNUSED_VAR (br_op_r)
|
||||
wire br_neg = `INST_BR_NEG(br_op_r);
|
||||
wire br_less = `INST_BR_LESS(br_op_r);
|
||||
wire br_static = `INST_BR_STATIC(br_op_r);
|
||||
|
||||
assign branch_ctl_if.valid = alu_commit_if.valid && alu_commit_if.ready && is_br_op_r;
|
||||
assign branch_ctl_if.taken = ((br_less ? is_less_r : is_equal_r) ^ br_neg) | br_static;
|
||||
assign branch_ctl_if.wid = alu_commit_if.wid;
|
||||
|
||||
// can accept new request?
|
||||
assign alu_req_if.ready = ~stall_in;
|
||||
assign alu_req_if.ready = ready_in;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -31,7 +31,7 @@ module VX_csr_unit #(
|
|||
|
||||
wire write_enable = csr_commit_if.valid && csr_we_s1;
|
||||
|
||||
wire [31:0] csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.rs1) : csr_req_if.rs1_data;
|
||||
wire [31:0] csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.imm) : csr_req_if.rs1_data;
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID(CORE_ID)
|
||||
|
|
|
@ -201,10 +201,10 @@ module VX_decode #(
|
|||
op_type = `INST_OP_BITS'(func3[1:0]);
|
||||
use_rd = 1;
|
||||
use_imm = func3[2];
|
||||
imm = 32'(u_12); // addr
|
||||
imm[`CSR_ADDR_BITS-1:0] = u_12; // addr
|
||||
`USED_IREG (rd);
|
||||
if (func3[2]) begin
|
||||
rs1_r = `NR_BITS'(rs1); // imm
|
||||
imm[`CSR_ADDR_BITS +: `NRI_BITS] = rs1; // imm
|
||||
end else begin
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
|
|
|
@ -14,10 +14,14 @@
|
|||
|
||||
`define NB_BITS `LOG2UP(`NUM_BARRIERS)
|
||||
|
||||
`define NUM_IREGS 32
|
||||
|
||||
`define NRI_BITS `LOG2UP(`NUM_IREGS)
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`define NUM_REGS 64
|
||||
`define NUM_REGS (2 * `NUM_IREGS)
|
||||
`else
|
||||
`define NUM_REGS 32
|
||||
`define NUM_REGS `NUM_IREGS
|
||||
`endif
|
||||
|
||||
`define NR_BITS `LOG2UP(`NUM_REGS)
|
||||
|
@ -114,7 +118,6 @@
|
|||
`define INST_BR_DRET 4'b1110
|
||||
`define INST_BR_OTHER 4'b1111
|
||||
`define INST_BR_BITS 4
|
||||
`define INST_BR_OP(x) x[`INST_BR_BITS-1:0]
|
||||
`define INST_BR_NEG(x) x[1]
|
||||
`define INST_BR_LESS(x) x[2]
|
||||
`define INST_BR_STATIC(x) x[3]
|
||||
|
@ -128,7 +131,6 @@
|
|||
`define INST_MUL_REM 3'h6
|
||||
`define INST_MUL_REMU 3'h7
|
||||
`define INST_MUL_BITS 3
|
||||
`define INST_MUL_OP(x) x[`INST_MUL_BITS-1:0]
|
||||
`define INST_MUL_IS_DIV(x) x[2]
|
||||
|
||||
`define INST_FMT_B 3'b000
|
||||
|
@ -148,7 +150,6 @@
|
|||
`define INST_LSU_BITS 4
|
||||
`define INST_LSU_FMT(x) x[2:0]
|
||||
`define INST_LSU_WSIZE(x) x[1:0]
|
||||
`define INST_LSU_OP(x) x[`INST_LSU_BITS-1:0]
|
||||
`define INST_LSU_IS_FENCE(x) x[0]
|
||||
|
||||
`define INST_FENCE_BITS 1
|
||||
|
@ -160,7 +161,6 @@
|
|||
`define INST_CSR_RC 2'h3
|
||||
`define INST_CSR_OTHER 2'h0
|
||||
`define INST_CSR_BITS 2
|
||||
`define INST_CSR_OP(x) x[`INST_CSR_BITS-1:0]
|
||||
|
||||
`define INST_FPU_ADD 4'h0
|
||||
`define INST_FPU_SUB 4'h4
|
||||
|
@ -179,7 +179,6 @@
|
|||
`define INST_FPU_NMSUB 4'hB
|
||||
`define INST_FPU_NMADD 4'hF
|
||||
`define INST_FPU_BITS 4
|
||||
`define INST_FPU_OP(x) x[`INST_FPU_BITS-1:0]
|
||||
|
||||
`define INST_GPU_TMC 3'h0
|
||||
`define INST_GPU_WSPAWN 3'h1
|
||||
|
@ -188,7 +187,6 @@
|
|||
`define INST_GPU_BAR 3'h4
|
||||
`define INST_GPU_OTHER 3'h7
|
||||
`define INST_GPU_BITS 3
|
||||
`define INST_GPU_OP(x) x[`INST_GPU_BITS-1:0]
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
|
@ -134,7 +134,7 @@ module VX_execute #(
|
|||
wire ebreak /* verilator public */;
|
||||
assign ebreak = alu_req_if.valid && alu_req_if.ready
|
||||
&& `INST_ALU_IS_BR(alu_req_if.op_mod)
|
||||
&& (`INST_BR_OP(alu_req_if.op_type) == `INST_BR_EBREAK
|
||||
|| `INST_BR_OP(alu_req_if.op_type) == `INST_BR_ECALL);
|
||||
&& (`INST_BR_BITS'(alu_req_if.op_type) == `INST_BR_EBREAK
|
||||
|| `INST_BR_BITS'(alu_req_if.op_type) == `INST_BR_ECALL);
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -39,6 +39,7 @@ module VX_instr_demux (
|
|||
// ALU unit
|
||||
|
||||
wire alu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_ALU);
|
||||
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
|
@ -48,8 +49,8 @@ module VX_instr_demux (
|
|||
.reset (reset),
|
||||
.valid_in (alu_req_valid),
|
||||
.ready_in (alu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `INST_ALU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, alu_op_type, ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.valid_out (alu_req_if.valid),
|
||||
.ready_out (alu_req_if.ready)
|
||||
);
|
||||
|
@ -57,6 +58,7 @@ module VX_instr_demux (
|
|||
// lsu unit
|
||||
|
||||
wire lsu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_LSU);
|
||||
wire [`INST_LSU_BITS-1:0] lsu_op_type = `INST_LSU_BITS'(ibuffer_if.op_type);
|
||||
wire lsu_is_fence = `INST_LSU_IS_FENCE(ibuffer_if.op_mod);
|
||||
|
||||
VX_skid_buffer #(
|
||||
|
@ -67,8 +69,8 @@ module VX_instr_demux (
|
|||
.reset (reset),
|
||||
.valid_in (lsu_req_valid),
|
||||
.ready_in (lsu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `INST_LSU_OP(ibuffer_if.op_type), lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, lsu_op_type, lsu_is_fence, ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
|
||||
.valid_out (lsu_req_if.valid),
|
||||
.ready_out (lsu_req_if.ready)
|
||||
);
|
||||
|
@ -76,17 +78,21 @@ module VX_instr_demux (
|
|||
// csr unit
|
||||
|
||||
wire csr_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_CSR);
|
||||
wire [`INST_CSR_BITS-1:0] csr_op_type = `INST_CSR_BITS'(ibuffer_if.op_type);
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr = ibuffer_if.imm[`CSR_ADDR_BITS-1:0];
|
||||
wire [`NRI_BITS-1:0] csr_imm = ibuffer_if.imm[`CSR_ADDR_BITS +: `NRI_BITS];
|
||||
wire [31:0] csr_rs1_data = gpr_rsp_if.rs1_data[tid];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32),
|
||||
.OUTPUT_REG (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `INST_CSR_OP(ibuffer_if.op_type), ibuffer_if.imm[`CSR_ADDR_BITS-1:0], ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, ibuffer_if.rs1, gpr_rsp_if.rs1_data[0]}),
|
||||
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.rs1, csr_req_if.rs1_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, csr_op_type, csr_addr, ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, csr_imm, csr_rs1_data}),
|
||||
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.imm, csr_req_if.rs1_data}),
|
||||
.valid_out (csr_req_if.valid),
|
||||
.ready_out (csr_req_if.ready)
|
||||
);
|
||||
|
@ -95,6 +101,7 @@ module VX_instr_demux (
|
|||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire fpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_FPU);
|
||||
wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(ibuffer_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
|
@ -104,8 +111,8 @@ module VX_instr_demux (
|
|||
.reset (reset),
|
||||
.valid_in (fpu_req_valid),
|
||||
.ready_in (fpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `INST_FPU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, fpu_op_type, ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
|
||||
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.valid_out (fpu_req_if.valid),
|
||||
.ready_out (fpu_req_if.ready)
|
||||
);
|
||||
|
@ -116,6 +123,8 @@ module VX_instr_demux (
|
|||
// gpu unit
|
||||
|
||||
wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);
|
||||
wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(ibuffer_if.op_type);
|
||||
wire [31:0] gpu_rs2_data = gpr_rsp_if.rs2_data[tid];
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `NR_BITS + 1 + + `NT_BITS + (`NUM_THREADS * 32 + 32)),
|
||||
|
@ -125,8 +134,8 @@ module VX_instr_demux (
|
|||
.reset (reset),
|
||||
.valid_in (gpu_req_valid),
|
||||
.ready_in (gpu_req_ready),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `INST_GPU_OP(ibuffer_if.op_type), ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, gpu_op_type, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpu_rs2_data}),
|
||||
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
|
||||
.valid_out (gpu_req_if.valid),
|
||||
.ready_out (gpu_req_if.ready)
|
||||
);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue