mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
fpu implementation (part1)
This commit is contained in:
parent
6836f397f8
commit
75e3c31b56
31 changed files with 662 additions and 159 deletions
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -1,3 +1,6 @@
|
|||
[submodule "hw/rtl/fp_cores/fpu_div_sqrt_mvp"]
|
||||
path = hw/rtl/fp_cores/fpu_div_sqrt_mvp
|
||||
url = https://github.com/pulp-platform/fpu_div_sqrt_mvp.git
|
||||
[submodule "hw/rtl/fp_cores/fpnew"]
|
||||
path = hw/rtl/fp_cores/fpnew
|
||||
url = https://github.com/pulp-platform/fpnew.git
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
`include "VX_define.vh"
|
||||
`include "fpnew_pkg.sv"
|
||||
`include "defs_div_sqrt_mvp.sv"
|
||||
|
||||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
|
@ -99,7 +101,7 @@ module VX_alu_unit #(
|
|||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32))
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -11,6 +11,7 @@ module VX_commit #(
|
|||
VX_commit_if lsu_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
|
@ -20,9 +21,10 @@ module VX_commit #(
|
|||
|
||||
wire [`NUM_EXS-1:0] commited_mask;
|
||||
assign commited_mask = {((| alu_commit_if.valid) && alu_commit_if.ready),
|
||||
((| lsu_commit_if.valid) && lsu_commit_if.ready),
|
||||
((| mul_commit_if.valid) && mul_commit_if.ready),
|
||||
((| lsu_commit_if.valid) && lsu_commit_if.ready),
|
||||
((| csr_commit_if.valid) && csr_commit_if.ready),
|
||||
((| mul_commit_if.valid) && mul_commit_if.ready),
|
||||
((| fpu_commit_if.valid) && fpu_commit_if.ready),
|
||||
((| gpu_commit_if.valid) && gpu_commit_if.ready)};
|
||||
|
||||
wire [`NE_BITS:0] num_commits;
|
||||
|
@ -65,6 +67,7 @@ module VX_commit #(
|
|||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
|
||||
.writeback_if (writeback_if)
|
||||
);
|
||||
|
@ -77,11 +80,14 @@ module VX_commit #(
|
|||
if ((| lsu_commit_if.valid) && lsu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.warp_num, lsu_commit_if.curr_PC, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data);
|
||||
end
|
||||
if ((| mul_commit_if.valid) && mul_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
|
||||
end
|
||||
if ((| csr_commit_if.valid) && csr_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.warp_num, csr_commit_if.curr_PC, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
|
||||
end
|
||||
if ((| mul_commit_if.valid) && mul_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.warp_num, mul_commit_if.curr_PC, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
|
||||
end
|
||||
if ((| fpu_commit_if.valid) && fpu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=FPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.warp_num, fpu_commit_if.curr_PC, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
|
||||
end
|
||||
if ((| gpu_commit_if.valid) && gpu_commit_if.ready) begin
|
||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.warp_num, gpu_commit_if.curr_PC, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);
|
||||
|
|
|
@ -15,40 +15,41 @@ module VX_csr_arb (
|
|||
VX_commit_if csr_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
VX_commit_if csr_commit_if
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
|
||||
input wire select_io_req,
|
||||
input wire select_io_rsp
|
||||
);
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire core_select = ~(| csr_io_req_if.valid);
|
||||
|
||||
// requests
|
||||
assign csr_req_if.valid = core_select ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}};
|
||||
assign csr_req_if.warp_num = core_select ? csr_core_req_if.warp_num : 0;
|
||||
assign csr_req_if.curr_PC = core_select ? csr_core_req_if.curr_PC : 0;
|
||||
assign csr_req_if.csr_op = core_select ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_req_if.csr_addr = core_select ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign csr_req_if.csr_mask = core_select ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_req_if.rd = core_select ? csr_core_req_if.rd : 0;
|
||||
assign csr_req_if.wb = core_select ? csr_core_req_if.wb : 0;
|
||||
assign csr_req_if.is_io = ~core_select;
|
||||
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}};
|
||||
assign csr_req_if.warp_num = (~select_io_req) ? csr_core_req_if.warp_num : 0;
|
||||
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
|
||||
assign csr_req_if.csr_op = (~select_io_req) ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
|
||||
assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
|
||||
assign csr_req_if.is_io = select_io_req;
|
||||
|
||||
assign csr_core_req_if.ready = csr_req_if.ready && core_select;
|
||||
assign csr_io_req_if.ready = csr_req_if.ready && ~core_select;
|
||||
assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req);
|
||||
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
|
||||
|
||||
// responses
|
||||
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & csr_rsp_if.is_io;
|
||||
assign csr_io_rsp_if.valid = csr_rsp_if.valid[0] & select_io_rsp;
|
||||
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
|
||||
|
||||
assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~csr_rsp_if.is_io}};
|
||||
assign csr_commit_if.warp_num = csr_rsp_if.warp_num;
|
||||
assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC;
|
||||
assign csr_commit_if.data = csr_rsp_if.data;
|
||||
assign csr_commit_if.rd = csr_rsp_if.rd;
|
||||
assign csr_commit_if.wb = csr_rsp_if.wb;
|
||||
assign csr_commit_if.valid = csr_rsp_if.valid & {`NUM_THREADS{~select_io_rsp}};
|
||||
assign csr_commit_if.warp_num = csr_rsp_if.warp_num;
|
||||
assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC;
|
||||
assign csr_commit_if.data = csr_rsp_if.data;
|
||||
assign csr_commit_if.rd = csr_rsp_if.rd;
|
||||
assign csr_commit_if.wb = csr_rsp_if.wb;
|
||||
|
||||
assign csr_rsp_if.ready = csr_rsp_if.is_io ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -6,7 +6,8 @@ module VX_csr_unit #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_perf_cntrs_if perf_cntrs_if,
|
||||
VX_perf_cntrs_if perf_cntrs_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
|
@ -17,15 +18,23 @@ module VX_csr_unit #(
|
|||
VX_csr_req_if csr_pipe_req_if();
|
||||
VX_commit_if csr_pipe_commit_if();
|
||||
|
||||
wire select_io_req = (| csr_io_req_if.valid);
|
||||
wire select_io_rsp;
|
||||
|
||||
VX_csr_arb csr_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.csr_core_req_if (csr_req_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_req_if (csr_pipe_req_if),
|
||||
|
||||
.csr_rsp_if (csr_pipe_commit_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_commit_if (csr_commit_if)
|
||||
.csr_commit_if (csr_commit_if),
|
||||
|
||||
.select_io_req (select_io_req),
|
||||
.select_io_rsp (select_io_rsp)
|
||||
);
|
||||
|
||||
wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2;
|
||||
|
@ -68,14 +77,14 @@ module VX_csr_unit #(
|
|||
wire stall = ~csr_pipe_commit_if.ready && (| csr_pipe_commit_if.valid);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32)
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + `CSR_ADDR_SIZE + 1 + 32 + 32)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
|
||||
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, csr_pipe_commit_if.is_io, csr_read_data_s2, csr_updated_data_s2})
|
||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.warp_num, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
|
||||
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.warp_num, csr_pipe_commit_if.curr_PC, csr_pipe_commit_if.rd, csr_pipe_commit_if.wb, csr_addr_s2, select_io_rsp, csr_read_data_s2, csr_updated_data_s2})
|
||||
);
|
||||
|
||||
genvar i;
|
||||
|
|
|
@ -19,10 +19,11 @@ module VX_decode #(
|
|||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
|
||||
reg [`ALU_BITS-1:0] alu_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
wire [`LSU_BITS-1:0] lsu_op;
|
||||
reg [`CSR_BITS-1:0] csr_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
reg [`FPU_BITS-1:0] fpu_op;
|
||||
reg [`GPU_BITS-1:0] gpu_op;
|
||||
|
||||
reg [19:0] upper_imm;
|
||||
|
@ -37,6 +38,7 @@ module VX_decode #(
|
|||
wire [`NR_BITS-1:0] rd = instr[11:7];
|
||||
wire [`NR_BITS-1:0] rs1 = instr[19:15];
|
||||
wire [`NR_BITS-1:0] rs2 = instr[24:20];
|
||||
wire [`NR_BITS-1:0] rs3 = instr[31:27];
|
||||
|
||||
// opcode types
|
||||
wire is_rtype = (opcode == `INST_R);
|
||||
|
@ -51,10 +53,9 @@ module VX_decode #(
|
|||
wire is_jals = (opcode == `INST_SYS) && (func3 == 0);
|
||||
wire is_csr = (opcode == `INST_SYS) && (func3 != 0);
|
||||
wire is_gpu = (opcode == `INST_GPU);
|
||||
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
|
||||
wire is_mul = is_rtype && (func7 == 7'h1);
|
||||
|
||||
|
||||
// upper immediate
|
||||
|
||||
always @(*) begin
|
||||
case (opcode)
|
||||
`INST_LUI: upper_imm = {func7, rs2, rs1, func3};
|
||||
|
@ -63,20 +64,8 @@ module VX_decode #(
|
|||
endcase
|
||||
end
|
||||
|
||||
// JAL
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
wire [31:0] jal_offset = {{11{jal_imm[20]}}, jal_imm};
|
||||
wire [11:0] jalr_imm = {func7, rs2};
|
||||
wire [31:0] jalr_offset = {{20{jalr_imm[11]}}, jalr_imm};
|
||||
always @(*) begin
|
||||
case (opcode)
|
||||
`INST_JAL: jalx_offset = jal_offset;
|
||||
`INST_JALR: jalx_offset = jalr_offset;
|
||||
default: jalx_offset = 32'd4;
|
||||
endcase
|
||||
end
|
||||
|
||||
// I-type immediate
|
||||
|
||||
wire alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
wire [11:0] alu_shift_imm = {{7{1'b0}}, rs2};
|
||||
wire [11:0] alu_imm = alu_shift_i ? alu_shift_imm : u_12;
|
||||
|
@ -88,9 +77,26 @@ module VX_decode #(
|
|||
`INST_B: src2_imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
default: src2_imm = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// JAL
|
||||
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
wire [31:0] jal_offset = {{11{jal_imm[20]}}, jal_imm};
|
||||
wire [11:0] jalr_imm = {func7, rs2};
|
||||
wire [31:0] jalr_offset = {{20{jalr_imm[11]}}, jalr_imm};
|
||||
always @(*) begin
|
||||
case (opcode)
|
||||
`INST_JAL: jalx_offset = jal_offset;
|
||||
`INST_JALR: jalx_offset = jalr_offset;
|
||||
default: jalx_offset = 32'd4;
|
||||
endcase
|
||||
end
|
||||
|
||||
// BRANCH
|
||||
|
||||
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
|
||||
|
||||
always @(*) begin
|
||||
br_op = `BR_EQ;
|
||||
case (opcode)
|
||||
|
@ -119,6 +125,7 @@ module VX_decode #(
|
|||
end
|
||||
|
||||
// ALU
|
||||
|
||||
always @(*) begin
|
||||
alu_op = `ALU_OTHER;
|
||||
if (is_lui) begin
|
||||
|
@ -140,7 +147,29 @@ module VX_decode #(
|
|||
end
|
||||
end
|
||||
|
||||
// MUL
|
||||
// LSU
|
||||
|
||||
wire is_lsu = (is_ltype || is_stype);
|
||||
assign lsu_op = {is_stype, func3};
|
||||
|
||||
// CSR
|
||||
|
||||
wire is_csr_imm = is_csr && (func3[2] == 1);
|
||||
|
||||
always @(*) begin
|
||||
csr_op = `CSR_OTHER;
|
||||
case (func3[1:0])
|
||||
2'h1: csr_op = `CSR_RW;
|
||||
2'h2: csr_op = `CSR_RS;
|
||||
2'h3: csr_op = `CSR_RC;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
// MUL
|
||||
|
||||
wire is_mul = is_rtype && (func7 == 7'h1);
|
||||
|
||||
always @(*) begin
|
||||
mul_op = `MUL_MUL;
|
||||
case (func3)
|
||||
|
@ -156,23 +185,50 @@ module VX_decode #(
|
|||
endcase
|
||||
end
|
||||
|
||||
// LSU
|
||||
wire is_lsu = (is_ltype || is_stype);
|
||||
assign lsu_op = {is_stype, func3};
|
||||
// FPU
|
||||
|
||||
// CSR
|
||||
wire is_csr_imm = is_csr && (func3[2] == 1);
|
||||
always @(*) begin
|
||||
csr_op = `CSR_OTHER;
|
||||
case (func3[1:0])
|
||||
2'h1: csr_op = `CSR_RW;
|
||||
2'h2: csr_op = `CSR_RS;
|
||||
2'h3: csr_op = `CSR_RC;
|
||||
default:;
|
||||
endcase
|
||||
wire is_fl = (opcode == `INST_FL) && ((func3 == 2));
|
||||
wire is_fs = (opcode == `INST_FS) && ((func3 == 2));
|
||||
wire is_fci = (opcode == `INST_FCI);
|
||||
wire is_fmadd = (opcode == `INST_FMADD);
|
||||
wire is_fmsub = (opcode == `INST_FMSUB);
|
||||
wire is_fnmsub = (opcode == `INST_FNMSUB);
|
||||
wire is_fnmadd = (opcode == `INST_FNMADD);
|
||||
wire is_fr4 = is_fmadd || is_fmsub || is_fnmsub || is_fnmadd;
|
||||
wire is_fpu = (is_fl || is_fs || is_fci || is_fr4);
|
||||
|
||||
always @(*) begin
|
||||
fpu_op = `FPU_OTHER;
|
||||
if (is_fr4) begin
|
||||
case ({is_fmadd, is_fmsub, is_fnmsub, is_fnmadd})
|
||||
4'b1000: fpu_op = `FPU_MADD;
|
||||
4'b0100: fpu_op = `FPU_MSUB;
|
||||
4'b0010: fpu_op = `FPU_NMSUB;
|
||||
4'b0001: fpu_op = `FPU_NMADD;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
else begin
|
||||
case (func7)
|
||||
7'h00: fpu_op = `FPU_ADD;
|
||||
7'h04: fpu_op = `FPU_SUB;
|
||||
7'h08: fpu_op = `FPU_MUL;
|
||||
7'h0C: fpu_op = `FPU_DIV;
|
||||
7'h2C: fpu_op = `FPU_SQRT;
|
||||
7'h14: fpu_op = (func3 == 3'h0) ? `FPU_MIN : `FPU_MAX;
|
||||
7'h60: fpu_op = (instr[20]) ? `FPU_CVTWUS : `FPU_CVTWS; // doesn't need rs2, and read rs1 from fpReg, WB to intReg
|
||||
7'h68: fpu_op = (instr[20]) ? `FPU_CVTSWU : `FPU_CVTSW; // doesn't need rs2, and read rs1 from intReg
|
||||
7'h70: fpu_op = (func3 == 3'h0) ? `FPU_MVXW : `FPU_CLASS; // both wb to intReg
|
||||
7'h78: fpu_op = `FPU_MVWX;
|
||||
7'h50: fpu_op = `FPU_CMP; // wb to intReg
|
||||
7'h10: fpu_op = (func3[1]) ? `FPU_SGNJX : ((func3[0]) ? `FPU_SGNJN : `FPU_SGNJ);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// GPU
|
||||
|
||||
always @(*) begin
|
||||
gpu_op = `GPU_OTHER;
|
||||
case (func3)
|
||||
|
@ -195,23 +251,23 @@ module VX_decode #(
|
|||
assign decode_tmp_if.ex_type = is_lsu ? `EX_LSU :
|
||||
is_csr ? `EX_CSR :
|
||||
is_mul ? `EX_MUL :
|
||||
is_gpu ? `EX_GPU :
|
||||
is_br ? `EX_ALU :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
is_fpu ? `EX_FPU :
|
||||
is_gpu ? `EX_GPU :
|
||||
is_br ? `EX_ALU :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
|
||||
assign decode_tmp_if.instr_op = is_lsu ? `OP_BITS'(lsu_op) :
|
||||
is_csr ? `OP_BITS'(csr_op) :
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_br ? `OP_BITS'({1'b1, br_op}) :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||
0;
|
||||
is_fpu ? `OP_BITS'(fpu_op) :
|
||||
is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_br ? `OP_BITS'({1'b1, br_op}) :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||
0;
|
||||
|
||||
assign decode_tmp_if.rd = rd;
|
||||
|
||||
assign decode_tmp_if.rs1 = is_lui ? `NR_BITS'(0) : rs1;
|
||||
|
||||
assign decode_tmp_if.rs2 = rs2;
|
||||
|
||||
assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
||||
|
@ -220,20 +276,22 @@ module VX_decode #(
|
|||
src2_imm;
|
||||
|
||||
assign decode_tmp_if.rs1_is_PC = is_auipc;
|
||||
|
||||
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
|
||||
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
|
||||
|
||||
assign decode_tmp_if.use_rs1 = (decode_tmp_if.rs1 != 0)
|
||||
&& (is_jalr || is_btype || is_ltype || is_stype || is_itype || is_rtype || ~is_csr_imm || is_gpu);
|
||||
|
||||
assign decode_tmp_if.use_rs2 = (decode_tmp_if.rs2 != 0)
|
||||
&& (is_btype || is_stype || is_rtype || (is_gpu && (gpu_op == `GPU_BAR || gpu_op == `GPU_WSPAWN)));
|
||||
|
||||
assign decode_tmp_if.rs1_is_fp = (is_fci && ((func7 != 7'h68) && (fpu_op != `FPU_MVWX)) || is_fr4);
|
||||
assign decode_tmp_if.rs2_is_fp = is_fs || (is_fci && ((func7 != 7'h60) && (func7 != 7'h68)) || is_fr4);
|
||||
assign decode_tmp_if.rs3 = rs3;
|
||||
assign decode_tmp_if.use_rs3 = is_fr4;
|
||||
assign decode_tmp_if.frm = func3;
|
||||
|
||||
assign decode_tmp_if.wb = (rd == 0) ? `WB_NO : // disable writeback to r0
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
(is_jal || is_jalr || is_jals) ? `WB_JAL :
|
||||
is_ltype ? `WB_MEM :
|
||||
`WB_NO;
|
||||
// Writeback enable (single bit).
//  * FP-register destinations: FP loads, fused R4 ops, and computational FP ops
//    other than func7 0x50 (compare), 0x70 (move-to-int / classify) and
//    0x60 (convert-to-int) always write back, since f0 is a real register.
//  * Integer-register destinations: the three func7 groups above plus the
//    regular integer/CSR/jump/load instructions write back only when rd != 0.
// The previous form OR-ed three "func7 != K" terms, which is always true and
// therefore never filtered anything; the terms must be AND-ed.
// NOTE(review): assumes x0 write suppression is expected from decode for
// integer-destination FP ops as it is for integer ops - confirm.
assign decode_tmp_if.wb = (is_fpu && (is_fl || is_fr4 || (is_fci && ((func7 != 7'h50) && (func7 != 7'h70) && (func7 != 7'h60)))))
                       || (is_fci && ((func7 == 7'h50) || (func7 == 7'h70) || (func7 == 7'h60)) && (rd != 0))
                       || (~is_fpu && (rd != 0) && (is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype));
|
||||
|
||||
assign join_if.is_join = in_valid && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
|
@ -241,17 +299,17 @@ module VX_decode #(
|
|||
assign wstall_if.wstall = in_valid && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.warp_num = ifetch_rsp_if.warp_num;
|
||||
|
||||
wire stall = ~decode_if.ready && (| decode_if.valid);
|
||||
|
||||
wire stall = ~decode_if.ready && (| decode_if.valid);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + `WB_BITS)
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + 1 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + 1 + `FRM_BITS)
|
||||
) decode_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb}),
|
||||
.out ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb})
|
||||
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2, decode_tmp_if.ex_type, decode_tmp_if.instr_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.rs1_is_fp, decode_tmp_if.rs2_is_fp, decode_tmp_if.frm}),
|
||||
.out ({decode_if.valid, decode_if.warp_num, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.use_rs1, decode_if.use_rs2, decode_if.ex_type, decode_if.instr_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.rs1_is_fp, decode_if.rs2_is_fp, decode_if.frm})
|
||||
);
|
||||
|
||||
assign ifetch_rsp_if.ready = ~stall;
|
||||
|
@ -263,9 +321,7 @@ module VX_decode #(
|
|||
print_ex_type(decode_tmp_if.ex_type);
|
||||
$write(", op=");
|
||||
print_instr_op(decode_tmp_if.ex_type, decode_tmp_if.instr_op);
|
||||
$write(", wb=");
|
||||
print_wb(decode_tmp_if.wb);
|
||||
$write(", rd=%0d, rs1=%0d, rs2=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b\n", decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2);
|
||||
$write(", wb=%b, rd=%0d, rs1=%0d, rs2=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_rs1=%b, use_rs2=%b\n", decode_tmp_if.wb, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.use_rs1, decode_tmp_if.use_rs2);
|
||||
|
||||
// trap unsupported instructions
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.instr_op) == `ALU_OTHER));
|
||||
|
|
|
@ -19,12 +19,13 @@ module VX_execute #(
|
|||
|
||||
// perf
|
||||
VX_perf_cntrs_if perf_cntrs_if,
|
||||
|
||||
|
||||
// inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
|
||||
// outputs
|
||||
|
@ -34,10 +35,13 @@ module VX_execute #(
|
|||
VX_commit_if lsu_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if gpu_commit_if,
|
||||
|
||||
output wire ebreak
|
||||
);
|
||||
VX_fpu_to_csr_if fpu_to_csr_if();
|
||||
VX_fpu_from_csr_if fpu_from_csr_if();
|
||||
|
||||
VX_alu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
|
@ -67,6 +71,7 @@ module VX_execute #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.perf_cntrs_if (perf_cntrs_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
|
@ -82,6 +87,17 @@ module VX_execute #(
|
|||
.mul_commit_if (mul_commit_if)
|
||||
);
|
||||
|
||||
VX_fpu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) fpu_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.fpu_from_csr_if(fpu_from_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.fpu_commit_if (fpu_commit_if)
|
||||
);
|
||||
|
||||
VX_gpu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) gpu_unit (
|
||||
|
|
140
hw/rtl/VX_fpu_unit.v
Normal file
140
hw/rtl/VX_fpu_unit.v
Normal file
|
@ -0,0 +1,140 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Floating-point execution unit.
//
// Translates the core's FPU opcode (`FPU_*) into the operation / modifier /
// rounding-mode triple expected by the fpnew core, feeds it the three source
// operands for all threads at once (vectorial mode, one 32-bit lane per
// thread), and forwards the result to the commit interface and the exception
// flags to the CSR unit.
//
// NOTE(review): commit metadata (valid mask, warp_num, rd, wb) is taken from
// the request currently on fpu_req_if, not carried through the fpnew pipeline
// (tag_i is tied off). This only lines up if the request is held until the
// result appears - confirm against the issue logic.
// NOTE(review): fpu_from_csr_if is not referenced in this module.
module VX_fpu_unit #(
    parameter CORE_ID = 0
) (
    // inputs
    input wire clk,
    input wire reset,

    // inputs
    VX_fpu_req_if       fpu_req_if,
    VX_fpu_from_csr_if  fpu_from_csr_if,

    // outputs
    VX_commit_if        fpu_commit_if,
    VX_fpu_to_csr_if    fpu_to_csr_if
);
    localparam FOP_BITS  = fpnew_pkg::OP_BITS;
    localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
    localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);

    // One 32-bit lane per thread.
    localparam int FPU_DPATHW = `NUM_THREADS * 32;

    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
        Width:         FPU_DPATHW,
        EnableVectors: 1,
        EnableNanBox:  1,
        FpFmtMask:     5'b10000,
        IntFmtMask:    4'b0010
    };

    localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
        PipeRegs:'{'{`LATENCY_FMULADD, 0, 0, 0, 0}, // ADDMUL
                   '{default: `LATENCY_FDIVSQRT},   // DIVSQRT
                   '{default: `LATENCY_FNONCOMP},   // NONCOMP
                   '{default: `LATENCY_FCONV}},     // CONV
        UnitTypes:'{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
                    '{default: fpnew_pkg::MERGED},   // DIVSQRT
                    '{default: fpnew_pkg::PARALLEL}, // NONCOMP
                    '{default: fpnew_pkg::MERGED}},  // CONV
        PipeConfig: fpnew_pkg::DISTRIBUTED
    };

    // fpnew handshake signals
    wire fpu_in_ready;
    wire fpu_in_valid;
    wire fpu_out_ready;
    wire fpu_out_valid;

    wire [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;

    // Only single precision / 32-bit integers are used.
    wire [FMTF_BITS-1:0] fpu_src_fmt = fpnew_pkg::FP32;
    wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
    wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;

    // A request is presented whenever at least one thread is active.
    assign fpu_in_valid     = (| fpu_req_if.valid);
    assign fpu_operands[0]  = fpu_req_if.rs1_data;
    assign fpu_operands[1]  = fpu_req_if.rs2_data;
    assign fpu_operands[2]  = fpu_req_if.rs3_data;
    assign fpu_req_if.ready = fpu_in_ready;

    wire [`NUM_THREADS-1:0][31:0] fpu_result;
    fpnew_pkg::status_t fpu_status;

    reg [FOP_BITS-1:0]  fpu_op;
    reg [`FRM_BITS-1:0] fpu_rnd;
    reg                 fpu_op_mod;

    // Opcode translation. Defaults: SGNJ, no modifier, rounding mode taken
    // from the instruction. For SGNJ / MINMAX the rounding-mode field is
    // overridden because it is used here to pick the variant of the operation
    // (see the fpnew operation table); CMP keeps the instruction's func3.
    always @(*) begin
        fpu_op     = fpnew_pkg::SGNJ;
        fpu_op_mod = 0;
        fpu_rnd    = fpu_req_if.frm;
        case (fpu_req_if.fpu_op)
            `FPU_ADD:    fpu_op = fpnew_pkg::ADD;
            `FPU_SUB:    begin fpu_op = fpnew_pkg::ADD; fpu_op_mod = 1; end
            `FPU_MUL:    fpu_op = fpnew_pkg::MUL;
            `FPU_DIV:    fpu_op = fpnew_pkg::DIV;
            `FPU_SQRT:   fpu_op = fpnew_pkg::SQRT;
            `FPU_MADD:   fpu_op = fpnew_pkg::FMADD;
            `FPU_MSUB:   begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
            `FPU_NMSUB:  fpu_op = fpnew_pkg::FNMSUB;
            `FPU_NMADD:  begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
            `FPU_SGNJ:   begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; end
            `FPU_SGNJN:  begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; end
            `FPU_SGNJX:  begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; end
            `FPU_MIN:    begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
            `FPU_MAX:    begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
            `FPU_CVTWS:  fpu_op = fpnew_pkg::F2I;
            // Unsigned float-to-int: same operation as CVTWS with the modifier
            // set, mirroring CVTSW/CVTSWU below. (Was fpnew_pkg::ADD, which
            // turned this conversion into a subtraction.)
            `FPU_CVTWUS: begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
            `FPU_CVTSW:  fpu_op = fpnew_pkg::I2F;
            `FPU_CVTSWU: begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
            `FPU_MVXW:   begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; end
            `FPU_MVWX:   begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; end
            `FPU_CLASS:  fpu_op = fpnew_pkg::CLASSIFY;
            `FPU_CMP:    fpu_op = fpnew_pkg::CMP;
            default:;
        endcase
    end

    // rst_ni is tied inactive; the core is cleared through flush_i on reset.
    fpnew_top #(
        .Features       (FPU_FEATURES),
        .Implementation (FPU_IMPLEMENTATION),
        .TagType        (logic)
    ) fpnew_core (
        .clk_i          (clk),
        .rst_ni         (1'b1),
        .operands_i     (fpu_operands),
        .rnd_mode_i     (fpu_rnd),
        .op_i           (fpu_op),
        .op_mod_i       (fpu_op_mod),
        .src_fmt_i      (fpu_src_fmt),
        .dst_fmt_i      (fpu_dst_fmt),
        .int_fmt_i      (fpu_int_fmt),
        .vectorial_op_i (1'b1),
        .tag_i          (1'b0),
        .in_valid_i     (fpu_in_valid),
        .in_ready_o     (fpu_in_ready),
        .flush_i        (reset),
        .result_o       (fpu_result),
        .status_o       (fpu_status),
        `UNUSED_PIN (tag_o),
        .out_valid_o    (fpu_out_valid),
        .out_ready_i    (fpu_commit_if.ready),
        `UNUSED_PIN (busy_o)
    );

    // Kept for readability of the handshake; mirrors the commit-side ready.
    assign fpu_out_ready = fpu_commit_if.ready;

    // Commit: per-thread valid is the request mask gated by result-valid.
    assign fpu_commit_if.valid    = fpu_req_if.valid & {`NUM_THREADS{fpu_out_valid}};
    // warp_num was previously left undriven although the commit stage reads it.
    assign fpu_commit_if.warp_num = fpu_req_if.warp_num;
    assign fpu_commit_if.data     = fpu_result;
    assign fpu_commit_if.wb       = fpu_req_if.wb;
    assign fpu_commit_if.rd       = fpu_req_if.rd;
    // NOTE(review): fpu_commit_if.curr_PC is still undriven here; drive it
    // from the request if VX_fpu_req_if carries curr_PC - confirm.

    // Exception flags forwarded to the CSR unit whenever a result is valid.
    assign fpu_to_csr_if.valid     = fpu_out_valid;
    assign fpu_to_csr_if.warp_num  = fpu_req_if.warp_num;
    assign fpu_to_csr_if.fflags_NV = fpu_status.NV;
    assign fpu_to_csr_if.fflags_DZ = fpu_status.DZ;
    assign fpu_to_csr_if.fflags_OF = fpu_status.OF;
    assign fpu_to_csr_if.fflags_UF = fpu_status.UF;
    assign fpu_to_csr_if.fflags_NX = fpu_status.NX;

endmodule
|
94
hw/rtl/VX_gpr_fp_ctrl.v
Normal file
94
hw/rtl/VX_gpr_fp_ctrl.v
Normal file
|
@ -0,0 +1,94 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// control module to support multi-cycle read for fp register
|
||||
|
||||
// Register-read controller for instructions with up to three source operands.
//
// For an instruction with decode_if.use_rs3 set, the read is spread over two
// phases: in phase 0 the rs1/rs2 addresses are presented and gpr_delay is
// raised; in phase 1 raddr1 is switched to rs3, and the third operand is taken
// from the rs1 FP read port while rs1/rs2 come from the values registered on
// the previous clock edge. All other instructions pass the selected int/FP
// read data straight through with rs3 forced to a constant.
module VX_gpr_fp_ctrl (
    input wire clk,
    input wire reset,

    VX_decode_if decode_if,

    input wire [`NUM_THREADS-1:0][31:0] rs1_int_data,
    input wire [`NUM_THREADS-1:0][31:0] rs2_int_data,
    input wire [`NUM_THREADS-1:0][31:0] rs1_fp_data,
    input wire [`NUM_THREADS-1:0][31:0] rs2_fp_data,

    // outputs
    output wire [`NR_BITS-1:0] raddr1,
    output wire [`NR_BITS-1:0] raddr2,

    VX_gpr_data_if gpr_data_if,

    input wire schedule_delay,
    output wire gpr_delay
);
    // param
    localparam GPR_DELAY_WID = 1;

    // Current read phase (0 = first/only read, 1 = rs3 read).
    reg [GPR_DELAY_WID-1:0] read_phase;

    // rs1/rs2 values registered every clock for use in the rs3 phase.
    reg [`NUM_THREADS-1:0][31:0] held_rs1;
    reg [`NUM_THREADS-1:0][31:0] held_rs2;

    // Combinational operand selection driving gpr_data_if.
    reg [`NUM_THREADS-1:0][31:0] rs1_mux;
    reg [`NUM_THREADS-1:0][31:0] rs2_mux;
    reg [`NUM_THREADS-1:0][31:0] rs3_mux;

    // Phase tracking: advances to 1 for one cycle on a three-operand
    // instruction, and is cleared on reset or while schedule_delay is set.
    always @(posedge clk) begin
        if (reset)
            read_phase <= 0;
        else if (!schedule_delay)
            read_phase <= decode_if.use_rs3 && (read_phase == 0);
        else
            read_phase <= 0;
    end

    // Capture the first two operands (FP data, or PC / immediate / int data).
    always @(posedge clk) begin
        if (reset) begin
            held_rs1 <= 0;
            held_rs2 <= 0;
        end else begin
            if (decode_if.rs1_is_fp)
                held_rs1 <= rs1_fp_data;
            else
                held_rs1 <= decode_if.rs1_is_PC ? {`NUM_THREADS{decode_if.curr_PC}} : rs1_int_data;

            if (decode_if.rs2_is_fp)
                held_rs2 <= rs2_fp_data;
            else
                held_rs2 <= decode_if.rs2_is_imm ? {`NUM_THREADS{decode_if.imm}} : rs2_int_data;
        end
    end

    // outputs

    // Request one extra cycle while a three-operand read is in its first phase.
    assign gpr_delay = decode_if.use_rs3 && (read_phase == 0);

    // In the rs3 phase the first read port fetches rs3 instead of rs1.
    assign raddr1 = read_phase ? decode_if.rs3 : decode_if.rs1;
    assign raddr2 = decode_if.rs2;

    // NOTE(review): the pass-through path below selects only between FP and
    // int read data; unlike the registered path it does not apply rs1_is_PC /
    // rs2_is_imm - confirm that PC/immediate substitution happens elsewhere.
    always @(*) begin
        // Default: single-phase read, rs3 unused.
        rs1_mux = decode_if.rs1_is_fp ? rs1_fp_data : rs1_int_data;
        rs2_mux = decode_if.rs2_is_fp ? rs2_fp_data : rs2_int_data;
        rs3_mux = {`NUM_THREADS{32'h8000_0000}}; // default value: -0 in single fp

        // Three-operand instruction: registered rs1/rs2, live port 1 as rs3.
        if (decode_if.use_rs3) begin
            rs1_mux = held_rs1;
            rs2_mux = held_rs2;
            rs3_mux = rs1_fp_data;
        end
    end

    assign gpr_data_if.rs1_data = rs1_mux;
    assign gpr_data_if.rs2_data = rs2_mux;
    assign gpr_data_if.rs3_data = rs3_mux;

endmodule
|
|
@ -4,42 +4,76 @@ module VX_gpr_stage #(
|
|||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_wb_if writeback_if,
|
||||
VX_decode_if decode_if,
|
||||
VX_decode_if decode_if,
|
||||
|
||||
// outputs
|
||||
VX_gpr_data_if gpr_data_if
|
||||
VX_gpr_data_if gpr_data_if,
|
||||
|
||||
input wire schedule_delay,
|
||||
output wire gpr_delay
|
||||
);
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data_all [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data_all [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_imm;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_int_data [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_int_data [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_fp_data [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_fp_data [`NUM_WARPS-1:0];
|
||||
wire [`NUM_THREADS-1:0] we [`NUM_WARPS-1:0];
|
||||
|
||||
wire [`NR_BITS-1:0] raddr1;
|
||||
wire [`NR_BITS-1:0] raddr2;
|
||||
|
||||
genvar i;
|
||||
|
||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign rs1_PC[i] = decode_if.curr_PC;
|
||||
assign rs2_imm[i] = decode_if.imm;
|
||||
end
|
||||
|
||||
assign gpr_data_if.rs1_data = decode_if.rs1_is_PC ? rs1_PC : rs1_data_all[decode_if.warp_num];
|
||||
assign gpr_data_if.rs2_data = decode_if.rs2_is_imm ? rs2_imm : rs2_data_all[decode_if.warp_num];
|
||||
|
||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign we[i] = writeback_if.valid & {`NUM_THREADS{(i == writeback_if.warp_num)}};
|
||||
VX_gpr_ram gpr_ram (
|
||||
|
||||
// Int GPRs
|
||||
VX_gpr_ram gpr_int_ram (
|
||||
.clk (clk),
|
||||
.we (we[i]),
|
||||
.we (we[i] & {`NUM_THREADS{~writeback_if.is_fp}}),
|
||||
.waddr (writeback_if.rd),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (decode_if.rs1),
|
||||
.rs2 (decode_if.rs2),
|
||||
.rs1_data (rs1_data_all[i]),
|
||||
.rs2_data (rs2_data_all[i])
|
||||
.rs1 (raddr1),
|
||||
.rs2 (raddr2),
|
||||
.rs1_data (rs1_int_data[i]),
|
||||
.rs2_data (rs2_int_data[i])
|
||||
);
|
||||
|
||||
// FP GPRs
|
||||
VX_gpr_ram gpr_fp_ram (
|
||||
.clk (clk),
|
||||
.we (we[i] & {`NUM_THREADS{writeback_if.is_fp}}),
|
||||
.waddr (writeback_if.rd),
|
||||
.wdata (writeback_if.data),
|
||||
.rs1 (raddr1),
|
||||
.rs2 (raddr2),
|
||||
.rs1_data (rs1_fp_data[i]),
|
||||
.rs2_data (rs2_fp_data[i])
|
||||
);
|
||||
|
||||
// controller for multi-cycle read
|
||||
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
//inputs
|
||||
.decode_if (decode_if),
|
||||
.rs1_int_data (rs1_int_data[i]),
|
||||
.rs2_int_data (rs2_int_data[i]),
|
||||
.rs1_fp_data (rs1_fp_data[i]),
|
||||
.rs2_fp_data (rs2_fp_data[i]),
|
||||
|
||||
// outputs
|
||||
.raddr1 (raddr1),
|
||||
.raddr2 (raddr2),
|
||||
.gpr_data_if (gpr_data_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.gpr_delay (gpr_delay)
|
||||
);
|
||||
end
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
|
|
@ -79,7 +79,7 @@ module VX_gpu_unit #(
|
|||
assign gpu_commit_if.valid = gpu_req_if.valid;
|
||||
assign gpu_commit_if.warp_num = gpu_req_if.warp_num;
|
||||
assign gpu_commit_if.curr_PC = gpu_req_if.curr_PC;
|
||||
assign gpu_commit_if.wb = `WB_NO;
|
||||
assign gpu_commit_if.wb = 0;
|
||||
assign gpu_commit_if.rd = 0;
|
||||
assign gpu_commit_if.data = 0;
|
||||
|
||||
|
|
|
@ -13,16 +13,19 @@ module VX_issue #(
|
|||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
VX_gpr_data_if gpr_data_if();
|
||||
wire schedule_delay;
|
||||
wire gpr_delay;
|
||||
|
||||
wire alu_busy = ~alu_req_if.ready/* && (| alu_req_if.valid)*/;
|
||||
wire lsu_busy = ~lsu_req_if.ready/* && (| lsu_req_if.valid)*/;
|
||||
wire csr_busy = ~csr_req_if.ready/* && (| csr_req_if.valid)*/;
|
||||
wire mul_busy = ~mul_req_if.ready/* && (| mul_req_if.valid)*/;
|
||||
wire gpu_busy = ~gpu_req_if.ready/* && (| gpu_req_if.valid)*/;
|
||||
wire alu_busy = ~alu_req_if.ready;
|
||||
wire lsu_busy = ~lsu_req_if.ready;
|
||||
wire csr_busy = ~csr_req_if.ready;
|
||||
wire mul_busy = ~mul_req_if.ready;
|
||||
// FPU back-pressure seen by the scheduler: asserted while the FPU request
// port is not ready. Must track fpu_req_if (as stall_fpu/flush_fpu do), not
// the MUL port, otherwise FPU issue is gated by the wrong unit.
wire fpu_busy = ~fpu_req_if.ready;
|
||||
wire gpu_busy = ~gpu_req_if.ready;
|
||||
|
||||
VX_scheduler #(
|
||||
.CORE_ID(CORE_ID)
|
||||
|
@ -31,10 +34,12 @@ module VX_issue #(
|
|||
.reset (reset),
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.gpr_busy (gpr_delay),
|
||||
.alu_busy (alu_busy),
|
||||
.lsu_busy (lsu_busy),
|
||||
.csr_busy (csr_busy),
|
||||
.mul_busy (mul_busy),
|
||||
.fpu_busy (fpu_busy),
|
||||
.gpu_busy (gpu_busy),
|
||||
.schedule_delay (schedule_delay),
|
||||
`UNUSED_PIN (is_empty)
|
||||
|
@ -43,16 +48,20 @@ module VX_issue #(
|
|||
VX_gpr_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) gpr_stage (
|
||||
.clk (clk),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.gpr_data_if (gpr_data_if)
|
||||
.gpr_data_if (gpr_data_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.gpr_delay (gpr_delay)
|
||||
);
|
||||
|
||||
VX_alu_req_if alu_req_tmp_if();
|
||||
VX_lsu_req_if lsu_req_tmp_if();
|
||||
VX_csr_req_if csr_req_tmp_if();
|
||||
VX_mul_req_if mul_req_tmp_if();
|
||||
VX_fpu_req_if fpu_req_tmp_if();
|
||||
VX_gpu_req_if gpu_req_tmp_if();
|
||||
|
||||
VX_issue_mux issue_mux (
|
||||
|
@ -62,6 +71,7 @@ module VX_issue #(
|
|||
.lsu_req_if (lsu_req_tmp_if),
|
||||
.csr_req_if (csr_req_tmp_if),
|
||||
.mul_req_if (mul_req_tmp_if),
|
||||
.fpu_req_if (fpu_req_tmp_if),
|
||||
.gpu_req_if (gpu_req_tmp_if)
|
||||
);
|
||||
|
||||
|
@ -69,16 +79,18 @@ module VX_issue #(
|
|||
wire stall_lsu = ~lsu_req_if.ready || schedule_delay;
|
||||
wire stall_csr = ~csr_req_if.ready || schedule_delay;
|
||||
wire stall_mul = ~mul_req_if.ready || schedule_delay;
|
||||
wire stall_fpu = ~fpu_req_if.ready || schedule_delay;
|
||||
wire stall_gpu = ~gpu_req_if.ready || schedule_delay;
|
||||
|
||||
wire flush_alu = alu_req_if.ready && schedule_delay;
|
||||
wire flush_lsu = lsu_req_if.ready && schedule_delay;
|
||||
wire flush_csr = csr_req_if.ready && schedule_delay;
|
||||
wire flush_mul = mul_req_if.ready && schedule_delay;
|
||||
wire flush_fpu = fpu_req_if.ready && schedule_delay;
|
||||
wire flush_gpu = gpu_req_if.ready && schedule_delay;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `ALU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -89,7 +101,7 @@ module VX_issue #(
|
|||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + 1 + `BYTEEN_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32)
|
||||
) lsu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -100,7 +112,7 @@ module VX_issue #(
|
|||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + `WB_BITS + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `CSR_BITS + 1 + `NR_BITS + `CSR_ADDR_SIZE + 32 + 1)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -110,8 +122,8 @@ module VX_issue #(
|
|||
.out ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.csr_op, csr_req_if.wb, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + `WB_BITS + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `MUL_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -121,6 +133,17 @@ module VX_issue #(
|
|||
.out ({mul_req_if.valid, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.mul_op, mul_req_if.wb, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS +`NW_BITS + 32 + `FPU_BITS + 1 + `NR_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + `FRM_BITS)
|
||||
) fpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_fpu),
|
||||
.flush (flush_fpu),
|
||||
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data, fpu_req_tmp_if.frm}),
|
||||
.out ({fpu_req_if.valid, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.fpu_op, fpu_req_if.wb, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data, fpu_req_if.frm})
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
|
||||
) gpu_reg (
|
||||
|
@ -140,6 +163,9 @@ module VX_issue #(
|
|||
if ((| mul_req_tmp_if.valid) && ~stall_mul) begin
|
||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.mul_op, mul_req_tmp_if.wb, mul_req_tmp_if.rd, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data);
|
||||
end
|
||||
if ((| fpu_req_tmp_if.valid) && ~stall_fpu) begin
|
||||
// Trace an FPU request leaving the issue stage (tagged ex=FPU so it is not confused with MUL issues).
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=FPU, op=%0d, wb=%d, rd=%0d, rs1=%0h, rs2=%0h", $time, CORE_ID, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.wb, fpu_req_tmp_if.rd, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data);
|
||||
end
|
||||
if ((| lsu_req_tmp_if.valid) && ~stall_lsu) begin
|
||||
// Trace an LSU request leaving the issue stage; argument order matches the format string (wb before rd).
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, rw=%b, wb=%0d, rd=%0d, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.rw, lsu_req_tmp_if.wb, lsu_req_tmp_if.rd, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset);
|
||||
end
|
||||
|
|
|
@ -10,6 +10,7 @@ module VX_issue_mux (
|
|||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
|
||||
|
@ -17,6 +18,7 @@ module VX_issue_mux (
|
|||
wire[`NUM_THREADS-1:0] is_lsu = {`NUM_THREADS{decode_if.ex_type == `EX_LSU}};
|
||||
wire[`NUM_THREADS-1:0] is_csr = {`NUM_THREADS{decode_if.ex_type == `EX_CSR}};
|
||||
wire[`NUM_THREADS-1:0] is_mul = {`NUM_THREADS{decode_if.ex_type == `EX_MUL}};
|
||||
wire[`NUM_THREADS-1:0] is_fpu = {`NUM_THREADS{decode_if.ex_type == `EX_FPU}};
|
||||
wire[`NUM_THREADS-1:0] is_gpu = {`NUM_THREADS{decode_if.ex_type == `EX_GPU}};
|
||||
|
||||
// ALU unit
|
||||
|
@ -64,6 +66,18 @@ module VX_issue_mux (
|
|||
assign mul_req_if.rd = decode_if.rd;
|
||||
assign mul_req_if.wb = decode_if.wb;
|
||||
|
||||
// FPU unit
|
||||
assign fpu_req_if.valid = decode_if.valid & is_fpu;
|
||||
assign fpu_req_if.warp_num = decode_if.warp_num;
|
||||
assign fpu_req_if.curr_PC = decode_if.curr_PC;
|
||||
assign fpu_req_if.fpu_op = `FPU_OP(decode_if.instr_op);
|
||||
assign fpu_req_if.rs1_data = gpr_data_if.rs1_data;
|
||||
assign fpu_req_if.rs2_data = gpr_data_if.rs2_data;
|
||||
assign fpu_req_if.rs3_data = gpr_data_if.rs3_data;
|
||||
assign fpu_req_if.frm = decode_if.frm;
|
||||
assign fpu_req_if.rd = decode_if.rd;
|
||||
assign fpu_req_if.wb = decode_if.wb;
|
||||
|
||||
// GPU unit
|
||||
assign gpu_req_if.valid = decode_if.valid & is_gpu;
|
||||
assign gpu_req_if.warp_num = decode_if.warp_num;
|
||||
|
|
|
@ -28,7 +28,7 @@ module VX_lsu_unit #(
|
|||
wire [`BYTEEN_BITS-1:0] mem_byteen;
|
||||
wire [`NR_BITS-1:0] use_rd;
|
||||
wire [`NW_BITS-1:0] use_warp_num;
|
||||
wire [`WB_BITS-1:0] use_wb;
|
||||
wire use_wb;
|
||||
wire [31:0] use_pc;
|
||||
wire mrq_full;
|
||||
|
||||
|
@ -69,7 +69,7 @@ module VX_lsu_unit #(
|
|||
`IGNORE_WARNINGS_END
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + (`NUM_THREADS * 32) + `BYTEEN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + `NW_BITS + `WB_BITS + 32)
|
||||
.N(`NUM_THREADS + (`NUM_THREADS * 32) + `BYTEEN_BITS + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + `NW_BITS + 1 + 32)
|
||||
) mem_req_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -97,7 +97,7 @@ module VX_lsu_unit #(
|
|||
wire mrq_pop = mrq_pop_part && (0 == mem_rsp_mask_upd);
|
||||
|
||||
VX_index_queue #(
|
||||
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + `WB_BITS + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
|
||||
.DATAW (`LOG2UP(`DCREQ_SIZE) + 32 + 1 + (`NUM_THREADS * 2) + `BYTEEN_BITS + `NR_BITS + `NW_BITS),
|
||||
.SIZE (`DCREQ_SIZE)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
|
|
|
@ -36,7 +36,7 @@ module VX_mul_unit #(
|
|||
.WIDTHB(33),
|
||||
.WIDTHP(64),
|
||||
.SIGNED(1),
|
||||
.PIPELINE(`MUL_LATENCY)
|
||||
.PIPELINE(`LATENCY_IMUL)
|
||||
) multiplier (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
|
@ -52,7 +52,7 @@ module VX_mul_unit #(
|
|||
.WIDTHR(32),
|
||||
.NSIGNED(1),
|
||||
.DSIGNED(1),
|
||||
.PIPELINE(`DIV_LATENCY)
|
||||
.PIPELINE(`LATENCY_IDIV)
|
||||
) sdiv (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
|
@ -81,7 +81,7 @@ module VX_mul_unit #(
|
|||
|
||||
reg result_avail;
|
||||
reg [4:0] pending_ctr;
|
||||
wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `DIV_LATENCY : `MUL_LATENCY;
|
||||
wire [4:0] instr_delay = `IS_DIV_OP(alu_op) ? `LATENCY_IDIV : `LATENCY_IMUL;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -112,7 +112,7 @@ module VX_mul_unit #(
|
|||
wire flush = mul_commit_if.ready && pipeline_stall;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + `WB_BITS + (`NUM_THREADS * 32))
|
||||
.N(`NUM_THREADS + `NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
||||
) mul_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -110,6 +110,7 @@ module VX_pipeline #(
|
|||
VX_lsu_req_if lsu_req_if();
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_mul_req_if mul_req_if();
|
||||
VX_fpu_req_if fpu_req_if();
|
||||
VX_gpu_req_if gpu_req_if();
|
||||
VX_wb_if writeback_if();
|
||||
VX_wstall_if wstall_if();
|
||||
|
@ -118,6 +119,7 @@ module VX_pipeline #(
|
|||
VX_commit_if lsu_commit_if();
|
||||
VX_commit_if csr_commit_if();
|
||||
VX_commit_if mul_commit_if();
|
||||
VX_commit_if fpu_commit_if();
|
||||
VX_commit_if gpu_commit_if();
|
||||
|
||||
VX_fetch #(
|
||||
|
@ -159,6 +161,7 @@ module VX_pipeline #(
|
|||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
|
@ -181,6 +184,7 @@ module VX_pipeline #(
|
|||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if),
|
||||
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
|
@ -189,6 +193,7 @@ module VX_pipeline #(
|
|||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.ebreak (ebreak)
|
||||
|
@ -204,6 +209,7 @@ module VX_pipeline #(
|
|||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.writeback_if (writeback_if),
|
||||
|
|
|
@ -8,10 +8,12 @@ module VX_scheduler #(
|
|||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
input wire gpr_busy,
|
||||
input wire alu_busy,
|
||||
input wire lsu_busy,
|
||||
input wire csr_busy,
|
||||
input wire mul_busy,
|
||||
input wire fpu_busy,
|
||||
input wire gpu_busy,
|
||||
output wire schedule_delay,
|
||||
output wire is_empty
|
||||
|
@ -19,23 +21,27 @@ module VX_scheduler #(
|
|||
localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1);
|
||||
|
||||
reg [`NUM_REGS-1:0][`NUM_THREADS-1:0] rename_table [`NUM_WARPS-1:0];
|
||||
reg [`NUM_REGS-1:0] busy_table[`NUM_WARPS-1:0];
|
||||
reg [`NUM_REGS-1:0] busy_table [`NUM_WARPS-1:0];
|
||||
reg [CTVW-1:0] count_valid;
|
||||
|
||||
wire rs1_rename = busy_table[decode_if.warp_num][decode_if.rs1];
|
||||
wire rs2_rename = busy_table[decode_if.warp_num][decode_if.rs2];
|
||||
wire rs3_rename = busy_table[decode_if.warp_num][decode_if.rs3];
|
||||
wire rd_rename = busy_table[decode_if.warp_num][decode_if.rd];
|
||||
|
||||
wire rs1_rename_qual = (rs1_rename) && (decode_if.use_rs1);
|
||||
wire rs2_rename_qual = (rs2_rename) && (decode_if.use_rs2);
|
||||
wire rd_rename_qual = (rd_rename) && (decode_if.wb != 0);
|
||||
wire rs1_rename_qual = rs1_rename && decode_if.use_rs1;
|
||||
wire rs2_rename_qual = rs2_rename && decode_if.use_rs2;
|
||||
wire rs3_rename_qual = rs3_rename && decode_if.use_rs3;
|
||||
wire rd_rename_qual = rd_rename && decode_if.wb;
|
||||
|
||||
wire rename_valid = (rs1_rename_qual || rs2_rename_qual || rd_rename_qual);
|
||||
wire rename_valid = (rs1_rename_qual || rs2_rename_qual || rs3_rename_qual || rd_rename_qual);
|
||||
|
||||
wire ex_stalled = ((alu_busy && (decode_if.ex_type == `EX_ALU))
|
||||
wire ex_stalled = ((gpr_busy)
|
||||
|| (alu_busy && (decode_if.ex_type == `EX_ALU))
|
||||
|| (lsu_busy && (decode_if.ex_type == `EX_LSU))
|
||||
|| (csr_busy && (decode_if.ex_type == `EX_CSR))
|
||||
|| (mul_busy && (decode_if.ex_type == `EX_MUL))
|
||||
|| (fpu_busy && (decode_if.ex_type == `EX_FPU))
|
||||
|| (gpu_busy && (decode_if.ex_type == `EX_GPU)));
|
||||
|
||||
wire stall = (ex_stalled || rename_valid) && (| decode_if.valid);
|
||||
|
@ -82,7 +88,7 @@ module VX_scheduler #(
|
|||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (stall) begin
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, rename=%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, rd_rename_qual, rs1_rename_qual, rs2_rename_qual, alu_busy, lsu_busy, csr_busy, mul_busy, gpu_busy);
|
||||
$display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, rename=%b%b%b, alu=%b, lsu=%b, csr=%b, mul=%b, fpu=%b, gpu=%b", $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, rd_rename_qual, rs1_rename_qual, rs2_rename_qual, alu_busy, lsu_busy, csr_busy, mul_busy, fpu_busy, gpu_busy);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -9,17 +9,19 @@ module VX_writeback #(
|
|||
// inputs
|
||||
VX_commit_if alu_commit_if,
|
||||
VX_commit_if lsu_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if mul_commit_if,
|
||||
VX_commit_if fpu_commit_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_wb_if writeback_if
|
||||
);
|
||||
|
||||
wire lsu_valid = (| lsu_commit_if.valid) && (lsu_commit_if.wb != `WB_NO);
|
||||
wire mul_valid = (| mul_commit_if.valid) && (mul_commit_if.wb != `WB_NO);
|
||||
wire alu_valid = (| alu_commit_if.valid) && (alu_commit_if.wb != `WB_NO);
|
||||
wire csr_valid = (| csr_commit_if.valid) && (csr_commit_if.wb != `WB_NO);
|
||||
wire alu_valid = (| alu_commit_if.valid) && alu_commit_if.wb;
|
||||
wire lsu_valid = (| lsu_commit_if.valid) && lsu_commit_if.wb;
|
||||
wire csr_valid = (| csr_commit_if.valid) && csr_commit_if.wb;
|
||||
wire mul_valid = (| mul_commit_if.valid) && mul_commit_if.wb;
|
||||
wire fpu_valid = (| fpu_commit_if.valid) && fpu_commit_if.wb;
|
||||
|
||||
VX_wb_if writeback_tmp_if();
|
||||
|
||||
|
@ -47,23 +49,26 @@ module VX_writeback #(
|
|||
csr_valid ? csr_commit_if.rd :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.is_fp = fpu_valid && fpu_commit_if.ready;
|
||||
|
||||
wire stall = ~writeback_if.ready && (| writeback_if.valid);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32))
|
||||
.N(`NUM_THREADS + `NW_BITS + `NR_BITS + (`NUM_THREADS * 32) + 1)
|
||||
) wb_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data}),
|
||||
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data})
|
||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.rd, writeback_tmp_if.data, writeback_tmp_if.is_fp}),
|
||||
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.rd, writeback_if.data, writeback_if.is_fp})
|
||||
);
|
||||
|
||||
assign lsu_commit_if.ready = !stall;
|
||||
assign mul_commit_if.ready = !stall && !lsu_valid;
|
||||
assign alu_commit_if.ready = !stall && !lsu_valid && !mul_valid;
|
||||
assign csr_commit_if.ready = !stall && !lsu_valid && !mul_valid && !alu_valid;
|
||||
assign fpu_commit_if.ready = !stall && !lsu_valid;
|
||||
assign mul_commit_if.ready = !stall && !lsu_valid && !fpu_valid;
|
||||
assign alu_commit_if.ready = !stall && !lsu_valid && !fpu_valid && !mul_valid;
|
||||
assign csr_commit_if.ready = !stall && !lsu_valid && !fpu_valid && !mul_valid && !alu_valid;
|
||||
|
||||
// special workaround to control RISC-V benchmarks termination on Verilator
|
||||
reg [31:0] last_data_wb /* verilator public */;
|
||||
|
|
1
hw/rtl/fp_cores/fpnew
Submodule
1
hw/rtl/fp_cores/fpnew
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 1def7bb630ceae2ebc58921f6b5ee3e686fb6d5a
|
|
@ -11,7 +11,7 @@ interface VX_alu_req_if ();
|
|||
|
||||
wire [`ALU_BITS-1:0] alu_op;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
|
|
|
@ -10,8 +10,7 @@ interface VX_commit_if ();
|
|||
wire [31:0] curr_PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire is_io;
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -15,7 +15,7 @@ interface VX_csr_req_if ();
|
|||
wire [31:0] csr_mask;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire wb;
|
||||
wire is_io;
|
||||
|
||||
wire ready;
|
||||
|
|
|
@ -19,12 +19,19 @@ interface VX_decode_if ();
|
|||
wire [31:0] imm;
|
||||
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
wire rs2_is_imm;
|
||||
|
||||
wire use_rs1;
|
||||
wire use_rs2;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
// FP states
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
wire use_rs3;
|
||||
wire rs1_is_fp;
|
||||
wire rs2_is_fp;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
wire wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
|
|
16
hw/rtl/interfaces/VX_fpu_from_csr_if.v
Normal file
16
hw/rtl/interfaces/VX_fpu_from_csr_if.v
Normal file
|
@ -0,0 +1,16 @@
|
|||
// Include guard for the VX_fpu_from_csr_if interface definition.
`ifndef VX_FPU_FROM_CSR_IF
|
||||
`define VX_FPU_FROM_CSR_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Interface carrying one `FRM_BITS-wide frm field per warp.
// NOTE(review): presumably the floating-point rounding mode supplied by the
// CSR unit to the FPU; the driver is not visible here, so confirm.
interface VX_fpu_from_csr_if ();
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
|
||||
// frm[w] is the frm value for warp w (`NUM_WARPS entries).
wire [`NUM_WARPS-1:0][`FRM_BITS-1:0] frm;
|
||||
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
26
hw/rtl/interfaces/VX_fpu_req_if.v
Normal file
26
hw/rtl/interfaces/VX_fpu_req_if.v
Normal file
|
@ -0,0 +1,26 @@
|
|||
// Include guard for the VX_fpu_req_if interface definition.
`ifndef VX_FPU_REQ_IF
|
||||
`define VX_FPU_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Request bundle for the FPU: per-thread valid mask, warp/PC identification,
// operation and frm fields, write-back control, three per-thread 32-bit
// source operands, and a ready signal.
interface VX_fpu_req_if ();
|
||||
|
||||
// One valid bit per thread.
wire [`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
// Operation selector and frm field.
// NOTE(review): frm is presumably the rounding mode; confirm against decode.
wire [`FPU_BITS-1:0] fpu_op;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
// Single-bit write-back flag and destination register index.
wire wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
// Source operands, one 32-bit word per thread.
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
||||
// The issue stage treats a low ready as "unit busy" (stalls its fpu_reg).
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
23
hw/rtl/interfaces/VX_fpu_to_csr_if.v
Normal file
23
hw/rtl/interfaces/VX_fpu_to_csr_if.v
Normal file
|
@ -0,0 +1,23 @@
|
|||
// Include guard for the VX_fpu_to_csr_if interface definition.
`ifndef VX_FPU_TO_CSR_IF
|
||||
`define VX_FPU_TO_CSR_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Interface bundling a valid strobe, a warp number and five single-bit
// fflags_* signals.
// NOTE(review): presumably the FPU's exception-flag report to the CSR unit;
// neither endpoint is visible here, so confirm.
interface VX_fpu_to_csr_if ();
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
// NOTE(review): suffixes match the RISC-V fflags bits (NV invalid operation,
// DZ divide by zero, OF overflow, UF underflow, NX inexact); confirm.
wire fflags_NV;
|
||||
wire fflags_DZ;
|
||||
wire fflags_OF;
|
||||
wire fflags_UF;
|
||||
wire fflags_NX;
|
||||
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -7,6 +7,7 @@ interface VX_gpr_data_if ();
|
|||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ interface VX_lsu_req_if ();
|
|||
wire rw;
|
||||
wire [`BYTEEN_BITS-1:0] byteen;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] store_data;
|
||||
|
|
|
@ -11,7 +11,7 @@ interface VX_mul_req_if ();
|
|||
|
||||
wire [`MUL_BITS-1:0] mul_op;
|
||||
|
||||
wire [`WB_BITS-1:0] wb;
|
||||
wire wb;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
|
|
|
@ -9,6 +9,7 @@ interface VX_wb_if ();
|
|||
wire [`NW_BITS-1:0] warp_num;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire is_fp;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
11
hw/simulate/verilator.vlt
Normal file
11
hw/simulate/verilator.vlt
Normal file
|
@ -0,0 +1,11 @@
|
|||
`verilator_config
|
||||
|
||||
lint_off -rule BLKANDNBLK -file "../rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNOPTFLAT -file "../rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule WIDTH -file "../rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule UNUSED -file "../rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule LITENDIAN -file "../rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule IMPORTSTAR -file "../rtl/fp_cores/fpnew/*"
|
||||
lint_off -rule PINCONNECTEMPTY -file "../rtl/fp_cores/fpnew/*"
|
||||
|
||||
//lint_off -rule CASEINCOMPLETE -file "../rtl/fp_cores/fpnew/*"
|
Loading…
Add table
Add a link
Reference in a new issue