enabling support for 64-bit floating-point hardware

This commit is contained in:
Blaise Tine 2023-06-05 14:41:39 -04:00
parent 91a8e65fd5
commit 014490dcd8
27 changed files with 469 additions and 451 deletions

View file

@ -210,13 +210,19 @@ CONFIGS="-DNUM_DCACHES=2 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app
# multiple FPUs per cluster
CONFIGS="-DNUM_FPU_UNITS=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemm --cores=8 --warps=1 --threads=2
# using FPNEW FPU core
CONFIGS="-DDISABLE_DPI -DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# test FPNEW FPU core
CONFIGS="-DFPU_FPNEW" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using DPI modules
# test DSP FPU core
CONFIGS="-DFPU_DSP" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# test DPI FPU core
CONFIGS="-DFPU_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# test DPI modules
CONFIGS="-DENABLE_DPI" ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using AXI bus
# test AXI bus
AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
# adjust l1 block size to match l2

View file

@ -19,21 +19,34 @@
///////////////////////////////////////////////////////////////////////////////
// 32 bit as default.
// 32 bit XLEN as default.
`ifndef XLEN_32
`ifndef XLEN_64
`define XLEN_32
`endif
`endif
// 32 bit FLEN as default.
`ifndef FLEN_32
`ifndef FLEN_64
`define FLEN_32
`endif
`endif
`ifdef XLEN_64
`define XLEN 64
// disable unsupported extensions
`define EXT_F_DISABLE 1
`else
`endif
`ifdef XLEN_32
`define XLEN 32
`endif
`ifdef FLEN_64
`define FLEN 64
`endif
`ifdef FLEN_32
`define FLEN 32
`endif
`ifndef NUM_CLUSTERS
@ -136,16 +149,25 @@
`define STALL_TIMEOUT (100000 * (1 ** (`L2_ENABLED + `L3_ENABLED)))
`endif
`ifndef SYNTHESIS
`ifdef ENABLE_DPI
`define IMUL_DPI
`define IDIV_DPI
`define FPU_DPI
`endif
`ifndef FPU_DPI
`ifdef SYNTHESIS
`ifndef FPU_DSP
`ifndef FPU_FPNEW
`define FPU_FPNEW
`endif
`endif
`else
`ifndef FPU_DPI
`ifndef FPU_FPNEW
`define FPU_FPNEW
`endif
`endif
`endif
`ifndef DEBUG_LEVEL
`define DEBUG_LEVEL 3
@ -294,39 +316,53 @@
`endif
`ifndef LATENCY_FMA
`ifdef FPU_DPI
`define LATENCY_FMA 4
`endif
`ifdef FPU_FPNEW
`define LATENCY_FMA 4
`endif
`ifdef FPU_DSP
`ifdef QUARTUS
`define LATENCY_FMA 4
`define LATENCY_FSQRT 4
`else
`endif
`ifdef VIVADO
`define LATENCY_FMA 16
`else
`define LATENCY_FMA 4
`endif
`endif
`endif
`ifndef LATENCY_FDIV
`ifdef FPU_DPI
`define LATENCY_FDIV 15
`endif
`ifdef FPU_FPNEW
`define LATENCY_FDIV 16
`else
`ifdef VIVADO
`define LATENCY_FDIV 28
`else
`define LATENCY_FDIV 16
`endif
`ifdef FPU_DSP
`ifdef QUARTUS
`define LATENCY_FDIV 15
`endif
`ifdef VIVADO
`define LATENCY_FDIV 28
`endif
`endif
`endif
`ifndef LATENCY_FSQRT
`ifdef FPU_DPI
`define LATENCY_FSQRT 10
`endif
`ifdef FPU_FPNEW
`define LATENCY_FSQRT 16
`else
`ifdef VIVADO
`define LATENCY_FSQRT 28
`else
`define LATENCY_FSQRT 16
`endif
`ifdef FPU_DSP
`ifdef QUARTUS
`define LATENCY_FSQRT 10
`endif
`ifdef VIVADO
`define LATENCY_FSQRT 28
`endif
`endif
`endif

View file

@ -96,7 +96,7 @@
///////////////////////////////////////////////////////////////////////////////
`define INST_OP_BITS 4
`define INST_MOD_BITS 3
`define INST_MOD_BITS 4
///////////////////////////////////////////////////////////////////////////////
@ -140,20 +140,20 @@
`define INST_BR_LESS(op) op[2]
`define INST_BR_STATIC(op) op[3]
`define INST_M_MUL 3'b000
`define INST_M_MULHU 3'b001
`define INST_M_MULH 3'b010
`define INST_M_MULHSU 3'b011
`define INST_M_DIV 3'b100
`define INST_M_DIVU 3'b101
`define INST_M_REM 3'b110
`define INST_M_REMU 3'b111
`define INST_M_BITS 3
`define INST_M_IS_MULX(op) (~op[2])
`define INST_M_IS_MULH(op) (op[1:0] != 0)
`define INST_M_SIGNED_A(op) (op[1:0] != 1)
`define INST_M_IS_REM(op) op[1]
`define INST_M_SIGNED(op) (~op[0])
`define INST_M_MUL 3'b000
`define INST_M_MULHU 3'b001
`define INST_M_MULH 3'b010
`define INST_M_MULHSU 3'b011
`define INST_M_DIV 3'b100
`define INST_M_DIVU 3'b101
`define INST_M_REM 3'b110
`define INST_M_REMU 3'b111
`define INST_M_BITS 3
`define INST_M_IS_MULX(op) (~op[2])
`define INST_M_IS_MULH(op) (op[1:0] != 0)
`define INST_M_SIGNED_A(op) (op[1:0] != 1)
`define INST_M_IS_REM(op) op[1]
`define INST_M_SIGNED(op) (~op[0])
`define INST_FMT_B 3'b000
`define INST_FMT_H 3'b001
@ -177,7 +177,7 @@
`define INST_LSU_BITS 4
`define INST_LSU_FMT(op) op[2:0]
`define INST_LSU_WSIZE(op) op[1:0]
`define INST_LSU_IS_FENCE(mod) (mod == 1)
`define INST_LSU_IS_FENCE(mod) (mod[0])
`define INST_FENCE_BITS 1
`define INST_FENCE_D 1'h0
@ -189,22 +189,24 @@
`define INST_CSR_OTHER 2'h0
`define INST_CSR_BITS 2
`define INST_FPU_ADD 4'h0
`define INST_FPU_SUB 4'h4
`define INST_FPU_MUL 4'h8
`define INST_FPU_DIV 4'hC
`define INST_FPU_CVTWS 4'h1 // FCVT.W.S
`define INST_FPU_CVTWUS 4'h5 // FCVT.WU.S
`define INST_FPU_CVTSW 4'h9 // FCVT.S.W
`define INST_FPU_CVTSWU 4'hD // FCVT.S.WU
`define INST_FPU_SQRT 4'h2
`define INST_FPU_CLASS 4'h6
`define INST_FPU_CMP 4'hA
`define INST_FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
`define INST_FPU_MADD 4'h3
`define INST_FPU_MSUB 4'h7
`define INST_FPU_NMSUB 4'hB
`define INST_FPU_NMADD 4'hF
// FPU operation types: 4-bit op_type encoding carried alongside the op_mod modifier.
`define INST_FPU_ADD 4'b0000
`define INST_FPU_SUB 4'b0001
`define INST_FPU_MUL 4'b0010
`define INST_FPU_DIV 4'b0011
`define INST_FPU_SQRT 4'b0100
// UNUSED 4'b0101
// UNUSED 4'b0110
// Non-computational ops; the sub-operation is selected by op_mod (0..10, in the order listed).
`define INST_FPU_NCP 4'b0111 // SGNJ, SGNJN, SGNJX, CLASS, MVXW, MVWX, FMIN, FMAX, LE, LT, EQ
`define INST_FPU_CVTWX 4'b1000 // FCVT.W.X
`define INST_FPU_CVTWUX 4'b1001 // FCVT.WU.X
`define INST_FPU_CVTXW 4'b1010 // FCVT.X.W
`define INST_FPU_CVTXWU 4'b1011 // FCVT.X.WU
`define INST_FPU_MADD 4'b1100
`define INST_FPU_MSUB 4'b1101
`define INST_FPU_NMSUB 4'b1110
`define INST_FPU_NMADD 4'b1111
// Tests the conversion-width flag held in the top bit of the modifier.
// The modifier is INST_MOD_BITS (4) wide, so the valid indices are [3:0]; the decoder
// writes this flag to op_mod[3]. Indexing mod[4] would read out of range.
`define INST_FPU_IS_W(mod) (mod[3])
`define INST_FPU_BITS 4
`define INST_GPU_TMC 4'h0
@ -423,7 +425,7 @@
`define ASSIGN_VX_FPU_REQ_IF(dst, src) \
assign dst.valid = src.valid; \
assign dst.op_type= src.op_type; \
assign dst.frm = src.frm; \
assign dst.op_mod = src.op_mod; \
assign dst.dataa = src.dataa; \
assign dst.datab = src.datab; \
assign dst.datac = src.datac; \

View file

@ -127,13 +127,11 @@
`INST_FPU_MADD: `TRACE(level, ("MADD")); \
`INST_FPU_NMSUB: `TRACE(level, ("NMSUB")); \
`INST_FPU_NMADD: `TRACE(level, ("NMADD")); \
`INST_FPU_CVTWS: `TRACE(level, ("CVTWS")); \
`INST_FPU_CVTWUS:`TRACE(level, ("CVTWUS")); \
`INST_FPU_CVTSW: `TRACE(level, ("CVTSW")); \
`INST_FPU_CVTSWU:`TRACE(level, ("CVTSWU")); \
`INST_FPU_CLASS: `TRACE(level, ("CLASS")); \
`INST_FPU_CMP: `TRACE(level, ("CMP")); \
`INST_FPU_MISC: begin \
`INST_FPU_CVTWX: `TRACE(level, ("CVT.W.X")); \
`INST_FPU_CVTWUX:`TRACE(level, ("CVT.WU.X")); \
`INST_FPU_CVTXW: `TRACE(level, ("CVT.X.W")); \
`INST_FPU_CVTXWU:`TRACE(level, ("CVT.X.WU")); \
`INST_FPU_NCP: begin \
case (op_mod) \
0: `TRACE(level, ("SGNJ")); \
1: `TRACE(level, ("SGNJN")); \
@ -142,6 +140,10 @@
4: `TRACE(level, ("MAX")); \
5: `TRACE(level, ("MVXW")); \
6: `TRACE(level, ("MVWX")); \
7: `TRACE(level, ("CLASS")); \
8: `TRACE(level, ("FLE")); \
9: `TRACE(level, ("FLT")); \
10: `TRACE(level, ("FEQ")); \
endcase \
end \
default: `TRACE(level, ("?")); \

View file

@ -394,7 +394,7 @@ module VX_core_top #(
`ifdef EXT_F_ENABLE
output wire fpu_req_valid,
output wire [`INST_FPU_BITS-1:0] fpu_req_op_type,
output wire [`INST_FRM_BITS-1:0] fpu_req_frm,
output wire [`INST_MOD_BITS-1:0] fpu_req_mod,
output wire [`NUM_THREADS-1:0][`XLEN-1:0] fpu_req_dataa,
output wire [`NUM_THREADS-1:0][`XLEN-1:0] fpu_req_datab,
output wire [`NUM_THREADS-1:0][`XLEN-1:0] fpu_req_datac,
@ -519,7 +519,7 @@ module VX_core_top #(
assign fpu_req_valid = fpu_req_if.valid;
assign fpu_req_op_type = fpu_req_if.op_type;
assign fpu_req_frm = fpu_req_if.frm;
assign fpu_req_mod = fpu_req_if.mod;
assign fpu_req_dataa = fpu_req_if.dataa;
assign fpu_req_datab = fpu_req_if.datab;
assign fpu_req_datac = fpu_req_if.datac;

View file

@ -45,6 +45,7 @@ module VX_decode #(
wire [6:0] opcode = instr[6:0];
wire [1:0] func2 = instr[26:25];
wire [2:0] func3 = instr[14:12];
wire [4:0] func5 = instr[31:27];
wire [6:0] func7 = instr[31:25];
wire [11:0] u_12 = instr[31:20];
@ -313,8 +314,8 @@ module VX_decode #(
`INST_FNMSUB,
`INST_FNMADD: begin
ex_type = `EX_FPU;
op_type = `INST_OP_BITS'(opcode[3:0]);
op_mod = func3;
op_type = `INST_OP_BITS'({2'b10, opcode[3:2]});
op_mod = `INST_MOD_BITS'(func3);
use_rd = 1;
`USED_FREG (rd);
`USED_FREG (rs1);
@ -323,71 +324,80 @@ module VX_decode #(
end
`INST_FCI: begin
ex_type = `EX_FPU;
op_mod = func3;
op_mod = `INST_MOD_BITS'(func3);
use_rd = 1;
case (func7)
7'h00, // FADD
7'h04, // FSUB
7'h08, // FMUL
7'h0C: begin // FDIV
op_type = `INST_OP_BITS'(func7[3:0]);
case (func5)
5'b00000, // FADD
5'b00001, // FSUB
5'b00010, // FMUL
5'b00011: begin // FDIV
op_type = `INST_OP_BITS'(func5[1:0]);
`USED_FREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
7'h2C: begin
5'b00100: begin
// NCP: FSGNJ=0, FSGNJN=1, FSGNJX=2
op_type = `INST_OP_BITS'(`INST_FPU_NCP);
op_mod = `INST_MOD_BITS'(func3[1:0]);
`USED_FREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
5'b00101: begin
// NCP: FMIN=6, FMAX=7
op_type = `INST_OP_BITS'(`INST_FPU_NCP);
op_mod = func3[0] ? 7 : 6;
`USED_FREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
5'b01011: begin
op_type = `INST_OP_BITS'(`INST_FPU_SQRT);
`USED_FREG (rd);
`USED_FREG (rs1);
end
7'h50: begin
op_type = `INST_OP_BITS'(`INST_FPU_CMP);
end
5'b10100: begin
// NCP: FLE=8, FLT=9, FEQ=10
op_type = `INST_OP_BITS'(`INST_FPU_NCP);
op_mod = {2'b10, func3[1:0]};
`USED_IREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
7'h60: begin
op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTWUS) : `INST_OP_BITS'(`INST_FPU_CVTWS);
end
5'b11000: begin
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_CVTWUX) : `INST_OP_BITS'(`INST_FPU_CVTWX);
`ifdef XLEN_64
op_mod[3] = rs2[1];
`endif
`USED_IREG (rd);
`USED_FREG (rs1);
end
7'h68: begin
op_type = (instr[20]) ? `INST_OP_BITS'(`INST_FPU_CVTSWU) : `INST_OP_BITS'(`INST_FPU_CVTSW);
5'b11010: begin
op_type = (rs2[0]) ? `INST_OP_BITS'(`INST_FPU_CVTXWU) : `INST_OP_BITS'(`INST_FPU_CVTXW);
`ifdef XLEN_64
op_mod[3] = rs2[1];
`endif
`USED_FREG (rd);
`USED_IREG (rs1);
end
7'h10: begin
// FSGNJ=0, FSGNJN=1, FSGNJX=2
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
op_mod = {1'b0, func3[1:0]};
`USED_FREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
7'h14: begin
// FMIN=3, FMAX=4
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
op_mod = func3[0] ? 4 : 3;
`USED_FREG (rd);
`USED_FREG (rs1);
`USED_FREG (rs2);
end
7'h70: begin
5'b11100: begin
if (func3[0]) begin
// FCLASS
op_type = `INST_OP_BITS'(`INST_FPU_CLASS);
// NCP: FCLASS=3
op_type = `INST_OP_BITS'(`INST_FPU_NCP);
op_mod = 3;
end else begin
// FMV.X.W=5
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
op_mod = 5;
// NCP: FMV.X.W=4
op_type = `INST_OP_BITS'(`INST_FPU_NCP);
op_mod = 4;
end
`USED_IREG (rd);
`USED_FREG (rs1);
end
7'h78: begin
// FMV.W.X=6
op_type = `INST_OP_BITS'(`INST_FPU_MISC);
op_mod = 6;
5'b11110: begin
// NCP: FMV.W.X=5
op_type = `INST_OP_BITS'(`INST_FPU_NCP);
op_mod = 5;
`USED_FREG (rd);
`USED_IREG (rs1);
end

View file

@ -83,7 +83,7 @@ module VX_gpr_stage #(
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
VX_dp_ram #(
.DATAW (`XLEN),
.DATAW (`FLEN),
.SIZE (RAM_SIZE),
.INIT_ENABLE (1),
.INIT_VALUE (0)
@ -92,9 +92,9 @@ module VX_gpr_stage #(
.write (write[i]),
`UNUSED_PIN (wren),
.waddr (waddr),
.wdata (writeback_if.data[i]),
.wdata (writeback_if.data[i][`FLEN-1:0]),
.raddr (raddr3),
.rdata (gpr_rsp_if.rs3_data[i])
.rdata (gpr_rsp_if.rs3_data[i][`FLEN-1:0])
);
end
`else

View file

@ -19,7 +19,7 @@ module VX_ibuffer #(
localparam ALM_FULL = SIZE - 1;
localparam ALM_EMPTY = 1;
localparam DATAW = `UP(`UUID_BITS) + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + `XLEN + 1 + 1;
localparam DATAW = `UP(`UUID_BITS) + `NUM_THREADS + `XLEN + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS + 1 + (`NR_BITS * 4) + `XLEN + 1 + 1;
localparam ADDRW = $clog2(SIZE);
localparam NWARPSW = $clog2(`NUM_WARPS+1);

View file

@ -60,9 +60,10 @@ module VX_fpu_agent #(
);
// resolve dynamic FRM from CSR
wire [`INST_FRM_BITS-1:0] req_frm;
wire [`INST_MOD_BITS-1:0] req_op_mod;
assign fpu_to_csr_if.read_wid = fpu_agent_if.wid;
assign req_frm = (fpu_agent_if.op_mod == `INST_FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_agent_if.op_mod;
// Resolve a dynamic rounding mode from the CSR for every op except NCP, whose op_mod
// carries a sub-opcode rather than a rounding mode.
// op_mod[3] is forwarded unchanged: the decoder stores a conversion flag there under
// XLEN_64, and forcing it to 0 would drop that flag whenever the rounding mode is dynamic.
assign req_op_mod = (fpu_agent_if.op_type != `INST_FPU_NCP
                  && fpu_agent_if.op_mod[2:0] == `INST_FRM_DYN) ? {fpu_agent_if.op_mod[3], fpu_to_csr_if.read_frm} : fpu_agent_if.op_mod;
// submit FPU request
@ -73,15 +74,15 @@ module VX_fpu_agent #(
assign fpu_agent_if.ready = ready_in && mdata_and_csr_ready;
VX_skid_buffer #(
.DATAW (`INST_FPU_BITS + `INST_FRM_BITS + `NUM_THREADS * 3 * `XLEN + `FPU_REQ_TAG_WIDTH),
.DATAW (`INST_FPU_BITS + `INST_MOD_BITS + `NUM_THREADS * 3 * `XLEN + `FPU_REQ_TAG_WIDTH),
.OUT_REG (1)
) req_sbuf (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.ready_in (ready_in),
.data_in ({fpu_agent_if.op_type, req_frm, fpu_agent_if.rs1_data, fpu_agent_if.rs2_data, fpu_agent_if.rs3_data, req_tag}),
.data_out ({fpu_req_if.op_type, fpu_req_if.frm, fpu_req_if.dataa, fpu_req_if.datab, fpu_req_if.datac, fpu_req_if.tag}),
.data_in ({fpu_agent_if.op_type, req_op_mod, fpu_agent_if.rs1_data, fpu_agent_if.rs2_data, fpu_agent_if.rs3_data, req_tag}),
.data_out ({fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.dataa, fpu_req_if.datab, fpu_req_if.datac, fpu_req_if.tag}),
.valid_out (fpu_req_if.valid),
.ready_out (fpu_req_if.ready)
);

View file

@ -29,7 +29,7 @@ module VX_fpu_arb #(
localparam LOG_NUM_REQS = `ARB_SEL_BITS(NUM_INPUTS, NUM_OUTPUTS);
localparam NUM_REQS = 1 << LOG_NUM_REQS;
localparam TAG_OUT_WIDTH = TAG_WIDTH + LOG_NUM_REQS;
localparam REQ_DATAW = TAG_OUT_WIDTH + `INST_FPU_BITS + `INST_FRM_BITS + NUM_LANES * 3 * `XLEN;
localparam REQ_DATAW = TAG_OUT_WIDTH + `INST_FPU_BITS + `INST_MOD_BITS + NUM_LANES * 3 * `XLEN;
localparam RSP_DATAW = TAG_WIDTH + NUM_LANES * (`XLEN + `FP_FLAGS_BITS) + 1;
///////////////////////////////////////////////////////////////////////
@ -59,9 +59,9 @@ module VX_fpu_arb #(
.sel_in (LOG_NUM_REQS'(r)),
.data_out (req_tag_in)
);
assign req_data_in[i] = {req_tag_in, req_in_if[i].op_type, req_in_if[i].frm, req_in_if[i].dataa, req_in_if[i].datab, req_in_if[i].datac};
assign req_data_in[i] = {req_tag_in, req_in_if[i].op_type, req_in_if[i].op_mod, req_in_if[i].dataa, req_in_if[i].datab, req_in_if[i].datac};
end else begin
assign req_data_in[i] = {req_in_if[i].tag, req_in_if[i].op_type, req_in_if[i].frm, req_in_if[i].dataa, req_in_if[i].datab, req_in_if[i].datac};
assign req_data_in[i] = {req_in_if[i].tag, req_in_if[i].op_type, req_in_if[i].op_mod, req_in_if[i].dataa, req_in_if[i].datab, req_in_if[i].datac};
end
end
@ -85,7 +85,7 @@ module VX_fpu_arb #(
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
assign req_out_if[i].valid = req_valid_out[i];
assign {req_out_if[i].tag, req_out_if[i].op_type, req_out_if[i].frm, req_out_if[i].dataa, req_out_if[i].datab, req_out_if[i].datac} = req_data_out[i];
assign {req_out_if[i].tag, req_out_if[i].op_type, req_out_if[i].op_mod, req_out_if[i].dataa, req_out_if[i].datab, req_out_if[i].datac} = req_data_out[i];
assign req_ready_out[i] = req_out_if[i].ready;
end

View file

@ -14,7 +14,7 @@ module VX_fpu_dpi #(
input wire [TAGW-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_MOD_BITS-1:0] frm,
input wire [`INST_MOD_BITS-1:0] op_mod,
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
@ -52,7 +52,8 @@ module VX_fpu_dpi #(
reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub;
reg is_itof, is_utof, is_ftoi, is_ftou;
reg is_fclss, is_flt, is_fle, is_feq, is_fmin, is_fmax, is_fsgnj, is_fsgnjn, is_fsgnjx;
wire [`INST_FRM_BITS-1:0] frm = `INST_FRM_BITS'(op_mod);
always @(*) begin
is_fadd = 0;
@ -66,15 +67,6 @@ module VX_fpu_dpi #(
is_utof = 0;
is_ftoi = 0;
is_ftou = 0;
is_fclss = 0;
is_flt = 0;
is_fle = 0;
is_feq = 0;
is_fmin = 0;
is_fmax = 0;
is_fsgnj = 0;
is_fsgnjn = 0;
is_fsgnjx = 0;
case (op_type)
`INST_FPU_ADD: begin core_select = FPU_FMA; is_fadd = 1; end
@ -86,23 +78,11 @@ module VX_fpu_dpi #(
`INST_FPU_NMSUB: begin core_select = FPU_FMA; is_fnmsub = 1; end
`INST_FPU_DIV: begin core_select = FPU_DIV; end
`INST_FPU_SQRT: begin core_select = FPU_SQRT; end
`INST_FPU_CVTWS: begin core_select = FPU_CVT; is_ftoi = 1; end
`INST_FPU_CVTWUS:begin core_select = FPU_CVT; is_ftou = 1; end
`INST_FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; end
`INST_FPU_CVTSWU:begin core_select = FPU_CVT; is_utof = 1; end
`INST_FPU_CLASS: begin core_select = FPU_NCP; is_fclss = 1; end
`INST_FPU_CMP: begin core_select = FPU_NCP;
is_fle = (frm == 0);
is_flt = (frm == 1);
is_feq = (frm == 2);
end
default: begin core_select = FPU_NCP;
is_fsgnj = (frm == 0);
is_fsgnjn = (frm == 1);
is_fsgnjx = (frm == 2);
is_fmin = (frm == 3);
is_fmax = (frm == 4);
end
`INST_FPU_CVTWX: begin core_select = FPU_CVT; is_ftoi = 1; end
`INST_FPU_CVTWUX:begin core_select = FPU_CVT; is_ftou = 1; end
`INST_FPU_CVTXW: begin core_select = FPU_CVT; is_itof = 1; end
`INST_FPU_CVTXWU:begin core_select = FPU_CVT; is_utof = 1; end
default: begin core_select = FPU_NCP; end
endcase
end
@ -312,7 +292,7 @@ module VX_fpu_dpi #(
generate
begin : fncp
wire [NUM_LANES-1:0][`XLEN-1:0] result_fncp;
reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fclss;
wire [NUM_LANES-1:0][`XLEN-1:0] result_flt;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fle;
@ -351,25 +331,25 @@ module VX_fpu_dpi #(
end
end
assign result_fncp = is_fclss ? result_fclss :
is_flt ? result_flt :
is_fle ? result_fle :
is_feq ? result_feq :
is_fmin ? result_fmin :
is_fmax ? result_fmax :
is_fsgnj ? result_fsgnj :
is_fsgnjn ? result_fsgnjn :
is_fsgnjx ? result_fsgnjx :
result_fmv;
// Select the non-computational result and its exception flags from the op_mod sub-opcode.
// Both outputs default to 'x so that sub-opcodes without flags (0..5), and any op_mod
// value not listed, leave the unused signal as don't-care.
// The event control must be written "@(*)"; a bare "always (*)" is a syntax error.
always @(*) begin
    result_fncp = 'x;
    fflags_fncp = 'x;
    case (op_mod)
    0: begin result_fncp = result_fsgnj; end
    1: begin result_fncp = result_fsgnjn; end
    2: begin result_fncp = result_fsgnjx; end
    3: begin result_fncp = result_fclss; end
    4: begin result_fncp = result_fmv; end
    5: begin result_fncp = result_fmv; end
    6: begin result_fncp = result_fmin; fflags_fncp = fflags_fmin; end
    7: begin result_fncp = result_fmax; fflags_fncp = fflags_fmax; end
    8: begin result_fncp = result_fle; fflags_fncp = fflags_fle; end
    9: begin result_fncp = result_flt; fflags_fncp = fflags_flt; end
    10: begin result_fncp = result_feq; fflags_fncp = fflags_feq; end
    endcase
end
wire has_fflags_fncp = (is_flt || is_fle || is_feq || is_fmin || is_fmax);
assign fflags_fncp = is_flt ? fflags_flt :
is_fle ? fflags_fle :
is_feq ? fflags_feq :
is_fmin ? fflags_fmin :
is_fmax ? fflags_fmax :
0;
wire has_fflags_fncp = (op_mod >= 6);
VX_shift_register #(
.DATAW (1 + TAGW + 1 + NUM_LANES * (`XLEN + $bits(fflags_t))),

View file

@ -1,6 +1,6 @@
`include "VX_fpu_define.vh"
module VX_fpu_fpga #(
module VX_fpu_dsp #(
parameter NUM_LANES = 4,
parameter TAGW = 4
) (
@ -13,7 +13,7 @@ module VX_fpu_fpga #(
input wire [TAGW-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_MOD_BITS-1:0] frm,
input wire [`INST_MOD_BITS-1:0] op_mod,
input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab,
@ -50,6 +50,8 @@ module VX_fpu_fpga #(
reg [FPC_BITS-1:0] core_select;
reg do_madd, do_sub, do_neg, is_itof, is_signed;
wire [`INST_FRM_BITS-1:0] frm = `INST_FRM_BITS'(op_mod);
always @(*) begin
do_madd = 0;
do_sub = 0;
@ -66,10 +68,10 @@ module VX_fpu_fpga #(
`INST_FPU_NMSUB: begin core_select = FPU_FMA; do_madd = 1; do_sub = 1; do_neg = 1; end
`INST_FPU_DIV: begin core_select = FPU_DIV; end
`INST_FPU_SQRT: begin core_select = FPU_SQRT; end
`INST_FPU_CVTWS: begin core_select = FPU_CVT; is_signed = 1; end
`INST_FPU_CVTWUS: begin core_select = FPU_CVT; end
`INST_FPU_CVTSW: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
`INST_FPU_CVTSWU: begin core_select = FPU_CVT; is_itof = 1; end
`INST_FPU_CVTWX: begin core_select = FPU_CVT; is_signed = 1; end
`INST_FPU_CVTWUX: begin core_select = FPU_CVT; end
`INST_FPU_CVTXW: begin core_select = FPU_CVT; is_itof = 1; is_signed = 1; end
`INST_FPU_CVTXWU: begin core_select = FPU_CVT; is_itof = 1; end
default: begin core_select = FPU_NCP; end
endcase
end
@ -172,9 +174,8 @@ module VX_fpu_fpga #(
.reset (ncp_reset),
.valid_in (valid_in && (core_select == FPU_NCP)),
.ready_in (per_core_ready_in[FPU_NCP]),
.tag_in (tag_in),
.op_type (op_type),
.frm (frm),
.tag_in (tag_in),
.op_mod (op_mod),
.dataa (dataa),
.datab (datab),
.result (per_core_result[FPU_NCP]),

View file

@ -7,11 +7,7 @@
module VX_fpu_fpnew #(
parameter NUM_LANES = 1,
parameter TAGW = 1,
parameter FMULADD = 1,
parameter FDIVSQRT = 1,
parameter FNONCOMP = 1,
parameter FCONV = 1
parameter TAGW = 1
) (
input wire clk,
input wire reset,
@ -22,12 +18,12 @@ module VX_fpu_fpnew #(
input wire [TAGW-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_MOD_BITS-1:0] frm,
input wire [`INST_MOD_BITS-1:0] op_mod,
input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab,
input wire [NUM_LANES-1:0][31:0] datac,
output wire [NUM_LANES-1:0][31:0] result,
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire has_fflags,
output wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags,
@ -37,36 +33,33 @@ module VX_fpu_fpnew #(
input wire ready_out,
output wire valid_out
);
localparam UNIT_FMULADD = FMULADD ? fpnew_pkg::PARALLEL : fpnew_pkg::DISABLED;
localparam UNIT_FDIVSQRT = FDIVSQRT ? fpnew_pkg::MERGED : fpnew_pkg::DISABLED;
localparam UNIT_FNONCOMP = FNONCOMP ? fpnew_pkg::PARALLEL : fpnew_pkg::DISABLED;
localparam UNIT_FCONV = FCONV ? fpnew_pkg::MERGED : fpnew_pkg::DISABLED;
localparam FOP_BITS = fpnew_pkg::OP_BITS;
localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
localparam FPU_DPATHW = 32'd32;
localparam FMTF_BITS = fpnew_pkg::FP_FORMAT_BITS;
localparam FMTI_BITS = fpnew_pkg::INT_FORMAT_BITS;
localparam LATENCY_FDIVSQRT = `MAX(`LATENCY_FDIV, `LATENCY_FSQRT);
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: FPU_DPATHW,
Width: `XLEN,
EnableVectors: 1'b0,
EnableNanBox: 1'b1,
FpFmtMask: 5'b10000,
`ifdef XLEN_64
IntFmtMask: 4'b0011
`else
IntFmtMask: 4'b0010
`endif
};
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
PipeRegs:'{'{`LATENCY_FMA, 0, 0, 0, 0}, // ADDMUL
PipeRegs:'{'{`LATENCY_FMA, 0, 0, 0, 0}, // ADDMUL
'{default: unsigned'(LATENCY_FDIVSQRT)}, // DIVSQRT
'{default: `LATENCY_FNCP}, // NONCOMP
'{default: `LATENCY_FCVT}}, // CONV
UnitTypes:'{'{default: UNIT_FMULADD}, // ADDMUL
'{default: UNIT_FDIVSQRT}, // DIVSQRT
'{default: UNIT_FNONCOMP}, // NONCOMP
'{default: UNIT_FCONV}}, // CONV
'{default: `LATENCY_FNCP}, // NONCOMP
'{default: `LATENCY_FCVT}}, // CONV
UnitTypes:'{'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED}}, // CONV
PipeConfig: fpnew_pkg::DISTRIBUTED
};
@ -75,29 +68,31 @@ module VX_fpu_fpnew #(
reg [TAGW-1:0] fpu_tag_in, fpu_tag_out;
reg [2:0][NUM_LANES-1:0][31:0] fpu_operands;
wire [FMTF_BITS-1:0] fpu_src_fmt = fpnew_pkg::FP32;
wire [FMTF_BITS-1:0] fpu_dst_fmt = fpnew_pkg::FP32;
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands;
wire [NUM_LANES-1:0][31:0] fpu_result;
wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result;
fpnew_pkg::status_t [NUM_LANES-1:0] fpu_status;
reg [FOP_BITS-1:0] fpu_op;
reg [`INST_FRM_BITS-1:0] fpu_rnd;
reg fpu_op_mod;
reg fpu_has_fflags, fpu_has_fflags_out;
// Source/destination floating-point formats use the FP format width.
reg [FMTF_BITS-1:0] fpu_src_fmt, fpu_dst_fmt;
// The integer format selector has its own width (FMTI_BITS), distinct from the FP format width.
reg [FMTI_BITS-1:0] fpu_int_fmt;
wire is_fp_w = op_mod[3];
always @(*) begin
fpu_op = fpnew_pkg::SGNJ;
fpu_rnd = frm;
fpu_rnd = `INST_FRM_BITS'(op_mod);
fpu_op_mod = 0;
fpu_has_fflags = 1;
fpu_operands[0] = dataa;
fpu_operands[1] = datab;
fpu_operands[2] = datac;
fpu_operands[2] = datac;
fpu_src_fmt = fpnew_pkg::FP32;
fpu_dst_fmt = fpnew_pkg::FP32;
fpu_int_fmt = is_fp_w ? fpnew_pkg::INT64 : fpnew_pkg::INT32;
case (op_type)
`INST_FPU_ADD: begin
fpu_op = fpnew_pkg::ADD;
@ -117,20 +112,21 @@ module VX_fpu_fpnew #(
`INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`INST_FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
`INST_FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`INST_FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
`INST_FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`INST_FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end
`INST_FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
`INST_FPU_MISC: begin
case (frm)
0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RNE; fpu_has_fflags = 0; end
1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RTZ; fpu_has_fflags = 0; end
2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RDN; fpu_has_fflags = 0; end
3: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RNE; end
4: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RTZ; end
default: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RUP; fpu_has_fflags = 0; end
`INST_FPU_CVTWX: begin fpu_op = fpnew_pkg::F2I; end
`INST_FPU_CVTWUX:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`INST_FPU_CVTXW: begin fpu_op = fpnew_pkg::I2F; end
`INST_FPU_CVTXWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
// Non-computational ops: op_mod is a sub-opcode, so the rounding-mode input is
// overridden per operation to select the variant inside the chosen unit.
`INST_FPU_NCP: begin
    case (op_mod)
    0: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RNE; fpu_has_fflags = 0; end // FSGNJ
    1: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RTZ; fpu_has_fflags = 0; end // FSGNJN
    2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RDN; fpu_has_fflags = 0; end // FSGNJX
    3: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end // CLASS
    4: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RUP; fpu_op_mod = 1; fpu_has_fflags = 0; end // FMV.X.W
    5: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `INST_FRM_RUP; fpu_has_fflags = 0; end // FMV.W.X
    6: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RNE; end // MIN
    7: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `INST_FRM_RTZ; end // MAX
    // CMP (8,9,10): fpu_rnd keeps op_mod[2:0] (0=LE, 1=LT, 2=EQ).
    // fpu_has_fflags stays at its default of 1 so comparison flags are reported,
    // matching the other FPU back ends, which report flags for op_mod >= 6.
    default: begin fpu_op = fpnew_pkg::CMP; end
    endcase
end
default:;
@ -145,7 +141,7 @@ module VX_fpu_fpnew #(
.TagType (logic[TAGW+1+1-1:0])
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
.rst_ni (~reset),
.operands_i ({fpu_operands[2][0], fpu_operands[1][0], fpu_operands[0][0]}),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
@ -172,7 +168,7 @@ module VX_fpu_fpnew #(
.TagType (logic)
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
.rst_ni (~reset),
.operands_i ({fpu_operands[2][i], fpu_operands[1][i], fpu_operands[0][i]}),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),

View file

@ -15,8 +15,7 @@ module VX_fpu_ncomp #(
input wire [TAGW-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_FRM_BITS-1:0] frm,
input wire [`INST_MOD_BITS-1:0] op_mod,
input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab,
@ -86,8 +85,7 @@ module VX_fpu_ncomp #(
wire valid_in_s0;
wire [TAGW-1:0] tag_in_s0;
wire [`INST_FPU_BITS-1:0] op_type_s0;
wire [`INST_FRM_BITS-1:0] frm_s0;
wire [`INST_MOD_BITS-1:0] op_mod_s0;
wire [NUM_LANES-1:0][31:0] dataa_s0, datab_s0;
wire [NUM_LANES-1:0] a_sign_s0, b_sign_s0;
wire [NUM_LANES-1:0][7:0] a_exponent_s0;
@ -98,15 +96,15 @@ module VX_fpu_ncomp #(
wire stall;
VX_pipe_register #(
.DATAW (1 + TAGW + `INST_FPU_BITS + `INST_FRM_BITS + NUM_LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fclass_t) + 1 + 1)),
.DATAW (1 + TAGW + `INST_MOD_BITS + NUM_LANES * (2 * 32 + 1 + 1 + 8 + 23 + 2 * $bits(fclass_t) + 1 + 1)),
.RESETW (1),
.DEPTH (0)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (!stall),
.data_in ({valid_in, tag_in, op_type, frm, dataa, datab, a_sign, b_sign, a_exponent, a_mantissa, a_fclass, b_fclass, a_smaller, ab_equal}),
.data_out ({valid_in_s0, tag_in_s0, op_type_s0, frm_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_fclass_s0, b_fclass_s0, a_smaller_s0, ab_equal_s0})
.data_in ({valid_in, tag_in, op_mod, dataa, datab, a_sign, b_sign, a_exponent, a_mantissa, a_fclass, b_fclass, a_smaller, ab_equal}),
.data_out ({valid_in_s0, tag_in_s0, op_mod_s0, dataa_s0, datab_s0, a_sign_s0, b_sign_s0, a_exponent_s0, a_mantissa_s0, a_fclass_s0, b_fclass_s0, a_smaller_s0, ab_equal_s0})
);
// FCLASS
@ -135,7 +133,7 @@ module VX_fpu_ncomp #(
end
// Min/Max
reg [NUM_LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
reg [NUM_LANES-1:0][31:0] fminmax_res;
for (genvar i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
if (a_fclass_s0[i].is_nan && b_fclass_s0[i].is_nan)
@ -145,11 +143,8 @@ module VX_fpu_ncomp #(
else if (b_fclass_s0[i].is_nan)
fminmax_res[i] = dataa_s0[i];
else begin
case (frm_s0) // use LSB to distinguish MIN and MAX
3: fminmax_res[i] = a_smaller_s0[i] ? dataa_s0[i] : datab_s0[i];
4: fminmax_res[i] = a_smaller_s0[i] ? datab_s0[i] : dataa_s0[i];
default: fminmax_res[i] = 'x; // don't care value
endcase
// The decoder encodes FMIN as op_mod 6 and FMAX as op_mod 7, so a set LSB selects FMAX
// (the larger operand) and a clear LSB selects FMIN (the smaller operand).
fminmax_res[i] = op_mod_s0[0] ? (a_smaller_s0[i] ? datab_s0[i] : dataa_s0[i]): // FMAX
(a_smaller_s0[i] ? dataa_s0[i] : datab_s0[i]); // FMIN
end
end
end
@ -158,11 +153,10 @@ module VX_fpu_ncomp #(
reg [NUM_LANES-1:0][31:0] fsgnj_res; // result of sign injection
for (genvar i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
case (frm_s0)
case (op_mod_s0[1:0])
0: fsgnj_res[i] = { b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
1: fsgnj_res[i] = {~b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
2: fsgnj_res[i] = { a_sign_s0[i] ^ b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
default: fsgnj_res[i] = 'x; // don't care value
default: fsgnj_res[i] = { a_sign_s0[i] ^ b_sign_s0[i], a_exponent_s0[i], a_mantissa_s0[i]};
endcase
end
end
@ -172,7 +166,7 @@ module VX_fpu_ncomp #(
reg [NUM_LANES-1:0] fcmp_fflags_NV; // comparison fflags
for (genvar i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
case (frm_s0)
case (op_mod_s0[1:0])
`INST_FRM_RNE: begin // LE
if (a_fclass_s0[i].is_nan || b_fclass_s0[i].is_nan) begin
fcmp_res[i] = 32'h0;
@ -214,42 +208,38 @@ module VX_fpu_ncomp #(
reg [NUM_LANES-1:0] tmp_fflags_NV;
for (genvar i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
case (op_type_s0)
`INST_FPU_CLASS: begin
always @(*) begin
case (op_mod_s0)
0,1,2: begin
// SGNJ
tmp_result[i] = fsgnj_res[i];
tmp_fflags_NV[i] = 'x;
end
3: begin
// CLASS
tmp_result[i] = fclass_mask[i];
tmp_fflags_NV[i] = 'x;
end
`INST_FPU_CMP: begin
end
4,5: begin
// FMV
tmp_result[i] = dataa_s0[i];
tmp_fflags_NV[i] = 'x;
end
6,7: begin
// MIN/MAX
tmp_result[i] = fminmax_res[i];
tmp_fflags_NV[i] = a_fclass_s0[i].is_signaling | b_fclass_s0[i].is_signaling;
end
default: begin
// CMP (8, 9, 10)
tmp_result[i] = fcmp_res[i];
tmp_fflags_NV[i] = fcmp_fflags_NV[i];
end
//`FPU_MISC:
default: begin
case (frm_s0)
0,1,2: begin
tmp_result[i] = fsgnj_res[i];
tmp_fflags_NV[i] = 'x;
end
3,4: begin
tmp_result[i] = fminmax_res[i];
tmp_fflags_NV[i] = a_fclass_s0[i].is_signaling | b_fclass_s0[i].is_signaling;
end
//5,6,7: MOVE
default: begin
tmp_result[i] = dataa_s0[i];
tmp_fflags_NV[i] = 'x;
end
endcase
end
end
endcase
end
end
wire has_fflags_s0 = ((op_type_s0 == `INST_FPU_MISC)
&& (frm_s0 == 3 // MIN
|| frm_s0 == 4)) // MAX
|| (op_type_s0 == `INST_FPU_CMP); // CMP
wire has_fflags_s0 = (op_mod_s0 >= 6);
assign stall = ~ready_out && valid_out;

View file

@ -8,7 +8,7 @@ interface VX_fpu_req_if #(
wire valid;
wire [`INST_FPU_BITS-1:0] op_type;
wire [`INST_FRM_BITS-1:0] frm;
wire [`INST_MOD_BITS-1:0] op_mod;
wire [NUM_LANES-1:0][`XLEN-1:0] dataa;
wire [NUM_LANES-1:0][`XLEN-1:0] datab;
wire [NUM_LANES-1:0][`XLEN-1:0] datac;
@ -18,7 +18,7 @@ interface VX_fpu_req_if #(
modport master (
output valid,
output op_type,
output frm,
output op_mod,
output dataa,
output datab,
output datac,
@ -29,7 +29,7 @@ interface VX_fpu_req_if #(
modport slave (
input valid,
input op_type,
input frm,
input op_mod,
input dataa,
input datab,
input datac,

View file

@ -29,7 +29,7 @@ module VX_fpu_unit #(
.valid_in (fpu_req_if.valid),
.op_type (fpu_req_if.op_type),
.frm (fpu_req_if.frm),
.op_mod (fpu_req_if.op_mod),
.dataa (fpu_req_if.dataa),
.datab (fpu_req_if.datab),
.datac (fpu_req_if.datac),
@ -42,16 +42,12 @@ module VX_fpu_unit #(
.fflags (fpu_rsp_if.fflags),
.tag_out (fpu_rsp_if.tag),
.ready_out (fpu_rsp_if.ready)
);
);
`elsif FPU_FPNEW
VX_fpu_fpnew #(
.NUM_LANES (NUM_LANES),
.FMULADD (1),
.FDIVSQRT (1),
.FNONCOMP (1),
.FCONV (1),
.TAGW (TAG_WIDTH)
) fpu_fpnew (
.clk (clk),
@ -59,7 +55,7 @@ module VX_fpu_unit #(
.valid_in (fpu_req_if.valid),
.op_type (fpu_req_if.op_type),
.frm (fpu_req_if.frm),
.op_mod (fpu_req_if.op_mod),
.dataa (fpu_req_if.dataa),
.datab (fpu_req_if.datab),
.datac (fpu_req_if.datac),
@ -72,20 +68,20 @@ module VX_fpu_unit #(
.fflags (fpu_rsp_if.fflags),
.tag_out (fpu_rsp_if.tag),
.ready_out (fpu_rsp_if.ready)
);
);
`else
`elsif FPU_DSP
VX_fpu_fpga #(
VX_fpu_dsp #(
.NUM_LANES (NUM_LANES),
.TAGW (TAG_WIDTH)
) fpu_fpga (
) fpu_dsp (
.clk (clk),
.reset (reset),
.valid_in (fpu_req_if.valid),
.op_type (fpu_req_if.op_type),
.frm (fpu_req_if.frm),
.op_mod (fpu_req_if.op_mod),
.dataa (fpu_req_if.dataa),
.datab (fpu_req_if.datab),
.datac (fpu_req_if.datac),

View file

@ -1,4 +1,4 @@
PROJECT = VX_fpu_fpga
PROJECT = VX_fpu_dsp
TOP_LEVEL_ENTITY = $(PROJECT)
SRC_FILE = $(PROJECT).sv

View file

@ -19,7 +19,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
DBG_FLAGS += $(DBG_TRACE_FLAGS) -DVCD_OUTPUT
FPU_INCLUDE = -I$(RTL_DIR)/fpu -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex
@ -49,8 +49,8 @@ VL_FLAGS += -j $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG

View file

@ -19,7 +19,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
DBG_FLAGS += $(DBG_TRACE_FLAGS) -DVCD_OUTPUT
FPU_INCLUDE = -I$(RTL_DIR)/fpu -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex
@ -49,8 +49,8 @@ VL_FLAGS += -j $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG

View file

@ -19,7 +19,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
DBG_FLAGS += $(DBG_TRACE_FLAGS) -DVCD_OUTPUT
FPU_INCLUDE = -I$(RTL_DIR)/fpu -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex
@ -49,8 +49,8 @@ VL_FLAGS += -j $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG

View file

@ -19,7 +19,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
DBG_FLAGS += $(DBG_TRACE_FLAGS) -DVCD_OUTPUT
FPU_INCLUDE = -I$(RTL_DIR)/fpu -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex
@ -49,8 +49,8 @@ VL_FLAGS += -j $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG

View file

@ -19,7 +19,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
DBG_FLAGS += $(DBG_TRACE_FLAGS) -DVCD_OUTPUT
FPU_INCLUDE = -I$(RTL_DIR)/fpu -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex
@ -49,8 +49,8 @@ VL_FLAGS += -j $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG

View file

@ -19,7 +19,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
DBG_FLAGS += $(DBG_TRACE_FLAGS) -DVCD_OUTPUT
FPU_INCLUDE = -I$(RTL_DIR)/fpu -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex
@ -49,8 +49,8 @@ VL_FLAGS += -j $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG

View file

@ -19,7 +19,7 @@ DBG_TRACE_FLAGS += -DDBG_TRACE_TEX
DBG_TRACE_FLAGS += -DDBG_TRACE_RASTER
DBG_TRACE_FLAGS += -DDBG_TRACE_ROP
DBG_FLAGS += $(DBG_TRACE_FLAGS)
DBG_FLAGS += $(DBG_TRACE_FLAGS) -DVCD_OUTPUT
FPU_INCLUDE = -I$(RTL_DIR)/fpu -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
TEX_INCLUDE = -I$(RTL_DIR)/tex
@ -49,8 +49,8 @@ VL_FLAGS += -j $(THREADS)
# Debugging
ifdef DEBUG
VL_FLAGS += --trace --trace-structs -DVCD_OUTPUT $(DBG_FLAGS)
CXXFLAGS += -g -O0 -DVCD_OUTPUT $(DBG_FLAGS)
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG

View file

@ -507,13 +507,11 @@ std::shared_ptr<Instr> Decoder::decode(uint32_t code) const {
case 0x61: // FCVT.WU.D, FCVT.W.D, FCVT.L.D, FCVT.LU.D
instr->setDestReg(rd, RegType::Integer);
instr->setSrcReg(rs1, RegType::Float);
instr->setSrcReg(rs2, RegType::Integer);
break;
case 0x68: // FCVT.S.W, FCVT.S.WU, FCVT.S.L, FCVT.S.LU
case 0x69: // FCVT.D.W, FCVT.D.WU, FCVT.D.L, FCVT.D.LU
instr->setDestReg(rd, RegType::Float);
instr->setSrcReg(rs1, RegType::Integer);
instr->setSrcReg(rs2, RegType::Integer);
break;
case 0x70: // FCLASS.S, FMV.X.W
case 0x71: // FCLASS.D, FMV.X.D

View file

@ -175,9 +175,9 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
bool rd_write = false;
switch (opcode) {
// RV32I: LUI
switch (opcode) {
case LUI_INST: {
// RV32I: LUI
trace->exe_type = ExeType::ALU;
trace->alu_type = AluType::ARITH;
for (uint32_t t = 0; t < num_threads; ++t) {
@ -187,9 +187,9 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
rd_write = true;
break;
}
// RV32I: AUIPC
}
case AUIPC_INST: {
// RV32I: AUIPC
trace->exe_type = ExeType::ALU;
trace->alu_type = AluType::ARITH;
for (uint32_t t = 0; t < num_threads; ++t) {
@ -429,7 +429,7 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
for (uint32_t t = 0; t < num_threads; ++t) {
if (!tmask_.test(t))
continue;
if (func7 & 0x1){
if (func7 & 0x1) {
switch (func3) {
case 0: {
// RV64M: MULW
@ -649,9 +649,9 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
trace->fetch_stall = true;
break;
}
// RV32I: JAL
}
case JAL_INST: {
// RV32I: JAL
trace->exe_type = ExeType::ALU;
trace->alu_type = AluType::BRANCH;
for (uint32_t t = 0; t < num_threads; ++t) {
@ -664,9 +664,9 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
rd_write = true;
break;
}
// RV32I: JALR
}
case JALR_INST: {
// RV32I: JALR
trace->exe_type = ExeType::ALU;
trace->alu_type = AluType::BRANCH;
trace->used_iregs.set(rsrc0);
@ -875,14 +875,14 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
trace->alu_type = AluType::SYSCALL;
trace->fetch_stall = true;
switch (csr_addr) {
case 0: { // RV32I: ECALL
case 0:
// RV32I: ECALL
core_->trigger_ecall();
break;
}
case 1: { // RV32I: EBREAK
case 1:
// RV32I: EBREAK
core_->trigger_ebreak();
break;
}
case 0x002: // URET
case 0x102: // SRET
case 0x302: // MRET
@ -953,14 +953,14 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
}
break;
}
// RV32I: FENCE
}
case FENCE: {
// RV32I: FENCE
trace->exe_type = ExeType::LSU;
trace->lsu_type = LsuType::FENCE;
break;
}
case FCI: {
case FCI: {
trace->exe_type = ExeType::FPU;
for (uint32_t t = 0; t < num_threads; ++t) {
if (!tmask_.test(t))
@ -1032,20 +1032,6 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
trace->used_fregs.set(rsrc1);
break;
}
case 0x2c: { // RV32F: FSQRT.S
if (checkBoxedArgs(&rddata[t].u64, rsdata[t][0].u64, &fflags)) {
rddata[t].u64 = nan_box(rv_fsqrt_s(rsdata[t][0].u64, frm, &fflags));
}
trace->fpu_type = FpuType::FSQRT;
trace->used_fregs.set(rsrc0);
break;
}
case 0x2d: { // RV32D: FSQRT.D
rddata[t].u64 = rv_fsqrt_d(rsdata[t][0].u64, frm, &fflags);
trace->fpu_type = FpuType::FSQRT;
trace->used_fregs.set(rsrc0);
break;
}
case 0x10: {
if (checkBoxedArgs(&rddata[t].u64, rsdata[t][0].u64, rsdata[t][1].u64, &fflags)) {
switch (func3) {
@ -1126,77 +1112,20 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
trace->used_fregs.set(rsrc1);
break;
}
case 0x60: {
switch (rsrc1) {
case 0:
// RV32F: FCVT.W.S
rddata[t].i = sext((uint64_t)rv_ftoi_s(rsdata[t][0].u64, frm, &fflags), 32);
break;
case 1:
// RV32F: FCVT.WU.S
rddata[t].i = sext((uint64_t)rv_ftou_s(rsdata[t][0].u64, frm, &fflags), 32);
break;
case 2:
// RV64F: FCVT.L.S
rddata[t].i = rv_ftol_s(rsdata[t][0].u64, frm, &fflags);
break;
case 3:
// RV64F: FCVT.LU.S
rddata[t].i = rv_ftolu_s(rsdata[t][0].u64, frm, &fflags);
break;
case 0x2c: { // RV32F: FSQRT.S
if (checkBoxedArgs(&rddata[t].u64, rsdata[t][0].u64, &fflags)) {
rddata[t].u64 = nan_box(rv_fsqrt_s(rsdata[t][0].u64, frm, &fflags));
}
trace->fpu_type = FpuType::FCVT;
trace->used_fregs.set(rsrc0);
break;
}
case 0x61: {
switch (rsrc1) {
case 0:
// RV32D: FCVT.W.D
rddata[t].i = sext((uint64_t)rv_ftoi_d(rsdata[t][0].u64, frm, &fflags), 32);
break;
case 1:
// RV32D: FCVT.WU.D
rddata[t].i = sext((uint64_t)rv_ftou_d(rsdata[t][0].u64, frm, &fflags), 32);
break;
case 2:
// RV64D: FCVT.L.D
rddata[t].i = rv_ftol_d(rsdata[t][0].u64, frm, &fflags);
break;
case 3:
// RV64D: FCVT.LU.D
rddata[t].i = rv_ftolu_d(rsdata[t][0].u64, frm, &fflags);
break;
}
trace->fpu_type = FpuType::FCVT;
trace->fpu_type = FpuType::FSQRT;
trace->used_fregs.set(rsrc0);
break;
}
case 0x70: {
if (func3) {
// RV32F: FCLASS.S
rddata[t].i = rv_fclss_s(rsdata[t][0].u64);
} else {
// RV32F: FMV.X.W
uint32_t result = (uint32_t)rsdata[t][0].u64;
rddata[t].i = sext((uint64_t)result, 32);
}
trace->fpu_type = FpuType::FNCP;
case 0x2d: { // RV32D: FSQRT.D
rddata[t].u64 = rv_fsqrt_d(rsdata[t][0].u64, frm, &fflags);
trace->fpu_type = FpuType::FSQRT;
trace->used_fregs.set(rsrc0);
break;
}
case 0x71: {
if (func3) {
// RV32D: FCLASS.D
rddata[t].i = rv_fclss_d(rsdata[t][0].u64);
} else {
// RV64D: FMV.X.D
rddata[t].i = rsdata[t][0].u64;
}
trace->fpu_type = FpuType::FNCP;
trace->used_fregs.set(rsrc0);
break;
}
break;
}
case 0x50: {
if (checkBoxedCmpArgs(&rddata[t].u, rsdata[t][0].u64, rsdata[t][1].u64, &fflags)) {
switch (func3) {
@ -1238,25 +1167,71 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
trace->used_fregs.set(rsrc0);
trace->used_fregs.set(rsrc1);
break;
}
}
case 0x60: {
switch (rsrc1) {
case 0:
// RV32F: FCVT.W.S
rddata[t].i = sext((uint64_t)rv_ftoi_s(rsdata[t][0].u64, frm, &fflags), 32);
break;
case 1:
// RV32F: FCVT.WU.S
rddata[t].i = sext((uint64_t)rv_ftou_s(rsdata[t][0].u64, frm, &fflags), 32);
break;
case 2:
// RV64F: FCVT.L.S
rddata[t].i = rv_ftol_s(rsdata[t][0].u64, frm, &fflags);
break;
case 3:
// RV64F: FCVT.LU.S
rddata[t].i = rv_ftolu_s(rsdata[t][0].u64, frm, &fflags);
break;
}
trace->fpu_type = FpuType::FCVT;
trace->used_fregs.set(rsrc0);
break;
}
case 0x61: {
switch (rsrc1) {
case 0:
// RV32D: FCVT.W.D
rddata[t].i = sext((uint64_t)rv_ftoi_d(rsdata[t][0].u64, frm, &fflags), 32);
break;
case 1:
// RV32D: FCVT.WU.D
rddata[t].i = sext((uint64_t)rv_ftou_d(rsdata[t][0].u64, frm, &fflags), 32);
break;
case 2:
// RV64D: FCVT.L.D
rddata[t].i = rv_ftol_d(rsdata[t][0].u64, frm, &fflags);
break;
case 3:
// RV64D: FCVT.LU.D
rddata[t].i = rv_ftolu_d(rsdata[t][0].u64, frm, &fflags);
break;
}
trace->fpu_type = FpuType::FCVT;
trace->used_fregs.set(rsrc0);
break;
}
case 0x68: {
switch (rsrc1) {
case 0:
// RV32F: FCVT.S.W
rddata[t].u64 = nan_box(rv_itof_s(rsdata[t][0].i, frm, &fflags));
break;
case 1:
// RV32F: FCVT.S.WU
rddata[t].u64 = nan_box(rv_utof_s(rsdata[t][0].i, frm, &fflags));
break;
case 2:
// RV64F: FCVT.S.L
rddata[t].u64 = nan_box(rv_ltof_s(rsdata[t][0].i, frm, &fflags));
break;
case 3:
// RV64F: FCVT.S.LU
rddata[t].u64 = nan_box(rv_lutof_s(rsdata[t][0].i, frm, &fflags));
break;
case 0:
// RV32F: FCVT.S.W
rddata[t].u64 = nan_box(rv_itof_s(rsdata[t][0].i, frm, &fflags));
break;
case 1:
// RV32F: FCVT.S.WU
rddata[t].u64 = nan_box(rv_utof_s(rsdata[t][0].i, frm, &fflags));
break;
case 2:
// RV64F: FCVT.S.L
rddata[t].u64 = nan_box(rv_ltof_s(rsdata[t][0].i, frm, &fflags));
break;
case 3:
// RV64F: FCVT.S.LU
rddata[t].u64 = nan_box(rv_lutof_s(rsdata[t][0].i, frm, &fflags));
break;
}
trace->fpu_type = FpuType::FCVT;
trace->used_iregs.set(rsrc0);
@ -1264,27 +1239,52 @@ void Warp::execute(const Instr &instr, pipeline_trace_t *trace) {
}
case 0x69: {
switch (rsrc1) {
case 0:
// RV32D: FCVT.D.W
rddata[t].u64 = rv_itof_d(rsdata[t][0].i, frm, &fflags);
break;
case 1:
// RV32D: FCVT.D.WU
rddata[t].u64 = rv_utof_d(rsdata[t][0].i, frm, &fflags);
break;
case 2:
// RV64D: FCVT.D.L
rddata[t].u64 = rv_ltof_d(rsdata[t][0].i, frm, &fflags);
break;
case 3:
// RV64D: FCVT.D.LU
rddata[t].u64 = rv_lutof_d(rsdata[t][0].i, frm, &fflags);
break;
case 0:
// RV32D: FCVT.D.W
rddata[t].u64 = rv_itof_d(rsdata[t][0].i, frm, &fflags);
break;
case 1:
// RV32D: FCVT.D.WU
rddata[t].u64 = rv_utof_d(rsdata[t][0].i, frm, &fflags);
break;
case 2:
// RV64D: FCVT.D.L
rddata[t].u64 = rv_ltof_d(rsdata[t][0].i, frm, &fflags);
break;
case 3:
// RV64D: FCVT.D.LU
rddata[t].u64 = rv_lutof_d(rsdata[t][0].i, frm, &fflags);
break;
}
trace->fpu_type = FpuType::FCVT;
trace->used_iregs.set(rsrc0);
break;
}
case 0x70: {
if (func3) {
// RV32F: FCLASS.S
rddata[t].i = rv_fclss_s(rsdata[t][0].u64);
} else {
// RV32F: FMV.X.W
uint32_t result = (uint32_t)rsdata[t][0].u64;
rddata[t].i = sext((uint64_t)result, 32);
}
trace->fpu_type = FpuType::FNCP;
trace->used_fregs.set(rsrc0);
break;
}
case 0x71: {
if (func3) {
// RV32D: FCLASS.D
rddata[t].i = rv_fclss_d(rsdata[t][0].u64);
} else {
// RV64D: FMV.X.D
rddata[t].i = rsdata[t][0].u64;
}
trace->fpu_type = FpuType::FNCP;
trace->used_fregs.set(rsrc0);
break;
}
case 0x78: { // RV32F: FMV.W.X
rddata[t].u64 = nan_box((uint32_t)rsdata[t][0].i);
trace->fpu_type = FpuType::FNCP;

View file

@ -25,27 +25,27 @@ TESTS_64D := $(wildcard rv64ud-p-*.hex)
all:
run-simx-32imfd:
run-simx-32imafd:
$(foreach test, $(TESTS_32I) $(TESTS_32M) $(TESTS_32F) $(TESTS_32D) $(TESTS_32A), $(SIM_DIR)/simx/simx -r $(test) || exit;)
run-simx-64imfd:
run-simx-64imafd:
$(foreach test, $(TESTS_64I) $(TESTS_64M) $(TESTS_64F) $(TESTS_64D) $(TESTS_64A), $(SIM_DIR)/simx/simx -r $(test) || exit;)
run-simx-32: run-simx-32imfd
run-simx-32: run-simx-32imafd
run-simx-64: run-simx-32imfd run-simx-64imfd
run-simx-64: run-simx-32imafd run-simx-64imafd
run-simx: run-simx-$(XLEN)
run-rtlsim-32imf:
$(foreach test, $(TESTS_32I) $(TESTS_32M) $(TESTS_32F), $(SIM_DIR)/rtlsim/rtlsim -r $(test) || exit;)
run-rtlsim-64im:
$(foreach test, $(TESTS_64I) $(TESTS_64M), $(SIM_DIR)/rtlsim/rtlsim -r $(test) || exit;)
run-rtlsim-64imf:
$(foreach test, $(TESTS_64I) $(TESTS_64M) $(TESTS_64F), $(SIM_DIR)/rtlsim/rtlsim -r $(test) || exit;)
run-rtlsim-32: run-rtlsim-32imf
run-rtlsim-64: run-rtlsim-32imf run-rtlsim-64im
run-rtlsim-64: run-rtlsim-32imf run-rtlsim-64imf
run-rtlsim: run-rtlsim-$(XLEN)