mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-19 03:44:46 -04:00
571 lines
22 KiB
Systemverilog
571 lines
22 KiB
Systemverilog
// Copyright 2018 ETH Zurich and University of Bologna.
|
|
// Copyright and related rights are licensed under the Solderpad Hardware
|
|
// License, Version 0.51 (the "License"); you may not use this file except in
|
|
// compliance with the License. You may obtain a copy of the License at
|
|
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
|
// or agreed to in writing, software, hardware and materials distributed under
|
|
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
|
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations under the License.
|
|
//
|
|
// Author: Stefan Mach, ETH Zurich
|
|
// Date: 12.04.2018
|
|
// Description: Wrapper for the floating-point unit
|
|
|
|
|
|
// Wrapper around fpnew_top: translates the core's FU request into FPnew
// operation/format/rounding-mode encodings and adapts the input handshake.
module fpu_wrap
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter type exception_t = logic,
    parameter type fu_data_t = logic
) (
    // Clock and asynchronous active-low reset
    input logic clk_i,
    input logic rst_ni,
    // Returns the input FSM to READY and is forwarded to the FPU
    input logic flush_i,
    // Upstream request handshake
    input logic fpu_valid_i,
    output logic fpu_ready_o,
    // Request payload; this module reads .operand_a, .operand_b, .imm,
    // .operation, .fu and .trans_id
    input fu_data_t fu_data_i,

    // Format selector, decoded into the FPnew destination format
    input logic [1:0] fpu_fmt_i,
    // Rounding-mode field; some operations carry an op/format encoding here
    input logic [2:0] fpu_rm_i,
    // Rounding mode used when fpu_rm_i is outside 000..100 and for vectorial ops
    input logic [2:0] fpu_frm_i,
    // NOTE(review): not referenced anywhere in this module
    input logic [6:0] fpu_prec_i,
    // Tag returned by the FPU together with the result
    output logic [CVA6Cfg.TRANS_ID_BITS-1:0] fpu_trans_id_o,
    output logic [CVA6Cfg.FLen-1:0] result_o,
    output logic fpu_valid_o,
    // .cause carries the FPU status flags; .valid is always 0
    output exception_t fpu_exception_o
);
|
|
|
|
// Current / next state of the input-side handshake FSM.
// Declared here, ahead of the fpu_gen generate block, as a workaround:
// otherwise compilation might issue an error if FLEN=0.
enum logic {READY, STALL} state_q, state_d;
|
|
if (CVA6Cfg.FpPresent) begin : fpu_gen
|
|
// FLen-wide views of the request operands. Operand C is taken from the
// immediate field of fu_data_i, A and B from the register operands.
logic [CVA6Cfg.FLen-1:0] operand_a_i;
logic [CVA6Cfg.FLen-1:0] operand_b_i;
logic [CVA6Cfg.FLen-1:0] operand_c_i;
assign operand_a_i = fu_data_i.operand_a[CVA6Cfg.FLen-1:0];
assign operand_b_i = fu_data_i.operand_b[CVA6Cfg.FLen-1:0];
assign operand_c_i = fu_data_i.imm[CVA6Cfg.FLen-1:0];
|
|
|
|
//-----------------------------------
|
|
// FPnew config from FPnew package
|
|
//-----------------------------------
|
|
// Field widths derived from the FPnew package. Explicitly typed so the
// constants do not silently take on the type of their initialiser.
localparam int unsigned OPBITS = fpnew_pkg::OP_BITS;
localparam int unsigned FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam int unsigned IFMTBITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
|
|
|
|
// Features (enabled formats, vectors etc.)
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
  Width: unsigned'(CVA6Cfg.FLen),  // parameterized using CVA6Cfg.FLen
  EnableVectors: CVA6Cfg.XFVec,
  EnableNanBox: 1'b1,
  // One enable bit per FP format, in the order FP32, FP64, FP16, FP8, FP16alt
  FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT},
  // One enable bit per integer format; the two narrow formats are enabled only
  // together with vector support, the last two are always enabled.
  // NOTE(review): presumably ordered INT8, INT16, INT32, INT64 - confirm
  // against fpnew_pkg.
  IntFmtMask: {
    CVA6Cfg.XFVec && CVA6Cfg.XF8,
    CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT),
    1'b1,
    1'b1
  }
};
|
|
|
|
// Implementation (number of registers etc)
// Rows are per operation group (ADDMUL, DIVSQRT, NONCOMP, CONV); the columns of
// PipeRegs are per FP format. The LAT_* constants are not declared in this
// file (presumably provided by the ariane_pkg wildcard import).
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
  PipeRegs: '{  // FP32, FP64, FP16, FP8, FP16alt
    '{
      unsigned'(LAT_COMP_FP32),
      unsigned'(LAT_COMP_FP64),
      unsigned'(LAT_COMP_FP16),
      unsigned'(LAT_COMP_FP8),
      unsigned'(LAT_COMP_FP16ALT)
    },  // ADDMUL
    '{default: unsigned'(LAT_DIVSQRT)},  // DIVSQRT
    '{default: unsigned'(LAT_NONCOMP)},  // NONCOMP
    '{default: unsigned'(LAT_CONV)}  // CONV
  },
  UnitTypes: '{
    '{default: fpnew_pkg::PARALLEL},  // ADDMUL
    '{default: fpnew_pkg::MERGED},  // DIVSQRT
    '{default: fpnew_pkg::PARALLEL},  // NONCOMP
    '{default: fpnew_pkg::MERGED}  // CONV
  },
  PipeConfig: fpnew_pkg::DISTRIBUTED
};
|
|
|
|
//-------------------------------------------------
// Inputs to the FPU and protocol inversion buffer
//-------------------------------------------------
// Naming: *_d is the freshly translated request, *_q is the copy kept in the
// hold register, and the unsuffixed signal is the one actually fed to the FPU.
logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a;
logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b;
logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c;
logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op;
logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod;
logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt;
logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt;
logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt;
logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm;
logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op;

// Transaction tag travelling through the FPU alongside the operation
logic [CVA6Cfg.TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag;

// Handshake signals on the FPU's input and output side
logic fpu_in_ready, fpu_in_valid;
logic fpu_out_ready, fpu_out_valid;

// Status flags returned by the FPU
logic [4:0] fpu_status;

// FSM to handle protocol inversion
logic hold_inputs;  // capture the translated request into the hold register
logic use_hold;  // feed the FPU from the hold register instead of the inputs
|
|
|
|
//-----------------------------
|
|
// Translate inputs
|
|
//-----------------------------
|
|
|
|
// Translates the incoming request (operation, format and rounding-mode fields
// plus operands) into the FPnew operation, modifier, formats, rounding mode and
// operand values. Later statements deliberately override earlier defaults, so
// statement order matters.
always_comb begin : input_translation

  automatic logic vec_replication;  // control honoring of replication flag
  automatic logic replicate_c;  // replicate operand C instead of B (for ADD/SUB)
  automatic logic check_ah;  // Decide for AH from RM field encoding

  // Default Values
  operand_a_d = operand_a_i;
  operand_b_d = operand_b_i;  // register operand B, may be replicated below
  operand_c_d = operand_c_i;  // immediates come through this port unless used as operand
  fpu_op_d = fpnew_pkg::SGNJ;  // sign injection by default
  fpu_op_mod_d = 1'b0;
  fpu_dstfmt_d = fpnew_pkg::FP32;
  fpu_ifmt_d = fpnew_pkg::INT32;
  fpu_rm_d = fpu_rm_i;
  fpu_vec_op_d = fu_data_i.fu == FPU_VEC;
  fpu_tag_d = fu_data_i.trans_id;
  vec_replication = fpu_rm_i[0];  // replication bit is sent via rm field
  replicate_c = 1'b0;
  check_ah = 1'b0;  // whether set scalar AH encoding from MSB of rm_i

  // Scalar Rounding Modes - some ops encode inside RM but use smaller range
  if (!(fpu_rm_i inside {[3'b000 : 3'b100]})) fpu_rm_d = fpu_frm_i;

  // Vectorial ops always consult FRM
  if (fpu_vec_op_d) fpu_rm_d = fpu_frm_i;

  // Formats
  unique case (fpu_fmt_i)
    // FP32
    2'b00: fpu_dstfmt_d = fpnew_pkg::FP32;
    // FP64 or FP16ALT (vectorial)
    2'b01: fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64;
    // FP16 or FP16ALT (scalar)
    2'b10: begin
      if (!fpu_vec_op_d && fpu_rm_i == 3'b101) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
      else fpu_dstfmt_d = fpnew_pkg::FP16;
    end
    // FP8
    default: fpu_dstfmt_d = fpnew_pkg::FP8;
  endcase

  // By default, set src=dst
  fpu_srcfmt_d = fpu_dstfmt_d;

  // Operations (this can modify the rounding mode field and format!)
  unique case (fu_data_i.operation)
    // Addition
    FADD: begin
      fpu_op_d    = fpnew_pkg::ADD;
      replicate_c = 1'b1;  // second operand is in C
    end
    // Subtraction is modified ADD
    FSUB: begin
      fpu_op_d     = fpnew_pkg::ADD;
      fpu_op_mod_d = 1'b1;
      replicate_c  = 1'b1;  // second operand is in C
    end
    // Multiplication
    FMUL: fpu_op_d = fpnew_pkg::MUL;
    // Division
    FDIV: fpu_op_d = fpnew_pkg::DIV;
    // Min/Max - OP is encoded in rm (000-001)
    FMIN_MAX: begin
      fpu_op_d = fpnew_pkg::MINMAX;
      fpu_rm_d = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
      check_ah = 1'b1;  // AH has RM MSB encoding
    end
    // Square Root
    FSQRT: fpu_op_d = fpnew_pkg::SQRT;
    // Fused Multiply Add
    FMADD: fpu_op_d = fpnew_pkg::FMADD;
    // Fused Multiply Subtract is modified FMADD
    FMSUB: begin
      fpu_op_d     = fpnew_pkg::FMADD;
      fpu_op_mod_d = 1'b1;
    end
    // Fused Negated Multiply Subtract
    FNMSUB: fpu_op_d = fpnew_pkg::FNMSUB;
    // Fused Negated Multiply Add is modified FNMSUB
    FNMADD: begin
      fpu_op_d     = fpnew_pkg::FNMSUB;
      fpu_op_mod_d = 1'b1;
    end
    // Float to Int Cast - Op encoded in lowest two imm bits or rm
    FCVT_F2I: begin
      fpu_op_d = fpnew_pkg::F2I;
      // Vectorial Ops encoded in R bit
      if (fpu_vec_op_d) begin
        fpu_op_mod_d    = fpu_rm_i[0];
        vec_replication = 1'b0;  // no replication, R bit used for op
        unique case (fpu_fmt_i)
          2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
          2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
          2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
        endcase
        // Scalar casts encoded in imm
      end else begin
        fpu_op_mod_d = operand_c_i[0];
        if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
        else fpu_ifmt_d = fpnew_pkg::INT32;
      end
    end
    // Int to Float Cast - Op encoded in lowest two imm bits or rm
    FCVT_I2F: begin
      fpu_op_d = fpnew_pkg::I2F;
      // Vectorial Ops encoded in R bit
      if (fpu_vec_op_d) begin
        fpu_op_mod_d    = fpu_rm_i[0];
        vec_replication = 1'b0;  // no replication, R bit used for op
        unique case (fpu_fmt_i)
          2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
          2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
          2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
        endcase
        // Scalar casts encoded in imm
      end else begin
        fpu_op_mod_d = operand_c_i[0];
        if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
        else fpu_ifmt_d = fpnew_pkg::INT32;
      end
    end
    // Float to Float Cast - Source format encoded in lowest two/three imm bits
    FCVT_F2F: begin
      fpu_op_d = fpnew_pkg::F2F;
      // Vectorial ops encoded in lowest two imm bits
      if (fpu_vec_op_d) begin
        vec_replication = 1'b0;  // no replication for casts (not needed)
        unique case (operand_c_i[1:0])
          2'b00: fpu_srcfmt_d = fpnew_pkg::FP32;
          2'b01: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
          2'b10: fpu_srcfmt_d = fpnew_pkg::FP16;
          2'b11: fpu_srcfmt_d = fpnew_pkg::FP8;
        endcase
        // Scalar ops encoded in lowest three imm bits
      end else begin
        unique case (operand_c_i[2:0])
          3'b000: fpu_srcfmt_d = fpnew_pkg::FP32;
          3'b001: fpu_srcfmt_d = fpnew_pkg::FP64;
          3'b010: fpu_srcfmt_d = fpnew_pkg::FP16;
          3'b110: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
          3'b011: fpu_srcfmt_d = fpnew_pkg::FP8;
          default: ;  // Do nothing
        endcase
      end
    end
    // Scalar Sign Injection - op encoded in rm (000-010)
    FSGNJ: begin
      fpu_op_d = fpnew_pkg::SGNJ;
      fpu_rm_d = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
      check_ah = 1'b1;  // AH has RM MSB encoding
    end
    // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding
    FMV_F2X: begin
      fpu_op_d        = fpnew_pkg::SGNJ;
      fpu_rm_d        = 3'b011;  // passthrough without checking nan-box
      fpu_op_mod_d    = 1'b1;  // no NaN-Boxing
      check_ah        = 1'b1;  // AH has RM MSB encoding
      vec_replication = 1'b0;  // no replication, we set second operand
    end
    // Move from GPR to FPR - mapped to NOP since no recoding
    FMV_X2F: begin
      fpu_op_d        = fpnew_pkg::SGNJ;
      fpu_rm_d        = 3'b011;  // passthrough without checking nan-box
      check_ah        = 1'b1;  // AH has RM MSB encoding
      vec_replication = 1'b0;  // no replication, we set second operand
    end
    // Scalar Comparisons - op encoded in rm (000-010)
    FCMP: begin
      fpu_op_d = fpnew_pkg::CMP;
      fpu_rm_d = {1'b0, fpu_rm_i[1:0]};  // mask out AH encoding bit
      check_ah = 1'b1;  // AH has RM MSB encoding
    end
    // Classification
    FCLASS: begin
      fpu_op_d = fpnew_pkg::CLASSIFY;
      // mask out AH encoding bit - CLASS doesn't care anyways
      fpu_rm_d = {1'b0, fpu_rm_i[1:0]};
      check_ah = 1'b1;  // AH has RM MSB encoding
    end
    // Vectorial Minimum - set up scalar encoding in rm
    VFMIN: begin
      fpu_op_d = fpnew_pkg::MINMAX;
      fpu_rm_d = 3'b000;  // min
    end
    // Vectorial Maximum - set up scalar encoding in rm
    VFMAX: begin
      fpu_op_d = fpnew_pkg::MINMAX;
      fpu_rm_d = 3'b001;  // max
    end
    // Vectorial Sign Injection - set up scalar encoding in rm
    VFSGNJ: begin
      fpu_op_d = fpnew_pkg::SGNJ;
      fpu_rm_d = 3'b000;  // sgnj
    end
    // Vectorial Negated Sign Injection - set up scalar encoding in rm
    VFSGNJN: begin
      fpu_op_d = fpnew_pkg::SGNJ;
      fpu_rm_d = 3'b001;  // sgnjn
    end
    // Vectorial Xored Sign Injection - set up scalar encoding in rm
    VFSGNJX: begin
      fpu_op_d = fpnew_pkg::SGNJ;
      fpu_rm_d = 3'b010;  // sgnjx
    end
    // Vectorial Equals - set up scalar encoding in rm
    VFEQ: begin
      fpu_op_d = fpnew_pkg::CMP;
      fpu_rm_d = 3'b010;  // eq
    end
    // Vectorial Not Equals - set up scalar encoding in rm
    VFNE: begin
      fpu_op_d     = fpnew_pkg::CMP;
      fpu_op_mod_d = 1'b1;  // invert output
      fpu_rm_d     = 3'b010;  // eq
    end
    // Vectorial Less Than - set up scalar encoding in rm
    VFLT: begin
      fpu_op_d = fpnew_pkg::CMP;
      fpu_rm_d = 3'b001;  // lt
    end
    // Vectorial Greater or Equal - set up scalar encoding in rm
    VFGE: begin
      fpu_op_d     = fpnew_pkg::CMP;
      fpu_op_mod_d = 1'b1;  // invert output
      fpu_rm_d     = 3'b001;  // lt
    end
    // Vectorial Less or Equal - set up scalar encoding in rm
    VFLE: begin
      fpu_op_d = fpnew_pkg::CMP;
      fpu_rm_d = 3'b000;  // le
    end
    // Vectorial Greater Than - set up scalar encoding in rm
    VFGT: begin
      fpu_op_d     = fpnew_pkg::CMP;
      fpu_op_mod_d = 1'b1;  // invert output
      fpu_rm_d     = 3'b000;  // le
    end
    // Vectorial Convert-and-Pack from FP32, lower 4 entries
    VFCPKAB_S: begin
      fpu_op_d        = fpnew_pkg::CPKAB;
      fpu_op_mod_d    = fpu_rm_i[0];  // A/B selection from R bit
      vec_replication = 1'b0;  // no replication, R bit used for op
      fpu_srcfmt_d    = fpnew_pkg::FP32;  // Cast from FP32
    end
    // Vectorial Convert-and-Pack from FP32, upper 4 entries
    VFCPKCD_S: begin
      fpu_op_d        = fpnew_pkg::CPKCD;
      fpu_op_mod_d    = fpu_rm_i[0];  // C/D selection from R bit
      vec_replication = 1'b0;  // no replication, R bit used for op
      fpu_srcfmt_d    = fpnew_pkg::FP32;  // Cast from FP32
    end
    // Vectorial Convert-and-Pack from FP64, lower 4 entries
    VFCPKAB_D: begin
      fpu_op_d        = fpnew_pkg::CPKAB;
      fpu_op_mod_d    = fpu_rm_i[0];  // A/B selection from R bit
      vec_replication = 1'b0;  // no replication, R bit used for op
      fpu_srcfmt_d    = fpnew_pkg::FP64;  // Cast from FP64
    end
    // Vectorial Convert-and-Pack from FP64, upper 4 entries
    VFCPKCD_D: begin
      fpu_op_d        = fpnew_pkg::CPKCD;
      fpu_op_mod_d    = fpu_rm_i[0];  // C/D selection from R bit
      vec_replication = 1'b0;  // no replication, R bit used for op
      fpu_srcfmt_d    = fpnew_pkg::FP64;  // Cast from FP64
    end
    // No changes per default
    default: ;  //nothing
  endcase

  // Scalar AH encoding fixing: the rm MSB selects FP16ALT as destination
  // format. The source format assigned above is left as it was.
  if (!fpu_vec_op_d && check_ah && fpu_rm_i[2]) fpu_dstfmt_d = fpnew_pkg::FP16ALT;

  // Replication: copy the lowest element of B (or C for ADD/SUB) into every
  // lane; the lane count depends on whether the datapath is RVD-wide.
  if (fpu_vec_op_d && vec_replication) begin
    if (replicate_c) begin
      unique case (fpu_dstfmt_d)
        fpnew_pkg::FP32: operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i;
        fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
        operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}};
        fpnew_pkg::FP8:
        operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}};
        default: ;  // Do nothing
      endcase  // fpu_dstfmt_d
    end else begin
      unique case (fpu_dstfmt_d)
        fpnew_pkg::FP32: operand_b_d = CVA6Cfg.RVD ? {2{operand_b_i[31:0]}} : operand_b_i;
        fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
        operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}};
        fpnew_pkg::FP8:
        operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}};
        default: ;  // Do nothing
      endcase  // fpu_dstfmt_d
    end
  end
end
|
|
|
|
|
|
//---------------------------------------------------------
|
|
// Upstream protocol inversion: InValid depends on InReady
|
|
//---------------------------------------------------------
|
|
|
|
// Input handshake FSM. In READY the request is forwarded straight to the FPU;
// if the FPU cannot take it, the request is parked in the hold register and the
// FSM moves to STALL, where it replays the parked request until it is accepted.
always_comb begin : p_inputFSM
  // Defaults: state unchanged, operands taken directly from the inputs, hold
  // register idle, nothing offered to the FPU, upstream not ready.
  state_d      = state_q;
  use_hold     = 1'b0;
  hold_inputs  = 1'b0;
  fpu_in_valid = 1'b0;
  fpu_ready_o  = 1'b0;

  unique case (state_q)
    // Accepting instructions
    READY: begin
      fpu_in_valid = fpu_valid_i;  // pass the request valid through
      if (fpu_valid_i && !fpu_in_ready) begin
        // Request present but the FPU is busy: park it, withhold ready
        hold_inputs = 1'b1;
        state_d     = STALL;
      end else begin
        fpu_ready_o = 1'b1;  // behave as if the FPU were ready
      end
    end
    // Upstream is stalled while the parked request waits for the FPU
    STALL: begin
      use_hold     = 1'b1;  // operands come from the hold register
      fpu_in_valid = 1'b1;  // the parked request is always valid
      if (fpu_in_ready) begin
        // Parked request consumed: release upstream again
        state_d     = READY;
        fpu_ready_o = 1'b1;
      end
    end
    // Any other encoding keeps the defaults
    default: ;
  endcase

  // A flush always sends the FSM back to READY
  if (flush_i) state_d = READY;
end
|
|
|
|
// FSM state register: asynchronous active-low reset to READY
always_ff @(posedge clk_i or negedge rst_ni) begin : fp_state_reg
  if (!rst_ni) state_q <= READY;
  else state_q <= state_d;
end

// Hold register: captures the translated request whenever the FSM asserts
// hold_inputs, and keeps its contents otherwise. Cleared by reset.
always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg
  if (!rst_ni) begin
    operand_a_q  <= '0;
    operand_b_q  <= '0;
    operand_c_q  <= '0;
    fpu_op_q     <= '0;
    fpu_op_mod_q <= '0;
    fpu_srcfmt_q <= '0;
    fpu_dstfmt_q <= '0;
    fpu_ifmt_q   <= '0;
    fpu_rm_q     <= '0;
    fpu_vec_op_q <= '0;
    fpu_tag_q    <= '0;
  end else if (hold_inputs) begin
    operand_a_q  <= operand_a_d;
    operand_b_q  <= operand_b_d;
    operand_c_q  <= operand_c_d;
    fpu_op_q     <= fpu_op_d;
    fpu_op_mod_q <= fpu_op_mod_d;
    fpu_srcfmt_q <= fpu_srcfmt_d;
    fpu_dstfmt_q <= fpu_dstfmt_d;
    fpu_ifmt_q   <= fpu_ifmt_d;
    fpu_rm_q     <= fpu_rm_d;
    fpu_vec_op_q <= fpu_vec_op_d;
    fpu_tag_q    <= fpu_tag_d;
  end
end
|
|
|
|
// FPU input selection: the hold register while the FSM replays a parked
// request (use_hold), otherwise the freshly translated inputs.
always_comb begin : p_hold_mux
  operand_a  = use_hold ? operand_a_q : operand_a_d;
  operand_b  = use_hold ? operand_b_q : operand_b_d;
  operand_c  = use_hold ? operand_c_q : operand_c_d;
  fpu_op     = use_hold ? fpu_op_q : fpu_op_d;
  fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d;
  fpu_srcfmt = use_hold ? fpu_srcfmt_q : fpu_srcfmt_d;
  fpu_dstfmt = use_hold ? fpu_dstfmt_q : fpu_dstfmt_d;
  fpu_ifmt   = use_hold ? fpu_ifmt_q : fpu_ifmt_d;
  fpu_rm     = use_hold ? fpu_rm_q : fpu_rm_d;
  fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d;
  fpu_tag    = use_hold ? fpu_tag_q : fpu_tag_d;
end
|
|
|
|
// Bundle the three operands into one packed array for the FPU:
// index 0 = A, index 1 = B, index 2 = C.
logic [2:0][CVA6Cfg.FLen-1:0] fpu_operands;

assign fpu_operands = {operand_c, operand_b, operand_a};
|
|
|
|
//---------------
|
|
// FPU instance
|
|
//---------------
|
|
|
|
// FPnew instance, configured by the feature/implementation constants above and
// tagged with the core's transaction id.
fpnew_top #(
  .Features      (FPU_FEATURES),
  .Implementation(FPU_IMPLEMENTATION),
  .TagType       (logic [CVA6Cfg.TRANS_ID_BITS-1:0])
) i_fpnew_bulk (
  .clk_i,
  .rst_ni,
  // Request: the selected (direct or held) operation, cast to the FPnew enums
  .operands_i    (fpu_operands),
  .rnd_mode_i    (fpnew_pkg::roundmode_e'(fpu_rm)),
  .op_i          (fpnew_pkg::operation_e'(fpu_op)),
  .op_mod_i      (fpu_op_mod),
  .src_fmt_i     (fpnew_pkg::fp_format_e'(fpu_srcfmt)),
  .dst_fmt_i     (fpnew_pkg::fp_format_e'(fpu_dstfmt)),
  .int_fmt_i     (fpnew_pkg::int_format_e'(fpu_ifmt)),
  .vectorial_op_i(fpu_vec_op),
  .tag_i         (fpu_tag),
  .simd_mask_i   (1'b1),
  // Input handshake, driven by the input FSM
  .in_valid_i    (fpu_in_valid),
  .in_ready_o    (fpu_in_ready),
  .flush_i,
  // Response: result, status flags and tag go to the module outputs
  .result_o,
  .status_o      (fpu_status),
  .tag_o         (fpu_trans_id_o),
  .out_valid_o   (fpu_out_valid),
  .out_ready_i   (fpu_out_ready),
  .busy_o        (  /* unused */)
);
|
|
|
|
// Pack status flag into exception cause, tval ignored in wb, exception is always invalid.
// The status flags are zero-extended to whatever width the cause field has,
// rather than assuming a 64-bit cause.
localparam int unsigned CAUSE_BITS = $bits(fpu_exception_o.cause);
assign fpu_exception_o.cause = CAUSE_BITS'(fpu_status);
assign fpu_exception_o.valid = 1'b0;
assign fpu_exception_o.tval  = '0;

// Downstream write port is dedicated to FPU and always ready
assign fpu_out_ready = 1'b1;

// Downstream valid from unit
assign fpu_valid_o = fpu_out_valid;
|
|
|
|
end else begin : no_fpu_gen
  // No FPU configured: drive every output to an inactive value instead of
  // leaving it floating. The wrapper never reports ready or valid.
  assign fpu_ready_o     = 1'b0;
  assign fpu_trans_id_o  = '0;
  assign result_o        = '0;
  assign fpu_valid_o     = 1'b0;
  assign fpu_exception_o = '0;
end
endmodule
|