cva6/core/fpu_wrap.sv
Florian Zaruba 9f40ad57cb
Make D independent on xlen (#2005)
A 64-bit core might very well support just single-precision.
2024-05-12 20:15:50 +02:00

571 lines
22 KiB
Systemverilog

// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Stefan Mach, ETH Zurich
// Date: 12.04.2018
// Description: Wrapper for the floating-point unit
// fpu_wrap: adapts the issue-side floating-point interface (fu_data_i plus the
// format / rounding-mode side-band inputs) to an fpnew_top instance. All logic
// except the FSM state declaration lives in the fpu_gen generate block, which
// is only elaborated when CVA6Cfg.FpPresent is set.
module fpu_wrap
import ariane_pkg::*;
#(
// Core configuration. This module reads FpPresent, FLen, RVF, RVD, XF16,
// XF16ALT, XF8, XFVec and TRANS_ID_BITS from it.
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
// Exception record type; the members .cause, .valid and .tval are driven here.
parameter type exception_t = logic,
// Functional-unit request type; the members .operand_a, .operand_b, .imm,
// .operation, .fu and .trans_id are read here.
parameter type fu_data_t = logic
) (
// Clock for the hold register, the input FSM and the FPU instance
input logic clk_i,
// Asynchronous active-low reset
input logic rst_ni,
// Returns the input FSM to READY and is forwarded to the FPU instance
input logic flush_i,
// Request valid from the issue side
input logic fpu_valid_i,
// Ready towards the issue side; low while a request cannot be accepted
output logic fpu_ready_o,
// Request payload (operands, operation, functional unit, transaction id)
input fu_data_t fu_data_i,
// Format selector, decoded into the destination FP format
input logic [ 1:0] fpu_fmt_i,
// Rounding-mode field of the request; several operations reuse it as an
// operation selector, replication bit or alternate-half-precision flag
input logic [ 2:0] fpu_rm_i,
// Rounding mode used instead of fpu_rm_i when fpu_rm_i is outside 000..100
// and for all vectorial operations
input logic [ 2:0] fpu_frm_i,
// NOTE(review): not referenced anywhere in this module
input logic [ 6:0] fpu_prec_i,
// Transaction id returned by the FPU together with the result
output logic [CVA6Cfg.TRANS_ID_BITS-1:0] fpu_trans_id_o,
// Result driven directly by the FPU instance
output logic [ CVA6Cfg.FLen-1:0] result_o,
// Result valid, taken from the FPU output handshake
output logic fpu_valid_o,
// FPU status flags packed into .cause; .valid is tied low
output exception_t fpu_exception_o
);
// Workaround: the FSM state type is declared at module scope, outside the
// fpu_gen generate block. The original note says compilation might otherwise
// issue an error if FLEN=0.
//   READY - requests are forwarded straight to the FPU
//   STALL - a request is parked in the hold register until the FPU takes it
enum logic {
READY,
STALL
}
state_q, state_d;
if (CVA6Cfg.FpPresent) begin : fpu_gen
// Request operands truncated to the FPU data width (FLen). Operand C is taken
// from the immediate field of the request.
logic [CVA6Cfg.FLen-1:0] operand_a_i;
logic [CVA6Cfg.FLen-1:0] operand_b_i;
logic [CVA6Cfg.FLen-1:0] operand_c_i;
assign operand_a_i = fu_data_i.operand_a[CVA6Cfg.FLen-1:0];
assign operand_b_i = fu_data_i.operand_b[CVA6Cfg.FLen-1:0];
assign operand_c_i = fu_data_i.imm[CVA6Cfg.FLen-1:0];
//-----------------------------------
// FPnew config from FPnew package
//-----------------------------------
// Field widths of the operation / format selectors handed to the FPU
localparam OPBITS = fpnew_pkg::OP_BITS;
localparam FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam IFMTBITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
// Features (enabled formats, vectors etc.)
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: unsigned'(CVA6Cfg.FLen), // parameterized using CVA6Cfg.FLen
EnableVectors: CVA6Cfg.XFVec,
EnableNanBox: 1'b1,
// One enable bit per FP format; presumably in the order FP32, FP64, FP16,
// FP8, FP16ALT (same order as the PipeRegs comment below) - TODO confirm
// against fpnew_pkg
FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT},
// One enable bit per integer format. The first two entries are only enabled
// together with vector support and the matching small FP format; the last two
// are always enabled. Presumably ordered INT8, INT16, INT32, INT64 - TODO
// confirm against fpnew_pkg
IntFmtMask: {
CVA6Cfg.XFVec && CVA6Cfg.XF8,
CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT),
1'b1,
1'b1
}
};
// Implementation (number of registers etc)
// The LAT_* constants are not declared in this file; presumably they come from
// the imported ariane_pkg.
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
// One row per operation group, one entry per format
PipeRegs: '{ // FP32, FP64, FP16, FP8, FP16alt
'{
unsigned'(LAT_COMP_FP32),
unsigned'(LAT_COMP_FP64),
unsigned'(LAT_COMP_FP16),
unsigned'(LAT_COMP_FP8),
unsigned'(LAT_COMP_FP16ALT)
}, // ADDMUL (row above)
'{default: unsigned'(LAT_DIVSQRT)}, // DIVSQRT
'{default: unsigned'(LAT_NONCOMP)}, // NONCOMP
'{default: unsigned'(LAT_CONV)} // CONV
}, // end of PipeRegs
UnitTypes: '{
'{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED} // CONV
}, // end of UnitTypes
PipeConfig: fpnew_pkg::DISTRIBUTED
};
//-------------------------------------------------
// Inputs to the FPU and protocol inversion buffer
//-------------------------------------------------
// Naming convention for the request signals below:
//   *_d          value translated from the current request (input_translation)
//   *_q          copy captured in the hold register while the FSM stalls
//   (no suffix)  value actually presented to the FPU (muxed by use_hold)
logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a;
logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b;
logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c;
logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op;
logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod;
logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt;
logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt;
logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt;
logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm;
logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op;
logic [CVA6Cfg.TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag;
// Handshake with the FPU instance (input side and output side)
logic fpu_in_ready, fpu_in_valid;
logic fpu_out_ready, fpu_out_valid;
// Status flags reported by the FPU alongside each result
logic [4:0] fpu_status;
// FSM to handle protocol inversion
logic hold_inputs; // capture the *_d values into the *_q hold register
logic use_hold; // feed the FPU from the hold register instead of *_d
//-----------------------------
// Translate inputs
//-----------------------------
// Translates the issue-side request (fu_data_i, fpu_fmt_i, fpu_rm_i, fpu_frm_i)
// into the *_d operation, modifier, format, rounding-mode, tag and operand
// values for the FPU.
// Statement order matters; later steps read or override earlier results:
//   1. defaults
//   2. rounding-mode fallback to fpu_frm_i
//   3. destination format from fpu_fmt_i (source format defaults to the same)
//   4. per-operation overrides (may rewrite rounding mode and formats)
//   5. scalar alternate-half (AH) format fix from fpu_rm_i[2]
//   6. operand replication for vectorial operations
always_comb begin : input_translation
automatic logic vec_replication; // control honoring of replication flag
automatic logic replicate_c; // replicate operand C instead of B (for ADD/SUB)
automatic logic check_ah; // Decide for AH from RM field encoding
// Default Values
operand_a_d = operand_a_i;
operand_b_d = operand_b_i; // second operand; may be replaced by a replicated copy below
operand_c_d = operand_c_i; // immediates come through this port unless used as operand
fpu_op_d = fpnew_pkg::SGNJ; // sign injection by default
fpu_op_mod_d = 1'b0;
fpu_dstfmt_d = fpnew_pkg::FP32;
fpu_ifmt_d = fpnew_pkg::INT32;
fpu_rm_d = fpu_rm_i;
fpu_vec_op_d = fu_data_i.fu == FPU_VEC;
fpu_tag_d = fu_data_i.trans_id;
vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field
replicate_c = 1'b0;
check_ah = 1'b0; // whether set scalar AH encoding from MSB of rm_i
// Scalar Rounding Modes - some ops encode inside RM but use smaller range
// Any encoding outside 000..100 falls back to fpu_frm_i
if (!(fpu_rm_i inside {[3'b000 : 3'b100]})) fpu_rm_d = fpu_frm_i;
// Vectorial ops always consult FRM
if (fpu_vec_op_d) fpu_rm_d = fpu_frm_i;
// Formats
unique case (fpu_fmt_i)
// FP32
2'b00: fpu_dstfmt_d = fpnew_pkg::FP32;
// FP64 or FP16ALT (vectorial)
2'b01: fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64;
// FP16 or FP16ALT (scalar)
2'b10: begin
// Scalar ops select FP16ALT through the rm encoding 101
if (!fpu_vec_op_d && fpu_rm_i == 3'b101) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
else fpu_dstfmt_d = fpnew_pkg::FP16;
end
// FP8
default: fpu_dstfmt_d = fpnew_pkg::FP8;
endcase
// By default, set src=dst
fpu_srcfmt_d = fpu_dstfmt_d;
// Operations (this can modify the rounding mode field and format!)
unique case (fu_data_i.operation)
// Addition
FADD: begin
fpu_op_d = fpnew_pkg::ADD;
replicate_c = 1'b1; // second operand is in C
end
// Subtraction is modified ADD
FSUB: begin
fpu_op_d = fpnew_pkg::ADD;
fpu_op_mod_d = 1'b1;
replicate_c = 1'b1; // second operand is in C
end
// Multiplication
FMUL: fpu_op_d = fpnew_pkg::MUL;
// Division
FDIV: fpu_op_d = fpnew_pkg::DIV;
// Min/Max - OP is encoded in rm (000-001)
FMIN_MAX: begin
fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
end
// Square Root
FSQRT: fpu_op_d = fpnew_pkg::SQRT;
// Fused Multiply Add
FMADD: fpu_op_d = fpnew_pkg::FMADD;
// Fused Multiply Subtract is modified FMADD
FMSUB: begin
fpu_op_d = fpnew_pkg::FMADD;
fpu_op_mod_d = 1'b1;
end
// Fused Negated Multiply Subtract
FNMSUB: fpu_op_d = fpnew_pkg::FNMSUB;
// Fused Negated Multiply Add is modified FNMSUB
FNMADD: begin
fpu_op_d = fpnew_pkg::FNMSUB;
fpu_op_mod_d = 1'b1;
end
// Float to Int Cast - Op encoded in lowest two imm bits or rm
FCVT_F2I: begin
fpu_op_d = fpnew_pkg::F2I;
// Vectorial Ops encoded in R bit
if (fpu_vec_op_d) begin
fpu_op_mod_d = fpu_rm_i[0];
vec_replication = 1'b0; // no replication, R bit used for op
// Integer width follows the FP format selector
unique case (fpu_fmt_i)
2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
endcase
// Scalar casts encoded in imm
end else begin
// imm[0] drives the op modifier, imm[1] selects INT64 over INT32
fpu_op_mod_d = operand_c_i[0];
if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
else fpu_ifmt_d = fpnew_pkg::INT32;
end
end
// Int to Float Cast - Op encoded in lowest two imm bits or rm
FCVT_I2F: begin
fpu_op_d = fpnew_pkg::I2F;
// Vectorial Ops encoded in R bit
if (fpu_vec_op_d) begin
fpu_op_mod_d = fpu_rm_i[0];
vec_replication = 1'b0; // no replication, R bit used for op
// Integer width follows the FP format selector
unique case (fpu_fmt_i)
2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
endcase
// Scalar casts encoded in imm
end else begin
// imm[0] drives the op modifier, imm[1] selects INT64 over INT32
fpu_op_mod_d = operand_c_i[0];
if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
else fpu_ifmt_d = fpnew_pkg::INT32;
end
end
// Float to Float Cast - Source format encoded in lowest two/three imm bits
FCVT_F2F: begin
fpu_op_d = fpnew_pkg::F2F;
// Vectorial ops encoded in lowest two imm bits
if (fpu_vec_op_d) begin
vec_replication = 1'b0; // no replication for casts (not needed)
unique case (operand_c_i[1:0])
2'b00: fpu_srcfmt_d = fpnew_pkg::FP32;
2'b01: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
2'b10: fpu_srcfmt_d = fpnew_pkg::FP16;
2'b11: fpu_srcfmt_d = fpnew_pkg::FP8;
endcase
// Scalar ops encoded in lowest three imm bits
end else begin
unique case (operand_c_i[2:0])
3'b000: fpu_srcfmt_d = fpnew_pkg::FP32;
3'b001: fpu_srcfmt_d = fpnew_pkg::FP64;
3'b010: fpu_srcfmt_d = fpnew_pkg::FP16;
3'b110: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
3'b011: fpu_srcfmt_d = fpnew_pkg::FP8;
default: ; // Do nothing: source format stays equal to the destination format
endcase
end
end
// Scalar Sign Injection - op encoded in rm (000-010)
FSGNJ: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
end
// Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding
FMV_F2X: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b011; // passthrough without checking nan-box
fpu_op_mod_d = 1'b1; // no NaN-Boxing
check_ah = 1'b1; // AH has RM MSB encoding
vec_replication = 1'b0; // no replication, we set second operand
end
// Move from GPR to FPR - mapped to NOP since no recoding
FMV_X2F: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b011; // passthrough without checking nan-box
check_ah = 1'b1; // AH has RM MSB encoding
vec_replication = 1'b0; // no replication, we set second operand
end
// Scalar Comparisons - op encoded in rm (000-010)
FCMP: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding
end
// Classification
FCLASS: begin
fpu_op_d = fpnew_pkg::CLASSIFY;
fpu_rm_d = {
1'b0, fpu_rm_i[1:0]
}; // mask out AH encoding bit - CLASS doesn't care anyways
check_ah = 1'b1; // AH has RM MSB encoding
end
// Vectorial Minimum - set up scalar encoding in rm
VFMIN: begin
fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = 3'b000; // min
end
// Vectorial Maximum - set up scalar encoding in rm
VFMAX: begin
fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = 3'b001; // max
end
// Vectorial Sign Injection - set up scalar encoding in rm
VFSGNJ: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b000; // sgnj
end
// Vectorial Negated Sign Injection - set up scalar encoding in rm
VFSGNJN: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b001; // sgnjn
end
// Vectorial Xored Sign Injection - set up scalar encoding in rm
VFSGNJX: begin
fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b010; // sgnjx
end
// Vectorial Equals - set up scalar encoding in rm
VFEQ: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b010; // eq
end
// Vectorial Not Equals - set up scalar encoding in rm
VFNE: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b010; // eq
end
// Vectorial Less Than - set up scalar encoding in rm
VFLT: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b001; // lt
end
// Vectorial Greater or Equal - set up scalar encoding in rm
VFGE: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b001; // lt
end
// Vectorial Less or Equal - set up scalar encoding in rm
VFLE: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b000; // le
end
// Vectorial Greater Than - set up scalar encoding in rm
VFGT: begin
fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b000; // le
end
// Vectorial Convert-and-Pack from FP32, lower 4 entries
VFCPKAB_S: begin
fpu_op_d = fpnew_pkg::CPKAB;
fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32
end
// Vectorial Convert-and-Pack from FP32, upper 4 entries
VFCPKCD_S: begin
fpu_op_d = fpnew_pkg::CPKCD;
fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32
end
// Vectorial Convert-and-Pack from FP64, lower 4 entries
VFCPKAB_D: begin
fpu_op_d = fpnew_pkg::CPKAB;
fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64
end
// Vectorial Convert-and-Pack from FP64, upper 4 entries
VFCPKCD_D: begin
fpu_op_d = fpnew_pkg::CPKCD;
fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64
end
// No changes per default
default: ; //nothing
endcase
// Scalar AH encoding fixing
// Only operations that set check_ah above reinterpret rm[2] as "use FP16ALT"
if (!fpu_vec_op_d && check_ah) if (fpu_rm_i[2]) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
// Replication
// For vectorial ops with the replication bit still set, the lowest element of
// the second operand (C for ADD/SUB, B otherwise) is copied across the operand.
// The replication factor depends on CVA6Cfg.RVD; the narrower alternatives
// produce 32-bit patterns.
if (fpu_vec_op_d && vec_replication) begin
if (replicate_c) begin
unique case (fpu_dstfmt_d)
fpnew_pkg::FP32: operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i;
fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}};
fpnew_pkg::FP8:
operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}};
default: ; // Do nothing
endcase // fpu_dstfmt_d
end else begin
unique case (fpu_dstfmt_d)
fpnew_pkg::FP32: operand_b_d = CVA6Cfg.RVD ? {2{operand_b_i[31:0]}} : operand_b_i;
fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}};
fpnew_pkg::FP8:
operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}};
default: ; // Do nothing
endcase // fpu_dstfmt_d
end
end
end
//---------------------------------------------------------
// Upstream protocol inversion: InValid depends on InReady
//---------------------------------------------------------
// Input-side handshake FSM. The issue side expects to see "ready" before it
// raises "valid", while the FPU only reports whether it took a request once it
// has been presented. A request the FPU refuses is parked in the hold register
// and replayed from there until it is accepted.
always_comb begin : p_inputFSM
  // Defaults: remain in the current state, nothing offered, nothing captured
  state_d      = state_q;
  use_hold     = 1'b0;  // FPU is fed from the live request
  hold_inputs  = 1'b0;  // hold register keeps its contents
  fpu_in_valid = 1'b0;
  fpu_ready_o  = 1'b0;

  unique case (state_q)
    // Idle: pass the incoming request straight through to the FPU
    READY: begin
      fpu_in_valid = fpu_valid_i;
      if (fpu_valid_i && !fpu_in_ready) begin
        // The FPU refused the request: park it and stop accepting new ones
        state_d     = STALL;
        hold_inputs = 1'b1;
        fpu_ready_o = 1'b0;  // no token for the issue side
      end else begin
        fpu_ready_o = 1'b1;  // present ourselves as ready
      end
    end

    // A parked request is replayed from the hold register
    STALL: begin
      use_hold     = 1'b1;
      fpu_in_valid = 1'b1;
      if (fpu_in_ready) begin
        // Parked request consumed: resume normal operation
        state_d     = READY;
        fpu_ready_o = 1'b1;
      end
    end

    // Any other encoding: keep the defaults
    default: ;
  endcase

  // A flush always wins and returns the FSM to idle
  if (flush_i) state_d = READY;
end
// Buffer register and FSM state holding
// Sequential part of the input stage: the FSM state plus the hold register
// that keeps a translated request while the FPU is not ready for it.
always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg
  if (!rst_ni) begin
    // Asynchronous reset: idle FSM, cleared hold register
    state_q      <= READY;
    fpu_tag_q    <= '0;
    fpu_vec_op_q <= '0;
    fpu_rm_q     <= '0;
    fpu_ifmt_q   <= '0;
    fpu_dstfmt_q <= '0;
    fpu_srcfmt_q <= '0;
    fpu_op_mod_q <= '0;
    fpu_op_q     <= '0;
    operand_c_q  <= '0;
    operand_b_q  <= '0;
    operand_a_q  <= '0;
  end else begin
    // The hold register only loads when the FSM asks for it
    if (hold_inputs) begin
      fpu_tag_q    <= fpu_tag_d;
      fpu_vec_op_q <= fpu_vec_op_d;
      fpu_rm_q     <= fpu_rm_d;
      fpu_ifmt_q   <= fpu_ifmt_d;
      fpu_dstfmt_q <= fpu_dstfmt_d;
      fpu_srcfmt_q <= fpu_srcfmt_d;
      fpu_op_mod_q <= fpu_op_mod_d;
      fpu_op_q     <= fpu_op_d;
      operand_c_q  <= operand_c_d;
      operand_b_q  <= operand_b_d;
      operand_a_q  <= operand_a_d;
    end
    // The FSM state advances every cycle
    state_q <= state_d;
  end
end
// FPU input selection: while the FSM replays a parked request (use_hold set)
// every field comes from the hold register, otherwise from the freshly
// translated request.
always_comb begin : p_input_select
  operand_a  = use_hold ? operand_a_q  : operand_a_d;
  operand_b  = use_hold ? operand_b_q  : operand_b_d;
  operand_c  = use_hold ? operand_c_q  : operand_c_d;
  fpu_op     = use_hold ? fpu_op_q     : fpu_op_d;
  fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d;
  fpu_srcfmt = use_hold ? fpu_srcfmt_q : fpu_srcfmt_d;
  fpu_dstfmt = use_hold ? fpu_dstfmt_q : fpu_dstfmt_d;
  fpu_ifmt   = use_hold ? fpu_ifmt_q   : fpu_ifmt_d;
  fpu_rm     = use_hold ? fpu_rm_q     : fpu_rm_d;
  fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d;
  fpu_tag    = use_hold ? fpu_tag_q    : fpu_tag_d;
end
// Operands packed for the FPU port: index 0 = A, 1 = B, 2 = C
// (element 2 occupies the most significant bits of the packed array)
logic [2:0][CVA6Cfg.FLen-1:0] fpu_operands;
assign fpu_operands = {operand_c, operand_b, operand_a};
//---------------
// FPU instance
//---------------
// FPnew top-level instance. It receives the muxed request (live or replayed
// from the hold register) and drives result_o and fpu_trans_id_o directly.
// The enum-typed ports are fed through casts because the wrapper keeps those
// fields as plain bit vectors.
fpnew_top #(
  .Features      (FPU_FEATURES),
  .Implementation(FPU_IMPLEMENTATION),
  .TagType       (logic [CVA6Cfg.TRANS_ID_BITS-1:0])
) i_fpnew_bulk (
  .clk_i,
  .rst_ni,
  .operands_i    (fpu_operands),
  .rnd_mode_i    (fpnew_pkg::roundmode_e'(fpu_rm)),
  .op_i          (fpnew_pkg::operation_e'(fpu_op)),
  .op_mod_i      (fpu_op_mod),
  .src_fmt_i     (fpnew_pkg::fp_format_e'(fpu_srcfmt)),
  .dst_fmt_i     (fpnew_pkg::fp_format_e'(fpu_dstfmt)),
  .int_fmt_i     (fpnew_pkg::int_format_e'(fpu_ifmt)),
  .vectorial_op_i(fpu_vec_op),
  .tag_i         (fpu_tag),
  // All SIMD lanes active. The fill literal '1 adapts to the port width; the
  // previous 1'b1 would be zero-extended on a multi-lane mask port, marking
  // only lane 0 as active and triggering a port width mismatch.
  // NOTE(review): mask port assumed to be one bit per lane - confirm against
  // the vendored fpnew_top.
  .simd_mask_i   ('1),
  .in_valid_i    (fpu_in_valid),
  .in_ready_o    (fpu_in_ready),
  .flush_i,
  .result_o,
  .status_o      (fpu_status),
  .tag_o         (fpu_trans_id_o),
  .out_valid_o   (fpu_out_valid),
  .out_ready_i   (fpu_out_ready),
  .busy_o        ( /* unused */)
);
// Pack the FPU status flags into the low bits of the exception cause. The
// zero padding is sized from the cause field itself, so the expression fits
// whatever width exception_t declares instead of assuming a 64-bit cause.
// The exception is never flagged as valid and tval is cleared.
assign fpu_exception_o.cause = {
  {($bits(fpu_exception_o.cause) - $bits(fpu_status)) {1'b0}}, fpu_status
};
assign fpu_exception_o.valid = 1'b0;
assign fpu_exception_o.tval = '0;
// Downstream write port is dedicated to FPU and always ready
assign fpu_out_ready = 1'b1;
// Downstream valid from unit
assign fpu_valid_o = fpu_out_valid;
end
endmodule