FpNew RTL fix

This commit is contained in:
Blaise Tine 2024-06-14 16:29:52 -07:00
parent c5e57ce5d5
commit 4a11c1ec0f
3 changed files with 43 additions and 85 deletions

View file

@ -88,4 +88,9 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
- Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder.
```sh
$ ../configure
```
```
- To debug the GPU, you can generate a "run.log" trace. See /docs/debugging.md for more information.
```sh
$ ./ci/blackbox.sh --app=demo --debug=3
```
- For additional information, check out the /docs folder.

View file

@ -78,7 +78,6 @@ isa()
if [ "$XLEN" == "64" ]
then
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
make -C tests/riscv/isa run-rtlsim-64d

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,12 +15,12 @@
`ifdef FPU_FPNEW
module VX_fpu_fpnew
import VX_fpu_pkg::*;
import fpnew_pkg::*;
import cf_math_pkg::*;
module VX_fpu_fpnew
import VX_fpu_pkg::*;
import fpnew_pkg::*;
import cf_math_pkg::*;
import defs_div_sqrt_mvp::*;
#(
#(
parameter NUM_LANES = 1,
parameter TAG_WIDTH = 1,
parameter OUT_BUF = 0
@ -34,7 +34,7 @@ module VX_fpu_fpnew
input wire [NUM_LANES-1:0] mask_in,
input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_FMT_BITS-1:0] fmt,
input wire [`INST_FRM_BITS-1:0] frm,
@ -42,7 +42,7 @@ module VX_fpu_fpnew
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -51,32 +51,27 @@ module VX_fpu_fpnew
input wire ready_out,
output wire valid_out
);
);
localparam LATENCY_FDIVSQRT = `MAX(`LATENCY_FDIV, `LATENCY_FSQRT);
localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH;
`ifdef XLEN_64
// use scalar configuration for mixed formats
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: unsigned'(`XLEN),
EnableVectors: 1'b0,
`ifdef XLEN_64
EnableNanBox: 1'b1,
`ifdef FLEN_64
FpFmtMask: 5'b11000,
`else
FpFmtMask: 5'b11000, // TODO: added FP64 to fix CVT bug in FpNew
FpFmtMask: 5'b11000, // TODO: adding FP64 to fix CVT bug in FpNew
`endif
IntFmtMask: 4'b0011
};
`else
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: unsigned'(`XLEN * NUM_LANES),
EnableVectors: 1'b1,
`else
EnableNanBox: 1'b0,
FpFmtMask: 5'b10000,
IntFmtMask: 4'b0010
`endif
};
`endif
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
PipeRegs:'{'{`LATENCY_FMA, 0, 0, 0, 0}, // ADDMUL
@ -89,12 +84,12 @@ module VX_fpu_fpnew
'{default: fpnew_pkg::MERGED}}, // CONV
PipeConfig: fpnew_pkg::DISTRIBUTED
};
wire fpu_ready_in, fpu_valid_in;
wire fpu_ready_in, fpu_valid_in;
wire fpu_ready_out, fpu_valid_out;
reg [TAG_WIDTH-1:0] fpu_tag_in, fpu_tag_out;
reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands;
wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result;
@ -111,12 +106,12 @@ module VX_fpu_fpnew
always @(*) begin
fpu_op = 'x;
fpu_rnd = frm;
fpu_op_mod = 0;
fpu_rnd = frm;
fpu_op_mod = 0;
fpu_has_fflags = 1;
fpu_operands[0] = dataa;
fpu_operands[1] = datab;
fpu_operands[2] = datac;
fpu_operands[2] = datac;
fpu_dst_fmt = fpnew_pkg::FP32;
fpu_int_fmt = fpnew_pkg::INT32;
@ -133,24 +128,24 @@ module VX_fpu_fpnew
`endif
fpu_src_fmt = fpu_dst_fmt;
case (op_type)
`INST_FPU_ADD: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = dataa;
fpu_operands[2] = datab;
end
`INST_FPU_SUB: begin
fpu_op = fpnew_pkg::ADD;
`INST_FPU_SUB: begin
fpu_op = fpnew_pkg::ADD;
fpu_operands[1] = dataa;
fpu_operands[2] = datab;
fpu_op_mod = 1;
fpu_op_mod = 1;
end
`INST_FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
`INST_FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
`INST_FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
`INST_FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
`INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`INST_FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`ifdef FLEN_64
@ -164,30 +159,18 @@ module VX_fpu_fpnew
`INST_FPU_MISC:begin
case (frm)
0,1,2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = {1'b0, frm[1:0]}; fpu_has_fflags = 0; end // FSGNJ
3: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end // CLASS
3: begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end // CLASS
4,5: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = 3'b011; fpu_op_mod = ~frm[0]; fpu_has_fflags = 0; end // FMV.X.W, FMV.W.X
6,7: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = {2'b00, frm[0]}; end // MIN, MAX
endcase
endcase
end
default:;
endcase
`ifdef FPU_RV64F
// apply nan-boxing to floating-point operands
for (integer i = 0; i < NUM_LANES; ++i) begin
if (op_type != `INST_FPU_I2F && op_type != `INST_FPU_U2F) begin
fpu_operands[0][i] |= 64'hffffffff00000000;
end
fpu_operands[1][i] |= 64'hffffffff00000000;
fpu_operands[2][i] |= 64'hffffffff00000000;
end
`endif
end
`ifdef XLEN_64
`UNUSED_VAR (mask_in)
for (genvar i = 0; i < NUM_LANES; ++i) begin
wire [(TAG_WIDTH+1)-1:0] fpu_tag;
wire [(TAG_WIDTH+1)-1:0] fpu_tag;
wire fpu_valid_out_uq;
wire fpu_ready_in_uq;
fpnew_pkg::status_t fpu_status_uq;
@ -196,10 +179,12 @@ module VX_fpu_fpnew
`UNUSED_VAR (fpu_ready_in_uq)
`UNUSED_VAR (fpu_status_uq)
fpnew_top #(
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic[(TAG_WIDTH+1)-1:0])
.TagType (logic[(TAG_WIDTH+1)-1:0]),
.TrueSIMDClass (1),
.EnableSIMDMask (1)
) fpnew_core (
.clk_i (clk),
.rst_ni (~reset),
@ -210,9 +195,9 @@ module VX_fpu_fpnew
.src_fmt_i (fpu_src_fmt),
.dst_fmt_i (fpu_dst_fmt),
.int_fmt_i (fpu_int_fmt),
`UNUSED_PIN (vectorial_op_i),
`UNUSED_PIN (simd_mask_i),
.tag_i ({fpu_tag_in, fpu_has_fflags}),
.vectorial_op_i (1'b0),
.simd_mask_i (mask_in[i]),
.tag_i ({fpu_tag_in, fpu_has_fflags}),
.in_valid_i (fpu_valid_in),
.in_ready_o (fpu_ready_in_uq),
.flush_i (reset),
@ -223,45 +208,14 @@ module VX_fpu_fpnew
.out_ready_i (fpu_ready_out),
`UNUSED_PIN (busy_o)
);
if (i == 0) begin
assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag;
assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag;
assign fpu_valid_out = fpu_valid_out_uq;
assign fpu_ready_in = fpu_ready_in_uq;
assign fpu_status = fpu_status_uq;
end
end
`else
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic[(TAG_WIDTH+1)-1:0]),
.TrueSIMDClass (1),
.EnableSIMDMask (1)
) fpnew_core (
.clk_i (clk),
.rst_ni (~reset),
.operands_i (fpu_operands),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpu_op),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpu_src_fmt),
.dst_fmt_i (fpu_dst_fmt),
.int_fmt_i (fpu_int_fmt),
.vectorial_op_i (1'b1),
.simd_mask_i (mask_in),
.tag_i ({fpu_tag_in, fpu_has_fflags}),
.in_valid_i (fpu_valid_in),
.in_ready_o (fpu_ready_in),
.flush_i (reset),
.result_o (fpu_result),
.status_o (fpu_status),
.tag_o ({fpu_tag_out, fpu_has_fflags_out}),
.out_valid_o (fpu_valid_out),
.out_ready_i (fpu_ready_out),
`UNUSED_PIN (busy_o)
);
`endif
assign fpu_valid_in = valid_in;
assign ready_in = fpu_ready_in;