FpNew RTL fix

2025-04-23 21:39:10 -04:00 · 2024-06-14 16:29:52 -07:00 · 2024-06-14 16:29:52 -07:00 · 4a11c1ec0f
commit 4a11c1ec0f
parent c5e57ce5d5
3 changed files with 43 additions and 85 deletions
--- a/README.md
+++ b/README.md
@@ -88,4 +88,9 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
 - Making changes to Makefiles in your source tree or adding new folders will require executing the "configure" script again to get it propagated into your build folder.
    ```sh
    $ ../configure
-    ```
+    ```
+- To debug the GPU, you can generate a "run.log" trace. See /docs/debugging.md for more information.
+    ```sh
+    $ ./ci/blackbox.sh --app=demo --debug=3
+    ```
+- For additional information, check out the /docs folder.
--- a/ci/regression.sh.in
+++ b/ci/regression.sh.in
@@ -78,7 +78,6 @@ isa()

    if [ "$XLEN" == "64" ]
    then
-
        make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
        make -C tests/riscv/isa run-rtlsim-64d

--- a/hw/rtl/fpu/VX_fpu_fpnew.sv
+++ b/hw/rtl/fpu/VX_fpu_fpnew.sv
@@ -1,10 +1,10 @@
 // Copyright © 2019-2023
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 // http://www.apache.org/licenses/LICENSE-2.0
-// 
+//
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -15,12 +15,12 @@

 `ifdef FPU_FPNEW

-module VX_fpu_fpnew 
-    import VX_fpu_pkg::*; 
-    import fpnew_pkg::*; 
-    import cf_math_pkg::*; 
+module VX_fpu_fpnew
+    import VX_fpu_pkg::*;
+    import fpnew_pkg::*;
+    import cf_math_pkg::*;
    import defs_div_sqrt_mvp::*;
-#(      
+#(
    parameter NUM_LANES = 1,
    parameter TAG_WIDTH = 1,
    parameter OUT_BUF   = 0
@@ -34,7 +34,7 @@ module VX_fpu_fpnew
    input wire [NUM_LANES-1:0] mask_in,

    input wire [TAG_WIDTH-1:0] tag_in,
-    
+
    input wire [`INST_FPU_BITS-1:0] op_type,
    input wire [`INST_FMT_BITS-1:0] fmt,
    input wire [`INST_FRM_BITS-1:0] frm,
@@ -42,7 +42,7 @@ module VX_fpu_fpnew
    input wire [NUM_LANES-1:0][`XLEN-1:0]  dataa,
    input wire [NUM_LANES-1:0][`XLEN-1:0]  datab,
    input wire [NUM_LANES-1:0][`XLEN-1:0]  datac,
-    output wire [NUM_LANES-1:0][`XLEN-1:0] result, 
+    output wire [NUM_LANES-1:0][`XLEN-1:0] result,

    output wire has_fflags,
    output wire [`FP_FLAGS_BITS-1:0] fflags,
@@ -51,32 +51,27 @@ module VX_fpu_fpnew

    input wire  ready_out,
    output wire valid_out
-);  
+);
    localparam LATENCY_FDIVSQRT = `MAX(`LATENCY_FDIV, `LATENCY_FSQRT);
    localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH;

-`ifdef XLEN_64
-    // use scalar configuration for mixed formats
    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
        Width:         unsigned'(`XLEN),
        EnableVectors: 1'b0,
+    `ifdef XLEN_64
        EnableNanBox:  1'b1,
    `ifdef FLEN_64
        FpFmtMask:     5'b11000,
    `else
-        FpFmtMask:     5'b11000, // TODO: added FP64 to fix CVT bug in FpNew
+        FpFmtMask:     5'b11000, // TODO: adding FP64 to fix CVT bug in FpNew
    `endif
        IntFmtMask:    4'b0011
-    };
-`else
-    localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
-        Width:         unsigned'(`XLEN * NUM_LANES),
-        EnableVectors: 1'b1,
+    `else
        EnableNanBox:  1'b0,
        FpFmtMask:     5'b10000,
        IntFmtMask:    4'b0010
+    `endif
    };
-`endif

    localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
      PipeRegs:'{'{`LATENCY_FMA, 0, 0, 0, 0}, // ADDMUL
@@ -89,12 +84,12 @@ module VX_fpu_fpnew
                  '{default: fpnew_pkg::MERGED}}, // CONV
      PipeConfig: fpnew_pkg::DISTRIBUTED
    };
-    
-    wire fpu_ready_in, fpu_valid_in;    
+
+    wire fpu_ready_in, fpu_valid_in;
    wire fpu_ready_out, fpu_valid_out;

    reg [TAG_WIDTH-1:0] fpu_tag_in, fpu_tag_out;
-    
+
    reg [2:0][NUM_LANES-1:0][`XLEN-1:0] fpu_operands;

    wire [NUM_LANES-1:0][`XLEN-1:0] fpu_result;
@@ -111,12 +106,12 @@ module VX_fpu_fpnew

    always @(*) begin
        fpu_op          = 'x;
-        fpu_rnd         = frm;  
-        fpu_op_mod      = 0;        
+        fpu_rnd         = frm;
+        fpu_op_mod      = 0;
        fpu_has_fflags  = 1;
        fpu_operands[0] = dataa;
        fpu_operands[1] = datab;
-        fpu_operands[2] = datac;    
+        fpu_operands[2] = datac;
        fpu_dst_fmt     = fpnew_pkg::FP32;
        fpu_int_fmt     = fpnew_pkg::INT32;

@@ -133,24 +128,24 @@ module VX_fpu_fpnew
    `endif

        fpu_src_fmt = fpu_dst_fmt;
-        
+
        case (op_type)
            `INST_FPU_ADD: begin
                fpu_op = fpnew_pkg::ADD;
                fpu_operands[1] = dataa;
                fpu_operands[2] = datab;
            end
-            `INST_FPU_SUB: begin 
-                fpu_op = fpnew_pkg::ADD; 
+            `INST_FPU_SUB: begin
+                fpu_op = fpnew_pkg::ADD;
                fpu_operands[1] = dataa;
                fpu_operands[2] = datab;
-                fpu_op_mod = 1; 
+                fpu_op_mod = 1;
            end
            `INST_FPU_MUL:   begin fpu_op = fpnew_pkg::MUL; end
            `INST_FPU_DIV:   begin fpu_op = fpnew_pkg::DIV; end
            `INST_FPU_SQRT:  begin fpu_op = fpnew_pkg::SQRT; end
            `INST_FPU_MADD:  begin fpu_op = fpnew_pkg::FMADD; end
-            `INST_FPU_MSUB:  begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end            
+            `INST_FPU_MSUB:  begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
            `INST_FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
            `INST_FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
        `ifdef FLEN_64
@@ -164,30 +159,18 @@ module VX_fpu_fpnew
            `INST_FPU_MISC:begin
                case (frm)
                    0,1,2: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = {1'b0, frm[1:0]}; fpu_has_fflags = 0; end // FSGNJ
-                    3:     begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end // CLASS                     
+                    3:     begin fpu_op = fpnew_pkg::CLASSIFY; fpu_has_fflags = 0; end // CLASS
                    4,5:   begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = 3'b011; fpu_op_mod = ~frm[0]; fpu_has_fflags = 0; end // FMV.X.W, FMV.W.X
                    6,7:   begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = {2'b00, frm[0]}; end // MIN, MAX
-                endcase    
+                endcase
            end
            default:;
        endcase
-
-    `ifdef FPU_RV64F
-        // apply nan-boxing to floating-point operands
-        for (integer i = 0; i < NUM_LANES; ++i) begin                    
-            if (op_type != `INST_FPU_I2F && op_type != `INST_FPU_U2F) begin
-                fpu_operands[0][i] |= 64'hffffffff00000000;
-            end
-            fpu_operands[1][i] |= 64'hffffffff00000000;
-            fpu_operands[2][i] |= 64'hffffffff00000000;        
-        end
-    `endif
    end

-`ifdef XLEN_64
    `UNUSED_VAR (mask_in)
    for (genvar i = 0; i < NUM_LANES; ++i) begin
-        wire [(TAG_WIDTH+1)-1:0] fpu_tag;        
+        wire [(TAG_WIDTH+1)-1:0] fpu_tag;
        wire fpu_valid_out_uq;
        wire fpu_ready_in_uq;
        fpnew_pkg::status_t fpu_status_uq;
@@ -196,10 +179,12 @@ module VX_fpu_fpnew
        `UNUSED_VAR (fpu_ready_in_uq)
        `UNUSED_VAR (fpu_status_uq)

-        fpnew_top #( 
+        fpnew_top #(
            .Features       (FPU_FEATURES),
            .Implementation (FPU_IMPLEMENTATION),
-            .TagType        (logic[(TAG_WIDTH+1)-1:0])
+            .TagType        (logic[(TAG_WIDTH+1)-1:0]),
+            .TrueSIMDClass  (1),
+            .EnableSIMDMask (1)
        ) fpnew_core (
            .clk_i          (clk),
            .rst_ni         (~reset),
@@ -210,9 +195,9 @@ module VX_fpu_fpnew
            .src_fmt_i      (fpu_src_fmt),
            .dst_fmt_i      (fpu_dst_fmt),
            .int_fmt_i      (fpu_int_fmt),
-            `UNUSED_PIN (vectorial_op_i),
-            `UNUSED_PIN (simd_mask_i),
-            .tag_i          ({fpu_tag_in, fpu_has_fflags}),            
+            .vectorial_op_i (1'b0),
+            .simd_mask_i    (mask_in[i]),
+            .tag_i          ({fpu_tag_in, fpu_has_fflags}),
            .in_valid_i     (fpu_valid_in),
            .in_ready_o     (fpu_ready_in_uq),
            .flush_i        (reset),
@@ -223,45 +208,14 @@ module VX_fpu_fpnew
            .out_ready_i    (fpu_ready_out),
            `UNUSED_PIN (busy_o)
        );
-        
+
        if (i == 0) begin
-            assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag;            
+            assign {fpu_tag_out, fpu_has_fflags_out} = fpu_tag;
            assign fpu_valid_out = fpu_valid_out_uq;
            assign fpu_ready_in = fpu_ready_in_uq;
            assign fpu_status = fpu_status_uq;
        end
    end
-`else
-    fpnew_top #( 
-        .Features       (FPU_FEATURES),
-        .Implementation (FPU_IMPLEMENTATION),
-        .TagType        (logic[(TAG_WIDTH+1)-1:0]),
-        .TrueSIMDClass  (1),
-        .EnableSIMDMask (1)
-    ) fpnew_core (
-        .clk_i          (clk),
-        .rst_ni         (~reset),
-        .operands_i     (fpu_operands),
-        .rnd_mode_i     (fpnew_pkg::roundmode_e'(fpu_rnd)),
-        .op_i           (fpu_op),
-        .op_mod_i       (fpu_op_mod),
-        .src_fmt_i      (fpu_src_fmt),
-        .dst_fmt_i      (fpu_dst_fmt),
-        .int_fmt_i      (fpu_int_fmt),
-        .vectorial_op_i (1'b1),
-        .simd_mask_i    (mask_in),
-        .tag_i          ({fpu_tag_in, fpu_has_fflags}),        
-        .in_valid_i     (fpu_valid_in),
-        .in_ready_o     (fpu_ready_in),
-        .flush_i        (reset),
-        .result_o       (fpu_result),
-        .status_o       (fpu_status),
-        .tag_o          ({fpu_tag_out, fpu_has_fflags_out}),
-        .out_valid_o    (fpu_valid_out),
-        .out_ready_i    (fpu_ready_out),
-        `UNUSED_PIN (busy_o)
-    );
-`endif

    assign fpu_valid_in = valid_in;
    assign ready_in = fpu_ready_in;