fpu compare simplification, minor cleanup

2025-04-20 03:47:20 -04:00 · 2022-03-29 17:11:28 +00:00 · 2022-03-29 17:11:28 +00:00 · 049c55769a
commit 049c55769a
parent ad106e7130
10 changed files with 76 additions and 48 deletions
--- a/addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
+++ b/addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a
--- a/addins/embench-iot
+++ b/addins/embench-iot
@ -1 +1 @@
-Subproject commit 261a65e0a2d3e8d62d81b1d8fe7e309a096bc6a9
+Subproject commit 2d2aaa7b85c60219c591555b647dfa1785ffe1b3
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@ -1 +1 @@
-Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
+Subproject commit effd553a6a91ed9b0ba251796a8a44505a45174f
--- a/addins/riscv-dv
+++ b/addins/riscv-dv
@ -1 +1 @@
-Subproject commit a7e27bc046405f0dbcde091be99f5a5d564e2172
+Subproject commit cb4295f9ce5da2881d7746015a6105adb8f09071
--- a/addins/riscv-tests
+++ b/addins/riscv-tests
@ -1 +1 @@
-Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7
+Subproject commit 3e2bf06b071a77ae62c09bf07c5229d1f9397d94
--- a/pipelined/src/fpu/fcmp.sv
+++ b/pipelined/src/fpu/fcmp.sv
@ -23,40 +23,32 @@ module fcmp (
   output logic [`FLEN-1:0]   CmpResE         // compare resilt
   );

-   logic LT, EQ; // is X < or > or = Y
-
-   // X is less than Y:
-   //    Signs:
-   //       X      Y    answer
-   //      pos    pos    idk - keep checking
-   //      pos    neg    no
-   //      neg    pos    yes
-   //      neg    neg    idk - keep checking
-   //    Exponent 
-   //       - if XExp < YExp
-   //             - if negitive - no
-   //             - if positive - yes
-   //       - otherwise keep checking
-   //    Mantissa
-   //       - XMan < YMan then
-   //             - if negitive - no
-   //             - if positive - yes
-   // note: LT does -0 < 0
-   //*** compare Exp and Man together
-   assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
+   logic LTabs, LT, EQ; // is X < or > or = Y
+   logic BothZeroE, EitherNaNE, EitherSNaNE;
+   
+   assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
+   assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
+   //assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]});
+   //assign LT = XInt < YInt;
+//   assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
   assign EQ = (FSrcXE == FSrcYE);

+   assign BothZeroE = XZeroE&YZeroE;
+   assign EitherNaNE = XNaNE|YNaNE;
+   assign EitherSNaNE = XSNaNE|YSNaNE;
+
+
   // flags
   //    Min/Max - if an input is a signaling NaN set invalid flag
   //    LT/LE - signaling - sets invalid if NaN input
   //    EQ - quiet - sets invalid if signaling NaN input
   always_comb begin
      case (FOpCtrlE[2:0])
-         3'b111: CmpNVE = XSNaNE|YSNaNE;//min 
-         3'b101: CmpNVE = XSNaNE|YSNaNE;//max
-         3'b010: CmpNVE = XSNaNE|YSNaNE;//equal
-         3'b001: CmpNVE = XNaNE|YNaNE;//less than
-         3'b011: CmpNVE = XNaNE|YNaNE;//less than or equal
+         3'b111: CmpNVE = EitherSNaNE;//min 
+         3'b101: CmpNVE = EitherSNaNE;//max
+         3'b010: CmpNVE = EitherSNaNE;//equal
+         3'b001: CmpNVE = EitherNaNE;//less than
+         3'b011: CmpNVE = EitherNaNE;//less than or equal
         default: CmpNVE = 1'b0;
      endcase
   end 
@ -71,24 +63,22 @@ module fcmp (
   //    - inf = inf and -inf = -inf
   //    - return 0 if comparison with NaN (unordered)

-   logic [`FLEN-1:0] QNaNX, QNaNY;
-    if(`IEEE754) begin
-        assign QNaNX = FmtE ? {XSgnE, XExpE, 1'b1, XManE[`NF-2:0]} : {{32{1'b1}}, XSgnE, XExpE[7:0], 1'b1, XManE[50:29]};
-        assign QNaNY = FmtE ? {YSgnE, YExpE, 1'b1, YManE[`NF-2:0]} : {{32{1'b1}}, YSgnE, YExpE[7:0], 1'b1, YManE[50:29]};
-    end else begin
-        assign QNaNX = FmtE ? {1'b0, XExpE, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, XExpE[7:0], 1'b1, 22'b0};
-        assign QNaNY = FmtE ? {1'b0, YExpE, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, YExpE[7:0], 1'b1, 22'b0};
-    end
+   logic [`FLEN-1:0] QNaN;
+   // fmin/fmax of two NaNs returns a quiet NaN of the appropriate size
+   // for IEEE, return the payload of X
+   // for RISC-V, return the canonical NaN
+   if(`IEEE754) assign QNaN = FmtE ? {XSgnE, XExpE, 1'b1, XManE[`NF-2:0]} : {{32{1'b1}}, XSgnE, XExpE[7:0], 1'b1, XManE[50:29]};
+   else         assign QNaN = FmtE ? {1'b0, XExpE, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, XExpE[7:0], 1'b1, 22'b0};
 
   always_comb begin
      case (FOpCtrlE[2:0])
-         3'b111: CmpResE = XNaNE ? YNaNE ? QNaNX : FSrcYE // Min
+         3'b111: CmpResE = XNaNE ? YNaNE ? QNaN : FSrcYE // Min
                                 : YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
-         3'b101: CmpResE = XNaNE ? YNaNE ? QNaNX : FSrcYE // Max
+         3'b101: CmpResE = XNaNE ? YNaNE ? QNaN : FSrcYE // Max
                                 : YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE;
-         3'b010: CmpResE = {63'b0, (EQ|(XZeroE&YZeroE))&~(XNaNE|YNaNE)}; // Equal
-         3'b001: CmpResE = {63'b0, LT&~(XZeroE&YZeroE)&~(XNaNE|YNaNE)}; // Less than
-         3'b011: CmpResE = {63'b0, (LT|EQ|(XZeroE&YZeroE))&~(XNaNE|YNaNE)}; // Less than or equal
+         3'b010: CmpResE = {63'b0, (EQ|BothZeroE) & ~EitherNaNE}; // Equal
+         3'b001: CmpResE = {63'b0, LT & ~BothZeroE & ~EitherNaNE}; // Less than
+         3'b011: CmpResE = {63'b0, (LT|EQ|BothZeroE) & ~EitherNaNE}; // Less than or equal
         default: CmpResE = 64'b0;
      endcase
   end 
--- a/pipelined/src/ifu/BTBPredictor.sv
+++ b/pipelined/src/ifu/BTBPredictor.sv
@ -102,6 +102,7 @@ module BTBPredictor
  // Another optimization may be using a PC relative address.
  // *** need to add forwarding.

+  // *** optimize for byte write enables
  SRAM2P1R1W #(Depth, `XLEN+5) memory(.clk(clk),
          .reset(reset),
          .RA1(LookUpPCIndex),
--- a/pipelined/src/lsu/swbytemask.sv
+++ b/pipelined/src/lsu/swbytemask.sv
@ -1,5 +1,5 @@
 ///////////////////////////////////////////
-// ram.sv
+// swbytemask.sv
 //
 // Written: David_Harris@hmc.edu 9 January 2021
 // Modified: 
--- a/pipelined/src/privileged/trap.sv
+++ b/pipelined/src/privileged/trap.sv
@ -102,7 +102,7 @@ module trap (
  if(`VECTORED_INTERRUPTS_SUPPORTED) begin:vec
      always_comb
        if (PrivilegedTrapVector[1:0] == 2'b01 & CauseM[`XLEN-1] == 1)
-          PrivilegedVectoredTrapVector = {PrivilegedTrapVector[`XLEN-1:2] + CauseM[`XLEN-5:0], 2'b00};
+          PrivilegedVectoredTrapVector = {PrivilegedTrapVector[`XLEN-1:2] + CauseM[`XLEN-3:0], 2'b00};
        else
          PrivilegedVectoredTrapVector = {PrivilegedTrapVector[`XLEN-1:2], 2'b00};
  end
--- a/synthDC/hdl/wally-shared.vh
+++ b/synthDC/hdl/wally-shared.vh
@ -50,10 +50,47 @@
 // Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
 `define PMPCFG_ENTRIES (`PMP_ENTRIES/8)

+
+// Floating-point half-precision
+`define ZFH_SUPPORTED 0
+
+// Floating point constants for Quad, Double, Single, and Half precisions
+`define Q_LEN 128
+`define Q_NE 15
+`define Q_NF 112
+`define Q_BIAS 16383
+`define D_LEN 64
+`define D_NE 11
+`define D_NF 52
+`define D_BIAS 1023
+`define S_LEN 32
+`define S_NE 8
+`define S_NF 23
+`define S_BIAS 127
+`define H_LEN 16
+`define H_NE 5
+`define H_NF 10
+`define H_BIAS 15
+
 // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
-`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
-`define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
-`define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
+`define FLEN (`Q_SUPPORTED ? `Q_LEN  : `D_SUPPORTED ? `D_LEN  : `F_SUPPORTED ? `S_LEN  : `H_LEN)
+`define NE   (`Q_SUPPORTED ? `Q_NE   : `D_SUPPORTED ? `D_NE   : `F_SUPPORTED ? `S_NE   : `H_NE)
+`define NF   (`Q_SUPPORTED ? `Q_NF   : `D_SUPPORTED ? `D_NF   : `F_SUPPORTED ? `S_NF   : `H_NF)
+`define FMT  (`Q_SUPPORTED ? 3       : `D_SUPPORTED ? 1       : `F_SUPPORTED ? 0       : 2)
+`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS)
+
+// Floating point constants needed for FPU parameterization
+`define FPSIZES (`Q_SUPPORTED+`D_SUPPORTED+`F_SUPPORTED+`ZFH_SUPPORTED)
+`define LEN1  ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN   : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_LEN  : `H_LEN)
+`define NE1   ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE   : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE  : `H_NE)
+`define NF1   ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF  : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF)
+`define FMT1  ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? 1        : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 0       : 2)
+`define BIAS1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_BIAS  : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_BIAS : `H_BIAS)
+`define LEN2  ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_LEN   : `H_LEN)
+`define NE2   ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NE   : `H_NE)
+`define NF2   ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NF  : `H_NF)
+`define FMT2  ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 0        : 2)
+`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS  : `H_BIAS)

 // Disable spurious Verilator warnings