From 3c1bea11049ab3b539c9bc83a3189175119ba15d Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Tue, 12 Jul 2022 18:32:17 -0700
Subject: [PATCH] removed warnings and took a mux out of the critical path

---
 pipelined/config/rv64fp/wally-config.vh |  2 +-
 pipelined/config/shared/wally-shared.vh | 13 +++++++------
 pipelined/regression/wave-fpu.do        |  3 ---
 pipelined/src/fpu/divsqrt.sv            |  6 +++---
 pipelined/src/fpu/fmashiftcalc.sv       |  3 ++-
 pipelined/src/fpu/postprocess.sv        |  2 +-
 pipelined/src/fpu/srt-radix4.sv         | 26 +++++++++++++------------
 pipelined/src/fpu/srtfsm.sv             |  6 +++---
 pipelined/src/fpu/srtpreproc.sv         |  3 +--
 pipelined/src/generic/lzc.sv            |  2 +-
 pipelined/testbench/testbench-fp.sv     |  8 ++++----
 synthDC/scripts/synth.tcl               |  2 +-
 12 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh
index cc8d1b2b8..8f13b2e36 100644
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@@ -32,7 +32,7 @@
 `define DESIGN_COMPILER 0
 
 // RV32 or RV64: XLEN = 32 or 64
-`define XLEN 32
+`define XLEN 64
 
 // IEEE 754 compliance
 `define IEEE754 0
diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index c064783c2..54fa7a9bd 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -101,14 +101,15 @@
 `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6))
 
 // division constants
-`define RADIX 4
-`define DIVCOPIES 4
+`define RADIX 32'h4
+`define DIVCOPIES 32'h4
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF))
 `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN)
-`define LOGR ((`RADIX==2) ? 1 : 2)
-`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES))
-`define DURLEN ($clog2($rtoi(`FPDUR)+1))
-`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES)
+`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2)
+// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES))
+`define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES))
+`define DURLEN ($clog2(`FPDUR+1))
+`define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
 
 
 `define USE_SRAM 0
diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 58f782bd6..9a3d7e061 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -24,9 +24,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/*
 add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/*
 add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/*
 add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/*
-add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/*
-add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/*
-add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/*
 add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/srtfsm/*
diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index c4f09aea5..8420baa13 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -52,7 +52,7 @@ module divsqrt(
 //   output logic [`XLEN-1:0] RemM,
 );
 
-  logic [`DIVLEN+3:0]  WSN, WCN;
+  logic [`DIVLEN+3:0]  NextWSN, NextWCN;
   logic [`DIVLEN+3:0]  WS, WC;
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
   logic [`DIVLEN-1:0] X;
@@ -61,8 +61,8 @@ module divsqrt(
 
   srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt);
 
-  srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
+  srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
                 .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM));
-  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
+  srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE,
                 .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index d4898e806..3c286b50f 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -53,7 +53,8 @@ module fmashiftcalc(
     assign FmaSZero = ~(|FmaSm);
 
     // calculate the sum's exponent
-    assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4);
+    //                                                                      ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4
+    assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
 
     //convert the sum's exponent into the proper percision
     if (`FPSIZES == 1) begin
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 18452abd0..30945532a 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -29,7 +29,7 @@
 
 `include "wally-config.vh"
 
-module postprocess(
+module postprocess (
     // general signals
     input logic                             Xs, Ys,  // input signs
     input logic  [`NE-1:0]                  Ze, // input exponents
diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv
index 1c7b96489..5a7e96e2a 100644
--- a/pipelined/src/fpu/srt-radix4.sv
+++ b/pipelined/src/fpu/srt-radix4.sv
@@ -41,7 +41,7 @@ module srtradix4(
   input logic [`DIVLEN-1:0] Dpreproc,
   input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
   output logic [`QLEN-1:0] Quot,
-  output logic [`DIVLEN+3:0]  WSN, WCN,
+  output logic [`DIVLEN+3:0]  NextWSN, NextWCN,
   output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
   output logic  [`NE+1:0] DivCalcExpM,
   output logic [`XLEN-1:0] Rem
@@ -58,11 +58,12 @@ module srtradix4(
   logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
   logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
  /* verilator lint_on UNOPTFLAT */
+  logic [`DIVLEN+3:0]  WSN, WCN;
   logic [`DIVLEN+3:0]  D, DBar, D2, DBar2;
   logic [`NE+1:0] DivCalcExp;
   logic [$clog2(`XLEN+1)-1:0] intExp;
   logic           intSign;
-  logic [`QLEN-1:0] QMux, QMMux;
+  logic [`QLEN-1:0] QMMux;
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -72,9 +73,11 @@ module srtradix4(
   //  - otherwise load WSA into the flipflop
   //  - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection)
   //  - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized
-  mux2   #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN);
+  assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
+  assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
+  mux2   #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
   flop   #(`DIVLEN+4) wsflop(clk, WSN, WS[0]);
-  mux2   #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN);
+  mux2   #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
   flop   #(`DIVLEN+4) wcflop(clk, WCN, WC[0]);
   flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
   flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
@@ -88,10 +91,10 @@ module srtradix4(
 
   genvar i;
   generate
-    for(i=0; i<`DIVCOPIES; i++) begin
+    for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin
       divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, 
       .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
-      if(i<3) begin 
+      if(i<(`DIVCOPIES-1)) begin 
         assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
         assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
         assign Q[i+1] = QNext[i];
@@ -101,9 +104,8 @@ module srtradix4(
   endgenerate
 
   // if starting a new divison set Q to 0 and QM to -1
-  mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux);
   mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux);
-  flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage
+  flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
   flop #(`QLEN) QMreg(clk, QMMux, QM[0]);
 
   assign Quot = Q[0];
@@ -181,7 +183,7 @@ module qsel4 (
 
 	logic [3:0] QSel4[1023:0];
 
-  initial begin 
+  always_comb begin 
     integer d, w, i, w2;
     for(d=0; d<8; d++)
       for(w=0; w<128; w++)begin
@@ -270,9 +272,9 @@ module otfc4 (
 		// else if 	q = -2	Q = {QM, 10} 	QM = {QM, 01}
     // *** how does the 0 concatination numbers work?
 
+  assign QR  = Q[`QLEN-3:0];
+  assign QMR = QM[`QLEN-3:0];     // Shifted Q and QM
   always_comb begin
-    QR  = Q[`QLEN-3:0];
-    QMR = QM[`QLEN-3:0];     // Shift Q and QM
     if (q[3]) begin // +2
       QNext  = {QR,  2'b10};
       QMNext = {QR,  2'b01};
@@ -352,5 +354,5 @@ module expcalc(
             endcase
     end
     // correct exponent for denormalized input's normalization shifts
-    assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
+    assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
     endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv
index fc73cf710..21e35c365 100644
--- a/pipelined/src/fpu/srtfsm.sv
+++ b/pipelined/src/fpu/srtfsm.sv
@@ -33,7 +33,7 @@
 module srtfsm(
   input  logic clk, 
   input  logic reset, 
-  input logic [`DIVLEN+3:0] WSN, WCN, WS, WC,
+  input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC,
   input  logic XInfE, YInfE, 
   input  logic XZeroE, YZeroE, 
   input  logic XNaNE, YNaNE, 
@@ -58,8 +58,8 @@ module srtfsm(
 
   //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur);
   assign DivBusy = (state == BUSY);
-  assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0});
-  assign DivStickyE = ~WZero;
+  assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0});
+  assign DivStickyE = |W;
   assign DivDone = (state == DONE);
   assign W = WC+WS;
   assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this???
diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv
index fa76c0511..7386332f8 100644
--- a/pipelined/src/fpu/srtpreproc.sv
+++ b/pipelined/src/fpu/srtpreproc.sv
@@ -63,8 +63,7 @@ module srtpreproc (
   
   assign X = PreprocX;
   assign Dpreproc = PreprocY;
-  
-  assign Dur = (`DURLEN)'($rtoi(`FPDUR));
+  assign Dur = (`DURLEN)'(`FPDUR);
   // assign intExp = zeroCntB - zeroCntA + 1;
   // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv
index 9f6e59811..71aabbc61 100644
--- a/pipelined/src/generic/lzc.sv
+++ b/pipelined/src/generic/lzc.sv
@@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) (
 /* verilator lint_off CMPCONST */
 /* verilator lint_off WIDTH */
     
-    int i;
+    logic [31:0] i;
     always_comb begin
         i = 0;
         while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1;  // search for leading one
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index ba14499e0..2aec1ab15 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -87,8 +87,8 @@ module testbenchfp;
   logic reset = 1'b0;
   logic [`DIVLEN-1:0]    DivX;
   logic [`DIVLEN-1:0]  Dpreproc;
-  logic [`DIVLEN+3:0]  WSN, WS;
-  logic [`DIVLEN+3:0]  WCN, WC;
+  logic [`DIVLEN+3:0]  NextWSN, WS;
+  logic [`DIVLEN+3:0]  NextWCN, WC;
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
   logic [`DURLEN-1:0] Dur;
 
@@ -696,9 +696,9 @@ module testbenchfp;
               .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
               .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
   srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt);
-  srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
+  srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN),
                 .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift));
-  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
+  srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero),
                 .Quot, .Rem(), .DivCalcExpM(DivCalcExp));
 
   assign CmpFlg[3:0] = 0;
diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl
index 9f2b46478..251522dc8 100755
--- a/synthDC/scripts/synth.tcl
+++ b/synthDC/scripts/synth.tcl
@@ -347,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets -
 redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" }
 redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" }
-redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 }
+redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 }
 redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" }
 redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 }