From 3c1bea11049ab3b539c9bc83a3189175119ba15d Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 12 Jul 2022 18:32:17 -0700 Subject: [PATCH] removed warnings and took a mux out of the critical path --- pipelined/config/rv64fp/wally-config.vh | 2 +- pipelined/config/shared/wally-shared.vh | 13 +++++++------ pipelined/regression/wave-fpu.do | 3 --- pipelined/src/fpu/divsqrt.sv | 6 +++--- pipelined/src/fpu/fmashiftcalc.sv | 3 ++- pipelined/src/fpu/postprocess.sv | 2 +- pipelined/src/fpu/srt-radix4.sv | 26 +++++++++++++------------ pipelined/src/fpu/srtfsm.sv | 6 +++--- pipelined/src/fpu/srtpreproc.sv | 3 +-- pipelined/src/generic/lzc.sv | 2 +- pipelined/testbench/testbench-fp.sv | 8 ++++---- synthDC/scripts/synth.tcl | 2 +- 12 files changed, 38 insertions(+), 38 deletions(-) diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index cc8d1b2b8..8f13b2e36 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,7 +32,7 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 32 +`define XLEN 64 // IEEE 754 compliance `define IEEE754 0 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index c064783c2..54fa7a9bd 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -101,14 +101,15 @@ `define CORRSHIFTSZ ((`DIVLEN+`NF+3) > (3*`NF+8) ? (`DIVLEN+`NF+3) : (3*`NF+6)) // division constants -`define RADIX 4 -`define DIVCOPIES 4 +`define RADIX 32'h4 +`define DIVCOPIES 32'h4 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF)) `define DIVRESLEN ((`NF>`XLEN) ? `DIVLEN+2 : `DIVLEN) -`define LOGR ((`RADIX==2) ? 1 : 2) -`define FPDUR $ceil($itor(`DIVRESLEN)/$itor(`LOGR*`DIVCOPIES)) -`define DURLEN ($clog2($rtoi(`FPDUR)+1)) -`define QLEN ($rtoi(`FPDUR)*`LOGR*`DIVCOPIES) +`define LOGR ((`RADIX==2) ? 32'h1 : 32'h2) +// FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) +`define FPDUR ((`DIVRESLEN+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)) +`define DURLEN ($clog2(`FPDUR+1)) +`define QLEN (`FPDUR*`LOGR*`DIVCOPIES) `define USE_SRAM 0 diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 58f782bd6..9a3d7e061 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -24,9 +24,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/qsel4/* add wave -group {Divide} -group inter0 -noupdate /testbenchfp/srtradix4/genblk1[0]/divinteration/otfc4/* -add wave -group {Divide} -group inter1 -noupdate /testbenchfp/srtradix4/genblk1[1]/divinteration/* -add wave -group {Divide} -group inter2 -noupdate /testbenchfp/srtradix4/genblk1[2]/divinteration/* -add wave -group {Divide} -group inter3 -noupdate /testbenchfp/srtradix4/genblk1[3]/divinteration/* add wave -group {Divide} -noupdate /testbenchfp/srtpreproc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/srtfsm/* diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index c4f09aea5..8420baa13 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -52,7 +52,7 @@ module divsqrt( // output logic [`XLEN-1:0] RemM, ); - logic [`DIVLEN+3:0] WSN, WCN; + logic [`DIVLEN+3:0] NextWSN, NextWCN; logic [`DIVLEN+3:0] WS, WC; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DIVLEN-1:0] X; @@ -61,8 +61,8 @@ module divsqrt( srtpreproc srtpreproc(.XManE, .Dur, .YManE,.X,.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, + srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, .XInfE, .YInfE, .DivNegStickyE(DivNegStickyM), .EarlyTermShiftE(EarlyTermShiftM)); - srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, + srtradix4 srtradix4(.clk, .FmtE, .X,.Dpreproc, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .XExpE, .YExpE, .XZeroE, .YZeroE, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index d4898e806..3c286b50f 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -53,7 +53,8 @@ module fmashiftcalc( assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent - assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + -{{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNCnt} - 1 + (`NE+2)'(`NF+4); + // ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4 + assign NormSumExp = FmaKillProd ? {2'b0, Ze[`NE-1:1], Ze[0]&~ZDenorm} : FmaPe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 18452abd0..30945532a 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -29,7 +29,7 @@ `include "wally-config.vh" -module postprocess( +module postprocess ( // general signals input logic Xs, Ys, // input signs input logic [`NE-1:0] Ze, // input exponents diff --git a/pipelined/src/fpu/srt-radix4.sv b/pipelined/src/fpu/srt-radix4.sv index 1c7b96489..5a7e96e2a 100644 --- a/pipelined/src/fpu/srt-radix4.sv +++ b/pipelined/src/fpu/srt-radix4.sv @@ -41,7 +41,7 @@ module srtradix4( input logic [`DIVLEN-1:0] Dpreproc, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, output logic [`QLEN-1:0] Quot, - output logic [`DIVLEN+3:0] WSN, WCN, + output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] FirstWS, FirstWC, output logic [`NE+1:0] DivCalcExpM, output logic [`XLEN-1:0] Rem @@ -58,11 +58,12 @@ module srtradix4( logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; /* verilator lint_on UNOPTFLAT */ + logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2; logic [`NE+1:0] DivCalcExp; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; - logic [`QLEN-1:0] QMux, QMMux; + logic [`QLEN-1:0] QMMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -72,9 +73,11 @@ module srtradix4( // - otherwise load WSA into the flipflop // - the assumed one is added to D since it's always normalized (and X/0 is a special case handeled by result selection) // - XZeroE is used as the assumed one to avoid creating a sticky bit - all other numbers are normalized - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {3'b000, ~XZeroE, X}, DivStart, WSN); + assign NextWSN = {WSA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; + mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); flop #(`DIVLEN+4) wsflop(clk, WSN, WS[0]); - mux2 #(`DIVLEN+4) wcmux({WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); + mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); flop #(`DIVLEN+4) wcflop(clk, WCN, WC[0]); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); @@ -88,10 +91,10 @@ module srtradix4( genvar i; generate - for(i=0; i<`DIVCOPIES; i++) begin + for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin divinteration divinteration(.clk, .DivStart, .DivBusy, .D, .DBar, .D2, .DBar2, .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); - if(i<3) begin + if(i<(`DIVCOPIES-1)) begin assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; assign Q[i+1] = QNext[i]; @@ -101,9 +104,8 @@ module srtradix4( endgenerate // if starting a new divison set Q to 0 and QM to -1 - mux2 #(`QLEN) Qmux(QNext[`DIVCOPIES-1], {`QLEN{1'b0}}, DivStart, QMux); mux2 #(`QLEN) QMmux(QMNext[`DIVCOPIES-1], {`QLEN{1'b1}}, DivStart, QMMux); - flopen #(`QLEN) Qreg(clk, DivBusy|DivStart, QMux, Q[0]); // *** have to connect Quot directly to M stage + flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); flop #(`QLEN) QMreg(clk, QMMux, QM[0]); assign Quot = Q[0]; @@ -181,7 +183,7 @@ module qsel4 ( logic [3:0] QSel4[1023:0]; - initial begin + always_comb begin integer d, w, i, w2; for(d=0; d<8; d++) for(w=0; w<128; w++)begin @@ -270,9 +272,9 @@ module otfc4 ( // else if q = -2 Q = {QM, 10} QM = {QM, 01} // *** how does the 0 concatination numbers work? + assign QR = Q[`QLEN-3:0]; + assign QMR = QM[`QLEN-3:0]; // Shifted Q and QM always_comb begin - QR = Q[`QLEN-3:0]; - QMR = QM[`QLEN-3:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; QMNext = {QR, 2'b01}; @@ -352,5 +354,5 @@ module expcalc( endcase end // correct exponent for denormalized input's normalization shifts - assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$clog2(`NF+2){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$clog2(`NF+2){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; + assign DivCalcExp = ({2'b0, XExpE} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, YExpE} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index fc73cf710..21e35c365 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -33,7 +33,7 @@ module srtfsm( input logic clk, input logic reset, - input logic [`DIVLEN+3:0] WSN, WCN, WS, WC, + input logic [`DIVLEN+3:0] NextWSN, NextWCN, WS, WC, input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, @@ -58,8 +58,8 @@ module srtfsm( //flopen #($clog2(`DIVLEN/2+3)) durflop(clk, DivStart, CalcDur, Dur); assign DivBusy = (state == BUSY); - assign WZero = ((WSN^WCN)=={WSN[`DIVLEN+2:0]|WCN[`DIVLEN+2:0], 1'b0}); - assign DivStickyE = ~WZero; + assign WZero = ((NextWSN^NextWCN)=={NextWSN[`DIVLEN+2:0]|NextWCN[`DIVLEN+2:0], 1'b0}); + assign DivStickyE = |W; assign DivDone = (state == DONE); assign W = WC+WS; assign DivNegStickyE = W[`DIVLEN+3]; //*** is there a better way to do this??? diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index fa76c0511..7386332f8 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -63,8 +63,7 @@ module srtpreproc ( assign X = PreprocX; assign Dpreproc = PreprocY; - - assign Dur = (`DURLEN)'($rtoi(`FPDUR)); + assign Dur = (`DURLEN)'(`FPDUR); // assign intExp = zeroCntB - zeroCntA + 1; // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv index 9f6e59811..71aabbc61 100644 --- a/pipelined/src/generic/lzc.sv +++ b/pipelined/src/generic/lzc.sv @@ -34,7 +34,7 @@ module lzc #(parameter WIDTH = 1) ( /* verilator lint_off CMPCONST */ /* verilator lint_off WIDTH */ - int i; + logic [31:0] i; always_comb begin i = 0; while (~num[WIDTH-1-i] & (i < WIDTH)) i = i+1; // search for leading one diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index ba14499e0..2aec1ab15 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -87,8 +87,8 @@ module testbenchfp; logic reset = 1'b0; logic [`DIVLEN-1:0] DivX; logic [`DIVLEN-1:0] Dpreproc; - logic [`DIVLEN+3:0] WSN, WS; - logic [`DIVLEN+3:0] WCN, WC; + logic [`DIVLEN+3:0] NextWSN, WS; + logic [`DIVLEN+3:0] NextWCN, WC; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; logic [`DURLEN-1:0] Dur; @@ -696,9 +696,9 @@ module testbenchfp; .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); srtpreproc srtpreproc(.XManE(XMan), .Dur, .YManE(YMan),.X(DivX),.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .WSN, .WCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), + srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .DivDone, .clk, .DivStart, .StallM(1'b0), .StallE(1'b0), .XZeroE(XZero), .YZeroE(YZero), .DivStickyE(DivSticky), .XNaNE(XNaN), .YNaNE(YNaN), .XInfE(XInf), .YInfE(YInf), .DivNegStickyE(DivNegSticky), .EarlyTermShiftE(EarlyTermShift)); - srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .WSN, .WCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), + srtradix4 srtradix4(.clk, .FmtE(ModFmt), .X(DivX),.Dpreproc, .DivBusy, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart, .XExpE(XExp), .YExpE(YExp), .XZeroE(XZero), .YZeroE(YZero), .Quot, .Rem(), .DivCalcExpM(DivCalcExp)); assign CmpFlg[3:0] = 0; diff --git a/synthDC/scripts/synth.tcl b/synthDC/scripts/synth.tcl index 9f2b46478..251522dc8 100755 --- a/synthDC/scripts/synth.tcl +++ b/synthDC/scripts/synth.tcl @@ -347,7 +347,7 @@ redirect -append $filename { report_timing -capacitance -transition_time -nets - redirect -append $filename { echo "\n\n\n//// Critical paths through fma2 ////\n\n\n" } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {postprocess/*} -nworst 1 } redirect -append $filename { echo "\n\n\n//// Critical paths through fpdiv ////\n\n\n" } -redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fdivsqrt/*} -nworst 1 } +redirect -append $filename { report_timing -capacitance -transition_time -nets -through {divsqrt/*} -nworst 1 } redirect -append $filename { echo "\n\n\n//// Critical paths through fcvt ////\n\n\n" } redirect -append $filename { report_timing -capacitance -transition_time -nets -through {fcvt/*} -nworst 1 }