diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 506cc7c50..ee09a4260 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -123,11 +123,11 @@ `define LOGRK ($clog2(`RK)) // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) // one iteration is required for the integer bit for minimally redundent radix-4 -`define FPDUR ((`DIVN+2+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4)) +`define FPDUR ((`DIVN+1+(`LOGR*`DIVCOPIES))/(`LOGR*`DIVCOPIES)+(`RADIX/4)) `define DURLEN ($clog2(`FPDUR+1)) `define QLEN (`FPDUR*`LOGR*`DIVCOPIES) `define DIVb (`QLEN-1) -`define DIVa (`DIVb+4-`XLEN) +`define DIVa (`DIVb+1-`XLEN) `define DIVBLEN ($clog2(`DIVb+1)-1) diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv index 65ea6cc54..14e7cfa99 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv @@ -64,12 +64,13 @@ module fdivsqrt( logic Firstun; logic WZero; logic SpecialCaseM; - logic [`DIVBLEN:0] n, p, m; - logic OTFCSwap; + logic [`DIVBLEN:0] n, m; + logic OTFCSwap, ALTB, BZero, As; /* ALTB, BZero and As are driven by fdivsqrtpreproc and read by fdivsqrtpostproc. NOTE(review): n and m are registered on DivStartE inside fdivsqrtpreproc (nflop/mflop); no register is visible for ALTB, BZero or As - confirm they are still valid when fdivsqrtpostproc uses them */ fdivsqrtpreproc fdivsqrtpreproc( .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), - .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .n, .p, .m, .OTFCSwap, + .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, + .n, .m, .OTFCSwap, .ALTB, .BZero, .As, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E); fdivsqrtfsm fdivsqrtfsm( .clk, .reset, .FmtE, .XsE, .SqrtE, @@ -84,6 +85,6 @@ module fdivsqrt( fdivsqrtpostproc fdivsqrtpostproc( .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .SqrtM, .SpecialCaseM, .RemOp(Funct3E[1]), - .n, .p, .m, + .MDUE, .n, .ALTB, .m, .BZero, .As, .QmM, .WZero, .DivSM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 9e9bdb10b..8f2087643 100644 --- 
a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -35,20 +35,23 @@ module fdivsqrtpostproc( input logic [`DIVN-2:0] D, // U0.N-1 input logic [`DIVb:0] FirstU, FirstUM, input logic [`DIVb+1:0] FirstC, - input logic Firstun, + input logic Firstun, input logic SqrtM, input logic SpecialCaseM, - input logic RemOp, - input logic [`DIVBLEN:0] n, p, m, + input logic RemOp, MDUE, ALTB, BZero, As, + input logic [`DIVBLEN:0] n, m, output logic [`DIVb:0] QmM, output logic WZero, output logic DivSM ); - logic [`DIVb+3:0] W; + logic [`DIVb+3:0] W, Sum, RemD; /* RemD is declared at the same width as W; left undeclared it would become an implicit 1-bit net and W +/- RemD would be wrong */ logic [`DIVb:0] PreQmM; - logic NegSticky; + logic NegSticky, PostInc; logic weq0; + logic [`DIVBLEN:0] NormShift; + logic [`DIVb:0] IntQuot, IntRem, NormQuot, NormRem; + logic [`DIVb:0] PreResult, Result; // check for early termination on an exact result. If the result is not exact, the sticky should be set aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0); @@ -70,11 +73,70 @@ module fdivsqrtpostproc( assign DivSM = ~WZero & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide // Determine if sticky bit is negative - assign W = WC+WS; + assign Sum = WC + WS; + assign W = $signed(Sum) >>> `LOGR; assign NegSticky = W[`DIVb+3]; + assign RemD = {4'b0000, D, {(`DIVb-`DIVN){1'b0}}}; + + always_comb /* selects quotient, remainder and post-increment from As, NegSticky and weq0; plain blocking assignments are used and every branch drives NormQuot, NormRem and PostInc. NOTE(review): NormRem is [`DIVb:0] while W and RemD are [`DIVb+3:0], so the upper bits are dropped - confirm intended */ + if (~As) + if (NegSticky) begin + NormQuot = FirstUM; + NormRem = W + RemD; + PostInc = 0; + end else begin + NormQuot = FirstU; + NormRem = W; + PostInc = 0; + end + else + if (NegSticky | weq0) begin + NormQuot = FirstU; + NormRem = W; + PostInc = 0; + end else begin + NormQuot = FirstU; + NormRem = W - RemD; + PostInc = 1; + end + +/* + always_comb + if(ALTB) begin + assign IntQuot = '0; + assign IntRem = ForwardedSrcAE; + end else if (BZero) begin + assign IntQuot = '1; + assign IntRem = ForwardedSrcAE; + end else if (EarlyTerm) 
begin + if (weq0) begin + assign IntQuot = FirstU; + assign IntRem = '0; + end else begin + assign IntQuot = FirstUM; + assign IntRem = '0; + end + end else begin + assign IntQuot = NormQuot; + assign IntRem = NormRem; + end + */ + + /* + always_comb + if (RemOp) begin + assign NormShift = m + (`DIVBLEN)'(`DIVa); + assign PreResult = IntRem; + end else begin + assign NormShift = DIVb - (j << `LOGR); + assign PreResult = IntQuot; + end + */ // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted + assign Result = ($signed(PreResult) >>> NormShift) + (PostInc & ~RemOp); /* NOTE(review): PreResult and NormShift are only assigned inside the commented-out blocks above, and Result is not read by QmM below - presumably work in progress, confirm */ + assign PreQmM = NegSticky ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit assign QmM = SqrtM ? (PreQmM << 1) : PreQmM; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index b3d81705c..af6a86179 100644 --- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -41,8 +41,8 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic [2:0] Funct3E, Funct3M, input logic MDUE, W64E, - output logic [`DIVBLEN:0] n, p, m, - output logic OTFCSwap, + output logic [`DIVBLEN:0] n, m, + output logic OTFCSwap, ALTB, BZero, As, output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, output logic [`DIVN-2:0] Dpreproc @@ -52,15 +52,15 @@ module fdivsqrtpreproc ( logic [`NF-1:0] PreprocB, PreprocY; logic [`NF+1:0] SqrtX; logic [`DIVb+3:0] DivX; - logic [`DIVBLEN:0] L; logic [`NE+1:0] Qe; // Intdiv signals logic [`DIVb-1:0] ZeroBufX, ZeroBufY; logic [`XLEN-1:0] PosA, PosB; - logic As, Bs, OTFCSwapTemp; + logic Bs, OTFCSwapTemp; logic [`XLEN-1:0] A64, B64; + logic [`DIVBLEN:0] Calcn, Calcm; logic [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX; - 
logic [`DIVBLEN:0] pPlusr, pPrCeil, p, L; logic [`LOGRK-1:0] pPrTrunc; logic [`DIVb+3:0] PreShiftX; @@ -76,22 +76,24 @@ module fdivsqrtpreproc ( assign PosA = As ? -A64 : A64; assign PosB = Bs ? -B64 : B64; + assign BZero = ~(|ForwardedSrcBE); /* high only when every bit of ForwardedSrcBE is 0; the bare reduction-OR was high for any non-zero value */ assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}}; assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}}; lzc #(`DIVb) lzcX (ZeroBufX, L); - lzc #(`DIVb) lzcY (ZeroBufY, m); + lzc #(`DIVb) lzcY (ZeroBufY, Calcm); assign PreprocX = Xm[`NF-1:0]<> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)}; - assign n = (pPrCeil << `LOGK) - 1; + assign Calcn = (pPrCeil << `LOGK) - 1; assign IntBits = (`DIVBLEN)'(`RK) + p; assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]}; @@ -115,7 +117,9 @@ module fdivsqrtpreproc ( // DIVRESLEN/(r*`DIVCOPIES) flopen #(`NE+2) expflop(clk, DivStartE, Qe, QeM); flopen #(1) swapflop(clk, DivStartE, OTFCSwapTemp, OTFCSwap); - expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m, .Qe); + flopen #(`DIVBLEN+1) nflop(clk, DivStartE, Calcn, n); + flopen #(`DIVBLEN+1) mflop(clk, DivStartE, Calcm, m); + expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m(Calcm), .Qe); endmodule diff --git a/pipelined/src/mmu/hptw.sv b/pipelined/src/mmu/hptw.sv index eede21e78..bcbefae5a 100644 --- a/pipelined/src/mmu/hptw.sv +++ b/pipelined/src/mmu/hptw.sv @@ -223,7 +223,6 @@ module hptw ( if (`XLEN == 32) begin assign InitialWalkerState = L1_ADR; assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0 - // *** Possible bug - should be L1_ADR? assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned); end else begin logic GigapageMisaligned, TerapageMisaligned;