diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 506cc7c50..ee09a4260 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -123,11 +123,11 @@
 `define LOGRK ($clog2(`RK))
 // FPDUR = ceil(DIVRESLEN/(LOGR*DIVCOPIES)) 
 // one iteration is required for the integer bit for minimally redundent radix-4
-`define FPDUR ((`DIVN+2+(`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES)+(`RADIX/4))
+`define FPDUR ((`DIVN+1+(`LOGR*`DIVCOPIES))/(`LOGR*`DIVCOPIES)+(`RADIX/4))
 `define DURLEN ($clog2(`FPDUR+1))
 `define QLEN (`FPDUR*`LOGR*`DIVCOPIES)
 `define DIVb (`QLEN-1)
-`define DIVa (`DIVb+4-`XLEN)
+`define DIVa (`DIVb+1-`XLEN)
 `define DIVBLEN ($clog2(`DIVb+1)-1)
 
 
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
index 65ea6cc54..14e7cfa99 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -64,12 +64,13 @@ module fdivsqrt(
   logic Firstun;
   logic WZero;
   logic SpecialCaseM;
-  logic [`DIVBLEN:0] n, p, m;
-  logic OTFCSwap;
+  logic [`DIVBLEN:0] n, m;
+  logic OTFCSwap, ALTB, BZero, As;
 
   fdivsqrtpreproc fdivsqrtpreproc(
     .clk, .DivStartE, .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), 
-    .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .n, .p, .m, .OTFCSwap,
+    .Sqrt(SqrtE), .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, 
+    .n, .m, .OTFCSwap, .ALTB, .BZero, .As,
     .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .MDUE, .W64E);
   fdivsqrtfsm fdivsqrtfsm(
     .clk, .reset, .FmtE, .XsE, .SqrtE, 
@@ -84,6 +85,6 @@ module fdivsqrt(
   fdivsqrtpostproc fdivsqrtpostproc(
     .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .Firstun, 
     .SqrtM, .SpecialCaseM, .RemOp(Funct3E[1]),
-    .n, .p, .m,
+    .MDUE, .n, .ALTB, .m, .BZero, .As,
     .QmM, .WZero, .DivSM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
index 9e9bdb10b..8f2087643 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@@ -35,20 +35,23 @@ module fdivsqrtpostproc(
   input  logic [`DIVN-2:0]  D, // U0.N-1
   input  logic [`DIVb:0] FirstU, FirstUM, 
   input  logic [`DIVb+1:0] FirstC,
-  input  logic  Firstun,
+  input  logic Firstun,
   input  logic SqrtM,
   input  logic SpecialCaseM,
-  input  logic RemOp,
-  input  logic [`DIVBLEN:0] n, p, m,
+  input  logic RemOp, MDUE, ALTB, BZero, As,
+  input  logic [`DIVBLEN:0] n, m,
   output logic [`DIVb:0] QmM, 
   output logic WZero,
   output logic DivSM
 );
   
-  logic [`DIVb+3:0] W;
+  logic [`DIVb+3:0] W, Sum;
   logic [`DIVb:0] PreQmM;
-  logic NegSticky;
+  logic NegSticky, PostInc;
   logic weq0;
+  logic [`DIVBLEN:0] NormShift;
+  logic [`DIVb:0] IntQuot, IntRem, NormQuot, NormRem;
+  logic [`DIVb:0] PreResult, Result;
 
   // check for early termination on an exact result.  If the result is not exact, the sticky should be set
   aplusbeq0 #(`DIVb+4) wspluswceq0(WS, WC, weq0);
@@ -70,11 +73,70 @@ module fdivsqrtpostproc(
   assign DivSM = ~WZero & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide
 
   // Determine if sticky bit is negative
-  assign W = WC+WS;
+  assign Sum = WC + WS;
+  assign W = $signed(Sum) >>> `LOGR;
   assign NegSticky = W[`DIVb+3];
+  logic [`DIVb+3:0] RemD; // same width as W so W +/- RemD is evaluated at full width (was undeclared -> implicit 1-bit net)
+  assign RemD = {4'b0000, D, {(`DIVb-`DIVN){1'b0}}}; // D left-justified below four zero bits; zero-extended to RemD's width
+  always_comb // select NormQuot/NormRem/PostInc from As, NegSticky and weq0; every branch drives all three (no latches)
+    if (~As)
+      if (NegSticky) begin // negative residual: use FirstUM (U-1) and add RemD to the remainder
+        NormQuot = FirstUM;
+        NormRem  = W + RemD;
+        PostInc  = 0;
+      end else begin       // non-negative residual: quotient and remainder used as-is
+        NormQuot = FirstU;
+        NormRem  = W;
+        PostInc  = 0;
+      end
+    else 
+      if (NegSticky | weq0) begin // As set, residual negative or exactly zero: no correction
+        NormQuot = FirstU;
+        NormRem  = W;
+        PostInc  = 0;
+      end else begin              // As set, residual positive: subtract RemD and request a +1 on the result
+        NormQuot = FirstU;
+        NormRem  = W - RemD;
+        PostInc  = 1;
+      end
+
+/*
+  always_comb
+    if(ALTB) begin
+      assign   IntQuot = '0;
+      assign   IntRem  = ForwardedSrcAE;
+    end else if (BZero) begin
+      assign   IntQuot = '1;
+      assign   IntRem  = ForwardedSrcAE;
+    end else if (EarlyTerm) begin
+      if (weq0) begin
+        assign IntQuot = FirstU;
+        assign IntRem  = '0;
+      end else begin
+        assign IntQuot = FirstUM;
+        assign IntRem  = '0;
+      end
+    end else begin 
+      assign   IntQuot = NormQuot;
+      assign   IntRem  = NormRem;
+    end 
+  */
+  
+  /*
+  always_comb
+    if (RemOp) begin
+      assign NormShift = m + (`DIVBLEN)'(`DIVa);
+      assign PreResult = IntRem;
+    end else begin
+      assign NormShift = `DIVb - (j << `LOGR);
+      assign PreResult = IntQuot;
+    end
+  */
 
    // division takes the result from the next cycle, which is shifted to the left one more time so the square root also needs to be shifted
 
+  assign Result = ($signed(PreResult) >>> NormShift) + (PostInc & ~RemOp);
+
   assign PreQmM = NegSticky ? FirstUM : FirstU; // Select U or U-1 depending on negative sticky bit
   assign QmM = SqrtM ? (PreQmM << 1) : PreQmM;
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index b3d81705c..af6a86179 100644
--- a/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/pipelined/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -41,8 +41,8 @@ module fdivsqrtpreproc (
   input  logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
 	input  logic [2:0] 	Funct3E, Funct3M,
 	input  logic MDUE, W64E,
-  output logic [`DIVBLEN:0] n, p, m,
-  output logic OTFCSwap,
+  output logic [`DIVBLEN:0] n, m,
+  output logic OTFCSwap, ALTB, BZero, As,
   output logic [`NE+1:0] QeM,
   output logic [`DIVb+3:0] X,
   output logic [`DIVN-2:0] Dpreproc
@@ -52,15 +52,15 @@ module fdivsqrtpreproc (
   logic  [`NF-1:0] PreprocB, PreprocY;
   logic  [`NF+1:0] SqrtX;
   logic  [`DIVb+3:0] DivX;
-  logic  [`DIVBLEN:0] L;
   logic  [`NE+1:0] Qe;
   // Intdiv signals
   logic  [`DIVb-1:0] ZeroBufX, ZeroBufY;
   logic  [`XLEN-1:0] PosA, PosB;
-  logic  As, Bs, OTFCSwapTemp;
+  logic  Bs, OTFCSwapTemp;
   logic  [`XLEN-1:0] A64, B64;
+  logic  [`DIVBLEN:0] Calcn, Calcm;
   logic  [`DIVBLEN:0] ZeroDiff, IntBits, RightShiftX;
-  logic  [`DIVBLEN:0] pPlusr, pPrCeil;
+  logic  [`DIVBLEN:0] pPlusr, pPrCeil, p, L;
   logic  [`LOGRK-1:0] pPrTrunc;
   logic  [`DIVb+3:0] PreShiftX;
 
@@ -76,22 +76,24 @@ module fdivsqrtpreproc (
   
   assign PosA = As ? -A64 : A64;
   assign PosB = Bs ? -B64 : B64;
+  assign BZero = ~|ForwardedSrcBE; // high only when every bit of ForwardedSrcBE is 0 (reduction NOR); was |, which flagged non-zero B
 
   assign ZeroBufX = MDUE ? {PosA, {`DIVb-`XLEN{1'b0}}} : {Xm, {`DIVb-`NF-1{1'b0}}};
   assign ZeroBufY = MDUE ? {PosB, {`DIVb-`XLEN{1'b0}}} : {Ym, {`DIVb-`NF-1{1'b0}}};
   lzc #(`DIVb) lzcX (ZeroBufX, L);
-  lzc #(`DIVb) lzcY (ZeroBufY, m);
+  lzc #(`DIVb) lzcY (ZeroBufY, Calcm);
 
   assign PreprocX = Xm[`NF-1:0]<<L;
-  assign PreprocY = Ym[`NF-1:0]<<m;
+  assign PreprocY = Ym[`NF-1:0]<<Calcm;
 
-  assign ZeroDiff = m - L;
-  assign p = ZeroDiff[`DIVBLEN] ? '0 : ZeroDiff;
+  assign ZeroDiff = Calcm - L;
+  assign ALTB = ZeroDiff[`DIVBLEN]; // A less than B
+  assign p = ALTB ? '0 : ZeroDiff;
 
   assign pPlusr = (`DIVBLEN)'(`LOGR) + p;
   assign pPrTrunc = pPlusr[`LOGRK-1:0];
   assign pPrCeil = (pPlusr >> `LOGRK) + {{`DIVBLEN-1{1'b0}}, |(pPrTrunc)};
-  assign n = (pPrCeil << `LOGK) - 1;
+  assign Calcn = (pPrCeil << `LOGK) - 1;
   assign IntBits = (`DIVBLEN)'(`RK) + p;
   assign RightShiftX = (`DIVBLEN)'(`RK) - {{(`DIVBLEN-`RK){1'b0}}, IntBits[`RK-1:0]};
 
@@ -115,7 +117,9 @@ module fdivsqrtpreproc (
   // DIVRESLEN/(r*`DIVCOPIES)
   flopen #(`NE+2) expflop(clk, DivStartE, Qe, QeM);
   flopen #(1) swapflop(clk, DivStartE, OTFCSwapTemp, OTFCSwap);
-  expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m, .Qe);
+  flopen #(`DIVBLEN+1) nflop(clk, DivStartE, Calcn, n);
+  flopen #(`DIVBLEN+1) mflop(clk, DivStartE, Calcm, m);
+  expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .L, .m(Calcm), .Qe);
 
 endmodule
 
diff --git a/pipelined/src/mmu/hptw.sv b/pipelined/src/mmu/hptw.sv
index eede21e78..bcbefae5a 100644
--- a/pipelined/src/mmu/hptw.sv
+++ b/pipelined/src/mmu/hptw.sv
@@ -223,7 +223,6 @@ module hptw (
 	if (`XLEN == 32) begin
 		assign InitialWalkerState = L1_ADR;
 		assign MegapageMisaligned = |(CurrentPPN[9:0]); // must have zero PPN0
-			// *** Possible bug - should be L1_ADR?
 		assign Misaligned = ((WalkerState == L0_ADR) & MegapageMisaligned);
 	end else begin
 		logic  GigapageMisaligned, TerapageMisaligned;