diff --git a/docs/testplans/testplan.md b/docs/testplans/testplan.md
index a25b3a189..37390a632 100644
--- a/docs/testplans/testplan.md
+++ b/docs/testplans/testplan.md
@@ -1,6 +1,29 @@
-# CORE-V Wally Test Plan
+# CORE-V Wally Design Verification Test Plan
+
+CORE-V Wally is functionally tested in the following ways.  Each test is run in lock-step against ImperasDV to ensure all architectural state is correct after each instruction.
+
+| Functions      | Coverage Method | Status |
+| ----------- | ----------- |----|
+|  Instructions | riscv-arch-test | Pass   |
+| Privileged Unit   | wally-riscv-arch-test        | Pass   |
+| Virtual Memory | wally-riscv-arch-test | Pass |
+| PMP | wally-riscv-arch-test | Pass |
+| Peripherals | wally-riscv-arch-test | Pass |
+| Floating-Point | TestFloat | Pass |
+| General | Code Coverage | 91% |
+| General | Boot Linux in Sim | Pass | 
+| General | Boot Linux on FPGA | Pass |
+
+
+The following performance validation is also run:
+| Function | Method | Status |
+| --- | --- | --- |
+| Overall Performance | embench | Pass|
+| Overall Performance | coremark | Pass |
+| Branch Predictor | *** | Pass |
+| Cache Miss Rate | *** | Pass |
+
 
-CORE-V Wally is tested in the following ways:
 
 * Run [RISC-V Architecture Compatibility Tests](https://github.com/riscv-non-isa/riscv-arch-test) in lock-step against the ImperasDV reference model.
 * Run custom tests to cover virtual memory, PMP, privileged unit, and peripherals in lock step against ImperasDV.
diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do
index 4f90333a9..241c9b064 100644
--- a/sim/coverage-exclusions-rv64gc.do
+++ b/sim/coverage-exclusions-rv64gc.do
@@ -31,11 +31,14 @@
 do GetLineNum.do
 
 # LZA (i<64) statement confuses coverage tool 
-# This is ugly to exlcude the whole file - is there a better option?  // coverage off isn't working
+# DH 4/22/23: Exclude all LZAs
 coverage exclude -srcfile lzc.sv 
 
-# FDIVSQRT has 
+# DH 4/22/23: FDIVSQRT can't go directly from done to busy again
 coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state DONE->BUSY
+# DH 4/22/23: The busy->idle transition only occurs if a FlushE occurs while the divider is busy.  The flush is caused by a trap or return,
+# which won't happen while the divider is busy. 
+coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state BUSY->IDLE
 
 ### Exclude D$ states and logic for the I$ instance
 # This is cleaner than trying to set an I$-specific pragma in cachefsm.sv (which would exclude it for the D$ instance too)
@@ -74,7 +77,7 @@ for {set i 0} {$i < $numcacheways} {incr i} {
     coverage exclude -scope /dut/core/ifu/bus/icache/icache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: icache SetDirtyWay"] -item e 1
     coverage exclude -scope /dut/core/ifu/bus/icache/icache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: icache SelectedWiteWordEn"] -item e 1 -fecexprrow 4 6
     # below: flushD can't go high during an icache write b/c of pipeline stall
-    coverage exclude -scope /dut/core/ifu/bus/icache/icache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: icache SetValidEN"] -item e 1 -fecexprrow 4
+    coverage exclude -scope /dut/core/ifu/bus/icache/icache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: cache SetValidEN"] -item e 1 -fecexprrow 4
 }
 
 ## D$ Exclusions.
@@ -85,7 +88,11 @@ coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [Get
 coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: cache AnyMiss"] -item e 1 -fecexprrow 4
 set numcacheways 4
 for {set i 0} {$i < $numcacheways} {incr i} {
-    coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: dcache invalidateway"] -item be 1 -fecexprrow 4
+    coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: dcache invalidateway"] -item bes 1 -fecexprrow 4
+
+    # FlushStage=1 will never happen when SetValidWay=1 since a pipeline stall is asserted by the cache in the fetch stage, which happens before
+    # going into the WRITE_LINE state (and asserting SetValidWay). No TrapM can fire and since StallW is high, a StallM caused by WFIStallM would not cause a FlushW.
+    coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: cache SetValidEN"] -item e 1 -fecexprrow 4
 }
 # D$ writeback, flush, write_line, or flush_writeback states can't be cancelled by a flush
 coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -ftrans CurrState STATE_WRITEBACK->STATE_READY STATE_FLUSH->STATE_READY STATE_WRITE_LINE->STATE_READY STATE_FLUSH_WRITEBACK->STATE_READY
@@ -124,4 +131,8 @@ coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$lin
 set line [GetLineNum ../src/mmu/pmachecker.sv "WriteAccessM \\| ExecuteAccessF"]
 coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-5
 set line [GetLineNum ../src/mmu/pmachecker.sv "ReadAccessM \\| ExecuteAccessF"]
-coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-3
\ No newline at end of file
+coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-3
+
+# Excluding reset and clear for impossible case in the wficountreg in privdec
+set line [GetLineNum ../src/generic/flop/floprc.sv "reset \\| clear"]
+coverage exclude -scope /dut/core/priv/priv/pmd/wfi/wficountreg -linerange $line-$line -item c 1 -feccondrow 2
diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv
index 34f1778f5..544e3454e 100644
--- a/src/cache/cachefsm.sv
+++ b/src/cache/cachefsm.sv
@@ -159,7 +159,7 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
   assign SelFlush = (CurrState == STATE_READY & FlushCache) |
           (CurrState == STATE_FLUSH) | 
           (CurrState == STATE_FLUSH_WRITEBACK);
-  // coverage off -item e -fecexprrow 1
+  // coverage off -item e 1 -fecexprrow 1
   // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck)
   assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) |
              (CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty);
diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv
index 368c7b587..f504f40ad 100644
--- a/src/cache/cacheway.sv
+++ b/src/cache/cacheway.sv
@@ -82,6 +82,8 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
     mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag);
 
     // FlushWay is part of a one hot way selection. Must clear it if FlushWay not selected.
+    // coverage off -item e 1 -fecexprrow 3
+    // nonzero ways will never see SelFlush=0 while FlushWay=1 since FlushWay only advances on a subset of SelFlush assertion cases.
     assign FlushWayEn = FlushWay & SelFlush;
     assign SelNonHit = FlushWayEn | SetValid | SelWriteback;
   end
@@ -100,7 +102,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
   assign SetDirtyWay = SetDirty & SelData;                                 // exclusion-tag: icache SetDirtyWay
   assign ClearDirtyWay = ClearDirty & SelData;
   assign SelectedWriteWordEn = (SetValidWay | SetDirtyWay) & ~FlushStage;  // exclusion-tag: icache SelectedWiteWordEn
-  assign SetValidEN = SetValidWay & ~FlushStage;                           // exclusion-tag: icache SetValidEN
+  assign SetValidEN = SetValidWay & ~FlushStage;                           // exclusion-tag: cache SetValidEN
 
   // If writing the whole line set all write enables to 1, else only set the correct word.
   assign FinalByteMask = SetValidWay ? '1 : LineByteMask; // OR
diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv
index b9584bc9e..206cefbb4 100755
--- a/src/fpu/fctrl.sv
+++ b/src/fpu/fctrl.sv
@@ -138,10 +138,10 @@ module fctrl (
                                   endcase
                       7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000)          
                                                 ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass
-                                  else if (Funct3D == 3'b000 & Rs2D == 5'b00000) 
-                                                ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w / fmv.x.d to int register
-                      7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000) 
-                                                ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w.x / fmv.d.x   to fp reg
+                                  else if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) 
+                                                ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q  fp to int register
+                      7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) 
+                                                ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x  int to fp reg
                       7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00)
                                                 ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h)
                       7'b0100001: if (Rs2D[4:2] == 3'b000  & SupportedFmt2 & Rs2D[1:0] != 2'b01)
diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv
index f4d465012..f7a443639 100644
--- a/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -62,7 +62,7 @@ module fdivsqrt(
   logic [`DIVb+1:0]           FirstC;                       // Step tracker
   logic                       Firstun;                      // Quotient selection
   logic                       WZeroE;                       // Early termination flag
-  logic [`DURLEN-1:0]         cycles;                       // FSM cycles
+  logic [`DURLEN-1:0]         CyclesE;                      // FSM cycles
   logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
   logic                       DivStartE;                    // Enable signal for flops during stall
                                                             
@@ -76,7 +76,7 @@ module fdivsqrt(
 
   fdivsqrtpreproc fdivsqrtpreproc(                          // Preprocessor
     .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
-    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles,
+    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
     // Int-specific 
     .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
     .BZeroM, .nM, .mM, .AM, 
@@ -85,7 +85,7 @@ module fdivsqrt(
   fdivsqrtfsm fdivsqrtfsm(                                  // FSM
     .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
     .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, 
-    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles,
+    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
     // Int-specific 
     .IDivStartE, .ISpecialCaseE, .IntDivE);
 
diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
index f1ad32cd8..2e17cc25b 100644
--- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
@@ -1,10 +1,10 @@
 ///////////////////////////////////////////
-// fdivsqrt.sv
+// fdivsqrtcycles.sv
 //
 // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu
 // Modified: 18 April 2022
 //
-// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
+// Purpose: Determine number of cycles for divsqrt
 // 
 // Documentation: RISC-V System on Chip Design Chapter 13
 //
@@ -33,7 +33,7 @@ module fdivsqrtcycles(
   input  logic                SqrtE,
   input  logic                IntDivE,
   input  logic [`DIVBLEN:0]   nE,
-  output logic [`DURLEN-1:0]  cycles
+  output logic [`DURLEN-1:0]  CyclesE
 );
   logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
   // DIVN = `NF+3
@@ -68,8 +68,8 @@ module fdivsqrtcycles(
   always_comb begin 
     if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
     else       fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
-    if (`IDIV_ON_FPU) cycles =  IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
-    else              cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
+    if (`IDIV_ON_FPU) CyclesE =  IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
+    else              CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
   end 
   /* verilator lint_on WIDTH */
 
diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
index 5332087ad..ba0758ee6 100644
--- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@@ -39,7 +39,7 @@ module fdivsqrtfsm(
   input  logic               StallM, FlushE,
   input  logic               IntDivE,
   input  logic               ISpecialCaseE,
-  input  logic [`DURLEN-1:0] cycles,
+  input  logic [`DURLEN-1:0] CyclesE,
   output logic               IFDivStartE,
   output logic               FDivBusyE, FDivDoneE,
   output logic               SpecialCaseM
@@ -66,8 +66,7 @@ module fdivsqrtfsm(
       if (reset | FlushE) begin
           state <= #1 IDLE; 
       end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE
-//       end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE
-          step <= cycles; 
+          step <= CyclesE; 
           if (SpecialCaseE) state <= #1 DONE;
           else              state <= #1 BUSY;
       end else if (state == BUSY) begin 
diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index 43a5e42b2..3de4b252e 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -43,44 +43,49 @@ module fdivsqrtpreproc (
   input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
   input  logic                IntDivE, W64E,
   output logic                ISpecialCaseE,
-  output logic [`DURLEN-1:0]  cycles,
+  output logic [`DURLEN-1:0]  CyclesE,
   output logic [`DIVBLEN:0]   nM, mM,
   output logic                NegQuotM, ALTBM, IntDivM, W64M,
   output logic                AsM, BZeroM,
   output logic [`XLEN-1:0]    AM
 );
 
-  logic [`DIVb-1:0]           XPreproc, DPreproc;
+  logic [`DIVb-1:0]           Xfract, Dfract;
   logic [`DIVb:0]             PreSqrtX;
   logic [`DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
   logic [`NE+1:0]             QeE;                                 // Quotient Exponent (FP only)
   logic [`DIVb-1:0]           IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
-  logic [`DIVBLEN:0]          mE, nE, ell;                             // Leading zeros of inputs
+  logic [`DIVBLEN:0]          mE, nE, ell;                         // Leading zeros of inputs
   logic                       NumerZeroE;                          // Numerator is zero (X or A)
   logic                       AZeroE, BZeroE;                      // A or B is Zero for integer division
-  logic                       signedDiv;                           // signed division
+  logic                       SignedDivE;                          // signed division
   logic                       NegQuotE;                            // Integer quotient is negative
   logic                       AsE, BsE;                            // Signs of integer inputs
   logic [`XLEN-1:0]           AE;                                  // input A after W64 adjustment
+  logic  ALTBE;
+
+  //////////////////////////////////////////////////////
+  // Integer Preprocessing
+  //////////////////////////////////////////////////////
 
   if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
     logic [`XLEN-1:0] BE, PosA, PosB;
 
     // Extract inputs, signs, zero, depending on W64 mode if applicable
-    assign signedDiv = ~Funct3E[0];
+    assign SignedDivE = ~Funct3E[0];
   
     // Source handling
     if (`XLEN==64) begin // 64-bit, supports W64
-      mux2 #(64)    amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & signedDiv}}, ForwardedSrcAE[31:0]}, W64E, AE);
-      mux2 #(64)    bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & signedDiv}}, ForwardedSrcBE[31:0]}, W64E, BE);
+      mux2 #(64)    amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
+      mux2 #(64)    bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
     end else begin // 32 bits only
       assign AE = ForwardedSrcAE;
       assign BE = ForwardedSrcBE;
      end
     assign AZeroE = ~(|AE);
     assign BZeroE = ~(|BE);
-    assign AsE = AE[`XLEN-1] & signedDiv;
-    assign BsE = BE[`XLEN-1] & signedDiv; 
+    assign AsE = AE[`XLEN-1] & SignedDivE;
+    assign BsE = BE[`XLEN-1] & SignedDivE; 
     assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative
 
     // Force integer inputs to be postiive
@@ -90,35 +95,35 @@ module fdivsqrtpreproc (
     // Select integer or floating point inputs
     mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX);
     mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD);
-
-
+    mux2 #(1)    numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
   end else begin // Int not supported
     assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}};
     assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}};
+    assign NumerZeroE = XZeroE;
   end
 
+  //////////////////////////////////////////////////////
+  // Integer & FP leading zero and normalization shift
+  //////////////////////////////////////////////////////
+
   // count leading zeros for Subnorm FP and to normalize integer inputs
   lzc #(`DIVb) lzcX (IFX, ell);
   lzc #(`DIVb) lzcY (IFD, mE);
 
   // Normalization shift: shift off leading one
-  assign XPreproc = (IFX << ell) << 1;
-  assign DPreproc = (IFD << mE)  << 1; 
+  assign Xfract = (IFX << ell) << 1;
+  assign Dfract = (IFD << mE)  << 1; 
 
-  // append leading 1 (for nonzero inputs)
-  // shift square root to be in range [1/4, 1)
-  // Normalized numbers are shifted right by 1 if the exponent is odd
-  // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
-  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
-  assign DivX = {3'b000, ~NumerZeroE, XPreproc};
+  // *** CT: move to fdivsqrtintpreshift
 
-   // Divisior register
-  flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D);
+  //////////////////////////////////////////////////////
+  // Integer Right Shift to digit boundary
+  //  Determine DivXShifted (X shifted to digit boundary)
+  //  and nE (number of fractional digits)
+  //////////////////////////////////////////////////////
 
-  // ***CT: factor out fdivsqrtcycles
   if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
     logic [`DIVBLEN:0] ZeroDiff, p;
-    logic  ALTBE;
 
     // calculate number of fractional bits p
     assign ZeroDiff = mE - ell;         // Difference in number of leading zeros
@@ -128,31 +133,68 @@ module fdivsqrtpreproc (
     // Integer special cases (terminate immediately)
     assign ISpecialCaseE = BZeroE | ALTBE;
 
-  /* verilator lint_off WIDTH */
     // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
 
     if (`LOGRK > 0) begin // more than 1 bit per cycle
       logic [`LOGRK-1:0] IntTrunc, RightShiftX;
       logic [`DIVBLEN:0] TotalIntBits, IntSteps;
-
+      /* verilator lint_off WIDTH */
       assign TotalIntBits = `LOGR + p;                            // Total number of result bits (r integer bits plus p fractional bits)
       assign IntTrunc = TotalIntBits % `RK;                       // Truncation check for ceiling operator
       assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc;     // Number of steps for int div
       assign nE = (IntSteps * `DIVCOPIES) - 1;                    // Fractional digits
       assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK);  // Right shift amount
       assign DivXShifted = DivX >> RightShiftX;                   // shift X by up to R*K-1 to complete in nE steps
+      /* verilator lint_on WIDTH */
     end else begin // radix 2 1 copy doesn't require shifting
       assign nE = p; 
       assign DivXShifted = DivX;
     end
-  /* verilator lint_on WIDTH */
+  end else begin
+    assign ISpecialCaseE = 0;
+  end
 
-    // Selet integer or floating-point operands
-    mux2 #(1)    numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
+  // CT *** fdivsqrtfplead1
+
+  //////////////////////////////////////////////////////
+  // Floating-Point Preprocessing
+  // append leading 1 (for nonzero inputs)
+  // shift square root to be in range [1/4, 1)
+  // Normalized numbers are shifted right by 1 if the exponent is odd
+  // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
+  //////////////////////////////////////////////////////
+
+  assign DivX = {3'b000, ~NumerZeroE, Xfract};
+
+  // Sqrt is initialized on step one as R(X-1), so depends on Radix
+  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
+  if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
+  else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
+  mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
+  
+  //////////////////////////////////////////////////////
+  // Select integer or floating-point operands
+  //////////////////////////////////////////////////////
+
+  if (`IDIV_ON_FPU) begin
     mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
+  end else begin
+    assign X = PreShiftX;
+  end
 
+   // Divisor register
+  flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
+ 
+  // Floating-point exponent
+  fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
+  flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
+
+  // Number of FSM cycles (to FSM)
+  fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
+
+  if (`IDIV_ON_FPU) begin:intpipelineregs
     // pipeline registers
-    flopen #(1)        mdureg(clk, IFDivStartE, IntDivE,     IntDivM);
+    flopen #(1)        mdureg(clk, IFDivStartE, IntDivE,  IntDivM);
     flopen #(1)       altbreg(clk, IFDivStartE, ALTBE,    ALTBM);
     flopen #(1)    negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
     flopen #(1)      bzeroreg(clk, IFDivStartE, BZeroE,   BZeroM);
@@ -162,21 +204,7 @@ module fdivsqrtpreproc (
     flopen #(`XLEN)   srcareg(clk, IFDivStartE, AE,       AM);
     if (`XLEN==64) 
       flopen #(1)      w64reg(clk, IFDivStartE, W64E,     W64M);
-  end else begin
-    assign NumerZeroE = XZeroE;
-    assign X = PreShiftX;
   end
 
-  // Sqrt is initialized on step one as R(X-1), so depends on Radix
-  if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
-  else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
-  mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
- 
-  // Floating-point exponent
-  fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
-  flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
-
-  // Number of FSM cycles (to FSM)
-  fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles);
 endmodule
 
diff --git a/src/privileged/csrm.sv b/src/privileged/csrm.sv
index f0e5f00db..fb519be37 100644
--- a/src/privileged/csrm.sv
+++ b/src/privileged/csrm.sv
@@ -171,7 +171,8 @@ module csrm #(parameter
     IllegalCSRMAccessM = !(`S_SUPPORTED) & (CSRAdrM == MEDELEG | CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode
     if (CSRAdrM >= PMPADDR0 & CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry
       CSRMReadValM = {{(`XLEN-(`PA_BITS-2)){1'b0}}, PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]};
-    else if (CSRAdrM >= PMPCFG0 & CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin
+    else if (CSRAdrM >= PMPCFG0 & CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4 & (`XLEN==32 | CSRAdrM[0] == 0)) begin
+      // only even-numbered PMPCFG entries exist in RV64
       if (`XLEN==64) begin
         entry = ({CSRAdrM[11:1], 1'b0} - PMPCFG0)*4; // disregard odd entries in RV64
         CSRMReadValM = {PMPCFG_ARRAY_REGW[entry+7],PMPCFG_ARRAY_REGW[entry+6],PMPCFG_ARRAY_REGW[entry+5],PMPCFG_ARRAY_REGW[entry+4],
diff --git a/src/privileged/csrsr.sv b/src/privileged/csrsr.sv
index 60968a68b..61a6f3247 100644
--- a/src/privileged/csrsr.sv
+++ b/src/privileged/csrsr.sv
@@ -122,7 +122,10 @@ module csrsr (
     logic [1:0] EndiannessPrivMode;
     always_comb begin
       if      (SelHPTW)                                  EndiannessPrivMode = `S_MODE;
+      //coverage off -item c 1 -feccondrow 1
+      // status.MPRV always gets reset upon leaving machine mode, so MPRV will never be high when out of machine mode
       else if (PrivilegeModeW == `M_MODE & STATUS_MPRV)  EndiannessPrivMode = STATUS_MPP;
+      //coverage on
       else                                               EndiannessPrivMode = PrivilegeModeW;
 
       case (EndiannessPrivMode) 
diff --git a/tests/coverage/WALLY-init-lib.h b/tests/coverage/WALLY-init-lib.h
index 6b6dd6dd9..ec179a0dd 100644
--- a/tests/coverage/WALLY-init-lib.h
+++ b/tests/coverage/WALLY-init-lib.h
@@ -63,6 +63,9 @@ trap_handler:
     bgez t0, exception  # if msb is clear, it is an exception
 
 interrupt:              # must be a timer interrupt 
+    li t0, -1           # set mtimecmp to biggest number so it doesn't interrupt again
+    li t1, 0x02004000   # MTIMECMP in CLINT
+    sd t0, 0(t1)        
     j trap_return       # clean up and return
 
 exception:
diff --git a/tests/coverage/fpu.S b/tests/coverage/fpu.S
index b2a52be06..879980899 100644
--- a/tests/coverage/fpu.S
+++ b/tests/coverage/fpu.S
@@ -28,7 +28,7 @@
 
 main:
 
-    #bseti t0, zero, 14  # turn on FPU
+    bseti t0, zero, 14  # turn on FPU
     csrs mstatus, t0
 
     #Pull denormalized FP number from memory and pass it to fclass.S for coverage
@@ -105,6 +105,25 @@ main:
     # fcvt.w.q a0, ft0
     # fcvt.q.d ft3, ft0
 
+    // fdivsqrt: attempt to exercise the busy->idle transition caused by a FlushE while the divider is busy (when an interrupt arrives)
+    // NOTE: this code doesn't actually trigger a busy->idle transition because the pending timer interrupt isn't taken until the division finishes.
+    li t0, 0x3F812345 # random value slightly bigger than 1
+    li t1, 0x3F823456
+    fmv.w.x ft0, t0  # move int to fp register
+    fmv.w.x ft1, t1
+    li t0, -1           # set mtimecmp to biggest number so it doesn't interrupt again
+    li t1, 0x02004000   # MTIMECMP in CLINT
+    sd t0, 0(t1)
+    csrsi mstatus, 0b1000   # enable interrupts with mstatus.MIE
+    li t1, 0x0200bff8   # read MTIME in CLINT
+    ld t0, 0(t1)        
+    addi t0, t0, 11
+    li t1, 0x02004000   # MTIMECMP in CLINT
+    sd t0, 0(t1)        # write mtime+11 to cause interrupt soon.  This is very touchy timing and is sensitive to cache line fetch latency
+    nop
+    fdiv.s ft2, ft1, ft0 # should get interrupted, triggering a flush
+    csrci mstatus, 0b1000   # disable interrupts with mstatus.MIE
+
     # Completing branch coverage in fctrl.sv
     .word 0x38007553    // Testing the all False case for 119 - funct7 under, op = 101 0011
     .word 0x40000053    // Line 145 All False Test case - illegal instruction?
@@ -145,4 +164,5 @@ TestData2:
 .word 0x7f800000 #INF
 .int 0xbf800000 #FP -1.0
 .int 0x7fa00000 #SNaN
-.int 0x3fffffff #OverFlow Test
\ No newline at end of file
+.int 0x3fffffff #OverFlow Test
+DivTestData:
diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S
index 5b3e37b56..bcc8f3950 100644
--- a/tests/coverage/pmpcfg.S
+++ b/tests/coverage/pmpcfg.S
@@ -1,6 +1,6 @@
 // pmpcfg part 1
 // Kevin Wan, kewan@hmc.edu, 4/18/2023
-// Liam Chalk, lchalk@hmc.edu, 4/19/2023
+// Liam Chalk, lchalk@hmc.edu, 4/25/2023
 // locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
 // See the next part in pmpcfg1.S
 
@@ -19,34 +19,59 @@ main:
 
     li t0, 0x90000000
     csrw pmpaddr0, t0
-    li t0, 0x00000017
+    li t0, 0x00001700
+    csrw pmpcfg0, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr0, t0
+    li t0, 0x00001700
     csrw pmpcfg1, t0
 
     li t0, 0x90000000
     csrw pmpaddr0, t0
-    li t0, 0x00000017
+    li t0, 0x00001700
     csrw pmpcfg2, t0
 
     li t0, 0x90000000
     csrw pmpaddr0, t0
-    li t0, 0x00000017
+    li t0, 0x00001700
     csrw pmpcfg3, t0
 
     li t0, 0x90000000
     csrw pmpaddr1, t0
-    li t0, 0x00000017
+    li t0, 0x00001700
     csrw pmpcfg1, t0
 
     li t0, 0x90000000
-    csrw pmpaddr1, t0
-    li t0, 0x00000017
+    csrw pmpaddr2, t0
+    li t0, 0x00001700
     csrw pmpcfg2, t0
 
     li t0, 0x90000000
-    csrw pmpaddr1, t0
-    li t0, 0x00000017
+    csrw pmpaddr3, t0
+    li t0, 0x00001700
     csrw pmpcfg3, t0
 
+    li t0, 0x90000000
+    csrw pmpaddr0, t0
+    li t0, 0x00170000
+    csrw pmpcfg0, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr2, t0
+    li t0, 0x00170000
+    csrw pmpcfg2, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr0, t0
+    li t0, 0x17000000
+    csrw pmpcfg0, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr2, t0
+    li t0, 0x17000000
+    csrw pmpcfg2, t0
+
     li t0, 0x8800000000000000
     csrw pmpcfg2, t0
     li t0, 0x88000000000000