From 5e45fef8384fd810c75ed1cdb57830e14d46e0cf Mon Sep 17 00:00:00 2001
From: Miles Cook <mdcook@g.hmc.edu>
Date: Mon, 17 Apr 2023 18:35:03 -0700
Subject: [PATCH 01/35] Increase of TLB coverage in IFU

---
 tests/coverage/ifuCamlineWrite.S | 146 +++++++++++++++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 tests/coverage/ifuCamlineWrite.S

diff --git a/tests/coverage/ifuCamlineWrite.S b/tests/coverage/ifuCamlineWrite.S
new file mode 100644
index 000000000..4c11bf183
--- /dev/null
+++ b/tests/coverage/ifuCamlineWrite.S
@@ -0,0 +1,146 @@
+///////////////////////////////////////////
+// ifuCamlineWrite.S
+//
+// Written: Miles Cook <mdcook@g.hmc.edu> and Kevin Box <kbox@g.hmc.edu> 4/17
+// 
+// Acknowledgements: The pagetable and outline for this test was written by Manuel Mendoza 
+//                   and Noah Limpert. 
+//
+// Purpose: Test coverage for TLBCamlines in IFU 
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// load code to initialize stack, handle interrupts, terminate
+
+#include "WALLY-init-lib.h"
+
+# run-elf.bash find this in project description
+main:
+    # Page table root address at 0x80010000
+    li t5, 0x9000000000080010   # satp value: MODE = 9 (Sv48), PPN = 0x80010
+    csrw satp, t5
+
+    # switch to supervisor mode
+    li a0, 1   # a0 = 1: mode code passed to the ecall (presumably decoded by the WALLY-init-lib.h trap handler; not visible here)
+    ecall
+
+    li t0, 0x80015000 # base addr: first page that gets written and then executed
+
+    li t2, 0       # i = 0
+    li t3, 33     # loop bound: 33 iterations (i = 0..32), one 4 KiB page per iteration
+
+loop: bge t2, t3, finished   # exit loop once i >= bound
+    li t4, 0x1000     # page stride (4 KiB)
+    li t1, 0x00008067 # machine code for jalr x0, 0(ra), i.e. ret
+    sw t1, 0 (t0)     # plant the ret at the start of the current page
+    fence.I           # order the store before the following instruction fetch
+    jalr t0           # call into the page (ra = next instruction); the planted ret comes straight back
+    add t0, t0, t4    # advance to the next page
+    addi t2, t2, 1    # i++
+    j loop
+
+finished:
+    j done
+
+.data
+
+.align 16
+# Page table situated at 0x80010000
+pagetable: 
+    .8byte 0x200044C1 // root entry 0 -> table at 0x80011000 (PPN 0x80011); 0x200040C1 would point back at the root
+
+.align 12
+    .8byte 0x0000000000000000 // entry 0: invalid
+    .8byte 0x00000000200048C1 // entry 1 -> table at 0x80012000
+    .8byte 0x00000000200048C1 // entry 2 (covers VA 0x80000000 under Sv48) -> table at 0x80012000
+    
+
+.align 12
+    .8byte 0x0000000020004CC1 // entry 0 -> leaf table at 0x80013000
+    //.8byte 0x00000200800CF// ADD IN THE MEGAPAGE should 3 nibbles of zeros be removed?
+
+.align 12
+    // Leaf PTEs (flags 0xCF = V,R,W,X,A,D): 4 KiB pages identity-mapped from 0x80000000, PTE value steps by 0x400 per page
+    .8byte 0x200000CF
+    .8byte 0x200004CF
+    .8byte 0x200008CF
+    .8byte 0x20000CCF
+
+    .8byte 0x200010CF
+    .8byte 0x200014CF
+    .8byte 0x200018CF
+    .8byte 0x20001CCF
+
+    .8byte 0x200020CF
+    .8byte 0x200024CF
+    .8byte 0x200028CF
+    .8byte 0x20002CCF
+
+    .8byte 0x200030CF
+    .8byte 0x200034CF
+    .8byte 0x200038CF
+    .8byte 0x20003CCF
+
+    .8byte 0x200040CF
+    .8byte 0x200044CF
+    .8byte 0x200048CF
+    .8byte 0x20004CCF
+
+    .8byte 0x200050CF
+    .8byte 0x200054CF
+    .8byte 0x200058CF
+    .8byte 0x20005CCF
+
+    .8byte 0x200060CF
+    .8byte 0x200064CF
+    .8byte 0x200068CF
+    .8byte 0x20006CCF
+
+    .8byte 0x200070CF
+    .8byte 0x200074CF
+    .8byte 0x200078CF
+    .8byte 0x20007CCF
+    
+    .8byte 0x200080CF
+    .8byte 0x200084CF
+    .8byte 0x200088CF
+    .8byte 0x20008CCF
+
+    .8byte 0x200090CF
+    .8byte 0x200094CF
+    .8byte 0x200098CF
+    .8byte 0x20009CCF
+
+    .8byte 0x2000A0CF // entry 40 -> PA 0x80028000 (hex successor of 0x9C is 0xA0, not 0x100)
+    .8byte 0x2000A4CF
+    .8byte 0x2000A8CF
+    .8byte 0x2000ACCF
+
+    .8byte 0x2000B0CF
+    .8byte 0x2000B4CF
+    .8byte 0x2000B8CF
+    .8byte 0x2000BCCF
+
+    .8byte 0x2000C0CF
+    .8byte 0x2000C4CF
+    .8byte 0x2000C8CF
+    .8byte 0x2000CCCF
+
+    .8byte 0x2000D0CF
+    .8byte 0x2000D4CF // entry 53 -> PA 0x80035000, the last page touched by the loop in main

From 914baf6bb1d030e406c0788db1028fb1bf5d332f Mon Sep 17 00:00:00 2001
From: Cedar Turek <cturek@g.hmc.edu>
Date: Tue, 18 Apr 2023 15:14:17 -0700
Subject: [PATCH 02/35] moved D flop to preproc

---
 src/fpu/fdivsqrt/fdivsqrt.sv        | 9 ++++-----
 src/fpu/fdivsqrt/fdivsqrtiter.sv    | 6 +-----
 src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 7 +++++--
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv
index 92f64cbdc..e8708c6c4 100644
--- a/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -57,7 +57,7 @@ module fdivsqrt(
 
   logic [`DIVb+3:0]           WS, WC;                       // Partial remainder components
   logic [`DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
-  logic [`DIVb-1:0]           DPreproc, D;                  // Iterator Divisor
+  logic [`DIVb-1:0]           D;                  // Iterator Divisor
   logic [`DIVb:0]             FirstU, FirstUM;              // Intermediate result values
   logic [`DIVb+1:0]           FirstC;                       // Step tracker
   logic                       Firstun;                      // Quotient selection
@@ -75,8 +75,7 @@ module fdivsqrt(
 
   fdivsqrtpreproc fdivsqrtpreproc(                        // Preprocessor
     .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), 
-    .Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E, 
-    .QeM, .X, .DPreproc, 
+    .Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E, .QeM, .X, .D, 
     // Int-specific 
     .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
     .nE, .BZeroM, .nM, .mM, .AM, 
@@ -90,8 +89,8 @@ module fdivsqrt(
     .IDivStartE, .ISpecialCaseE, .nE, .IntDivE);
 
   fdivsqrtiter fdivsqrtiter(                              // CSA Iterator
-    .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .DPreproc, 
-    .D, .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
+    .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, 
+    .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
 
   fdivsqrtpostproc fdivsqrtpostproc(                      // Postprocessor
     .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, 
diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv
index ec15423e4..f3048c8b6 100644
--- a/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv
@@ -34,8 +34,7 @@ module fdivsqrtiter(
   input  logic             FDivBusyE, 
   input  logic             SqrtE,
   input  logic [`DIVb+3:0] X,
-  input  logic [`DIVb-1:0] DPreproc,
-  output logic [`DIVb-1:0] D,
+  input  logic [`DIVb-1:0] D,
   output logic [`DIVb:0]   FirstU, FirstUM,
   output logic [`DIVb+1:0] FirstC,
   output logic             Firstun,
@@ -95,9 +94,6 @@ module fdivsqrtiter(
   mux2   #(`DIVb+2) cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC); 
   flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
 
-   // Divisior register
-  flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D);
-
   // Divisor Selections
   //  - choose the negitive version of what's being selected
   //  - D is a 0.b mantissa
diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index cf8a055ef..4af1d786c 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -39,7 +39,7 @@ module fdivsqrtpreproc (
   input  logic [2:0]          Funct3E,
   output logic [`NE+1:0]      QeM,
   output logic [`DIVb+3:0]    X,
-  output logic [`DIVb-1:0]    DPreproc,
+  output logic [`DIVb-1:0]    D,
   // Int-specific
   input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
   input  logic                IntDivE, W64E,
@@ -50,7 +50,7 @@ module fdivsqrtpreproc (
   output logic [`XLEN-1:0]    AM
 );
 
-  logic [`DIVb-1:0]           XPreproc;
+  logic [`DIVb-1:0]           XPreproc, DPreproc;
   logic [`DIVb:0]             PreSqrtX;
   logic [`DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
   logic [`NE+1:0]             QeE;                                 // Quotient Exponent (FP only)
@@ -173,5 +173,8 @@ module fdivsqrtpreproc (
   // Floating-point exponent
   fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
   flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
+
+   // Divisior register
+  flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D);
 endmodule
 

From b1dd1a627ffccb1fb6f75eb2bc31468b7e5a5a80 Mon Sep 17 00:00:00 2001
From: Cedar Turek <cturek@g.hmc.edu>
Date: Tue, 18 Apr 2023 15:41:04 -0700
Subject: [PATCH 03/35] gave integer bits to D instead of adding manually
 everywhere

---
 src/fpu/fdivsqrt/fdivsqrt.sv         |  2 +-
 src/fpu/fdivsqrt/fdivsqrtiter.sv     | 11 ++++-------
 src/fpu/fdivsqrt/fdivsqrtpostproc.sv |  9 ++++-----
 src/fpu/fdivsqrt/fdivsqrtpreproc.sv  | 10 ++++------
 src/fpu/fdivsqrt/fdivsqrtstage2.sv   |  5 ++---
 src/fpu/fdivsqrt/fdivsqrtstage4.sv   |  5 ++---
 6 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv
index e8708c6c4..1e05aee16 100644
--- a/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -57,7 +57,7 @@ module fdivsqrt(
 
   logic [`DIVb+3:0]           WS, WC;                       // Partial remainder components
   logic [`DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
-  logic [`DIVb-1:0]           D;                  // Iterator Divisor
+  logic [`DIVb+3:0]           D;                  // Iterator Divisor
   logic [`DIVb:0]             FirstU, FirstUM;              // Intermediate result values
   logic [`DIVb+1:0]           FirstC;                       // Step tracker
   logic                       Firstun;                      // Quotient selection
diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv
index f3048c8b6..aeb4bcc4d 100644
--- a/src/fpu/fdivsqrt/fdivsqrtiter.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv
@@ -33,8 +33,7 @@ module fdivsqrtiter(
   input  logic             IFDivStartE, 
   input  logic             FDivBusyE, 
   input  logic             SqrtE,
-  input  logic [`DIVb+3:0] X,
-  input  logic [`DIVb-1:0] D,
+  input  logic [`DIVb+3:0] X, D,
   output logic [`DIVb:0]   FirstU, FirstUM,
   output logic [`DIVb+1:0] FirstC,
   output logic             Firstun,
@@ -95,12 +94,10 @@ module fdivsqrtiter(
   flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]);
 
   // Divisor Selections
-  //  - choose the negitive version of what's being selected
-  //  - D is a 0.b mantissa
-  assign DBar    = {3'b111, 1'b0, ~D};
+  assign DBar    = ~D;        // for -D
   if(`RADIX == 4) begin : d2
-    assign DBar2 = {2'b11, 1'b0, ~D, 1'b1};
-    assign D2    = {2'b0, 1'b1, D, 1'b0};
+    assign D2    = D << 1;    // for 2D,  only used in R4
+    assign DBar2 = ~D2;       // for -2D, only used in R4
   end
 
   // k=DIVCOPIES of the recurrence logic
diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
index b8575f7fe..1009cd227 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv
@@ -32,7 +32,7 @@ module fdivsqrtpostproc(
   input  logic              clk, reset,
   input  logic              StallM,
   input  logic [`DIVb+3:0]  WS, WC,
-  input  logic [`DIVb-1:0]  D, 
+  input  logic [`DIVb+3:0]  D, 
   input  logic [`DIVb:0]    FirstU, FirstUM, 
   input  logic [`DIVb+1:0]  FirstC,
   input  logic              SqrtE,
@@ -46,7 +46,7 @@ module fdivsqrtpostproc(
   output logic [`XLEN-1:0]  FIntDivResultM
 );
   
-  logic [`DIVb+3:0]         W, Sum, DM;
+  logic [`DIVb+3:0]         W, Sum;
   logic [`DIVb:0]           PreQmM;
   logic                     NegStickyM;
   logic                     weq0E, WZeroM;
@@ -67,7 +67,7 @@ module fdivsqrtpostproc(
 
     assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1));
     assign FZeroSqrtE = {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0};    // F for square root
-    assign FZeroDivE =  {3'b001,D,1'b0};                                    // F for divide
+    assign FZeroDivE =  D << 1;                                    // F for divide
     mux2 #(`DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE);
     csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero};
     aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E);
@@ -102,11 +102,10 @@ module fdivsqrtpostproc(
     logic signed [`DIVb+3:0] PreResultM, PreIntResultM;
 
     assign W = $signed(Sum) >>> `LOGR;
-    assign DM = {4'b0001, D};
     assign UnsignedQuotM = {3'b000, PreQmM};
 
     // Integer remainder: sticky and sign correction muxes
-    mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM);
+    mux2 #(`DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM);
     mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM);
     mux2 #(`DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM);
 
diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index 4af1d786c..9d375a267 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -38,8 +38,7 @@ module fdivsqrtpreproc (
   input  logic                XZeroE,
   input  logic [2:0]          Funct3E,
   output logic [`NE+1:0]      QeM,
-  output logic [`DIVb+3:0]    X,
-  output logic [`DIVb-1:0]    D,
+  output logic [`DIVb+3:0]    X, D,
   // Int-specific
   input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
   input  logic                IntDivE, W64E,
@@ -111,7 +110,9 @@ module fdivsqrtpreproc (
   // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
   mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
   assign DivX = {3'b000, ~NumerZeroE, XPreproc};
-  // *** CT 4/13/23 Create D output here with leading 1 appended as well, use in the other modules
+
+   // Divisior register
+  flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D);
 
   // ***CT: factor out fdivsqrtcycles
   if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
@@ -173,8 +174,5 @@ module fdivsqrtpreproc (
   // Floating-point exponent
   fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
   flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
-
-   // Divisior register
-  flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D);
 endmodule
 
diff --git a/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/src/fpu/fdivsqrt/fdivsqrtstage2.sv
index 53c1711cb..be62f8aa6 100644
--- a/src/fpu/fdivsqrt/fdivsqrtstage2.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtstage2.sv
@@ -30,8 +30,7 @@
 
 /* verilator lint_off UNOPTFLAT */
 module fdivsqrtstage2 (
-  input  logic [`DIVb-1:0] D,
-  input  logic [`DIVb+3:0] DBar, 
+  input  logic [`DIVb+3:0] D, DBar, 
   input  logic [`DIVb:0]   U, UM,
   input  logic [`DIVb+3:0] WS, WC,
   input  logic [`DIVb+1:0] C,
@@ -66,7 +65,7 @@ module fdivsqrtstage2 (
   always_comb
     if      (up) Dsel = DBar;
     else if (uz) Dsel = '0;
-    else         Dsel = {4'b0001, D}; // un
+    else         Dsel = D; // un
 
   // Partial Product Generation
   //  WSA, WCA = WS + WC - qD
diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv
index f2ff3734b..9464e6a88 100644
--- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv
@@ -29,8 +29,7 @@
 `include "wally-config.vh"
 
 module fdivsqrtstage4 (
-  input  logic [`DIVb-1:0] D,
-  input  logic [`DIVb+3:0] DBar, D2, DBar2,
+  input  logic [`DIVb+3:0] D, DBar, D2, DBar2,
   input  logic [`DIVb:0]   U,UM,
   input  logic [`DIVb+3:0] WS, WC,
   input  logic [`DIVb+1:0] C,
@@ -75,7 +74,7 @@ module fdivsqrtstage4 (
       4'b1000: Dsel = DBar2;
       4'b0100: Dsel = DBar;
       4'b0000: Dsel = '0;
-      4'b0010: Dsel = {3'b0, 1'b1, D};
+      4'b0010: Dsel = D;
       4'b0001: Dsel = D2;
       default: Dsel = 'x;
     endcase

From db0ca8695a8e7051a9454dc631c29d506cdd4f98 Mon Sep 17 00:00:00 2001
From: Kevin Thomas <ps2gamer20@gmail.com>
Date: Tue, 18 Apr 2023 17:57:56 -0500
Subject: [PATCH 04/35] Add PR#252 test file to coverage

---
 testbench/tests.vh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/testbench/tests.vh b/testbench/tests.vh
index 6a0f80276..19adb818c 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -53,7 +53,8 @@ string tvpaths[] = '{
     "lsu",
     "vm64check",
     "pmp",
-    "tlbKP"
+    "tlbKP",
+    "ifuCamlineWrite"
   };
 
   string coremark[] = '{

From 49356aa4ca4553364bcaf51248c2872b5b13ce19 Mon Sep 17 00:00:00 2001
From: Cedar Turek <cturek@g.hmc.edu>
Date: Tue, 18 Apr 2023 16:14:45 -0700
Subject: [PATCH 05/35] created fdivsqrtcycles, moved cycles calculation from
 FSM to preproc

---
 src/fpu/fdivsqrt/fdivsqrt.sv        | 25 +++++-----
 src/fpu/fdivsqrt/fdivsqrtcycles.sv  | 76 +++++++++++++++++++++++++++++
 src/fpu/fdivsqrt/fdivsqrtfsm.sv     | 76 ++++++-----------------------
 src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 16 +++---
 4 files changed, 114 insertions(+), 79 deletions(-)
 create mode 100644 src/fpu/fdivsqrt/fdivsqrtcycles.sv

diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv
index 1e05aee16..f4d465012 100644
--- a/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -57,42 +57,43 @@ module fdivsqrt(
 
   logic [`DIVb+3:0]           WS, WC;                       // Partial remainder components
   logic [`DIVb+3:0]           X;                            // Iterator Initial Value (from dividend)
-  logic [`DIVb+3:0]           D;                  // Iterator Divisor
+  logic [`DIVb+3:0]           D;                            // Iterator Divisor
   logic [`DIVb:0]             FirstU, FirstUM;              // Intermediate result values
   logic [`DIVb+1:0]           FirstC;                       // Step tracker
   logic                       Firstun;                      // Quotient selection
   logic                       WZeroE;                       // Early termination flag
+  logic [`DURLEN-1:0]         cycles;                       // FSM cycles
   logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
   logic                       DivStartE;                    // Enable signal for flops during stall
                                                             
   // Integer div/rem signals                                
   logic                       BZeroM;                       // Denominator is zero
   logic                       IntDivM;                      // Integer operation
-  logic [`DIVBLEN:0]          nE, nM, mM;                   // Shift amounts
+  logic [`DIVBLEN:0]          nM, mM;                       // Shift amounts
   logic                       NegQuotM, ALTBM, AsM, W64M;   // Special handling for postprocessor
   logic [`XLEN-1:0]           AM;                           // Original Numerator for postprocessor
   logic                       ISpecialCaseE;                // Integer div/remainder special cases
 
-  fdivsqrtpreproc fdivsqrtpreproc(                        // Preprocessor
-    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), 
-    .Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E, .QeM, .X, .D, 
+  fdivsqrtpreproc fdivsqrtpreproc(                          // Preprocessor
+    .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
+    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles,
     // Int-specific 
     .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
-    .nE, .BZeroM, .nM, .mM, .AM, 
+    .BZeroM, .nM, .mM, .AM, 
     .IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM);
 
-  fdivsqrtfsm fdivsqrtfsm(                                // FSM
-    .clk, .reset, .FmtE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
+  fdivsqrtfsm fdivsqrtfsm(                                  // FSM
+    .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
     .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, 
-    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, 
+    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles,
     // Int-specific 
-    .IDivStartE, .ISpecialCaseE, .nE, .IntDivE);
+    .IDivStartE, .ISpecialCaseE, .IntDivE);
 
-  fdivsqrtiter fdivsqrtiter(                              // CSA Iterator
+  fdivsqrtiter fdivsqrtiter(                                // CSA Iterator
     .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, 
     .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC));
 
-  fdivsqrtpostproc fdivsqrtpostproc(                      // Postprocessor
+  fdivsqrtpostproc fdivsqrtpostproc(                        // Postprocessor
     .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, 
     .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, 
     .QmM, .WZeroE, .DivStickyM, 
diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
new file mode 100644
index 000000000..f1ad32cd8
--- /dev/null
+++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
@@ -0,0 +1,76 @@
+///////////////////////////////////////////
+// fdivsqrtcycles.sv
+//
+// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu
+// Modified: 18 April 2023
+//
+// Purpose: Determine the number of FSM cycles for a divide / square root operation
+// 
+// Documentation: RISC-V System on Chip Design Chapter 13
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+`include "wally-config.vh"
+
+module fdivsqrtcycles(
+  input  logic [`FMTBITS-1:0] FmtE,      // format select: picks the fraction width Nf below
+  input  logic                SqrtE,     // 1 selects the square root bit count, 0 the divide bit count
+  input  logic                IntDivE,   // 1 selects the integer cycle count (only when `IDIV_ON_FPU)
+  input  logic [`DIVBLEN:0]   nE,        // integer-division count input; cycles = (nE + 1)/`DIVCOPIES
+  output logic [`DURLEN-1:0]  cycles     // cycle count handed to the FSM
+);
+  logic [`DURLEN+1:0] Nf, fbits; // Nf: fraction bits of the selected format; fbits: total bits to compute
+  // DIVN = `NF+3
+  // NS = NF + 1
+  // N = NS or NS+2 for div/sqrt.
+
+  /* verilator lint_off WIDTH */
+  if (`FPSIZES == 1)                     // one format configured: constant fraction width
+    assign Nf = `NF;
+  else if (`FPSIZES == 2)                // two formats: FmtE treated as a 1-bit select
+    always_comb
+      case (FmtE)
+        1'b0: Nf = `NF1;
+        1'b1: Nf = `NF;
+      endcase
+  else if (`FPSIZES == 3)                // three formats
+    always_comb
+      case (FmtE)
+        `FMT:  Nf = `NF;
+        `FMT1: Nf = `NF1;
+        `FMT2: Nf = `NF2; 
+      endcase                            // NOTE(review): no default branch; if FmtE can take a fourth encoding Nf is unassigned for it - confirm
+  else if (`FPSIZES == 4)  
+    always_comb
+      case(FmtE)
+        `S_FMT: Nf = `S_NF;
+        `D_FMT: Nf = `D_NF;
+        `H_FMT: Nf = `H_NF;
+        `Q_FMT: Nf = `Q_NF;
+      endcase 
+
+  always_comb begin 
+    if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
+    else       fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
+    if (`IDIV_ON_FPU) cycles =  IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); // FP branch: ceil(fbits / (LOGR*DIVCOPIES))
+    else              cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); // ceil(fbits / (LOGR*DIVCOPIES))
+  end 
+  /* verilator lint_on WIDTH */
+
+endmodule
\ No newline at end of file
diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
index 0793346bf..5332087ad 100644
--- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@@ -29,32 +29,27 @@
 `include "wally-config.vh"
 
 module fdivsqrtfsm(
-  input  logic                clk, 
-  input  logic                reset, 
-  input  logic [`FMTBITS-1:0] FmtE,
-  input  logic                XInfE, YInfE, 
-  input  logic                XZeroE, YZeroE, 
-  input  logic                XNaNE, YNaNE, 
-  input  logic                FDivStartE, IDivStartE,
-  input  logic                XsE,
-  input  logic                SqrtE,
-  input  logic                StallM,
-  input  logic                FlushE,
-  input  logic                WZeroE,
-  input  logic                IntDivE,
-  input  logic [`DIVBLEN:0]   nE,
-  input  logic                ISpecialCaseE,
-  output logic                IFDivStartE,
-  output logic                FDivBusyE, FDivDoneE,
-  output logic                SpecialCaseM
+  input  logic               clk, reset, 
+  input  logic               XInfE, YInfE, 
+  input  logic               XZeroE, YZeroE, 
+  input  logic               XNaNE, YNaNE, 
+  input  logic               FDivStartE, IDivStartE,
+  input  logic               XsE, WZeroE,
+  input  logic               SqrtE,
+  input  logic               StallM, FlushE,
+  input  logic               IntDivE,
+  input  logic               ISpecialCaseE,
+  input  logic [`DURLEN-1:0] cycles,
+  output logic               IFDivStartE,
+  output logic               FDivBusyE, FDivDoneE,
+  output logic               SpecialCaseM
 );
   
   typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype;
   statetype state;
 
-  logic [`DURLEN-1:0] step;
-  logic [`DURLEN-1:0] cycles;
   logic SpecialCaseE, FSpecialCaseE;
+  logic [`DURLEN-1:0] step;
 
   // FDivStartE and IDivStartE come from fctrl, reflecitng the start of floating-point and possibly integer division
   assign IFDivStartE = (FDivStartE | (IDivStartE & `IDIV_ON_FPU)) & (state == IDLE) & ~StallM;
@@ -67,47 +62,6 @@ module fdivsqrtfsm(
   else              assign SpecialCaseE = FSpecialCaseE;
   flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
 
-// DIVN = `NF+3
-// NS = NF + 1
-// N = NS or NS+2 for div/sqrt.  
-
-// *** CT 4/13/23 move cycles calculation back to preprocesor
-/* verilator lint_off WIDTH */
-  logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
-  if (`FPSIZES == 1)
-    assign Nf = `NF;
-  else if (`FPSIZES == 2)
-    always_comb
-      case (FmtE)
-        1'b0: Nf = `NF1;
-        1'b1: Nf = `NF;
-      endcase
-  else if (`FPSIZES == 3)
-    always_comb
-      case (FmtE)
-        `FMT: Nf = `NF;
-        `FMT1: Nf = `NF1;
-        `FMT2: Nf = `NF2; 
-      endcase
-  else if (`FPSIZES == 4)  
-    always_comb
-      case(FmtE)
-        `S_FMT: Nf = `S_NF;
-        `D_FMT: Nf = `D_NF;
-        `H_FMT: Nf = `H_NF;
-        `Q_FMT: Nf = `Q_NF;
-      endcase 
-
-
-  always_comb begin 
-    if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
-    else       fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
-    if (`IDIV_ON_FPU) cycles =  IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
-    else              cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
-  end 
-
-  /* verilator lint_on WIDTH */
-
   always_ff @(posedge clk) begin
       if (reset | FlushE) begin
           state <= #1 IDLE; 
diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index 9d375a267..43a5e42b2 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -33,8 +33,8 @@ module fdivsqrtpreproc (
   input  logic                IFDivStartE, 
   input  logic [`NF:0]        Xm, Ym,
   input  logic [`NE-1:0]      Xe, Ye,
-  input  logic [`FMTBITS-1:0] Fmt,
-  input  logic                Sqrt,
+  input  logic [`FMTBITS-1:0] FmtE,
+  input  logic                SqrtE,
   input  logic                XZeroE,
   input  logic [2:0]          Funct3E,
   output logic [`NE+1:0]      QeM,
@@ -43,7 +43,8 @@ module fdivsqrtpreproc (
   input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
   input  logic                IntDivE, W64E,
   output logic                ISpecialCaseE,
-  output logic [`DIVBLEN:0]   nE, nM, mM,
+  output logic [`DURLEN-1:0]  cycles,
+  output logic [`DIVBLEN:0]   nM, mM,
   output logic                NegQuotM, ALTBM, IntDivM, W64M,
   output logic                AsM, BZeroM,
   output logic [`XLEN-1:0]    AM
@@ -54,7 +55,7 @@ module fdivsqrtpreproc (
   logic [`DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
   logic [`NE+1:0]             QeE;                                 // Quotient Exponent (FP only)
   logic [`DIVb-1:0]           IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
-  logic [`DIVBLEN:0]          mE, ell;                             // Leading zeros of inputs
+  logic [`DIVBLEN:0]          mE, nE, ell;                             // Leading zeros of inputs
   logic                       NumerZeroE;                          // Numerator is zero (X or A)
   logic                       AZeroE, BZeroE;                      // A or B is Zero for integer division
   logic                       signedDiv;                           // signed division
@@ -169,10 +170,13 @@ module fdivsqrtpreproc (
   // Sqrt is initialized on step one as R(X-1), so depends on Radix
   if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
   else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
-  mux2 #(`DIVb+4) prexmux(DivX, SqrtX, Sqrt, PreShiftX);
+  mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
  
   // Floating-point exponent
-  fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
+  fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
   flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
+
+  // Number of FSM cycles (to FSM)
+  fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles);
 endmodule
 

From 1bdae2285d3276dd7e22b1c8e582c77d1326bb2f Mon Sep 17 00:00:00 2001
From: Kevin Wan <kewan@hmc.edu>
Date: Tue, 18 Apr 2023 18:43:50 -0700
Subject: [PATCH 06/35] PMPCFG_ARRAY_REGW cases

---
 testbench/tests.vh      |  2 ++
 tests/coverage/pmpcfg.S | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 tests/coverage/pmpcfg.S

diff --git a/testbench/tests.vh b/testbench/tests.vh
index 6a0f80276..8e327cafb 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -53,7 +53,9 @@ string tvpaths[] = '{
     "lsu",
     "vm64check",
     "pmp",
+    "pmpcfg",
     "tlbKP"
+    
   };
 
   string coremark[] = '{
diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S
new file mode 100644
index 000000000..bb6961526
--- /dev/null
+++ b/tests/coverage/pmpcfg.S
@@ -0,0 +1,33 @@
+// pmpcfg supplemental
+
+#include "WALLY-init-lib.h" 
+main: 
+    li t0, 0x8800000000000000
+    csrw pmpcfg2, t0
+    li t0, 0x88000000000000
+    csrw pmpcfg2, t0
+    li t0, 0x880000000000
+    csrw pmpcfg2, t0
+    li t0, 0x8800000000
+    csrw pmpcfg2, t0
+    li t0, 0x88000000
+    csrw pmpcfg2, t0
+    li t0, 0x880000
+    csrw pmpcfg2, t0
+    li t0, 0x8800
+    csrw pmpcfg2, t0
+    li t0, 0x8800000000000000
+    csrw pmpcfg0, t0
+    li t0, 0x88000000000000
+    csrw pmpcfg0, t0
+    li t0, 0x880000000000
+    csrw pmpcfg0, t0
+    li t0, 0x8800000000
+    csrw pmpcfg0, t0
+    li t0, 0x88000000
+    csrw pmpcfg0, t0
+    li t0, 0x880000
+    csrw pmpcfg0, t0
+    li t0, 0x8800
+    csrw pmpcfg0, t0
+    j done

From 771124e265e0ce6b4f9fe8861de861fb9df24a96 Mon Sep 17 00:00:00 2001
From: Kevin Wan <kewan@hmc.edu>
Date: Tue, 18 Apr 2023 21:50:48 -0700
Subject: [PATCH 07/35] Completely covers all PMPCFG_ARRAY_REGW cases

---
 testbench/tests.vh       |  4 +++-
 tests/coverage/pmpcfg.S  |  8 ++++++-
 tests/coverage/pmpcfg1.S | 48 ++++++++++++++++++++++++++++++++++++++++
 tests/coverage/pmpcfg2.S | 12 ++++++++++
 tests/coverage/priv.S    |  1 +
 5 files changed, 71 insertions(+), 2 deletions(-)
 create mode 100644 tests/coverage/pmpcfg1.S
 create mode 100644 tests/coverage/pmpcfg2.S

diff --git a/testbench/tests.vh b/testbench/tests.vh
index 8e327cafb..49c946802 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -54,8 +54,10 @@ string tvpaths[] = '{
     "vm64check",
     "pmp",
     "pmpcfg",
+    "pmpcfg1",
+    "pmpcfg2",
     "tlbKP"
-    
+  
   };
 
   string coremark[] = '{
diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S
index bb6961526..d65f810e5 100644
--- a/tests/coverage/pmpcfg.S
+++ b/tests/coverage/pmpcfg.S
@@ -1,4 +1,7 @@
-// pmpcfg supplemental
+// pmpcfg part 1
+// Kevin Wan, kewan@hmc.edu, 4/18/2023
+// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
+// See the next part in pmpcfg1.S
 
 #include "WALLY-init-lib.h" 
 main: 
@@ -16,6 +19,8 @@ main:
     csrw pmpcfg2, t0
     li t0, 0x8800
     csrw pmpcfg2, t0
+    li t0, 0x88
+    csrw pmpcfg2, t0
     li t0, 0x8800000000000000
     csrw pmpcfg0, t0
     li t0, 0x88000000000000
@@ -30,4 +35,5 @@ main:
     csrw pmpcfg0, t0
     li t0, 0x8800
     csrw pmpcfg0, t0
+
     j done
diff --git a/tests/coverage/pmpcfg1.S b/tests/coverage/pmpcfg1.S
new file mode 100644
index 000000000..96264c55f
--- /dev/null
+++ b/tests/coverage/pmpcfg1.S
@@ -0,0 +1,48 @@
+// another set of pmpcfg tests. A new file is made because pmpcfg register fields are 
+// locked forever after writing 1 to the lock bit for the first time. 
+
+// Kevin Wan, kewan@hmc.edu, 4/13/2023
+// This set tests locking the pmpXcfg fields in descending order again, without setting the TOR bits. 
+// for the other part of the tests, see pmpcfg.S
+
+#include "WALLY-init-lib.h" 
+main: 
+    li t0, 0x800
+    csrw pmpcfg0, t0
+    li t0, 0x8000000
+    csrw pmpcfg0, t0
+
+    li t0, 0x8000000000000000
+    csrw pmpcfg2, t0
+    li t0, 0x80000000000000
+    csrw pmpcfg2, t0
+    li t0, 0x800000000000
+    csrw pmpcfg2, t0
+    li t0, 0x8000000000
+    csrw pmpcfg2, t0
+    li t0, 0x80000000
+    csrw pmpcfg2, t0
+    li t0, 0x800000
+    csrw pmpcfg2, t0
+    li t0, 0x8000
+    csrw pmpcfg2, t0
+    li t0, 0x80
+    csrw pmpcfg2, t0
+    li t0, 0x8000000000000000
+    csrw pmpcfg0, t0
+    li t0, 0x80000000000000
+    csrw pmpcfg0, t0
+    li t0, 0x800000000000
+    csrw pmpcfg0, t0
+    li t0, 0x8000000000
+    csrw pmpcfg0, t0
+    li t0, 0x80000000
+    csrw pmpcfg0, t0
+    li t0, 0x800000
+    csrw pmpcfg0, t0
+    li t0, 0x8000
+    csrw pmpcfg0, t0
+
+
+
+    j done
\ No newline at end of file
diff --git a/tests/coverage/pmpcfg2.S b/tests/coverage/pmpcfg2.S
new file mode 100644
index 000000000..5966e3cdc
--- /dev/null
+++ b/tests/coverage/pmpcfg2.S
@@ -0,0 +1,12 @@
+// pmpcfg part 3
+// Kevin Wan, kewan@hmc.edu, 4/18/2023
+// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
+// See the next part in pmpcfg1.S
+
+#include "WALLY-init-lib.h" 
+main: 
+    li t0, 0x80
+    csrw pmpcfg0, t0
+
+
+    j done
\ No newline at end of file
diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S
index 94b7cd0ef..5e187866b 100644
--- a/tests/coverage/priv.S
+++ b/tests/coverage/priv.S
@@ -189,6 +189,7 @@ main:
     li t1, -1
     csrw mcounteren, t1
 
+
     # Go to supervisor mode
     li a0, 1
     ecall

From b5a3ff2d2d6bdceca8388645b88c8e6e08de21c4 Mon Sep 17 00:00:00 2001
From: Kevin Wan <kewan@hmc.edu>
Date: Tue, 18 Apr 2023 22:09:50 -0700
Subject: [PATCH 08/35] a

---
 tests/coverage/pmpcfg2.S | 12 ------------
 1 file changed, 12 deletions(-)
 delete mode 100644 tests/coverage/pmpcfg2.S

diff --git a/tests/coverage/pmpcfg2.S b/tests/coverage/pmpcfg2.S
deleted file mode 100644
index 5966e3cdc..000000000
--- a/tests/coverage/pmpcfg2.S
+++ /dev/null
@@ -1,12 +0,0 @@
-// pmpcfg part 3
-// Kevin Wan, kewan@hmc.edu, 4/18/2023
-// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
-// See the next part in pmpcfg1.S
-
-#include "WALLY-init-lib.h" 
-main: 
-    li t0, 0x80
-    csrw pmpcfg0, t0
-
-
-    j done
\ No newline at end of file

From d74768ce04293ccc3019719825c8f5c1e9704d4f Mon Sep 17 00:00:00 2001
From: Liam <lchalk@hmc.edu>
Date: Tue, 18 Apr 2023 23:06:52 -0700
Subject: [PATCH 09/35] Add test cases for pmpcfg.S

---
 tests/coverage/pmpcfg.S  | 39 ++++++++++++++++++++++++++++++++
 tests/coverage/pmpcfg1.S | 48 ++++++++++++++++++++++++++++++++++++++++
 tests/coverage/pmpcfg2.S | 12 ++++++++++
 3 files changed, 99 insertions(+)
 create mode 100644 tests/coverage/pmpcfg.S
 create mode 100644 tests/coverage/pmpcfg1.S
 create mode 100644 tests/coverage/pmpcfg2.S

diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S
new file mode 100644
index 000000000..387a8a726
--- /dev/null
+++ b/tests/coverage/pmpcfg.S
@@ -0,0 +1,39 @@
+// pmpcfg part 1
+// Kevin Wan, kewan@hmc.edu, 4/18/2023
+// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
+// See the next part in pmpcfg1.S
+
+#include "WALLY-init-lib.h" 
+main: 
+    li t0, 0x8800000000000000
+    csrw pmpcfg2, t0
+    li t0, 0x88000000000000
+    csrw pmpcfg2, t0
+    li t0, 0x880000000000
+    csrw pmpcfg2, t0
+    li t0, 0x8800000000
+    csrw pmpcfg2, t0
+    li t0, 0x88000000
+    csrw pmpcfg2, t0
+    li t0, 0x880000
+    csrw pmpcfg2, t0
+    li t0, 0x8800
+    csrw pmpcfg2, t0
+    li t0, 0x88
+    csrw pmpcfg2, t0
+    li t0, 0x8800000000000000
+    csrw pmpcfg0, t0
+    li t0, 0x88000000000000
+    csrw pmpcfg0, t0
+    li t0, 0x880000000000
+    csrw pmpcfg0, t0
+    li t0, 0x8800000000
+    csrw pmpcfg0, t0
+    li t0, 0x88000000
+    csrw pmpcfg0, t0
+    li t0, 0x880000
+    csrw pmpcfg0, t0
+    li t0, 0x8800
+    csrw pmpcfg0, t0
+
+    j done
\ No newline at end of file
diff --git a/tests/coverage/pmpcfg1.S b/tests/coverage/pmpcfg1.S
new file mode 100644
index 000000000..96264c55f
--- /dev/null
+++ b/tests/coverage/pmpcfg1.S
@@ -0,0 +1,48 @@
+// another set of pmpcfg tests. A new file is made because pmpcfg register fields are 
+// locked forever after writing 1 to the lock bit for the first time. 
+
+// Kevin Wan, kewan@hmc.edu, 4/13/2023
+// This set tests locking the pmpXcfg fields in descending order again, without setting the TOR bits. 
+// for the other part of the tests, see pmpcfg.S
+
+#include "WALLY-init-lib.h" 
+main: 
+    li t0, 0x800
+    csrw pmpcfg0, t0
+    li t0, 0x8000000
+    csrw pmpcfg0, t0
+
+    li t0, 0x8000000000000000
+    csrw pmpcfg2, t0
+    li t0, 0x80000000000000
+    csrw pmpcfg2, t0
+    li t0, 0x800000000000
+    csrw pmpcfg2, t0
+    li t0, 0x8000000000
+    csrw pmpcfg2, t0
+    li t0, 0x80000000
+    csrw pmpcfg2, t0
+    li t0, 0x800000
+    csrw pmpcfg2, t0
+    li t0, 0x8000
+    csrw pmpcfg2, t0
+    li t0, 0x80
+    csrw pmpcfg2, t0
+    li t0, 0x8000000000000000
+    csrw pmpcfg0, t0
+    li t0, 0x80000000000000
+    csrw pmpcfg0, t0
+    li t0, 0x800000000000
+    csrw pmpcfg0, t0
+    li t0, 0x8000000000
+    csrw pmpcfg0, t0
+    li t0, 0x80000000
+    csrw pmpcfg0, t0
+    li t0, 0x800000
+    csrw pmpcfg0, t0
+    li t0, 0x8000
+    csrw pmpcfg0, t0
+
+
+
+    j done
\ No newline at end of file
diff --git a/tests/coverage/pmpcfg2.S b/tests/coverage/pmpcfg2.S
new file mode 100644
index 000000000..5966e3cdc
--- /dev/null
+++ b/tests/coverage/pmpcfg2.S
@@ -0,0 +1,12 @@
+// pmpcfg part 3
+// Kevin Wan, kewan@hmc.edu, 4/18/2023
+// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
+// See the next part in pmpcfg1.S
+
+#include "WALLY-init-lib.h" 
+main: 
+    li t0, 0x80
+    csrw pmpcfg0, t0
+
+
+    j done
\ No newline at end of file

From 9b72d6ac37c6cacd3eee7f42f061e8184cb324a3 Mon Sep 17 00:00:00 2001
From: Liam <lchalk@hmc.edu>
Date: Tue, 18 Apr 2023 23:15:47 -0700
Subject: [PATCH 10/35] Update tests.vh

---
 testbench/tests.vh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/testbench/tests.vh b/testbench/tests.vh
index 6a0f80276..f777dbf17 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -53,7 +53,11 @@ string tvpaths[] = '{
     "lsu",
     "vm64check",
     "pmp",
-    "tlbKP"
+    "tlbKP",
+    "pmpcfg",
+    "pmpcfg1",
+    "pmpcfg2"
+
   };
 
   string coremark[] = '{

From 9ef85c547b35c0cdae84631e5efbd5a1e3255cab Mon Sep 17 00:00:00 2001
From: Alec Vercruysse <vercruysse.alec@gmail.com>
Date: Mon, 17 Apr 2023 14:12:58 -0700
Subject: [PATCH 11/35] fix unhit exclusion in fdivsqrtfsm

---
 sim/coverage-exclusions-rv64gc.do | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do
index 754d57db6..41345e6e6 100644
--- a/sim/coverage-exclusions-rv64gc.do
+++ b/sim/coverage-exclusions-rv64gc.do
@@ -35,7 +35,7 @@ do GetLineNum.do
 coverage exclude -srcfile lzc.sv 
 
 # FDIVSQRT has 
-coverage exclude -scope /core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state DONE->BUSY
+coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state DONE->BUSY
 
 ### Exclude D$ states and logic for the I$ instance
 # This is cleaner than trying to set an I$-specific pragma in cachefsm.sv (which would exclude it for the D$ instance too)

From cd803bfa443c50739a363028448adbaf72375460 Mon Sep 17 00:00:00 2001
From: Alec Vercruysse <vercruysse.alec@gmail.com>
Date: Wed, 19 Apr 2023 01:19:25 -0700
Subject: [PATCH 12/35] Cover CacheWay edge case: CacheDataMem we=1 while ce=0.

This test basically triggers an i$ miss during a d$ (hit) store
operation. It requires some tricky timing (e.g. a flushD right
before the relevant store). I use a script to generate the test.
---
 testbench/tests.vh        |  3 +-
 tests/coverage/dcache1.S  | 83 +++++++++++++++++++++++++++++++++++++
 tests/coverage/dcache1.py | 86 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 171 insertions(+), 1 deletion(-)
 create mode 100644 tests/coverage/dcache1.S
 create mode 100644 tests/coverage/dcache1.py

diff --git a/testbench/tests.vh b/testbench/tests.vh
index 6a0f80276..d2b8a9347 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -53,7 +53,8 @@ string tvpaths[] = '{
     "lsu",
     "vm64check",
     "pmp",
-    "tlbKP"
+    "tlbKP",
+    "dcache1",
   };
 
   string coremark[] = '{
diff --git a/tests/coverage/dcache1.S b/tests/coverage/dcache1.S
new file mode 100644
index 000000000..4a9b3de15
--- /dev/null
+++ b/tests/coverage/dcache1.S
@@ -0,0 +1,83 @@
+      #include "WALLY-init-lib.h"
+main:
+      // start way test #1
+      li t0, 0x80100000
+.align 6
+      // i$ boundary, way test #1
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      sd zero, 0(t0)
+      sd zero, 0(t0)
+      .word 0x00000013
+      .word 0x00000013
+      // start way test #2
+      li t0, 0x80101000
+.align 6
+      // i$ boundary, way test #2
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      sd zero, 0(t0)
+      sd zero, 0(t0)
+      .word 0x00000013
+      .word 0x00000013
+      // start way test #3
+      li t0, 0x80102000
+.align 6
+      // i$ boundary, way test #3
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      sd zero, 0(t0)
+      sd zero, 0(t0)
+      .word 0x00000013
+      .word 0x00000013
+      // start way test #4
+      li t0, 0x80103000
+.align 6
+      // i$ boundary, way test #4
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      .word 0x00000013
+      sd zero, 0(t0)
+      sd zero, 0(t0)
+      .word 0x00000013
+      .word 0x00000013
+      j done
diff --git a/tests/coverage/dcache1.py b/tests/coverage/dcache1.py
new file mode 100644
index 000000000..59259567b
--- /dev/null
+++ b/tests/coverage/dcache1.py
@@ -0,0 +1,86 @@
+####################
+# dcache1.py
+#
+# Written: avercruysse@hmc.edu 18 April 2023
+#
+# Purpose: Test Coverage for D$
+#          (For each way, trigger a CacheDataMem write enable while chip enable is low)        
+#
+# A component of the CORE-V-WALLY configurable RISC-V project.
+# 
+# Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+#
+# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+#
+# Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+# except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+# may obtain a copy of the License at
+#
+# https://solderpad.org/licenses/SHL-2.1/
+#
+# Unless required by applicable law or agreed to in writing, any work distributed under the 
+# License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+# either express or implied. See the License for the specific language governing permissions 
+# and limitations under the License.
+################################################
+
+import os
+
+test_name = "dcache1.S"
+dcache_num_ways = 4
+dcache_way_size_in_bytes = 4096 
+# warning i$ line size is not currently parameterized.
+
+# arbitrary start location of where I send stores to.
+mem_start_addr = 0x80100000
+
+# pointer to the start of unused memory (strictly increasing)
+mem_addr = mem_start_addr
+
+
+def wl(line="", comment=None, fname=test_name):
+    with open(fname, "a") as f:
+        instr = False if (":" in line or
+                          ".align" in line or
+                          "# include" in line) else True
+        indent = 6 if instr else 0
+        comment = "// " + comment if comment is not None else ""
+        to_write = " " * indent + line + comment + "\n"
+        f.write(to_write)
+
+    
+def write_repro_instrs():
+    """
+    Assumes that the store location has been fetched to d$, and is in t0.
+    """
+    for i in range(16): # write a whole cache set.
+        if i == 12:
+            wl('sd zero, 0(t0)') # D$ write to set PCM = PCF + 8 for proper alignment (stallD will happen).
+        elif i == 13:
+            # the store in question happens here, at adresses 0x34, 0x74
+            wl('sd zero, 0(t0)') # it should hit this time
+        else:
+            # can't be a NOP or anything else that is encoded as compressed.
+            # this is because the branch predictor will use the wrong address
+            # so the IFU cache miss will come late.
+            wl('.word 0x00000013') # addi x0, x0, 0 (canonical NOP, uncompressed).
+
+if __name__ == "__main__":
+    if os.path.exists(test_name):
+        os.remove(test_name)
+        # os.rename(test_name, test_name + ".old")
+    wl(comment="This file is generated by dcache1.py (run that script manually)")
+    wl('#include "WALLY-init-lib.h"')
+    wl('main:')
+    
+    # excercise all 4 D$ ways. If they're not all full, it uses the first empty.
+    # So we are sure all 4 ways are exercised.
+    for i in range(dcache_num_ways):
+        wl(comment=f"start way test #{i+1}")
+        wl(f'li t0, {hex(mem_addr)}')
+        wl(f'.align 6')                # start at i$ set boundary. 6 lsb bits are zero.
+        wl(comment=f"i$ boundary, way test #{i+1}")
+        write_repro_instrs()
+        mem_addr += dcache_way_size_in_bytes  # so that we excercise a new D$ way.
+    
+    wl("j done")

From b3a3af8ed364919db5b4c13fca6d66fde2e5d904 Mon Sep 17 00:00:00 2001
From: Alec Vercruysse <vercruysse.alec@gmail.com>
Date: Wed, 19 Apr 2023 01:21:57 -0700
Subject: [PATCH 13/35] add D$ test case to trigger a FlushStage while
 SetDirtyWay=1

This hits some conditional coverage in each cacheway.
A cache store hit happens at the same time as a StoreAmoMisalignedFault.
---
 testbench/tests.vh       |  1 +
 tests/coverage/dcache2.S | 49 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 tests/coverage/dcache2.S

diff --git a/testbench/tests.vh b/testbench/tests.vh
index d2b8a9347..fd48d6dc5 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -55,6 +55,7 @@ string tvpaths[] = '{
     "pmp",
     "tlbKP",
     "dcache1",
+    "dcache2"
   };
 
   string coremark[] = '{
diff --git a/tests/coverage/dcache2.S b/tests/coverage/dcache2.S
new file mode 100644
index 000000000..58f97a2e4
--- /dev/null
+++ b/tests/coverage/dcache2.S
@@ -0,0 +1,49 @@
+///////////////////////////////////////////
+// dcache2.S
+//
+// Written: avercruysse@hmc.edu 18 April 2023
+//
+// Purpose: Test Coverage for D$
+//          (for all 4 cache ways, trigger a FlushStage while SetDirtyWay=1)        
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "WALLY-init-lib.h"
+main:
+        // way 0 
+        li t0, 0x80100770
+        sd zero, 0(t0)
+        sd zero, 1(t0)
+
+        // way 1 
+        li t0, 0x80101770
+        sd zero, 0(t0)
+        sd zero, 1(t0)
+
+        // way 2 
+        li t0, 0x80102770
+        sd zero, 0(t0)        
+        sd zero, 1(t0)
+
+        // way 3
+        li t0, 0x80103770
+        sd zero, 0(t0)
+        sd zero, 1(t0)
+        
+        j done

From de93bd6937d31362fc2ef286c6dc83037cd1d436 Mon Sep 17 00:00:00 2001
From: Alec Vercruysse <vercruysse.alec@gmail.com>
Date: Wed, 19 Apr 2023 01:28:45 -0700
Subject: [PATCH 14/35] D$ scope-specific coverage exclusions (I$ logic that
 never fires)

The InvalidateCache signal in the D$ is for I$ only, which
causes some coverage issues that need exclusion.

Another manual exclusion is due to the fact that D$ writeback, flush,
write_line, or flush_writeback states can't be cancelled by a flush,
so those transitions are excluded.

There is some other small stuff to review (logic simplification,
or an exclusion pragma if removing the redundent logic would
make it harder to understand the code, as is the case in the
FlushAdrCntEn assign statement, in my opinion).
---
 sim/coverage-exclusions-rv64gc.do | 15 ++++++++++++++-
 src/cache/cachefsm.sv             |  8 +++++---
 src/cache/cacheway.sv             |  2 +-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do
index 41345e6e6..38c04231c 100644
--- a/sim/coverage-exclusions-rv64gc.do
+++ b/sim/coverage-exclusions-rv64gc.do
@@ -52,7 +52,7 @@ set end [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag-end: icache case"]
 coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange $start-$end
 coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache WRITEBACKStatement"]
 # exclude Atomic Operation logic
-coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache storeAMO"] -item e 1 -fecexprrow 6
+coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: cache AnyMiss"] -item e 1 -fecexprrow 6
 coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache storeAMO1"] -item e 1 -fecexprrow 2-4
 coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache AnyUpdateHit"] -item e 1 -fecexprrow 2
 # cache write logic
@@ -77,6 +77,19 @@ for {set i 0} {$i < $numcacheways} {incr i} {
     coverage exclude -scope /dut/core/ifu/bus/icache/icache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: icache SetValidEN"] -item e 1 -fecexprrow 4
 }
 
+## D$ Exclusions.
+# InvalidateCache is I$ only:
+coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: dcache InvalidateCheck"] -item b 2
+coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: dcache InvalidateCheck"] -item s 1
+coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: dcache CacheEn"] -item e 1 -fecexprrow 12
+coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: cache AnyMiss"] -item e 1 -fecexprrow 4
+set numcacheways 4
+for {set i 0} {$i < $numcacheways} {incr i} {
+    coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: dcache invalidateway"] -item be 1 -fecexprrow 4
+}
+# D$ writeback, flush, write_line, or flush_writeback states can't be cancelled by a flush
+coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -ftrans CurrState STATE_WRITEBACK->STATE_READY STATE_FLUSH->STATE_READY STATE_WRITE_LINE->STATE_READY STATE_FLUSH_WRITEBACK->STATE_READY
+
 
 # Excluding peripherals as sources of instructions for the ifu
 coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker/adrdecs/clintdec
diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv
index 90d8eaad8..7cd8240c4 100644
--- a/src/cache/cachefsm.sv
+++ b/src/cache/cachefsm.sv
@@ -110,10 +110,10 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
   always_comb begin
     NextState = STATE_READY;
     case (CurrState)                                                                                        // exclusion-tag: icache state-case
-      STATE_READY:           if(InvalidateCache)                               NextState = STATE_READY;
+      STATE_READY:           if(InvalidateCache)                               NextState = STATE_READY;     // exclusion-tag: dcache InvalidateCheck
                              else if(FlushCache & ~READ_ONLY_CACHE)            NextState = STATE_FLUSH;
                              else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH;     // exclusion-tag: icache FETCHStatement
-                             else if(AnyMiss & LineDirty)                      NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement
+                             else if(AnyMiss) /* & LineDirty */                NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement
                              else                                              NextState = STATE_READY;
       STATE_FETCH:           if(CacheBusAck)                                   NextState = STATE_WRITE_LINE;
                              else                                              NextState = STATE_FETCH;
@@ -160,6 +160,8 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
   assign SelFlush = (CurrState == STATE_READY & FlushCache) |
           (CurrState == STATE_FLUSH) | 
           (CurrState == STATE_FLUSH_WRITEBACK);
+  // coverage off -item e -fecexprrow 1
+  // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck)
   assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) |
              (CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty);
   assign FlushWayCntEn = (CurrState == STATE_FLUSH & ~LineDirty) |
@@ -181,6 +183,6 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
                   (CurrState == STATE_WRITE_LINE) |
                   resetDelay;
   assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_READ_HOLD;
-  assign CacheEn = (~Stall | FlushCache | AnyMiss) | (CurrState != STATE_READY) | reset | InvalidateCache;
+  assign CacheEn = (~Stall | FlushCache | AnyMiss) | (CurrState != STATE_READY) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn
                        
 endmodule // cachefsm
diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv
index 79ec65e64..368c7b587 100644
--- a/src/cache/cacheway.sv
+++ b/src/cache/cacheway.sv
@@ -155,7 +155,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26,
     if (reset) ValidBits        <= #1 '0;
     if(CacheEn) begin 
     ValidWay <= #1 ValidBits[CacheSet];
-    if(InvalidateCache)                    ValidBits <= #1 '0;
+    if(InvalidateCache)                    ValidBits <= #1 '0; // exclusion-tag: dcache invalidateway
       else if (SetValidEN) ValidBits[CacheSet] <= #1 SetValidWay;
     end
   end

From faaf26655861423bb07180b21573f82217146c99 Mon Sep 17 00:00:00 2001
From: Alec Vercruysse <vercruysse.alec@gmail.com>
Date: Wed, 19 Apr 2023 01:32:43 -0700
Subject: [PATCH 15/35] CacheFSM logic simplification for AMO operations

Ran this by Ross.
---
 src/cache/cachefsm.sv | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv
index 7cd8240c4..34f1778f5 100644
--- a/src/cache/cachefsm.sv
+++ b/src/cache/cachefsm.sv
@@ -69,7 +69,7 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
 );
   
   logic              resetDelay;
-  logic              AMO, StoreAMO;
+  logic              StoreAMO;
   logic              AnyUpdateHit, AnyHit;
   logic              AnyMiss;
   logic              FlushFlag;
@@ -86,16 +86,15 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
 
   statetype CurrState, NextState;
 
-  assign AMO = CacheAtomic[1] & (&CacheRW);
-  assign StoreAMO = AMO | CacheRW[0];
+  assign StoreAMO = CacheRW[0]; // AMO operations assert CacheRW[0]
 
-  assign AnyMiss = (StoreAMO | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: icache storeAMO
+  assign AnyMiss = (StoreAMO | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss
   assign AnyUpdateHit = (StoreAMO) & CacheHit;                             // exclusion-tag: icache storeAMO1
   assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit);                  // exclusion-tag: icache AnyUpdateHit
   assign FlushFlag = FlushAdrFlag & FlushWayFlag;
 
   // outputs for the performance counters.
-  assign CacheAccess = (AMO | CacheRW[1] | CacheRW[0]) & CurrState == STATE_READY; // exclusion-tag: icache CacheW
+  assign CacheAccess = (|CacheRW) & CurrState == STATE_READY; // exclusion-tag: icache CacheW
   assign CacheMiss = CacheAccess & ~CacheHit;
 
   // special case on reset. When the fsm first exists reset the

From 6e612a1693e682de51c3741400d13eb9f8b64e0e Mon Sep 17 00:00:00 2001
From: David Harris <74973295+davidharrishmc@users.noreply.github.com>
Date: Wed, 19 Apr 2023 06:23:05 -0700
Subject: [PATCH 16/35] Update tests.vh

Missing comma from merge
---
 testbench/tests.vh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/testbench/tests.vh b/testbench/tests.vh
index 93a406109..e2d4e5ad1 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -54,7 +54,7 @@ string tvpaths[] = '{
     "vm64check",
     "pmp",
     "dcache1",
-    "dcache2"
+    "dcache2",
     "pmpcfg",
     "pmpcfg1",
     "pmpcfg2",

From 4f57dca0dcf2da99990169788ea84c0ea982c4e4 Mon Sep 17 00:00:00 2001
From: Liam <lchalk@hmc.edu>
Date: Wed, 19 Apr 2023 11:58:22 -0700
Subject: [PATCH 17/35] Add pmpcfg test cases increasing IFU coverage

---
 tests/coverage/pmpcfg.S | 46 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S
index 74181ab62..5b3e37b56 100644
--- a/tests/coverage/pmpcfg.S
+++ b/tests/coverage/pmpcfg.S
@@ -1,10 +1,52 @@
 // pmpcfg part 1
 // Kevin Wan, kewan@hmc.edu, 4/18/2023
+// Liam Chalk, lchalk@hmc.edu, 4/19/2023
 // locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
 // See the next part in pmpcfg1.S
 
 #include "WALLY-init-lib.h" 
 main: 
+
+    li t0, 0x90000000
+    csrw pmpaddr0, t0
+    li t0, 0x00000017
+    csrw pmpcfg0, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr2, t0
+    li t0, 0x00000017
+    csrw pmpcfg2, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr0, t0
+    li t0, 0x00000017
+    csrw pmpcfg1, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr0, t0
+    li t0, 0x00000017
+    csrw pmpcfg2, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr0, t0
+    li t0, 0x00000017
+    csrw pmpcfg3, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr1, t0
+    li t0, 0x00000017
+    csrw pmpcfg1, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr1, t0
+    li t0, 0x00000017
+    csrw pmpcfg2, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr1, t0
+    li t0, 0x00000017
+    csrw pmpcfg3, t0
+
     li t0, 0x8800000000000000
     csrw pmpcfg2, t0
     li t0, 0x88000000000000
@@ -36,6 +78,4 @@ main:
     li t0, 0x8800
     csrw pmpcfg0, t0
 
-
-    j done
-
+    j done
\ No newline at end of file

From ea9639435ec331c0058e5abd3df8b2e332c40992 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Wed, 19 Apr 2023 13:07:07 -0700
Subject: [PATCH 18/35] Added -fp flag to run arch64d/f tests in coverage

---
 sim/regression-wally | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sim/regression-wally b/sim/regression-wally
index c70177206..fa112731a 100755
--- a/sim/regression-wally
+++ b/sim/regression-wally
@@ -28,6 +28,7 @@ regressionDir = os.path.dirname(os.path.abspath(__file__))
 os.chdir(regressionDir)
 
 coverage = '-coverage' in sys.argv
+fp = '-fp' in sys.argv
 
 TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr'])
 # name:     the name of this test configuration (used in printing human-readable
@@ -140,6 +141,9 @@ if (coverage):  # delete all but 64gc tests when running coverage
                  "arch64zi", "wally64a", "wally64periph", "wally64priv", 
                  "arch64zba",  "arch64zbb",  "arch64zbc",  "arch64zbs", 
                  "imperas64f", "imperas64d", "imperas64c", "imperas64i"] 
+    if (fp):
+       tests64gc.append("arch64f")
+       tests64gc.append("arch64d")
     coverStr = '-coverage'
 else:
    coverStr = ''

From 65c04489f1dbe5b3998685ba51e34136d1093aa0 Mon Sep 17 00:00:00 2001
From: David Harris <74973295+davidharrishmc@users.noreply.github.com>
Date: Thu, 20 Apr 2023 14:09:32 -0700
Subject: [PATCH 19/35] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9cb56de0d..ff76f72ff 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # core-v-wally
 Configurable RISC-V Processor
 
-Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, incluidng RV32/64, A, C, F, D, and M extensions, FENCE.I, and the various privileged modes and CSRs.  It is written in SystemVerilog.  It passes the RISC-V Arch Tests and boots Linux on an FPGA.
+Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, incluidng RV32/64, A, C, F, D, Q, M, and Zb* extensions, FENCE.I, and the various privileged modes and CSRs.  It is written in SystemVerilog.  It passes the RISC-V Arch Tests and boots Linux on an FPGA.
 
 ![Wally block diagram](wallyriscvTopAll.png)
 

From 2bd8b65a2bb065b94061e06d67a2c421f57763b3 Mon Sep 17 00:00:00 2001
From: David Harris <74973295+davidharrishmc@users.noreply.github.com>
Date: Thu, 20 Apr 2023 14:15:34 -0700
Subject: [PATCH 20/35] Update README.md

---
 README.md | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index ff76f72ff..b73aecdb8 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,15 @@
 # core-v-wally
-Configurable RISC-V Processor
 
-Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, incluidng RV32/64, A, C, F, D, Q, M, and Zb* extensions, FENCE.I, and the various privileged modes and CSRs.  It is written in SystemVerilog.  It passes the RISC-V Arch Tests and boots Linux on an FPGA.
+Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, including RV32/64, A, C, F, D, Q, M, and Zb* extensions, virtual memory, PMP, and the various privileged modes and CSRs. It provides optional caches, branch prediction, and standard RISC-V peripherals (CLINT, PLIC, UART, GPIO).   Wally is written in SystemVerilog.  It passes the RISC-V Arch Tests and boots Linux on an FPGA.  Configurations range from a minimal RV32E core to a fully featured RV64GC application processor.
 
 ![Wally block diagram](wallyriscvTopAll.png)
 
 Wally is described in an upcoming textbook, *RISC-V System-on-Chip Design*, by Harris, Stine, Thompson, and Harris.  Users should follow the setup instructions below.  A system administrator must install CAD tools using the directions further down.
 
+# Verification
+
+Wally is presently at Technology Readiness Level 4, passing the RISC-V compatibility test suite and custom tests, and booting Linux in simulation and on an FPGA.  See the [Test Plan](docs/testplan.md) for details.
+
 # New User Setup
 
 New users may wish to do the following setup to access the server via a GUI and use a text editor.

From 7ca44de126c1e3c3b2ff37b71f7d9f0c6b50eeff Mon Sep 17 00:00:00 2001
From: Noah Limpert <nlimpert@g.hmc.edu>
Date: Thu, 20 Apr 2023 14:38:13 -0700
Subject: [PATCH 21/35] Commiting changes to add coverage to ASID, Global,
 Megapage size checks.

---
 testbench/tests.vh       |   3 +
 tests/coverage/tlbASID.S | 133 ++++++++++++++++++++++++++++++++
 tests/coverage/tlbGLB.S  | 134 ++++++++++++++++++++++++++++++++
 tests/coverage/tlbMP.S   | 163 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 433 insertions(+)
 create mode 100644 tests/coverage/tlbASID.S
 create mode 100644 tests/coverage/tlbGLB.S
 create mode 100644 tests/coverage/tlbMP.S

diff --git a/testbench/tests.vh b/testbench/tests.vh
index e2d4e5ad1..b86756b4e 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -59,6 +59,9 @@ string tvpaths[] = '{
     "pmpcfg1",
     "pmpcfg2",
     "tlbKP",
+    "tlbMP",
+    "tlbASID",
+    "tlbGLB",
     "ifuCamlineWrite"
   };
 
diff --git a/tests/coverage/tlbASID.S b/tests/coverage/tlbASID.S
new file mode 100644
index 000000000..bf71c0491
--- /dev/null
+++ b/tests/coverage/tlbASID.S
@@ -0,0 +1,133 @@
+///////////////////////////////////////////
+// tlbASID.S
+//
+// Written: mmendozamanriquez@hmc.edu 4 April 2023
+//          nlimpert@hmc.edu
+//
+// Purpose: Test coverage for LSU
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// load code to initalize stack, handle interrupts, terminate
+
+#include "WALLY-init-lib.h"
+
+# run-elf.bash find this in project description
+main:
+    # Page table root address at 0x80080000 (satp PPN = 0x80080)
+    li t5, 0x9000000000080080 // satp: MODE = 9 (Sv48), ASID = 0, PPN = 0x80080
+    csrw satp, t5
+
+    # sfence.vma x0, x0
+
+    # switch to supervisor mode
+    li a0, 1   
+    ecall
+
+    li t0, 0xC0000000    # first virtual address to read
+
+    li t2, 0             # i = 0
+    li t5, 0            # pass flag: 0 = first pass (ASID 0), nonzero = second pass (ASID 1)
+    li t3, 32     # Max amount of Loops = 32
+
+loop: bge t2, t3, nASID   # exit loop if i >= loops
+    lw t1, 0(t0)          # read one word from the current page
+    li t4, 0x1000
+    add t0, t0, t4        # advance to the next 4 KiB page
+    addi t2, t2, 1
+    j loop
+
+nASID: bne t5, zero, finished   # second pass already ran -> finish
+    li a0, 3   // ecall argument 3: presumably returns to machine mode so satp can be rewritten -- confirm in WALLY-init-lib.h
+    ecall
+    li t5, 0x9000100000080080 // satp: same root page table, ASID = 1
+    csrw satp, t5
+    li a0, 1   
+    ecall
+    li t2, 0
+    li t0, 0xC0000000
+    li t5, 1 // nonzero marks the ASID 1 pass, so nASID exits to finished next time
+    j loop
+
+
+finished:
+    j done
+
+.data
+.align 19
+# level 3 Page table situated at 0x8008 0000, should point to 8008,1000
+pagetable: 
+    .8byte 0x200204C1 // pointer PTE (flags 0xC1 = D|A|V), PPN 0x80081
+    
+.align 12 // level 2 page table, contains a gigapage leaf and a pointer to the level 1 table
+    .8byte 0x0
+    .8byte 0x0
+    .8byte 0x200000CF // gigapage leaf for VAs 8000 0000 up to C000 0000, PPN 0x80000 (identity mapped)
+    .8byte 0x200208C1 // pointer to next page table entry at 8008 2000
+
+.align 12 // level 1 page table, points to level 0 page table
+    .8byte 0x20020CC1 // PPN 0x80083 -> level 0 table at 8008 3000
+
+.align 12 // level 0 page table for VAs C000 0000 + i*0x1000; each PPN is 0x80000 + i, so the physical targets are 8000 0000 + i*0x1000
+    .8byte 0x200000CF // access xC000 0000
+    .8byte 0x200004CF // access xC000 1000
+    .8byte 0x200008CF // access xC000 2000
+    .8byte 0x20000CCF // access xC000 3000
+
+    .8byte 0x200010CF // access xC000 4000
+    .8byte 0x200014CF
+    .8byte 0x200018CF
+    .8byte 0x20001CCF
+
+    .8byte 0x200020CF // access xC000 8000
+    .8byte 0x200024CF
+    .8byte 0x200028CF
+    .8byte 0x20002CCF
+
+    .8byte 0x200030CF // access xC000 C000
+    .8byte 0x200034CF
+    .8byte 0x200038CF
+    .8byte 0x20003CCF
+
+    .8byte 0x200040CF // access xC001 0000
+    .8byte 0x200044CF
+    .8byte 0x200048CF
+    .8byte 0x20004CCF
+
+    .8byte 0x200050CF // access xC001 4000
+    .8byte 0x200054CF
+    .8byte 0x200058CF
+    .8byte 0x20005CCF
+
+    .8byte 0x200060CF // access xC001 8000
+    .8byte 0x200064CF
+    .8byte 0x200068CF
+    .8byte 0x20006CCF
+
+    .8byte 0x200070CF // access xC001 C000
+    .8byte 0x200074CF
+    .8byte 0x200078CF
+    .8byte 0x20007CCF
+
+    .8byte 0x200080CF // access xC002 0000
+    .8byte 0x200084CF
+    .8byte 0x200088CF
+    .8byte 0x20008CCF
+
+    
\ No newline at end of file
diff --git a/tests/coverage/tlbGLB.S b/tests/coverage/tlbGLB.S
new file mode 100644
index 000000000..77e3a31c1
--- /dev/null
+++ b/tests/coverage/tlbGLB.S
@@ -0,0 +1,134 @@
+///////////////////////////////////////////
+// tlbGLB.S
+//
+// Written: mmendozamanriquez@hmc.edu 4 April 2023
+//          nlimpert@hmc.edu
+//
+// Purpose: coverage for the global check. 
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+// load code to initalize stack, handle interrupts, terminate
+
+#include "WALLY-init-lib.h"
+
+# run-elf.bash find this in project description
+main:
+    # Page table root address at 0x80080000 (satp PPN = 0x80080)
+    li t5, 0x9000000000080080 // satp: MODE = 9 (Sv48), ASID = 0, PPN = 0x80080
+    csrw satp, t5
+
+    # sfence.vma x0, x0
+
+    # switch to supervisor mode
+    li a0, 1   
+    ecall
+
+    li t0, 0xC0000000    # first virtual address to read
+
+    li t2, 0             # i = 0
+    li t5, 0            # pass flag: 0 = first pass (ASID 0), nonzero = second pass (ASID 1)
+    li t3, 32     # Max amount of Loops = 32
+
+loop: bge t2, t3, nASID   # exit loop if i >= loops
+    lw t1, 0(t0)          # read one word from the current page
+    li t4, 0x1000
+    add t0, t0, t4        # advance to the next 4 KiB page
+    addi t2, t2, 1
+    j loop
+
+nASID: bne t5, zero, finished   # second pass already ran -> finish
+    li a0, 3   // ecall argument 3: presumably returns to machine mode so satp can be rewritten -- confirm in WALLY-init-lib.h
+    ecall
+    li t5, 0x9000100000080080 // satp: same root page table, ASID = 1
+    csrw satp, t5
+    li a0, 1   
+    ecall
+    li t2, 0
+    li t0, 0xC0000000
+    li t5, 1 // nonzero marks the ASID 1 pass, so nASID exits to finished next time
+    j loop
+
+
+finished:
+    j done
+
+.data
+.align 19
+# level 3 Page table situated at 0x8008 0000, should point to 8008,1000
+pagetable: 
+    .8byte 0x200204C1 // pointer PTE (flags 0xC1 = D|A|V), PPN 0x80081
+    
+.align 12 // level 2 page table, contains a gigapage leaf and a pointer to the level 1 table
+    .8byte 0x0
+    .8byte 0x0
+    .8byte 0x200000CF // gigapage leaf for VAs 8000 0000 up to C000 0000, PPN 0x80000 (identity mapped)
+    .8byte 0x200208C1 // pointer to next page table entry at 8008 2000
+
+.align 12 // level 1 page table, points to level 0 page table
+    .8byte 0x20020CE1 // PPN 0x80083 -> level 0 table at 8008 3000; flags 0xE1 have the G (global) bit set
+
+.align 12 // level 0 page table for VAs C000 0000 + i*0x1000; each PPN is 0x80000 + i, so the physical targets are 8000 0000 + i*0x1000
+    .8byte 0x200000CF // access xC000 0000 (flags 0xCF: G bit clear for these first four entries)
+    .8byte 0x200004CF // access xC000 1000
+    .8byte 0x200008CF // access xC000 2000
+    .8byte 0x20000CCF // access xC000 3000
+
+    .8byte 0x200010EF // access xC000 4000 (flags 0xEF: G bit set from here to the end of the table)
+    .8byte 0x200014EF
+    .8byte 0x200018EF
+    .8byte 0x20001CEF
+
+    .8byte 0x200020EF // access xC000 8000
+    .8byte 0x200024EF
+    .8byte 0x200028EF
+    .8byte 0x20002CEF
+
+    .8byte 0x200030EF // access xC000 C000
+    .8byte 0x200034EF
+    .8byte 0x200038EF
+    .8byte 0x20003CEF
+
+    .8byte 0x200040EF // access xC001 0000
+    .8byte 0x200044EF
+    .8byte 0x200048EF
+    .8byte 0x20004CEF
+
+    .8byte 0x200050EF // access xC001 4000
+    .8byte 0x200054EF
+    .8byte 0x200058EF
+    .8byte 0x20005CEF
+
+    .8byte 0x200060EF // access xC001 8000
+    .8byte 0x200064EF
+    .8byte 0x200068EF
+    .8byte 0x20006CEF
+
+    .8byte 0x200070EF // access xC001 C000
+    .8byte 0x200074eF
+    .8byte 0x200078EF
+    .8byte 0x20007CEF
+
+    .8byte 0x200080EF // access xC002 0000
+    .8byte 0x200084EF
+    .8byte 0x200088EF
+    .8byte 0x20008CEF
+
+    
\ No newline at end of file
diff --git a/tests/coverage/tlbMP.S b/tests/coverage/tlbMP.S
new file mode 100644
index 000000000..6981d1f36
--- /dev/null
+++ b/tests/coverage/tlbMP.S
@@ -0,0 +1,163 @@
+///////////////////////////////////////////
+// tlbMP.S
+//
+// Written: mmendozamanriquez@hmc.edu 4 April 2023
+//          nlimpert@hmc.edu
+//
+// Purpose: Test coverage for LSU
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// load code to initalize stack, handle interrupts, terminate
+
+#include "WALLY-init-lib.h"
+
+# run-elf.bash find this in project description
+main:
+    # Page table root address at 0x80010000
+    li t5, 0x9000000000080010 // satp: MODE = 9 (Sv48), ASID = 0, PPN = 0x80010
+    csrw satp, t5
+
+    # sfence.vma x0, x0
+
+    # switch to supervisor mode
+    li a0, 1   
+    ecall
+    li t5, 0            // pass flag: 0 = first pass (4 KiB stride), nonzero = second pass (2 MiB stride)
+    li t0, 0x84000000 // first pass starts in the 4 KiB-page region at 8400 0000
+    li t4, 0x1000 // stride; kept outside the loop because the second pass changes it
+    li t2, 0      # i = 0
+    li t3, 32     # Max amount of Loops = 32
+
+loop: bge t2, t3, lKP   # exit loop if i >= loops
+    lw t1, 0(t0)        # read one word from the current page
+    add t0, t0, t4      # advance by the current stride
+    addi t2, t2, 1
+    j loop
+
+lKP: bne t5, zero, finished   # second pass already ran -> finish
+    li t0, 0x80000000   // second pass starts at the first megapage
+    slli t4, t4, 9      // stride 0x1000 << 9 = 0x200000 (2 MiB)
+    addi t5, t5, 1      // mark that the second pass is running
+    li t2, 0
+    j loop
+
+finished:
+    j done
+
+.data
+
+.align 16
+# Page table situated at 0x80010000
+pagetable: 
+    .8byte 0x200044C1 // pointer PTE (flags 0xC1 = D|A|V), PPN 0x80011
+
+.align 12 // next-level table: entries 0-2 all point to the same table (PPN 0x80012)
+    .8byte 0x00000000200048C1
+    .8byte 0x00000000200048C1
+    .8byte 0x00000000200048C1
+    
+
+.align 12 // megapages: 32 entries of 2 MiB each, covering 8000 0000 through 83FF FFFF
+
+    .8byte 0x200000CF // access 8000,0000
+    .8byte 0x200800CF // access 8020,0000 
+    .8byte 0x201000CF // access 8040,0000
+    .8byte 0x201800CF // access 8060,0000
+
+    .8byte 0x202000CF // access 8080,0000
+    .8byte 0x202800CF // access 80A0,0000
+    .8byte 0x203000CF // access 80C0,0000
+    .8byte 0x203800CF // access 80E0,0000
+
+    .8byte 0x204000CF // access 8100,0000
+    .8byte 0x204800CF 
+    .8byte 0x205000CF 
+    .8byte 0x205800CF 
+
+    .8byte 0x206000CF // access 8180,0000
+    .8byte 0x206800CF 
+    .8byte 0x207000CF 
+    .8byte 0x207800CF 
+
+    .8byte 0x208000CF // access 8200,0000
+    .8byte 0x208800CF 
+    .8byte 0x209000CF 
+    .8byte 0x209800CF 
+
+    .8byte 0x20A000CF // access 8280,0000
+    .8byte 0x20A800CF 
+    .8byte 0x20B000CF 
+    .8byte 0x20B800CF 
+
+    .8byte 0x20C000CF // access 8300,0000
+    .8byte 0x20C800CF 
+    .8byte 0x20D000CF 
+    .8byte 0x20D800CF
+
+    .8byte 0x20E000CF // access 8380,0000
+    .8byte 0x20E800CF 
+    .8byte 0x20F000CF 
+    .8byte 0x20F800CF
+
+    .8byte 0x20004CC1
+     // Pointer entry (index 32) for addresses from 8400, 0000 to 841F, FFFF
+                      // points to the kilopage table below (PPN 0x80013)
+
+.align 12 // kilopage table for VAs starting at 8400 0000; each PPN is 0x84000 + i
+    .8byte 0x210000CF
+    .8byte 0x210004CF
+    .8byte 0x210008CF
+    .8byte 0x21000CCF
+
+    .8byte 0x210010CF
+    .8byte 0x210014CF
+    .8byte 0x210018CF
+    .8byte 0x21001CCF
+
+    .8byte 0x210020CF
+    .8byte 0x210024CF
+    .8byte 0x210028CF
+    .8byte 0x21002CCF
+
+    .8byte 0x210030CF
+    .8byte 0x210034CF
+    .8byte 0x210038CF
+    .8byte 0x21003CCF
+
+    .8byte 0x210040CF
+    .8byte 0x210044CF
+    .8byte 0x210048CF
+    .8byte 0x21004CCF
+
+    .8byte 0x210050CF
+    .8byte 0x210054CF
+    .8byte 0x210058CF
+    .8byte 0x21005CCF
+
+    .8byte 0x210060CF
+    .8byte 0x210064CF
+    .8byte 0x210068CF
+    .8byte 0x21006CCF
+
+    .8byte 0x210070CF
+    .8byte 0x210074CF
+    .8byte 0x210078CF
+    .8byte 0x21007CCF
+

From a0e71c26cbaae1cfcc11e0a55641bc1d6e8bd65b Mon Sep 17 00:00:00 2001
From: Noah Limpert <nlimpert@g.hmc.edu>
Date: Thu, 20 Apr 2023 14:50:06 -0700
Subject: [PATCH 22/35] Add in a test that makes match 3 = 0 for all tlb lines

---
 testbench/tests.vh     |   1 +
 tests/coverage/tlbM3.S | 155 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 156 insertions(+)
 create mode 100644 tests/coverage/tlbM3.S

diff --git a/testbench/tests.vh b/testbench/tests.vh
index b86756b4e..a450b057d 100644
--- a/testbench/tests.vh
+++ b/testbench/tests.vh
@@ -60,6 +60,7 @@ string tvpaths[] = '{
     "pmpcfg2",
     "tlbKP",
     "tlbMP",
+    "tlbM3",
     "tlbASID",
     "tlbGLB",
     "ifuCamlineWrite"
diff --git a/tests/coverage/tlbM3.S b/tests/coverage/tlbM3.S
new file mode 100644
index 000000000..ececa1f34
--- /dev/null
+++ b/tests/coverage/tlbM3.S
@@ -0,0 +1,155 @@
+///////////////////////////////////////////
+// tlbM3.S
+//
+// Written: mmendozamanriquez@hmc.edu 4 April 2023
+//          nlimpert@hmc.edu
+//
+// Purpose: Test coverage for LSU
+//
+// A component of the CORE-V-WALLY configurable RISC-V project.
+// 
+// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University
+//
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1
+//
+// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file 
+// except in compliance with the License, or, at your option, the Apache License version 2.0. You 
+// may obtain a copy of the License at
+//
+// https://solderpad.org/licenses/SHL-2.1/
+//
+// Unless required by applicable law or agreed to in writing, any work distributed under the 
+// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, 
+// either express or implied. See the License for the specific language governing permissions 
+// and limitations under the License.
+////////////////////////////////////////////////////////////////////////////////////////////////
+
+// load code to initalize stack, handle interrupts, terminate
+
+#include "WALLY-init-lib.h"
+
+# run-elf.bash find this in project description
+main:
+    # Page table root address at 0x80010000
+    li t5, 0x9000000000080010 // satp: MODE = 9 (Sv48), ASID = 0, PPN = 0x80010
+    csrw satp, t5
+
+    # sfence.vma x0, x0
+
+    # switch to supervisor mode
+    li a0, 1   
+    ecall
+
+    li t0, 0x1000        # first virtual address to read
+
+    li t2, 0             # i = 0
+    li t3, 64     # Max amount of Loops = 64
+    li t4, 0x1000        # stride: one 4 KiB page per iteration
+
+loop: bge t2, t3, interim  # exit loop if i >= loops
+    lw t1, 0(t0)         # NOTE(review): the level 0 table in this file defines 54 entries (VAs 0x0-0x35000); later reads presumably fault -- confirm intended
+    # sfence.vma x0, x0
+    add t0, t0, t4
+    addi t2, t2, 1
+    j loop
+
+interim:
+    li t0, 0xFFFFFFFF000 # second pass starts at this high VA; NOTE(review): only root entry 0 is defined in this file, so these reads presumably fault -- confirm
+    li t2, 0             # i = 0
+     
+
+loop2:bge t2, t3, finished  # exit loop if i >= loops
+    lw t1, 0(t0)         # read one word from the current page
+    add t0, t0, t4       # advance to the next 4 KiB page
+    addi t2, t2, 1
+    j loop2
+
+finished:
+    j done
+
+.data
+
+.align 16
+# Page table situated at 0x80010000
+pagetable: 
+    .8byte 0x200044C1 // pointer PTE, PPN 0x80011; the old value 200040.. pointed back at this table, which was wrong
+
+.align 12 // next-level table: entries 0-2 all point to the same table (PPN 0x80012)
+    .8byte 0x00000000200048C1
+    .8byte 0x00000000200048C1
+    .8byte 0x00000000200048C1
+    
+
+.align 12 // next-level table: entry 0 points to the level 0 table (PPN 0x80013)
+    .8byte 0x0000000020004CC1
+    //.8byte 0x00000200800CF// ADD IN THE MEGAPAGE should 3 nibbles of zeros be removed?
+
+.align 12 // level 0 table: 54 leaf entries, entry i has PPN 0x80000 + i
+    # physical pages start at 80000000
+    .8byte 0x200000CF
+    .8byte 0x200004CF
+    .8byte 0x200008CF
+    .8byte 0x20000CCF
+
+    .8byte 0x200010CF
+    .8byte 0x200014CF
+    .8byte 0x200018CF
+    .8byte 0x20001CCF
+
+    .8byte 0x200020CF
+    .8byte 0x200024CF
+    .8byte 0x200028CF
+    .8byte 0x20002CCF
+
+    .8byte 0x200030CF
+    .8byte 0x200034CF
+    .8byte 0x200038CF
+    .8byte 0x20003CCF
+
+    .8byte 0x200040CF
+    .8byte 0x200044CF
+    .8byte 0x200048CF
+    .8byte 0x20004CCF
+
+    .8byte 0x200050CF
+    .8byte 0x200054CF
+    .8byte 0x200058CF
+    .8byte 0x20005CCF
+
+    .8byte 0x200060CF
+    .8byte 0x200064CF
+    .8byte 0x200068CF
+    .8byte 0x20006CCF
+
+    .8byte 0x200070CF
+    .8byte 0x200074CF
+    .8byte 0x200078CF
+    .8byte 0x20007CCF
+    
+    .8byte 0x200080CF
+    .8byte 0x200084CF
+    .8byte 0x200088CF
+    .8byte 0x20008CCF
+
+    .8byte 0x200090CF
+    .8byte 0x200094CF
+    .8byte 0x200098CF
+    .8byte 0x20009CCF
+
+    .8byte 0x2000A0CF
+    .8byte 0x2000A4CF
+    .8byte 0x2000A8CF
+    .8byte 0x2000ACCF
+
+    .8byte 0x2000B0CF
+    .8byte 0x2000B4CF
+    .8byte 0x2000B8CF
+    .8byte 0x2000BCCF
+
+    .8byte 0x2000C0CF
+    .8byte 0x2000C4CF
+    .8byte 0x2000C8CF
+    .8byte 0x2000CCCF
+
+    .8byte 0x2000D0CF
+    .8byte 0x2000D4CF

From f2ae770e1783a770948ed4b0b44521644400974c Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Thu, 20 Apr 2023 16:24:58 -0700
Subject: [PATCH 23/35] Fmv h/q comments in controller

---
 src/fpu/fctrl.sv | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv
index b9584bc9e..206cefbb4 100755
--- a/src/fpu/fctrl.sv
+++ b/src/fpu/fctrl.sv
@@ -138,10 +138,10 @@ module fctrl (
                                   endcase
                       7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000)          
                                                 ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass
-                                  else if (Funct3D == 3'b000 & Rs2D == 5'b00000) 
-                                                ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w / fmv.x.d to int register
-                      7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000) 
-                                                ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w.x / fmv.d.x   to fp reg
+                                  else if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) 
+                                                ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q  fp to int register
+                      7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) 
+                                                ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x  int to fp reg
                       7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00)
                                                 ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h)
                       7'b0100001: if (Rs2D[4:2] == 3'b000  & SupportedFmt2 & Rs2D[1:0] != 2'b01)

From 2c47268f50c159d78451973711208d2a840e1a83 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Thu, 20 Apr 2023 16:25:19 -0700
Subject: [PATCH 24/35] Started fdivsqrtpreproc flow organization

---
 src/fpu/fdivsqrt/fdivsqrtcycles.sv  |  4 +-
 src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 57 ++++++++++++++++++++---------
 2 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
index f1ad32cd8..4025a30cb 100644
--- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
@@ -1,10 +1,10 @@
 ///////////////////////////////////////////
-// fdivsqrt.sv
+// fdivsqrtcycles.sv
 //
 // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu
 // Modified: 18 April 2022
 //
-// Purpose: Combined Divide and Square Root Floating Point and Integer Unit
+// Purpose: Determine number of cycles for divsqrt
 // 
 // Documentation: RISC-V System on Chip Design Chapter 13
 //
diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index 43a5e42b2..04739ee88 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -63,6 +63,10 @@ module fdivsqrtpreproc (
   logic                       AsE, BsE;                            // Signs of integer inputs
   logic [`XLEN-1:0]           AE;                                  // input A after W64 adjustment
 
+  //////////////////////////////////////////////////////
+  // Integer Preprocessing
+  //////////////////////////////////////////////////////
+
   if (`IDIV_ON_FPU) begin:intpreproc // Int Supported
     logic [`XLEN-1:0] BE, PosA, PosB;
 
@@ -90,13 +94,17 @@ module fdivsqrtpreproc (
     // Select integer or floating point inputs
     mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX);
     mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD);
-
-
+    mux2 #(1)    numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
   end else begin // Int not supported
     assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}};
     assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}};
+    assign NumerZeroE = XZeroE;
   end
 
+  //////////////////////////////////////////////////////
+  // Integer & FP leading zero and normalization shift
+  //////////////////////////////////////////////////////
+
   // count leading zeros for Subnorm FP and to normalize integer inputs
   lzc #(`DIVb) lzcX (IFX, ell);
   lzc #(`DIVb) lzcY (IFD, mE);
@@ -105,17 +113,10 @@ module fdivsqrtpreproc (
   assign XPreproc = (IFX << ell) << 1;
   assign DPreproc = (IFD << mE)  << 1; 
 
-  // append leading 1 (for nonzero inputs)
-  // shift square root to be in range [1/4, 1)
-  // Normalized numbers are shifted right by 1 if the exponent is odd
-  // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
-  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
-  assign DivX = {3'b000, ~NumerZeroE, XPreproc};
+  //////////////////////////////////////////////////////
+  // Integer Right Shift to digit boundary
+  //////////////////////////////////////////////////////
 
-   // Divisior register
-  flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D);
-
-  // ***CT: factor out fdivsqrtcycles
   if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
     logic [`DIVBLEN:0] ZeroDiff, p;
     logic  ALTBE;
@@ -146,11 +147,6 @@ module fdivsqrtpreproc (
       assign DivXShifted = DivX;
     end
   /* verilator lint_on WIDTH */
-
-    // Selet integer or floating-point operands
-    mux2 #(1)    numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE);
-    mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
-
     // pipeline registers
     flopen #(1)        mdureg(clk, IFDivStartE, IntDivE,     IntDivM);
     flopen #(1)       altbreg(clk, IFDivStartE, ALTBE,    ALTBM);
@@ -163,14 +159,39 @@ module fdivsqrtpreproc (
     if (`XLEN==64) 
       flopen #(1)      w64reg(clk, IFDivStartE, W64E,     W64M);
   end else begin
-    assign NumerZeroE = XZeroE;
     assign X = PreShiftX;
+    assign ISpecialCaseE = 0;
   end
 
+  //////////////////////////////////////////////////////
+  // Floating-Point Preprocessing
+  // append leading 1 (for nonzero inputs)
+  // shift square root to be in range [1/4, 1)
+  // Normalized numbers are shifted right by 1 if the exponent is odd
+  // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
+  //////////////////////////////////////////////////////
+
+  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
+  assign DivX = {3'b000, ~NumerZeroE, XPreproc};
+
   // Sqrt is initialized on step one as R(X-1), so depends on Radix
   if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
   else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
   mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
+  
+  //////////////////////////////////////////////////////
+  // Selet integer or floating-point operands
+  //////////////////////////////////////////////////////
+
+  mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
+
+   // Divisior register
+  flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D);
+
+
+
+
+  
  
   // Floating-point exponent
   fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));

From 33c0f644576cb74df808b78abdf98f83a3ab7b71 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Thu, 20 Apr 2023 16:38:47 -0700
Subject: [PATCH 25/35] Reordered fdivsqrtpreproc to follow logic

---
 src/fpu/fdivsqrt/fdivsqrt.sv        |  6 ++--
 src/fpu/fdivsqrt/fdivsqrtcycles.sv  |  6 ++--
 src/fpu/fdivsqrt/fdivsqrtfsm.sv     |  4 +--
 src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 53 ++++++++++++++++-------------
 4 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv
index f4d465012..f7a443639 100644
--- a/src/fpu/fdivsqrt/fdivsqrt.sv
+++ b/src/fpu/fdivsqrt/fdivsqrt.sv
@@ -62,7 +62,7 @@ module fdivsqrt(
   logic [`DIVb+1:0]           FirstC;                       // Step tracker
   logic                       Firstun;                      // Quotient selection
   logic                       WZeroE;                       // Early termination flag
-  logic [`DURLEN-1:0]         cycles;                       // FSM cycles
+  logic [`DURLEN-1:0]         CyclesE;                      // FSM cycles
   logic                       SpecialCaseM;                 // Divide by zero, square root of negative, etc.
   logic                       DivStartE;                    // Enable signal for flops during stall
                                                             
@@ -76,7 +76,7 @@ module fdivsqrt(
 
   fdivsqrtpreproc fdivsqrtpreproc(                          // Preprocessor
     .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE),
-    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles,
+    .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE,
     // Int-specific 
     .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE,
     .BZeroM, .nM, .mM, .AM, 
@@ -85,7 +85,7 @@ module fdivsqrt(
   fdivsqrtfsm fdivsqrtfsm(                                  // FSM
     .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, 
     .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, 
-    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles,
+    .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE,
     // Int-specific 
     .IDivStartE, .ISpecialCaseE, .IntDivE);
 
diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
index 4025a30cb..2e17cc25b 100644
--- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv
@@ -33,7 +33,7 @@ module fdivsqrtcycles(
   input  logic                SqrtE,
   input  logic                IntDivE,
   input  logic [`DIVBLEN:0]   nE,
-  output logic [`DURLEN-1:0]  cycles
+  output logic [`DURLEN-1:0]  CyclesE
 );
   logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits
   // DIVN = `NF+3
@@ -68,8 +68,8 @@ module fdivsqrtcycles(
   always_comb begin 
     if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2
     else       fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
-    if (`IDIV_ON_FPU) cycles =  IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
-    else              cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
+    if (`IDIV_ON_FPU) CyclesE =  IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
+    else              CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES);
   end 
   /* verilator lint_on WIDTH */
 
diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
index 5332087ad..75010f74c 100644
--- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@@ -39,7 +39,7 @@ module fdivsqrtfsm(
   input  logic               StallM, FlushE,
   input  logic               IntDivE,
   input  logic               ISpecialCaseE,
-  input  logic [`DURLEN-1:0] cycles,
+  input  logic [`DURLEN-1:0] CyclesE,
   output logic               IFDivStartE,
   output logic               FDivBusyE, FDivDoneE,
   output logic               SpecialCaseM
@@ -67,7 +67,7 @@ module fdivsqrtfsm(
           state <= #1 IDLE; 
       end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE
 //       end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE
-          step <= cycles; 
+          step <= CyclesE; 
           if (SpecialCaseE) state <= #1 DONE;
           else              state <= #1 BUSY;
       end else if (state == BUSY) begin 
diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index 04739ee88..a63fad82c 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -43,7 +43,7 @@ module fdivsqrtpreproc (
   input  logic [`XLEN-1:0]    ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
   input  logic                IntDivE, W64E,
   output logic                ISpecialCaseE,
-  output logic [`DURLEN-1:0]  cycles,
+  output logic [`DURLEN-1:0]  CyclesE,
   output logic [`DIVBLEN:0]   nM, mM,
   output logic                NegQuotM, ALTBM, IntDivM, W64M,
   output logic                AsM, BZeroM,
@@ -62,6 +62,7 @@ module fdivsqrtpreproc (
   logic                       NegQuotE;                            // Integer quotient is negative
   logic                       AsE, BsE;                            // Signs of integer inputs
   logic [`XLEN-1:0]           AE;                                  // input A after W64 adjustment
+  logic  ALTBE;
 
   //////////////////////////////////////////////////////
   // Integer Preprocessing
@@ -113,13 +114,16 @@ module fdivsqrtpreproc (
   assign XPreproc = (IFX << ell) << 1;
   assign DPreproc = (IFD << mE)  << 1; 
 
+  // *** CT: move to fdivsqrtintpreshift
+
   //////////////////////////////////////////////////////
   // Integer Right Shift to digit boundary
+  //  Determine DivXShifted (X shifted to digit boundary)
+  //  and nE (number of fractional digits)
   //////////////////////////////////////////////////////
 
   if (`IDIV_ON_FPU) begin:intrightshift // Int Supported
     logic [`DIVBLEN:0] ZeroDiff, p;
-    logic  ALTBE;
 
     // calculate number of fractional bits p
     assign ZeroDiff = mE - ell;         // Difference in number of leading zeros
@@ -129,37 +133,24 @@ module fdivsqrtpreproc (
     // Integer special cases (terminate immediately)
     assign ISpecialCaseE = BZeroE | ALTBE;
 
-  /* verilator lint_off WIDTH */
     // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps
 
     if (`LOGRK > 0) begin // more than 1 bit per cycle
       logic [`LOGRK-1:0] IntTrunc, RightShiftX;
       logic [`DIVBLEN:0] TotalIntBits, IntSteps;
-
+      /* verilator lint_off WIDTH */
       assign TotalIntBits = `LOGR + p;                            // Total number of result bits (r integer bits plus p fractional bits)
       assign IntTrunc = TotalIntBits % `RK;                       // Truncation check for ceiling operator
       assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc;     // Number of steps for int div
       assign nE = (IntSteps * `DIVCOPIES) - 1;                    // Fractional digits
       assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK);  // Right shift amount
       assign DivXShifted = DivX >> RightShiftX;                   // shift X by up to R*K-1 to complete in nE steps
+      /* verilator lint_on WIDTH */
     end else begin // radix 2 1 copy doesn't require shifting
       assign nE = p; 
       assign DivXShifted = DivX;
     end
-  /* verilator lint_on WIDTH */
-    // pipeline registers
-    flopen #(1)        mdureg(clk, IFDivStartE, IntDivE,     IntDivM);
-    flopen #(1)       altbreg(clk, IFDivStartE, ALTBE,    ALTBM);
-    flopen #(1)    negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
-    flopen #(1)      bzeroreg(clk, IFDivStartE, BZeroE,   BZeroM);
-    flopen #(1)      asignreg(clk, IFDivStartE, AsE,      AsM);
-    flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE,       nM); 
-    flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE,       mM);
-    flopen #(`XLEN)   srcareg(clk, IFDivStartE, AE,       AM);
-    if (`XLEN==64) 
-      flopen #(1)      w64reg(clk, IFDivStartE, W64E,     W64M);
   end else begin
-    assign X = PreShiftX;
     assign ISpecialCaseE = 0;
   end
 
@@ -183,21 +174,35 @@ module fdivsqrtpreproc (
   // Selet integer or floating-point operands
   //////////////////////////////////////////////////////
 
-  mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
+  if (`IDIV_ON_FPU) begin
+    mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X);
+  end else begin
+    assign X = PreShiftX;
+  end
 
    // Divisior register
   flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D);
-
-
-
-
-  
  
   // Floating-point exponent
   fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));
   flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM);
 
   // Number of FSM cycles (to FSM)
-  fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles);
+  fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE);
+
+  if (`IDIV_ON_FPU) begin:intpipelineregs
+    // pipeline registers
+    flopen #(1)        mdureg(clk, IFDivStartE, IntDivE,     IntDivM);
+    flopen #(1)       altbreg(clk, IFDivStartE, ALTBE,    ALTBM);
+    flopen #(1)    negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
+    flopen #(1)      bzeroreg(clk, IFDivStartE, BZeroE,   BZeroM);
+    flopen #(1)      asignreg(clk, IFDivStartE, AsE,      AsM);
+    flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE,       nM); 
+    flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE,       mM);
+    flopen #(`XLEN)   srcareg(clk, IFDivStartE, AE,       AM);
+    if (`XLEN==64) 
+      flopen #(1)      w64reg(clk, IFDivStartE, W64E,     W64M);
+  end
+
 endmodule
 

From 86107e613667c6301c784d34ac26742c30fe2df7 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Thu, 20 Apr 2023 16:48:23 -0700
Subject: [PATCH 26/35] continued cleanup

---
 src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index a63fad82c..b3c97c27f 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -55,10 +55,10 @@ module fdivsqrtpreproc (
   logic [`DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
   logic [`NE+1:0]             QeE;                                 // Quotient Exponent (FP only)
   logic [`DIVb-1:0]           IFX, IFD;                            // Correctly-sized inputs for iterator, selected from int or fp input
-  logic [`DIVBLEN:0]          mE, nE, ell;                             // Leading zeros of inputs
+  logic [`DIVBLEN:0]          mE, nE, ell;                         // Leading zeros of inputs
   logic                       NumerZeroE;                          // Numerator is zero (X or A)
   logic                       AZeroE, BZeroE;                      // A or B is Zero for integer division
-  logic                       signedDiv;                           // signed division
+  logic                       SignedDivE;                          // signed division
   logic                       NegQuotE;                            // Integer quotient is negative
   logic                       AsE, BsE;                            // Signs of integer inputs
   logic [`XLEN-1:0]           AE;                                  // input A after W64 adjustment
@@ -72,20 +72,20 @@ module fdivsqrtpreproc (
     logic [`XLEN-1:0] BE, PosA, PosB;
 
     // Extract inputs, signs, zero, depending on W64 mode if applicable
-    assign signedDiv = ~Funct3E[0];
+    assign SignedDivE = ~Funct3E[0];
   
     // Source handling
     if (`XLEN==64) begin // 64-bit, supports W64
-      mux2 #(64)    amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & signedDiv}}, ForwardedSrcAE[31:0]}, W64E, AE);
-      mux2 #(64)    bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & signedDiv}}, ForwardedSrcBE[31:0]}, W64E, BE);
+      mux2 #(64)    amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE);
+      mux2 #(64)    bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE);
     end else begin // 32 bits only
       assign AE = ForwardedSrcAE;
       assign BE = ForwardedSrcBE;
      end
     assign AZeroE = ~(|AE);
     assign BZeroE = ~(|BE);
-    assign AsE = AE[`XLEN-1] & signedDiv;
-    assign BsE = BE[`XLEN-1] & signedDiv; 
+    assign AsE = AE[`XLEN-1] & SignedDivE;
+    assign BsE = BE[`XLEN-1] & SignedDivE; 
     assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative
 
     // Force integer inputs to be postiive
@@ -162,10 +162,10 @@ module fdivsqrtpreproc (
   // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
   //////////////////////////////////////////////////////
 
-  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
   assign DivX = {3'b000, ~NumerZeroE, XPreproc};
 
   // Sqrt is initialized on step one as R(X-1), so depends on Radix
+  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
   if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
   else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
   mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
@@ -192,7 +192,7 @@ module fdivsqrtpreproc (
 
   if (`IDIV_ON_FPU) begin:intpipelineregs
     // pipeline registers
-    flopen #(1)        mdureg(clk, IFDivStartE, IntDivE,     IntDivM);
+    flopen #(1)        mdureg(clk, IFDivStartE, IntDivE,  IntDivM);
     flopen #(1)       altbreg(clk, IFDivStartE, ALTBE,    ALTBM);
     flopen #(1)    negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM);
     flopen #(1)      bzeroreg(clk, IFDivStartE, BZeroE,   BZeroM);

From 8a59a4ce94baa225c6851b60514cee1aecb2685d Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Thu, 20 Apr 2023 17:35:01 -0700
Subject: [PATCH 27/35] fdivsqrt cleanup

---
 src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
index b3c97c27f..3de4b252e 100644
--- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv
@@ -50,7 +50,7 @@ module fdivsqrtpreproc (
   output logic [`XLEN-1:0]    AM
 );
 
-  logic [`DIVb-1:0]           XPreproc, DPreproc;
+  logic [`DIVb-1:0]           Xfract, Dfract;
   logic [`DIVb:0]             PreSqrtX;
   logic [`DIVb+3:0]           DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
   logic [`NE+1:0]             QeE;                                 // Quotient Exponent (FP only)
@@ -111,8 +111,8 @@ module fdivsqrtpreproc (
   lzc #(`DIVb) lzcY (IFD, mE);
 
   // Normalization shift: shift off leading one
-  assign XPreproc = (IFX << ell) << 1;
-  assign DPreproc = (IFD << mE)  << 1; 
+  assign Xfract = (IFX << ell) << 1;
+  assign Dfract = (IFD << mE)  << 1; 
 
   // *** CT: move to fdivsqrtintpreshift
 
@@ -154,6 +154,8 @@ module fdivsqrtpreproc (
     assign ISpecialCaseE = 0;
   end
 
+  // CT *** fdivsqrtfplead1
+
   //////////////////////////////////////////////////////
   // Floating-Point Preprocessing
   // append leading 1 (for nonzero inputs)
@@ -162,10 +164,10 @@ module fdivsqrtpreproc (
   // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS.  They are shifted right if the number of leading zeros is odd.
   //////////////////////////////////////////////////////
 
-  assign DivX = {3'b000, ~NumerZeroE, XPreproc};
+  assign DivX = {3'b000, ~NumerZeroE, Xfract};
 
   // Sqrt is initialized on step one as R(X-1), so depends on Radix
-  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
+  mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
   if (`RADIX == 2)  assign SqrtX = {3'b111, PreSqrtX};
   else              assign SqrtX = {2'b11, PreSqrtX, 1'b0};
   mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
@@ -181,7 +183,7 @@ module fdivsqrtpreproc (
   end
 
    // Divisior register
-  flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D);
+  flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D);
  
   // Floating-point exponent
   fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE));

From c2f441724b1e3b165d6798d823b6c92a18dba451 Mon Sep 17 00:00:00 2001
From: Liam <lchalk@hmc.edu>
Date: Fri, 21 Apr 2023 20:43:37 -0700
Subject: [PATCH 28/35] pmpcfg test cases

Increased IFU coverage from 83.37% to 83.53% and LSU coverage from 93.14% to 93.28%.
---
 tests/coverage/pmpcfg.S | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S
index 5b3e37b56..fd838041d 100644
--- a/tests/coverage/pmpcfg.S
+++ b/tests/coverage/pmpcfg.S
@@ -1,6 +1,6 @@
 // pmpcfg part 1
 // Kevin Wan, kewan@hmc.edu, 4/18/2023
-// Liam Chalk, lchalk@hmc.edu, 4/19/2023
+// Liam Chalk, lchalk@hmc.edu, 4/21/2023
 // locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
 // See the next part in pmpcfg1.S
 
@@ -19,32 +19,37 @@ main:
 
     li t0, 0x90000000
     csrw pmpaddr0, t0
-    li t0, 0x00000017
+    li t0, 0x00001700
+    csrw pmpcfg0, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr0, t0
+    li t0, 0x00001700
     csrw pmpcfg1, t0
 
     li t0, 0x90000000
     csrw pmpaddr0, t0
-    li t0, 0x00000017
+    li t0, 0x00001700
     csrw pmpcfg2, t0
 
     li t0, 0x90000000
     csrw pmpaddr0, t0
-    li t0, 0x00000017
+    li t0, 0x00001700
     csrw pmpcfg3, t0
 
     li t0, 0x90000000
     csrw pmpaddr1, t0
-    li t0, 0x00000017
+    li t0, 0x00001700
     csrw pmpcfg1, t0
 
     li t0, 0x90000000
-    csrw pmpaddr1, t0
-    li t0, 0x00000017
+    csrw pmpaddr2, t0
+    li t0, 0x00001700
     csrw pmpcfg2, t0
 
     li t0, 0x90000000
-    csrw pmpaddr1, t0
-    li t0, 0x00000017
+    csrw pmpaddr3, t0
+    li t0, 0x00001700
     csrw pmpcfg3, t0
 
     li t0, 0x8800000000000000

From 69cc0b8bf3ab3590ee98d5a7bbf897063e2899cc Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sat, 22 Apr 2023 09:38:14 -0700
Subject: [PATCH 29/35] test plan update

---
 docs/testplans/testplan.md | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/docs/testplans/testplan.md b/docs/testplans/testplan.md
index a25b3a189..37390a632 100644
--- a/docs/testplans/testplan.md
+++ b/docs/testplans/testplan.md
@@ -1,6 +1,29 @@
-# CORE-V Wally Test Plan
+# CORE-V Wally Design Verification Test Plan
+
+CORE-V Wally is functionally tested in the following ways.  Each test is run in lock-step against ImperasDV to ensure all architectural state is correct after each instruction.
+
+| Functions      | Coverage Method | Status |
+| ----------- | ----------- |----|
+|  Instructions | riscv-arch-test | Pass   |
+| Privileged Unit   | wally-riscv-arch-test        | Pass   |
+| Virtual Memory | wally-riscv-arch-test | Pass |
+| PMP | wally-riscv-arch-test | Pass |
+| Peripherals | wally-riscv-arch-test | Pass |
+| Floating-Point | TestFloat | Pass |
+| General | Code Coverage | 91% |
+| General | Boot Linux in Sim | Pass | 
+| General | Boot Linux on FPGA | Pass |
+
+
+The following performance validation is also run:
+| Function | Method | Status |
+| --- | --- | --- |
+| Overall Performance | embench | Pass|
+| Overall Performance | coremark | Pass |
+| Branch Predictor | *** | Pass |
+| Cache Miss Rate | *** | Pass |
+
 
-CORE-V Wally is tested in the following ways:
 
 * Run [RISC-V Architecture Compatibility Tests](https://github.com/riscv-non-isa/riscv-arch-test) in lock-step against the ImperasDV reference model.
 * Run custom tests to cover virtual memory, PMP, privileged unit, and peripherals in lock step against ImperasDV.

From 063e41806ec40ceb3eca410d08499f90153310c5 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sat, 22 Apr 2023 10:07:48 -0700
Subject: [PATCH 30/35] Fixed syntax error in exclusion.  Arbitrarily picked
 -e 1; fix if this isn't right

---
 src/cache/cachefsm.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv
index 34f1778f5..544e3454e 100644
--- a/src/cache/cachefsm.sv
+++ b/src/cache/cachefsm.sv
@@ -159,7 +159,7 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) (
   assign SelFlush = (CurrState == STATE_READY & FlushCache) |
           (CurrState == STATE_FLUSH) | 
           (CurrState == STATE_FLUSH_WRITEBACK);
-  // coverage off -item e -fecexprrow 1
+  // coverage off -item e 1 -fecexprrow 1
   // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck)
   assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) |
              (CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty);

From 086556310cdd61748a9a0ef9533fdebde28ea82e Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sat, 22 Apr 2023 12:22:45 -0700
Subject: [PATCH 31/35] Attempted to cause interrupt during fdivsqrt.  Fixed
 enabling fpu in fpu.S.  Fdivsqrt exclusions for coverage.

---
 sim/coverage-exclusions-rv64gc.do |  7 +++++--
 src/fpu/fdivsqrt/fdivsqrtfsm.sv   |  3 ++-
 tests/coverage/WALLY-init-lib.h   |  3 +++
 tests/coverage/fpu.S              | 24 ++++++++++++++++++++++--
 4 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do
index 4f90333a9..45d98a726 100644
--- a/sim/coverage-exclusions-rv64gc.do
+++ b/sim/coverage-exclusions-rv64gc.do
@@ -31,11 +31,14 @@
 do GetLineNum.do
 
 # LZA (i<64) statement confuses coverage tool 
-# This is ugly to exlcude the whole file - is there a better option?  // coverage off isn't working
+# DH 4/22/23: Exclude all LZAs
 coverage exclude -srcfile lzc.sv 
 
-# FDIVSQRT has 
+# DH 4/22/23: FDIVSQRT can't go directly from done to busy again
 coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state DONE->BUSY
+# DH 4/22/23: The busy->idle transition only occurs if a FlushE occurs while the divider is busy.  The flush is caused by a trap or return,
+# which won't happen while the divider is busy. 
+coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state BUSY->IDLE
 
 ### Exclude D$ states and logic for the I$ instance
 # This is cleaner than trying to set an I$-specific pragma in cachefsm.sv (which would exclude it for the D$ instance too)
diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
index 75010f74c..d1d9dda10 100644
--- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@@ -63,10 +63,11 @@ module fdivsqrtfsm(
   flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
 
   always_ff @(posedge clk) begin
+      // coverage off: dh 4/22/23 FlushE doesn't seem to happen while fdivsqrt is busy
       if (reset | FlushE) begin
+      // coverage on
           state <= #1 IDLE; 
       end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE
-//       end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE
           step <= CyclesE; 
           if (SpecialCaseE) state <= #1 DONE;
           else              state <= #1 BUSY;
diff --git a/tests/coverage/WALLY-init-lib.h b/tests/coverage/WALLY-init-lib.h
index 6b6dd6dd9..ec179a0dd 100644
--- a/tests/coverage/WALLY-init-lib.h
+++ b/tests/coverage/WALLY-init-lib.h
@@ -63,6 +63,9 @@ trap_handler:
     bgez t0, exception  # if msb is clear, it is an exception
 
 interrupt:              # must be a timer interrupt 
+    li t0, -1           # set mtimecmp to biggest number so it doesn't interrupt again
+    li t1, 0x02004000   # MTIMECMP in CLINT
+    sd t0, 0(t1)        
     j trap_return       # clean up and return
 
 exception:
diff --git a/tests/coverage/fpu.S b/tests/coverage/fpu.S
index b2a52be06..879980899 100644
--- a/tests/coverage/fpu.S
+++ b/tests/coverage/fpu.S
@@ -28,7 +28,7 @@
 
 main:
 
-    #bseti t0, zero, 14  # turn on FPU
+    bseti t0, zero, 14  # turn on FPU
     csrs mstatus, t0
 
     #Pull denormalized FP number from memory and pass it to fclass.S for coverage
@@ -105,6 +105,25 @@ main:
     # fcvt.w.q a0, ft0
     # fcvt.q.d ft3, ft0
 
+    // fdivsqrt: test busy->idle transition caused by a FlushE while divider is busy (when interrupt arrives)
+    // This code doesn't actually trigger a busy->idle transition because the pending timer interrupt doesn't occur until the division finishes.
+    li t0, 0x3F812345 # random value slightly bigger than 1
+    li t1, 0x3F823456
+    fmv.w.x ft0, t0  # move int to fp register
+    fmv.w.x ft1, t1
+    li t0, -1           # set mtimecmp to biggest number so it doesn't interrupt again
+    li t1, 0x02004000   # MTIMECMP in CLINT
+    sd t0, 0(t1)
+    csrsi mstatus, 0b1000   # enable interrupts with mstatus.MIE
+    li t1, 0x0200bff8   # read MTIME in CLINT
+    ld t0, 0(t1)        
+    addi t0, t0, 11
+    li t1, 0x02004000   # MTIMECMP in CLINT
+    sd t0, 0(t1)        # write mtime+11 to cause interrupt soon.  This is very touchy timing and is sensitive to cache line fetch latency
+    nop
+    fdiv.s ft2, ft1, ft0 # should get interrupted, triggering a flush
+    csrci mstatus, 0b1000   # disable interrupts with mstatus.MIE
+
     # Completing branch coverage in fctrl.sv
     .word 0x38007553    // Testing the all False case for 119 - funct7 under, op = 101 0011
     .word 0x40000053    // Line 145 All False Test case - illegal instruction?
@@ -145,4 +164,5 @@ TestData2:
 .word 0x7f800000 #INF
 .int 0xbf800000 #FP -1.0
 .int 0x7fa00000 #SNaN
-.int 0x3fffffff #OverFlow Test
\ No newline at end of file
+.int 0x3fffffff #OverFlow Test
+DivTestData:

From 3b299fb77ad56a0b8e9c8561eb26a9b4f47faba9 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sat, 22 Apr 2023 15:27:05 -0700
Subject: [PATCH 32/35] Removed unproven fdivsqrt exclusion

---
 src/fpu/fdivsqrt/fdivsqrtfsm.sv | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
index d1d9dda10..ba0758ee6 100644
--- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv
+++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv
@@ -63,9 +63,7 @@ module fdivsqrtfsm(
   flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
 
   always_ff @(posedge clk) begin
-      // coverage off: dh 4/22/23 FlushE doesn't seem to happen while fdivsqrt is busy
       if (reset | FlushE) begin
-      // coverage on
           state <= #1 IDLE; 
       end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE
           step <= CyclesE; 

From 52f49ed24d3237338050ed3c21aba64426a42dd6 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sat, 22 Apr 2023 15:32:39 -0700
Subject: [PATCH 33/35] Fault on writes to odd-numbered PMPCFG in RV64

---
 src/privileged/csrm.sv | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/privileged/csrm.sv b/src/privileged/csrm.sv
index f0e5f00db..fb519be37 100644
--- a/src/privileged/csrm.sv
+++ b/src/privileged/csrm.sv
@@ -171,7 +171,8 @@ module csrm #(parameter
     IllegalCSRMAccessM = !(`S_SUPPORTED) & (CSRAdrM == MEDELEG | CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode
     if (CSRAdrM >= PMPADDR0 & CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry
       CSRMReadValM = {{(`XLEN-(`PA_BITS-2)){1'b0}}, PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]};
-    else if (CSRAdrM >= PMPCFG0 & CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin
+    else if (CSRAdrM >= PMPCFG0 & CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4 & (`XLEN==32 | CSRAdrM[0] == 0)) begin
+      // only even-numbered PMPCFG entries exist in RV64
       if (`XLEN==64) begin
         entry = ({CSRAdrM[11:1], 1'b0} - PMPCFG0)*4; // disregard odd entries in RV64
         CSRMReadValM = {PMPCFG_ARRAY_REGW[entry+7],PMPCFG_ARRAY_REGW[entry+6],PMPCFG_ARRAY_REGW[entry+5],PMPCFG_ARRAY_REGW[entry+4],

From d29dc30288fbeedbaed4389b8a460b81949c56c1 Mon Sep 17 00:00:00 2001
From: Diego Herrera Vicioso <dherreravicioso@hmc.edu>
Date: Mon, 24 Apr 2023 02:06:53 -0700
Subject: [PATCH 34/35] Excluded coverage for impossible cases in wficountreg
 and status.MPRV

---
 sim/coverage-exclusions-rv64gc.do | 6 +++++-
 src/privileged/csrsr.sv           | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do
index 45d98a726..b4441e5b1 100644
--- a/sim/coverage-exclusions-rv64gc.do
+++ b/sim/coverage-exclusions-rv64gc.do
@@ -127,4 +127,8 @@ coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$lin
 set line [GetLineNum ../src/mmu/pmachecker.sv "WriteAccessM \\| ExecuteAccessF"]
 coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-5
 set line [GetLineNum ../src/mmu/pmachecker.sv "ReadAccessM \\| ExecuteAccessF"]
-coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-3
\ No newline at end of file
+coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-3
+
+# Excluding reset and clear for impossible case in the wficountreg in privdec
+set line [GetLineNum ../src/generic/flop/floprc.sv "reset \\| clear"]
+coverage exclude -scope /dut/core/priv/priv/pmd/wfi/wficountreg -linerange $line-$line -item c 1 -feccondrow 2
diff --git a/src/privileged/csrsr.sv b/src/privileged/csrsr.sv
index 60968a68b..61a6f3247 100644
--- a/src/privileged/csrsr.sv
+++ b/src/privileged/csrsr.sv
@@ -122,7 +122,10 @@ module csrsr (
     logic [1:0] EndiannessPrivMode;
     always_comb begin
       if      (SelHPTW)                                  EndiannessPrivMode = `S_MODE;
+      //coverage off -item c 1 -feccondrow 1
+      // status.MPRV always gets reset upon leaving machine mode, so MPRV will never be high when out of machine mode
       else if (PrivilegeModeW == `M_MODE & STATUS_MPRV)  EndiannessPrivMode = STATUS_MPP;
+      //coverage on
       else                                               EndiannessPrivMode = PrivilegeModeW;
 
       case (EndiannessPrivMode) 

From 7bf2ee54187dd13847794d6919f54b293da331c3 Mon Sep 17 00:00:00 2001
From: Liam <lchalk@hmc.edu>
Date: Tue, 25 Apr 2023 15:37:04 -0700
Subject: [PATCH 35/35] pmpaddr0 and pmpaddr2 test cases

Writing 0x00170000 and 0x17000000 to pmpcfg0 and pmpcfg2 (after setting pmpaddr0 and pmpaddr2).
Increased IFU coverage from 83.53% to 83.68% and LSU coverage from 93.29% to 93.45%.
---
 tests/coverage/pmpcfg.S | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S
index fd838041d..bcc8f3950 100644
--- a/tests/coverage/pmpcfg.S
+++ b/tests/coverage/pmpcfg.S
@@ -1,6 +1,6 @@
 // pmpcfg part 1
 // Kevin Wan, kewan@hmc.edu, 4/18/2023
-// Liam Chalk, lchalk@hmc.edu, 4/21/2023
+// Liam Chalk, lchalk@hmc.edu, 4/25/2023
 // locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
 // See the next part in pmpcfg1.S
 
@@ -52,6 +52,26 @@ main:
     li t0, 0x00001700
     csrw pmpcfg3, t0
 
+    li t0, 0x90000000
+    csrw pmpaddr0, t0
+    li t0, 0x00170000
+    csrw pmpcfg0, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr2, t0
+    li t0, 0x00170000
+    csrw pmpcfg2, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr0, t0
+    li t0, 0x17000000
+    csrw pmpcfg0, t0
+
+    li t0, 0x90000000
+    csrw pmpaddr2, t0
+    li t0, 0x17000000
+    csrw pmpcfg2, t0
+
     li t0, 0x8800000000000000
     csrw pmpcfg2, t0
     li t0, 0x88000000000000