From 03f573351a5335866bd6dc79bd902aa5b16378a4 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Sun, 17 Jul 2022 16:40:58 +0000
Subject: [PATCH 01/36] Rewrote convert shift calculation with always_comb for
 ease of reading

---
 pipelined/src/fpu/cvtshiftcalc.sv | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/pipelined/src/fpu/cvtshiftcalc.sv b/pipelined/src/fpu/cvtshiftcalc.sv
index 4d346d970..aef894f90 100644
--- a/pipelined/src/fpu/cvtshiftcalc.sv
+++ b/pipelined/src/fpu/cvtshiftcalc.sv
@@ -60,10 +60,11 @@ module cvtshiftcalc(
     //          - otherwise:
     //              |     LzcInM      | 0's if nessisary | 
     // change to int shift to the left one
-    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : 
-                     CvtResDenormUf ? {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}} : 
-                                   {CvtLzcIn, {`NF+1{1'b0}}};
-    
+
+    always_comb
+        if (ToInt)               CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}};
+        else if (CvtResDenormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}};
+        else                     CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}};
     
     // choose the negative of the fraction size
     if (`FPSIZES == 1) begin

From 5a38a1522558b64b2034a69fa1aa3c1e1e94ec8d Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Mon, 18 Jul 2022 13:30:35 -0700
Subject: [PATCH 02/36] added additional changes to coremark to support rv32

---
 benchmarks/coremark/riscv64-baremetal/core_portme.c | 11 ++++++++---
 benchmarks/coremark/riscv64-baremetal/core_portme.h |  6 ++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.c b/benchmarks/coremark/riscv64-baremetal/core_portme.c
index 57b7993ad..017bef74e 100755
--- a/benchmarks/coremark/riscv64-baremetal/core_portme.c
+++ b/benchmarks/coremark/riscv64-baremetal/core_portme.c
@@ -114,7 +114,12 @@ void portable_free(void *p) {
     #define read_csr(reg) ({ unsigned long __tmp; \
        asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
        __tmp; })
-    #define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8)
+	// #if (XLEN==64) 
+	// 	typedef unsigned long long ee_ptr_int;
+	// #else
+	// 	typedef unsigned long ee_ptr_int;
+	// #endif
+    #define GETMYTIME(_t) (_t = *(volatile ee_ptr_int*)0x0200BFF8)
 	#define MYTIMEDIFF(fin,ini) ((fin)-(ini))
 	// Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms)
 	#define TIMER_RES_DIVIDER 10000
@@ -196,8 +201,8 @@ void stop_time(void) {
 CORE_TICKS get_time(void) {
 	CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val));
 	unsigned long instructions = minstretDiff();
-	long long cm100 = 1000000000 / elapsed;  // coremark score * 100
-	long long cpi100 = elapsed*100/instructions; // CPI * 100
+	ee_ptr_int cm100 = 1000000000 / elapsed;  // coremark score * 100
+	ee_ptr_int cpi100 = elapsed*100/instructions; // CPI * 100
 	ee_printf("   WALLY CoreMark Results (from get_time)\n");
 	ee_printf("    Elapsed MTIME: %u\n", elapsed);
 	ee_printf("    Elapsed MINSTRET: %lu\n", instructions);
diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.h b/benchmarks/coremark/riscv64-baremetal/core_portme.h
index 33768b0f1..3146d3ec0 100755
--- a/benchmarks/coremark/riscv64-baremetal/core_portme.h
+++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h
@@ -69,14 +69,16 @@ typedef clock_t CORE_TICKS;
 // #elif (XLEN==32) 
 // 	#include <sys/types.h>
 // 	typedef ee_u32 CORE_TICKS;
-#else
 /* Configuration: size_t and clock_t
      Note these need to match the size of the clock output and the xLen the processor supports
  */
+#elif (XLEN==64) 
 typedef unsigned long int size_t;
 typedef unsigned long int clock_t;
-typedef clock_t CORE_TICKS;
+#else
+#include <sys/types.h>
 #endif
+typedef clock_t CORE_TICKS;
 
 /* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION
 	Initialize these strings per platform

From cce5fb8dfdb9e05039af2a7a39014c5e7a1e0383 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Mon, 18 Jul 2022 20:48:56 +0000
Subject: [PATCH 03/36] moved Ss to execute stage

---
 pipelined/regression/sim-wally-batch |  2 +-
 pipelined/src/fpu/fma.sv             |  9 ++++++-
 pipelined/src/fpu/fpu.sv             | 36 +++++-----------------------
 pipelined/src/fpu/postprocess.sv     |  3 ++-
 pipelined/src/fpu/roundsign.sv       | 12 ++--------
 pipelined/testbench/testbench-fp.sv  |  5 ++--
 6 files changed, 22 insertions(+), 45 deletions(-)

diff --git a/pipelined/regression/sim-wally-batch b/pipelined/regression/sim-wally-batch
index 7e821e584..8b5b5d628 100755
--- a/pipelined/regression/sim-wally-batch
+++ b/pipelined/regression/sim-wally-batch
@@ -1 +1 @@
-vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d"
+vsim -c -do "do wally-pipelined-batch.do rv32gc wally32periph"
diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv
index 44cd3616a..3cd128301 100644
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@@ -44,6 +44,7 @@ module fma(
     output logic                InvA,          // Was A inverted for effective subtraction (P-A or -P+A)
     output logic                As,       // the aligned addend's sign (modified Z sign for other opperations)
     output logic                Ps,          // the product's sign
+    output logic                Ss,          // the sum's sign
     output logic [$clog2(3*`NF+7)-1:0]          NCnt        // normalization shift count
     );
 
@@ -81,7 +82,7 @@ module fma(
     // // Addition/LZA
     // ///////////////////////////////////////////////////////////////////////////////
         
-    add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm);
+    add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Ss);
     
     loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
 endmodule
@@ -226,6 +227,7 @@ module add(
     output logic [2*`NF+1:0]    PmKilled,     // the product's mantissa possibly killed
     output logic                NegSum,        // was the sum negitive
     output logic                InvA,          // do you invert the aligned addend
+    output logic                Ss,          
     output logic [3*`NF+5:0]    Sm           // the positive sum
 );
     logic [3*`NF+6:0]    PreSum, NegPreSum; // possibly negitive sum
@@ -257,6 +259,11 @@ module add(
 
     // Choose the positive sum and accompanying LZA result.
     assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
+    // is the result negitive
+    //  if p - z is the Sum negitive
+    //  if -p + z is the Sum positive
+    //  if -p - z then the Sum is negitive
+    assign Ss = NegSum^Ps; //*** move to execute stage
 endmodule
 
 
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 65be29972..e7888551c 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -114,6 +114,7 @@ module fpu (
    logic 			    NegSumE, NegSumM;
    logic 			    ZSgnEffE, ZSgnEffM;
    logic 			    PSgnE, PSgnM;
+   logic 			    SsE, SsM;
    logic [$clog2(3*`NF+7)-1:0]			FmaNormCntE, FmaNormCntM;
 
    // Cvt Signals
@@ -255,36 +256,11 @@ module fpu (
             .Xm(XManE), .Ym(YManE), .Zm(ZManE), 
             .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), 
             .FOpCtrl(FOpCtrlE), .Fmt(FmtE), 
-            .As(ZSgnEffE), .Ps(PSgnE),
+            .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE),
             .Sm(SumE), .Pe(ProdExpE), 
             .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), 
             .ZmSticky(AddendStickyE), .KillProd(KillProdE)); 
 
-   // // fpdivsqrt using Goldschmidt's iteration
-   // if(`FLEN == 64) begin 
-   // flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E),
-   //       .clear(FDivSqrtDoneE), .en(load_preload),
-   //       .reset(reset),  .clk(clk));
-   // flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E),
-   //          .clear(FDivSqrtDoneE), .en(load_preload),
-   //          .reset(reset),  .clk(clk));
-   // end
-   // else if (`FLEN == 32) begin 
-   // flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E),
-   //       .clear(FDivSqrtDoneE), .en(load_preload),
-   //       .reset(reset),  .clk(clk));
-   // flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E),
-   //          .clear(FDivSqrtDoneE), .en(load_preload),
-   //          .reset(reset),  .clk(clk));
-   // end
-   // flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), 
-   //          .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}),
-   //          .clear(FDivSqrtDoneE), .en(load_preload),
-   //          .reset(reset),  .clk(clk));
-   // fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), 
-   //       .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1),
-   //       .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload,
-   //       .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM));
    divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, 
                   .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), 
                   .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
@@ -359,9 +335,9 @@ module fpu (
                {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
    flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
-   flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, 
-                           {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
-                           {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
+   flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, 
+                           {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE},
+                           {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM});
    flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
                            {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
                            {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
@@ -381,7 +357,7 @@ module fpu (
    assign FpLoadStoreM = FResSelM[1];
 
    postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
-                           .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM),
+                           .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM),
                            .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
                            .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM),
                            .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index f9ccd2553..7e741abbb 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -52,6 +52,7 @@ module postprocess (
     input logic                             FmaKillProd,      // set the product to zero before addition if the product is too small to matter
     input logic                             FmaNegSum,    // was the sum negitive
     input logic                             FmaInvA,      // do you invert Z
+    input logic                             FmaSs,
     input logic  [$clog2(3*`NF+7)-1:0]      FmaNCnt,   // the normalization shift count
     //divide signals
     input logic  [`DURLEN-1:0]              DivEarlyTermShift,
@@ -197,7 +198,7 @@ module postprocess (
 
                           
     roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, 
-                          .Xs, .Ys, .CvtCs, .Ms);
+                        .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
 
     round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
                 .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt,  .CvtResUf,
diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv
index 55e322bc3..acecb5947 100644
--- a/pipelined/src/fpu/roundsign.sv
+++ b/pipelined/src/fpu/roundsign.sv
@@ -38,23 +38,15 @@ module roundsign(
     input logic         DivOp,
     input logic         CvtOp,
     input logic         CvtCs,
+    input logic         FmaSs,
     output logic        Ms
 );
 
-    logic FmaResSgnTmp;
     logic Qs;
 
-    // is the result negitive
-    //  if p - z is the Sum negitive
-    //  if -p + z is the Sum positive
-    //  if -p - z then the Sum is negitive
-    assign FmaResSgnTmp = FmaNegSum^FmaPs; //*** move to execute stage
-
-    // assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs);
-
     assign Qs = Xs^Ys;
 
     // Sign for rounding calulation
-    assign Ms = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
+    assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index b90c3d3de..be5114e95 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -94,6 +94,7 @@ module testbenchfp;
 
   // in-between FMA signals
   logic                 Mult;
+  logic                 Ss;
   logic [`NE+1:0]	      Pe;
   logic 				        ZmSticky;
   logic 					      KillProd; 
@@ -674,13 +675,13 @@ module testbenchfp;
   fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), 
           .Xe(XExp), .Ye(YExp), .Ze(ZExp), 
           .Xm(XMan), .Ym(YMan), .Zm(ZMan),
-          .XZero, .YZero, .ZZero,
+          .XZero, .YZero, .ZZero, .Ss,
           .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps,
           .Pe, .ZmSticky, .KillProd); 
               
   postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]),
               .Ze(ZExp),  .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
-              .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky),
+              .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss),
               .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
               .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
               .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),

From 1e876733210f687163845bf6694a8b0fc59fe75c Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Mon, 18 Jul 2022 23:10:22 +0000
Subject: [PATCH 04/36] Cleaned up Coremark makefile

---
 benchmarks/coremark/Makefile | 33 ++++++---------------------------
 1 file changed, 6 insertions(+), 27 deletions(-)

diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile
index 2db418aa3..d6cdc529e 100644
--- a/benchmarks/coremark/Makefile
+++ b/benchmarks/coremark/Makefile
@@ -1,6 +1,8 @@
+# Wally Coremark Makefile
+# Daniel Torres & David Harris 28 July 2022
+
 PORT_DIR = $(CURDIR)/riscv64-baremetal
 cmbase=../../addins/coremark
-# cmbase= ../riscv-coremark/coremark
 work_dir= ../../benchmarks/coremark/work
 XLEN ?=64
 sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h  \
@@ -14,17 +16,7 @@ PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign-
 	-fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \
 	-funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \
 	-nostdlib -nostartfiles -ffreestanding -mstrict-align \
-	-DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN)
-
-# flags that cause build errors mcmodel=medlow
-
-# -static -mcmodel=medlow -mtune=sifive-7-series \
-# -O3 -falign-functions=16 -funroll-all-loops -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET 
-# -finline-functions -falign-jumps=4 \
-# -nostdlib -nostartfiles -ffreestanding -mstrict-align \
-# -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \
-# -DPERFORMANCE_RUN=1
-# "-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " 
+	-DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN) -DITERATIONS=10
 
 all: $(work_dir)/coremark.bare.riscv.elf.memfile
 
@@ -38,10 +30,8 @@ $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv
 	extractFunctionRadix.sh $<.elf.objdump
 
 $(work_dir)/coremark.bare.riscv: $(sources) Makefile
-    # These flags were used by WD on CoreMark
 	make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="$(PORT_CFLAGS)"
-    # -fno-toplevel-reorder --param=max-inline-insns-size=128 " # adding this bit caused a compiler error
-	mkdir -p $(work_dir)
+ 	mkdir -p $(work_dir)
 	mv $(cmbase)/coremark.bare.riscv $(work_dir)
 
 .PHONY: clean
@@ -49,15 +39,4 @@ $(work_dir)/coremark.bare.riscv: $(sources) Makefile
 clean:
 	rm -f $(work_dir)/*
 
-
-
-# # PORT_CFLAGS = -g -march=$(XLEN)im -mabi=$(ABI) -static -mcmodel=medlow -mtune=sifive-3-series \
-# #   -O3 -falign-functions=16 -funroll-all-loops \
-# # 	-finline-functions -falign-jumps=4 \
-# #   -nostdlib -nostartfiles -ffreestanding -mstrict-align \
-# # 	-DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \
-# # 	-DPERFORMANCE_RUN=1
-
-# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " 
-# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta"
-# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " 
\ No newline at end of file
+ 
\ No newline at end of file

From 630110e73ed0cb72e303eb3a9eedad62aa5f1d8d Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Mon, 18 Jul 2022 16:48:13 -0700
Subject: [PATCH 05/36] Coremark cleanup

---
 benchmarks/coremark/Makefile                        | 6 +++---
 benchmarks/coremark/riscv64-baremetal/core_portme.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile
index d6cdc529e..3f94a7b8b 100644
--- a/benchmarks/coremark/Makefile
+++ b/benchmarks/coremark/Makefile
@@ -16,7 +16,7 @@ PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign-
 	-fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \
 	-funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \
 	-nostdlib -nostartfiles -ffreestanding -mstrict-align \
-	-DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN) -DITERATIONS=10
+	-DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN) 
 
 all: $(work_dir)/coremark.bare.riscv.elf.memfile
 
@@ -31,7 +31,7 @@ $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv
 
 $(work_dir)/coremark.bare.riscv: $(sources) Makefile
 	make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="$(PORT_CFLAGS)"
- 	mkdir -p $(work_dir)
+	mkdir -p $(work_dir)
 	mv $(cmbase)/coremark.bare.riscv $(work_dir)
 
 .PHONY: clean
@@ -39,4 +39,4 @@ $(work_dir)/coremark.bare.riscv: $(sources) Makefile
 clean:
 	rm -f $(work_dir)/*
 
- 
\ No newline at end of file
+
diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.h b/benchmarks/coremark/riscv64-baremetal/core_portme.h
index 3146d3ec0..ce88f7239 100755
--- a/benchmarks/coremark/riscv64-baremetal/core_portme.h
+++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h
@@ -94,7 +94,7 @@ typedef clock_t CORE_TICKS;
  #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */
 #endif
 #ifndef MEM_LOCATION
- #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)"
+ #define MEM_LOCATION "Code and Data in external RAM"
  #define MEM_LOCATION_UNSPEC 1
 #endif
 

From 64b3e4117b3aa0b7d20e45ef713214c86eea0385 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Tue, 19 Jul 2022 00:04:24 +0000
Subject: [PATCH 06/36] reworked fmashiftcalc to match book

---
 pipelined/src/fpu/fmashiftcalc.sv | 69 +++++++++++++++++--------------
 1 file changed, 38 insertions(+), 31 deletions(-)

diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index a6c1a1c60..6d1c40bc7 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -43,6 +43,7 @@ module fmashiftcalc(
 );
     logic [$clog2(3*`NF+7)-1:0] DenormShift;        // right shift if the result is denormalized //***change this later
     logic [`NE+1:0]             NormSumExp;       // the exponent of the normalized sum with the `FLEN bias
+    logic [`NE+1:0] BiasCorr;
 
     ///////////////////////////////////////////////////////////////////////////////
     // Normalization
@@ -50,37 +51,40 @@ module fmashiftcalc(
     //*** insert bias-bias simplification in fcvt.sv/phone pictures
     // Determine if the sum is zero
     assign FmaSZero = ~(|FmaSm);
-
+    logic [`NE+1:0] FmaSe;
+    assign FmaSe = FmaKillProd ? {2'b0, Ze} : FmaPe;
     // calculate the sum's exponent
-    //                                                                      ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4
-    assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
+    assign NormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
 
     //convert the sum's exponent into the proper percision
     if (`FPSIZES == 1) begin
         assign FmaNe = NormSumExp;
 
     end else if (`FPSIZES == 2) begin
-        assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
-
+        assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS);
+        assign FmaNe = NormSumExp+BiasCorr;
+        
     end else if (`FPSIZES == 3) begin
         always_comb begin
             case (Fmt)
-                `FMT: FmaNe = NormSumExp;
-                `FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
-                `FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
-                default: FmaNe = {`NE+2{1'bx}};
+                `FMT: BiasCorr =  '0;
+                `FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS);
+                `FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS);
+                default: BiasCorr = 'x;
             endcase
         end
+        assign FmaNe = NormSumExp+BiasCorr;
 
     end else if (`FPSIZES == 4) begin
         always_comb begin
             case (Fmt)
-                2'h3: FmaNe = NormSumExp;
-                2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
-                2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
-                2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
+                2'h3: BiasCorr = '0;
+                2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS);
+                2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS);
+                2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS);
             endcase
         end
+        assign FmaNe = NormSumExp+BiasCorr;
 
     end
     
@@ -89,25 +93,25 @@ module fmashiftcalc(
     if (`FPSIZES == 1) begin
         logic Sum0LEZ, Sum0GEFL;
         assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
-        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2));
         assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
 
     end else if (`FPSIZES == 2) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
         assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
-        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
-        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2));
+        assign Sum1LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
+        assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp;
         assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
 
     end else if (`FPSIZES == 3) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
         assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
-        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
-        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
-        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
-        assign Sum2LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
-        assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2));
+        assign Sum1LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
+        assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp;
+        assign Sum2LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2));
+        assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|NormSumExp;
         always_comb begin
             case (Fmt)
                 `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
@@ -120,20 +124,20 @@ module fmashiftcalc(
     end else if (`FPSIZES == 4) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
         assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
-        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF  )-(`NE+2)'(2));
-        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
-        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp;
-        assign Sum2LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
-        assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp;
-        assign Sum3LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
-        assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
+        assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2));
+        assign Sum1LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS));
+        assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|NormSumExp;
+        assign Sum2LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS));
+        assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|NormSumExp;
+        assign Sum3LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS));
+        assign Sum3GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|NormSumExp;
         always_comb begin
             case (Fmt)
                 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
                 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
                 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
                 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
-            endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking
+            endcase
         end
 
     end
@@ -152,5 +156,8 @@ module fmashiftcalc(
     // set and calculate the shift input and amount
     //  - shift once if killing a product and the result is denormalized
     assign FmaShiftIn = {3'b0, FmaSm};
-    assign FmaShiftAmt = FmaNCnt+DenormShift;
+    if (`FPSIZES == 1)
+        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1;
+    else
+        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1;
 endmodule

From 514674417ed91fb5b0c94c1750d156b3b1d626b6 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Tue, 19 Jul 2022 01:10:10 +0000
Subject: [PATCH 07/36] moved Se into execute stage

---
 pipelined/src/fpu/fma.sv             |  7 +++-
 pipelined/src/fpu/fmashiftcalc.sv    | 61 ++++++++++++++--------------
 pipelined/src/fpu/fpu.sv             | 11 ++---
 pipelined/src/fpu/postprocess.sv     | 15 +++----
 pipelined/src/fpu/resultsign.sv      |  4 +-
 pipelined/src/fpu/round.sv           |  6 +--
 pipelined/src/fpu/shiftcorrection.sv |  6 +--
 pipelined/testbench/testbench-fp.sv  |  5 ++-
 8 files changed, 61 insertions(+), 54 deletions(-)

diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv
index 3cd128301..3f4cc2ac5 100644
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@@ -45,6 +45,7 @@ module fma(
     output logic                As,       // the aligned addend's sign (modified Z sign for other opperations)
     output logic                Ps,          // the product's sign
     output logic                Ss,          // the sum's sign
+    output logic [`NE+1:0]      Se,
     output logic [$clog2(3*`NF+7)-1:0]          NCnt        // normalization shift count
     );
 
@@ -82,7 +83,7 @@ module fma(
     // // Addition/LZA
     // ///////////////////////////////////////////////////////////////////////////////
         
-    add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Ss);
+    add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss);
     
     loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
 endmodule
@@ -223,11 +224,14 @@ module add(
     input logic                 Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations)
     input logic                 KillProd,      // should the product be set to 0
     input logic                 ZmSticky,
+    input logic  [`NE-1:0]      Ze,
+    input logic  [`NE+1:0]      Pe,
     output logic [3*`NF+6:0]    AmInv,  // aligned addend possibly inverted
     output logic [2*`NF+1:0]    PmKilled,     // the product's mantissa possibly killed
     output logic                NegSum,        // was the sum negitive
     output logic                InvA,          // do you invert the aligned addend
     output logic                Ss,          
+    output logic [`NE+1:0]      Se,
     output logic [3*`NF+5:0]    Sm           // the positive sum
 );
     logic [3*`NF+6:0]    PreSum, NegPreSum; // possibly negitive sum
@@ -264,6 +268,7 @@ module add(
     //  if -p + z is the Sum positive
     //  if -p - z then the Sum is negitive
     assign Ss = NegSum^Ps; //*** move to execute stage
+    assign Se = KillProd ? {2'b0, Ze} : Pe;
 endmodule
 
 
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index 6d1c40bc7..79953b212 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -35,14 +35,15 @@ module fmashiftcalc(
     input logic  [$clog2(3*`NF+7)-1:0]  FmaNCnt,   // normalization shift count
     input logic  [`FMTBITS-1:0]         Fmt,       // precision 1 = double 0 = single
     input logic                         FmaKillProd,  // is the product set to zero
-    output logic [`NE+1:0]              FmaNe,          // exponent of the normalized sum not taking into account denormal or zero results
+    input logic [`NE+1:0] FmaSe,
+    output logic [`NE+1:0]              NormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
     output logic                        FmaSZero,    // is the result denormalized - calculated before LZA corection
     output logic                        FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
     output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // normalization shift count
     output logic [3*`NF+8:0]            FmaShiftIn        // is the sum zero
 );
     logic [$clog2(3*`NF+7)-1:0] DenormShift;        // right shift if the result is denormalized //***change this later
-    logic [`NE+1:0]             NormSumExp;       // the exponent of the normalized sum with the `FLEN bias
+    logic [`NE+1:0]             PreNormSumExp;       // the exponent of the normalized sum with the `FLEN bias
     logic [`NE+1:0] BiasCorr;
 
     ///////////////////////////////////////////////////////////////////////////////
@@ -51,18 +52,16 @@ module fmashiftcalc(
     //*** insert bias-bias simplification in fcvt.sv/phone pictures
     // Determine if the sum is zero
     assign FmaSZero = ~(|FmaSm);
-    logic [`NE+1:0] FmaSe;
-    assign FmaSe = FmaKillProd ? {2'b0, Ze} : FmaPe;
     // calculate the sum's exponent
-    assign NormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
+    assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
 
     //convert the sum's exponent into the proper percision
     if (`FPSIZES == 1) begin
-        assign FmaNe = NormSumExp;
+        assign NormSumExp = PreNormSumExp;
 
     end else if (`FPSIZES == 2) begin
         assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS);
-        assign FmaNe = NormSumExp+BiasCorr;
+        assign NormSumExp = PreNormSumExp+BiasCorr;
         
     end else if (`FPSIZES == 3) begin
         always_comb begin
@@ -73,7 +72,7 @@ module fmashiftcalc(
                 default: BiasCorr = 'x;
             endcase
         end
-        assign FmaNe = NormSumExp+BiasCorr;
+        assign NormSumExp = PreNormSumExp+BiasCorr;
 
     end else if (`FPSIZES == 4) begin
         always_comb begin
@@ -84,7 +83,7 @@ module fmashiftcalc(
                 2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS);
             endcase
         end
-        assign FmaNe = NormSumExp+BiasCorr;
+        assign NormSumExp = PreNormSumExp+BiasCorr;
 
     end
     
@@ -92,26 +91,26 @@ module fmashiftcalc(
     
     if (`FPSIZES == 1) begin
         logic Sum0LEZ, Sum0GEFL;
-        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
-        assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2));
+        assign Sum0LEZ  = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
+        assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
         assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
 
     end else if (`FPSIZES == 2) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
-        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
-        assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2));
-        assign Sum1LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
-        assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp;
+        assign Sum0LEZ  = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
+        assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
+        assign Sum1LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
+        assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
         assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
 
     end else if (`FPSIZES == 3) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
-        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
-        assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2));
-        assign Sum1LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
-        assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp;
-        assign Sum2LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2));
-        assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|NormSumExp;
+        assign Sum0LEZ  = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
+        assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
+        assign Sum1LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
+        assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
+        assign Sum2LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2));
+        assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp;
         always_comb begin
             case (Fmt)
                 `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
@@ -123,14 +122,14 @@ module fmashiftcalc(
 
     end else if (`FPSIZES == 4) begin
         logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
-        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
-        assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2));
-        assign Sum1LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS));
-        assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|NormSumExp;
-        assign Sum2LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS));
-        assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|NormSumExp;
-        assign Sum3LEZ  = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS));
-        assign Sum3GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|NormSumExp;
+        assign Sum0LEZ  = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
+        assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
+        assign Sum1LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS));
+        assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp;
+        assign Sum2LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS));
+        assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp;
+        assign Sum3LEZ  = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS));
+        assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp;
         always_comb begin
             case (Fmt)
                 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
@@ -148,11 +147,11 @@ module fmashiftcalc(
     //      - if kill prod dont add to exp
 
     // Determine if the result is denormal
-    // assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero;
+    // assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero;
 
     // Determine the shift needed for denormal results
     //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1;
+    assign DenormShift = FmaPreResultDenorm ? NormSumExp[$clog2(3*`NF+7)-1:0] : 1;
     // set and calculate the shift input and amount
     //  - shift once if killing a product and the result is denormalized
     assign FmaShiftIn = {3'b0, FmaSm};
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index e7888551c..a9c0ac247 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -109,6 +109,7 @@ module fpu (
    logic [3*`NF+5:0]	SumE, SumM;                       
    logic [`NE+1:0]	    ProdExpE, ProdExpM;
    logic 			    AddendStickyE, AddendStickyM;
+   logic [`NE+1:0]      SeE,SeM;
    logic 			    KillProdE, KillProdM;
    logic 			    InvAE, InvAM;
    logic 			    NegSumE, NegSumM;
@@ -256,7 +257,7 @@ module fpu (
             .Xm(XManE), .Ym(YManE), .Zm(ZManE), 
             .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), 
             .FOpCtrl(FOpCtrlE), .Fmt(FmtE), 
-            .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE),
+            .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE),
             .Sm(SumE), .Pe(ProdExpE), 
             .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), 
             .ZmSticky(AddendStickyE), .KillProd(KillProdE)); 
@@ -335,9 +336,9 @@ module fpu (
                {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
    flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
-   flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, 
-                           {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE},
-                           {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM});
+   flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, 
+                           {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE},
+                           {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM, SeM});
    flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
                            {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
                            {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
@@ -359,7 +360,7 @@ module fpu (
    postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
                            .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM),
                            .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
-                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM),
+                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM),
                            .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
                            .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
 
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index 7e741abbb..de3c4f30c 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -46,6 +46,7 @@ module postprocess (
     //fma signals
     input logic                             FmaAs,   // the modified Z sign - depends on instruction
     input logic                             FmaPs,      // the product's sign
+    input logic  [`NE+1:0]                  FmaSe,
     input logic  [`NE+1:0]                  FmaPe,       // Product exponent
     input logic  [3*`NF+5:0]                FmaSm,       // the positive sum
     input logic                             FmaZmS,  // sticky bit that is calculated during alignment
@@ -93,10 +94,10 @@ module postprocess (
     logic UfL;
     logic [`FMTBITS-1:0] OutFmt;
     // fma signals
-    logic [`NE+1:0] FmaSe;     // exponent of the normalized sum
+    logic [`NE+1:0] FmaMe;     // exponent of the normalized sum
     logic FmaSZero;        // is the sum zero
     logic [3*`NF+8:0] FmaShiftIn;        // shift input
-    logic [`NE+1:0] FmaNe;          // exponent of the normalized sum not taking into account denormal or zero results
+    logic [`NE+1:0] NormSumExp;          // exponent of the normalized sum not taking into account denormal or zero results
     logic FmaPreResultDenorm;    // is the result denormalized - calculated before LZA corection
     logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt;   // normalization shift count
     // division singals
@@ -151,7 +152,7 @@ module postprocess (
 
     cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,  
                               .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
-    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe,
+    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
                           .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
     divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
 
@@ -182,9 +183,9 @@ module postprocess (
     
     normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
 
-    shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe,
+    shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .NormSumExp,
                                 .DivResDenorm, .DivDenormShift, .DivOp, .DivQe,
-                                .Qe, .FmaSZero, .Shifted, .FmaSe, .Mf);
+                                .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
 
     ///////////////////////////////////////////////////////////////////////////////
     // Rounding
@@ -201,7 +202,7 @@ module postprocess (
                         .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
 
     round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe,
-                .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt,  .CvtResUf,
+                .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt,  .CvtResUf,
                 .DivS, .DivDone,
                 .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me);
 
@@ -209,7 +210,7 @@ module postprocess (
     // Sign calculation
     ///////////////////////////////////////////////////////////////////////////////
 
-    resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S,
+    resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaMe, .R, .S,
                           .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws);
 
     ///////////////////////////////////////////////////////////////////////////////
diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv
index e1ea5e410..b8019b98b 100644
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@@ -34,7 +34,7 @@ module resultsign(
     input logic         ZInf,
     input logic         InfIn,
     input logic         FmaOp,
-    input logic [`NE+1:0] FmaSe,
+    input logic [`NE+1:0] FmaMe,
     input logic         FmaSZero,
     input logic         Mult,
     input logic         R,
@@ -50,7 +50,7 @@ module resultsign(
     //      if cancelation then 0 unless round to -infinity
     //      if multiply then Psgn
     //      otherwise psign
-    assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
+    assign Zeros = (FmaPs^FmaAs)&~(FmaMe[`NE+1] | ((FmaMe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
 
 
     // is the result negitive
diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv
index 6132dba4a..0943413bd 100644
--- a/pipelined/src/fpu/round.sv
+++ b/pipelined/src/fpu/round.sv
@@ -48,7 +48,7 @@ module round(
     input logic                     CvtResUf,
     input logic  [`CORRSHIFTSZ-1:0] Mf,
     input logic                     FmaZmS,  // addend's sticky bit
-    input logic  [`NE+1:0]          FmaSe,         // exponent of the normalized sum
+    input logic  [`NE+1:0]          FmaMe,         // exponent of the normalized sum
     input logic                     Ms,      // the result's sign
     input logic  [`NE:0]            CvtCe,    // the calculated expoent
     input logic  [`NE+1:0]          Qe,    // the calculated expoent
@@ -176,7 +176,7 @@ module round(
 
     // only add the Addend sticky if doing an FMA opperation
     //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
-    assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp;
+    assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp;
     
     // determine round and LSB of the rounded value
     //      - underflow round bit is used to determint the underflow flag
@@ -299,7 +299,7 @@ module round(
     
     always_comb
         case(PostProcSel)
-            2'b10: Me = FmaSe; // fma
+            2'b10: Me = FmaMe; // fma
             2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt
             2'b01: Me = DivDone ? Qe : '0; // divide
             default: Me = '0; 
diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv
index 71a2393a6..50cffb078 100644
--- a/pipelined/src/fpu/shiftcorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@@ -35,12 +35,12 @@ module shiftcorrection(
     input logic                     DivResDenorm,
     input logic  [`NE+1:0]          DivQe,
     input logic  [`NE+1:0]          DivDenormShift,
-    input logic  [`NE+1:0]          FmaNe,          // exponent of the normalized sum not taking into account denormal or zero results
+    input logic  [`NE+1:0]          NormSumExp,          // exponent of the normalized sum not taking into account denormal or zero results
     input logic                     FmaPreResultDenorm,    // is the result denormalized - calculated before LZA corection
     input logic                     FmaSZero,
     output logic [`CORRSHIFTSZ-1:0] Mf,         // the shifted sum before LZA correction
     output logic [`NE+1:0]          Qe,
-    output logic [`NE+1:0]          FmaSe         // exponent of the normalized sum
+    output logic [`NE+1:0]          FmaMe         // exponent of the normalized sum
 );
     logic [3*`NF+5:0]      CorrSumShifted;     // the shifted sum after LZA correction
     logic [`CORRSHIFTSZ-1:0] CorrQuotShifted;
@@ -58,7 +58,7 @@ module shiftcorrection(
     assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
+    assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
     // recalculate if the result is denormalized
     assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2];
 
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index be5114e95..91ce82616 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -96,6 +96,7 @@ module testbenchfp;
   logic                 Mult;
   logic                 Ss;
   logic [`NE+1:0]	      Pe;
+  logic [`NE+1:0]	      Se;
   logic 				        ZmSticky;
   logic 					      KillProd; 
   logic [$clog2(3*`NF+7)-1:0]	NCnt;
@@ -675,7 +676,7 @@ module testbenchfp;
   fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), 
           .Xe(XExp), .Ye(YExp), .Ze(ZExp), 
           .Xm(XMan), .Ym(YMan), .Zm(ZMan),
-          .XZero, .YZero, .ZZero, .Ss,
+          .XZero, .YZero, .ZZero, .Ss, .Se,
           .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps,
           .Pe, .ZmSticky, .KillProd); 
               
@@ -686,7 +687,7 @@ module testbenchfp;
               .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
               .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
               .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
-              .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone,
+              .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSe(Se),
               .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
               .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
   

From b13b49658bb6906378910ab7ca95beebee54d8b1 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Tue, 19 Jul 2022 02:58:07 +0000
Subject: [PATCH 08/36] Removed duplicate -march from CoreMark makefile

---
 benchmarks/coremark/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile
index 3f94a7b8b..1b619d80e 100644
--- a/benchmarks/coremark/Makefile
+++ b/benchmarks/coremark/Makefile
@@ -10,8 +10,8 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h  \
 	$(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \
 	$(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c
 ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32)
-ARCH := rv$(XLEN)im
-PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \
+ARCH := rv$(XLEN)i
+PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \
 	-mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -funroll-all-loops -finline-functions -falign-jumps=4 \
 	-fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \
 	-funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \

From 37bf837d48deb9cef6839d13d6405cfb128ea5c7 Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Tue, 19 Jul 2022 08:59:16 -0700
Subject: [PATCH 09/36] fixed GPIO test by adding a new function to clear PLIC
 interrupts

---
 .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 39 +++++++++++++++++++
 .../rv32i_m/privilege/src/WALLY-gpio-01.S     |  4 ++
 2 files changed, 43 insertions(+)

diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h
index 0caad5d0b..7e112c917 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h
@@ -973,6 +973,45 @@ readsip_test:  // read the MIP into the signature
     addi a6, a6, 4
     j test_loop // go to next test case
 
+claim_m_plic_interrupts: // claim and complete one PLIC interrupt through the register at 0x0C200004 (presumably the machine-mode context, per the label - confirm); clobbers t2-t6, does not advance t1 or a6
+    li t2, 0x0C002000 // t2 = address of the PLIC enable word that is saved and restored below
+    li t3, 0x0C200004 // t3 = address of the PLIC claim/complete register
+    li t4, 0xFFF // t4 = enable mask with the low 12 bits set
+    lw t6, 0(t2) // save current enable status
+    sw t4, 0(t2) // enable all relevant interrupts on PLIC
+    lw t5, 0(t3) // make PLIC claim
+    sw t5, 0(t3) // complete claim made
+    sw t6, 0(t2) // restore saved enable status
+    j test_loop // go to next test case
+
+claim_s_plic_interrupts: // claim and complete one PLIC interrupt through the register at 0x0C201004 (presumably the supervisor-mode context, per the label - confirm); clobbers t2-t6, does not advance t1 or a6
+    li t2, 0x0C002080 // t2 = address of the PLIC enable word that is saved and restored below
+    li t3, 0x0C201004 // t3 = address of the PLIC claim/complete register
+    li t4, 0xFFF // t4 = enable mask with the low 12 bits set
+    lw t6, 0(t2) // save current enable status
+    sw t4, 0(t2) // enable all relevant interrupts on PLIC
+    lw t5, 0(t3) // make PLIC claim
+    sw t5, 0(t3) // complete claim made
+    sw t6, 0(t2) // restore saved enable status
+    j test_loop // go to next test case
+
+uart_data_wait: // poll until bit 0 of the LSR byte is set, then branch to uart_data_ready with t4 = IIR byte and t5 = 1; clobbers t2-t6
+    li t2, 0x10000005 // LSR
+    li t3, 0x10000002 // IIR
+    lb t4, 0(t3) // save IIR before potential clear
+    lb t5, 0(t2) // t5 = current LSR byte
+    andi t5, t5, 1  // only care if data is ready
+    li t6, 1 // t6 = value the masked LSR bit is compared against
+    beq t5, t6, uart_data_ready // LSR bit 0 set: go store the captured values
+    j uart_data_wait // bit clear: poll again (IIR and LSR are both re-read on every pass)
+
+uart_data_ready: // store the two bytes captured by uart_data_wait at t1, then advance t1 and a6 and go to the next test
+    sb t4, 0(t1) // byte 0 at t1 = IIR value read in uart_data_wait
+    sb t5, 1(t1) // byte 1 at t1 = masked LSR bit 0 (1 when reached from uart_data_wait)
+    addi t1, t1, 4 // advance t1 by 4 bytes (presumably the output/signature pointer - confirm)
+    addi a6, a6, 4 // advance a6 by 4 bytes
+    j test_loop // go to next test case
+
 goto_s_mode:
     // return to address in t3, 
     li a0, 3 // Trap handler behavior (go to supervisor mode)
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S
index 1b3bbdb47..8fb801feb 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S
@@ -140,18 +140,22 @@ SETUP_PLIC
 .4byte high_ie, 0x00020000, write32_test            # enable high interrupt on bit 17, which is pending
 .4byte 0x0, 0x00000800, readmip_test                # MEIP should be raised
 .4byte high_ie, 0x00000000, write32_test            # disable high interrupt on bit 17
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear PLIC pending interrupts
 .4byte 0x0, 0x00000000, readmip_test                # MEIP should be released
 .4byte low_ie, 0x00010000, write32_test             # enable low interrupt on bit 16, which is pending
 .4byte 0x0, 0x00000800, readmip_test                # MEIP should be raised
 .4byte low_ie, 0x00000000, write32_test             # disable low interrupt on bit 16
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear PLIC pending interrupts
 .4byte 0x0, 0x00000000, readmip_test                # MEIP should be released
 .4byte rise_ie, 0x00200000, write32_test            # enable rise interrupt on bit 21, which is pending
 .4byte 0x0, 0x00000800, readmip_test                # MEIP should be raised
 .4byte rise_ie, 0x00000000, write32_test            # disable rise interrupt on bit 21, which is pending
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear PLIC pending interrupts
 .4byte 0x0, 0x00000000, readmip_test                # MEIP should be released
 .4byte fall_ie, 0x01000000, write32_test            # enable high interrupt on bit 24, which is pending
 .4byte 0x0, 0x00000800, readmip_test                # MEIP should be raised
 .4byte fall_ie, 0x00000000, write32_test            # disable high interrupt on bit 24, which is pending
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear PLIC pending interrupts
 .4byte 0x0, 0x00000000, readmip_test                # MEIP should be released
 
 .4byte 0x0, 0x0, terminate_test # terminate tests

From 0668659ac9dedc3990e3cfc37db9365d7b59e16c Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Tue, 19 Jul 2022 13:17:02 -0700
Subject: [PATCH 10/36] made changes to makefile, now builds fastest version
 (RV64im) by default. Also removed redundant CFLAG funroll-all-loops (was
 duplicated)

---
 benchmarks/coremark/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile
index 1b619d80e..e41865986 100644
--- a/benchmarks/coremark/Makefile
+++ b/benchmarks/coremark/Makefile
@@ -10,9 +10,9 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h  \
 	$(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \
 	$(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c
 ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32)
-ARCH := rv$(XLEN)i
+ARCH := rv$(XLEN)im
 PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \
-	-mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -funroll-all-loops -finline-functions -falign-jumps=4 \
+	-mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -finline-functions -falign-jumps=4 \
 	-fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \
 	-funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \
 	-nostdlib -nostartfiles -ffreestanding -mstrict-align \

From 5b1adc7a675f2ddaa85e57c64a19ce3df96b4e03 Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Tue, 19 Jul 2022 13:36:18 -0700
Subject: [PATCH 11/36] commented out embench 2.0 tests

---
 pipelined/testbench/tests.vh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index 4b1b9a160..8a2765701 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -49,24 +49,24 @@ string tvpaths[] = '{
     `EMBENCH,
     "bd_speedopt_speed/src/aha-mont64/aha-mont64",
     "bd_speedopt_speed/src/crc32/crc32",
-    "bd_speedopt_speed/src/cubic/cubic",
+    "bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches
     "bd_speedopt_speed/src/edn/edn",
     "bd_speedopt_speed/src/huffbench/huffbench",
     "bd_speedopt_speed/src/matmult-int/matmult-int",
-    "bd_speedopt_speed/src/md5sum/md5sum",
+    // "bd_speedopt_speed/src/md5sum/md5sum", // commenting out tests from embench 2.0. When embench 2.0 launches and is stable, add these tests back
     "bd_speedopt_speed/src/minver/minver",
     "bd_speedopt_speed/src/nbody/nbody",
     "bd_speedopt_speed/src/nettle-aes/nettle-aes",
     "bd_speedopt_speed/src/nettle-sha256/nettle-sha256",
     "bd_speedopt_speed/src/nsichneu/nsichneu",
     "bd_speedopt_speed/src/picojpeg/picojpeg",
-    "bd_speedopt_speed/src/primecount/primecount",
+    // "bd_speedopt_speed/src/primecount/primecount",
     "bd_speedopt_speed/src/qrduino/qrduino",
     "bd_speedopt_speed/src/sglib-combined/sglib-combined",
     "bd_speedopt_speed/src/slre/slre",
     "bd_speedopt_speed/src/st/st",
     "bd_speedopt_speed/src/statemate/statemate",
-    "bd_speedopt_speed/src/tarfind/tarfind",
+    // "bd_speedopt_speed/src/tarfind/tarfind",
     "bd_speedopt_speed/src/ud/ud",
     "bd_speedopt_speed/src/wikisort/wikisort",
     "bd_sizeopt_speed/src/aha-mont64/aha-mont64",
@@ -75,7 +75,7 @@ string tvpaths[] = '{
     "bd_sizeopt_speed/src/edn/edn",
     "bd_sizeopt_speed/src/huffbench/huffbench",
     "bd_sizeopt_speed/src/matmult-int/matmult-int",
-    "bd_sizeopt_speed/src/md5sum/md5sum",
+    // "bd_sizeopt_speed/src/md5sum/md5sum",
     "bd_sizeopt_speed/src/minver/minver",
     "bd_sizeopt_speed/src/nbody/nbody",
     "bd_sizeopt_speed/src/nettle-aes/nettle-aes",
@@ -88,7 +88,7 @@ string tvpaths[] = '{
     "bd_sizeopt_speed/src/slre/slre",
     "bd_sizeopt_speed/src/st/st",
     "bd_sizeopt_speed/src/statemate/statemate",
-    "bd_sizeopt_speed/src/tarfind/tarfind",
+    // "bd_sizeopt_speed/src/tarfind/tarfind",
     "bd_sizeopt_speed/src/ud/ud",
     "bd_sizeopt_speed/src/wikisort/wikisort"
   };

From 20800b2714d9d95daa4c8b1f81ec5a29767fa0cc Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Tue, 19 Jul 2022 15:16:12 -0700
Subject: [PATCH 12/36] embench no longer launches run automatically, need to
 use make run

---
 benchmarks/embench/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile
index 255ea8be5..630864ded 100644
--- a/benchmarks/embench/Makefile
+++ b/benchmarks/embench/Makefile
@@ -4,7 +4,8 @@
 
 embench_dir = ../../addins/embench-iot
 
-all: build sim size
+all: build 
+run: sim size
 
 allClean: clean all
 

From d61f84e7510aa98ad3aae1af8d45994d532ad74f Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Tue, 19 Jul 2022 23:44:37 +0000
Subject: [PATCH 13/36] optimized zeros and replaced complex ?: with
 always_comb

---
 pipelined/src/fpu/fcvt.sv            | 21 ++++++---
 pipelined/src/fpu/fmashiftcalc.sv    |  4 --
 pipelined/src/fpu/negateintres.sv    |  9 +++-
 pipelined/src/fpu/resultsign.sv      | 25 ++++++++---
 pipelined/src/fpu/shiftcorrection.sv |  5 ++-
 pipelined/src/fpu/specialcase.sv     | 65 ++++++++++++++++++----------
 6 files changed, 85 insertions(+), 44 deletions(-)

diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv
index b9932523a..9d7f2d62d 100644
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@@ -103,7 +103,7 @@ module fcvt (
     // choose the input to the leading zero counter i.e. priority encoder
     //             int -> fp : | positive integer | 00000... (if needed) | 
     //             fp  -> fp : | fraction         | 00000... (if needed) | 
-    assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} :
+    assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} :
                              {Xm, {`CVTLEN-`NF{1'b0}}};
     assign LzcIn = LzcInFull[`CVTLEN-1:0];
     
@@ -125,9 +125,10 @@ module fcvt (
     //              - only shift fp -> fp if the intital value is denormalized
     //                  - this is a problem because the input to the lzc was the fraction rather than the mantissa
     //                  - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
-    assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} :
-                    ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : 
-                              (LeadingZeros);
+    always_comb
+        if(ToInt)                       ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}};
+        else if (ResDenormUf&~IntToFp)  ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0];
+        else                            ShiftAmt = LeadingZeros;
     
     ///////////////////////////////////////////////////////////////////////////
     // exp calculations
@@ -150,7 +151,9 @@ module fcvt (
         assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS); 
 
     end else if (`FPSIZES == 2) begin
-        assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
+        logic [`NE-2:0] NewBiasToFp;
+        assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
+        assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; 
 
     end else if (`FPSIZES == 3) begin
         logic [`NE-2:0] NewBiasToFp;
@@ -177,7 +180,7 @@ module fcvt (
     // select the old exponent
     //      int -> fp : largest bias + XLEN
     //      fp -> ??? : XExp
-    assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : Xe;
+    assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe;
     
     // calculate CalcExp
     //      fp -> fp : 
@@ -222,7 +225,11 @@ module fcvt (
     //          - if 64-bit : check the msb of the 64-bit integer input and if it's signed
     //          - if 32-bit : check the msb of the 32-bit integer input and if it's signed
     //      - otherwise: the floating point input's sign
-    assign Cs = IntToFp ? Int64 ? Int[`XLEN-1]&Signed : Int[31]&Signed : Xs;
+    always_comb
+        if(IntToFp)
+            if(Int64)   Cs = Int[`XLEN-1]&Signed;
+            else        Cs = Int[31]&Signed;
+        else            Cs = Xs;
 
 endmodule
 
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index 79953b212..d598efb7e 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -42,7 +42,6 @@ module fmashiftcalc(
     output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,   // normalization shift count
     output logic [3*`NF+8:0]            FmaShiftIn        // is the sum zero
 );
-    logic [$clog2(3*`NF+7)-1:0] DenormShift;        // right shift if the result is denormalized //***change this later
     logic [`NE+1:0]             PreNormSumExp;       // the exponent of the normalized sum with the `FLEN bias
     logic [`NE+1:0] BiasCorr;
 
@@ -149,9 +148,6 @@ module fmashiftcalc(
     // Determine if the result is denormal
     // assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero;
 
-    // Determine the shift needed for denormal results
-    //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = FmaPreResultDenorm ? NormSumExp[$clog2(3*`NF+7)-1:0] : 1;
     // set and calculate the shift input and amount
     //  - shift once if killing a product and the result is denormalized
     assign FmaShiftIn = {3'b0, FmaSm};
diff --git a/pipelined/src/fpu/negateintres.sv b/pipelined/src/fpu/negateintres.sv
index dde515b94..7a696b379 100644
--- a/pipelined/src/fpu/negateintres.sv
+++ b/pipelined/src/fpu/negateintres.sv
@@ -42,7 +42,12 @@ module negateintres(
     // round and negate the positive res if needed
     assign CvtNegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
     
-    assign CvtNegResMsbs = Signed ? Int64 ? CvtNegRes[`XLEN:`XLEN-1] : CvtNegRes[32:31] :
-			              Int64 ? CvtNegRes[`XLEN+1:`XLEN] : CvtNegRes[33:32];
+    always_comb
+        if(Signed)
+            if(Int64)   CvtNegResMsbs = CvtNegRes[`XLEN:`XLEN-1];
+            else        CvtNegResMsbs = CvtNegRes[32:31];
+        else
+            if(Int64)   CvtNegResMsbs = CvtNegRes[`XLEN+1:`XLEN];
+            else        CvtNegResMsbs = CvtNegRes[33:32];
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv
index b8019b98b..c2912ece7 100644
--- a/pipelined/src/fpu/resultsign.sv
+++ b/pipelined/src/fpu/resultsign.sv
@@ -46,11 +46,21 @@ module resultsign(
     logic Zeros;
     logic Infs;
 
-    // Determine the sign if the sum is zero
-    //      if cancelation then 0 unless round to -infinity
-    //      if multiply then Psgn
-    //      otherwise psign
-    assign Zeros = (FmaPs^FmaAs)&~(FmaMe[`NE+1] | ((FmaMe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
+    // The IEEE754-2019 standard specifies:
+    //      - the sign of an exact zero sum (with operands of different signs) should be positive unless rounding toward negative infinity
+    //      - when the exact result of an FMA operation is non-zero, but is zero due to rounding, use the sign of the exact result
+    //      - if x = +0 or -0 then x+x=x and x-(-x)=x
+    //      - the sign of a product is the exclusive or of the operands' signs
+    // Zero sign will only be selected if:
+    //      - P=Z and a cancellation occurs - exact zero
+    //      - Z is zero and P is zero - exact zero
+    //      - P is killed and Z is zero - Psgn
+    //      - Z is killed and P is zero - impossible
+    // Zero sign calculation:
+    //      - if a multiply operation is done, then use the product's sign (Ps)
+    //      - if the zero sum is not exactly zero i.e. R|S use the sign of the exact result (which is the product's sign)
+    //      - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign
+    assign Zeros = (FmaPs^FmaAs)&~(R|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
 
 
     // is the result negitive
@@ -58,6 +68,9 @@ module resultsign(
     //  if -p + z is the Sum positive
     //  if -p - z then the Sum is negitive
     assign Infs = ZInf ? FmaAs : FmaPs;
-    assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms;
+    always_comb
+        if(InfIn&FmaOp) Ws = Infs;
+        else if(FmaSZero&FmaOp) Ws = Zeros;
+        else Ws = Ms;
 
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv
index 50cffb078..514edbee1 100644
--- a/pipelined/src/fpu/shiftcorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@@ -55,7 +55,10 @@ module shiftcorrection(
     //                        if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
     assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
     // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
-    assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
+    always_comb
+        if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}};
+        else if (DivOp&~DivResDenorm)   Mf = CorrQuotShifted;
+        else                            Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
     assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}};
diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv
index 3c28eae2e..6014962a1 100644
--- a/pipelined/src/fpu/specialcase.sv
+++ b/pipelined/src/fpu/specialcase.sv
@@ -95,9 +95,14 @@ module specialcase(
         end else begin 
             assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
         end
-        
-        assign OfRes =  OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} :
-                               OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
+
+        always_comb
+            if(OutFmt)
+                if(OfResMax)    OfRes = {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
+                else            OfRes = {Ws, {`NE{1'b1}}, {`NF{1'b0}}};
+            else
+                if(OfResMax)    OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
+                else            OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)};
         assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
         assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
 
@@ -234,20 +239,21 @@ module specialcase(
     assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1);
     assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
     // output infinity with result sign if divide by zero
-    if(`IEEE754) begin
-        assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes :
-                         YNaN&~CvtOp ? YNaNRes :
-                         ZNaN&FmaOp ? ZNaNRes :
-                         Invalid ? InvalidRes : 
-                         SelOfRes ? OfRes :
-                         KillRes ? UfRes :  
-                         NormRes;
-    end else begin
-        assign PostProcRes = NaNIn|Invalid ? InvalidRes :
-                         SelOfRes ? OfRes :
-                         KillRes ? UfRes :  
-                         NormRes;
-    end
+    if(`IEEE754)
+        always_comb
+            if(XNaN&~(IntToFp&CvtOp))   PostProcRes = XNaNRes;
+            else if(YNaN&~CvtOp)        PostProcRes = YNaNRes;
+            else if(ZNaN&FmaOp)         PostProcRes = ZNaNRes;
+            else if(Invalid)            PostProcRes = InvalidRes;
+            else if(SelOfRes)           PostProcRes = OfRes;
+            else if(KillRes)            PostProcRes = UfRes;
+            else                        PostProcRes = NormRes;
+    else
+        always_comb
+            if(NaNIn|Invalid)           PostProcRes = InvalidRes;
+            else if(SelOfRes)           PostProcRes = OfRes;
+            else if(KillRes)            PostProcRes = UfRes;
+            else                        PostProcRes = NormRes;
 
     ///////////////////////////////////////////////////////////////////////////////////////
     //
@@ -272,10 +278,17 @@ module specialcase(
     //        unsigned | 2^32-1 | 2^64-1 |
     //
     //      other: 32 bit unsinged res should be sign extended as if it were a signed number
-    assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
-                                              Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
-                               Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive
-                                              {`XLEN{1'b1}};// unsigned positive
+    always_comb
+        if(Signed)
+            if(Xs&~XNaN)    // signed negitive
+                if(Int64)   OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
+                else        OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
+            else            // signed positive
+                if(Int64)   OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
+                else        OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
+        else
+            if(Xs&~XNaN)    OfIntRes = {`XLEN{1'b0}}; // unsigned negitive
+            else            OfIntRes = {`XLEN{1'b1}}; // unsigned positive
 
 
     // select the integer output
@@ -284,7 +297,11 @@ module specialcase(
     //          - if rounding and signed opperation and negitive input, output -1
     //          - otherwise output a rounded 0
     //      - otherwise output the normal res (trmined and sign extended if nessisary)
-    assign FCvtIntRes = IntInvalid ?  OfIntRes :
-			            CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
-                        Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
+    always_comb
+        if(IntInvalid)          FCvtIntRes = OfIntRes;
+        else if(CvtCe[`NE]) 
+            if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}};
+            else                FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
+        else if(Int64)          FCvtIntRes = CvtNegRes[`XLEN-1:0];
+        else                    FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
 endmodule
\ No newline at end of file

From 0f94177765c6a47ff7c468d2136f180803de623d Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 20 Jul 2022 01:36:25 +0000
Subject: [PATCH 14/36] small changes

---
 pipelined/src/fpu/divsqrt.sv |  2 +-
 pipelined/src/fpu/srt.sv     | 12 ++++++------
 pipelined/srt/srt.sv         | 29 +++++++++++++++++------------
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index cbf7f95f0..ffc60026b 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -41,7 +41,7 @@ module divsqrt(
   input  logic XNaNE, YNaNE, 
   input  logic DivStartE, 
   input  logic StallM,
-  input logic StallE,
+  input  logic StallE,
   output logic DivStickyM,
   output logic DivBusy,
   output logic DivDone,
diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv
index 9e0315113..ee5ae9a39 100644
--- a/pipelined/src/fpu/srt.sv
+++ b/pipelined/src/fpu/srt.sv
@@ -34,18 +34,18 @@ module srt(
   input  logic clk,
   input  logic DivStart, 
   input  logic DivBusy, 
-  input logic  [`FMTBITS-1:0] FmtE,
+  input  logic [`FMTBITS-1:0] FmtE,
   input  logic [`NE-1:0] Xe, Ye,
   input  logic XZeroE, YZeroE, 
-  input logic [`DIVLEN-1:0] X,
-  input logic [`DIVLEN-1:0] Dpreproc,
-  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  input logic NegSticky,
+  input  logic [`DIVLEN-1:0] X,
+  input  logic [`DIVLEN-1:0] Dpreproc,
+  input  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  input  logic NegSticky,
   output logic [`QLEN-1-(`RADIX/4):0] Quot,
   output logic [`DIVLEN+3:0]  NextWSN, NextWCN,
   output logic [`DIVLEN+3:0]  StickyWSA,
   output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
-  output logic  [`NE+1:0] DivCalcExpM,
+  output logic [`NE+1:0] DivCalcExpM,
   output logic [`XLEN-1:0] Rem
 );
 
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 949335bf0..13a59d848 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -55,7 +55,7 @@ module srt (
   logic                       qp, qz, qn; // quotient is +1, 0, or -1
   logic [`NE-1:0]             calcExp;
   logic                       calcSign;
-  logic [`DIVLEN+3:0]         X, Dpreproc, C, F, AddIn;
+  logic [`DIVLEN+3:0]         X, Dpreproc, C, F, S, SM, AddIn;
   logic [`DIVLEN+3:0]         WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
   logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
   logic                       intSign;
@@ -90,8 +90,9 @@ module srt (
   // If only implementing division, use divide otfc
   // otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot);
   // otherwise use sotfc
-  creg   sotfcC(clk, Start, C);
-  sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F);
+  creg   sotfcC(clk, Start, Sqrt, C);
+  sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, Quot, S, SM);
+  fsel2 fsel(qp, qn, C, S, SM, F);
 
   // Adder input selection
   assign AddIn = Sqrt ? F : Dsel;
@@ -214,11 +215,16 @@ module fsel2 (
   // Generate for both positive and negative bits
   assign FP = ~S & C;
   assign FN = SM | (C & (~C << 2));
-  assign FZ = {(`DIVLEN+4){1'b0}};
+  assign FZ = '0;
 
   // Choose which adder input will be used
 
-  assign F = sp ? FP : (sn ? FN : FZ);
+  always_comb
+    if (sp)       F = FP;
+    else if (sn)  F = FN;
+    else          F = FZ;
+
+  // assign F = sp ? FP : (sn ? FN : FZ);
 
 endmodule
 
@@ -266,17 +272,18 @@ module sotfc2(
   input  logic         clk,
   input  logic         Start,
   input  logic         sp, sn,
+  input  logic         Sqrt,
   input  logic [`DIVLEN+3:0] C,
   output logic [`DIVLEN-2:0] Sq,
-  output logic [`DIVLEN+3:0] F
+  output logic [`DIVLEN+3:0] S, SM
 );
   //  The on-the-fly converter transfers the square root 
   //  bits to the quotient as they come.
   //  Use this otfc for division and square root.
-  logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux;
+  logic [`DIVLEN+3:0] SNext, SMNext, SMux;
 
   flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
-  mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux);
+  mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux);
   flop #(`DIVLEN+4) Sreg(clk, SMux, S);
 
   always_comb begin
@@ -292,9 +299,6 @@ module sotfc2(
     end 
   end
   assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0];
-
-  fsel2 fsel(sp, sn, C, S, SM, F);
-
 endmodule
 
 //////////////////////////
@@ -302,11 +306,12 @@ endmodule
 //////////////////////////
 module creg(input  logic clk,
             input  logic Start,
+            input  logic Sqrt,
             output logic [`DIVLEN+3:0] C
 );
   logic [`DIVLEN+3:0] CMux;
 
-  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux);
+  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, Start, CMux);
   flop #(`DIVLEN+4) cflop(clk, CMux, C);
 endmodule
 

From 36bd17984bf50b9da89424a69ba32d788ba45c49 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Wed, 20 Jul 2022 01:49:33 +0000
Subject: [PATCH 15/36] Reordered embench Makefile to run size tests first

---
 benchmarks/embench/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile
index 630864ded..21c30cf6e 100644
--- a/benchmarks/embench/Makefile
+++ b/benchmarks/embench/Makefile
@@ -5,7 +5,7 @@
 embench_dir = ../../addins/embench-iot
 
 all: build 
-run: sim size
+run: size sim
 
 allClean: clean all
 

From c3a4a2abdf14068e85653f22a6e8a9221775ba44 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 20 Jul 2022 02:00:43 +0000
Subject: [PATCH 16/36] New radix-2 algorithm implemented and working

---
 pipelined/srt/srt.sv | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 13a59d848..ed55ddd8d 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -75,7 +75,7 @@ module srt (
 
   // Quotient Selection logic
   // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
-  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], Sqrt, qp, qz, qn);
+  qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], Sqrt, qp, qz, qn);
 
   flopen #(`NE) expflop(clk, Start, calcExp, rExp);
   flopen #(1) signflop(clk, Start, calcSign, rsign);
@@ -154,7 +154,7 @@ module srtpreproc (
 
   // Selecting correct divider inputs
   assign DivX = Int ? PreprocA : PreprocX;
-  assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac};
+  assign SqrtX = XExp[0] ? {5'b11101, SrcXFrac} : {4'b1111, SrcXFrac, 1'b0};
   assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX};
   assign D = {4'b0001, Int ? PreprocB : PreprocY};
 
@@ -169,13 +169,13 @@ endmodule
 /////////////////////////////////
 // Quotient Selection, Radix 2 //
 /////////////////////////////////
-module qsel2 ( // *** eventually just change to 4 bits
-  input  logic [`DIVLEN+3:`DIVLEN-1] ps, pc, 
+module qsel2 (
+  input  logic [`DIVLEN+3:`DIVLEN] ps, pc, 
   input  logic         Sqrt,
   output logic         qp, qz, qn
 );
  
-  logic [`DIVLEN+3:`DIVLEN-1]  p, g;
+  logic [`DIVLEN+3:`DIVLEN]  p, g;
   logic          magnitude, sign, cout;
 
   // The quotient selection logic is presented for simplicity, not
@@ -186,8 +186,8 @@ module qsel2 ( // *** eventually just change to 4 bits
   assign p = ps ^ pc;
   assign g = ps & pc;
 
-  assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]);
-  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (Sqrt & (p[`DIVLEN] & g[`DIVLEN-1])))));
+  assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]);
+  assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN])));
   assign #1 sign = p[`DIVLEN+3] ^ cout;
 /*  assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & 
 			  (ps[52]^pc[52]));
@@ -283,7 +283,7 @@ module sotfc2(
   logic [`DIVLEN+3:0] SNext, SMNext, SMux;
 
   flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
-  mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux);
+  mux2 #(`DIVLEN+4) Smux(SNext, {2'b00, Sqrt, {(`DIVLEN+1){1'b0}}}, Start, SMux);
   flop #(`DIVLEN+4) Sreg(clk, SMux, S);
 
   always_comb begin
@@ -298,7 +298,7 @@ module sotfc2(
       SMNext = SM | ((C << 1) & ~(C << 2));
     end 
   end
-  assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0];
+  assign Sq = S[`DIVLEN+1] ? S[`DIVLEN:2] : S[`DIVLEN-1:1];
 endmodule
 
 //////////////////////////
@@ -311,7 +311,7 @@ module creg(input  logic clk,
 );
   logic [`DIVLEN+3:0] CMux;
 
-  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, Start, CMux);
+  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux);
   flop #(`DIVLEN+4) cflop(clk, CMux, C);
 endmodule
 

From cce57fdcc569fd7631c37b27dcbf3a1d1ff928b5 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Wed, 20 Jul 2022 02:04:20 +0000
Subject: [PATCH 17/36] divsqrt working for floating point

---
 pipelined/srt/srt.sv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index ed55ddd8d..27fac324e 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -311,7 +311,7 @@ module creg(input  logic clk,
 );
   logic [`DIVLEN+3:0] CMux;
 
-  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux);
+  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b11111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux);
   flop #(`DIVLEN+4) cflop(clk, CMux, C);
 endmodule
 

From b26297e87490c5f6e986967a4d89d36d6ccb901a Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Wed, 20 Jul 2022 02:27:39 +0000
Subject: [PATCH 18/36] moved ctrl signal registers into fctrl, also a lot of
 code cleaning

---
 pipelined/src/fpu/divsqrt.sv         |  20 +-
 pipelined/src/fpu/fclassify.sv       |  38 +--
 pipelined/src/fpu/fcmp.sv            | 138 ++++++-----
 pipelined/src/fpu/fctrl.sv           |  51 +++-
 pipelined/src/fpu/fcvt.sv            |  12 +-
 pipelined/src/fpu/fhazard.sv         |  28 +--
 pipelined/src/fpu/fma.sv             |  18 +-
 pipelined/src/fpu/fmashiftcalc.sv    |   8 +-
 pipelined/src/fpu/fpu.sv             | 336 +++++++++++++--------------
 pipelined/src/fpu/fsgninj.sv         |  45 ++--
 pipelined/src/fpu/otfc.sv            |   2 +-
 pipelined/src/fpu/postprocess.sv     |  20 +-
 pipelined/src/fpu/qsel.sv            |   2 +-
 pipelined/src/fpu/shiftcorrection.sv |   6 +-
 pipelined/src/fpu/srt.sv             |  26 +--
 pipelined/src/fpu/srtfsm.sv          |   6 +-
 pipelined/src/fpu/unpack.sv          |  45 ++--
 pipelined/src/fpu/unpackinput.sv     |  38 +--
 18 files changed, 439 insertions(+), 400 deletions(-)

diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index ffc60026b..a2f0ba8e3 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -34,20 +34,20 @@ module divsqrt(
   input  logic clk, 
   input  logic reset, 
   input  logic [`FMTBITS-1:0] FmtE,
-  input  logic [`NF:0] XManE, YManE,
-  input  logic [`NE-1:0] XExpE, YExpE,
+  input  logic [`NF:0] XmE, YmE,
+  input  logic [`NE-1:0] XeE, YeE,
   input  logic XInfE, YInfE, 
   input  logic XZeroE, YZeroE, 
   input  logic XNaNE, YNaNE, 
   input  logic DivStartE, 
   input  logic StallM,
-  input  logic StallE,
-  output logic DivStickyM,
+  input logic StallE,
+  output logic DivSM,
   output logic DivBusy,
   output logic DivDone,
-  output logic [`NE+1:0] DivCalcExpM,
+  output logic [`NE+1:0] QeM,
   output logic [`DURLEN-1:0] EarlyTermShiftM,
-  output logic [`QLEN-1-(`RADIX/4):0] QuotM
+  output logic [`QLEN-1-(`RADIX/4):0] QmM
 //   output logic [`XLEN-1:0] RemM,
 );
 
@@ -60,10 +60,10 @@ module divsqrt(
   logic [`DURLEN-1:0] Dur;
   logic NegSticky;
 
-  srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
+  srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
 
-  srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE,
+  srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
                .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
-  srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE,
-                .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM);
+  srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
+                .StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM);
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv
index 6c7ab451f..6aaec00a4 100644
--- a/pipelined/src/fpu/fclassify.sv
+++ b/pipelined/src/fpu/fclassify.sv
@@ -29,29 +29,29 @@
 `include "wally-config.vh"
 
 module fclassify (
-    input logic         XSgnE,  // sign bit
-    input logic         XNaNE,  // is NaN
-    input logic         XSNaNE, // is signaling NaN
-    input logic         XDenormE, // is denormal
-    input logic         XZeroE, // is zero
-    input logic         XInfE,  // is infinity
-    output logic [`XLEN-1:0] ClassResE // classify result
-    );
+    input logic         Xs,     // sign bit
+    input logic         XNaN,   // is NaN
+    input logic         XSNaN,  // is signaling NaN
+    input logic         XDenorm,// is denormal
+    input logic         XZero,  // is zero
+    input logic         XInf,   // is infinity
+    output logic [`XLEN-1:0] ClassRes// classify result
+);
 
     logic PInf, PZero, PNorm, PDenorm;
     logic NInf, NZero, NNorm, NDenorm;
-    logic XNormE;
+    logic XNorm;
    
     // determine the sub categories
-    assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE);
-    assign PInf = ~XSgnE&XInfE;
-    assign NInf = XSgnE&XInfE;
-    assign PNorm = ~XSgnE&XNormE;
-    assign NNorm = XSgnE&XNormE;
-    assign PDenorm = ~XSgnE&XDenormE;
-    assign NDenorm = XSgnE&XDenormE;
-    assign PZero = ~XSgnE&XZeroE;
-    assign NZero = XSgnE&XZeroE;
+    assign XNorm= ~(XNaN | XInf| XDenorm| XZero);
+    assign PInf = ~Xs&XInf;
+    assign NInf = Xs&XInf;
+    assign PNorm = ~Xs&XNorm;
+    assign NNorm = Xs&XNorm;
+    assign PDenorm = ~Xs&XDenorm;
+    assign NDenorm = Xs&XDenorm;
+    assign PZero = ~Xs&XZero;
+    assign NZero = Xs&XZero;
 
     // determine sub category and combine into the result
     //  bit 0 - -Inf
@@ -64,6 +64,6 @@ module fclassify (
     //  bit 7 - +Inf
     //  bit 8 - signaling NaN
     //  bit 9 - quiet NaN
-    assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm,  PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
+    assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf};
 
 endmodule
diff --git a/pipelined/src/fpu/fcmp.sv b/pipelined/src/fpu/fcmp.sv
index 9c6757848..48ff536f6 100755
--- a/pipelined/src/fpu/fcmp.sv
+++ b/pipelined/src/fpu/fcmp.sv
@@ -27,9 +27,10 @@
 //   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
 //   OR OTHER DEALINGS IN THE SOFTWARE.
 ////////////////////////////////////////////////////////////////////////////////////////////////
+
 `include "wally-config.vh"
 
-// FOpCtrlE values
+// OpCtrl values
 //    110   min
 //    101   max
 //    010   equal
@@ -37,36 +38,32 @@
 //    011   less than or equal
 
 module fcmp (   
-   input logic  [`FMTBITS-1:0]   FmtE,           // precision 1 = double 0 = single
-   input logic  [2:0]            FOpCtrlE,       // see above table
-   input logic                   XSgnE, YSgnE,   // input signs
-   input logic  [`NE-1:0]        XExpE, YExpE,   // input exponents
-   input logic  [`NF:0]          XManE, YManE,   // input mantissa
-   input logic                   XZeroE, YZeroE, // is zero
-   input logic                   XNaNE, YNaNE,   // is NaN
-   input logic                   XSNaNE, YSNaNE, // is signaling NaN
-   input logic  [`FLEN-1:0]      FSrcXE, FSrcYE, // original, non-converted to double, inputs
-   output logic                  CmpNVE,         // invalid flag
-   output logic [`FLEN-1:0]      CmpFpResE,         // compare resilt
-   output logic [`XLEN-1:0]      CmpIntResE         // compare resilt
+   input logic  [`FMTBITS-1:0]   Fmt,      // format of fp number
+   input logic  [2:0]            OpCtrl,   // see above table
+   input logic                   Xs, Ys,   // input signs
+   input logic  [`NE-1:0]        Xe, Ye,   // input exponents
+   input logic  [`NF:0]          Xm, Ym,   // input mantissa
+   input logic                   XZero, YZero, // is zero
+   input logic                   XNaN, YNaN,   // is NaN
+   input logic                   XSNaN, YSNaN, // is signaling NaN
+   input logic  [`FLEN-1:0]      X, Y,       // original inputs (before unpacker)
+   output logic                  CmpNV,      // invalid flag
+   output logic [`FLEN-1:0]      CmpFpRes,   // compare floating-point result
+   output logic [`XLEN-1:0]      CmpIntRes   // compare integer result
    );
 
-   logic LTabs, LT, EQ; // is X < or > or = Y
-   logic [`FLEN-1:0] NaNRes;
-   logic BothZero, EitherNaN, EitherSNaN;
+   logic LTabs, LT, EQ;         // is X < or > or = Y
+   logic [`FLEN-1:0] NaNRes;    // NaN result
+   logic BothZero;              // are both inputs zero
+   logic EitherNaN, EitherSNaN; // is either input a (signaling) NaN
    
-   assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
-   assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
-   // assign LT = {~XSgnE, XExpE, XManE[`NF-1:0]} < {~YSgnE, YExpE, YManE[`NF-1:0]}; // *** James look at whether we can simplify to this, but it fails regression
+   assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers
+   assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison
+   assign EQ = (X == Y);
 
-   //assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]});
-   //assign LT = XInt < YInt;
-//   assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
-   assign EQ = (FSrcXE == FSrcYE);
-
-   assign BothZero = XZeroE&YZeroE;
-   assign EitherNaN = XNaNE|YNaNE;
-   assign EitherSNaN = XSNaNE|YSNaNE;
+   assign BothZero = XZero&YZero;
+   assign EitherNaN = XNaN|YNaN;
+   assign EitherSNaN = XSNaN|YSNaN;
 
 
    // flags
@@ -74,78 +71,91 @@ module fcmp (
    //    LT/LE - signaling - sets invalid if NaN input
    //    EQ - quiet - sets invalid if signaling NaN input
    always_comb begin
-      case (FOpCtrlE[2:0])
-         3'b110: CmpNVE = EitherSNaN;//min 
-         3'b101: CmpNVE = EitherSNaN;//max
-         3'b010: CmpNVE = EitherSNaN;//equal
-         3'b001: CmpNVE = EitherNaN;//less than
-         3'b011: CmpNVE = EitherNaN;//less than or equal
-         default: CmpNVE = 1'bx;
+      case (OpCtrl[2:0])
+         3'b110: CmpNV = EitherSNaN;//min 
+         3'b101: CmpNV = EitherSNaN;//max
+         3'b010: CmpNV = EitherSNaN;//equal
+         3'b001: CmpNV = EitherNaN;//less than
+         3'b011: CmpNV = EitherNaN;//less than or equal
+         default: CmpNV = 1'bx;
       endcase
    end 
 
-   // Min/Max
-   //    - outputs the min/max of X and Y
-   //    - -0 < 0
-   //    - if both are NaN return quiet X
-   //    - if one is a NaN output the non-NaN
-   // LT/LE/EQ
-   //    - -0 = 0
-   //    - inf = inf and -inf = -inf
-   //    - return 0 if comparison with NaN (unordered)
-
    // fmin/fmax of two NaNs returns a quiet NaN of the appropriate size
    // for IEEE, return the payload of X
    // for RISC-V, return the canonical NaN
 
-   
+   // select the NaN result
    if (`FPSIZES == 1)
-      if(`IEEE754) assign NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
+      if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
       else         assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
 
    else if (`FPSIZES == 2) 
-      if(`IEEE754) assign NaNRes = FmtE ? {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]};
-      else         assign NaNRes = FmtE ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+      if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
+      else         assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
    
    else if (`FPSIZES == 3)
       always_comb
-            case (FmtE)
+            case (Fmt)
                `FMT:  
-                  if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
+                  if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
                   else         NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                `FMT1:
-                  if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, XSgnE, {`NE1{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF1]};
+                  if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
                   else         NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
                `FMT2:
-                  if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, XSgnE, {`NE2{1'b1}}, 1'b1, XManE[`NF-2:`NF-`NF2]};
+                  if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
                   else         NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
                default:        NaNRes = {`FLEN{1'bx}};
             endcase
 
    else if (`FPSIZES == 4)
       always_comb
-            case (FmtE)
+            case (Fmt)
                2'h3:  
-                  if(`IEEE754) NaNRes = {XSgnE, {`NE{1'b1}}, 1'b1, XManE[`NF-2:0]};
+                  if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
                   else         NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
                2'h1:  
-                  if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, XSgnE, {`D_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`D_NF]};
+                  if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
                   else         NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
                2'h0: 
-                  if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, XSgnE, {`S_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`S_NF]};
+                  if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
                   else         NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
                2'h2:
-                  if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, XSgnE, {`H_NE{1'b1}}, 1'b1, XManE[`NF-2:`NF-`H_NF]};
+                  if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
                   else         NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
             endcase
 
- // when one input is a NaN -output the non-NaN
-   assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
-                                          : YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE : 
-                                    XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
-                                          : YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
-                                    
 
-   assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
+   // Min/Max
+   //    - outputs the min/max of X and Y
+   //    - -0 < 0
+   //    - if both are NaN return quiet X
+   //    - if one is a NaN output the non-NaN
+   always_comb
+      if(OpCtrl[0]) // MAX
+         if(XNaN)
+            if(YNaN)    CmpFpRes = NaNRes;   // X = NaN Y = NaN
+            else        CmpFpRes = Y;        // X = NaN Y != NaN
+         else
+            if(YNaN)    CmpFpRes = X; // X != NaN Y = NaN
+            else // X,Y != NaN
+               if(LT)   CmpFpRes = Y; // X < Y
+               else     CmpFpRes = X; // X >= Y
+      else  // MIN
+         if(XNaN)
+            if(YNaN)    CmpFpRes = NaNRes;   // X = NaN Y = NaN
+            else        CmpFpRes = Y;        // X = NaN Y != NaN
+         else
+            if(YNaN)    CmpFpRes = X; // X != NaN Y = NaN
+            else // X,Y != NaN
+               if(LT)   CmpFpRes = X; // X < Y
+               else     CmpFpRes = Y; // X >= Y
+                                    
+   // LT/LE/EQ
+   //    - -0 = 0
+   //    - inf = inf and -inf = -inf
+   //    - return 0 if comparison with NaN (unordered)
+   assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
    
 endmodule
diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv
index 5c553e864..85047248d 100755
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@@ -29,25 +29,41 @@
 `include "wally-config.vh"
 
 module fctrl (
+  input  logic       clk,
+  input  logic       reset,
+  input  logic       StallE, StallM, StallW, // stall signals
+  input  logic       FlushE, FlushM, FlushW, // flush signals
+  input  logic [31:0] InstrD,
   input  logic [6:0] Funct7D,   // bits 31:25 of instruction - may contain percision
   input  logic [6:0] OpD,       // bits 6:0 of instruction
   input  logic [4:0] Rs2D,      // bits 24:20 of instruction
   input  logic [2:0] Funct3D,   // bits 14:12 of instruction - may contain rounding mode
   input  logic [2:0] FRM_REGW,  // rounding mode from CSR
   input  logic [1:0] STATUS_FS, // is FPU enabled?
+  input  logic       FDivBusyE,  // is the divider busy
   output logic       IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
-  output logic       FRegWriteD,  // FP register write enable
-  output logic       FDivStartD,  // Start division or squareroot
-  output logic [1:0] FResSelD, // select result to be written to fp register
-  output logic [2:0] FOpCtrlD,    // chooses which opperation to do - specifics shown at bottom of module and in each unit
-  output logic [1:0] PostProcSelD, 
-  output logic [`FMTBITS-1:0] FmtD,        // precision - single-0 double-1
-  output logic [2:0] FrmD,        // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-  output logic       FWriteIntD   // is the result written to the integer register
+  output logic 		         FRegWriteM, FRegWriteW, // FP register write enable
+  output logic [2:0] 	      FrmM,                   // FP rounding mode
+  output logic [`FMTBITS-1:0] FmtE, FmtM,             // FP format
+  output logic 		         DivStartE,             // Start division or squareroot
+  output logic 		         FWriteIntE, FWriteIntM,                         // Write to integer register
+  output logic [2:0] 	      OpCtrlE, OpCtrlM,       // Select which opperation to do in each component
+  output logic [1:0] 	      FResSelE, FResSelM, FResSelW,       // Select one of the results that finish in the memory stage
+  output logic [1:0] 	      PostProcSelE, PostProcSelM, // select result in the post processing unit
+  output logic [4:0] 	      Adr1E, Adr2E, Adr3E                // addresses of each input
   );
 
   `define FCTRLW 11
   logic [`FCTRLW-1:0] ControlsD;
+  logic 		  FRegWriteD; // FP register write enable
+  logic 		  DivStartD; // Start division or squareroot
+  logic 		  FWriteIntD; // integer register write enable
+  logic 		         FRegWriteE; // FP register write enable
+  logic [2:0] 	      OpCtrlD;       // Select which opperation to do in each component
+  logic [1:0] 	      PostProcSelD; // select result in the post processing unit
+  logic [1:0] 	      FResSelD;       // Select one of the results that finish in the memory stage
+  logic [2:0] FrmD, FrmE;                   // FP rounding mode
+  logic [`FMTBITS-1:0] FmtD;             // FP format
   //*** will putting x for don't cares reduce area in synthisis???
   // FPU Instruction Decoder
   always_comb
@@ -130,7 +146,7 @@ module fctrl (
     endcase
 
   // unswizzle control bits
-  assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
+  assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD;
   
   // rounding modes:
   //    000 - round to nearest, ties to even
@@ -168,7 +184,7 @@ module fctrl (
 //  10  fma
 
 //  Other Sel:
-//    Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]}
+//    Ctrl signal = {OpCtrl[2], &OpCtrl[1:0]}
 //        000 - sign            00
 //        001 - negate sign     00
 //        010 - xor sign        00
@@ -205,5 +221,20 @@ module fctrl (
 //        01 - negate sign
 //        10 - xor sign
     
+  // D/E pipeline register
+  flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, 
+              {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD},
+              {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE});
+   flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
+                           {Adr1E, Adr2E, Adr3E});
+  flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE);
+  // E/M pipeline register
+  flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
+              {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE},
+              {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM});
+  // M/W pipeline register
+  flopenrc #(3)  MWCtrlReg(clk, reset, FlushW, ~StallW,
+          {FRegWriteM, FResSelM},
+          {FRegWriteW, FResSelW});
 
 endmodule
diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv
index 9d7f2d62d..d2967887f 100644
--- a/pipelined/src/fpu/fcvt.sv
+++ b/pipelined/src/fpu/fcvt.sv
@@ -35,7 +35,7 @@ module fcvt (
     input logic [`NE-1:0]   Xe,          // input's exponent
     input logic [`NF:0]     Xm,          // input's fraction
     input logic [`XLEN-1:0] Int, // integer input - from IEU
-    input logic [2:0]       FOpCtrl,       // choose which opperation (look below for values)
+    input logic [2:0]       OpCtrl,       // choose which opperation (look below for values)
     input logic             ToInt,     // is fp->int (since it's writting to the integer register)
     input logic             XZero,         // is the input zero
     input logic             XDenorm,   // is the input denormalized
@@ -73,17 +73,17 @@ module fcvt (
 
 
     // seperate OpCtrl for code readability
-    assign Signed = FOpCtrl[0];
-    assign Int64 =  FOpCtrl[1];
-    assign IntToFp =   FOpCtrl[2];
+    assign Signed = OpCtrl[0];
+    assign Int64 =  OpCtrl[1];
+    assign IntToFp =   OpCtrl[2];
 
     // choose the ouptut format depending on the opperation
     //      - fp -> fp: OpCtrl contains the percision of the output
     //      - int -> fp: Fmt contains the percision of the output
     if (`FPSIZES == 2) 
-        assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT); 
+        assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT); 
     else if (`FPSIZES == 3 | `FPSIZES == 4) 
-        assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0]; 
+        assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0]; 
 
 
     ///////////////////////////////////////////////////////////////////////////
diff --git a/pipelined/src/fpu/fhazard.sv b/pipelined/src/fpu/fhazard.sv
index ca31d904f..36a0ff82f 100644
--- a/pipelined/src/fpu/fhazard.sv
+++ b/pipelined/src/fpu/fhazard.sv
@@ -31,20 +31,20 @@
 `include "wally-config.vh"
 
 module fhazard(
-    input logic [4:0]   Adr1E, Adr2E, Adr3E,    // read data adresses
-    input logic         FRegWriteM, FRegWriteW, // is the fp register being written to
-	  input logic [4:0]   RdM, RdW,               // the adress being written to
-    input logic [1:0]   FResSelM,            // the result being selected
+    input  logic [4:0]  Adr1E, Adr2E, Adr3E,    // read data adresses
+    input  logic        FRegWriteM, FRegWriteW, // is the fp register being written to
+	  input  logic [4:0]  RdM, RdW,               // the adress being written to
+    input  logic [1:0]  FResSelM,            // the result being selected
     output logic        FStallD,                // stall the decode stage
-    output logic [1:0]  FForwardXE, FForwardYE, FForwardZE // select a forwarded value
+    output logic [1:0]  ForwardXE, ForwardYE, ForwardZE // select a forwarded value
 );
 
 
   always_comb begin
     // set defaults
-    FForwardXE = 2'b00; // choose FRD1E
-    FForwardYE = 2'b00; // choose FRD2E
-    FForwardZE = 2'b00; // choose FRD3E
+    ForwardXE = 2'b00; // choose FRD1E
+    ForwardYE = 2'b00; // choose FRD2E
+    ForwardZE = 2'b00; // choose FRD3E
     FStallD = 0;
 
     //*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
@@ -52,28 +52,28 @@ module fhazard(
     // if the needed value is in the memory stage - input 1
     if ((Adr1E == RdM) & FRegWriteM) 
       // if the result will be FResM (can be taken from the memory stage)
-      if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM
       else FStallD = 1;                             // otherwise stall
     // if the needed value is in the writeback stage
-    else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
+    else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W
   
 
     // if the needed value is in the memory stage - input 2
     if ((Adr2E == RdM) & FRegWriteM)
       // if the result will be FResM (can be taken from the memory stage)
-      if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM
       else FStallD = 1;                             // otherwise stall
     // if the needed value is in the writeback stage
-    else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
+    else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W
 
 
     // if the needed value is in the memory stage - input 3
     if ((Adr3E == RdM) & FRegWriteM)
       // if the result will be FResM (can be taken from the memory stage)
-      if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
+      if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM
       else FStallD = 1;                             // otherwise stall
     // if the needed value is in the writeback stage
-    else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W
+    else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W
 
   end 
 
diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv
index 3f4cc2ac5..067147ee6 100644
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@@ -34,7 +34,7 @@ module fma(
     input logic  [`NE-1:0]      Xe, Ye, Ze,    // input's biased exponents in B(NE.0) format
     input logic  [`NF:0]        Xm, Ym, Zm,    // input's significands in U(0.NF) format
     input logic                 XZero, YZero, ZZero, // is the input zero
-    input logic  [2:0]          FOpCtrl,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
+    input logic  [2:0]          OpCtrl,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
     input logic  [`FMTBITS-1:0] Fmt,       // format of the result single double half or quad
     output logic [`NE+1:0]      Pe,       // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
     output logic                ZmSticky,  // sticky bit that is calculated during alignment
@@ -46,7 +46,7 @@ module fma(
     output logic                Ps,          // the product's sign
     output logic                Ss,          // the sum's sign
     output logic [`NE+1:0]      Se,
-    output logic [$clog2(3*`NF+7)-1:0]          NCnt        // normalization shift count
+    output logic [$clog2(3*`NF+7)-1:0]          SCnt        // normalization shift count
     );
 
     logic [2*`NF+1:0]   Pm;           // the product's significand in U(2.2Nf) format
@@ -72,7 +72,7 @@ module fma(
     // Alignment shifter
     ///////////////////////////////////////////////////////////////////////////////
     // calculate the signs and take the opperation into account
-    sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
+    sign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As);
 
     align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye,
                 .Am, .ZmSticky, .KillProd);
@@ -85,7 +85,7 @@ module fma(
         
     add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss);
     
-    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt);
+    loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt);
 endmodule
 
 
@@ -120,7 +120,7 @@ endmodule
 
 
 module sign(    
-    input  logic [2:0]  FOpCtrl,               // opperation contol
+    input  logic [2:0]  OpCtrl,               // opperation contol
     input  logic        Xs, Ys, Zs,    // sign of the inputs
     output logic        Ps,     // the product's sign - takes opperation into account
     output logic        As   // aligned addend sign used in fma - takes opperation into account
@@ -130,9 +130,9 @@ module sign(
     //      Negate product's sign if FNMADD or FNMSUB
     
     // flip is negation opperation
-    assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]);
+    assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]);
     // flip if subtraction
-    assign As = Zs^FOpCtrl[0];
+    assign As = Zs^OpCtrl[0];
 
 endmodule
 
@@ -275,7 +275,7 @@ endmodule
 module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
     input logic  [3*`NF+6:0] A,     // addend
     input logic  [2*`NF+3:0] P,     // product
-    output logic [$clog2(3*`NF+7)-1:0]       NCnt   // normalization shift count for the positive result
+    output logic [$clog2(3*`NF+7)-1:0]       SCnt   // normalization shift count for the positive result
     ); 
     
     logic [3*`NF+6:0] T;
@@ -300,6 +300,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE
 
 
 
-    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt));
+    lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt));
   
 endmodule
diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv
index d598efb7e..7464149f6 100644
--- a/pipelined/src/fpu/fmashiftcalc.sv
+++ b/pipelined/src/fpu/fmashiftcalc.sv
@@ -32,7 +32,7 @@ module fmashiftcalc(
     input logic  [3*`NF+5:0]            FmaSm,       // the positive sum
     input logic  [`NE-1:0]              Ze,      // exponent of Z
     input logic  [`NE+1:0]              FmaPe,   // X exponent + Y exponent - bias
-    input logic  [$clog2(3*`NF+7)-1:0]  FmaNCnt,   // normalization shift count
+    input logic  [$clog2(3*`NF+7)-1:0]  FmaSCnt,   // normalization shift count
     input logic  [`FMTBITS-1:0]         Fmt,       // precision 1 = double 0 = single
     input logic                         FmaKillProd,  // is the product set to zero
     input logic [`NE+1:0] FmaSe,
@@ -52,7 +52,7 @@ module fmashiftcalc(
     // Determine if the sum is zero
     assign FmaSZero = ~(|FmaSm);
     // calculate the sum's exponent
-    assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4);
+    assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4);
 
     //convert the sum's exponent into the proper percision
     if (`FPSIZES == 1) begin
@@ -152,7 +152,7 @@ module fmashiftcalc(
     //  - shift once if killing a product and the result is denormalized
     assign FmaShiftIn = {3'b0, FmaSm};
     if (`FPSIZES == 1)
-        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1;
+        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1;
     else
-        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1;
+        assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1;
 endmodule
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index a9c0ac247..6d9b9cf47 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -30,28 +30,28 @@
 `include "wally-config.vh"
 
 module fpu (
-  input logic 		   clk,
-  input logic 		   reset,
-  input logic [2:0] 	   FRM_REGW, // Rounding mode from CSR
-  input logic [31:0] 	   InstrD, // instruction from IFU
-  input logic [`FLEN-1:0]  ReadDataW,// Read data from memory
-  input logic [`XLEN-1:0]  ForwardedSrcAE, // Integer input being processed (from IEU)
-  input logic 		   StallE, StallM, StallW, // stall signals from HZU
-  input logic 		   FlushE, FlushM, FlushW, // flush signals from HZU
-  input logic [4:0] 	   RdM, RdW, // which FP register to write to (from IEU)
-  input logic [1:0]        STATUS_FS, // Is floating-point enabled?
-  output logic 		   FRegWriteM, // FP register write enable
-  output logic 		   FpLoadStoreM, // Fp load instruction?
-  output logic              FStore2,
-  output logic 		   FStallD, // Stall the decode stage
-  output logic 		   FWriteIntE, // integer register write enables
-  output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
-  output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory
-  output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
-  output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
-  output logic [1:0]       FResSelW,
-  output logic 		   FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
-  output logic 		   IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
+  input logic 		         clk,
+  input logic 		         reset,
+  input logic  [2:0] 	   FRM_REGW,   // Rounding mode (from CSR)
+  input logic  [31:0] 	   InstrD,     // instruction (from IFU)
+  input logic  [`FLEN-1:0] ReadDataW,  // Read data (from LSU)
+  input logic  [`XLEN-1:0] ForwardedSrcAE, // Integer input (from IEU)
+  input logic 		         StallE, StallM, StallW, // stall signals (from HZU)
+  input logic 		         FlushE, FlushM, FlushW, // flush signals (from HZU)
+  input logic  [4:0] 	   RdM, RdW,   // which FP register to write to (from IEU)
+  input logic  [1:0]       STATUS_FS,  // Is floating-point enabled? (From privileged unit)
+  output logic 		      FRegWriteM, // FP register write enable (to privileged unit)
+  output logic 		      FpLoadStoreM,  // Fp load instruction? (to LSU)
+  output logic             FStore2,       // store two words into memory (to LSU)
+  output logic 		      FStallD,       // Stall the decode stage (To HZU)
+  output logic 		      FWriteIntE,    // integer register write enable (to IEU)
+  output logic [`XLEN-1:0] FWriteDataE,   // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN
+  output logic [`FLEN-1:0] FWriteDataM,   // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN
+  output logic [`XLEN-1:0] FIntResM,      // data to be written to integer register (to IEU)
+  output logic [`XLEN-1:0] FCvtIntResW,   // convert result to be written to integer register (to IEU)
+  output logic [1:0]       FResSelW,      // final result selection (to IEU)
+  output logic 		      FDivBusyE,     // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
+  output logic 		      IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit)
   output logic [4:0] 	   SetFflagsM        // FPU flags (to privileged unit)
   );
 
@@ -62,99 +62,88 @@ module fpu (
    //    - sets the underflow after rounding
   
    // control signals
-   logic 		  FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable
-   logic [2:0] 	  FrmD, FrmE, FrmM;                   // FP rounding mode
-   logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW;             // FP precision 0-single 1-double
-   logic 		  FDivStartD, FDivStartE;             // Start division or squareroot
-   logic 		  FWriteIntD;                         // Write to integer register
-   logic 		  FWriteIntM;                         // Write to integer register
-   logic [1:0] 	  FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
-   logic [2:0] 	  FOpCtrlD, FOpCtrlE, FOpCtrlM;       // Select which opperation to do in each component
-   logic [1:0] 	  FResSelD, FResSelE, FResSelM;       // Select one of the results that finish in the memory stage
-   logic [1:0] 	  PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
-   logic [4:0] 	  Adr1E, Adr2E, Adr3E;                // adresses of each input
+   logic 		         FRegWriteW; // FP register write enable
+   logic [2:0] 	      FrmM;                   // FP rounding mode
+   logic [`FMTBITS-1:0] FmtE, FmtM;             // FP precision 0-single 1-double
+   logic 		         DivStartE;             // Start division or squareroot
+   logic 		         FWriteIntM;                         // Write to integer register
+   logic [1:0] 	      ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
+   logic [2:0] 	      OpCtrlE, OpCtrlM;       // Select which operation to do in each component
+   logic [1:0] 	      FResSelE, FResSelM;       // Select one of the results that finish in the memory stage
+   logic [1:0] 	      PostProcSelE, PostProcSelM; // select result in the post processing unit
+   logic [4:0] 	      Adr1E, Adr2E, Adr3E;                // addresses of each input
 
    // regfile signals
-   logic [`FLEN-1:0] 	  FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
-   logic [`FLEN-1:0] 	  FRD1E, FRD2E, FRD3E;                // Read Data from FP register - execute stage
-   logic [`FLEN-1:0] 	  FSrcXE;                             // Input 1 to the various units (after forwarding)
-   logic [`XLEN-1:0] 	  IntSrcXE;                             // Input 1 to the various units (after forwarding)
-   logic [`FLEN-1:0] 	  FPreSrcYE, FSrcYE;                  // Input 2 to the various units (after forwarding)
-   logic [`FLEN-1:0] 	  FPreSrcZE, FSrcZE;                  // Input 3 to the various units (after forwarding)
+   logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
+   logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E;                // Read Data from FP register - execute stage
+   logic [`FLEN-1:0] XE;                             // Input 1 to the various units (after forwarding)
+   logic [`XLEN-1:0] IntSrcXE;                             // X input resized to XLEN bits for the integer result mux
+   logic [`FLEN-1:0] PreYE, YE;                  // Input 2 to the various units (after forwarding)
+   logic [`FLEN-1:0] PreZE, ZE;                  // Input 3 to the various units (after forwarding)
 
    // unpacking signals
-   logic 		  XSgnE, YSgnE, ZSgnE;                // input's sign - execute stage
-   logic 		  XSgnM, YSgnM;                       // input's sign - memory stage
-   logic [`NE-1:0] 	  XExpE, YExpE, ZExpE;                // input's exponent - execute stage
-   logic [`NE-1:0] 	  ZExpM;                              // input's exponent - memory stage
-   logic [`NF:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
-   logic [`NF:0] 	  XManM, YManM, ZManM;                // input's fraction - memory stage
-   logic 		  XNaNE, YNaNE, ZNaNE;                // is the input a NaN - execute stage
-   logic 		  XNaNM, YNaNM, ZNaNM;                // is the input a NaN - memory stage
-   logic 		  XNaNQ, YNaNQ;                       // is the input a NaN - divide
-   logic 		  XSNaNE, YSNaNE, ZSNaNE;             // is the input a signaling NaN - execute stage
-   logic 		  XSNaNM, YSNaNM, ZSNaNM;             // is the input a signaling NaN - memory stage
-   logic 		  XDenormE, ZDenormE, ZDenormM;       // is the input denormalized
-   logic 		  XZeroE, YZeroE, ZZeroE;             // is the input zero - execute stage
-   logic 		  XZeroM, YZeroM, ZZeroM;             // is the input zero - memory stage
-   logic 		  XZeroQ, YZeroQ;                     // is the input zero - divide
-   logic 		  XInfE, YInfE, ZInfE;                // is the input infinity - execute stage
-   logic 		  XInfM, YInfM, ZInfM;                // is the input infinity - memory stage
-   logic 		  XInfQ, YInfQ;                       // is the input infinity - divide
-   logic 		  XExpMaxE;                           // is the exponent all ones (max value)
-   logic 		  FmtQ;
-   logic 		  FOpCtrlQ;   
+   logic 		      XsE, YsE, ZsE;                // input's sign - execute stage
+   logic 		      XsM, YsM;                       // input's sign - memory stage
+   logic [`NE-1:0] 	XeE, YeE, ZeE;                // input's exponent - execute stage
+   logic [`NE-1:0] 	ZeM;                              // input's exponent - memory stage
+   logic [`NF:0] 	   XmE, YmE, ZmE;                // input's fraction - execute stage
+   logic [`NF:0] 	   XmM, YmM, ZmM;                // input's fraction - memory stage
+   logic 		      XNaNE, YNaNE, ZNaNE;                // is the input a NaN - execute stage
+   logic 		      XNaNM, YNaNM, ZNaNM;                // is the input a NaN - memory stage
+   logic 		      XNaNQ, YNaNQ;                       // is the input a NaN - divide
+   logic 		      XSNaNE, YSNaNE, ZSNaNE;             // is the input a signaling NaN - execute stage
+   logic 		      XSNaNM, YSNaNM, ZSNaNM;             // is the input a signaling NaN - memory stage
+   logic 		      XDenormE, ZDenormE, ZDenormM;       // is the input denormalized
+   logic 		      XZeroE, YZeroE, ZZeroE;             // is the input zero - execute stage
+   logic 		      XZeroM, YZeroM, ZZeroM;             // is the input zero - memory stage
+   logic 		      XInfE, YInfE, ZInfE;                // is the input infinity - execute stage
+   logic 		      XInfM, YInfM, ZInfM;                // is the input infinity - memory stage
+   logic 		      XExpMaxE;                           // is the exponent all ones (max value)
 
    // Fma Signals
-   logic [3*`NF+5:0]	SumE, SumM;                       
-   logic [`NE+1:0]	    ProdExpE, ProdExpM;
-   logic 			    AddendStickyE, AddendStickyM;
-   logic [`NE+1:0]      SeE,SeM;
-   logic 			    KillProdE, KillProdM;
-   logic 			    InvAE, InvAM;
-   logic 			    NegSumE, NegSumM;
-   logic 			    ZSgnEffE, ZSgnEffM;
-   logic 			    PSgnE, PSgnM;
-   logic 			    SsE, SsM;
-   logic [$clog2(3*`NF+7)-1:0]			FmaNormCntE, FmaNormCntM;
+   logic [3*`NF+5:0] SmE, SmM;                       
+   logic [`NE+1:0]	PeE, PeM;
+   logic 			   ZmStickyE, ZmStickyM;
+   logic [`NE+1:0]   SeE,SeM;
+   logic 			   KillProdE, KillProdM;
+   logic 			   InvAE, InvAM;
+   logic 			   NegSumE, NegSumM;
+   logic 			   AsE, AsM;
+   logic 			   PsE, PsM;
+   logic 			   SsE, SsM;
+   logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM;
 
    // Cvt Signals
-   logic [`NE:0]           CvtCalcExpE, CvtCalcExpM;    // the calculated expoent
-   logic [`LOGCVTLEN-1:0]   CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
+   logic [`NE:0]           CeE, CeM;    // the calculated exponent
+   logic [`LOGCVTLEN-1:0]  CvtShiftAmtE, CvtShiftAmtM;  // how much to shift by
    logic                   CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
-   logic                   CvtResSgnE, CvtResSgnM;     // the result's sign
+   logic                   CsE, CsM;     // the result's sign
    logic                   IntZeroE, IntZeroM;      // is the integer zero?
-   logic [`CVTLEN-1:0]      CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
+   logic [`CVTLEN-1:0]     CvtLzcInE, CvtLzcInM;      // input to the Leading Zero Counter (priority encoder)
    
    //divide signals
-   logic [`QLEN-1-(`RADIX/4):0] QuotM;
-   logic [`NE+1:0] DivCalcExpE, DivCalcExpM; 
-   logic DivStickyE, DivStickyM;
-   logic DivDoneM;
-   logic [`DURLEN-1:0] EarlyTermShiftM;
+   logic [`QLEN-1-(`RADIX/4):0] QmM;
+   logic [`NE+1:0]      QeE, QeM; 
+   logic                DivSE, DivSM;
+   logic                DivDoneM;
+   logic [`DURLEN-1:0]  EarlyTermShiftM;
 
    // result and flag signals
-   logic [63:0] 	  FDivResM, FDivResW;                 // divide/squareroot result
-   logic [4:0] 	  FDivFlgM;                 // divide/squareroot flags  
-   logic [`FLEN-1:0] 	  ReadResW;                           // read result (load instruction)
-   logic [`XLEN-1:0] 	  ClassResE;               // classify result
-   logic [`XLEN-1:0] 	  FIntResE;               // classify result
-   logic [`FLEN-1:0] 	  FpResM, FpResW;               // classify result
-   logic [`FLEN-1:0] 	  PostProcResM;               // classify result
-   logic [4:0] 	  PostProcFlgM;               // classify result
+   logic [`XLEN-1:0] ClassResE;               // classify result
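+   logic [`XLEN-1:0] FIntResE;               // integer result selected from classify, X source, or compare
+   logic [`FLEN-1:0] FpResM, FpResW;               // FP result in the memory and writeback stages
+   logic [`FLEN-1:0] PostProcResM;               // result from the post processing unit
+   logic [4:0] 	   PostProcFlgM;               // flags from the post processing unit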
    logic [`XLEN-1:0] FCvtIntResM; 
-   logic [`FLEN-1:0] 	  CmpFpResE;                   // compare result
-   logic [`XLEN-1:0] 	  CmpIntResE;                   // compare result
-   logic 		           CmpNVE;                     // compare invalid flag (Not Valid)     
-   logic [`FLEN-1:0] 	  SgnResE;                   // sign injection result
-   logic [`FLEN-1:0] 	  PreFpResE, PreFpResM, PreFpResW;                // selected result that is ready in the memory stage
-   logic  	        PreNVE, PreNVM;                       // selected flag that is ready in the memory stage     
-   logic [`FLEN-1:0] 	  FPUResultW;                         // final FP result being written to the FP register     
+   logic [`FLEN-1:0] CmpFpResE;                   // compare result
+   logic [`XLEN-1:0] CmpIntResE;                   // compare result
+   logic 		      CmpNVE;                     // compare invalid flag (Not Valid)     
+   logic [`FLEN-1:0] SgnResE;                   // sign injection result
+   logic [`FLEN-1:0] PreFpResE, PreFpResM;                // selected result that is ready in the memory stage
+   logic  	         PreNVE, PreNVM;                       // selected flag that is ready in the memory stage     
+   logic [`FLEN-1:0] FPUResultW;                         // final FP result being written to the FP register     
    // other signals
-   logic 		  FDivSqrtDoneE;                      // is divide done
-   logic [63:0] 	  DivInput1E, DivInput2E;             // inputs to divide/squareroot unit
-   logic 		  load_preload;                       // enable for FF on fpdivsqrt     
-   logic [`FLEN-1:0] 	  AlignedSrcAE;                       // align SrcA to the floating point format
+   logic [`FLEN-1:0] 	 AlignedSrcAE;                       // align SrcA to the floating point format
    logic [`FLEN-1:0]     BoxedZeroE;                         // Zero value for Z for multiplication, with NaN boxing if needed
    logic [`FLEN-1:0]     BoxedOneE;                         // Zero value for Z for multiplication, with NaN boxing if needed
    
@@ -171,9 +160,11 @@ module fpu (
    //////////////////////////////////////////////////////////////////////////////////////////
 
    // calculate FP control signals
-   fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
-      .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD, 
-      .FmtD, .FrmD, .FWriteIntD);
+   fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD,
+               .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
+               .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
+               .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM,
+               .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E);
 
    // FP register file
    fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@@ -185,12 +176,6 @@ module fpu (
    flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
    flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
    flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
-   flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
-                           {Adr1E, Adr2E, Adr3E});
-   flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, 
-               {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD},
-               {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE});
-   flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE);
 
    // EXECUTION STAGE
    
@@ -207,12 +192,12 @@ module fpu (
    // Hazard unit for FPU  
    //    - determines if any forwarding or stalls are needed
    fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, 
-                  .FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
+                  .FStallD, .ForwardXE, .ForwardYE, .ForwardZE);
 
    // forwarding muxs
-   mux3  #(`FLEN)  fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE);
-   mux3  #(`FLEN)  fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE);
-   mux3  #(`FLEN)  fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE);
+   mux3  #(`FLEN)  fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE);
+   mux3  #(`FLEN)  fyemux (FRD2E, FPUResultW, PreFpResM, ForwardYE, PreYE);
+   mux3  #(`FLEN)  fzemux (FRD3E, FPUResultW, PreFpResM, ForwardZE, PreZE);
 
 
    generate
@@ -227,7 +212,7 @@ module fpu (
    endgenerate
 
 
-   mux2  #(`FLEN)  fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
+   mux2  #(`FLEN)  fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Force Y to be 1 (BoxedOneE) - presumably for add/sub through the fma
    
    // Force Z to be 0 for multiply instructions 
    generate
@@ -241,55 +226,76 @@ module fpu (
                                  (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
    endgenerate
 
-   mux3  #(`FLEN)  fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE);
+   mux3  #(`FLEN)  fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE);
 
    // unpack unit
    //    - splits FP inputs into their various parts
    //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
-   unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE,
-         .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
-         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, 
-         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
+   unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), 
+                  .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), 
+                  .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), 
+                  .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE), 
+                  .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), 
+                  .ZInf(ZInfE), .XExpMax(XExpMaxE));
    
-   // fma - does multiply, add, and multiply-add instructions 
-   fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), 
-            .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), 
-            .Xm(XManE), .Ym(YManE), .Zm(ZManE), 
+   // fused multiply add
+   //    - fadd/fsub
+   //    - fmul
+   //    - fmadd/fnmadd/fmsub/fnmsub
+   fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), 
+            .Xe(XeE), .Ye(YeE), .Ze(ZeE), 
+            .Xm(XmE), .Ym(YmE), .Zm(ZmE), 
             .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), 
-            .FOpCtrl(FOpCtrlE), .Fmt(FmtE), 
-            .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE),
-            .Sm(SumE), .Pe(ProdExpE), 
-            .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), 
-            .ZmSticky(AddendStickyE), .KillProd(KillProdE)); 
+            .OpCtrl(OpCtrlE), .Fmt(FmtE), 
+            .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE),
+            .Sm(SmE), .Pe(PeE), 
+            .NegSum(NegSumE), .InvA(InvAE), .SCnt(SCntE), 
+            .ZmSticky(ZmStickyE), .KillProd(KillProdE)); 
 
-   divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, 
-                  .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), 
-                  .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal
-                  .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM));
-   // other FP execution units
-   fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, 
-            .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
-   fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
-   fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
+   // divide and squareroot
+   //    - fdiv
+   //    - fsqrt
+   // *** add other operations
+   divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, 
+                  .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), 
+                  .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal
+                  .EarlyTermShiftM, .QmM, .DivDone(DivDoneM));
+   // compare
+   //    - fmin/fmax
+   //    - flt/fle/feq
+   fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), 
+               .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), 
+               .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), 
+               .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
+   // sign injection
+   //    - fsgnj/fsgnjx/fsgnjn
+   fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
 
-   fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE), 
-              .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE), 
-              .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE), 
+   // classify
+   //    - fclass
+   fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE), 
+                        .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
+
+   // convert
+   //    - fcvt.*.*
+   fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), 
+              .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE), 
+              .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE), 
               .LzcIn(CvtLzcInE));
 
    // data to be stored in memory - to IEU
    //    - FP uses NaN-blocking format
    //        - if there are any unsused bits the most significant bits are filled with 1s
    if (`LLEN==`XLEN) begin
-      assign FWriteDataE = FSrcYE[`XLEN-1:0]; 
+      assign FWriteDataE = YE[`XLEN-1:0]; 
    end else begin
       logic [`FLEN-1:0] FWriteDataE;
       if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT;
       else assign FStore2 = FmtM;
 
-      if (`FPSIZES==1) assign FWriteDataE = FSrcYE;
-      else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
-      else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}};
+      if (`FPSIZES==1) assign FWriteDataE = YE;
+      else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}};
+      else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}};
 
       flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM);
    end
@@ -306,14 +312,14 @@ module fpu (
                              {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
    endgenerate
    // select a result that may be written to the FP register
-   mux3  #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
-   assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
+   mux3  #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
+   assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
 
    // select the result that may be written to the integer register - to IEU
    if (`FLEN>`XLEN)
-      assign IntSrcXE = FSrcXE[`XLEN-1:0];
+      assign IntSrcXE = XE[`XLEN-1:0];
    else 
-      assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
+      assign IntSrcXE = {{`XLEN-`FLEN{XE[`FLEN-1:0]}}, XE};
 
    mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
    // *** DH 5/25/22: CvtRes will move to mem stage.  Premux in execute to save area, then make sure stalls are ok
@@ -321,27 +327,24 @@ module fpu (
 
    // E/M pipe registers
 
-   // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
-   flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
-   flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
-   flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
+   // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM);
+   flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM});
+   flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM});
+   flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
    flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
    flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
    flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, 
             {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
             {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});     
    flopenrc #(1)  EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);      
-   flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
-               {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
-               {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
-   flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
-   flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
+   flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); 
+   flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM);  
    flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, 
-                           {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE},
-                           {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM, SeM});
+                           {ZmStickyE, KillProdE, InvAE, SCntE, NegSumE, AsE, PsE, SsE, SeE},
+                           {ZmStickyM, KillProdM, InvAM, SCntM, NegSumM, AsM, PsM, SsM, SeM});
    flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, 
-                           {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
-                           {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
+                           {CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE},
+                           {CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM});
 
    // BEGIN MEMORY STAGE
 
@@ -357,11 +360,11 @@ module fpu (
 
    assign FpLoadStoreM = FResSelM[1];
 
-   postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM),
-                           .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM),
-                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM),
-                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM),
-                           .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM),
+   postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .DivEarlyTermShift(EarlyTermShiftM),
+                           .FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
+                           .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM),
+                           .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
+                           .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM),
                            .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
 
    // FPU flag selection - to privileged
@@ -371,9 +374,6 @@ module fpu (
    // M/W pipe registers
    flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); 
    flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); 
-   flopenrc #(4+int'(`FMTBITS-1))  MWCtrlReg(clk, reset, FlushW, ~StallW,
-            {FRegWriteM, FResSelM, FmtM},
-            {FRegWriteW, FResSelW, FmtW});
 
    // BEGIN WRITEBACK STAGE
 
diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv
index 17d15669f..a5b7e7742 100755
--- a/pipelined/src/fpu/fsgninj.sv
+++ b/pipelined/src/fpu/fsgninj.sv
@@ -26,60 +26,59 @@
 //   TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
 //   OR OTHER DEALINGS IN THE SOFTWARE.
 ////////////////////////////////////////////////////////////////////////////////////////////////
+
 `include "wally-config.vh"
 
 module fsgninj (  
-	input logic        	XSgnE, YSgnE,	// X and Y sign bits
-	input logic [`FLEN-1:0] 	FSrcXE,			// X
-	input logic [`FMTBITS-1:0]		FmtE,			// precision 1 = double 0 = single
-	input  logic [1:0]  SgnOpCodeE,		// operation control
-	output logic [`FLEN-1:0] SgnResE			// result
+	input logic        			Xs, Ys,	// X and Y sign bits
+	input logic [`FLEN-1:0] 	X,		// X
+	input logic [`FMTBITS-1:0]	Fmt,	// format
+	input  logic [1:0]  		OpCtrl,	// operation control
+	output logic [`FLEN-1:0] 	SgnRes	// result
 );
 
 	logic ResSgn;
 
-	//op code designation:
-	//
-	//00 - fsgnj - directly copy over sign value of FSrcYE
-	//01 - fsgnjn - negate sign value of FSrcYE
-	//10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE
-	//
+	// OpCtrl:
+	// 		00 - fsgnj  - directly copy over sign value of Y
+	// 		01 - fsgnjn - negate sign value of Y
+	// 		10 - fsgnjx - XOR sign values of X and Y
 	
 	// calculate the result's sign
-	assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE;
+	assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys;
 	
 	// format final result based on precision
 	//    - uses NaN-blocking format
 	//        - if there are any unsused bits the most significant bits are filled with 1s
 	
     if (`FPSIZES == 1)
-		assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]};
+		assign SgnRes = {ResSgn, X[`FLEN-2:0]};
 
     else if (`FPSIZES == 2)
-		assign SgnResE = {~FmtE|ResSgn, FSrcXE[`FLEN-2:`LEN1], FmtE ? FSrcXE[`LEN1-1] : ResSgn, FSrcXE[`LEN1-2:0]};
+		assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
 
     else if (`FPSIZES == 3) begin
 		logic [2:0] SgnBits;
         always_comb
-            case (FmtE)
-                `FMT: SgnBits = {ResSgn, FSrcXE[`LEN1-1], FSrcXE[`LEN2-1]};
-                `FMT1: SgnBits = {1'b1, ResSgn, FSrcXE[`LEN2-1]};
+            case (Fmt)
+                `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
+                `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
                 `FMT2: SgnBits = {2'b11, ResSgn};
                 default: SgnBits = {3{1'bx}};
             endcase
-		assign SgnResE = {SgnBits[2], FSrcXE[`FLEN-2:`LEN1], SgnBits[1], FSrcXE[`LEN1-2:`LEN2], SgnBits[0], FSrcXE[`LEN2-2:0]};
+		assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
         
 
 	end else if (`FPSIZES == 4) begin
 		logic [3:0] SgnBits;
         always_comb
-            case (FmtE)
-                `Q_FMT: SgnBits = {ResSgn, FSrcXE[`D_LEN-1], FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]};
-                `D_FMT: SgnBits = {1'b1, ResSgn, FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]};
-                `S_FMT: SgnBits = {2'b11, ResSgn, FSrcXE[`H_LEN-1]};
+            case (Fmt)
+                `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
+                `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
+                `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
                 `H_FMT: SgnBits = {3'b111, ResSgn};
             endcase
-		assign SgnResE = {SgnBits[3], FSrcXE[`Q_LEN-2:`D_LEN], SgnBits[2], FSrcXE[`D_LEN-2:`S_LEN], SgnBits[1], FSrcXE[`S_LEN-2:`H_LEN], SgnBits[0], FSrcXE[`H_LEN-2:0]};
+		assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
 	end
 
 endmodule
diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv
index 8d11273a2..66af5b3c5 100644
--- a/pipelined/src/fpu/otfc.sv
+++ b/pipelined/src/fpu/otfc.sv
@@ -107,6 +107,6 @@ module otfc4 (
       QMNext = {QMR, 2'b11};
     end 
   end
-  // Final Quoteint is in the range [.5, 2)
+  // Final Quotient is in the range [.5, 2)
 
 endmodule
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index de3c4f30c..d3169d471 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -36,7 +36,7 @@ module postprocess (
     input logic  [`NF:0]                    Xm, Ym, Zm, // input mantissas
     input logic  [2:0]                      Frm,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
     input logic  [`FMTBITS-1:0]             Fmt,       // precision 1 = double 0 = single
-    input logic  [2:0]                      FOpCtrl,       // choose which opperation (look below for values)
+    input logic  [2:0]                      OpCtrl,       // choose which opperation (look below for values)
     input logic                             XZero, YZero, ZZero, // inputs are zero
     input logic                             XInf, YInf, ZInf,    // inputs are infinity
     input logic                             XNaN, YNaN, ZNaN,    // inputs are NaN
@@ -54,7 +54,7 @@ module postprocess (
     input logic                             FmaNegSum,    // was the sum negitive
     input logic                             FmaInvA,      // do you invert Z
     input logic                             FmaSs,
-    input logic  [$clog2(3*`NF+7)-1:0]      FmaNCnt,   // the normalization shift count
+    input logic  [$clog2(3*`NF+7)-1:0]      FmaSCnt,   // the normalization shift count
     //divide signals
     input logic  [`DURLEN-1:0]              DivEarlyTermShift,
     input logic                             DivS,
@@ -125,14 +125,14 @@ module postprocess (
     logic Sqrt;
 
     // signals to help readability
-    assign Signed =  FOpCtrl[0];
-    assign Int64 =   FOpCtrl[1];
-    assign IntToFp = FOpCtrl[2];
-    assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0];
+    assign Signed =  OpCtrl[0];
+    assign Int64 =   OpCtrl[1];
+    assign IntToFp = OpCtrl[2];
+    assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
     assign CvtOp = (PostProcSel == 2'b00);
     assign FmaOp = (PostProcSel == 2'b10);
     assign DivOp = (PostProcSel == 2'b01)&DivDone;
-    assign Sqrt =  FOpCtrl[0];
+    assign Sqrt =  OpCtrl[0];
 
     // is there an input of infinity or NaN being used
     assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp);
@@ -142,9 +142,9 @@ module postprocess (
     //      - fp -> fp: OpCtrl contains the percision of the output
     //      - otherwise: Fmt contains the percision of the output
     if (`FPSIZES == 2) 
-        assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT); 
+        assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT); 
     else if (`FPSIZES == 3 | `FPSIZES == 4) 
-        assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0]; 
+        assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; 
 
     ///////////////////////////////////////////////////////////////////////////////
     // Normalization
@@ -152,7 +152,7 @@ module postprocess (
 
     cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn,  
                               .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
-    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
+    fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe,
                           .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
     divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn);
 
diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv
index 396ca7761..202b3ee81 100644
--- a/pipelined/src/fpu/qsel.sv
+++ b/pipelined/src/fpu/qsel.sv
@@ -42,7 +42,7 @@ module qsel2 ( // *** eventually just change to 4 bits
   // for efficiency.  You can probably optimize your logic to
   // select the proper divisor with less delay.
 
-  // Quotient equations from EE371 lecture notes 13-20
+  // Quotient equations from EE371 lecture notes 13-20
   assign p = ps ^ pc;
   assign g = ps & pc;
 
diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv
index 514edbee1..6329ffe28 100644
--- a/pipelined/src/fpu/shiftcorrection.sv
+++ b/pipelined/src/fpu/shiftcorrection.sv
@@ -43,7 +43,7 @@ module shiftcorrection(
     output logic [`NE+1:0]          FmaMe         // exponent of the normalized sum
 );
     logic [3*`NF+5:0]      CorrSumShifted;     // the shifted sum after LZA correction
-    logic [`CORRSHIFTSZ-1:0] CorrQuotShifted;
+    logic [`CORRSHIFTSZ-1:0] CorrQmShifted;
     logic                  ResDenorm;    // is the result denormalized
     logic                  LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
 
@@ -53,11 +53,11 @@ module shiftcorrection(
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
     assign CorrSumShifted =  LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0];
     //                        if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm)
-    assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
+    assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
     // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits
     always_comb
         if(FmaOp)                       Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}};
-        else if (DivOp&~DivResDenorm)   Mf = CorrQuotShifted;
+        else if (DivOp&~DivResDenorm)   Mf = CorrQmShifted;
         else                            Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
     // Determine sum's exponent
     //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv
index ee5ae9a39..7e9f9922a 100644
--- a/pipelined/src/fpu/srt.sv
+++ b/pipelined/src/fpu/srt.sv
@@ -37,15 +37,15 @@ module srt(
   input  logic [`FMTBITS-1:0] FmtE,
   input  logic [`NE-1:0] Xe, Ye,
   input  logic XZeroE, YZeroE, 
-  input  logic [`DIVLEN-1:0] X,
-  input  logic [`DIVLEN-1:0] Dpreproc,
-  input  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  input  logic NegSticky,
-  output logic [`QLEN-1-(`RADIX/4):0] Quot,
+  input logic [`DIVLEN-1:0] X,
+  input logic [`DIVLEN-1:0] Dpreproc,
+  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  input logic NegSticky,
+  output logic [`QLEN-1-(`RADIX/4):0] Qm,
   output logic [`DIVLEN+3:0]  NextWSN, NextWCN,
   output logic [`DIVLEN+3:0]  StickyWSA,
   output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
-  output logic [`NE+1:0] DivCalcExpM,
+  output logic  [`NE+1:0] QeM,
   output logic [`XLEN-1:0] Rem
 );
 
@@ -62,7 +62,7 @@ module srt(
  /* verilator lint_on UNOPTFLAT */
   logic [`DIVLEN+3:0]  WSN, WCN;
   logic [`DIVLEN+3:0]  D, DBar, D2, DBar2;
-  logic [`NE+1:0] DivCalcExp;
+  logic [`NE+1:0] Qe;
   logic [$clog2(`XLEN+1)-1:0] intExp;
   logic           intSign;
   logic [`QLEN-1:0] QMMux;
@@ -88,7 +88,7 @@ module srt(
   mux2   #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
   flopen   #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
   flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
-  flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM);
+  flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM);
 
 
   // Divisor Selections
@@ -123,7 +123,7 @@ module srt(
   flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
   flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
 
-  assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
+  assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
   assign FirstWS = WS[0];
   assign FirstWC = WC[0];
   if(`RADIX==2)
@@ -132,7 +132,7 @@ module srt(
     else
       assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0};
 
-  expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp);
+  expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe);
 
 endmodule
 
@@ -155,7 +155,7 @@ module divinteration (
   logic [3:0]     q;
   logic qp, qz;//, qn;
 
-  // Quotient Selection logic
+  // Quotient Selection logic
   // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
   // q encoding:
 	// 1000 = +2
@@ -226,7 +226,7 @@ module expcalc(
   input  logic [`NE-1:0] Xe, Ye,
   input logic XZeroE, 
   input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic  [`NE+1:0] DivCalcExp
+  output logic  [`NE+1:0] Qe
   );
     logic [`NE-2:0] Bias;
     
@@ -255,5 +255,5 @@ module expcalc(
             endcase
     end
     // correct exponent for denormalized input's normalization shifts
-    assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
+    assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
     endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv
index 634ecc1d3..597f96cd7 100644
--- a/pipelined/src/fpu/srtfsm.sv
+++ b/pipelined/src/fpu/srtfsm.sv
@@ -43,7 +43,7 @@ module srtfsm(
   input  logic [`DIVLEN+3:0] StickyWSA,
   input  logic [`DURLEN-1:0] Dur,
   output logic [`DURLEN-1:0] EarlyTermShiftE,
-  output logic DivStickyE,
+  output logic DivSE,
   output logic DivDone,
   output logic NegSticky,
   output logic DivBusy
@@ -65,9 +65,9 @@ module srtfsm(
   //      this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant
   //      radix-4 division can't create a QM that continually adds 0's
   if (`RADIX == 2)
-    assign DivStickyE = |W&~(StickyWSA == WS);
+    assign DivSE = |W&~(StickyWSA == WS);
   else
-    assign DivStickyE = |W;
+    assign DivSE = |W;
   assign DivDone = (state == DONE);
   assign W = WC+WS;
   assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this???
diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv
index 71cad1872..050839c2f 100644
--- a/pipelined/src/fpu/unpack.sv
+++ b/pipelined/src/fpu/unpack.sv
@@ -30,35 +30,34 @@
 
 module unpack ( 
     input logic  [`FLEN-1:0]        X, Y, Z,    // inputs from register file
-    input logic  [`FMTBITS-1:0]     FmtE,       // format signal 00 - single 01 - double 11 - quad 10 - half
-    output logic                    XSgnE, YSgnE, ZSgnE,    // sign bits of XYZ
-    output logic [`NE-1:0]          XExpE, YExpE, ZExpE,    // exponents of XYZ (converted to largest supported precision)
-    output logic [`NF:0]            XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
-    output logic                    XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
-    output logic                    XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-    output logic                    XDenormE, ZDenormE,   // is XYZ denormalized
-    output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
-    output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
-    output logic                    XExpMaxE                        // does X have the maximum exponent (NaN or Inf)
+    input logic  [`FMTBITS-1:0]     Fmt,       // format signal 00 - single 01 - double 11 - quad 10 - half
+    output logic                    Xs, Ys, Zs,    // sign bits of XYZ
+    output logic [`NE-1:0]          Xe, Ye, Ze,    // exponents of XYZ (converted to largest supported precision)
+    output logic [`NF:0]            Xm, Ym, Zm,    // mantissas of XYZ (converted to largest supported precision)
+    output logic                    XNaN, YNaN, ZNaN,    // is XYZ a NaN
+    output logic                    XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
+    output logic                    XDenorm, ZDenorm,   // is XYZ denormalized
+    output logic                    XZero, YZero, ZZero,         // is XYZ zero
+    output logic                    XInf, YInf, ZInf,            // is XYZ infinity
+    output logic                    XExpMax                        // does X have the maximum exponent (NaN or Inf)
 );
  
-    logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ
     logic           XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
     logic           XFracZero, YFracZero, ZFracZero; // is the fraction zero
-    logic           YExpMaxE, ZExpMaxE;  // is the exponent all 1s
+    logic           YExpMax, ZExpMax;  // is the exponent all 1s
     
-    unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), 
-                            .NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero),
-                            .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero));
+    unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), 
+                            .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
+                            .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero));
 
-    unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), 
-                            .NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero),
-                            .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero));
+    unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), 
+                            .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
+                            .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero));
 
-    unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), 
-                            .NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero),
-                            .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero));
+    unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), 
+                            .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
+                            .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero));
     // is the input denormalized
-    assign XDenormE = ~XExpNonZero & ~XFracZero;
-    assign ZDenormE = ~ZExpNonZero & ~ZFracZero;
+    assign XDenorm = ~XExpNonZero & ~XFracZero;
+    assign ZDenorm = ~ZExpNonZero & ~ZFracZero;
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv
index 2b078cc6c..7be922508 100644
--- a/pipelined/src/fpu/unpackinput.sv
+++ b/pipelined/src/fpu/unpackinput.sv
@@ -30,7 +30,7 @@
 
 module unpackinput ( 
     input logic  [`FLEN-1:0]        In,    // inputs from register file
-    input logic  [`FMTBITS-1:0]     FmtE,       // format signal 00 - single 01 - double 11 - quad 10 - half
+    input logic  [`FMTBITS-1:0]     Fmt,       // format signal 00 - single 01 - double 11 - quad 10 - half
     output logic                    Sgn,    // sign bits of XYZ
     output logic [`NE-1:0]          Exp,    // exponents of XYZ (converted to largest supported precision)
     output logic [`NF:0]            Man,    // mantissas of XYZ (converted to largest supported precision)
@@ -74,16 +74,16 @@ module unpackinput (
         //      quad   and half
         //      double and half
 
-        assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
+        assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
 
         // choose sign bit depending on format - 1=larger precsion 0=smaller precision
-        assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1];
+        assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1];
 
         // extract the fraction, add trailing zeroes to the mantissa if nessisary
-        assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
+        assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
 
         // is the exponent non-zero
-        assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; 
+        assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; 
 
         // example double to single conversion:
         // 1023 = 0011 1111 1111
@@ -95,10 +95,10 @@ module unpackinput (
 
         // extract the exponent, converting the smaller exponent into the larger precision if nessisary
         //      - if the original precision had a denormal number convert the exponent value 1
-        assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
+        assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
  
         // is the exponent all 1's
-        assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
+        assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
     
 
     end else if (`FPSIZES == 3) begin       // three floating point precsions supported
@@ -122,7 +122,7 @@ module unpackinput (
 
         // Check NaN boxing
         always_comb
-            case (FmtE)
+            case (Fmt)
                 `FMT:  BadNaNBox = 0;
                 `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
                 `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
@@ -131,7 +131,7 @@ module unpackinput (
 
         // extract the sign bit
         always_comb
-            case (FmtE)
+            case (Fmt)
                 `FMT:  Sgn = In[`FLEN-1];
                 `FMT1: Sgn = In[`LEN1-1];
                 `FMT2: Sgn = In[`LEN2-1];
@@ -140,7 +140,7 @@ module unpackinput (
 
         // extract the fraction
         always_comb
-            case (FmtE)
+            case (Fmt)
                 `FMT: Frac = In[`NF-1:0];
                 `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
                 `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
@@ -149,7 +149,7 @@ module unpackinput (
 
         // is the exponent non-zero
         always_comb
-            case (FmtE)
+            case (Fmt)
                 `FMT:  ExpNonZero = |In[`FLEN-2:`NF];     // if input is largest precision (`FLEN - ie quad or double)
                 `FMT1: ExpNonZero = |In[`LEN1-2:`NF1];  // if input is larger precsion (`LEN1 - double or single)
                 `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half)
@@ -166,7 +166,7 @@ module unpackinput (
 
         // convert the larger precision's exponent to use the largest precision's bias
         always_comb 
-            case (FmtE)
+            case (Fmt)
                 `FMT:  Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
                 `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; 
                 `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; 
@@ -175,7 +175,7 @@ module unpackinput (
 
         // is the exponent all 1's
         always_comb
-            case (FmtE)
+            case (Fmt)
                 `FMT:  ExpMax = &In[`FLEN-2:`NF];
                 `FMT1: ExpMax = &In[`LEN1-2:`NF1];
                 `FMT2: ExpMax = &In[`LEN2-2:`NF2];
@@ -194,7 +194,7 @@ module unpackinput (
 
         // Check NaN boxing
         always_comb
-            case (FmtE)
+            case (Fmt)
                 2'b11:  BadNaNBox = 0;
                 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
                 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
@@ -203,7 +203,7 @@ module unpackinput (
 
         // extract sign bit
         always_comb
-            case (FmtE)
+            case (Fmt)
                 2'b11: Sgn = In[`Q_LEN-1];
                 2'b01: Sgn = In[`D_LEN-1];
                 2'b00: Sgn = In[`S_LEN-1];
@@ -213,7 +213,7 @@ module unpackinput (
 
         // extract the fraction
         always_comb
-            case (FmtE)
+            case (Fmt)
                 2'b11: Frac = In[`Q_NF-1:0];
                 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
                 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
@@ -222,7 +222,7 @@ module unpackinput (
 
         // is the exponent non-zero
         always_comb
-            case (FmtE)
+            case (Fmt)
                 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
                 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
                 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; 
@@ -240,7 +240,7 @@ module unpackinput (
         
         // convert the double precsion exponent into quad precsion
         always_comb
-            case (FmtE)
+            case (Fmt)
                 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
                 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
                 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
@@ -250,7 +250,7 @@ module unpackinput (
 
         // is the exponent all 1's
         always_comb 
-            case (FmtE)
+            case (Fmt)
                 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
                 2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
                 2'b00: ExpMax = &In[`S_LEN-2:`S_NF];

From a30d9c6bd8783761b9d9c57f3efd51c82eda4ab6 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Wed, 20 Jul 2022 21:57:23 +0000
Subject: [PATCH 19/36] turn off 2 word store during non-fp instructions

---
 pipelined/src/fpu/fctrl.sv       | 26 +++++++++++++++++++++++++-
 pipelined/src/fpu/fpu.sv         |  7 ++++---
 pipelined/src/fpu/unpackinput.sv |  1 +
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv
index 85047248d..ea98651b7 100755
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@@ -41,7 +41,7 @@ module fctrl (
   input  logic [2:0] FRM_REGW,  // rounding mode from CSR
   input  logic [1:0] STATUS_FS, // is FPU enabled?
   input  logic       FDivBusyE,  // is the divider busy
-  output logic       IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
+  output logic       IllegalFPUInstrD, IllegalFPUInstrM, // Is the instruction an illegal fpu instruction
   output logic 		         FRegWriteM, FRegWriteW, // FP register write enable
   output logic [2:0] 	      FrmM,                   // FP rounding mode
   output logic [`FMTBITS-1:0] FmtE, FmtM,             // FP format
@@ -55,6 +55,7 @@ module fctrl (
 
   `define FCTRLW 11
   logic [`FCTRLW-1:0] ControlsD;
+  logic       IllegalFPUInstrE;
   logic 		  FRegWriteD; // FP register write enable
   logic 		  DivStartD; // integer register write enable
   logic 		  FWriteIntD; // integer register write enable
@@ -171,6 +172,25 @@ module fctrl (
     else if (`FPSIZES == 3|`FPSIZES == 4)
       assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
 
+      
+//     // signals to help readability
+//     assign IntToFp = OpCtrl[2];
+//     assign CvtOp = (PostProcSelE == 2'b00)&(FResSelE == 2'b01);
+//     assign FmaOp = (PostProcSelE == 2'b10)&(FResSelE == 2'b01);
+//     assign Sqrt =  OpCtrl[0];
+
+//     // is there an input of infinity or NaN being used
+//     assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp);
+//     assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp);
+
+// // enables:
+// //    X - all except int->fp, store, load, mv int->fp
+// //    Y - all except cvt, mv, load, class
+// //    Z - fma ops only
+//     assign XEnE = ;
+//     assign YEnE = ~((FResSel==2'b10));
+//     assign ZEnE = FmaOp&~OpCtrlE[2];
+
 //  Final Res Sel:
 //        fp      int
 //  00  other     cmp
@@ -228,10 +248,14 @@ module fctrl (
    flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, 
                            {Adr1E, Adr2E, Adr3E});
   flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE);
+  if(`FLEN>`XLEN)
+    flopenrc #(1) DEIllegalReg(clk, reset, FlushE, ~StallE, IllegalFPUInstrD, IllegalFPUInstrE);
   // E/M pipleine register
   flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
               {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE},
               {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM});
+  if(`FLEN>`XLEN)
+    flopenrc #(1) EMIllegalReg(clk, reset, FlushM, ~StallM, IllegalFPUInstrE, IllegalFPUInstrM);
   // M/W pipleine register
   flopenrc #(3)  MWCtrlReg(clk, reset, FlushW, ~StallW,
           {FRegWriteM, FResSelM},
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 6d9b9cf47..84f8e04df 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -72,6 +72,7 @@ module fpu (
    logic [1:0] 	      FResSelE, FResSelM;       // Select one of the results that finish in the memory stage
    logic [1:0] 	      PostProcSelE, PostProcSelM; // select result in the post processing unit
    logic [4:0] 	      Adr1E, Adr2E, Adr3E;                // adresses of each input
+   logic                IllegalFPUInstrM;
 
    // regfile signals
    logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
@@ -163,7 +164,7 @@ module fpu (
    fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD,
                .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
                .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
-               .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM,
+               .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM,
                .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E);
 
    // FP register file
@@ -290,8 +291,8 @@ module fpu (
       assign FWriteDataE = YE[`XLEN-1:0]; 
    end else begin
       logic [`FLEN-1:0] FWriteDataE;
-      if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT;
-      else assign FStore2 = FmtM;
+      if(`FMTBITS == 2) assign FStore2 = (FmtM == `FMT)&~IllegalFPUInstrM;
+      else assign FStore2 = FmtM&~IllegalFPUInstrM;
 
       if (`FPSIZES==1) assign FWriteDataE = YE;
       else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}};
diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv
index 7be922508..699d88958 100644
--- a/pipelined/src/fpu/unpackinput.sv
+++ b/pipelined/src/fpu/unpackinput.sv
@@ -30,6 +30,7 @@
 
 module unpackinput ( 
     input logic  [`FLEN-1:0]        In,    // inputs from register file
+    // input logic                     En,     // enable the input
     input logic  [`FMTBITS-1:0]     Fmt,       // format signal 00 - single 01 - double 11 - quad 10 - half
     output logic                    Sgn,    // sign bits of XYZ
     output logic [`NE-1:0]          Exp,    // exponents of XYZ (converted to largest supported precision)

From 7950a675ea77fe516515a15d2639df330648d2c3 Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 21 Jul 2022 01:20:06 +0000
Subject: [PATCH 20/36] added input enables and improved forwarding

---
 pipelined/src/fpu/fctrl.sv          | 29 ++++-----
 pipelined/src/fpu/fhazard.sv        | 42 +++++++------
 pipelined/src/fpu/flags.sv          |  5 +-
 pipelined/src/fpu/fpu.sv            | 14 +++--
 pipelined/src/fpu/postprocess.sv    |  6 +-
 pipelined/src/fpu/specialcase.sv    |  4 +-
 pipelined/src/fpu/unpack.sv         |  7 ++-
 pipelined/src/fpu/unpackinput.sv    |  6 +-
 pipelined/testbench/testbench-fp.sv | 95 +++++++++++++++--------------
 9 files changed, 107 insertions(+), 101 deletions(-)

diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv
index ea98651b7..5b6b22ef0 100755
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@@ -46,6 +46,8 @@ module fctrl (
   output logic [2:0] 	      FrmM,                   // FP rounding mode
   output logic [`FMTBITS-1:0] FmtE, FmtM,             // FP format
   output logic 		         DivStartE,             // Start division or squareroot
+  output logic              XEnE, YEnE, ZEnE,
+  output logic              YEnForwardE, ZEnForwardE,
   output logic 		         FWriteIntE, FWriteIntM,                         // Write to integer register
   output logic [2:0] 	      OpCtrlE, OpCtrlM,       // Select which opperation to do in each component
   output logic [1:0] 	      FResSelE, FResSelM, FResSelW,       // Select one of the results that finish in the memory stage
@@ -173,23 +175,18 @@ module fctrl (
       assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
 
       
-//     // signals to help readability
-//     assign IntToFp = OpCtrl[2];
-//     assign CvtOp = (PostProcSelE == 2'b00)&(FResSelE == 2'b01);
-//     assign FmaOp = (PostProcSelE == 2'b10)&(FResSelE == 2'b01);
-//     assign Sqrt =  OpCtrl[0];
 
-//     // is there an input of infinity or NaN being used
-//     assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp);
-//     assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp);
-
-// // enables:
-// //    X - all except int->fp, store, load, mv int->fp
-// //    Y - all except cvt, mv, load, class
-// //    Z - fma ops only
-//     assign XEnE = ;
-//     assign YEnE = ~((FResSel==2'b10));
-//     assign ZEnE = FmaOp&~OpCtrlE[2];
+// enables:
+//    X - all except int->fp, store, load, mv int->fp
+//    Y - all except cvt, mv, load, class
+//    Z - fma ops only
+//                  load/store                        mv int->fp                      cvt int->fp
+    assign XEnE = ~(((FResSelE==2'b10)&~FWriteIntE)|((FResSelE==2'b11)&FRegWriteE)|((FResSelE==2'b01)&(PostProcSelE==2'b00)&OpCtrlE[2]));
+//                  load/class                                    mv               cvt
+    assign YEnE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&(PostProcSelE==2'b00)));    
+    assign ZEnE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&(~OpCtrlE[2]|OpCtrlE[1]);
+    assign YEnForwardE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&(PostProcSelE==2'b00)));    
+    assign ZEnForwardE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&~OpCtrlE[2];
 
 //  Final Res Sel:
 //        fp      int
diff --git a/pipelined/src/fpu/fhazard.sv b/pipelined/src/fpu/fhazard.sv
index 36a0ff82f..690e04ebb 100644
--- a/pipelined/src/fpu/fhazard.sv
+++ b/pipelined/src/fpu/fhazard.sv
@@ -35,6 +35,7 @@ module fhazard(
     input  logic        FRegWriteM, FRegWriteW, // is the fp register being written to
 	  input  logic [4:0]  RdM, RdW,               // the adress being written to
     input  logic [1:0]  FResSelM,            // the result being selected
+    input  logic        XEnE, YEnE, ZEnE,
     output logic        FStallD,                // stall the decode stage
     output logic [1:0]  ForwardXE, ForwardYE, ForwardZE // select a forwarded value
 );
@@ -47,33 +48,34 @@ module fhazard(
     ForwardZE = 2'b00; // choose FRD3E
     FStallD = 0;
 
-    //*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
-
     // if the needed value is in the memory stage - input 1
-    if ((Adr1E == RdM) & FRegWriteM) 
-      // if the result will be FResM (can be taken from the memory stage)
-      if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM
-      else FStallD = 1;                             // otherwise stall
-    // if the needed value is in the writeback stage
-    else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W
+    if(XEnE) // skip forwarding and stalls for X when its enable is low
+      if ((Adr1E == RdM) & FRegWriteM) 
+        // the memory-stage value can be forwarded only when FResSelM is 2'b00
+        if(FResSelM == 2'b00) ForwardXE = 2'b10; // forward from the memory stage
+        else FStallD = 1;                             // otherwise stall decode
+      // this else pairs with the RdM check above: the needed value is in the writeback stage
+      else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // forward from the writeback stage
   
 
     // if the needed value is in the memory stage - input 2
-    if ((Adr2E == RdM) & FRegWriteM)
-      // if the result will be FResM (can be taken from the memory stage)
-      if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM
-      else FStallD = 1;                             // otherwise stall
-    // if the needed value is in the writeback stage
-    else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W
+    if(YEnE) // skip forwarding and stalls for Y when its enable is low
+      if ((Adr2E == RdM) & FRegWriteM)
+        // the memory-stage value can be forwarded only when FResSelM is 2'b00
+        if(FResSelM == 2'b00) ForwardYE = 2'b10; // forward from the memory stage
+        else FStallD = 1;                             // otherwise stall decode
+      // this else pairs with the RdM check above: the needed value is in the writeback stage
+      else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // forward from the writeback stage
 
 
     // if the needed value is in the memory stage - input 3
-    if ((Adr3E == RdM) & FRegWriteM)
-      // if the result will be FResM (can be taken from the memory stage)
-      if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM
-      else FStallD = 1;                             // otherwise stall
-    // if the needed value is in the writeback stage
-    else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W
+    if(ZEnE) // skip forwarding and stalls for Z when its enable is low
+      if ((Adr3E == RdM) & FRegWriteM)
+        // the memory-stage value can be forwarded only when FResSelM is 2'b00
+        if(FResSelM == 2'b00) ForwardZE = 2'b10; // forward from the memory stage
+        else FStallD = 1;                             // otherwise stall decode
+      // this else pairs with the RdM check above: the needed value is in the writeback stage
+      else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // forward from the writeback stage
 
   end 
 
diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv
index 6b1bc6381..c169ab2fa 100644
--- a/pipelined/src/fpu/flags.sv
+++ b/pipelined/src/fpu/flags.sv
@@ -37,7 +37,6 @@ module flags(
     input logic                 NaNIn,                  // is a NaN input being used
     input logic [`FMTBITS-1:0]  OutFmt,                 // output format
     input logic                 XZero, YZero,         // inputs are zero
-    input logic                 XNaN, YNaN,           // inputs are NaN
     input logic                 Sqrt,                   // Sqrt?
     input logic                 ToInt,                  // convert to integer
     input logic                 IntToFp,                // convert integer to floating point
@@ -153,11 +152,11 @@ module flags(
     //                  |           |                                  |                    |               or the res rounds up out of bounds
     //                  |           |                                  |                    |                       and the res didn't underflow
     //                  |           |                                  |                    |                       |
-    assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
+    assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
     //                                                                                                     |
     //                                                                                                     or when the positive res rounds up out of range
     assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);
-    assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~XNaN & ~YNaN) | (XZero & YInf) | (YZero & XInf);
+    assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf);
     assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt);
 
     assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index 84f8e04df..cfa46b657 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -73,6 +73,8 @@ module fpu (
    logic [1:0] 	      PostProcSelE, PostProcSelM; // select result in the post processing unit
    logic [4:0] 	      Adr1E, Adr2E, Adr3E;                // adresses of each input
    logic                IllegalFPUInstrM;
+   logic                XEnE, YEnE, ZEnE;                   // enables for the X, Y, Z inputs (drive unpack's XEn/YEn/ZEn)
+   logic                YEnForwardE, ZEnForwardE;           // Y and Z enables passed to the hazard unit's YEnE/ZEnE ports
 
    // regfile signals
    logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D;                // Read Data from FP register - decode stage
@@ -163,8 +165,8 @@ module fpu (
    // calculate FP control signals
    fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD,
                .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
-               .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
-               .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM,
+               .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE,
+               .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE,
                .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E);
 
    // FP register file
@@ -193,7 +195,7 @@ module fpu (
    // Hazard unit for FPU  
    //    - determines if any forwarding or stalls are needed
    fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, 
-                  .FStallD, .ForwardXE, .ForwardYE, .ForwardZE);
+                   .XEnE, .YEnE(YEnForwardE), .ZEnE(ZEnForwardE), .FStallD, .ForwardXE, .ForwardYE, .ForwardZE);
 
    // forwarding muxs
    mux3  #(`FLEN)  fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE);
@@ -233,11 +235,11 @@ module fpu (
    //    - splits FP inputs into their various parts
    //    - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity)
    unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), 
-                  .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), 
-                  .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), 
+                  .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE),
+                  .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), 
                   .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE), 
                   .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), 
-                  .ZInf(ZInfE), .XExpMax(XExpMaxE));
+                  .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE));
    
    // fused multiply add
    //    - fadd/fsub
diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv
index d3169d471..4d9dc310f 100644
--- a/pipelined/src/fpu/postprocess.sv
+++ b/pipelined/src/fpu/postprocess.sv
@@ -135,8 +135,8 @@ module postprocess (
     assign Sqrt =  OpCtrl[0];
 
     // is there an input of infinity or NaN being used
-    assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp);
-    assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp);
+    assign InfIn = XInf|YInf|ZInf; // NOTE(review): no per-op masking here; assumes XInf/YInf/ZInf are already gated by the unpack enables - confirm
+    assign NaNIn = XNaN|YNaN|ZNaN; // NOTE(review): same assumption for the NaN flags
 
     // choose the ouptut format depending on the opperation
     //      - fp -> fp: OpCtrl contains the percision of the output
@@ -219,7 +219,7 @@ module postprocess (
 
     flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, 
                 .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
-                .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
+                .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero,
                 .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
                 .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
 
diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv
index 6014962a1..41e75110f 100644
--- a/pipelined/src/fpu/specialcase.sv
+++ b/pipelined/src/fpu/specialcase.sv
@@ -280,14 +280,14 @@ module specialcase(
     //      other: 32 bit unsinged res should be sign extended as if it were a signed number
     always_comb
         if(Signed)
-            if(Xs&~XNaN)    // signed negitive
+            if(Xs&~NaNIn)    // signed negative
                 if(Int64)   OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
                 else        OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
             else            // signed positive
                 if(Int64)   OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
                 else        OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
         else
-            if(Xs&~XNaN)    OfIntRes = {`XLEN{1'b0}}; // unsigned negitive
+            if(Xs&~NaNIn)    OfIntRes = {`XLEN{1'b0}}; // unsigned negative
             else            OfIntRes = {`XLEN{1'b1}}; // unsigned positive
 
 
diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv
index 050839c2f..4053cba13 100644
--- a/pipelined/src/fpu/unpack.sv
+++ b/pipelined/src/fpu/unpack.sv
@@ -31,6 +31,7 @@
 module unpack ( 
     input logic  [`FLEN-1:0]        X, Y, Z,    // inputs from register file
     input logic  [`FMTBITS-1:0]     Fmt,       // format signal 00 - single 01 - double 11 - quad 10 - half
+    input logic                     XEn, YEn, ZEn, // per-input enables, each passed to the matching unpackinput instance
     output logic                    Xs, Ys, Zs,    // sign bits of XYZ
     output logic [`NE-1:0]          Xe, Ye, Ze,    // exponents of XYZ (converted to largest supported precision)
     output logic [`NF:0]            Xm, Ym, Zm,    // mantissas of XYZ (converted to largest supported precision)
@@ -46,15 +47,15 @@ module unpack (
     logic           XFracZero, YFracZero, ZFracZero; // is the fraction zero
     logic           YExpMax, ZExpMax;  // is the exponent all 1s
     
-    unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), 
+    unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
                             .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
                             .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero));
 
-    unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), 
+    unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
                             .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
                             .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero));
 
-    unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), 
+    unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
                             .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
                             .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero));
     // is the input denormalized
diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv
index 699d88958..4e43768c4 100644
--- a/pipelined/src/fpu/unpackinput.sv
+++ b/pipelined/src/fpu/unpackinput.sv
@@ -30,7 +30,7 @@
 
 module unpackinput ( 
     input logic  [`FLEN-1:0]        In,    // inputs from register file
-    // input logic                     En,     // enable the input
+    input logic                     En,     // enable the input
     input logic  [`FMTBITS-1:0]     Fmt,       // format signal 00 - single 01 - double 11 - quad 10 - half
     output logic                    Sgn,    // sign bits of XYZ
     output logic [`NE-1:0]          Exp,    // exponents of XYZ (converted to largest supported precision)
@@ -263,8 +263,8 @@ module unpackinput (
     // Output logic
     assign FracZero = ~|Frac; // is the fraction zero?
     assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand
-    assign NaN = (ExpMax & ~FracZero)|BadNaNBox; // is the input a NaN?
+    assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN? (always 0 when the input is disabled)
     assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN?
-    assign Inf = ExpMax & FracZero; // is the input infinity?
+    assign Inf = ExpMax & FracZero &En; // is the input infinity? (always 0 when the input is disabled)
     assign Zero = ~ExpNonZero & FracZero; // is the input zero?
 endmodule
\ No newline at end of file
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 91ce82616..19b637478 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -66,9 +66,9 @@ module testbenchfp;
   logic [`XLEN-1:0]     IntRes, CmpRes;  // Results from each unit
   logic [4:0]           FmaFlg, CvtFlg, DivFlg, CmpFlg;  // Outputed flags
   logic                 AnsNaN, ResNaN, NaNGood;
-  logic                 XSgn, YSgn, ZSgn;                     // sign of the inputs
-  logic [`NE-1:0]       XExp, YExp, ZExp;                     // exponent of the inputs
-  logic [`NF:0]         XMan, YMan, ZMan;                     // mantissas of the inputs
+  logic                 Xs, Ys, Zs;                     // sign of the inputs
+  logic [`NE-1:0]       Xe, Ye, Ze;                     // exponent of the inputs
+  logic [`NF:0]         Xm, Ym, Zm;                     // mantissas of the inputs
   logic                 XNaN, YNaN, ZNaN;                     // is the input NaN
   logic                 XSNaN, YSNaN, ZSNaN;                  // is the input a signaling NaN
   logic                 XDenorm, ZDenorm;            // is the input denormalized
@@ -99,7 +99,7 @@ module testbenchfp;
   logic [`NE+1:0]	      Se;
   logic 				        ZmSticky;
   logic 					      KillProd; 
-  logic [$clog2(3*`NF+7)-1:0]	NCnt;
+  logic [$clog2(3*`NF+7)-1:0]	SCnt;
   logic [3*`NF+5:0]	    Sm;       
   logic 			          InvA;
   logic 			          NegSum;
@@ -650,14 +650,14 @@ module testbenchfp;
 
   // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
   readvectors readvectors          (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, 
-                                    .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
-                                    .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
-                                    .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart,
-                                    .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN),
-                                    .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), 
-                                    .XDenormE(XDenorm), .ZDenormE(ZDenorm), 
-                                    .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
-                                    .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf), .XExpMaxE(XExpMax),
+                                    .Xs, .Ys, .Zs, .Unit(UnitVal),
+                                    .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal),
+                                    .Xm, .Ym, .Zm, .DivStart,
+                                    .XNaN, .YNaN, .ZNaN,
+                                    .XSNaN, .YSNaN, .ZSNaN, 
+                                    .XDenorm, .ZDenorm, 
+                                    .XZero, .YZero, .ZZero,
+                                    .XInf, .YInf, .ZInf, .XExpMax,
                                     .X, .Y, .Z);
 
 
@@ -673,34 +673,34 @@ module testbenchfp;
   ///////////////////////////////////////////////////////////////////////////////////////////////
 
   // instantiate devices under test
-  fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), 
-          .Xe(XExp), .Ye(YExp), .Ze(ZExp), 
-          .Xm(XMan), .Ym(YMan), .Zm(ZMan),
+  fma fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), 
+          .Xe(Xe), .Ye(Ye), .Ze(Ze), 
+          .Xm(Xm), .Ym(Ym), .Zm(Zm),
           .XZero, .YZero, .ZZero, .Ss, .Se,
-          .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps,
+          .OpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .SCnt, .As, .Ps,
           .Pe, .ZmSticky, .KillProd); 
               
-  postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]),
-              .Ze(ZExp),  .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
-              .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss),
+  postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]),
+              .Ze(Ze),  .ZDenorm(ZDenorm), .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp),
+              .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss),
               .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE),
               .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE),
               .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal),
               .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero,
               .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSe(Se),
-              .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
+              .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaSCnt(SCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), 
               .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes));
   
-  fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), 
-            .XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero,
+  fcvt fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), 
+            .XZero(XZero), .XDenorm(XDenorm), .OpCtrl(OpCtrlVal), .IntZero,
             .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE));
-  fcmp fcmp   (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), 
-              .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
-              .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
-  divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XManE(XMan), .YManE(YMan), .XExpE(XExp), .YExpE(YExp), 
+  fcmp fcmp   (.Fmt(ModFmt), .OpCtrl(OpCtrlVal), .Xs, .Ys, .Xe, .Ye, 
+              .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes),
+              .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
+  divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), 
                   .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), 
-                  .StallE(1'b0), .StallM(1'b0), .DivStickyM(DivSticky), .DivBusy, .DivCalcExpM(DivCalcExp),
-                  .EarlyTermShiftM(EarlyTermShift), .QuotM(Quot), .DivDone);
+                  .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp),
+                  .EarlyTermShiftM(EarlyTermShift), .QmM(Quot), .DivDone);
 
   assign CmpFlg[3:0] = 0;
 
@@ -868,10 +868,10 @@ end
 
     // Testfloat outputs 800... for both the largest integer values for both positive and negitive numbers but 
     // the riscv spec specifies 2^31-1 for positive values out of range and NaNs ie 7fff...
-    else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&(Res[`XLEN-1:0] === (`XLEN)'(0))) | 
-            (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) | 
-            (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | 
-            (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin
+    else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&(Res[`XLEN-1:0] === (`XLEN)'(0))) | 
+            (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) | 
+            (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | 
+            (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin
       errors += 1;
       $display("There is an error in %s", Tests[TestNum]);
       $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg);
@@ -924,18 +924,19 @@ module readvectors (
   output logic [`FLEN-1:0] Ans,
   output logic [`XLEN-1:0] SrcA,
   output logic [4:0] AnsFlg,
-  output logic                    XSgnE, YSgnE, ZSgnE,    // sign bits of XYZ
-  output logic [`NE-1:0]          XExpE, YExpE, ZExpE,    // exponents of XYZ (converted to largest supported precision)
-  output logic [`NF:0]            XManE, YManE, ZManE,    // mantissas of XYZ (converted to largest supported precision)
-  output logic                    XNaNE, YNaNE, ZNaNE,    // is XYZ a NaN
-  output logic                    XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
-  output logic                    XDenormE, ZDenormE,   // is XYZ denormalized
-  output logic                    XZeroE, YZeroE, ZZeroE,         // is XYZ zero
-  output logic                    XInfE, YInfE, ZInfE,            // is XYZ infinity
-  output logic                    XExpMaxE,
+  output logic                    Xs, Ys, Zs,    // sign bits of XYZ
+  output logic [`NE-1:0]          Xe, Ye, Ze,    // exponents of XYZ (converted to largest supported precision)
+  output logic [`NF:0]            Xm, Ym, Zm,    // mantissas of XYZ (converted to largest supported precision)
+  output logic                    XNaN, YNaN, ZNaN,    // is XYZ a NaN
+  output logic                    XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
+  output logic                    XDenorm, ZDenorm,   // is XYZ denormalized
+  output logic                    XZero, YZero, ZZero,         // is XYZ zero
+  output logic                    XInf, YInf, ZInf,            // is XYZ infinity
+  output logic                    XExpMax,
   output logic                    DivStart,
   output logic [`FLEN-1:0] X, Y, Z
 );
+  logic XEn, YEn, ZEn; // input enables passed to the unpack instance below
 
   // apply test vectors on rising edge of clk
   // Format of vectors Inputs(1/2/3)_AnsFlg
@@ -1257,8 +1258,12 @@ module readvectors (
     endcase  
   end
   
-  unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
-                .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
-                .XDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
-                .XExpMaxE);
+  assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]); // X is disabled for CVTINTUNIT tests with OpCtrl[2] set (presumably int->fp - confirm)
+  assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)); // Y is disabled for both conversion units
+  assign ZEn = (Unit == `FMAUNIT); // Z is enabled only for FMA tests
+  
+  unpack unpack(.X, .Y, .Z, .Fmt(ModFmt), .Xs, .Ys, .Zs, .Xe, .Ye, .Ze,
+                .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN,
+                .XDenorm, .ZDenorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf,
+                .XEn, .YEn, .ZEn, .XExpMax);
 endmodule
\ No newline at end of file

From e46e96e0805ba4a5306517720cd0ab202cec7949 Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Thu, 21 Jul 2022 10:14:05 -0700
Subject: [PATCH 21/36] changed the default branch of embench

---
 .gitmodules        | 5 +++++
 addins/embench-iot | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/.gitmodules b/.gitmodules
index 81ed2d5f4..bf7fdaefd 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -17,6 +17,11 @@
 [submodule "addins/embench-iot"]
 	path = addins/embench-iot
 	url = https://github.com/embench/embench-iot
+	branch = embench-1.0-branch
 [submodule "addins/coremark"]
 	path = addins/coremark
 	url = https://github.com/eembc/coremark
+[submodule "embench"]
+	branch = embench-1.0-branch
+[submodule "embench-iot"]
+	branch = embench-1.0-branch
diff --git a/addins/embench-iot b/addins/embench-iot
index 261a65e0a..58ffa0c68 160000
--- a/addins/embench-iot
+++ b/addins/embench-iot
@@ -1 +1 @@
-Subproject commit 261a65e0a2d3e8d62d81b1d8fe7e309a096bc6a9
+Subproject commit 58ffa0c68c52f291d12c5902fc787d2bca94ddf9

From 16e4260ddab1e7b038bb11cbf0531c1792767eca Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Thu, 21 Jul 2022 10:15:13 -0700
Subject: [PATCH 22/36] fixed gitmodules

---
 .gitmodules | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index bf7fdaefd..ab45d3f96 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -21,7 +21,3 @@
 [submodule "addins/coremark"]
 	path = addins/coremark
 	url = https://github.com/eembc/coremark
-[submodule "embench"]
-	branch = embench-1.0-branch
-[submodule "embench-iot"]
-	branch = embench-1.0-branch

From 4793267bd7619c5d9433406882859cbdd2597ec4 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Thu, 21 Jul 2022 17:36:21 +0000
Subject: [PATCH 23/36] Updated Radix2 Sqrt to follow new algorithm

---
 pipelined/srt/srt.sv | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 27fac324e..157be2e7f 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -213,8 +213,8 @@ module fsel2 (
   logic [`DIVLEN+3:0] FP, FN, FZ;
   
   // Generate for both positive and negative bits
-  assign FP = ~S & C;
-  assign FN = SM | (C & (~C << 2));
+  assign FP = ~(S << 1) & C;
+  assign FN = (SM << 1) | (C & (~C << 2));
   assign FZ = '0;
 
   // Choose which adder input will be used
@@ -283,22 +283,22 @@ module sotfc2(
   logic [`DIVLEN+3:0] SNext, SMNext, SMux;
 
   flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
-  mux2 #(`DIVLEN+4) Smux(SNext, {2'b00, Sqrt, {(`DIVLEN+1){1'b0}}}, Start, SMux);
+  mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux);
   flop #(`DIVLEN+4) Sreg(clk, SMux, S);
 
   always_comb begin
     if (sp) begin
-      SNext  = S | ((C << 1) & ~(C << 2));
+      SNext  = S | (C & ~(C << 1));
       SMNext = S;
     end else if (sn) begin
-      SNext  = SM | ((C << 1) & ~(C << 2));
+      SNext  = SM | (C & ~(C << 1));
       SMNext = SM;
     end else begin        // If sp and sn are not true, then sz is
       SNext  = S;
-      SMNext = SM | ((C << 1) & ~(C << 2));
+      SMNext = SM | (C & ~(C << 1));
     end 
   end
-  assign Sq = S[`DIVLEN+1] ? S[`DIVLEN:2] : S[`DIVLEN-1:1];
+  assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0];
 endmodule
 
 //////////////////////////
@@ -311,7 +311,7 @@ module creg(input  logic clk,
 );
   logic [`DIVLEN+3:0] CMux;
 
-  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b11111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux);
+  mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux);
   flop #(`DIVLEN+4) cflop(clk, CMux, C);
 endmodule
 

From 86ebdd05f0753693a1d86e27ea48f7e897e4fe90 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Thu, 21 Jul 2022 17:59:10 +0000
Subject: [PATCH 24/36] Division working too

---
 pipelined/srt/srt.sv       | 2 +-
 pipelined/srt/testbench.sv | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 157be2e7f..a7216b9ff 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -2,7 +2,7 @@
 // srt.sv
 //
 // Written: David_Harris@hmc.edu 13 January 2022
-// Modified: cturek@hmc.edu June 2022
+// Modified: cturek@hmc.edu July 2022
 //
 // Purpose: Combined Divide and Square Root Floating Point and Integer Unit
 // 
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index 39696af44..7a4e1897b 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -72,7 +72,7 @@ module testbench;
 
   // Equip Int test or Sqrt test
   assign Int = 1'b0;
-  assign Sqrt = 1'b1;
+  assign Sqrt = 1'b0;
 
   // Divider
   srt srt(.clk, .Start(req), 
@@ -101,7 +101,7 @@ module testbench;
     begin
       testnum = 0; 
       errors = 0;
-      $readmemh ("sqrttestvectors", Tests);
+      $readmemh ("testvectors", Tests);
       Vec = Tests[testnum];
       a = Vec[`mema];
       {asign, aExp, afrac} = a;

From fbe8bb2298413d731f70306e100e8cc3881222ec Mon Sep 17 00:00:00 2001
From: Katherine Parry <kparry4@gmail.com>
Date: Thu, 21 Jul 2022 19:38:06 +0000
Subject: [PATCH 25/36] radix-4 division integrated into srt - not tested

---
 addins/embench-iot                      |   2 +-
 pipelined/config/shared/wally-shared.vh |   2 +-
 pipelined/regression/wave-fpu.do        |   2 +-
 pipelined/src/fpu/divsqrt.sv            |  13 +--
 pipelined/src/fpu/fctrl.sv              |   4 +-
 pipelined/src/fpu/fpu.sv                |   2 +-
 pipelined/src/fpu/otfc.sv               |  70 ++++++++++++++++
 pipelined/src/fpu/qsel.sv               |  89 +++++++++++++++++----
 pipelined/src/fpu/srt.sv                | 102 +++++++++++-------------
 pipelined/src/fpu/srtpreproc.sv         |  78 +++++++++++++++---
 pipelined/testbench/testbench-fp.sv     |   2 +-
 11 files changed, 271 insertions(+), 95 deletions(-)

diff --git a/addins/embench-iot b/addins/embench-iot
index 58ffa0c68..261a65e0a 160000
--- a/addins/embench-iot
+++ b/addins/embench-iot
@@ -1 +1 @@
-Subproject commit 58ffa0c68c52f291d12c5902fc787d2bca94ddf9
+Subproject commit 261a65e0a2d3e8d62d81b1d8fe7e309a096bc6a9
diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh
index 015ef2611..b2abdff7b 100644
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@@ -101,7 +101,7 @@
 `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6))
 
 // division constants
-`define RADIX 32'h2
+`define RADIX 32'h4
 `define DIVCOPIES 32'h1
 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3))
 `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3)
diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do
index 98c72f170..b71207e09 100644
--- a/pipelined/regression/wave-fpu.do
+++ b/pipelined/regression/wave-fpu.do
@@ -33,7 +33,7 @@ add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/intera
 # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/*
 # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/*
 add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/*
-add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/*
+# add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/*
 add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/*
 add wave -group {Testbench} -noupdate /testbenchfp/*
 add wave -group {Testbench} -noupdate /testbenchfp/readvectors/*
diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv
index a2f0ba8e3..7ba44a953 100644
--- a/pipelined/src/fpu/divsqrt.sv
+++ b/pipelined/src/fpu/divsqrt.sv
@@ -41,7 +41,8 @@ module divsqrt(
   input  logic XNaNE, YNaNE, 
   input  logic DivStartE, 
   input  logic StallM,
-  input logic StallE,
+  input  logic StallE,
+  input  logic SqrtE, SqrtM,
   output logic DivSM,
   output logic DivBusy,
   output logic DivDone,
@@ -55,15 +56,15 @@ module divsqrt(
   logic [`DIVLEN+3:0]  WS, WC;
   logic [`DIVLEN+3:0] StickyWSA;
   logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt;
-  logic [`DIVLEN-1:0] X;
-  logic [`DIVLEN-1:0] Dpreproc;
+  logic [`DIVLEN+3:0] X;
+  logic [`DIVLEN+3:0] Dpreproc;
   logic [`DURLEN-1:0] Dur;
   logic NegSticky;
 
-  srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt);
+  srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .XZeroCnt, .YZeroCnt);
 
   srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE,
                .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM));
-  srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
-                .StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM);
+  srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE,
+                .StickyWSA, .DivBusy, .Qm(QmM), .Rem());
 endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv
index 5b6b22ef0..20e4a0099 100755
--- a/pipelined/src/fpu/fctrl.sv
+++ b/pipelined/src/fpu/fctrl.sv
@@ -219,8 +219,8 @@ module fctrl (
 //        110 - add
 //        111 - sub
 //    Div: 
-//        0 - ???
-//        1 - ???
+//        0 - div
+//        1 - sqrt
 //    Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
 //    Cvt Fp: output format
 //        10 - to half
diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv
index cfa46b657..3e214b0f1 100755
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@@ -259,7 +259,7 @@ module fpu (
    //    - fdiv
    //    - fsqrt
    // *** add other opperations
-   divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, 
+   divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
                   .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), 
                   .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal
                   .EarlyTermShiftM, .QmM, .DivDone(DivDoneM));
diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv
index 66af5b3c5..7ecb823e6 100644
--- a/pipelined/src/fpu/otfc.sv
+++ b/pipelined/src/fpu/otfc.sv
@@ -58,6 +58,41 @@ module otfc2 (
 
 endmodule
 
+///////////////////////////////
+// Square Root OTFC, Radix 2 //
+///////////////////////////////
+module sotfc2(
+  input  logic         clk,
+  input  logic         Start,   // select of Smux; also the second port of SMreg (presumably its reset -- confirm)
+  input  logic         sp, sn,  // digit selects: sp is checked first, then sn; neither set is the zero digit
+  input  logic         Sqrt,    // placed at bit DIVLEN of the constant fed to Smux
+  input  logic [`DIVLEN+3:0] C, // mask; C & ~(C << 1) keeps set bits whose next-lower bit is clear
+  output logic [`DIVLEN-2:0] Sq, // DIVLEN-1 bit slice of S, see the assign at the end
+  output logic [`DIVLEN+3:0] S, SM // registered outputs of Sreg and SMreg
+);
+  //  Radix-2 on-the-fly converter for square root: each step forms SNext
+  //  and SMNext from S, SM and the mask C according to the selected digit.
+  //  NOTE(review): SM presumably tracks S minus one ulp -- confirm.
+  logic [`DIVLEN+3:0] SNext, SMNext, SMux;
+
+  flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM);
+  mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); // presumably picks the constant when Start is high -- confirm mux2 port order
+  flop #(`DIVLEN+4) Sreg(clk, SMux, S);
+
+  always_comb begin
+    if (sp) begin         // sp: extend S with the new bit, SM takes the old S
+      SNext  = S | (C & ~(C << 1));
+      SMNext = S;
+    end else if (sn) begin // sn: both values continue from SM
+      SNext  = SM | (C & ~(C << 1));
+      SMNext = SM;
+    end else begin        // neither sp nor sn: zero digit, S holds and SM gains the bit
+      SNext  = S;
+      SMNext = SM | (C & ~(C << 1));
+    end 
+  end
+  assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; // when S[DIVLEN] is set take the slice one bit higher
+endmodule
 
 module otfc4 (
   input  logic [3:0]   q,
@@ -110,3 +145,38 @@ module otfc4 (
   // Final Qmeint is in the range [.5, 2)
 
 endmodule
+
+///////////////////////////////
+// Square Root OTFC, Radix 4 //
+///////////////////////////////
+module sotfc4(
+  input  logic [3:0]   s,       // digit select, tested in priority order s[3], s[2], s[1], s[0]
+  input  logic         Sqrt,    // not referenced inside this module
+  input  logic [`DIVLEN+3:0] S, SM, // current pair of converter values
+  input  logic [`DIVLEN+3:0] C, // mask from which the bits to OR in are derived
+  output logic [`DIVLEN+3:0] SNext, SMNext // next pair of converter values (purely combinational)
+);
+  //  Radix-4 on-the-fly converter for square root: combinationally forms
+  //  SNext and SMNext from S, SM and shifted/masked copies of C.
+  //  fgen4 maps the same select bits to F2, F1, FN1, FN2 for s[3]..s[0].
+
+  always_comb begin
+    if (s[3]) begin
+      SNext  = S | ((C << 1)&~(C << 2));
+      SMNext = S | (C&~(C << 1));
+    end else if (s[2]) begin
+      SNext  = S | (C&~(C << 1));
+      SMNext = S;
+    end else if (s[1]) begin
+      SNext  = SM | (C&~(C << 2));
+      SMNext = SM | ((C << 1)&~(C << 2));
+    end else if (s[0]) begin
+      SNext  = SM | ((C << 1)&~(C << 2));
+      SMNext = SM | (C&~(C << 1));
+    end else begin        // no bit of s is set: zero digit, S holds
+      SNext  = S;
+      SMNext = SM | (C & ~(C << 2));
+    end 
+  end
+
+endmodule
diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv
index 202b3ee81..87c6a4b25 100644
--- a/pipelined/src/fpu/qsel.sv
+++ b/pipelined/src/fpu/qsel.sv
@@ -62,9 +62,36 @@ module qsel2 ( // *** eventually just change to 4 bits
 //   assign #1 qn = magnitude & sign;
 endmodule
 
+////////////////////////////////////
+// Adder Input Generation, Radix 2 //
+////////////////////////////////////
+module fgen2 (
+  input  logic sp, sn,                 // digit selects: sp has priority over sn; neither set selects FZ
+  input  logic [`DIVLEN+3:0] C, S, SM, // mask C and the converter values S and SM
+  output logic [`DIVLEN+3:0] F         // selected adder input
+);
+  logic [`DIVLEN+3:0] FP, FN, FZ;
+  
+  // Candidate adder inputs for the sp, sn and zero-digit cases
+  assign FP = ~(S << 1) & C;
+  assign FN = (SM << 1) | (C & (~C << 2)); // NOTE(review): ~ applies before the shift here; fgen4 writes ~(C << 2) -- confirm intended
+  assign FZ = '0;
+
+  // Choose which adder input will be used
+
+  always_comb
+    if (sp)       F = FP;
+    else if (sn)  F = FN;
+    else          F = FZ;
+
+  // Equivalent ternary form: assign F = sp ? FP : (sn ? FN : FZ);
+
+endmodule
+
 module qsel4 (
 	input logic [`DIVLEN+3:0] D,
 	input logic [`DIVLEN+3:0] WS, WC,
+  input logic Sqrt,
 	output logic [3:0] q
 );
 	logic [6:0] Wmsbs;
@@ -91,45 +118,77 @@ module qsel4 (
             else if(w2>=4)   QSel4[i] = 4'b0100; 
             else if(w2>=-4)  QSel4[i] = 4'b0000; 
             else if(w2>=-13) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
+            else             QSel4[i] = 4'b0001; 
           1: if(w2>=14)      QSel4[i] = 4'b1000;
             else if(w2>=4)   QSel4[i] = 4'b0100; 
-            else if(w2>=-6)  QSel4[i] = 4'b0000; 
-            else if(w2>=-15) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
+            else if(w2>=-5)  QSel4[i] = 4'b0000; // was -6
+            else if(~Sqrt&(w2>=-15)) QSel4[i] = 4'b0010; // divide case
+            else if( Sqrt&(w2>=-14)) QSel4[i] = 4'b0010; // sqrt case
+            else             QSel4[i] = 4'b0001; 
           2: if(w2>=15)      QSel4[i] = 4'b1000;
             else if(w2>=4)   QSel4[i] = 4'b0100; 
             else if(w2>=-6)  QSel4[i] = 4'b0000; 
             else if(w2>=-16) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
+            else             QSel4[i] = 4'b0001; 
           3: if(w2>=16)      QSel4[i] = 4'b1000;
             else if(w2>=4)   QSel4[i] = 4'b0100; 
             else if(w2>=-6)  QSel4[i] = 4'b0000; 
-            else if(w2>=-18) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
+            else if(w2>=-17) QSel4[i] = 4'b0010; // was -18
+            else             QSel4[i] = 4'b0001; 
           4: if(w2>=18)      QSel4[i] = 4'b1000;
             else if(w2>=6)   QSel4[i] = 4'b0100; 
-            else if(w2>=-8)  QSel4[i] = 4'b0000; 
-            else if(w2>=-20) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
+            else if(w2>=-6)  QSel4[i] = 4'b0000; // was -8
+            else if(~Sqrt&(w2>=-20)) QSel4[i] = 4'b0010; // divide case
+            else if( Sqrt&(w2>=-18)) QSel4[i] = 4'b0010; // sqrt case
+            else             QSel4[i] = 4'b0001; 
           5: if(w2>=20)      QSel4[i] = 4'b1000;
             else if(w2>=6)   QSel4[i] = 4'b0100; 
             else if(w2>=-8)  QSel4[i] = 4'b0000; 
             else if(w2>=-20) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
+            else             QSel4[i] = 4'b0001; 
           6: if(w2>=20)      QSel4[i] = 4'b1000;
             else if(w2>=8)   QSel4[i] = 4'b0100; 
             else if(w2>=-8)  QSel4[i] = 4'b0000; 
             else if(w2>=-22) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
-          7: if(w2>=24)      QSel4[i] = 4'b1000;
+            else             QSel4[i] = 4'b0001; 
+          7: if(w2>=22)      QSel4[i] = 4'b1000; // was 24
             else if(w2>=8)   QSel4[i] = 4'b0100; 
             else if(w2>=-8)  QSel4[i] = 4'b0000; 
-            else if(w2>=-24) QSel4[i] = 4'b0010; 
-            else            QSel4[i] = 4'b0001; 
+            else if(w2>=-23) QSel4[i] = 4'b0010; // was -24
+            else             QSel4[i] = 4'b0001; 
         endcase
       end
   end
 	assign q = QSel4[{Dmsbs,Wmsbs}];
 	
 endmodule
+
+////////////////////////////////////
+// Adder Input Generation, Radix 4 //
+////////////////////////////////////
+module fgen4 (
+  input  logic [3:0] s,                // digit select, tested in priority order s[3], s[2], s[1], s[0]
+  input  logic [`DIVLEN+3:0] C, S, SM, // mask C and the converter values S and SM
+  output logic [`DIVLEN+3:0] F         // selected adder input
+);
+  logic [`DIVLEN+3:0] F2, F1, F0, FN1, FN2;
+  
+  // One candidate adder input per select bit, plus F0 for when no bit is set
+  assign F2  = (~S << 2) & (C << 2);
+  assign F1  = ~(S << 1) & C;
+  assign F0  = '0;
+  assign FN1 = (SM << 1) | (C & ~(C << 2));
+  assign FN2 = (SM << 2) | ((C << 2)&~(C <<4));
+
+  // Choose which adder input will be used
+
+  always_comb
+    if (s[3])       F = F2;
+    else if (s[2])  F = F1;
+    else if (s[1])  F = FN1;
+    else if (s[0])  F = FN2;
+    else            F = F0;
+
+  // Priority select: the highest set bit of s wins; F0 (all zeros) when s is 0.
+
+endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv
index 7e9f9922a..633ac1787 100644
--- a/pipelined/src/fpu/srt.sv
+++ b/pipelined/src/fpu/srt.sv
@@ -34,18 +34,17 @@ module srt(
   input  logic clk,
   input  logic DivStart, 
   input  logic DivBusy, 
-  input  logic [`FMTBITS-1:0] FmtE,
   input  logic [`NE-1:0] Xe, Ye,
   input  logic XZeroE, YZeroE, 
-  input logic [`DIVLEN-1:0] X,
-  input logic [`DIVLEN-1:0] Dpreproc,
-  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  input logic NegSticky,
+  input  logic Sqrt,
+  input  logic [`DIVLEN+3:0] X,
+  input  logic [`DIVLEN+3:0] Dpreproc,
+  input  logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
+  input  logic NegSticky,
   output logic [`QLEN-1-(`RADIX/4):0] Qm,
   output logic [`DIVLEN+3:0]  NextWSN, NextWCN,
   output logic [`DIVLEN+3:0]  StickyWSA,
   output logic [`DIVLEN+3:0]  FirstWS, FirstWC,
-  output logic  [`NE+1:0] QeM,
   output logic [`XLEN-1:0] Rem
 );
 
@@ -59,13 +58,19 @@ module srt(
   logic [`QLEN-1:0] QM[`DIVCOPIES-1:0];
   logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0];
   logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0] S[`DIVCOPIES-1:0]; //***change to QLEN???
+  logic [`DIVLEN+3:0] SM[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0] SNext[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0] SMNext[`DIVCOPIES-1:0];
+  logic [`DIVLEN+3:0] C[`DIVCOPIES-1:0];
  /* verilator lint_on UNOPTFLAT */
   logic [`DIVLEN+3:0]  WSN, WCN;
   logic [`DIVLEN+3:0]  D, DBar, D2, DBar2;
-  logic [`NE+1:0] Qe;
   logic [$clog2(`XLEN+1)-1:0] intExp;
   logic           intSign;
   logic [`QLEN-1:0] QMMux;
+  logic [`DIVLEN+3:0] CMux;
+  logic [`DIVLEN+3:0] SMux;
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -83,13 +88,13 @@ module srt(
     assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0};
   end
 
-  mux2   #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN);
+  mux2   #(`DIVLEN+4) wsmux(NextWSN, X, DivStart, WSN);
   flopen   #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]);
   mux2   #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN);
   flopen   #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]);
-  flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D);
-  flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM);
-
+  flopen #(`DIVLEN+4) dflop(clk, DivStart, Dpreproc, D);
+  mux2 #(`DIVLEN+4) Cmux({2'b11, C[`DIVCOPIES-1][`DIVLEN+3:2]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, DivStart, CMux);
+  flop #(`DIVLEN+4) cflop(clk, CMux, C[0]);
 
   // Divisor Selections
   // - choose the negitive version of what's being selected
@@ -102,8 +107,9 @@ module srt(
   genvar i;
   generate
     for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations
-      divinteration divinteration(.D, .DBar, .D2, .DBar2, 
-      .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]));
+      divinteration divinteration(.D, .DBar, .D2, .DBar2, .Sqrt,
+      .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]),
+      .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i]));
       if(i<(`DIVCOPIES-1)) begin 
         if (`RADIX==2)begin 
           assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0};
@@ -111,9 +117,12 @@ module srt(
         end else begin
           assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0};
           assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0};
+          assign  C[i+1] = {2'b11, C[i][`DIVLEN+3:2]};
         end
         assign Q[i+1] = QNext[i];
         assign QM[i+1] = QMNext[i];
+        assign S[i+1] = SNext[i];
+        assign SM[i+1] = SMNext[i];
       end
     end
   endgenerate
@@ -123,16 +132,27 @@ module srt(
   flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]);
   flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]);
 
-  assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0];
+  flopr #(`DIVLEN+4) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]);
+  mux2 #(`DIVLEN+4) Smux(SNext[`DIVCOPIES-1], {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, DivStart, SMux);
+  flop #(`DIVLEN+4) Sreg(clk, SMux, S[0]);
+
+  always_comb
+    if(Sqrt)
+      if(NegSticky) Qm = SM[0][`QLEN-1-(`RADIX/4):0];
+      else          Qm = S[0][`QLEN-1-(`RADIX/4):0];
+    else  
+      if(NegSticky) Qm = QM[0][`QLEN-1-(`RADIX/4):0];
+      else          Qm = Q[0][`QLEN-1-(`RADIX/4):0];
+
   assign FirstWS = WS[0];
   assign FirstWC = WC[0];
+
   if(`RADIX==2)
     if (`DIVCOPIES == 1)
       assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0};
     else
       assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0};
 
-  expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe);
 
 endmodule
 
@@ -145,8 +165,12 @@ module divinteration (
   input logic [`DIVLEN+3:0] D,
   input logic [`DIVLEN+3:0]  DBar, D2, DBar2,
   input logic [`QLEN-1:0] Q, QM,
+  input logic [`DIVLEN+3:0] S, SM,
   input logic [`DIVLEN+3:0]  WS, WC,
+  input logic [`DIVLEN+3:0] C,
+  input logic Sqrt,
   output logic [`QLEN-1:0] QNext, QMNext, 
+  output logic [`DIVLEN+3:0] SNext, SMNext, 
   output logic [`DIVLEN+3:0]  WSA, WCA
 );
  /* verilator lint_on UNOPTFLAT */
@@ -154,6 +178,8 @@ module divinteration (
   logic [`DIVLEN+3:0]  Dsel;
   logic [3:0]     q;
   logic qp, qz;//, qn;
+  logic [`DIVLEN+3:0] F;
+  logic [`DIVLEN+3:0] AddIn;
 
   // Qmient Selection logic
   // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm)
@@ -166,7 +192,8 @@ module divinteration (
   if(`RADIX == 2) begin : qsel
     qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn);
   end else begin
-    qsel4 qsel4(.D, .WS, .WC, .q);
+    qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q);
+    fgen4 fgen4(.s(q), .C, .S, .SM, .F);
   end
 
   if(`RADIX == 2) begin : dsel
@@ -184,16 +211,18 @@ module divinteration (
   end
   // Partial Product Generation
   //  WSA, WCA = WS + WC - qD
+  assign AddIn = Sqrt ? F : Dsel;
   if (`RADIX == 2) begin : csa
-    csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA);
+    csa #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA);
   end else begin
-    csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA);
+    csa #(`DIVLEN+4) csa(WS, WC, AddIn, |q[3:2], WSA, WCA);
   end
 
   if (`RADIX == 2) begin : otfc
     otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext);
   end else begin
     otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext);
+    sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext);
   end
 
 endmodule
@@ -220,40 +249,3 @@ module csa #(parameter N=69) (
   assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | 
 		    (in2[N-2:0] & in3[N-2:0]), cin};
 endmodule
-
-module expcalc(
-  input logic  [`FMTBITS-1:0] FmtE,
-  input  logic [`NE-1:0] Xe, Ye,
-  input logic XZeroE, 
-  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
-  output logic  [`NE+1:0] Qe
-  );
-    logic [`NE-2:0] Bias;
-    
-    if (`FPSIZES == 1) begin
-        assign Bias = (`NE-1)'(`BIAS); 
-
-    end else if (`FPSIZES == 2) begin
-        assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
-
-    end else if (`FPSIZES == 3) begin
-        always_comb
-            case (FmtE)
-                `FMT: Bias  =  (`NE-1)'(`BIAS);
-                `FMT1: Bias = (`NE-1)'(`BIAS1);
-                `FMT2: Bias = (`NE-1)'(`BIAS2);
-                default: Bias = 'x;
-            endcase
-
-    end else if (`FPSIZES == 4) begin        
-        always_comb
-            case (FmtE)
-                2'h3: Bias =  (`NE-1)'(`Q_BIAS);
-                2'h1: Bias =  (`NE-1)'(`D_BIAS);
-                2'h0: Bias =  (`NE-1)'(`S_BIAS);
-                2'h2: Bias =  (`NE-1)'(`H_BIAS);
-            endcase
-    end
-    // correct exponent for denormalized input's normalization shifts
-    assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}};
-    endmodule
\ No newline at end of file
diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv
index b9fb8bb82..4d2609179 100644
--- a/pipelined/src/fpu/srtpreproc.sv
+++ b/pipelined/src/fpu/srtpreproc.sv
@@ -31,16 +31,25 @@
 `include "wally-config.vh"
 
 module srtpreproc (
+  input  logic clk,
+  input  logic DivStart, 
   input  logic [`NF:0] Xm, Ym,
-  output logic [`DIVLEN-1:0] X,
-  output logic [`DIVLEN-1:0] Dpreproc,
+  input  logic [`NE-1:0] Xe, Ye,
+  input  logic [`FMTBITS-1:0] Fmt,
+  input  logic Sqrt,
+  input logic XZero,
+  output logic  [`NE+1:0] QeM,
+  output logic [`DIVLEN+3:0] X,
+  output logic [`DIVLEN+3:0] Dpreproc,
   output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt,
   output logic [`DURLEN-1:0] Dur
 );
   // logic  [`XLEN-1:0] PosA, PosB;
   // logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY;
-  logic  [`DIVLEN-1:0] PreprocA, PreprocX;
-  logic  [`DIVLEN-1:0] PreprocB, PreprocY;
+  logic  [`NF-1:0] PreprocA, PreprocX;
+  logic  [`NF-1:0] PreprocB, PreprocY;
+  logic  [`NF+3:0] SqrtX;
+  logic [`NE+1:0] Qe;
 
   // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA;
   // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB;
@@ -49,23 +58,22 @@ module srtpreproc (
 
   // ***can probably merge X LZC with conversion
   // cout the number of leading zeros
-  lzc #(`NF+1) lzcA (Xm, XZeroCnt);
-  lzc #(`NF+1) lzcB (Ym, YZeroCnt);
+  lzc #(`NF+1) lzcX (Xm, XZeroCnt);
+  lzc #(`NF+1) lzcY (Ym, YZeroCnt);
 
   // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}};
   // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}};
 
   // assign PreprocA = ExtraA << zeroCntA;
   // assign PreprocB = ExtraB << (zeroCntB + 1);
-  assign PreprocX = {Xm[`NF-1:0]<<XZeroCnt, {`DIVLEN-`NF{1'b0}}};
-  assign PreprocY = {Ym[`NF-1:0]<<YZeroCnt, {`DIVLEN-`NF{1'b0}}};
+  assign PreprocX = Xm[`NF-1:0]<<XZeroCnt;
+  assign PreprocY = Ym[`NF-1:0]<<YZeroCnt;
 
   
-  assign X = PreprocX;
-  assign Dpreproc = PreprocY;
+  assign SqrtX = Xe[0] ? {3'b110, ~XZero, PreprocX} : {2'b11, ~XZero, PreprocX, 1'b0};
+  assign X = Sqrt ? {SqrtX, {`DIVLEN-`NF{1'b0}}} : {3'b000, ~XZero, PreprocX, {`DIVLEN-`NF{1'b0}}};
+  assign Dpreproc = {4'b0001, /*Int ? PreprocB : */PreprocY, {`DIVLEN-`NF{1'b0}}};
   assign Dur = (`DURLEN)'(`FPDUR);
-  // assign intExp = zeroCntB - zeroCntA + 1;
-  // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
   //           radix 2     radix 4
   // 1 copies  DIVLEN+2    DIVLEN+2/2
@@ -76,6 +84,52 @@ module srtpreproc (
   // DIVRESLEN = DIVLEN or DIVLEN+2
   // r = 1 or 2
   // DIVRESLEN/(r*`DIVCOPIES)
+  flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM);
+  expcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero, .XZeroCnt, .YZeroCnt, .Qe);
 
 
+endmodule
+
+module expcalc(
+  input logic  [`FMTBITS-1:0] Fmt,     // format select used to choose Bias
+  input  logic [`NE-1:0] Xe, Ye,       // input exponents
+  input logic Sqrt,                    // 1: Qe comes from SExp, 0: Qe comes from DExp
+  input logic XZero,                   // forces DExp to zero when set
+  input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, // counts subtracted from / added to the DExp sum
+  output logic  [`NE+1:0] Qe           // selected exponent
+  );
+  logic [`NE-2:0] Bias;
+  logic [`NE-1:0] SExp, SXExp;
+  logic [`NE+1:0] DExp;
+  
+  if (`FPSIZES == 1) begin
+      assign Bias = (`NE-1)'(`BIAS); 
+
+  end else if (`FPSIZES == 2) begin
+      assign Bias = Fmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); 
+
+  end else if (`FPSIZES == 3) begin
+      always_comb
+          case (Fmt)
+              `FMT: Bias  =  (`NE-1)'(`BIAS);
+              `FMT1: Bias = (`NE-1)'(`BIAS1);
+              `FMT2: Bias = (`NE-1)'(`BIAS2);
+              default: Bias = 'x;
+          endcase
+
+  end else if (`FPSIZES == 4) begin        
+    always_comb
+        case (Fmt)
+            2'h3: Bias =  (`NE-1)'(`Q_BIAS);
+            2'h1: Bias =  (`NE-1)'(`D_BIAS);
+            2'h0: Bias =  (`NE-1)'(`S_BIAS);
+            2'h2: Bias =  (`NE-1)'(`H_BIAS);
+        endcase
+  end
+  assign SXExp = Xe - (`NE)'(`BIAS); // NOTE(review): subtracts BIAS for every Fmt although Bias above depends on Fmt -- confirm
+  assign SExp  = {1'b0, SXExp[`NE-1:1]} + Bias; // drops SXExp bit 0 and zero-fills the top bit (no sign extension), then adds Bias
+  // Divide path: Xe - XZeroCnt - Ye + YZeroCnt + Bias, all zero-extended to NE+2 bits, then masked to 0 when XZero is set
+  assign DExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZero}};
+  
+  assign Qe = Sqrt ? {2'b0, SExp} : DExp; // SExp is zero-extended to match the width of DExp
+endmodule
\ No newline at end of file
diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv
index 19b637478..9be68f507 100644
--- a/pipelined/testbench/testbench-fp.sv
+++ b/pipelined/testbench/testbench-fp.sv
@@ -697,7 +697,7 @@ module testbenchfp;
   fcmp fcmp   (.Fmt(ModFmt), .OpCtrl(OpCtrlVal), .Xs, .Ys, .Xe, .Ye, 
               .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes),
               .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes));
-  divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), 
+  divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .SqrtE(1'b0), .SqrtM(1'b0),
                   .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), 
                   .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp),
                   .EarlyTermShiftM(EarlyTermShift), .QmM(Quot), .DivDone);

From a8faddf81f0404680ff5350411e0fbb43eb04a1d Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Thu, 21 Jul 2022 12:47:51 -0700
Subject: [PATCH 26/36] removed ugly /ref/Ref from tests.vh, added back
 d_fsd-align-01.S and d_fld-align-01.S tests to tests.vh, updated makefile to
 fix the riscof issues and fix fld fsd tests, updated testbench.sv for
 compatibility with changes

---
 pipelined/testbench/testbench.sv           |   29 +-
 pipelined/testbench/tests.vh               | 1152 ++++++++++----------
 tests/riscof/Makefile                      |    8 +-
 tests/riscof/sail_cSim/riscof_sail_cSim.py |    4 +-
 4 files changed, 607 insertions(+), 586 deletions(-)

diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv
index 0fb5f5e60..30c62865e 100644
--- a/pipelined/testbench/testbench.sv
+++ b/pipelined/testbench/testbench.sv
@@ -195,13 +195,19 @@ logic [3:0] dummy;
       /* if (tests[0] == `IMPERASTEST)
         pathname = tvpaths[0];
       else pathname = tvpaths[1]; */
-      memfilename = {pathname, tests[test], ".elf.memfile"};
+      if (riscofTest) memfilename = {pathname, tests[test], "/ref/ref.elf.memfile"};
+      else memfilename = {pathname, tests[test], ".elf.memfile"};
       if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM);
       else              $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
       if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM);
 
-      ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
-      ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
+      if (riscofTest) begin
+        ProgramAddrMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.addr"};
+        ProgramLabelMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.lab"};
+      end else begin
+        ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
+        ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
+      end
       // declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array
       // to expand, add more elements to this array and initialize them to zero (also initilaize them to zero at the start of the next test)
       updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
@@ -241,7 +247,8 @@ logic [3:0] dummy;
           // this contains instret and cycles for start and end of test run, used by embench python speed script to calculate embench speed score
           // also begin_signature contains the results of the self checking mechanism, which will be read by the python script for error checking
           $display("Embench Benchmark: %s is done.", tests[test]);
-          outputfile = {pathname, tests[test], ".sim.output"};
+          if (riscofTest) outputfile = {pathname, tests[test], "/ref/ref.sim.output"};
+          else outputfile = {pathname, tests[test], ".sim.output"};
           outputFilePointer = $fopen(outputfile);
           i = 0;
           while ($unsigned(i) < $unsigned(5'd5)) begin
@@ -256,7 +263,7 @@ logic [3:0] dummy;
           for(i=0; i<SIGNATURESIZE; i=i+1) begin
             sig32[i] = 'bx;
           end
-          if (riscofTest) signame = {pathname, tests[test], "erence-sail_c_simulator.signature"};
+          if (riscofTest) signame = {pathname, tests[test], "/ref/Reference-sail_c_simulator.signature"};
           else signame = {pathname, tests[test], ".signature.output"};
           // read signature, reformat in 64 bits if necessary
           $readmemh(signame, sig32);
@@ -313,14 +320,20 @@ logic [3:0] dummy;
         else begin
             // If there are still additional tests to run, read in information for the next test
             //pathname = tvpaths[tests[0]];
-            memfilename = {pathname, tests[test], ".elf.memfile"};
+            if (riscofTest) memfilename = {pathname, tests[test], "/ref/ref.elf.memfile"};
+            else memfilename = {pathname, tests[test], ".elf.memfile"};
             //$readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
             if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM);
             else                   $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM);
             if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM);
 
-            ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
-            ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
+            if (riscofTest) begin
+              ProgramAddrMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.addr"};
+              ProgramLabelMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.lab"};
+            end else begin
+              ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"};
+              ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"};
+            end
             ProgramAddrLabelArray = '{ "begin_signature" : 0, "tohost" : 0 };
             updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray);
             $display("Read memfile %s", memfilename);
diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index 8a2765701..df06eb010 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -922,570 +922,572 @@ string imperas32f[] = '{
 
   string arch64priv[] = '{
     `RISCVARCHTEST,
-    "rv64i_m/privilege/src/ebreak.S/ref/Ref",
-    "rv64i_m/privilege/src/ecall.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign1-jalr-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign2-jalr-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-beq-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-bge-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-bgeu-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-blt-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-bltu-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-bne-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-jal-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-ld-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-lh-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-lhu-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-lw-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-lwu-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-sd-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-sh-01.S/ref/Ref",
-    "rv64i_m/privilege/src/misalign-sw-01.S/ref/Ref"
+    "rv64i_m/privilege/src/ebreak.S",
+    "rv64i_m/privilege/src/ecall.S",
+    "rv64i_m/privilege/src/misalign1-jalr-01.S",
+    "rv64i_m/privilege/src/misalign2-jalr-01.S",
+    "rv64i_m/privilege/src/misalign-beq-01.S",
+    "rv64i_m/privilege/src/misalign-bge-01.S",
+    "rv64i_m/privilege/src/misalign-bgeu-01.S",
+    "rv64i_m/privilege/src/misalign-blt-01.S",
+    "rv64i_m/privilege/src/misalign-bltu-01.S",
+    "rv64i_m/privilege/src/misalign-bne-01.S",
+    "rv64i_m/privilege/src/misalign-jal-01.S",
+    "rv64i_m/privilege/src/misalign-ld-01.S",
+    "rv64i_m/privilege/src/misalign-lh-01.S",
+    "rv64i_m/privilege/src/misalign-lhu-01.S",
+    "rv64i_m/privilege/src/misalign-lw-01.S",
+    "rv64i_m/privilege/src/misalign-lwu-01.S",
+    "rv64i_m/privilege/src/misalign-sd-01.S",
+    "rv64i_m/privilege/src/misalign-sh-01.S",
+    "rv64i_m/privilege/src/misalign-sw-01.S"
     };
 
   string arch64m[] = '{
     `RISCVARCHTEST,
-    "rv64i_m/M/src/div-01.S/ref/Ref",
-    "rv64i_m/M/src/divu-01.S/ref/Ref",
-    "rv64i_m/M/src/divuw-01.S/ref/Ref",
-    "rv64i_m/M/src/divw-01.S/ref/Ref",
-    "rv64i_m/M/src/mul-01.S/ref/Ref",
-    "rv64i_m/M/src/mulh-01.S/ref/Ref",
-    "rv64i_m/M/src/mulhsu-01.S/ref/Ref",
-    "rv64i_m/M/src/mulhu-01.S/ref/Ref",
-    "rv64i_m/M/src/mulw-01.S/ref/Ref",
-    "rv64i_m/M/src/rem-01.S/ref/Ref",
-    "rv64i_m/M/src/remu-01.S/ref/Ref",
-    "rv64i_m/M/src/remuw-01.S/ref/Ref",
-    "rv64i_m/M/src/remw-01.S/ref/Ref"
+    "rv64i_m/M/src/div-01.S",
+    "rv64i_m/M/src/divu-01.S",
+    "rv64i_m/M/src/divuw-01.S",
+    "rv64i_m/M/src/divw-01.S",
+    "rv64i_m/M/src/mul-01.S",
+    "rv64i_m/M/src/mulh-01.S",
+    "rv64i_m/M/src/mulhsu-01.S",
+    "rv64i_m/M/src/mulhu-01.S",
+    "rv64i_m/M/src/mulw-01.S",
+    "rv64i_m/M/src/rem-01.S",
+    "rv64i_m/M/src/remu-01.S",
+    "rv64i_m/M/src/remuw-01.S",
+    "rv64i_m/M/src/remw-01.S"
    };
 
   string arch64c[] = '{
     `RISCVARCHTEST,
-    "rv64i_m/C/src/cadd-01.S/ref/Ref",
-    "rv64i_m/C/src/caddi-01.S/ref/Ref",
-    "rv64i_m/C/src/caddi16sp-01.S/ref/Ref",
-    "rv64i_m/C/src/caddi4spn-01.S/ref/Ref",
-    "rv64i_m/C/src/caddiw-01.S/ref/Ref",
-    "rv64i_m/C/src/caddw-01.S/ref/Ref",
-    "rv64i_m/C/src/cand-01.S/ref/Ref",
-    "rv64i_m/C/src/candi-01.S/ref/Ref",
-    "rv64i_m/C/src/cbeqz-01.S/ref/Ref",
-    "rv64i_m/C/src/cbnez-01.S/ref/Ref",
-    "rv64i_m/C/src/cj-01.S/ref/Ref",
-    "rv64i_m/C/src/cjalr-01.S/ref/Ref",
-    "rv64i_m/C/src/cjr-01.S/ref/Ref",
-    "rv64i_m/C/src/cld-01.S/ref/Ref",
-    "rv64i_m/C/src/cldsp-01.S/ref/Ref",
-    "rv64i_m/C/src/cli-01.S/ref/Ref",
-    "rv64i_m/C/src/clui-01.S/ref/Ref",
-    "rv64i_m/C/src/clw-01.S/ref/Ref",
-    "rv64i_m/C/src/clwsp-01.S/ref/Ref",
-    "rv64i_m/C/src/cmv-01.S/ref/Ref",
-    "rv64i_m/C/src/cnop-01.S/ref/Ref",
-    "rv64i_m/C/src/cor-01.S/ref/Ref",
-    "rv64i_m/C/src/csd-01.S/ref/Ref",
-    "rv64i_m/C/src/csdsp-01.S/ref/Ref",
-    "rv64i_m/C/src/cslli-01.S/ref/Ref",
-    "rv64i_m/C/src/csrai-01.S/ref/Ref",
-    "rv64i_m/C/src/csrli-01.S/ref/Ref",
-    "rv64i_m/C/src/csub-01.S/ref/Ref",
-    "rv64i_m/C/src/csubw-01.S/ref/Ref",
-    "rv64i_m/C/src/csw-01.S/ref/Ref",
-    "rv64i_m/C/src/cswsp-01.S/ref/Ref",
-    "rv64i_m/C/src/cxor-01.S/ref/Ref"
+    "rv64i_m/C/src/cadd-01.S",
+    "rv64i_m/C/src/caddi-01.S",
+    "rv64i_m/C/src/caddi16sp-01.S",
+    "rv64i_m/C/src/caddi4spn-01.S",
+    "rv64i_m/C/src/caddiw-01.S",
+    "rv64i_m/C/src/caddw-01.S",
+    "rv64i_m/C/src/cand-01.S",
+    "rv64i_m/C/src/candi-01.S",
+    "rv64i_m/C/src/cbeqz-01.S",
+    "rv64i_m/C/src/cbnez-01.S",
+    "rv64i_m/C/src/cj-01.S",
+    "rv64i_m/C/src/cjalr-01.S",
+    "rv64i_m/C/src/cjr-01.S",
+    "rv64i_m/C/src/cld-01.S",
+    "rv64i_m/C/src/cldsp-01.S",
+    "rv64i_m/C/src/cli-01.S",
+    "rv64i_m/C/src/clui-01.S",
+    "rv64i_m/C/src/clw-01.S",
+    "rv64i_m/C/src/clwsp-01.S",
+    "rv64i_m/C/src/cmv-01.S",
+    "rv64i_m/C/src/cnop-01.S",
+    "rv64i_m/C/src/cor-01.S",
+    "rv64i_m/C/src/csd-01.S",
+    "rv64i_m/C/src/csdsp-01.S",
+    "rv64i_m/C/src/cslli-01.S",
+    "rv64i_m/C/src/csrai-01.S",
+    "rv64i_m/C/src/csrli-01.S",
+    "rv64i_m/C/src/csub-01.S",
+    "rv64i_m/C/src/csubw-01.S",
+    "rv64i_m/C/src/csw-01.S",
+    "rv64i_m/C/src/cswsp-01.S",
+    "rv64i_m/C/src/cxor-01.S"
   };
 
   string arch64cpriv[] = '{
 //    `RISCVARCHTEST,
-    "rv64i_m/C/src/cebreak-01.S/ref/Ref"
+    "rv64i_m/C/src/cebreak-01.S"
   };
 
   string arch64i[] = '{
     `RISCVARCHTEST,
-    "rv64i_m/I/src/add-01.S/ref/Ref",
-    "rv64i_m/I/src/addi-01.S/ref/Ref",
-    "rv64i_m/I/src/addiw-01.S/ref/Ref",
-    "rv64i_m/I/src/addw-01.S/ref/Ref",
-    "rv64i_m/I/src/and-01.S/ref/Ref",
-    "rv64i_m/I/src/andi-01.S/ref/Ref",
-    "rv64i_m/I/src/auipc-01.S/ref/Ref",
-    "rv64i_m/I/src/beq-01.S/ref/Ref",
-    "rv64i_m/I/src/bge-01.S/ref/Ref",
-    "rv64i_m/I/src/bgeu-01.S/ref/Ref",
-    "rv64i_m/I/src/blt-01.S/ref/Ref",
-    "rv64i_m/I/src/bltu-01.S/ref/Ref",
-    "rv64i_m/I/src/bne-01.S/ref/Ref",
-    "rv64i_m/I/src/fence-01.S/ref/Ref",
-    "rv64i_m/I/src/jal-01.S/ref/Ref",
-    "rv64i_m/I/src/jalr-01.S/ref/Ref",
-    "rv64i_m/I/src/lb-align-01.S/ref/Ref",
-    "rv64i_m/I/src/lbu-align-01.S/ref/Ref",
-    "rv64i_m/I/src/ld-align-01.S/ref/Ref",
-    "rv64i_m/I/src/lh-align-01.S/ref/Ref",
-    "rv64i_m/I/src/lhu-align-01.S/ref/Ref",
-    "rv64i_m/I/src/lui-01.S/ref/Ref",
-    "rv64i_m/I/src/lw-align-01.S/ref/Ref",
-    "rv64i_m/I/src/lwu-align-01.S/ref/Ref",
-    "rv64i_m/I/src/or-01.S/ref/Ref",
-    "rv64i_m/I/src/ori-01.S/ref/Ref",
-    "rv64i_m/I/src/sb-align-01.S/ref/Ref",
-    "rv64i_m/I/src/sd-align-01.S/ref/Ref",
-    "rv64i_m/I/src/sh-align-01.S/ref/Ref",
-    "rv64i_m/I/src/sll-01.S/ref/Ref",
-    "rv64i_m/I/src/slli-01.S/ref/Ref",
-    "rv64i_m/I/src/slliw-01.S/ref/Ref",
-    "rv64i_m/I/src/sllw-01.S/ref/Ref",
-    "rv64i_m/I/src/slt-01.S/ref/Ref",
-    "rv64i_m/I/src/slti-01.S/ref/Ref",
-    "rv64i_m/I/src/sltiu-01.S/ref/Ref",
-    "rv64i_m/I/src/sltu-01.S/ref/Ref",
-    "rv64i_m/I/src/sra-01.S/ref/Ref",
-    "rv64i_m/I/src/srai-01.S/ref/Ref",
-    "rv64i_m/I/src/sraiw-01.S/ref/Ref",
-    "rv64i_m/I/src/sraw-01.S/ref/Ref",
-    "rv64i_m/I/src/srl-01.S/ref/Ref",
-    "rv64i_m/I/src/srli-01.S/ref/Ref",
-    "rv64i_m/I/src/srliw-01.S/ref/Ref",
-    "rv64i_m/I/src/srlw-01.S/ref/Ref",
-    "rv64i_m/I/src/sub-01.S/ref/Ref",
-    "rv64i_m/I/src/subw-01.S/ref/Ref",
-    "rv64i_m/I/src/sw-align-01.S/ref/Ref",
-    "rv64i_m/I/src/xor-01.S/ref/Ref",
-    "rv64i_m/I/src/xori-01.S/ref/Ref"
+    "rv64i_m/I/src/add-01.S",
+    "rv64i_m/I/src/addi-01.S",
+    "rv64i_m/I/src/addiw-01.S",
+    "rv64i_m/I/src/addw-01.S",
+    "rv64i_m/I/src/and-01.S",
+    "rv64i_m/I/src/andi-01.S",
+    "rv64i_m/I/src/auipc-01.S",
+    "rv64i_m/I/src/beq-01.S",
+    "rv64i_m/I/src/bge-01.S",
+    "rv64i_m/I/src/bgeu-01.S",
+    "rv64i_m/I/src/blt-01.S",
+    "rv64i_m/I/src/bltu-01.S",
+    "rv64i_m/I/src/bne-01.S",
+    "rv64i_m/I/src/fence-01.S",
+    "rv64i_m/I/src/jal-01.S",
+    "rv64i_m/I/src/jalr-01.S",
+    "rv64i_m/I/src/lb-align-01.S",
+    "rv64i_m/I/src/lbu-align-01.S",
+    "rv64i_m/I/src/ld-align-01.S",
+    "rv64i_m/I/src/lh-align-01.S",
+    "rv64i_m/I/src/lhu-align-01.S",
+    "rv64i_m/I/src/lui-01.S",
+    "rv64i_m/I/src/lw-align-01.S",
+    "rv64i_m/I/src/lwu-align-01.S",
+    "rv64i_m/I/src/or-01.S",
+    "rv64i_m/I/src/ori-01.S",
+    "rv64i_m/I/src/sb-align-01.S",
+    "rv64i_m/I/src/sd-align-01.S",
+    "rv64i_m/I/src/sh-align-01.S",
+    "rv64i_m/I/src/sll-01.S",
+    "rv64i_m/I/src/slli-01.S",
+    "rv64i_m/I/src/slliw-01.S",
+    "rv64i_m/I/src/sllw-01.S",
+    "rv64i_m/I/src/slt-01.S",
+    "rv64i_m/I/src/slti-01.S",
+    "rv64i_m/I/src/sltiu-01.S",
+    "rv64i_m/I/src/sltu-01.S",
+    "rv64i_m/I/src/sra-01.S",
+    "rv64i_m/I/src/srai-01.S",
+    "rv64i_m/I/src/sraiw-01.S",
+    "rv64i_m/I/src/sraw-01.S",
+    "rv64i_m/I/src/srl-01.S",
+    "rv64i_m/I/src/srli-01.S",
+    "rv64i_m/I/src/srliw-01.S",
+    "rv64i_m/I/src/srlw-01.S",
+    "rv64i_m/I/src/sub-01.S",
+    "rv64i_m/I/src/subw-01.S",
+    "rv64i_m/I/src/sw-align-01.S",
+    "rv64i_m/I/src/xor-01.S",
+    "rv64i_m/I/src/xori-01.S"
   };
 
   string arch64d[] = '{
     `RISCVARCHTEST,
-    "rv64i_m/D/src/d_fadd_b10-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fadd_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fadd_b11-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fadd_b12-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fadd_b13-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fadd_b2-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fadd_b3-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fadd_b4-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fadd_b5-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fadd_b7-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fadd_b8-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fclass_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.l_b25-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.l_b26-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.lu_b25-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.lu_b26-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.s_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.s_b22-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.s_b23-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.s_b24-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.s_b27-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.s_b28-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.s_b29-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.w_b25-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.w_b26-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.wu_b25-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.d.wu_b26-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.l.d_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.l.d_b22-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.l.d_b23-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.l.d_b24-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.l.d_b27-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.l.d_b28-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.l.d_b29-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.lu.d_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.lu.d_b22-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.lu.d_b23-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.lu.d_b24-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.lu.d_b27-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.lu.d_b28-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.lu.d_b29-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.s.d_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.s.d_b22-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.s.d_b23-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.s.d_b24-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.s.d_b27-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.s.d_b28-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.s.d_b29-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.w.d_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.w.d_b22-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.w.d_b23-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.w.d_b24-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.w.d_b27-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.w.d_b28-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.w.d_b29-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.wu.d_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.wu.d_b22-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.wu.d_b23-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.wu.d_b24-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.wu.d_b27-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.wu.d_b28-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fcvt.wu.d_b29-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b20-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b2-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b21-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b3-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b4-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b5-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b6-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b7-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b8-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fdiv_b9-01.S/ref/Ref",
-    "rv64i_m/D/src/d_feq_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_feq_b19-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fle_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fle_b19-01.S/ref/Ref",
-    "rv64i_m/D/src/d_flt_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_flt_b19-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b14-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b16-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b17-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b18-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b2-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b3-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b4-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b5-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b6-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b7-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmadd_b8-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmax_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmax_b19-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmin_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmin_b19-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b14-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b16-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b17-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b18-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b2-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b3-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b4-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b5-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b6-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b7-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmsub_b8-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmul_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmul_b2-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmul_b3-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmul_b4-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmul_b5-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmul_b6-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmul_b7-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmul_b8-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmul_b9-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmv.d.x_b25-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmv.d.x_b26-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmv.x.d_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmv.x.d_b22-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmv.x.d_b23-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmv.x.d_b24-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmv.x.d_b27-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmv.x.d_b28-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fmv.x.d_b29-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b14-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b16-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b17-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b18-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b2-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b3-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b4-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b5-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b6-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b7-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmadd_b8-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b14-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b16-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b17-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b18-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b2-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b3-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b4-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b5-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b6-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b7-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fnmsub_b8-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsgnj_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsgnjn_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsgnjx_b1-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fsqrt_b1-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fsqrt_b20-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fsqrt_b2-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fsqrt_b3-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fsqrt_b4-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fsqrt_b5-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fsqrt_b7-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fsqrt_b8-01.S/ref/Ref",
-    // "rv64i_m/D/src/d_fsqrt_b9-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b10-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b1-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b11-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b12-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b13-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b2-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b3-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b4-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b5-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b7-01.S/ref/Ref",
-    "rv64i_m/D/src/d_fsub_b8-01.S/ref/Ref"
+    "rv64i_m/D/src/d_fadd_b10-01.S",
+    "rv64i_m/D/src/d_fadd_b1-01.S",
+    "rv64i_m/D/src/d_fadd_b11-01.S",
+    "rv64i_m/D/src/d_fadd_b12-01.S",
+    "rv64i_m/D/src/d_fadd_b13-01.S",
+    "rv64i_m/D/src/d_fadd_b2-01.S",
+    "rv64i_m/D/src/d_fadd_b3-01.S",
+    "rv64i_m/D/src/d_fadd_b4-01.S",
+    "rv64i_m/D/src/d_fadd_b5-01.S",
+    "rv64i_m/D/src/d_fadd_b7-01.S",
+    "rv64i_m/D/src/d_fadd_b8-01.S",
+    "rv64i_m/D/src/d_fclass_b1-01.S",
+    "rv64i_m/D/src/d_fcvt.d.l_b25-01.S",
+    "rv64i_m/D/src/d_fcvt.d.l_b26-01.S",
+    "rv64i_m/D/src/d_fcvt.d.lu_b25-01.S",
+    "rv64i_m/D/src/d_fcvt.d.lu_b26-01.S",
+    "rv64i_m/D/src/d_fcvt.d.s_b1-01.S",
+    "rv64i_m/D/src/d_fcvt.d.s_b22-01.S",
+    "rv64i_m/D/src/d_fcvt.d.s_b23-01.S",
+    "rv64i_m/D/src/d_fcvt.d.s_b24-01.S",
+    "rv64i_m/D/src/d_fcvt.d.s_b27-01.S",
+    "rv64i_m/D/src/d_fcvt.d.s_b28-01.S",
+    "rv64i_m/D/src/d_fcvt.d.s_b29-01.S",
+    "rv64i_m/D/src/d_fcvt.d.w_b25-01.S",
+    "rv64i_m/D/src/d_fcvt.d.w_b26-01.S",
+    "rv64i_m/D/src/d_fcvt.d.wu_b25-01.S",
+    "rv64i_m/D/src/d_fcvt.d.wu_b26-01.S",
+    "rv64i_m/D/src/d_fcvt.l.d_b1-01.S",
+    "rv64i_m/D/src/d_fcvt.l.d_b22-01.S",
+    "rv64i_m/D/src/d_fcvt.l.d_b23-01.S",
+    "rv64i_m/D/src/d_fcvt.l.d_b24-01.S",
+    "rv64i_m/D/src/d_fcvt.l.d_b27-01.S",
+    "rv64i_m/D/src/d_fcvt.l.d_b28-01.S",
+    "rv64i_m/D/src/d_fcvt.l.d_b29-01.S",
+    "rv64i_m/D/src/d_fcvt.lu.d_b1-01.S",
+    "rv64i_m/D/src/d_fcvt.lu.d_b22-01.S",
+    "rv64i_m/D/src/d_fcvt.lu.d_b23-01.S",
+    "rv64i_m/D/src/d_fcvt.lu.d_b24-01.S",
+    "rv64i_m/D/src/d_fcvt.lu.d_b27-01.S",
+    "rv64i_m/D/src/d_fcvt.lu.d_b28-01.S",
+    "rv64i_m/D/src/d_fcvt.lu.d_b29-01.S",
+    "rv64i_m/D/src/d_fcvt.s.d_b1-01.S",
+    "rv64i_m/D/src/d_fcvt.s.d_b22-01.S",
+    "rv64i_m/D/src/d_fcvt.s.d_b23-01.S",
+    "rv64i_m/D/src/d_fcvt.s.d_b24-01.S",
+    "rv64i_m/D/src/d_fcvt.s.d_b27-01.S",
+    "rv64i_m/D/src/d_fcvt.s.d_b28-01.S",
+    "rv64i_m/D/src/d_fcvt.s.d_b29-01.S",
+    "rv64i_m/D/src/d_fcvt.w.d_b1-01.S",
+    "rv64i_m/D/src/d_fcvt.w.d_b22-01.S",
+    "rv64i_m/D/src/d_fcvt.w.d_b23-01.S",
+    "rv64i_m/D/src/d_fcvt.w.d_b24-01.S",
+    "rv64i_m/D/src/d_fcvt.w.d_b27-01.S",
+    "rv64i_m/D/src/d_fcvt.w.d_b28-01.S",
+    "rv64i_m/D/src/d_fcvt.w.d_b29-01.S",
+    "rv64i_m/D/src/d_fcvt.wu.d_b1-01.S",
+    "rv64i_m/D/src/d_fcvt.wu.d_b22-01.S",
+    "rv64i_m/D/src/d_fcvt.wu.d_b23-01.S",
+    "rv64i_m/D/src/d_fcvt.wu.d_b24-01.S",
+    "rv64i_m/D/src/d_fcvt.wu.d_b27-01.S",
+    "rv64i_m/D/src/d_fcvt.wu.d_b28-01.S",
+    "rv64i_m/D/src/d_fcvt.wu.d_b29-01.S",
+    "rv64i_m/D/src/d_fdiv_b1-01.S",
+    "rv64i_m/D/src/d_fdiv_b20-01.S",
+    "rv64i_m/D/src/d_fdiv_b2-01.S",
+    "rv64i_m/D/src/d_fdiv_b21-01.S",
+    "rv64i_m/D/src/d_fdiv_b3-01.S",
+    "rv64i_m/D/src/d_fdiv_b4-01.S",
+    "rv64i_m/D/src/d_fdiv_b5-01.S",
+    "rv64i_m/D/src/d_fdiv_b6-01.S",
+    "rv64i_m/D/src/d_fdiv_b7-01.S",
+    "rv64i_m/D/src/d_fdiv_b8-01.S",
+    "rv64i_m/D/src/d_fdiv_b9-01.S",
+    "rv64i_m/D/src/d_feq_b1-01.S",
+    "rv64i_m/D/src/d_feq_b19-01.S",
+    "rv64i_m/D/src/d_fle_b1-01.S",
+    "rv64i_m/D/src/d_fle_b19-01.S",
+    "rv64i_m/D/src/d_flt_b1-01.S",
+    "rv64i_m/D/src/d_flt_b19-01.S",
+    "rv64i_m/D/src/d_fld-align-01.S",
+    "rv64i_m/D/src/d_fsd-align-01.S",
+    "rv64i_m/D/src/d_fmadd_b14-01.S",
+    "rv64i_m/D/src/d_fmadd_b16-01.S",
+    "rv64i_m/D/src/d_fmadd_b17-01.S",
+    "rv64i_m/D/src/d_fmadd_b18-01.S",
+    "rv64i_m/D/src/d_fmadd_b2-01.S",
+    "rv64i_m/D/src/d_fmadd_b3-01.S",
+    "rv64i_m/D/src/d_fmadd_b4-01.S",
+    "rv64i_m/D/src/d_fmadd_b5-01.S",
+    "rv64i_m/D/src/d_fmadd_b6-01.S",
+    "rv64i_m/D/src/d_fmadd_b7-01.S",
+    "rv64i_m/D/src/d_fmadd_b8-01.S",
+    "rv64i_m/D/src/d_fmax_b1-01.S",
+    "rv64i_m/D/src/d_fmax_b19-01.S",
+    "rv64i_m/D/src/d_fmin_b1-01.S",
+    "rv64i_m/D/src/d_fmin_b19-01.S",
+    "rv64i_m/D/src/d_fmsub_b14-01.S",
+    "rv64i_m/D/src/d_fmsub_b16-01.S",
+    "rv64i_m/D/src/d_fmsub_b17-01.S",
+    "rv64i_m/D/src/d_fmsub_b18-01.S",
+    "rv64i_m/D/src/d_fmsub_b2-01.S",
+    "rv64i_m/D/src/d_fmsub_b3-01.S",
+    "rv64i_m/D/src/d_fmsub_b4-01.S",
+    "rv64i_m/D/src/d_fmsub_b5-01.S",
+    "rv64i_m/D/src/d_fmsub_b6-01.S",
+    "rv64i_m/D/src/d_fmsub_b7-01.S",
+    "rv64i_m/D/src/d_fmsub_b8-01.S",
+    "rv64i_m/D/src/d_fmul_b1-01.S",
+    "rv64i_m/D/src/d_fmul_b2-01.S",
+    "rv64i_m/D/src/d_fmul_b3-01.S",
+    "rv64i_m/D/src/d_fmul_b4-01.S",
+    "rv64i_m/D/src/d_fmul_b5-01.S",
+    "rv64i_m/D/src/d_fmul_b6-01.S",
+    "rv64i_m/D/src/d_fmul_b7-01.S",
+    "rv64i_m/D/src/d_fmul_b8-01.S",
+    "rv64i_m/D/src/d_fmul_b9-01.S",
+    "rv64i_m/D/src/d_fmv.d.x_b25-01.S",
+    "rv64i_m/D/src/d_fmv.d.x_b26-01.S",
+    "rv64i_m/D/src/d_fmv.x.d_b1-01.S",
+    "rv64i_m/D/src/d_fmv.x.d_b22-01.S",
+    "rv64i_m/D/src/d_fmv.x.d_b23-01.S",
+    "rv64i_m/D/src/d_fmv.x.d_b24-01.S",
+    "rv64i_m/D/src/d_fmv.x.d_b27-01.S",
+    "rv64i_m/D/src/d_fmv.x.d_b28-01.S",
+    "rv64i_m/D/src/d_fmv.x.d_b29-01.S",
+    "rv64i_m/D/src/d_fnmadd_b14-01.S",
+    "rv64i_m/D/src/d_fnmadd_b16-01.S",
+    "rv64i_m/D/src/d_fnmadd_b17-01.S",
+    "rv64i_m/D/src/d_fnmadd_b18-01.S",
+    "rv64i_m/D/src/d_fnmadd_b2-01.S",
+    "rv64i_m/D/src/d_fnmadd_b3-01.S",
+    "rv64i_m/D/src/d_fnmadd_b4-01.S",
+    "rv64i_m/D/src/d_fnmadd_b5-01.S",
+    "rv64i_m/D/src/d_fnmadd_b6-01.S",
+    "rv64i_m/D/src/d_fnmadd_b7-01.S",
+    "rv64i_m/D/src/d_fnmadd_b8-01.S",
+    "rv64i_m/D/src/d_fnmsub_b14-01.S",
+    "rv64i_m/D/src/d_fnmsub_b16-01.S",
+    "rv64i_m/D/src/d_fnmsub_b17-01.S",
+    "rv64i_m/D/src/d_fnmsub_b18-01.S",
+    "rv64i_m/D/src/d_fnmsub_b2-01.S",
+    "rv64i_m/D/src/d_fnmsub_b3-01.S",
+    "rv64i_m/D/src/d_fnmsub_b4-01.S",
+    "rv64i_m/D/src/d_fnmsub_b5-01.S",
+    "rv64i_m/D/src/d_fnmsub_b6-01.S",
+    "rv64i_m/D/src/d_fnmsub_b7-01.S",
+    "rv64i_m/D/src/d_fnmsub_b8-01.S",
+    "rv64i_m/D/src/d_fsgnj_b1-01.S",
+    "rv64i_m/D/src/d_fsgnjn_b1-01.S",
+    "rv64i_m/D/src/d_fsgnjx_b1-01.S",
+    // "rv64i_m/D/src/d_fsqrt_b1-01.S",
+    // "rv64i_m/D/src/d_fsqrt_b20-01.S",
+    // "rv64i_m/D/src/d_fsqrt_b2-01.S",
+    // "rv64i_m/D/src/d_fsqrt_b3-01.S",
+    // "rv64i_m/D/src/d_fsqrt_b4-01.S",
+    // "rv64i_m/D/src/d_fsqrt_b5-01.S",
+    // "rv64i_m/D/src/d_fsqrt_b7-01.S",
+    // "rv64i_m/D/src/d_fsqrt_b8-01.S",
+    // "rv64i_m/D/src/d_fsqrt_b9-01.S",
+    "rv64i_m/D/src/d_fsub_b10-01.S",
+    "rv64i_m/D/src/d_fsub_b1-01.S",
+    "rv64i_m/D/src/d_fsub_b11-01.S",
+    "rv64i_m/D/src/d_fsub_b12-01.S",
+    "rv64i_m/D/src/d_fsub_b13-01.S",
+    "rv64i_m/D/src/d_fsub_b2-01.S",
+    "rv64i_m/D/src/d_fsub_b3-01.S",
+    "rv64i_m/D/src/d_fsub_b4-01.S",
+    "rv64i_m/D/src/d_fsub_b5-01.S",
+    "rv64i_m/D/src/d_fsub_b7-01.S",
+    "rv64i_m/D/src/d_fsub_b8-01.S"
 };
 
     string arch32priv[] = '{
     `RISCVARCHTEST,
-    "rv32i_m/privilege/src/ebreak.S/ref/Ref",
-    "rv32i_m/privilege/src/ecall.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign1-jalr-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign2-jalr-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-beq-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-bge-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-bgeu-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-blt-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-bltu-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-bne-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-jal-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-lh-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-lhu-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-lw-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-sh-01.S/ref/Ref",
-    "rv32i_m/privilege/src/misalign-sw-01.S/ref/Ref"
+    "rv32i_m/privilege/src/ebreak.S",
+    "rv32i_m/privilege/src/ecall.S",
+    "rv32i_m/privilege/src/misalign1-jalr-01.S",
+    "rv32i_m/privilege/src/misalign2-jalr-01.S",
+    "rv32i_m/privilege/src/misalign-beq-01.S",
+    "rv32i_m/privilege/src/misalign-bge-01.S",
+    "rv32i_m/privilege/src/misalign-bgeu-01.S",
+    "rv32i_m/privilege/src/misalign-blt-01.S",
+    "rv32i_m/privilege/src/misalign-bltu-01.S",
+    "rv32i_m/privilege/src/misalign-bne-01.S",
+    "rv32i_m/privilege/src/misalign-jal-01.S",
+    "rv32i_m/privilege/src/misalign-lh-01.S",
+    "rv32i_m/privilege/src/misalign-lhu-01.S",
+    "rv32i_m/privilege/src/misalign-lw-01.S",
+    "rv32i_m/privilege/src/misalign-sh-01.S",
+    "rv32i_m/privilege/src/misalign-sw-01.S"
     };
 
   string arch32m[] = '{
     `RISCVARCHTEST,
-    "rv32i_m/M/src/div-01.S/ref/Ref",
-    "rv32i_m/M/src/divu-01.S/ref/Ref",
-    "rv32i_m/M/src/mul-01.S/ref/Ref",
-    "rv32i_m/M/src/mulh-01.S/ref/Ref",
-    "rv32i_m/M/src/mulhsu-01.S/ref/Ref",
-    "rv32i_m/M/src/mulhu-01.S/ref/Ref",
-    "rv32i_m/M/src/rem-01.S/ref/Ref",
-    "rv32i_m/M/src/remu-01.S/ref/Ref"
+    "rv32i_m/M/src/div-01.S",
+    "rv32i_m/M/src/divu-01.S",
+    "rv32i_m/M/src/mul-01.S",
+    "rv32i_m/M/src/mulh-01.S",
+    "rv32i_m/M/src/mulhsu-01.S",
+    "rv32i_m/M/src/mulhu-01.S",
+    "rv32i_m/M/src/rem-01.S",
+    "rv32i_m/M/src/remu-01.S"
    };
 
   string arch32f[] = '{
     `RISCVARCHTEST,
-    "rv32i_m/F/src/fadd_b10-01.S/ref/Ref",
-    "rv32i_m/F/src/fadd_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fadd_b11-01.S/ref/Ref",
-    "rv32i_m/F/src/fadd_b12-01.S/ref/Ref",
-    "rv32i_m/F/src/fadd_b13-01.S/ref/Ref",
-    "rv32i_m/F/src/fadd_b2-01.S/ref/Ref",
-    "rv32i_m/F/src/fadd_b3-01.S/ref/Ref",
-    "rv32i_m/F/src/fadd_b4-01.S/ref/Ref",
-    "rv32i_m/F/src/fadd_b5-01.S/ref/Ref",
-    "rv32i_m/F/src/fadd_b7-01.S/ref/Ref",
-    "rv32i_m/F/src/fadd_b8-01.S/ref/Ref",
-    "rv32i_m/F/src/fclass_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.s.w_b25-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.s.w_b26-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.s.wu_b25-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.s.wu_b26-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.w.s_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.w.s_b22-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.w.s_b23-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.w.s_b24-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.w.s_b27-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.w.s_b28-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.w.s_b29-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.wu.s_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.wu.s_b22-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.wu.s_b23-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.wu.s_b24-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.wu.s_b27-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.wu.s_b28-01.S/ref/Ref",
-    "rv32i_m/F/src/fcvt.wu.s_b29-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b20-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b2-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b21-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b3-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b4-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b5-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b6-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b7-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b8-01.S/ref/Ref",
-    "rv32i_m/F/src/fdiv_b9-01.S/ref/Ref",
-    "rv32i_m/F/src/feq_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/feq_b19-01.S/ref/Ref",
-    "rv32i_m/F/src/fle_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fle_b19-01.S/ref/Ref",
-    "rv32i_m/F/src/flt_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/flt_b19-01.S/ref/Ref",
-    "rv32i_m/F/src/flw-align-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b14-01.S/ref/Ref",
-    // "rv32i_m/F/src/fmadd_b15-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b16-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b17-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b18-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b2-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b3-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b4-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b5-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b6-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b7-01.S/ref/Ref",
-    "rv32i_m/F/src/fmadd_b8-01.S/ref/Ref",
-    "rv32i_m/F/src/fmax_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fmax_b19-01.S/ref/Ref",
-    "rv32i_m/F/src/fmin_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fmin_b19-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b14-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b15-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b16-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b17-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b18-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b2-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b3-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b4-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b5-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b6-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b7-01.S/ref/Ref",
-    "rv32i_m/F/src/fmsub_b8-01.S/ref/Ref",
-    "rv32i_m/F/src/fmul_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fmul_b2-01.S/ref/Ref",
-    "rv32i_m/F/src/fmul_b3-01.S/ref/Ref",
-    "rv32i_m/F/src/fmul_b4-01.S/ref/Ref",
-    "rv32i_m/F/src/fmul_b5-01.S/ref/Ref",
-    "rv32i_m/F/src/fmul_b6-01.S/ref/Ref",
-    "rv32i_m/F/src/fmul_b7-01.S/ref/Ref",
-    "rv32i_m/F/src/fmul_b8-01.S/ref/Ref",
-    "rv32i_m/F/src/fmul_b9-01.S/ref/Ref",
-    "rv32i_m/F/src/fmv.w.x_b25-01.S/ref/Ref",
-    "rv32i_m/F/src/fmv.w.x_b26-01.S/ref/Ref",
-    "rv32i_m/F/src/fmv.x.w_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fmv.x.w_b22-01.S/ref/Ref",
-    "rv32i_m/F/src/fmv.x.w_b23-01.S/ref/Ref",
-    "rv32i_m/F/src/fmv.x.w_b24-01.S/ref/Ref",
-    "rv32i_m/F/src/fmv.x.w_b27-01.S/ref/Ref",
-    "rv32i_m/F/src/fmv.x.w_b28-01.S/ref/Ref",
-    "rv32i_m/F/src/fmv.x.w_b29-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b14-01.S/ref/Ref",
-    // "rv32i_m/F/src/fnmadd_b15-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b16-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b17-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b18-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b2-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b3-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b4-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b5-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b6-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b7-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmadd_b8-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b14-01.S/ref/Ref",
-    // "rv32i_m/F/src/fnmsub_b15-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b16-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b17-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b18-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b2-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b3-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b4-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b5-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b6-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b7-01.S/ref/Ref",
-    "rv32i_m/F/src/fnmsub_b8-01.S/ref/Ref",
-    "rv32i_m/F/src/fsgnj_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fsgnjn_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fsgnjx_b1-01.S/ref/Ref",
-    // "rv32i_m/F/src/fsqrt_b1-01.S/ref/Ref",
-    // "rv32i_m/F/src/fsqrt_b20-01.S/ref/Ref",
-    // "rv32i_m/F/src/fsqrt_b2-01.S/ref/Ref",
-    // "rv32i_m/F/src/fsqrt_b3-01.S/ref/Ref",
-    // "rv32i_m/F/src/fsqrt_b4-01.S/ref/Ref",
-    // "rv32i_m/F/src/fsqrt_b5-01.S/ref/Ref",
-    // "rv32i_m/F/src/fsqrt_b7-01.S/ref/Ref",
-    // "rv32i_m/F/src/fsqrt_b8-01.S/ref/Ref",
-    // "rv32i_m/F/src/fsqrt_b9-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b10-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b1-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b11-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b12-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b13-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b2-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b3-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b4-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b5-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b7-01.S/ref/Ref",
-    "rv32i_m/F/src/fsub_b8-01.S/ref/Ref",
-    "rv32i_m/F/src/fsw-align-01.S/ref/Ref"
+    "rv32i_m/F/src/fadd_b10-01.S",
+    "rv32i_m/F/src/fadd_b1-01.S",
+    "rv32i_m/F/src/fadd_b11-01.S",
+    "rv32i_m/F/src/fadd_b12-01.S",
+    "rv32i_m/F/src/fadd_b13-01.S",
+    "rv32i_m/F/src/fadd_b2-01.S",
+    "rv32i_m/F/src/fadd_b3-01.S",
+    "rv32i_m/F/src/fadd_b4-01.S",
+    "rv32i_m/F/src/fadd_b5-01.S",
+    "rv32i_m/F/src/fadd_b7-01.S",
+    "rv32i_m/F/src/fadd_b8-01.S",
+    "rv32i_m/F/src/fclass_b1-01.S",
+    "rv32i_m/F/src/fcvt.s.w_b25-01.S",
+    "rv32i_m/F/src/fcvt.s.w_b26-01.S",
+    "rv32i_m/F/src/fcvt.s.wu_b25-01.S",
+    "rv32i_m/F/src/fcvt.s.wu_b26-01.S",
+    "rv32i_m/F/src/fcvt.w.s_b1-01.S",
+    "rv32i_m/F/src/fcvt.w.s_b22-01.S",
+    "rv32i_m/F/src/fcvt.w.s_b23-01.S",
+    "rv32i_m/F/src/fcvt.w.s_b24-01.S",
+    "rv32i_m/F/src/fcvt.w.s_b27-01.S",
+    "rv32i_m/F/src/fcvt.w.s_b28-01.S",
+    "rv32i_m/F/src/fcvt.w.s_b29-01.S",
+    "rv32i_m/F/src/fcvt.wu.s_b1-01.S",
+    "rv32i_m/F/src/fcvt.wu.s_b22-01.S",
+    "rv32i_m/F/src/fcvt.wu.s_b23-01.S",
+    "rv32i_m/F/src/fcvt.wu.s_b24-01.S",
+    "rv32i_m/F/src/fcvt.wu.s_b27-01.S",
+    "rv32i_m/F/src/fcvt.wu.s_b28-01.S",
+    "rv32i_m/F/src/fcvt.wu.s_b29-01.S",
+    "rv32i_m/F/src/fdiv_b1-01.S",
+    "rv32i_m/F/src/fdiv_b20-01.S",
+    "rv32i_m/F/src/fdiv_b2-01.S",
+    "rv32i_m/F/src/fdiv_b21-01.S",
+    "rv32i_m/F/src/fdiv_b3-01.S",
+    "rv32i_m/F/src/fdiv_b4-01.S",
+    "rv32i_m/F/src/fdiv_b5-01.S",
+    "rv32i_m/F/src/fdiv_b6-01.S",
+    "rv32i_m/F/src/fdiv_b7-01.S",
+    "rv32i_m/F/src/fdiv_b8-01.S",
+    "rv32i_m/F/src/fdiv_b9-01.S",
+    "rv32i_m/F/src/feq_b1-01.S",
+    "rv32i_m/F/src/feq_b19-01.S",
+    "rv32i_m/F/src/fle_b1-01.S",
+    "rv32i_m/F/src/fle_b19-01.S",
+    "rv32i_m/F/src/flt_b1-01.S",
+    "rv32i_m/F/src/flt_b19-01.S",
+    "rv32i_m/F/src/flw-align-01.S",
+    "rv32i_m/F/src/fmadd_b1-01.S",
+    "rv32i_m/F/src/fmadd_b14-01.S",
+    // "rv32i_m/F/src/fmadd_b15-01.S",
+    "rv32i_m/F/src/fmadd_b16-01.S",
+    "rv32i_m/F/src/fmadd_b17-01.S",
+    "rv32i_m/F/src/fmadd_b18-01.S",
+    "rv32i_m/F/src/fmadd_b2-01.S",
+    "rv32i_m/F/src/fmadd_b3-01.S",
+    "rv32i_m/F/src/fmadd_b4-01.S",
+    "rv32i_m/F/src/fmadd_b5-01.S",
+    "rv32i_m/F/src/fmadd_b6-01.S",
+    "rv32i_m/F/src/fmadd_b7-01.S",
+    "rv32i_m/F/src/fmadd_b8-01.S",
+    "rv32i_m/F/src/fmax_b1-01.S",
+    "rv32i_m/F/src/fmax_b19-01.S",
+    "rv32i_m/F/src/fmin_b1-01.S",
+    "rv32i_m/F/src/fmin_b19-01.S",
+    "rv32i_m/F/src/fmsub_b1-01.S",
+    "rv32i_m/F/src/fmsub_b14-01.S",
+    "rv32i_m/F/src/fmsub_b15-01.S",
+    "rv32i_m/F/src/fmsub_b16-01.S",
+    "rv32i_m/F/src/fmsub_b17-01.S",
+    "rv32i_m/F/src/fmsub_b18-01.S",
+    "rv32i_m/F/src/fmsub_b2-01.S",
+    "rv32i_m/F/src/fmsub_b3-01.S",
+    "rv32i_m/F/src/fmsub_b4-01.S",
+    "rv32i_m/F/src/fmsub_b5-01.S",
+    "rv32i_m/F/src/fmsub_b6-01.S",
+    "rv32i_m/F/src/fmsub_b7-01.S",
+    "rv32i_m/F/src/fmsub_b8-01.S",
+    "rv32i_m/F/src/fmul_b1-01.S",
+    "rv32i_m/F/src/fmul_b2-01.S",
+    "rv32i_m/F/src/fmul_b3-01.S",
+    "rv32i_m/F/src/fmul_b4-01.S",
+    "rv32i_m/F/src/fmul_b5-01.S",
+    "rv32i_m/F/src/fmul_b6-01.S",
+    "rv32i_m/F/src/fmul_b7-01.S",
+    "rv32i_m/F/src/fmul_b8-01.S",
+    "rv32i_m/F/src/fmul_b9-01.S",
+    "rv32i_m/F/src/fmv.w.x_b25-01.S",
+    "rv32i_m/F/src/fmv.w.x_b26-01.S",
+    "rv32i_m/F/src/fmv.x.w_b1-01.S",
+    "rv32i_m/F/src/fmv.x.w_b22-01.S",
+    "rv32i_m/F/src/fmv.x.w_b23-01.S",
+    "rv32i_m/F/src/fmv.x.w_b24-01.S",
+    "rv32i_m/F/src/fmv.x.w_b27-01.S",
+    "rv32i_m/F/src/fmv.x.w_b28-01.S",
+    "rv32i_m/F/src/fmv.x.w_b29-01.S",
+    "rv32i_m/F/src/fnmadd_b1-01.S",
+    "rv32i_m/F/src/fnmadd_b14-01.S",
+    // "rv32i_m/F/src/fnmadd_b15-01.S",
+    "rv32i_m/F/src/fnmadd_b16-01.S",
+    "rv32i_m/F/src/fnmadd_b17-01.S",
+    "rv32i_m/F/src/fnmadd_b18-01.S",
+    "rv32i_m/F/src/fnmadd_b2-01.S",
+    "rv32i_m/F/src/fnmadd_b3-01.S",
+    "rv32i_m/F/src/fnmadd_b4-01.S",
+    "rv32i_m/F/src/fnmadd_b5-01.S",
+    "rv32i_m/F/src/fnmadd_b6-01.S",
+    "rv32i_m/F/src/fnmadd_b7-01.S",
+    "rv32i_m/F/src/fnmadd_b8-01.S",
+    "rv32i_m/F/src/fnmsub_b1-01.S",
+    "rv32i_m/F/src/fnmsub_b14-01.S",
+    // "rv32i_m/F/src/fnmsub_b15-01.S",
+    "rv32i_m/F/src/fnmsub_b16-01.S",
+    "rv32i_m/F/src/fnmsub_b17-01.S",
+    "rv32i_m/F/src/fnmsub_b18-01.S",
+    "rv32i_m/F/src/fnmsub_b2-01.S",
+    "rv32i_m/F/src/fnmsub_b3-01.S",
+    "rv32i_m/F/src/fnmsub_b4-01.S",
+    "rv32i_m/F/src/fnmsub_b5-01.S",
+    "rv32i_m/F/src/fnmsub_b6-01.S",
+    "rv32i_m/F/src/fnmsub_b7-01.S",
+    "rv32i_m/F/src/fnmsub_b8-01.S",
+    "rv32i_m/F/src/fsgnj_b1-01.S",
+    "rv32i_m/F/src/fsgnjn_b1-01.S",
+    "rv32i_m/F/src/fsgnjx_b1-01.S",
+    // "rv32i_m/F/src/fsqrt_b1-01.S",
+    // "rv32i_m/F/src/fsqrt_b20-01.S",
+    // "rv32i_m/F/src/fsqrt_b2-01.S",
+    // "rv32i_m/F/src/fsqrt_b3-01.S",
+    // "rv32i_m/F/src/fsqrt_b4-01.S",
+    // "rv32i_m/F/src/fsqrt_b5-01.S",
+    // "rv32i_m/F/src/fsqrt_b7-01.S",
+    // "rv32i_m/F/src/fsqrt_b8-01.S",
+    // "rv32i_m/F/src/fsqrt_b9-01.S",
+    "rv32i_m/F/src/fsub_b10-01.S",
+    "rv32i_m/F/src/fsub_b1-01.S",
+    "rv32i_m/F/src/fsub_b11-01.S",
+    "rv32i_m/F/src/fsub_b12-01.S",
+    "rv32i_m/F/src/fsub_b13-01.S",
+    "rv32i_m/F/src/fsub_b2-01.S",
+    "rv32i_m/F/src/fsub_b3-01.S",
+    "rv32i_m/F/src/fsub_b4-01.S",
+    "rv32i_m/F/src/fsub_b5-01.S",
+    "rv32i_m/F/src/fsub_b7-01.S",
+    "rv32i_m/F/src/fsub_b8-01.S",
+    "rv32i_m/F/src/fsw-align-01.S"
     };
 
 
   string arch32c[] = '{
     `RISCVARCHTEST,
-    "rv32i_m/C/src/cadd-01.S/ref/Ref",
-  "rv32i_m/C/src/caddi-01.S/ref/Ref",
-  "rv32i_m/C/src/caddi16sp-01.S/ref/Ref",
-  "rv32i_m/C/src/caddi4spn-01.S/ref/Ref",
-  "rv32i_m/C/src/cand-01.S/ref/Ref",
-  "rv32i_m/C/src/candi-01.S/ref/Ref",
-  "rv32i_m/C/src/cbeqz-01.S/ref/Ref",
-  "rv32i_m/C/src/cbnez-01.S/ref/Ref",
-  "rv32i_m/C/src/cj-01.S/ref/Ref",
-  "rv32i_m/C/src/cjal-01.S/ref/Ref",
-  "rv32i_m/C/src/cjalr-01.S/ref/Ref",
-  "rv32i_m/C/src/cjr-01.S/ref/Ref",
-  "rv32i_m/C/src/cli-01.S/ref/Ref",
-  "rv32i_m/C/src/clui-01.S/ref/Ref",
-  "rv32i_m/C/src/clw-01.S/ref/Ref",
-  "rv32i_m/C/src/clwsp-01.S/ref/Ref",
-  "rv32i_m/C/src/cmv-01.S/ref/Ref",
-  "rv32i_m/C/src/cnop-01.S/ref/Ref",
-  "rv32i_m/C/src/cor-01.S/ref/Ref",
-  "rv32i_m/C/src/cslli-01.S/ref/Ref",
-  "rv32i_m/C/src/csrai-01.S/ref/Ref",
-  "rv32i_m/C/src/csrli-01.S/ref/Ref",
-  "rv32i_m/C/src/csub-01.S/ref/Ref",
-  "rv32i_m/C/src/csw-01.S/ref/Ref",
-  "rv32i_m/C/src/cswsp-01.S/ref/Ref",
-  "rv32i_m/C/src/cxor-01.S/ref/Ref"
+    "rv32i_m/C/src/cadd-01.S",
+  "rv32i_m/C/src/caddi-01.S",
+  "rv32i_m/C/src/caddi16sp-01.S",
+  "rv32i_m/C/src/caddi4spn-01.S",
+  "rv32i_m/C/src/cand-01.S",
+  "rv32i_m/C/src/candi-01.S",
+  "rv32i_m/C/src/cbeqz-01.S",
+  "rv32i_m/C/src/cbnez-01.S",
+  "rv32i_m/C/src/cj-01.S",
+  "rv32i_m/C/src/cjal-01.S",
+  "rv32i_m/C/src/cjalr-01.S",
+  "rv32i_m/C/src/cjr-01.S",
+  "rv32i_m/C/src/cli-01.S",
+  "rv32i_m/C/src/clui-01.S",
+  "rv32i_m/C/src/clw-01.S",
+  "rv32i_m/C/src/clwsp-01.S",
+  "rv32i_m/C/src/cmv-01.S",
+  "rv32i_m/C/src/cnop-01.S",
+  "rv32i_m/C/src/cor-01.S",
+  "rv32i_m/C/src/cslli-01.S",
+  "rv32i_m/C/src/csrai-01.S",
+  "rv32i_m/C/src/csrli-01.S",
+  "rv32i_m/C/src/csub-01.S",
+  "rv32i_m/C/src/csw-01.S",
+  "rv32i_m/C/src/cswsp-01.S",
+  "rv32i_m/C/src/cxor-01.S"
   };
 
   string arch32cpriv[] = '{
   //  `RISCVARCHTEST,
-  "rv32i_m/C/src/cebreak-01.S/ref/Ref"
+  "rv32i_m/C/src/cebreak-01.S"
   };      
 
 
   string arch32i[] = '{
     `RISCVARCHTEST,
-    "rv32i_m/I/src/add-01.S/ref/Ref",
-    "rv32i_m/I/src/addi-01.S/ref/Ref",
-    "rv32i_m/I/src/and-01.S/ref/Ref",
-    "rv32i_m/I/src/andi-01.S/ref/Ref",
-    "rv32i_m/I/src/auipc-01.S/ref/Ref",
-    "rv32i_m/I/src/beq-01.S/ref/Ref",
-    "rv32i_m/I/src/bge-01.S/ref/Ref",
-    "rv32i_m/I/src/bgeu-01.S/ref/Ref",
-    "rv32i_m/I/src/blt-01.S/ref/Ref",
-    "rv32i_m/I/src/bltu-01.S/ref/Ref",
-    "rv32i_m/I/src/bne-01.S/ref/Ref",
-    "rv32i_m/I/src/fence-01.S/ref/Ref",
-    "rv32i_m/I/src/jal-01.S/ref/Ref",
-    "rv32i_m/I/src/jalr-01.S/ref/Ref",
-    "rv32i_m/I/src/lb-align-01.S/ref/Ref",
-    "rv32i_m/I/src/lbu-align-01.S/ref/Ref",
-    "rv32i_m/I/src/lh-align-01.S/ref/Ref",
-    "rv32i_m/I/src/lhu-align-01.S/ref/Ref",
-    "rv32i_m/I/src/lui-01.S/ref/Ref",
-    "rv32i_m/I/src/lw-align-01.S/ref/Ref",
-    "rv32i_m/I/src/or-01.S/ref/Ref",
-    "rv32i_m/I/src/ori-01.S/ref/Ref",
-    "rv32i_m/I/src/sb-align-01.S/ref/Ref",
-    "rv32i_m/I/src/sh-align-01.S/ref/Ref",
-    "rv32i_m/I/src/sll-01.S/ref/Ref",
-    "rv32i_m/I/src/slli-01.S/ref/Ref",
-    "rv32i_m/I/src/slt-01.S/ref/Ref",
-    "rv32i_m/I/src/slti-01.S/ref/Ref",
-    "rv32i_m/I/src/sltiu-01.S/ref/Ref",
-    "rv32i_m/I/src/sltu-01.S/ref/Ref",
-    "rv32i_m/I/src/sra-01.S/ref/Ref",
-    "rv32i_m/I/src/srai-01.S/ref/Ref",
-    "rv32i_m/I/src/srl-01.S/ref/Ref",
-    "rv32i_m/I/src/srli-01.S/ref/Ref",
-    "rv32i_m/I/src/sub-01.S/ref/Ref",
-    "rv32i_m/I/src/sw-align-01.S/ref/Ref",
-    "rv32i_m/I/src/xor-01.S/ref/Ref",
-    "rv32i_m/I/src/xori-01.S/ref/Ref"
+    "rv32i_m/I/src/add-01.S",
+    "rv32i_m/I/src/addi-01.S",
+    "rv32i_m/I/src/and-01.S",
+    "rv32i_m/I/src/andi-01.S",
+    "rv32i_m/I/src/auipc-01.S",
+    "rv32i_m/I/src/beq-01.S",
+    "rv32i_m/I/src/bge-01.S",
+    "rv32i_m/I/src/bgeu-01.S",
+    "rv32i_m/I/src/blt-01.S",
+    "rv32i_m/I/src/bltu-01.S",
+    "rv32i_m/I/src/bne-01.S",
+    "rv32i_m/I/src/fence-01.S",
+    "rv32i_m/I/src/jal-01.S",
+    "rv32i_m/I/src/jalr-01.S",
+    "rv32i_m/I/src/lb-align-01.S",
+    "rv32i_m/I/src/lbu-align-01.S",
+    "rv32i_m/I/src/lh-align-01.S",
+    "rv32i_m/I/src/lhu-align-01.S",
+    "rv32i_m/I/src/lui-01.S",
+    "rv32i_m/I/src/lw-align-01.S",
+    "rv32i_m/I/src/or-01.S",
+    "rv32i_m/I/src/ori-01.S",
+    "rv32i_m/I/src/sb-align-01.S",
+    "rv32i_m/I/src/sh-align-01.S",
+    "rv32i_m/I/src/sll-01.S",
+    "rv32i_m/I/src/slli-01.S",
+    "rv32i_m/I/src/slt-01.S",
+    "rv32i_m/I/src/slti-01.S",
+    "rv32i_m/I/src/sltiu-01.S",
+    "rv32i_m/I/src/sltu-01.S",
+    "rv32i_m/I/src/sra-01.S",
+    "rv32i_m/I/src/srai-01.S",
+    "rv32i_m/I/src/srl-01.S",
+    "rv32i_m/I/src/srli-01.S",
+    "rv32i_m/I/src/sub-01.S",
+    "rv32i_m/I/src/sw-align-01.S",
+    "rv32i_m/I/src/xor-01.S",
+    "rv32i_m/I/src/xori-01.S"
   };
 
  string wally64i[] = '{
@@ -1611,91 +1613,91 @@ string wally32i[] = '{
 // riscof test paths, to replace existing paths once riscof flow is working
 // string wally64a[] = '{
 //     `WALLYTEST,
-//     "rv64i_m/privilege/src/WALLY-amo.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
+//     "rv64i_m/privilege/src/WALLY-amo.S",
+//     "rv64i_m/privilege/src/WALLY-lrsc.S",
+//     "rv64i_m/privilege/src/WALLY-status-fp-enabled-01.S"
 //   };
 
 //     string wally32a[] = '{
 //     `WALLYTEST,
-//     "rv32i_m/privilege/src/WALLY-amo.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-lrsc.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S/ref/Ref"
+//     "rv32i_m/privilege/src/WALLY-amo.S",
+//     "rv32i_m/privilege/src/WALLY-lrsc.S",
+//     "rv32i_m/privilege/src/WALLY-status-fp-enabled-01.S"
 
 //   };
 
 //   string wally64i[] = '{
 //     `WALLYTEST,
-//     "rv64i_m/I/src/WALLY-ADD.S/ref/Ref",
-//     "rv64i_m/I/src/WALLY-SLT.S/ref/Ref",
-//     "rv64i_m/I/src/WALLY-SLTU.S/ref/Ref",
-//     "rv64i_m/I/src/WALLY-SUB.S/ref/Ref",
-//     "rv64i_m/I/src/WALLY-XOR.S/ref/Ref"
+//     "rv64i_m/I/src/WALLY-ADD.S",
+//     "rv64i_m/I/src/WALLY-SLT.S",
+//     "rv64i_m/I/src/WALLY-SLTU.S",
+//     "rv64i_m/I/src/WALLY-SUB.S",
+//     "rv64i_m/I/src/WALLY-XOR.S"
 //  };
 
 //  string wally64priv[] = '{
 //     `WALLYTEST,
-//     "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-mmu-sv39.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-mmu-sv48.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-pma.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-pmp.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
-//     "rv64i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
+//     "rv64i_m/privilege/src/WALLY-csr-permission-s-01.S",
+//     "rv64i_m/privilege/src/WALLY-csr-permission-u-01.S",
+//     "rv64i_m/privilege/src/WALLY-mie-01.S",
+//     "rv64i_m/privilege/src/WALLY-minfo-01.S",
+//     "rv64i_m/privilege/src/WALLY-misa-01.S",
+//     "rv64i_m/privilege/src/WALLY-mmu-sv39.S",
+//     "rv64i_m/privilege/src/WALLY-mmu-sv48.S",
+//     "rv64i_m/privilege/src/WALLY-mtvec-01.S",
+//     "rv64i_m/privilege/src/WALLY-pma.S",
+//     "rv64i_m/privilege/src/WALLY-pmp.S",
+//     "rv64i_m/privilege/src/WALLY-sie-01.S",
+//     "rv64i_m/privilege/src/WALLY-status-mie-01.S",
+//     "rv64i_m/privilege/src/WALLY-status-sie-01.S",
+//     "rv64i_m/privilege/src/WALLY-status-tw-01.S",
+//     "rv64i_m/privilege/src/WALLY-stvec-01.S",
+//     "rv64i_m/privilege/src/WALLY-trap-01.S",
+//     "rv64i_m/privilege/src/WALLY-trap-s-01.S",
+//     "rv64i_m/privilege/src/WALLY-trap-sret-01.S",
+//     "rv64i_m/privilege/src/WALLY-trap-u-01.S",
+//     "rv64i_m/privilege/src/WALLY-wfi-01.S"
 //  };
 
 //  string wally64periph[] = '{
 //     `WALLYTEST,
-//     "rv64i_m/privilege/src/WALLY-periph.S/ref/Ref"
+//     "rv64i_m/privilege/src/WALLY-periph.S"
 //  };
 
 
  string wally32d[] = '{
     `WALLYTEST,
-    "rv32i_m/D/src/WALLY-fld.S/ref/Ref"
+    "rv32i_m/D/src/WALLY-fld.S"
  };
 
 //  string wally32i[] = '{
 //     `WALLYTEST,
-//     "rv32i_m/I/src/WALLY-ADD.S/ref/Ref",
-//     "rv32i_m/I/src/WALLY-SLT.S/ref/Ref",
-//     "rv32i_m/I/src/WALLY-SLTU.S/ref/Ref",
-//     "rv32i_m/I/src/WALLY-SUB.S/ref/Ref",
-//     "rv32i_m/I/src/WALLY-XOR.S/ref/Ref" 
+//     "rv32i_m/I/src/WALLY-ADD.S",
+//     "rv32i_m/I/src/WALLY-SLT.S",
+//     "rv32i_m/I/src/WALLY-SLTU.S",
+//     "rv32i_m/I/src/WALLY-SUB.S",
+//     "rv32i_m/I/src/WALLY-XOR.S" 
 //  };
 
 //  string wally32priv[] = '{
 //     `WALLYTEST,
-//     "rv32i_m/privilege/src/WALLY-csr-permission-s-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-csr-permission-u-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-mie-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-minfo-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-misa-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-mmu-sv32.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-mtvec-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-pma.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-pmp.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-sie-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-status-mie-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-status-sie-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-status-tw-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-stvec-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-trap-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-trap-s-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-trap-sret-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-trap-u-01.S/ref/Ref",
-//     "rv32i_m/privilege/src/WALLY-wfi-01.S/ref/Ref"
+//     "rv32i_m/privilege/src/WALLY-csr-permission-s-01.S",
+//     "rv32i_m/privilege/src/WALLY-csr-permission-u-01.S",
+//     "rv32i_m/privilege/src/WALLY-mie-01.S",
+//     "rv32i_m/privilege/src/WALLY-minfo-01.S",
+//     "rv32i_m/privilege/src/WALLY-misa-01.S",
+//     "rv32i_m/privilege/src/WALLY-mmu-sv32.S",
+//     "rv32i_m/privilege/src/WALLY-mtvec-01.S",
+//     "rv32i_m/privilege/src/WALLY-pma.S",
+//     "rv32i_m/privilege/src/WALLY-pmp.S",
+//     "rv32i_m/privilege/src/WALLY-sie-01.S",
+//     "rv32i_m/privilege/src/WALLY-status-mie-01.S",
+//     "rv32i_m/privilege/src/WALLY-status-sie-01.S",
+//     "rv32i_m/privilege/src/WALLY-status-tw-01.S",
+//     "rv32i_m/privilege/src/WALLY-stvec-01.S",
+//     "rv32i_m/privilege/src/WALLY-trap-01.S",
+//     "rv32i_m/privilege/src/WALLY-trap-s-01.S",
+//     "rv32i_m/privilege/src/WALLY-trap-sret-01.S",
+//     "rv32i_m/privilege/src/WALLY-trap-u-01.S",
+//     "rv32i_m/privilege/src/WALLY-wfi-01.S"
 //  };
diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile
index af67a5357..259eb3f83 100644
--- a/tests/riscof/Makefile
+++ b/tests/riscof/Makefile
@@ -17,7 +17,13 @@ root:
 	mkdir -p $(wally_workdir)
 	sed 's,{0},$(current_dir),g;s,{1},$(XLEN)$(if $(findstring 64,$(XLEN)),gc,imc),g' config.ini > config$(XLEN).ini
 
-build_arch:
+fsd_fld_tempfix:
+    # this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests
+    # https://github.com/riscv-non-isa/riscv-arch-test/issues/266
+	find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fld*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
+	find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fsd*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
+
+build_arch: fsd_fld_tempfix
 	riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
 	rm -rf $(arch_workdir)/rv$(XLEN)i_m
 	mv -f $(work_dir)/rv$(XLEN)i_m $(arch_workdir)/
diff --git a/tests/riscof/sail_cSim/riscof_sail_cSim.py b/tests/riscof/sail_cSim/riscof_sail_cSim.py
index b86f62b55..dc3033ab3 100644
--- a/tests/riscof/sail_cSim/riscof_sail_cSim.py
+++ b/tests/riscof/sail_cSim/riscof_sail_cSim.py
@@ -90,7 +90,7 @@ class sail_cSim(pluginTemplate):
             test_dir = testentry['work_dir']
             test_name = test.rsplit('/',1)[1][:-2]
 
-            elf = 'Ref.elf'
+            elf = 'ref.elf'
 
             execute = "@cd "+testentry['work_dir']+";"
 
@@ -98,7 +98,7 @@ class sail_cSim(pluginTemplate):
             compile_cmd = cmd + ' -D' + " -D".join(testentry['macros'])
             execute+=compile_cmd+";"
 
-            execute += self.objdump_cmd.format(elf, self.xlen, 'Ref.elf.objdump')
+            execute += self.objdump_cmd.format(elf, self.xlen, 'ref.elf.objdump')
             sig_file = os.path.join(test_dir, self.name[:-1] + ".signature")
 
             execute += self.sail_exe[self.xlen] + ' -z268435455 --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name)

From 635a02cf6a8796e8ec106b3cdc4f4b0afad2967a Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Thu, 21 Jul 2022 12:50:02 -0700
Subject: [PATCH 27/36] made makefile more specific, just in case of future
 additions

---
 tests/riscof/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile
index 259eb3f83..a9a442d38 100644
--- a/tests/riscof/Makefile
+++ b/tests/riscof/Makefile
@@ -20,8 +20,8 @@ root:
 fsd_fld_tempfix:
     # this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests
     # https://github.com/riscv-non-isa/riscv-arch-test/issues/266
-	find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fld*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
-	find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fsd*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
+	find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fld-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
+	find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fsd-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {}
 
 build_arch: fsd_fld_tempfix
 	riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser

From c7e84f8e40f0cdb6a61410651b307fdfa2e805d8 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Thu, 21 Jul 2022 20:45:08 +0000
Subject: [PATCH 28/36] Renamed variables, moved output handling to
 postprocessor, added remainder handling

---
 pipelined/srt/sqrttestgen  | Bin 22792 -> 22792 bytes
 pipelined/srt/srt.sv       | 100 +++++++++++++++++++++++++++++--------
 pipelined/srt/testbench.sv |  10 ++--
 3 files changed, 85 insertions(+), 25 deletions(-)

diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen
index 06615165395cb6abb37c9d60152d5fe70f15e11a..d2cb80d445718cfa8efc5daa24fefc6b5d462149 100755
GIT binary patch
delta 695
zcmXxiO=uHA6ae6PsoR()?atZ;R#1#kNU;Y?6~!LxR$N>x6e;8qJj5P~^<ec-dXf~Z
z+NhxAAp{XYyi|M9E`lhS6_oT?MWxb9{5hn0aLvV7qSLq8*M)s=zGdFLnPKOaU~UO+
zuf%eOXg<05&n3gtLwLH?9C-K$*18T}yFYsOz~ZiR3#*+YKi^M1KELtFEk*{S`EmW1
zSJRCv&;Kh`)1{Wxgb3`hN7>8llk64tG4?8ZfL&qtv#acVY(I2f8`~B(tn-8&JVB^%
zLv8Hs><C-3H@Mzp*Vrlc4|WgxE4z<ew(&!owC**OatgNZT)yOssUGKsc0HLAPT}>7
zZo0!)K%q3#VTtNPnpg^0EzqcJH-al(U&QYCp+v(e(*50h`-IMcWAT14@lJdYhTV_x
zS%6{8wr4@O&)OwKzBjPkaU>cP3sJ|YK8;uPk)hllrHb?=Rafc)<x`YrDYyPr%A=g7
zo)yYn|CAE6i*J-ulqGevD9>P0_QC=V$<E{(T8l2K`2>eJPOpa(G7U#@UhaWGd`dBa
z?<l^+-xQb8FivPb_0Zfi#;pWaAmRGE>w9-@{Yv3#VQdkGFkP@FH8_nI?P>7bbvv)a
aS)9o1k8M?W_#ksQW=gABWV&B6>6T65Z3JZi

delta 715
zcmXw%L1+^}6o%(DjY&w8oduP8uw6xMwP1rvN=3~IUEPx+nj*a^9-<fPLE=Rs+Li_{
z%0rAN^(4J`=pYo4CZ4k1dJ&X@R1irHN{C?hP&2;WeSI+hKi}}?&CFvKmtk=ko~)%x
zrU;|-^lRPpj0il#4ksQi!TL_MSvWNmE4(<^ou14+Jnb&L{dGds`c5V)T|>sd^$XE8
z%=wj`)&Wn)BW^HXVD^~LF|RS7VgACLXWn2w%G_c;#N6rWN_gN9FZi6Wn_1}SLRsb{
z^A2Xo+~WrfGXG_^n13^mGXG@G=_^_{9bd<USS@93QNKmfs%>fQR-KOf=zd4s9d#ea
z7j{{~t$uuefO@(LB=rYD;!8@b2iTgYS<~4HTHbzxRyv>jG3NMP<A{gX)5pNVXXyf5
zRG-sx0GDwlGXqKWE+e5&_}I)&CXR>k2NOma_G8&7p1r!Mb&bjm`dZJDyX14^J@N+m
zFrBzTKJZU#L3daux5$&!L!0~_X5`r6S5#%3vcyzEO0pb{541)N3?q2Z6pa<UCPyHT
zRXGe3xJ25IpGXz9NgJ3l&qGUHGM^8q&R&1tp6%}yRf#kvOkpXoZ^z*V-f_y{skT!w
bU>0u{$5Y!oJbYc8NLkVjH(Ba?aU}L1h0y`7

diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index a7216b9ff..7cbcd0daa 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -35,33 +35,35 @@ module srt (
   input  logic Start, 
   input  logic Stall, // *** multiple pipe stages
   input  logic Flush, // *** multiple pipe stages
-  // Floating Point Inputs
-  // later add exponents, signs, special cases
+  // Floating Point
   input  logic       XSign, YSign,
   input  logic [`NE-1:0] XExp, YExp,
   input  logic [`NF-1:0] SrcXFrac, SrcYFrac,
+  // Integer
   input  logic [`XLEN-1:0] SrcA, SrcB,
+  // Customization
   input  logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit
   input  logic       W64, // 32-bit ints on XLEN=64
+  // Selection
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
   output logic       rsign, done,
-  output logic [`DIVLEN-2:0] Rem, Quot, // *** later handle integers
+  output logic [`DIVLEN-1:0] Rem, Result,
   output logic [`NE-1:0] rExp,
   output logic [3:0] Flags
 );
 
-  logic                       qp, qz, qn; // quotient is +1, 0, or -1
+  logic                       qp, qz, qn; // result bits are +1, 0, or -1
   logic [`NE-1:0]             calcExp;
   logic                       calcSign;
   logic [`DIVLEN+3:0]         X, Dpreproc, C, F, S, SM, AddIn;
   logic [`DIVLEN+3:0]         WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel;
-  logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur;
+  logic [$clog2(`XLEN+1)-1:0] zeroCntD, intExp, dur, calcDur;
   logic                       intSign;
   logic                       cin;
  
-  srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign);
+  srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign);
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -91,7 +93,7 @@ module srt (
   // otfc2  #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot);
   // otherwise use sotfc
   creg   sotfcC(clk, Start, Sqrt, C);
-  sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, Quot, S, SM);
+  sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, S, SM);
   fsel2 fsel(qp, qn, C, S, SM, F);
 
   // Adder input selection
@@ -103,7 +105,7 @@ module srt (
   
   expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
 
-  signcalc signcalc(.XSign, .YSign, .calcSign);
+  srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Result, .Rem, .calcSign);
 endmodule
 
 ////////////////
@@ -123,11 +125,11 @@ module srtpreproc (
   input  logic       Int, // Choose integer inputs
   input  logic       Sqrt, // perform square root, not divide
   output logic [`DIVLEN+3:0] X, D,
-  output logic [$clog2(`XLEN+1)-1:0] intExp, dur, // Quotient integer exponent
+  output logic [$clog2(`XLEN+1)-1:0] zeroCntB, intExp, dur, // Quotient integer exponent
   output logic       intSign // Quotient integer sign
 );
 
-  logic  [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB;
+  logic  [$clog2(`XLEN+1)-1:0] zeroCntA;
   logic  [`XLEN-1:0] PosA, PosB;
   logic  [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX;
   logic  [`NF+4:0] SqrtX;
@@ -235,7 +237,7 @@ module otfc2 #(parameter N=66) (
   input  logic         clk,
   input  logic         Start,
   input  logic         qp, qz, qn,
-  output logic [N-3:0] r
+  output logic [N-3:0] Result
 );
   //  The on-the-fly converter transfers the quotient 
   //  bits to the quotient as they come.
@@ -261,7 +263,7 @@ module otfc2 #(parameter N=66) (
       QMNext = {QMR, 1'b0};
     end 
   end
-  assign r = Q[N] ? Q[N-1:2] : Q[N-2:1];
+  assign Result = Q[N] ? Q[N-1:2] : Q[N-2:1];
 
 endmodule
 
@@ -274,7 +276,6 @@ module sotfc2(
   input  logic         sp, sn,
   input  logic         Sqrt,
   input  logic [`DIVLEN+3:0] C,
-  output logic [`DIVLEN-2:0] Sq,
   output logic [`DIVLEN+3:0] S, SM
 );
   //  The on-the-fly converter transfers the square root 
@@ -298,7 +299,6 @@ module sotfc2(
       SMNext = SM | (C & ~(C << 1));
     end 
   end
-  assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0];
 endmodule
 
 //////////////////////////
@@ -395,14 +395,74 @@ module expcalc(
 
 endmodule
 
-//////////////
-// signcalc //
-//////////////
-module signcalc(
-  input logic  XSign, YSign,
+module srtpostproc( // postprocessor: derives Result, Rem and calcSign from W = WS + WC, S, SM, X and D
+  input  logic [`DIVLEN+3:0] WS, WC, X, D, S, SM, // WS and WC are added into W below (presumably the carry-save residual pair -- confirm)
+  input  logic [$clog2(`XLEN+1)-1:0] dur, zeroCntD, // dur: right-shift amount applied to the remainder; zeroCntD: compared against XLEN below
+  input  logic XSign, YSign, Signed, Int, // Signed picks the sign-aware case table; Int picks intRes over floatRes
+  output logic [`DIVLEN-1:0]   Result, Rem,
   output logic calcSign
 );
+  logic [`DIVLEN+3:0] W, shiftRem, intRem, intS; 
+  logic [`DIVLEN-1:0] floatRes, intRes;
+  logic               WSign;
 
+  assign W = WS + WC; // single-word sum of WS and WC
+  assign WSign = W[`DIVLEN+3]; // most significant bit of W, treated as its sign
+  // Remainder handling: choose the integer remainder (intRem) and the matching integer result word (intS)
+  always_comb begin
+    if (zeroCntD == ($clog2(`XLEN+1))'(`XLEN)) begin // zeroCntD equals XLEN (presumably an all-zero divisor, i.e. divide by zero -- confirm)
+      intRem = X; // remainder is X unchanged
+      intS = -1; // result word is all ones
+    end
+    else if (~Signed) begin // unsigned: only the sign of W matters
+      if (WSign) begin
+        intRem = W + D; // W is negative: add D back
+        intS = SM; // and take SM instead of S
+      end else begin 
+        intRem = W;
+        intS = S;
+      end
+    end
+    else case ({YSign, XSign, WSign}) // signed: correction chosen by both operand signs and the sign of W
+      3'b000: begin // YSign=0 XSign=0 WSign=0
+        intRem = W; 
+        intS = S; 
+      end
+      3'b001: begin // YSign=0 XSign=0 WSign=1
+        intRem = W + D;
+        intS = SM;
+      end
+      3'b010: begin // YSign=0 XSign=1 WSign=0
+        intRem = W - D;
+        intS = ~S;
+      end
+      3'b011: begin // YSign=0 XSign=1 WSign=1
+        intRem = W;
+        intS = ~SM;
+      end
+      3'b100: begin // YSign=1 XSign=0 WSign=0
+        intRem = W;
+        intS = ~SM;
+      end
+      3'b101: begin // YSign=1 XSign=0 WSign=1
+        intRem = W + D;
+        intS = ~SM + 1;
+      end 
+      3'b110: begin // YSign=1 XSign=1 WSign=0
+        intRem = W - D;
+        intS = S + 1;
+      end 
+      3'b111: begin // YSign=1 XSign=1 WSign=1
+        intRem = W;
+        intS = S;
+      end
+    endcase
+  end
+  assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0]; // if bit DIVLEN of S is set take bits DIVLEN:1, else the low DIVLEN bits
+  assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0]; // same bit selection applied to intS
+  assign Result = Int ? intRes : floatRes; // integer result when Int is set, floating-point result otherwise
   assign calcSign = XSign ^ YSign;
+  assign shiftRem = intRem >>> dur; // NOTE(review): intRem is an unsigned logic vector, so >>> zero-fills like >>; confirm whether sign extension was intended
+  assign Rem = shiftRem[`DIVLEN-1:0]; // low DIVLEN bits of the shifted remainder
+endmodule
 
-endmodule
\ No newline at end of file
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index 7a4e1897b..b72ffb42e 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -49,7 +49,7 @@ module testbench;
   logic               asign, bsign;
   logic [`NF-1:0]     r;
   logic [`XLEN-1:0]   rInt;
-  logic [`DIVLEN-2:0] Quot;
+  logic [`DIVLEN-1:0] Quot, Rem;
  
   // Test parameters
   parameter MEM_SIZE = 40000;
@@ -72,7 +72,7 @@ module testbench;
 
   // Equip Int test or Sqrt test
   assign Int = 1'b0;
-  assign Sqrt = 1'b0;
+  assign Sqrt = 1'b1;
 
   // Divider
   srt srt(.clk, .Start(req), 
@@ -82,7 +82,7 @@ module testbench;
                 .SrcXFrac(afrac), .SrcYFrac(bfrac), 
                 .SrcA(a), .SrcB(b), .Fmt(2'b00), 
                 .W64(1'b1), .Signed(1'b0), .Int, .Sqrt, 
-                .Quot, .Rem(), .Flags(), .done);
+                .Result(Quot), .Rem, .Flags(), .done);
 
   // Counter
   // counter counter(clk, req, done);
@@ -101,7 +101,7 @@ module testbench;
     begin
       testnum = 0; 
       errors = 0;
-      $readmemh ("testvectors", Tests);
+      $readmemh ("sqrttestvectors", Tests);
       Vec = Tests[testnum];
       a = Vec[`mema];
       {asign, aExp, afrac} = a;
@@ -117,7 +117,7 @@ module testbench;
 
   always @(posedge clk) begin
     r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
-    rInt = {1'b1, Quot};
+    rInt = Quot;
     if (done) begin
       if (~Int & ~Sqrt) begin
         req <= #5 1;

From 8bfb23320468af2c4e814999f3508ba6d6e5b208 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Fri, 22 Jul 2022 01:27:08 +0000
Subject: [PATCH 29/36] Changed testbench to operate on two inputs and one
 output, changed all test generators, changed srt module to return only one
 output and take in Mod as a signal to compute integer remainder

---
 pipelined/srt/Makefile      |   8 +++-
 pipelined/srt/exptestgen.c  |   4 --
 pipelined/srt/inttestgen    | Bin 18496 -> 18496 bytes
 pipelined/srt/inttestgen.c  |  23 +++---------
 pipelined/srt/modtestgen    | Bin 0 -> 18496 bytes
 pipelined/srt/modtestgen.c  |  73 ++++++++++++++++++++++++++++++++++++
 pipelined/srt/sqrttestgen   | Bin 22792 -> 22792 bytes
 pipelined/srt/sqrttestgen.c |   4 --
 pipelined/srt/srt-waves.do  |   1 +
 pipelined/srt/srt.sv        |  22 ++++++-----
 pipelined/srt/testbench.sv  |  40 ++++++++++----------
 11 files changed, 118 insertions(+), 57 deletions(-)
 create mode 100755 pipelined/srt/modtestgen
 create mode 100644 pipelined/srt/modtestgen.c

diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile
index 5d7898b17..ee249139d 100644
--- a/pipelined/srt/Makefile
+++ b/pipelined/srt/Makefile
@@ -1,4 +1,4 @@
-all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen
+all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen modtestgen
 
 sqrttestgen: sqrttestgen.c
 	gcc sqrttestgen.c -o sqrttestgen -lm
@@ -28,6 +28,10 @@ inttestgen: inttestgen.c
 	gcc -lm -o inttestgen inttestgen.c
 	./inttestgen
 
+modtestgen: modtestgen.c  # build the generator, then run it to write modtestvectors
+	gcc modtestgen.c -o modtestgen -lm
+	./modtestgen
+
 clean:
-	rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen
+	rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen modtestgen
 	
diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c
index d6bebb774..61fe74aa4 100644
--- a/pipelined/srt/exptestgen.c
+++ b/pipelined/srt/exptestgen.c
@@ -96,10 +96,6 @@ void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp,
   // Print r in standard double format
   fprintf(fptr, "%03x", rExp|(rSign<<11));
   printhex(fptr, rFrac);
-  fprintf(fptr, "_");
-
-  // Spacing for testbench, value doesn't matter
-  fprintf(fptr, "%016x", 0);
   fprintf(fptr, "\n");
 }
 
diff --git a/pipelined/srt/inttestgen b/pipelined/srt/inttestgen
index 2f7e6819945a6b827154fdc1c264eb7d77a29312..9e65cf4a031da28d084759b76e92bd9fd7e4bfc3 100755
GIT binary patch
delta 1043
zcmYLIO-vI(6rR~a*(oh;5h#?VK&t#SLP35?lt>A!SwvKj5;gvCk(;I$;(>sTC6ZFq
z>Kw$xgI-Kbh)VW?2W^ok#e?BMh{Q_|Hfe%lZGvf}u)b}#?Ig3``}yAY-t6oK{W$2y
z{&k{JqSVqn+6ri0inQZW*iwdzzQW&2*13_1h2)Ki=j+=;4-Ioa-jLt#`FX~e$Hs^a
zajVO2{)4;*%gdoEpr?_7dfZHlSbg~=pCif5b3XegH|KRkR~-pab-R<=Sfg{`8|QOI
zIp27g^EqOicQtB)q{orKp2-B6`si^a!Ag>;7n%ICeL?WAtvESvg!BBY=Dd>$&N~jJ
zAmbeX8E1^fSi{kqz)Nd8fUqe_cY-v?fcrUc9qeddniHm-#K0Wcrz6D34Z{3DUV=P|
zvbko_F0kWjb1<&fs1QaduD-TBrmhcxpD!LMuD-Z@b!DtcT{SJ%(g3L_8_ugnPVRoA
zh!@T@h!eBXb99G%ZF##V2=l;eP6~nxa1>B1EQ2;dz`6rL2m`(b6x)5LzZ56TOHgHj
zD6$u5Gyu)8Hz1ZT#$X+WZHKMdqeF$YJ!e5w3&p6XNOO<rRmo*atSn%W0YY=IyP??(
zU^dU(71OEw0)$`EcZF4$=eG)V3>{V4XiUX13Bn&Qx>ZUNaPO%_7frnix8&^-{=oDF
zXZ;AFvz2CLCLIA99j&ZXGh(UeiEB4a%DF%~U%bJ0)lJKlyIGPAJvNmqaz5Q@JV}uB
zndZ|)%OM)ZCH$49E?kkFF4Z9YkRdf<fu_SQc$UhvC?q2P*P@qUH`TW47?{M|x(N<$
z(;dsqlaauAU@@xf*qL~>6o<2)Zj)oIq=W6Zp~qUgg^3aRul*Ex!rD^o8QRy;4X=>S
zc08!iNTX+TWO9VGsTo_WI+&r)dZ!_EeL(1kTrpB1qx3c!rJ`Xe-p~KBT9nvHYwQ+9
i=J3$xvb}?_QXiH+wOdK<zuvN26(K1XQt*05t?WPN_%N~n

delta 1109
zcmZ8hT}TvB6ux&>*O}Q}M>lQr2Y>J<6?E5h-KC^f+=*){7`Y&7<&$rT4@vdl53E$<
zS}3<b--4bh&6Odjmt2uD3&MvGrI#fT2I^tkmloaJ&fPz&3un&#&iT&y&YXb>%;CTs
zp4%3Bj3|D3=J(ce{bB<6MOXadr_b=$yc@$ywn)`Y^29vV`sV!htBt`SO<mwhGI$&C
z$mV)LeXB3W`aVEw{d7O829MHgzVOtDPmYoW;&Z>7B8seS$`RJ}dpC9QC6jWHERtX2
z8c{sk#5WGsy-R!^jrhib#3zS3ln7BawF;>4$`R}xj|daXUO5W4s4y|hC@g>PcPuuy
zwc#PkKJosr5oH|8#vlcql<r`cBD1a`phJ9zRZKxg;u~Q2{n~W^S}*iSNT*4IanfHL
zTY|$zOEq%-&oL%AyY!GR=Cmj+5K21d*@~3S*T6;Er)YXuEe7|nR2K+KIVK%OC@fuD
zAC_u90M0xFa#-qHzq&EpAeBzDS|pXyNul6mUPc-6kO%e5U+Jo4FU@NbeJOly96gN1
z7JwN7KMKAK6j<#J_yG7Qj28s|;wTnFptwty#dCGHfo+8-a%bh_fU|);f><Vt!Knmu
zgKZyBv!S{z8=6%^F>1@sxy!Yi^ty6v%;Hfogl56=Kx74cI?p|4>3Dt#!cXZPLn-#F
ziv|lvLngvGMfGg<NWS2NMX}?es}=2HKu93Fs2){(#A#e5(wU+<V=7$*MRcgBNa9#2
zL+scSq+<xA9`hHq_4Kq}u%}3%Q}i#Qd_$$+O6Fy{YCS=NI8PmHXvNqGFkdGj0^WlC
z3}xgZ-k^T7Ht2AG<~CV4(5-9Q0f3t=XHx3%iQs`V;K7;FL#E<o$vC{}rDi?GUG%d3
z76@$Ft(;Ay2lk7?67Pz$S7=Ae1y~rJX?a-ClbkcvN2f<97qI~w9Tu3O!!cPK|4=~a
zgq#_f&_Hsl^~s{%Wc<GR(_uB@$F$sS6;mGqbo$*b0!iu+y6AQY$A5-Bw?pI&u6QR{
JL!n%)`va8jPiX)E

diff --git a/pipelined/srt/inttestgen.c b/pipelined/srt/inttestgen.c
index 17ec9299e..8c83d7963 100644
--- a/pipelined/srt/inttestgen.c
+++ b/pipelined/srt/inttestgen.c
@@ -1,9 +1,8 @@
 /* testgen.c */
 
-/* Written 10/31/96 by David Harris
+/* Written 7/21/2022 by Cedar Turek
 
-   This program creates test vectors for mantissa component
-   of an IEEE floating point divider. 
+   This program creates test vectors for integer divide.
    */
 
 /* #includes */
@@ -19,7 +18,7 @@
 
 /* Prototypes */
 
-void output(FILE *fptr, long a, long b, long r, long rem);
+void output(FILE *fptr, long a, long b, long r);
 void printhex(FILE *fptr, long x);
 double random_input(void);
 
@@ -28,7 +27,7 @@ double random_input(void);
 void main(void)
 {
   FILE *fptr;
-  long a, b, r, rem;
+  long a, b, r;
   long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255};
   int i, j;
 
@@ -42,32 +41,22 @@ void main(void)
     for (j=0; j<ENTRIES; j++) {
       a = list[j];
       r = a/b;
-      rem = a%b;
-      output(fptr, a, b, r, rem);
+      output(fptr, a, b, r);
     }
   }
-  
-//   for (i = 0; i< RANDOM_VECS; i++) {
-//     a = random_input();
-//     b = random_input();
-//     r = a/b;
-//     output(fptr, a, b, r);
-//   }
 
   fclose(fptr);
 }
 
 /* Functions */
 
-void output(FILE *fptr, long a, long b, long r, long rem)
+void output(FILE *fptr, long a, long b, long r)
 {
   printhex(fptr, a);
   fprintf(fptr, "_");
   printhex(fptr, b);
   fprintf(fptr, "_");
   printhex(fptr, r);
-  fprintf(fptr, "_");
-  printhex(fptr, rem);
   fprintf(fptr, "\n");
 }
 
diff --git a/pipelined/srt/modtestgen b/pipelined/srt/modtestgen
new file mode 100755
index 0000000000000000000000000000000000000000..ad9c327574d70ba9aff77831c85afb8ba784fa2f
GIT binary patch
literal 18496
zcmeHP4{Te<c|VepEIBeoCvoCBY5da0h+{*P<l3H`)-x%K@>45Ys!gTNR_B=(A0;*v
zsgRF}94x6&rEWRbt$}T=`v;gd6h?{;Td<);P@s)mH?i9eSQ@llv22)9B$>RXh~_P+
zmN?eF@9z7^_a5KNf&lB5g=drB-S7Kzci;Va_l|e^WMXI_7zhXsLGil+Q*$jb38`-(
z<~agrO!SI2_`XHFSF}L7Nn-FNY0N>F$u!ee$rU056!m(83g`{SWQ6I>3KmScgh-o-
zr$~%aibmR1lBp1y1W*sly0k_nfgY6+rfDk23{lp@B<hK<6bE`#MTjm?!I*OWux|KC
zlD%Xw1}RL(kcfWyU85+AVdMoXjq$0~G6^&$BTSPD7ED?1qo60tN&NqgL`dKu<>%*@
z2$i2HRabQ7vd4CA@5-e+bJ=`(x^udBXXnoC(NZD0P1YOjqwKx|qcSIKZ+0z~`Fi*u
zhWzV)J$$ZwD)v`jUA)qH-`Bb_fBpw2I#3pylG{y2$OPiME}!JC0jc_M4M18wEbayD
zS`Xax!B6?%pZ39T2V8@XI~V|X<@<X+_`^Q<PQW$zxI+;DFZ&MwzE-RkM<Yk23inR|
z?Kg?dwNn^_$^Ay!ESlrll4TZ?`}gJwc{7<hmNSK6j87KwM#)MQEyGZvo`PU*OqnyM
zvzEvdrp&y^OqH!Mkr~SsN~S2L@@Y}BAfuwlJW$M9Ch&{dyp@4mP@Qs_Ol9+8-_T&+
zUSnHyTXY8&I{<x0{o|OT$O(uMkl_AY-jHp>Zt|zHwn8j)9F;K~8`zgPEZT4$L&#gQ
z;r8*gY{O}2N>l7toRiumqQ2IXzgIz$`#o|r8;<>-!)6<9pWnJ|INIXSYs0H;3e1=d
z2S-$cZo_eGaQOF*TVB&=UuxA)Hb2*<3H{UaR&eEMef9^f=cUNXj$Z=2vgvvF?${WE
z7^WwXcWGe-!lr+~G`jZE(=z=Grm@PG=4JXvn8vDJdQ7Ijk7;!Ir8$}Y4yLinmrl#{
zUt=0we`yBNSiYWL4_1D1k6!tuKKuOANOJJP{8C#?=oik-NOIxCDk#^w4WRcQr&0cV
zCfc#_5wIY{Y5mmh`+%XJYW@V0UCY)wsOpQYs;X@(3mqG0Q1CoakdI6G9d{zR<Enn{
zYDljv>E{;r=z*v7pIo)B2Ls<F1Fb6ynU0P9HGRx~X7?J<73J$j^~wv24?-=TYJMFQ
z0!PlDKA))-4*iMpzxqQI1fS377ZNk*N}!)bmjOMEt^s-mM>o*9_bB@4t%^RjMbXFO
ziq4NKdT|Bl_tvkDi6=I|*JA1F%8Fh|Ea|fkEs64Gs6z_tP&xM<4FxgRC%RGa6Cn5m
zCKpE_b|G;EqT*L~pG;iQ&*&%npPT_Jix(2h67D^jSgyef`pLwC{$>3)4=w7I(O>8%
z_rqs&Q9oHm&XQiau=svZn@ubSW*=H^d-(gai7ODh()RG5G5>3CtgM{=;d$kdmo&Zd
z(6U}BgDRM*><2TWOCSo^&(Ye)!J0}w^#snai<6+PvaHJVx@88H3>Jy);M0ih;OJi5
z<S0g@S`5nmdmjZ)K>;2IS+Uq!6JGof#4mSjnZMi-S%VYK<&JyKWjZ!rgmg2y>~hE9
zb4P3)6x%=kvBApU$L|`fyc!>kSKf#xhfZxuHfds1Kh=r!(9j*wJ6>vC9KHgzJNHJ&
z+Sv0f_1B@wYeSV+`ztTUSFZm@efE4n-}O)Bf5q|p`$ys*j~|KO6F2OAPL2;K2U0oi
zX{6(PY7Y;H$wJyPOV<77m{lm2n76l3&ZYCWTbi7~HJgN%$>z+}VpDg|&RlMq<&n;>
z{BumDm-Ym13VjgQE^Y96?DdtEGeEncT2nxe0>xo_1Nd+r=<U$|7J&XGQ1s=qKsBHd
z7*R2xI6r(AC<bgn41os^iokRva8vuL*3&2p7%mhZhrGH_pnp{av2OVM6fhiJ7;b|P
z3}X=iYzC+ZM+U;{KiqN61FbV+&xQ|f`M_=OLl|x4;Bx@Vx)$?}ha)F~d)Kst+5?cK
z<bMv}Jy3_w0Il+apF+0e{{_Hjf!}H8Pe``p|1IEu1N^Oaen#=Jj4#3GBJfS*4}>FM
z2qwboKOahjwNp)r@aE4o>*4MbEqb_jcGbRcEFbQThr8q9&3$35FTB1l9O(<UO22G}
z@-KlM_5qq}g#X_jm{}E*6&xW-!v}|#3`|ET-A+`r#AIv@rI+E+ivgcT82CNvCY2P>
z)n(W~beZ0_aQt9k{wr5k3Yb0<j3KGM+bxGwn%7|SL6zpU*EfX<*AQJ#bRAJ9_!O0a
z);~PUJxVao8@#r<k?_OKF&Sfgp-H8=+|wk7^B4xsAEqc-pLvppDN3f<uS=BvI`NhE
z|Kc~j$GS_fliGWL=v_oJMDHhhg6J2CqMdzv_ui>(hBv!AwBG2BXt%blds|QUj-Kt>
z=7VNh(^HnhJA1nCKzVFKoRhH=?u*|R_`p<8_qJU^{2ke3x)EhCD7ofhIiJtw$F=dX
zF;_-HxMfrMeBoF&?<vcDnC(vG%HtrKPfeP4YNUwSa$i$&TlNiqOl(O&hvGg;xa?vb
zsDMG1bt}9;jooXE<x-^yA+A##;%ynr7*oZ91<%=RA&&!t#~9fg7(SRB9JpKHf==Av
z(A_HElMWv?;)f0;Kx+8FA>g~pvkJ#Rb&i+ixm9*!m#xBN)-p0hsI@UwfcG*J-zi<T
zT5uJ*7Ctg4TdHnFGnLDZ=Mnd7&pB#OrtUUsv}dC|y~c85eB!*gwJ|;$^DXp)TO0GO
zdiuQUo}W)Q)<e+$x4h%lL%5#9^)3F)<}N?C>CdQbaZOf}^<EFSbsFof#`+oFn;P#!
zaqscsdWUJe4>jIz8}GNchEmU?cl~|^ZiYDapHe;5^Dn&j<R!yArd~4qTxaSf!+WbY
z&>Fx?hWA^=>KL}1+&^`fd4crt_fQOe>w=j!#eaq9J7pgR_p$y5_SIznckP9}rF}J2
z{B7>5`K%*l`MFJBpf-Ig>qxiGo7rc(+-Kg3eYcKZf8_l@rj7pi_V#&y+x@Z8o^R>B
z;~g`<G}^P#p8uEMBj4@Mf3I~RKaZKdT^U|`-t;DGP+ogB(;5_(KW`dB@Pg4BjMd46
z;H**76AuZ?6K|@X53m~x!Rl+?Q@%y;`2nZ=D&h6Jdk9w3{CmAqzD+DN#q5xbhs0{Z
z=L77_5U#k%e@?tzXrB0*>U)M$J}j#1eH$YLtKU`+--qDsh0llE#NmAPT5-`+{@Ut!
z4X6Bdg3oC<aX2LrK5B<#JS5gt&)3?SA^eq^&RaO~8^wqxUOS)cl&}8sXNRF)8^ly(
z&Vj+JtY{MdCVwNhGj?MlxTboRfO-3U@D$;^7G?Re4}QuAKkI{^_rag_!GGg}w_}B&
zUhZ;kBAoY#Q60l>!uefQp+em4W2fwc|A`O&Z6EwuAN+M6d?T)wJnHo!AN&a6?)pAL
zIKOvueZK*?S3mg{$@4oA%m0vYo?jV%j&QgC|I-IwhYv50dTsN;5BcB)AN&hG_@h4f
zlZ3nL`#j<N?#A^EG^z5tagA`de|7`z<!2r6Ae?YnXo<0DNk2?@g!V3&d4zC2uf})@
z@Bo~;nxOdvyAy^_NxXg@`v%#WaoPWtl&_x;e+sx)`TgqkoV2r6w717(IqY9vc3uW~
zys=R?-@=AM08g{J=G!*8(NJH`n<ZX9|943{_49B7@N2>Uc!C~-R0lc=5e)%XxFGTR
zdHX)H^QG39)R1RgAvO(oMBFHnu6h2GQr=}8lEa7x>z{Xj<RgF92Y(W9%-8*VdB#Wn
zUjWzYiz8kId~L8pL|o6!5d4U+*7p3B%?J9|0*?OVab%LsBy`9K^v@}mAKr)ZFc0v2
z1=-2)V~C)gxt5qrcPUuh3G!&i?au+qm-qVQS!B@p2;sL-`*xC@B;g++d@JE8iBrNx
zEFb)Y5B?Rv*9LDChh6nO3%F*p%h8+BPVkz#_0qhL{b$fV_~9toZ)HPW<x&wZqjkZ_
zp)R<EX6)SFSv1oVDNEf<>+0=_Mr;2;60WU{6|J6VK}6|rpOMXHt>~CLZJ2Ol$t4q=
zC|GU`<-G7{IiJhkYvzu7D)=#@^0HNV%2UaQ7nPT;Tt$_|axy^O%8nUDGglb1*Vl<V
z>+R$@>*?gx`@zZg@P|`C8mRY+Q>flQPQEk(P|>pc&?)2cmy;<CIQ?d%bINHc%fxGH
z=)|e4sfto-C_gdmM`kY5DZ5$WP)z~Ay1rM#upnNt&Ngt$`|&({?^3MydrhF;=QW<U
zziWJ#uWJl1KdXy?W2Lfki)_Z43+SF;mF@74jhO*Q*R@T}v}B_$OampW3^%gIMD6~g
zVNJq8UAO^Rf_sVSf-#;e982YlG~R+SQsrqeR+ya1nU<N3{?4{tJG_Mq%)>|(i>c#K
zO{;iZsDriX^5o=kP_d<CO&y|i^Jd)4L%|GV;9z`z!blwG#|xNF>9jEV?>-RUKe*S)
zQGPQFID>5@bh4oL9~8#Eq2a#xkTE<ka43;9lJUNw1X`8%FVU}9?l-*=S}0p^t5X<e
zI%TEkZf7iJzpp88eLA7K3!ZK^8dLW_`BJD`!g&wXNS6x61l%aaJF0`jpqkF+jdIBZ
zkJKuI<4yPsN{~BNDp8*D&Z)eNI$=)Bo2rFL@HAFb-eh$a2ImGRqVBUgF~fkHwA^fX
z#n$uQEZ&NB>ZvQTPE15g$0x1SF`!ma(Fu;pliKjK$iorxXuNN*(@Kp~YCK<#9xG?z
zl6Dp*5=>3t0rY74cpmI2Y84gdezRD@^XF2G5(e<#S&>2oicRI<@hANnwajUV%MKGQ
z7G!sbniJHuC(@wHDW#$MxYFP_7*0)Q;mJFWk5%}J$_$h-Xde-U(KZPqNR>-gCYrLW
zV)j@W?w;`?;O$equL4W8>g_k)uj0Q;!1q-+2b8sk_d^2jh~R4<2Hw}=d;gk5YhS!>
zWTXcoc75LevdG|VEJ)O?-w*h!aHoXxKNE~e$ull}UT0R<O;QT(u>pluShf7~!5E|j
z?5jy4da1&&{E~iReZKE<7vS=hQHfXg-IfuA@J&Uk-){fi101{~!{z7wxvx`2`Th`>
zpXHd|3;KBHhSR){H&6N+<?Jp$!Qw%PpeXC}e%}J=Pm>_$@2>wNq~Aw!yf1i|6dtwd
z<8P*J{m%o2KEd+I&=Mo62;ZktDOFaEBdx&4uCI~4Mh0h4$QI<80Z23bV~E@J=SY8!
z^hc>3*#zrx`YSGd-nU$)i&1M>kZ6-DJqKybpZ(AKo2_)wYr-YZ{$m*ZYuD#}Q2zTF
zNfspP&i~skecms<Ncrn%+!oyV&jT+EA9fSpgX6!a;g;t#Q@rbmXBt?a_XC#6aV!TS
zGH{ydF95}+V*TpA0Tv3v0+pZp7wd65J_j-=!utI85*Fy7g$6u(V11@90@JQv-S?#o
zM(oNEXFaAbyY%_}eT4Mg?M2n8T789Va{2lFFiHAEs`6SjrXsHaL7Uw!ob~zrnc18_
z(-1`Xjv&hLzVsuM@#F7hO+~%x!+qZ_8H=ja4D~~N4jqIK`;F&c9A;d1rJ=s(dlh|=
Z8=jb4ZkENnTweNT_b7??xD;GT@n1qa+-3j(

literal 0
HcmV?d00001

diff --git a/pipelined/srt/modtestgen.c b/pipelined/srt/modtestgen.c
new file mode 100644
index 000000000..74d66f92d
--- /dev/null
+++ b/pipelined/srt/modtestgen.c
@@ -0,0 +1,73 @@
+/* modtestgen.c */
+
+/* Written 7/21/2022 by Cedar Turek
+
+   This program creates test vectors for modulo
+   calculation from integer divide.
+   */
+
+/* #includes */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* Constants */
+
+#define ENTRIES  10
+#define RANDOM_VECS 500
+
+/* Prototypes */
+
+void output(FILE *fptr, long a, long b, long rem);
+void printhex(FILE *fptr, long x);
+double random_input(void);
+
+/* Main */
+
+int main(void)
+{
+  /* Writes file "modtestvectors": one a_b_(a%b) line for every ordered pair (a, b) taken from list. */
+  FILE *fptr;
+  long a, b, rem;
+  long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255};
+  int i, j;
+  if ((fptr = fopen("modtestvectors","w")) == NULL) {
+    fprintf(stderr, "Couldn't write modtestvectors file\n");
+    exit(1);
+  }
+
+  for (i=0; i<ENTRIES; i++) {      /* outer loop picks the divisor b */
+    b = list[i];                   /* list holds no zero, so a%b is always defined */
+    for (j=0; j<ENTRIES; j++) {    /* inner loop picks the dividend a */
+      a = list[j];
+      rem = a%b;
+      output(fptr, a, b, rem);
+    }
+  }
+
+  return (fclose(fptr) == 0) ? 0 : 1; /* buffered write errors surface at fclose; report them in the exit status */
+}
+
+/* Functions */
+
+void output(FILE *fptr, long a, long b, long rem) /* writes one vector line: a, b, rem via printhex, separated by '_' */
+{
+  printhex(fptr, a);
+  fputs("_", fptr);
+  printhex(fptr, b);
+  fputs("_", fptr);
+  printhex(fptr, rem);
+  fputs("\n", fptr);
+}
+
+void printhex(FILE *fptr, long m) /* write m to fptr as 16 zero-padded lowercase hex digits */
+{
+    fprintf(fptr, "%016llx", (unsigned long long)m); /* %llx takes unsigned long long; passing a bare long is a type mismatch */
+}
+
+double random_input(void) /* returns 1.0 plus rand() scaled by 1/32767.0; main() in this file never calls it */
+{
+  return rand()/32767.0 + 1.0;
+}
+  
diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen
index d2cb80d445718cfa8efc5daa24fefc6b5d462149..45fc6e7868a87d7e5f3b9308e340b3600a392273 100755
GIT binary patch
delta 1437
zcmY+^Uu;uV7y$5d+I4NWdv|Yfr3~2CVK=)?SaFgKA#icM+r^NG!DhtZWG~g}OMM_U
z{$Vp?x@mMXpONuo^#zC~EZhW0G<2234vn%IlBv8jC`l)pU|%dcxPIr}uZy0f_nhB%
z{(R@0-rULaeDXY>S@dfGW*6;)pR$3B;vl1Fb|V94`HlJm(ND)e8+rKp=J)G)=H#`G
zo<s2=bJjKF@rCO&X5@2oaNoN;n)NVNQUVj2WmzRf^;{$y<a6YM<kRHC<d4Zm$tTG#
zkf+Hh^4sKB9KYxpbpraB$o?u}Qs5Y2Mj%C)6?ln|5l9lw2n-YE1db3c2@Jwze`NGA
zCp0mC-#P9h<OOmEd6B$@{3CfIxj<e=E|P1=cgT-dIoDfuJnAV?fYE*YM)r|!l2!80
z<PGE>$YJs~<S6+oatxi<%a)t5%Le}8*(dN(ghEY(9|ceCFuwrL*KT!{-SAQEF+Ks3
z)W~0h4ylR11N)`+*o1gVrt+7>8&&+{Z~M0Ng7L0i#%Qq~qfYzlY>gQ41$-}chVq=T
zq|&7q(!bnZkC(0&@FDL6RSvFy(8XB#MqXbPE8b4py~X3OOK#$W@QmC!IP>3()tGTy
zU#%K{W3~JEhcj2NB#fzB#e?=fI_AZy60TQLhH^<Ix`6hGbxX?O+_P7b+mz-LA`4TR
zH3p0FhEPNevxM>9R#6{1(0<Dp|J_#%-8C)wJHCDFFUy+6(OTKEQdp<37W3QgTGl+)
z1FM!*z}k1uvKXFl8SW{&+81zE7e-m!tEyPV@b@dm@CC3-1fOyI6<}C>h!4S(+T8No
zKbDmfz5D9aw_JU4wd+gC>tk#)hCUZ@xT-eubLNWbl_3=f!<|NhN6np0(?Nd}uSb*^
zEOqz!=VV8%X5*vZ@-+tG&mEmor89_AV61af9bOe<LZY<VW7Z_bq_$i)kMx|V_P4H$
zS?b>Oe?fFl#2=&n9k8_b0Vwc#^YuNuIgcWPZiV$?9oks=4%yl0;bs9=R!o<+&CTav
zdtZpR!v4NxY`4Ti4VBGQX7DO(xnvfEr{Xx$9Ep25-wiM8X`X=<y^DvTq5nyqgOUEd
znEY&iwn3wvRhDS0W|&fJ>CJ%<ra3h*-Ec<I?CxtcD2-#*F`1>!ea2~t>#*{4*V;Wt
gp(WAd@1<pXVHj08Sq45y^wgB;dyD6~nP_wW3-1oZiU0rr

delta 1465
zcmY+^UrZcD7y$5@<BsECm)&c@JJLeITw4Bw(v}k9pTJRW>DC%W^pcCgdNmOX4+bBa
z^Pvh2ZG|+3GoKplgIZ0)A5A3P2OCMaBQ?D?P1D0PrP`QY+BCU}B*;roq+P$+{lGfO
z&g}1-Z)SF9Cp&ze51;4pv?uCiR*8P{Yr>mUY$TPSb(4fie!FDvM_+cGY5Z<$-^8uW
zFD?FZbn4yJo5q;qkgFhA9AtWGE)9>}3j=ptjIAi%lR?uoR}|H?K-S6E$(`gBIYLg5
zqvRjSN60^r`^n?v_igvNzOn<_pjiD3VOZcZLR=tD7!x>6_*&o;VM5?IVOroA;etR6
zW<7y{*X_{B<YW7FyU3TxFObvZedJr@XUQ4z9&(mkOI{|w<+QgqXM4c4i4qv?M<Ka@
z>>#V;Rhqz0ULjYK{~%YB?~-?+Q(Df<V{G;ae(*#eR$0gt2KXSjiVpLe@J>;^!|#O8
zi;nVhAW7wX9BQQsJ_QG)n(lMrkc^}*h!d4vTD5jd%jl=I97eO^pxO7A5^su(8O#uY
zpAc+z>dC>=)TNJHN)(FRQTRrx+c(1*>rooCOzhs`Q}}_j3|4ZzL+sa~IXse9K+9l`
z%ZJg}?Uc4AW?$^Fva?;F$QAq`?3C*cPCvQeZ-1NSkhbO=`e3c`!%yQ^E{64y``Li?
z4z)?K3Zqqel<Lc9*D_AKqD1E2xY!eiY1!{{s+BCPe_SuxS6{5TuMhoMkbRgl?#S2p
z?&w3)9K#WnIn(UNwiDYE*yiK9No;qlnPvvtl1HY=@GbAb@5*yE6F8~?qb%fBRcv<Q
z^)trsds`)dbqKE{w5dCI3-qens#^as%_Y&>R;-?Kw9EO9tmH0WY%7Mo$4i1qbsL{C
zuBdJqmdkan8_!gXmU?vD4pBN`zOmhtmThT9`QEF1gH7;PQ=PQl*@Q!2ux?8+ZVh8X
zqG(pmhD1*P(>ILn<`elIZ6jyCam)V&)hz)}C-sM6zI8KXc!_bW<$2C^WNDtlBG~$P
z5_quG>1BS^a0GYe@igpi_i+slv@hU&RmfMizPR-~SaCey%ptQTcqN3HMsLW?c{jYL
z#c;9*S_2P4S;xzK33@wPak0scL|K%cWPOV5`HXp)c!cBoeYngQ`$x-eNI|RnIwi$0
yXA_gzw?>;jF7dbF!D|g0`>I1#xY-k-X(MnL_21w3Y`D45uiDe$d)*Ch&-)idgXL=g

diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c
index 76c6a6649..7a45449fd 100644
--- a/pipelined/srt/sqrttestgen.c
+++ b/pipelined/srt/sqrttestgen.c
@@ -86,10 +86,6 @@ void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac)
   // Print r in standard double format
   fprintf(fptr, "%03x", rExp);
   printhex(fptr, rFrac);
-  fprintf(fptr, "_");
-
-  // Spacing for testbench, value doesn't matter
-  fprintf(fptr, "%016x", 0);
   fprintf(fptr, "\n");
 }
 
diff --git a/pipelined/srt/srt-waves.do b/pipelined/srt/srt-waves.do
index 1e0c3f281..2fbf40c18 100644
--- a/pipelined/srt/srt-waves.do
+++ b/pipelined/srt/srt-waves.do
@@ -2,4 +2,5 @@ add wave -noupdate /testbench/*
 add wave -noupdate /testbench/srt/*
 add wave -noupdate /testbench/srt/sotfc2/*
 add wave -noupdate /testbench/srt/preproc/*
+add wave -noupdate /testbench/srt/postproc/*
 add wave -noupdate /testbench/srt/divcounter/*
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 7cbcd0daa..1b61bc14f 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -47,9 +47,10 @@ module srt (
   // Selection
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
+  input  logic       Mod, // perform remainder calculation (modulo) instead of divide
   input  logic       Sqrt, // perform square root, not divide
   output logic       rsign, done,
-  output logic [`DIVLEN-1:0] Rem, Result,
+  output logic [`DIVLEN-1:0] Result,
   output logic [`NE-1:0] rExp,
   output logic [3:0] Flags
 );
@@ -63,7 +64,7 @@ module srt (
   logic                       intSign;
   logic                       cin;
  
-  srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign);
+  srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Mod, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign);
 
   // Top Muxes and Registers
   // When start is asserted, the inputs are loaded into the divider.
@@ -105,7 +106,7 @@ module srt (
   
   expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt);
 
-  srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Result, .Rem, .calcSign);
+  srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Mod, .Result, .calcSign);
 endmodule
 
 ////////////////
@@ -123,6 +124,7 @@ module srtpreproc (
   input  logic       W64, // 32-bit ints on XLEN=64
   input  logic       Signed, // Interpret integers as signed 2's complement
   input  logic       Int, // Choose integer inputs
+  input  logic       Mod, // perform remainder calculation (modulo) instead of divide
   input  logic       Sqrt, // perform square root, not divide
   output logic [`DIVLEN+3:0] X, D,
   output logic [$clog2(`XLEN+1)-1:0] zeroCntB, intExp, dur, // Quotient integer exponent
@@ -161,7 +163,7 @@ module srtpreproc (
   assign D = {4'b0001, Int ? PreprocB : PreprocY};
 
   // Integer exponent and sign calculations
-  assign intExp = zeroCntB - zeroCntA + 1;
+  assign intExp = zeroCntB - zeroCntA - Mod + (PreprocA >= PreprocB);
   assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]);
 
   // Number of cycles of divider
@@ -398,8 +400,8 @@ endmodule
 module srtpostproc(
   input  logic [`DIVLEN+3:0] WS, WC, X, D, S, SM,
   input  logic [$clog2(`XLEN+1)-1:0] dur, zeroCntD,
-  input  logic XSign, YSign, Signed, Int,
-  output logic [`DIVLEN-1:0]   Result, Rem,
+  input  logic XSign, YSign, Signed, Int, Mod,
+  output logic [`DIVLEN-1:0] Result,
   output logic calcSign
 );
   logic [`DIVLEN+3:0] W, shiftRem, intRem, intS; 
@@ -460,9 +462,11 @@ module srtpostproc(
   end
   assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0];
   assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0];
-  assign Result = Int ? intRes : floatRes;
+  assign shiftRem = (intRem >>> (`DIVLEN - dur + 2));
+  always_comb 
+    if (Int)      Result = intRes >> (`DIVLEN - dur);
+    else if (Mod) Result = shiftRem[`DIVLEN-1:0];
+    else          Result = floatRes;
   assign calcSign = XSign ^ YSign;
-  assign shiftRem = intRem >>> dur;
-  assign Rem = shiftRem[`DIVLEN-1:0];
 endmodule
 
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index b72ffb42e..513305b26 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -42,24 +42,23 @@ module testbench;
   logic               clk;
   logic               req;
   logic               done;
-  logic               Int;
+  logic               Int, Sqrt, Mod;
   logic [`XLEN-1:0]   a, b;
   logic [`NF-1:0]     afrac, bfrac;
   logic [`NE-1:0]     aExp, bExp;
   logic               asign, bsign;
   logic [`NF-1:0]     r;
   logic [`XLEN-1:0]   rInt;
-  logic [`DIVLEN-1:0] Quot, Rem;
+  logic [`DIVLEN-1:0] Quot;
  
   // Test parameters
   parameter MEM_SIZE = 40000;
   parameter MEM_WIDTH = 64+64+64+64;
  
   // Test sizes
-  `define memrem  63:0 
-  `define memr  127:64
-  `define memb  191:128
-  `define mema  255:192
+  `define memr  63:0 
+  `define memb  127:64
+  `define mema  191:128
 
   // Test logicisters
   logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE];  // Space for input file
@@ -70,9 +69,10 @@ module testbench;
   logic        rsign;
   integer testnum, errors;
 
-  // Equip Int test or Sqrt test
-  assign Int = 1'b0;
-  assign Sqrt = 1'b1;
+  // Equip Int, Sqrt, or IntMod test
+  assign Int =  1'b1;
+  assign Mod =  1'b0;
+  assign Sqrt = 1'b0;
 
   // Divider
   srt srt(.clk, .Start(req), 
@@ -81,8 +81,8 @@ module testbench;
                 .XSign(asign), .YSign(bsign), .rsign,
                 .SrcXFrac(afrac), .SrcYFrac(bfrac), 
                 .SrcA(a), .SrcB(b), .Fmt(2'b00), 
-                .W64(1'b1), .Signed(1'b0), .Int, .Sqrt, 
-                .Result(Quot), .Rem, .Flags(), .done);
+                .W64(1'b1), .Signed(1'b0), .Int, .Mod, .Sqrt, 
+                .Result(Quot), .Flags(), .done);
 
   // Counter
   // counter counter(clk, req, done);
@@ -101,7 +101,7 @@ module testbench;
     begin
       testnum = 0; 
       errors = 0;
-      $readmemh ("sqrttestvectors", Tests);
+      $readmemh ("inttestvectors", Tests);
       Vec = Tests[testnum];
       a = Vec[`mema];
       {asign, aExp, afrac} = a;
@@ -109,7 +109,7 @@ module testbench;
       {bsign, bExp, bfrac} = b;
       nextr = Vec[`memr];
       r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
-      rInt = {1'b1, Quot};
+      rInt = Quot;
       req <= #5 1;
     end
   
@@ -119,7 +119,7 @@ module testbench;
     r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)];
     rInt = Quot;
     if (done) begin
-      if (~Int & ~Sqrt) begin
+      if (~Int & ~Sqrt) begin // This test case checks floating point division
         req <= #5 1;
         diffp = correctr[51:0] - r;
         diffn = r - correctr[51:0];
@@ -135,23 +135,21 @@ module testbench;
             $display("%d Tests completed successfully", testnum);
             $stop;
           end
-      end else if (~Sqrt) begin
+      end else if (~Sqrt) begin // This test case works for both integer divide and integer modulo
         req <= #5 1;
         diffp = correctr[63:0] - rInt;
-        diffn = rInt - correctr[63:0];
-        if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp
+        if (($signed(diffp) != 0) | (diffp === 64'bx)) // check for an exact match (any nonzero or unknown difference is an error)
           begin
             errors = errors+1;
-            $display("result was %h, should be %h %h %h\n", rInt, correctr, diffn, diffp);
+            $display("result was %h, should be %h %h\n", rInt, correctr, diffp);
             $display("failed\n");
-            $stop;
           end
         if (afrac === 52'hxxxxxxxxxxxxx)
         begin
-          $display("%d Tests completed successfully", testnum);
+          $display("%d Tests completed successfully", testnum - errors);
           $stop;
         end
-      end else begin 
+      end else begin // This test case verifies square root
         req <= #5 1;
         diffp = correctr[51:0] - r;
         diffn = r - correctr[51:0];

From 3d2c6683d8d4331134ea8f54a4b318812e38e97f Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Thu, 21 Jul 2022 20:35:46 -0700
Subject: [PATCH 30/36] Fixed UART bug related to parity and MSR/LSR

---
 pipelined/src/uncore/uartPC16550D.sv | 50 ++++++++++++++++------------
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv
index 524a63454..89bdc837d 100644
--- a/pipelined/src/uncore/uartPC16550D.sv
+++ b/pipelined/src/uncore/uartPC16550D.sv
@@ -165,6 +165,7 @@ module uartPC16550D(
       SCR <= #1 8'b0; // not strictly necessary to reset
     end else begin
       if (~MEMWb) begin
+        /* verilator lint_off CASEINCOMPLETE */
         case (A)
 		  /* -----\/----- EXCLUDED -----\/-----
            3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section
@@ -177,34 +178,40 @@ module uartPC16550D(
 		  // freq /baud / 16 = div
           //3'b000: if (DLAB) DLL <= #1 8'd38; //else TXHR <= #1 Din; // TX handled in TX register/FIFO section
 		  //3'b000: if (DLAB) DLL <= #1 8'd11; //else TXHR <= #1 Din; // TX handled in
-		  3'b000: if (DLAB) DLL <= #1 8'd8; //else TXHR <= #1 Din; // TX handled in 		  
+		      3'b000: if (DLAB) DLL <= #1 8'd8; //else TXHR <= #1 Din; // TX handled in 		  
           3'b001: if (DLAB) DLM <= #1 8'b0; else IER <= #1 Din[3:0];
-
           3'b010: FCR <= #1 {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing
           3'b011: LCR <= #1 Din;
           3'b100: MCR <= #1 Din[4:0];
-          3'b101: LSR[6:1] <= #1 Din[6:1];  // recommended only for test, see 8.6.3
-          3'b110: MSR <= #1 Din[3:0];
           3'b111: SCR <= #1 Din;
         endcase
+        /* verilator lint_on CASEINCOMPLETE */
       end
-      
+
       // Line Status Register (8.6.3)
-      //   Ben 6/9/21 I don't like how this is a register. A lot of the individual bits have clocked components, so this just adds unecessary delay.
-      LSR[0] <= #1 rxdataready; // Data ready
-      LSR[1] <= #1 (LSR[1] | RXBR[10]) & ~squashRXerrIP;; // overrun error
-      LSR[2] <= #1 (LSR[2] | RXBR[9]) & ~squashRXerrIP; // parity error
-      LSR[3] <= #1 (LSR[3] | RXBR[8]) & ~squashRXerrIP; // framing error
-      LSR[4] <= #1 (LSR[4] | rxbreak) & ~squashRXerrIP; // break indicator
-      LSR[5] <= #1 THRE; // THRE
-      LSR[6] <= #1 ~txsrfull & THRE; //  TEMT
-      if (rxfifohaserr) LSR[7] <= #1 1; // any bits in FIFO have error
+      // Ben 6/9/21 I don't like how this is a register. A lot of the individual bits have clocked components, so this just adds unecessary delay.
+      if (~MEMWb & (A == 3'b101))
+        LSR[6:1] <= #1 Din[6:1]; // recommended only for test, see 8.6.3
+      else begin
+        LSR[0] <= #1 rxdataready; // Data ready
+        LSR[1] <= #1 (LSR[1] | RXBR[10]) & ~squashRXerrIP;; // overrun error
+        LSR[2] <= #1 (LSR[2] | RXBR[9]) & ~squashRXerrIP; // parity error
+        LSR[3] <= #1 (LSR[3] | RXBR[8]) & ~squashRXerrIP; // framing error
+        LSR[4] <= #1 (LSR[4] | rxbreak) & ~squashRXerrIP; // break indicator
+        LSR[5] <= #1 THRE; // THRE
+        LSR[6] <= #1 ~txsrfull & THRE; //  TEMT
+        if (rxfifohaserr) LSR[7] <= #1 1; // any bits in FIFO have error
+      end
 
       // Modem Status Register (8.6.8)
-      MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send
-      MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready
-      MSR[2] <= #1 MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator
-      MSR[3] <= #1 MSR[3] | DCDb2 ^ DCDbsync; // Delta Data Carrier Detect
+      if (~MEMWb & (A == 3'b110))
+        MSR <= #1 Din[3:0];
+      else begin
+        MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send
+        MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready
+        MSR[2] <= #1 MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator
+        MSR[3] <= #1 MSR[3] | DCDb2 ^ DCDbsync; // Delta Data Carrier Detect
+      end
     end
   always_comb
     if (~MEMRb)
@@ -215,7 +222,8 @@ module uartPC16550D(
         3'b011: Dout = LCR;
         3'b100: Dout = {3'b000, MCR};
         3'b101: Dout = LSR;
-        3'b110: Dout = {~CTSbsync, ~DSRbsync, ~RIbsync, ~DCDbsync, MSR[3:0]}; 
+        // 3'b110: Dout = {~CTSbsync, ~DSRbsync, ~RIbsync, ~DCDbsync, MSR[3:0]}; 
+        3'b110: Dout = {~DCDbsync, ~RIbsync, ~DSRbsync, ~CTSbsync, MSR[3:0]};
         3'b111: Dout = SCR;      
       endcase
     else Dout = 8'b0;
@@ -304,7 +312,7 @@ module uartPC16550D(
 
   // ERROR CONDITIONS
   assign rxparity = ^rxdata;
-  assign rxparityerr = rxparity ^ rxparitybit ^ ~evenparitysel; // Check even/odd parity (*** check if LCR needs to be inverted)
+  assign rxparityerr = (rxparity ^ rxparitybit ^ ~evenparitysel) & LCR[3]; // Check even/odd parity (*** check if LCR needs to be inverted)
   assign rxoverrunerr = fifoenabled ? (rxfifoentries == 15) : rxdataready; // overrun if FIFO or receive buffer register full 
   assign rxframingerr = ~rxstopbit; // framing error if no stop bit
   assign rxbreak = rxframingerr & (rxdata9 == 9'b0); // break when 0 for start + data + parity + stop time
@@ -405,7 +413,7 @@ module uartPC16550D(
       txstate <= #1 UART_IDLE;
     end
 
-  assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s)
+  assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) - 1
   // *** explain; is this necessary?
   if (`QEMU) assign txnextbit = txbaudpulse & (txoversampledcnt[1:0] == 2'b00);  // implies txstate = UART_ACTIVE
   else       assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000);  // implies txstate = UART_ACTIVE

From 8dcb794bbb713e144ef420c3cf008408cfc6d47e Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Thu, 21 Jul 2022 20:58:58 -0700
Subject: [PATCH 31/36] added support for new version of riscof and arch tests,
 now supports tests that can be compiled for both rv32 and rv64

---
 pipelined/testbench/tests.vh               | 346 ++++++++++-----------
 tests/riscof/Makefile                      |   5 +-
 tests/riscof/sail_cSim/riscof_sail_cSim.py |   2 +-
 tests/riscof/spike/riscof_spike.py         |   2 +-
 4 files changed, 178 insertions(+), 177 deletions(-)

diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index df06eb010..b10bb951b 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -1057,176 +1057,176 @@ string imperas32f[] = '{
 
   string arch64d[] = '{
     `RISCVARCHTEST,
-    "rv64i_m/D/src/d_fadd_b10-01.S",
-    "rv64i_m/D/src/d_fadd_b1-01.S",
-    "rv64i_m/D/src/d_fadd_b11-01.S",
-    "rv64i_m/D/src/d_fadd_b12-01.S",
-    "rv64i_m/D/src/d_fadd_b13-01.S",
-    "rv64i_m/D/src/d_fadd_b2-01.S",
-    "rv64i_m/D/src/d_fadd_b3-01.S",
-    "rv64i_m/D/src/d_fadd_b4-01.S",
-    "rv64i_m/D/src/d_fadd_b5-01.S",
-    "rv64i_m/D/src/d_fadd_b7-01.S",
-    "rv64i_m/D/src/d_fadd_b8-01.S",
-    "rv64i_m/D/src/d_fclass_b1-01.S",
-    "rv64i_m/D/src/d_fcvt.d.l_b25-01.S",
-    "rv64i_m/D/src/d_fcvt.d.l_b26-01.S",
-    "rv64i_m/D/src/d_fcvt.d.lu_b25-01.S",
-    "rv64i_m/D/src/d_fcvt.d.lu_b26-01.S",
-    "rv64i_m/D/src/d_fcvt.d.s_b1-01.S",
-    "rv64i_m/D/src/d_fcvt.d.s_b22-01.S",
-    "rv64i_m/D/src/d_fcvt.d.s_b23-01.S",
-    "rv64i_m/D/src/d_fcvt.d.s_b24-01.S",
-    "rv64i_m/D/src/d_fcvt.d.s_b27-01.S",
-    "rv64i_m/D/src/d_fcvt.d.s_b28-01.S",
-    "rv64i_m/D/src/d_fcvt.d.s_b29-01.S",
-    "rv64i_m/D/src/d_fcvt.d.w_b25-01.S",
-    "rv64i_m/D/src/d_fcvt.d.w_b26-01.S",
-    "rv64i_m/D/src/d_fcvt.d.wu_b25-01.S",
-    "rv64i_m/D/src/d_fcvt.d.wu_b26-01.S",
-    "rv64i_m/D/src/d_fcvt.l.d_b1-01.S",
-    "rv64i_m/D/src/d_fcvt.l.d_b22-01.S",
-    "rv64i_m/D/src/d_fcvt.l.d_b23-01.S",
-    "rv64i_m/D/src/d_fcvt.l.d_b24-01.S",
-    "rv64i_m/D/src/d_fcvt.l.d_b27-01.S",
-    "rv64i_m/D/src/d_fcvt.l.d_b28-01.S",
-    "rv64i_m/D/src/d_fcvt.l.d_b29-01.S",
-    "rv64i_m/D/src/d_fcvt.lu.d_b1-01.S",
-    "rv64i_m/D/src/d_fcvt.lu.d_b22-01.S",
-    "rv64i_m/D/src/d_fcvt.lu.d_b23-01.S",
-    "rv64i_m/D/src/d_fcvt.lu.d_b24-01.S",
-    "rv64i_m/D/src/d_fcvt.lu.d_b27-01.S",
-    "rv64i_m/D/src/d_fcvt.lu.d_b28-01.S",
-    "rv64i_m/D/src/d_fcvt.lu.d_b29-01.S",
-    "rv64i_m/D/src/d_fcvt.s.d_b1-01.S",
-    "rv64i_m/D/src/d_fcvt.s.d_b22-01.S",
-    "rv64i_m/D/src/d_fcvt.s.d_b23-01.S",
-    "rv64i_m/D/src/d_fcvt.s.d_b24-01.S",
-    "rv64i_m/D/src/d_fcvt.s.d_b27-01.S",
-    "rv64i_m/D/src/d_fcvt.s.d_b28-01.S",
-    "rv64i_m/D/src/d_fcvt.s.d_b29-01.S",
-    "rv64i_m/D/src/d_fcvt.w.d_b1-01.S",
-    "rv64i_m/D/src/d_fcvt.w.d_b22-01.S",
-    "rv64i_m/D/src/d_fcvt.w.d_b23-01.S",
-    "rv64i_m/D/src/d_fcvt.w.d_b24-01.S",
-    "rv64i_m/D/src/d_fcvt.w.d_b27-01.S",
-    "rv64i_m/D/src/d_fcvt.w.d_b28-01.S",
-    "rv64i_m/D/src/d_fcvt.w.d_b29-01.S",
-    "rv64i_m/D/src/d_fcvt.wu.d_b1-01.S",
-    "rv64i_m/D/src/d_fcvt.wu.d_b22-01.S",
-    "rv64i_m/D/src/d_fcvt.wu.d_b23-01.S",
-    "rv64i_m/D/src/d_fcvt.wu.d_b24-01.S",
-    "rv64i_m/D/src/d_fcvt.wu.d_b27-01.S",
-    "rv64i_m/D/src/d_fcvt.wu.d_b28-01.S",
-    "rv64i_m/D/src/d_fcvt.wu.d_b29-01.S",
-    "rv64i_m/D/src/d_fdiv_b1-01.S",
-    "rv64i_m/D/src/d_fdiv_b20-01.S",
-    "rv64i_m/D/src/d_fdiv_b2-01.S",
-    "rv64i_m/D/src/d_fdiv_b21-01.S",
-    "rv64i_m/D/src/d_fdiv_b3-01.S",
-    "rv64i_m/D/src/d_fdiv_b4-01.S",
-    "rv64i_m/D/src/d_fdiv_b5-01.S",
-    "rv64i_m/D/src/d_fdiv_b6-01.S",
-    "rv64i_m/D/src/d_fdiv_b7-01.S",
-    "rv64i_m/D/src/d_fdiv_b8-01.S",
-    "rv64i_m/D/src/d_fdiv_b9-01.S",
-    "rv64i_m/D/src/d_feq_b1-01.S",
-    "rv64i_m/D/src/d_feq_b19-01.S",
-    "rv64i_m/D/src/d_fle_b1-01.S",
-    "rv64i_m/D/src/d_fle_b19-01.S",
-    "rv64i_m/D/src/d_flt_b1-01.S",
-    "rv64i_m/D/src/d_flt_b19-01.S",
-    "rv64i_m/D/src/d_fld-align-01.S",
-    "rv64i_m/D/src/d_fsd-align-01.S",
-    "rv64i_m/D/src/d_fmadd_b14-01.S",
-    "rv64i_m/D/src/d_fmadd_b16-01.S",
-    "rv64i_m/D/src/d_fmadd_b17-01.S",
-    "rv64i_m/D/src/d_fmadd_b18-01.S",
-    "rv64i_m/D/src/d_fmadd_b2-01.S",
-    "rv64i_m/D/src/d_fmadd_b3-01.S",
-    "rv64i_m/D/src/d_fmadd_b4-01.S",
-    "rv64i_m/D/src/d_fmadd_b5-01.S",
-    "rv64i_m/D/src/d_fmadd_b6-01.S",
-    "rv64i_m/D/src/d_fmadd_b7-01.S",
-    "rv64i_m/D/src/d_fmadd_b8-01.S",
-    "rv64i_m/D/src/d_fmax_b1-01.S",
-    "rv64i_m/D/src/d_fmax_b19-01.S",
-    "rv64i_m/D/src/d_fmin_b1-01.S",
-    "rv64i_m/D/src/d_fmin_b19-01.S",
-    "rv64i_m/D/src/d_fmsub_b14-01.S",
-    "rv64i_m/D/src/d_fmsub_b16-01.S",
-    "rv64i_m/D/src/d_fmsub_b17-01.S",
-    "rv64i_m/D/src/d_fmsub_b18-01.S",
-    "rv64i_m/D/src/d_fmsub_b2-01.S",
-    "rv64i_m/D/src/d_fmsub_b3-01.S",
-    "rv64i_m/D/src/d_fmsub_b4-01.S",
-    "rv64i_m/D/src/d_fmsub_b5-01.S",
-    "rv64i_m/D/src/d_fmsub_b6-01.S",
-    "rv64i_m/D/src/d_fmsub_b7-01.S",
-    "rv64i_m/D/src/d_fmsub_b8-01.S",
-    "rv64i_m/D/src/d_fmul_b1-01.S",
-    "rv64i_m/D/src/d_fmul_b2-01.S",
-    "rv64i_m/D/src/d_fmul_b3-01.S",
-    "rv64i_m/D/src/d_fmul_b4-01.S",
-    "rv64i_m/D/src/d_fmul_b5-01.S",
-    "rv64i_m/D/src/d_fmul_b6-01.S",
-    "rv64i_m/D/src/d_fmul_b7-01.S",
-    "rv64i_m/D/src/d_fmul_b8-01.S",
-    "rv64i_m/D/src/d_fmul_b9-01.S",
-    "rv64i_m/D/src/d_fmv.d.x_b25-01.S",
-    "rv64i_m/D/src/d_fmv.d.x_b26-01.S",
-    "rv64i_m/D/src/d_fmv.x.d_b1-01.S",
-    "rv64i_m/D/src/d_fmv.x.d_b22-01.S",
-    "rv64i_m/D/src/d_fmv.x.d_b23-01.S",
-    "rv64i_m/D/src/d_fmv.x.d_b24-01.S",
-    "rv64i_m/D/src/d_fmv.x.d_b27-01.S",
-    "rv64i_m/D/src/d_fmv.x.d_b28-01.S",
-    "rv64i_m/D/src/d_fmv.x.d_b29-01.S",
-    "rv64i_m/D/src/d_fnmadd_b14-01.S",
-    "rv64i_m/D/src/d_fnmadd_b16-01.S",
-    "rv64i_m/D/src/d_fnmadd_b17-01.S",
-    "rv64i_m/D/src/d_fnmadd_b18-01.S",
-    "rv64i_m/D/src/d_fnmadd_b2-01.S",
-    "rv64i_m/D/src/d_fnmadd_b3-01.S",
-    "rv64i_m/D/src/d_fnmadd_b4-01.S",
-    "rv64i_m/D/src/d_fnmadd_b5-01.S",
-    "rv64i_m/D/src/d_fnmadd_b6-01.S",
-    "rv64i_m/D/src/d_fnmadd_b7-01.S",
-    "rv64i_m/D/src/d_fnmadd_b8-01.S",
-    "rv64i_m/D/src/d_fnmsub_b14-01.S",
-    "rv64i_m/D/src/d_fnmsub_b16-01.S",
-    "rv64i_m/D/src/d_fnmsub_b17-01.S",
-    "rv64i_m/D/src/d_fnmsub_b18-01.S",
-    "rv64i_m/D/src/d_fnmsub_b2-01.S",
-    "rv64i_m/D/src/d_fnmsub_b3-01.S",
-    "rv64i_m/D/src/d_fnmsub_b4-01.S",
-    "rv64i_m/D/src/d_fnmsub_b5-01.S",
-    "rv64i_m/D/src/d_fnmsub_b6-01.S",
-    "rv64i_m/D/src/d_fnmsub_b7-01.S",
-    "rv64i_m/D/src/d_fnmsub_b8-01.S",
-    "rv64i_m/D/src/d_fsgnj_b1-01.S",
-    "rv64i_m/D/src/d_fsgnjn_b1-01.S",
-    "rv64i_m/D/src/d_fsgnjx_b1-01.S",
-    // "rv64i_m/D/src/d_fsqrt_b1-01.S",
-    // "rv64i_m/D/src/d_fsqrt_b20-01.S",
-    // "rv64i_m/D/src/d_fsqrt_b2-01.S",
-    // "rv64i_m/D/src/d_fsqrt_b3-01.S",
-    // "rv64i_m/D/src/d_fsqrt_b4-01.S",
-    // "rv64i_m/D/src/d_fsqrt_b5-01.S",
-    // "rv64i_m/D/src/d_fsqrt_b7-01.S",
-    // "rv64i_m/D/src/d_fsqrt_b8-01.S",
-    // "rv64i_m/D/src/d_fsqrt_b9-01.S",
-    "rv64i_m/D/src/d_fsub_b10-01.S",
-    "rv64i_m/D/src/d_fsub_b1-01.S",
-    "rv64i_m/D/src/d_fsub_b11-01.S",
-    "rv64i_m/D/src/d_fsub_b12-01.S",
-    "rv64i_m/D/src/d_fsub_b13-01.S",
-    "rv64i_m/D/src/d_fsub_b2-01.S",
-    "rv64i_m/D/src/d_fsub_b3-01.S",
-    "rv64i_m/D/src/d_fsub_b4-01.S",
-    "rv64i_m/D/src/d_fsub_b5-01.S",
-    "rv64i_m/D/src/d_fsub_b7-01.S",
-    "rv64i_m/D/src/d_fsub_b8-01.S"
+    "rv64i_m/D/src/fadd.d_b10-01.S",
+    "rv64i_m/D/src/fadd.d_b1-01.S",
+    "rv64i_m/D/src/fadd.d_b11-01.S",
+    "rv64i_m/D/src/fadd.d_b12-01.S",
+    "rv64i_m/D/src/fadd.d_b13-01.S",
+    "rv64i_m/D/src/fadd.d_b2-01.S",
+    "rv64i_m/D/src/fadd.d_b3-01.S",
+    "rv64i_m/D/src/fadd.d_b4-01.S",
+    "rv64i_m/D/src/fadd.d_b5-01.S",
+    "rv64i_m/D/src/fadd.d_b7-01.S",
+    "rv64i_m/D/src/fadd.d_b8-01.S",
+    "rv64i_m/D/src/fclass.d_b1-01.S",
+    "rv64i_m/D/src/fcvt.d.l_b25-01.S",
+    "rv64i_m/D/src/fcvt.d.l_b26-01.S",
+    "rv64i_m/D/src/fcvt.d.lu_b25-01.S",
+    "rv64i_m/D/src/fcvt.d.lu_b26-01.S",
+    "rv64i_m/D/src/fcvt.d.s_b1-01.S",
+    "rv64i_m/D/src/fcvt.d.s_b22-01.S",
+    "rv64i_m/D/src/fcvt.d.s_b23-01.S",
+    "rv64i_m/D/src/fcvt.d.s_b24-01.S",
+    "rv64i_m/D/src/fcvt.d.s_b27-01.S",
+    "rv64i_m/D/src/fcvt.d.s_b28-01.S",
+    "rv64i_m/D/src/fcvt.d.s_b29-01.S",
+    "rv64i_m/D/src/fcvt.d.w_b25-01.S",
+    "rv64i_m/D/src/fcvt.d.w_b26-01.S",
+    "rv64i_m/D/src/fcvt.d.wu_b25-01.S",
+    "rv64i_m/D/src/fcvt.d.wu_b26-01.S",
+    "rv64i_m/D/src/fcvt.l.d_b1-01.S",
+    "rv64i_m/D/src/fcvt.l.d_b22-01.S",
+    "rv64i_m/D/src/fcvt.l.d_b23-01.S",
+    "rv64i_m/D/src/fcvt.l.d_b24-01.S",
+    "rv64i_m/D/src/fcvt.l.d_b27-01.S",
+    "rv64i_m/D/src/fcvt.l.d_b28-01.S",
+    "rv64i_m/D/src/fcvt.l.d_b29-01.S",
+    "rv64i_m/D/src/fcvt.lu.d_b1-01.S",
+    "rv64i_m/D/src/fcvt.lu.d_b22-01.S",
+    "rv64i_m/D/src/fcvt.lu.d_b23-01.S",
+    "rv64i_m/D/src/fcvt.lu.d_b24-01.S",
+    "rv64i_m/D/src/fcvt.lu.d_b27-01.S",
+    "rv64i_m/D/src/fcvt.lu.d_b28-01.S",
+    "rv64i_m/D/src/fcvt.lu.d_b29-01.S",
+    "rv64i_m/D/src/fcvt.s.d_b1-01.S",
+    "rv64i_m/D/src/fcvt.s.d_b22-01.S",
+    "rv64i_m/D/src/fcvt.s.d_b23-01.S",
+    "rv64i_m/D/src/fcvt.s.d_b24-01.S",
+    "rv64i_m/D/src/fcvt.s.d_b27-01.S",
+    "rv64i_m/D/src/fcvt.s.d_b28-01.S",
+    "rv64i_m/D/src/fcvt.s.d_b29-01.S",
+    "rv64i_m/D/src/fcvt.w.d_b1-01.S",
+    "rv64i_m/D/src/fcvt.w.d_b22-01.S",
+    "rv64i_m/D/src/fcvt.w.d_b23-01.S",
+    "rv64i_m/D/src/fcvt.w.d_b24-01.S",
+    "rv64i_m/D/src/fcvt.w.d_b27-01.S",
+    "rv64i_m/D/src/fcvt.w.d_b28-01.S",
+    "rv64i_m/D/src/fcvt.w.d_b29-01.S",
+    "rv64i_m/D/src/fcvt.wu.d_b1-01.S",
+    "rv64i_m/D/src/fcvt.wu.d_b22-01.S",
+    "rv64i_m/D/src/fcvt.wu.d_b23-01.S",
+    "rv64i_m/D/src/fcvt.wu.d_b24-01.S",
+    "rv64i_m/D/src/fcvt.wu.d_b27-01.S",
+    "rv64i_m/D/src/fcvt.wu.d_b28-01.S",
+    "rv64i_m/D/src/fcvt.wu.d_b29-01.S",
+    "rv64i_m/D/src/fdiv.d_b1-01.S",
+    "rv64i_m/D/src/fdiv.d_b20-01.S",
+    "rv64i_m/D/src/fdiv.d_b2-01.S",
+    "rv64i_m/D/src/fdiv.d_b21-01.S",
+    "rv64i_m/D/src/fdiv.d_b3-01.S",
+    "rv64i_m/D/src/fdiv.d_b4-01.S",
+    "rv64i_m/D/src/fdiv.d_b5-01.S",
+    "rv64i_m/D/src/fdiv.d_b6-01.S",
+    "rv64i_m/D/src/fdiv.d_b7-01.S",
+    "rv64i_m/D/src/fdiv.d_b8-01.S",
+    "rv64i_m/D/src/fdiv.d_b9-01.S",
+    "rv64i_m/D/src/feq.d_b1-01.S",
+    "rv64i_m/D/src/feq.d_b19-01.S",
+    "rv64i_m/D/src/fle.d_b1-01.S",
+    "rv64i_m/D/src/fle.d_b19-01.S",
+    "rv64i_m/D/src/flt.d_b1-01.S",
+    "rv64i_m/D/src/flt.d_b19-01.S",
+    // "rv64i_m/D/src/fld-align-01.S", // currently missing from the arch-test top of tree; re-enable once it is restored
+    // "rv64i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266
+    "rv64i_m/D/src/fmadd.d_b14-01.S",
+    "rv64i_m/D/src/fmadd.d_b16-01.S",
+    "rv64i_m/D/src/fmadd.d_b17-01.S",
+    "rv64i_m/D/src/fmadd.d_b18-01.S",
+    "rv64i_m/D/src/fmadd.d_b2-01.S",
+    "rv64i_m/D/src/fmadd.d_b3-01.S",
+    "rv64i_m/D/src/fmadd.d_b4-01.S",
+    "rv64i_m/D/src/fmadd.d_b5-01.S",
+    "rv64i_m/D/src/fmadd.d_b6-01.S",
+    "rv64i_m/D/src/fmadd.d_b7-01.S",
+    "rv64i_m/D/src/fmadd.d_b8-01.S",
+    "rv64i_m/D/src/fmax.d_b1-01.S",
+    "rv64i_m/D/src/fmax.d_b19-01.S",
+    "rv64i_m/D/src/fmin.d_b1-01.S",
+    "rv64i_m/D/src/fmin.d_b19-01.S",
+    "rv64i_m/D/src/fmsub.d_b14-01.S",
+    "rv64i_m/D/src/fmsub.d_b16-01.S",
+    "rv64i_m/D/src/fmsub.d_b17-01.S",
+    "rv64i_m/D/src/fmsub.d_b18-01.S",
+    "rv64i_m/D/src/fmsub.d_b2-01.S",
+    "rv64i_m/D/src/fmsub.d_b3-01.S",
+    "rv64i_m/D/src/fmsub.d_b4-01.S",
+    "rv64i_m/D/src/fmsub.d_b5-01.S",
+    "rv64i_m/D/src/fmsub.d_b6-01.S",
+    "rv64i_m/D/src/fmsub.d_b7-01.S",
+    "rv64i_m/D/src/fmsub.d_b8-01.S",
+    "rv64i_m/D/src/fmul.d_b1-01.S",
+    "rv64i_m/D/src/fmul.d_b2-01.S",
+    "rv64i_m/D/src/fmul.d_b3-01.S",
+    "rv64i_m/D/src/fmul.d_b4-01.S",
+    "rv64i_m/D/src/fmul.d_b5-01.S",
+    "rv64i_m/D/src/fmul.d_b6-01.S",
+    "rv64i_m/D/src/fmul.d_b7-01.S",
+    "rv64i_m/D/src/fmul.d_b8-01.S",
+    "rv64i_m/D/src/fmul.d_b9-01.S",
+    "rv64i_m/D/src/fmv.d.x_b25-01.S",
+    "rv64i_m/D/src/fmv.d.x_b26-01.S",
+    "rv64i_m/D/src/fmv.x.d_b1-01.S",
+    "rv64i_m/D/src/fmv.x.d_b22-01.S",
+    "rv64i_m/D/src/fmv.x.d_b23-01.S",
+    "rv64i_m/D/src/fmv.x.d_b24-01.S",
+    "rv64i_m/D/src/fmv.x.d_b27-01.S",
+    "rv64i_m/D/src/fmv.x.d_b28-01.S",
+    "rv64i_m/D/src/fmv.x.d_b29-01.S",
+    "rv64i_m/D/src/fnmadd.d_b14-01.S",
+    "rv64i_m/D/src/fnmadd.d_b16-01.S",
+    "rv64i_m/D/src/fnmadd.d_b17-01.S",
+    "rv64i_m/D/src/fnmadd.d_b18-01.S",
+    "rv64i_m/D/src/fnmadd.d_b2-01.S",
+    "rv64i_m/D/src/fnmadd.d_b3-01.S",
+    "rv64i_m/D/src/fnmadd.d_b4-01.S",
+    "rv64i_m/D/src/fnmadd.d_b5-01.S",
+    "rv64i_m/D/src/fnmadd.d_b6-01.S",
+    "rv64i_m/D/src/fnmadd.d_b7-01.S",
+    "rv64i_m/D/src/fnmadd.d_b8-01.S",
+    "rv64i_m/D/src/fnmsub.d_b14-01.S",
+    "rv64i_m/D/src/fnmsub.d_b16-01.S",
+    "rv64i_m/D/src/fnmsub.d_b17-01.S",
+    "rv64i_m/D/src/fnmsub.d_b18-01.S",
+    "rv64i_m/D/src/fnmsub.d_b2-01.S",
+    "rv64i_m/D/src/fnmsub.d_b3-01.S",
+    "rv64i_m/D/src/fnmsub.d_b4-01.S",
+    "rv64i_m/D/src/fnmsub.d_b5-01.S",
+    "rv64i_m/D/src/fnmsub.d_b6-01.S",
+    "rv64i_m/D/src/fnmsub.d_b7-01.S",
+    "rv64i_m/D/src/fnmsub.d_b8-01.S",
+    "rv64i_m/D/src/fsgnj.d_b1-01.S",
+    "rv64i_m/D/src/fsgnjn.d_b1-01.S",
+    "rv64i_m/D/src/fsgnjx.d_b1-01.S",
+    // "rv64i_m/D/src/fsqrt.d_b1-01.S",
+    // "rv64i_m/D/src/fsqrt.d_b20-01.S",
+    // "rv64i_m/D/src/fsqrt.d_b2-01.S",
+    // "rv64i_m/D/src/fsqrt.d_b3-01.S",
+    // "rv64i_m/D/src/fsqrt.d_b4-01.S",
+    // "rv64i_m/D/src/fsqrt.d_b5-01.S",
+    // "rv64i_m/D/src/fsqrt.d_b7-01.S",
+    // "rv64i_m/D/src/fsqrt.d_b8-01.S",
+    // "rv64i_m/D/src/fsqrt.d_b9-01.S",
+    "rv64i_m/D/src/fssub.d_b10-01.S",
+    "rv64i_m/D/src/fssub.d_b1-01.S",
+    "rv64i_m/D/src/fssub.d_b11-01.S",
+    "rv64i_m/D/src/fssub.d_b12-01.S",
+    "rv64i_m/D/src/fssub.d_b13-01.S",
+    "rv64i_m/D/src/fssub.d_b2-01.S",
+    "rv64i_m/D/src/fssub.d_b3-01.S",
+    "rv64i_m/D/src/fssub.d_b4-01.S",
+    "rv64i_m/D/src/fssub.d_b5-01.S",
+    "rv64i_m/D/src/fssub.d_b7-01.S",
+    "rv64i_m/D/src/fssub.d_b8-01.S"
 };
 
     string arch32priv[] = '{
@@ -1310,7 +1310,7 @@ string imperas32f[] = '{
     "rv32i_m/F/src/fle_b19-01.S",
     "rv32i_m/F/src/flt_b1-01.S",
     "rv32i_m/F/src/flt_b19-01.S",
-    "rv32i_m/F/src/flw-align-01.S",
+    // "rv32i_m/F/src/flw-align-01.S",
     "rv32i_m/F/src/fmadd_b1-01.S",
     "rv32i_m/F/src/fmadd_b14-01.S",
     // "rv32i_m/F/src/fmadd_b15-01.S",
@@ -1407,8 +1407,8 @@ string imperas32f[] = '{
     "rv32i_m/F/src/fsub_b4-01.S",
     "rv32i_m/F/src/fsub_b5-01.S",
     "rv32i_m/F/src/fsub_b7-01.S",
-    "rv32i_m/F/src/fsub_b8-01.S",
-    "rv32i_m/F/src/fsw-align-01.S"
+    "rv32i_m/F/src/fsub_b8-01.S"
+    // "rv32i_m/F/src/fsw-align-01.S"
     };
 
 
diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile
index a9a442d38..fffa0e454 100644
--- a/tests/riscof/Makefile
+++ b/tests/riscof/Makefile
@@ -26,10 +26,11 @@ fsd_fld_tempfix:
 build_arch: fsd_fld_tempfix
 	riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser
 	rm -rf $(arch_workdir)/rv$(XLEN)i_m
-	mv -f $(work_dir)/rv$(XLEN)i_m $(arch_workdir)/
+	rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv$(XLEN)i_m/ || echo "error suppressed"
+	rsync -a $(work_dir)/rv64i_m/ $(arch_workdir)/rv$(XLEN)i_m/ || echo "error suppressed"
 
 build_wally:
-	riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run
+	riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run 2>&1 | tee log.txt
 	rm -rf $(wally_workdir)/rv$(XLEN)i_m
 	mv -f $(work_dir)/rv$(XLEN)i_m $(wally_workdir)/
 
diff --git a/tests/riscof/sail_cSim/riscof_sail_cSim.py b/tests/riscof/sail_cSim/riscof_sail_cSim.py
index dc3033ab3..683b816b3 100644
--- a/tests/riscof/sail_cSim/riscof_sail_cSim.py
+++ b/tests/riscof/sail_cSim/riscof_sail_cSim.py
@@ -94,7 +94,7 @@ class sail_cSim(pluginTemplate):
 
             execute = "@cd "+testentry['work_dir']+";"
 
-            cmd = self.compile_cmd.format(testentry['isa'].lower(), self.xlen) + ' ' + test + ' -o ' + elf
+            cmd = self.compile_cmd.format(testentry['isa'].lower().replace('zicsr', ' ', 1), self.xlen) + ' ' + test + ' -o ' + elf
             compile_cmd = cmd + ' -D' + " -D".join(testentry['macros'])
             execute+=compile_cmd+";"
 
diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py
index fd4293954..4f74c72f4 100644
--- a/tests/riscof/spike/riscof_spike.py
+++ b/tests/riscof/spike/riscof_spike.py
@@ -151,7 +151,7 @@ class spike(pluginTemplate):
 
           # substitute all variables in the compile command that we created in the initialize
           # function
-          cmd = self.compile_cmd.format(testentry['isa'].lower(), self.xlen, test, elf, compile_macros)
+          cmd = self.compile_cmd.format(testentry['isa'].lower().replace('zicsr', ' ', 2), self.xlen, test, elf, compile_macros)
 
 	  # if the user wants to disable running the tests and only compile the tests, then
 	  # the "else" clause is executed below assigning the sim command to simple no action

From c29a60c198cda7d861e34945a719cac3b7bac6bf Mon Sep 17 00:00:00 2001
From: Daniel Torres <dtowersm@gmail.com>
Date: Thu, 21 Jul 2022 21:10:15 -0700
Subject: [PATCH 32/36] changed gitignore, updated version of arch tests on
 main build

---
 .gitignore             | 4 ----
 addins/riscv-arch-test | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9c6691eb3..0c8a1655a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,12 +7,8 @@ __pycache__/
 .vscode/
 
 #External repos
-addins
 addins/riscv-arch-test/Makefile.include
 addins/riscv-tests/target
-addins/coremark/work/*
-addins/embench/bd_speed/*
-addins/embench/bd_size/*
 benchmarks/embench/wally*.json
 
 #vsim work files to ignore
diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test
index be67c99bd..e5020bf7b 160000
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@@ -1 +1 @@
-Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
+Subproject commit e5020bf7b345f8efb96c6c939de3162525b7f545

From d22587090bfcab76ef7572ddfc5eb0a1dc8bd105 Mon Sep 17 00:00:00 2001
From: David Harris <david_harris@hmc.edu>
Date: Fri, 22 Jul 2022 04:29:27 +0000
Subject: [PATCH 33/36] Reset MSR on read

---
 pipelined/src/uncore/uartPC16550D.sv | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv
index 89bdc837d..5eabe60c7 100644
--- a/pipelined/src/uncore/uartPC16550D.sv
+++ b/pipelined/src/uncore/uartPC16550D.sv
@@ -206,6 +206,8 @@ module uartPC16550D(
       // Modem Status Register (8.6.8)
       if (~MEMWb & (A == 3'b110))
         MSR <= #1 Din[3:0];
+      else if (~MEMRb & (A == 3'b110)) 
+        MSR <= #1 4'b0; // Reading MSR clears the flags in MSR bits 3:0
       else begin
         MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send
         MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready

From df568fd202bf53be651e13183810dd3153546e1f Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Fri, 22 Jul 2022 07:10:39 -0700
Subject: [PATCH 34/36] Added PLIC and UART tests and new functions to the test
 library

---
 addins/riscv-arch-test                        |  2 +-
 pipelined/testbench/tests.vh                  |  6 +-
 .../rv32i_m/privilege/Makefrag                |  2 +
 .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 81 +++++++++++++++++--
 4 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test
index e5020bf7b..be67c99bd 160000
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@@ -1 +1 @@
-Subproject commit e5020bf7b345f8efb96c6c939de3162525b7f545
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh
index b10bb951b..a2145835d 100644
--- a/pipelined/testbench/tests.vh
+++ b/pipelined/testbench/tests.vh
@@ -1603,9 +1603,9 @@ string wally32i[] = '{
 
  string wally32periph[] = '{
     `WALLYTEST,
-    "rv32i_m/privilege/WALLY-gpio-01",
-    "rv32i_m/privilege/WALLY-clint-01"
-    // "rv32i_m/privilege/WALLY-plic-01"
+    // "rv32i_m/privilege/WALLY-gpio-01",
+    // "rv32i_m/privilege/WALLY-clint-01"
+    "rv32i_m/privilege/WALLY-plic-01"
     // "rv32i_m/privilege/WALLY-uart-01"
  };
 
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag
index 56b3bc01f..23806cf67 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag
@@ -55,6 +55,8 @@ target_tests_nosim = \
     WALLY-status-tw-01 \
     WALLY-gpio-01 \
     WALLY-clint-01 \
+    WALLY-plic-01 \
+    WALLY-uart-01 \
 
 
 rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests))
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h
index 7e112c917..17ff15c02 100644
--- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h
@@ -954,6 +954,17 @@ read08_test:
     // address to read in t3, expected 8 bit value in t4 (unused, but there for your perusal).
     li t2, 0xBAD // bad value that will be overwritten on good reads.
     lb t2, 0(t3)
+    andi t2, t2, 0xFF // mask to lower 8 bits
+    sw t2, 0(t1)
+    addi t1, t1, 4
+    addi a6, a6, 4
+    j test_loop // go to next test case
+
+read04_test:
+    // address to read in t3, expected value in t4 (unused, but there for your perusal); only the low 4 bits of the read are kept.
+    li t2, 0xBAD // bad value that will be overwritten on good reads.
+    lb t2, 0(t3)
+    andi t2, t2, 15 // mask lower 4 bits
     sw t2, 0(t1)
     addi t1, t1, 4
     addi a6, a6, 4
@@ -974,6 +985,18 @@ readsip_test:  // read the MIP into the signature
     j test_loop // go to next test case
 
 claim_m_plic_interrupts: // clears one non-pending PLIC interrupt
+    li t2, 0x0C00000C // address of the GPIO priority register
+    li t3, 7 // temporary priority applied while the claim is made
+    lw t4, 0(t2) // t4 = GPIO priority currently programmed
+    sw t3, 0(t2) // GPIO priority = 7
+    sw t4, -4(sp) // save old GPIO priority in the word just below sp
+    addi sp, sp, -4 // then lower sp to cover it; NOTE(review): slot is written before sp moves, assumes nothing else uses this stack in between
+    li t2, 0x0C000028 // address of the UART priority register
+    li t3, 7 // same temporary priority for the UART
+    lw t4, 0(t2) // t4 = UART priority currently programmed
+    sw t3, 0(t2) // UART priority = 7
+    sw t4, -4(sp) // save old UART priority in the word just below sp
+    addi sp, sp, -4 // sp now points at the saved UART priority; saved GPIO priority is at 4(sp)
     li t2, 0x0C002000
     li t3, 0x0C200004
     li t4, 0xFFF
@@ -982,9 +1005,28 @@ claim_m_plic_interrupts: // clears one non-pending PLIC interrupt
     lw t5, 0(t3) // make PLIC claim
     sw t5, 0(t3) // complete claim made
     sw t6, 0(t2) // restore saved enable status
+    li t2, 0x0C00000C // address of the GPIO priority register
+    li t3, 0x0C000028 // address of the UART priority register
+    lw t4, 4(sp) // t4 = saved GPIO priority (pushed first, so one word above the top of stack)
+    lw t5, 0(sp) // t5 = saved UART priority (pushed last)
+    addi sp, sp, 8 // pop both saved words, restoring sp to its value on entry
+    sw t4, 0(t2) // write back the original GPIO priority
+    sw t5, 0(t3) // write back the original UART priority
     j test_loop
 
 claim_s_plic_interrupts: // clears one non-pending PLIC interrupt
+    li t2, 0x0C00000C // address of the GPIO priority register
+    li t3, 7 // temporary priority applied while the claim is made
+    lw t4, 0(t2) // t4 = GPIO priority currently programmed
+    sw t3, 0(t2) // GPIO priority = 7
+    sw t4, -4(sp) // save old GPIO priority in the word just below sp
+    addi sp, sp, -4 // then lower sp to cover it; NOTE(review): slot is written before sp moves, assumes nothing else uses this stack in between
+    li t2, 0x0C000028 // address of the UART priority register
+    li t3, 7 // same temporary priority for the UART
+    lw t4, 0(t2) // t4 = UART priority currently programmed
+    sw t3, 0(t2) // UART priority = 7
+    sw t4, -4(sp) // save old UART priority in the word just below sp
+    addi sp, sp, -4 // sp now points at the saved UART priority; saved GPIO priority is at 4(sp)
     li t2, 0x0C002080
     li t3, 0x0C201004
     li t4, 0xFFF
@@ -993,25 +1035,52 @@ claim_s_plic_interrupts: // clears one non-pending PLIC interrupt
     lw t5, 0(t3) // make PLIC claim
     sw t5, 0(t3) // complete claim made
     sw t6, 0(t2) // restore saved enable status
+    li t2, 0x0C00000C // address of the GPIO priority register
+    li t3, 0x0C000028 // address of the UART priority register
+    lw t4, 4(sp) // t4 = saved GPIO priority (pushed first, so one word above the top of stack)
+    lw t5, 0(sp) // t5 = saved UART priority (pushed last)
+    addi sp, sp, 8 // pop both saved words, restoring sp to its value on entry
+    sw t4, 0(t2) // write back the original GPIO priority
+    sw t5, 0(t3) // write back the original UART priority
+    j test_loop
+
+uart_lsr_intr_wait: // spins until IIR[2:0] reads 0x6, then records that value at t1
+    li t2, 0x10000002 // address of the UART IIR
+    li t4, 0x6 // IIR[2:0] value to wait for (presumably the 16550 line-status interrupt ID, per the routine name; confirm against the UART spec)
+uart_lsr_intr_loop:
+    lb t3, 0(t2) // read IIR
+    andi t3, t3, 0x7 // keep only IIR[2:0]
+    bne t3, t4, uart_lsr_intr_loop // keep polling until the masked value equals 0x6
+    sw t3, 0(t1) // store the matched value (always 0x6 here) in the word t1 points at
+    addi t1, t1, 4 // advance t1 to the next output word
+    addi a6, a6, 4 // a6 is advanced in step with t1 (its role is defined elsewhere in the library)
     j test_loop
 
 uart_data_wait:
     li t2, 0x10000005 // LSR
     li t3, 0x10000002 // IIR
+    li a4, 0x61 // LSR pattern to wait for: bit 0 (data ready) and bits 5-6 (THRE, TEMT) all set
+uart_read_LSR_IIR: // polling loop: re-reads IIR then LSR until the LSR bits selected by 0x61 are all set
     lb t4, 0(t3) // save IIR before potential clear
     lb t5, 0(t2)
-    andi t5, t5, 1  // only care if data is ready
-    li t6, 1
-    beq t5, t6, uart_data_ready
-    j uart_data_wait
+    andi t6, t5, 0x61  // t6 = LSR bits 0, 5 and 6 only; t5 keeps the unmasked LSR for the signature
+    bne a4, t6, uart_read_LSR_IIR // loop until data is ready and the transmitter is idle, then fall through to uart_data_ready
 
 uart_data_ready:
-    sb t4, 0(t1)
-    sb t5, 1(t1)
+    li t2, 0 // zero used to wipe the output word
+    sw t2, 0(t1) // clear the whole word at t1 (removes the deadbeef placeholder), since only two bytes are written below
+    andi t5, t5, 0x9F // clear LSR bits 5 and 6 (THRE, TEMT) so they do not appear in the signature
+    sb t4, 1(t1) // byte 1 of the output word = IIR captured in the polling loop
+    sb t5, 0(t1) // byte 0 of the output word = masked LSR
     addi t1, t1, 4
     addi a6, a6, 4
     j test_loop
 
+uart_clearmodemintr: // performs a dummy read of the UART register at offset 6 and records nothing
+    li t2, 0x10000006 // UART base 0x10000000 + offset 6 (the Modem Status Register in the 16550 register map)
+    lb t2, 0(t2) // value is discarded; the read itself is the point (reading MSR clears its delta flags in bits 3:0)
+    j test_loop
+
 goto_s_mode:
     // return to address in t3, 
     li a0, 3 // Trap handler behavior (go to supervisor mode)

From d38369e8bf3543276555736b9d20d8b0514c8970 Mon Sep 17 00:00:00 2001
From: slmnemo <nicholas.lucioforlife@yahoo.com>
Date: Fri, 22 Jul 2022 07:12:55 -0700
Subject: [PATCH 35/36] Added new PLIC and UART tests

---
 .../references/WALLY-plic-01.reference_output | 249 +++++
 .../WALLY-plic-s-01.reference_output          |   0
 .../references/WALLY-uart-01.reference_output |  33 +
 .../rv32i_m/privilege/src/WALLY-plic-01.S     | 951 ++++++++++++++++++
 .../rv32i_m/privilege/src/WALLY-plic-s-01.S   | 493 +++++++++
 .../rv32i_m/privilege/src/WALLY-uart-01.S     | 140 +++
 6 files changed, 1866 insertions(+)
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-s-01.reference_output
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S
 create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S

diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output
new file mode 100644
index 000000000..8c6b8ef6a
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output
@@ -0,0 +1,249 @@
+00000000 # read empty MIP (1.0.0)
+00000008 # check GPIO interrupt pending on intPending0
+00000000 # Claim gives no interrupt due to zero priority
+00000008 # interrupt still pending due to no claim
+00000000 # check no interrupts pending
+00000800 # MEIP set due to PLIC priority (1.0.1) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending
+00000000 # check no interrupts pending
+00000000 # read empty MIP (1.0.2)
+00000000 # check GPIO interrupt pending on intPending0
+00000000 # claim gives no interrupt due to no intPending
+00000000 # no interrupts pending after clear
+00000000 # still no interrupts after clear
+00000000 # read empty MIP (1.1.0) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # Claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000800 # MEIP set due to PLIC priority (1.1.1) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000000 # read empty MIP (1.2.0) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # Claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000800 # MEIP set due to PLIC priority (1.2.1) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000000 # read empty MIP (1.3.0) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # Claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000800 # MEIP set due to PLIC priority (1.3.1) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000000 # read empty MIP (1.4.0) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # Claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000800 # MEIP set due to PLIC priority (1.4.1) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000000 # read empty MIP (1.5.0) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # Claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000800 # MEIP set due to PLIC priority (1.5.1) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000000 # read empty MIP (1.6.0) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # Claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000800 # MEIP set due to PLIC priority (1.6.1) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000000 # read empty MIP (1.7.0) 
+00000008 # check GPIO interrupt pending on intPending0
+00000003 # Claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending after claim
+00000000 # check no interrupts pending
+00000800 # MEIP set (2.0)
+00000408 # gpio and uart pending
+00000003 # claim gpio
+00000400 # gpio no longer pending
+00000000 # no interrupts pending
+00000800 # MEIP set (2.1)
+00000408 # gpio and uart pending
+00000003 # claim gpio
+00000400 # gpio no longer pending
+00000000 # no interrupts pending
+00000800 # MEIP set (2.2)
+00000408 # gpio and uart pending
+0000000A # claim uart
+00000008 # uart no longer pending
+00000000 # no interrupts pending
+00000800 # MEIP set (2.3)
+00000408 # gpio and uart pending
+0000000A # claim uart
+00000008 # uart no longer pending
+00000000 # no interrupts pending
+00000000 # MEIP empty (2.4)
+00000408 # gpio and uart pending
+0000000A # claim uart
+00000008 # uart no longer pending (gpio still pending)
+00000000 # no interrupts pending
+00000A00 # MEIP and SEIP set (3.0)
+00000408 # check GPIO and UART interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000400 # check GPIO interrupt pending cleared after claim
+00000000 # check no interrupts pending
+00000200 # SEIP set (3.1)
+00000408 # check GPIO and UART interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000400 # check GPIO interrupt pending cleared after claim
+00000000 # check no interrupts pending
+00000200 # SEIP set (3.2)
+00000408 # check GPIO and UART interrupt pending on intPending0
+0000000A # claim UART
+00000008 # GPIO interrupt pending after UART claimcomp
+00000000 # check no interrupts pending
+00000000 # read empty MIP (3.3)
+00000408 # check GPIO and UART interrupt pending on intPending0
+0000000A # claim UART
+00000008 # check UART interrupt pending cleared after claim
+00000000 # check no interrupts pending
+00000A00 # MEIP and SEIP set (4.0)
+00000400 # UART interrupt pending
+0000000A # claim UART
+00000000 # check no interrupts pending
+00000000 # check no interrupts pending
+00000200 # SEIP set (4.1)
+00000400 # UART interrupt pending
+00000000 # nothing in claim register
+00000400 # UART interrupt pending
+00000000 # check no interrupts pending
+00000A00 # MEIP and SEIP set (4.2)
+00000400 # UART interrupt pending
+0000000A # claim UART
+00000000 # check no interrupts pending
+00000000 # check no interrupts pending
+00000800 # MEIP set (4.3)
+00000400 # UART interrupt pending
+0000000A # claim UART
+00000000 # check no interrupts pending
+00000000 # check no interrupts pending
+00000800 # MEIP set (4.4)
+00000400 # UART interrupt pending
+0000000A # claim UART
+00000000 # check no interrupts pending
+00000000 # check no interrupts pending
+00000200 # SEIP set (4.5)
+00000400 # UART interrupt pending
+00000000 # nothing in claim register
+00000400 # UART interrupt pending
+00000000 # check no interrupts pending
+00000000 # All disabled (4.6)
+00000400 # UART interrupt pending
+00000000 # nothing in claim register
+00000400 # UART interrupt pending
+00000000 # check no interrupts pending
+00000200 # SEIP set (5.0)
+00000008 # GPIO interrupt pending
+00000000 # nothing in claim register
+00000008 # GPIO interrupt pending
+00000000 # check no interrupts pending
+00000A00 # MEIP and SEIP set (5.1)
+00000008 # GPIO interrupt pending
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending
+00000000 # check no interrupts pending
+00000800 # MEIP set (5.2)
+00000008 # GPIO interrupt pending
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending
+00000000 # check no interrupts pending
+00000A00 # MEIP and SEIP set (5.3)
+00000008 # GPIO interrupt pending
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending
+00000000 # check no interrupts pending
+00000800 # MEIP set (5.4)
+00000008 # GPIO interrupt pending
+00000003 # claim gives 3 for ID of GPIO
+00000000 # check no interrupts pending
+00000000 # check no interrupts pending
+00000200 # SEIP set (5.5)
+00000008 # GPIO interrupt pending
+00000000 # nothing in claim register
+00000008 # GPIO interrupt pending
+00000000 # check no interrupts pending
+00000000 # read empty MIP (5.6)
+00000008 # GPIO interrupt pending
+00000000 # nothing in claim register
+00000008 # GPIO interrupt pending
+00000000 # check no interrupts pending
+0000000b # written due to goto_s_mode
+00000200 # read sip (7.0)
+00000408 # check GPIO and UART interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000400 # check GPIO interrupt pending cleared after claim
+00000000 # check no interrupts pending
+00000200 # read sip (7.1)
+00000408 # check GPIO and UART interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000400 # check GPIO interrupt pending cleared after claim
+00000000 # check no interrupts pending
+00000200 # read sip (7.2)
+00000408 # check GPIO and UART interrupt pending on intPending0
+0000000A # claim UART
+00000008 # GPIO interrupt pending after UART claimcomp
+00000000 # check no interrupts pending
+00000200 # read sip (7.3)
+00000408 # check GPIO and UART interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000400 # check GPIO interrupt pending cleared after claim
+00000000 # check no interrupts pending
+00000000 # read sip
+00000408 # check GPIO and UART interrupt pending on intPending0
+00000000 # nothing in claim register
+00000408 # check GPIO and UART interrupt pending on intPending0
+00000000 # check no interrupts pending
+00000200 # read sip
+00000408 # check GPIO and UART interrupt pending on intPending0
+00000003 # claim gives 3 for ID of GPIO
+00000400 # check GPIO interrupt pending cleared after claim
+00000000 # check no interrupts pending
+00000000 # read sip
+00000408 # check GPIO and UART interrupt pending on intPending0
+00000000 # nothing in claim register
+00000408 # check GPIO and UART interrupt pending on intPending0
+00000000 # check no interrupts pending
+00000009 # output from ecall in supervisor mode
+00000800 # MEIP set
+00000408 # check GPIO and UART interrupt pending on intPending0
+0000000A # claim UART
+00000008 # GPIO interrupt pending after UART claimcomp
+00000003 # claim gives 3 for ID of GPIO
+00000000 # read empty MIP: no interrupts, meip is low
+00000000 # check no interrupts pending
+00000800 # MEIP set
+00000008 # GPIO interrupt pending after complete
+00000003 # claim gives 3 for ID of GPIO
+00000000 # read empty MIP: meip is zeroed
+00000000 # check no interrupts pending
+00000800 # MEIP set
+00000400 # check GPIO interrupt pending cleared after claim
+00000800 # MEIP set
+00000408 # check GPIO and UART interrupt pending on intPending0
+0000000A # claim UART
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-s-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-s-01.reference_output
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output
new file mode 100644
index 000000000..457bc35c8
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output
@@ -0,0 +1,33 @@
+00000000 # Reset tests
+00000001 # 00000000 *** commented because LCR should reset to zero but resets to 3 due to the FPGA
+00000000
+00000060
+00000000
+00000060 # read-write test
+00000020 # transmitter register empty but shift register not
+00000101 # transmitter is not empty when done transmitting 5 bits
+00000000
+00000060
+00000101 # Multi-bit transmission: 5 bits
+00000015
+00000101 # Transmit 6 bits
+0000002A
+00000101 # Transmit 7 bits
+0000007F
+00000101 # Transmit 8 bits
+00000080
+00000002 # Transmission interrupt tests
+00000401 # Interrupt generated by finished transmission
+00000004
+00000006 # IIR return LSR intr and LSR has an overflow error
+00000063
+00000004
+00000001
+00000001 # MODEM interrupt tests
+00000000
+00000011
+00000001
+
+
+
+0000000b # ecall from test termination
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S
new file mode 100644
index 000000000..81a48a23f
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S
@@ -0,0 +1,951 @@
+///////////////////////////////////////////
+//
+// WALLY-plic
+//
+// Author: David_Harris@hmc.edu and Nicholas Lucio <nlucio@hmc.edu>
+//
+// Created 2022-06-16
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+#include "WALLY-TEST-LIB-32.h" 
+
+INIT_TESTS
+
+TRAP_HANDLER m
+
+j run_test_loop // begin test loop/table tests instead of executing inline code.
+
+INIT_TEST_TABLE
+
+END_TESTS
+
+TEST_STACK_AND_DATA
+
+.align 2
+test_cases:
+# ---------------------------------------------------------------------------------------------
+# Test Contents
+#
+#   Here is where the actual tests are held, or rather, what the actual tests do.
+#   Each entry consists of 3 values that will be read in as follows:
+#   
+#   '.4byte [x28 Value], [x29 Value], [x30 value]'
+#                     or
+#   '.4byte [address], [value], [test type]'
+#
+#   The encoding for x30 test type values can be found in the test handler in the framework file
+# 
+# ---------------------------------------------------------------------------------------------
+
+# =========== Define PLIC registers ===========
+
+.equ PLIC, 0x0C000000                        # PLIC base address; every PLIC_* constant below is PLIC + byte offset
+.equ PLIC_INTPRI_GPIO, (PLIC+0x00000C)       # priority register for GPIO; GPIO is interrupt 3
+.equ PLIC_INTPRI_UART, (PLIC+0x000028)       # priority register for UART; UART is interrupt 10
+.equ PLIC_INTPENDING0, (PLIC+0x001000)       # intPending0 register; the tests read bit 3 for GPIO and bit 10 for UART (0x408 = both)
+.equ PLIC_INTEN00,     (PLIC+0x002000)       # interrupt enables for context 0 (machine mode) sources 31:1
+.equ PLIC_INTEN10,     (PLIC+0x002080)       # interrupt enables for context 1 (supervisor mode) sources 31:1
+.equ PLIC_THRESH0,     (PLIC+0x200000)       # Priority threshold for context 0 (machine mode)
+.equ PLIC_CLAIM0,      (PLIC+0x200004)       # Claim/Complete register for context 0; read to claim, write the ID back to complete
+.equ PLIC_THRESH1,     (PLIC+0x201000)       # Priority threshold for context 1 (supervisor mode)
+.equ PLIC_CLAIM1,      (PLIC+0x201004)       # Claim/Complete register for context 1
+
+# =========== Define GPIO registers ===========
+
+.equ GPIO, 0x10060000                        # GPIO base address; every register below is GPIO + byte offset
+.equ input_val, (GPIO+0x00)
+.equ input_en, (GPIO+0x04)                   # bit 0 is set during GPIO initialization below
+.equ output_en, (GPIO+0x08)                  # bit 0 is set during GPIO initialization below
+.equ output_val, (GPIO+0x0C)                 # the tests write 1 here to make rise_ip go high and 0 to clear the output again
+.equ rise_ie, (GPIO+0x18)                    # rise interrupt enable; bit 0 is set during GPIO initialization below
+.equ rise_ip, (GPIO+0x1C)                    # rise interrupt pending; the tests write 1 here to clear the GPIO interrupt
+.equ fall_ie, (GPIO+0x20)
+.equ fall_ip, (GPIO+0x24)
+.equ high_ie, (GPIO+0x28)
+.equ high_ip, (GPIO+0x2C)
+.equ low_ie, (GPIO+0x30)
+.equ low_ip, (GPIO+0x34)
+.equ iof_en, (GPIO+0x38)
+.equ iof_sel, (GPIO+0x3C)
+.equ out_xor, (GPIO+0x40)
+
+# =========== Define UART registers ===========
+
+.equ UART, 0x10000000                        # UART base address; registers below are byte offsets from it
+.equ UART_IER, (UART+0x01)                   # written with 0x08 below to enable modem status interrupts
+.equ UART_MCR, (UART+0x04)                   # written with 0x10 below to enable loopback mode
+.equ UART_MSR, (UART+0x06)                   # the tests write 0x0F here to cause the UART interrupt and 0x00 to clear it
+
+# =========== Initialize UART and GPIO ===========
+
+# GPIO Initialization
+.4byte input_en, 0x00000001, write32_test   # enable bit 0 of input_en
+.4byte output_en, 0x00000001, write32_test  # enable bit 0 of output_en
+.4byte output_val, 0x00000000, write32_test # make sure output_val is 0
+.4byte rise_ie, 0x00000001, write32_test    # enable rise interrupts
+
+# =========== Initialize relevant PLIC registers ===========
+
+.4byte PLIC_INTPRI_GPIO, 0x00000000, write32_test   # set GPIO priority to zero
+.4byte PLIC_INTPRI_UART, 0x00000000, write32_test   # set UART priority to zero
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # enable m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable s-mode interrupts
+.4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+.4byte PLIC_THRESH1, 0x00000007, write32_test       # set s-mode threshold to max
+
+# =========== Machine-Mode Priority Testing (1.T.X) ===========
+
+# Test 1.0.0: GPIO int lacks priority (0 = 0)
+.4byte PLIC_THRESH0, 0x00000000, write32_test       # change threshold
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000000, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000000, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # interrupt pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.0.1: GPIO int has priority (1 > 0)
+.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test   # let GPIO cause interrupts
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000800, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.0.2: meip and c/c clear without interrupt pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte 0x0, 0x00000000, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # expect no interrupt pending
+.4byte PLIC_CLAIM0, 0x00000000, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.1.0: GPIO lacks priority (1 = 1)
+.4byte PLIC_THRESH0, 0x00000001, write32_test       # change threshold
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000000, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # interrupt pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim from earlier
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.1.1: GPIO int has priority (2 > 1)
+.4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test   # let GPIO cause interrupts
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000800, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.2.0: GPIO int lacks priority (2 = 2)
+.4byte PLIC_THRESH0, 0x00000002, write32_test       # change threshold
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000000, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim from earlier
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.2.1: GPIO int has priority (3 > 2)
+.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test   # let GPIO cause interrupts
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000800, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.3.0: GPIO int lacks priority (3 = 3)
+.4byte PLIC_THRESH0, 0x00000003, write32_test       # change threshold
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000000, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim from earlier
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.3.1: GPIO int has priority (4 > 3)
+.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test   # let GPIO cause interrupts
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000800, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.4.0: GPIO int lacks priority (4 = 4)
+.4byte PLIC_THRESH0, 0x00000004, write32_test       # change threshold
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000000, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim from earlier
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.4.1: GPIO int has priority (5 > 4)
+.4byte PLIC_INTPRI_GPIO, 0x00000005, write32_test   # let GPIO cause interrupts
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000800, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.5.0: GPIO int lacks priority (5 = 5)
+.4byte PLIC_THRESH0, 0x00000005, write32_test       # change threshold
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000000, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim from earlier
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.5.1: GPIO int has priority (6 > 5)
+.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test   # let GPIO cause interrupts
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000800, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.6.0: GPIO int lacks priority (6 = 6)
+.4byte PLIC_THRESH0, 0x00000006, write32_test       # change threshold
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000000, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim from earlier
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.6.1: GPIO int has priority (7 > 6)
+.4byte PLIC_INTPRI_GPIO, 0x00000007, write32_test   # let GPIO cause interrupts
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000800, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# Test 1.7.0: GPIO int lacks priority (7 = 7)
+.4byte PLIC_THRESH0, 0x00000007, write32_test       # change threshold
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000000, readmip_test                # read mip
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete claim from earlier
+.4byte 0x0, 0x00000000, claim_m_plic_interrupts     # clear interrupt one
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+
+# =========== UART vs GPIO priority (2.X) ===========
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # enable m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable s-mode interrupts
+.4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+.4byte PLIC_THRESH1, 0x00000007, write32_test       # set s-mode threshold to max
+# UART Initialization
+.4byte UART_IER, 0x08, write08_test   # enable modem status interrupts from CTS
+.4byte UART_MCR, 0x10, write08_test   # enable loopback mode, RTS = 0
+.4byte UART_MSR, 0x00, write08_test   # disable UART interrupt
+
+# Test 2.0: GPIO Priority = UART Priority (tie: source 3, GPIO, is expected to be claimed first)
+
+.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test   # GPIOPriority = 1
+.4byte PLIC_INTPRI_UART, 0x00000001, write32_test   # UARTPriority = 1
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000800, readmip_test                # read mip: expect MEIP (bit 11) only
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register: expect source 3 (GPIO)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # GPIO pending cleared by claim; UART still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts            # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts            # clear leftover m-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupts pending
+
+# Test 2.1: GPIO Priority > UART Priority (GPIO is expected to be claimed first)
+
+.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test   # GPIOPriority = 3
+.4byte PLIC_INTPRI_UART, 0x00000002, write32_test   # UARTPriority = 2
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000800, readmip_test                # read mip: expect MEIP (bit 11) only
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register: expect source 3 (GPIO)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # GPIO pending cleared by claim; UART still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 2.2: GPIO Priority < UART Priority (UART is expected to be claimed first)
+
+.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test   # GPIOPriority = 4
+.4byte PLIC_INTPRI_UART, 0x00000005, write32_test   # UARTPriority = 5
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000800, readmip_test                # read mip: expect MEIP (bit 11) only
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # read claim register: expect source 10 (UART)
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # UART pending cleared by claim; GPIO still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x0000000A, write32_test        # complete the UART claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 2.3: GPIO Priority < UART Priority, with UART at the maximum priority (7)
+
+.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test   # GPIOPriority = 6
+.4byte PLIC_INTPRI_UART, 0x00000007, write32_test   # UARTPriority = 7
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000800, readmip_test                # read mip: expect MEIP (bit 11) only
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # read claim register: expect source 10 (UART)
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # UART pending cleared by claim; GPIO still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x0000000A, write32_test        # complete the UART claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 2.4: Interrupts don't have enough priority (both priorities are below the m-mode threshold of 6)
+
+.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test   # GPIOPriority = 4
+.4byte PLIC_INTPRI_UART, 0x00000005, write32_test   # UARTPriority = 5
+.4byte PLIC_THRESH0, 0x00000006, write32_test       # set m-mode threshold to 6
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000000, readmip_test                # read mip: expect no external interrupt bits set
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # read claim register: expect source 10 (UART)
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # UART pending cleared by claim; GPIO still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x0000000A, write32_test        # complete the UART claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# =========== SEIP tests (3.X): vary the m-mode and s-mode thresholds and check MEIP/SEIP in mip ===========
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # enable sources 3 (GPIO) and 10 (UART) for m-mode
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable sources 3 (GPIO) and 10 (UART) for s-mode
+.4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+.4byte PLIC_THRESH1, 0x00000000, write32_test       # set s-mode threshold to 0
+
+# Test 3.0: Cause machine and supervisor interrupts (both thresholds at 0)
+
+.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test   # GPIOPriority = 1
+.4byte PLIC_INTPRI_UART, 0x00000001, write32_test   # UARTPriority = 1
+.4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+.4byte PLIC_THRESH1, 0x00000000, write32_test       # set s-mode threshold to 0
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000A00, readmip_test                # read mip: expect MEIP (bit 11) and SEIP (bit 9)
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register: expect source 3 (GPIO)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # GPIO pending cleared by claim; UART still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 3.1: Suppress machine mode interrupts (m-mode threshold at max, s-mode threshold at 0)
+
+.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test   # GPIOPriority = 3
+.4byte PLIC_INTPRI_UART, 0x00000002, write32_test   # UARTPriority = 2
+.4byte PLIC_THRESH0, 0x00000007, write32_test       # set m-mode threshold to 7
+.4byte PLIC_THRESH1, 0x00000000, write32_test       # set s-mode threshold to 0
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000200, readmip_test                # read mip: expect SEIP (bit 9) only
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register: expect source 3 (GPIO)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # GPIO pending cleared by claim; UART still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 3.2: Cause SEIP with UART first (UART has the higher priority)
+
+.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test   # GPIOPriority = 6
+.4byte PLIC_INTPRI_UART, 0x00000007, write32_test   # UARTPriority = 7
+.4byte PLIC_THRESH0, 0x00000007, write32_test       # set m-mode threshold to 7
+.4byte PLIC_THRESH1, 0x00000000, write32_test       # set s-mode threshold to 0
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000200, readmip_test                # read mip: expect SEIP (bit 9) only
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # read claim register: expect source 10 (UART)
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # UART pending cleared by claim; GPIO still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x0000000A, write32_test        # complete the UART claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 3.3: Low SEIP due to insufficient priority (both priorities are below both thresholds)
+
+.4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test   # GPIOPriority = 2
+.4byte PLIC_INTPRI_UART, 0x00000003, write32_test   # UARTPriority = 3
+.4byte PLIC_THRESH0, 0x00000004, write32_test       # set m-mode threshold to 4
+.4byte PLIC_THRESH1, 0x00000005, write32_test       # set s-mode threshold to 5
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000000, readmip_test                # read mip: expect neither MEIP nor SEIP set
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # read claim register: expect source 10 (UART)
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect both still pending -- NOTE(review): Test 2.4 expects the same claim to clear UART pending; confirm
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x0000000A, write32_test        # write source 10 (UART) to the claim/complete register
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# =========== UART interrupt enable tests (4.X): only the UART interrupt is raised ===========
+
+.4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+.4byte PLIC_THRESH1, 0x00000000, write32_test       # set s-mode threshold to 0
+.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test   # GPIO Priority = 1
+.4byte PLIC_INTPRI_UART, 0x00000001, write32_test   # UART Priority = 1
+
+# Test 4.0: GPIO m-mode disabled (UART raised; UART is still enabled in both modes)
+
+.4byte PLIC_INTEN00, 0x00000400, write32_test       # m-mode: enable only UART (GPIO disabled)
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable all s-mode interrupts
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000A00, readmip_test                # read mip: expect MEIP (bit 11) and SEIP (bit 9)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # expect only UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # read claim register: expect source 10 (UART)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # UART pending cleared by claim
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x0000000A, write32_test        # complete the UART claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 4.1: UART m-mode disabled (UART raised; only s-mode should see it)
+
+.4byte PLIC_INTEN00, 0x00000008, write32_test       # m-mode: enable only GPIO (UART disabled)
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable all s-mode interrupts
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000200, readmip_test                # read mip: expect SEIP (bit 9) only
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # expect only UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x00000000, read32_test         # read claim register: expect 0 (nothing claimed)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # UART still pending (nothing was claimed)
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x00000000, write32_test        # write 0 to claim/complete (no claim was made above)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 4.2: GPIO s-mode disabled (UART raised; UART is still enabled in both modes)
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # enable all m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000400, write32_test       # s-mode: enable only UART (GPIO disabled)
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000A00, readmip_test                # read mip: expect MEIP (bit 11) and SEIP (bit 9)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # expect only UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # read claim register: expect source 10 (UART)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # UART pending cleared by claim
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x0000000A, write32_test        # complete the UART claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 4.3: UART s-mode disabled (UART raised; only m-mode should see it)
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # enable all m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000008, write32_test       # s-mode: enable only GPIO (UART disabled)
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000800, readmip_test                # read mip: expect MEIP (bit 11) only
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # expect only UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # read claim register: expect source 10 (UART)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # UART pending cleared by claim
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x0000000A, write32_test        # complete the UART claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 4.4: GPIO and UART s-mode disabled (UART raised; only m-mode should see it)
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # enable all m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000000, write32_test       # disable all s-mode interrupts
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000800, readmip_test                # read mip: expect MEIP (bit 11) only
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # expect only UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # read claim register: expect source 10 (UART)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # UART pending cleared by claim
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x0000000A, write32_test        # complete the UART claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 4.5: GPIO and UART m-mode disabled (UART raised; only s-mode should see it)
+
+.4byte PLIC_INTEN00, 0x00000000, write32_test       # disable all m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable all s-mode interrupts
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000200, readmip_test                # read mip: expect SEIP (bit 9) only
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # expect only UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x00000000, read32_test         # read claim register: expect 0 (nothing claimed)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # UART still pending (nothing was claimed)
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x00000000, write32_test        # write 0 to claim/complete (no claim was made above)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 4.6: GPIO and UART fully disabled (UART raised with no source enabled in either mode)
+
+.4byte PLIC_INTEN00, 0x00000000, write32_test       # disable all m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000000, write32_test       # disable all s-mode interrupts
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000200, readmip_test                # read mip -- NOTE(review): expects SEIP (0x200) with all enables off; Test 5.6 expects 0x0; confirm
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # expect only UART (bit 10) pending
+.4byte PLIC_CLAIM0, 0x00000000, read32_test         # read claim register: expect 0 (nothing claimed)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # UART still pending (nothing was claimed)
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM0, 0x00000000, write32_test        # write 0 to claim/complete (no claim was made above)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# =========== GPIO interrupt enable tests (5.X): only the GPIO interrupt is raised ===========
+
+.4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+.4byte PLIC_THRESH1, 0x00000000, write32_test       # set s-mode threshold to 0
+.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test   # GPIO Priority = 1
+.4byte PLIC_INTPRI_UART, 0x00000001, write32_test   # UART Priority = 1
+
+# Test 5.0: GPIO m-mode disabled (GPIO raised; only s-mode should see it)
+
+.4byte PLIC_INTEN00, 0x00000400, write32_test       # m-mode: enable only UART (GPIO disabled)
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable all s-mode interrupts
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte 0x0, 0x00000200, readmip_test                # read mip: expect SEIP (bit 9) only
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect only GPIO (bit 3) pending
+.4byte PLIC_CLAIM0, 0x00000000, read32_test         # read claim register: expect 0 (nothing claimed)
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # GPIO still pending (nothing was claimed)
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte PLIC_CLAIM0, 0x00000000, write32_test        # write 0 to claim/complete (no claim was made above)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 5.1: UART m-mode disabled (GPIO raised; GPIO is still enabled in both modes)
+
+.4byte PLIC_INTEN00, 0x00000008, write32_test       # m-mode: enable only GPIO (UART disabled)
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable all s-mode interrupts
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte 0x0, 0x00000A00, readmip_test                # read mip: expect MEIP (bit 11) and SEIP (bit 9)
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect only GPIO (bit 3) pending
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register: expect source 3 (GPIO)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # GPIO pending cleared by claim
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 5.2: GPIO s-mode disabled (GPIO raised; only m-mode should see it)
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # enable all m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000400, write32_test       # s-mode: enable only UART (GPIO disabled)
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte 0x0, 0x00000800, readmip_test                # read mip: expect MEIP (bit 11) only
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect only GPIO (bit 3) pending
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register: expect source 3 (GPIO)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # GPIO pending cleared by claim
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 5.3: UART s-mode disabled (GPIO raised; GPIO is still enabled in both modes)
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # enable all m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000008, write32_test       # s-mode: enable only GPIO (UART disabled)
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte 0x0, 0x00000A00, readmip_test                # read mip: expect MEIP (bit 11) and SEIP (bit 9)
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect only GPIO (bit 3) pending
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register: expect source 3 (GPIO)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # GPIO pending cleared by claim
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 5.4: GPIO and UART s-mode disabled (GPIO raised; only m-mode should see it)
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # enable all m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000000, write32_test       # disable all s-mode interrupts
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte 0x0, 0x00000800, readmip_test                # read mip: expect MEIP (bit 11) only
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect only GPIO (bit 3) pending
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # read claim register: expect source 3 (GPIO)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # GPIO pending cleared by claim
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 5.5: GPIO and UART m-mode disabled (GPIO raised; only s-mode should see it)
+
+.4byte PLIC_INTEN00, 0x00000000, write32_test       # disable all m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable all s-mode interrupts
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte 0x0, 0x00000200, readmip_test                # read mip: expect SEIP (bit 9) only
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect only GPIO (bit 3) pending
+.4byte PLIC_CLAIM0, 0x00000000, read32_test         # read claim register: expect 0 (nothing claimed)
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # GPIO still pending (nothing was claimed)
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte PLIC_CLAIM0, 0x00000000, write32_test        # write 0 to claim/complete (no claim was made above)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# Test 5.6: GPIO and UART fully disabled (GPIO raised with no source enabled in either mode)
+
+.4byte PLIC_INTEN00, 0x00000000, write32_test       # disable all m-mode interrupts
+.4byte PLIC_INTEN10, 0x00000000, write32_test       # disable all s-mode interrupts
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte 0x0, 0x00000000, readmip_test                # read mip: expect neither MEIP nor SEIP set
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect only GPIO (bit 3) pending
+.4byte PLIC_CLAIM0, 0x00000000, read32_test         # read claim register: expect 0 (nothing claimed)
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # GPIO still pending (nothing was claimed)
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte PLIC_CLAIM0, 0x00000000, write32_test        # write 0 to claim/complete (no claim was made above)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_m_plic_interrupts       # clear leftover m-mode interrupt (pass two)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x0, claim_s_plic_interrupts       # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test   # no interrupts pending
+
+# =========== S-mode enable tests (7.X): run from s-mode, checking sip and PLIC_CLAIM1 ===========
+
+.4byte 0x0, 0x0, goto_s_mode                        # go to s-mode. 0xb written to output
+.4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+.4byte PLIC_THRESH1, 0x00000000, write32_test       # set s-mode threshold to 0
+.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test   # GPIO Priority = 1
+.4byte PLIC_INTPRI_UART, 0x00000001, write32_test   # UART Priority = 1
+
+# Test 7.0: GPIO m-mode disabled (both interrupts raised; s-mode still has both enabled)
+
+.4byte PLIC_INTEN00, 0x00000400, write32_test       # m-mode: enable only UART (GPIO disabled)
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable all s-mode interrupts
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000200, readsip_test                # read sip: expect SEIP (bit 9)
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM1, 0x00000003, read32_test         # read PLIC_CLAIM1: expect source 3 (GPIO)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # GPIO pending cleared by claim; UART still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM1, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupts pending
+
+# Test 7.1: UART m-mode disabled (both interrupts raised; s-mode still has both enabled)
+
+.4byte PLIC_INTEN00, 0x00000008, write32_test       # m-mode: enable only GPIO (UART disabled)
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable all s-mode interrupts
+.4byte output_val, 0x00000001, write32_test         # cause GPIO rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000200, readsip_test                # read sip: expect SEIP (bit 9)
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # expect GPIO (bit 3) and UART (bit 10) pending
+.4byte PLIC_CLAIM1, 0x00000003, read32_test         # read PLIC_CLAIM1: expect source 3 (GPIO)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # GPIO pending cleared by claim; UART still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM1, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear leftover s-mode interrupt (pass one)
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear leftover s-mode interrupt (pass two)
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupts pending
+
+# Test 7.2: GPIO s-mode disabled
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # m-mode enables: GPIO and UART
+.4byte PLIC_INTEN10, 0x00000400, write32_test       # s-mode enables: UART (bit 10) only; GPIO (bit 3) disabled
+.4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000200, readsip_test                # readsip_test: SEIP (0x200) expected set
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+.4byte PLIC_CLAIM1, 0x0000000A, read32_test         # s-mode claim returns UART (source 10)
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # UART pending cleared by the claim; GPIO still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM1, 0x0000000A, write32_test        # complete the UART claim made earlier
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear interrupt one
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear interrupt two
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupts pending
+
+# Test 7.3: UART s-mode disabled
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # m-mode enables: GPIO and UART
+.4byte PLIC_INTEN10, 0x00000008, write32_test       # s-mode enables: GPIO (bit 3) only; UART (bit 10) disabled
+.4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000200, readsip_test                # readsip_test: SEIP (0x200) expected set
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+.4byte PLIC_CLAIM1, 0x00000003, read32_test         # s-mode claim returns GPIO (source 3)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # GPIO pending cleared by the claim; UART still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM1, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear interrupt one
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear interrupt two
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupts pending
+
+# Test 7.4: GPIO and UART s-mode disabled
+
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # m-mode enables: GPIO and UART
+.4byte PLIC_INTEN10, 0x00000000, write32_test       # s-mode enables: none (all s-mode sources disabled)
+.4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000000, readsip_test                # readsip_test: SEIP expected clear
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+.4byte PLIC_CLAIM1, 0x00000000, read32_test         # s-mode claim returns 0: nothing enabled for this context
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # nothing was claimed; GPIO and UART still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM1, 0x00000000, write32_test        # write ID 0 to claim/complete (no claim was made)
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear interrupt one
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear interrupt two
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupts pending
+
+# Test 7.5: GPIO and UART m-mode disabled
+
+.4byte PLIC_INTEN00, 0x00000000, write32_test       # m-mode enables: none (GPIO and UART both disabled)
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # s-mode enables: GPIO and UART
+.4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000200, readsip_test                # readsip_test: SEIP (0x200) expected set
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+.4byte PLIC_CLAIM1, 0x00000003, read32_test         # s-mode claim returns GPIO (source 3)
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # GPIO pending cleared by the claim; UART still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM1, 0x00000003, write32_test        # complete the GPIO claim made earlier
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear interrupt one
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear interrupt two
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupts pending
+
+# Test 7.6: GPIO and UART fully disabled
+
+.4byte PLIC_INTEN00, 0x00000000, write32_test       # m-mode enables: none (GPIO and UART both disabled)
+.4byte PLIC_INTEN10, 0x00000000, write32_test       # s-mode enables: none (GPIO and UART both disabled)
+.4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000000, readsip_test                # readsip_test: SEIP expected clear
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+.4byte PLIC_CLAIM1, 0x00000000, read32_test         # s-mode claim returns 0: nothing enabled for this context
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # nothing was claimed; GPIO and UART still pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000001, write32_test            # clear GPIO interrupt
+.4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+.4byte PLIC_CLAIM1, 0x00000000, write32_test        # write ID 0 to claim/complete (no claim was made)
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear interrupt one
+.4byte 0x0, 0x00000000, claim_s_plic_interrupts     # clear interrupt two
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupts pending
+
+# =========== Special claim tests (8) ===========
+
+.4byte 0x0, 0x0, goto_m_mode                        # write 0x9 to output
+
+.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test   # GPIO Priority = 6
+.4byte PLIC_INTPRI_UART, 0x00000007, write32_test   # UART Priority = 7
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # m-mode enables: GPIO and UART
+.4byte PLIC_INTEN10, 0x00000000, write32_test       # s-mode enables: none (all s-mode sources disabled)
+.4byte PLIC_THRESH0, 0x00000005, write32_test       # set m-mode threshold to 5 (below both priorities)
+
+# Test 8: m-mode claim/complete sequencing with GPIO and UART both active
+
+.4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+.4byte UART_MSR, 0x0F, write08_test                 # cause UART interrupt
+.4byte 0x0, 0x00000800, readmip_test                # read mip; MEIP (0x800) expected set
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # GPIO and UART both pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # claim UART
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # UART pending cleared by the claim; GPIO still pending
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # claim GPIO
+.4byte 0x0, 0x00000000, readmip_test                # no interrupts, meip is low
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # both interrupts claimed
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete GPIO
+.4byte 0x0, 0x00000800, readmip_test                # GPIO interrupt sets MEIP
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # GPIO bit is set
+.4byte PLIC_CLAIM0, 0x00000003, read32_test         # claim GPIO again
+.4byte 0x0, 0x00000000, readmip_test                # meip is zeroed
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # both interrupts claimed
+.4byte PLIC_CLAIM0, 0x0000000A, write32_test        # complete UART claim
+.4byte 0x0, 0x00000800, readmip_test                # read mip; MEIP (0x800) expected set
+.4byte PLIC_INTPENDING0, 0x00000400, read32_test    # UART pending
+.4byte PLIC_CLAIM0, 0x00000003, write32_test        # complete GPIO claim
+.4byte 0x0, 0x00000800, readmip_test                # read mip; MEIP (0x800) expected set
+.4byte PLIC_INTPENDING0, 0x00000408, read32_test    # GPIO and UART pending
+.4byte PLIC_CLAIM0, 0x0000000A, read32_test         # claim UART
+
+.4byte 0x0, 0x0, terminate_test # terminate tests
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S
new file mode 100644
index 000000000..45b87c4b1
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S
@@ -0,0 +1,493 @@
+///////////////////////////////////////////
+//
+// WALLY-plic-s
+//
+// Author: David_Harris@hmc.edu and Nicholas Lucio <nlucio@hmc.edu>
+//
+// Created 2022-06-16
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+#include "WALLY-TEST-LIB-32.h" 
+
+INIT_TESTS
+
+TRAP_HANDLER m
+
+j run_test_loop // jump to the table-driven test loop; the tests are the data entries under test_cases, not inline code.
+
+INIT_TEST_TABLE
+
+END_TESTS
+
+TEST_STACK_AND_DATA
+
+.align 2
+test_cases:
+# ---------------------------------------------------------------------------------------------
+# Test Contents
+#
+#   Here is where the actual tests are held, or rather, what the actual tests do.
+#   each entry consists of 3 values that will be read in as follows:
+#   
+#   '.4byte [x28 Value], [x29 Value], [x30 value]'
+#                     or
+#   '.4byte [address], [value], [test type]'
+#
+#   The encoding for x30 test type values can be found in the test handler in the framework file
+# 
+# ---------------------------------------------------------------------------------------------
+
+# =========== Define PLIC registers ===========
+
+.equ PLIC, 0x0C000000                        # PLIC base address; all registers below are offsets from it
+.equ PLIC_INTPRI_GPIO, (PLIC+0x00000C)       # priority register for GPIO (interrupt source 3)
+.equ PLIC_INTPRI_UART, (PLIC+0x000028)      # priority register for UART (interrupt source 10)
+.equ PLIC_INTPENDING0, (PLIC+0x001000)       # intPending0 register (bit 3 = GPIO, bit 10 = UART)
+.equ PLIC_INTEN00,     (PLIC+0x002000)       # interrupt enables for context 0 (machine mode) sources 31:1
+.equ PLIC_INTEN10,     (PLIC+0x002080)       # interrupt enables for context 1 (supervisor mode) sources 31:1
+.equ PLIC_THRESH0,     (PLIC+0x200000)       # Priority threshold for context 0 (machine mode)
+.equ PLIC_CLAIM0,      (PLIC+0x200004)       # Claim/Complete register for context 0 (machine mode)
+.equ PLIC_THRESH1,     (PLIC+0x201000)       # Priority threshold for context 1 (supervisor mode)
+.equ PLIC_CLAIM1,      (PLIC+0x201004)       # Claim/Complete register for context 1 (supervisor mode)
+
+# =========== Define GPIO registers ===========
+
+.equ GPIO, 0x10060000                        # GPIO base address; all registers below are offsets from it
+.equ input_val, (GPIO+0x00)
+.equ input_en, (GPIO+0x04)                   # bit 0 is set during initialization below
+.equ output_en, (GPIO+0x08)                  # bit 0 is set during initialization below
+.equ output_val, (GPIO+0x0C)                 # the tests write 1 here to make rise_ip go high
+.equ rise_ie, (GPIO+0x18)                    # rise interrupt enable
+.equ rise_ip, (GPIO+0x1C)                    # rise interrupt pending; written by the tests to clear the interrupt
+.equ fall_ie, (GPIO+0x20)
+.equ fall_ip, (GPIO+0x24)
+.equ high_ie, (GPIO+0x28)
+.equ high_ip, (GPIO+0x2C)
+.equ low_ie, (GPIO+0x30)
+.equ low_ip, (GPIO+0x34)
+.equ iof_en, (GPIO+0x38)
+.equ iof_sel, (GPIO+0x3C)
+.equ out_xor, (GPIO+0x40)
+
+# =========== Define UART registers ===========
+
+.equ UART, 0x10000000                        # UART base address; all registers below are offsets from it
+.equ UART_IER, (UART+0x01)                   # written below to enable modem status interrupts
+.equ UART_MCR, (UART+0x04)                   # written below to enable loopback mode
+.equ UART_MSR, (UART+0x06)                   # the tests write this register to raise and clear the UART interrupt
+
+# =========== Initialize UART and GPIO ===========
+
+# GPIO Initialization: bit 0 is both driven and read back, so writing output_val can raise rise_ip
+.4byte input_en, 0x00000001, write32_test   # enable bit 0 of input_en
+.4byte output_en, 0x00000001, write32_test  # enable bit 0 of output_en
+.4byte output_val, 0x00000000, write32_test # make sure output_val is 0
+.4byte rise_ie, 0x00000001, write32_test    # enable rise interrupts on bit 0
+
+# UART Initialization
+.4byte UART_IER, 0x08, write08_test   # enable modem status interrupts from CTS
+.4byte UART_MCR, 0x10, write08_test   # enable loopback mode, RTS = 0
+.4byte UART_MSR, 0x00, write08_test   # start with no UART interrupt asserted
+
+# =========== Initialize relevant PLIC registers ===========
+
+.4byte PLIC_INTPRI_GPIO, 0x00000000, write32_test   # set GPIO priority to zero
+.4byte PLIC_INTPRI_UART, 0x00000000, write32_test   # set UART priority to zero
+.4byte PLIC_INTEN00, 0x00000408, write32_test       # enable GPIO (bit 3) and UART (bit 10) for m-mode
+.4byte PLIC_INTEN10, 0x00000408, write32_test       # enable GPIO (bit 3) and UART (bit 10) for s-mode
+.4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+.4byte PLIC_THRESH1, 0x00000007, write32_test       # set s-mode threshold to max (7)
+
+# =========== Machine-Mode Priority Testing (1.T.X) ===========
+
+# Test 1.0.0: GPIO int lacks priority (0 = 0)
+.4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte 0x0, 0x00000000, readmip_test                # read mip; MEIP expected clear
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # value listed is GPIO pending (bit 3), not "none"; original author flagged a possible pending bug here
+.4byte 0x0, 0x00000000, readmclaimcomplete_test     # read and clear claimcomplete
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt; NOTE(review): writes 0, but the s-mode tests in this patch write 1 to rise_ip to clear it - confirm
+
+# Test 1.0.1: GPIO int has priority (1 > 0)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test   # raise GPIO priority to 1 (> threshold 0)
+.4byte 0x0, 0x00000800, readmip_test                # read mip; MEIP (0x800) expected set
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte 0x0, 0x00000003, readmclaimcomplete_test     # read and clear claimcomplete; GPIO (source 3) expected
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.0.2: meip and c/c clear without interrupt pending
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+.4byte 0x0, 0x00000000, readmip_test                # read mip; MEIP expected clear
+.4byte 0x0, 0x00000000, readmclaimcomplete_test     # read and clear claimcomplete; 0 expected (nothing to claim)
+
+# Test 1.1.0: GPIO lacks priority (1 = 1)
+.4byte PLIC_THRESH0, 0x00000001, write32_test       # raise m-mode threshold to 1 (equal to GPIO priority)
+.4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+.4byte 0x0, 0x00000000, readmip_test                # read mip; MEIP expected clear
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # expect no interrupt pending
+.4byte 0x0, 0x00000000, readmclaimcomplete_test     # read and clear claimcomplete
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.1.1: GPIO int has priority (2 > 1)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test   # raise GPIO priority to 2 (> threshold 1)
+.4byte 0x0, 0x00000800, readmip_test                # read mip; MEIP (0x800) expected set
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte 0x0, 0x00000003, readmclaimcomplete_test     # read and clear claimcomplete; GPIO (source 3) expected
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.2.0: GPIO int lacks priority (2 = 2)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_THRESH0, 0x00000002, write32_test       # raise m-mode threshold to 2 (equal to GPIO priority)
+.4byte 0x0, 0x00000000, readmip_test                # read mip; MEIP expected clear
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # expect no interrupt pending
+.4byte 0x0, 0x00000000, readmclaimcomplete_test     # read and clear claimcomplete
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.2.1: GPIO int has priority (3 > 2)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test   # raise GPIO priority to 3 (> threshold 2)
+.4byte 0x0, 0x00000800, readmip_test                # read mip; MEIP (0x800) expected set
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte 0x0, 0x00000003, readmclaimcomplete_test     # read and clear claimcomplete; GPIO (source 3) expected
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.3.0: GPIO int lacks priority (3 = 3)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_THRESH0, 0x00000003, write32_test       # raise m-mode threshold to 3 (equal to GPIO priority)
+.4byte 0x0, 0x00000000, readmip_test                # read mip; MEIP expected clear
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # expect no interrupt pending
+.4byte 0x0, 0x00000000, readmclaimcomplete_test     # read and clear claimcomplete
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.3.1: GPIO int has priority (4 > 3)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test   # raise GPIO priority to 4 (> threshold 3)
+.4byte 0x0, 0x00000800, readmip_test                # read mip; MEIP (0x800) expected set
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte 0x0, 0x00000003, readmclaimcomplete_test     # read and clear claimcomplete; GPIO (source 3) expected
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.4.0: GPIO int lacks priority (4 = 4)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_THRESH0, 0x00000004, write32_test       # raise m-mode threshold to 4 (equal to GPIO priority)
+.4byte 0x0, 0x00000000, readmip_test                # read mip; MEIP expected clear
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # expect no interrupt pending
+.4byte 0x0, 0x00000000, readmclaimcomplete_test     # read and clear claimcomplete
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.4.1: GPIO int has priority (5 > 4)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_INTPRI_GPIO, 0x00000005, write32_test   # raise GPIO priority to 5 (> threshold 4)
+.4byte 0x0, 0x00000800, readmip_test                # read mip; MEIP (0x800) expected set
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte 0x0, 0x00000003, readmclaimcomplete_test     # read and clear claimcomplete; GPIO (source 3) expected
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.5.0: GPIO int lacks priority (5 = 5)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_THRESH0, 0x00000005, write32_test       # raise m-mode threshold to 5 (equal to GPIO priority)
+.4byte 0x0, 0x00000000, readmip_test                # read mip; MEIP expected clear
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # expect no interrupt pending
+.4byte 0x0, 0x00000000, readmclaimcomplete_test     # read and clear claimcomplete
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.5.1: GPIO int has priority (6 > 5)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test   # raise GPIO priority to 6 (> threshold 5)
+.4byte 0x0, 0x00000800, readmip_test                # read mip; MEIP (0x800) expected set
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte 0x0, 0x00000003, readmclaimcomplete_test     # read and clear claimcomplete; GPIO (source 3) expected
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.6.0: GPIO int lacks priority (6 = 6)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_THRESH0, 0x00000006, write32_test       # raise m-mode threshold to 6 (equal to GPIO priority)
+.4byte 0x0, 0x00000000, readmip_test                # read mip; MEIP expected clear
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # expect no interrupt pending
+.4byte 0x0, 0x00000000, readmclaimcomplete_test     # read and clear claimcomplete
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.6.1: GPIO int has priority (7 > 6)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_INTPRI_GPIO, 0x00000007, write32_test   # raise GPIO priority to 7 (> threshold 6)
+.4byte 0x0, 0x00000800, readmip_test                # read mip; MEIP (0x800) expected set
+.4byte PLIC_INTPENDING0, 0x00000008, read32_test    # expect interrupt pending on bit 3
+.4byte 0x0, 0x00000003, readmclaimcomplete_test     # read and clear claimcomplete; GPIO (source 3) expected
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# Test 1.7.0: GPIO int lacks priority (7 = 7)
+.4byte output_val, 0x00000001, write32_test         # set GPIO rise_ip high
+.4byte PLIC_THRESH0, 0x00000007, write32_test       # raise m-mode threshold to 7 (equal to GPIO priority)
+.4byte 0x0, 0x00000000, readmip_test                # read mip; MEIP expected clear
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # expect no interrupt pending
+.4byte 0x0, 0x00000000, readmclaimcomplete_test     # read and clear claimcomplete
+.4byte PLIC_INTPENDING0, 0x00000000, read32_test    # interrupt pending was cleared
+.4byte output_val, 0x00000000, write32_test         # clear output_val
+.4byte rise_ip, 0x00000000, write32_test            # clear interrupt
+
+# # =========== UART vs GPIO priority (2.X) =========== *** 
+
+# .4byte PLIC_INTEN00, 0x00000408, write32_test       # enable m-mode interrupts
+# .4byte PLIC_INTEN10, 0x00000408, write32_test       # enable s-mode interrupts
+# .4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+# .4byte PLIC_THRESH1, 0x00000007, write32_test       # set s-mode threshold to max
+
+# # Test 2.0: GPIO Priority = UART Priority
+
+# .4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+# .4byte UART_MSR, 0x00000010, write08_test           # step 1 of UART interrupt
+# .4byte UART_MSR, 0x00000012, write08_test           # step 2 of UART interrupt
+# .4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test   # GPIOPriority = 1
+# .4byte PLIC_INTPRI_UART, 0x00000001, write32_test   # UARTPriority = 1
+# .4byte 0x0, 0x00000800, readmip_test                # read mip
+# .4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+# .4byte 0x0, 0x00000003, readmclaimcomplete_test     # GPIO claim/complete process
+# .4byte PLIC_INTPENDING0, 0x00000200, read32_test    # GPIO interrupt was cleared
+# .4byte 0x0, 0x00000800, readmip_test                # expect mip to remain high
+# .4byte 0x0, 0x0000000A, readmclaimcomplete_test     # UART claim/complete process
+# .4byte PLIC_INTPENDING0, 0x00000000, read32_test    # UART interrupt pending was cleared
+# .4byte output_val, 0x00000000, write32_test         # clear output_val
+# .4byte rise_ip, 0x00000000, write32_test            # clear GPIO interrupt
+# .4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+
+# # Test 2.1: GPIO Priority < UART Priority
+
+# .4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+# .4byte UART_MSR, 0x00000010, write08_test           # step 1 of UART interrupt
+# .4byte UART_MSR, 0x00000012, write08_test           # step 2 of UART interrupt
+# .4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test   # GPIO Priority = 2
+# .4byte PLIC_INTPRI_UART, 0x00000003, write32_test   # UART Priority = 3
+# .4byte 0x0, 0x00000800, readmip_test                # read mip
+# .4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for UART and GPIO
+# .4byte 0x0, 0x0000000A, readmclaimcomplete_test     # UART claim/complete process
+# .4byte PLIC_INTPENDING0, 0x00000008, read32_test    # UART interrupt was cleared
+# .4byte 0x0, 0x00000800, readmip_test                # expect mip to remain high
+# .4byte 0x0, 0x00000003, readmclaimcomplete_test     # GPIO claim/complete process
+# .4byte PLIC_INTPENDING0, 0x00000000, read32_test    # GPIO interrupt pending was cleared
+# .4byte output_val, 0x00000000, write32_test         # clear output_val
+# .4byte rise_ip, 0x00000000, write32_test            # clear GPIO interrupt
+# .4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+
+# # Test 2.2: GPIO Priority > UART Priority
+
+# .4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+# .4byte UART_MSR, 0x00000010, write08_test           # step 1 of UART interrupt
+# .4byte UART_MSR, 0x00000012, write08_test           # step 2 of UART interrupt
+# .4byte PLIC_INTPRI_GPIO, 0x00000005, write32_test   # GPIO Priority = 5
+# .4byte PLIC_INTPRI_UART, 0x00000004, write32_test   # UART Priority = 4
+# .4byte 0x0, 0x00000800, readmip_test                # read mip
+# .4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+# .4byte 0x0, 0x00000003, readmclaimcomplete_test     # GPIO claim/complete process
+# .4byte PLIC_INTPENDING0, 0x00000200, read32_test    # GPIO interrupt was cleared
+# .4byte 0x0, 0x00000800, readmip_test                # expect mip to remain high
+# .4byte 0x0, 0x0000000A, readmclaimcomplete_test     # UART claim/complete process
+# .4byte PLIC_INTPENDING0, 0x00000000, read32_test    # UART interrupt pending was cleared
+# .4byte output_val, 0x00000000, write32_test         # clear output_val
+# .4byte rise_ip, 0x00000000, write32_test            # clear GPIO interrupt
+# .4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+
+# # Test 2.3: GPIO Priority < UART Priority (2)
+
+# .4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+# .4byte UART_MSR, 0x00000010, write08_test           # step 1 of UART interrupt
+# .4byte UART_MSR, 0x00000012, write08_test           # step 2 of UART interrupt
+# .4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test   # GPIO Priority = 6
+# .4byte PLIC_INTPRI_UART, 0x00000007, write32_test   # UART Priority = 7
+# .4byte 0x0, 0x00000800, readmip_test                # read mip
+# .4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for UART and GPIO
+# .4byte 0x0, 0x0000000A, readmclaimcomplete_test     # UART claim/complete process
+# .4byte PLIC_INTPENDING0, 0x00000008, read32_test    # UART interrupt was cleared
+# .4byte 0x0, 0x00000800, readmip_test                # expect mip to remain high
+# .4byte 0x0, 0x00000003, readmclaimcomplete_test     # GPIO claim/complete process
+# .4byte PLIC_INTPENDING0, 0x00000000, read32_test    # GPIO interrupt pending was cleared
+# .4byte output_val, 0x00000000, write32_test         # clear output_val
+# .4byte rise_ip, 0x00000000, write32_test            # clear GPIO interrupt
+# .4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+
+# # Test 2.4: Interrupts disabled (Thresh0 = 7)
+
+# .4byte output_val, 0x00000001, write32_test         # cause rise_ip to go high
+# .4byte UART_MSR, 0x00000010, write08_test           # step 1 of UART interrupt
+# .4byte UART_MSR, 0x00000012, write08_test           # step 2 of UART interrupt
+# .4byte PLIC_THRESH0, 0x00000007, write32_test       # Disable m-mode interrupts
+# .4byte PLIC_INTPRI_GPIO, 0x00000007, write32_test   # GPIO Priority = 7
+# .4byte PLIC_INTPRI_UART, 0x00000007, write32_test   # UART Priority = 7
+# .4byte 0x0, 0x00000000, readmip_test,               # read mip
+# .4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+# .4byte 0x0, 0x00000000, readmclaimcomplete_test     # no interrupt pending
+# .4byte output_val, 0x00000000, write32_test         # clear output_val
+# .4byte rise_ip, 0x00000000, write32_test            # clear GPIO interrupt
+# .4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+
+# # =========== SEIP tests (3.X) ===========
+
+# .4byte PLIC_INTEN00, 0x00000408, write32_test       # enable m-mode interrupts
+# .4byte PLIC_INTEN10, 0x00000408, write32_test       # enable s-mode interrupts
+# .4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+# .4byte PLIC_THRESH1, 0x00000000, write32_test       # set s-mode threshold to 0
+
+# # Test 3.0: Cause machine and supervisor interrupts
+
+# .4byte output_val, 0x00000000, write32_test         # cause rise_ip to go high
+# .4byte UART_MSR, 0x00000010, write08_test           # step 1 of UART interrupt
+# .4byte UART_MSR, 0x00000012, write08_test           # step 2 of UART interrupt
+# .4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test   # GPIO Priority = 1
+# .4byte PLIC_INTPRI_UART, 0x00000001, write32_test   # UART Priority = 1
+# .4byte 0x0, 0x00000A00, readmip_test                # Expect high on MEIP and SEIP
+# .4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+# .4byte 0x0, 0x00000003, readmclaimcomplete_test     # Expect GPIO on claim/complete
+# .4byte 0x0, 0x00000A00, readmip_test                # Still expect high on MEIP and SEIP
+# .4byte PLIC_INTPENDING0, 0x00000200, read32_test    # GPIO interrupt was cleared
+# .4byte 0x0, 0x0000000A, readmclaimcomplete_test     # Expect UART on claim/complete
+# .4byte 0x0, 0x00000000, readmip_test                # all interrupts were cleared
+# .4byte PLIC_INTPENDING0, 0x00000000, read32_test    # Pending should also be clear
+# .4byte output_val, 0x00000000, write32_test         # clear output_val
+# .4byte rise_ip, 0x00000000, write32_test            # clear GPIO interrupt
+# .4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+
+# # Test 3.1: Suppress machine mode interrupts
+
+# .4byte output_val, 0x00000000, write32_test         # cause rise_ip to go high
+# .4byte UART_MSR, 0x00000010, write08_test           # step 1 of UART interrupt
+# .4byte UART_MSR, 0x00000012, write08_test           # step 2 of UART interrupt
+# .4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test   # GPIO Priority = 3
+# .4byte PLIC_INTPRI_UART, 0x00000002, write32_test   # UART Priority = 2
+# .4byte PLIC_THRESH0, 0x00000007, write32_test       # set m-mode threshold to 7
+# .4byte 0x0, 0x00000200, readmip_test                # Expect high on SEIP only
+# .4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+# .4byte 0x0, 0x00000003, readmclaimcomplete_test     # Expect GPIO on claim/complete
+# .4byte 0x0, 0x00000200, readmip_test                # Expect high on SEIP only
+# .4byte PLIC_INTPENDING0, 0x00000200, read32_test    # GPIO interrupt was cleared
+# .4byte 0x0, 0x0000000A, readmclaimcomplete_test     # Expect UART on claim/complete
+# .4byte 0x0, 0x00000000, readmip_test                # all interrupts were cleared
+# .4byte PLIC_INTPENDING0, 0x00000000, read32_test    # Pending should also be clear
+# .4byte output_val, 0x00000000, write32_test         # clear output_val
+# .4byte rise_ip, 0x00000000, write32_test            # clear GPIO interrupt
+# .4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+
+# # Test 3.2: Cause SEIP with UART first
+
+# .4byte output_val, 0x00000000, write32_test         # cause rise_ip to go high
+# .4byte UART_MSR, 0x00000010, write08_test           # step 1 of UART interrupt
+# .4byte UART_MSR, 0x00000012, write08_test           # step 2 of UART interrupt
+# .4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test   # GPIO Priority = 6
+# .4byte PLIC_INTPRI_UART, 0x00000007, write32_test   # UART Priority = 7
+# .4byte 0x0, 0x00000200, readmip_test                # Expect high on SEIP only
+# .4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+# .4byte 0x0, 0x0000000A, readmclaimcomplete_test     # Expect UART on claim/complete
+# .4byte 0x0, 0x00000200, readmip_test                # Expect high on SEIP only
+# .4byte PLIC_INTPENDING0, 0x00000008, read32_test    # UART interrupt was cleared
+# .4byte 0x0, 0x00000003, readmclaimcomplete_test     # Expect GPIO on claim/complete
+# .4byte 0x0, 0x00000000, readmip_test                # all interrupts were cleared
+# .4byte PLIC_INTPENDING0, 0x00000000, read32_test    # Pending should also be clear
+# .4byte output_val, 0x00000000, write32_test         # clear output_val
+# .4byte rise_ip, 0x00000000, write32_test            # clear GPIO interrupt
+# .4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+
+# # Test 3.3: Low SEIP due to insufficient priority
+
+# .4byte output_val, 0x00000000, write32_test         # cause rise_ip to go high
+# .4byte UART_MSR, 0x00000010, write08_test           # step 1 of UART interrupt
+# .4byte UART_MSR, 0x00000012, write08_test           # step 2 of UART interrupt
+# .4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test   # GPIO Priority = 2
+# .4byte PLIC_INTPRI_UART, 0x00000003, write32_test   # UART Priority = 3
+# .4byte PLIC_THRESH0, 0x00000004, write32_test       # set m-mode threshold to 4
+# .4byte PLIC_THRESH1, 0x00000005, write32_test       # set s-mode threshold to 5
+# .4byte 0x0, 0x00000000, readmip_test                # no interrupt pending
+# .4byte PLIC_INTPENDING0, 0x00000000, read32_test    # no interrupt pending
+# .4byte 0x0, 0x00000000, readmclaimcomplete_test     # Expect nothing on claim/complete
+# .4byte output_val, 0x00000000, write32_test         # clear output_val
+# .4byte rise_ip, 0x00000000, write32_test            # clear GPIO interrupt
+# .4byte UART_MSR, 0x00000000, write08_test           # clear UART interrupt
+
+# # =========== UART interrupt enable tests (4.X) ===========
+
+# .4byte PLIC_THRESH0, 0x00000000, write32_test       # set m-mode threshold to 0
+# .4byte PLIC_THRESH1, 0x00000000, write32_test       # set s-mode threshold to 0
+# .4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test   # GPIO Priority = 1
+# .4byte PLIC_INTPRI_UART, 0x00000001, write32_test   # UART Priority = 1
+
+# # Test 4.0: GPIO m-mode disabled
+
+# .4byte output_val, 0x00000000, write32_test         # cause rise_ip to go high
+# .4byte UART_MSR, 0x00000010, write08_test           # step 1 of UART interrupt
+# .4byte UART_MSR, 0x00000012, write08_test           # step 2 of UART interrupt
+# .4byte PLIC_INTEN00, 0x000000200, write32_test      # GPIO m-mode interrupt disabled
+# .4byte PLIC_INTEN00, 0x000000208, write32_test      # No s-mode interrupt disabled
+# .4byte 0x0, 0x00000A00, readmip_test                # Expect high on MEIP from UART and SEIP from GPIO
+# .4byte PLIC_INTPENDING0, 0x00000408, read32_test    # interrupt pending for GPIO and UART
+# .4byte 0x0, 0x00000003, readmclaimcomplete_test     # Expect GPIO on claim/complete
+# .4byte 0x0, 0x00000200, readmip_test                # Expect high on MEIP and SEIP from UART
+# .4byte PLIC_INTPENDING0, 0x00000200, read32_test    # interrupt pending for GPIO and UART
+# .4byte 0x0, 0x0000000A, readmclaimcomplete_test     # Expect UART on claim/complete
+# .4byte 0x0, 0x00000000, readmip_test                # all interrupts were cleared
+# .4byte PLIC_INTPENDING0, 0x00000000, read32_test    # Pending should also be clear
+
+# Test 4.1: UART m-mode disabled
+
+
+
+# Test 4.2: GPIO s-mode disabled
+
+
+
+# Test 4.3: UART s-mode disabled
+
+
+
+# Test 4.4: GPIO and UART s-mode disabled
+
+
+
+# Test 4.5: GPIO and UART m-mode disabled
+
+
+
+# Test 4.6: GPIO and UART fully disabled
+
+.4byte 0x0, 0x0, terminate_test # terminate tests
diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S
new file mode 100644
index 000000000..08b1dc25e
--- /dev/null
+++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S
@@ -0,0 +1,140 @@
+///////////////////////////////////////////
+//
+// WALLY-uart
+//
+// Author: David_Harris@hmc.edu and Nicholas Lucio <nlucio@hmc.edu>
+//
+// Created 2022-06-16
+//
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+#include "WALLY-TEST-LIB-32.h" 
+
+INIT_TESTS
+
+TRAP_HANDLER m
+
+j run_test_loop // begin test loop/table tests instead of executing inline code.
+
+INIT_TEST_TABLE
+
+END_TESTS
+
+TEST_STACK_AND_DATA
+
+.align 2
+
+.equ UART, 0x10000000
+.equ UART_RBR, (UART)
+.equ UART_THR, (UART)
+.equ UART_IER, (UART+0x01)
+.equ UART_IIR, (UART+0x02)
+.equ UART_FCR, (UART+0x02)
+.equ UART_LCR, (UART+0x03)
+.equ UART_MCR, (UART+0x04)
+.equ UART_LSR, (UART+0x05)
+.equ UART_MSR, (UART+0x06)
+.equ UART_Scr, (UART+0x07)
+
+test_cases:
+# ---------------------------------------------------------------------------------------------
+# Test Contents
+#
+#   Here is where the actual tests are held, or rather, what the actual tests do.
+#   Each entry consists of 3 values that will be read in as follows:
+#   
+#   '.4byte [x28 Value], [x29 Value], [x30 value]'
+#                     or
+#   '.4byte [address], [value], [test type]'
+#
+#   The encoding for x30 test type values can be found in the test handler in the framework file
+# 
+# ---------------------------------------------------------------------------------------------
+
+# =========== UART resets to correct values on master reset ===========
+
+.4byte UART_IER, 0x00, read08_test # IER resets to 0
+.4byte UART_IIR, 0x01, read08_test # IIR resets to 1
+# .4byte UART_LCR, 0x00, read08_test *** commented out because LCR should reset to zero but resets to 3
+.4byte UART_MCR, 0x00, read08_test # MCR resets to 0
+.4byte UART_LSR, 0x60, read08_test # LSR resets with transmit status bits set
+.4byte UART_MSR, 0x00, read04_test # MSR expected 0; NOTE(review): read04_test presumably checks only the low 4 bits - confirm in the test library
+
+# =========== Basic read-write ===========
+
+.4byte UART_LCR, 0x00, write08_test # set LCR to reset value *** remove if UART resets to correct value
+.4byte UART_MCR, 0x10, write08_test # put UART into loopback for MSR test
+.4byte UART_LSR, 0x60, read08_test  # LSR still reads 0x60 (transmit status bits set) before anything is written
+.4byte UART_THR, 0x00, write08_test # write value to UART
+.4byte UART_LSR, 0x00, read08_test  # data not ready and transmitter is not empty
+.4byte 0x0, 0x0101, uart_data_wait  # wait for data to become ready then output IIR and LSR
+.4byte UART_RBR, 0x00, read08_test  # read written value
+.4byte UART_LSR, 0x60, read08_test  # read LSR
+
+# =========== Different size read-write ===========
+
+# Transmit 5 bits
+
+.4byte UART_LCR, 0x00, write08_test # set LCR to transmit 5 bits
+.4byte UART_THR, 0x55, write08_test # write value to UART
+.4byte 0x0, 0x0101, uart_data_wait  # wait for data to become ready then output IIR and then LSR
+.4byte UART_RBR, 0x15, read08_test  # read written value without bits 5-7
+
+# Transmit 6 bits
+
+.4byte UART_LCR, 0x01, write08_test # set LCR to transmit six bits
+.4byte UART_THR, 0xAA, write08_test # write value to UART
+.4byte 0x0, 0x0101, uart_data_wait  # wait for data to become ready then output IIR and then LSR
+.4byte UART_RBR, 0x2A, read08_test  # read written value without bits 6 & 7
+
+# Transmit 7 bits
+
+.4byte UART_LCR, 0x02, write08_test # set LCR to transmit seven bits
+.4byte UART_THR, 0xFF, write08_test # write value to UART
+.4byte 0x0, 0x0101, uart_data_wait  # wait for data to become ready then output IIR and then LSR
+.4byte UART_RBR, 0x7F, read08_test  # read written value without bit 7
+
+# Transmit 8 bits
+
+.4byte UART_LCR, 0x03, write08_test # set LCR to transmit eight bits
+.4byte UART_THR, 0x80, write08_test # write value to UART
+.4byte 0x0, 0x0101, uart_data_wait  # wait for data to become ready then output IIR and then LSR
+.4byte UART_RBR, 0x80, read08_test  # read full written value + sign extension
+
+# =========== Transmit-related interrupts ===========
+
+.4byte UART_IER, 0x07, write08_test # enable data available, buffer empty, and line status interrupts
+.4byte UART_IIR, 0x02, read08_test  # buffer should be empty, causing interrupt
+.4byte UART_THR, 0x00, write08_test # write zeroes to transmitter
+.4byte 0x0, 0x0401, uart_data_wait  # IIR should have data ready interrupt
+.4byte UART_THR, 0x01, write08_test # write 1 to transmitter buffer
+.4byte UART_IIR, 0x04, read08_test  # data interrupt should still be high
+.4byte 0x0, 0x06, uart_lsr_intr_wait     # wait for transmission to complete; IIR should then report a line status interrupt caused by the overrun error
+.4byte UART_LSR, 0x63, read08_test  # read overrun error from LSR
+.4byte UART_IIR, 0x04, read08_test  # check that LSR interrupt was cleared
+.4byte UART_RBR, 0x01, read08_test  # read previous value from UART
+
+# =========== MODEM interrupts ===========
+
+.4byte UART_MSR, 0x00, write08_test # clear MSR
+.4byte UART_IER, 0x08, write08_test # enable MODEM Status interrupts
+.4byte UART_IIR, 0x01, read08_test  # no interrupts pending
+.4byte UART_MCR, 0x02, write08_test # Cause DCTS interrupt
+.4byte UART_IIR, 0x00, read08_test  # IIR expected 0x00: MODEM interrupt pending
+.4byte UART_MSR, 0x11, read08_test  # Read MSR to clear interrupt
+.4byte UART_IIR, 0x01, read08_test  # interrupt cleared by reading MSR
+
+.4byte 0x0, 0x0, terminate_test # terminate tests
\ No newline at end of file

From e2691c02b75b37815c62a4cc971206302e1290d8 Mon Sep 17 00:00:00 2001
From: cturek <cturek@hmc.edu>
Date: Fri, 22 Jul 2022 16:45:19 +0000
Subject: [PATCH 36/36] Square root negative exponent handling

---
 pipelined/srt/sqrttestgen   | Bin 22792 -> 22792 bytes
 pipelined/srt/sqrttestgen.c |  33 ++++++++++++++++++++++++---------
 pipelined/srt/srt-waves.do  |   1 +
 pipelined/srt/srt.sv        |  22 ++++++++++++----------
 pipelined/srt/testbench.sv  |   8 ++++----
 5 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen
index 45fc6e7868a87d7e5f3b9308e340b3600a392273..7c6efb9f9085d76783d4192e98ba9cd8c44b8d24 100755
GIT binary patch
delta 1737
zcmY+FYfKzf6vyw~T^<Xw%q%T#2`vxPv^*@7BEeVa@)#HzAGEp&Hnma{H`N-&_<??)
zEC@7Au)5}Av-X3nHjT82Dd~KW7{d;rZX2U4iKb|4SkcBoG~#OPS~eZeojqWCley>q
z&i&tW&OLjR9lk_|FVSGs+36<M=zROTuv-^Xpo?B>p|kV!+QwHtdatqe_tAad?tOwB
zX?aW+a%MJbA=|;UOmB{tD3PfsyPYu`2o@6(7p1g?G;ZBPUeh!=Uqx1sXOY{HBgh@d
zVdPHakC6`}UqtRg{xHRXp`++HopQ*1Jo_nxVGi#g1UUo|LL5dA&Tu%6Fvg)DVVpw`
z!UTtI1l=$Gp~nMo<*o(Ov;fveVa-+hQ>vjp<bBA)$a|23$c@M$<cE;YAn!sRLoP=S
zjHBZ@N2>o5$hj!ck+YC5BioRp$hYwPJaQa4hWsnCfxMXF4MQv7h{y@kPRQh8cp2dr
z+4Zc_Kp!lPvq{qoQ+QN{3af~m<nL=q<Bt6{Li~!@J3S&Nm5`#2eokVhX#w<a&?0zG
zvH`9WLE$#vOVFq){|t^De?!o?Sbmwe`fJE+Tv_FPe_A$--H^$z6o|dLqRR6ikmrq0
zVb?E~T5`$3PG;{cEQTTm<RtAsmiz?*h;Q<TtBlAiiW)H**pJ!eIp_F0S0fXkBt{#{
z7fM<@5C}wxPq=UL!kq#Ew-oh@@U#5H@-3*@x5DL<cZFJHeL@KxXCDX^jo0BKez8i9
zb^o+{4=*+blk0J4PFy@V#T&pKatt(S4}iD(8g_BnZ~2YanCL2`FR@Bj#gXwl>skHR
zG~t4dfn&*<-qGNd8K0uwFk~o7omQh$iJ~dPO{e=8%>W@~;DL7PJa;zcUhCH?+$An{
zOg%H>uM~?0`NfG?#4ft>Jr1du_>>bnxP9``vKz|4Z<)r;gtp-NidJ{tG|dobixZ~V
z1-2b5Kb;PB>0o!k?-T?37gz#am}WnS50{O>sVeZ2<_t*!vlG70zz46@8e3s*hEHb=
z()N;ih<P9UdZZyoI&N!nrQ5gLg^WzH8C-9`w}+`xk!^XEU62ZMPeGyvaITAcwC|;V
z1h%q8p^Elk;aho51scdlJDaO%az<S#lC>@Oe@WM|mi<{<A*^lI!V%V2QIG>|BZLzl
zvZYxkuIak>8|^^dV7gOY*UZ%v+)b>seyg(`_kC<`_XZZD8?~PLM<`VQ^Xww&0PCST
zquxA+n{1Lao43+VqpZ5gLuJ<1bQRVm&7S<V<W@Z}Q3p&m7pY}?n?b3)(40Z(0rslg
z4Z)V=D(YqVEv+=p4#BLj^DW{0POM}t#I|%o+=Q3V-|B%h@3sc2t@eZ9W!TFlE}{uu
zOFoN-y3zkSHnTx%Q%(!?dA9Ua)q1}bR^qF3c3`>=b_nSIf1~$(by=IFR9Lw0qHml1
EKl{>MKmY&$

delta 1709
zcmZ9NZ){Ul6u|Fo_op4Nd#~F{-T!uO^RXi9oMf94tgdf+=swIq1rr?XOCvEc^@FJ~
zy3TCX)gjDW6XqAF`au?ASZJb)CTk~RC&r8!5~lc}L5OdHVfF(SEIiL`PaE(i@1FBJ
z=iGblIrpZWIzy+<(3uNnkCo(x=fhv()~J|=sOZTlQ8q^}*9^&@PMkWl^<u-38X7&e
z($TZqI})2UjFg%jl^&udm*&~M@|DmqB_u6MrSVcOtznO@>zuzrR*_F54<mn$ybt*V
z@<HTd$S)%wMGhdpm*-IU4K#%E24$RQe-mMf!|Mn$90CZl99}_)a_}RZ<S>db&tX5p
z0*Ac_3BPzVkq$x0^Y?T;2cR#2&ZC~lM}-r}k0CE3cOYLtZb4o{u0u{CS0QW27096t
zG(1z1?|%k4jZCmTe<GWZuOLgvzal?`{4=rx`8=|W{B54A!)`Dnm5gpAWKo5e5r3Bb
zRw0KDf~tL%)%7?$5TZpuODb9ZzSd8{G_7P+eg%`~@(;$UXa&>ZbG)Hxu|vtE#eGX@
zacd%?T>ZaSD*X~NYODY9&=(a=8-xsgRUAvG5oIl1ElSFo_8oZrqP)y+Bx7X9H@WK@
zA^C)or4w&wCGp5}aCmiE*-#@%ZHV32)KMAck2{iF;3w7guvWo2&;q+aQ08lHTYUfi
z06V(W$oKIBe8sIhx<2jO;Ngo8Z1oqL7M%1O_J!cGC!r;Nu~SI}f4{yN&ZeYbvh=Z=
zf^GA|210_Du~paoIj=Ux>MTukFYB|oUYNN*e;K5MYD%D*?y-+%emLt>BiFQHutNV5
zd{+VXON%2*g*Naj;B9^Btbd!>JjqKUVzY;>SZeL9l7slv4<6-m_t2JW>ckC`X1^WN
zEkDt&@qcuE7J@w)T@Qc^faJ^DZ|iyjB=3I;r0K4%6DaT!yDL7qWf@j=f|Yp7B?*KZ
zzN=t^=bIa?Fni%ku~F#}=LqcSgli>FS4!_1`YmO~2BT1JA`M_V4Br7ZCp8;LmYtWH
zs!l^f7Z{7d{@5MqmcRm52is^=sT}KTI%+e^&@VpRY_+@JJa5TUTdeW%A88S7?C<R^
zq2RQ^5;pFtuY}$a!ikTzTuhO8OrYgrY=6&WnYpbPv)Wz%ATfDotJ#hI?QHd_YL=ok
zvA1^api~C58AZ|y(q32i{mo^=$w;guBgPD#ZALoJy87+3jqU2c1iI5}uPq#=u!5F=
zX0Q+&d)^C?u|wW+N_Vi=lpu|=b)}O!Sna?d{ez9dEVH?Rc&!IdR@kDwj6h0$q<04G
zkmlpTP*?6sAmCN-RS^q0DR^?>E4V0#_8>Ob72Bnr5U9e|_jDHfFEgjF$J~qQdf6yY
Vq2eg}#Me`i!T%qq_lmFG_#ek*L<j%?

diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c
index 7a45449fd..17edc5283 100644
--- a/pipelined/srt/sqrttestgen.c
+++ b/pipelined/srt/sqrttestgen.c
@@ -1,6 +1,6 @@
 /* sqrttestgen.c */
 
-/* Written 19 October 2021 David_Harris@hmc.edu
+/* Written 7/22/2022 by Cedar Turek
 
    This program creates test vectors for mantissa component
    of an IEEE floating point square root. 
@@ -15,6 +15,7 @@
 /* Constants */
 
 #define ENTRIES  17
+#define BIGENT   1000
 #define RANDOM_VECS 500
 
 /* Prototypes */
@@ -34,6 +35,9 @@ void main(void)
 			  1.75, 1.875, 1.99999,
 			  1.1, 1.5, 1.01, 1.001, 1.0001,
 			  2/1.1, 2/1.5, 2/1.25, 2/1.125};
+
+  double bigtest[BIGENT];
+
   double exps[ENTRIES] = {0, 0, 2, 3, 4, 5, 6, 7, 8, 1, 10,
         11, 12, 13, 14, 15, 16};
   int i;
@@ -44,13 +48,14 @@ void main(void)
     exit(1);
   }
 
-  for (i=0; i<ENTRIES; i++) {
-    aFrac = mans[i];
-    aExp  = exps[i] + bias;
-    rFrac = sqrt(aFrac * pow(2, exps[i]));
-    rExp  = (int) (log(rFrac)/log(2) + bias);
-    output(fptr, aExp, aFrac, rExp, rFrac);
-  }
+  // Small Test
+  // for (i=0; i<ENTRIES; i++) {
+  //   aFrac = mans[i];
+  //   aExp  = exps[i] + bias;
+  //   rFrac = sqrt(aFrac * pow(2, exps[i]));
+  //   rExp  = (int) (log(rFrac)/log(2) + bias);
+  //   output(fptr, aExp, aFrac, rExp, rFrac);
+  // }
 
   //                                  WS
   // Test 1: sqrt(1) = 1              0000 0000 0000 00
@@ -67,6 +72,16 @@ void main(void)
   //   output(fptr, a, r);
   // }
 
+  // Big Test
+  for (i=0; i<BIGENT; i++) {
+    bigtest[i] = random_input();
+    aFrac = bigtest[i];
+    aExp  = (i - BIGENT/2) + bias;
+    rFrac = sqrt(aFrac * pow(2, (i - BIGENT/2)));
+    rExp  = (int) (log(rFrac)/log(2) + bias);
+    output(fptr, aExp, aFrac, rExp, rFrac);
+  }
+
   fclose(fptr);
 }
 
@@ -105,6 +120,6 @@ void printhex(FILE *fptr, double m)
 
 double random_input(void)
 {
-  return 1.0 + rand()/32767.0;
+  return 1.0 + ((rand() % 32768)/32767.0);
 }
   
diff --git a/pipelined/srt/srt-waves.do b/pipelined/srt/srt-waves.do
index 2fbf40c18..f911968d0 100644
--- a/pipelined/srt/srt-waves.do
+++ b/pipelined/srt/srt-waves.do
@@ -3,4 +3,5 @@ add wave -noupdate /testbench/srt/*
 add wave -noupdate /testbench/srt/sotfc2/*
 add wave -noupdate /testbench/srt/preproc/*
 add wave -noupdate /testbench/srt/postproc/*
+add wave -noupdate /testbench/srt/expcalc/*
 add wave -noupdate /testbench/srt/divcounter/*
diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv
index 1b61bc14f..3f6cad5ff 100644
--- a/pipelined/srt/srt.sv
+++ b/pipelined/srt/srt.sv
@@ -389,11 +389,11 @@ module expcalc(
   input  logic           Sqrt,
   output logic [`NE-1:0] calcExp
 );
-  logic        [`NE-1:0] SExp, DExp, SXExp;
-  assign SXExp = XExp - (`NE)'(`BIAS);
-  assign SExp  = {1'b0, SXExp[`NE-1:1]} + (`NE)'(`BIAS);
-  assign DExp  = XExp - YExp + (`NE)'(`BIAS);
-  assign calcExp = Sqrt ? SExp : DExp;
+  logic        [`NE+1:0] SExp, DExp, SXExp;
+  assign SXExp = {2'b00, XExp} - (`NE+2)'(`BIAS);
+  assign SExp  = (SXExp >> 1) + (`NE+2)'(`BIAS);
+  assign DExp  = {2'b00, XExp} - {2'b00, YExp} + (`NE+2)'(`BIAS);
+  assign calcExp = Sqrt ? SExp[`NE-1:0] : DExp[`NE-1:0];
 
 endmodule
 
@@ -462,11 +462,13 @@ module srtpostproc(
   end
   assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0];
   assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0];
-  assign shiftRem = (intRem >>> (`DIVLEN - dur + 2));
-  always_comb 
-    if (Int)      Result = intRes >> (`DIVLEN - dur);
-    else if (Mod) Result = shiftRem[`DIVLEN-1:0];
-    else          Result = floatRes;
+  assign shiftRem = (intRem >> (zeroCntD));
+  always_comb begin
+    if (Int) begin
+      if (Mod) Result = shiftRem[`DIVLEN-1:0];
+      else Result = intRes >> (`DIVLEN - dur);
+    end else Result = floatRes;
+  end
   assign calcSign = XSign ^ YSign;
 endmodule
 
diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv
index 513305b26..1b40c673a 100644
--- a/pipelined/srt/testbench.sv
+++ b/pipelined/srt/testbench.sv
@@ -53,7 +53,7 @@ module testbench;
  
   // Test parameters
   parameter MEM_SIZE = 40000;
-  parameter MEM_WIDTH = 64+64+64+64;
+  parameter MEM_WIDTH = 64+64+64;
  
   // Test sizes
   `define memr  63:0 
@@ -70,9 +70,9 @@ module testbench;
   integer testnum, errors;
 
   // Equip Int, Sqrt, or IntMod test
-  assign Int =  1'b1;
+  assign Int =  1'b0;
   assign Mod =  1'b0;
-  assign Sqrt = 1'b0;
+  assign Sqrt = 1'b1;
 
   // Divider
   srt srt(.clk, .Start(req), 
@@ -101,7 +101,7 @@ module testbench;
     begin
       testnum = 0; 
       errors = 0;
-      $readmemh ("inttestvectors", Tests);
+      $readmemh ("sqrttestvectors", Tests);
       Vec = Tests[testnum];
       a = Vec[`mema];
       {asign, aExp, afrac} = a;