FMA parameterized and FMA testbench reworked

2025-04-22 12:57:23 -04:00 · 2022-03-19 19:39:03 +00:00 · 2022-03-19 19:39:03 +00:00 · e3d01c875b
commit e3d01c875b
parent d43e868e5f
23 changed files with 3927 additions and 412 deletions
--- a/addins/riscv-arch-test
+++ b/addins/riscv-arch-test
@ -1 +1 @@
-Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
+Subproject commit be67c99bd461742aa1c100bcc0732657faae2230
--- a/pipelined/config/rv64fp/BTBPredictor.txt
+++ b/pipelined/config/rv64fp/BTBPredictor.txt
--- a/pipelined/config/rv64fp/twoBitPredictor.txt
+++ b/pipelined/config/rv64fp/twoBitPredictor.txt
--- a/pipelined/config/rv64fp/wally-config.vh
+++ b/pipelined/config/rv64fp/wally-config.vh
@ -0,0 +1,134 @@
+//////////////////////////////////////////
+// wally-config.vh
+//
+// Written: David_Harris@hmc.edu 4 January 2021
+// Modified: 
+//
+// Purpose: Specify which features are configured
+//          Macros to determine which modes are supported based on MISA
+// 
+// A component of the Wally configurable RISC-V project.
+// 
+// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, 
+// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software 
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 
+// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT 
+// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+///////////////////////////////////////////
+
+// include shared configuration
+`include "wally-shared.vh"
+
+`define FPGA 0
+`define QEMU 0
+`define DESIGN_COMPILER 0
+
+// RV32 or RV64: XLEN = 32 or 64
+`define XLEN 64
+
+// IEEE 754 compliance
+`define IEEE754 1
+
+// MISA RISC-V configuration per specification
+`define MISA (32'h00000104 | 1 << 5 | 0 << 3 | 1 << 18 | 1 << 20 | 1 << 12 | 1 << 0 )
+`define ZICSR_SUPPORTED 1
+`define ZIFENCEI_SUPPORTED 1
+`define COUNTERS 32
+`define ZICOUNTERS_SUPPORTED 1
+
+/// Microarchitectural Features
+`define UARCH_PIPELINED 1
+`define UARCH_SUPERSCALR 0
+`define UARCH_SINGLECYCLE 0
+`define DMEM `MEM_CACHE
+`define IMEM `MEM_CACHE
+`define VIRTMEM_SUPPORTED 1
+`define VECTORED_INTERRUPTS_SUPPORTED 1 
+
+// TLB configuration.  Entries should be a power of 2
+`define ITLB_ENTRIES 32
+`define DTLB_ENTRIES 32
+
+// Cache configuration.  Sizes should be a power of two
+// typical configuration 4 ways, 4096 bytes per way, 256 bit or more lines
+`define DCACHE_NUMWAYS 4
+`define DCACHE_WAYSIZEINBYTES 4096
+`define DCACHE_LINELENINBITS 256
+`define ICACHE_NUMWAYS 4
+`define ICACHE_WAYSIZEINBYTES 4096
+`define ICACHE_LINELENINBITS 256
+
+// Integer Divider Configuration
+// DIV_BITSPERCYCLE must be 1, 2, or 4
+`define DIV_BITSPERCYCLE 4
+
+// Legal number of PMP entries are 0, 16, or 64
+`define PMP_ENTRIES 64
+
+// Address space
+`define RESET_VECTOR 64'h0000000080000000
+
+// Bus Interface width
+`define AHBW 64
+
+// Peripheral Physical Addresses
+// Peripheral memory space extends from BASE to BASE+RANGE
+// Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits
+
+// *** each of these is `PA_BITS wide. Is this parameterizable INSIDE the config file?
+`define BOOTROM_SUPPORTED 1'b1
+`define BOOTROM_BASE   56'h00001000 // spec had been 0x1000 to 0x2FFF, but dh truncated to 0x1000 to 0x1FFF because upper half seems to be all zeros and this is easier for decoder
+`define BOOTROM_RANGE  56'h00000FFF
+`define RAM_SUPPORTED 1'b1
+`define RAM_BASE       56'h80000000
+`define RAM_RANGE      56'h7FFFFFFF
+`define EXT_MEM_SUPPORTED 1'b0
+`define EXT_MEM_BASE       56'h80000000
+`define EXT_MEM_RANGE      56'h07FFFFFF
+`define CLINT_SUPPORTED 1'b1
+`define CLINT_BASE  56'h02000000
+`define CLINT_RANGE 56'h0000FFFF
+`define GPIO_SUPPORTED 1'b1
+`define GPIO_BASE   56'h10060000
+`define GPIO_RANGE  56'h000000FF
+`define UART_SUPPORTED 1'b1
+`define UART_BASE   56'h10000000
+`define UART_RANGE  56'h00000007
+`define PLIC_SUPPORTED 1'b1
+`define PLIC_BASE   56'h0C000000
+`define PLIC_RANGE  56'h03FFFFFF
+`define SDC_SUPPORTED 1'b0
+`define SDC_BASE   56'h00012100
+`define SDC_RANGE  56'h0000001F
+
+// Test modes
+
+// Tie GPIO outputs back to inputs
+`define GPIO_LOOPBACK_TEST 1
+
+// Hardware configuration
+`define UART_PRESCALE 1
+
+// Interrupt configuration
+`define PLIC_NUM_SRC 10
+// comment out the following if >=32 sources
+`define PLIC_NUM_SRC_LT_32
+`define PLIC_GPIO_ID 3
+`define PLIC_UART_ID 10
+
+`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt"
+`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt"
+`define BPRED_ENABLED 1
+`define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE
+`define TESTSBP 0
+
+`define REPLAY 0
+`define HPTW_WRITES_SUPPORTED 0
--- a/pipelined/config/shared/wally-shared.vh
+++ b/pipelined/config/shared/wally-shared.vh
@ -50,10 +50,47 @@
 // Number of 64 bit PMP Configuration Register entries (or pairs of 32 bit entries)
 `define PMPCFG_ENTRIES (`PMP_ENTRIES/8)

+
+// Floating-point half-precision
+`define ZFH_SUPPORTED 0
+
+// Floating point constants for Quad, Double, Single, and Half precisions
+`define Q_LEN 128
+`define Q_NE 15
+`define Q_NF 112
+`define Q_BIAS 16383
+`define D_LEN 64
+`define D_NE 11
+`define D_NF 52
+`define D_BIAS 1023
+`define S_LEN 32
+`define S_NE 8
+`define S_NF 23
+`define S_BIAS 127
+`define H_LEN 16
+`define H_NE 5
+`define H_NF 10
+`define H_BIAS 15
+
 // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
-`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
-`define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
-`define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
+`define FLEN (`Q_SUPPORTED ? `Q_LEN  : `D_SUPPORTED ? `D_LEN  : `F_SUPPORTED ? `S_LEN  : `H_LEN)
+`define NE   (`Q_SUPPORTED ? `Q_NE   : `D_SUPPORTED ? `D_NE   : `F_SUPPORTED ? `S_NE   : `H_NE)
+`define NF   (`Q_SUPPORTED ? `Q_NF   : `D_SUPPORTED ? `D_NF   : `F_SUPPORTED ? `S_NF   : `H_NF)
+`define FMT  (`Q_SUPPORTED ? 3       : `D_SUPPORTED ? 1       : `F_SUPPORTED ? 0       : 2)
+`define BIAS (`Q_SUPPORTED ? `Q_BIAS : `D_SUPPORTED ? `D_BIAS : `F_SUPPORTED ? `S_BIAS : `H_BIAS)
+
+// Floating point constants needed for FPU parameterization
+`define FPSIZES (`Q_SUPPORTED+`D_SUPPORTED+`F_SUPPORTED+`ZFH_SUPPORTED)
+`define LEN1  ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_LEN   : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_LEN  : `H_LEN)
+`define NE1   ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NE   : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NE  : `H_NE)
+`define NF1   ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_NF  : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_NF : `H_NF)
+`define FMT1  ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? 1        : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? 0       : 2)
+`define BIAS1 ((`D_SUPPORTED & (`FLEN != `D_LEN)) ? `D_BIAS  : (`F_SUPPORTED & (`FLEN != `S_LEN)) ? `S_BIAS : `H_BIAS)
+`define LEN2  ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_LEN   : `H_LEN)
+`define NE2   ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NE   : `H_NE)
+`define NF2   ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_NF  : `H_NF)
+`define FMT2  ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 0        : 2)
+`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS  : `H_BIAS)

 // Disable spurious Verilator warnings

--- a/pipelined/fpu-testfloat/FMA/tbgen/tb.sv
+++ b/pipelined/fpu-testfloat/FMA/tbgen/tb.sv
@ -1,10 +1,33 @@

-//`include "../../../config/old/rv64icfd/wally-config.vh"
+`include "../../../config/old/rv64icfd/wally-config.vh"

-`define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
-`define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
-`define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
-`define XLEN 64
+// `define FLEN (`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : `F_SUPPORTED ? 32 : 16)
+// `define NE   (`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : `F_SUPPORTED ? 8 : 5)
+// `define NF   (`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : `F_SUPPORTED ? 23 : 10)
+// `define FMT (`Q_SUPPORTED ? 3 : `D_SUPPORTED ? 1 : `F_SUPPORTED ? 0 : 2)
+// `define BIAS (`Q_SUPPORTED ? 16383 : `D_SUPPORTED ? 1023 : `F_SUPPORTED ? 127 : 15)
+// `define XLEN 64
+// `define IEEE754 1
+`define Q_SUPPORTED 1
+// `define D_SUPPORTED 0
+// `define F_SUPPORTED 0
+`define H_SUPPORTED 0
+`define FPSIZES ((`Q_SUPPORTED&`D_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) ? 4 : (`Q_SUPPORTED&`D_SUPPORTED&`F_SUPPORTED) | (`Q_SUPPORTED&`D_SUPPORTED&`H_SUPPORTED) | (`Q_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) | (`D_SUPPORTED&`F_SUPPORTED&`H_SUPPORTED) ? 3 : (`Q_SUPPORTED&`D_SUPPORTED) | (`Q_SUPPORTED&`F_SUPPORTED) | (`Q_SUPPORTED&`H_SUPPORTED) | (`D_SUPPORTED&`F_SUPPORTED) | (`D_SUPPORTED&`H_SUPPORTED) | (`F_SUPPORTED&`H_SUPPORTED) ? 2 : 1)
+`define LEN1  ((`D_SUPPORTED & (`FLEN !== 64)) ? 64   : (`F_SUPPORTED & (`FLEN !== 32)) ? 32  : 16)
+`define NE1   ((`D_SUPPORTED & (`FLEN !== 64)) ? 11   : (`F_SUPPORTED & (`FLEN !== 32)) ? 8   : 5)
+`define NF1   ((`D_SUPPORTED & (`FLEN !== 64)) ? 52   : (`F_SUPPORTED & (`FLEN !== 32)) ? 23  : 10)
+`define FMT1  ((`D_SUPPORTED & (`FLEN !== 64)) ? 1    : (`F_SUPPORTED & (`FLEN !== 32)) ? 0   : 2)
+`define BIAS1 ((`D_SUPPORTED & (`FLEN !== 64)) ? 1023 : (`F_SUPPORTED & (`FLEN !== 32)) ? 127 : 15)
+`define LEN2  ((`F_SUPPORTED & (`LEN1 !== 32)) ? 32   : 16)
+`define NE2   ((`F_SUPPORTED & (`LEN1 !== 32)) ? 8    : 5)
+`define NF2   ((`F_SUPPORTED & (`LEN1 !== 32)) ? 23   : 10)
+`define FMT2  ((`F_SUPPORTED & (`LEN1 !== 32)) ? 0    : 2)
+`define BIAS2 ((`F_SUPPORTED & (`LEN1 !== 32)) ? 127  : 15)
+`define LEN3 16 // third alternate format: same values as half precision (16 bits, 5 exponent bits, 10 fraction bits, bias 15)
+`define NE3 5//make constants for the constants, i.e. 11/8/5 etc.
+`define NF3 10 // always support less than max - maybe halves
+`define FMT3 2
+`define BIAS3 15
 module testbench3();

 logic [31:0] errors=0;
@ -15,33 +38,17 @@ module testbench3();
 logic 	[`FLEN-1:0]		ans;
 logic 	[7:0]	 	flags;
 logic 	[2:0]		FrmE;
- logic				FmtE;
+ logic	[`FPSIZES/3:0]			FmtE;
 logic  [`FLEN-1:0]      FMAResM;
 logic  [4:0]       FMAFlgM;
-integer fp;
 logic 	[2:0]		FOpCtrlE;
 logic 		[2*`NF+1:0]		ProdManE; 
 logic 		[3*`NF+5:0]		AlignedAddendE;	
 logic 		[`NE+1:0]		ProdExpE; 
 logic 					AddendStickyE;
 logic 					KillProdE; 
-// logic					XZeroE;
-// logic					YZeroE;
-// logic					ZZeroE;
-// logic					XDenormE;
-// logic					YDenormE;
-// logic					ZDenormE;
-// logic					XInfE;
-// logic					YInfE;
-// logic					ZInfE;
-// logic					XNaNE;
-// logic					YNaNE;
-// logic					ZNaNE;

 logic wnan;
-// logic XNaNE;
-// logic YNaNE;
-// logic ZNaNE;
 logic ansnan, clk;


@ -52,88 +59,86 @@ assign FOpCtrlE = 3'b0;
 // down - 010
 // up - 011
 // nearest max mag - 100  
-assign FrmE = 3'b000;
-assign FmtE = 1'b1;
+assign FrmE = 3'b010;
+assign FmtE = (`FPSIZES/3+1)'(1);

    logic  [`FLEN-1:0] X, Y, Z;
    // logic         FmtE;
    // logic  [2:0]  FOpCtrlE;
    logic        XSgnE, YSgnE, ZSgnE;
    logic [`NE-1:0] XExpE, YExpE, ZExpE;
-    logic [`NF-1:0] XFracE, YFracE, ZFracE;
-    logic        XAssumed1E, YAssumed1E, ZAssumed1E;
+    logic [`NF:0] XManE, YManE, ZManE;
    logic XNormE;
+    logic XExpMaxE;
    logic XNaNE, YNaNE, ZNaNE;
    logic XSNaNE, YSNaNE, ZSNaNE;
    logic XDenormE, YDenormE, ZDenormE;
    logic XZeroE, YZeroE, ZZeroE;
    logic [`NE-1:0] BiasE;
    logic XInfE, YInfE, ZInfE;
-    logic XExpMaxE;
- //***rename to make significand = 1.frac m = significand
-    logic           XFracZero, YFracZero, ZFracZero; // input fraction zero
-    logic           XExpZero, YExpZero, ZExpZero; // input exponent zero
    logic [`FLEN-1:0]    Addend; // value to add (Z or zero)
-    logic           YExpMaxE, ZExpMaxE;  // input exponent all 1s
+    logic           YExpMaxE, ZExpMaxE, Mult;  // input exponent all 1s

-    assign Addend = FOpCtrlE[2] ? (`FLEN)'(0) : Z; // Z is only used in the FMA, and is set to Zero if a multiply opperation
-    assign XSgnE = FmtE ? X[`FLEN-1] : X[31];
-    assign YSgnE = FmtE ? Y[`FLEN-1] : Y[31];
-    assign ZSgnE = FmtE ? Addend[`FLEN-1] : Addend[31];
+	assign Mult = 1'b0;
+  unpacking unpacking(.*);

-    assign XExpE = FmtE ? X[62:52] : {X[30], {3{~X[30]&~XExpZero|XExpMaxE}}, X[29:23]}; 
-    assign YExpE = FmtE ? Y[62:52] : {Y[30], {3{~Y[30]&~YExpZero|YExpMaxE}}, Y[29:23]}; 
-    assign ZExpE = FmtE ? Addend[62:52] : {Addend[30], {3{~Addend[30]&~ZExpZero|ZExpMaxE}}, Addend[29:23]}; 
+// assign	wnan = XNaNE|YNaNE|ZNaNE; 
+// assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] && |ans[`NF-1:0] : &ans[30:23] && |ans[22:0]; 
+ 
+    if (`FPSIZES === 1) begin
+      assign ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
+      assign wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);
+    end else if (`FPSIZES === 2) begin                  
+      assign ansnan = FmtE ? &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]) : &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
+      assign wnan = FmtE ? &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]) : &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);
+    end else if (`FPSIZES === 3) begin
+        always_comb begin // NaN detect for 3 supported formats; blocking assignments (procedural 'assign' is deprecated and not accepted in always_comb by all tools)
+            case (FmtE)
+                `FMT: begin // largest supported format: exponent all ones and nonzero fraction
+                  ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
+                  wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);

-    assign XFracE = FmtE ? X[`NF-1:0] : {X[22:0], 29'b0};
-    assign YFracE = FmtE ? Y[`NF-1:0] : {Y[22:0], 29'b0};
-    assign ZFracE = FmtE ? Addend[`NF-1:0] : {Addend[22:0], 29'b0};
+                end
+                `FMT1: begin // second format, held in the low LEN1 bits
+                  ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
+                  wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);

-    assign XAssumed1E = FmtE ? |X[62:52] : |X[30:23]; 
-    assign YAssumed1E = FmtE ? |Y[62:52] : |Y[30:23];
-    assign ZAssumed1E = FmtE ? |Z[62:52] : |Z[30:23];
+                end
+                `FMT2: begin // third format, held in the low LEN2 bits
+                    ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]);
+                    wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]);
+                end
+                default: begin // unsupported format encoding: report "not a NaN"
+                    ansnan = 0;
+                    wnan = 0;
+                end
+            endcase
+        end

-    assign XExpZero = ~XAssumed1E;
-    assign YExpZero = ~YAssumed1E;
-    assign ZExpZero = ~ZAssumed1E;
-   
-    assign XFracZero = ~|XFracE;
-    assign YFracZero = ~|YFracE;
-    assign ZFracZero = ~|ZFracE;
+    end else begin
+        always_comb begin // NaN detect for 4 supported formats; blocking assignments (procedural 'assign' is deprecated and not accepted in always_comb by all tools)
+            case (FmtE)
+                `FMT: begin // largest supported format: exponent all ones and nonzero fraction
+                  ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
+                  wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);

-    assign XExpMaxE = FmtE ? &X[62:52] : &X[30:23];
-    assign YExpMaxE = FmtE ? &Y[62:52] : &Y[30:23];
-    assign ZExpMaxE = FmtE ? &Z[62:52] : &Z[30:23];
-   
-    assign XNormE = ~(XExpMaxE|XExpZero);
-    
-    assign XNaNE = XExpMaxE & ~XFracZero;
-    assign YNaNE = YExpMaxE & ~YFracZero;
-    assign ZNaNE = ZExpMaxE & ~ZFracZero;
+                end
+                `FMT1: begin // second format, held in the low LEN1 bits
+                  ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
+                  wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);

-    assign XSNaNE = XNaNE&~XFracE[`NF-1];
-    assign YSNaNE = YNaNE&~YFracE[`NF-1];
-    assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];
-
-    assign XDenormE = XExpZero & ~XFracZero;
-    assign YDenormE = YExpZero & ~YFracZero;
-    assign ZDenormE = ZExpZero & ~ZFracZero;
-
-    assign XInfE = XExpMaxE & XFracZero;
-    assign YInfE = YExpMaxE & YFracZero;
-    assign ZInfE = ZExpMaxE & ZFracZero;
-
-    assign XZeroE = XExpZero & XFracZero;
-    assign YZeroE = YExpZero & YFracZero;
-    assign ZZeroE = ZExpZero & ZFracZero;
-
-    assign BiasE = 13'h3ff;
-
-assign	wnan = FmtE ? &FMAResM[`FLEN-2:`NF] & |FMAResM[`NF-1:0] : &FMAResM[30:23] & |FMAResM[22:0]; 
-// assign	XNaNE = FmtE ? &X[62:52] & |X[51:0] : &X[62:55] & |X[54:32]; 
-// assign	YNaNE = FmtE ? &Y[62:52] & |Y[51:0] : &Y[62:55] & |Y[54:32]; 
-// assign	ZNaNE = FmtE ? &Z[62:52] & |Z[51:0] : &Z[62:55] & |Z[54:32]; 
-assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] & |ans[`NF-1:0] : &ans[30:23] & |ans[22:0]; 
+                end
+                `FMT2: begin // third format, held in the low LEN2 bits
+                    ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]);
+                    wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]);
+                end
+                `FMT3: begin // fourth format, held in the low LEN3 bits
+                    ansnan = &ans[`LEN3-2:`NF3]&(|ans[`NF3-1:0]);
+                    wnan = &FMAResM[`LEN3-2:`NF3]&(|FMAResM[`NF3-1:0]);
+                end
+            endcase
+        end
+    end
 // instantiate device under test

    logic [3*`NF+5:0]	SumE, SumM;       
@ -141,16 +146,16 @@ assign	ansnan = FmtE ? &ans[`FLEN-2:`NF] & |ans[`NF-1:0] : &ans[30:23] & |ans[22
    logic 			    NegSumE, NegSumM;
    logic 			    ZSgnEffE, ZSgnEffM;
    logic 			    PSgnE, PSgnM;
-    logic [8:0]			NormCntE, NormCntM;
+    logic [$clog2(3*`NF+7)-1:0]			NormCntE, NormCntM;
    
-    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE({XAssumed1E,XFracE}), .YManE({YAssumed1E,YFracE}), .ZManE({ZAssumed1E,ZFracE}),
+    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
                 .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
                .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
                .ProdExpE, .AddendStickyE, .KillProdE); 
-fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM({XAssumed1E,XFracE}), .YManM({YAssumed1E,YFracE}), .ZManM({ZAssumed1E,ZFracE}), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
+fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM(XManE), .YManM(YManE), .ZManM(ZManE), .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE),
              //  .FSrcXE, .FSrcYE, .FSrcZE, .FSrcXM, .FSrcYM, .FSrcZM, 
                .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE),
-               .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM);
+               .FmtM(FmtE), .FrmM(FrmE), .FMAFlgM, .FMAResM, .Mult);


 // produce clock
@ -168,61 +173,156 @@ fma2 UUT2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZEx
 always @(posedge clk)
 begin
  #1; 
-  if (FmtE==1'b1) {X, Y, Z, ans, flags} = testvectors[vectornum];
-  else	begin	  X = {{32{1'b1}}, testvectors[vectornum][135:104]};
-  		  Y = {{32{1'b1}}, testvectors[vectornum][103:72]};
-  		  Z = {{32{1'b1}}, testvectors[vectornum][71:40]};
-  		  ans = {{32{1'b1}}, testvectors[vectornum][39:8]};
-  		  flags = testvectors[vectornum][7:0];
+  if (`FPSIZES === 3 | `FPSIZES === 4) begin
+    if (FmtE==2'b11) {X, Y, Z, ans, flags} = testvectors[vectornum];
+    else if (FmtE==2'b01)	begin	  
+      X = {{`FLEN-64{1'b1}}, testvectors[vectornum][263:200]};
+      Y = {{`FLEN-64{1'b1}}, testvectors[vectornum][199:136]};
+      Z = {{`FLEN-64{1'b1}}, testvectors[vectornum][135:72]};
+      ans = {{`FLEN-64{1'b1}}, testvectors[vectornum][71:8]};
+      flags = testvectors[vectornum][7:0];
+    end
+    else if (FmtE==2'b00)	begin	  
+      X = {{`FLEN-32{1'b1}}, testvectors[vectornum][135:104]};
+      Y = {{`FLEN-32{1'b1}}, testvectors[vectornum][103:72]};
+      Z = {{`FLEN-32{1'b1}}, testvectors[vectornum][71:40]};
+      ans = {{`FLEN-32{1'b1}}, testvectors[vectornum][39:8]};
+      flags = testvectors[vectornum][7:0];
+    end
+    else	begin	  
+      X = {{`FLEN-16{1'b1}}, testvectors[vectornum][71:56]};
+      Y = {{`FLEN-16{1'b1}}, testvectors[vectornum][55:40]};
+      Z = {{`FLEN-16{1'b1}}, testvectors[vectornum][39:24]};
+      ans = {{`FLEN-16{1'b1}}, testvectors[vectornum][23:8]};
+      flags = testvectors[vectornum][7:0];
+    end
+  end
+  else begin
+    if (FmtE==1'b1) {X, Y, Z, ans, flags} = testvectors[vectornum];
+    else if (FmtE==1'b0)	begin	  
+      X = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+4*(`LEN1)-1:8+3*(`LEN1)]};
+      Y = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+3*(`LEN1)-1:8+2*(`LEN1)]};
+      Z = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+2*(`LEN1)-1:8+(`LEN1)]};
+      ans = {{`FLEN-`LEN1{1'b1}}, testvectors[vectornum][8+(`LEN1-1):8]};
+      flags = testvectors[vectornum][7:0];
+    end
  end
 end
 // check results on falling edge of clk
  always @(negedge clk) begin
- 
-	if((FmtE==1'b1) & (FMAFlgM != flags[4:0] | (!wnan & (FMAResM != ans)) | (wnan & ansnan & ~((XNaNE & (FMAResM[`FLEN-2:0] == {XExpE,1'b1,X[`NF-2:0]})) | (YNaNE & (FMAResM[`FLEN-2:0] == {YExpE,1'b1,Y[`NF-2:0]}))  | (ZNaNE & (FMAResM[`FLEN-2:0] == {ZExpE,1'b1,Z[`NF-2:0]})) | (FMAResM[`FLEN-2:0] == ans[`FLEN-2:0]))))) begin
-  //  fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w");
-	// if((FmtE==1'b1) & (FMAFlgM != flags[4:0] | (FMAResM != ans))) begin
-        $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
-		if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero ");
-		if(XDenormE) $display( "xdenorm ");
-		if(YDenormE) $display( "ydenorm ");
-		if(ZDenormE) $display( "zdenorm ");
-		if(FMAFlgM[4] != 0) $display( "invld ");
-		if(FMAFlgM[2] != 0) $display( "ovrflw ");
-		if(FMAFlgM[1] != 0) $display( "unflw ");
-		if(FMAResM[`FLEN] & FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] == 0) $display( "FMAResM=-inf ");
-		if(~FMAResM[`FLEN] & FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] == 0) $display( "FMAResM=+inf ");
-		if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] != 0 & ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
-		if(FMAResM[`FLEN-2:`NF] == {`NE{1'b1}} & FMAResM[`NF-1:0] != 0 & FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
-		if(ans[`FLEN] & ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] == 0) $display( "ans=-inf ");
-		if(~ans[`FLEN] & ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] == 0) $display( "ans=+inf ");
-		if(ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] != 0 & ~ans[`NF-1]) $display( "ans=sigNaN ");
-		if(ans[`FLEN-2:`NF] == {`NE{1'b1}} & ans[`NF-1:0] != 0 & ans[`NF-1]) $display( "ans=qutNaN ");
-        errors = errors + 1;
-	  //if (errors == 10)
-		$stop;
-    end
-    if((FmtE==1'b0)&(FMAFlgM != flags[4:0] | (!wnan & (FMAResM != ans)) | (wnan & ansnan & ~(((XNaNE & (FMAResM[30:0] == {X[30:23],1'b1,X[21:0]})) | (YNaNE & (FMAResM[30:0] == {Y[30:23],1'b1,Y[21:0]}))  | (ZNaNE & (FMAResM[30:0] == {Z[30:23],1'b1,Z[21:0]})) | (FMAResM[30:0] == ans[30:0]))) ))) begin
-        $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
-		if(FMAResM == 64'h8000000000000000) $display( "FMAResM=-zero ");
-		if(~(|X[30:23]) & |X[22:0]) $display( "xdenorm ");
-		if(~(|Y[30:23]) & |Y[22:0]) $display( "ydenorm ");
-		if(~(|Z[30:23]) & |Z[22:0]) $display( "zdenorm ");
-		if(FMAFlgM[4] != 0) $display( "invld ");
-		if(FMAFlgM[2] != 0) $display( "ovrflw ");
-		if(FMAFlgM[1] != 0) $display( "unflw ");
-		if(FMAResM == 64'hFF80000000000000) $display( "FMAResM=-inf ");
-		if(FMAResM == 64'h7F80000000000000) $display( "FMAResM=+inf ");
-		if(&FMAResM[30:23] & |FMAResM[22:0] & ~FMAResM[22]) $display( "FMAResM=sigNaN ");
-		if(&FMAResM[30:23] & |FMAResM[22:0] & FMAResM[22] ) $display( "FMAResM=qutNaN ");
-		if(ans == 64'hFF80000000000000) $display( "ans=-inf ");
-		if(ans == 64'h7F80000000000000) $display( "ans=+inf ");
-		if(&ans[30:23] & |ans[22:0] & ~ans[22] ) $display( "ans=sigNaN ");
-		if(&ans[30:23] & |ans[22:0] & ans[22]) $display( "ans=qutNaN ");
-        errors = errors + 1;
-	  if (errors == 10)
-		$stop;
-    end
+      if (`FPSIZES === 1 | `FPSIZES === 2) begin
+        if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]}))  || (ZNaNE && (FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0]))))) begin
+        //  fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w");
+        // if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (FMAResM !== ans))) begin
+              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); // full-width format mismatch: dump operands, result, expected, flags
+          if(XDenormE) $display( "xdenorm ");
+          if(YDenormE) $display( "ydenorm ");
+          if(ZDenormE) $display( "zdenorm ");
+          if(FMAFlgM[4] !== 0) $display( "invld ");
+          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
+          if(FMAFlgM[1] !== 0) $display( "unflw ");
+          if(FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf "); // sign is bit FLEN-1 (bit FLEN is out of range)
+          if(~FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf ");
+          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
+          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
+          if(ans[`FLEN-1] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf ");
+          if(~ans[`FLEN-1] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf ");
+          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
+          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN ");
+              errors = errors + 1;
+          //if (errors === 10)
+          $stop;
+          end
+          if((FmtE==1'b0)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[`LEN1-2:0] === {X[`LEN1-2:`NF1],1'b1,X[`NF1-2:0]})) || (YNaNE && (FMAResM[`LEN1-2:0] === {Y[`LEN1-2:`NF1],1'b1,Y[`NF1-2:0]}))  || (ZNaNE && (FMAResM[`LEN1-2:0] === {Z[`LEN1-2:`NF1],1'b1,Z[`NF1-2:0]})) || (FMAResM[`LEN1-2:0] === ans[`LEN1-2:0]))) ))) begin
+              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); // narrower (LEN1) format mismatch
+          if(~(|X[`LEN1-2:`NF1]) && |X[`NF1-1:0]) $display( "xdenorm "); // field ranges follow LEN1/NF1, matching the comparison above
+          if(~(|Y[`LEN1-2:`NF1]) && |Y[`NF1-1:0]) $display( "ydenorm ");
+          if(~(|Z[`LEN1-2:`NF1]) && |Z[`NF1-1:0]) $display( "zdenorm ");
+          if(FMAFlgM[4] !== 0) $display( "invld ");
+          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
+          if(FMAFlgM[1] !== 0) $display( "unflw ");
+          if(&FMAResM[`LEN1-2:`NF1] && |FMAResM[`NF1-1:0] && ~FMAResM[`NF1-1]) $display( "FMAResM=sigNaN ");
+          if(&FMAResM[`LEN1-2:`NF1] && |FMAResM[`NF1-1:0] && FMAResM[`NF1-1] ) $display( "FMAResM=qutNaN ");
+          if(&ans[`LEN1-2:`NF1] && |ans[`NF1-1:0] && ~ans[`NF1-1] ) $display( "ans=sigNaN ");
+          if(&ans[`LEN1-2:`NF1] && |ans[`NF1-1:0] && ans[`NF1-1]) $display( "ans=qutNaN ");
+              errors = errors + 1;
+        // if (errors === 9)
+          $stop;
+          end
+ end else begin
+   
+        if((FmtE==2'b11) & (FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~((XNaNE && (FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) || (YNaNE && (FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]}))  || (ZNaNE && (FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})) || (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0]))))) begin
+        //  fp = $fopen("/home/kparry/riscv-wally/pipelined/src/fpu/FMA/tbgen/results.dat","w");
+        // if((FmtE==1'b1) & (FMAFlgM !== flags[4:0] || (FMAResM !== ans))) begin
+              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); // full-width (FmtE==11) mismatch: dump operands, result, expected, flags
+          if(XDenormE) $display( "xdenorm ");
+          if(YDenormE) $display( "ydenorm ");
+          if(ZDenormE) $display( "zdenorm ");
+          if(FMAFlgM[4] !== 0) $display( "invld ");
+          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
+          if(FMAFlgM[1] !== 0) $display( "unflw ");
+          if(FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf "); // sign is bit FLEN-1 (bit FLEN is out of range)
+          if(~FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf ");
+          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
+          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
+          if(ans[`FLEN-1] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf ");
+          if(~ans[`FLEN-1] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf ");
+          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
+          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN ");
+              errors = errors + 1;
+          //if (errors === 10)
+          $stop;
+          end
+          if((FmtE==2'b01)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[64-2:0] === {X[64-2:52],1'b1,X[52-2:0]})) || (YNaNE && (FMAResM[64-2:0] === {Y[64-2:52],1'b1,Y[52-2:0]}))  || (ZNaNE && (FMAResM[64-2:0] === {Z[64-2:52],1'b1,Z[52-2:0]})) || (FMAResM[62:0] === ans[62:0]))) ))) begin
+              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); // double (FmtE==01) mismatch
+          if(~(|X[62:52]) && |X[51:0]) $display( "xdenorm "); // double fields: exponent [62:52], fraction [51:0]
+          if(~(|Y[62:52]) && |Y[51:0]) $display( "ydenorm ");
+          if(~(|Z[62:52]) && |Z[51:0]) $display( "zdenorm ");
+          if(FMAFlgM[4] !== 0) $display( "invld ");
+          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
+          if(FMAFlgM[1] !== 0) $display( "unflw ");
+          if(&FMAResM[62:52] && |FMAResM[51:0] && ~FMAResM[51]) $display( "FMAResM=sigNaN ");
+          if(&FMAResM[62:52] && |FMAResM[51:0] && FMAResM[51] ) $display( "FMAResM=qutNaN ");
+          if(&ans[62:52] && |ans[51:0] && ~ans[51] ) $display( "ans=sigNaN ");
+          if(&ans[62:52] && |ans[51:0] && ans[51]) $display( "ans=qutNaN ");
+              errors = errors + 1;
+        // if (errors === 9)
+          $stop;
+          end
+          if((FmtE==2'b00)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[32-2:0] === {X[32-2:23],1'b1,X[23-2:0]})) || (YNaNE && (FMAResM[32-2:0] === {Y[32-2:23],1'b1,Y[23-2:0]}))  || (ZNaNE && (FMAResM[32-2:0] === {Z[32-2:23],1'b1,Z[23-2:0]})) || (FMAResM[30:0] === ans[30:0]))) ))) begin
+              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); // single (FmtE==00) mismatch: dump operands, result, expected, flags
+          if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm "); // zero exponent [30:23] with nonzero fraction [22:0]
+          if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
+          if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
+          if(FMAFlgM[4] !== 0) $display( "invld ");
+          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
+          if(FMAFlgM[1] !== 0) $display( "unflw ");
+          if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN "); // all-ones exponent, nonzero fraction, top fraction bit clear
+          if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
+          if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
+          if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
+              errors = errors + 1;
+        // if (errors === 9)
+          $stop; // halt the simulation on the first mismatch
+          end
+          if((FmtE==2'b10)&(FMAFlgM !== flags[4:0] || (!wnan && (FMAResM !== ans)) || (wnan && ansnan && ~(((XNaNE && (FMAResM[16-2:0] === {X[16-2:10],1'b1,X[10-2:0]})) || (YNaNE && (FMAResM[16-2:0] === {Y[16-2:10],1'b1,Y[10-2:0]}))  || (ZNaNE && (FMAResM[16-2:0] === {Z[16-2:10],1'b1,Z[10-2:0]})) || (FMAResM[14:0] === ans[14:0]))) ))) begin
+              $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags); // half (FmtE==10) mismatch
+          if(~(|X[14:10]) && |X[9:0]) $display( "xdenorm "); // half fields: exponent [14:10], fraction [9:0]
+          if(~(|Y[14:10]) && |Y[9:0]) $display( "ydenorm ");
+          if(~(|Z[14:10]) && |Z[9:0]) $display( "zdenorm ");
+          if(FMAFlgM[4] !== 0) $display( "invld ");
+          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
+          if(FMAFlgM[1] !== 0) $display( "unflw ");
+          if(&FMAResM[14:10] && |FMAResM[9:0] && ~FMAResM[9]) $display( "FMAResM=sigNaN ");
+          if(&FMAResM[14:10] && |FMAResM[9:0] && FMAResM[9] ) $display( "FMAResM=qutNaN ");
+          if(&ans[14:10] && |ans[9:0] && ~ans[9] ) $display( "ans=sigNaN ");
+          if(&ans[14:10] && |ans[9:0] && ans[9]) $display( "ans=qutNaN ");
+              errors = errors + 1;
+        // if (errors === 9)
+          $stop;
+          end
+ end
+	
 vectornum = vectornum + 1;
 if (testvectors[vectornum] === 194'bx) begin
 $display("%d tests completed with %d errors", vectornum, errors);
--- a/pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
+++ b/pipelined/fpu-testfloat/FMA/tbgen/test_gen.sh
@ -1,3 +1,3 @@
-testfloat_gen f64_mulAdd -tininessafter -n 6133248 -rnear_even  -seed 113355 -level 1 > testFloat
+testfloat_gen f128_mulAdd -tininessafter -n 6133248 -rmin  -seed 113355 -level 1 > testFloat
 tr -d ' ' < testFloat > testFloatNoSpace

--- a/pipelined/src/fpu/fcmp.sv
+++ b/pipelined/src/fpu/fcmp.sv
@ -42,6 +42,7 @@ module fcmp (
   //             - if negitive - no
   //             - if positive - yes
   // note: LT does -0 < 0
+   //*** compare Exp and Man together
   assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
   assign EQ = (FSrcXE == FSrcYE);

--- a/pipelined/src/fpu/fcvtfp.sv
+++ b/pipelined/src/fpu/fcvtfp.sv
@ -103,7 +103,7 @@ module cvtfp (
    assign LSBFrac = DSFrac[3];


-    always_comb begin
+    always_comb begin // ***remove guard bit
        // Determine if you add 1
        case (FrmE)
            3'b000: CalcPlus1 = Guard & (Round | (Sticky) | (~Round&~Sticky&LSBFrac));//round to nearest even
@ -166,6 +166,7 @@ module cvtfp (
                {XSgnE, DSResExp, DSResFrac};

        // select the final result based on the opperation
+        //*** in all units, put each value in a separate signal before using it in a ? : select
        assign CvtFpResE = FmtE ? {{32{1'b1}},DSRes} : {XSgnE, SDExp, SDFrac[51]|XNaNE, SDFrac[50:0]};
    end else begin
        // select the double to single precision result
--- a/pipelined/src/fpu/fcvtint.sv
+++ b/pipelined/src/fpu/fcvtint.sv
@ -10,7 +10,6 @@ module fcvt (
    input logic             XNaNE,      // is X NaN 
    input logic             XInfE,      // is X infinity
    input logic             XDenormE,   // is X denormalized
-    input logic [10:0]      BiasE,      // bias - depends on precision (max exponent/2)
    input logic [`XLEN-1:0] ForwardedSrcAE,      // integer input
    input logic [2:0]       FOpCtrlE,   // chooses which instruction is done (full list below)
    input logic [2:0]       FrmE,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
@ -70,7 +69,7 @@ module fcvt (
    assign Bits = Res64 ? 8'd64 : 8'd32;

    // calulate the unbiased exponent
-    assign ExpVal = {1'b0,XExpE} - {1'b0,BiasE} + {12'b0, XDenormE};
+    assign ExpVal = {1'b0,XExpE} - {1'b0, (11)'(`BIAS)} + {12'b0, XDenormE};

 ////////////////////////////////////////////////////////

@ -121,7 +120,7 @@ module fcvt (
    assign Round = FOpCtrlE[0] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41];
    assign LSB = FOpCtrlE[0] ? ShiftedMan[2] : FmtE ? ShiftedMan[14] : ShiftedMan[43];

-    always_comb begin
+    always_comb begin//*** remove guard bit
        // Determine if you add 1
        case (FrmE)
            3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even
--- a/pipelined/src/fpu/fma.sv
+++ b/pipelined/src/fpu/fma.sv
@ -29,17 +29,12 @@

 `include "wally-config.vh"

-//  `define FLEN 64//(`Q_SUPPORTED ? 128 : `D_SUPPORTED ? 64 : 32)
-//  `define NE   11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8)
-//  `define NF   52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23)
-//  `define XLEN 64
-//  `define IEEE754 1
 module fma(
    input logic                 clk,
    input logic                 reset,
    input logic                 FlushM,     // flush the memory stage
    input logic                 StallM,     // stall memory stage
-    input logic                 FmtE, FmtM, // precision 1 = double 0 = single
+    input logic  [`FPSIZES/3:0] FmtE, FmtM, // precision select - width and encoding depend on `FPSIZES (with two formats: 1 = larger format, 0 = smaller format)
    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
    input logic  [2:0]          FrmM,               // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
    input logic                 XSgnE, YSgnE, ZSgnE,    // input signs - execute stage
@ -75,7 +70,7 @@ module fma(
    logic 			    NegSumE, NegSumM;
    logic 			    ZSgnEffE, ZSgnEffM;
    logic 			    PSgnE, PSgnM;
-    logic [8:0]			NormCntE, NormCntM;
+    logic [$clog2(3*`NF+7)-1:0]			NormCntE, NormCntM;
    logic               Mult;
    
    fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
@ -86,7 +81,7 @@ module fma(
    // E/M pipeline registers
    flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); 
    flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);  
-    flopenrc #(16) EMRegFma4(clk, reset, FlushM, ~StallM, 
+    flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, 
                            {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0]},
                            {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult});

@ -98,6 +93,7 @@ module fma(
 endmodule
      

+        //*** in all units, put each value in a separate signal before using it in a ? : select

 module fma1(
    input logic                 XSgnE, YSgnE, ZSgnE,    // input's signs
@ -106,7 +102,7 @@ module fma1(
    input logic                 XDenormE, YDenormE, ZDenormE, // is the input denormal
    input logic                 XZeroE, YZeroE, ZZeroE, // is the input zero
    input logic  [2:0]          FOpCtrlE,   // 000 = fmadd (X*Y)+Z,  001 = fmsub (X*Y)-Z,  010 = fnmsub -(X*Y)+Z,  011 = fnmadd -(X*Y)-Z,  100 = fmul (X*Y)
-    input logic                 FmtE,       // precision 1 = double 0 = single
+    input logic  [`FPSIZES/3:0] FmtE,       // precision select - width and encoding depend on `FPSIZES (with two formats: 1 = larger format, 0 = smaller format)
    output logic [`NE+1:0]      ProdExpE,       // X exponent + Y exponent - bias in B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
    output logic                AddendStickyE,  // sticky bit that is calculated during alignment
    output logic                KillProdE,      // set the product to zero before addition if the product is too small to matter
@ -115,7 +111,7 @@ module fma1(
    output logic                InvZE,          // intert Z
    output logic                ZSgnEffE,       // the modified Z sign
    output logic                PSgnE,          // the product's sign
-    output logic [8:0]          NormCntE        // normalization shift cnt
+    output logic [$clog2(3*`NF+7)-1:0]          NormCntE        // normalization shift cnt
    );

    logic [`NE-1:0]     Denorm;             // value of a denormaized number based on precision
@ -157,37 +153,63 @@ module fma1(
        
    add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
    
-    loa loa(.A(AlignedAddendInv+{162'b0,InvZE}), .P(ProdManKilled), .NormCntE);
+    loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .NormCntE);

    // Choose the positive sum and accompanying LZA result.
    assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
-    // assign NormCntE = NegSumE ? NNormCnt : PNormCnt;


 endmodule


 module expadd(    
-    input  logic            FmtE,          // precision
-    input  logic [`NE-1:0]  XExpE, YExpE,  // input exponents
-    input  logic            XDenormE, YDenormE,    // are the inputs denormalized
-    input  logic            XZeroE, YZeroE,        // are the inputs zero
-    output logic [`NE-1:0]  XExpVal, YExpVal,      // Exponent value after taking into account denormals
-    output logic [`NE-1:0]  Denorm,        // value of denormalized exponent
-    output logic [`NE+1:0]  ProdExpE       // product's exponent B^(1023)NE+2
+    input  logic [`FPSIZES/3:0] FmtE,          // precision select - width and encoding depend on `FPSIZES
+    input  logic [`NE-1:0]      XExpE, YExpE,  // input exponents
+    input  logic                XDenormE, YDenormE,    // are the inputs denormalized
+    input  logic                XZeroE, YZeroE,        // are the inputs zero
+    output logic [`NE-1:0]      XExpVal, YExpVal,      // Exponent value after taking into account denormals
+    output logic [`NE-1:0]      Denorm,        // exponent value used for a denormalized input
+    output logic [`NE+1:0]      ProdExpE       // product's exponent: X exponent + Y exponent - `BIAS, NE+2 bits wide
 );


    // denormalized numbers have diffrent values depending on which precison it is.
-    //      double - 1
-    //      single - 1023-127+1 = 897
-    assign Denorm = FmtE ? 1 : 897;
+    //      largest format  - 1
+    //      smaller formats - `BIAS - (that format's bias) + 1
+    
+    if (`FPSIZES == 1) begin
+        assign Denorm = 1;
+
+    end else if (`FPSIZES == 2) begin
+        assign Denorm = FmtE ? (`NE)'(1) : (`NE)'(`BIAS)-(`NE)'(`BIAS1)+(`NE)'(1);
+
+    end else if (`FPSIZES == 3) begin
+        always_comb begin
+            case (FmtE) // plain blocking assignments - no procedural continuous assign inside always_comb
+                `FMT: Denorm = 1;
+                `FMT1: Denorm = `BIAS-`BIAS1+1;
+                `FMT2: Denorm = `BIAS-`BIAS2+1;
+                default: Denorm = 'x; // unsupported format - make every bit unknown, not just the LSB
+            endcase
+        end
+
+    end else begin
+        always_comb begin
+            case (FmtE) // all four encodings are listed, so no default is needed
+                2'h3: Denorm = 1;
+                2'h1: Denorm = `BIAS-`D_BIAS+1;
+                2'h0: Denorm = `BIAS-`S_BIAS+1;
+                2'h2: Denorm = `BIAS-`H_BIAS+1;
+            endcase
+        end
+
+    end

    // pick denormalized value or exponent
    assign XExpVal = XDenormE ? Denorm : XExpE;
    assign YExpVal = YDenormE ? Denorm : YExpE;
    // kill the exponent if the product is zero - either X or Y is 0
-    assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, `NE'h3ff})&{`NE+2{~(XZeroE|YZeroE)}};
+    assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}};

 endmodule

@ -261,7 +283,7 @@ module align(
    //      - Denormal numbers have a diffrent exponent value depending on the precision
    assign ZExpVal = ZDenormE ? Denorm : ZExpE;
    // assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3);
-    assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - 1020+`NF - {2'b0, ZExpVal};
+    assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpVal};

    // Defualt Addition without shifting
    //          |   54'b0    |  106'b(product)  | 2'b0 |
@ -276,7 +298,7 @@ module align(

        //          |   54'b0    |  106'b(product)  | 2'b0 |
        //  | addnend |
-        if ($signed(AlignCnt) < $signed(13'b0)) begin
+        if ($signed(AlignCnt) < $signed((`NE+2)'(0))) begin
            KillProdE = 1;
            ZManShifted = ZManPreShifted;
            AddendStickyE = ~(XZeroE|YZeroE);
@ -284,7 +306,7 @@ module align(
        // If the Addend is shifted right
        //          |   54'b0    |  106'b(product)  | 2'b0 |
        //                                  | addnend |
-        end else if ($signed(AlignCnt)<=$signed(13'd3*13'd`NF+13'd4))  begin
+        end else if ($signed(AlignCnt)<=$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(5)))  begin
            KillProdE = 0;
            ZManShifted = ZManPreShifted >> AlignCnt;
            AddendStickyE = |(ZManShifted[`NF-1:0]);
@ -356,7 +378,7 @@ endmodule
 module loa( //https://ieeexplore.ieee.org/abstract/document/930098
    input logic  [3*`NF+6:0] A,     // addend
    input logic  [2*`NF+1:0] P,     // product
-    output logic [8:0]       NormCntE   // normalization shift count for the positive result
+    output logic [$clog2(3*`NF+7)-1:0]       NormCntE   // normalization shift count for the positive result
    ); 
    
    logic [3*`NF+6:0] T;
@ -389,14 +411,14 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
 endmodule

 module lzc(
-    input logic  [3*`NF+6:0]    f,
-    output logic [8:0]          NormCntE    // normalization shift
+    input logic  [3*`NF+6:0]            f,          // vector that is searched for its leading one
+    output logic [$clog2(3*`NF+7)-1:0]    NormCntE    // normalization shift - number of zeros above the leading one of f
 );
    
-    logic [8:0] i;
+    logic [$clog2(3*`NF+7)-1:0] i;  // bit offset from the top of f
    always_comb begin
        i = 0;
-        while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned(9'd3*9'd`NF+9'd6)) i = i+1;  // search for leading one
+        while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned(($clog2(3*`NF+7))'(3*`NF+6))) i = i+1;  // search for leading one; the bound 3*`NF+6 is the last valid offset and always fits in the counter width
        NormCntE = i;
    end
 endmodule
@ -410,27 +432,27 @@ endmodule

 module fma2(
    
-    input logic                 XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
-    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic                 FmtM,       // precision 1 = double 0 = single
-    input logic     [`NE+1:0]   ProdExpM,       // X exponent + Y exponent - bias
-    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                 XZeroM, YZeroM, ZZeroM, // inputs are zero
-    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                 XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
-    input logic     [3*`NF+5:0] SumM,       // the positive sum
-    input logic                 NegSumM,    // was the sum negitive
-    input logic                 InvZM,      // do you invert Z
-    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                 PSgnM,      // the product's sign
-    input logic                 Mult,       // multiply opperation
-    input logic     [8:0]       NormCntM,   // the normalization shift count
-    output logic    [`FLEN-1:0] FMAResM,    // FMA final result
-    output logic    [4:0]       FMAFlgM);   // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
+    input logic                             XSgnM, YSgnM,        // input signs
+    input logic     [`NE-1:0]               XExpM, YExpM, ZExpM, // input exponents
+    input logic     [`NF:0]                 XManM, YManM, ZManM, // input mantissas
+    input logic     [2:0]                   FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic     [`FPSIZES/3:0]          FmtM,       // precision select - width and encoding depend on `FPSIZES (with two formats: 1 = larger format, 0 = smaller format)
+    input logic     [`NE+1:0]               ProdExpM,       // X exponent + Y exponent - bias
+    input logic                             AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                             KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                             XZeroM, YZeroM, ZZeroM, // inputs are zero
+    input logic                             XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                             XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                             XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
+    input logic     [3*`NF+5:0]             SumM,       // the positive sum
+    input logic                             NegSumM,    // was the sum negative
+    input logic                             InvZM,      // do you invert Z
+    input logic                             ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                             PSgnM,      // the product's sign
+    input logic                             Mult,       // multiply operation
+    input logic     [$clog2(3*`NF+7)-1:0]   NormCntM,   // the normalization shift count
+    output logic    [`FLEN-1:0]             FMAResM,    // FMA final result
+    output logic    [4:0]                   FMAFlgM);   // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
   


@ -548,28 +570,27 @@ endmodule


 module normalize(
-    input logic  [3*`NF+5:0]    SumM,       // the positive sum
-    input logic  [`NE-1:0]      ZExpM,      // exponent of Z
-    input logic  [`NE+1:0]      ProdExpM,   // X exponent + Y exponent - bias
-    input logic  [8:0]          NormCntM,   // normalization shift count
-    input logic                 FmtM,       // precision 1 = double 0 = single
-    input logic                 KillProdM,  // is the product set to zero
-    input logic                 AddendStickyM,  // the sticky bit caclulated from the aligned addend
-    input logic                 NegSumM,    // was the sum negitive
-    output logic [`NF+2:0]      NormSum,        // normalized sum
-    output logic                SumZero,        // is the sum zero
-    output logic                NormSumSticky, UfSticky,    // sticky bits
-    output logic [`NE+1:0]      SumExp,         // exponent of the normalized sum
-    output logic                ResultDenorm    // is the result denormalized
+    input logic  [3*`NF+5:0]            SumM,       // the positive sum
+    input logic  [`NE-1:0]              ZExpM,      // exponent of Z
+    input logic  [`NE+1:0]              ProdExpM,   // X exponent + Y exponent - bias
+    input logic  [$clog2(3*`NF+7)-1:0]  NormCntM,   // normalization shift count
+    input logic  [`FPSIZES/3:0]         FmtM,       // precision select - width and encoding depend on `FPSIZES (with two formats: 1 = larger format, 0 = smaller format)
+    input logic                         KillProdM,  // is the product set to zero
+    input logic                         AddendStickyM,  // the sticky bit calculated from the aligned addend
+    input logic                         NegSumM,    // was the sum negative
+    output logic [`NF+2:0]              NormSum,        // normalized sum
+    output logic                        SumZero,        // is the sum zero
+    output logic                        NormSumSticky, UfSticky,    // sticky bits
+    output logic [`NE+1:0]              SumExp,         // exponent of the normalized sum
+    output logic                        ResultDenorm    // is the result denormalized
 );
-    logic [`NE+1:0]     SumExpTmp;          // exponent of the normalized sum not taking into account denormal or zero results
-    logic [8:0]         DenormShift;        // right shift if the result is denormalized //***change this later
-    logic [3*`NF+5:0]   CorrSumShifted;     // the shifted sum after LZA correction
-    logic [3*`NF+8:0]   SumShifted;         // the shifted sum before LZA correction
-    logic [`NE+1:0]     SumExpTmpTmp;       // the exponent of the normalized sum with the `FLEN bias
-    logic               PreResultDenorm;    // is the result denormalized - calculated before LZA corection
-    logic               PreResultDenorm2;   // is the result denormalized - calculated before LZA corection
-    logic               LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction
+    logic [`NE+1:0]             SumExpTmp;          // exponent of the normalized sum not taking into account denormal or zero results
+    logic [$clog2(3*`NF+7)-1:0] DenormShift;        // right shift if the result is denormalized //***change this later
+    logic [3*`NF+5:0]           CorrSumShifted;     // the shifted sum after LZA correction
+    logic [3*`NF+8:0]           SumShifted;         // the shifted sum before LZA correction
+    logic [`NE+1:0]             SumExpTmpTmp;       // the exponent of the normalized sum with the `FLEN bias
+    logic                       PreResultDenorm;    // is the result denormalized - calculated before LZA correction
+    logic                       LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction

    ///////////////////////////////////////////////////////////////////////////////
    // Normalization
@ -580,14 +601,89 @@ module normalize(

    // calculate the sum's exponent
    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4));
-    assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}};
+
+    // convert the sum's exponent into the proper precision
+    //      - the largest format keeps SumExpTmpTmp as is
+    //      - smaller formats swap `BIAS for their own bias; the mask forces the result to zero when SumExpTmpTmp is zero
+    if (`FPSIZES == 1) begin
+        assign SumExpTmp = SumExpTmpTmp;
+
+    end else if (`FPSIZES == 2) begin
+        assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
+
+    end else if (`FPSIZES == 3) begin
+        always_comb begin
+            case (FmtM) // plain blocking assignments - no procedural continuous assign inside always_comb
+                `FMT: SumExpTmp = SumExpTmpTmp;
+                `FMT1: SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS1)&{`NE+2{|SumExpTmpTmp}};
+                `FMT2: SumExpTmp = (SumExpTmpTmp-`BIAS+`BIAS2)&{`NE+2{|SumExpTmpTmp}};
+                default: SumExpTmp = 'x; // unsupported format - every bit unknown
+            endcase
+        end
+
+    end else begin
+        always_comb begin
+            case (FmtM) // all four encodings are listed, so no default is needed
+                2'h3: SumExpTmp = SumExpTmpTmp;
+                2'h1: SumExpTmp = (SumExpTmpTmp-`BIAS+`D_BIAS)&{`NE+2{|SumExpTmpTmp}};
+                2'h0: SumExpTmp = (SumExpTmpTmp-`BIAS+`S_BIAS)&{`NE+2{|SumExpTmpTmp}};
+                2'h2: SumExpTmp = (SumExpTmpTmp-`BIAS+`H_BIAS)&{`NE+2{|SumExpTmpTmp}};
+            endcase
+        end
+
+    end
    
-    logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL;
-    assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
-    assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd2)));
-    assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127);
-    assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd25+13'd1023-13'd127)) | ~|SumExpTmpTmp;
-    assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero;
+    // determine if the result is denormalized
+    //      SumNLEZ  - the exponent, rebiased for format N, is less than or equal to zero
+    //      SumNGEFL - the exponent, rebiased for format N, is at least -(format N's fraction length + 2)
+    
+    if (`FPSIZES == 1) begin
+        logic Sum0LEZ, Sum0GEFL;
+        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
+        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
+        assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
+
+    end else if (`FPSIZES == 2) begin
+        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
+        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
+        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
+        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
+        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
+        assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;
+
+    end else if (`FPSIZES == 3) begin
+        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
+        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
+        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
+        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
+        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
+        assign Sum2LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
+        assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp;
+        always_comb begin
+            case (FmtM) // plain blocking assignments - no procedural continuous assign inside always_comb
+                `FMT: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
+                `FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
+                `FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
+                default: PreResultDenorm = 1'bx; // unsupported format
+            endcase
+        end
+
+    end else begin
+        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
+        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
+        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF  )-(`NE+2)'(2));
+        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
+        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp;
+        assign Sum2LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
+        assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp;
+        assign Sum3LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
+        assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp;
+        always_comb begin
+            case (FmtM) // all four encodings are listed, so no default is needed
+                2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
+                2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
+                2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
+                2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
+            endcase
+        end
+
+    end

    // 010. when should be 001.
    //      - shift left one
@ -599,45 +695,66 @@ module normalize(

    // Determine the shift needed for denormal results
    //  - if not denorm add 1 to shift out the leading 1
-    assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1;
+    assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1;
    // Normalize the sum
    assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;
    // LZA correction
    assign LZAPlus1 = SumShifted[3*`NF+7];
    assign LZAPlus2 = SumShifted[3*`NF+8];
 	// the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone
-    assign CorrSumShifted =  LZAPlus1&~KillProdM ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
+    assign CorrSumShifted =  LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
    assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3];
+
    // Calculate the sticky bit
-    assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM);
+    if (`FPSIZES == 1) begin
+        assign NormSumSticky = |CorrSumShifted[2*`NF+2:0];
+
+    end else if (`FPSIZES == 2) begin
+        // 3*NF+5 - NF1 - 3
+        assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | 
+        (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM);
+
+    end else if (`FPSIZES == 3) begin
+        assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | 
+        (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) | 
+        (|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2));
+
+    end else begin        
+        assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | 
+        (|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | 
+        (|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) |
+        (|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2));
+
+    end
+
    assign UfSticky = AddendStickyM | NormSumSticky;

    // Determine sum's exponent
    //                          if plus1                     If plus2                                      if said denorm but norm plus 1           if said denorm but norm plus 2
-    assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~ResultDenorm&PreResultDenorm2&~KillProdM}+{12'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
+    assign SumExp = (SumExpTmp+{12'b0, LZAPlus1&~KillProdM}+{11'b0, LZAPlus2&~KillProdM, 1'b0}+{12'b0, ~ResultDenorm&PreResultDenorm&~KillProdM}+{12'b0, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};
    // recalculate if the result is denormalized
-    assign ResultDenorm = PreResultDenorm2&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
+    assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];

 endmodule

 module fmaround(
-    input logic             FmtM,       // precision 1 = double 0 = single
-    input logic  [2:0]      FrmM,       // rounding mode
-    input logic             UfSticky,   // sticky bit for underlow calculation
-    input logic  [`NF+2:0]  NormSum,    // normalized sum
-    input logic             AddendStickyM,  // addend's sticky bit
-    input logic             NormSumSticky,  // normalized sum's sticky bit
-    input logic             ZZeroM,         // is Z zero
-    input logic             InvZM,          // invert Z
-    input logic  [`NE+1:0]  SumExp,         // exponent of the normalized sum
-    input logic             ResultSgnTmp,      // the result's sign
-    output logic            CalcPlus1, UfPlus1,  // do you add or subtract on from the result
-    output logic [`NE+1:0]  FullResultExp,      // ResultExp with bits to determine sign and overflow
-    output logic [`NF-1:0]  ResultFrac,         // Result fraction
-    output logic [`NE-1:0]  ResultExp,          // Result exponent
-    output logic            Sticky,             // sticky bit
-    output logic [`FLEN:0]  RoundAdd,           // how much to add to the result
-    output logic            Round, Guard, UfLSBNormSum // bits needed to calculate rounding
+    input logic  [`FPSIZES/3:0] FmtM,       // precision select - width and encoding depend on `FPSIZES (with two formats: 1 = larger format, 0 = smaller format)
+    input logic  [2:0]          FrmM,       // rounding mode
+    input logic                 UfSticky,   // sticky bit for underflow calculation
+    input logic  [`NF+2:0]      NormSum,    // normalized sum
+    input logic                 AddendStickyM,  // addend's sticky bit
+    input logic                 NormSumSticky,  // normalized sum's sticky bit
+    input logic                 ZZeroM,         // is Z zero
+    input logic                 InvZM,          // invert Z
+    input logic  [`NE+1:0]      SumExp,         // exponent of the normalized sum
+    input logic                 ResultSgnTmp,      // the result's sign
+    output logic                CalcPlus1, UfPlus1,  // do you add or subtract one from the result
+    output logic [`NE+1:0]      FullResultExp,      // ResultExp with bits to determine sign and overflow
+    output logic [`NF-1:0]      ResultFrac,         // Result fraction
+    output logic [`NE-1:0]      ResultExp,          // Result exponent
+    output logic                Sticky,             // sticky bit
+    output logic [`FLEN:0]      RoundAdd,           // how much to add to the result
+    output logic                Round, Guard, UfLSBNormSum // bits needed to calculate rounding
 );
    logic           LSBNormSum;         // bit used for rounding - least significant bit of the normalized sum
    logic           SubBySmallNum, UfSubBySmallNum;  // was there supposed to be a subtraction by a small number
@ -676,18 +793,146 @@ module fmaround(
    //      101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
    //      110/111 - Plus1

-    // determine guard, round, and least significant bit of the result
-    assign Guard = FmtM ? NormSum[2] : NormSum[31];
-    assign Round = FmtM ? NormSum[1] : NormSum[30];
-    assign LSBNormSum = FmtM ? NormSum[3] : NormSum[32];
+    if (`FPSIZES == 1) begin
+        // determine guard, round, and least significant bit of the result
+        assign Guard = NormSum[2];
+        assign Round = NormSum[1];
+        assign LSBNormSum = NormSum[3];
+
+        // used to determine underflow flag
+        assign UfGuard = NormSum[1];
+        assign UfRound = NormSum[0];
+        assign UfLSBNormSum = NormSum[2];
+
+        // determine sticky
+        assign Sticky = UfSticky | NormSum[0];
+
+    end else if (`FPSIZES == 2) begin
+        //         \/-------------NF---------------,
+        //      |      NF1       | 3 |             |
+        //          '-------NF1------^
+
+        // determine guard, round, and least significant bit of the result
+        assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
+        assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
+        assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3];
+
+        // used to determine underflow flag
+        assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
+        assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
+        assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
+
+        // determine sticky
+        assign Sticky = UfSticky | (FmtM ? NormSum[0] : NormSum[`NF-`NF1]);
+
+    end else if (`FPSIZES == 3) begin
+        // Pick the rounding bits for the selected format. The `FMT1/`FMT2 bit
+        // indices are the `FMT indices shifted up by `NF-`NF1 / `NF-`NF2.
+        // Plain blocking assignments are used here: procedural continuous
+        // `assign` inside always_comb is slated for deprecation and not needed.
+        always_comb begin
+            case (FmtM)
+                `FMT: begin
+                    // determine guard, round, and least significant bit of the result
+                    Guard = NormSum[2];
+                    Round = NormSum[1];
+                    LSBNormSum = NormSum[3];
+                    // used to determine underflow flag
+                    UfGuard = NormSum[1];
+                    UfRound = NormSum[0];
+                    UfLSBNormSum = NormSum[2];
+                    // determine sticky
+                    Sticky = UfSticky | NormSum[0];
+                end
+                `FMT1: begin
+                    // determine guard, round, and least significant bit of the result
+                    Guard = NormSum[`NF-`NF1+2];
+                    Round = NormSum[`NF-`NF1+1];
+                    LSBNormSum = NormSum[`NF-`NF1+3];
+                    // used to determine underflow flag
+                    UfGuard = NormSum[`NF-`NF1+1];
+                    UfRound = NormSum[`NF-`NF1];
+                    UfLSBNormSum = NormSum[`NF-`NF1+2];
+                    // determine sticky
+                    Sticky = UfSticky | NormSum[`NF-`NF1];
+                end
+                `FMT2: begin
+                    // determine guard, round, and least significant bit of the result
+                    Guard = NormSum[`NF-`NF2+2];
+                    Round = NormSum[`NF-`NF2+1];
+                    LSBNormSum = NormSum[`NF-`NF2+3];
+                    // used to determine underflow flag
+                    UfGuard = NormSum[`NF-`NF2+1];
+                    UfRound = NormSum[`NF-`NF2];
+                    UfLSBNormSum = NormSum[`NF-`NF2+2];
+                    // determine sticky
+                    Sticky = UfSticky | NormSum[`NF-`NF2];
+                end
+                default: begin
+                    // unsupported format encoding: propagate x so misuse shows up in simulation
+                    Guard = 1'bx;
+                    Round = 1'bx;
+                    LSBNormSum = 1'bx;
+                    UfGuard = 1'bx;
+                    UfRound = 1'bx;
+                    UfLSBNormSum = 1'bx;
+                    Sticky = 1'bx;
+                end
+            endcase
+        end
+
+    end else begin
+        // Pick the rounding bits for the selected format. 2'h3 uses the full
+        // `NF-wide indices; 2'h1, 2'h0 and 2'h2 shift them up by `NF-`D_NF,
+        // `NF-`S_NF and `NF-`H_NF respectively.
+        // Plain blocking assignments are used here: procedural continuous
+        // `assign` inside always_comb is slated for deprecation and not needed.
+        always_comb begin
+            case (FmtM)
+                2'h3: begin
+                    // determine guard, round, and least significant bit of the result
+                    Guard = NormSum[2];
+                    Round = NormSum[1];
+                    LSBNormSum = NormSum[3];
+                    // used to determine underflow flag
+                    UfGuard = NormSum[1];
+                    UfRound = NormSum[0];
+                    UfLSBNormSum = NormSum[2];
+                    // determine sticky
+                    Sticky = UfSticky | NormSum[0];
+                end
+                2'h1: begin
+                    // determine guard, round, and least significant bit of the result
+                    Guard = NormSum[`NF-`D_NF+2];
+                    Round = NormSum[`NF-`D_NF+1];
+                    LSBNormSum = NormSum[`NF-`D_NF+3];
+                    // used to determine underflow flag
+                    UfGuard = NormSum[`NF-`D_NF+1];
+                    UfRound = NormSum[`NF-`D_NF];
+                    UfLSBNormSum = NormSum[`NF-`D_NF+2];
+                    // determine sticky
+                    Sticky = UfSticky | NormSum[`NF-`D_NF];
+                end
+                2'h0: begin
+                    // determine guard, round, and least significant bit of the result
+                    Guard = NormSum[`NF-`S_NF+2];
+                    Round = NormSum[`NF-`S_NF+1];
+                    LSBNormSum = NormSum[`NF-`S_NF+3];
+                    // used to determine underflow flag
+                    UfGuard = NormSum[`NF-`S_NF+1];
+                    UfRound = NormSum[`NF-`S_NF];
+                    UfLSBNormSum = NormSum[`NF-`S_NF+2];
+                    // determine sticky
+                    Sticky = UfSticky | NormSum[`NF-`S_NF];
+                end
+                2'h2: begin
+                    // determine guard, round, and least significant bit of the result
+                    Guard = NormSum[`NF-`H_NF+2];
+                    Round = NormSum[`NF-`H_NF+1];
+                    LSBNormSum = NormSum[`NF-`H_NF+3];
+                    // used to determine underflow flag
+                    UfGuard = NormSum[`NF-`H_NF+1];
+                    UfRound = NormSum[`NF-`H_NF];
+                    UfLSBNormSum = NormSum[`NF-`H_NF+2];
+                    // determine sticky
+                    Sticky = UfSticky | NormSum[`NF-`H_NF];
+                end
+            endcase
+        end
+
+    end

-    // used to determine underflow flag
-    assign UfGuard = FmtM ? NormSum[1] : NormSum[30];
-    assign UfRound = FmtM ? NormSum[0] : NormSum[29];
-    assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[31];

-    // determine sticky
-    assign Sticky = UfSticky | NormSum[0];
    // Deterimine if a small number was supposed to be subtrated
    assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
    assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
@ -729,10 +974,40 @@ module fmaround(
    assign Minus1 = CalcMinus1 & (Sticky | Guard | Round);

    // Compute rounded result
-    assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
-                             Minus1 ? {{36{1'b1}}, 29'b0} : {35'b0, Plus1, 29'b0};
-    assign NormSumTruncated = {NormSum[`NF+2:32], NormSum[31:3]&{29{FmtM}}};
+    if (`FPSIZES == 1) begin
+        assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1};

+    end else if (`FPSIZES == 2) begin
+        // \/FLEN+1
+        //  | NE+2 |        NF      |
+        //  '-NE+2-^----NF1----^
+        // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
+        assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
+                                Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
+
+    end else if (`FPSIZES == 3) begin
+        // RoundAdd is all ones for Minus1 in the `FMT case; for the narrower formats
+        // Plus1 (or the run of ones) is placed above `FLEN-1-`NE-`NFx zero bits.
+        // Blocking assignments replace the procedural continuous `assign` statements.
+        always_comb begin
+            case (FmtM)
+                `FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
+                `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
+                `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)};
+                default: RoundAdd = (`FLEN+1)'(0);
+            endcase
+        end
+
+    end else begin        
+        // RoundAdd is all ones for Minus1 in the 2'h3 case; for the narrower formats
+        // Plus1 (or the run of ones) is placed above `FLEN-1-`NE-`x_NF zero bits.
+        // Blocking assignments replace the procedural continuous `assign` statements.
+        always_comb begin
+            case (FmtM)
+                2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
+                2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)};
+                2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)};
+                2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)};
+            endcase
+        end
+
+    end
+
+    assign NormSumTruncated = NormSum[`NF+2:3];
    assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
    assign ResultExp = FullResultExp[`NE-1:0];

@ -748,7 +1023,7 @@ module fmaflags(
    input logic  [`NE+1:0]      SumExp,                 // exponent of the normalized sum
    input logic                 ZSgnEffM, PSgnM,        // the product and modified Z signs
    input logic                 Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // bits used to determine rounding
-    input logic                 FmtM,                   // precision 1 = double 0 = single
+    input logic  [`FPSIZES/3:0] FmtM,                   // format (precision) select; 1 or 2 bits depending on `FPSIZES
    output logic                Invalid, Overflow, Underflow, // flags used to select the result
    output logic [4:0]          FMAFlgM // FMA flags
 );
@ -771,8 +1046,34 @@ module fmaflags(
    assign Invalid = SigNaN | ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);  
   
    // Set Overflow flag if the number is too big to be represented
-    //      - Don't set the overflow flag if an overflowed result isn't outputed
-    assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[7:0] | FullResultExp[8];
+    //      - Don't set the overflow flag if an overflowed result isn't outputted
+    // GtMaxExp is set when the selected format's exponent field of FullResultExp
+    // is all ones, or when the bit directly above that field is set.
+    if (`FPSIZES == 1) begin
+        assign GtMaxExp = &FullResultExp[`NE-1:0] | FullResultExp[`NE];
+
+    end else if (`FPSIZES == 2) begin
+        assign GtMaxExp = FmtM ? &FullResultExp[`NE-1:0] | FullResultExp[`NE] : &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
+
+    end else if (`FPSIZES == 3) begin
+        // blocking assignments: procedural continuous `assign` is not needed inside always_comb
+        always_comb begin
+            case (FmtM)
+                `FMT: GtMaxExp =  &FullResultExp[`NE-1:0] | FullResultExp[`NE];
+                `FMT1: GtMaxExp = &FullResultExp[`NE1-1:0] | FullResultExp[`NE1];
+                `FMT2: GtMaxExp = &FullResultExp[`NE2-1:0] | FullResultExp[`NE2];
+                default: GtMaxExp = 1'bx;
+            endcase
+        end
+
+    end else begin
+        // blocking assignments: procedural continuous `assign` is not needed inside always_comb
+        always_comb begin
+            case (FmtM)
+                2'h3: GtMaxExp =  &FullResultExp[`NE-1:0] | FullResultExp[`NE];
+                2'h1: GtMaxExp = &FullResultExp[`D_NE-1:0] | FullResultExp[`D_NE];
+                2'h0: GtMaxExp = &FullResultExp[`S_NE-1:0] | FullResultExp[`S_NE];
+                2'h2: GtMaxExp = &FullResultExp[`H_NE-1:0] | FullResultExp[`H_NE];
+            endcase
+        end
+
+    end
    assign Overflow = GtMaxExp & ~FullResultExp[`NE+1]&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM);

    // Set Underflow flag if the number is too small to be represented in normal numbers
@ -793,57 +1094,227 @@ endmodule


 module resultselect(
-    input logic                 XSgnM, YSgnM,        // input signs
-    input logic     [`NE-1:0]   XExpM, YExpM, ZExpM, // input exponents
-    input logic     [`NF:0]     XManM, YManM, ZManM, // input mantissas
-    input logic     [2:0]       FrmM,       // rounding mode 000 = rount to nearest, ties to even   001 = round twords zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
-    input logic                 FmtM,       // precision 1 = double 0 = single
-    input logic                 AddendStickyM,  // sticky bit that is calculated during alignment
-    input logic                 KillProdM,      // set the product to zero before addition if the product is too small to matter
-    input logic                 XInfM, YInfM, ZInfM,    // inputs are infinity
-    input logic                 XNaNM, YNaNM, ZNaNM,    // inputs are NaN
-    input logic                 ZSgnEffM,   // the modified Z sign - depends on instruction
-    input logic                 PSgnM,      // the product's sign
-    input logic                 ResultSgn,  // the result's sign
-    input logic                 CalcPlus1,  // rounding bits
-    input logic     [`FLEN:0]   RoundAdd,   // how much to add to the result
-    input logic                 Invalid, Overflow, Underflow,  // flags
-    input logic                 ResultDenorm,       // is the result denormalized
-    input logic     [`NE-1:0]   ResultExp,          // Result exponent
-    input logic     [`NF-1:0]   ResultFrac,         // Result fraction
-    output logic    [`FLEN-1:0] FMAResM     // FMA final result
+    input logic                     XSgnM, YSgnM,        // input signs
+    input logic     [`NE-1:0]       XExpM, YExpM, ZExpM, // input exponents
+    input logic     [`NF:0]         XManM, YManM, ZManM, // input mantissas
+    input logic     [2:0]           FrmM,       // rounding mode 000 = round to nearest, ties to even   001 = round towards zero  010 = round down  011 = round up  100 = round to nearest, ties to max magnitude
+    input logic     [`FPSIZES/3:0]  FmtM,       // format (precision) select; 1 or 2 bits depending on `FPSIZES
+    input logic                     AddendStickyM,  // sticky bit that is calculated during alignment
+    input logic                     KillProdM,      // set the product to zero before addition if the product is too small to matter
+    input logic                     XInfM, YInfM, ZInfM,    // inputs are infinity
+    input logic                     XNaNM, YNaNM, ZNaNM,    // inputs are NaN
+    input logic                     ZSgnEffM,   // the modified Z sign - depends on instruction
+    input logic                     PSgnM,      // the product's sign
+    input logic                     ResultSgn,  // the result's sign
+    input logic                     CalcPlus1,  // rounding bits
+    input logic     [`FLEN:0]       RoundAdd,   // how much to add to the result
+    input logic                     Invalid, Overflow, Underflow,  // flags
+    input logic                     ResultDenorm,       // is the result denormalized
+    input logic     [`NE-1:0]       ResultExp,          // Result exponent
+    input logic     [`NF-1:0]       ResultFrac,         // Result fraction
+    output logic    [`FLEN-1:0]     FMAResM     // FMA final result
 );
-    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results
+    logic               InfSgn;
+    logic [`FLEN-1:0]   XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
+    assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
+    if (`FPSIZES == 1) begin
+        if(`IEEE754) begin
+            assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+            assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+            assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+            assign InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+        end else begin
+            assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+        end
+        assign OverflowResult =  ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                                                                                                    {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+        assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+        assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
+        assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
+        assign NormResult = {ResultSgn, ResultExp, ResultFrac};
+
+    end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
+        if(`IEEE754) begin
+            assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
+            assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
+            assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
+            assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+        end else begin 
+            assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+        end
+        
+        assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                                                                                                            {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
+                                        ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
+                                                                                                                            {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+        assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:0], ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+        assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+        assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+        assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
+
+    end else if (`FPSIZES == 3) begin
+        // Build every candidate result for the selected format. The `FMT1/`FMT2
+        // results are narrower than `FLEN, so their upper `FLEN-`LENx bits are
+        // filled with ones. Blocking assignments replace the procedural
+        // continuous `assign` statements, which are not needed inside always_comb.
+        always_comb begin
+            case (FmtM)
+                `FMT: begin  
+                    if(`IEEE754) begin
+                        XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+                        YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+                        ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+                        InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end else begin 
+                        XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end
+                    
+                    // round-toward-zero style modes saturate to the largest finite value, otherwise infinity
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                                                                                                                 {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+                    KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
+                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
+                    NormResult = {ResultSgn, ResultExp, ResultFrac};
+                end
+                `FMT1: begin  
+                    if(`IEEE754) begin
+                        XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
+                        YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
+                        ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
+                        InvalidResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                    end else begin 
+                        XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
+                    end
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
+                                                                                                                           {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+                    KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:0], ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
+                    UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+                    InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
+                    NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
+                end
+                `FMT2: begin  
+                    if(`IEEE754) begin
+                        XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
+                        YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
+                        ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
+                        InvalidResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                    end else begin 
+                        XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
+                    end
+                    
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
+                                                                                                                           {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
+                    KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:0], ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
+                    UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+                    InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
+                    NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
+                end
+                default: begin
+                    // unsupported format encoding: every candidate is driven to zero
+                    if(`IEEE754) begin
+                        XNaNResult = (`FLEN)'(0);
+                        YNaNResult = (`FLEN)'(0);
+                        ZNaNResult = (`FLEN)'(0);
+                        InvalidResult = (`FLEN)'(0);
+                    end else begin 
+                        XNaNResult = (`FLEN)'(0);
+                    end
+                    OverflowResult = (`FLEN)'(0);
+                    KillProdResult = (`FLEN)'(0);
+                    UnderflowResult = (`FLEN)'(0);
+                    InfResult = (`FLEN)'(0);
+                    NormResult = (`FLEN)'(0);
+                end
+            endcase
+        end
+
+    end else begin 
+        // Build every candidate result for the selected format. 2'h3 uses the full
+        // `NE/`NF layout; 2'h1, 2'h0 and 2'h2 use the `D_*, `S_* and `H_* parameters
+        // (presumably double, single and half - confirm against the config) and
+        // fill the upper `FLEN-`x_LEN bits with ones.
+        // Fixes: the 2'h0 KillProdResult exponent slice now uses `S_NE (it used the
+        // three-format macro `NE2), and blocking assignments replace the procedural
+        // continuous `assign` statements, which are not needed inside always_comb.
+        always_comb begin
+            case (FmtM)
+                2'h3: begin  
+                    if(`IEEE754) begin
+                        XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
+                        YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
+                        ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
+                        InvalidResult = {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end else begin 
+                        XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
+                    end
+                    
+                    // round-toward-zero style modes saturate to the largest finite value, otherwise infinity
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
+                                                                                                                                 {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
+                    KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
+                    UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
+                    InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
+                    NormResult = {ResultSgn, ResultExp, ResultFrac};
+                end
+                2'h1: begin  
+                    if(`IEEE754) begin
+                        XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
+                        YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
+                        ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
+                        InvalidResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                    end else begin 
+                        XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
+                    end
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
+                                                                                                                           {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
+                    KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:0], ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
+                    UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+                    InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
+                    NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
+                end
+                2'h0: begin  
+                    if(`IEEE754) begin
+                        XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
+                        YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
+                        ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
+                        InvalidResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                    end else begin 
+                        XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
+                    end
+                    
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
+                                                                                                                           {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
+                    // exponent slice must follow this arm's own `S_NE width, like the `D_NE / `H_NE arms
+                    KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:0], ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
+                    UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+                    InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
+                    NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
+                end
+                2'h2: begin  
+                    if(`IEEE754) begin
+                        XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
+                        YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
+                        ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
+                        // NOTE(review): sign is hard-wired to 0 here while every other format arm uses ResultSgn - confirm which is intended
+                        InvalidResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                    end else begin 
+                        XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
+                    end
+                    
+                    OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
+                                                                                                                           {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
+
+                    KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:0], ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
+                    UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
+                    InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
+                    NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
+                end
+            endcase
+        end

-    if(`IEEE754) begin
-        assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]};
-        assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]};
-        assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]};
-        assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0};
-      end else begin
-        assign XNaNResult = FmtM ? {1'b0, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, XExpM[7:0], 1'b1, 22'b0};
-        assign YNaNResult = FmtM ? {1'b0, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, YExpM[7:0], 1'b1, 22'b0};
-        assign ZNaNResult = FmtM ? {1'b0, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, 1'b0, ZExpM[7:0], 1'b1, 22'b0};
-        assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, 1'b0, 8'hff, 1'b1, 22'b0};
    end
-     
-    assign OverflowResult =  FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
-                                                                                                                          {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
-                                    ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} :
-                                                                                                                          {{32{1'b1}}, ResultSgn, 8'hff, 23'b0};
-    assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} + (RoundAdd[59:29]&{31{AddendStickyM}})};
-    assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}};
-    assign FMAResM = XNaNM ? XNaNResult :
-                        YNaNM ? YNaNResult :
-                        ZNaNM ? ZNaNResult :
-                        Invalid ? InvalidResult :
-                        XInfM ? FmtM ? {PSgnM, XExpM, XManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  XExpM[7:0], XManM[51:29]} : 
-                        YInfM ? FmtM ? {PSgnM, YExpM, YManM[`NF-1:0]} : {{32{1'b1}}, PSgnM,  YExpM[7:0], YManM[51:29]} :
-                        ZInfM ? FmtM ? {ZSgnEffM, ZExpM, ZManM[`NF-1:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], ZManM[51:29]} :
-                        KillProdM ? KillProdResult :  
-			            Overflow ? OverflowResult :
-                        Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
-                        FmtM ? {ResultSgn, ResultExp, ResultFrac} :
-                               {{32{1'b1}}, ResultSgn, ResultExp[7:0], ResultFrac[51:29]};
+    if(`IEEE754) begin
+        assign FMAResM = XNaNM ? XNaNResult :
+                            YNaNM ? YNaNResult :
+                            ZNaNM ? ZNaNResult :
+                            Invalid ? InvalidResult :
+                            XInfM|YInfM|ZInfM ? InfResult :
+                            KillProdM ? KillProdResult :  
+                            Overflow ? OverflowResult :
+                            Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
+                            NormResult;
+    end else begin
+        assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
+                            XInfM|YInfM|ZInfM ? InfResult :
+                            KillProdM ? KillProdResult :  
+                            Overflow ? OverflowResult :
+                            Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :  
+                            NormResult;
+    end

 endmodule
--- a/pipelined/src/fpu/fpu.sv
+++ b/pipelined/src/fpu/fpu.sv
@ -89,7 +89,6 @@ module fpu (
   logic [10:0] 	  XExpM, YExpM, ZExpM;                // input's exponent - memory stage
   logic [52:0] 	  XManE, YManE, ZManE;                // input's fraction - execute stage
   logic [52:0] 	  XManM, YManM, ZManM;                // input's fraction - memory stage
-   logic [10:0] 	  BiasE;                              // bias based on precision (single=7f double=3ff)
   logic 		  XNaNE, YNaNE, ZNaNE;                // is the input a NaN - execute stage
   logic 		  XNaNM, YNaNM, ZNaNM;                // is the input a NaN - memory stage
   logic 		  XNaNQ, YNaNQ;                       // is the input a NaN - divide
@ -179,7 +178,7 @@ module fpu (
   unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, 
         .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, 
         .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, 
-         .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);
+         .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE);

   // FMA
   //   - two stage FMA
@ -231,7 +230,7 @@ module fpu (
         .XSNaNE, .ClassResE);

   // Convert
-   fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
+   fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE,
   .CvtResE, .CvtFlgE);

   // data to be stored in memory - to IEU
--- a/pipelined/src/fpu/unpack.sv
+++ b/pipelined/src/fpu/unpack.sv
@ -0,0 +1,361 @@
+`include "wally-config.vh"
+
+// unpack
+//   Splits the raw floating-point operands X, Y and Z into sign, exponent and fraction
+//   fields and classifies each operand (zero, denormal, normal, infinity, NaN, signalling NaN).
+//   Every operand is widened to the largest supported format (`NE exponent bits, `NF
+//   fraction bits). `FPSIZES selects at elaboration time how many formats are handled and
+//   FmtE selects the format of the current operands.
+//   A narrower operand must be NaN boxed (all bits above its own width set to 1); an
+//   improperly boxed operand is replaced by a positive quiet NaN of that width.
+//   FOpCtrlE is not read by this module; the port is kept so existing instantiations
+//   continue to connect.
+module unpack ( 
+    input logic  [`FLEN-1:0] X, Y, Z,                   // raw operands
+    input logic  [`FPSIZES/3:0]       FmtE,             // format of the operands
+    input logic  [2:0]  FOpCtrlE,                       // not read in this module
+    output logic        XSgnE, YSgnE, ZSgnE,            // sign bits
+    output logic [`NE-1:0] XExpE, YExpE, ZExpE,         // exponents widened to `NE bits
+    output logic [`NF:0] XManE, YManE, ZManE,           // {exponent nonzero, fraction}
+    output logic XNormE,                                // X exponent is neither all 0s nor all 1s
+    output logic XNaNE, YNaNE, ZNaNE,                   // operand is a NaN
+    output logic XSNaNE, YSNaNE, ZSNaNE,                // operand is a signalling NaN
+    output logic XDenormE, YDenormE, ZDenormE,          // operand is denormalized
+    output logic XZeroE, YZeroE, ZZeroE,                // operand is zero
+    output logic XInfE, YInfE, ZInfE,                   // operand is infinity
+    output logic XExpMaxE                               // X exponent is all 1s
+);
+ 
+    logic [`NF-1:0] XFracE, YFracE, ZFracE;
+    logic           XExpNonzero, YExpNonzero, ZExpNonzero;
+    logic           XFracZero, YFracZero, ZFracZero; // input fraction zero
+    logic           XExpZero, YExpZero, ZExpZero; // input exponent zero
+    logic           YExpMaxE, ZExpMaxE;  // input exponent all 1s
+    
+    if (`FPSIZES == 1) begin
+        // only one format is supported, so the fields are sliced straight out of the operands
+        assign XSgnE = X[`FLEN-1];
+        assign YSgnE = Y[`FLEN-1];
+        assign ZSgnE = Z[`FLEN-1];
+
+        assign XExpE = X[`FLEN-2:`NF]; 
+        assign YExpE = Y[`FLEN-2:`NF]; 
+        assign ZExpE = Z[`FLEN-2:`NF]; 
+
+        assign XFracE = X[`NF-1:0];
+        assign YFracE = Y[`NF-1:0];
+        assign ZFracE = Z[`NF-1:0];
+
+        assign XExpNonzero = |XExpE; 
+        assign YExpNonzero = |YExpE;
+        assign ZExpNonzero = |ZExpE;
+
+        assign XExpMaxE = &XExpE;
+        assign YExpMaxE = &YExpE;
+        assign ZExpMaxE = &ZExpE;
+
+    end else if (`FPSIZES == 2) begin
+
+        logic  [`LEN1-1:0]   XLen1, YLen1, ZLen1; // narrower operand, or a quiet NaN if not properly NaN boxed
+
+        // Check NaN boxing. If the value is not properly NaN boxed, replace it with a quiet NaN
+        assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
+        assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
+        assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};  
+
+        assign XSgnE = FmtE ? X[`FLEN-1] : XLen1[`LEN1-1];
+        assign YSgnE = FmtE ? Y[`FLEN-1] : YLen1[`LEN1-1];
+        assign ZSgnE = FmtE ? Z[`FLEN-1] : ZLen1[`LEN1-1];
+
+        // Widen the narrow exponent by re-biasing it. Example for single to double:
+        // 1023 = 0011 1111 1111 (double bias)
+        // 127  = 0000 0111 1111 (single bias, subtract this)
+        // 896  = 0011 1000 0000 (difference that must be added to the single exponent)
+        // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b 
+        // dexp = 0bdd dbbb bbbb 
+        // the inserted bits are forced to 0 for zero/denorm inputs and to 1 for inf/NaN inputs
+        assign XExpE = FmtE ? X[`FLEN-2:`NF] : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; 
+        assign YExpE = FmtE ? Y[`FLEN-2:`NF] : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; 
+        assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; 
+
+        assign XFracE = FmtE ? X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
+        assign YFracE = FmtE ? Y[`NF-1:0] : {YLen1[`NF1-1:0], (`NF-`NF1)'(0)};
+        assign ZFracE = FmtE ? Z[`NF-1:0] : {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)};
+
+        assign XExpNonzero = FmtE ? |X[`FLEN-2:`NF] : |XLen1[`LEN1-2:`NF1]; 
+        assign YExpNonzero = FmtE ? |Y[`FLEN-2:`NF] : |YLen1[`LEN1-2:`NF1];
+        assign ZExpNonzero = FmtE ? |Z[`FLEN-2:`NF] : |ZLen1[`LEN1-2:`NF1];
+
+        assign XExpMaxE = FmtE ? &X[`FLEN-2:`NF] : &XLen1[`LEN1-2:`NF1];
+        assign YExpMaxE = FmtE ? &Y[`FLEN-2:`NF] : &YLen1[`LEN1-2:`NF1];
+        assign ZExpMaxE = FmtE ? &Z[`FLEN-2:`NF] : &ZLen1[`LEN1-2:`NF1];
+
+    end else if (`FPSIZES == 3) begin
+        logic  [`LEN1-1:0]   XLen1, YLen1, ZLen1; // `LEN1-wide operand, or a quiet NaN if not properly NaN boxed
+        logic  [`LEN2-1:0]   XLen2, YLen2, ZLen2; // `LEN2-wide operand, or a quiet NaN if not properly NaN boxed
+        
+        // Check NaN boxing. If the value is not properly NaN boxed, replace it with a quiet NaN
+        assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
+        assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)};
+        assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; 
+
+        assign XLen2 = &X[`FLEN-1:`LEN2] ? X[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
+        assign YLen2 = &Y[`FLEN-1:`LEN2] ? Y[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)};
+        assign ZLen2 = &Z[`FLEN-1:`LEN2] ? Z[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; 
+
+        // Blocking assignments are used inside always_comb. In every branch the exponent
+        // flags (ExpNonzero, ExpMaxE) are assigned before the widened exponent that reads
+        // them, so each evaluation of the block only uses values computed in that same pass.
+        always_comb begin
+            case (FmtE)
+                `FMT: begin  // full-width operand: no NaN-box check or widening needed
+                    XSgnE = X[`FLEN-1];
+                    YSgnE = Y[`FLEN-1];
+                    ZSgnE = Z[`FLEN-1];
+
+                    XFracE = X[`NF-1:0];
+                    YFracE = Y[`NF-1:0];
+                    ZFracE = Z[`NF-1:0];
+
+                    XExpNonzero = |X[`FLEN-2:`NF]; 
+                    YExpNonzero = |Y[`FLEN-2:`NF];
+                    ZExpNonzero = |Z[`FLEN-2:`NF];
+
+                    XExpMaxE = &X[`FLEN-2:`NF];
+                    YExpMaxE = &Y[`FLEN-2:`NF];
+                    ZExpMaxE = &Z[`FLEN-2:`NF];
+
+                    XExpE = X[`FLEN-2:`NF]; 
+                    YExpE = Y[`FLEN-2:`NF]; 
+                    ZExpE = Z[`FLEN-2:`NF]; 
+                end
+                `FMT1: begin  // `LEN1-wide operand
+                    XSgnE = XLen1[`LEN1-1];
+                    YSgnE = YLen1[`LEN1-1];
+                    ZSgnE = ZLen1[`LEN1-1];
+
+                    XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)};
+                    YFracE = {YLen1[`NF1-1:0], (`NF-`NF1)'(0)};
+                    ZFracE = {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)};
+
+                    XExpNonzero = |XLen1[`LEN1-2:`NF1]; 
+                    YExpNonzero = |YLen1[`LEN1-2:`NF1];
+                    ZExpNonzero = |ZLen1[`LEN1-2:`NF1];
+
+                    XExpMaxE = &XLen1[`LEN1-2:`NF1];
+                    YExpMaxE = &YLen1[`LEN1-2:`NF1];
+                    ZExpMaxE = &ZLen1[`LEN1-2:`NF1];
+
+                    // widen the exponent: keep the MSB, fill the inserted bits with ~MSB
+                    // (0 for zero/denorm inputs, 1 for inf/NaN inputs), then append the remaining bits
+                    XExpE = {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&XExpNonzero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; 
+                    YExpE = {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&YExpNonzero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; 
+                    ZExpE = {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&ZExpNonzero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; 
+                end
+                `FMT2: begin  // `LEN2-wide operand
+                    XSgnE = XLen2[`LEN2-1];
+                    YSgnE = YLen2[`LEN2-1];
+                    ZSgnE = ZLen2[`LEN2-1];
+
+                    XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)};
+                    YFracE = {YLen2[`NF2-1:0], (`NF-`NF2)'(0)};
+                    ZFracE = {ZLen2[`NF2-1:0], (`NF-`NF2)'(0)};
+
+                    XExpNonzero = |XLen2[`LEN2-2:`NF2]; 
+                    YExpNonzero = |YLen2[`LEN2-2:`NF2];
+                    ZExpNonzero = |ZLen2[`LEN2-2:`NF2];
+
+                    XExpMaxE = &XLen2[`LEN2-2:`NF2];
+                    YExpMaxE = &YLen2[`LEN2-2:`NF2];
+                    ZExpMaxE = &ZLen2[`LEN2-2:`NF2];
+
+                    // widen the exponent: keep the MSB, fill the inserted bits with ~MSB
+                    // (0 for zero/denorm inputs, 1 for inf/NaN inputs), then append the remaining bits
+                    XExpE = {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&XExpNonzero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]}; 
+                    YExpE = {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&YExpNonzero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]}; 
+                    ZExpE = {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&ZExpNonzero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]}; 
+                end
+                default: begin  // unused format encoding: drive every field to zero
+                    XSgnE = 0;
+                    YSgnE = 0;
+                    ZSgnE = 0;
+                    XExpE = 0; 
+                    YExpE = 0;
+                    ZExpE = 0; 
+                    XFracE = 0;
+                    YFracE = 0;
+                    ZFracE = 0;
+                    XExpNonzero = 0; 
+                    YExpNonzero = 0;
+                    ZExpNonzero = 0;
+                    XExpMaxE = 0;
+                    YExpMaxE = 0;
+                    ZExpMaxE = 0;
+                end
+            endcase
+        end
+
+    end else begin
+        logic  [`D_LEN-1:0]   XLen1, YLen1, ZLen1; // double operand, or a quiet NaN if not properly NaN boxed
+        logic  [`S_LEN-1:0]   XLen2, YLen2, ZLen2; // single operand, or a quiet NaN if not properly NaN boxed
+        logic  [`H_LEN-1:0]   XLen3, YLen3, ZLen3; // half operand, or a quiet NaN if not properly NaN boxed
+        
+        // Check NaN boxing. If the value is not properly NaN boxed, replace it with a quiet NaN
+        assign XLen1 = &X[`FLEN-1:`D_LEN] ? X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
+        assign YLen1 = &Y[`FLEN-1:`D_LEN] ? Y[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)};
+        assign ZLen1 = &Z[`FLEN-1:`D_LEN] ? Z[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; 
+
+        assign XLen2 = &X[`FLEN-1:`S_LEN] ? X[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
+        assign YLen2 = &Y[`FLEN-1:`S_LEN] ? Y[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)};
+        assign ZLen2 = &Z[`FLEN-1:`S_LEN] ? Z[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; 
+
+        assign XLen3 = &X[`FLEN-1:`H_LEN] ? X[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
+        assign YLen3 = &Y[`FLEN-1:`H_LEN] ? Y[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)};
+        assign ZLen3 = &Z[`FLEN-1:`H_LEN] ? Z[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; 
+
+        // FmtE encoding in this branch: 2'b11 = full width, 2'b01 = double, 2'b00 = single, 2'b10 = half.
+        // Blocking assignments are used inside always_comb. In every branch the exponent
+        // flags (ExpNonzero, ExpMaxE) are assigned before the widened exponent that reads
+        // them, so each evaluation of the block only uses values computed in that same pass.
+        always_comb begin
+            case (FmtE)
+                2'b11: begin  // full-width operand: no NaN-box check or widening needed
+                    XSgnE = X[`FLEN-1];
+                    YSgnE = Y[`FLEN-1];
+                    ZSgnE = Z[`FLEN-1];
+
+                    XFracE = X[`NF-1:0];
+                    YFracE = Y[`NF-1:0];
+                    ZFracE = Z[`NF-1:0];
+
+                    XExpNonzero = |X[`FLEN-2:`NF]; 
+                    YExpNonzero = |Y[`FLEN-2:`NF];
+                    ZExpNonzero = |Z[`FLEN-2:`NF];
+
+                    XExpMaxE = &X[`FLEN-2:`NF];
+                    YExpMaxE = &Y[`FLEN-2:`NF];
+                    ZExpMaxE = &Z[`FLEN-2:`NF];
+
+                    XExpE = X[`FLEN-2:`NF]; 
+                    YExpE = Y[`FLEN-2:`NF]; 
+                    ZExpE = Z[`FLEN-2:`NF]; 
+                end
+                2'b01: begin  // double: fraction is the low `D_NF bits, exponent sits above it
+                    XSgnE = XLen1[`D_LEN-1];
+                    YSgnE = YLen1[`D_LEN-1];
+                    ZSgnE = ZLen1[`D_LEN-1];
+
+                    XFracE = {XLen1[`D_NF-1:0], (`NF-`D_NF)'(0)};
+                    YFracE = {YLen1[`D_NF-1:0], (`NF-`D_NF)'(0)};
+                    ZFracE = {ZLen1[`D_NF-1:0], (`NF-`D_NF)'(0)};
+
+                    XExpNonzero = |XLen1[`D_LEN-2:`D_NF]; 
+                    YExpNonzero = |YLen1[`D_LEN-2:`D_NF];
+                    ZExpNonzero = |ZLen1[`D_LEN-2:`D_NF];
+
+                    XExpMaxE = &XLen1[`D_LEN-2:`D_NF];
+                    YExpMaxE = &YLen1[`D_LEN-2:`D_NF];
+                    ZExpMaxE = &ZLen1[`D_LEN-2:`D_NF];
+
+                    // widen the exponent: keep the MSB, fill the inserted bits with ~MSB
+                    // (0 for zero/denorm inputs, 1 for inf/NaN inputs), then append the remaining bits
+                    XExpE = {XLen1[`D_LEN-2], {`NE-`D_NE{~XLen1[`D_LEN-2]&XExpNonzero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]}; 
+                    YExpE = {YLen1[`D_LEN-2], {`NE-`D_NE{~YLen1[`D_LEN-2]&YExpNonzero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]}; 
+                    ZExpE = {ZLen1[`D_LEN-2], {`NE-`D_NE{~ZLen1[`D_LEN-2]&ZExpNonzero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]}; 
+                end
+                2'b00: begin  // single
+                    XSgnE = XLen2[`S_LEN-1];
+                    YSgnE = YLen2[`S_LEN-1];
+                    ZSgnE = ZLen2[`S_LEN-1];
+
+                    XFracE = {XLen2[`S_NF-1:0], (`NF-`S_NF)'(0)};
+                    YFracE = {YLen2[`S_NF-1:0], (`NF-`S_NF)'(0)};
+                    ZFracE = {ZLen2[`S_NF-1:0], (`NF-`S_NF)'(0)};
+
+                    XExpNonzero = |XLen2[`S_LEN-2:`S_NF]; 
+                    YExpNonzero = |YLen2[`S_LEN-2:`S_NF];
+                    ZExpNonzero = |ZLen2[`S_LEN-2:`S_NF];
+
+                    XExpMaxE = &XLen2[`S_LEN-2:`S_NF];
+                    YExpMaxE = &YLen2[`S_LEN-2:`S_NF];
+                    ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF];
+
+                    // widen the exponent: keep the MSB, fill the inserted bits with ~MSB
+                    // (0 for zero/denorm inputs, 1 for inf/NaN inputs), then append the remaining bits
+                    XExpE = {XLen2[`S_LEN-2], {`NE-`S_NE{~XLen2[`S_LEN-2]&XExpNonzero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]}; 
+                    YExpE = {YLen2[`S_LEN-2], {`NE-`S_NE{~YLen2[`S_LEN-2]&YExpNonzero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]}; 
+                    ZExpE = {ZLen2[`S_LEN-2], {`NE-`S_NE{~ZLen2[`S_LEN-2]&ZExpNonzero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]}; 
+                end
+                2'b10: begin  // half
+                    XSgnE = XLen3[`H_LEN-1];
+                    YSgnE = YLen3[`H_LEN-1];
+                    ZSgnE = ZLen3[`H_LEN-1];
+
+                    XFracE = {XLen3[`H_NF-1:0], (`NF-`H_NF)'(0)};
+                    YFracE = {YLen3[`H_NF-1:0], (`NF-`H_NF)'(0)};
+                    ZFracE = {ZLen3[`H_NF-1:0], (`NF-`H_NF)'(0)};
+
+                    XExpNonzero = |XLen3[`H_LEN-2:`H_NF]; 
+                    YExpNonzero = |YLen3[`H_LEN-2:`H_NF];
+                    ZExpNonzero = |ZLen3[`H_LEN-2:`H_NF];
+
+                    XExpMaxE = &XLen3[`H_LEN-2:`H_NF];
+                    YExpMaxE = &YLen3[`H_LEN-2:`H_NF];
+                    ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF];
+
+                    // widen the exponent: keep the MSB, fill the inserted bits with ~MSB
+                    // (0 for zero/denorm inputs, 1 for inf/NaN inputs), then append the remaining bits
+                    XExpE = {XLen3[`H_LEN-2], {`NE-`H_NE{~XLen3[`H_LEN-2]&XExpNonzero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]}; 
+                    YExpE = {YLen3[`H_LEN-2], {`NE-`H_NE{~YLen3[`H_LEN-2]&YExpNonzero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]}; 
+                    ZExpE = {ZLen3[`H_LEN-2], {`NE-`H_NE{~ZLen3[`H_LEN-2]&ZExpNonzero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]}; 
+                end
+            endcase
+        end
+
+    end
+
+    // exponent field is all zeros
+    assign XExpZero = ~XExpNonzero;
+    assign YExpZero = ~YExpNonzero;
+    assign ZExpZero = ~ZExpNonzero;
+
+    // fraction field is all zeros
+    assign XFracZero = ~|XFracE;
+    assign YFracZero = ~|YFracE;
+    assign ZFracZero = ~|ZFracE;
+
+    // mantissa = {leading bit, fraction}; the leading bit is 1 whenever the exponent is nonzero
+    assign XManE = {XExpNonzero, XFracE};
+    assign YManE = {YExpNonzero, YFracE};
+    assign ZManE = {ZExpNonzero, ZFracE};
+
+    // X is a normal number: exponent neither all ones nor all zeros
+    assign XNormE = ~(XExpMaxE|XExpZero);
+    
+    // NaN: exponent all ones with a nonzero fraction
+    // (an operand that was not properly NaN boxed was replaced by a quiet NaN and is flagged here)
+    assign XNaNE = XExpMaxE & ~XFracZero;
+    assign YNaNE = YExpMaxE & ~YFracZero;
+    assign ZNaNE = ZExpMaxE & ~ZFracZero;
+
+    // signalling NaN: a NaN whose most significant fraction bit is 0
+    assign XSNaNE = XNaNE&~XFracE[`NF-1];
+    assign YSNaNE = YNaNE&~YFracE[`NF-1];
+    assign ZSNaNE = ZNaNE&~ZFracE[`NF-1];
+
+    // denormalized: exponent zero with a nonzero fraction
+    assign XDenormE = XExpZero & ~XFracZero;
+    assign YDenormE = YExpZero & ~YFracZero;
+    assign ZDenormE = ZExpZero & ~ZFracZero;
+
+    // infinity: exponent all ones with a zero fraction
+    assign XInfE = XExpMaxE & XFracZero;
+    assign YInfE = YExpMaxE & YFracZero;
+    assign ZInfE = ZExpMaxE & ZFracZero;
+
+    // zero: exponent and fraction are both zero
+    assign XZeroE = XExpZero & XFracZero;
+    assign YZeroE = YExpZero & YFracZero;
+    assign ZZeroE = ZExpZero & ZFracZero;
+    
+endmodule
--- a/pipelined/src/fpu/unpacking.sv
+++ b/pipelined/src/fpu/unpacking.sv
@ -1,95 +0,0 @@
-`include "wally-config.vh"
-
-module unpack ( 
-    input logic  [63:0] X, Y, Z,                    // raw operands
-    input logic         FmtE,                       // 1: fields taken from all 64 bits, 0: from the 32-bit XFloat/YFloat/ZFloat
-    input logic  [2:0]  FOpCtrlE,                   // not read in this module
-    output logic        XSgnE, YSgnE, ZSgnE,        // sign bits
-    output logic [10:0] XExpE, YExpE, ZExpE,        // exponents, widened to 11 bits
-    output logic [52:0] XManE, YManE, ZManE,        // {exponent nonzero, fraction}
-    output logic XNormE,                            // X exponent is neither all 0s nor all 1s
-    output logic XNaNE, YNaNE, ZNaNE,               // exponent all 1s, fraction nonzero
-    output logic XSNaNE, YSNaNE, ZSNaNE,            // NaN with fraction bit 51 clear
-    output logic XDenormE, YDenormE, ZDenormE,      // exponent zero, fraction nonzero
-    output logic XZeroE, YZeroE, ZZeroE,            // exponent zero, fraction zero
-    output logic [10:0] BiasE,                      // constant 11'h3ff
-    output logic XInfE, YInfE, ZInfE,               // exponent all 1s, fraction zero
-    output logic XExpMaxE                           // X exponent is all 1s
-);
- 
-    logic [51:0]    XFracE, YFracE, ZFracE;
-    logic           XExpNonzero, YExpNonzero, ZExpNonzero;
-    logic           XFracZero, YFracZero, ZFracZero; // input fraction zero
-    logic           XExpZero, YExpZero, ZExpZero; // input exponent zero
-    logic           YExpMaxE, ZExpMaxE;  // input exponent all 1s
-    logic  [31:0]   XFloat, YFloat, ZFloat; // Bottom half or NaN, if RV64 and not properly NaN boxed
-
-    // Select the 32-bit operand: with only F supported the low word is used as is, otherwise NaN boxing is checked
-    if (`F_SUPPORTED & ~`D_SUPPORTED) begin  // eventually this should change to FLEN when FLEN isn't hardwired to 64
-        assign XFloat = X[31:0]; 
-        assign YFloat = Y[31:0];  
-        assign ZFloat = Z[31:0]; 
-    end else begin
-        assign XFloat = &X[`FLEN-1:32] ? X[31:0] : 32'h7fc00000; // upper bits not all 1s: substitute 32'h7fc00000
-        assign YFloat = &Y[`FLEN-1:32] ? Y[31:0] : 32'h7fc00000;
-        assign ZFloat = &Z[`FLEN-1:32] ? Z[31:0] : 32'h7fc00000;
-    end   
-
-    assign XSgnE = FmtE ? X[63] : XFloat[31];
-    assign YSgnE = FmtE ? Y[63] : YFloat[31];
-    assign ZSgnE = FmtE ? Z[63] : ZFloat[31];
-
-    assign XExpE = FmtE ? X[62:52] : {XFloat[30], {3{~XFloat[30]&~XExpZero|XExpMaxE}}, XFloat[29:23]}; // 8-bit exponent widened to 11 bits
-    assign YExpE = FmtE ? Y[62:52] : {YFloat[30], {3{~YFloat[30]&~YExpZero|YExpMaxE}}, YFloat[29:23]}; 
-    assign ZExpE = FmtE ? Z[62:52] : {ZFloat[30], {3{~ZFloat[30]&~ZExpZero|ZExpMaxE}}, ZFloat[29:23]}; 
-
-    assign XFracE = FmtE ? X[51:0] : {XFloat[22:0], 29'b0}; // 23-bit fraction left-aligned in 52 bits
-    assign YFracE = FmtE ? Y[51:0] : {YFloat[22:0], 29'b0};
-    assign ZFracE = FmtE ? Z[51:0] : {ZFloat[22:0], 29'b0};
-
-    assign XExpNonzero = FmtE ? |X[62:52] : |XFloat[30:23]; 
-    assign YExpNonzero = FmtE ? |Y[62:52] : |YFloat[30:23];
-    assign ZExpNonzero = FmtE ? |Z[62:52] : |ZFloat[30:23];
-
-    assign XExpZero = ~XExpNonzero;
-    assign YExpZero = ~YExpNonzero;
-    assign ZExpZero = ~ZExpNonzero;
-   
-    assign XFracZero = ~|XFracE;
-    assign YFracZero = ~|YFracE;
-    assign ZFracZero = ~|ZFracE;
-
-    assign XManE = {XExpNonzero, XFracE}; // leading mantissa bit is 1 whenever the exponent is nonzero
-    assign YManE = {YExpNonzero, YFracE};
-    assign ZManE = {ZExpNonzero, ZFracE};
-
-    assign XExpMaxE = FmtE ? &X[62:52] : &XFloat[30:23];
-    assign YExpMaxE = FmtE ? &Y[62:52] : &YFloat[30:23];
-    assign ZExpMaxE = FmtE ? &Z[62:52] : &ZFloat[30:23];
-  
-    assign XNormE = ~(XExpMaxE|XExpZero);
-    
-    // NaN: exponent all 1s with a nonzero fraction (an improperly NaN boxed single was replaced by 32'h7fc00000 above)
-    assign XNaNE = XExpMaxE & ~XFracZero;
-    assign YNaNE = YExpMaxE & ~YFracZero;
-    assign ZNaNE = ZExpMaxE & ~ZFracZero;
-
-    assign XSNaNE = XNaNE&~XFracE[51]; // signalling NaN: most significant fraction bit is 0
-    assign YSNaNE = YNaNE&~YFracE[51];
-    assign ZSNaNE = ZNaNE&~ZFracE[51];
-
-    assign XDenormE = XExpZero & ~XFracZero;
-    assign YDenormE = YExpZero & ~YFracZero;
-    assign ZDenormE = ZExpZero & ~ZFracZero;
-
-    assign XInfE = XExpMaxE & XFracZero;
-    assign YInfE = YExpMaxE & YFracZero;
-    assign ZInfE = ZExpMaxE & ZFracZero;
-
-    assign XZeroE = XExpZero & XFracZero;
-    assign YZeroE = YExpZero & YFracZero;
-    assign ZZeroE = ZExpZero & ZFracZero;
-
-    assign BiasE = 11'h3ff; // always use 1023 because exponents are unpacked to double precision
-
-endmodule
--- a/pipelined/testbench/fp/tests/fma-testbench.sv
+++ b/pipelined/testbench/fp/tests/fma-testbench.sv
@ -0,0 +1,279 @@
+
+`include "wally-config.vh"
+`define PATH "../../../../tests/fp/vectors/"
+
+// Test vector files, listed five per precision (rne, rz, ru, rd, rnm) in the
+// order half, single, double, quad. The testbench depends on this layout:
+// indices 0-4 are f16, 5-9 are f32, 10-14 are f64 and 15-19 are f128, which is
+// what the initial value of i and the precision-skipping logic assume.
+string tests[] = '{
+    "f16_mulAdd_rne.tv",
+    "f16_mulAdd_rz.tv",
+    "f16_mulAdd_ru.tv",
+    "f16_mulAdd_rd.tv",
+    "f16_mulAdd_rnm.tv",
+    "f32_mulAdd_rne.tv",
+    "f32_mulAdd_rz.tv",
+    "f32_mulAdd_ru.tv",
+    "f32_mulAdd_rd.tv",
+    "f32_mulAdd_rnm.tv",
+    "f64_mulAdd_rne.tv",
+    "f64_mulAdd_rz.tv",
+    "f64_mulAdd_ru.tv",
+    "f64_mulAdd_rd.tv",
+    "f64_mulAdd_rnm.tv",
+    "f128_mulAdd_rne.tv",
+    "f128_mulAdd_rz.tv",
+    "f128_mulAdd_ru.tv",
+    "f128_mulAdd_rd.tv",
+    "f128_mulAdd_rnm.tv"
+};
+
+// steps to run FMA tests
+//    1) create test vectors in riscv-wally/tests/fp with: ./run_all.sh
+//    2) go to riscv-wally/pipelined/testbench/fp/tests
+//    3) run ./sim-fma-batch (or ./sim-fma to bring up the GUI)
+
+module fmatestbench();
+
+  logic clk;
+  // number of mismatching vectors seen so far
+  logic [31:0] errors=0;
+  // index of the vector currently being applied from testvectors
+  logic [31:0] vectornum=0;
+  // one entry per test vector: four FLEN-wide values (X, Y, Z, ans) followed by
+  // 16 trailing bits (8 flag bits, 4 rounding mode bits, 4 format bits)
+  logic [`FLEN*4+7+4+4:0] testvectors[6133248:0];
+  int i = `ZFH_SUPPORTED ? 0 : `F_SUPPORTED ? 5 : `D_SUPPORTED ? 10 : 15; // set i to the first test that is run
+
+  logic [`FLEN-1:0]     X, Y, Z;  // inputs read from TestFloat
+  logic [`FLEN-1:0]	    ans;      // result from TestFloat
+  logic [7:0]	 	        flags;    // flags read from TestFloat
+  logic [2:0]		        FrmE;     // rounding mode
+  logic	[`FPSIZES/3:0]  FmtE;     // format - 10 = half, 00 = single, 01 = double, 11 = quad
+  logic [3:0]		        FrmRead;  // rounding mode read from TestFloat
+  logic	[3:0]			      FmtRead;  // format read from TestFloat
+  logic [`FLEN-1:0]     FMAResM;  // FMA's output result
+  logic [4:0]           FMAFlgM;  // FMA's output flags
+  logic [2:0]		        FOpCtrlE; // which operation
+  logic                 wnan;     // is the output result NaN
+  logic                 ansnan;   // is the correct answer NaN
+  
+  // signals needed to connect modules (unpack -> fma1 -> fma2)
+  logic [`NE+1:0]	  ProdExpE;
+  logic 				    AddendStickyE;
+  logic 					  KillProdE; 
+  logic             XSgnE, YSgnE, ZSgnE;
+  logic [`NE-1:0]   XExpE, YExpE, ZExpE;
+  logic [`NF:0]     XManE, YManE, ZManE;
+  logic             XNormE;
+  logic             XExpMaxE;
+  logic             XNaNE, YNaNE, ZNaNE;
+  logic             XSNaNE, YSNaNE, ZSNaNE;
+  logic             XDenormE, YDenormE, ZDenormE;
+  logic             XInfE, YInfE, ZInfE;
+  logic             XZeroE, YZeroE, ZZeroE;
+  logic             YExpMaxE, ZExpMaxE, Mult;
+  logic [3*`NF+5:0]	SumE;       
+  logic 			      InvZE;
+  logic 			      NegSumE;
+  logic 			      ZSgnEffE;
+  logic 			      PSgnE;
+  logic [$clog2(3*`NF+7)-1:0]	NormCntE;
+
+
+  // both controls are tied off: only the fused multiply-add vectors are run
+  assign FOpCtrlE = 3'b0; // set to 0 because TestFloat only tests fMADD
+  assign Mult = 1'b0;     // set to zero because not testing multiplication
+
+  // check if the calculated result or correct answer is NaN for the format
+  // under test: exponent field all ones and fraction field nonzero.
+  // Plain blocking assignments are used (not procedural "assign", which is
+  // deprecated and redundant inside always_comb), and the default arm keeps
+  // the block purely combinational when FmtRead is unknown.
+  always_comb begin
+    case (FmtRead)
+        4'b11: begin // quad
+          ansnan = &ans[`FLEN-2:`NF]&(|ans[`NF-1:0]);
+          wnan = &FMAResM[`FLEN-2:`NF]&(|FMAResM[`NF-1:0]);
+        end
+        4'b01: begin // double
+          ansnan = &ans[`LEN1-2:`NF1]&(|ans[`NF1-1:0]);
+          wnan = &FMAResM[`LEN1-2:`NF1]&(|FMAResM[`NF1-1:0]);
+        end
+        4'b00: begin // single
+          ansnan = &ans[`LEN2-2:`NF2]&(|ans[`NF2-1:0]);
+          wnan = &FMAResM[`LEN2-2:`NF2]&(|FMAResM[`NF2-1:0]);
+        end
+        4'b10: begin // half
+          ansnan = &ans[`H_LEN-2:`H_NF]&(|ans[`H_NF-1:0]);
+          wnan = &FMAResM[`H_LEN-2:`H_NF]&(|FMAResM[`H_NF-1:0]);
+        end
+        default: begin // no vector read yet or unrecognized format code
+          ansnan = 1'bx;
+          wnan = 1'bx;
+        end
+    endcase
+  end
+
+  // instantiate devices under test
+  // unpack takes the raw X, Y, Z operands plus FmtE and produces the sign,
+  // exponent, mantissa and classification signals consumed by the FMA stages.
+  unpack unpack(.X, .Y, .Z, .FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
+                .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
+                .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
+                .XExpMaxE);
+  // first FMA stage: produces SumE, ProdExpE, NormCntE and the sign/sticky/kill signals
+  fma1 fma1(.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
+            .XDenormE, .YDenormE, .ZDenormE,  .XZeroE, .YZeroE, .ZZeroE,
+            .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
+            .ProdExpE, .AddendStickyE, .KillProdE); 
+  // second FMA stage: its *M ports are wired straight to the *E signals, so the
+  // testbench has no pipeline register between the two stages
+  fma2 fma2(.XSgnM(XSgnE), .YSgnM(YSgnE), .XExpM(XExpE), .YExpM(YExpE), .ZExpM(ZExpE), .XManM(XManE), .YManM(YManE), .ZManM(ZManE), 
+            .XNaNM(XNaNE), .YNaNM(YNaNE), .ZNaNM(ZNaNE), .XZeroM(XZeroE), .YZeroM(YZeroE), .ZZeroM(ZZeroE), .XInfM(XInfE), .YInfM(YInfE), .ZInfM(ZInfE), 
+            .XSNaNM(XSNaNE), .YSNaNM(YSNaNE), .ZSNaNM(ZSNaNE), .KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE), 
+            .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(FmtE), .FrmM(FrmE), 
+            .FMAFlgM, .FMAResM, .Mult);
+
+
+  // clock generator: 10 time-unit period, high for the first 5 units
+  always begin
+    clk = 1'b1;
+    #5;
+    clk = 1'b0;
+    #5;
+  end
+
+  // load the vector file of the first enabled test at the start of simulation
+  initial begin
+    $display("\n\nRunning %s vectors", tests[i]);
+    $readmemh({`PATH, tests[i]}, testvectors);
+  end
+
+  // apply test vectors on rising edge of clk
+  // Each vector packs, from MSB to LSB: X, Y, Z, ans (each as wide as the format
+  // under test), then flags in [15:8], rounding mode in [7:4] and format in [3:0].
+  // Operands narrower than FLEN are NaN-boxed by filling the upper bits with 1s.
+  always @(posedge clk) begin
+    #1; 
+    flags = testvectors[vectornum][15:8];
+    FrmRead = testvectors[vectornum][7:4];
+    FmtRead = testvectors[vectornum][3:0];
+    if (FmtRead==4'b11 & `Q_SUPPORTED) 	begin       // quad
+      X = testvectors[vectornum][16+4*(`Q_LEN)-1:16+3*(`Q_LEN)];
+      Y = testvectors[vectornum][16+3*(`Q_LEN)-1:16+2*(`Q_LEN)];
+      Z = testvectors[vectornum][16+2*(`Q_LEN)-1:16+`Q_LEN];
+      ans = testvectors[vectornum][16+(`Q_LEN-1):16];
+    end
+    else if (FmtRead==4'b01 & `D_SUPPORTED)	begin	  // double
+      X = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+4*(`D_LEN)-1:16+3*(`D_LEN)]};
+      Y = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+3*(`D_LEN)-1:16+2*(`D_LEN)]};
+      Z = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+2*(`D_LEN)-1:16+`D_LEN]};
+      ans = {{`FLEN-`D_LEN{1'b1}}, testvectors[vectornum][16+(`D_LEN-1):16]};
+    end
+    else if (FmtRead==4'b00 & `F_SUPPORTED)	begin	  // single
+      X = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+4*(`S_LEN)-1:16+3*(`S_LEN)]};
+      Y = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+3*(`S_LEN)-1:16+2*(`S_LEN)]};
+      Z = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+2*(`S_LEN)-1:16+`S_LEN]};
+      ans = {{`FLEN-`S_LEN{1'b1}}, testvectors[vectornum][16+(`S_LEN-1):16]};
+    end
+    else if (FmtRead==4'b10 & `ZFH_SUPPORTED)	begin	  // half
+      X = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+4*(`H_LEN)-1:16+3*(`H_LEN)]};
+      Y = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+3*(`H_LEN)-1:16+2*(`H_LEN)]};
+      Z = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+2*(`H_LEN)-1:16+`H_LEN]};
+      ans = {{`FLEN-`H_LEN{1'b1}}, testvectors[vectornum][16+(`H_LEN-1):16]};
+    end
+    else begin	  
+      // unsupported or unknown format: drive x so nothing is silently tested
+      X = {`FLEN{1'bx}};
+      Y = {`FLEN{1'bx}};
+      Z = {`FLEN{1'bx}};
+      ans = {`FLEN{1'bx}};
+    end
+
+    // trim format and rounding mode to appropriate size
+    if (`FPSIZES <= 2) FmtE = FmtRead === `FMT; // rewrite format if 2 or less floating formats are supported
+    else FmtE = FmtRead[1:0];
+    FrmE = FrmRead[2:0];
+  end
+
+  // check results on falling edge of clk
+  // For the format of the current vector, a vector is reported as wrong when
+  // none of the following hold: the flags match, the result matches, or the
+  // result is a NaN whose payload matches the expected answer or a quieted
+  // copy of one of the NaN inputs. The first error stops the simulation.
+  // After each vector the index advances; an x entry marks the end of the
+  // file, at which point the next enabled vector file is loaded.
+  // NOTE(review): the pass condition ORs "flags match" with "result matches",
+  // so a wrong result with correct flags is accepted - confirm whether these
+  // two terms were meant to be ANDed.
+    always @(negedge clk) begin
+      // quad
+        if((FmtRead==4'b11) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`FLEN-2:0] === ans[`FLEN-2:0] | (XNaNE&(FMAResM[`FLEN-2:0] === {X[`FLEN-2:`NF],1'b1,X[`NF-2:0]})) | (YNaNE&(FMAResM[`FLEN-2:0] === {Y[`FLEN-2:`NF],1'b1,Y[`NF-2:0]})) | (ZNaNE&(FMAResM[`FLEN-2:0] === {Z[`FLEN-2:`NF],1'b1,Z[`NF-2:0]})))))) begin
+          $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
+          if(XDenormE) $display( "xdenorm ");
+          if(YDenormE) $display( "ydenorm ");
+          if(ZDenormE) $display( "zdenorm ");
+          if(FMAFlgM[4] !== 0) $display( "invld ");
+          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
+          if(FMAFlgM[1] !== 0) $display( "unflw ");
+          // the sign is the MSB, bit FLEN-1 (bit FLEN does not exist and reads as x)
+          if(FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=-inf ");
+          if(~FMAResM[`FLEN-1] && FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] === 0) $display( "FMAResM=+inf ");
+          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && ~FMAResM[`NF-1]) $display( "FMAResM=sigNaN ");
+          if(FMAResM[`FLEN-2:`NF] === {`NE{1'b1}} && FMAResM[`NF-1:0] !== 0 && FMAResM[`NF-1]) $display( "FMAResM=qutNaN ");
+          if(ans[`FLEN-1] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=-inf ");
+          if(~ans[`FLEN-1] && ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] === 0) $display( "ans=+inf ");
+          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ~ans[`NF-1]) $display( "ans=sigNaN ");
+          if(ans[`FLEN-2:`NF] === {`NE{1'b1}} && ans[`NF-1:0] !== 0 && ans[`NF-1]) $display( "ans=qutNaN ");
+          errors = errors + 1;
+          if (errors === 1) $stop;
+        end
+      // double
+        if((FmtRead==4'b01) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`D_LEN-2:0] === ans[`D_LEN-2:0] | (XNaNE&(FMAResM[`D_LEN-2:0] === {X[`D_LEN-2:`D_NF],1'b1,X[`D_NF-2:0]})) | (YNaNE&(FMAResM[`D_LEN-2:0] === {Y[`D_LEN-2:`D_NF],1'b1,Y[`D_NF-2:0]})) | (ZNaNE&(FMAResM[`D_LEN-2:0] === {Z[`D_LEN-2:`D_NF],1'b1,Z[`D_NF-2:0]})))))) begin
+          $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
+          // diagnostics use the double-precision field positions
+          if(~(|X[`D_LEN-2:`D_NF]) && |X[`D_NF-1:0]) $display( "xdenorm ");
+          if(~(|Y[`D_LEN-2:`D_NF]) && |Y[`D_NF-1:0]) $display( "ydenorm ");
+          if(~(|Z[`D_LEN-2:`D_NF]) && |Z[`D_NF-1:0]) $display( "zdenorm ");
+          if(FMAFlgM[4] !== 0) $display( "invld ");
+          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
+          if(FMAFlgM[1] !== 0) $display( "unflw ");
+          if(&FMAResM[`D_LEN-2:`D_NF] && |FMAResM[`D_NF-1:0] && ~FMAResM[`D_NF-1]) $display( "FMAResM=sigNaN ");
+          if(&FMAResM[`D_LEN-2:`D_NF] && |FMAResM[`D_NF-1:0] && FMAResM[`D_NF-1] ) $display( "FMAResM=qutNaN ");
+          if(&ans[`D_LEN-2:`D_NF] && |ans[`D_NF-1:0] && ~ans[`D_NF-1] ) $display( "ans=sigNaN ");
+          if(&ans[`D_LEN-2:`D_NF] && |ans[`D_NF-1:0] && ans[`D_NF-1]) $display( "ans=qutNaN ");
+          errors = errors + 1;
+          if (errors === 1) $stop;
+        end
+      // single
+        if((FmtRead==4'b00) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`S_LEN-2:0] === ans[`S_LEN-2:0] | (XNaNE&(FMAResM[`S_LEN-2:0] === {X[`S_LEN-2:`S_NF],1'b1,X[`S_NF-2:0]})) | (YNaNE&(FMAResM[`S_LEN-2:0] === {Y[`S_LEN-2:`S_NF],1'b1,Y[`S_NF-2:0]})) | (ZNaNE&(FMAResM[`S_LEN-2:0] === {Z[`S_LEN-2:`S_NF],1'b1,Z[`S_NF-2:0]})))))) begin
+          $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
+          if(~(|X[30:23]) && |X[22:0]) $display( "xdenorm ");
+          if(~(|Y[30:23]) && |Y[22:0]) $display( "ydenorm ");
+          if(~(|Z[30:23]) && |Z[22:0]) $display( "zdenorm ");
+          if(FMAFlgM[4] !== 0) $display( "invld ");
+          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
+          if(FMAFlgM[1] !== 0) $display( "unflw ");
+          if(&FMAResM[30:23] && |FMAResM[22:0] && ~FMAResM[22]) $display( "FMAResM=sigNaN ");
+          if(&FMAResM[30:23] && |FMAResM[22:0] && FMAResM[22] ) $display( "FMAResM=qutNaN ");
+          if(&ans[30:23] && |ans[22:0] && ~ans[22] ) $display( "ans=sigNaN ");
+          if(&ans[30:23] && |ans[22:0] && ans[22]) $display( "ans=qutNaN ");
+          errors = errors + 1;
+          if (errors === 1) $stop;
+        end
+      // half (format code 10; 01 is double and is handled above)
+        if((FmtRead==4'b10) & ~((FMAFlgM === flags[4:0]) | (FMAResM === ans) | (wnan & (FMAResM[`H_LEN-2:0] === ans[`H_LEN-2:0] | (XNaNE&(FMAResM[`H_LEN-2:0] === {X[`H_LEN-2:`H_NF],1'b1,X[`H_NF-2:0]})) | (YNaNE&(FMAResM[`H_LEN-2:0] === {Y[`H_LEN-2:`H_NF],1'b1,Y[`H_NF-2:0]})) | (ZNaNE&(FMAResM[`H_LEN-2:0] === {Z[`H_LEN-2:`H_NF],1'b1,Z[`H_NF-2:0]})))))) begin
+          $display( "%h %h %h %h %h %h %h  Wrong ",X,Y, Z, FMAResM, ans, FMAFlgM, flags);
+          // diagnostics use the half-precision field positions
+          if(~(|X[`H_LEN-2:`H_NF]) && |X[`H_NF-1:0]) $display( "xdenorm ");
+          if(~(|Y[`H_LEN-2:`H_NF]) && |Y[`H_NF-1:0]) $display( "ydenorm ");
+          if(~(|Z[`H_LEN-2:`H_NF]) && |Z[`H_NF-1:0]) $display( "zdenorm ");
+          if(FMAFlgM[4] !== 0) $display( "invld ");
+          if(FMAFlgM[2] !== 0) $display( "ovrflw ");
+          if(FMAFlgM[1] !== 0) $display( "unflw ");
+          if(&FMAResM[`H_LEN-2:`H_NF] && |FMAResM[`H_NF-1:0] && ~FMAResM[`H_NF-1]) $display( "FMAResM=sigNaN ");
+          if(&FMAResM[`H_LEN-2:`H_NF] && |FMAResM[`H_NF-1:0] && FMAResM[`H_NF-1] ) $display( "FMAResM=qutNaN ");
+          if(&ans[`H_LEN-2:`H_NF] && |ans[`H_NF-1:0] && ~ans[`H_NF-1] ) $display( "ans=sigNaN ");
+          if(&ans[`H_LEN-2:`H_NF] && |ans[`H_NF-1:0] && ans[`H_NF-1]) $display( "ans=qutNaN ");
+          errors = errors + 1;
+          if (errors === 1) $stop;
+        end
+        
+	    // if ( vectornum === 3165862) $stop; // uncomment for specific test
+      vectornum = vectornum + 1; // increment test
+      if (testvectors[vectornum][0] === 1'bx) begin // if reached the end of file
+        if (errors) begin // if there were errors
+          $display("%s completed with %d tests and %d errors", tests[i], vectornum, errors);
+          $stop;
+        end
+        else begin // if no errors
+          if(tests[i] === "") begin // if no more tests
+            $display("\nAll tests completed with %d errors\n", errors);
+            $stop;
+          end
+
+          $display("%s completed successfully with %d tests and %d errors (across all tests)\n", tests[i], vectornum, errors);
+
+          // increment tests - skip some precisions if needed
+          // (each line jumps over one unsupported group of five files)
+          if ((i === 4 & ~`F_SUPPORTED) | (i === 9 & ~`D_SUPPORTED) | (i === 14 & ~`Q_SUPPORTED)) i = i+5;
+          if ((i === 9 & ~`D_SUPPORTED) | (i === 14 & ~`Q_SUPPORTED)) i = i+5;
+          if ((i === 14 & ~`Q_SUPPORTED)) i = i+5;
+          i = i+1;
+
+          // if no more tests - finish
+          if(tests[i] === "") begin
+            $display("\nAll tests completed with %d errors\n", errors);
+            $stop;
+          end 
+
+          // read next files
+          $display("Running %s vectors", tests[i]);
+          $readmemh({`PATH, tests[i]}, testvectors);
+          vectornum = 0;
+        end
+      end
+  end
+endmodule
--- a/pipelined/testbench/fp/tests/fma.do
+++ b/pipelined/testbench/fp/tests/fma.do
@ -0,0 +1,50 @@
+# fma.do
+#
+# Modification by Oklahoma State University & Harvey Mudd College
+# Use with Testbench 
+# James Stine, 2008; David Harris 2021
+# Go Cowboys!!!!!!
+#
+# Compiles and simulates the FMA testbench (fma-testbench.sv) together with
+# fma.sv and unpack.sv.
+
+# run with vsim -do "do fma.do rv64fp"
+# The first argument ($1) names the configuration directory under ../../../config.
+
+# Use this fma.do file to run the FMA testbench.
+# Either bring up ModelSim and type the following at the "ModelSim>" prompt:
+#     do fma.do rv64fp
+# or, to run from a shell, type the following at the shell prompt:
+#     vsim -do "do fma.do rv64fp" -c
+# (omit the "-c" to see the GUI while running from the shell)
+
+# keep running after a $stop so the script is not left waiting at a breakpoint
+onbreak {resume}
+
+# create library (delete any existing work library first for a clean compile)
+if [file exists work] {
+    vdel -all
+}
+vlib work
+
+# compile source files
+# suppress spurious warnings about 
+# "Extra checking for conflicts with always_comb done at vopt time"
+# because vsim will run vopt
+
+# start and run simulation
+# remove +acc flag for faster sim during regressions if there is no need to access internal signals
+# $1 = the first word after the script name in the "do" call (the configuration name)
+vlog +incdir+../../../config/$1 +incdir+../../../config/shared fma-testbench.sv ../../../src/fpu/fma.sv ../../../src/fpu/unpack.sv -suppress 2583 -suppress 7063
+
+vsim -voptargs=+acc work.fmatestbench
+
+view wave
+#-- display input and output signals as hexadecimal values
+#do ./wave-dos/peripheral-waves.do
+#add log -recursive /*
+#do wave.do deal with when ready
+
+#-- Run the Simulation 
+#run 3600 
+run -all
+noview fma-testbench.sv
+view wave
+
--- a/pipelined/testbench/fp/tests/sim-fma
+++ b/pipelined/testbench/fp/tests/sim-fma
@ -0,0 +1 @@
+vsim -do "do fma.do rv64fp"
--- a/pipelined/testbench/fp/tests/sim-fma-batch
+++ b/pipelined/testbench/fp/tests/sim-fma-batch
@ -0,0 +1 @@
+vsim -c -do "do fma.do rv64fp"
--- a/tests/fp/create_vectors128fma.sh
+++ b/tests/fp/create_vectors128fma.sh
@ -0,0 +1,31 @@
+#!/bin/sh
+
+BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
+OUTPUT="./vectors"
+
+$BUILD/testfloat_gen -rnear_even f128_mulAdd > $OUTPUT/f128_mulAdd_rne.tv
+$BUILD/testfloat_gen -rminMag f128_mulAdd > $OUTPUT/f128_mulAdd_rz.tv
+$BUILD/testfloat_gen -rmax f128_mulAdd > $OUTPUT/f128_mulAdd_ru.tv
+$BUILD/testfloat_gen -rmin f128_mulAdd > $OUTPUT/f128_mulAdd_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag f128_mulAdd > $OUTPUT/f128_mulAdd_rnm.tv
+
+# format: X_Y_Z_answer_flags_Frm_Fmt
+sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rne.tv
+sed -ie 's/$/_0/' $OUTPUT/f128_mulAdd_rne.tv
+sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_rne.tv
+
+sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rz.tv
+sed -ie 's/$/_1/' $OUTPUT/f128_mulAdd_rz.tv
+sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_rz.tv
+
+sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_ru.tv
+sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_ru.tv
+sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_ru.tv
+
+sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rd.tv
+sed -ie 's/$/_2/' $OUTPUT/f128_mulAdd_rd.tv
+sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_rd.tv
+
+sed -i 's/ /_/g' $OUTPUT/f128_mulAdd_rnm.tv
+sed -ie 's/$/_4/' $OUTPUT/f128_mulAdd_rnm.tv
+sed -ie 's/$/_3/' $OUTPUT/f128_mulAdd_rnm.tv
--- a/tests/fp/create_vectors16fma.sh
+++ b/tests/fp/create_vectors16fma.sh
@ -0,0 +1,31 @@
+#!/bin/sh
+
+BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
+OUTPUT="./vectors"
+
+$BUILD/testfloat_gen -rnear_even f16_mulAdd > $OUTPUT/f16_mulAdd_rne.tv
+$BUILD/testfloat_gen -rminMag f16_mulAdd > $OUTPUT/f16_mulAdd_rz.tv
+$BUILD/testfloat_gen -rmax f16_mulAdd > $OUTPUT/f16_mulAdd_ru.tv
+$BUILD/testfloat_gen -rmin f16_mulAdd > $OUTPUT/f16_mulAdd_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag f16_mulAdd > $OUTPUT/f16_mulAdd_rnm.tv
+
+# format: X_Y_Z_answer_flags_Frm_Fmt
+sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rne.tv
+sed -ie 's/$/_0/' $OUTPUT/f16_mulAdd_rne.tv
+sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_rne.tv
+
+sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rz.tv
+sed -ie 's/$/_1/' $OUTPUT/f16_mulAdd_rz.tv
+sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_rz.tv
+
+sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_ru.tv
+sed -ie 's/$/_3/' $OUTPUT/f16_mulAdd_ru.tv
+sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_ru.tv
+
+sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rd.tv
+sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_rd.tv
+sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_rd.tv
+
+sed -i 's/ /_/g' $OUTPUT/f16_mulAdd_rnm.tv
+sed -ie 's/$/_4/' $OUTPUT/f16_mulAdd_rnm.tv
+sed -ie 's/$/_2/' $OUTPUT/f16_mulAdd_rnm.tv
--- a/tests/fp/create_vectors32fma.sh
+++ b/tests/fp/create_vectors32fma.sh
@ -0,0 +1,31 @@
+#!/bin/sh
+
+BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
+OUTPUT="./vectors"
+
+$BUILD/testfloat_gen -rnear_even f32_mulAdd > $OUTPUT/f32_mulAdd_rne.tv
+$BUILD/testfloat_gen -rminMag f32_mulAdd > $OUTPUT/f32_mulAdd_rz.tv
+$BUILD/testfloat_gen -rmax f32_mulAdd > $OUTPUT/f32_mulAdd_ru.tv
+$BUILD/testfloat_gen -rmin f32_mulAdd > $OUTPUT/f32_mulAdd_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag f32_mulAdd > $OUTPUT/f32_mulAdd_rnm.tv
+
+# format: X_Y_Z_answer_flags_Frm_Fmt
+sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rne.tv
+sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_rne.tv
+sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_rne.tv
+
+sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rz.tv
+sed -ie 's/$/_1/' $OUTPUT/f32_mulAdd_rz.tv
+sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_rz.tv
+
+sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_ru.tv
+sed -ie 's/$/_3/' $OUTPUT/f32_mulAdd_ru.tv
+sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_ru.tv
+
+sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rd.tv
+sed -ie 's/$/_2/' $OUTPUT/f32_mulAdd_rd.tv
+sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_rd.tv
+
+sed -i 's/ /_/g' $OUTPUT/f32_mulAdd_rnm.tv
+sed -ie 's/$/_4/' $OUTPUT/f32_mulAdd_rnm.tv
+sed -ie 's/$/_0/' $OUTPUT/f32_mulAdd_rnm.tv
--- a/tests/fp/create_vectors64fma.sh
+++ b/tests/fp/create_vectors64fma.sh
@ -0,0 +1,31 @@
+#!/bin/sh
+
+BUILD="./TestFloat-3e/build/Linux-x86_64-GCC"
+OUTPUT="./vectors"
+
+$BUILD/testfloat_gen -rnear_even f64_mulAdd > $OUTPUT/f64_mulAdd_rne.tv
+$BUILD/testfloat_gen -rminMag f64_mulAdd > $OUTPUT/f64_mulAdd_rz.tv
+$BUILD/testfloat_gen -rmax f64_mulAdd > $OUTPUT/f64_mulAdd_ru.tv
+$BUILD/testfloat_gen -rmin f64_mulAdd > $OUTPUT/f64_mulAdd_rd.tv
+$BUILD/testfloat_gen -rnear_maxMag f64_mulAdd > $OUTPUT/f64_mulAdd_rnm.tv
+
+# format: X_Y_Z_answer_flags_Frm_Fmt
+sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rne.tv
+sed -ie 's/$/_0/' $OUTPUT/f64_mulAdd_rne.tv
+sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_rne.tv
+
+sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rz.tv
+sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_rz.tv
+sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_rz.tv
+
+sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_ru.tv
+sed -ie 's/$/_3/' $OUTPUT/f64_mulAdd_ru.tv
+sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_ru.tv
+
+sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rd.tv
+sed -ie 's/$/_2/' $OUTPUT/f64_mulAdd_rd.tv
+sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_rd.tv
+
+sed -i 's/ /_/g' $OUTPUT/f64_mulAdd_rnm.tv
+sed -ie 's/$/_4/' $OUTPUT/f64_mulAdd_rnm.tv
+sed -ie 's/$/_1/' $OUTPUT/f64_mulAdd_rnm.tv
--- a/tests/fp/run_all.sh
+++ b/tests/fp/run_all.sh
@ -8,3 +8,7 @@
 ./create_vectors64cmp.sh
 ./create_vectors64.sh
 ./create_vectorsi.sh
+./create_vectors16fma.sh
+./create_vectors32fma.sh
+./create_vectors64fma.sh
+./create_vectors128fma.sh