postprocessing unit created and passing all tests

This commit is contained in:
Katherine Parry 2022-06-13 22:47:51 +00:00
parent 802bfd74fb
commit 5f7072bd96
42 changed files with 94516 additions and 2931 deletions

@ -1 +1 @@
Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86
Subproject commit be67c99bd461742aa1c100bcc0732657faae2230

View file

@ -0,0 +1,23 @@
# Makefile
# Builds every *.c file in this directory into an executable of the same
# name, linked against SoftFloat. RISCV must point at the tool install root.
CC = gcc
CFLAGS = -O3
LFLAGS = -L.
# Link against the riscv-isa-sim version of SoftFloat rather than
# the regular version to get RISC-V NaN behavior
IFLAGS = -I$(RISCV)/riscv-isa-sim/softfloat
# LIBS used to be set twice, so the earlier "-lm" was silently discarded;
# keep both, with -lm after the archive so the archive's references resolve.
LIBS = $(RISCV)/riscv-isa-sim/build/libsoftfloat.a -lm
#IFLAGS = -I../../../addins/SoftFloat-3e/source/include/
#LIBS = ../../../addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a

# Expand the source list once, at parse time.
SRCS := $(wildcard *.c)
PROGS := $(patsubst %.c,%,$(SRCS))

# all/clean are commands, not files; without this a file named "clean" or
# "all" (or an all.c matching the pattern rule below) would confuse make.
.PHONY: all clean

all: $(PROGS)

# Compile and link one program directly from its single source file.
%: %.c
	$(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) -o $@ $< $(LIBS)

clean:
	rm -f $(PROGS)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,23 @@
# fma.do
#
# ModelSim script: compiles fma16.v and testbench.v, loads testbench_fma16,
# adds the testbench signals to the wave window and runs to completion.
#
# Usage: vsim -do "do fma.do"
#        (insert -c before -do for batch simulation)

onbreak {resume}

# create library, then compile, optimize and load the design
vlib worklib
vlog -lint -sv -work worklib fma16.v testbench.v
vopt +acc worklib.testbench_fma16 -work worklib -o testbenchopt
vsim -lib worklib testbenchopt

# testbench-level signals, in the order they should appear in the wave window
foreach sig {clk reset x y z result rexpected} {
    add wave sim:/testbench_fma16/$sig
}

run -all

View file

@ -0,0 +1,268 @@
// fma16.sv
// David_Harris@hmc.edu 26 February 2022
// 16-bit floating-point multiply-accumulate
// Operation: general purpose multiply, add, fma, with optional negation
// If mul=1, p = x * y. Else p = x.
// If add=1, result = p + z. Else result = p.
// If negr or negz = 1, negate result or z to handle negations and subtractions
// fadd: mul = 0, add = 1, negr = negz = 0
// fsub: mul = 0, add = 1, negr = 0, negz = 1
// fmul: mul = 1, add = 0, negr = 0, negz = 0
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
// Format parameters for the 16-bit floating-point format used by fma16
`define FFLEN 16                    // total width in bits
`define Nf 10                       // fraction bits
`define Ne 5                        // exponent bits
`define BIAS 15                     // exponent bias
`define EMIN (-(2**(`Ne-1)-1))
`define EMAX (2**(`Ne-1)-1)
`define NaN 16'h7E00                // NaN pattern produced by the special-case logic
`define INF 15'h7C00                // exponent+fraction of infinity; a sign bit is prepended at use
// Rounding modes. The literals are 2 bits wide to match the 2-bit roundmode
// port of fma16 (they were previously written as 3'bNN with only two digits,
// which mismatched that port's width).
`define RZ 2'b00
`define RNE 2'b01
`define RM 2'b10
`define RP 2'b11
// fma16: top level of the 16-bit multiply-add unit.
// Instantiates unpack16 (field extraction and special-value detection), mult16
// (product), add16 (aligned add) and postproc16 (special cases and packing).
//   x, y, z     operands
//   mul, add    operation selects (see the header comment at the top of the file)
//   negr, negz  negation controls
//   roundmode   rounding mode select
//   result      packed 16-bit result
// NOTE(review): zs is only driven by the commented-out signadj16 instance, so in
// this version it is undriven where add16 and postproc16 read it -- confirm intent.
// NOTE(review): negr is not connected to any active instance.
module fma16(
input logic [`FFLEN-1:0] x, y, z,
input logic mul, add, negr, negz,
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rm, 11: rp (same encoding as the `RZ/`RNE/`RM/`RP defines)
output logic [`FFLEN-1:0] result);
logic [`Nf:0] xm, ym, zm; // U1.Nf
logic [`Ne-1:0] xe, ye, ze; // B_Ne
logic xs, ys, zs;
logic zs1; // sign before optional negation
logic [2*`Nf+1:0] pm; // U2.2Nf
logic [`Ne:0] pe; // B_Ne+1
logic ps; // sign of product
logic [22:0] rm;
logic [`Ne+1:0] re;
logic rs;
logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan;
logic [`Ne+1:0] re2;
unpack16 unpack(x, y, z, xm, ym, zm, xe, ye, ze, xs, ys, zs1, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan); // unpack inputs
//signadj16 signadj(negr, negz, xs, ys, zs1, ps, zs); // handle negations
mult16 mult16(mul, xm, ym, xe, ye, xs, ys, pm, pe, ps); // p = x * y
add16 add16(add, pm, zm, pe, ze, ps, zs, negz, rm, re, re2, rs); // r = z + p
postproc16 post(roundmode, xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan, rm, zm, re, ze, rs, zs, ps, re2, result); // normalize, round, pack
endmodule
// mult16: optional multiplier stage.
// When mul = 1 the outputs describe p = x * y; when mul = 0 they pass x through
// unchanged (p = x), as described in the header comment of this file.
//   xm, ym  significands with leading 1 (U1.Nf)
//   xe, ye  biased exponents
//   xs, ys  signs
//   pm      product significand (U2.2Nf)
//   pe      biased product exponent, one bit wider than the inputs
//   ps      sign of p
module mult16(
  input  logic              mul,
  input  logic [`Nf:0]      xm, ym,
  input  logic [`Ne-1:0]    xe, ye,
  input  logic              xs, ys,
  output logic [2*`Nf+1:0]  pm,
  output logic [`Ne:0]      pe,
  output logic              ps);

  // only multiply if mul = 1; otherwise place xm in the U2.2Nf format of pm
  // (one leading 0, Nf trailing 0s)
  assign pm = mul ? xm * ym : {1'b0, xm, {`Nf{1'b0}}}; // multiply mantissas
  assign pe = mul ? xe + ye - `BIAS : {1'b0, xe};      // add exponents, account for bias
  // The sign of p is xs ^ ys only when a multiply actually happens; in
  // pass-through mode p = x, so y's sign bit must not affect it.
  assign ps = mul ? xs ^ ys : xs;
endmodule
// add16: alignment shift, significand addition and first normalization shift
// for r = p + z, where (pm, pe, ps) is the product and (zm, ze, zs) the addend.
// negz is XORed into the effective-subtract decision.
// NOTE(review): this module is unfinished. rm is never assigned in the active code
// (its only assignment is inside the commented-out block at the bottom), the add
// input is not read, and zsticky and rnormed are computed but not consumed.
module add16(
input logic add,
input logic [2*`Nf+1:0] pm, // U2.2Nf
input logic [`Nf:0] zm, // U1.Nf
input logic [`Ne:0] pe, // B_Ne+1
input logic [`Ne-1:0] ze, // B_Ne
input logic ps, zs,
input logic negz,
output logic [22:0] rm,
output logic [`Ne+1:0] re, // B_Ne+2
output logic [`Ne+1:0] re2,
output logic rs);
logic [`Nf*3+7:0] paligned, zaligned, zalignedaddsub, r, r2, rnormed, rnormed2; // U(Nf+6).(2Nf+2) aligned significands
logic signed [`Ne:0] ExpDiff; // Q(Ne+2).0
logic [`Ne:0] AlignCnt; // U(Ne+3) bits to right shift Z for alignment *** check size.
logic [`Nf-1:0] prezsticky;
logic zsticky;
logic effectivesub;
logic rs0;
logic [`Ne:0] leadingzeros, NormCnt; // *** should paramterize size
logic [`Ne:0] re1;
// Alignment shift
assign paligned = {{(`Nf+4){1'b0}}, pm, 2'b00}; // constant shift to prepend leading and trailing 0s.
assign ExpDiff = pe - {1'b0, ze}; // Compute exponent difference as signed number
// Selects the right-shift amount for Z and a first exponent estimate (re)
// from four ranges of ExpDiff.
always_comb // AlignCount mux; see Muller page 254
if (ExpDiff <= (-2*`Nf - 1)) begin AlignCnt = 3*`Nf + 7; re = {1'b0, pe}; end
else if (ExpDiff <= 2) begin AlignCnt = `Nf + 4 - ExpDiff; re = {1'b0, pe}; end
else if (ExpDiff <= `Nf+3) begin AlignCnt = `Nf + 4 - ExpDiff; re = {2'b0, ze}; end
else begin AlignCnt = 0; re = {2'b0, ze}; end
// Shift Zm right by AlignCnt. Produce 3Nf+8 bits of Zaligned in U(Nf+6).(2Nf+2) and Nf bits becoming sticky
assign {zaligned, prezsticky} = {zm, {(3*`Nf+7){1'b0}}} >> AlignCnt; //Right shift
assign zsticky = |prezsticky; // Sticky bit if any of the discarded bits were 1
// Effective subtraction
assign effectivesub = ps ^ zs ^ negz; // subtract |z| from |p|
assign zalignedaddsub = effectivesub ? ~zaligned : zaligned; // invert zaligned for subtraction
// Adder
// effectivesub is also added as the +1 that completes the two's complement of zaligned
assign r = paligned + zalignedaddsub + {{`Nf*3+7{1'b0}}, effectivesub}; // add aligned significands
assign rs0 = r[`Nf*3+7]; // sign of the initial result
assign r2 = rs0 ? ~r+1 : r; // invert sum if negative; could optimize with end-around carry?
// Sign Logic
assign rs = ps ^ rs0; // flip the sign if necessary
// Leading zero counter
lzc lzc(r2, leadingzeros); // count number of leading zeros in 2Nf+5 lower digits of r2
assign re1 = pe +2 - leadingzeros; // *** declare, # of bits
// Normalization shift
// NOTE(review): the final else branch below assigns re, which the AlignCount
// always_comb above already assigns, and leaves re2 unassigned on that path;
// it looks like it was meant to assign re2 -- confirm.
// NOTE(review): re1 is unsigned while `EMIN expands to a negative constant;
// check that the re1 >= `EMIN comparison behaves as intended.
always_comb // NormCount mux
if (ExpDiff < 3) begin
if (re1 >= `EMIN) begin NormCnt = `Nf + 3 + leadingzeros; re2 = {1'b0, re1}; end
else begin NormCnt = `Nf + 5 + pe - `EMIN; re2 = `EMIN; end
end else begin NormCnt = AlignCnt; re = {2'b00, ze}; end
assign rnormed = r2 << NormCnt; // *** update sticky
/* temporarily comment out to start synth
// One-bit secondary normalization
if (ExpDiff <= 2) begin rnormed2 = rnormed; re2 = re; end // no secondary normalization
else begin // *** handle sticky
if (rnormed[***]) begin rnormed2 = rnormed >> 1; re2 = re+1; end
else if (rnormed[***-1]) begin rnormed2 = rnormed; re2 = re; end
else begin rnormed2 = rnormed << 1; re2 = re-1; end
end
// round
assign l = rnormed2[***]; // least significant bit
assign r = rnormed2[***-1]; // rounding bit
assign s = ***; // sticky bit
always_comb
case (roundmode)
RZ: roundup = 0;
RP: roundup = ~rs & (r | s);
RM: roundup = rs & (r | s);
RNE: roundup = r & (s | l);
default: roundup = 0;
endcase
assign {re3, rrounded} = {re2, rnormed2[***]} + roundup; // increment if necessary
*/
// *** need to handle rounding to MAXNUM vs. INFINITY
// add or pass product through
/* assign rm = add ? arm : {1'b0, pm};
assign re = add ? are : {1'b0, pe};
assign rs = add ? ars : ps; */
endmodule
// lzc: leading-zero counter interface used by add16.
// NOTE(review): the body is empty, so leadingzeros is never driven; the
// counting logic still has to be written.
module lzc(
input logic [`Nf*3+7:0] r2,
output logic [`Ne:0] leadingzeros
);
endmodule
// postproc16: special-case selection, one-bit normalization and packing of the result.
// NOTE(review): result is driven twice -- by the special-case always_comb below
// and by the continuous assign near the end of the module. Only one of them can
// be the intended driver; confirm which.
// NOTE(review): roundmode is not read, and invalid and ueb are computed but never used.
module postproc16(
input logic [1:0] roundmode,
input logic xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan,
input logic [22:0] rm,
input logic [`Nf:0] zm, // U1.Nf
input logic [6:0] re,
input logic [`Ne-1:0] ze, // B_Ne
input logic rs, zs, ps,
input logic [`Ne+1:0] re2,
output logic [15:0] result);
logic [9:0] uf, uff;
logic [6:0] ue;
logic [6:0] ueb, uebiased;
logic invalid;
// Special cases
// *** not handling signaling NaN
// *** also add overflow/underflow/inexact
// Priority chain: the first matching condition decides the result.
always_comb begin
if (xnan | ynan | znan) begin result = `NaN; invalid = 0; end // propagate NANs
else if ((xinf | yinf) & zinf & (ps ^ zs)) begin result = `NaN; invalid = 1; end // infinity - infinity
else if (xzero & yinf | xinf & yzero) begin result = `NaN; invalid = 1; end // zero times infinity
else if (xinf | yinf) begin result = {ps, `INF}; invalid = 0; end // X or Y
else if (zinf) begin result = {zs, `INF}; invalid = 0; end // infinite Z
else if (xzero | yzero) begin result = {zs, ze, zm[`Nf-1:0]}; invalid = 0; end // zero product: result is z
else if (re2 >= `EMAX) begin result = {rs, `INF}; invalid = 0; end // exponent too large: infinity
else begin result = {rs, re[`Ne-1:0], rm[`Nf-1:0]}; invalid = 0; end
end
// Pick the fraction window and exponent depending on whether rm[21] is set.
always_comb
if (rm[21]) begin // normalization right shift by 1 and bump up exponent;
ue = re + 7'b1;
uf = rm[20:11];
end else begin // no normalization shift needed
ue = re;
uf = rm[19:10];
end
// overflow
// NOTE(review): the overflow branch is commented out, so uebiased and uff are
// left unassigned when ue >= 46.
always_comb begin
ueb = ue-7'd15;
if (ue >= 7'd46) begin // overflow
/* uebiased = 7'd30;
uff = 10'h3ff; */
end else begin
uebiased = ue-7'd15;
uff = uf;
end
end
assign result = {rs, uebiased[4:0], uff};
// add special case handling for zeros, NaN, Infinity
endmodule
// signadj16: derives the product sign and the (optionally negated) addend sign.
//   ps = xs ^ ys      sign of x * y
//   zs = zs1 ^ negz   sign of z after the negz control is applied
// negr is part of the port list but is not used by this logic.
module signadj16(
  input  logic negr, negz,
  input  logic xs, ys, zs1,
  output logic ps, zs);

  always_comb begin
    zs = zs1 ^ negz; // addend sign, flipped when negz is set
    ps = xs ^ ys;    // product is negative when exactly one factor is negative
  end
endmodule
// unpack16: splits each of the three operands into significand, exponent,
// sign and zero/inf/nan indicators by instantiating unpacknum16 once per operand.
module unpack16(
  input  logic [15:0] x, y, z,
  output logic [10:0] xm, ym, zm,
  output logic [4:0]  xe, ye, ze,
  output logic        xs, ys, zs,
  output logic        xzero, yzero, zzero, xinf, yinf, zinf, xnan, ynan, znan);

  unpacknum16 upx(.num(x), .m(xm), .e(xe), .s(xs), .zero(xzero), .inf(xinf), .nan(xnan));
  unpacknum16 upy(.num(y), .m(ym), .e(ye), .s(ys), .zero(yzero), .inf(yinf), .nan(ynan));
  unpacknum16 upz(.num(z), .m(zm), .e(ze), .s(zs), .zero(zzero), .inf(zinf), .nan(znan));
endmodule
// unpacknum16: unpacks one 16-bit floating-point number.
//   m     significand: the 10 fraction bits with a leading 1 prepended
//   e     exponent field, still biased
//   s     sign bit
//   zero  exponent and fraction both 0
//   inf   exponent 31, fraction 0
//   nan   exponent 31, fraction nonzero
module unpacknum16(
  input  logic [15:0] num,
  output logic [10:0] m,
  output logic [4:0]  e,
  output logic        s,
  output logic        zero, inf, nan);

  logic [9:0] f;        // fraction without leading 1
  logic [4:0] eb;       // biased exponent
  logic       emin;     // exponent field is all zeros
  logic       emax;     // exponent field is all ones
  logic       fzero;    // fraction field is all zeros

  // pull bit fields out of floating-point number
  assign s  = num[15];
  assign eb = num[14:10];
  assign f  = num[9:0];

  assign m = {1'b1, f}; // prepend leading 1 to fraction
  assign e = eb;        // leave bias in exponent ***

  // classify
  assign emin  = (e == 0);
  assign emax  = (e == 31);
  assign fzero = (f == 0);
  assign zero  = emin && fzero;
  assign inf   = emax && fzero;
  assign nan   = emax && !fzero;
endmodule

View file

@ -0,0 +1,24 @@
// fma16.sv
// David_Harris@hmc.edu 26 February 2022
// 16-bit floating-point multiply-accumulate
// Operation: general purpose multiply, add, fma, with optional negation
// If mul=1, p = x * y. Else p = x.
// If add=1, result = p + z. Else result = p.
// If negr or negz = 1, negate result or z to handle negations and subtractions
// fadd: mul = 0, add = 1, negr = negz = 0
// fsub: mul = 0, add = 1, negr = 0, negz = 1
// fmul: mul = 1, add = 0, negr = 0, negz = 0
// fmadd: mul = 1, add = 1, negr = 0, negz = 0
// fmsub: mul = 1, add = 1, negr = 0, negz = 1
// fnmadd: mul = 1, add = 1, negr = 1, negz = 0
// fnmsub: mul = 1, add = 1, negr = 1, negz = 1
// fma16 skeleton: port list only.
// NOTE(review): the module body is empty, so result is never driven here;
// presumably this is the starting point to be filled in -- confirm.
module fma16(
input logic [15:0] x, y, z,
input logic mul, add, negr, negz,
input logic [1:0] roundmode, // 00: rz, 01: rne, 10: rm (toward -inf), 11: rp (toward +inf), as encoded by the test-vector generators
output logic [15:0] result);
endmodule

View file

@ -0,0 +1,240 @@
#include <stdio.h>
#include <stdint.h>
#include "softfloat.h"
#include "softfloat_types.h"
// Lets the bits of a SoftFloat float32_t be read back as a native C float
// (see convFloat).
typedef union sp {
float32_t v;
float f;
} sp;
// lists of tests, terminated with 0x8000
// Exponent lists hold values for the 5-bit exponent field and fraction lists
// hold values for the 10-bit fraction field; prepTests() combines every pair
// as f | e<<10.
uint16_t easyExponents[] = {15, 0x8000};
uint16_t medExponents[] = {1, 14, 15, 16, 20, 30, 0x8000};
uint16_t allExponents[] = {1, 15, 16, 30, 31, 0x8000};
uint16_t easyFracts[] = {0, 0x200, 0x8000}; // 1.0 and 1.1
uint16_t medFracts[] = {0, 0x200, 0x001, 0x3FF, 0x8000};
// NOTE(review): the three lists below are not referenced by the code visible in
// this file. infs and nans have no 0x8000 terminator, and in zeros the value
// 0x8000 could be read either as -0 or as the terminator -- confirm before use.
uint16_t zeros[] = {0x0000, 0x8000};
uint16_t infs[] = {0x7C00, 0xFC00};
uint16_t nans[] = {0x7D00, 0x7D01};
/* Put SoftFloat into its starting configuration for test generation:
 * no exception flags pending, tininess detected before rounding, and
 * round-toward-zero (minMag) as the rounding mode. */
void softfloatInit(void) {
    softfloat_exceptionFlags = 0;
    softfloat_detectTininess = softfloat_tininess_beforeRounding;
    softfloat_roundingMode   = softfloat_round_minMag;
}
/* Convert a SoftFloat half-precision value to a native C float.
 * The value is widened with f16_to_f32 and its bits are then reinterpreted
 * as a float through the sp union. */
float convFloat(float16_t f16) {
    sp bits;

    bits.v = f16_to_f32(f16);
    return bits.f;
}
/* Emit one test vector line to fptr.
 *
 * The operands are adjusted according to the operation (y forced to 1.0 when
 * mul is 0, z forced to 0 when add is 0, signs flipped for negp/negz), the
 * expected result is computed with f16_mulAdd under the currently selected
 * SoftFloat rounding mode, and a line of the form
 *     x_y_z_op_result_flags // human-readable summary
 * is written. op packs roundingMode, mul, add, negp and negz; flags packs the
 * invalid, overflow, underflow and inexact exception flags into one nibble.
 *
 * Results that are denormal, or that are zero / infinite (or 0x7BFF) / NaN
 * while the corresponding *Allowed argument is 0, are still written but
 * prefixed with a "// skip ..." marker so the line becomes a comment.
 */
void genCase(FILE *fptr, float16_t x, float16_t y, float16_t z, int mul, int add, int negp, int negz, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
    float16_t result, resultmag, smallest;
    int op, flagVals;
    char calc[80], flags[80];
    float xf, yf, zf, rf;

    if (!mul) y.v = 0x3C00; // force y to 1 to avoid multiply
    if (!add) z.v = 0x0000; // force z to 0 to avoid add
    if (negp) x.v ^= 0x8000; // flip sign of x to negate p
    if (negz) z.v ^= 0x8000; // flip sign of z to negate z
    op = roundingMode << 4 | mul<<3 | add<<2 | negp<<1 | negz;
    // printf("op = %02x rm %d mul %d add %d negp %d negz %d\n", op, roundingMode, mul, add, negp, negz);

    softfloat_exceptionFlags = 0; // clear exceptions
    result = f16_mulAdd(x, y, z);

    // snprintf keeps the text inside the fixed-size buffers
    snprintf(flags, sizeof flags, "NV: %d OF: %d UF: %d NX: %d",
             (softfloat_exceptionFlags >> 4) % 2,
             (softfloat_exceptionFlags >> 2) % 2,
             (softfloat_exceptionFlags >> 1) % 2,
             (softfloat_exceptionFlags) % 2);
    // pack these four flags into one nibble, discarding DZ flag
    flagVals = (softfloat_exceptionFlags & 0x7) | ((softfloat_exceptionFlags >> 1) & 0x8);

    // convert to floats for printing
    xf = convFloat(x);
    yf = convFloat(y);
    zf = convFloat(z);
    rf = convFloat(result);
    if (mul) {
        if (add) snprintf(calc, sizeof calc, "%f * %f + %f = %f", xf, yf, zf, rf);
        else     snprintf(calc, sizeof calc, "%f * %f = %f", xf, yf, rf);
    } else {
        snprintf(calc, sizeof calc, "%f + %f = %f", xf, zf, rf);
    }

    // omit denorms, which aren't required for this project
    smallest.v = 0x0400;
    resultmag = result;
    resultmag.v &= 0x7FFF; // take absolute value
    if (f16_lt(resultmag, smallest) && (resultmag.v != 0x0000)) fprintf (fptr, "// skip denorm: ");
    if (resultmag.v == 0x0000 && !zeroAllowed) fprintf(fptr, "// skip zero: ");
    if ((resultmag.v == 0x7C00 || resultmag.v == 0x7BFF) && !infAllowed) fprintf(fptr, "// Skip inf: ");
    if (resultmag.v > 0x7C00 && !nanAllowed) fprintf(fptr, "// Skip NaN: ");
    fprintf(fptr, "%04x_%04x_%04x_%02x_%04x_%01x // %s %s\n", x.v, y.v, z.v, op, result.v, flagVals, calc, flags);
}
/* Write the description line to fptr and build the list of operand values.
 *
 * e and f are 0x8000-terminated lists of exponent-field and fraction-field
 * values. Every (exponent, fraction) pair is combined as f | e<<10 and stored
 * in cases[]; *numCases receives the number of entries written. The caller
 * must provide a cases[] array large enough for all pairs. testName is
 * accepted for symmetry with the callers but is not used here.
 */
void prepTests(uint16_t *e, uint16_t *f, char *testName, char *desc, float16_t *cases,
               FILE *fptr, int *numCases) {
    int i, j;

    (void)testName;
    // desc is data, not a format string: a '%' in it must be printed literally
    fputs(desc, fptr);
    fputc('\n', fptr);

    *numCases = 0;
    for (i = 0; e[i] != 0x8000; i++)
        for (j = 0; f[j] != 0x8000; j++) {
            cases[*numCases].v = f[j] | e[i]<<10;
            *numCases = *numCases + 1;
        }
}
/* Generate multiply test vectors into work/<testName>.tv.
 *
 * Every pair (x, y) of values built from the exponent list e and fraction
 * list f is emitted with z = 0; when sgn is 1 each pair is emitted a second
 * time with the sign of y flipped. The remaining arguments are passed through
 * to genCase. If the output file cannot be opened, an error is reported and
 * no vectors are generated.
 */
void genMulTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
    int i, j, k, numCases;
    float16_t x, y, z;
    float16_t cases[100000];
    FILE *fptr;
    char fn[80];

    snprintf(fn, sizeof fn, "work/%s.tv", testName);
    fptr = fopen(fn, "w");
    if (fptr == NULL) { // e.g. the work/ directory does not exist
        perror(fn);
        return;
    }
    prepTests(e, f, testName, desc, cases, fptr, &numCases);
    z.v = 0x0000;
    for (i=0; i < numCases; i++) {
        x.v = cases[i].v;
        for (j=0; j<numCases; j++) {
            y.v = cases[j].v;
            for (k=0; k<=sgn; k++) {
                y.v ^= (k<<15); // k = 0 leaves y alone, k = 1 flips its sign
                genCase(fptr, x, y, z, 1, 0, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
            }
        }
    }
    fclose(fptr);
}
/* Generate add test vectors into work/<testName>.tv.
 *
 * Every pair (x, z) of values built from the exponent list e and fraction
 * list f is emitted with mul = 0, add = 1; when sgn is 1 each pair is emitted
 * a second time with the sign of z flipped. The remaining arguments are
 * passed through to genCase. If the output file cannot be opened, an error is
 * reported and no vectors are generated.
 */
void genAddTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
    int i, j, k, numCases;
    float16_t x, y, z;
    float16_t cases[100000];
    FILE *fptr;
    char fn[80];

    snprintf(fn, sizeof fn, "work/%s.tv", testName);
    fptr = fopen(fn, "w");
    if (fptr == NULL) { // e.g. the work/ directory does not exist
        perror(fn);
        return;
    }
    prepTests(e, f, testName, desc, cases, fptr, &numCases);
    y.v = 0x0000; // placeholder; genCase replaces y because mul is 0
    for (i=0; i < numCases; i++) {
        x.v = cases[i].v;
        for (j=0; j<numCases; j++) {
            z.v = cases[j].v;
            for (k=0; k<=sgn; k++) {
                z.v ^= (k<<15); // k = 0 leaves z alone, k = 1 flips its sign
                genCase(fptr, x, y, z, 0, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
            }
        }
    }
    fclose(fptr);
}
/* Generate fused multiply-add test vectors into work/<testName>.tv.
 *
 * Every triple (x, y, z) of values built from the exponent list e and
 * fraction list f is emitted with mul = 1, add = 1; when sgn is 1 each triple
 * is emitted a second time with the sign of z flipped. The remaining
 * arguments are passed through to genCase. If the output file cannot be
 * opened, an error is reported and no vectors are generated.
 */
void genFMATests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
    int i, j, k, l, numCases;
    float16_t x, y, z;
    float16_t cases[100000];
    FILE *fptr;
    char fn[80];

    snprintf(fn, sizeof fn, "work/%s.tv", testName);
    fptr = fopen(fn, "w");
    if (fptr == NULL) { // e.g. the work/ directory does not exist
        perror(fn);
        return;
    }
    prepTests(e, f, testName, desc, cases, fptr, &numCases);
    for (i=0; i < numCases; i++) {
        x.v = cases[i].v;
        for (j=0; j<numCases; j++) {
            y.v = cases[j].v;
            for (k=0; k<numCases; k++) {
                z.v = cases[k].v;
                for (l=0; l<=sgn; l++) {
                    z.v ^= (l<<15); // l = 0 leaves z alone, l = 1 flips its sign
                    genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
                }
            }
        }
    }
    fclose(fptr);
}
/* Generate fused multiply-add vectors that include special values into
 * work/<testName>.tv.
 *
 * The case list built from e and f is extended with +0 and -0, then every
 * triple (x, y, z) is emitted with mul = 1, add = 1. When sgn is 1 the sign
 * of each operand is toggled in nested loops; the operands are not reset
 * between toggles, so the order of sign combinations varies from triple to
 * triple, but each triple is emitted with all eight combinations. The
 * remaining arguments are passed through to genCase. If the output file
 * cannot be opened, an error is reported and no vectors are generated.
 */
void genSpecialTests(uint16_t *e, uint16_t *f, int sgn, char *testName, char *desc, int roundingMode, int zeroAllowed, int infAllowed, int nanAllowed) {
    int i, j, k, sx, sy, sz, numCases;
    float16_t x, y, z;
    float16_t cases[100000];
    FILE *fptr;
    char fn[80];

    snprintf(fn, sizeof fn, "work/%s.tv", testName);
    fptr = fopen(fn, "w");
    if (fptr == NULL) { // e.g. the work/ directory does not exist
        perror(fn);
        return;
    }
    prepTests(e, f, testName, desc, cases, fptr, &numCases);
    cases[numCases].v = 0x0000;   // add +0 case
    cases[numCases+1].v = 0x8000; // add -0 case
    numCases += 2;
    for (i=0; i < numCases; i++) {
        x.v = cases[i].v;
        for (j=0; j<numCases; j++) {
            y.v = cases[j].v;
            for (k=0; k<numCases; k++) {
                z.v = cases[k].v;
                for (sx=0; sx<=sgn; sx++) {
                    x.v ^= (sx<<15);
                    for (sy=0; sy<=sgn; sy++) {
                        y.v ^= (sy<<15);
                        for (sz=0; sz<=sgn; sz++) {
                            z.v ^= (sz<<15);
                            genCase(fptr, x, y, z, 1, 1, 0, 0, roundingMode, zeroAllowed, infAllowed, nanAllowed);
                        }
                    }
                }
            }
        }
    }
    fclose(fptr);
}
int main()
{
softfloatInit(); // configure softfloat modes
// Test cases: multiplication
genMulTests(easyExponents, easyFracts, 0, "fmul_0", "// Multiply with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genMulTests(medExponents, medFracts, 0, "fmul_1", "// Multiply with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genMulTests(medExponents, medFracts, 1, "fmul_2", "// Multiply with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: addition
genAddTests(easyExponents, easyFracts, 0, "fadd_0", "// Add with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genAddTests(medExponents, medFracts, 0, "fadd_1", "// Add with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genAddTests(medExponents, medFracts, 1, "fadd_2", "// Add with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: FMA
genFMATests(easyExponents, easyFracts, 0, "fma_0", "// FMA with exponent of 0, significand of 1.0 and 1.1, RZ", 0, 0, 0, 0);
genFMATests(medExponents, medFracts, 0, "fma_1", "// FMA with various exponents and unsigned fractions, RZ", 0, 0, 0, 0);
genFMATests(medExponents, medFracts, 1, "fma_2", "// FMA with various exponents and signed fractions, RZ", 0, 0, 0, 0);
// Test cases: Zero, Infinity, NaN
genSpecialTests(allExponents, medFracts, 1, "fma_special_rz", "// FMA with special cases, RZ", 0, 1, 1, 1);
// Full test cases with other rounding modes
softfloat_roundingMode = softfloat_round_near_even;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rne", "// FMA with special cases, RNE", 1, 1, 1, 1);
softfloat_roundingMode = softfloat_round_min;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rm", "// FMA with special cases, RM", 2, 1, 1, 1);
softfloat_roundingMode = softfloat_round_max;
genSpecialTests(allExponents, medFracts, 1, "fma_special_rp", "// FMA with special cases, RP", 3, 1, 1, 1);
return 0;
}

8
examples/verilog/fma/lint-fma Executable file
View file

@ -0,0 +1,8 @@
#!/bin/bash
# check for warnings in Verilog code
# The verilator lint tool is faster and better than Modelsim so it is best to run this first.
# Expects to be run from the directory that contains fma16.v.
export PATH=$PATH:/usr/local/bin/

# Fail with a clear message if verilator is missing; previously an empty
# $verilator caused "--lint-only" itself to be executed as the command.
if ! verilator=$(command -v verilator); then
    echo "lint-fma: verilator not found in PATH" >&2
    exit 1
fi

"$verilator" --lint-only --top-module fma16 fma16.v

2
examples/verilog/fma/sim-fma Executable file
View file

@ -0,0 +1,2 @@
# Run the fma.do simulation script in vsim (interactive, i.e. without -c)
vsim -do "do fma.do"

View file

@ -0,0 +1 @@
# Run the fma.do simulation script in vsim batch mode (-c)
vsim -c -do "do fma.do"

1
examples/verilog/fma/synth Executable file
View file

@ -0,0 +1 @@
# Run the "synth" target of the Makefile in ../../../synthDC with DESIGN=fma16
make -C ../../../synthDC synth DESIGN=fma16

View file

@ -0,0 +1,52 @@
/* verilator lint_off STMTDLY */
// testbench_fma16: self-checking testbench for fma16.
// Reads 76-bit test vectors from work/fmul_0.tv, applies one vector per clock
// cycle and compares result against the expected value on the falling edge.
// Each vector is {x[15:0], y[15:0], z[15:0], ctrl[7:0], rexpected[15:0], flagsexpected[3:0]}.
module testbench_fma16;
reg clk, reset;
reg [15:0] x, y, z, rexpected;
wire [15:0] result;
reg [7:0] ctrl;
reg [3:0] flagsexpected; // loaded from the vector but not checked yet
reg mul, add, negp, negz;
reg [1:0] roundmode;
reg [31:0] vectornum, errors;
reg [75:0] testvectors[10000:0]; // room for 10001 vectors
// instantiate device under test
fma16 dut(x, y, z, mul, add, negp, negz, roundmode, result);
// generate clock
always
begin
clk = 1; #5; clk = 0; #5;
end
// at start of test, load vectors and pulse reset
initial
begin
$readmemh("work/fmul_0.tv", testvectors);
vectornum = 0; errors = 0;
reset = 1; #22; reset = 0;
end
// apply test vectors on rising edge of clk
always @(posedge clk)
begin
#1; {x, y, z, ctrl, rexpected, flagsexpected} = testvectors[vectornum];
// low 6 bits of ctrl: roundmode[1:0], then mul, add, negp, negz
{roundmode, mul, add, negp, negz} = ctrl[5:0];
end
// check results on falling edge of clk
always @(negedge clk)
if (~reset) begin // skip during reset
if (result !== rexpected) begin // check result // *** should also add tests on flags eventually
$display("Error: inputs %h * %h + %h", x, y, z);
$display(" result = %h (%h expected)", result, rexpected);
errors = errors + 1;
end
vectornum = vectornum + 1;
// an all-x entry marks the end of the loaded vectors
if (testvectors[vectornum] === 'x) begin
$display("%d tests completed with %d errors",
vectornum, errors);
$stop;
end
end
endmodule

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,130 @@
#!/usr/bin/perl -w
# torturegen.pl
# David_Harris@hmc.edu 19 April 2022
# Convert TestFloat cases into format for fma16 project torture test
# Strip out cases involving denorms
use strict;
# Build the list of input files: f16_<op>_<mode>.tv for every operation and rounding mode
my @basenames = ("add", "mul", "mulAdd");
my @roundingmodes = ("rz", "rd", "ru", "rne");
my @names = ();
foreach my $name (@basenames) {
foreach my $mode (@roundingmodes) {
push(@names, "f16_${name}_$mode.tv");
}
}
# All selected vectors are written to a single output file
open(TORTURE, ">work/torture.tv") || die("Can't write torture.tv");
my $datestring = localtime();
print(TORTURE "// Torture tests generated $datestring by $0\n");
foreach my $tv (@names) {
open(TV, "work/$tv") || die("Can't read $tv");
my $type = &getType($tv); # is it mul, add, mulAdd
my $rm = &getRm($tv); # rounding mode
# if ($rm != 0) { next; } # only do rz
print (TORTURE "\n////////// Testcases from $tv of type $type rounding mode $rm\n");
print ("\n////////// Testcases from $tv of type $type rounding mode $rm\n");
my $linecount = 0;
my $babyTorture = 0; # set to 1 for a much sparser sample
while (<TV>) {
my $line = $_;
$linecount++;
# Keep only one line in every $density; offsetting by $rm makes each
# rounding mode pick different lines
my $density = 10;
if ($type eq "mulAdd") {$density = 500;}
if ($babyTorture) {
$density = 100;
if ($type eq "mulAdd") {$density = 50000;}
}
if ((($linecount + $rm) % $density) != 0) { next }; # too many tests to use
chomp($line); # strip off newline
my @parts = split(/_/, $line);
my ($x, $y, $z, $op, $w, $flags);
# Map the input fields onto x, y, z: add inputs are (x, z), mul inputs are
# (x, y), mulAdd inputs are (x, y, z); the missing operand gets a fixed value
$x = $parts[0];
if ($type eq "add") { $y = "0000"; } else {$y = $parts[1]};
if ($type eq "mul") { $z = "3CFF"; } elsif ($type eq "add") {$z = $parts[1]} else { $z = $parts[2]};
# op byte: rounding mode in the high nibble, bit 3 = multiply, bit 2 = add
$op = $rm << 4;
if ($type eq "mul" || $type eq "mulAdd") { $op = $op + 8; }
if ($type eq "add" || $type eq "mulAdd") { $op = $op + 4; }
my $opname = sprintf("%02x", $op);
# expected result and flags follow the operands
if ($type eq "mulAdd") {$w = $parts[3];} else {$w = $parts[2]};
if ($type eq "mulAdd") {$flags = $parts[4];} else {$flags = $parts[3]};
$flags = substr($flags, -1); # take last character
if (&fpval($w) eq "NaN") { $w = "7e00"; } # every NaN result is replaced by 7e00
my $vec = "${x}_${y}_${z}_${opname}_${w}_${flags}";
my $skip = "";
if (&isdenorm($x) || &isdenorm($y) || &isdenorm($z) || &isdenorm($w)) {
$skip = "Skipped denorm";
}
my $summary = &summary($x, $y, $z, $w, $type);
# skipped cases are written as comments only
if ($skip ne "") {
print TORTURE "// $skip $tv line $linecount $line $summary\n"
}
else { print TORTURE "$vec // $tv line $linecount $line $summary\n";}
}
close(TV);
}
close(TORTURE);
# fpval: describe a 16-bit floating-point value given as a hex string.
# Returns "NaN", "INF", 0, "Denorm" or "1.<fraction bits> x 2^<exponent>",
# prefixed with "-" when the sign bit is set (NaN is never prefixed).
sub fpval {
    my $val = shift;
    $val = hex($val); # convert hex string to number
    my $frac = $val & 0x3FF;
    my $exp = ($val >> 10) & 0x1F;
    my $sign = $val >> 15;
    my $res;
    if ($exp == 31 && $frac != 0) { return "NaN"; }
    elsif ($exp == 31) { $res = "INF"; }
    # test exponent and fraction only, so that -0 (0x8000) is reported as a
    # zero rather than falling through to the denorm case
    elsif ($exp == 0 && $frac == 0) { $res = 0; }
    elsif ($exp == 0) { $res = "Denorm"; }
    # the fraction field is 10 bits wide, so print exactly 10 binary digits
    else { $res = sprintf("1.%010b x 2^%d", $frac, $exp-15); }
    if ($sign == 1) { $res = "-$res"; }
    return $res;
}
# summary: build a human-readable description of one test case.
# Arguments are the hex strings for x, y, z and the result w, plus the test
# type ("add", "mul" or anything else for multiply-add).
sub summary {
    my ($x, $y, $z, $w, $type) = @_;
    my ($xv, $yv, $zv, $wv) = map { &fpval($_) } ($x, $y, $z, $w);

    return "$xv + $zv = $wv" if $type eq "add";
    return "$xv * $yv = $wv" if $type eq "mul";
    return "$xv * $yv + $zv = $wv";
}
# getType: classify a test-vector file name as "mulAdd", "mul" or "add".
# "mulAdd" is tested first because such names also match /mul/.
sub getType {
    my ($tv) = @_;

    return "mulAdd" if $tv =~ /mulAdd/;
    return "mul"    if $tv =~ /mul/;
    return "add";
}
# getRm: map the rounding-mode tag in a file name to its numeric code
# (rz = 0, rne = 1, rd = 2, ru = 3). Returns "bad" when no tag is found.
sub getRm {
    my ($tv) = @_;
    # checked in this order; the first tag found in the name wins
    my @codes = ([qr/rz/, 0], [qr/rne/, 1], [qr/rd/, 2], [qr/ru/, 3]);

    foreach my $entry (@codes) {
        my ($tag, $code) = @$entry;
        return $code if $tv =~ $tag;
    }
    return "bad";
}
# isdenorm: return 1 if the 16-bit value given as a hex string is a denormal
# number (exponent field 0, fraction field nonzero), otherwise 0.
sub isdenorm {
    my $fp = shift;
    my $val = hex($fp);
    my $expv = ($val >> 10) & 0x1F;
    my $frac = $val & 0x3FF;
    # Test the fraction field rather than the whole value: with the old
    # "$val != 0" test the sign bit made -0 (0x8000) count as a denorm, so
    # every vector involving -0 was skipped.
    return ($expv == 0 && $frac != 0) ? 1 : 0;
}

View file

@ -0,0 +1,62 @@
# Wave-window setup for testbench_fma16: adds the testbench and dut signals
# in display order, restores the cursors and zoom, and sets display options.
onerror {resume}
quietly WaveActivateNextPane {} 0

# testbench-level signals
foreach sig {clk reset x y z result rexpected} {
    add wave -noupdate /testbench_fma16/$sig
}

# dut signals shown before AlignCnt
foreach sig {
    x y z mul add negr negz roundmode result
    XManE YManE ZManE XExpE YExpE ZExpE PExpE Ne upOneExt
    XSgnE YSgnE ZSgnE PSgnE ProdManE NfracS ProdManAl ZManExt ZManAl
    Nfrac res
} {
    add wave -noupdate /testbench_fma16/dut/$sig
}

# AlignCnt is the only signal displayed in decimal
add wave -noupdate -radix decimal /testbench_fma16/dut/AlignCnt

# dut signals shown after AlignCnt
foreach sig {NSamt ZExpGreater ACLess upOne KillProd} {
    add wave -noupdate /testbench_fma16/dut/$sig
}

TreeUpdate [SetDefaultTree]
WaveRestoreCursors {{Cursor 1} {3746 ns} 1} {{Cursor 2} {4169 ns} 0}
quietly wave cursor active 2

# display options, applied in this order
foreach {option value} {
    namecolwidth    237
    valuecolwidth   64
    justifyvalue    left
    signalnamewidth 0
    snapdistance    10
    datasetprefix   0
    rowmargin       4
    childrowmargin  2
    gridoffset      0
    gridperiod      1
    griddelta       40
    timeline        0
    timelineunits   ns
} {
    configure wave -$option $value
}

update
WaveRestoreZoom {4083 ns} {4235 ns}

View file

@ -55,20 +55,22 @@
`define Q_NE 32'd15
`define Q_NF 32'd112
`define Q_BIAS 32'd16383
`define Q_FMT 2'd3
`define D_LEN 32'd64
`define D_NE 32'd11
`define D_NF 32'd52
`define D_BIAS 32'd1023
`define D_FMT 32'd1
`define D_FMT 2'd1
`define S_LEN 32'd32
`define S_NE 32'd8
`define S_NF 32'd23
`define S_BIAS 32'd127
`define S_FMT 32'd1
`define S_FMT 2'd0
`define H_LEN 32'd16
`define H_NE 32'd5
`define H_NF 32'd10
`define H_BIAS 32'd15
`define H_FMT 2'd2
// Floating point length FLEN and number of exponent (NE) and fraction (NF) bits
`define FLEN (`Q_SUPPORTED ? `Q_LEN : `D_SUPPORTED ? `D_LEN : `F_SUPPORTED ? `S_LEN : `H_LEN)
@ -91,6 +93,12 @@
`define FMT2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 2'd0 : 2'd2)
`define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? `S_BIAS : `H_BIAS)
// largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9))
`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6))
// Disable spurious Verilator warnings
/* verilator lint_off STMTDLY */

View file

@ -9,4 +9,4 @@
# sqrt - test square ro
# all - test everything
vsim -do "do testfloat.do rv64fpquad cmp"
vsim -do "do testfloat.do rv64fp mul"

View file

@ -1,102 +1,9 @@
add wave -noupdate /testbenchfp/clk
add wave -noupdate -radix decimal /testbenchfp/VectorNum
add wave -group Other -noupdate /testbenchfp/FrmNum
add wave -group Other -noupdate /testbenchfp/X
add wave -group Other -noupdate /testbenchfp/Y
add wave -group Other -noupdate /testbenchfp/Z
add wave -group Other -noupdate /testbenchfp/Res
add wave -group Other -noupdate /testbenchfp/Ans
add wave -group Rne -noupdate /testbenchfp/FmaRneX
add wave -group Rne -noupdate /testbenchfp/FmaRneY
add wave -group Rne -noupdate /testbenchfp/FmaRneZ
add wave -group Rne -noupdate /testbenchfp/FmaRneRes
add wave -group Rne -noupdate /testbenchfp/FmaRneAns
add wave -group Rz -noupdate /testbenchfp/FmaRzX
add wave -group Rz -noupdate /testbenchfp/FmaRzY
add wave -group Rz -noupdate /testbenchfp/FmaRzZ
add wave -group Rz -noupdate /testbenchfp/FmaRzRes
add wave -group Rz -noupdate /testbenchfp/FmaRzAns
add wave -group Ru -noupdate /testbenchfp/FmaRuX
add wave -group Ru -noupdate /testbenchfp/FmaRuY
add wave -group Ru -noupdate /testbenchfp/FmaRuZ
add wave -group Ru -noupdate /testbenchfp/FmaRuRes
add wave -group Ru -noupdate /testbenchfp/FmaRuAns
add wave -group Rd -noupdate /testbenchfp/FmaRdX
add wave -group Rd -noupdate /testbenchfp/FmaRdY
add wave -group Rd -noupdate /testbenchfp/FmaRdZ
add wave -group Rd -noupdate /testbenchfp/FmaRdRes
add wave -group Rd -noupdate /testbenchfp/FmaRdAns
add wave -group Rnm -noupdate /testbenchfp/FmaRnmX
add wave -group Rnm -noupdate /testbenchfp/FmaRnmY
add wave -group Rnm -noupdate /testbenchfp/FmaRnmZ
add wave -group Rnm -noupdate /testbenchfp/FmaRnmRes
add wave -group Rnm -noupdate /testbenchfp/FmaRnmAns
add wave -group AllSignals -noupdate /*
add wave -group AllSignals -noupdate /testbenchfp/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rne/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rne/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rz/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rz/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1ru/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2ru/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rd/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rd/resultselect/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/expadd/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/mult/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/align/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/sign/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/add/*
add wave -group AllSignals -noupdate /testbenchfp/fma1rnm/loa/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/normalize/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaround/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultsign/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/fmaflags/*
add wave -group AllSignals -noupdate /testbenchfp/fma2rnm/resultselect/*
add wave -noupdate /testbenchfp/FrmNum
add wave -noupdate /testbenchfp/X
add wave -noupdate /testbenchfp/Y
add wave -noupdate /testbenchfp/Z
add wave -noupdate /testbenchfp/Res
add wave -noupdate /testbenchfp/Ans

View file

@ -0,0 +1,69 @@
`include "wally-config.vh"
///////////////////////////////////////////////////////////////////////////
// cvtshiftcalc
//
// Builds the operand that is handed to the shifter for conversion
// operations (CvtShiftIn) and flags ??? -> fp results that underflow
// (CvtResUf). The module is purely combinational.
//
// Relies on the `NE, `NF, `XLEN, `LGLEN, `FMTBITS and `FPSIZES macros
// (plus the per-format `NF1/`NF2/`Q_NF/`D_NF/`S_NF/`H_NF and
// `FMT/`FMT1/`FMT2 macros) being defined by the including configuration.
///////////////////////////////////////////////////////////////////////////
module cvtshiftcalc(
    input logic                     XZeroM,             // when set, CvtResUf is forced low
    input logic                     ToInt,              // selects the fp -> int shifter input
    input logic                     IntToFp,            // int -> fp conversion; when set, CvtResUf is forced low
    input logic [`NE:0]             CvtCalcExpM,        // the calculated exponent
    input logic [`NF:0]             XManM,              // input mantissa
    input logic [`FMTBITS-1:0]      OutFmt,             // output format
    input logic [`LGLEN-1:0]        CvtLzcInM,          // input to the Leading Zero Counter (priority encoder)
    input logic                     CvtResDenormUfM,    // selects the denormalized/underflowed shifter input
    output logic                    CvtResUf,           // the ??? -> fp result underflows
    output logic [`LGLEN+`NF:0]     CvtShiftIn          // number to be shifted
);

    logic [$clog2(`NF):0]           ResNegNF;           // the result's fraction length negated (-NF)

    ///////////////////////////////////////////////////////////////////////////
    // shifter
    ///////////////////////////////////////////////////////////////////////////

    // select the input to the shifter
    //   fp -> int:
    //       | `XLEN zeros |     Mantissa      | 0's if necessary |
    //       Other problems:
    //           - if shifting to the right (neg CalcExp) then don't keep a 1 in the round bit
    //             (to prevent an incorrect plus 1 later during rounding)
    //           - we do however want to keep the one in the sticky bit, so set one of the bits
    //             in the sticky bit area to 1 (the leading mantissa bit is cleared and ORed
    //             into the bit below it when CvtCalcExpM[`NE] is set)
    //           - ex: for the case 0010000.... (double)
    //   ??? -> fp:
    //       - if result is denormalized or underflowed then we want to shift right
    //         i.e. shift right then shift left:
    //           | `NF-1 zeros |     Mantissa      | 0's if necessary |
    //       - otherwise:
    //           | LzcInM | 0's if necessary |
    assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
                        CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} :
                        {CvtLzcInM, {`NF+1{1'b0}}};

    // choose the negative of the fraction size for the selected output format
    if (`FPSIZES == 1) begin
        assign ResNegNF = -($clog2(`NF)+1)'(`NF);

    end else if (`FPSIZES == 2) begin
        assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);

    end else if (`FPSIZES == 3) begin
        always_comb
            case (OutFmt)
                `FMT:    ResNegNF = -($clog2(`NF)+1)'(`NF);
                `FMT1:   ResNegNF = -($clog2(`NF)+1)'(`NF1);
                `FMT2:   ResNegNF = -($clog2(`NF)+1)'(`NF2);
                // unsupported format: drive every bit to X ('x fills the full width,
                // a sized 1'bx would zero-extend and only X the least significant bit)
                default: ResNegNF = 'x;
            endcase

    end else if (`FPSIZES == 4) begin
        always_comb
            case (OutFmt)
                2'h3:    ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
                2'h1:    ResNegNF = -($clog2(`NF)+1)'(`D_NF);
                2'h0:    ResNegNF = -($clog2(`NF)+1)'(`S_NF);
                2'h2:    ResNegNF = -($clog2(`NF)+1)'(`H_NF);
                // only reachable when OutFmt contains X/Z: propagate the unknown
                // instead of holding the previous value
                default: ResNegNF = 'x;
            endcase
    end

    // determine if the result underflows ??? -> fp
    //      - if the first 1 is shifted out of the result then the result underflows
    //        (signed compare of the calculated exponent against -NF, sign-extended to `NE+1 bits)
    //      - can't underflow an integer to fp conversion
    assign CvtResUf = ($signed(CvtCalcExpM) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroM&~IntToFp;

endmodule

View file

@ -2,13 +2,12 @@
`include "wally-config.vh"
// FOpCtrlE values
// 111 min
// 110 min
// 101 max
// 010 equal
// 001 less than
// 011 less than or equal
module fcmp (
input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlE, // see above table
@ -20,12 +19,13 @@ module fcmp (
input logic XSNaNE, YSNaNE, // is signaling NaN
input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs
output logic CmpNVE, // invalid flag
output logic [`FLEN-1:0] CmpResE // compare resilt
output logic [`FLEN-1:0] CmpFpResE, // compare resilt
output logic [`XLEN-1:0] CmpIntResE // compare resilt
);
logic LTabs, LT, EQ; // is X < or > or = Y
logic [`FLEN-1:0] NaNRes;
logic BothZeroE, EitherNaNE, EitherSNaNE;
logic BothZero, EitherNaN, EitherSNaN;
assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers
assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs);
@ -36,9 +36,9 @@ module fcmp (
// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE<YManE)^XSgnE)&~EQ : (XExpE<YExpE)^XSgnE;
assign EQ = (FSrcXE == FSrcYE);
assign BothZeroE = XZeroE&YZeroE;
assign EitherNaNE = XNaNE|YNaNE;
assign EitherSNaNE = XSNaNE|YSNaNE;
assign BothZero = XZeroE&YZeroE;
assign EitherNaN = XNaNE|YNaNE;
assign EitherSNaN = XSNaNE|YSNaNE;
// flags
@ -47,11 +47,11 @@ module fcmp (
// EQ - quiet - sets invalid if signaling NaN input
always_comb begin
case (FOpCtrlE[2:0])
3'b111: CmpNVE = EitherSNaNE;//min
3'b101: CmpNVE = EitherSNaNE;//max
3'b010: CmpNVE = EitherSNaNE;//equal
3'b001: CmpNVE = EitherNaNE;//less than
3'b011: CmpNVE = EitherNaNE;//less than or equal
3'b110: CmpNVE = EitherSNaN;//min
3'b101: CmpNVE = EitherSNaN;//max
3'b010: CmpNVE = EitherSNaN;//equal
3'b001: CmpNVE = EitherNaN;//less than
3'b011: CmpNVE = EitherNaN;//less than or equal
default: CmpNVE = 1'b0;
endcase
end
@ -112,16 +112,12 @@ module fcmp (
endcase
// when one input is a NaN -output the non-NaN
always_comb
case (FOpCtrlE[2:0])
3'b111: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
3'b101: CmpResE = XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE;
3'b010: CmpResE = {(`FLEN-1)'(0), (EQ|BothZeroE) & ~EitherNaNE}; // Equal
3'b001: CmpResE = {(`FLEN-1)'(0), LT & ~BothZeroE & ~EitherNaNE}; // Less than
3'b011: CmpResE = {(`FLEN-1)'(0), (LT|EQ|BothZeroE) & ~EitherNaNE}; // Less than or equal
default: CmpResE = (`FLEN)'(0);
endcase
assign CmpFpResE = FOpCtrlE[0] ? XNaNE ? YNaNE ? NaNRes : FSrcYE // Max
: YNaNE ? FSrcXE : LT ? FSrcYE : FSrcXE :
XNaNE ? YNaNE ? NaNRes : FSrcYE // Min
: YNaNE ? FSrcXE : LT ? FSrcXE : FSrcYE;
assign CmpIntResE = {(`XLEN-1)'(0), (((EQ|BothZero)&FOpCtrlE[1])|(LT&FOpCtrlE[0]&~BothZero))&~EitherNaN};
endmodule

View file

@ -10,99 +10,99 @@ module fctrl (
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic FRegWriteD, // FP register write enable
output logic FDivStartD, // Start division or squareroot
output logic [1:0] FResultSelD, // select result to be written to fp register
output logic [1:0] FResSelD, // select result to be written to fp register
output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit
output logic [1:0] FResSelD, // select one of the results done in the memory stage
output logic [1:0] FIntResSelD, // select the result that will be written to the integer register
output logic [1:0] PostProcSelD,
output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1
output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
output logic FWriteIntD // is the result written to the integer register
);
`define FCTRLW 13
`define FCTRLW 11
logic [`FCTRLW-1:0] ControlsD;
//*** will putting x for don't cares reduce area in synthisis???
// FPU Instruction Decoder
always_comb
if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled
ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1;
ControlsD = `FCTRLW'b0_0_00_00_000_0_1;
else case(OpD)
// FRegWrite_FWriteInt_FResultSel_FOpCtrl_FResSel_FIntResSel_FDivStart_IllegalFPUInstr
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr
7'b0000111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b1_0_00_000_00_00_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_00_001_00_00_0_0; // fld
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b1_0_10_00_000_0_0; // flw
3'b011: ControlsD = `FCTRLW'b1_0_10_00_000_0_0; // fld
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
7'b0100111: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_0_00_010_00_00_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_00_011_00_00_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b0_0_00_00_000_0_0; // fsw
3'b011: ControlsD = `FCTRLW'b0_0_00_00_000_0_0; // fsd
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
7'b1000011: ControlsD = `FCTRLW'b1_0_01_000_00_00_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_001_00_00_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_01_010_00_00_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_01_011_00_00_0_0; // fnmadd
7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0; // fmadd
7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0; // fmsub
7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0; // fnmsub
7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0; // fnmadd
7'b1010011: casez(Funct7D)
7'b00000??: ControlsD = `FCTRLW'b1_0_01_110_00_00_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_01_111_00_00_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_01_100_00_00_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_10_000_00_00_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_10_001_00_00_1_0; // fsqrt
7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0; // fadd
7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0; // fsub
7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0; // fmul
7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_000_1_0; // fdiv
7'b01011??: ControlsD = `FCTRLW'b1_0_01_01_001_1_0; // fsqrt
7'b00100??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_11_000_01_00_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_11_001_01_00_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_11_010_01_00_0_0; // fsgnjx
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0; // fsgnj
3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0; // fsgnjn
3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0; // fsgnjx
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
7'b00101??: case(Funct3D)
3'b000: ControlsD = `FCTRLW'b1_0_11_111_10_00_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_11_101_10_00_0_0; // fmax
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0; // fmin
3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0; // fmax
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
7'b10100??: case(Funct3D)
3'b010: ControlsD = `FCTRLW'b0_1_11_010_10_00_0_0; // feq
3'b001: ControlsD = `FCTRLW'b0_1_11_001_10_00_0_0; // flt
3'b000: ControlsD = `FCTRLW'b0_1_11_011_10_00_0_0; // fle
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0; // feq
3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0; // flt
3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0; // fle
default: ControlsD = `FCTRLW'b0_0_00_00_000__0_1; // non-implemented instruction
endcase
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_11_000_00_10_0_0; // fclass
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_100_00_01_0_0; // fmv.x.w
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_101_00_01_0_0; // fmv.x.d
else ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
7'b1101000: case(Rs2D[1:0])//***reduce resSel
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.s.w w->s
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.s.wu wu->s
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.s.l l->s
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.s.lu lu->s
7'b11100??: if (Funct3D == 3'b001) ControlsD = `FCTRLW'b0_1_10_00_000_0_0; // fclass
else if (Funct3D[1:0] == 2'b00) ControlsD = `FCTRLW'b0_1_11_00_000_0_0; // fmv.x.w to int reg
else if (Funct3D[1:0] == 2'b01) ControlsD = `FCTRLW'b0_1_11_00_000_0_0; // fmv.x.d to int reg
else ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
7'b1101000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.s.w w->s
2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.s.wu wu->s
2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.s.l l->s
2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.s.lu lu->s
endcase
7'b1100000: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.s s->w
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.s s->wu
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.s s->l
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.s s->lu
2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.s s->w
2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.s s->wu
2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.s s->l
2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.s s->lu
endcase
7'b1111000: ControlsD = `FCTRLW'b1_0_11_000_00_00_0_0; // fmv.w.x
7'b0100000: ControlsD = `FCTRLW'b1_0_11_000_11_00_0_0; // fcvt.s.d
7'b1111000: ControlsD = `FCTRLW'b1_0_00_00_011_0_0; // fmv.w.x to fp reg
7'b0100000: ControlsD = `FCTRLW'b1_0_01_00_000_0_0; // fcvt.s.d
7'b1101001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b1_0_11_101_11_00_0_0; // fcvt.d.w w->d
2'b01: ControlsD = `FCTRLW'b1_0_11_100_11_00_0_0; // fcvt.d.wu wu->d
2'b10: ControlsD = `FCTRLW'b1_0_11_111_11_00_0_0; // fcvt.d.l l->d
2'b11: ControlsD = `FCTRLW'b1_0_11_110_11_00_0_0; // fcvt.d.lu lu->d
2'b00: ControlsD = `FCTRLW'b1_0_01_00_101_0_0; // fcvt.d.w w->d
2'b01: ControlsD = `FCTRLW'b1_0_01_00_100_0_0; // fcvt.d.wu wu->d
2'b10: ControlsD = `FCTRLW'b1_0_01_00_111_0_0; // fcvt.d.l l->d
2'b11: ControlsD = `FCTRLW'b1_0_01_00_110_0_0; // fcvt.d.lu lu->d
endcase
7'b1100001: case(Rs2D[1:0])
2'b00: ControlsD = `FCTRLW'b0_1_11_001_11_11_0_0; // fcvt.w.d d->w
2'b01: ControlsD = `FCTRLW'b0_1_11_000_11_11_0_0; // fcvt.wu.d d->wu
2'b10: ControlsD = `FCTRLW'b0_1_11_011_11_11_0_0; // fcvt.l.d d->l
2'b11: ControlsD = `FCTRLW'b0_1_11_010_11_11_0_0; // fcvt.lu.d d->lu
2'b00: ControlsD = `FCTRLW'b0_1_01_00_001_0_0; // fcvt.w.d d->w
2'b01: ControlsD = `FCTRLW'b0_1_01_00_000_0_0; // fcvt.wu.d d->wu
2'b10: ControlsD = `FCTRLW'b0_1_01_00_011_0_0; // fcvt.l.d d->l
2'b11: ControlsD = `FCTRLW'b0_1_01_00_010_0_0; // fcvt.lu.d d->lu
endcase
7'b1111001: ControlsD = `FCTRLW'b1_0_11_001_00_00_0_0; // fmv.d.x
7'b0100001: ControlsD = `FCTRLW'b1_0_11_001_11_00_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
7'b1111001: ControlsD = `FCTRLW'b1_0_00_00_011_0_0; // fmv.d.x to fp reg
7'b0100001: ControlsD = `FCTRLW'b1_0_01_00_001_0_0; // fcvt.d.s
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
default: ControlsD = `FCTRLW'b0_0_00_000_00_00_0_1; // non-implemented instruction
default: ControlsD = `FCTRLW'b0_0_00_00_000_0_1; // non-implemented instruction
endcase
// unswizzle control bits
assign {FRegWriteD, FWriteIntD, FResultSelD, FOpCtrlD, FResSelD, FIntResSelD, FDivStartD, IllegalFPUInstrD} = ControlsD;
assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD;
// rounding modes:
// 000 - round to nearest, ties to even
@ -121,82 +121,61 @@ module fctrl (
assign FmtD = 0;
else if (`FPSIZES == 2)begin
logic [1:0] FmtTmp;
assign FmtTmp = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtTmp = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = (`FMT == FmtTmp);
end
else if (`FPSIZES == 3|`FPSIZES == 4)
assign FmtD = (FResultSelD == 2'b00) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
assign FmtD = (FResSelD == 2'b10)&~FWriteIntD ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0];
// assign FmtD = FResultSelD == 2'b00 ? Funct3D[0] : ((Funct7D[6:3] == 4'b0100)&OpD[4]) | OpD[6:1] == 6'b010000 ? ~Funct7D[0] : Funct7D[0];
// FResultSel:
// 000 - ReadRes - load
// 001 - FMARes - FMA and multiply
// 010 - FAddRes - add and fp to fp
// 011 - FDivRes - divide and squareroot
// 100 - FRes - anything that is written to the fp register and is ready in the memory stage
// FResSel:
// 00 - SrcA - move to fp register
// 01 - SgnRes - sign injection
// 10 - CmpRes - min/max
// 11 - CvtRes - convert to fp
// FIntResSel:
// 00 - CmpRes - less than, equal, or less than or equal
// 01 - FSrcX - move to int register
// 10 - ClassRes - classify
// 11 - CvtRes - convert to signed/unsigned int
// Final Res Sel:
// fp int
// 00 other cmp
// 01 postproc cvt
// 10 store class
// 11 mv
// OpCtrl values:
// div/sqrt
// fdiv = ???0
// fsqrt = ???1
// post processing Sel:
// 00 cvt
// 01 div
// 10 fma
// cmp
// fmin = ?111
// fmax = ?101
// feq = ?010
// flt = ?001
// fle = ?011
// {?, is min or max, is eq or le, is lt or le}
// Other Sel:
// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]}
// 000 - sign 00
// 001 - negate sign 00
// 010 - xor sign 00
// 011 - mv to fp 01
// 110 - min 10
// 101 - max 10
//fma/mult
// fmadd = ?000
// fmsub = ?001
// fnmsub = ?010 -(a*b)+c
// fnmadd = ?011 -(a*b)-c
// fmul = ?100
// {?, is mul, negate product, negate addend}
// sgn inj
// fsgnj = ??00
// fsgnjn = ??01
// fsgnjx = ??10
// add/sub/cnvt
// fadd = 0000
// fsub = 0001
// fcvt.s.d = 0111
// fcvt.d.s = 0111
// Fmt controls the output for fp -> fp
// convert
// fcvt.w.s = 0010
// fcvt.wu.s = 0110
// fcvt.s.w = 0001
// fcvt.s.wu = 0101
// fcvt.l.s = 1010
// fcvt.lu.s = 1110
// fcvt.s.l = 1001
// fcvt.s.lu = 1101
// fcvt.w.d = 0010
// fcvt.wu.d = 0110
// fcvt.d.w = 0001
// fcvt.d.wu = 0101
// fcvt.l.d = 1010
// fcvt.lu.d = 1110
// fcvt.d.l = 1001
// fcvt.d.lu = 1101
// {long, unsigned, to int, from int}
// OpCtrl:
// Fma: {not multiply-add?, negate prod?, negate Z?}
// 000 - fmadd
// 001 - fmsub
// 010 - fnmsub
// 011 - fnmadd
// 100 - mul
// 110 - add
// 111 - sub
// Div:
// 0 - fdiv
// 1 - fsqrt
// Cvt Int: {Int to Fp?, 64 bit int?, signed int?}
// Cvt Fp: output format
// 10 - to half
// 00 - to single
// 01 - to double
// 11 - to quad
// Cmp: {equal?, less than?}
// 010 - eq
// 001 - lt
// 011 - le
// 110 - min
// 101 - max
// Sgn:
// 00 - sign
// 01 - negate sign
// 10 - xor sign
endmodule

View file

@ -1,8 +1,5 @@
`include "wally-config.vh"
// largest length in IEU/FPU
`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF)
`define LOGLGLEN $unsigned($clog2(`LGLEN+1))
module fcvt (
input logic XSgnE, // input's sign
@ -13,14 +10,13 @@ module fcvt (
input logic FWriteIntE, // is fp->int (since it's writting to the integer register)
input logic XZeroE, // is the input zero
input logic XDenormE, // is the input denormalized
input logic XInfE, // is the input infinity
input logic XNaNE, // is the input a NaN
input logic XSNaNE, // is the input a signaling NaN
input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half)
output logic [`FLEN-1:0] CvtResE, // the fp conversion result
output logic [`XLEN-1:0] CvtIntResE, // the int conversion result
output logic [4:0] CvtFlgE // the conversion's flags
output logic [`NE:0] CvtCalcExpE, // the calculated expoent
output logic [`LOGLGLEN-1:0] CvtShiftAmtE, // how much to shift by
output logic CvtResDenormUfE,// does the result underflow or is denormalized
output logic CvtResSgnE, // the result's sign
output logic IntZeroE, // is the integer zero?
output logic [`LGLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder)
);
// OpCtrls:
@ -41,34 +37,8 @@ module fcvt (
logic [`FMTBITS-1:0] OutFmt; // format of the output
logic [`XLEN-1:0] PosInt; // the positive integer input
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder)
logic [`NE:0] CalcExp; // the calculated expoent
logic [`LOGLGLEN-1:0] ShiftAmt; // how much to shift by
logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted
logic ResDenormUf;// does the result underflow or is denormalized
logic ResUf; // does the result underflow
logic [`LGLEN+`NF:0] Shifted; // the shifted result
logic [`NE-2:0] NewBias; // the bias of the final result
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
logic [`NE-1:0] OldExp; // the old exponent
logic ResSgn; // the result's sign
logic Sticky; // sticky bit - for rounding
logic Round; // round bit - for rounding
logic LSBFrac; // the least significant bit of the fraction - for rounding
logic CalcPlus1; // the calculated plus 1
logic Plus1; // add one to the final result?
logic [`FLEN-1:0] ShiftedPlus1; // plus one shifted to the proper position
logic [`NE:0] FullResExp; // the full result exponent (with the overflow bit)
logic [`NE-1:0] ResExp; // the result's exponent (trimmed to the correct size)
logic [`NF-1:0] ResFrac; // the result's fraction
logic [`XLEN+1:0] NegRes; // the negation of the result
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic Overflow, Underflow, Inexact, Invalid; // flags
logic IntInexact, FpInexact, IntInvalid, FpInvalid; // flags for FP and int outputs
logic [`NE-1:0] MaxExp; // the maximum exponent before overflow
logic [1:0] NegResMSBS; // the negitive integer result's most significant bits
logic [`FLEN-1:0] NaNRes, InfRes, Res, UfRes; //various special results
logic KillRes; // kill the result?
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
@ -97,8 +67,9 @@ module fcvt (
// 1) negate the input if the input is a negative signed integer
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE;
assign PosInt = CvtResSgnE ? -ForwardedSrcAE : ForwardedSrcAE;
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
assign IntZeroE = ~|TrimInt;
///////////////////////////////////////////////////////////////////////////
// lzc
@ -107,32 +78,16 @@ module fcvt (
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} :
{XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}};
lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt);
lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt);
///////////////////////////////////////////////////////////////////////////
// shifter
///////////////////////////////////////////////////////////////////////////
// seclect the input to the shifter
// fp -> int:
// | `XLEN zeros | Mantissa | 0's if nessisary |
// Other problems:
// - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding)
// - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1
// - ex: for the case 0010000.... (double)
// ??? -> fp:
// - if result is denormalized or underflowed then we want to shift right i.e. shift right then shift left:
// | `NF-1 zeros | Mantissa | 0's if nessisary |
// - otherwise:
// | lzcIn | 0's if nessisary |
assign ShiftIn = ToInt ? {{`XLEN{1'b0}}, XManE[`NF]&~CalcExp[`NE], XManE[`NF-1]|(CalcExp[`NE]&XManE[`NF]), XManE[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} :
ResDenormUf ? {{`NF-1{1'b0}}, XManE, {`LGLEN-`NF+1{1'b0}}} :
{LzcIn, {`NF+1{1'b0}}};
// kill the shift if it's negitive
// kill the shift if it's negitive
// select the amount to shift by
// fp -> int:
// - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
@ -144,47 +99,10 @@ module fcvt (
// - only shift fp -> fp if the intital value is denormalized
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
assign ShiftAmt = ToInt ? CalcExp[`LOGLGLEN-1:0]&{`LOGLGLEN{~CalcExp[`NE]}} :
ResDenormUf&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CalcExp[`LOGLGLEN-1:0] :
assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} :
CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] :
(ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}};
// shift
// fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp
// process:
// - start - CalcExp = 1 + XExp - Largest Bias
// | `XLEN zeros | Mantissa | 0's if nessisary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if nessisary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if nessisary |
// | keep |
//
// fp -> fp:
// - if result is denormalized or underflowed:
// | `NF-1 zeros | Mantissa | 0's if nessisary | << NF+CalcExp-1
// process:
// - start
// | mantissa | 0's |
//
// - shift right by NF-1 (NF-1)
// | `NF-1 zeros | mantissa | 0's |
//
// - shift left by CalcExp = XExp - Largest bias + new bias
// | 0's | mantissa | 0's |
// | keep |
//
// - if the input is denormalized:
// | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
//
// int -> fp: | lzcIn | 0's if nessisary | << ZeroCnt+1
// - plus 1 to shift out the first 1
assign Shifted = ShiftIn << ShiftAmt;
///////////////////////////////////////////////////////////////////////////
// exp calculations
///////////////////////////////////////////////////////////////////////////
@ -262,40 +180,11 @@ module fcvt (
// - shift left to normilize (-1-ZeroCnt)
// - newBias to make the biased exponent
//
assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})};
// find if the result is denormal or underflows
// - if the calculated exponent is 0 or negative (and the input/result is not exactly 0)
// - can't underflow an integer to Fp conversion
assign ResDenormUf = (~|CalcExp | CalcExp[`NE])&~XZeroE&~IntToFp;
// choose the negative of the fraction size
if (`FPSIZES == 1) begin
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
end else if (`FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
default: ResNegNF = 1'bx;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
endcase
end
// determine if the result underflows ??? -> fp
// - if the first 1 is shifted out of the result then the result underflows
// - can't underflow an integer to fp conversions
assign ResUf = ($signed(CalcExp) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZeroE&~IntToFp;
assign CvtResDenormUfE = (~|CvtCalcExpE | CvtCalcExpE[`NE])&~XZeroE&~IntToFp;
///////////////////////////////////////////////////////////////////////////
@ -307,498 +196,7 @@ module fcvt (
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
// - otherwise: the floating point input's sign
assign ResSgn = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
assign CvtResSgnE = IntToFp ? Int64 ? ForwardedSrcAE[`XLEN-1]&Signed : ForwardedSrcAE[31]&Signed : XSgnE;
///////////////////////////////////////////////////////////////////////////
// rounding
///////////////////////////////////////////////////////////////////////////
endmodule
// round to nearest even
// {Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
// 11 - Plus1
// round to zero - do nothing
// round to -infinity - Plus1 if negative
// round to infinity - Plus1 if positive
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0x - do nothing
// 1x - Plus1
// ResUf is used when a fp->fp result underflows but all the bits get shifted out, which leaves nothing for the sticky bit
if (`FPSIZES == 1) begin
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : |Shifted[`LGLEN+`NF-`NF-1:0]|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : Shifted[`LGLEN+`NF-`NF];
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : Shifted[`LGLEN+`NF-`NF+1];
end else if (`FPSIZES == 2) begin
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] :
(OutFmt ? |Shifted[`LGLEN+`NF-`NF-1:0] : |Shifted[`LGLEN+`NF-`NF1-1:0])|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] :
OutFmt ? Shifted[`LGLEN+`NF-`NF] : Shifted[`LGLEN+`NF-`NF1];
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] :
OutFmt ? Shifted[`LGLEN+`NF-`NF+1] : Shifted[`LGLEN+`NF-`NF1+1];
end else if (`FPSIZES == 3) begin
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
always_comb
case (OutFmt)
`FMT: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF+1];
end
`FMT1: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF1-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF1];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF1+1];
end
`FMT2: begin
ToFpSticky = |Shifted[`LGLEN+`NF-`NF2-1:0];
ToFpRound = Shifted[`LGLEN+`NF-`NF2];
ToFpLSBFrac = Shifted[`LGLEN+`NF-`NF2+1];
end
default: begin
ToFpSticky = 1'bx;
ToFpRound = 1'bx;
ToFpLSBFrac = 1'bx;
end
endcase
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
end else if (`FPSIZES == 4) begin
logic ToFpSticky, ToFpRound, ToFpLSBFrac;
always_comb
case (OutFmt)
2'h3: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`Q_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`Q_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`Q_NF+1];
end
2'h1: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`D_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`D_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`D_NF+1];
end
2'h0: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`S_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`S_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`S_NF+1];
end
2'h2: begin
ToFpSticky = |Shifted[`LGLEN+`Q_NF-`H_NF-1:0];
ToFpRound = Shifted[`LGLEN+`Q_NF-`H_NF];
ToFpLSBFrac = Shifted[`LGLEN+`Q_NF-`H_NF+1];
end
endcase
assign Sticky = ToInt ? |Shifted[`LGLEN+`NF-`XLEN-1:0] : ToFpSticky|ResUf;
assign Round = ToInt ? Shifted[`LGLEN+`NF-`XLEN] : ToFpRound;
assign LSBFrac = ToInt ? Shifted[`LGLEN+`NF-`XLEN+1] : ToFpLSBFrac;
end
always_comb
// Determine if you add 1
case (FrmE)
3'b000: CalcPlus1 = Round & (Sticky | LSBFrac);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResSgn;//round down
3'b011: CalcPlus1 = ~ResSgn;//round up
3'b100: CalcPlus1 = Round;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// dont round if exact
assign Plus1 = CalcPlus1&(Round|Sticky);
// shift the 1 to the propper position for rounding
// - dont round it converting to integer
if (`FPSIZES == 1) begin
assign ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
end else if (`FPSIZES == 2) begin
assign ShiftedPlus1 = OutFmt ? {{`FLEN-1{1'b0}},Plus1&~ToInt} : {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ShiftedPlus1 = {{`FLEN-1{1'b0}},Plus1&~ToInt};
`FMT1: ShiftedPlus1 = {{`NE+`NF1{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF1-1{1'b0}}};
`FMT2: ShiftedPlus1 = {{`NE+`NF2{1'b0}}, Plus1&~ToInt, {`FLEN-`NE-`NF2-1{1'b0}}};
default: ShiftedPlus1 = 0;
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ShiftedPlus1 = {{`Q_LEN-1{1'b0}},Plus1&~ToInt};
2'h1: ShiftedPlus1 = {{`Q_NE+`D_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`D_NF-1{1'b0}}};
2'h0: ShiftedPlus1 = {{`Q_NE+`S_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`S_NF-1{1'b0}}};
2'h2: ShiftedPlus1 = {{`Q_NE+`H_NF{1'b0}}, Plus1&~ToInt, {`Q_LEN-`Q_NE-`H_NF-1{1'b0}}};
endcase
end
// kill calcExp if the result is denormalized
assign {FullResExp, ResFrac} = {CalcExp&{`NE+1{~ResDenormUf}}, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`NF]} + ShiftedPlus1;
// trim the result's expoent to size
assign ResExp = FullResExp[`NE-1:0];
///////////////////////////////////////////////////////////////////////////
// flags
///////////////////////////////////////////////////////////////////////////
// calculate the flags
// find the maximum exponent (the exponent and larger overflows)
if (`FPSIZES == 1) begin
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
end else if (`FPSIZES == 2) begin
assign MaxExp = ToInt ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end else if (`FPSIZES == 3) begin
logic [`NE-1:0] MaxExpFp;
always_comb
case (OutFmt)
`FMT: begin
MaxExpFp = {`NE{1'b1}};
end
`FMT1: begin
MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
end
`FMT2: begin
MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
end
default: begin
MaxExpFp = 1'bx;
end
endcase
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
end else if (`FPSIZES == 4) begin
logic [`NE-1:0] MaxExpFp;
always_comb
case (OutFmt)
2'h3: begin
MaxExpFp = {`Q_NE{1'b1}};
end
2'h1: begin
MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
end
2'h0: begin
MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
end
2'h2: begin
MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
end
endcase
assign MaxExp = ToInt ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
end
// if the result exponent is larger then the maximum possible exponent
// | and the exponent is positive
// | | and the input is not NaN or Infinity
// | | |
assign Overflow = ((ResExp >= MaxExp)&~CalcExp[`NE]&(~(XNaNE|XInfE)|IntToFp));
// if the result is denormalized or underflowed
// | and the result did not round into normal values
// | | and the result is not exact
// | | | and the result isn't NaN
// | | | |
assign Underflow = ResDenormUf & ~(ResExp==1 & CalcExp == 0) & (Sticky|Round)&~(XNaNE);
// we are using the IEEE convertToIntegerExact opperations (rather then the exact ones) which do singal the inexact flag
// if there were bits thrown away
// | if overflowed or underflowed
// | | and if not a NaN
// | | |
assign FpInexact = (Sticky|Round|Underflow|Overflow)&(~XNaNE|IntToFp);
// if the result is too small to be represented and not 0
// | and if the result is not invalid (outside the integer bounds)
// | |
assign IntInexact = ((CalcExp[`NE]&~XZeroE)|Sticky|Round)&~Invalid;
// select the inexact flag to output
assign Inexact = ToInt ? IntInexact : FpInexact;
// if an input was a singaling NaN(and we're using a FP input)
// |
assign FpInvalid = (XSNaNE&~IntToFp);
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
// if the input is NaN or infinity
// | if the integer result overflows (out of range)
// | | if the input was negitive but ouputing to a unsigned number
// | | | the result doesn't round to zero
// | | | | or the result rounds up out of bounds
// | | | | and the result didn't underflow
// | | | | |
assign IntInvalid = XNaNE|XInfE|Overflow|((XSgnE&~Signed)&(~((CalcExp[`NE]|(~|CalcExp))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);
// |
// or when the positive result rounds up out of range
// select the inexact flag to output
assign Invalid = ToInt ? IntInvalid : FpInvalid;
// pack the flags together
// - fp -> int does not set the overflow or underflow flags
assign CvtFlgE = {Invalid, 1'b0, Overflow&~ToInt, Underflow&~ToInt, Inexact};
///////////////////////////////////////////////////////////////////////////
// result selection
///////////////////////////////////////////////////////////////////////////
// determine if you shoould kill the result
// - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0
// - dont set to zero if fp input is zero but not using the fp input
// - dont set to zero if int input is zero but not using the int input
assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp));
if (`FPSIZES == 1) begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
assign NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
end else begin
assign NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
assign InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
assign UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
assign Res = {ResSgn, ResExp, ResFrac};
end else if (`FPSIZES == 2) begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
end else begin
assign NaNRes = OutFmt ? {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
assign InfRes = OutFmt ? (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
(~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
assign UfRes = OutFmt ? {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]} : {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
assign Res = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {1'b0, {`NE+1{1'b1}}, XManE[`NF-2:0]};
end else begin
NaNRes = {1'b0, {`NE+1{1'b1}}, {`NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {ResSgn, (`FLEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {ResSgn, ResExp, ResFrac};
end
`FMT1: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, XManE[`NF-2:`NF-`NF1]};
end else begin
NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1+1{1'b1}}, {`NF1-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, (`LEN1-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end
`FMT2: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, XManE[`NF-2:`NF-`NF2]};
end else begin
NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2+1{1'b1}}, {`NF2-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, (`LEN2-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
end
default: begin
NaNRes = 1'bx;
InfRes = 1'bx;
UfRes = 1'bx;
Res = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, XManE[`Q_NF-2:0]};
end else begin
NaNRes = {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {ResSgn, {`Q_NE-1{1'b1}}, 1'b0, {`Q_NF{1'b1}}} : {ResSgn, {`Q_NE{1'b1}}, {`Q_NF{1'b0}}};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {ResSgn, (`Q_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {ResSgn, ResExp, ResFrac};
end
2'h1: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`D_NF]};
end else begin
NaNRes = {{`Q_LEN-`D_LEN{1'b1}}, 1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, (`D_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`D_NF]};
end
2'h0: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`S_NF]};
end else begin
NaNRes = {{`Q_LEN-`S_LEN{1'b1}}, 1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, (`S_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`S_NF]};
end
2'h2: begin
// IEEE sends a payload while Riscv says to send a canonical quiet NaN
if(`IEEE754) begin
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, XManE[`Q_NF-2:`Q_NF-`H_NF]};
end else begin
NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}};
end
// determine the infinity result
// - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign
// - otherwise: output infinity with the correct sign
// - kill the infinity singal if the input isn't fp
InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
// result for when the result is killed i.e. underflowes
// - output a rounded 0 with the correct sign
UfRes = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, (`H_LEN-2)'(0), Plus1&FrmE[1]};
// format the result - NaN box single precision (put 1's in the unused msbs)
Res = {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`Q_NF-1:`Q_NF-`H_NF]};
end
endcase
end
// choose the floating point result
// - if the input is NaN (and using the NaN input) output the NaN result
// - if the input is infinity or the output overflows
// - kill the InfE signal if the input isn't a floating point value
// - if killing the result output the underflow result
// - otherwise output the normal result
assign CvtResE = XNaNE&~IntToFp ? NaNRes :
(XInfE&~IntToFp)|Overflow ? InfRes :
KillRes ? UfRes :
Res;
// *** probably can optimize the negation
// select the overflow integer result
// - negitive infinity and out of range negitive input
// | int | long |
// signed | -2^31 | -2^63 |
// unsigned | 0 | 0 |
//
// - positive infinity and out of range negitive input and NaNs
// | int | long |
// signed | 2^31-1 | 2^63-1 |
// unsigned | 2^32-1 | 2^64-1 |
//
// other: 32 bit unsinged result should be sign extended as if it were a signed number
assign OfIntRes = Signed ? XSgnE&~XNaNE ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
XSgnE&~XNaNE ? {`XLEN{1'b0}} : // unsigned negitive
{`XLEN{1'b1}};// unsigned positive
// round and negate the positive result if needed
assign NegRes = XSgnE ? -({2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`LGLEN+`NF:`LGLEN+`NF+1-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
// select the integer output
// - if the input is invalid (out of bounds NaN or Inf) then output overflow result
// - if the input underflows
// - if rounding and signed opperation and negitive input, output -1
// - otherwise output a rounded 0
// - otherwise output the normal result (trmined and sign extended if nessisary)
assign CvtIntResE = Invalid ? OfIntRes :
CalcExp[`NE] ? XSgnE&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
endmodule

View file

@ -34,7 +34,7 @@ module fhazard(
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdM, RdW, // the adress being written to
input logic [1:0] FResultSelM, // the result being selected
input logic [1:0] FResSelM, // the result being selected
output logic FStallD, // stall the decode stage
output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value
);
@ -47,10 +47,12 @@ module fhazard(
FForwardZE = 2'b00; // choose FRD3E
FStallD = 0;
//*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait
// if the needed value is in the memory stage - input 1
if ((Adr1E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 2'b11) FForwardXE = 2'b10; // choose FResM
if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W
@ -59,7 +61,7 @@ module fhazard(
// if the needed value is in the memory stage - input 2
if ((Adr2E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 2'b11) FForwardYE = 2'b10; // choose FResM
if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W
@ -68,7 +70,7 @@ module fhazard(
// if the needed value is in the memory stage - input 3
if ((Adr3E == RdM) & FRegWriteM)
// if the result will be FResM (can be taken from the memory stage)
if(FResultSelM == 2'b11) FForwardZE = 2'b10; // choose FResM
if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM
else FStallD = 1; // otherwise stall
// if the needed value is in the writeback stage
else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W

152
pipelined/src/fpu/flags.sv Normal file
View file

@ -0,0 +1,152 @@
`include "wally-config.vh"
// flags
// Computes the exception flags {invalid, divide by zero, overflow, underflow, inexact}
// for the operation selected by CvtOp / DivOp / FmaOp, and exports Invalid, Overflow
// and Underflow separately so they can be used to select the result.
module flags(
    input logic                 XSgnM,                      // sign of X
    input logic                 XSNaNM, YSNaNM, ZSNaNM,     // inputs are signaling NaNs
    input logic                 XInfM, YInfM, ZInfM,        // inputs are infinity
    input logic                 Plus1,                      // rounding increment (used by the integer invalid check)
    input logic                 InfIn,                      // is an Inf input being used
    input logic                 XZeroM, YZeroM,             // inputs are zero
    input logic                 XNaNM, YNaNM,               // inputs are NaN
    input logic                 NaNIn,                      // is a NaN input being used
    input logic                 Sqrt,                       // square root operation?
    input logic                 ToInt,                      // convert to integer
    input logic                 IntToFp,                    // convert integer to floating point
    input logic                 Int64,                      // convert to 64 bit integer
    input logic                 Signed,                     // convert to a signed integer
    input logic [`FMTBITS-1:0]  OutFmt,                     // output format
    input logic [`NE:0]         CvtCalcExpM,                // the calculated exponent - Cvt
    input logic                 CvtOp,                      // conversion operation?
    input logic                 DivOp,                      // divide/sqrt operation?
    input logic                 FmaOp,                      // Fma operation?
    input logic [`NE+1:0]       FullResExp,                 // ResExp with bits to determine sign and overflow
    input logic [`NE+1:0]       RoundExp,                   // exponent of the normalized sum
    input logic [1:0]           NegResMSBS,                 // the negative integer result's most significant bits
    input logic                 ZSgnEffM, PSgnM,            // the product and modified Z signs
    input logic                 Round, UfLSBRes, Sticky, UfPlus1, // bits used to determine rounding
    output logic                Invalid, Overflow, Underflow, // flags used to select the res
    output logic [4:0]          PostProcFlgM                // flags {invalid, divide by zero, overflow, underflow, inexact}
);
    logic               SigNaN;     // is an input a signaling NaN
    logic               Inexact;    // inexact flag
    logic               FpInexact;  // floating point inexact flag
    logic               IntInexact; // integer inexact flag
    logic               IntInvalid; // integer (fp -> int conversion) invalid flag
    logic               FmaInvalid; // fma invalid flag
    logic               DivInvalid; // divide/sqrt invalid flag
    logic               DivByZero;  // divide by zero flag
    logic [`NE-1:0]     MaxExp;     // the maximum exponent before overflow

    ///////////////////////////////////////////////////////////////////////////////
    // Flags
    ///////////////////////////////////////////////////////////////////////////////

    // find the maximum exponent (this exponent and larger overflows)
    //  - fp -> int conversions overflow at 65 (64 bit integer) or 33 (32 bit integer)
    //  - otherwise the maximum is the all-ones exponent of the output format
    if (`FPSIZES == 1) begin
        assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : {`NE{1'b1}};
    end else if (`FPSIZES == 2) begin
        assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'($unsigned(65)) : (`NE)'($unsigned(33)) :
                        OutFmt ? {`NE{1'b1}} : {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
    end else if (`FPSIZES == 3) begin
        logic [`NE-1:0] MaxExpFp;
        always_comb
            case (OutFmt)
                `FMT: begin
                    MaxExpFp = {`NE{1'b1}};
                end
                `FMT1: begin
                    MaxExpFp = {{`NE-`NE1{1'b0}}, {`NE1{1'b1}}};
                end
                `FMT2: begin
                    MaxExpFp = {{`NE-`NE2{1'b0}}, {`NE2{1'b1}}};
                end
                default: begin
                    MaxExpFp = 1'bx;
                end
            endcase
        assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
    end else if (`FPSIZES == 4) begin
        logic [`NE-1:0] MaxExpFp;
        always_comb
            case (OutFmt)
                2'h3: begin
                    MaxExpFp = {`Q_NE{1'b1}};
                end
                2'h1: begin
                    MaxExpFp = {{`Q_NE-`D_NE{1'b0}}, {`D_NE{1'b1}}};
                end
                2'h0: begin
                    MaxExpFp = {{`Q_NE-`S_NE{1'b0}}, {`S_NE{1'b1}}};
                end
                2'h2: begin
                    MaxExpFp = {{`Q_NE-`H_NE{1'b0}}, {`H_NE{1'b1}}};
                end
            endcase
        assign MaxExp = ToInt&CvtOp ? Int64 ? (`NE)'(65) : (`NE)'(33) : MaxExpFp;
    end

    //                 if the result is greater than or equal to the max exponent
    //                 |                              and the exponent isn't negative
    //                 |                              |                   if the input isn't infinity or NaN
    //                 |                              |                   |
    assign Overflow = (FullResExp>={2'b0, MaxExp}) & ~FullResExp[`NE+1]&~(InfIn|NaNIn);

    // detecting tininess after rounding
    //                  the exponent is negative
    //                  |                    the result is denormalized
    //                  |                    |                    the result is normal and rounded from a denorm
    //                  |                    |                    |                     and if given an unbounded exponent the result does not round
    //                  |                    |                    |                     |                    and if the result is not exact
    //                  |                    |                    |                     |                    |               and if the input isn't infinity or NaN
    //                  |                    |                    |                     |                    |               |
    assign Underflow = ((FullResExp[`NE+1] | (FullResExp == 0) | ((FullResExp == 1) & (RoundExp == 0) & ~(UfPlus1&UfLSBRes)))&(Round|Sticky))&~(InfIn|NaNIn);

    // Set the floating point inexact flag if the res is different from what would be
    // output given infinite precision (bits were discarded, or the res over/underflowed)
    //  - never set when an infinity or NaN input is being used
    assign FpInexact = (Sticky|Overflow|Round|Underflow)&~(InfIn|NaNIn);

    //                  if the res is too small to be represented and not 0
    //                  |                               and if the res is not invalid (outside the integer bounds)
    //                  |                               |
    assign IntInexact = ((CvtCalcExpM[`NE]&~XZeroM)|Sticky|Round)&~Invalid;

    // select the inexact flag to output
    // NOTE(review): this selects on ToInt alone, while MaxExp and the flag packing below
    //               use ToInt&CvtOp - confirm ToInt is only asserted for conversions
    assign Inexact = ToInt ? IntInexact : FpInexact;

    // Set the integer (fp -> int conversion) invalid flag:
    //                  if the input is NaN or infinity
    //                  |           if the integer res overflows (out of range)
    //                  |           |          if the input was negative but outputting to an unsigned number
    //                  |           |          |                   the res doesn't round to zero
    //                  |           |          |                   |                                              or the res rounds up out of bounds
    //                  |           |          |                   |                                              |
    assign IntInvalid = XNaNM|XInfM|Overflow|((XSgnM&~Signed)&(~((CvtCalcExpM[`NE]|(~|CvtCalcExpM))&~Plus1)))|(NegResMSBS[1]^NegResMSBS[0]);

    // any used input is a signaling NaN
    //  - X is not a floating point input for int -> fp conversions
    //  - Y is not used by conversions
    //  - Z is only used by the fma
    assign SigNaN = (XSNaNM&~(IntToFp&CvtOp)) | (YSNaNM&~CvtOp) | (ZSNaNM&FmaOp);

    // fma invalid cases:
    //  - Inf - Inf (unless x or y is NaN)
    //  - 0 * Inf
    assign FmaInvalid = ((XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~XNaNM & ~YNaNM) | (XZeroM & YInfM) | (YZeroM & XInfM);

    // divide/sqrt invalid cases:
    //  - Inf/Inf or 0/0 (divide only)
    //  - square root of a negative number
    //      - sqrt(-0) = -0 is a valid result, so a zero input is excluded
    //      - a quiet NaN with its sign bit set is not a negative number (signaling NaNs are caught by SigNaN)
    assign DivInvalid = ((XInfM & YInfM) | (XZeroM & YZeroM))&~Sqrt | (XSgnM&Sqrt&~XZeroM&~XNaNM);

    // select the invalid flag for the current operation
    assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp) | (IntInvalid&CvtOp&ToInt);

    // divide by zero is only signaled when a finite, non-zero number is divided by zero
    //  - 0/0 is invalid rather than divide by zero
    //  - Inf/0 is an exact infinity and NaN/0 is NaN, neither signals divide by zero
    //  - square root has no divisor
    assign DivByZero = YZeroM&DivOp&~Sqrt&~(XZeroM|XInfM|XNaNM);

    // Combine flags
    //  - to integer results do not set the underflow or overflow flags
    assign PostProcFlgM = {Invalid, DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact};

endmodule

View file

@ -30,73 +30,6 @@
`include "wally-config.vh"
// fma
// Two-stage wrapper for the fused multiply-add:
//  - fma1 operates on the execute-stage (E) inputs
//  - its outputs are registered into the memory stage (M) by the flopenrc instances below
//  - fma2 operates on the memory-stage signals and produces the final result and flags
module fma(
input logic clk,
input logic reset,
input logic FlushM, // flush the memory stage
input logic StallM, // stall memory stage
input logic [`FMTBITS-1:0] FmtE, FmtM, // precision 1 = double 0 = single
input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y)
input logic [2:0] FrmM, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic XSgnE, YSgnE, ZSgnE, // input signs - execute stage
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // input exponents - execute stage
input logic [`NF:0] XManE, YManE, ZManE, // input mantissa - execute stage
input logic XSgnM, YSgnM, // input signs - memory stage
input logic [`NE-1:0] ZExpM, // input exponents - memory stage
input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage
input logic ZDenormE, // is denorm
input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage
input logic XNaNM, YNaNM, ZNaNM, // is NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN
input logic XZeroM, YZeroM, ZZeroM, // is zero - memory stage
input logic XInfM, YInfM, ZInfM, // is infinity
output logic [`FLEN-1:0] FMAResM, // FMA result
output logic [4:0] FMAFlgM); // FMA flags
// FOpCtrlE encodings for fma/mult/add
// fmadd = 000
// fmsub = 001
// fnmsub = 010 -(a*b)+c
// fnmadd = 011 -(a*b)-c
// fmul = 100
// fadd = 110
// fsub = 111
// signals transferred between pipeline stages (E = execute stage, M = memory stage)
logic [3*`NF+5:0] SumE, SumM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic InvZE, InvZM;
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM;
logic Mult; // memory-stage copy of (FOpCtrlE == 3'b100), i.e. the fmul encoding
logic ZDenormM;
// execute stage
fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
// E/M pipeline registers
// - positional connections; presumably (clk, reset, clear, enable, d, q) - confirm against flopenrc
// - the EMRegFma4 width is the NormCnt width plus the 8 single-bit signals packed with it
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE},
{AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM});
// memory stage
fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM,
.FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM,
.XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult,
.FMAResM, .FMAFlgM);
endmodule
//*** in al units before putting into : ? put in a seperate signal
module fma1(
input logic XSgnE, YSgnE, ZSgnE, // input's signs
input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format
input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format
@ -111,7 +44,7 @@ module fma1(
output logic InvZE, // intert Z
output logic ZSgnEffE, // the modified Z sign
output logic PSgnE, // the product's sign
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift cnt
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift cnt
);
logic [2*`NF+1:0] ProdManE; // 1.X frac * 1.Y frac in U(2.2Nf) format
@ -151,7 +84,7 @@ module fma1(
add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE);
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .NormCntE);
loa loa(.A(AlignedAddendInv+{(3*`NF+6)'(0),InvZE}), .P(ProdManKilled), .FmaNormCntE);
// Choose the positive sum and accompanying LZA result.
assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0];
@ -332,7 +265,7 @@ endmodule
module loa( //https://ieeexplore.ieee.org/abstract/document/930098
input logic [3*`NF+6:0] A, // addend
input logic [2*`NF+1:0] P, // product
output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift count for the positive result
output logic [$clog2(3*`NF+7)-1:0] FmaNormCntE // normalization shift count for the positive result
);
logic [3*`NF+6:0] T;
@ -360,861 +293,6 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE));
lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(FmaNormCntE));
endmodule
// fma2
// Memory-stage half of the fused multiply-add. Purely structural: it chains the
// normalize, fmaround, resultsign, fmaflags and resultselect submodules to turn the
// registered sum and product exponent into the final result (FMAResM) and flags (FMAFlgM).
module fma2(
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic [3*`NF+5:0] SumM, // the positive sum
input logic NegSumM, // was the sum negative
input logic InvZM, // do you invert Z
input logic ZDenormM, // is the original precision denormalized
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic Mult, // multiply operation
input logic [$clog2(3*`NF+7)-1:0] NormCntM, // the normalization shift count
output logic [`FLEN-1:0] FMAResM, // FMA final result
output logic [4:0] FMAFlgM); // FMA flags {invalid, divide by zero, overflow, underflow, inexact}
logic [`NF-1:0] ResultFrac; // Result fraction
logic [`NE-1:0] ResultExp; // Result exponent
logic ResultSgn, ResultSgnTmp; // Result sign
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow
logic [`NF+1:0] NormSum; // normalized sum
logic NormSumSticky; // sticky bit calculated from the normalized sum
logic SumZero; // is the sum zero
logic ResultDenorm; // is the result denormalized
logic Sticky, UfSticky; // Sticky bit
logic CalcPlus1; // do you add or subtract one for rounding
logic UfPlus1; // do you add one (for determining underflow flag)
logic Invalid,Underflow,Overflow; // flags
logic Guard, Round; // bits needed to determine rounding
logic UfLSBNormSum; // bits needed to determine rounding for underflow flag
logic [`FLEN:0] RoundAdd; // how much to add to the result
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum,
.ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm);
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// supported rounding modes (selected by FrmM):
// round to nearest even
// round to zero
// round to -infinity
// round to infinity
// round to nearest max magnitude
fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp,
.CalcPlus1, .UfPlus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .RoundAdd, .UfLSBNormSum);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .Mult, .ResultSgnTmp, .ResultSgn);
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
fmaflags fmaflags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .XZeroM, .YZeroM,
.XNaNM, .YNaNM, .ZNaNM, .FullResultExp, .SumExp, .ZSgnEffM, .PSgnM, .Round, .Guard, .UfLSBNormSum, .Sticky, .UfPlus1,
.FmtM, .Invalid, .Overflow, .Underflow, .FMAFlgM);
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM,
.FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd,
.ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow,
.ResultDenorm, .ResultExp, .ResultFrac, .FMAResM);
// *** use NF where needed
endmodule
// resultsign
// Selects the sign of the FMA result.
//   - ResultSgnTmp is the sign computed from the operand signs and the sign of the sum.
//   - ResultSgn equals ResultSgnTmp unless the sum is exactly zero, in which case the
//     zero-result sign (ZeroSgn) is used instead.
module resultsign(
    input  logic [2:0] FrmM,            // rounding mode (only FrmM[1:0] is examined here)
    input  logic       PSgnM, ZSgnEffM, // product sign and effective Z sign
    input  logic       Underflow,       // underflow indication
    input  logic       InvZM,           // Z is inverted (effective subtraction)
    input  logic       NegSumM,         // the sum was negative - NOTE(review): polarity inferred from the name, confirm
    input  logic       SumZero,         // the sum is zero
    input  logic       Mult,            // multiply operation
    output logic       ResultSgnTmp,    // sign assuming a non-zero sum
    output logic       ResultSgn        // final result sign
);
    logic SgnDiffer;     // product and effective addend have opposite signs
    logic CancelToZero;  // opposite signs, no underflow, and not a multiply
    logic RndNegInf;     // FrmM[1:0] selects round toward -infinity
    logic SubSgn;        // sign contribution when Z is inverted
    logic BothNeg;       // both product and effective addend are negative
    logic ZeroSgn;       // sign used when the sum is zero

    // Sign of a zero sum:
    //   - on cancellation the zero is positive unless rounding toward -infinity
    //   - otherwise it takes the product's sign
    assign SgnDiffer    = PSgnM ^ ZSgnEffM;
    assign CancelToZero = SgnDiffer & ~Underflow & ~Mult;
    assign RndNegInf    = (FrmM[1:0] == 2'b10);
    assign ZeroSgn      = CancelToZero ? RndNegInf : PSgnM;

    // Sign of a non-zero sum:
    //   - with Z inverted: Z's effective sign when the sum is negative,
    //     the product's sign when it is not
    //   - negative whenever both the product and the effective addend are negative
    assign SubSgn       = InvZM & ((ZSgnEffM & NegSumM) | (PSgnM & ~NegSumM));
    assign BothNeg      = ZSgnEffM & PSgnM;
    assign ResultSgnTmp = SubSgn | BothNeg;

    // A zero sum overrides the computed sign
    assign ResultSgn    = SumZero ? ZeroSgn : ResultSgnTmp;
endmodule
// normalize
// Normalizes the positive FMA sum and computes its exponent.
//   1. Detects a zero sum.
//   2. Computes the sum exponent (Z's exponent when the product is killed, otherwise the
//      product exponent adjusted by the normalization shift count) and converts it to the
//      bias of the format selected by FmtM.
//   3. Decides whether the result looks denormalized before the LZA correction and picks
//      the shift amount accordingly.
//   4. Shifts the sum, applies the LZA correction, and extracts NormSum (the top NF+2 bits)
//      along with the sticky bits.
//   5. Produces the final exponent SumExp and the post-correction ResultDenorm flag.
module normalize(
    input logic  [3*`NF+5:0]            SumM,           // the positive sum
    input logic  [`NE-1:0]              ZExpM,          // exponent of Z
    input logic  [`NE+1:0]              ProdExpM,       // X exponent + Y exponent - bias
    input logic  [$clog2(3*`NF+7)-1:0]  NormCntM,       // normalization shift count
    input logic  [`FMTBITS-1:0]         FmtM,           // format select (when `FPSIZES == 2: 1 = larger format, 0 = smaller)
    input logic                         KillProdM,      // is the product set to zero
    input logic                         ZDenormM,       // is Z denormalized
    input logic                         AddendStickyM,  // the sticky bit calculated from the aligned addend
    output logic [`NF+1:0]              NormSum,        // normalized sum
    output logic                        SumZero,        // is the sum zero
    output logic                        NormSumSticky, UfSticky, // sticky bits
    output logic [`NE+1:0]              SumExp,         // exponent of the normalized sum
    output logic                        ResultDenorm    // is the result denormalized
);
    logic [`NE+1:0]             SumExpTmp;        // exponent of the normalized sum not taking into account denormal or zero results
    logic [$clog2(3*`NF+7)-1:0] DenormShift;      // right shift if the result is denormalized //***change this later
    logic [3*`NF+5:0]           CorrSumShifted;   // the shifted sum after LZA correction
    logic [3*`NF+8:0]           SumShifted;       // the shifted sum before LZA correction
    logic [`NE+1:0]             SumExpTmpTmp;     // the exponent of the normalized sum with the `FLEN bias
    logic                       PreResultDenorm;  // is the result denormalized - calculated before LZA correction
    logic                       LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction

    ///////////////////////////////////////////////////////////////////////////////
    // Normalization
    ///////////////////////////////////////////////////////////////////////////////
    //*** insert bias-bias simplification in fcvt.sv/phone pictures

    // Determine if the sum is zero
    assign SumZero = ~(|SumM);

    // calculate the sum's exponent
    //  - product killed: Z's exponent, with bit 0 cleared when Z is denormalized
    //  - otherwise: ProdExpM - (NormCntM + 1 - (NF+4))
    assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, NormCntM} + 1 - (`NE+2)'(`NF+4));

    // convert the sum's exponent into the proper precision
    //  - for the smaller formats the exponent is re-biased (-`BIAS + the format's bias)
    //    and forced to zero when SumExpTmpTmp is zero
    if (`FPSIZES == 1) begin
        assign SumExpTmp = SumExpTmpTmp;

    end else if (`FPSIZES == 2) begin
        assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};

    end else if (`FPSIZES == 3) begin
        always_comb begin
            case (FmtM)
                `FMT:  SumExpTmp = SumExpTmpTmp;
                `FMT1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|SumExpTmpTmp}};
                `FMT2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|SumExpTmpTmp}};
                // all NE+2 bits X for an unused format encoding; written as a replication
                // because `NE+2'bx would parse as the sum (`NE) + (2'bx), not an (NE+2)-bit literal
                default: SumExpTmp = {`NE+2{1'bx}};
            endcase
        end

    end else if (`FPSIZES == 4) begin
        always_comb begin
            case (FmtM)
                2'h3: SumExpTmp = SumExpTmpTmp;
                2'h1: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|SumExpTmpTmp}};
                2'h0: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|SumExpTmpTmp}};
                2'h2: SumExpTmp = (SumExpTmpTmp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|SumExpTmpTmp}};
            endcase
        end
    end

    // determine if the result is denormalized
    //  - SumNLEZ:  the exponent is less than or equal to zero in format N
    //  - SumNGEFL: the exponent is greater than or equal to -(fraction length + 2) in format N
    //  - the result is (pre-correction) denormalized when both hold and the sum is not zero
    if (`FPSIZES == 1) begin
        logic Sum0LEZ, Sum0GEFL;
        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
        assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;

    end else if (`FPSIZES == 2) begin
        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
        assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;

    end else if (`FPSIZES == 3) begin
        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|SumExpTmpTmp;
        assign Sum2LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
        assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|SumExpTmpTmp;
        always_comb begin
            case (FmtM)
                `FMT:  PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
                `FMT1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
                `FMT2: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
                default: PreResultDenorm = 1'bx;
            endcase
        end

    end else if (`FPSIZES == 4) begin
        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
        assign Sum0LEZ  = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp;
        assign Sum0GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
        assign Sum1LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
        assign Sum1GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|SumExpTmpTmp;
        assign Sum2LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
        assign Sum2GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|SumExpTmpTmp;
        assign Sum3LEZ  = $signed(SumExpTmpTmp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
        assign Sum3GEFL = $signed(SumExpTmpTmp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|SumExpTmpTmp;
        always_comb begin
            case (FmtM)
                2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
                2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
                2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
                2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
            endcase
        end
    end

    // 010. when should be 001.
    //      - shift left one
    //      - add one from exp
    //      - if kill prod dont add to exp

    // Determine if the result is denormal
    // assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero;

    // Determine the shift needed for denormal results
    //  - if not denorm add 1 to shift out the leading 1
    //  - if denorm the low bits of the (non-positive) exponent are added to the shift count
    assign DenormShift = PreResultDenorm ? SumExpTmp[$clog2(3*`NF+7)-1:0] : 1;

    // Normalize the sum
    assign SumShifted = {3'b0, SumM} << NormCntM+DenormShift;

    // LZA correction
    //  - the two bits above the expected leading-one position indicate the shift count was short by one or two
    assign LZAPlus1 = SumShifted[3*`NF+7];
    assign LZAPlus2 = SumShifted[3*`NF+8];
    // the only possible mantissa for a plus two is all zeroes - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
    assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0];
    assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4];

    // Calculate the sticky bit
    //  - always includes every bit below NormSum
    //  - for the smaller formats also includes the NormSum bits that lie below that format's rounding bits
    if (`FPSIZES == 1) begin
        assign NormSumSticky = |CorrSumShifted[2*`NF+3:0];

    end else if (`FPSIZES == 2) begin
        // 3*NF+5 - NF1 - 3
        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
                               (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM);

    end else if (`FPSIZES == 3) begin
        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
                               (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) |
                               (|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2));

    end else if (`FPSIZES == 4) begin
        assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) |
                               (|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) |
                               (|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) |
                               (|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2));
    end

    // sticky bit used for the underflow calculation also includes the addend's sticky bit
    assign UfSticky = AddendStickyM | NormSumSticky;

    // Determine sum's exponent
    //                 if plus1   If plus2   if said denorm but norm plus 1   if said denorm but norm plus 2
    //  - none of the corrections are applied when the product is killed
    //  - the exponent is forced to zero when the sum is zero or the result is denormalized
    assign SumExp = (SumExpTmp+{{`NE+1{1'b0}}, LZAPlus1&~KillProdM}+{{`NE{1'b0}}, LZAPlus2&~KillProdM, 1'b0}+{{`NE+1{1'b0}}, ~ResultDenorm&PreResultDenorm&~KillProdM}+{{`NE+1{1'b0}}, &SumExpTmp&SumShifted[3*`NF+6]&~KillProdM}) & {`NE+2{~(SumZero|ResultDenorm)}};

    // recalculate if the result is denormalized
    //  - a pre-correction denormal whose shifted sum has a one in either of bits 3*NF+6 / 3*NF+7 is actually normal
    assign ResultDenorm = PreResultDenorm&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7];
endmodule
// fmaround
// Rounds the normalized FMA sum according to the rounding mode FrmM and the format FmtM.
//  - picks the rounding bits out of NormSum for the selected format
//  - decides whether to add one (CalcPlus1/Plus1) or subtract one (CalcMinus1/Minus1) at the
//    format's least significant fraction bit
//  - builds RoundAdd (the increment/decrement aligned to that bit) and adds it to
//    {SumExp, NormSumTruncated} to produce FullResultExp and ResultFrac
//  - UfRound/UfPlus1/UfLSBNormSum are a second set of rounding signals taken one bit lower,
//    exported for the underflow flag calculation
// NOTE(review): the output Guard is never assigned anywhere in this module, and the local
//               UfGuard is declared but never used.
module fmaround(
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [2:0] FrmM, // rounding mode
input logic UfSticky, // sticky bit for underflow calculation
input logic [`NF+1:0] NormSum, // normalized sum
input logic AddendStickyM, // addend's sticky bit
input logic NormSumSticky, // normalized sum's sticky bit
input logic ZZeroM, // is Z zero
input logic InvZM, // invert Z
input logic [`NE+1:0] SumExp, // exponent of the normalized sum
input logic ResultSgnTmp, // the result's sign
output logic CalcPlus1, UfPlus1, // do you add or subtract one from the result
output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow
output logic [`NF-1:0] ResultFrac, // Result fraction
output logic [`NE-1:0] ResultExp, // Result exponent
output logic Sticky, // sticky bit
output logic [`FLEN:0] RoundAdd, // how much to add to the result
output logic Round, Guard, UfLSBNormSum // bits needed to calculate rounding (Guard is not driven in this module)
);
logic LSBNormSum; // bit used for rounding - least significant bit of the normalized sum
logic SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
logic UfGuard; // guard bit used to calculate underflow (unused in this module)
logic UfCalcPlus1, CalcMinus1, Plus1, Minus1; // do you add or subtract one from the result
logic [`NF-1:0] NormSumTruncated; // the normalized sum trimmed to fit the mantissa
logic UfRound; // the bit one position below Round - used for the underflow rounding decision and folded into Sticky
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// {Guard, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0xx - do nothing
// 100 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted
// 101 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// 110/111 - Plus1
//
// Bit selection below (largest format; the smaller formats use the same pattern offset by NF - that format's NF):
// LSBNormSum = NormSum[2] - lowest bit kept in the fraction (NormSumTruncated = NormSum[NF+1:2])
// Round = NormSum[1] - first bit below the kept fraction
// UfRound = NormSum[0] - second bit below the kept fraction
if (`FPSIZES == 1) begin
// determine guard, round, and least significant bit of the result
assign Round = NormSum[1];
assign LSBNormSum = NormSum[2];
// used to determine underflow flag
assign UfRound = NormSum[0];
end else if (`FPSIZES == 2) begin
// \/-------------NF---------------,
// | NF1 | 2 | |
// '-------NF1------^
// determine guard, round, and least significant bit of the result
assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1];
assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2];
// used to determine underflow flag
assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1];
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[1];
LSBNormSum = NormSum[2];
// used to determine underflow flag
UfRound = NormSum[0];
end
`FMT1: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`NF1+1];
LSBNormSum = NormSum[`NF-`NF1+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`NF1];
end
`FMT2: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`NF2+1];
LSBNormSum = NormSum[`NF-`NF2+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`NF2];
end
default: begin
// unused format encoding - propagate X
Round = 1'bx;
LSBNormSum = 1'bx;
UfRound = 1'bx;
end
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[1];
LSBNormSum = NormSum[2];
// used to determine underflow flag
UfRound = NormSum[0];
end
2'h1: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`D_NF+1];
LSBNormSum = NormSum[`NF-`D_NF+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`D_NF];
end
2'h0: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`S_NF+1];
LSBNormSum = NormSum[`NF-`S_NF+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`S_NF];
end
2'h2: begin
// determine guard, round, and least significant bit of the result
Round = NormSum[`NF-`H_NF+1];
LSBNormSum = NormSum[`NF-`H_NF+2];
// used to determine underflow flag
UfRound = NormSum[`NF-`H_NF];
end
endcase
end
end
// used to determine underflow flag
// - the underflow rounding is done one bit lower, so its "LSB" is this module's Round bit
assign UfLSBNormSum = Round;
// determine sticky
// - everything below Round: the incoming sticky plus the UfRound bit
assign Sticky = UfSticky | UfRound;
// Determine if a small number was supposed to be subtracted
// - the addend's sticky bit is set, Z is inverted and non-zero, and no lower sum bits are set
assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM; //***here
assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM; //***here
always_comb begin
// Determine if you add 1
case (FrmM)
3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down
3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up
3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
// - same decision as above, evaluated one bit position lower
case (FrmM)
3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down
3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up
3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
// Determine if you subtract 1
case (FrmM)
3'b000: CalcMinus1 = 0;//round to nearest even
3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down
3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up
3'b100: CalcMinus1 = 0;//round to nearest max magnitude
default: CalcMinus1 = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky | Round);
assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky
assign Minus1 = CalcMinus1 & (Sticky | Round);
// Compute rounded result
// - RoundAdd is FLEN+1 bits wide, matching {SumExp, NormSumTruncated} below
// - Minus1: ones from the selected format's LSB upward (i.e. subtract one at that bit)
// - otherwise: Plus1 placed at the selected format's LSB, zeros elsewhere
if (`FPSIZES == 1) begin
assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, Plus1};
end else if (`FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = FmtM ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1} :
Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
`FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
`FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), Plus1, (`FLEN-1-`NE-`NF1)'(0)};
`FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), Plus1, (`FLEN-1-`NE-`NF2)'(0)};
// unused format encoding - note this default is zero rather than X
default: RoundAdd = (`FLEN+1)'(0);
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, Plus1};
2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), Plus1, (`FLEN-1-`NE-`D_NF)'(0)};
2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), Plus1, (`FLEN-1-`NE-`S_NF)'(0)};
2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), Plus1, (`FLEN-1-`NE-`H_NF)'(0)};
endcase
end
end
// drop the two rounding bits to get the fraction
assign NormSumTruncated = NormSum[`NF+1:2];
// add the rounding increment to the exponent and fraction together so a carry out of the fraction increments the exponent
assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd;
// the exponent field of the result is the low NE bits of the full exponent
assign ResultExp = FullResultExp[`NE-1:0];
endmodule
// fmaflags
// Computes the FMA exception flags and the Invalid/Overflow/Underflow
// indications that are also exported for result selection.
//   FMAFlgM = {invalid, divide by zero (always 0), overflow, underflow, inexact}
module fmaflags(
    input  logic                XSNaNM, YSNaNM, ZSNaNM,     // inputs are signaling NaNs
    input  logic                XInfM, YInfM, ZInfM,        // inputs are infinity
    input  logic                XZeroM, YZeroM,             // inputs are zero
    input  logic                XNaNM, YNaNM, ZNaNM,        // inputs are NaN
    input  logic [`NE+1:0]      FullResultExp,              // rounded exponent with the extra bits that show sign and overflow
    input  logic [`NE+1:0]      SumExp,                     // exponent of the normalized sum (before rounding)
    input  logic                ZSgnEffM, PSgnM,            // the modified Z sign and the product sign
    input  logic                Round, Guard, UfLSBNormSum, Sticky, UfPlus1, // rounding bits (Guard is not read in this module)
    input  logic [`FMTBITS-1:0] FmtM,                       // format select
    output logic                Invalid, Overflow, Underflow, // flags used to select the result
    output logic [4:0]          FMAFlgM                     // FMA flags
);
    logic SigNaN;          // at least one input is a signaling NaN
    logic InfMinusInf;     // infinite product and infinite addend of opposite sign (X, Y not NaN)
    logic ZeroTimesInf;    // one multiplicand is zero and the other is infinity
    logic NaNOrInfIn;      // at least one input is a NaN or an infinity
    logic RoundOrSticky;   // bits were discarded below the result
    logic GtMaxExp;        // rounded exponent is at or above the selected format's all-ones exponent
    logic TinyAfterRound;  // rounded exponent is 0, or reached 1 only because of rounding
    logic UnderflowFlag;   // underflow flag as reported in FMAFlgM
    logic Inexact;         // inexact flag

    ///////////////////////////////////////////////////////////////////////////////
    // Shared terms
    ///////////////////////////////////////////////////////////////////////////////

    // Overflow, underflow and inexact are all suppressed when any input is NaN or infinity
    assign NaNOrInfIn    = XNaNM | YNaNM | ZNaNM | XInfM | YInfM | ZInfM;
    assign RoundOrSticky = Round | Sticky;

    ///////////////////////////////////////////////////////////////////////////////
    // Invalid
    ///////////////////////////////////////////////////////////////////////////////

    // Raised for:
    //   1) any signaling NaN input
    //   2) Inf - Inf (unless X or Y is NaN)
    //   3) 0 * Inf
    assign SigNaN       = XSNaNM | YSNaNM | ZSNaNM;
    assign InfMinusInf  = (XInfM | YInfM) & ZInfM & (PSgnM ^ ZSgnEffM) & ~(XNaNM | YNaNM);
    assign ZeroTimesInf = (XZeroM & YInfM) | (YZeroM & XInfM);
    assign Invalid      = SigNaN | InfMinusInf | ZeroTimesInf;

    ///////////////////////////////////////////////////////////////////////////////
    // Overflow
    ///////////////////////////////////////////////////////////////////////////////

    // For each format: the exponent field is all ones, or the bit just above it is set
    if (`FPSIZES == 1) begin
        assign GtMaxExp = FullResultExp[`NE] | (&FullResultExp[`NE-1:0]);

    end else if (`FPSIZES == 2) begin
        logic GtMax0, GtMax1;
        assign GtMax0   = FullResultExp[`NE]  | (&FullResultExp[`NE-1:0]);
        assign GtMax1   = FullResultExp[`NE1] | (&FullResultExp[`NE1-1:0]);
        assign GtMaxExp = FmtM ? GtMax0 : GtMax1;

    end else if (`FPSIZES == 3) begin
        logic GtMax0, GtMax1, GtMax2;
        assign GtMax0 = FullResultExp[`NE]  | (&FullResultExp[`NE-1:0]);
        assign GtMax1 = FullResultExp[`NE1] | (&FullResultExp[`NE1-1:0]);
        assign GtMax2 = FullResultExp[`NE2] | (&FullResultExp[`NE2-1:0]);
        always_comb begin
            case (FmtM)
                `FMT:    GtMaxExp = GtMax0;
                `FMT1:   GtMaxExp = GtMax1;
                `FMT2:   GtMaxExp = GtMax2;
                default: GtMaxExp = 1'bx;
            endcase
        end

    end else if (`FPSIZES == 4) begin
        logic GtMaxQ, GtMaxD, GtMaxS, GtMaxH;
        assign GtMaxQ = FullResultExp[`NE]   | (&FullResultExp[`NE-1:0]);
        assign GtMaxD = FullResultExp[`D_NE] | (&FullResultExp[`D_NE-1:0]);
        assign GtMaxS = FullResultExp[`S_NE] | (&FullResultExp[`S_NE-1:0]);
        assign GtMaxH = FullResultExp[`H_NE] | (&FullResultExp[`H_NE-1:0]);
        always_comb begin
            case (FmtM)
                2'h0: GtMaxExp = GtMaxS;
                2'h1: GtMaxExp = GtMaxD;
                2'h2: GtMaxExp = GtMaxH;
                2'h3: GtMaxExp = GtMaxQ;
            endcase
        end
    end

    // Too big to represent: exponent at/above the maximum and not negative
    assign Overflow = GtMaxExp & ~FullResultExp[`NE+1] & ~NaNOrInfIn;

    ///////////////////////////////////////////////////////////////////////////////
    // Underflow
    ///////////////////////////////////////////////////////////////////////////////

    // Used for result selection: the pre-rounding exponent is negative, or it is
    // zero and the result is not exact
    assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & RoundOrSticky)) & ~NaNOrInfIn;

    // Reported flag: the rounded exponent is negative, or the result is inexact and
    //   - the rounded exponent is zero, or
    //   - the rounded exponent is one but the pre-rounding exponent was zero, unless
    //     UfPlus1 and UfLSBNormSum are both set
    assign TinyAfterRound = (FullResultExp == 0) |
                            ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1 & UfLSBNormSum));
    assign UnderflowFlag  = (FullResultExp[`NE+1] | (TinyAfterRound & RoundOrSticky)) & ~NaNOrInfIn;

    ///////////////////////////////////////////////////////////////////////////////
    // Inexact
    ///////////////////////////////////////////////////////////////////////////////

    // The result differs from the infinitely precise one: discarded bits, overflow, or underflow
    assign Inexact = (RoundOrSticky | Overflow | Underflow) & ~NaNOrInfIn;

    ///////////////////////////////////////////////////////////////////////////////
    // Combine flags
    ///////////////////////////////////////////////////////////////////////////////

    //  - FMA can't set the divide by zero flag
    //  - the reported underflow is UnderflowFlag, not the Underflow used for result selection
    assign FMAFlgM = {Invalid, 1'b0, Overflow, UnderflowFlag, Inexact};
endmodule
// resultselect
// Builds every candidate FMA result for the format selected by FmtM and picks one:
//  - NaN results (per input, or a single canonical NaN when `IEEE754 is 0)
//  - InvalidResult: canonical quiet NaN with a positive sign
//  - InfResult: infinity, signed by Z when Z is infinite and by the product otherwise
//  - KillProdResult: Z's exponent and fraction plus the masked rounding increment
//  - OverflowResult: infinity or the largest finite number, depending on rounding mode and sign
//  - UnderflowResult: signed zero, optionally incremented by one in the lowest bit
//  - NormResult: {sign, rounded exponent, rounded fraction}
// For formats narrower than FLEN the upper FLEN-LEN bits of every candidate are filled with ones
// (presumably RISC-V NaN-boxing - confirm against the architecture spec).
// NOTE(review): when `IEEE754 is 0 only XNaNResult is assigned; YNaNResult, ZNaNResult and
//               InvalidResult are left unassigned, and the final select does not read them in that case.
module resultselect(
input logic XSgnM, YSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic ZDenormM, // is the original precision denormalized
input logic ZZeroM, // is Z zero
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic ResultSgn, // the result's sign
input logic CalcPlus1, // rounding bits
input logic [`FLEN:0] RoundAdd, // how much to add to the result
input logic Invalid, Overflow, Underflow, // flags
input logic ResultDenorm, // is the result denormalized
input logic [`NE-1:0] ResultExp, // Result exponent
input logic [`NF-1:0] ResultFrac, // Result fraction
output logic [`FLEN-1:0] FMAResM // FMA final result
);
logic InfSgn; // sign of an infinite result
logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InfResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult, NormResult; // possible results
// an infinite Z determines the sign of an infinite result, otherwise the product does
assign InfSgn = ZInfM ? ZSgnEffM : PSgnM;
// Candidate results, one set per supported-format configuration.
// Pattern shared by every format below:
// - NaN results: exponent all ones, top fraction bit forced to 1, remaining fraction bits from the input (IEEE754) or zero
// - OverflowResult: largest finite value when FrmM[1:0] is 01, 10 with a positive result, or 11 with a negative result; infinity otherwise
// - KillProdResult: bit 0 of Z's exponent is cleared when Z is denormal or zero; the rounding increment is only applied when AddendStickyM is set
// - UnderflowResult: the increment is CalcPlus1&(AddendStickyM|FrmM[1])
if (`FPSIZES == 1) begin
if(`IEEE754) begin
assign XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
assign YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
assign ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
assign InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
assign NormResult = {ResultSgn, ResultExp, ResultFrac};
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
// FmtM = 1 selects the full-width format, FmtM = 0 the narrower format (NE1/NF1/LEN1)
if(`IEEE754) begin
assign XNaNResult = FmtM ? {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
assign YNaNResult = FmtM ? {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
assign ZNaNResult = FmtM ? {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
assign InvalidResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign XNaNResult = FmtM ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
// narrow-format KillProdResult: Z's exponent is narrowed by keeping its top bit and its low NE1-1 bits
assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb begin
case (FmtM)
// full-width format
`FMT: begin
if(`IEEE754) begin
XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
NormResult = {ResultSgn, ResultExp, ResultFrac};
end
// first narrower format (NE1/NF1/LEN1)
`FMT1: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`LEN1{1'b1}}, XSgnM, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
YNaNResult = {{`FLEN-`LEN1{1'b1}}, YSgnM, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
ZNaNResult = {{`FLEN-`LEN1{1'b1}}, ZSgnEffM, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
InvalidResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} :
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)};
KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)};
NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]};
end
// second narrower format (NE2/NF2/LEN2)
`FMT2: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`LEN2{1'b1}}, XSgnM, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
YNaNResult = {{`FLEN-`LEN2{1'b1}}, YSgnM, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
ZNaNResult = {{`FLEN-`LEN2{1'b1}}, ZSgnEffM, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
InvalidResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} :
{{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)};
KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)};
NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]};
end
// unused format encoding - every candidate is driven to zero
default: begin
if(`IEEE754) begin
XNaNResult = (`FLEN)'(0);
YNaNResult = (`FLEN)'(0);
ZNaNResult = (`FLEN)'(0);
InvalidResult = (`FLEN)'(0);
end else begin
XNaNResult = (`FLEN)'(0);
end
OverflowResult = (`FLEN)'(0);
KillProdResult = (`FLEN)'(0);
UnderflowResult = (`FLEN)'(0);
InfResult = (`FLEN)'(0);
NormResult = (`FLEN)'(0);
end
endcase
end
end else if (`FPSIZES == 4) begin
always_comb begin
case (FmtM)
// full-width format
2'h3: begin
if(`IEEE754) begin
XNaNResult = {XSgnM, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNResult = {YSgnM, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNResult = {ZSgnEffM, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
XNaNResult = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} :
{ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}};
// NOTE(review): this line slices ZExpM with `Q_NE where the parallel full-width lines use `NE - confirm the two are equal when `FPSIZES == 4
KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))};
InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)};
NormResult = {ResultSgn, ResultExp, ResultFrac};
end
// D_ format parameters
2'h1: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`D_LEN{1'b1}}, XSgnM, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
YNaNResult = {{`FLEN-`D_LEN{1'b1}}, YSgnM, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
ZNaNResult = {{`FLEN-`D_LEN{1'b1}}, ZSgnEffM, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
InvalidResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} :
{{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]};
end
// S_ format parameters
2'h0: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`S_LEN{1'b1}}, XSgnM, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
YNaNResult = {{`FLEN-`S_LEN{1'b1}}, YSgnM, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
ZNaNResult = {{`FLEN-`S_LEN{1'b1}}, ZSgnEffM, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
InvalidResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} :
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]};
end
// H_ format parameters
2'h2: begin
if(`IEEE754) begin
XNaNResult = {{`FLEN-`H_LEN{1'b1}}, XSgnM, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
YNaNResult = {{`FLEN-`H_LEN{1'b1}}, YSgnM, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
ZNaNResult = {{`FLEN-`H_LEN{1'b1}}, ZSgnEffM, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
InvalidResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end else begin
XNaNResult = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end
OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} :
{{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}};
InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]};
end
endcase
end
end
// Final selection, highest priority first:
// NaN inputs (X, then Y, then Z), invalid operation, infinite input, killed product,
// overflow, underflow (only when the result is not denormal and the result exponent is not 1),
// and otherwise the normal rounded result.
// When `IEEE754 is 0 every NaN/invalid case returns the single canonical NaN held in XNaNResult.
if(`IEEE754) begin
assign FMAResM = XNaNM ? XNaNResult :
YNaNM ? YNaNResult :
ZNaNM ? ZNaNResult :
Invalid ? InvalidResult :
XInfM|YInfM|ZInfM ? InfResult :
KillProdM ? KillProdResult :
Overflow ? OverflowResult :
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
NormResult;
end else begin
assign FMAResM = XNaNM|YNaNM|ZNaNM|Invalid ? XNaNResult :
XInfM|YInfM|ZInfM ? InfResult :
KillProdM ? KillProdResult :
Overflow ? OverflowResult :
Underflow & ~ResultDenorm & (ResultExp!=1) ? UnderflowResult :
NormResult;
end
endmodule

View file

@ -0,0 +1,127 @@
`include "wally-config.vh"
// fmashiftcalc
//
// Computes the inputs of the shared normalization shifter for FMA results:
//   - the exponent of the normalized sum (before LZA correction), rebiased to
//     the precision selected by FmtM,
//   - whether the sum is zero and whether the result is predicted to be
//     denormalized (again before LZA correction),
//   - the value to shift (FmaShiftIn) and the shift distance (FmaShiftAmt).
module fmashiftcalc(
    input logic  [3*`NF+5:0]            SumM,            // the positive sum
    input logic  [`NE-1:0]              ZExpM,           // exponent of Z
    input logic  [`NE+1:0]              ProdExpM,        // X exponent + Y exponent - bias
    input logic  [$clog2(3*`NF+7)-1:0]  FmaNormCntM,     // normalization shift count
    input logic  [`FMTBITS-1:0]         FmtM,            // precision of the operation (encoding depends on `FPSIZES, see the case statements below)
    input logic                         KillProdM,       // is the product set to zero
    input logic                         ZDenormM,        // is Z denormalized
    output logic [`NE+1:0]              ConvNormSumExp,  // exponent of the normalized sum not taking into account denormal or zero results
    output logic                        SumZero,         // is the sum zero
    output logic                        PreResultDenorm, // is the result denormalized - calculated before LZA correction
    output logic [$clog2(3*`NF+7)-1:0]  FmaShiftAmt,     // normalization shift amount (shift count plus denormal adjustment)
    output logic [3*`NF+8:0]            FmaShiftIn       // value fed to the normalization shifter (SumM with three leading zeros)
);
    logic [$clog2(3*`NF+7)-1:0] DenormShift;    // extra shift applied on top of the normalization count //***change this later
    logic [`NE+1:0]             NormSumExp;     // the exponent of the normalized sum with the `FLEN bias

    ///////////////////////////////////////////////////////////////////////////////
    // Normalization
    ///////////////////////////////////////////////////////////////////////////////
    //*** insert bias-bias simplification in fcvt.sv/phone pictures

    // Determine if the sum is zero
    assign SumZero = ~(|SumM);

    // calculate the sum's exponent
    //  - product killed: use Z's exponent, with its LSB forced to 0 when Z is denormalized
    //  - otherwise:      ProdExpM - (FmaNormCntM + 1 - (`NF+4))
    assign NormSumExp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({{`NE+2-$unsigned($clog2(3*`NF+7)){1'b0}}, FmaNormCntM} + 1 - (`NE+2)'(`NF+4));

    // convert the sum's exponent into the proper precision
    //  - smaller formats are rebiased: NormSumExp - `BIAS + <format bias>
    //  - the trailing AND forces the converted exponent to zero when NormSumExp is zero
    if (`FPSIZES == 1) begin
        assign ConvNormSumExp = NormSumExp;

    end else if (`FPSIZES == 2) begin
        assign ConvNormSumExp = FmtM ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};

    end else if (`FPSIZES == 3) begin
        always_comb begin
            case (FmtM)
                `FMT:    ConvNormSumExp = NormSumExp;
                `FMT1:   ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}};
                `FMT2:   ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}};
                // unused format encoding: drive every bit of the exponent to X
                // (an explicit (`NE+2)-bit replication rather than `NE + 2'bx)
                default: ConvNormSumExp = {`NE+2{1'bx}};
            endcase
        end

    end else if (`FPSIZES == 4) begin
        always_comb begin
            case (FmtM)
                2'h3: ConvNormSumExp = NormSumExp;
                2'h1: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}};
                2'h0: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}};
                2'h2: ConvNormSumExp = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}};
            endcase
        end
    end

    // determine if the result is denormalized
    //  For each supported format i:
    //   - Sum<i>LEZ:  the exponent, once rebiased to format i, is less than or equal to zero
    //   - Sum<i>GEFL: the exponent, once rebiased to format i, is at least -(fraction length + 2)
    //                 (for the smaller formats this is also asserted when NormSumExp is zero)
    //  The result is predicted denormal when both hold and the sum is not zero.
    if (`FPSIZES == 1) begin
        logic Sum0LEZ, Sum0GEFL;
        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
        assign PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;

    end else if (`FPSIZES == 2) begin
        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
        assign PreResultDenorm = (FmtM ? Sum0LEZ : Sum1LEZ) & (FmtM ? Sum0GEFL : Sum1GEFL) & ~SumZero;

    end else if (`FPSIZES == 3) begin
        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2));
        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1));
        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp;
        assign Sum2LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2));
        assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp;
        always_comb begin
            case (FmtM)
                `FMT:    PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
                `FMT1:   PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
                `FMT2:   PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
                default: PreResultDenorm = 1'bx;
            endcase
        end

    end else if (`FPSIZES == 4) begin
        logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
        assign Sum0LEZ  = NormSumExp[`NE+1] | ~|NormSumExp;
        assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2));
        assign Sum1LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS));
        assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp;
        assign Sum2LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS));
        assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp;
        assign Sum3LEZ  = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS));
        assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp;
        always_comb begin
            case (FmtM)
                2'h3: PreResultDenorm = Sum0LEZ & Sum0GEFL & ~SumZero;
                2'h1: PreResultDenorm = Sum1LEZ & Sum1GEFL & ~SumZero;
                2'h0: PreResultDenorm = Sum2LEZ & Sum2GEFL & ~SumZero;
                2'h2: PreResultDenorm = Sum3LEZ & Sum3GEFL & ~SumZero;
            endcase
        end
    end

    // 010. when should be 001.
    //      - shift left one
    //      - add one from exp
    //      - if kill prod dont add to exp

    // Determine if the result is denormal
    // assign PreResultDenorm = $signed(ConvNormSumExp)<=0 & ($signed(ConvNormSumExp)>=$signed(-FracLen)) & ~SumZero;

    // Determine the shift needed for denormal results
    //  - if denorm, the low bits of the converted exponent give the extra shift
    //  - if not denorm add 1 to shift out the leading 1
    assign DenormShift = PreResultDenorm ? ConvNormSumExp[$clog2(3*`NF+7)-1:0] : 1;

    // set and calculate the shift input and amount
    assign FmaShiftIn = {3'b0, SumM};
    assign FmaShiftAmt = FmaNormCntM+DenormShift;
endmodule

View file

@ -45,6 +45,8 @@ module fpu (
output logic FWriteIntE, // integer register write enables
output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register
output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register
output logic [1:0] FResSelW,
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
output logic [4:0] SetFflagsM // FPU flags (to privileged unit)
@ -68,24 +70,24 @@ module fpu (
logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double
logic FDivStartD, FDivStartE; // Start division or squareroot
logic FWriteIntD; // Write to integer register
logic FWriteIntM; // Write to integer register
logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals
logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register
logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register
logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage
logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister
logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage
logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input
// regfile signals
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage
logic XSgnM, YSgnM; // input's sign - memory stage
logic XSgnM; // input's sign - memory stage
logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage
logic [`NE-1:0] ZExpM; // input's exponent - memory stage
logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage
@ -95,7 +97,7 @@ module fpu (
logic XNaNQ, YNaNQ; // is the input a NaN - divide
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XDenormE, ZDenormE; // is the input denormalized
logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage
logic XZeroQ, YZeroQ; // is the input zero - divide
@ -104,24 +106,43 @@ module fpu (
logic XInfQ, YInfQ; // is the input infinity - divide
logic XExpMaxE; // is the exponent all ones (max value)
logic FmtQ;
logic FOpCtrlQ;
logic FOpCtrlQ;
// Fma Signals
logic [3*`NF+5:0] SumE, SumM;
logic [`NE+1:0] ProdExpE, ProdExpM;
logic AddendStickyE, AddendStickyM;
logic KillProdE, KillProdM;
logic InvZE, InvZM;
logic NegSumE, NegSumM;
logic ZSgnEffE, ZSgnEffM;
logic PSgnE, PSgnM;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM;
// Cvt Signals
logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent
logic [`LOGLGLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized
logic CvtResSgnE, CvtResSgnM; // the result's sign
logic IntZeroE, IntZeroM; // is the integer zero?
logic [`LGLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
// result and flag signals
logic [63:0] FDivResM, FDivResW; // divide/squareroot result
logic [4:0] FDivFlgM; // divide/squareroot flags
logic [`FLEN-1:0] FMAResM, FMAResW; // FMA/multiply result
logic [4:0] FMAFlgM; // FMA/multiply result
logic [`FLEN-1:0] ReadResW; // read result (load instruction)
logic [`FLEN-1:0] CvtResE; // FP <-> int convert result
logic [`XLEN-1:0] CvtIntResE; // FP <-> int convert result
logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this
logic [`XLEN-1:0] ClassResE; // classify result
logic [`FLEN-1:0] CmpResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`XLEN-1:0] FIntResE; // classify result
logic [`FLEN-1:0] FpResM, FpResW; // classify result
logic [`FLEN-1:0] PostProcResM; // classify result
logic [4:0] PostProcFlgM; // classify result
logic [`XLEN-1:0] FCvtIntResM;
logic [`FLEN-1:0] CmpFpResE; // compare result
logic [`XLEN-1:0] CmpIntResE; // compare result
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`FLEN-1:0] FResE, FResM, FResW; // selected result that is ready in the memory stage
logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage
logic [`XLEN-1:0] FIntResE;
logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register
// other signals
logic FDivSqrtDoneE; // is divide done
@ -133,10 +154,20 @@ module fpu (
// DECODE STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// |||||||||||
// ||| |||
// ||| |||
// ||| |||
// ||| |||
// ||| |||
// |||||||||||
//////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS,
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD,
.FIntResSelD, .FmtD, .FrmD, .FWriteIntD);
.IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD,
.FmtD, .FrmD, .FWriteIntD);
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
@ -150,20 +181,31 @@ module fpu (
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]},
{Adr1E, Adr2E, Adr3E});
flopenrc #(16+int'(`FMTBITS-1)) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
flopenrc #(13+int'(`FMTBITS)) DECtrlReg3(clk, reset, FlushE, ~StallE,
{FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD},
{FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE});
// EXECUTION STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// ||||||||||||
// |||
// |||
// |||||||||
// |||
// |||
// ||||||||||||
//////////////////////////////////////////////////////////////////////////////////////////
// Hazard unit for FPU
// - determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM,
fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM,
.FStallD, .FForwardXE, .FForwardYE, .FForwardZE);
// forwarding muxs
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE);
mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, FpResM, FForwardXE, FSrcXE);
mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, FpResM, FForwardYE, FPreSrcYE);
mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, FpResM, FForwardZE, FPreSrcZE);
generate
@ -178,7 +220,7 @@ module fpu (
endgenerate
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), FSrcYE); // Force Z to be 0 for multiply instructions
mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions
// Force Z to be 0 for multiply instructions
generate
@ -201,21 +243,12 @@ module fpu (
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE,
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE);
// FMA
// - two stage FMA
// - execute stage - multiplication and addend shifting
// - memory stage - addition and rounding
// - handles FMA and multiply instructions
fma fma (.clk, .reset, .FlushM, .StallM,
.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE,
.ZDenormE, .XZeroE, .YZeroE, .ZZeroE,
.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM,
.XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM,
.XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM,
.FOpCtrlE,
.FmtE, .FmtM, .FrmM,
.FMAFlgM, .FMAResM);
// fma - does multiply, add, and multiply-add instructions
fma fma (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE,
.XManE, .YManE, .ZManE, .XZeroE, .YZeroE, .ZZeroE,
.FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .FmaNormCntE,
.ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE);
// fpdivsqrt using Goldschmidt's iteration
if(`FLEN == 64) begin
@ -245,11 +278,14 @@ module fpu (
// other FP execution units
fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE,
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE);
.XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE);
fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE);
fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE,
.XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE);
fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE,
.FWriteIntE, .XZeroE, .XDenormE, .FmtE, .CvtCalcExpE,
.CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .IntZeroE,
.CvtLzcInE);
// data to be stored in memory - to IEU
// - FP uses NaN-blocking format
@ -269,16 +305,16 @@ module fpu (
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes
endgenerate
// select a result that may be written to the FP register
mux4 #(`FLEN) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, FResSelE, FResE);
mux4 #(5) FFlgMux(5'b0, 5'b0, {CmpNVE, 4'b0}, CvtFlgE, FResSelE, FFlgE);
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU
if (`FLEN>`XLEN)
mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE,
CvtIntResE, FIntResSelE, FIntResE);
assign IntSrcXE = FSrcXE[`XLEN-1:0];
else
mux4 #(`XLEN) IntResMux({{`XLEN-`FLEN{CmpResE[`FLEN-1:0]}}, CmpResE}, {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}, ClassResE,
CvtIntResE, FIntResSelE, FIntResE);
assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE};
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok
// *** make sure the fpu matches the chapter diagram
@ -286,33 +322,68 @@ module fpu (
// flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM);
flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM});
flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM});
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM);
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM});
flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(`FLEN) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM);
flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM);
flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(7+int'(`FMTBITS-1)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResultSelE, FrmE, FmtE},
{FRegWriteM, FResultSelM, FrmM, FmtM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM,
{XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE},
{XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM,
{FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE},
{FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM});
flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM);
flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM);
flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM,
{AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE},
{AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM});
flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE},
{CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM});
// BEGIN MEMORY STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// ||| |||
// |||||| ||||||
// ||| ||| ||| |||
// ||| ||||| |||
// ||| ||| |||
// ||| |||
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM,
.AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM,
.ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM,
.NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM,
.CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM,
.CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM);
// FPU flag selection - to privileged
mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM);
mux2 #(5) FPUFlgMux ({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
mux2 #(`FLEN) FPUResMux (PreFpResM, PostProcResM, FResSelM[0], FpResM);
// M/W pipe registers
flopenrc #(`FLEN) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW);
flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW);
flopenrc #(`FLEN) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW);
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW,
{FRegWriteM, FResultSelM, FmtM},
{FRegWriteW, FResultSelW, FmtW});
{FRegWriteM, FResSelM, FmtM},
{FRegWriteW, FResSelW, FmtW});
// BEGIN WRITEBACK STAGE
//////////////////////////////////////////////////////////////////////////////////////////
// ||| |||
// ||| |||
// ||| ||| |||
// ||| ||||| |||
// ||| ||| ||| |||
// |||||| ||||||
// ||| |||
//////////////////////////////////////////////////////////////////////////////////////////
// put ReadData into NaN-blocking format
// - if there are any unsused bits the most significant bits are filled with 1s
// - for load instruction
@ -328,6 +399,6 @@ module fpu (
endgenerate
// select the result to be written to the FP register
if(`FLEN>=64)
mux4 #(`FLEN) FPUResultMux (ReadResW, FMAResW, {{`FLEN-64{1'b0}},FDivResW}, FResW, FResultSelW, FPUResultW);
mux2 #(`FLEN) FPUResultMux (FpResW, ReadResW, FResSelW[1], FPUResultW);
endmodule // fpu

View file

@ -0,0 +1,29 @@
`include "wally-config.vh"
// lzacorrection
//
// Applies the leading-zero-anticipator (LZA) correction to the output of the
// normalization shifter and produces the matching exponent of the sum.
// For non-FMA operations the top `CORRSHIFTSZ bits of Shifted are passed
// through to CorrShifted unchanged.
module lzacorrection(
    input logic  [`NORMSHIFTSZ-1:0] Shifted,         // output of the normalization shifter, before LZA correction
    input logic                     FmaOp,           // select the LZA-corrected sum (otherwise pass Shifted through)
    input logic  [`NE+1:0]          ConvNormSumExp,  // exponent of the normalized sum not taking into account denormal or zero results
    input logic                     PreResultDenorm, // is the result denormalized - calculated before LZA correction
    input logic                     KillProdM,       // is the product set to zero
    input logic                     SumZero,         // is the sum zero
    output logic [`CORRSHIFTSZ-1:0] CorrShifted,     // the shifted value after LZA correction
    output logic [`NE+1:0]          SumExp           // exponent of the normalized sum
);
    logic [3*`NF+5:0] CorrSumShifted;     // the shifted sum after LZA correction
    logic             ResDenorm;          // is the result denormalized
    logic             LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction

    // the individual (`NE+2)-bit increments that make up SumExp
    logic [`NE+1:0]   Plus1Term;          // +1 when LZAPlus1
    logic [`NE+1:0]   Plus2Term;          // +2 when LZAPlus2
    logic [`NE+1:0]   DenormTerm;         // +1 when predicted denormal but actually normal
    logic [`NE+1:0]   MaxExpTerm;         // +1 when ConvNormSumExp is all ones and Shifted[3*`NF+6] is set

    // LZA correction: the two most significant shifter bits flag a shift that fell short
    assign LZAPlus2 = Shifted[`NORMSHIFTSZ-1];
    assign LZAPlus1 = Shifted[`NORMSHIFTSZ-2];

    // recalculate if the result is denormalized:
    // still denormal only if neither of bits NORMSHIFTSZ-3 / NORMSHIFTSZ-2 is set
    assign ResDenorm = PreResultDenorm & ~(Shifted[`NORMSHIFTSZ-3] | Shifted[`NORMSHIFTSZ-2]);

    // the only possible mantissa for a plus two is all zeroes - a one has to
    // propagate all the way through a sum - so LZAPlus2 needs no case of its own here
    assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1]
                                     : Shifted[`NORMSHIFTSZ-4:0];

    // FMA: corrected sum, zero-padded on the right up to `CORRSHIFTSZ bits
    // otherwise: the top `CORRSHIFTSZ bits of the raw shifter output
    assign CorrShifted = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}}
                               : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];

    // Determine sum's exponent
    //  - none of the increments apply when the product was killed
    assign Plus1Term  = {{`NE+1{1'b0}}, LZAPlus1 & ~KillProdM};
    assign Plus2Term  = {{`NE{1'b0}}, LZAPlus2 & ~KillProdM, 1'b0};
    assign DenormTerm = {{`NE+1{1'b0}}, ~ResDenorm & PreResultDenorm & ~KillProdM};
    // NOTE(review): the original note labels this last case "said denorm but norm plus 2" - confirm intent
    assign MaxExpTerm = {{`NE+1{1'b0}}, (&ConvNormSumExp) & Shifted[3*`NF+6] & ~KillProdM};

    //  - the exponent is forced to zero for a zero sum or a denormalized result
    assign SumExp = (ConvNormSumExp + Plus1Term + Plus2Term + DenormTerm + MaxExpTerm)
                    & {`NE+2{~(SumZero|ResDenorm)}};
endmodule

View file

@ -0,0 +1,46 @@
`include "wally-config.vh"
// convert shift
// fp -> int: | `XLEN zeros | Mantissa | 0's if necessary | << CalcExp
// process:
// - start - CalcExp = 1 + XExp - Largest Bias
// | `XLEN zeros | Mantissa | 0's if necessary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if necessary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if necessary |
// | keep |
//
// fp -> fp:
// - if result is denormalized or underflowed:
// | `NF-1 zeros | Mantissa | 0's if necessary | << NF+CalcExp-1
// process:
// - start
// | mantissa | 0's |
//
// - shift right by NF-1 (NF-1)
// | `NF-1 zeros | mantissa | 0's |
//
// - shift left by CalcExp = XExp - Largest bias + new bias
// | 0's | mantissa | 0's |
// | keep |
//
// - if the input is denormalized:
// | lzcIn | 0's if necessary | << ZeroCnt+1
// - plus 1 to shift out the first 1
//
// int -> fp: | lzcIn | 0's if necessary | << ZeroCnt+1
// - plus 1 to shift out the first 1
// normshift
//
// Shared normalization shifter: a plain logical left shift of ShiftIn by
// ShiftAmt. Bits shifted past the top of the `NORMSHIFTSZ-bit word are dropped.
module normshift(
    input logic  [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt, // left-shift distance
    input logic  [`NORMSHIFTSZ-1:0]         ShiftIn,  // value to be shifted
    output logic [`NORMSHIFTSZ-1:0]         Shifted   // ShiftIn shifted left by ShiftAmt
);
    always_comb
        Shifted = ShiftIn << ShiftAmt;
endmodule

View file

@ -0,0 +1,203 @@
///////////////////////////////////////////
//
// Written: Katherine Parry, David Harris
// Modified: 6/23/2021
//
// Purpose: Floating point multiply-accumulate of configurable size
//
// A component of the Wally configurable RISC-V project.
//
// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University
//
// MIT LICENSE
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
// OR OTHER DEALINGS IN THE SOFTWARE.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
module postprocess(
input logic XSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] FmtM, // precision 1 = double 0 = single
input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XZeroM, YZeroM, ZZeroM, // inputs are zero
input logic XInfM, YInfM, ZInfM, // inputs are infinity
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic XSNaNM, YSNaNM, ZSNaNM, // inputs are signaling NaNs
input logic [3*`NF+5:0] SumM, // the positive sum
input logic NegSumM, // was the sum negitive
input logic InvZM, // do you invert Z
input logic ZDenormM, // is the original precision denormalized
input logic ZSgnEffM, // the modified Z sign - depends on instruction
input logic PSgnM, // the product's sign
input logic [2:0] FOpCtrlM, // choose which opperation (look below for values)
input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count
input logic [`NE:0] CvtCalcExpM, // the calculated expoent
input logic CvtResDenormUfM,
input logic [`LOGLGLEN-1:0] CvtShiftAmtM, // how much to shift by
input logic CvtResSgnM, // the result's sign
input logic FWriteIntM, // is fp->int (since it's writting to the integer register)
input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder)
input logic IntZeroM, // is the input zero
input logic [1:0] PostProcSelM, // select result to be written to fp register
output logic [`FLEN-1:0] PostProcResM, // FMA final result
output logic [4:0] PostProcFlgM,
output logic [`XLEN-1:0] FCvtIntResM // the int conversion result
);
logic [`NF-1:0] ResFrac; // Result fraction
logic [`NE-1:0] ResExp; // Result exponent
logic [`CORRSHIFTSZ-1:0] CorrShifted; // the shifted sum before LZA correction
logic [`NE+1:0] SumExp; // exponent of the normalized sum
logic [`NE+1:0] FullResExp; // ResExp with bits to determine sign and overflow
logic SumZero; // is the sum zero
logic Sticky; // Sticky bit
logic [3*`NF+8:0] FmaShiftIn; // is the sum zero
logic UfPlus1; // do you add one (for determining underflow flag)
logic Round; // bits needed to determine rounding
logic [`LGLEN+`NF:0] CvtShiftIn; // number to be shifted
logic Mult; // multiply opperation
logic [`FLEN:0] RoundAdd; // how much to add to the result
logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results
logic PreResultDenorm; // is the result denormalized - calculated before LZA corection
logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count
logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count
logic [3*`NF+8:0] ShiftIn; // is the sum zero
logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result
logic Plus1; // add one to the final result?
logic Overflow, Underflow, Invalid; // flags
logic Signed; // is the opperation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the opperation an int->fp conversion?
logic ToInt; // is the opperation an fp->int conversion?
logic [`NE+1:0] RoundExp;
logic [1:0] NegResMSBS;
logic CvtOp;
logic FmaOp;
logic CvtResUf;
logic DivOp;
logic InfIn;
logic ResSgn;
logic NaNIn;
logic UfLSBRes;
logic Sqrt;
logic [`FMTBITS-1:0] OutFmt;
// signals to help readability
assign Signed = FOpCtrlM[0];
assign Int64 = FOpCtrlM[1];
assign IntToFp = FOpCtrlM[2];
assign ToInt = FWriteIntM;
assign Mult = FOpCtrlM[2]&~FOpCtrlM[1]&~FOpCtrlM[0];
assign CvtOp = (PostProcSelM == 2'b00);
assign FmaOp = (PostProcSelM == 2'b10);
assign DivOp = (PostProcSelM == 2'b01);
assign Sqrt = FOpCtrlM[0];
// is there an input of infinity or NaN being used
assign InfIn = (XInfM&~(IntToFp&CvtOp))|(YInfM&~CvtOp)|(ZInfM&FmaOp);
assign NaNIn = (XNaNM&~(IntToFp&CvtOp))|(YNaNM&~CvtOp)|(ZNaNM&FmaOp);
// choose the ouptut format depending on the opperation
// - fp -> fp: OpCtrl contains the percision of the output
// - otherwise: FmtM contains the percision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? FmtM : (FOpCtrlM[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? FmtM : FOpCtrlM[1:0];
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCalcExpM, .CvtResDenormUfM, .XManM, .CvtLzcInM,
.XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp,
.ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn);
always_comb
case(PostProcSelM)
2'b10: begin // fma
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(3*`NF+7){1'b0}}, FmaShiftAmt};
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}};
end
2'b00: begin // cvt
ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM};
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}};
end
2'b01: begin //div
ShiftAmt = 0;//{DivShiftAmt};
ShiftIn = 0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn};
end
default: begin
ShiftAmt = 0;
ShiftIn = 0;
end
endcase
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
lzacorrection lzacorrection(.FmaOp, .KillProdM, .PreResultDenorm, .ConvNormSumExp,
.SumZero, .Shifted, .SumExp, .CorrShifted);
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// round to zero
// round to -infinity
// round to infinity
// round to nearest max magnitude
round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM,
.InvZM, .ResSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf,
.UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.FrmM, .PSgnM, .PostProcSelM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky,
.ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, .CvtResSgnM, .ResSgn);
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
flags flags(.XSNaNM, .YSNaNM, .ZSNaNM, .XInfM, .YInfM, .ZInfM, .InfIn, .XZeroM, .YZeroM,
.XSgnM, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCalcExpM,
.XNaNM, .YNaNM, .NaNIn, .ZSgnEffM, .PSgnM, .Round,
.UfLSBRes, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullResExp, .Plus1,
.RoundExp, .NegResMSBS, .Invalid, .Overflow, .Underflow, .PostProcFlgM);
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
resultselect resultselect(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .ZZeroM, .XZeroM,
.IntZeroM, .FrmM, .OutFmt, .AddendStickyM, .KillProdM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .NegResMSBS,
.FullResExp, .Shifted, .CvtCalcExpM, .ResSgn, .ResExp, .ResFrac, .PostProcResM, .FCvtIntResM);
endmodule

View file

@ -0,0 +1,282 @@
`include "wally-config.vh"
// resultselect
// Chooses the final post-processed floating-point result (PostProcResM) and the
// final fp->int conversion result (FCvtIntResM).
// - A set of candidate floating-point results (NaN, invalid, overflow, killed
//   product, underflow, normal) is built for the selected output format and one
//   of them is picked by a priority mux.
// - Results narrower than FLEN are padded with ones in the upper bits.
// - The integer result is the (optionally negated) rounded shifter output, or a
//   saturated value when Invalid is set.
module resultselect(
input logic XSgnM, // input signs
input logic [`NE-1:0] ZExpM, // input exponents
input logic [`NF:0] XManM, YManM, ZManM, // input mantissas
input logic [2:0] FrmM, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn, // an operand in use is infinity (forces the infinity/overflow result)
input logic XZeroM, // floating-point input X is zero
input logic IntZeroM, // integer input is zero
input logic NaNIn, // an operand in use is NaN (only used when IEEE754 is not set)
input logic IntToFp, // is the operation an int->fp conversion?
input logic Int64, // is the integer 64 bits?
input logic Signed, // is the operation with a signed integer?
input logic CvtOp, // a conversion result is being post-processed
input logic [`NORMSHIFTSZ-1:0] Shifted, // the normalization shifter output (upper XLEN bits are the integer result magnitude)
input logic FmaOp, // an FMA result is being post-processed
input logic Plus1, // add one to the final result (rounding increment)
input logic [`NE:0] CvtCalcExpM, // the calculated exponent
input logic AddendStickyM, // sticky bit that is calculated during alignment
input logic KillProdM, // set the product to zero before addition if the product is too small to matter
input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN
input logic ZDenormM, // is the original precision denormalized
input logic ZZeroM, // input Z is zero
input logic ResSgn, // the result's sign
input logic [`FLEN:0] RoundAdd, // how much to add to the result
input logic Invalid, Overflow, // flags
input logic CvtResUf, // conversion result underflow indication (kills the result) - NOTE(review): exact definition is in the producer, confirm there
input logic [`NE-1:0] ResExp, // result exponent
input logic [`NE+1:0] FullResExp, // result exponent with two extra upper bits; the MSB kills the result for non-conversion operations
input logic [`NF-1:0] ResFrac, // result fraction
output logic [`FLEN-1:0] PostProcResM, // final floating-point result
output logic [1:0] NegResMSBS, // two bits of NegRes around the integer's top bit, selected by Signed and Int64
output logic [`XLEN-1:0] FCvtIntResM // final integer result
);
logic [`FLEN-1:0] XNaNRes, YNaNRes, ZNaNRes, InvalidRes, OfRes, KillProdRes, UfRes, NormRes; // possible results
logic OfResMax; // overflow result is the maximum finite number (instead of infinity)
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic [`XLEN+1:0] NegRes; // the rounded integer result, negated when X is negative
logic KillRes; // force the underflow result
// does the overflow result output the maximum normalized floating point number
// - yes for round to zero, round down with a positive result, round up with a negative result
// - never for an infinite input (output infinity), unless the operation is an int->fp conversion
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResSgn) | (FrmM[1:0]==2'b11&ResSgn));
// Candidate results, one generate branch per number of supported precisions.
// In every branch:
//   XNaNRes/YNaNRes/ZNaNRes - input NaN with its top fraction bit forced to 1 and the sign cleared (IEEE754 only)
//   InvalidRes              - NaN with only the top fraction bit set
//   OfRes                   - maximum finite number or infinity, with the result sign
//   KillProdRes             - Z, rounded by RoundAdd when AddendStickyM is set
//   UfRes                   - signed zero, plus one LSB when Plus1 and FrmM[1] are set
//   NormRes                 - {sign, rounded exponent, rounded fraction}
if (`FPSIZES == 1) begin
//NaN res selection depending on standard
if(`IEEE754) begin
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
assign KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
assign UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
assign NormRes = {ResSgn, ResExp, ResFrac};
end else if (`FPSIZES == 2) begin //will the format conversion in killprod work in other conversions?
// OutFmt = 1 selects the full-width format, OutFmt = 0 the narrower (LEN1/NE1/NF1) format
if(`IEEE754) begin
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
assign OfRes = OutFmt ? OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}} :
OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
assign KillProdRes = OutFmt ? {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
assign UfRes = OutFmt ? {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]} : {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
assign NormRes = OutFmt ? {ResSgn, ResExp, ResFrac} : {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
// full-width format
`FMT: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdRes = {ResSgn, {ZExpM[`NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
NormRes = {ResSgn, ResExp, ResFrac};
end
// LEN1/NE1/NF1 format
`FMT1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF1]};
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF1]};
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF1]};
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResSgn, {`NE1{1'b1}}, (`NF1)'(0)};
KillProdRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})};
UfRes = {{`FLEN-`LEN1{1'b1}}, {ResSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`LEN1{1'b1}}, ResSgn, ResExp[`NE1-1:0], ResFrac[`NF-1:`NF-`NF1]};
end
// LEN2/NE2/NF2 format
`FMT2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, XManM[`NF-2:`NF-`NF2]};
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, YManM[`NF-2:`NF-`NF2]};
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`NF2]};
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResSgn, {`NE2{1'b1}}, (`NF2)'(0)};
KillProdRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})};
UfRes = {{`FLEN-`LEN2{1'b1}}, {ResSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`LEN2{1'b1}}, ResSgn, ResExp[`NE2-1:0], ResFrac[`NF-1:`NF-`NF2]};
end
// unused format encoding: drive zeros
default: begin
if(`IEEE754) begin
XNaNRes = (`FLEN)'(0);
YNaNRes = (`FLEN)'(0);
ZNaNRes = (`FLEN)'(0);
InvalidRes = (`FLEN)'(0);
end else begin
InvalidRes = (`FLEN)'(0);
end
OfRes = (`FLEN)'(0);
KillProdRes = (`FLEN)'(0);
UfRes = (`FLEN)'(0);
NormRes = (`FLEN)'(0);
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
// quad precision (full width)
2'h3: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, XManM[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, YManM[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, ZManM[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {ResSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResSgn, {`NE{1'b1}}, {`NF{1'b0}}};
KillProdRes = {ResSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})};
UfRes = {ResSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),Plus1&FrmM[1]};
NormRes = {ResSgn, ResExp, ResFrac};
end
// double precision
2'h1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`D_NF]};
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`D_NF]};
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`D_NF]};
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResSgn, {`D_NE{1'b1}}, (`D_NF)'(0)};
KillProdRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`D_LEN{1'b1}}, {ResSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`D_LEN{1'b1}}, ResSgn, ResExp[`D_NE-1:0], ResFrac[`NF-1:`NF-`D_NF]};
end
// single precision
2'h0: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`S_NF]};
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`S_NF]};
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`S_NF]};
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResSgn, {`S_NE{1'b1}}, (`S_NF)'(0)};
KillProdRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`S_LEN{1'b1}}, {ResSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`S_LEN{1'b1}}, ResSgn, ResExp[`S_NE-1:0], ResFrac[`NF-1:`NF-`S_NF]};
end
// half precision
2'h2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, XManM[`NF-2:`NF-`H_NF]};
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, YManM[`NF-2:`NF-`H_NF]};
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, ZManM[`NF-2:`NF-`H_NF]};
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)};
KillProdRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~(ZDenormM|ZZeroM), ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})};
UfRes = {{`FLEN-`H_LEN{1'b1}}, {ResSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), Plus1&FrmM[1]}};
NormRes = {{`FLEN-`H_LEN{1'b1}}, ResSgn, ResExp[`H_NE-1:0], ResFrac[`NF-1:`NF-`H_NF]};
end
endcase
end
// determine if you should kill the result
// - Cvt: do so if the result underflows, the fp input is zero (the exponent doesn't calculate correctly), or the integer input is zero
//   - don't set to zero if the fp input is zero but the fp input is not being used
//   - don't set to zero if the int input is zero but the int input is not being used
// - otherwise: do so if the MSB of the full result exponent is set
assign KillRes = CvtOp ? (CvtResUf|(XZeroM&~IntToFp)|(IntZeroM&IntToFp)) : FullResExp[`NE+1];//Underflow & ~ResDenorm & (ResExp!=1);
// final floating-point result: priority mux, highest priority first
if(`IEEE754) begin
// IEEE754: propagate the payload of the first NaN input in use (X, then Y, then Z)
assign PostProcResM = XNaNM&~(IntToFp&CvtOp) ? XNaNRes :
YNaNM&~CvtOp ? YNaNRes :
ZNaNM&FmaOp ? ZNaNRes :
Invalid ? InvalidRes :
Overflow|InfIn ? OfRes :
KillProdM&FmaOp ? KillProdRes :
KillRes ? UfRes :
NormRes;
end else begin
// otherwise: every NaN input and every invalid operation yields the same InvalidRes
assign PostProcResM = NaNIn|Invalid ? InvalidRes :
Overflow|InfIn ? OfRes :
KillProdM&FmaOp ? KillProdRes :
KillRes ? UfRes :
NormRes;
end
///////////////////////////////////////////////////////////////////////////////////////
//
// ||||||||||| ||| ||| |||||||||||||
// ||| |||||| ||| |||
// ||| ||| ||| ||| |||
// ||| ||| |||||| |||
// ||||||||||| ||| ||| |||
//
///////////////////////////////////////////////////////////////////////////////////////
// *** probably can optimize the negation
// select the overflow integer result
// - negative infinity and out of range negative input
// | int | long |
// signed | -2^31 | -2^63 |
// unsigned | 0 | 0 |
//
// - positive infinity and out of range positive input and NaNs
// | int | long |
// signed | 2^31-1 | 2^63-1 |
// unsigned | 2^32-1 | 2^64-1 |
//
// other: a 32 bit unsigned result should be sign extended as if it were a signed number
assign OfIntRes = Signed ? XSgnM&~XNaNM ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negative
Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive
XSgnM&~XNaNM ? {`XLEN{1'b0}} : // unsigned negative
{`XLEN{1'b1}};// unsigned positive
// round the positive result (upper XLEN bits of Shifted plus Plus1) and negate it if X is negative
assign NegRes = XSgnM ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1};
//*** false critical path probably
// export the two bits of NegRes at and just above the top bit of the selected integer type
assign NegResMSBS = Signed ? Int64 ? NegRes[`XLEN:`XLEN-1] : NegRes[32:31] :
Int64 ? NegRes[`XLEN+1:`XLEN] : NegRes[33:32];
// select the integer output
// - if the input is invalid (out of bounds, NaN or Inf) then output the overflow result
// - if the input underflows (MSB of CvtCalcExpM is set)
// - if rounding up on a signed operation with a negative input, output -1
// - otherwise output a rounded 0
// - otherwise output the normal result (truncated and sign extended if necessary)
assign FCvtIntResM = Invalid ? OfIntRes :
CvtCalcExpM[`NE] ? XSgnM&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point??
Int64 ? NegRes[`XLEN-1:0] : {{`XLEN-32{NegRes[31]}}, NegRes[31:0]};
endmodule

View file

@ -0,0 +1,50 @@
`include "wally-config.vh"
// resultsign
// Produces the sign bit of the post-processed result.
//   PostProcSelM == 2'b10 (fma)    : sign derived from the product/addend signs and the sum
//   PostProcSelM == 2'b00 (cvt)    : sign supplied by the conversion unit (CvtResSgnM)
//   PostProcSelM == 2'b01 (divide) : constant 0
//   anything else                  : x
module resultsign(
    input logic [2:0]       FrmM,           // rounding mode; only FrmM[1:0] == 2'b10 (round down) is examined here
    input logic             PSgnM, ZSgnEffM,// product sign and effective addend sign
    input logic             InvZM,          // Z was inverted for the addition
    input logic             ZInfM,          // Z is infinity
    input logic             InfIn,          // an infinite operand is in use
    input logic             NegSumM,        // the raw sum was negative
    input logic [1:0]       PostProcSelM,   // selects which unit's result is being post-processed
    input logic [`NE+1:0]   SumExp,         // exponent of the normalized sum
    input logic             SumZero,        // the sum is zero
    input logic             Mult,           // the operation is a multiply
    input logic             Round,          // round bit
    input logic             Sticky,         // sticky bit
    input logic             CvtResSgnM,     // sign of the conversion result
    output logic            ResSgn          // selected result sign
);

    logic SumUnderflows;    // sum exponent is negative, or zero with a nonzero round/sticky bit
    logic SgnsDiffer;       // product and effective addend have opposite signs
    logic RoundingDown;     // rounding mode bits select round down
    logic ZeroSumSgn;       // sign used when the sum is zero
    logic InfResSgn;        // sign used when an infinite operand is in use
    logic NonzeroSumSgn;    // sign used for an ordinary nonzero sum
    logic FmaSgn;           // sign of the fma result

    assign SgnsDiffer   = PSgnM ^ ZSgnEffM;
    assign RoundingDown = (FrmM[1:0] == 2'b10);

    // Sign of a zero sum
    //   - opposite-signed operands that are not a multiply and did not underflow:
    //     negative only when rounding down
    //   - every other case: the product's sign
    assign SumUnderflows = SumExp[`NE+1] | ((SumExp == 0) & (Round | Sticky));
    assign ZeroSumSgn    = (SgnsDiffer & ~(SumUnderflows | Mult)) ? RoundingDown : PSgnM;

    // Sign of a nonzero sum
    //   - Z inverted: Z's sign when the sum came out negative, the product's sign otherwise
    //   - product and addend both negative: negative
    assign NonzeroSumSgn = (InvZM & ((ZSgnEffM & NegSumM) | (PSgnM & ~NegSumM))) | (PSgnM & ZSgnEffM);

    // An infinite Z dictates the sign; otherwise the product's sign is used
    assign InfResSgn = ZInfM ? ZSgnEffM : PSgnM;

    // Priority: infinite operand, then zero sum, then the ordinary case
    assign FmaSgn = InfIn ? InfResSgn : (SumZero ? ZeroSumSgn : NonzeroSumSgn);

    // Pick the sign for the unit whose result is being post-processed
    always_comb
        case (PostProcSelM)
            2'b00:   ResSgn = CvtResSgnM;   // cvt
            2'b01:   ResSgn = 1'b0;         // divide
            2'b10:   ResSgn = FmaSgn;       // fma
            default: ResSgn = 1'bx;
        endcase

endmodule

316
pipelined/src/fpu/round.sv Normal file
View file

@ -0,0 +1,316 @@
`include "wally-config.vh"
// what position is XLEN in?
// options:
// 1: XLEN > NF > NF1
// 2: NF > XLEN > NF1
// 3: NF > NF1 > XLEN
// single and double will always be smaller than XLEN
`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
// round
// Rounds the normalized result of the FMA / convert / divide units.
//   1. extracts the LSB, round and sticky bits for the selected output format
//      (or for an XLEN-bit integer when converting fp->int)
//   2. decides, from the rounding mode, whether one unit in the last place has to
//      be added (Plus1) or subtracted (Minus1)
//   3. adds the resulting RoundAdd to {RoundExp, RoundFrac} so that a fraction
//      overflow carries into the exponent
// A second set of bits one position lower (UfRound/UfSticky/UfPlus1/UfLSBRes) is
// produced for the underflow flag calculation.
module round(
    input logic [`FMTBITS-1:0]      OutFmt,         // output format (FPSIZES==2: 1 selects the NF-bit fraction, 0 the NF1-bit fraction)
    input logic [2:0]               FrmM,           // rounding mode: 000 nearest even, 001 zero, 010 down, 011 up, 100 nearest max magnitude
    input logic                     FmaOp,          // an FMA result is being rounded
    input logic [1:0]               PostProcSelM,   // exponent source select: 10 = fma, 00 = cvt, 01 = divide
    input logic                     CvtResDenormUfM,// cvt: forces RoundExp to zero unless CvtResUf is set
    input logic                     ToInt,          // the conversion produces an integer
    input logic                     CvtOp,          // a conversion result is being rounded
    input logic                     CvtResUf,       // cvt: result underflows (forces the sticky bit)
    input logic [`CORRSHIFTSZ-1:0]  CorrShifted,    // normalized value to round; fraction starts at the MSB
    input logic                     AddendStickyM,  // addend's sticky bit
    input logic                     ZZeroM,         // is Z zero
    input logic                     InvZM,          // invert Z
    input logic [`NE+1:0]           SumExp,         // exponent of the normalized sum
    input logic                     ResSgn,         // the result's sign
    input logic [`NE:0]             CvtCalcExpM,    // the calculated exponent
    output logic                    UfPlus1,        // round-up decision one bit position lower (for the underflow flag)
    output logic [`NE+1:0]          FullResExp,     // ResExp with bits to determine sign and overflow
    output logic [`NF-1:0]          ResFrac,        // Result fraction
    output logic [`NE-1:0]          ResExp,         // Result exponent
    output logic                    Sticky,         // sticky bit
    output logic [`NE+1:0]          RoundExp,       // exponent before rounding
    output logic                    Plus1,          // add one unit in the last place to the result
    output logic [`FLEN:0]          RoundAdd,       // how much to add to the result
    output logic                    Round, UfLSBRes // bits needed to calculate rounding
);
    logic           LSBRes;                         // bit used for rounding - least significant bit of the normalized sum
    logic           SubBySmallNum, UfSubBySmallNum; // was there supposed to be a subtraction by a small number
    logic           UfCalcPlus1, CalcMinus1, Minus1;// do you add or subtract one from the result
    logic           NormSumSticky;                  // normalized sum's sticky bit
    logic           UfSticky;                       // sticky bit for underflow calculation
    logic [`NF-1:0] RoundFrac;                      // fraction before rounding
    logic           FpRes, IntRes;                  // is the result a floating-point number / an integer
    logic           UfRound;                        // bit just below the round bit
    logic           FpRound, FpLSBRes, FpUfRound;   // round/LSB/UfRound for a floating-point result
    logic           CalcPlus1, FpPlus1;             // round-up decision before/after qualification

    ///////////////////////////////////////////////////////////////////////////////
    // Rounding
    ///////////////////////////////////////////////////////////////////////////////

    // round to nearest even
    //      {Round, Sticky}
    //      0x - do nothing
    //      10 - tie - Plus1 if result is odd (LSBRes = 1)
    //          - don't add 1 if a small number was supposed to be subtracted
    //      11 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
    //          - Plus1 otherwise
    // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
    // round to -infinity
    //      - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
    //      - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
    // round to infinity
    //      - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
    //      - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
    // round to nearest max magnitude
    //      {Round, Sticky}
    //      0x - do nothing
    //      10 - tie - Plus1
    //          - don't add 1 if a small number was supposed to be subtracted
    //      11 - do nothing if a small number was supposed to be subtracted (the sticky bit was set by the small number)
    //          - Plus1 otherwise

    assign IntRes = CvtOp & ToInt;
    assign FpRes = ~IntRes;

    // sticky bit calculation
    //  - CorrShifted is split at the round position of every possible result width;
    //    each slice contributes to the sticky bit only for the results that are
    //    narrower than that slice's upper boundary
    if (`FPSIZES == 1) begin
        //     1: XLEN > NF
        //      |         XLEN          |
        //      |    NF     |1|1|
        //                     ^    ^ if floating point result
        //                     ^ if not an FMA result
        if (`XLENPOS == 1)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
                                                 (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
        //     2: NF > XLEN
        if (`XLENPOS == 2)assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
                                                 (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
    end else if (`FPSIZES == 2) begin
        // XLEN is either 64 or 32
        // so half and single are always smaller than XLEN
        // 1: XLEN > NF > NF1
        if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
        // 2: NF > XLEN > NF1
        if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
        // 3: NF > NF1 > XLEN
        if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
    end else if (`FPSIZES == 3) begin
        // The slice between the NF2 and NF1 round positions holds fraction bits of an
        // FMT1 result, so it is sticky only for the smallest format (FMT2).
        // 1: XLEN > NF > NF1
        if (`XLENPOS == 1) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT2)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:0]);
        // 2: NF > XLEN > NF1
        if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT2)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
        // 3: NF > NF1 > XLEN
        //  - the slice between the XLEN and NF1 round positions is sticky for an
        //    integer result and for the smallest format (FMT2)
        if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT2)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT2)|IntRes)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`NF-2:0]);
    end else if (`FPSIZES == 4) begin
        // Quad precision will always be greater than XLEN
        // 2: NF > XLEN > NF1
        if (`XLENPOS == 2) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
        // 3: NF > NF1 > XLEN
        // The extra XLEN bit will be ored later when calculating the final sticky bit - the ufplus1 not needed for integer
        if (`XLENPOS == 3) assign NormSumSticky = (|CorrShifted[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
                                                  (|CorrShifted[`CORRSHIFTSZ-`Q_NF-2:0]);
    end

    // only add the Addend sticky if doing an FMA operation
    //      - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
    assign UfSticky = AddendStickyM&FmaOp | NormSumSticky | CvtResUf&CvtOp | SumExp[`NE+1]&FmaOp;

    // determine round and LSB of the rounded value
    //      - underflow round bit is used to determine the underflow flag
    if (`FPSIZES == 1) begin
        assign FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
        assign FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
        assign FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
    end else if (`FPSIZES == 2) begin
        assign FpRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-1] : CorrShifted[`CORRSHIFTSZ-`NF1-1];
        assign FpLSBRes = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF] : CorrShifted[`CORRSHIFTSZ-`NF1];
        assign FpUfRound = OutFmt ? CorrShifted[`CORRSHIFTSZ-`NF-2] : CorrShifted[`CORRSHIFTSZ-`NF1-2];
    end else if (`FPSIZES == 3) begin
        always_comb
            case (OutFmt)
                `FMT: begin
                    FpRound = CorrShifted[`CORRSHIFTSZ-`NF-1];
                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF];
                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF-2];
                end
                `FMT1: begin
                    FpRound = CorrShifted[`CORRSHIFTSZ-`NF1-1];
                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF1];
                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF1-2];
                end
                `FMT2: begin
                    FpRound = CorrShifted[`CORRSHIFTSZ-`NF2-1];
                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`NF2];
                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`NF2-2];
                end
                default: begin
                    FpRound = 1'bx;
                    FpLSBRes = 1'bx;
                    FpUfRound = 1'bx;
                end
            endcase
    end else if (`FPSIZES == 4) begin
        always_comb
            case (OutFmt)
                2'h3: begin // quad
                    FpRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-1];
                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`Q_NF];
                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`Q_NF-2];
                end
                2'h1: begin // double
                    FpRound = CorrShifted[`CORRSHIFTSZ-`D_NF-1];
                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`D_NF];
                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`D_NF-2];
                end
                2'h0: begin // single
                    FpRound = CorrShifted[`CORRSHIFTSZ-`S_NF-1];
                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`S_NF];
                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`S_NF-2];
                end
                2'h2: begin // half
                    FpRound = CorrShifted[`CORRSHIFTSZ-`H_NF-1];
                    FpLSBRes = CorrShifted[`CORRSHIFTSZ-`H_NF];
                    FpUfRound = CorrShifted[`CORRSHIFTSZ-`H_NF-2];
                end
            endcase
    end

    // an integer result takes its rounding bits at the XLEN boundary instead
    assign Round = IntRes ? CorrShifted[`CORRSHIFTSZ-`XLEN-1] : FpRound;
    assign LSBRes = IntRes ? CorrShifted[`CORRSHIFTSZ-`XLEN] : FpLSBRes;
    assign UfRound = IntRes ? CorrShifted[`CORRSHIFTSZ-`XLEN-2] : FpUfRound;

    // used to determine underflow flag
    //  - the underflow rounding is done one position lower, so its LSB is the normal round bit
    assign UfLSBRes = FpRound;

    // determine sticky
    assign Sticky = UfSticky | UfRound;

    // Determine if a small number was supposed to be subtracted - For Fma calculation only
    assign SubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky|UfRound) & ~ZZeroM & FmaOp;
    assign UfSubBySmallNum = AddendStickyM & InvZM & ~(NormSumSticky) & ~ZZeroM & FmaOp;

    always_comb begin
        // Determine if you add 1
        case (FrmM)
            3'b000: CalcPlus1 = Round & ((Sticky| LSBRes)&~SubBySmallNum);//round to nearest even
            3'b001: CalcPlus1 = 0;//round to zero
            3'b010: CalcPlus1 = ResSgn & ~(SubBySmallNum & ~Round);//round down
            3'b011: CalcPlus1 = ~ResSgn & ~(SubBySmallNum & ~Round);//round up
            3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude
            default: CalcPlus1 = 1'bx;
        endcase
        // Determine if you add 1 (for underflow flag)
        case (FrmM)
            3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBRes)&~UfSubBySmallNum);//round to nearest even
            3'b001: UfCalcPlus1 = 0;//round to zero
            3'b010: UfCalcPlus1 = ResSgn & ~(UfSubBySmallNum & ~UfRound);//round down
            3'b011: UfCalcPlus1 = ~ResSgn & ~(UfSubBySmallNum & ~UfRound);//round up
            3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude
            default: UfCalcPlus1 = 1'bx;
        endcase
        // Determine if you subtract 1
        case (FrmM)
            3'b000: CalcMinus1 = 0;//round to nearest even
            3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero
            3'b010: CalcMinus1 = ~ResSgn & ~Round & SubBySmallNum;//round down
            3'b011: CalcMinus1 = ResSgn & ~Round & SubBySmallNum;//round up
            3'b100: CalcMinus1 = 0;//round to nearest max magnitude
            default: CalcMinus1 = 1'bx;
        endcase
    end

    // If an answer is exact don't round
    assign Plus1 = CalcPlus1 & (Sticky | Round);
    // an integer result is incremented by the consumer of Plus1, not through RoundAdd
    assign FpPlus1 = Plus1&~IntRes;
    assign UfPlus1 = UfCalcPlus1 & Sticky; // UfRound is part of sticky
    assign Minus1 = CalcMinus1 & (Sticky | Round);

    // Compute rounded result
    //  - RoundAdd is +1 or -1 (all ones) aligned to the LSB of the selected format's
    //    fraction; for the narrower formats the unused low fraction bits are zero
    if (`FPSIZES == 1) begin
        assign RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{`FLEN{1'b0}}, FpPlus1};
    end else if (`FPSIZES == 2) begin
        //      \/FLEN+1
        //      | NE+2 |        NF      |
        //      '-NE+2-^----NF1----^
        // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
        assign RoundAdd = OutFmt ? Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1} :
                                   Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
    end else if (`FPSIZES == 3) begin
        always_comb begin
            case (OutFmt)
                `FMT: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
                `FMT1: RoundAdd = Minus1 ? {{`NE+2+`NF1{1'b1}}, (`FLEN-1-`NE-`NF1)'(0)} : {(`NE+1+`NF1)'(0), FpPlus1, (`FLEN-1-`NE-`NF1)'(0)};
                `FMT2: RoundAdd = Minus1 ? {{`NE+2+`NF2{1'b1}}, (`FLEN-1-`NE-`NF2)'(0)} : {(`NE+1+`NF2)'(0), FpPlus1, (`FLEN-1-`NE-`NF2)'(0)};
                default: RoundAdd = (`FLEN+1)'(0);
            endcase
        end
    end else if (`FPSIZES == 4) begin
        always_comb begin
            case (OutFmt)
                2'h3: RoundAdd = Minus1 ? {`FLEN+1{1'b1}} : {{{`FLEN{1'b0}}}, FpPlus1};
                2'h1: RoundAdd = Minus1 ? {{`NE+2+`D_NF{1'b1}}, (`FLEN-1-`NE-`D_NF)'(0)} : {(`NE+1+`D_NF)'(0), FpPlus1, (`FLEN-1-`NE-`D_NF)'(0)};
                2'h0: RoundAdd = Minus1 ? {{`NE+2+`S_NF{1'b1}}, (`FLEN-1-`NE-`S_NF)'(0)} : {(`NE+1+`S_NF)'(0), FpPlus1, (`FLEN-1-`NE-`S_NF)'(0)};
                2'h2: RoundAdd = Minus1 ? {{`NE+2+`H_NF{1'b1}}, (`FLEN-1-`NE-`H_NF)'(0)} : {(`NE+1+`H_NF)'(0), FpPlus1, (`FLEN-1-`NE-`H_NF)'(0)};
            endcase
        end
    end

    // determine the result to be rounded
    assign RoundFrac = CorrShifted[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];

    always_comb
        case(PostProcSelM)
            2'b10: RoundExp = SumExp; // fma
            // cvt: sign-extend the calculated exponent by one bit; force it to zero
            // when CvtResDenormUfM is set and CvtResUf is clear
            2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt
            2'b01: RoundExp = 0; // divide
            default: RoundExp = 0;
        endcase

    // round the result
    //      - if the fraction overflows one should be added to the exponent
    assign {FullResExp, ResFrac} = {RoundExp, RoundFrac} + RoundAdd;
    assign ResExp = FullResExp[`NE-1:0];
endmodule

View file

@ -61,6 +61,8 @@ module datapath (
(* mark_debug = "true" *) input logic RegWriteW,
input logic SquashSCW,
input logic [2:0] ResultSrcW,
input logic [`XLEN-1:0] FCvtIntResW,
input logic [1:0] FResSelW,
output logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
@ -120,14 +122,17 @@ module datapath (
flopenrc #(`XLEN) IFResultWReg(clk, reset, FlushW, ~StallW, IFResultM, IFResultW);
flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW);
flopen #(`XLEN) ReadDataWReg(clk, ~StallW, ReadDataM, ReadDataW);
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
// floating point interactions: fcvt, fp stores
// Result/write-data selection, generated differently depending on whether the
// floating-point unit is configured (`F_SUPPORTED).
// NOTE(review): the comments below assume mux2/mux5 take their data inputs first,
// then the select, then the output - confirm against the mux definitions.
if (`F_SUPPORTED) begin:fpmux
// writeback-stage integer/FP result after the fp->int conversion result is merged in
logic [`XLEN-1:0] IFCvtResultW;
// memory stage: choose between IEUResultM and FIntResM under FWriteIntM
mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, IFResultM);
// store data: choose between ForwardedSrcBE and FWriteDataE under ~IllegalFPUInstrE
mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE);
// writeback stage: the select is high only when FResSelW == 2'b01; chooses between
// IFResultW and FCvtIntResW
mux2 #(`XLEN) cvtresultmuxW(IFResultW, FCvtIntResW, ~FResSelW[1]&FResSelW[0], IFCvtResultW);
// final writeback mux: same inputs as the non-FP branch except IFCvtResultW replaces IFResultW
mux5 #(`XLEN) resultmuxW(IFCvtResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
end else begin:fpmux
// no FPU: the IEU result and forwarded source B are used directly
assign IFResultM = IEUResultM; assign WriteDataE = ForwardedSrcBE;
mux5 #(`XLEN) resultmuxW(IFResultW, ReadDataW, CSRReadValW, MDUResultW, SCResultW, ResultSrcW, ResultW);
end
// handle Store Conditional result if atomic extension supported

View file

@ -61,6 +61,8 @@ module ieu (
// Writeback stage
input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MDUResultW,
input logic [1:0] FResSelW,
input logic [`XLEN-1:0] FCvtIntResW,
output logic [4:0] RdW,
output logic [`XLEN-1:0] ReadDataW,
// input logic [`XLEN-1:0] PCLinkW,
@ -105,8 +107,8 @@ module ieu (
.clk, .reset, .ImmSrcD, .InstrD, .StallE, .FlushE, .ForwardAE, .ForwardBE,
.ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .IllegalFPUInstrE,
.FWriteDataE, .PCE, .PCLinkE, .FlagsE, .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW,
.StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataE, .FResSelW,
.StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, .FCvtIntResW,
.CSRReadValW, .ReadDataM, .MDUResultW, .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW);
forward fw(

View file

@ -93,10 +93,12 @@ module wallypipelinedcore (
logic FWriteIntE;
logic [`XLEN-1:0] FWriteDataE;
logic [`XLEN-1:0] FIntResM;
logic [`XLEN-1:0] FCvtIntResW;
logic FDivBusyE;
logic IllegalFPUInstrD, IllegalFPUInstrE;
logic FRegWriteM;
logic FPUStallD;
logic [1:0] FResSelW;
logic [4:0] SetFflagsM;
// memory management unit signals
@ -230,6 +232,8 @@ module wallypipelinedcore (
.CSRReadValW, .ReadDataM, .MDUResultW,
.RdW, .ReadDataW,
.InstrValidM,
.FCvtIntResW,
.FResSelW,
// hazards
.StallD, .StallE, .StallM, .StallW,
@ -390,6 +394,8 @@ module wallypipelinedcore (
.FWriteIntE, // integer register write enable
.FWriteDataE, // Data to be written to memory
.FIntResM, // data to be written to integer register
.FCvtIntResW, // fp -> int conversion result to be stored in int register
.FResSelW, // fpu result selection
.FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage)
.IllegalFPUInstrD, // Is the instruction an illegal fpu instruction
.SetFflagsM // FPU flags (to privileged unit)

View file

@ -10,120 +10,64 @@ module testbenchfp;
parameter TEST="none";
string Tests[]; // list of tests to be run
string FmaRneTests[]; // list of FMA round to nearest even tests to run
string FmaRuTests[]; // list of FMA round up tests to run
string FmaRdTests[]; // list of FMA round down tests to run
string FmaRzTests[]; // list of FMA round twords zero
string FmaRnmTests[]; // list of FMA round to nearest max magnitude
logic [2:0] OpCtrl[]; // list of op controls
logic [2:0] Unit[]; // list of units being tested
logic WriteInt[]; // is the result being written to the integer register
// Frm is indexed by FrmNum; with the [4:0] range the leftmost initializer is element 4,
// so Frm[0] = rne and Frm[4] = rnm
logic [2:0] Frm[4:0] = {3'b100, 3'b010, 3'b011, 3'b001, 3'b000}; // rounding modes: rne-000, rz-001, ru-011, rd-010, rnm-100
logic [1:0] Fmt[]; // list of formats for the other units
logic [1:0] FmaFmt[]; // list of formats for the FMA
logic clk=0;
logic [31:0] TestNum=0; // index for the test
logic [31:0] FmaTestNum=0; // index for the test
logic [31:0] OpCtrlNum=0; // index for OpCtrl
logic [31:0] errors=0; // how many errors
logic [31:0] VectorNum=0; // index for test vector
logic [31:0] FmaVectorNum=0; // index for test vector
logic [31:0] FrmNum=0; // index for rounding mode
logic [`FLEN*4+7:0] TestVectors[46464:0]; // list of test vectors
logic [`FLEN*4+7:0] FmaRneVectors[6133248:0]; // list of fma rne test vectors
logic [`FLEN*4+7:0] FmaRuVectors[6133248:0]; // list of fma ru test vectors
logic [`FLEN*4+7:0] FmaRdVectors[6133248:0]; // list of fma rd test vectors
logic [`FLEN*4+7:0] FmaRzVectors[6133248:0]; // list of fma rz test vectors
logic [`FLEN*4+7:0] FmaRnmVectors[6133248:0]; // list of fma rnm test vectors
logic [`FLEN*4+7:0] TestVectors[6133248:0]; // list of test vectors
logic [1:0] FmaFmtVal, FmtVal; // value of the current Fmt
logic [1:0] FmtVal; // value of the current Fmt
logic [2:0] UnitVal, OpCtrlVal, FrmVal; // value of the current Unit/OpCtrl/Frm
logic WriteIntVal; // value of the current WriteInt
logic [`FLEN-1:0] X, Y, Z; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRneX, FmaRneY, FmaRneZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRzX, FmaRzY, FmaRzZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRuX, FmaRuY, FmaRuZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRdX, FmaRdY, FmaRdZ; // inputs read from TestFloat
logic [`FLEN-1:0] FmaRnmX, FmaRnmY, FmaRnmZ; // inputs read from TestFloat
logic [`XLEN-1:0] SrcA; // integer input
logic [`FLEN-1:0] Ans; // correct answer from TestFloat
logic [`FLEN-1:0] FmaRneAns, FmaRzAns, FmaRuAns, FmaRdAns, FmaRnmAns; // flags read form testfloat
logic [`FLEN-1:0] Res; // result from other units
logic [`FLEN-1:0] FmaRneRes, FmaRzRes, FmaRuRes, FmaRdRes, FmaRnmRes; // results from FMA
logic [4:0] AnsFlg; // correct flags read from testfloat
logic [4:0] FmaRneAnsFlg, FmaRzAnsFlg, FmaRuAnsFlg, FmaRdAnsFlg, FmaRnmAnsFlg; // flags read form testfloat
logic [4:0] ResFlg; // Result flags
logic [4:0] FmaRneResFlg, FmaRzResFlg, FmaRuResFlg, FmaRdResFlg, FmaRnmResFlg; // flags read form testfloat
logic [`FMTBITS-1:0] ModFmt, FmaModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
logic [`FLEN-1:0] FmaRes, DivRes, CmpRes, CvtRes; // Results from each unit
logic [`XLEN-1:0] CvtIntRes; // Results from each unit
logic [4:0] ResFlg, Flg; // Result flags
logic [`FMTBITS-1:0] ModFmt; // format - 10 = half, 00 = single, 01 = double, 11 = quad
logic [`FLEN-1:0] FpRes, FpCmpRes; // Results from each unit
logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit
logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags
logic ResNaN, FmaRneResNaN, FmaRzResNaN, FmaRuResNaN, FmaRdResNaN, FmaRnmResNaN; // is the outputed result NaN
logic AnsNaN, FmaRneAnsNaN, FmaRzAnsNaN, FmaRuAnsNaN, FmaRdAnsNaN, FmaRnmAnsNaN; // is the correct answer NaN
logic NaNGood, FmaRneNaNGood, FmaRzNaNGood, FmaRuNaNGood, FmaRdNaNGood, FmaRnmNaNGood; // is the NaN answer correct
logic AnsNaN, ResNaN, NaNGood;
logic XSgn, YSgn, ZSgn; // sign of the inputs
logic FmaRneXSgn, FmaRneYSgn, FmaRneZSgn;
logic FmaRzXSgn, FmaRzYSgn, FmaRzZSgn;
logic FmaRuXSgn, FmaRuYSgn, FmaRuZSgn;
logic FmaRdXSgn, FmaRdYSgn, FmaRdZSgn;
logic FmaRnmXSgn, FmaRnmYSgn, FmaRnmZSgn;
logic [`NE-1:0] XExp, YExp, ZExp; // exponent of the inputs
logic [`NE-1:0] FmaRneXExp, FmaRneYExp, FmaRneZExp;
logic [`NE-1:0] FmaRzXExp, FmaRzYExp, FmaRzZExp;
logic [`NE-1:0] FmaRuXExp, FmaRuYExp, FmaRuZExp;
logic [`NE-1:0] FmaRdXExp, FmaRdYExp, FmaRdZExp;
logic [`NE-1:0] FmaRnmXExp, FmaRnmYExp, FmaRnmZExp;
logic [`NF:0] XMan, YMan, ZMan; // mantissas of the inputs
logic [`NF:0] FmaRneXMan, FmaRneYMan, FmaRneZMan;
logic [`NF:0] FmaRzXMan, FmaRzYMan, FmaRzZMan;
logic [`NF:0] FmaRuXMan, FmaRuYMan, FmaRuZMan;
logic [`NF:0] FmaRdXMan, FmaRdYMan, FmaRdZMan;
logic [`NF:0] FmaRnmXMan, FmaRnmYMan, FmaRnmZMan;
logic XNaN, YNaN, ZNaN; // is the input NaN
logic FmaRneXNaN, FmaRneYNaN, FmaRneZNaN;
logic FmaRzXNaN, FmaRzYNaN, FmaRzZNaN;
logic FmaRuXNaN, FmaRuYNaN, FmaRuZNaN;
logic FmaRdXNaN, FmaRdYNaN, FmaRdZNaN;
logic FmaRnmXNaN, FmaRnmYNaN, FmaRnmZNaN;
logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN
logic FmaRneXSNaN, FmaRneYSNaN, FmaRneZSNaN;
logic FmaRzXSNaN, FmaRzYSNaN, FmaRzZSNaN;
logic FmaRuXSNaN, FmaRuYSNaN, FmaRuZSNaN;
logic FmaRdXSNaN, FmaRdYSNaN, FmaRdZSNaN;
logic FmaRnmXSNaN, FmaRnmYSNaN, FmaRnmZSNaN;
logic XDenorm, ZDenorm; // is the input denormalized
logic FmaRneXDenorm, FmaRneZDenorm;
logic FmaRzXDenorm, FmaRzZDenorm;
logic FmaRuXDenorm, FmaRuZDenorm;
logic FmaRdXDenorm, FmaRdZDenorm;
logic FmaRnmXDenorm, FmaRnmZDenorm;
logic XInf, YInf, ZInf; // is the input infinity
logic FmaRneXInf, FmaRneYInf, FmaRneZInf;
logic FmaRzXInf, FmaRzYInf, FmaRzZInf;
logic FmaRuXInf, FmaRuYInf, FmaRuZInf;
logic FmaRdXInf, FmaRdYInf, FmaRdZInf;
logic FmaRnmXInf, FmaRnmYInf, FmaRnmZInf;
logic XZero, YZero, ZZero; // is the input zero
logic FmaRneXZero, FmaRneYZero, FmaRneZZero;
logic FmaRzXZero, FmaRzYZero, FmaRzZZero;
logic FmaRuXZero, FmaRuYZero, FmaRuZZero;
logic FmaRdXZero, FmaRdYZero, FmaRdZZero;
logic FmaRnmXZero, FmaRnmYZero, FmaRnmZZero;
logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones
logic [`LGLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder)
logic IntZeroE;
logic CvtResSgnE;
logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5;
logic [`NE:0] CvtCalcExpE; // the calculated exponent
logic [`LOGLGLEN-1:0] CvtShiftAmtE; // how much to shift by
logic CvtResDenormUfE;
// in-between FMA signals
logic Mult;
logic [`NE+1:0] ProdExpE, FmaRneProdExp, FmaRzProdExp, FmaRuProdExp, FmaRdProdExp, FmaRnmProdExp;
logic AddendStickyE, FmaRneAddendSticky, FmaRzAddendSticky, FmaRuAddendSticky, FmaRdAddendSticky, FmaRnmAddendSticky;
logic KillProdE, FmaRneKillProd, FmaRzKillProd, FmaRuKillProd, FmaRdKillProd, FmaRnmKillProd;
logic [$clog2(3*`NF+7)-1:0] NormCntE, FmaRneNormCnt, FmaRzNormCnt, FmaRuNormCnt, FmaRdNormCnt, FmaRnmNormCnt;
logic [3*`NF+5:0] SumE, FmaRneSum, FmaRzSum, FmaRuSum, FmaRdSum, FmaRnmSum;
logic InvZE, FmaRneInvZ, FmaRzInvZ, FmaRuInvZ, FmaRdInvZ, FmaRnmInvZ;
logic NegSumE, FmaRneNegSum, FmaRzNegSum, FmaRuNegSum, FmaRdNegSum, FmaRnmNegSum;
logic ZSgnEffE, FmaRneZSgnEff, FmaRzZSgnEff, FmaRuZSgnEff, FmaRdZSgnEff, FmaRnmZSgnEff;
logic PSgnE, FmaRnePSgn, FmaRzPSgn, FmaRuPSgn, FmaRdPSgn, FmaRnmPSgn;
logic [`NE+1:0] ProdExpE;
logic AddendStickyE;
logic KillProdE;
logic [$clog2(3*`NF+7)-1:0] FmaNormCntE;
logic [3*`NF+5:0] SumE;
logic InvZE;
logic NegSumE;
logic ZSgnEffE;
logic PSgnE;
///////////////////////////////////////////////////////////////////////////////////////////////
@ -282,15 +226,13 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if fused-mutliply-add is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f128_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f128_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f128_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f128_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f128_mulAdd_rnm.tv"};
// add the format for the Fma
FmaFmt = {FmaFmt, 2'b11};
Tests = {Tests, f128fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b11};
end
end
end
if (`D_SUPPORTED) begin // if double precision is supported
@ -411,14 +353,13 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if the fused multiply add is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f64_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f64_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f64_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f64_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f64_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b01};
Tests = {Tests, f64fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b01};
end
end
end
if (`F_SUPPORTED) begin // if single precision being supported
@ -523,14 +464,13 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f32_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f32_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f32_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f32_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f32_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b00};
Tests = {Tests, f32fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b00};
end
end
end
if (`ZFH_SUPPORTED) begin // if half precision supported
@ -617,19 +557,18 @@ module testbenchfp;
// end
// end
if (TEST === "fma" | TEST === "all") begin // if fma is being tested
// add each rounding mode to it's own list of tests
// - fma tests are very long, so run all rounding modes in parallel
FmaRneTests = {FmaRneTests, "f16_mulAdd_rne.tv"};
FmaRzTests = {FmaRzTests, "f16_mulAdd_rz.tv"};
FmaRuTests = {FmaRuTests, "f16_mulAdd_ru.tv"};
FmaRdTests = {FmaRdTests, "f16_mulAdd_rd.tv"};
FmaRnmTests = {FmaRnmTests, "f16_mulAdd_rnm.tv"};
FmaFmt = {FmaFmt, 2'b10};
Tests = {Tests, f16fma};
OpCtrl = {OpCtrl, `FMA_OPCTRL};
WriteInt = {WriteInt, 1'b0};
for(int i = 0; i<5; i++) begin
Unit = {Unit, `FMAUNIT};
Fmt = {Fmt, 2'b10};
end
end
end
// check if nothing is being tested
if (Tests.size() == 0 & FmaRneTests.size() == 0 & FmaRuTests.size() == 0 & FmaRdTests.size() == 0 & FmaRzTests.size() == 0 & FmaRnmTests.size() == 0) begin
if (Tests.size() == 0) begin
$display("TEST %s not supported in this configuration", TEST);
$stop;
end
@ -648,26 +587,17 @@ module testbenchfp;
// Read the first test
initial begin
$display("\n\nRunning %s vectors", Tests[TestNum]);
$display("Running FMA precision %d", FmaTestNum);
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
// set the test index to 0
TestNum = 0;
FmaTestNum = 0;
end
// set the signals for all tests
always_comb FmaFmtVal = FmaFmt[FmaTestNum];
always_comb UnitVal = Unit[TestNum];
always_comb FmtVal = Fmt[TestNum];
always_comb OpCtrlVal = OpCtrl[OpCtrlNum];
always_comb WriteIntVal = WriteInt[OpCtrlNum];
always_comb FrmVal = Frm[FrmNum];
assign Mult = OpCtrlVal === 3'b100;
// modify the format signal if only 2 precisions are supported
// - 1 for the larger precision
@ -675,61 +605,9 @@ module testbenchfp;
always_comb begin
if(`FMTBITS == 1) ModFmt = FmtVal == `FMT;
else ModFmt = FmtVal;
if(`FMTBITS == 1) FmaModFmt = FmaFmtVal == `FMT;
else FmaModFmt = FmaFmtVal;
end
// extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector
readfmavectors readfmarnevectors (.clk, .TestVector(FmaRneVectors[FmaVectorNum]), .Ans(FmaRneAns), .AnsFlg(FmaRneAnsFlg),
.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
.XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN),
.XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN),
.XDenormE(FmaRneXDenorm), .ZDenormE(FmaRneZDenorm),
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
.XInfE(FmaRneXInf), .YInfE(FmaRneYInf), .ZInfE(FmaRneZInf), .FmaModFmt, .FmaFmt(FmaFmtVal),
.X(FmaRneX), .Y(FmaRneY), .Z(FmaRneZ));
readfmavectors readfmarzvectors (.clk, .TestVector(FmaRzVectors[FmaVectorNum]), .Ans(FmaRzAns), .AnsFlg(FmaRzAnsFlg),
.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt,
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
.XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN),
.XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN),
.XDenormE(FmaRzXDenorm), .ZDenormE(FmaRzZDenorm),
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
.XInfE(FmaRzXInf), .YInfE(FmaRzYInf), .ZInfE(FmaRzZInf), .FmaFmt(FmaFmtVal),
.X(FmaRzX), .Y(FmaRzY), .Z(FmaRzZ));
readfmavectors readfmaruvectors (.clk, .TestVector(FmaRuVectors[FmaVectorNum]), .Ans(FmaRuAns), .AnsFlg(FmaRuAnsFlg),
.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt,
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
.XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN),
.XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN),
.XDenormE(FmaRuXDenorm), .ZDenormE(FmaRuZDenorm),
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
.XInfE(FmaRuXInf), .YInfE(FmaRuYInf), .ZInfE(FmaRuZInf), .FmaFmt(FmaFmtVal),
.X(FmaRuX), .Y(FmaRuY), .Z(FmaRuZ));
readfmavectors readfmardvectors (.clk, .TestVector(FmaRdVectors[FmaVectorNum]), .Ans(FmaRdAns), .AnsFlg(FmaRdAnsFlg),
.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt,
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
.XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN),
.XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN),
.XDenormE(FmaRdXDenorm), .ZDenormE(FmaRdZDenorm),
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
.XInfE(FmaRdXInf), .YInfE(FmaRdYInf), .ZInfE(FmaRdZInf), .FmaFmt(FmaFmtVal),
.X(FmaRdX), .Y(FmaRdY), .Z(FmaRdZ));
readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[FmaVectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg),
.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt,
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
.XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN),
.XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN),
.XDenormE(FmaRnmXDenorm), .ZDenormE(FmaRnmZDenorm),
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
.XInfE(FmaRnmXInf), .YInfE(FmaRnmYInf), .ZInfE(FmaRnmZInf), .FmaFmt(FmaFmtVal),
.X(FmaRnmX), .Y(FmaRnmY), .Z(FmaRnmZ));
readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA,
.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal),
@ -754,124 +632,30 @@ module testbenchfp;
///////////////////////////////////////////////////////////////////////////////////////////////
// instantiate devices under test
// - one fma for each precision
// - all the units for the other tests (including fma for add/sub/mul)
fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn),
.XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp),
.XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan),
.XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ),
.NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn),
.ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd));
fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn),
.ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm),
.XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan),
.XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN),
.XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero),
.XInfM(FmaRneXInf), .YInfM(FmaRneYInf), .ZInfM(FmaRneZInf),
.XSNaNM(FmaRneXSNaN), .YSNaNM(FmaRneYSNaN), .ZSNaNM(FmaRneZSNaN),
.KillProdM(FmaRneSumKillProd), .AddendStickyM(FmaRneAddendSticky), .ProdExpM(FmaRneProdExp),
.SumM((FmaRneSum)), .NegSumM(FmaRneNegSum), .InvZM(FmaRneInvZ), .NormCntM(FmaRneNormCnt), .ZSgnEffM(FmaRneZSgnEff),
.PSgnM(FmaRnePSgn), .FmtM(FmaModFmt), .FrmM(`RNE),
.FMAFlgM(FmaRneResFlg), .FMAResM(FmaRneRes), .Mult(1'b0));
fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn),
.XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp),
.XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan),
.XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ),
.NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn),
.ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd));
fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn),
.ZExpM(FmaRzZExp), .ZDenormM(FmaRzZDenorm),
.XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan),
.XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN),
.XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero),
.XInfM(FmaRzXInf), .YInfM(FmaRzYInf), .ZInfM(FmaRzZInf),
.XSNaNM(FmaRzXSNaN), .YSNaNM(FmaRzYSNaN), .ZSNaNM(FmaRzZSNaN),
.KillProdM(FmaRzSumKillProd), .AddendStickyM(FmaRzAddendSticky), .ProdExpM(FmaRzProdExp),
.SumM((FmaRzSum)), .NegSumM(FmaRzNegSum), .InvZM(FmaRzInvZ), .NormCntM(FmaRzNormCnt), .ZSgnEffM(FmaRzZSgnEff),
.PSgnM(FmaRzPSgn), .FmtM(FmaModFmt), .FrmM(`RZ),
.FMAFlgM(FmaRzResFlg), .FMAResM(FmaRzRes), .Mult(1'b0));
fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn),
.XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp),
.XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan),
.XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ),
.NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn),
.ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd));
fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn),
.ZExpM(FmaRuZExp), .ZDenormM(FmaRuZDenorm),
.XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan),
.XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN),
.XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero),
.XInfM(FmaRuXInf), .YInfM(FmaRuYInf), .ZInfM(FmaRuZInf),
.XSNaNM(FmaRuXSNaN), .YSNaNM(FmaRuYSNaN), .ZSNaNM(FmaRuZSNaN),
.KillProdM(FmaRuSumKillProd), .AddendStickyM(FmaRuAddendSticky), .ProdExpM(FmaRuProdExp),
.SumM((FmaRuSum)), .NegSumM(FmaRuNegSum), .InvZM(FmaRuInvZ), .NormCntM(FmaRuNormCnt), .ZSgnEffM(FmaRuZSgnEff),
.PSgnM(FmaRuPSgn), .FmtM(FmaModFmt), .FrmM(`RU),
.FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0));
fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn),
.XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp),
.XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan),
.XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ),
.NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn),
.ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd));
fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn),
.ZExpM(FmaRdZExp), .ZDenormM(FmaRdZDenorm),
.XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan),
.XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN),
.XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero),
.XInfM(FmaRdXInf), .YInfM(FmaRdYInf), .ZInfM(FmaRdZInf),
.XSNaNM(FmaRdXSNaN), .YSNaNM(FmaRdYSNaN), .ZSNaNM(FmaRdZSNaN),
.KillProdM(FmaRdSumKillProd), .AddendStickyM(FmaRdAddendSticky), .ProdExpM(FmaRdProdExp),
.SumM((FmaRdSum)), .NegSumM(FmaRdNegSum), .InvZM(FmaRdInvZ), .NormCntM(FmaRdNormCnt), .ZSgnEffM(FmaRdZSgnEff),
.PSgnM(FmaRdPSgn), .FmtM(FmaModFmt), .FrmM(`RD),
.FMAFlgM(FmaRdResFlg), .FMAResM(FmaRdRes), .Mult(1'b0));
fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn),
.XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp),
.XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan),
.XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero),
.FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ),
.NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn),
.ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd));
fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn),
.ZExpM(FmaRnmZExp), .ZDenormM(FmaRnmZDenorm),
.XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan),
.XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN),
.XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero),
.XInfM(FmaRnmXInf), .YInfM(FmaRnmYInf), .ZInfM(FmaRnmZInf),
.XSNaNM(FmaRnmXSNaN), .YSNaNM(FmaRnmYSNaN), .ZSNaNM(FmaRnmZSNaN),
.KillProdM(FmaRnmSumKillProd), .AddendStickyM(FmaRnmAddendSticky), .ProdExpM(FmaRnmProdExp),
.SumM((FmaRnmSum)), .NegSumM(FmaRnmNegSum), .InvZM(FmaRnmInvZ), .NormCntM(FmaRnmNormCnt), .ZSgnEffM(FmaRnmZSgnEff),
.PSgnM(FmaRnmPSgn), .FmtM(FmaModFmt), .FrmM(`RNM),
.FMAFlgM(FmaRnmResFlg), .FMAResM(FmaRnmRes), .Mult(1'b0));
fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
fma fma(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn),
.XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp),
.XManE(XMan), .YManE(YMan), .ZManE(ZMan),
.XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero),
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE,
.FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE,
.ProdExpE, .AddendStickyE, .KillProdE);
fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn),
.ZExpM(ZExp), .ZDenormM(ZDenorm),
.XManM(XMan), .YManM(YMan), .ZManM(ZMan),
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN),
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero),
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf),
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN),
postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]),
.ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal),
.XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE),
.XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE),
.XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE),
.XInfM(XInf), .YInfM(YInf), .ZInfM(ZInf), .CvtResSgnM(CvtResSgnE), .FWriteIntM(WriteIntVal),
.XSNaNM(XSNaN), .YSNaNM(YSNaN), .ZSNaNM(ZSNaN), .CvtLzcInM(CvtLzcInE), .IntZeroM(IntZeroE),
.KillProdM(KillProdE), .AddendStickyM(AddendStickyE), .ProdExpM(ProdExpE),
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .NormCntM(NormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.FMAFlgM(FmaFlg), .FMAResM(FmaRes), .Mult);
// fcvtfp fcvtfp (.XExpE(XExp), .XManE(XMan), .XSgnE(XSgn), .XZeroE(XZero), .XDenormE(XDenorm), .XInfE(XInf),
// .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg));
.SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal),
.PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes));
fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal),
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal),
.XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt),
.CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg));
.XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE,
.FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE);
fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp),
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpResE(CmpRes));
.XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes),
.XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes));
// fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf),
// .XDenormE(XDenorm), .ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal),
// .CvtRes, .CvtFlgE);
@ -900,60 +684,6 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
///////////////////////////////////////////////////////////////////////////////////////////////
//Check if the correct answer and result is a NaN
always_comb begin
case (FmaFmtVal)
4'b11: begin // quad
FmaRneAnsNaN = &FmaRneAns[`Q_LEN-2:`Q_NF]&(|FmaRneAns[`Q_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`Q_LEN-2:`Q_NF]&(|FmaRneRes[`Q_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`Q_LEN-2:`Q_NF]&(|FmaRzAns[`Q_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`Q_LEN-2:`Q_NF]&(|FmaRzRes[`Q_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`Q_LEN-2:`Q_NF]&(|FmaRuAns[`Q_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`Q_LEN-2:`Q_NF]&(|FmaRuRes[`Q_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`Q_LEN-2:`Q_NF]&(|FmaRdAns[`Q_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`Q_LEN-2:`Q_NF]&(|FmaRdRes[`Q_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`Q_LEN-2:`Q_NF]&(|FmaRnmAns[`Q_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`Q_LEN-2:`Q_NF]&(|FmaRnmRes[`Q_NF-1:0]);
end
4'b01: begin // double
FmaRneAnsNaN = &FmaRneAns[`D_LEN-2:`D_NF]&(|FmaRneAns[`D_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`D_LEN-2:`D_NF]&(|FmaRneRes[`D_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`D_LEN-2:`D_NF]&(|FmaRzAns[`D_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`D_LEN-2:`D_NF]&(|FmaRzRes[`D_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`D_LEN-2:`D_NF]&(|FmaRuAns[`D_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`D_LEN-2:`D_NF]&(|FmaRuRes[`D_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`D_LEN-2:`D_NF]&(|FmaRdAns[`D_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`D_LEN-2:`D_NF]&(|FmaRdRes[`D_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`D_LEN-2:`D_NF]&(|FmaRnmAns[`D_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`D_LEN-2:`D_NF]&(|FmaRnmRes[`D_NF-1:0]);
end
4'b00: begin // single
FmaRneAnsNaN = &FmaRneAns[`S_LEN-2:`S_NF]&(|FmaRneAns[`S_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`S_LEN-2:`S_NF]&(|FmaRneRes[`S_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`S_LEN-2:`S_NF]&(|FmaRzAns[`S_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`S_LEN-2:`S_NF]&(|FmaRzRes[`S_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`S_LEN-2:`S_NF]&(|FmaRuAns[`S_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`S_LEN-2:`S_NF]&(|FmaRuRes[`S_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`S_LEN-2:`S_NF]&(|FmaRdAns[`S_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`S_LEN-2:`S_NF]&(|FmaRdRes[`S_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`S_LEN-2:`S_NF]&(|FmaRnmAns[`S_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`S_LEN-2:`S_NF]&(|FmaRnmRes[`S_NF-1:0]);
end
4'b10: begin // half
FmaRneAnsNaN = &FmaRneAns[`H_LEN-2:`H_NF]&(|FmaRneAns[`H_NF-1:0]);
FmaRneResNaN = &FmaRneRes[`H_LEN-2:`H_NF]&(|FmaRneRes[`H_NF-1:0]);
FmaRzAnsNaN = &FmaRzAns[`H_LEN-2:`H_NF]&(|FmaRzAns[`H_NF-1:0]);
FmaRzResNaN = &FmaRzRes[`H_LEN-2:`H_NF]&(|FmaRzRes[`H_NF-1:0]);
FmaRuAnsNaN = &FmaRuAns[`H_LEN-2:`H_NF]&(|FmaRuAns[`H_NF-1:0]);
FmaRuResNaN = &FmaRuRes[`H_LEN-2:`H_NF]&(|FmaRuRes[`H_NF-1:0]);
FmaRdAnsNaN = &FmaRdAns[`H_LEN-2:`H_NF]&(|FmaRdAns[`H_NF-1:0]);
FmaRdResNaN = &FmaRdRes[`H_LEN-2:`H_NF]&(|FmaRdRes[`H_NF-1:0]);
FmaRnmAnsNaN = &FmaRnmAns[`H_LEN-2:`H_NF]&(|FmaRnmAns[`H_NF-1:0]);
FmaRnmResNaN = &FmaRnmRes[`H_LEN-2:`H_NF]&(|FmaRnmRes[`H_NF-1:0]);
end
endcase
end
always_comb begin
if(UnitVal === `CVTINTUNIT | UnitVal === `CMPUNIT) begin
// an integer output can't be a NaN
@ -1004,20 +734,20 @@ fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWr
always_comb begin
// select the result to check
case (UnitVal)
`FMAUNIT: Res = FmaRes;
`DIVUNIT: Res = DivRes;
`FMAUNIT: Res = FpRes;
`DIVUNIT: Res = FpRes;
`CMPUNIT: Res = CmpRes;
`CVTINTUNIT: if(WriteIntVal) Res = CvtIntRes; else Res = CvtRes;
`CVTFPUNIT: Res = CvtRes;
`CVTINTUNIT: if(WriteIntVal) Res = IntRes; else Res = FpRes;
`CVTFPUNIT: Res = FpRes;
endcase
// select the flag to check
case (UnitVal)
`FMAUNIT: ResFlg = FmaFlg;
`DIVUNIT: ResFlg = DivFlg;
`FMAUNIT: ResFlg = Flg;
`DIVUNIT: ResFlg = Flg;
`CMPUNIT: ResFlg = CmpFlg;
`CVTINTUNIT: ResFlg = CvtFlg;
`CVTFPUNIT: ResFlg = CvtFlg;
`CVTINTUNIT: ResFlg = Flg;
`CVTFPUNIT: ResFlg = Flg;
endcase
end
// check results on falling edge of clk
@ -1027,117 +757,6 @@ end
// check if the NaN value is good. IEEE754-2019 sections 6.3 and 6.2.3 specify:
// - the sign of the NaN does not matter for the operations being tested
// - when 2 or more NaNs are input, the NaN that is propagated doesn't matter
case (FmaFmtVal)
4'b11: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneX[`Q_LEN-2:`Q_NF],1'b1,FmaRneX[`Q_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneY[`Q_LEN-2:`Q_NF],1'b1,FmaRneY[`Q_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`Q_LEN-2:0] === {FmaRneZ[`Q_LEN-2:`Q_NF],1'b1,FmaRneZ[`Q_NF-2:0]})));
4'b01: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneX[`D_LEN-2:`D_NF],1'b1,FmaRneX[`D_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneY[`D_LEN-2:`D_NF],1'b1,FmaRneY[`D_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`D_LEN-2:0] === {FmaRneZ[`D_LEN-2:`D_NF],1'b1,FmaRneZ[`D_NF-2:0]})));
4'b00: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneX[`S_LEN-2:`S_NF],1'b1,FmaRneX[`S_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneY[`S_LEN-2:`S_NF],1'b1,FmaRneY[`S_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`S_LEN-2:0] === {FmaRneZ[`S_LEN-2:`S_NF],1'b1,FmaRneZ[`S_NF-2:0]})));
4'b10: FmaRneNaNGood =(((`IEEE754==0)&FmaRneAnsNaN&(FmaRneRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRneAnsFlg[4]&(FmaRneRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRneXNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneX[`H_LEN-2:`H_NF],1'b1,FmaRneX[`H_NF-2:0]})) |
(FmaRneYNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneY[`H_LEN-2:`H_NF],1'b1,FmaRneY[`H_NF-2:0]})) |
(FmaRneZNaN&(FmaRneRes[`H_LEN-2:0] === {FmaRneZ[`H_LEN-2:`H_NF],1'b1,FmaRneZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzX[`Q_LEN-2:`Q_NF],1'b1,FmaRzX[`Q_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzY[`Q_LEN-2:`Q_NF],1'b1,FmaRzY[`Q_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`Q_LEN-2:0] === {FmaRzZ[`Q_LEN-2:`Q_NF],1'b1,FmaRzZ[`Q_NF-2:0]})));
4'b01: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzX[`D_LEN-2:`D_NF],1'b1,FmaRzX[`D_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzY[`D_LEN-2:`D_NF],1'b1,FmaRzY[`D_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`D_LEN-2:0] === {FmaRzZ[`D_LEN-2:`D_NF],1'b1,FmaRzZ[`D_NF-2:0]})));
4'b00: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzX[`S_LEN-2:`S_NF],1'b1,FmaRzX[`S_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzY[`S_LEN-2:`S_NF],1'b1,FmaRzY[`S_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`S_LEN-2:0] === {FmaRzZ[`S_LEN-2:`S_NF],1'b1,FmaRzZ[`S_NF-2:0]})));
4'b10: FmaRzNaNGood = (((`IEEE754==0)&FmaRzAnsNaN&(FmaRzRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRzAnsFlg[4]&(FmaRzRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRzXNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzX[`H_LEN-2:`H_NF],1'b1,FmaRzX[`H_NF-2:0]})) |
(FmaRzYNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzY[`H_LEN-2:`H_NF],1'b1,FmaRzY[`H_NF-2:0]})) |
(FmaRzZNaN&(FmaRzRes[`H_LEN-2:0] === {FmaRzZ[`H_LEN-2:`H_NF],1'b1,FmaRzZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuX[`Q_LEN-2:`Q_NF],1'b1,FmaRuX[`Q_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuY[`Q_LEN-2:`Q_NF],1'b1,FmaRuY[`Q_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`Q_LEN-2:0] === {FmaRuZ[`Q_LEN-2:`Q_NF],1'b1,FmaRuZ[`Q_NF-2:0]})));
4'b01: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`Q_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuX[`D_LEN-2:`D_NF],1'b1,FmaRuX[`D_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuY[`D_LEN-2:`D_NF],1'b1,FmaRuY[`D_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`D_LEN-2:0] === {FmaRuZ[`D_LEN-2:`D_NF],1'b1,FmaRuZ[`D_NF-2:0]})));
4'b00: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuX[`S_LEN-2:`S_NF],1'b1,FmaRuX[`S_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuY[`S_LEN-2:`S_NF],1'b1,FmaRuY[`S_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`S_LEN-2:0] === {FmaRuZ[`S_LEN-2:`S_NF],1'b1,FmaRuZ[`S_NF-2:0]})));
4'b10: FmaRuNaNGood = (((`IEEE754==0)&FmaRuAnsNaN&(FmaRuRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRuAnsFlg[4]&(FmaRuRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRuXNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuX[`H_LEN-2:`H_NF],1'b1,FmaRuX[`H_NF-2:0]})) |
(FmaRuYNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuY[`H_LEN-2:`H_NF],1'b1,FmaRuY[`H_NF-2:0]})) |
(FmaRuZNaN&(FmaRuRes[`H_LEN-2:0] === {FmaRuZ[`H_LEN-2:`H_NF],1'b1,FmaRuZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdX[`Q_LEN-2:`Q_NF],1'b1,FmaRdX[`Q_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdY[`Q_LEN-2:`Q_NF],1'b1,FmaRdY[`Q_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`Q_LEN-2:0] === {FmaRdZ[`Q_LEN-2:`Q_NF],1'b1,FmaRdZ[`Q_NF-2:0]})));
4'b01: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdX[`D_LEN-2:`D_NF],1'b1,FmaRdX[`D_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdY[`D_LEN-2:`D_NF],1'b1,FmaRdY[`D_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`D_LEN-2:0] === {FmaRdZ[`D_LEN-2:`D_NF],1'b1,FmaRdZ[`D_NF-2:0]})));
4'b00: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdX[`S_LEN-2:`S_NF],1'b1,FmaRdX[`S_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdY[`S_LEN-2:`S_NF],1'b1,FmaRdY[`S_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`S_LEN-2:0] === {FmaRdZ[`S_LEN-2:`S_NF],1'b1,FmaRdZ[`S_NF-2:0]})));
4'b10: FmaRdNaNGood = (((`IEEE754==0)&FmaRdAnsNaN&(FmaRdRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRdAnsFlg[4]&(FmaRdRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRdXNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdX[`H_LEN-2:`H_NF],1'b1,FmaRdX[`H_NF-2:0]})) |
(FmaRdYNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdY[`H_LEN-2:`H_NF],1'b1,FmaRdY[`H_NF-2:0]})) |
(FmaRdZNaN&(FmaRdRes[`H_LEN-2:0] === {FmaRdZ[`H_LEN-2:`H_NF],1'b1,FmaRdZ[`H_NF-2:0]})));
endcase
case (FmaFmtVal)
4'b11: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`Q_LEN-2:0] === {{`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmX[`Q_LEN-2:`Q_NF],1'b1,FmaRnmX[`Q_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmY[`Q_LEN-2:`Q_NF],1'b1,FmaRnmY[`Q_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`Q_LEN-2:0] === {FmaRnmZ[`Q_LEN-2:`Q_NF],1'b1,FmaRnmZ[`Q_NF-2:0]})));
4'b01: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`D_LEN-1:0] === {1'b0, {`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`D_LEN-2:0] === {{`D_NE+1{1'b1}}, {`D_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmX[`D_LEN-2:`D_NF],1'b1,FmaRnmX[`D_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmY[`D_LEN-2:`D_NF],1'b1,FmaRnmY[`D_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`D_LEN-2:0] === {FmaRnmZ[`D_LEN-2:`D_NF],1'b1,FmaRnmZ[`D_NF-2:0]})));
4'b00: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`S_LEN-1:0] === {1'b0, {`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`S_LEN-2:0] === {{`S_NE+1{1'b1}}, {`S_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmX[`S_LEN-2:`S_NF],1'b1,FmaRnmX[`S_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmY[`S_LEN-2:`S_NF],1'b1,FmaRnmY[`S_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`S_LEN-2:0] === {FmaRnmZ[`S_LEN-2:`S_NF],1'b1,FmaRnmZ[`S_NF-2:0]})));
4'b10: FmaRnmNaNGood =(((`IEEE754==0)&FmaRnmAnsNaN&(FmaRnmRes[`H_LEN-1:0] === {1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRnmAnsFlg[4]&(FmaRnmRes[`H_LEN-2:0] === {{`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}})) |
(FmaRnmXNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmX[`H_LEN-2:`H_NF],1'b1,FmaRnmX[`H_NF-2:0]})) |
(FmaRnmYNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmY[`H_LEN-2:`H_NF],1'b1,FmaRnmY[`H_NF-2:0]})) |
(FmaRnmZNaN&(FmaRnmRes[`H_LEN-2:0] === {FmaRnmZ[`H_LEN-2:`H_NF],1'b1,FmaRnmZ[`H_NF-2:0]})));
endcase
if (UnitVal !== `CVTFPUNIT & UnitVal !== `CVTINTUNIT)
case (FmtVal)
4'b11: NaNGood = (((`IEEE754==0)&AnsNaN&(Res === {1'b0, {`Q_NE+1{1'b1}}, {`Q_NF-1{1'b0}}})) |
@ -1221,77 +840,8 @@ end
$stop;
end
// check if the fma tests are correct
if(~((FmaRneRes === FmaRneAns | FmaRneNaNGood | FmaRneNaNGood === 1'bx) & (FmaRneResFlg === FmaRneAnsFlg | FmaRneAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RNE");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRneX, FmaRneY, FmaRneZ, FmaRneRes, FmaRneResFlg, FmaRneAns, FmaRneAnsFlg);
$stop;
end
if(~((FmaRzRes === FmaRzAns | FmaRzNaNGood | FmaRzNaNGood === 1'bx) & (FmaRzResFlg === FmaRzAnsFlg | FmaRzAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RZ");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRzX, FmaRzY, FmaRzZ, FmaRzRes, FmaRzResFlg, FmaRzAns, FmaRzAnsFlg);
$stop;
end
if(~((FmaRuRes === FmaRuAns | FmaRuNaNGood | FmaRuNaNGood === 1'bx) & (FmaRuResFlg === FmaRuAnsFlg | FmaRuAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RU");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRuX, FmaRuY, FmaRuZ, FmaRuRes, FmaRuResFlg, FmaRuAns, FmaRuAnsFlg);
$stop;
end
if(~((FmaRdRes === FmaRdAns | FmaRdNaNGood | FmaRdNaNGood === 1'bx) & (FmaRdResFlg === FmaRdAnsFlg | FmaRdAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RD");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRdX, FmaRdY, FmaRdZ, FmaRdRes, FmaRdResFlg, FmaRdAns, FmaRdAnsFlg);
$stop;
end
if(~((FmaRnmRes === FmaRnmAns | FmaRnmNaNGood | FmaRnmNaNGood === 1'bx) & (FmaRnmResFlg === FmaRnmAnsFlg | FmaRnmAnsFlg === 5'bx))) begin
errors += 1;
$display("There is an error in FMA - RNM");
$display("inputs: %h %h %h\n Res: %h %h\n Ans: %h %h", FmaRnmX, FmaRnmY, FmaRnmZ, FmaRnmRes, FmaRnmResFlg, FmaRnmAns, FmaRnmAnsFlg);
$stop;
end
VectorNum += 1; // increment the vector
FmaVectorNum += 1; // increment the vector
// check to see if there more vectors in this test
// *** fix this so that fma and other run separately - re-add fma num
if ((FmaRneVectors[FmaVectorNum][0] === 1'bx &
FmaRzVectors[FmaVectorNum][0] === 1'bx &
FmaRuVectors[FmaVectorNum][0] === 1'bx &
FmaRdVectors[FmaVectorNum][0] === 1'bx &
FmaRnmVectors[FmaVectorNum][0] === 1'bx & FmaRneTests[FmaTestNum] !== "" )) begin // if reached the end of file
// increment the test
FmaTestNum += 1;
// clear the vectors
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
// read next files
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
// set the vector index back to 0
FmaVectorNum = 0;
// if no more Tests - finish
if(Tests[TestNum] === "" &
FmaRneTests[FmaTestNum] === "" &
FmaRzTests[FmaTestNum] === "" &
FmaRuTests[FmaTestNum] === "" &
FmaRdTests[FmaTestNum] === "" &
FmaRnmTests[FmaTestNum] === "") begin
$display("\nAll Tests completed with %d errors\n", errors);
$stop;
end
$display("Running FMA precision %d", FmaTestNum);
end
if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file
@ -1299,14 +849,9 @@ end
TestNum += 1;
// clear the vectors
for(int i=0; i<46465; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
for(int i=0; i<6133248; i++) TestVectors[i] = {`FLEN*4+8{1'bx}};
// read next files
$readmemh({`PATH, Tests[TestNum]}, TestVectors);
$readmemh({`PATH, FmaRneTests[FmaTestNum]}, FmaRneVectors);
$readmemh({`PATH, FmaRuTests[FmaTestNum]}, FmaRuVectors);
$readmemh({`PATH, FmaRdTests[FmaTestNum]}, FmaRdVectors);
$readmemh({`PATH, FmaRzTests[FmaTestNum]}, FmaRzVectors);
$readmemh({`PATH, FmaRnmTests[FmaTestNum]}, FmaRnmVectors);
// set the vector index back to 0
VectorNum = 0;
@ -1317,12 +862,7 @@ end
else FrmNum = 0;
// if no more Tests - finish
if(Tests[TestNum] === "" &
FmaRneTests[FmaTestNum] === "" &
FmaRzTests[FmaTestNum] === "" &
FmaRuTests[FmaTestNum] === "" &
FmaRdTests[FmaTestNum] === "" &
FmaRnmTests[FmaTestNum] === "") begin
if(Tests[TestNum] === "") begin
$display("\nAll Tests completed with %d errors\n", errors);
$stop;
end
@ -1335,89 +875,6 @@ endmodule
// readfmavectors
// Testbench helper: splits one packed FMA test vector into the three operands
// (X, Y, Z), the expected answer (Ans) and the expected flags (AnsFlg), then
// feeds X/Y/Z to the unpack instance below, which drives the remaining outputs.
//
// Vector layout as read by this module (LEN = width of the format selected by FmaFmt):
//   TestVector[4:0]                 -> AnsFlg (bits [7:5] are not read here)
//   TestVector[8+LEN-1   : 8]       -> Ans
//   TestVector[8+2*LEN-1 : 8+LEN]   -> Z
//   TestVector[8+3*LEN-1 : 8+2*LEN] -> Y
//   TestVector[8+4*LEN-1 : 8+3*LEN] -> X
// For formats narrower than FLEN, the upper FLEN-LEN bits of X, Y, Z and Ans are
// filled with ones (RISC-V-style NaN boxing).
module readfmavectors (
input logic clk,
input logic [`FMTBITS-1:0] FmaModFmt, // the modified format (passed to the unpacker as FmtE)
input logic [1:0] FmaFmt, // the format of the FMA inputs (selects how TestVector is sliced)
input logic [`FLEN*4+7:0] TestVector, // the test vector
output logic [`FLEN-1:0] Ans, // the correct answer
output logic [4:0] AnsFlg, // the correct flag
output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ
output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision)
output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN
output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN
output logic XDenormE, ZDenormE, // is X/Z denormalized (there is no Y output for this)
output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero
output logic XInfE, YInfE, ZInfE, // is XYZ infinity
output logic [`FLEN-1:0] X, Y, Z // inputs
);
logic XExpMaxE; // signals the unpacker outputs but isn't used in FMA
// apply test vectors on rising edge of clk
// Format of vectors Inputs(1/2/3)_AnsFlg
always @(posedge clk) begin
// wait one time unit after the clock edge before updating the outputs
// NOTE(review): presumably to avoid racing with logic sampled on the same edge - confirm
#1;
AnsFlg = TestVector[4:0];
case (FmaFmt)
2'b11: begin // quad
// no upper-bit padding in this branch
// NOTE(review): assumes FLEN == Q_LEN whenever quad is selected - confirm
X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // double
// upper FLEN-D_LEN bits are filled with ones
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b00: begin // single
// upper FLEN-S_LEN bits are filled with ones
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b10: begin // half
// upper FLEN-H_LEN bits are filled with ones
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
endcase
end
// unpack is defined elsewhere; it receives X/Y/Z and the format, and drives the
// sign, exponent, mantissa and classification outputs of this module.
// XExpMaxE is connected only because the port exists; it is not exported.
unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE,
.XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE,
.XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE,
.XExpMaxE, .ZDenormE);
endmodule
module readvectors (
input logic clk,
input logic [`FLEN*4+7:0] TestVector,
@ -1451,33 +908,61 @@ module readvectors (
`FMAUNIT:
case (Fmt)
2'b11: begin // quad
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
if(OpCtrl === `FMA_OPCTRL) begin
X = TestVector[8+4*(`Q_LEN)-1:8+3*(`Q_LEN)];
Y = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
Z = TestVector[8+2*(`Q_LEN)-1:8+`Q_LEN];
end
else begin
X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)];
if(OpCtrl === `MUL_OPCTRL) Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; else Y = {2'b0, {`Q_NE-1{1'b1}}, (`Q_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = 0; else Z = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)];
end
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // double
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
if(OpCtrl === `FMA_OPCTRL) begin
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+4*(`D_LEN)-1:8+3*(`D_LEN)]};
Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+`D_LEN]};
end
else begin
X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
else Y = {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}};
else Z = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]};
end
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b00: begin // single
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
if(OpCtrl === `FMA_OPCTRL) begin
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+4*(`S_LEN)-1:8+3*(`S_LEN)]};
Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+`S_LEN]};
end
else begin
X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
else Y = {{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}};
else Z = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+(`S_LEN)]};
end
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b10: begin // half
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
if(OpCtrl === `FMA_OPCTRL) begin
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+4*(`H_LEN)-1:8+3*(`H_LEN)]};
Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+`H_LEN]};
end
else begin
X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]};
if(OpCtrl === `MUL_OPCTRL) Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
else Y = {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)};
if(OpCtrl === `MUL_OPCTRL) Z = {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}};
else Z = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]};
end
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
endcase
@ -1532,19 +1017,19 @@ module readvectors (
2'b11: begin // quad
case (OpCtrl[1:0])
2'b11: begin // quad
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
X = {TestVector[8+`Q_LEN+`Q_LEN-1:8+(`Q_LEN)]};
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // double
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
X = {TestVector[8+`Q_LEN+`D_LEN-1:8+(`D_LEN)]};
Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]};
end
2'b00: begin // single
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
X = {TestVector[8+`Q_LEN+`S_LEN-1:8+(`S_LEN)]};
Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]};
end
2'b10: begin // half
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
X = {TestVector[8+`Q_LEN+`H_LEN-1:8+(`H_LEN)]};
Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]};
end
endcase
@ -1628,12 +1113,12 @@ module readvectors (
Ans = TestVector[8+(`Q_LEN-1):8];
end
2'b01: begin // quad -> long
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
X = {TestVector[8+`XLEN+`Q_LEN-1:8+(`XLEN)]};
SrcA = {`XLEN{1'bx}};
Ans = {TestVector[8+(`XLEN-1):8]};
end
2'b00: begin // quad -> int
X = {{`FLEN-`Q_LEN{1'b1}}, TestVector[8+32+`Q_LEN-1:8+(32)]};
X = {TestVector[8+32+`Q_LEN-1:8+(32)]};
SrcA = {`XLEN{1'bx}};
Ans = {{`XLEN-32{TestVector[8+32-1]}},TestVector[8+(32-1):8]};
end

View file

@ -396,6 +396,7 @@ module riscvassertions;
assert (`F_SUPPORTED | ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)");
assert (`I_SUPPORTED ^ `E_SUPPORTED) else $error("Exactly one of I and E must be supported");
assert (`XLEN == 64 | ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32");
assert (`FLEN<=`XLEN | `DMEM == `MEM_CACHE) else $error("Wally does not support FLEN > XLEN unleses data cache is supported");
assert (`DCACHE_WAYSIZEINBYTES <= 4096 | (`DMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)");
assert (`DCACHE_LINELENINBITS >= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled");
assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size");

View file

@ -2,7 +2,7 @@
`define ADD_OPCTRL 3'b110
`define MUL_OPCTRL 3'b100
`define SUB_OPCTRL 3'b111
`define FADD_OPCTRL 3'b000
`define FMA_OPCTRL 3'b000
`define DIV_OPCTRL 3'b000
`define SQRT_OPCTRL 3'b001
`define LE_OPCTRL 3'b011
@ -21,11 +21,11 @@
`define RU 3'b011
`define RD 3'b010
`define RNM 3'b100
`define FMAUNIT 0
`define FMAUNIT 2
`define DIVUNIT 1
`define CVTINTUNIT 2
`define CVTFPUNIT 3
`define CMPUNIT 4
`define CVTINTUNIT 0
`define CVTFPUNIT 4
`define CMPUNIT 3
string f16rv32cvtint[] = '{
"ui32_to_f16_rne.tv",

View file

@ -1105,11 +1105,11 @@ string imperas32f[] = '{
// "rv64i_m/D/d_fdiv_b20-01", // looks like flags
// "rv64i_m/D/d_fdiv_b2-01", // also flags
// "rv64i_m/D/d_fdiv_b21-01", // positive NaNs again
"rv64i_m/D/d_fdiv_b3-01",
// "rv64i_m/D/d_fdiv_b3-01",
// "rv64i_m/D/d_fdiv_b4-01", // flags
"rv64i_m/D/d_fdiv_b5-01",
// "rv64i_m/D/d_fdiv_b5-01",
// "rv64i_m/D/d_fdiv_b6-01", // flags
"rv64i_m/D/d_fdiv_b7-01",
// "rv64i_m/D/d_fdiv_b7-01",
// "rv64i_m/D/d_fdiv_b8-01", // flags
// "rv64i_m/D/d_fdiv_b9-01", might be a flag too
"rv64i_m/D/d_feq_b1-01",

View file

@ -2,482 +2,482 @@
BUILD="../../addins/TestFloat-3e/build/Linux-x86_64-GCC"
OUTPUT="./vectors"
echo "Creating ui32_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f16 > $OUTPUT/ui32_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f16 > $OUTPUT/ui32_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f16 > $OUTPUT/ui32_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f16 > $OUTPUT/ui32_to_f16_rnm.tv
echo "Creating ui32_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f32 > $OUTPUT/ui32_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f32 > $OUTPUT/ui32_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f32 > $OUTPUT/ui32_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f32 > $OUTPUT/ui32_to_f32_rnm.tv
echo "Creating ui32_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f64 > $OUTPUT/ui32_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f64 > $OUTPUT/ui32_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f64 > $OUTPUT/ui32_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f64 > $OUTPUT/ui32_to_f64_rnm.tv
echo "Creating ui32_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
$BUILD/testfloat_gen -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
$BUILD/testfloat_gen -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui32_to_f128 > $OUTPUT/ui32_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui32_to_f128 > $OUTPUT/ui32_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui32_to_f128 > $OUTPUT/ui32_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui32_to_f128 > $OUTPUT/ui32_to_f128_rnm.tv
echo "Creating ui64_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f16 > $OUTPUT/ui64_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f16 > $OUTPUT/ui64_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f16 > $OUTPUT/ui64_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f16 > $OUTPUT/ui64_to_f16_rnm.tv
echo "Creating ui64_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f32 > $OUTPUT/ui64_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f32 > $OUTPUT/ui64_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f32 > $OUTPUT/ui64_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f32 > $OUTPUT/ui64_to_f32_rnm.tv
echo "Creating ui64_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f64 > $OUTPUT/ui64_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f64 > $OUTPUT/ui64_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f64 > $OUTPUT/ui64_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f64 > $OUTPUT/ui64_to_f64_rnm.tv
echo "Creating ui64_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
$BUILD/testfloat_gen -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
$BUILD/testfloat_gen -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even ui64_to_f128 > $OUTPUT/ui64_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax ui64_to_f128 > $OUTPUT/ui64_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin ui64_to_f128 > $OUTPUT/ui64_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag ui64_to_f128 > $OUTPUT/ui64_to_f128_rnm.tv
echo "Creating i32_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f16 > $OUTPUT/i32_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f16 > $OUTPUT/i32_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f16 > $OUTPUT/i32_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f16 > $OUTPUT/i32_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f16 > $OUTPUT/i32_to_f16_rnm.tv
echo "Creating i32_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f32 > $OUTPUT/i32_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f32 > $OUTPUT/i32_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f32 > $OUTPUT/i32_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f32 > $OUTPUT/i32_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f32 > $OUTPUT/i32_to_f32_rnm.tv
echo "Creating i32_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f64 > $OUTPUT/i32_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f64 > $OUTPUT/i32_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f64 > $OUTPUT/i32_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f64 > $OUTPUT/i32_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f64 > $OUTPUT/i32_to_f64_rnm.tv
echo "Creating i32_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
$BUILD/testfloat_gen -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
$BUILD/testfloat_gen -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i32_to_f128 > $OUTPUT/i32_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i32_to_f128 > $OUTPUT/i32_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i32_to_f128 > $OUTPUT/i32_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i32_to_f128 > $OUTPUT/i32_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i32_to_f128 > $OUTPUT/i32_to_f128_rnm.tv
echo "Creating i64_to_f16 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f16 > $OUTPUT/i64_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f16 > $OUTPUT/i64_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f16 > $OUTPUT/i64_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f16 > $OUTPUT/i64_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f16 > $OUTPUT/i64_to_f16_rnm.tv
echo "Creating i64_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f32 > $OUTPUT/i64_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f32 > $OUTPUT/i64_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f32 > $OUTPUT/i64_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f32 > $OUTPUT/i64_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f32 > $OUTPUT/i64_to_f32_rnm.tv
echo "Creating i64_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f64 > $OUTPUT/i64_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f64 > $OUTPUT/i64_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f64 > $OUTPUT/i64_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f64 > $OUTPUT/i64_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f64 > $OUTPUT/i64_to_f64_rnm.tv
echo "Creating i64_to_f128 convert vectors"
$BUILD/testfloat_gen -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
$BUILD/testfloat_gen -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
$BUILD/testfloat_gen -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
$BUILD/testfloat_gen -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
$BUILD/testfloat_gen -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even i64_to_f128 > $OUTPUT/i64_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag i64_to_f128 > $OUTPUT/i64_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax i64_to_f128 > $OUTPUT/i64_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin i64_to_f128 > $OUTPUT/i64_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag i64_to_f128 > $OUTPUT/i64_to_f128_rnm.tv
echo "Creating f16_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui32 > $OUTPUT/f16_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui32 > $OUTPUT/f16_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui32 > $OUTPUT/f16_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui32 > $OUTPUT/f16_to_ui32_rnm.tv
echo "Creating f32_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui32 > $OUTPUT/f32_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui32 > $OUTPUT/f32_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui32 > $OUTPUT/f32_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui32 > $OUTPUT/f32_to_ui32_rnm.tv
echo "Creating f64_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui32 > $OUTPUT/f64_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui32 > $OUTPUT/f64_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui32 > $OUTPUT/f64_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui32 > $OUTPUT/f64_to_ui32_rnm.tv
echo "Creating f128_to_ui32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui32 > $OUTPUT/f128_to_ui32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui32 > $OUTPUT/f128_to_ui32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui32 > $OUTPUT/f128_to_ui32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui32 > $OUTPUT/f128_to_ui32_rnm.tv
echo "Creating f16_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_ui64 > $OUTPUT/f16_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_ui64 > $OUTPUT/f16_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_ui64 > $OUTPUT/f16_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_ui64 > $OUTPUT/f16_to_ui64_rnm.tv
echo "Creating f32_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_ui64 > $OUTPUT/f32_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_ui64 > $OUTPUT/f32_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_ui64 > $OUTPUT/f32_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_ui64 > $OUTPUT/f32_to_ui64_rnm.tv
echo "Creating f64_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_ui64 > $OUTPUT/f64_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_ui64 > $OUTPUT/f64_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_ui64 > $OUTPUT/f64_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_ui64 > $OUTPUT/f64_to_ui64_rnm.tv
echo "Creating f128_to_ui64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_ui64 > $OUTPUT/f128_to_ui64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_ui64 > $OUTPUT/f128_to_ui64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_ui64 > $OUTPUT/f128_to_ui64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_ui64 > $OUTPUT/f128_to_ui64_rnm.tv
echo "Creating f16_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i32 > $OUTPUT/f16_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i32 > $OUTPUT/f16_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i32 > $OUTPUT/f16_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i32 > $OUTPUT/f16_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i32 > $OUTPUT/f16_to_i32_rnm.tv
echo "Creating f32_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i32 > $OUTPUT/f32_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i32 > $OUTPUT/f32_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i32 > $OUTPUT/f32_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i32 > $OUTPUT/f32_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i32 > $OUTPUT/f32_to_i32_rnm.tv
echo "Creating f64_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i32 > $OUTPUT/f64_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i32 > $OUTPUT/f64_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i32 > $OUTPUT/f64_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i32 > $OUTPUT/f64_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i32 > $OUTPUT/f64_to_i32_rnm.tv
echo "Creating f128_to_i32 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i32 > $OUTPUT/f128_to_i32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i32 > $OUTPUT/f128_to_i32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i32 > $OUTPUT/f128_to_i32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i32 > $OUTPUT/f128_to_i32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i32 > $OUTPUT/f128_to_i32_rnm.tv
echo "Creating f16_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f16_to_i64 > $OUTPUT/f16_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f16_to_i64 > $OUTPUT/f16_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f16_to_i64 > $OUTPUT/f16_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f16_to_i64 > $OUTPUT/f16_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f16_to_i64 > $OUTPUT/f16_to_i64_rnm.tv
echo "Creating f32_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f32_to_i64 > $OUTPUT/f32_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f32_to_i64 > $OUTPUT/f32_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f32_to_i64 > $OUTPUT/f32_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f32_to_i64 > $OUTPUT/f32_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f32_to_i64 > $OUTPUT/f32_to_i64_rnm.tv
echo "Creating f64_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f64_to_i64 > $OUTPUT/f64_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f64_to_i64 > $OUTPUT/f64_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f64_to_i64 > $OUTPUT/f64_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f64_to_i64 > $OUTPUT/f64_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f64_to_i64 > $OUTPUT/f64_to_i64_rnm.tv
echo "Creating f128_to_i64 convert vectors"
$BUILD/testfloat_gen -rnear_even -exact f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
$BUILD/testfloat_gen -rminMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
$BUILD/testfloat_gen -rmax -exact f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
$BUILD/testfloat_gen -rmin -exact f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag -exact f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_even f128_to_i64 > $OUTPUT/f128_to_i64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rminMag f128_to_i64 > $OUTPUT/f128_to_i64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmax f128_to_i64 > $OUTPUT/f128_to_i64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rmin f128_to_i64 > $OUTPUT/f128_to_i64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -exact -rnear_maxMag f128_to_i64 > $OUTPUT/f128_to_i64_rnm.tv
echo "Creating f16_to_f32 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
$BUILD/testfloat_gen -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
$BUILD/testfloat_gen -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f32 > $OUTPUT/f16_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f32 > $OUTPUT/f16_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f32 > $OUTPUT/f16_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f32 > $OUTPUT/f16_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f32 > $OUTPUT/f16_to_f32_rnm.tv
echo "Creating f16_to_f64 convert vectors"
$BUILD/testfloat_gen -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
$BUILD/testfloat_gen -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
$BUILD/testfloat_gen -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
$BUILD/testfloat_gen -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
$BUILD/testfloat_gen -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f64 > $OUTPUT/f16_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f64 > $OUTPUT/f16_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f64 > $OUTPUT/f16_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f64 > $OUTPUT/f16_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f64 > $OUTPUT/f16_to_f64_rnm.tv
# Float-to-float conversion vectors: one .tv file per rounding mode
# (rne, rz, ru, rd, rnm) for each source/destination pair listed below.
# Every file is generated exactly once, with -tininessafter -level 2.
# A previous pass with default options wrote to the same paths and was
# immediately overwritten by these commands, so it has been removed.
echo "Creating f16_to_f128 convert vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_to_f128 > $OUTPUT/f16_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_to_f128 > $OUTPUT/f16_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_to_f128 > $OUTPUT/f16_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_to_f128 > $OUTPUT/f16_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_to_f128 > $OUTPUT/f16_to_f128_rnm.tv
echo "Creating f32_to_f16 convert vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f16 > $OUTPUT/f32_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f16 > $OUTPUT/f32_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f16 > $OUTPUT/f32_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f16 > $OUTPUT/f32_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f16 > $OUTPUT/f32_to_f16_rnm.tv
echo "Creating f32_to_f64 convert vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f64 > $OUTPUT/f32_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f64 > $OUTPUT/f32_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f64 > $OUTPUT/f32_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f64 > $OUTPUT/f32_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f64 > $OUTPUT/f32_to_f64_rnm.tv
echo "Creating f32_to_f128 convert vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_to_f128 > $OUTPUT/f32_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_to_f128 > $OUTPUT/f32_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_to_f128 > $OUTPUT/f32_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_to_f128 > $OUTPUT/f32_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_to_f128 > $OUTPUT/f32_to_f128_rnm.tv
echo "Creating f64_to_f16 convert vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f16 > $OUTPUT/f64_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f16 > $OUTPUT/f64_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f16 > $OUTPUT/f64_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f16 > $OUTPUT/f64_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f16 > $OUTPUT/f64_to_f16_rnm.tv
echo "Creating f64_to_f32 convert vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f32 > $OUTPUT/f64_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f32 > $OUTPUT/f64_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f32 > $OUTPUT/f64_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f32 > $OUTPUT/f64_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f32 > $OUTPUT/f64_to_f32_rnm.tv
echo "Creating f64_to_f128 convert vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_to_f128 > $OUTPUT/f64_to_f128_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_to_f128 > $OUTPUT/f64_to_f128_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_to_f128 > $OUTPUT/f64_to_f128_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_to_f128 > $OUTPUT/f64_to_f128_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_to_f128 > $OUTPUT/f64_to_f128_rnm.tv
echo "Creating f128_to_f16 convert vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f16 > $OUTPUT/f128_to_f16_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f16 > $OUTPUT/f128_to_f16_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f16 > $OUTPUT/f128_to_f16_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f16 > $OUTPUT/f128_to_f16_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f16 > $OUTPUT/f128_to_f16_rnm.tv
echo "Creating f128_to_f32 convert vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f32 > $OUTPUT/f128_to_f32_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f32 > $OUTPUT/f128_to_f32_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f32 > $OUTPUT/f128_to_f32_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f32 > $OUTPUT/f128_to_f32_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f32 > $OUTPUT/f128_to_f32_rnm.tv
echo "Creating f128_to_f64 convert vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_to_f64 > $OUTPUT/f128_to_f64_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_to_f64 > $OUTPUT/f128_to_f64_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_to_f64 > $OUTPUT/f128_to_f64_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_to_f64 > $OUTPUT/f128_to_f64_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_to_f64 > $OUTPUT/f128_to_f64_rnm.tv
# Addition vectors for f16/f32/f64/f128: one .tv file per rounding mode
# (rne, rz, ru, rd, rnm), generated once with -tininessafter -level 1.
# The former default-option runs targeted the same files and were
# overwritten by these commands, so they have been removed.
echo "Creating f16_add vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_add > $OUTPUT/f16_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_add > $OUTPUT/f16_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_add > $OUTPUT/f16_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_add > $OUTPUT/f16_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_add > $OUTPUT/f16_add_rnm.tv
echo "Creating f32_add vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_add > $OUTPUT/f32_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_add > $OUTPUT/f32_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_add > $OUTPUT/f32_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_add > $OUTPUT/f32_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_add > $OUTPUT/f32_add_rnm.tv
echo "Creating f64_add vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_add > $OUTPUT/f64_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_add > $OUTPUT/f64_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_add > $OUTPUT/f64_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_add > $OUTPUT/f64_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_add > $OUTPUT/f64_add_rnm.tv
echo "Creating f128_add vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_add > $OUTPUT/f128_add_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_add > $OUTPUT/f128_add_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_add > $OUTPUT/f128_add_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_add > $OUTPUT/f128_add_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_add > $OUTPUT/f128_add_rnm.tv
# Subtraction vectors for f16/f32/f64/f128: one .tv file per rounding mode
# (rne, rz, ru, rd, rnm), generated once with -tininessafter -level 1.
# The former default-option runs targeted the same files and were
# overwritten by these commands, so they have been removed.
echo "Creating f16_sub vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_sub > $OUTPUT/f16_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_sub > $OUTPUT/f16_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_sub > $OUTPUT/f16_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_sub > $OUTPUT/f16_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_sub > $OUTPUT/f16_sub_rnm.tv
echo "Creating f32_sub vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_sub > $OUTPUT/f32_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_sub > $OUTPUT/f32_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_sub > $OUTPUT/f32_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_sub > $OUTPUT/f32_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_sub > $OUTPUT/f32_sub_rnm.tv
echo "Creating f64_sub vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_sub > $OUTPUT/f64_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_sub > $OUTPUT/f64_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_sub > $OUTPUT/f64_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_sub > $OUTPUT/f64_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_sub > $OUTPUT/f64_sub_rnm.tv
echo "Creating f128_sub vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_sub > $OUTPUT/f128_sub_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_sub > $OUTPUT/f128_sub_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_sub > $OUTPUT/f128_sub_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_sub > $OUTPUT/f128_sub_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_sub > $OUTPUT/f128_sub_rnm.tv
# Multiplication vectors for f16/f32/f64/f128: one .tv file per rounding
# mode (rne, rz, ru, rd, rnm), generated once with -tininessafter -level 1.
# The former default-option runs targeted the same files and were
# overwritten by these commands, so they have been removed.
echo "Creating f16_mul vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_mul > $OUTPUT/f16_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_mul > $OUTPUT/f16_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_mul > $OUTPUT/f16_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_mul > $OUTPUT/f16_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_mul > $OUTPUT/f16_mul_rnm.tv
echo "Creating f32_mul vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_mul > $OUTPUT/f32_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_mul > $OUTPUT/f32_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_mul > $OUTPUT/f32_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_mul > $OUTPUT/f32_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_mul > $OUTPUT/f32_mul_rnm.tv
echo "Creating f64_mul vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_mul > $OUTPUT/f64_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_mul > $OUTPUT/f64_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_mul > $OUTPUT/f64_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_mul > $OUTPUT/f64_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_mul > $OUTPUT/f64_mul_rnm.tv
echo "Creating f128_mul vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_mul > $OUTPUT/f128_mul_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_mul > $OUTPUT/f128_mul_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_mul > $OUTPUT/f128_mul_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_mul > $OUTPUT/f128_mul_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_mul > $OUTPUT/f128_mul_rnm.tv
# Division vectors for f16/f32/f64/f128: one .tv file per rounding mode
# (rne, rz, ru, rd, rnm), generated once with -tininessafter -level 1.
# The former default-option runs targeted the same files and were
# overwritten by these commands, so they have been removed.
echo "Creating f16_div vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_div > $OUTPUT/f16_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_div > $OUTPUT/f16_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_div > $OUTPUT/f16_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_div > $OUTPUT/f16_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_div > $OUTPUT/f16_div_rnm.tv
echo "Creating f32_div vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_div > $OUTPUT/f32_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_div > $OUTPUT/f32_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_div > $OUTPUT/f32_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_div > $OUTPUT/f32_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_div > $OUTPUT/f32_div_rnm.tv
echo "Creating f64_div vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_div > $OUTPUT/f64_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_div > $OUTPUT/f64_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_div > $OUTPUT/f64_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_div > $OUTPUT/f64_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_div > $OUTPUT/f64_div_rnm.tv
echo "Creating f128_div vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_div > $OUTPUT/f128_div_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_div > $OUTPUT/f128_div_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_div > $OUTPUT/f128_div_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_div > $OUTPUT/f128_div_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_div > $OUTPUT/f128_div_rnm.tv
# Square-root vectors for f16/f32/f64/f128: one .tv file per rounding mode
# (rne, rz, ru, rd, rnm), generated once with -tininessafter -level 2.
# The former default-option runs targeted the same files and were
# overwritten by these commands, so they have been removed.
echo "Creating f16_sqrt vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f16_sqrt > $OUTPUT/f16_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f16_sqrt > $OUTPUT/f16_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f16_sqrt > $OUTPUT/f16_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f16_sqrt > $OUTPUT/f16_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f16_sqrt > $OUTPUT/f16_sqrt_rnm.tv
echo "Creating f32_sqrt vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f32_sqrt > $OUTPUT/f32_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f32_sqrt > $OUTPUT/f32_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f32_sqrt > $OUTPUT/f32_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f32_sqrt > $OUTPUT/f32_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f32_sqrt > $OUTPUT/f32_sqrt_rnm.tv
echo "Creating f64_sqrt vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f64_sqrt > $OUTPUT/f64_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f64_sqrt > $OUTPUT/f64_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f64_sqrt > $OUTPUT/f64_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f64_sqrt > $OUTPUT/f64_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f64_sqrt > $OUTPUT/f64_sqrt_rnm.tv
echo "Creating f128_sqrt vectors"
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_even f128_sqrt > $OUTPUT/f128_sqrt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rminMag f128_sqrt > $OUTPUT/f128_sqrt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmax f128_sqrt > $OUTPUT/f128_sqrt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rmin f128_sqrt > $OUTPUT/f128_sqrt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 2 -rnear_maxMag f128_sqrt > $OUTPUT/f128_sqrt_rnm.tv
# Equality-comparison vectors for f16/f32/f64/f128: one .tv file per
# rounding-mode suffix (rne, rz, ru, rd, rnm), generated once with
# -tininessafter -level 1. The former default-option runs targeted the
# same files and were overwritten by these commands, so they are removed.
echo "Creating f16_eq vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_eq > $OUTPUT/f16_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_eq > $OUTPUT/f16_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_eq > $OUTPUT/f16_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_eq > $OUTPUT/f16_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_eq > $OUTPUT/f16_eq_rnm.tv
echo "Creating f32_eq vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_eq > $OUTPUT/f32_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_eq > $OUTPUT/f32_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_eq > $OUTPUT/f32_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_eq > $OUTPUT/f32_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_eq > $OUTPUT/f32_eq_rnm.tv
echo "Creating f64_eq vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_eq > $OUTPUT/f64_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_eq > $OUTPUT/f64_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_eq > $OUTPUT/f64_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_eq > $OUTPUT/f64_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_eq > $OUTPUT/f64_eq_rnm.tv
echo "Creating f128_eq vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_eq > $OUTPUT/f128_eq_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_eq > $OUTPUT/f128_eq_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_eq > $OUTPUT/f128_eq_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_eq > $OUTPUT/f128_eq_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_eq > $OUTPUT/f128_eq_rnm.tv
# Less-than-or-equal comparison vectors for f16/f32/f64/f128: one .tv file
# per rounding-mode suffix (rne, rz, ru, rd, rnm), generated once with
# -tininessafter -level 1. The former default-option runs targeted the
# same files and were overwritten by these commands, so they are removed.
echo "Creating f16_le vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_le > $OUTPUT/f16_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_le > $OUTPUT/f16_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_le > $OUTPUT/f16_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_le > $OUTPUT/f16_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_le > $OUTPUT/f16_le_rnm.tv
echo "Creating f32_le vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_le > $OUTPUT/f32_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_le > $OUTPUT/f32_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_le > $OUTPUT/f32_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_le > $OUTPUT/f32_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_le > $OUTPUT/f32_le_rnm.tv
echo "Creating f64_le vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_le > $OUTPUT/f64_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_le > $OUTPUT/f64_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_le > $OUTPUT/f64_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_le > $OUTPUT/f64_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_le > $OUTPUT/f64_le_rnm.tv
echo "Creating f128_le vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_le > $OUTPUT/f128_le_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_le > $OUTPUT/f128_le_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_le > $OUTPUT/f128_le_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_le > $OUTPUT/f128_le_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_le > $OUTPUT/f128_le_rnm.tv
# Less-than comparison vectors for f16/f32/f64/f128: one .tv file per
# rounding-mode suffix (rne, rz, ru, rd, rnm), generated once with
# -tininessafter -level 1. The former default-option runs targeted the
# same files and were overwritten by these commands, so they are removed.
echo "Creating f16_lt vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f16_lt > $OUTPUT/f16_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f16_lt > $OUTPUT/f16_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f16_lt > $OUTPUT/f16_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f16_lt > $OUTPUT/f16_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f16_lt > $OUTPUT/f16_lt_rnm.tv
echo "Creating f32_lt vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f32_lt > $OUTPUT/f32_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f32_lt > $OUTPUT/f32_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f32_lt > $OUTPUT/f32_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f32_lt > $OUTPUT/f32_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f32_lt > $OUTPUT/f32_lt_rnm.tv
echo "Creating f64_lt vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f64_lt > $OUTPUT/f64_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f64_lt > $OUTPUT/f64_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f64_lt > $OUTPUT/f64_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f64_lt > $OUTPUT/f64_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f64_lt > $OUTPUT/f64_lt_rnm.tv
echo "Creating f128_lt vectors"
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_even f128_lt > $OUTPUT/f128_lt_rne.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rminMag f128_lt > $OUTPUT/f128_lt_rz.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmax f128_lt > $OUTPUT/f128_lt_ru.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rmin f128_lt > $OUTPUT/f128_lt_rd.tv
$BUILD/testfloat_gen -tininessafter -level 1 -rnear_maxMag f128_lt > $OUTPUT/f128_lt_rnm.tv
# f16_mulAdd vectors: one file per rounding mode
# (rne = near_even, rz = minMag, ru = max, rd = min, rnm = near_maxMag).
# Each file is written exactly once, with -tininessafter -level 1. A second,
# flag-less run redirected to the same path would simply be overwritten, so
# only the flagged invocations are kept. Paths are quoted so that BUILD and
# OUTPUT may contain spaces.
echo "Creating f16_mulAdd vectors"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f16_mulAdd > "$OUTPUT/f16_mulAdd_rne.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f16_mulAdd > "$OUTPUT/f16_mulAdd_rz.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f16_mulAdd > "$OUTPUT/f16_mulAdd_ru.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f16_mulAdd > "$OUTPUT/f16_mulAdd_rd.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f16_mulAdd > "$OUTPUT/f16_mulAdd_rnm.tv"
# f32_mulAdd vectors: one file per rounding mode
# (rne = near_even, rz = minMag, ru = max, rd = min, rnm = near_maxMag).
# Each file is written exactly once, with -tininessafter -level 1. A second,
# flag-less run redirected to the same path would simply be overwritten, so
# only the flagged invocations are kept. Paths are quoted so that BUILD and
# OUTPUT may contain spaces.
echo "Creating f32_mulAdd vectors"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f32_mulAdd > "$OUTPUT/f32_mulAdd_rne.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f32_mulAdd > "$OUTPUT/f32_mulAdd_rz.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f32_mulAdd > "$OUTPUT/f32_mulAdd_ru.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f32_mulAdd > "$OUTPUT/f32_mulAdd_rd.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f32_mulAdd > "$OUTPUT/f32_mulAdd_rnm.tv"
# f64_mulAdd vectors: one file per rounding mode
# (rne = near_even, rz = minMag, ru = max, rd = min, rnm = near_maxMag).
# Each file is written exactly once, with -tininessafter -level 1. A second,
# flag-less run redirected to the same path would simply be overwritten, so
# only the flagged invocations are kept. Paths are quoted so that BUILD and
# OUTPUT may contain spaces.
echo "Creating f64_mulAdd vectors"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f64_mulAdd > "$OUTPUT/f64_mulAdd_rne.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f64_mulAdd > "$OUTPUT/f64_mulAdd_rz.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f64_mulAdd > "$OUTPUT/f64_mulAdd_ru.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f64_mulAdd > "$OUTPUT/f64_mulAdd_rd.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f64_mulAdd > "$OUTPUT/f64_mulAdd_rnm.tv"
# f128_mulAdd vectors: one file per rounding mode
# (rne = near_even, rz = minMag, ru = max, rd = min, rnm = near_maxMag).
# Each file is written exactly once, with -tininessafter -level 1. A second,
# flag-less run redirected to the same path would simply be overwritten, so
# only the flagged invocations are kept. Paths are quoted so that BUILD and
# OUTPUT may contain spaces.
echo "Creating f128_mulAdd vectors"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_even f128_mulAdd > "$OUTPUT/f128_mulAdd_rne.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rminMag f128_mulAdd > "$OUTPUT/f128_mulAdd_rz.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rmax f128_mulAdd > "$OUTPUT/f128_mulAdd_ru.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rmin f128_mulAdd > "$OUTPUT/f128_mulAdd_rd.tv"
"$BUILD/testfloat_gen" -tininessafter -level 1 -rnear_maxMag f128_mulAdd > "$OUTPUT/f128_mulAdd_rnm.tv"