Clean up tabs

This commit is contained in:
David Harris 2023-01-15 18:23:09 -08:00
parent 364cf97c34
commit dc74bcff5b
35 changed files with 2357 additions and 2376 deletions

View file

@ -28,41 +28,41 @@
`include "wally-config.vh"
// fclassify: combines the input's sign bit with its NaN / signaling-NaN /
// subnormal / zero / infinity flags into the class mask ClassRes.
// The low 10 bits of ClassRes carry the class flags (see the bit table below);
// the remaining upper bits are driven to zero.
// NOTE(review): this text appears to be a rendered diff of a whitespace-only
// commit, so each run of lines shows up twice (presumably old copy, then new copy).
module fclassify (
input logic Xs, // sign bit
input logic XNaN, // is NaN
input logic XSNaN, // is signaling NaN
input logic XSubnorm, // is subnormal
input logic XZero, // is zero
input logic XInf, // is infinity
output logic [`XLEN-1:0] ClassRes // classify result
input logic Xs, // sign bit
input logic XNaN, // is NaN
input logic XSNaN, // is signaling NaN
input logic XSubnorm, // is subnormal
input logic XZero, // is zero
input logic XInf, // is infinity
output logic [`XLEN-1:0] ClassRes // classify result
);
logic PInf, PZero, PNorm, PSubnorm; // is the input a positive infinity/zero/normal/subnormal
logic NInf, NZero, NNorm, NSubnorm; // is the input a negative infinity/zero/normal/subnormal
logic XNorm; // is the input normal
// determine the sub categories
// the input is normal when it is none of NaN, infinity, subnormal or zero
assign XNorm= ~(XNaN | XInf| XSubnorm| XZero);
// split each category by sign: P* when Xs is clear, N* when Xs is set
assign PInf = ~Xs&XInf;
assign NInf = Xs&XInf;
assign PNorm = ~Xs&XNorm;
assign NNorm = Xs&XNorm;
assign PSubnorm = ~Xs&XSubnorm;
assign NSubnorm = Xs&XSubnorm;
assign PZero = ~Xs&XZero;
assign NZero = Xs&XZero;
logic PInf, PZero, PNorm, PSubnorm; // is the input a positive infinity/zero/normal/subnormal
logic NInf, NZero, NNorm, NSubnorm; // is the input a negative infinity/zero/normal/subnormal
logic XNorm; // is the input normal
// determine the sub categories
// the input is normal when it is none of NaN, infinity, subnormal or zero
assign XNorm= ~(XNaN | XInf| XSubnorm| XZero);
// split each category by sign: P* when Xs is clear, N* when Xs is set
assign PInf = ~Xs&XInf;
assign NInf = Xs&XInf;
assign PNorm = ~Xs&XNorm;
assign NNorm = Xs&XNorm;
assign PSubnorm = ~Xs&XSubnorm;
assign NSubnorm = Xs&XSubnorm;
assign PZero = ~Xs&XZero;
assign NZero = Xs&XZero;
// determine sub category and combine into the result
// bit 0 - -Inf
// bit 1 - -Norm
// bit 2 - -Subnorm
// bit 3 - -Zero
// bit 4 - +Zero
// bit 5 - +Subnorm
// bit 6 - +Norm
// bit 7 - +Inf
// bit 8 - signaling NaN
// bit 9 - quiet NaN
// quiet NaN is derived as "NaN but not signaling NaN"; upper XLEN-10 bits are zero
assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf};
// determine sub category and combine into the result
// bit 0 - -Inf
// bit 1 - -Norm
// bit 2 - -Subnorm
// bit 3 - -Zero
// bit 4 - +Zero
// bit 5 - +Subnorm
// bit 6 - +Norm
// bit 7 - +Inf
// bit 8 - signaling NaN
// bit 9 - quiet NaN
// quiet NaN is derived as "NaN but not signaling NaN"; upper XLEN-10 bits are zero
assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf};
endmodule

View file

@ -37,124 +37,124 @@
// 011 less than or equal
// fcmp: floating-point compare and min/max unit.
// - CmpNV     : invalid flag, selected per operation by OpCtrl
// - CmpFpRes  : min/max result (X, Y, or NaNRes when both inputs are NaN)
// - CmpIntRes : 0/1 result of the LT/LE/EQ comparisons, zero-extended to XLEN
// OpCtrl encodings handled by the flag logic below:
//   110 min, 101 max, 010 equal, 001 less than, 011 less than or equal
// NOTE(review): this text appears to be a rendered diff of a whitespace-only
// commit, so each run of lines shows up twice (presumably old copy, then new copy).
module fcmp (
input logic [`FMTBITS-1:0] Fmt, // format of fp number
input logic [2:0] OpCtrl, // see above table
input logic Xs, Ys, // input signs
input logic [`NE-1:0] Xe, Ye, // input exponents
input logic [`NF:0] Xm, Ym, // input mantissa
input logic XZero, YZero, // is zero
input logic XNaN, YNaN, // is NaN
input logic XSNaN, YSNaN, // is signaling NaN
input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker)
output logic CmpNV, // invalid flag
output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result
output logic [`XLEN-1:0] CmpIntRes // compare integer result
);
input logic [`FMTBITS-1:0] Fmt, // format of fp number
input logic [2:0] OpCtrl, // see above table
input logic Xs, Ys, // input signs
input logic [`NE-1:0] Xe, Ye, // input exponents
input logic [`NF:0] Xm, Ym, // input mantissa
input logic XZero, YZero, // is zero
input logic XNaN, YNaN, // is NaN
input logic XSNaN, YSNaN, // is signaling NaN
input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker)
output logic CmpNV, // invalid flag
output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result
output logic [`XLEN-1:0] CmpIntRes // compare integer result
);
logic LTabs, LT, EQ; // LTabs: {Xe,Xm} < {Ye,Ym} (magnitude), LT: X < Y (signed), EQ: X == Y (bitwise)
logic [`FLEN-1:0] NaNRes; // NaN result
logic BothZero; // are both inputs zero
logic EitherNaN, EitherSNaN; // is either input a (signaling) NaN
assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers
// X < Y when: X negative and Y non-negative; both negative and |X| > |Y|; both non-negative and |X| < |Y|
assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison
assign EQ = (X == Y);
logic LTabs, LT, EQ; // LTabs: {Xe,Xm} < {Ye,Ym} (magnitude), LT: X < Y (signed), EQ: X == Y (bitwise)
logic [`FLEN-1:0] NaNRes; // NaN result
logic BothZero; // are both inputs zero
logic EitherNaN, EitherSNaN; // is either input a (signaling) NaN
assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers
// X < Y when: X negative and Y non-negative; both negative and |X| > |Y|; both non-negative and |X| < |Y|
assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison
assign EQ = (X == Y);
assign BothZero = XZero&YZero;
assign EitherNaN = XNaN|YNaN;
assign EitherSNaN = XSNaN|YSNaN;
assign BothZero = XZero&YZero;
assign EitherNaN = XNaN|YNaN;
assign EitherSNaN = XSNaN|YSNaN;
// flags
// Min/Max - if an input is a signaling NaN set invalid flag
// LT/LE - signaling - sets invalid if NaN input
// EQ - quiet - sets invalid if signaling NaN input
// any other OpCtrl value leaves CmpNV as don't-care (x)
always_comb begin
case (OpCtrl[2:0])
3'b110: CmpNV = EitherSNaN;//min
3'b101: CmpNV = EitherSNaN;//max
3'b010: CmpNV = EitherSNaN;//equal
3'b001: CmpNV = EitherNaN;//less than
3'b011: CmpNV = EitherNaN;//less than or equal
default: CmpNV = 1'bx;
endcase
end
// flags
// Min/Max - if an input is a signaling NaN set invalid flag
// LT/LE - signaling - sets invalid if NaN input
// EQ - quiet - sets invalid if signaling NaN input
// any other OpCtrl value leaves CmpNV as don't-care (x)
always_comb begin
case (OpCtrl[2:0])
3'b110: CmpNV = EitherSNaN;//min
3'b101: CmpNV = EitherSNaN;//max
3'b010: CmpNV = EitherSNaN;//equal
3'b001: CmpNV = EitherNaN;//less than
3'b011: CmpNV = EitherNaN;//less than or equal
default: CmpNV = 1'bx;
endcase
end
// fmin/fmax of two NaNs returns a quiet NaN of the appropriate size
// for IEEE, return the payload of X
// for RISC-V, return the canonical NaN
// fmin/fmax of two NaNs returns a quiet NaN of the appropriate size
// for IEEE, return the payload of X
// for RISC-V, return the canonical NaN
// select the NaN result
// (narrower formats are padded with ones in the upper FLEN-LEN bits)
if (`FPSIZES == 1)
if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
// select the NaN result
// (narrower formats are padded with ones in the upper FLEN-LEN bits)
if (`FPSIZES == 1)
if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
else if (`FPSIZES == 2)
if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
else if (`FPSIZES == 3)
always_comb
case (Fmt)
`FMT:
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
`FMT1:
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
`FMT2:
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
default: NaNRes = {`FLEN{1'bx}};
endcase
else if (`FPSIZES == 2)
if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
else if (`FPSIZES == 3)
always_comb
case (Fmt)
`FMT:
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
`FMT1:
if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
`FMT2:
if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
default: NaNRes = {`FLEN{1'bx}};
endcase
else if (`FPSIZES == 4)
always_comb
case (Fmt)
2'h3:
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
2'h1:
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
2'h0:
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
2'h2:
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
endcase
else if (`FPSIZES == 4)
always_comb
case (Fmt)
2'h3:
if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
2'h1:
if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
2'h0:
if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
2'h2:
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
endcase
// Min/Max
// - outputs the min/max of X and Y
// - -0 < 0
// - if both are NaN return quiet X
// - if one is a NaN output the non-NaN
// OpCtrl[0] selects MAX (set) or MIN (clear)
always_comb
if(OpCtrl[0]) // MAX
if(XNaN)
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
else
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = Y; // X < Y
else CmpFpRes = X; // X >= Y
else // MIN
if(XNaN)
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
else
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = X; // X < Y
else CmpFpRes = Y; // X >= Y
// LT/LE/EQ
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
// OpCtrl[1] enables the "equal" term, OpCtrl[0] enables the "less than" term
assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
// Min/Max
// - outputs the min/max of X and Y
// - -0 < 0
// - if both are NaN return quiet X
// - if one is a NaN output the non-NaN
// OpCtrl[0] selects MAX (set) or MIN (clear)
always_comb
if(OpCtrl[0]) // MAX
if(XNaN)
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
else
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = Y; // X < Y
else CmpFpRes = X; // X >= Y
else // MIN
if(XNaN)
if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN
else CmpFpRes = Y; // X = NaN Y != NaN
else
if(YNaN) CmpFpRes = X; // X != NaN Y = NaN
else // X,Y != NaN
if(LT) CmpFpRes = X; // X < Y
else CmpFpRes = Y; // X >= Y
// LT/LE/EQ
// - -0 = 0
// - inf = inf and -inf = -inf
// - return 0 if comparison with NaN (unordered)
// OpCtrl[1] enables the "equal" term, OpCtrl[0] enables the "less than" term
assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN};
endmodule

View file

@ -30,208 +30,206 @@
`include "wally-config.vh"
// fcvt: pre-processing for floating-point conversions (fp->fp, fp->int, int->fp).
// Produces the result sign (Cs), the calculated exponent (Ce), the shift amount
// (ShiftAmt), the leading-zero-counter input (LzcIn), and the IntZero /
// ResSubnormUf status signals from the unpacked FP input, the integer input,
// and the operation/format controls.
// NOTE(review): this text appears to be a rendered diff of a whitespace-only
// commit, so each run of lines shows up twice (presumably old copy, then new copy),
// and some section banners are interleaved out of order.
module fcvt (
input logic Xs, // input's sign
input logic [`NE-1:0] Xe, // input's exponent
input logic [`NF:0] Xm, // input's fraction
input logic [`XLEN-1:0] Int, // integer input - from IEU
input logic [2:0] OpCtrl, // choose which operation (look below for values)
input logic ToInt, // is fp->int (since it's writing to the integer register)
input logic XZero, // is the input zero
input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
output logic [`NE:0] Ce, // the calculated exponent
output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by
output logic ResSubnormUf,// does the result underflow or is subnormal
output logic Cs, // the result's sign
output logic IntZero, // is the integer zero?
output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder)
);
input logic Xs, // input's sign
input logic [`NE-1:0] Xe, // input's exponent
input logic [`NF:0] Xm, // input's fraction
input logic [`XLEN-1:0] Int, // integer input - from IEU
input logic [2:0] OpCtrl, // choose which operation (look below for values)
input logic ToInt, // is fp->int (since it's writing to the integer register)
input logic XZero, // is the input zero
input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half)
output logic [`NE:0] Ce, // the calculated exponent
output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by
output logic ResSubnormUf,// does the result underflow or is subnormal
output logic Cs, // the result's sign
output logic IntZero, // is the integer zero?
output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder)
);
// OpCtrls:
// fp->fp conversions: {0, output precision} - only one of the operations writes to the int register
// half - 10
// single - 00
// double - 01
// quad - 11
// int<->fp conversions: {is int->fp?, is the integer 64-bit?, is the integer signed?}
// bit 2 bit 1 bit 0
// for example: signed long -> single floating point has the OpCode 101
// NOTE(review): with IntToFp=OpCtrl[2], Int64=OpCtrl[1], Signed=OpCtrl[0] (assigned below),
// 101 decodes as a signed 32-bit integer and signed long would be 111 - confirm the example
// OpCtrls:
// fp->fp conversions: {0, output precision} - only one of the operations writes to the int register
// half - 10
// single - 00
// double - 01
// quad - 11
// int<->fp conversions: {is int->fp?, is the integer 64-bit?, is the integer signed?}
// bit 2 bit 1 bit 0
// for example: signed long -> single floating point has the OpCode 101
// NOTE(review): with IntToFp=OpCtrl[2], Int64=OpCtrl[1], Signed=OpCtrl[0] (assigned below),
// 101 decodes as a signed 32-bit integer and signed long would be 111 - confirm the example
logic [`FMTBITS-1:0] OutFmt; // format of the output
logic [`XLEN-1:0] PosInt; // the positive integer input
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
logic [`NE-2:0] NewBias; // the bias of the final result
logic [`NE-1:0] OldExp; // the old exponent
logic Signed; // is the operation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the operation an int->fp conversion?
logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
// separate OpCtrl for code readability
assign Signed = OpCtrl[0];
assign Int64 = OpCtrl[1];
assign IntToFp = OpCtrl[2];
logic [`FMTBITS-1:0] OutFmt; // format of the output
logic [`XLEN-1:0] PosInt; // the positive integer input
logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size
logic [`NE-2:0] NewBias; // the bias of the final result
logic [`NE-1:0] OldExp; // the old exponent
logic Signed; // is the operation with a signed integer?
logic Int64; // is the integer 64 bits?
logic IntToFp; // is the operation an int->fp conversion?
logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder)
logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC
// choose the output format depending on the operation
// - fp -> fp: OpCtrl contains the precision of the output
// - int -> fp: Fmt contains the precision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0];
// separate OpCtrl for code readability
assign Signed = OpCtrl[0];
assign Int64 = OpCtrl[1];
assign IntToFp = OpCtrl[2];
///////////////////////////////////////////////////////////////////////////
// negation
///////////////////////////////////////////////////////////////////////////
// 1) negate the input if the input is a negative signed integer
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
// choose the output format depending on the operation
// - fp -> fp: OpCtrl contains the precision of the output
// - int -> fp: Fmt contains the precision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0];
// Cs (the result sign, computed at the end of the module) selects the negated integer
assign PosInt = Cs ? -Int : Int;
// keep the low 32 bits always; keep the upper XLEN-32 bits only for 64-bit integers
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
assign IntZero = ~|TrimInt;
///////////////////////////////////////////////////////////////////////////
// lzc
///////////////////////////////////////////////////////////////////////////
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} :
{Xm, {`CVTLEN-`NF{1'b0}}};
// used as shifter input in postprocessor
assign LzcIn = LzcInFull[`CVTLEN-1:0];
lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
///////////////////////////////////////////////////////////////////////////
// exp calculations
///////////////////////////////////////////////////////////////////////////
// Select the bias of the output
// fp -> int : select 1
// ??? -> fp : pick the new bias depending on the output format
if (`FPSIZES == 1) begin
assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
logic [`NE-2:0] NewBiasToFp;
assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end else if (`FPSIZES == 3) begin
logic [`NE-2:0] NewBiasToFp;
always_comb
case (OutFmt)
`FMT: NewBiasToFp = (`NE-1)'(`BIAS);
`FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
`FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
default: NewBiasToFp = {`NE-1{1'bx}};
endcase
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end else if (`FPSIZES == 4) begin
logic [`NE-2:0] NewBiasToFp;
always_comb
case (OutFmt)
2'h3: NewBiasToFp = (`NE-1)'(`Q_BIAS);
2'h1: NewBiasToFp = (`NE-1)'(`D_BIAS);
2'h0: NewBiasToFp = (`NE-1)'(`S_BIAS);
2'h2: NewBiasToFp = (`NE-1)'(`H_BIAS);
endcase
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end
///////////////////////////////////////////////////////////////////////////
// negation
///////////////////////////////////////////////////////////////////////////
// 1) negate the input if the input is a negative signed integer
// 2) trim the input to the proper size (kill the 32 most significant zeroes if needed)
// Cs (the result sign, computed at the end of the module) selects the negated integer
assign PosInt = Cs ? -Int : Int;
// keep the low 32 bits always; keep the upper XLEN-32 bits only for 64-bit integers
assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt;
assign IntZero = ~|TrimInt;
///////////////////////////////////////////////////////////////////////////
// lzc
///////////////////////////////////////////////////////////////////////////
// choose the input to the leading zero counter i.e. priority encoder
// int -> fp : | positive integer | 00000... (if needed) |
// fp -> fp : | fraction | 00000... (if needed) |
assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} :
{Xm, {`CVTLEN-`NF{1'b0}}};
// used as shifter input in postprocessor
assign LzcIn = LzcInFull[`CVTLEN-1:0];
lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros));
///////////////////////////////////////////////////////////////////////////
// exp calculations
///////////////////////////////////////////////////////////////////////////
// Select the bias of the output
// fp -> int : select 1
// ??? -> fp : pick the new bias depending on the output format
if (`FPSIZES == 1) begin
assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS);
end else if (`FPSIZES == 2) begin
logic [`NE-2:0] NewBiasToFp;
assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1);
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end else if (`FPSIZES == 3) begin
logic [`NE-2:0] NewBiasToFp;
always_comb
case (OutFmt)
`FMT: NewBiasToFp = (`NE-1)'(`BIAS);
`FMT1: NewBiasToFp = (`NE-1)'(`BIAS1);
`FMT2: NewBiasToFp = (`NE-1)'(`BIAS2);
default: NewBiasToFp = {`NE-1{1'bx}};
endcase
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end else if (`FPSIZES == 4) begin
logic [`NE-2:0] NewBiasToFp;
always_comb
case (OutFmt)
2'h3: NewBiasToFp = (`NE-1)'(`Q_BIAS);
2'h1: NewBiasToFp = (`NE-1)'(`D_BIAS);
2'h0: NewBiasToFp = (`NE-1)'(`S_BIAS);
2'h2: NewBiasToFp = (`NE-1)'(`H_BIAS);
endcase
assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp;
end
// select the old exponent
// int -> fp : largest bias + XLEN-1
// fp -> ??? : XExp
assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe;
// calculate CalcExp
// fp -> fp :
// - XExp - Largest bias + new bias - (LeadingZeros+1)
// only do ^ if the input was subnormal
// - convert the exponent to the final precision (Exp - oldBias + newBias)
// - correct the exponent when there is a normalization shift ( + LeadingZeros+1)
// - the plus 1 is built into the leading zeros by counting the leading zeroes in the mantissa rather than the fraction
// fp -> int : XExp - Largest Bias + 1 - (LeadingZeros+1)
// | `XLEN zeros | Mantissa | 0's if necessary | << CalcExp
// process:
// - start
// | `XLEN zeros | Mantissa | 0's if necessary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if necessary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if necessary |
// | keep |
//
// - if the input is subnormal then we don't shift... so the "- LeadingZeros" is just leftovers from other options
// int -> fp : largest bias + XLEN-1 - Largest bias + new bias - LeadingZeros = XLEN-1 + NewBias - LeadingZeros
// Process:
// |XLEN|.0000
// - shifted right by XLEN (XLEN)
// 000000.|XLEN|
// - shift left to normalize (-LeadingZeros)
// 000000.1...
// - shift left 1 to normalize
// 000001.stuff
// - newBias to make the biased exponent
//
// oldexp - biasold - LeadingZeros + newbias
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias};
// select the old exponent
// int -> fp : largest bias + XLEN-1
// fp -> ??? : XExp
assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe;
// calculate CalcExp
// fp -> fp :
// - XExp - Largest bias + new bias - (LeadingZeros+1)
// only do ^ if the input was subnormal
// - convert the exponent to the final precision (Exp - oldBias + newBias)
// - correct the exponent when there is a normalization shift ( + LeadingZeros+1)
// - the plus 1 is built into the leading zeros by counting the leading zeroes in the mantissa rather than the fraction
// fp -> int : XExp - Largest Bias + 1 - (LeadingZeros+1)
// | `XLEN zeros | Mantissa | 0's if necessary | << CalcExp
// process:
// - start
// | `XLEN zeros | Mantissa | 0's if necessary |
//
// - shift left 1 (1)
// | `XLEN-1 zeros |bit| frac | 0's if necessary |
// . <- binary point
//
// - shift left till unbiased exponent is 0 (XExp - Largest Bias)
// | 0's | Mantissa | 0's if necessary |
// | keep |
//
// - if the input is subnormal then we don't shift... so the "- LeadingZeros" is just leftovers from other options
// int -> fp : largest bias + XLEN-1 - Largest bias + new bias - LeadingZeros = XLEN-1 + NewBias - LeadingZeros
// Process:
// |XLEN|.0000
// - shifted right by XLEN (XLEN)
// 000000.|XLEN|
// - shift left to normalize (-LeadingZeros)
// 000000.1...
// - shift left 1 to normalize
// 000001.stuff
// - newBias to make the biased exponent
//
// oldexp - biasold - LeadingZeros + newbias
assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias};
// find if the result is subnormal or underflows
// - if calculated exponent is 0 or negative (and the input/result is not exactly 0)
// - can't underflow an integer to Fp conversion
assign ResSubnormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp;
// find if the result is subnormal or underflows
// - if calculated exponent is 0 or negative (and the input/result is not exactly 0)
// - can't underflow an integer to Fp conversion
assign ResSubnormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp;
///////////////////////////////////////////////////////////////////////////
// shifter
///////////////////////////////////////////////////////////////////////////
// kill the shift if it's negative
// select the amount to shift by
// fp -> int:
// - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
// - don't shift if supposed to shift right (underflowed or Subnorm input)
// subnormal/underflowed result fp -> fp:
// - shift left by NF-1+CalcExp - to shift till the biased exponent is 0
// ??? -> fp:
// - shift left by LeadingZeros - to shift till the result is normalized
// - only shift fp -> fp if the initial value is subnormal
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
always_comb
if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}};
else if (ResSubnormUf) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0];
else ShiftAmt = LeadingZeros;
///////////////////////////////////////////////////////////////////////////
// shifter
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// sign
///////////////////////////////////////////////////////////////////////////
// kill the shift if it's negative
// select the amount to shift by
// fp -> int:
// - shift left by CalcExp - essentially shifting until the unbiased exponent = 0
// - don't shift if supposed to shift right (underflowed or Subnorm input)
// subnormal/underflowed result fp -> fp:
// - shift left by NF-1+CalcExp - to shift till the biased exponent is 0
// ??? -> fp:
// - shift left by LeadingZeros - to shift till the result is normalized
// - only shift fp -> fp if the initial value is subnormal
// - this is a problem because the input to the lzc was the fraction rather than the mantissa
// - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true?
always_comb
if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}};
else if (ResSubnormUf) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0];
else ShiftAmt = LeadingZeros;
///////////////////////////////////////////////////////////////////////////
// sign
///////////////////////////////////////////////////////////////////////////
// determine the sign of the result
// - if int -> fp
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
// - otherwise: the floating point input's sign
always_comb
if(IntToFp)
if(Int64) Cs = Int[`XLEN-1]&Signed;
else Cs = Int[31]&Signed;
else Cs = Xs;
// determine the sign of the result
// - if int -> fp
// - if 64-bit : check the msb of the 64-bit integer input and if it's signed
// - if 32-bit : check the msb of the 32-bit integer input and if it's signed
// - otherwise: the floating point input's sign
always_comb
if(IntToFp)
if(Int64) Cs = Int[`XLEN-1]&Signed;
else Cs = Int[31]&Signed;
else Cs = Xs;
endmodule

View file

@ -29,14 +29,14 @@
`include "wally-config.vh"
module fhazard(
input logic [4:0] Adr1D, Adr2D, Adr3D, // read data adresses
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteE, FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdE, RdM, RdW, // the adress being written to
input logic [1:0] FResSelM, // the result being selected
input logic XEnD, YEnD, ZEnD, // are the inputs needed
output logic FPUStallD, // stall the decode stage
output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value
input logic [4:0] Adr1D, Adr2D, Adr3D, // read data adresses
input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses
input logic FRegWriteE, FRegWriteM, FRegWriteW, // is the fp register being written to
input logic [4:0] RdE, RdM, RdW, // the adress being written to
input logic [1:0] FResSelM, // the result being selected
input logic XEnD, YEnD, ZEnD, // are the inputs needed
output logic FPUStallD, // stall the decode stage
output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value
);
logic MatchDE; // is a value needed in decode stage being worked on in execute stage
@ -73,7 +73,6 @@ module fhazard(
if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM
// if the needed value is in the writeback stage
end else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FResult64W
end
endmodule

View file

@ -29,69 +29,69 @@
`include "wally-config.vh"
module fma(
input logic Xs, Ys, Zs, // input's signs
input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
input logic XZero, YZero, ZZero, // is the input zero
input logic [2:0] OpCtrl, // operation control
output logic ASticky, // sticky bit that is calculated during alignment
output logic [3*`NF+3:0] Sm, // the positive sum's significand
output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A)
output logic As, // the aligned addend's sign (modified Z sign for other opperations)
output logic Ps, // the product's sign
output logic Ss, // the sum's sign
output logic [`NE+1:0] Se, // the sum's exponent
output logic [$clog2(3*`NF+5)-1:0] SCnt // normalization shift count
input logic Xs, Ys, Zs, // input's signs
input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format
input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format
input logic XZero, YZero, ZZero, // is the input zero
input logic [2:0] OpCtrl, // operation control
output logic ASticky, // sticky bit that is calculated during alignment
output logic [3*`NF+3:0] Sm, // the positive sum's significand
output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A)
output logic As, // the aligned addend's sign (modified Z sign for other opperations)
output logic Ps, // the product's sign
output logic Ss, // the sum's sign
output logic [`NE+1:0] Se, // the sum's exponent
output logic [$clog2(3*`NF+5)-1:0] SCnt // normalization shift count
);
// OpCtrl:
// Fma: {not multiply-add?, negate prod?, negate Z?}
// 000 - fmadd
// 001 - fmsub
// 010 - fnmsub
// 011 - fnmadd
// 100 - mul
// 110 - add
// 111 - sub
// OpCtrl:
// Fma: {not multiply-add?, negate prod?, negate Z?}
// 000 - fmadd
// 001 - fmsub
// 010 - fnmsub
// 011 - fnmadd
// 100 - mul
// 110 - add
// 111 - sub
logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format
logic [3*`NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF)
logic [3*`NF+3:0] AmInv; // aligned addend's mantissa possibly inverted
logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf)
logic KillProd; // set the product to zero before addition if the product is too small to matter
logic [`NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format
logic [3*`NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF)
logic [3*`NF+3:0] AmInv; // aligned addend's mantissa possibly inverted
logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf)
logic KillProd; // set the product to zero before addition if the product is too small to matter
logic [`NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
// - When multipliying two fp numbers, add the exponents
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
// - If the product is zero then kill the exponent
// - Multiply the mantissas
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Calculate the product
// - When multipliying two fp numbers, add the exponents
// - Subtract the bias (XExp + YExp has two biases, one from each exponent)
// - If the product is zero then kill the exponent
// - Multiply the mantissas
///////////////////////////////////////////////////////////////////////////////
// calculate the product's exponent
fmaexpadd expadd(.Xe, .Ye, .XZero, .YZero, .Pe);
// calculate the product's exponent
fmaexpadd expadd(.Xe, .Ye, .XZero, .YZero, .Pe);
// multiplication of the mantissas
fmamult mult(.Xm, .Ym, .Pm);
// calculate the signs and take the operation into account
fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA);
// multiplication of the mantissas
fmamult mult(.Xm, .Ym, .Pm);
// calculate the signs and take the operation into account
fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA);
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd);
// ///////////////////////////////////////////////////////////////////////////////
// // Addition/LZA
// ///////////////////////////////////////////////////////////////////////////////
fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd);
// ///////////////////////////////////////////////////////////////////////////////
// // Addition/LZA
// ///////////////////////////////////////////////////////////////////////////////
fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss);
fmalza #(3*`NF+4) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
fmalza #(3*`NF+4) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt);
endmodule

View file

@ -29,48 +29,49 @@
`include "wally-config.vh"
// fmaadd: adder stage of the fused multiply-add datapath.
// - AmInv    = Am with every bit XORed with InvA (inverted for effective subtraction)
// - PmKilled = Pm, forced to zero when KillProd is set
// - PreSum / NegPreSum are the two candidate sums computed in parallel; NegSum (the carry
//   out of the first addition) selects NegPreSum so that Sm is the positive sum
// - Ss = NegSum ^ Ps, Se = Ze (zero-extended) when the product is killed, otherwise Pe
// NOTE(review): this listing interleaves the pre- and post-cleanup text of each line, so the
// port list, declarations and assigns each appear twice.
// NOTE(review): the U(NF+5.2NF+1) format quoted for Am totals 3NF+6 bits, but the port is
// 3NF+4 bits wide - confirm the intended fixed-point format.
module fmaadd(
input logic [3*`NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
input logic [`NE-1:0] Ze, // exponent of Z
input logic Ps, // the product sign and the aligned addend's sign (Modified Z sign for other operations)
input logic [`NE+1:0] Pe, // product's exponent
input logic [2*`NF+1:0] Pm, // the product's mantissa
input logic InvA, // invert the aligned addend
input logic KillProd, // should the product be set to 0
input logic ASticky, // Aligned addend's sticky bit
output logic [3*`NF+3:0] AmInv, // aligned addend possibly inverted
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic Ss, // sum's sign
output logic [`NE+1:0] Se, // sum's exponent
output logic [3*`NF+3:0] Sm // the positive sum
input logic [3*`NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1)
input logic [`NE-1:0] Ze, // exponent of Z
input logic Ps, // the product sign and the aligned addend's sign (Modified Z sign for other operations)
input logic [`NE+1:0] Pe, // product's exponent
input logic [2*`NF+1:0] Pm, // the product's mantissa
input logic InvA, // invert the aligned addend
input logic KillProd, // should the product be set to 0
input logic ASticky, // Aligned addend's sticky bit
output logic [3*`NF+3:0] AmInv, // aligned addend possibly inverted
output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed
output logic Ss, // sum's sign
output logic [`NE+1:0] Se, // sum's exponent
output logic [3*`NF+3:0] Sm // the positive sum
);
logic [3*`NF+3:0] PreSum, NegPreSum; // possibly negative sum
logic NegSum; // was the sum negative
///////////////////////////////////////////////////////////////////////////////
// Addition
///////////////////////////////////////////////////////////////////////////////
// Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition
assign AmInv = {3*`NF+4{InvA}}^Am;
// Kill the product if the product is too small to affect the addition (determined in fma1.sv)
assign PmKilled = {2*`NF+2{~KillProd}}&Pm;
// Do the addition
// - calculate a positive and negative sum in parallel
// if there was a small negative number killed in the alignment stage one needs to be subtracted from the sum
// prod - addend where some of the addend is put into the sticky bit then don't add +1 from negation
// ie ~(InvA&ASticky&~KillProd)&InvA = (~ASticky|KillProd)&InvA
// addend - prod where product is killed (and not exactly zero) then don't add +1 from negation
// ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA
// in this case this result is only ever selected when InvA=1 so we can remove &InvA
assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*`NF+4{1'b0}}, (~ASticky|KillProd)&InvA};
assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 1'b0};
// Choose the positive sum and accompanying LZA result.
assign Sm = NegSum ? NegPreSum : PreSum;
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
assign Ss = NegSum^Ps;
assign Se = KillProd ? {2'b0, Ze} : Pe;
logic [3*`NF+3:0] PreSum, NegPreSum; // possibly negative sum
logic NegSum; // was the sum negative
///////////////////////////////////////////////////////////////////////////////
// Addition
///////////////////////////////////////////////////////////////////////////////
// Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition
assign AmInv = {3*`NF+4{InvA}}^Am;
// Kill the product if the product is too small to affect the addition (determined in fma1.sv)
assign PmKilled = {2*`NF+2{~KillProd}}&Pm;
// Do the addition
// - calculate a positive and negative sum in parallel
// if there was a small negative number killed in the alignment stage one needs to be subtracted from the sum
// prod - addend where some of the addend is put into the sticky bit then don't add +1 from negation
// ie ~(InvA&ASticky&~KillProd)&InvA = (~ASticky|KillProd)&InvA
// addend - prod where product is killed (and not exactly zero) then don't add +1 from negation
// ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA
// in this case this result is only ever selected when InvA=1 so we can remove &InvA
assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*`NF+4{1'b0}}, (~ASticky|KillProd)&InvA};
assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 1'b0};
// Choose the positive sum and accompanying LZA result.
assign Sm = NegSum ? NegPreSum : PreSum;
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
assign Ss = NegSum^Ps;
assign Se = KillProd ? {2'b0, Ze} : Pe;
endmodule

View file

@ -30,70 +30,68 @@
`include "wally-config.vh"
// fmaalign: alignment shifter for the FMA addend.
// - ACnt = Xe + Ye - BIAS + (NF+2) - Ze, computed in NE+2 bits
// - KillProd is set when ACnt's top bit is set and Z is nonzero, or when X or Y is zero
// - KillZ is set when ACnt (as a signed value) exceeds 3*NF+3
// - the always_comb block picks ZmShifted/ASticky for three cases: product killed,
//   addend killed, or addend shifted right by ACnt
// - Am is ZmShifted with its low NF bits dropped
// NOTE(review): this listing interleaves the pre- and post-cleanup text of each line, so
// declarations, assigns and the always_comb branches each appear twice and out of order.
// NOTE(review): the 53/106-bit widths in the diagrams look like the NF=52 case - confirm.
module fmaalign(
input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [`NF:0] Zm, // significand in U(0.NF) format
input logic XZero, YZero, ZZero,// is the input zero
output logic [3*`NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic ASticky, // Sticky bit calculated from the aligned addend
output logic KillProd // should the product be set to zero
input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format
input logic [`NF:0] Zm, // significand in U(0.NF) format
input logic XZero, YZero, ZZero,// is the input zero
output logic [3*`NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1)
output logic ASticky, // Sticky bit calculated from the aligned addend
output logic KillProd // should the product be set to zero
);
logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic KillZ; // should the addend be killed
logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format
logic [4*`NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1)
logic [4*`NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1)
logic KillZ; // should the addend be killed
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Alignment shifter
///////////////////////////////////////////////////////////////////////////////
// determine the shift count for alignment
// - negative means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze};
// determine the shift count for alignment
// - negative means Z is larger, so shift Z left
// - positive means the product is larger, so shift Z right
// This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed
assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze};
// Default Addition with only initial left shift
// | 53'b0 | 106'b(product) | 1'b0 |
// | addend |
assign ZmPreshifted = {Zm,(3*`NF+3)'(0)};
assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(3));
always_comb begin
// If the product is too small to affect the sum, kill the product
// Default Addition with only initial left shift
// | 53'b0 | 106'b(product) | 1'b0 |
// | addend |
// | addend |
if (KillProd) begin
ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+1)'(0)};
ASticky = ~(XZero|YZero);
assign ZmPreshifted = {Zm,(3*`NF+3)'(0)};
assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero;
assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(3));
// If the addend is too small to affect the addition
// - The addend has to shift two past the end of the product to be considered too small
// - The 2 extra bits are needed for rounding
always_comb
begin
// If the product is too small to affect the sum, kill the product
// | 53'b0 | 106'b(product) | 1'b0 |
// | addend |
end else if (KillZ) begin
ZmShifted = 0;
ASticky = ~ZZero;
// | 53'b0 | 106'b(product) | 1'b0 |
// | addend |
if (KillProd) begin
ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+1)'(0)};
ASticky = ~(XZero|YZero);
// If the Addend is shifted right
// | 53'b0 | 106'b(product) | 1'b0 |
// | addend |
end else begin
ZmShifted = ZmPreshifted >> ACnt;
ASticky = |(ZmShifted[`NF-1:0]);
// If the addend is too small to affect the addition
// - The addend has to shift two past the end of the product to be considered too small
// - The 2 extra bits are needed for rounding
// | 53'b0 | 106'b(product) | 1'b0 |
// | addend |
end else if (KillZ) begin
ZmShifted = 0;
ASticky = ~ZZero;
// If the Addend is shifted right
// | 53'b0 | 106'b(product) | 1'b0 |
// | addend |
end else begin
ZmShifted = ZmPreshifted >> ACnt;
ASticky = |(ZmShifted[`NF-1:0]);
end
end
end
assign Am = ZmShifted[4*`NF+3:`NF];
assign Am = ZmShifted[4*`NF+3:`NF];
endmodule

View file

@ -29,15 +29,15 @@
`include "wally-config.vh"
// fmaexpadd: computes the product exponent for the FMA.
// Pe = Xe + Ye - BIAS, evaluated in NE+2 bits (each operand zero-extended by two bits);
// Pe is forced to all zeros when either X or Y is zero.
// NOTE(review): this listing interleaves the pre- and post-cleanup text of each line, so
// ports, the PZero declaration and the assigns each appear twice.
module fmaexpadd(
input logic [`NE-1:0] Xe, Ye, // input's exponents
input logic XZero, YZero, // are the inputs zero
output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2
input logic [`NE-1:0] Xe, Ye, // input's exponents
input logic XZero, YZero, // are the inputs zero
output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2
);
logic PZero; // is the product zero?
// kill the exponent if the product is zero - either X or Y is 0
assign PZero = XZero | YZero;
assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)});
logic PZero; // is the product zero?
// kill the exponent if the product is zero - either X or Y is 0
assign PZero = XZero | YZero;
assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)});
endmodule

View file

@ -7,6 +7,7 @@
// Purpose: Leading Zero Anticipator
//
// Documentation: RISC-V System on Chip Design Chapter 13 (Figure 13.14)
// See also [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
//
// A component of the CORE-V-WALLY configurable RISC-V project.
//
@ -28,32 +29,33 @@
`include "wally-config.vh"
// fmalza: leading zero anticipator for the FMA sum.
// Builds per-column propagate/generate/kill terms from the addend A and the zero-extended
// product B, combines them with their one-position-shifted neighbours into the indicator
// vector F, and feeds F to the lzc instance to produce the shift count SCnt.
// NOTE(review): this listing interleaves the pre- and post-cleanup text of each line; the
// older copy names the generate term "Guard", the newer copy names it "G".
// NOTE(review): B is built as a fixed (NF+1) + (2NF+2) + 1 = 3NF+4-bit concatenation, so it
// matches WIDTH only when WIDTH = 3*NF+4 - confirm that is the only intended parameterization.
module fmalza #(WIDTH) ( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001]
input logic [WIDTH-1:0] A, // addend
input logic [2*`NF+1:0] Pm, // product
input logic Cin, // carry in
input logic sub, // subtraction
output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result
);
module fmalza #(WIDTH) (
input logic [WIDTH-1:0] A, // addend
input logic [2*`NF+1:0] Pm, // product
input logic Cin, // carry in
input logic sub, // subtraction
output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result
);
logic [WIDTH:0] F;
logic [WIDTH-1:0] B, P, Guard, K;
logic [WIDTH-1:0] Pp1, Gm1, Km1;
logic [WIDTH:0] F; // most significant bit of F indicates leading digit
logic [WIDTH-1:0] B; // zero-extended product with same size as aligned A
logic [WIDTH-1:0] P, G, K; // propagate, generate, kill for each column
logic [WIDTH-1:0] Pp1, Gm1, Km1; // propagate shifted right by 1, generate/kill shifted left by 1
assign B = {{(`NF+1){1'b0}}, Pm, 1'b0}; // Zero-extend product on the left, append one 0 bit on the right
assign B = {{(`NF+1){1'b0}}, Pm, 1'b0}; // Zero-extend product on the left, append one 0 bit on the right
assign P = A^B;
assign Guard = A&B;
assign K= ~A&~B;
assign P = A^B;
assign G = A&B;
assign K= ~A&~B;
assign Pp1 = {sub, P[WIDTH-1:1]};
assign Gm1 = {Guard[WIDTH-2:0], Cin};
assign Km1 = {K[WIDTH-2:0], ~Cin};
// Apply function to determine Leading pattern
// - note: the paper linked above uses the numbering system where 0 is the most significant bit
assign F[WIDTH] = ~sub&P[WIDTH-1];
assign F[WIDTH-1:0] = (Pp1&(Guard&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | Guard&~Gm1));
assign Pp1 = {sub, P[WIDTH-1:1]}; // shift P right by 1 (for P_i+1), use subtract flag in most significant bit
assign Gm1 = {G[WIDTH-2:0], Cin}; // shift G left by 1 (for G_i-1) and bring in Cin
assign Km1 = {K[WIDTH-2:0], ~Cin}; // shift K left by 1 (for K_i-1) and bring in ~Cin
// Apply function to determine Leading pattern
// - note: Schmookler01 uses the numbering system where 0 is the most significant bit
assign F[WIDTH] = ~sub&P[WIDTH-1];
assign F[WIDTH-1:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1));
lzc #(WIDTH+1) lzc (.num(F), .ZeroCnt(SCnt));
lzc #(WIDTH+1) lzc (.num(F), .ZeroCnt(SCnt));
endmodule

View file

@ -29,9 +29,10 @@
`include "wally-config.vh"
// fmamult: multiplies the two (NF+1)-bit significands Xm and Ym into the
// (2*NF+2)-bit product significand Pm.
// NOTE(review): this listing interleaves the pre- and post-cleanup text of each line, so
// the ports and the assign each appear twice.
module fmamult(
input logic [`NF:0] Xm, Ym, // x and y significand
output logic [2*`NF+1:0] Pm // product's significand
input logic [`NF:0] Xm, Ym, // x and y significand
output logic [2*`NF+1:0] Pm // product's significand
);
assign Pm = Xm * Ym;
assign Pm = Xm * Ym;
endmodule

View file

@ -29,19 +29,14 @@
`include "wally-config.vh"
// fmasign: derives the FMA sign signals from the operand signs and OpCtrl.
// - Ps   = Xs ^ Ys, additionally flipped when OpCtrl[1] is set and OpCtrl[2] is clear
// - As   = Zs flipped when OpCtrl[0] is set
// - InvA = As ^ Ps (product and addend have opposite signs)
// NOTE(review): this listing interleaves the pre- and post-cleanup text of each line, so
// the ports and the three assigns each appear twice.
module fmasign(
input logic [2:0] OpCtrl, // operation control
input logic Xs, Ys, Zs, // sign of the inputs
output logic Ps, // the product's sign - takes operation into account
output logic As, // aligned addend sign used in fma - takes operation into account
output logic InvA // Effective subtraction: invert addend
input logic [2:0] OpCtrl, // operation control
input logic Xs, Ys, Zs, // sign of the inputs
output logic Ps, // the product's sign - takes operation into account
output logic As, // aligned addend sign used in fma - takes operation into account
output logic InvA // Effective subtraction: invert addend
);
// Calculate the product's sign
// Negate product's sign if FNMADD or FNMSUB
// flip is negation operation
assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]);
// flip addend sign for subtraction
assign As = Zs^OpCtrl[0];
// Effective subtraction when product and addend have opposite signs
assign InvA = As ^ Ps;
assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); // product sign. Negate for FNMADD or FNMSUB
assign As = Zs^OpCtrl[0]; // flip addend sign for subtraction
assign InvA = As ^ Ps; // Effective subtraction when product and addend have opposite signs
endmodule

View file

@ -29,327 +29,327 @@
`include "wally-config.vh"
module fpu (
input logic clk,
input logic reset,
// Hazards
input logic StallE, StallM, StallW, // stall signals (from HZU)
input logic FlushE, FlushM, FlushW, // flush signals (from HZU)
output logic FPUStallD, // Stall the decode stage (To HZU)
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
// CSRs
input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
input logic [2:0] FRM_REGW, // Rounding mode (from CSR)
// Decode stage
input logic [31:0] InstrD, // instruction (from IFU)
// Execute stage
input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations
input logic IntDivE, W64E, // Integer division on FPU
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
input logic [4:0] RdE, // which FP register to write to (from IEU)
output logic FWriteIntE, // integer register write enable (to IEU)
output logic FCvtIntE, // Convert to int (to IEU)
// Memory stage
input logic [2:0] Funct3M, // Funct fields of instruction specify type of operations
input logic [4:0] RdM, // which FP register to write to (from IEU)
output logic FRegWriteM, // FP register write enable (to privileged unit)
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit)
output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
// Writeback stage
input logic [4:0] RdW, // which FP register to write to (from IEU)
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
output logic FCvtIntW, // select FCvtIntRes (to IEU)
output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
input logic clk,
input logic reset,
// Hazards
input logic StallE, StallM, StallW, // stall signals (from HZU)
input logic FlushE, FlushM, FlushW, // flush signals (from HZU)
output logic FPUStallD, // Stall the decode stage (To HZU)
output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU)
// CSRs
input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit)
input logic [2:0] FRM_REGW, // Rounding mode (from CSR)
// Decode stage
input logic [31:0] InstrD, // instruction (from IFU)
// Execute stage
input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations
input logic IntDivE, W64E, // Integer division on FPU
input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU)
input logic [4:0] RdE, // which FP register to write to (from IEU)
output logic FWriteIntE, // integer register write enable (to IEU)
output logic FCvtIntE, // Convert to int (to IEU)
// Memory stage
input logic [2:0] Funct3M, // Funct fields of instruction specify type of operations
input logic [4:0] RdM, // which FP register to write to (from IEU)
output logic FRegWriteM, // FP register write enable (to privileged unit)
output logic FpLoadStoreM, // Fp load instruction? (to LSU)
output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU)
output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU)
output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit)
output logic [4:0] SetFflagsM, // FPU flags (to privileged unit)
// Writeback stage
input logic [4:0] RdW, // which FP register to write to (from IEU)
input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU)
output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU)
output logic FCvtIntW, // select FCvtIntRes (to IEU)
output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU)
);
// RISC-V FPU specifics:
// - multiprecision support uses NAN-boxing, putting 1's in unused msbs
// - RISC-V detects underflow after rounding
// RISC-V FPU specifics:
// - multiprecision support uses NAN-boxing, putting 1's in unused msbs
// - RISC-V detects underflow after rounding
// control signals
logic FRegWriteW; // FP register write enable
logic [2:0] FrmM; // FP rounding mode
logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
logic FDivStartE, IDivStartE; // Start division or squareroot
logic FWriteIntM; // Write to integer register
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelE, FResSelM, FResSelW; // Select one of the results that finish in the memory stage
logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [4:0] Adr1D, Adr2D, Adr3D; // register adresses of each input
logic [4:0] Adr1E, Adr2E, Adr3E; // register adresses of each input
logic XEnD, YEnD, ZEnD; // X, Y, Z inputs used for current operation
logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation
logic FRegWriteE; // Write floating-point register
// control signals
logic FRegWriteW; // FP register write enable
logic [2:0] FrmM; // FP rounding mode
logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double
logic FDivStartE, IDivStartE; // Start division or squareroot
logic FWriteIntM; // Write to integer register
logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals
logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component
logic [1:0] FResSelE, FResSelM, FResSelW; // Select one of the results that finish in the memory stage
logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit
logic [4:0] Adr1D, Adr2D, Adr3D; // register adresses of each input
logic [4:0] Adr1E, Adr2E, Adr3E; // register adresses of each input
logic XEnD, YEnD, ZEnD; // X, Y, Z inputs used for current operation
logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation
logic FRegWriteE; // Write floating-point register
// regfile signals
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
// regfile signals
logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage
logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage
logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding)
logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding)
logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding)
logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding)
// unpacking signals
logic XsE, YsE, ZsE; // input's sign - execute stage
logic XsM, YsM; // input's sign - memory stage
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
logic [`NE-1:0] ZeM; // input's exponent - memory stage
logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage
logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XSubnormE; // is the input subnormal
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM; // is the input zero - memory stage
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XExpMaxE; // is the exponent all ones (max value)
// unpacking signals
logic XsE, YsE, ZsE; // input's sign - execute stage
logic XsM, YsM; // input's sign - memory stage
logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage
logic [`NE-1:0] ZeM; // input's exponent - memory stage
logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage
logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage
logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage
logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage
logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage
logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage
logic XSubnormE; // is the input subnormal
logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage
logic XZeroM, YZeroM; // is the input zero - memory stage
logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage
logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage
logic XExpMaxE; // is the exponent all ones (max value)
// Fma Signals
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying
logic [3*`NF+3:0] SmE, SmM; // Sum significand
logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output
logic [`NE+1:0] SeE,SeM; // Sum exponent
logic InvAE, InvAM; // Invert addend
logic AsE, AsM; // Addend sign
logic PsE, PsM; // Product sign
logic SsE, SsM; // Sum sign
logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count
// Cvt Signals
logic [`NE:0] CeE, CeM; // convert intermediate expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal
logic CsE, CsM; // convert result sign
logic IntZeroE, IntZeroM; // is the integer zero?
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
// divide signals
logic [`DIVb:0] QmM; // fdivsqrt signifcand
logic [`NE+1:0] QeM; // fdivsqrt exponent
logic DivStickyM; // fdivsqrt sticky bit
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
// Fma Signals
logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting
logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying
logic [3*`NF+3:0] SmE, SmM; // Sum significand
logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output
logic [`NE+1:0] SeE,SeM; // Sum exponent
logic InvAE, InvAM; // Invert addend
logic AsE, AsM; // Addend sign
logic PsE, PsM; // Product sign
logic SsE, SsM; // Sum sign
logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count
// Cvt Signals
logic [`NE:0] CeE, CeM; // convert intermediate expoent
logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by
logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal
logic CsE, CsM; // convert result sign
logic IntZeroE, IntZeroM; // is the integer zero?
logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder)
logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU)
// divide signals
logic [`DIVb:0] QmM; // fdivsqrt signifcand
logic [`NE+1:0] QeM; // fdivsqrt exponent
logic DivStickyM; // fdivsqrt sticky bit
logic FDivDoneE, IFDivStartE; // fdivsqrt control signals
logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU)
// result and flag signals
logic [`XLEN-1:0] ClassResE; // classify result
logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le)
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
logic [`FLEN-1:0] PostProcResM; // Postprocessor output
logic [4:0] PostProcFlgM; // Postprocessor flags
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register
// result and flag signals
logic [`XLEN-1:0] ClassResE; // classify result
logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max)
logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le)
logic CmpNVE; // compare invalid flag (Not Valid)
logic [`FLEN-1:0] SgnResE; // sign injection result
logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move)
logic [`FLEN-1:0] PostProcResM; // Postprocessor output
logic [4:0] PostProcFlgM; // Postprocessor flags
logic PreNVE, PreNVM; // selected flag that is ready in the memory stage
logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result
logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage
logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register
// other signals
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
// other signals
logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv
logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed
logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed
logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt
logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer
//////////////////////////////////////////////////////////////////////////////////////////
// Decode Stage: fctrl decoder, read register file
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
// Decode Stage: fctrl decoder, read register file
//////////////////////////////////////////////////////////////////////////////////////////
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
.Funct3E, .IntDivE, .InstrD,
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
.reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
.FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM,
.IllegalFPUInstrM, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE,
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW,
.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E);
// calculate FP control signals
fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]),
.Funct3E, .IntDivE, .InstrD,
.StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE,
.reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM,
.FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM,
.IllegalFPUInstrM, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE,
.FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW,
.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E);
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]),
.a4(RdW), .wd4(FResultW),
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
// FP register file
fregfile fregfile (.clk, .reset, .we4(FRegWriteW),
.a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]),
.a4(RdW), .wd4(FResultW),
.rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D));
// D/E pipeline registers
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
// D/E pipeline registers
flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E);
flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E);
flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E);
//////////////////////////////////////////////////////////////////////////////////////////
// Execute Stage: hazards, forwarding, unpacking, execution units
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
// Execute Stage: hazards, forwarding, unpacking, execution units
//////////////////////////////////////////////////////////////////////////////////////////
// Hazard unit for FPU: determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E,
.FRegWriteE, .FRegWriteM, .FRegWriteW, .RdE, .RdM, .RdW, .FResSelM,
.XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE);
// Hazard unit for FPU: determines if any forwarding or stalls are needed
fhazard fhazard(.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E,
.FRegWriteE, .FRegWriteM, .FRegWriteW, .RdE, .RdM, .RdW, .FResSelM,
.XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE);
// forwarding muxs
mux3 #(`FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE);
mux3 #(`FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);
// forwarding muxs
mux3 #(`FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE);
mux3 #(`FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE);
mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE);
// Select NaN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z
// - 1.0 is built as sign = 0, exponent = 0 followed by NE-1 ones, fraction = 0
// - formats narrower than FLEN are padded on the left with 1s (NaN boxing)
generate
if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
else if(`FPSIZES == 2)
mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing ones
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)},
{{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)},
{{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)},
{2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing ones
endgenerate
// FmaAddSubE is asserted only when OpCtrlE[2:1] = 2'b11, FResSelE = 2'b01, and PostProcSelE = 2'b10
assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10);
mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
// Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z
// For add and subtract, Z comes from second source operand
generate
if(`FPSIZES == 1) assign BoxedZeroE = 0;
else if(`FPSIZES == 2)
mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}},
{{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}},
{{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}},
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
endgenerate
assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]};
mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);
// Select NaN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z
// - 1.0 is built as sign = 0, exponent = 0 followed by NE-1 ones, fraction = 0
// - formats narrower than FLEN are padded on the left with 1s (NaN boxing)
generate
if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)};
else if(`FPSIZES == 2)
mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing ones
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)},
{{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)},
{{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)},
{2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing ones
endgenerate
// FmaAddSubE is asserted only when OpCtrlE[2:1] = 2'b11, FResSelE = 2'b01, and PostProcSelE = 2'b10
assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10);
mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract
// Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z
// For add and subtract, Z comes from second source operand
generate
if(`FPSIZES == 1) assign BoxedZeroE = 0;
else if(`FPSIZES == 2)
mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}},
{{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}},
{{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}},
(`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes
endgenerate
assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]};
mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE);
// unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infinity
unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
.Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE),
.XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE),
.YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE),
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE));
// fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub
fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .OpCtrl(OpCtrlE),
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE));
// unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infinity
unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE),
.Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE),
.XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE),
.YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE),
.ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE));
// fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub
fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE),
.XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .OpCtrl(OpCtrlE),
.As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE));
// divide and square root: fdiv, fsqrt, optionally integer division
fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
.QmM, .FIntDivResultM);
// divide and square root: fdiv, fsqrt, optionally integer division
fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]),
.XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE,
.ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E,
.StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM,
.QmM, .FIntDivResultM);
// compare: fmin/fmax, flt/fle/feq
fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
.Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE),
.XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE),
.CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
// compare: fmin/fmax, flt/fle/feq
fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE),
.Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE),
.XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE),
.CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE));
// sign injection: fsgnj/fsgnjx/fsgnjn
fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
// sign injection: fsgnj/fsgnjx/fsgnjn
fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE));
// classify: fclass
fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE),
.XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
// classify: fclass
fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE),
.XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE));
// convert: fcvt.*.*
fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
.ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE),
.ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE));
// convert: fcvt.*.*
fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE),
.ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE),
.ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE));
// NaN Box SrcA to convert integer to requested FP size
// - the low bits of ForwardedSrcAE are kept and every bit above the selected length is filled with 1s
// - FmtE selects which length is used
// - NOTE(review): the `FLEN-`XLEN replication presumably assumes FLEN >= XLEN - confirm against supported configs
generate
if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE};
else if(`FPSIZES == 2)
mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]},
{{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]},
{{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]},
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing the integer source
endgenerate
// NaN Box SrcA to convert integer to requested FP size
// - the low bits of ForwardedSrcAE are kept and every bit above the selected length is filled with 1s
// - FmtE selects which length is used
// - NOTE(review): the `FLEN-`XLEN replication presumably assumes FLEN >= XLEN - confirm against supported configs
generate
if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE};
else if(`FPSIZES == 2)
mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE);
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]},
{{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]},
{{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]},
{{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing the integer source
endgenerate
// select a result that may be written to the FP register
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
// select a result that may be written to the FP register
mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE);
assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE);
// select the result that may be written to the integer register - to IEU
// Sign-extend X from the length of the active format up to FLEN using the sign bit XsE.
// The mux4 data inputs are listed in the same format order (S, D, H, full width) as the
// other FmtE-selected mux4s in this module (fonemux, fmulzeromux, SrcAMux); the previous
// H, S, D order sign-extended from the wrong length (e.g. FmtE = 0 picked the H_LEN input).
generate
if(`FPSIZES == 1)
assign SgnExtXE = XE;
else if(`FPSIZES == 2)
mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{XsE}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE);
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) sgnextmux ({{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]},
{{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]},
{{`FLEN-`H_LEN{XsE}}, XE[`H_LEN-1:0]},
XE, FmtE, SgnExtXE);
endgenerate
// fit the sign-extended value to XLEN: truncate when FLEN > XLEN, otherwise extend further with XsE
if (`FLEN>`XLEN)
assign IntSrcXE = SgnExtXE[`XLEN-1:0];
else
assign IntSrcXE = {{`XLEN-`FLEN{XsE}}, SgnExtXE};
// choose between the classify result, the moved X value, and the compare result
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// select the result that may be written to the integer register - to IEU
// Sign-extend X from the length of the active format up to FLEN using the sign bit XsE.
// The mux4 data inputs are listed in the same format order (S, D, H, full width) as the
// other FmtE-selected mux4s in this module (fonemux, fmulzeromux, SrcAMux); the previous
// H, S, D order sign-extended from the wrong length (e.g. FmtE = 0 picked the H_LEN input).
generate
if(`FPSIZES == 1)
assign SgnExtXE = XE;
else if(`FPSIZES == 2)
mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{XsE}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE);
else if(`FPSIZES == 3 | `FPSIZES == 4)
mux4 #(`FLEN) sgnextmux ({{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]},
{{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]},
{{`FLEN-`H_LEN{XsE}}, XE[`H_LEN-1:0]},
XE, FmtE, SgnExtXE);
endgenerate
// fit the sign-extended value to XLEN: truncate when FLEN > XLEN, otherwise extend further with XsE
if (`FLEN>`XLEN)
assign IntSrcXE = SgnExtXE[`XLEN-1:0];
else
assign IntSrcXE = {{`XLEN-`FLEN{XsE}}, SgnExtXE};
// choose between the classify result, the moved X value, and the compare result
mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE);
// E/M pipe registers
// E/M pipe registers
// Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources
assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE);
// Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources
assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE);
flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM,
{XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
{FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
{FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE},
{CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM});
flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM);
flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM);
flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM});
flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM);
flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM);
flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM,
{XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE},
{XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM});
flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM);
flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM);
flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM,
{FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE},
{FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM});
flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM,
{CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE},
{CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM});
flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM);
//////////////////////////////////////////////////////////////////////////////////////////
// Memory Stage: postprocessor and result muxes
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
// Memory Stage: postprocessor and result muxes
//////////////////////////////////////////////////////////////////////////////////////////
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM),
.FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM),
.ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM),
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM),
.FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM),
.ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM),
.FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM),
.CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM),
.ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM),
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM));
// FPU flag selection - to privileged
mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
mux2 #(`FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
// FPU flag selection - to privileged
mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM);
mux2 #(`FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM);
// M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW);
// M/W pipe registers
flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW);
flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW);
flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW);
//////////////////////////////////////////////////////////////////////////////////////////
// Writeback Stage: result mux
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////
// Writeback Stage: result mux
//////////////////////////////////////////////////////////////////////////////////////////
// select the result to be written to the FP register
mux2 #(`FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW);
// select the result to be written to the FP register
mux2 #(`FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW);
endmodule // fpu

View file

@ -50,33 +50,29 @@ module fsgninj (
// - uses NaN-boxing format
// - if there are any unused bits the most significant bits are filled with 1s
if (`FPSIZES == 1)
if (`FPSIZES == 1)
assign SgnRes = {ResSgn, X[`FLEN-2:0]};
else if (`FPSIZES == 2)
else if (`FPSIZES == 2)
assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]};
else if (`FPSIZES == 3) begin
else if (`FPSIZES == 3) begin
logic [2:0] SgnBits;
always_comb
case (Fmt)
`FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
`FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
`FMT2: SgnBits = {2'b11, ResSgn};
default: SgnBits = {3{1'bx}};
endcase
always_comb
case (Fmt)
`FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]};
`FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]};
`FMT2: SgnBits = {2'b11, ResSgn};
default: SgnBits = {3{1'bx}};
endcase
assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]};
end else if (`FPSIZES == 4) begin
logic [3:0] SgnBits;
always_comb
case (Fmt)
`Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
`D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
`S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
`H_FMT: SgnBits = {3'b111, ResSgn};
endcase
always_comb
case (Fmt)
`Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]};
`D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]};
`S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]};
`H_FMT: SgnBits = {3'b111, ResSgn};
endcase
assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]};
end

View file

@ -29,79 +29,79 @@
`include "wally-config.vh"
// cvtshiftcalc: builds the input to the conversion shifter (CvtShiftIn) and
// determines whether an fp -> fp conversion result underflows (CvtResUf).
module cvtshiftcalc(
input logic XZero, // is the input zero?
input logic ToInt, // to integer conversion?
input logic IntToFp, // integer to floating point conversion?
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`NE:0] CvtCe, // the calculated exponent
input logic [`NF:0] Xm, // input mantissa
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
input logic CvtResSubnormUf, // is the conversion result subnormal or underflows
output logic CvtResUf, // does the cvt result underflow
output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted
input logic XZero, // is the input zero?
input logic ToInt, // to integer conversion?
input logic IntToFp, // integer to floating point conversion?
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [`NE:0] CvtCe, // the calculated exponent
input logic [`NF:0] Xm, // input mantissa
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
input logic CvtResSubnormUf, // is the conversion result subnormal or underflows
output logic CvtResUf, // does the cvt result underflow
output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted
);
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF)
///////////////////////////////////////////////////////////////////////////
// shifter
///////////////////////////////////////////////////////////////////////////
// select the input to the shifter
// fp -> int:
// | `XLEN zeros | mantissa | 0's if necessary |
// .
// Other problems:
// - if shifting to the right (neg CalcExp) then don't keep a 1 in the round bit (to prevent an incorrect plus 1 later during rounding)
// - we do however want to keep the one in the sticky bit so set one of the bits in the sticky bit area to 1
// - ex: for the case 0010000.... (double)
// ??? -> fp:
// - if result is subnormal or underflowed then we want to shift right i.e. shift right then shift left:
// | `NF-1 zeros | mantissa | 0's if necessary |
// .
// - otherwise:
// | LzcInM | 0's if necessary |
// .
// change to int shift to the left one
always_comb
// get rid of round bit if needed
// | add sticky bit if needed
// | |
if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}};
else if (CvtResSubnormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}};
else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}};
// choose the negative of the fraction size
if (`FPSIZES == 1) begin
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
end else if (`FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
default: ResNegNF = 'x; // drive X on every bit (1'bx would only X bit 0 and zero the rest)
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
endcase
end
///////////////////////////////////////////////////////////////////////////
// shifter
///////////////////////////////////////////////////////////////////////////
// select the input to the shifter
// fp -> int:
// | `XLEN zeros | mantissa | 0's if necessary |
// .
// Other problems:
// - if shifting to the right (neg CalcExp) then don't keep a 1 in the round bit (to prevent an incorrect plus 1 later during rounding)
// - we do however want to keep the one in the sticky bit so set one of the bits in the sticky bit area to 1
// - ex: for the case 0010000.... (double)
// ??? -> fp:
// - if result is subnormal or underflowed then we want to shift right i.e. shift right then shift left:
// | `NF-1 zeros | mantissa | 0's if necessary |
// .
// - otherwise:
// | LzcInM | 0's if necessary |
// .
// change to int shift to the left one
always_comb
// get rid of round bit if needed
// | add sticky bit if needed
// | |
if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}};
else if (CvtResSubnormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}};
else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}};
// choose the negative of the fraction size
if (`FPSIZES == 1) begin
assign ResNegNF = -($clog2(`NF)+1)'(`NF);
end else if (`FPSIZES == 2) begin
assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1);
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResNegNF = -($clog2(`NF)+1)'(`NF);
`FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1);
`FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2);
default: ResNegNF = 'x; // drive X on every bit (1'bx would only X bit 0 and zero the rest)
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF);
2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF);
2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF);
2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF);
endcase
end
// determine if the result underflows ??? -> fp
// - if the first 1 is shifted out of the result then the result underflows
// - can't underflow an integer to fp conversions
// - ResNegNF is extended with 1s to the width of CvtCe before the signed compare
assign CvtResUf = ($signed(CvtCe) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp;
// determine if the result underflows ??? -> fp
// - if the first 1 is shifted out of the result then the result underflows
// - can't underflow an integer to fp conversions
// - ResNegNF is extended with 1s to the width of CvtCe before the signed compare
assign CvtResUf = ($signed(CvtCe) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp;
endmodule

View file

@ -29,45 +29,46 @@
`include "wally-config.vh"
// divshiftcalc: computes the normalization shift amount (DivShiftAmt) and the
// pre-positioned shifter input (DivShiftIn) for the divide/square-root result.
module divshiftcalc(
input logic [`DIVb:0] DivQm, // divsqrt significand
input logic [`NE+1:0] DivQe, // divsqrt exponent
output logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
output logic [`NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
output logic DivResSubnorm, // is the divsqrt result subnormal
output logic DivSubnormShiftPos // is the subnormal shift amount positive
input logic [`DIVb:0] DivQm, // divsqrt significand
input logic [`NE+1:0] DivQe, // divsqrt exponent
output logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount
output logic [`NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input
output logic DivResSubnorm, // is the divsqrt result subnormal
output logic DivSubnormShiftPos // is the subnormal shift amount positive
);
logic [`LOGNORMSHIFTSZ-1:0] NormShift; // normalized result shift amount
logic [`LOGNORMSHIFTSZ-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negative)
logic [`NE+1:0] DivSubnormShift; // subnormal result shift amount
// is the result subnormal
// - flagged when DivQe is negative (top bit DivQe[`NE+1] set) or all zeros
// if the exponent is 1 then the result needs to be normalized then the result is subnormalized
assign DivResSubnorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
// if the result is subnormal
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivQe+NF+1-1
assign DivSubnormShift = (`NE+2)'(`NF)+DivQe;
assign DivSubnormShiftPos = ~DivSubnormShift[`NE+1];
logic [`LOGNORMSHIFTSZ-1:0] NormShift; // normalized result shift amount
logic [`LOGNORMSHIFTSZ-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negative)
logic [`NE+1:0] DivSubnormShift; // subnormal result shift amount
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivQe+1
// 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
// initial Left shift amount = NF
// shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit
assign NormShift = (`LOGNORMSHIFTSZ)'(`NF);
// is the result subnormal
// - flagged when DivQe is negative (top bit DivQe[`NE+1] set) or all zeros
// if the exponent is 1 then the result needs to be normalized then the result is subnormalized
assign DivResSubnorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]);
// if the shift amount is negative then don't shift (keep sticky bit)
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
// NOTE(review): no such multiply is visible in this module - the line above may be stale
assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[`LOGNORMSHIFTSZ-1:0] : '0;
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
// if the result is subnormal
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1
// .0000xxxxxxxxxxx... >> 1 Exp = 1
// Left shift amount = DivQe+NF+1-1
assign DivSubnormShift = (`NE+2)'(`NF)+DivQe;
assign DivSubnormShiftPos = ~DivSubnormShift[`NE+1];
// pre-shift the divider result for normalization
// - DivQm is placed below NF leading zeros and padded with zeros on the right up to NORMSHIFTSZ bits
assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb-1-`NF{1'b0}}};
// if the result is normalized
// 00000000x.xxxxxx... Exp = DivQe
// .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1
// 00000000.xxxxxxx... << NF Exp = DivQe+1
// 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards)
// 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after)
// initial Left shift amount = NF
// shift one more if it's a minimally redundant radix 4 - one entire cycle needed for integer bit
assign NormShift = (`LOGNORMSHIFTSZ)'(`NF);
// if the shift amount is negative then don't shift (keep sticky bit)
// need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES)
// NOTE(review): no such multiply is visible in this module - the line above may be stale
assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[`LOGNORMSHIFTSZ-1:0] : '0;
assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift;
// pre-shift the divider result for normalization
// - DivQm is placed below NF leading zeros and padded with zeros on the right up to NORMSHIFTSZ bits
assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb-1-`NF{1'b0}}};
endmodule

View file

@ -28,185 +28,186 @@
`include "wally-config.vh"
// flags: derives the floating-point exception flags from the post-processing signals.
// PostProcFlg is packed (msb to lsb) as {Invalid, DivByZero, Overflow, Underflow, Inexact};
// Overflow, Invalid, IntInvalid and DivByZero are also exported individually so they can
// be used to select the result.
module flags(
input logic Xs, // X sign
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn, // is an Inf input being used
input logic XInf, YInf, ZInf, // inputs are infinity
input logic NaNIn, // is a NaN input being used
input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs
input logic XZero, YZero, // inputs are zero
input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
input logic [`NE+1:0] Me, // exponent of the normalized sum
// rounding
input logic Plus1, // do you add one for rounding
input logic Round, Guard, Sticky, // bits used to determine rounding
input logic UfPlus1, // do you add one for rounding for the unbounded exponent result
// convert
input logic CvtOp, // conversion operation?
input logic ToInt, // convert to integer
input logic IntToFp, // convert integer to floating point
input logic Int64, // convert to 64 bit integer
input logic Signed, // convert to a signed integer
input logic [`NE:0] CvtCe, // the calculated exponent - Cvt
input logic [1:0] CvtNegResMsbs, // the negative integer result's most significant bits
// divsqrt
input logic DivOp, // divide/sqrt operation?
input logic Sqrt, // Sqrt?
// fma
input logic FmaOp, // Fma operation?
input logic FmaAs, FmaPs, // the modified Z sign and the product sign
// flags
output logic DivByZero, // divide by zero flag
output logic Overflow, // overflow flag to select result
output logic Invalid, // invalid flag to select the result
output logic IntInvalid, // invalid integer result to select
output logic [4:0] PostProcFlg // flags
input logic Xs, // X sign
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn, // is an Inf input being used
input logic XInf, YInf, ZInf, // inputs are infinity
input logic NaNIn, // is a NaN input being used
input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs
input logic XZero, YZero, // inputs are zero
input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
input logic [`NE+1:0] Me, // exponent of the normalized sum
// rounding
input logic Plus1, // do you add one for rounding
input logic Round, Guard, Sticky, // bits used to determine rounding
input logic UfPlus1, // do you add one for rounding for the unbounded exponent result
// convert
input logic CvtOp, // conversion operation?
input logic ToInt, // convert to integer
input logic IntToFp, // convert integer to floating point
input logic Int64, // convert to 64 bit integer
input logic Signed, // convert to a signed integer
input logic [`NE:0] CvtCe, // the calculated exponent - Cvt
input logic [1:0] CvtNegResMsbs, // the negative integer result's most significant bits
// divsqrt
input logic DivOp, // divide/sqrt operation?
input logic Sqrt, // Sqrt?
// fma
input logic FmaOp, // Fma operation?
input logic FmaAs, FmaPs, // the modified Z sign and the product sign
// flags
output logic DivByZero, // divide by zero flag
output logic Overflow, // overflow flag to select result
output logic Invalid, // invalid flag to select the result
output logic IntInvalid, // invalid integer result to select
output logic [4:0] PostProcFlg // flags
);
logic SigNaN; // is an input a signaling NaN
logic Inexact; // final inexact flag
logic FpInexact; // floating point inexact flag
logic IntInexact; // integer inexact flag
logic FmaInvalid; // fma invalid flag
logic DivInvalid; // division/sqrt invalid flag
logic Underflow; // Underflow flag
logic ResExpGteMax; // is the result greater than or equal to the maximum floating point exponent
logic ShiftGtIntSz; // is the shift greater than the integer size (use Re to account for possible rounding "shift")
///////////////////////////////////////////////////////////////////////////////
// Overflow
///////////////////////////////////////////////////////////////////////////////
logic SigNaN; // is an input a signaling NaN
logic Inexact; // final inexact flag
logic FpInexact; // floating point inexact flag
logic IntInexact; // integer inexact flag
logic FmaInvalid; // fma invalid flag
logic DivInvalid; // division/sqrt invalid flag
logic Underflow; // Underflow flag
logic ResExpGteMax; // is the result greater than or equal to the maximum floating point exponent
logic ShiftGtIntSz; // is the shift greater than the integer size (use Re to account for possible rounding "shift")
// determine if the result exponent is greater than or equal to the maximum exponent or
// the shift amount is greater than the integers size (for cvt to int)
// ShiftGtIntSz calculation:
// a left shift of intlen+1 is still in range but any more than that is an overflow
// initial: | 64 0's | XLEN |
// | 64 0's | XLEN | << 64
// | XLEN | 00000... |
// 65 = ...0 0 0 0 0 1 0 0 0 0 0 1
// | or | | or |
// 33 = ...0 0 0 0 0 0 1 0 0 0 0 1
// | or | | or |
// larger or equal if:
// - any of the bits after the most significant 1 is one
// - the most significant in 65 or 33 is still a one in the number and
// one of the later bits is one
if (`FPSIZES == 1) begin
assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
///////////////////////////////////////////////////////////////////////////////
// Overflow
///////////////////////////////////////////////////////////////////////////////
end else if (`FPSIZES == 2) begin
assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
// determine if the result exponent is greater than or equal to the maximum exponent or
// the shift amount is greater than the integers size (for cvt to int)
// ShiftGtIntSz calculation:
// a left shift of intlen+1 is still in range but any more than that is an overflow
// initial: | 64 0's | XLEN |
// | 64 0's | XLEN | << 64
// | XLEN | 00000... |
// 65 = ...0 0 0 0 0 1 0 0 0 0 0 1
// | or | | or |
// 33 = ...0 0 0 0 0 0 1 0 0 0 0 1
// | or | | or |
// larger or equal if:
// - any of the bits after the most significant 1 is one
// - the most significant in 65 or 33 is still a one in the number and
// one of the later bits is one
if (`FPSIZES == 1) begin
assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
`FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
`FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
default: ResExpGteMax = 1'bx;
endcase
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 2) begin
assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
`Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
`D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
`S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
`H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
endcase
assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE];
`FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]);
`FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]);
default: ResExpGteMax = 1'bx;
endcase
assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
`Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE];
`D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]);
`S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]);
`H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]);
endcase
assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64));
end
// calculate overflow flag:
// if the result is greater than or equal to the max exponent(not taking into account sign)
// | and the exponent isn't negative
// | | if the input isn't infinity or NaN
// | | |
assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
// calculate overflow flag:
// if the result is greater than or equal to the max exponent(not taking into account sign)
// | and the exponent isn't negative
// | | if the input isn't infinity or NaN
// | | |
assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero);
///////////////////////////////////////////////////////////////////////////////
// Underflow
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Underflow
///////////////////////////////////////////////////////////////////////////////
// calculate underflow flag: detecting tininess after rounding
// the exponent is negative
// | the result is subnormal
// | | the result is normal and rounded from a Subnorm
// | | | and if given an unbounded exponent the result does not round
// | | | | and if the result is not exact
// | | | | | and if the input isn't infinity or NaN
// | | | | | |
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
// calculate underflow flag: detecting tininess after rounding
// the exponent is negative
// | the result is subnormal
// | | the result is normal and rounded from a Subnorm
// | | | and if given an unbounded exponent the result does not round
// | | | | and if the result is not exact
// | | | | | and if the input isn't infinity or NaN
// | | | | | |
assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid);
///////////////////////////////////////////////////////////////////////////////
// Inexact
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Inexact
///////////////////////////////////////////////////////////////////////////////
// Set Inexact flag if the result is different from what would be outputted given infinite precision
// - Don't set the underflow flag if an underflowed res isn't outputted
assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
//assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);
// Set Inexact flag if the result is different from what would be outputted given infinite precision
// - Don't set the underflow flag if an underflowed res isn't outputted
assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid);
//assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero);
// if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds)
// | |
assign IntInexact = ((CvtCe[`NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid;
// if the res is too small to be represented and not 0
// | and if the res is not invalid (outside the integer bounds)
// | |
assign IntInexact = ((CvtCe[`NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid;
// select the inexact flag to output
assign Inexact = ToInt ? IntInexact : FpInexact;
// select the inexact flag to output
assign Inexact = ToInt ? IntInexact : FpInexact;
///////////////////////////////////////////////////////////////////////////////
// Invalid
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Invalid
///////////////////////////////////////////////////////////////////////////////
// Set Invalid flag for following cases:
// 1) any input is a signaling NaN
// 2) Inf - Inf (unless x or y is NaN)
// 3) 0 * Inf
// Set Invalid flag for following cases:
// 1) any input is a signaling NaN
// 2) Inf - Inf (unless x or y is NaN)
// 3) 0 * Inf
// invalid flag for integer result
// if the input is NaN or infinity
// | if the integer res overflows (out of range)
// | | if the input was negative but outputting to an unsigned number
// | | | the res doesn't round to zero
// | | | | or the res rounds up out of bounds
// | | | | and the res didn't underflow
// | | | | |
assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
// |
// or when the positive res rounds up out of range
assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);
// invalid flag for fma
assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf);
//invalid flag for division
assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
// invalid flag for integer result
// if the input is NaN or infinity
// | if the integer res overflows (out of range)
// | | if the input was negative but outputting to an unsigned number
// | | | the res doesn't round to zero
// | | | | or the res rounds up out of bounds
// | | | | and the res didn't underflow
// | | | | |
assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]);
// |
// or when the positive res rounds up out of range
assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp);
// invalid flag for fma
assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf);
//invalid flag for division
assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero);
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp);
///////////////////////////////////////////////////////////////////////////////
// Divide by Zero
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Divide by Zero
///////////////////////////////////////////////////////////////////////////////
// if dividing by zero and not 0/0
// - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);
// if dividing by zero and not 0/0
// - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator)
assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn);
///////////////////////////////////////////////////////////////////////////////
// final flags
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// final flags
///////////////////////////////////////////////////////////////////////////////
// Combine flags
// - to integer results do not set the underflow or overflow flags
assign PostProcFlg = {Invalid|(IntInvalid&CvtOp&ToInt), DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact};
// Combine flags
// - to integer results do not set the underflow or overflow flags
assign PostProcFlg = {Invalid|(IntInvalid&CvtOp&ToInt), DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact};
endmodule

View file

@ -29,119 +29,109 @@
`include "wally-config.vh"
// fmashiftcalc: prepares the FMA sum for the normalization shifter.
// Produces the shifter input (FmaSm with two zero bits prepended) and shift amount, the
// bias-corrected exponent of the normalized sum, a sum-is-zero indicator, and a
// preliminary "result is subnormal" indicator for the selected format.
module fmashiftcalc(
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [`NE+1:0] FmaSe, // sum's exponent
input logic [3*`NF+3:0] FmaSm, // the positive sum
input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // normalization shift count
output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
output logic FmaSZero, // is the sum zero
output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA correction
output logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt, // normalization shift amount
output logic [3*`NF+5:0] FmaShiftIn // input to the normalization shifter
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [`NE+1:0] FmaSe, // sum's exponent
input logic [3*`NF+3:0] FmaSm, // the positive sum
input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // normalization shift count
output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
output logic FmaSZero, // is the sum zero
output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA correction
output logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt, // normalization shift amount
output logic [3*`NF+5:0] FmaShiftIn // input to the normalization shifter
);
logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias
logic [`NE+1:0] BiasCorr; // correction for bias
logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias
logic [`NE+1:0] BiasCorr; // correction for bias
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
// Determine if the sum is zero
assign FmaSZero = ~(|FmaSm);
// Determine if the sum is zero
assign FmaSZero = ~(|FmaSm);
// calculate the sum's exponent
assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+5)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3);
//convert the sum's exponent into the proper precision
if (`FPSIZES == 1) begin
assign NormSumExp = PreNormSumExp;
end else if (`FPSIZES == 2) begin
assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS);
assign NormSumExp = PreNormSumExp+BiasCorr;
end else if (`FPSIZES == 3) begin
always_comb begin
case (Fmt)
`FMT: BiasCorr = '0;
`FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS);
`FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS);
default: BiasCorr = 'x;
endcase
end
assign NormSumExp = PreNormSumExp+BiasCorr;
end else if (`FPSIZES == 4) begin
always_comb begin
case (Fmt)
2'h3: BiasCorr = '0;
2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS);
2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS);
2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS);
endcase
end
assign NormSumExp = PreNormSumExp+BiasCorr;
// calculate the sum's exponent
assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+5)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3);
//convert the sum's exponent into the proper precision
if (`FPSIZES == 1) begin
assign NormSumExp = PreNormSumExp;
end else if (`FPSIZES == 2) begin
assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS);
assign NormSumExp = PreNormSumExp+BiasCorr;
end else if (`FPSIZES == 3) begin
always_comb begin
case (Fmt)
`FMT: BiasCorr = '0;
`FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS);
`FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS);
default: BiasCorr = 'x;
endcase
end
// determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) & ~FmaSZero
if (`FPSIZES == 1) begin
logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
end else if (`FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2));
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp;
always_comb begin
case (Fmt)
`FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
`FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
`FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
default: FmaPreResultSubnorm = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS));
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp;
assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS));
assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp;
always_comb begin
case (Fmt)
2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
2'h1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
2'h0: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
2'h2: FmaPreResultSubnorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
endcase
end
assign NormSumExp = PreNormSumExp+BiasCorr;
end else if (`FPSIZES == 4) begin
always_comb begin
case (Fmt)
2'h3: BiasCorr = '0;
2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS);
2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS);
2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS);
endcase
end
assign NormSumExp = PreNormSumExp+BiasCorr;
end
// determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) & ~FmaSZero
if (`FPSIZES == 1) begin
logic Sum0LEZ, Sum0GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
end else if (`FPSIZES == 2) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero;
end else if (`FPSIZES == 3) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2));
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp;
always_comb begin
case (Fmt)
`FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
`FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
`FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
default: FmaPreResultSubnorm = 1'bx;
endcase
end
end else if (`FPSIZES == 4) begin
logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL;
assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp;
assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2));
assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS));
assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp;
assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS));
assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp;
assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS));
assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp;
always_comb begin
case (Fmt)
2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero;
2'h1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero;
2'h0: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero;
2'h2: FmaPreResultSubnorm = Sum3LEZ & Sum3GEFL & ~FmaSZero;
endcase
end
end
// set and calculate the shift input and amount
// - shift once if killing a product and the result is subnormal
assign FmaShiftIn = {2'b0, FmaSm};
if (`FPSIZES == 1)
assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1;
else
assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1;
// set and calculate the shift input and amount
// - shift once if killing a product and the result is subnormal
assign FmaShiftIn = {2'b0, FmaSm};
if (`FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1;
else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1;
endmodule

View file

@ -73,10 +73,10 @@
// .
// normshift: combinational left shifter used for normalization.
// Shifted is ShiftIn shifted left by ShiftAmt bit positions; the result keeps the
// input width (`NORMSHIFTSZ), so bits shifted past the msb are dropped.
module normshift(
input logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt, // shift amount
input logic [`NORMSHIFTSZ-1:0] ShiftIn, // number to be shifted
output logic [`NORMSHIFTSZ-1:0] Shifted // shifted result
input logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt, // shift amount
input logic [`NORMSHIFTSZ-1:0] ShiftIn, // number to be shifted
output logic [`NORMSHIFTSZ-1:0] Shifted // shifted result
);
// logical left shift; vacated low-order bits are filled with zeros
assign Shifted = ShiftIn << ShiftAmt;
assign Shifted = ShiftIn << ShiftAmt;
endmodule

View file

@ -29,198 +29,198 @@
`include "wally-config.vh"
module postprocess (
// general signals
input logic Xs, Ys, // input signs
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic XZero, YZero, // inputs are zero
input logic XInf, YInf, ZInf, // inputs are infinity
input logic XNaN, YNaN, ZNaN, // inputs are NaN
input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs
input logic [1:0] PostProcSel, // select result to be written to fp register
//fma signals
input logic FmaAs, // the modified Z sign - depends on instruction
input logic FmaPs, // the product's sign
input logic FmaSs, // Sum sign
input logic [`NE+1:0] FmaSe, // the sum's exponent
input logic [3*`NF+3:0] FmaSm, // the positive sum
input logic FmaASticky, // sticky bit that is calculated during alignment
input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // the normalization shift count
//divide signals
input logic DivSticky, // divider sticky bit
input logic [`NE+1:0] DivQe, // divsqrt exponent
input logic [`DIVb:0] DivQm, // divsqrt significand
// conversion signals
input logic CvtCs, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent
input logic CvtResSubnormUf, // the convert result is subnormal or underflows
input logic [`LOGCVTLEN-1:0] CvtShiftAmt,// how much to shift by
input logic ToInt, // is fp->int (since it's writting to the integer register)
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
input logic IntZero, // is the integer input zero
// final results
output logic [`FLEN-1:0] PostProcRes,// postprocessor final result
output logic [4:0] PostProcFlg,// postprocesser flags
output logic [`XLEN-1:0] FCvtIntRes // the integer conversion result
);
// general signals
logic Rs; // result sign
logic [`NF-1:0] Rf; // Result fraction
logic [`NE-1:0] Re; // Result exponent
logic Ms; // norMalized sign
logic [`CORRSHIFTSZ-1:0] Mf; // norMalized fraction
logic [`NE+1:0] Me; // normalized exponent
logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow
logic UfPlus1; // do you add one (for determining underflow flag)
logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount
logic [`NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift
logic [`NORMSHIFTSZ-1:0] Shifted; // the ouput of the normalized shifter (before shift correction)
logic Plus1; // add one to the final result?
logic Overflow; // overflow flag used to select results
logic Invalid; // invalid flag used to select results
logic Guard, Round, Sticky; // bits needed to determine rounding
logic [`FMTBITS-1:0] OutFmt; // output format
// fma signals
logic [`NE+1:0] FmaMe; // exponent of the normalized sum
logic FmaSZero; // is the sum zero
logic [3*`NF+5:0] FmaShiftIn; // fma shift input
logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection
logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
// division singals
logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
logic [`NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
logic [`NE+1:0] Qe; // divsqrt corrected exponent after corretion shift
logic DivByZero; // divide by zero flag
logic DivResSubnorm; // is the divsqrt result subnormal
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed)
// conversion signals
logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted for converter
logic [1:0] CvtNegResMsbs; // most significant bits of possibly negated int result
logic [`XLEN+1:0] CvtNegRes; // possibly negated integer result
logic CvtResUf; // did the convert result underflow
logic IntInvalid; // invalid integer flag
// readability signals
logic Mult; // multiply opperation
logic Sqrt; // is the divsqrt opperation sqrt
logic Int64; // is the integer 64 bits?
logic Signed; // is the opperation with a signed integer?
logic IntToFp; // is the opperation an int->fp conversion?
logic CvtOp; // convertion opperation
logic FmaOp; // fma opperation
logic DivOp; // divider opperation
logic InfIn; // are any of the inputs infinity
logic NaNIn; // are any of the inputs NaN
// general signals
input logic Xs, Ys, // input signs
input logic [`NF:0] Xm, Ym, Zm, // input mantissas
input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude
input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single
input logic [2:0] OpCtrl, // choose which opperation (look below for values)
input logic XZero, YZero, // inputs are zero
input logic XInf, YInf, ZInf, // inputs are infinity
input logic XNaN, YNaN, ZNaN, // inputs are NaN
input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs
input logic [1:0] PostProcSel, // select result to be written to fp register
//fma signals
input logic FmaAs, // the modified Z sign - depends on instruction
input logic FmaPs, // the product's sign
input logic FmaSs, // Sum sign
input logic [`NE+1:0] FmaSe, // the sum's exponent
input logic [3*`NF+3:0] FmaSm, // the positive sum
input logic FmaASticky, // sticky bit that is calculated during alignment
input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // the normalization shift count
//divide signals
input logic DivSticky, // divider sticky bit
input logic [`NE+1:0] DivQe, // divsqrt exponent
input logic [`DIVb:0] DivQm, // divsqrt significand
// conversion signals
input logic CvtCs, // the result's sign
input logic [`NE:0] CvtCe, // the calculated expoent
input logic CvtResSubnormUf, // the convert result is subnormal or underflows
input logic [`LOGCVTLEN-1:0] CvtShiftAmt,// how much to shift by
input logic ToInt, // is fp->int (since it's writting to the integer register)
input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb)
input logic IntZero, // is the integer input zero
// final results
output logic [`FLEN-1:0] PostProcRes,// postprocessor final result
output logic [4:0] PostProcFlg,// postprocesser flags
output logic [`XLEN-1:0] FCvtIntRes // the integer conversion result
);
// general signals
logic Rs; // result sign
logic [`NF-1:0] Rf; // Result fraction
logic [`NE-1:0] Re; // Result exponent
logic Ms; // norMalized sign
logic [`CORRSHIFTSZ-1:0] Mf; // norMalized fraction
logic [`NE+1:0] Me; // normalized exponent
logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow
logic UfPlus1; // do you add one (for determining underflow flag)
logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount
logic [`NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift
logic [`NORMSHIFTSZ-1:0] Shifted; // the ouput of the normalized shifter (before shift correction)
logic Plus1; // add one to the final result?
logic Overflow; // overflow flag used to select results
logic Invalid; // invalid flag used to select results
logic Guard, Round, Sticky; // bits needed to determine rounding
logic [`FMTBITS-1:0] OutFmt; // output format
// fma signals
logic [`NE+1:0] FmaMe; // exponent of the normalized sum
logic FmaSZero; // is the sum zero
logic [3*`NF+5:0] FmaShiftIn; // fma shift input
logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results
logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection
logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma
// division singals
logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount
logic [`NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input
logic [`NE+1:0] Qe; // divsqrt corrected exponent after corretion shift
logic DivByZero; // divide by zero flag
logic DivResSubnorm; // is the divsqrt result subnormal
logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed)
// conversion signals
logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted for converter
logic [1:0] CvtNegResMsbs; // most significant bits of possibly negated int result
logic [`XLEN+1:0] CvtNegRes; // possibly negated integer result
logic CvtResUf; // did the convert result underflow
logic IntInvalid; // invalid integer flag
// readability signals
logic Mult; // multiply opperation
logic Sqrt; // is the divsqrt opperation sqrt
logic Int64; // is the integer 64 bits?
logic Signed; // is the opperation with a signed integer?
logic IntToFp; // is the opperation an int->fp conversion?
logic CvtOp; // convertion opperation
logic FmaOp; // fma opperation
logic DivOp; // divider opperation
logic InfIn; // are any of the inputs infinity
logic NaNIn; // are any of the inputs NaN
// signals to help readability
assign Signed = OpCtrl[0];
assign Int64 = OpCtrl[1];
assign IntToFp = OpCtrl[2];
assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
assign CvtOp = (PostProcSel == 2'b00);
assign FmaOp = (PostProcSel == 2'b10);
assign DivOp = (PostProcSel == 2'b01);
assign Sqrt = OpCtrl[0];
// signals to help readability
assign Signed = OpCtrl[0];
assign Int64 = OpCtrl[1];
assign IntToFp = OpCtrl[2];
assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0];
assign CvtOp = (PostProcSel == 2'b00);
assign FmaOp = (PostProcSel == 2'b10);
assign DivOp = (PostProcSel == 2'b01);
assign Sqrt = OpCtrl[0];
// is there an input of infinity or NaN being used
assign InfIn = XInf|YInf|ZInf;
assign NaNIn = XNaN|YNaN|ZNaN;
// is there an input of infinity or NaN being used
assign InfIn = XInf|YInf|ZInf;
assign NaNIn = XNaN|YNaN|ZNaN;
// choose the output format depending on the operation
// - fp -> fp: OpCtrl contains the precision of the output
// - otherwise: Fmt contains the precision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
// choose the output format depending on the operation
// - fp -> fp: OpCtrl contains the precision of the output
// - otherwise: Fmt contains the precision of the output
if (`FPSIZES == 2)
assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT);
else if (`FPSIZES == 3 | `FPSIZES == 4)
assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0];
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Normalization
///////////////////////////////////////////////////////////////////////////////
// final calculations before shifting
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
// final calculations before shifting
cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn,
.XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
.FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);
fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe,
.FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn);
divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn);
// select which unit's output to shift
always_comb
case(PostProcSel)
2'b10: begin // fma
ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt};
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}};
end
2'b00: begin // cvt
ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt};
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
end
2'b01: begin //divsqrt
ShiftAmt = DivShiftAmt;
ShiftIn = DivShiftIn;
end
default: begin
ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}};
ShiftIn = {`NORMSHIFTSZ{1'bx}};
end
endcase
// main normalization shift
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
// select which unit's output to shift
always_comb
case(PostProcSel)
2'b10: begin // fma
ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt};
ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}};
end
2'b00: begin // cvt
ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt};
ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}};
end
2'b01: begin //divsqrt
ShiftAmt = DivShiftAmt;
ShiftIn = DivShiftIn;
end
default: begin
ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}};
ShiftIn = {`NORMSHIFTSZ{1'bx}};
end
endcase
// main normalization shift
normshift normshift (.ShiftIn, .ShiftAmt, .Shifted);
// correct for LZA/divsqrt error
shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
// correct for LZA/divsqrt error
shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp,
.DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf);
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to nearest even
// round to zero
// round to -infinity
// round to infinity
// round to nearest max magnitude
// round to nearest even
// round to zero
// round to -infinity
// round to infinity
// round to nearest max magnitude
// calculate result sign used in rounding unit
roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
// calculate result sign used in rounding unit
roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms);
round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf,
.DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe,
.Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf,
.DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me);
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Sign calculation
///////////////////////////////////////////////////////////////////////////////
resultsign resultsign(.Frm, .FmaPs, .FmaAs, .Round, .Sticky, .Guard,
.FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Rs);
resultsign resultsign(.Frm, .FmaPs, .FmaAs, .Round, .Sticky, .Guard,
.FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Rs);
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Flags
///////////////////////////////////////////////////////////////////////////////
flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
.NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero,
.Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
.Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero,
.Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe,
.NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero,
.Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1,
.Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg);
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Select the result
///////////////////////////////////////////////////////////////////////////////
negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes);
specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
.IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
.XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes);
specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid,
.IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf,
.NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes,
.XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes);
endmodule

View file

@ -29,52 +29,52 @@
`include "wally-config.vh"
// resultsign: selects the sign bit (Rs) of the post-processed result.
//   - FMA operation with an infinite input -> Infs  (sign of the infinity result)
//   - FMA operation whose sum is zero      -> Zeros (sign of the zero result)
//   - everything else                      -> Ms    (normalized result sign)
// NOTE(review): in this listing the port list, declarations and body each appear twice
// (the same lines before and after a whitespace-only cleanup); the logic of both copies is identical.
module resultsign(
input logic [2:0] Frm, // rounding mode
input logic FmaOp, // is the operation an Fma
input logic Mult, // is the fma operation a multiply
input logic ZInf, // is Z infinity
input logic InfIn, // are any of the inputs infinity
input logic FmaSZero, // is the fma sum zero
input logic Ms, // normalized result sign
input logic FmaPs, // product's sign
input logic FmaAs, // aligned addend's sign
input logic Guard, // guard bit for rounding
input logic Round, // round bit for rounding
input logic Sticky, // sticky bit for rounding
output logic Rs // result sign
input logic [2:0] Frm, // rounding mode
input logic FmaOp, // is the operation an Fma
input logic Mult, // is the fma operation a multiply
input logic ZInf, // is Z infinity
input logic InfIn, // are any of the inputs infinity
input logic FmaSZero, // is the fma sum zero
input logic Ms, // normalized result sign
input logic FmaPs, // product's sign
input logic FmaAs, // aligned addend's sign
input logic Guard, // guard bit for rounding
input logic Round, // round bit for rounding
input logic Sticky, // sticky bit for rounding
output logic Rs // result sign
);
logic Zeros; // zero result sign
logic Infs; // infinity result sign
logic Zeros; // zero result sign
logic Infs; // infinity result sign
// determine the sign for a result of 0
// The IEEE754-2019 standard specifies:
// - the sign of an exact zero sum (with operands of different signs) should be positive unless rounding toward negative infinity
// - when the exact result of an FMA operation is non-zero, but is zero due to rounding, use the sign of the exact result
// - if x = +0 or -0 then x+x=x and x-(-x)=x
// - the sign of a product is the exclusive or of the operands' signs
// Zero sign will only be selected if:
// - P=Z and a cancellation occurs - exact zero
// - Z is zero and P is zero - exact zero
// - P is killed and Z is zero - Psgn
// - Z is killed and P is zero - impossible
// Zero sign calculation:
// - if a multiply operation is done, then use the product's sign (FmaPs)
// - if the zero sum is not exactly zero i.e. Round|Guard|Sticky use the sign of the exact result (which is the product's sign)
// - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign
// - otherwise (signs differ, exact, not a multiply) the zero is negative only when Frm[1:0] == 2'b10
assign Zeros = (FmaPs^FmaAs)&~(Round|Guard|Sticky)&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
// determine the sign for a result of 0
// The IEEE754-2019 standard specifies:
// - the sign of an exact zero sum (with operands of different signs) should be positive unless rounding toward negative infinity
// - when the exact result of an FMA operation is non-zero, but is zero due to rounding, use the sign of the exact result
// - if x = +0 or -0 then x+x=x and x-(-x)=x
// - the sign of a product is the exclusive or of the operands' signs
// Zero sign will only be selected if:
// - P=Z and a cancellation occurs - exact zero
// - Z is zero and P is zero - exact zero
// - P is killed and Z is zero - Psgn
// - Z is killed and P is zero - impossible
// Zero sign calculation:
// - if a multiply operation is done, then use the product's sign (FmaPs)
// - if the zero sum is not exactly zero i.e. Round|Guard|Sticky use the sign of the exact result (which is the product's sign)
// - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign
// - otherwise (signs differ, exact, not a multiply) the zero is negative only when Frm[1:0] == 2'b10
assign Zeros = (FmaPs^FmaAs)&~(Round|Guard|Sticky)&~Mult ? Frm[1:0] == 2'b10 : FmaPs;
// determine the sign of an infinity result
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
// implemented as: if Z is infinity use the addend's sign (FmaAs), otherwise use the product's sign (FmaPs)
assign Infs = ZInf ? FmaAs : FmaPs;
// determine the sign of an infinity result
// is the result negative
// if p - z is the Sum negative
// if -p + z is the Sum positive
// if -p - z then the Sum is negative
// implemented as: if Z is infinity use the addend's sign (FmaAs), otherwise use the product's sign (FmaPs)
assign Infs = ZInf ? FmaAs : FmaPs;
// select the result sign
// - the infinity case takes priority over the zero-sum case; both apply only to FMA operations
always_comb
if(InfIn&FmaOp) Rs = Infs;
else if(FmaSZero&FmaOp) Rs = Zeros;
else Rs = Ms;
// select the result sign
// - the infinity case takes priority over the zero-sum case; both apply only to FMA operations
always_comb
if(InfIn&FmaOp) Rs = Infs;
else if(FmaSZero&FmaOp) Rs = Zeros;
else Rs = Ms;
endmodule

View file

@ -37,294 +37,295 @@
`define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3)
module round(
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [2:0] Frm, // rounding mode
input logic [1:0] PostProcSel, // select the postprocessor output
input logic Ms, // normalized sign
input logic [`CORRSHIFTSZ-1:0] Mf, // normalized fraction
// fma
input logic FmaOp, // is an fma opperation being done?
input logic [`NE+1:0] FmaMe, // exponent of the normalized sum for fma
input logic FmaASticky, // addend's sticky bit
// divsqrt
input logic DivOp, // is a division opperation being done
input logic DivSticky, // divsqrt sticky bit
input logic [`NE+1:0] Qe, // the divsqrt calculated expoent
// cvt
input logic CvtOp, // is a convert opperation being done
input logic ToInt, // is the cvt op a cvt to integer
input logic CvtResSubnormUf, // is the cvt result subnormal or underflow
input logic CvtResUf, // does the cvt result underflow
input logic [`NE:0] CvtCe, // the cvt calculated expoent
// outputs
output logic [`NE+1:0] Me, // normalied fraction
output logic UfPlus1, // do you add one to the result if given an unbounded exponent
output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
output logic [`NE-1:0] Re, // Result exponent
output logic [`NF-1:0] Rf, // Result fractionNormS
output logic Sticky, // sticky bit
output logic Plus1, // do you add one to the final result
output logic Round, Guard // bits needed to calculate rounding
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic [2:0] Frm, // rounding mode
input logic [1:0] PostProcSel, // select the postprocessor output
input logic Ms, // normalized sign
input logic [`CORRSHIFTSZ-1:0] Mf, // normalized fraction
// fma
input logic FmaOp, // is an fma opperation being done?
input logic [`NE+1:0] FmaMe, // exponent of the normalized sum for fma
input logic FmaASticky, // addend's sticky bit
// divsqrt
input logic DivOp, // is a division opperation being done
input logic DivSticky, // divsqrt sticky bit
input logic [`NE+1:0] Qe, // the divsqrt calculated expoent
// cvt
input logic CvtOp, // is a convert opperation being done
input logic ToInt, // is the cvt op a cvt to integer
input logic CvtResSubnormUf, // is the cvt result subnormal or underflow
input logic CvtResUf, // does the cvt result underflow
input logic [`NE:0] CvtCe, // the cvt calculated expoent
// outputs
output logic [`NE+1:0] Me, // normalied fraction
output logic UfPlus1, // do you add one to the result if given an unbounded exponent
output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow
output logic [`NE-1:0] Re, // Result exponent
output logic [`NF-1:0] Rf, // Result fractionNormS
output logic Sticky, // sticky bit
output logic Plus1, // do you add one to the final result
output logic Round, Guard // bits needed to calculate rounding
);
logic UfCalcPlus1; // calculated plus one for unbounded exponent
logic NormSticky; // normalized sum's sticky bit
logic [`NF-1:0] RoundFrac; // rounded fraction
logic FpRes; // is the result a floating point
logic IntRes; // is the result an integer
logic FpGuard, FpRound; // floating point round/guard bits
logic FpLsbRes; // least significant bit of floating point result
logic LsbRes; // lsb of result
logic CalcPlus1; // calculated plus1
logic FpPlus1; // do you add one to the fp result
logic [`FLEN:0] RoundAdd; // how much to add to the result
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
logic UfCalcPlus1; // calculated plus one for unbounded exponent
logic NormSticky; // normalized sum's sticky bit
logic [`NF-1:0] RoundFrac; // rounded fraction
logic FpRes; // is the result a floating point
logic IntRes; // is the result an integer
logic FpGuard, FpRound; // floating point round/guard bits
logic FpLsbRes; // least significant bit of floating point result
logic LsbRes; // lsb of result
logic CalcPlus1; // calculated plus1
logic FpPlus1; // do you add one to the fp result
logic [`FLEN:0] RoundAdd; // how much to add to the result
// round to nearest even
// {Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// - plus 1 otherwise
///////////////////////////////////////////////////////////////////////////////
// Rounding
///////////////////////////////////////////////////////////////////////////////
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to nearest even
// {Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1 if result is odd (LSBNormSum = 1)
// - don't add 1 if a small number was supposed to be subtracted
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// - plus 1 otherwise
// round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to -infinity
// - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// - Plus 1 otherwise
// round to infinity
// - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0
// - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0
// round to nearest max magnitude
// {Guard, Round, Sticky}
// 0x - do nothing
// 10 - tie - Plus1
// - don't add 1 if a small number was supposed to be subtracted
// 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number)
// - Plus 1 otherwise
// determine what format the final result is in: int or fp
assign IntRes = CvtOp & ToInt;
assign FpRes = ~IntRes;
// determine what format the final result is in: int or fp
assign IntRes = CvtOp & ToInt;
assign FpRes = ~IntRes;
// sticky bit calculation
if (`FPSIZES == 1) begin
// sticky bit calculation
if (`FPSIZES == 1) begin
// 1: XLEN > NF
// | XLEN |
// | NF |1|1|
// ^ ^ if floating point result
// ^ if not an FMA result
if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN
if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 1: XLEN > NF
// | XLEN |
// | NF |1|1|
// ^ ^ if floating point result
// ^ if not an FMA result
if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN
if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 2) begin
// XLEN is either 64 or 32
// so half and single are always smaller then XLEN
end else if (`FPSIZES == 2) begin
// XLEN is either 64 or 32
// so half and single are always smaller then XLEN
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 3) begin
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 3) begin
// 1: XLEN > NF > NF1
if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:0]);
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
// 3: NF > NF1 > XLEN
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`NF-2:0]);
end else if (`FPSIZES == 4) begin
// Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
// 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
end else if (`FPSIZES == 4) begin
// Quad precision will always be greater than XLEN
// 2: NF > XLEN > NF1
if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
// 3: NF > NF1 > XLEN
// The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer
if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) |
(|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) |
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) |
(|Mf[`CORRSHIFTSZ-`Q_NF-2:0]);
end
end
// only add the Addend sticky if doing an FMA operation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp;
// only add the Addend sticky if doing an FMA operation
// - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits)
assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp;
// determine round and LSB of the rounded value
// - underflow round bit is used to determine the underflow flag
if (`FPSIZES == 1) begin
assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
assign FpRound = Mf[`CORRSHIFTSZ-`NF-2];
// determine round and LSB of the rounded value
// - underflow round bit is used to determine the underflow flag
if (`FPSIZES == 1) begin
assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
assign FpRound = Mf[`CORRSHIFTSZ-`NF-2];
end else if (`FPSIZES == 2) begin
assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
end else if (`FPSIZES == 2) begin
assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1];
assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1];
assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2];
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
FpRound = Mf[`CORRSHIFTSZ-`NF-2];
end
`FMT1: begin
FpGuard = Mf[`CORRSHIFTSZ-`NF1-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`NF1];
FpRound = Mf[`CORRSHIFTSZ-`NF1-2];
end
`FMT2: begin
FpGuard = Mf[`CORRSHIFTSZ-`NF2-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`NF2];
FpRound = Mf[`CORRSHIFTSZ-`NF2-2];
end
default: begin
FpGuard = 1'bx;
FpLsbRes = 1'bx;
FpRound = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF];
FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
end
2'h1: begin
FpGuard = Mf[`CORRSHIFTSZ-`D_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF];
FpRound = Mf[`CORRSHIFTSZ-`D_NF-2];
end
2'h0: begin
FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF];
FpRound = Mf[`CORRSHIFTSZ-`S_NF-2];
end
2'h2: begin
FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF];
FpRound = Mf[`CORRSHIFTSZ-`H_NF-2];
end
endcase
end
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
FpGuard = Mf[`CORRSHIFTSZ-`NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`NF];
FpRound = Mf[`CORRSHIFTSZ-`NF-2];
end
`FMT1: begin
FpGuard = Mf[`CORRSHIFTSZ-`NF1-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`NF1];
FpRound = Mf[`CORRSHIFTSZ-`NF1-2];
end
`FMT2: begin
FpGuard = Mf[`CORRSHIFTSZ-`NF2-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`NF2];
FpRound = Mf[`CORRSHIFTSZ-`NF2-2];
end
default: begin
FpGuard = 1'bx;
FpLsbRes = 1'bx;
FpRound = 1'bx;
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF];
FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2];
end
2'h1: begin
FpGuard = Mf[`CORRSHIFTSZ-`D_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF];
FpRound = Mf[`CORRSHIFTSZ-`D_NF-2];
end
2'h0: begin
FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF];
FpRound = Mf[`CORRSHIFTSZ-`S_NF-2];
end
2'h2: begin
FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1];
FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF];
FpRound = Mf[`CORRSHIFTSZ-`H_NF-2];
end
endcase
end
assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard;
assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes;
assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound;
assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard;
assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes;
assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound;
// Rounding-increment candidates, selected by the rounding mode Frm.
// - CalcPlus1:   candidate increment for the result
// - UfCalcPlus1: candidate increment used for the underflow flag; it uses Round where
//                CalcPlus1 uses Guard, and Guard where CalcPlus1 uses LsbRes
// - in the two directed modes the decision depends only on Ms (presumably the result sign - TODO confirm)
// - Frm encodings other than the five listed below drive both outputs to x
always_comb begin
// Determine if you add 1
case (Frm)
3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = Ms;//round down
3'b011: CalcPlus1 = ~Ms;//round up
3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (Frm)
3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = Ms;//round down
3'b011: UfCalcPlus1 = ~Ms;//round up
3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
end
// Rounding-increment candidates, selected by the rounding mode Frm.
// - CalcPlus1:   candidate increment for the result
// - UfCalcPlus1: candidate increment used for the underflow flag; it uses Round where
//                CalcPlus1 uses Guard, and Guard where CalcPlus1 uses LsbRes
// - in the two directed modes the decision depends only on Ms (presumably the result sign - TODO confirm)
// - Frm encodings other than the five listed below drive both outputs to x
always_comb begin
// Determine if you add 1
case (Frm)
3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even
3'b001: CalcPlus1 = 0;//round to zero
3'b010: CalcPlus1 = Ms;//round down
3'b011: CalcPlus1 = ~Ms;//round up
3'b100: CalcPlus1 = Guard;//round to nearest max magnitude
default: CalcPlus1 = 1'bx;
endcase
// Determine if you add 1 (for underflow flag)
case (Frm)
3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even
3'b001: UfCalcPlus1 = 0;//round to zero
3'b010: UfCalcPlus1 = Ms;//round down
3'b011: UfCalcPlus1 = ~Ms;//round up
3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude
default: UfCalcPlus1 = 1'bx;
endcase
end
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
// If an answer is exact don't round
assign Plus1 = CalcPlus1 & (Sticky|Round|Guard);
assign FpPlus1 = Plus1&~(ToInt&CvtOp);
assign UfPlus1 = UfCalcPlus1 & (Sticky|Round);
// place Plus1 into the proper position for the format
if (`FPSIZES == 1) begin
assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
// place Plus1 into the proper position for the format
if (`FPSIZES == 1) begin
assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1};
end else if (`FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
end else if (`FPSIZES == 2) begin
// \/FLEN+1
// | NE+2 | NF |
// '-NE+2-^----NF1----^
// `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1
assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt};
end else if (`FPSIZES == 3) begin
assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
end else if (`FPSIZES == 3) begin
assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)};
end else if (`FPSIZES == 4)
assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
end else if (`FPSIZES == 4)
assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)};
// trim unneeded bits from fraction
assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
// trim unneeded bits from fraction
assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF];
// select the exponent
// Pick the exponent Me that feeds the rounding adder, based on PostProcSel.
// - 2'b10 (fma):    the FMA exponent FmaMe is passed through
// - 2'b00 (cvt):    CvtCe with its bit `NE replicated on top (a one-bit sign extension if `NE is
//                   CvtCe's msb - TODO confirm width); the value is forced to zero when
//                   CvtResSubnormUf is set and CvtResUf is clear
// - 2'b01 (divide): the divider exponent Qe is passed through
// - any other encoding gives a zero exponent
always_comb
case(PostProcSel)
2'b10: Me = FmaMe; // fma
2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
// 2'b01: Me = DivDone ? Qe : '0; // divide
2'b01: Me = Qe; // divide
default: Me = '0;
endcase
// select the exponent
// Pick the exponent Me that feeds the rounding adder, based on PostProcSel.
// - 2'b10 (fma):    the FMA exponent FmaMe is passed through
// - 2'b00 (cvt):    CvtCe with its bit `NE replicated on top (a one-bit sign extension if `NE is
//                   CvtCe's msb - TODO confirm width); the value is forced to zero when
//                   CvtResSubnormUf is set and CvtResUf is clear
// - 2'b01 (divide): the divider exponent Qe is passed through
// - any other encoding gives a zero exponent
always_comb
case(PostProcSel)
2'b10: Me = FmaMe; // fma
2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt
// 2'b01: Me = DivDone ? Qe : '0; // divide
2'b01: Me = Qe; // divide
default: Me = '0;
endcase
// round the result
// - if the fraction overflows one should be added to the exponent
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
assign Re = FullRe[`NE-1:0];
// round the result
// - if the fraction overflows one should be added to the exponent
assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd;
assign Re = FullRe[`NE-1:0];
endmodule

View file

@ -28,23 +28,23 @@
`include "wally-config.vh"
// roundsign: picks the sign bit (Ms) used by the rounding logic, depending on which
// operation (fma, cvt or divsqrt) produced the result.
// - Ms is an AND-OR select of FmaSs/CvtCs/Qs gated by FmaOp/CvtOp/DivOp; it is 0 when no gate is set
// - assumes at most one of FmaOp/CvtOp/DivOp is asserted at a time - TODO confirm against the caller
// NOTE(review): the port list, the Qs declaration and both assigns appear twice in this text
// (looks like the before/after lines of a whitespace-only change) - confirm before compiling
module roundsign(
input logic Xs, // x sign
input logic Ys, // y sign
input logic CvtCs, // convert result sign
input logic FmaSs, // fma sum sign
input logic Sqrt, // sqrt operation? (when using divsqrt unit)
input logic FmaOp, // is fma operation
input logic DivOp, // is divsqrt operation
input logic CvtOp, // is cvt operation
output logic Ms // normalized result sign
input logic Xs, // x sign
input logic Ys, // y sign
input logic CvtCs, // convert result sign
input logic FmaSs, // fma sum sign
input logic Sqrt, // sqrt operation? (when using divsqrt unit)
input logic FmaOp, // is fma operation
input logic DivOp, // is divsqrt operation
input logic CvtOp, // is cvt operation
output logic Ms // normalized result sign
);
logic Qs; // divsqrt result sign
logic Qs; // divsqrt result sign
// calculate divsqrt sign
// - divide: Xs xor Ys; sqrt: Ys is masked off so the sign is just Xs
assign Qs = Xs^(Ys&~Sqrt);
// calculate divsqrt sign
// - divide: Xs xor Ys; sqrt: Ys is masked off so the sign is just Xs
assign Qs = Xs^(Ys&~Sqrt);
// Select sign for rounding calculation
assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
// Select sign for rounding calculation
assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp);
endmodule

View file

@ -25,67 +25,69 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
////////////////////////////////////////////////////////////////////////////////////////////////
`include "wally-config.vh"
// shiftcorrection: fixes up the normalization shift and the exponents after the main shifter.
// - for fma: applies the one-bit LZA correction to the shifted sum and adjusts the exponent (FmaMe)
// - for divsqrt: optionally shifts the quotient left by one more bit and adjusts the exponent (Qe)
// - otherwise: passes the top `CORRSHIFTSZ bits of Shifted through as Mf
// NOTE(review): the port list, declarations and statements appear twice in this text
// (looks like the before/after lines of a whitespace-only change) - confirm before compiling
module shiftcorrection(
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
// divsqrt
input logic DivOp, // is it a divsqrt operation
input logic DivResSubnorm, // is the divsqrt result subnormal
input logic [`NE+1:0] DivQe, // the divsqrt result's exponent
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
//fma
input logic FmaOp, // is it an fma operation
input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
input logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA correction
input logic FmaSZero, // forces FmaMe to zero when set (presumably: the fma sum is zero - TODO confirm)
// output
output logic [`NE+1:0] FmaMe, // exponent of the normalized sum
output logic [`CORRSHIFTSZ-1:0] Mf, // the corrected shifted result (fma sum, divsqrt quotient, or the top bits of Shifted)
output logic [`NE+1:0] Qe // corrected exponent for divider
input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction
// divsqrt
input logic DivOp, // is it a divsqrt operation
input logic DivResSubnorm, // is the divsqrt result subnormal
input logic [`NE+1:0] DivQe, // the divsqrt result's exponent
input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed)
//fma
input logic FmaOp, // is it an fma operation
input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results
input logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA correction
input logic FmaSZero, // forces FmaMe to zero when set (presumably: the fma sum is zero - TODO confirm)
// output
output logic [`NE+1:0] FmaMe, // exponent of the normalized sum
output logic [`CORRSHIFTSZ-1:0] Mf, // the corrected shifted result (fma sum, divsqrt quotient, or the top bits of Shifted)
output logic [`NE+1:0] Qe // corrected exponent for divider
);
logic [3*`NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted
logic [`CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift
logic ResSubnorm; // is the result Subnormal
logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction
logic LeftShiftQm; // should the divsqrt result be shifted one to the left
logic [3*`NF+3:0] CorrSumShifted; // the shifted sum after LZA correction
logic [`CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted
logic [`CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift
logic ResSubnorm; // is the result Subnormal
logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction
logic LeftShiftQm; // should the divsqrt result be shifted one to the left
// LZA correction
// - the msb of Shifted being set is taken as the sign that the shift was one position off
assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1];
// LZA correction
// - the msb of Shifted being set is taken as the sign that the shift was one position off
assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1];
// correct the shifting error caused by the LZA
// - the only possible mantissa for a plus two is all zeroes
// - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
// NOTE(review): the mux is `NORMSHIFTSZ-2 bits wide while CorrSumShifted is 3*`NF+4 bits - confirm these match
mux2 #(`NORMSHIFTSZ-2) lzacorrmux(Shifted[`NORMSHIFTSZ-3:0], Shifted[`NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
// correct the shifting error caused by the LZA
// - the only possible mantissa for a plus two is all zeroes
// - a one has to propagate all the way through a sum. so we can leave the bottom statement alone
// NOTE(review): the mux is `NORMSHIFTSZ-2 bits wide while CorrSumShifted is 3*`NF+4 bits - confirm these match
mux2 #(`NORMSHIFTSZ-2) lzacorrmux(Shifted[`NORMSHIFTSZ-3:0], Shifted[`NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted);
// correct the shifting of the divsqrt caused by producing a result in [.5, 2) range
// condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1];
mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
// correct the shifting of the divsqrt caused by producing a result in [.5, 2) range
// condition: if the msb is 1 or the exponent was one, but the shifted quotient was < 1 (Subnorm)
assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1));
assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2];
assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1];
mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted);
// if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
// - fma: the corrected sum, zero-padded on the right up to `CORRSHIFTSZ bits
// - divsqrt with a non-subnormal result: the one-bit-corrected quotient
// - everything else: the top `CORRSHIFTSZ bits of Shifted, unchanged
always_comb
if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}};
else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted;
else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits
// - fma: the corrected sum, zero-padded on the right up to `CORRSHIFTSZ bits
// - divsqrt with a non-subnormal result: the one-bit-corrected quotient
// - everything else: the top `CORRSHIFTSZ bits of Shifted, unchanged
always_comb
if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}};
else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted;
else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ];
// Determine sum's exponent
// main exponent issues:
// - LZA was one too large
// - LZA was two too large
// - if the result was calculated to be subnorm but it's norm and the LZA was off by 1
// - if the result was calculated to be subnorm but it's norm and the LZA was off by 2
// if plus1 If plus2 kill if the result Zero or actually subnormal
// | | |
assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}};
// recalculate if the result is subnormal after LZA correction
// - still subnormal only if it was predicted subnormal and both top bits of Shifted are zero
assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1];
// Determine sum's exponent
// main exponent issues:
// - LZA was one too large
// - LZA was two too large
// - if the result was calculated to be subnorm but it's norm and the LZA was off by 1
// - if the result was calculated to be subnorm but it's norm and the LZA was off by 2
// if plus1 If plus2 kill if the result Zero or actually subnormal
// | | |
assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}};
// recalculate if the result is subnormal after LZA correction
// - still subnormal only if it was predicted subnormal and both top bits of Shifted are zero
assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1];
// the quotient is in the range [.5,2) if there is no early termination
// if the quotient < 1 and not Subnormal then subtract 1 to account for the normalization shift
// - a subnormal result with a positive shift amount gets a zero exponent
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1};
// the quotient is in the range [.5,2) if there is no early termination
// if the quotient < 1 and not Subnormal then subtract 1 to account for the normalization shift
// - a subnormal result with a positive shift amount gets a zero exponent
assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1};
endmodule

View file

@ -29,291 +29,281 @@
`include "wally-config.vh"
module specialcase(
input logic Xs, // X sign
input logic [`NF:0] Xm, Ym, Zm, // input significand's
input logic XNaN, YNaN, ZNaN, // are the inputs NaN
input logic [2:0] Frm, // rounding mode
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn, // are any inputs infinity
input logic NaNIn, // are any input NaNs
input logic XInf, YInf, // are X or Y inifnity
input logic XZero, // is X zero
input logic Plus1, // do you add one for rounding
input logic Rs, // the result's sign
input logic Invalid, Overflow, // flags to choose the result
input logic [`NE-1:0] Re, // Result exponent
input logic [`NE+1:0] FullRe, // Result full exponent
input logic [`NF-1:0] Rf, // Result fraction
// fma
input logic FmaOp, // is it a fma opperation
// divsqrt
input logic DivOp, // is it a divsqrt opperation
input logic DivByZero, // divide by zero flag
// cvt
input logic CvtOp, // is it a conversion opperation
input logic IntZero, // is the integer input zero
input logic IntToFp, // is cvt int -> fp opperation
input logic Int64, // is the integer 64 bits
input logic Signed, // is the integer signed
input logic [`NE:0] CvtCe, // the calculated expoent for cvt
input logic IntInvalid, // integer invalid flag to choose the result
input logic CvtResUf, // does the convert result underflow
input logic [`XLEN+1:0] CvtNegRes, // the possibly negated of the integer result
// outputs
output logic [`FLEN-1:0] PostProcRes,// final result
output logic [`XLEN-1:0] FCvtIntRes // final integer result
input logic Xs, // X sign
input logic [`NF:0] Xm, Ym, Zm, // input significand's
input logic XNaN, YNaN, ZNaN, // are the inputs NaN
input logic [2:0] Frm, // rounding mode
input logic [`FMTBITS-1:0] OutFmt, // output format
input logic InfIn, // are any inputs infinity
input logic NaNIn, // are any input NaNs
input logic XInf, YInf, // are X or Y inifnity
input logic XZero, // is X zero
input logic Plus1, // do you add one for rounding
input logic Rs, // the result's sign
input logic Invalid, Overflow, // flags to choose the result
input logic [`NE-1:0] Re, // Result exponent
input logic [`NE+1:0] FullRe, // Result full exponent
input logic [`NF-1:0] Rf, // Result fraction
// fma
input logic FmaOp, // is it a fma opperation
// divsqrt
input logic DivOp, // is it a divsqrt opperation
input logic DivByZero, // divide by zero flag
// cvt
input logic CvtOp, // is it a conversion opperation
input logic IntZero, // is the integer input zero
input logic IntToFp, // is cvt int -> fp opperation
input logic Int64, // is the integer 64 bits
input logic Signed, // is the integer signed
input logic [`NE:0] CvtCe, // the calculated expoent for cvt
input logic IntInvalid, // integer invalid flag to choose the result
input logic CvtResUf, // does the convert result underflow
input logic [`XLEN+1:0] CvtNegRes, // the possibly negated of the integer result
// outputs
output logic [`FLEN-1:0] PostProcRes,// final result
output logic [`XLEN-1:0] FCvtIntRes // final integer result
);
logic [`FLEN-1:0] XNaNRes; // X is NaN result
logic [`FLEN-1:0] YNaNRes; // Y is NaN result
logic [`FLEN-1:0] ZNaNRes; // Z is NaN result
logic [`FLEN-1:0] InvalidRes; // Invalid result result
logic [`FLEN-1:0] UfRes; // underflowed result result
logic [`FLEN-1:0] OfRes; // overflowed result result
logic [`FLEN-1:0] NormRes; // normal result
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic OfResMax; // does the of result output maximum norm fp number
logic KillRes; // kill the result for underflow
logic SelOfRes; // should the overflow result be selected
logic [`FLEN-1:0] XNaNRes; // X is NaN result
logic [`FLEN-1:0] YNaNRes; // Y is NaN result
logic [`FLEN-1:0] ZNaNRes; // Z is NaN result
logic [`FLEN-1:0] InvalidRes; // Invalid result result
logic [`FLEN-1:0] UfRes; // underflowed result result
logic [`FLEN-1:0] OfRes; // overflowed result result
logic [`FLEN-1:0] NormRes; // normal result
logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output
logic OfResMax; // does the of result output maximum norm fp number
logic KillRes; // kill the result for underflow
logic SelOfRes; // should the overflow result be selected
// does the overflow result output the maximum normalized floating point number
// output infinity if the input is infinity
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
// does the overflow result output the maximum normalized floating point number
// output infinity if the input is infinity
assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs));
// select correct outputs for special cases
if (`FPSIZES == 1) begin
//NaN res selection depending on standard
if(`IEEE754) begin
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
// select correct outputs for special cases
if (`FPSIZES == 1) begin
//NaN res selection depending on standard
if(`IEEE754) begin
assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
assign OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = {Rs, Re, Rf};
assign OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = {Rs, Re, Rf};
end else if (`FPSIZES == 2) begin
if(`IEEE754) begin
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
end else if (`FPSIZES == 2) begin
if(`IEEE754) begin
assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
always_comb
if(OutFmt)
if(OfResMax) OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
else OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
else
if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
else OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
always_comb
if(OutFmt)
if(OfResMax) OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}};
else OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
else
if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}};
else OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {Rs, Re, Rf};
end
`FMT1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
end
`FMT2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)};
UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]};
end
default: begin
if(`IEEE754) begin
XNaNRes = (`FLEN)'(0);
YNaNRes = (`FLEN)'(0);
ZNaNRes = (`FLEN)'(0);
InvalidRes = (`FLEN)'(0);
end else begin
InvalidRes = (`FLEN)'(0);
end
OfRes = (`FLEN)'(0);
UfRes = (`FLEN)'(0);
NormRes = (`FLEN)'(0);
end
endcase
end else if (`FPSIZES == 3) begin
always_comb
case (OutFmt)
`FMT: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {Rs, Re, Rf};
end
`FMT1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]};
YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]};
ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]};
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)};
UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]};
end
`FMT2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]};
YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]};
ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]};
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)};
UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]};
end
default: begin
if(`IEEE754) begin
XNaNRes = (`FLEN)'(0);
YNaNRes = (`FLEN)'(0);
ZNaNRes = (`FLEN)'(0);
InvalidRes = (`FLEN)'(0);
end else begin
InvalidRes = (`FLEN)'(0);
end
OfRes = (`FLEN)'(0);
UfRes = (`FLEN)'(0);
NormRes = (`FLEN)'(0);
end
endcase
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {Rs, Re, Rf};
end
2'h1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)};
UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]};
end
2'h0: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)};
UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]};
end
2'h2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)};
// zero is exact if dividing by infinity so don't add 1
UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]};
end
endcase
end
end else if (`FPSIZES == 4) begin
always_comb
case (OutFmt)
2'h3: begin
if(`IEEE754) begin
XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]};
YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]};
ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]};
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end else begin
InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}};
end
OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}};
UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {Rs, Re, Rf};
end
2'h1: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]};
YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]};
ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]};
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)};
UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]};
end
2'h0: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]};
YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]};
ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]};
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)};
UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]};
end
2'h2: begin
if(`IEEE754) begin
XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]};
YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]};
ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]};
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end else begin
InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)};
end
OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)};
// zero is exact if dividing by infinity so don't add 1
UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)};
NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]};
end
endcase
end
// determine if the result should be killed (KillRes selects UfRes in the result mux below)
// - for Cvt: do so if the result underflows, is zero (the exponent doesn't calculate correctly), or the integer input is 0
// - don't set to zero if the fp input is zero but the fp input is not being used
// - don't set to zero if the int input is zero but the int input is not being used
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
// calculate if the overflow result should be selected
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
// select the final result; the conditions are checked in priority order
// output infinity with result sign if divide by zero
if(`IEEE754)
always_comb
if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes;
else if(YNaN&~CvtOp) PostProcRes = YNaNRes;
else if(ZNaN&FmaOp) PostProcRes = ZNaNRes;
else if(Invalid) PostProcRes = InvalidRes;
else if(SelOfRes) PostProcRes = OfRes;
else if(KillRes) PostProcRes = UfRes;
else PostProcRes = NormRes;
else
// without `IEEE754, any NaN input or invalid operation selects InvalidRes
always_comb
if(NaNIn|Invalid) PostProcRes = InvalidRes;
else if(SelOfRes) PostProcRes = OfRes;
else if(KillRes) PostProcRes = UfRes;
else PostProcRes = NormRes;
///////////////////////////////////////////////////////////////////////////////////////
// integer result selection
///////////////////////////////////////////////////////////////////////////////////////
// determine if the result should be killed (KillRes selects UfRes in the result mux below)
// - for Cvt: do so if the result underflows, is zero (the exponent doesn't calculate correctly), or the integer input is 0
// - don't set to zero if the fp input is zero but the fp input is not being used
// - don't set to zero if the int input is zero but the int input is not being used
assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1);
// calculate if the overflow result should be selected
assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp));
// select the final result; the conditions are checked in priority order
// output infinity with result sign if divide by zero
if(`IEEE754)
always_comb
if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes;
else if(YNaN&~CvtOp) PostProcRes = YNaNRes;
else if(ZNaN&FmaOp) PostProcRes = ZNaNRes;
else if(Invalid) PostProcRes = InvalidRes;
else if(SelOfRes) PostProcRes = OfRes;
else if(KillRes) PostProcRes = UfRes;
else PostProcRes = NormRes;
// select the overflow integer result
// - negative infinity and out of range negative input
// | int | long |
// signed | -2^31 | -2^63 |
// unsigned | 0 | 0 |
//
// - positive infinity and out of range positive input and NaNs
// | int | long |
// signed | 2^31-1 | 2^63-1 |
// unsigned | 2^32-1 | 2^64-1 |
//
// other: a 32 bit unsigned result should be sign extended as if it were a signed number
always_comb
if(Signed)
if(Xs&~NaNIn) // signed negative
if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
else // signed positive
if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
else
always_comb
if(NaNIn|Invalid) PostProcRes = InvalidRes;
else if(SelOfRes) PostProcRes = OfRes;
else if(KillRes) PostProcRes = UfRes;
else PostProcRes = NormRes;
if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negative
else OfIntRes = {`XLEN{1'b1}}; // unsigned positive
///////////////////////////////////////////////////////////////////////////////////////
// integer result selection
///////////////////////////////////////////////////////////////////////////////////////
// select the overflow integer result
// - negative infinity and out of range negative input
// | int | long |
// signed | -2^31 | -2^63 |
// unsigned | 0 | 0 |
//
// - positive infinity and out of range positive input and NaNs
// | int | long |
// signed | 2^31-1 | 2^63-1 |
// unsigned | 2^32-1 | 2^64-1 |
//
// other: a 32 bit unsigned result should be sign extended as if it were a signed number
// NaN inputs take the positive column regardless of sign (Xs is masked by ~NaNIn)
always_comb
if(Signed)
if(Xs&~NaNIn) // signed negative
if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}};
else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}};
else // signed positive
if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}};
else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}};
else
if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negative
else OfIntRes = {`XLEN{1'b1}}; // unsigned positive
// select the integer output
// - if the input is invalid (out of bounds, NaN or Inf) then output the overflow result
// - if the input underflows (CvtCe[`NE] is set)
// - if rounding up (Plus1) on a signed operation with a negative input, output -1
// - otherwise output a rounded 0 (0 or 1 depending on Plus1)
// - otherwise output the normal result (trimmed and sign extended if necessary)
always_comb
if(IntInvalid) FCvtIntRes = OfIntRes;
else if(CvtCe[`NE])
if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}};
else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0];
else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
// select the integer output
// - if the input is invalid (out of bounds, NaN or Inf) then output the overflow result
// - if the input underflows (CvtCe[`NE] is set)
// - if rounding up (Plus1) on a signed operation with a negative input, output -1
// - otherwise output a rounded 0 (0 or 1 depending on Plus1)
// - otherwise output the normal result (trimmed and sign extended if necessary)
always_comb
if(IntInvalid) FCvtIntRes = OfIntRes;
else if(CvtCe[`NE])
if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}};
else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1};
else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0];
else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]};
endmodule

View file

@ -28,35 +28,35 @@
`include "wally-config.vh"
// unpack: splits the three operands X, Y, Z into sign, exponent and mantissa and
// produces their classification flags, using one unpackinput instance per operand.
// Only X exposes its subnormal flag and ExpMax; the Y and Z equivalents stay internal.
module unpack (
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
input logic XEn, YEn, ZEn, // input enables
output logic Xs, Ys, Zs, // sign bits of XYZ
output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
output logic XSubnorm, // is X subnormal
output logic XZero, YZero, ZZero, // is XYZ zero
output logic XInf, YInf, ZInf, // is XYZ infinity
output logic XExpMax // does X have the maximum exponent (NaN or Inf)
input logic [`FLEN-1:0] X, Y, Z, // inputs from register file
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
input logic XEn, YEn, ZEn, // input enables
output logic Xs, Ys, Zs, // sign bits of XYZ
output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision)
output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision)
output logic XNaN, YNaN, ZNaN, // is XYZ a NaN
output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN
output logic XSubnorm, // is X subnormal
output logic XZero, YZero, ZZero, // is XYZ zero
output logic XInf, YInf, ZInf, // is XYZ infinity
output logic XExpMax // does X have the maximum exponent (NaN or Inf)
);
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
logic YExpMax, ZExpMax; // is the exponent all 1s
unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
.NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
.Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero));
unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
.NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
.Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero));
logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero
logic XFracZero, YFracZero, ZFracZero; // is the fraction zero
logic YExpMax, ZExpMax; // is the exponent all 1s
unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn),
.NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero),
.Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero));
unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
.NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
.Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero));
// is the input subnormal: zero exponent field with a non-zero fraction
assign XSubnorm = ~XExpNonZero & ~XFracZero;
endmodule
unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn),
.NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero),
.Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero));
unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn),
.NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero),
.Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero));
// is the input subnormal: zero exponent field with a non-zero fraction
assign XSubnorm = ~XExpNonZero & ~XFracZero;
endmodule

View file

@ -28,243 +28,243 @@
`include "wally-config.vh"
// unpackinput: unpacks a single floating-point operand (In) of format Fmt into
// sign, exponent and mantissa, and classifies it (NaN, signaling NaN, zero, infinity).
module unpackinput (
input logic [`FLEN-1:0] In, // input from register file
input logic En, // enable the input
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic Sgn, // sign bit of In
output logic [`NE-1:0] Exp, // exponent of In (converted to largest supported precision)
output logic [`NF:0] Man, // mantissa of In (converted to largest supported precision)
output logic NaN, // is In a NaN
output logic SNaN, // is In a signaling NaN
output logic Zero, // is In zero
output logic Inf, // is In infinity
output logic ExpNonZero, // is the exponent not zero
output logic FracZero, // is the fraction zero
output logic ExpMax // does In have the maximum exponent (NaN or Inf)
input logic [`FLEN-1:0] In, // input from register file
input logic En, // enable the input
input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half
output logic Sgn, // sign bit of In
output logic [`NE-1:0] Exp, // exponent of In (converted to largest supported precision)
output logic [`NF:0] Man, // mantissa of In (converted to largest supported precision)
output logic NaN, // is In a NaN
output logic SNaN, // is In a signaling NaN
output logic Zero, // is In zero
output logic Inf, // is In infinity
output logic ExpNonZero, // is the exponent not zero
output logic FracZero, // is the fraction zero
output logic ExpMax // does In have the maximum exponent (NaN or Inf)
);
logic [`NF-1:0] Frac; // Fraction of In
logic BadNaNBox; // is the NaN boxing bad
if (`FPSIZES == 1) begin // if there is only one floating point format supported
assign BadNaNBox = 0; // a single format is never NaN boxed
assign Sgn = In[`FLEN-1]; // sign bit
assign Frac = In[`NF-1:0]; // fraction (no assumed 1)
assign ExpNonZero = |In[`FLEN-2:`NF]; // is the exponent non-zero
assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. subnormal numbers have effective biased exponent of 1
assign ExpMax = &In[`FLEN-2:`NF]; // is the exponent all 1's
end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported
//***need better names for these constants
// largest format | smaller format
//----------------------------------
// `FLEN | `LEN1 length of floating point number
// `NE | `NE1 length of exponent
// `NF | `NF1 length of fraction
// `BIAS | `BIAS1 exponent's bias value
// `FMT | `FMT1 precision's format value - Q=11 D=01 S=00 H=10
// Possible combinations specified by spec:
// double and single
// single and half
logic [`NF-1:0] Frac; // Fraction of In
logic BadNaNBox; // is the NaN boxing bad
if (`FPSIZES == 1) begin // if there is only one floating point format supported
assign BadNaNBox = 0; // a single format is never NaN boxed
assign Sgn = In[`FLEN-1]; // sign bit
assign Frac = In[`NF-1:0]; // fraction (no assumed 1)
assign ExpNonZero = |In[`FLEN-2:`NF]; // is the exponent non-zero
assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. subnormal numbers have effective biased exponent of 1
assign ExpMax = &In[`FLEN-2:`NF]; // is the exponent all 1's
end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported
//***need better names for these constants
// largest format | smaller format
//----------------------------------
// `FLEN | `LEN1 length of floating point number
// `NE | `NE1 length of exponent
// `NF | `NF1 length of fraction
// `BIAS | `BIAS1 exponent's bias value
// `FMT | `FMT1 precision's format value - Q=11 D=01 S=00 H=10
// Not needed but can also handle:
// quad and double
// quad and single
// quad and half
// double and half
// Possible combinations specified by spec:
// double and single
// single and half
assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
// Not needed but can also handle:
// quad and double
// quad and single
// quad and half
// double and half
// choose sign bit depending on format - 1=larger precision 0=smaller precision
assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1];
assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing
// extract the fraction, add trailing zeroes to the mantissa if necessary
assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
// choose sign bit depending on format - 1=larger precision 0=smaller precision
assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1];
// is the exponent non-zero
assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
// extract the fraction, add trailing zeroes to the mantissa if necessary
assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)};
// example single to double exponent conversion (re-biasing):
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/Subnorm/inf/NaN values
// is the exponent non-zero
assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1];
// extract the exponent, converting the smaller exponent into the larger precision if necessary
// - if the original precision had a Subnormal number convert the exponent value 1
assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
// is the exponent all 1's
assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
// example single to double exponent conversion (re-biasing):
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/Subnorm/inf/NaN values
end else if (`FPSIZES == 3) begin // three floating point precisions supported
// extract the exponent, converting the smaller exponent into the larger precision if necessary
// - if the original precision had a Subnormal number convert the exponent value 1
assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
//***need better names for these constants
// largest format | larger format | smallest format
//---------------------------------------------------
// `FLEN | `LEN1 | `LEN2 length of floating point number
// `NE | `NE1 | `NE2 length of exponent
// `NF | `NF1 | `NF2 length of fraction
// `BIAS | `BIAS1 | `BIAS2 exponent's bias value
// `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 S=00 H=10
// is the exponent all 1's
assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1];
// Possible combinations specified by spec:
// quad and double and single
// double and single and half
end else if (`FPSIZES == 3) begin // three floating point precisions supported
// Not needed but can also handle:
// quad and double and half
// quad and single and half
//***need better names for these constants
// largest format | larger format | smallest format
//---------------------------------------------------
// `FLEN | `LEN1 | `LEN2 length of floating point number
// `NE | `NE1 | `NE2 length of exponent
// `NF | `NF1 | `NF2 length of fraction
// `BIAS | `BIAS1 | `BIAS2 exponent's bias value
// `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 S=00 H=10
// Check NaN boxing: all bits above the selected format's length must be 1
always_comb
case (Fmt)
`FMT: BadNaNBox = 0;
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
default: BadNaNBox = 1'bx;
endcase
// Possible combinations specified by spec:
// quad and double and single
// double and single and half
// extract the sign bit
always_comb
case (Fmt)
`FMT: Sgn = In[`FLEN-1];
`FMT1: Sgn = In[`LEN1-1];
`FMT2: Sgn = In[`LEN2-1];
default: Sgn = 1'bx;
endcase
// Not needed but can also handle:
// quad and double and half
// quad and single and half
// extract the fraction, padding the smaller formats with trailing zeroes
always_comb
case (Fmt)
`FMT: Frac = In[`NF-1:0];
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
default: Frac = {`NF{1'bx}};
endcase
// Check NaN boxing: all bits above the selected format's length must be 1
always_comb
case (Fmt)
`FMT: BadNaNBox = 0;
`FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1];
`FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2];
default: BadNaNBox = 1'bx;
endcase
// is the exponent non-zero
always_comb
case (Fmt)
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precision (`LEN1 - double or single)
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precision (`LEN2 - single or half)
default: ExpNonZero = 1'bx;
endcase
// example single to double exponent conversion (re-biasing):
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/Subnorm/inf/NaN values
// extract the sign bit
always_comb
case (Fmt)
`FMT: Sgn = In[`FLEN-1];
`FMT1: Sgn = In[`LEN1-1];
`FMT2: Sgn = In[`LEN2-1];
default: Sgn = 1'bx;
endcase
// convert the smaller precisions' exponents to use the largest precision's bias
// - the LSB is forced to 1 when the exponent field is zero (zero or subnormal input)
always_comb
case (Fmt)
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
default: Exp = {`NE{1'bx}};
endcase
// extract the fraction, padding the smaller formats with trailing zeroes
always_comb
case (Fmt)
`FMT: Frac = In[`NF-1:0];
`FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)};
`FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)};
default: Frac = {`NF{1'bx}};
endcase
// is the exponent all 1's
always_comb
case (Fmt)
`FMT: ExpMax = &In[`FLEN-2:`NF];
`FMT1: ExpMax = &In[`LEN1-2:`NF1];
`FMT2: ExpMax = &In[`LEN2-2:`NF2];
default: ExpMax = 1'bx;
endcase
// is the exponent non-zero
always_comb
case (Fmt)
`FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double)
`FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precision (`LEN1 - double or single)
`FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precision (`LEN2 - single or half)
default: ExpNonZero = 1'bx;
endcase
// example single to double exponent conversion (re-biasing):
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/Subnorm/inf/NaN values
end else if (`FPSIZES == 4) begin // if all precisions are supported - quad, double, single, and half
// quad | double | single | half
//-------------------------------------------------------------------
// `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number
// `Q_NE | `D_NE | `S_NE | `H_NE length of exponent
// `Q_NF | `D_NF | `S_NF | `H_NF length of fraction
// `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value
// `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10
// convert the smaller precisions' exponents to use the largest precision's bias
// - the LSB is forced to 1 when the exponent field is zero (zero or subnormal input)
always_comb
case (Fmt)
`FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero};
`FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero};
`FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero};
default: Exp = {`NE{1'bx}};
endcase
// Check NaN boxing: all bits above the selected format's length must be 1
always_comb
case (Fmt)
2'b11: BadNaNBox = 0;
2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN];
endcase
// is the exponent all 1's
always_comb
case (Fmt)
`FMT: ExpMax = &In[`FLEN-2:`NF];
`FMT1: ExpMax = &In[`LEN1-2:`NF1];
`FMT2: ExpMax = &In[`LEN2-2:`NF2];
default: ExpMax = 1'bx;
endcase
// extract sign bit
always_comb
case (Fmt)
2'b11: Sgn = In[`Q_LEN-1];
2'b01: Sgn = In[`D_LEN-1];
2'b00: Sgn = In[`S_LEN-1];
2'b10: Sgn = In[`H_LEN-1];
endcase
end else if (`FPSIZES == 4) begin // if all precisions are supported - quad, double, single, and half
// quad | double | single | half
//-------------------------------------------------------------------
// `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number
// `Q_NE | `D_NE | `S_NE | `H_NE length of exponent
// `Q_NF | `D_NF | `S_NF | `H_NF length of fraction
// `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value
// `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10
// extract the fraction, padding the smaller formats with trailing zeroes
always_comb
case (Fmt)
2'b11: Frac = In[`Q_NF-1:0];
2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
endcase
// Check NaN boxing: all bits above the selected format's length must be 1
always_comb
case (Fmt)
2'b11: BadNaNBox = 0;
2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN];
2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN];
2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN];
endcase
// is the exponent non-zero
always_comb
case (Fmt)
2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF];
2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF];
endcase
// extract sign bit
always_comb
case (Fmt)
2'b11: Sgn = In[`Q_LEN-1];
2'b01: Sgn = In[`D_LEN-1];
2'b00: Sgn = In[`S_LEN-1];
2'b10: Sgn = In[`H_LEN-1];
endcase
// extract the fraction, padding the smaller formats with trailing zeroes
always_comb
case (Fmt)
2'b11: Frac = In[`Q_NF-1:0];
2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)};
2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)};
2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)};
endcase
// is the exponent non-zero
always_comb
case (Fmt)
2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF];
2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF];
2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF];
2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF];
endcase
// example single to double exponent conversion (re-biasing):
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/Subnorm/inf/NaN values
// convert the double, single and half precision exponents into quad precision
// the exponent LSB is forced to 1 if the exponent field is zero (zero or subnormal input)
always_comb
case (Fmt)
2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero};
endcase
// example single to double exponent conversion (re-biasing):
// 1023 = 0011 1111 1111
// 127 = 0000 0111 1111 (subtract this)
// 896 = 0011 1000 0000
// sexp = 0000 bbbb bbbb (add this) b = bit d = ~b
// dexp = 0bdd dbbb bbbb
// also need to take into account possible zero/Subnorm/inf/NaN values
// convert the double, single and half precision exponents into quad precision
// the exponent LSB is forced to 1 if the exponent field is zero (zero or subnormal input)
always_comb
case (Fmt)
2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero};
2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero};
2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero};
2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero};
endcase
// is the exponent all 1's
always_comb
case (Fmt)
2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
2'b10: ExpMax = &In[`H_LEN-2:`H_NF];
endcase
// is the exponent all 1's
always_comb
case (Fmt)
2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF];
2'b01: ExpMax = &In[`D_LEN-2:`D_NF];
2'b00: ExpMax = &In[`S_LEN-2:`S_NF];
2'b10: ExpMax = &In[`H_LEN-2:`H_NF];
endcase
end
end
// Output logic
// NaN and Inf are gated by En; Zero, Man and FracZero are not
assign FracZero = ~|Frac; // is the fraction zero?
assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if Subnormal or zero) to create the significand
assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN? (a badly NaN-boxed input is also treated as NaN)
assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a signaling NaN? (fraction MSB clear; bad NaN boxing does not count)
assign Inf = ExpMax & FracZero &En; // is the input infinity?
assign Zero = ~ExpNonZero & FracZero; // is the input zero?
// Output logic
// NaN and Inf are gated by En; Zero, Man and FracZero are not
assign FracZero = ~|Frac; // is the fraction zero?
assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if Subnormal or zero) to create the significand
assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN? (a badly NaN-boxed input is also treated as NaN)
assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a signaling NaN? (fraction MSB clear; bad NaN boxing does not count)
assign Inf = ExpMax & FracZero &En; // is the input infinity?
assign Zero = ~ExpNonZero & FracZero; // is the input zero?
endmodule

View file

@ -39,16 +39,18 @@ module atomic (
input logic IgnoreRequest,
output logic [`XLEN-1:0] IMAWriteDataM,
output logic SquashSCW,
output logic [1:0] LSURWM);
output logic [1:0] LSURWM
);
logic [`XLEN-1:0] AMOResult;
logic MemReadM;
logic [`XLEN-1:0] AMOResult;
logic MemReadM;
amoalu amoalu(.srca(ReadDataM), .srcb(IHWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]),
.result(AMOResult));
mux2 #(`XLEN) wdmux(IHWriteDataM, AMOResult, LSUAtomicM[1], IMAWriteDataM);
assign MemReadM = PreLSURWM[1] & ~IgnoreRequest;
lrsc lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .PAdrM,
.SquashSCW, .LSURWM);
lrsc lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .PAdrM, .SquashSCW, .LSURWM);
endmodule

View file

@ -35,10 +35,10 @@ module dtim(
output logic [`LLEN-1:0] ReadDataWordM
);
logic we;
logic we;
localparam ADDR_WDITH = $clog2(`DTIM_RANGE/8);
localparam OFFSET = $clog2(`LLEN/8);
localparam OFFSET = $clog2(`LLEN/8);
assign we = MemRWM[0] & ~FlushW; // have to ignore write if Trap.

View file

@ -29,49 +29,50 @@
`include "wally-config.vh"
// endianswap: reverses the byte order of a when BigEndianM is set; otherwise y = a.
// LEN of 128 and 64 have dedicated branches; any other LEN falls through to the
// last branch, which swaps the four bytes of bits [31:0].
module endianswap #(parameter LEN=`XLEN) (
input logic BigEndianM,
input logic [LEN-1:0] a,
output logic [LEN-1:0] y);
input logic BigEndianM, // swap byte order when set
input logic [LEN-1:0] a, // input word
output logic [LEN-1:0] y // output word, byte-reversed when BigEndianM is set
);
if(LEN == 128) begin
always_comb
if (BigEndianM) begin // swap endianness
y[127:120] = a[7:0];
y[119:112] = a[15:8];
y[111:104] = a[23:16];
y[103:96] = a[31:24];
y[95:88] = a[39:32];
y[87:80] = a[47:40];
y[79:72] = a[55:48];
y[71:64] = a[63:56];
y[63:56] = a[71:64];
y[55:48] = a[79:72];
y[47:40] = a[87:80];
y[39:32] = a[95:88];
y[31:24] = a[103:96];
y[23:16] = a[111:104];
y[15:8] = a[119:112];
y[7:0] = a[127:120];
end else y = a;
if (BigEndianM) begin // swap endianness
y[127:120] = a[7:0];
y[119:112] = a[15:8];
y[111:104] = a[23:16];
y[103:96] = a[31:24];
y[95:88] = a[39:32];
y[87:80] = a[47:40];
y[79:72] = a[55:48];
y[71:64] = a[63:56];
y[63:56] = a[71:64];
y[55:48] = a[79:72];
y[47:40] = a[87:80];
y[39:32] = a[95:88];
y[31:24] = a[103:96];
y[23:16] = a[111:104];
y[15:8] = a[119:112];
y[7:0] = a[127:120];
end else y = a;
end else if(LEN == 64) begin
always_comb
if (BigEndianM) begin // swap endianness
y[63:56] = a[7:0];
y[55:48] = a[15:8];
y[47:40] = a[23:16];
y[39:32] = a[31:24];
y[31:24] = a[39:32];
y[23:16] = a[47:40];
y[15:8] = a[55:48];
y[7:0] = a[63:56];
end else y = a;
if (BigEndianM) begin // swap endianness
y[63:56] = a[7:0];
y[55:48] = a[15:8];
y[47:40] = a[23:16];
y[39:32] = a[31:24];
y[31:24] = a[39:32];
y[23:16] = a[47:40];
y[15:8] = a[55:48];
y[7:0] = a[63:56];
end else y = a;
end else begin
always_comb
if (BigEndianM) begin // swap endianness (32-bit case)
y[31:24] = a[7:0];
y[23:16] = a[15:8];
y[15:8] = a[23:16];
y[7:0] = a[31:24];
y[31:24] = a[7:0];
y[23:16] = a[15:8];
y[15:8] = a[23:16];
y[7:0] = a[31:24];
end else y = a;
end
endmodule

View file

@ -27,22 +27,22 @@
`include "wally-config.vh"
module lrsc
(
input logic clk, reset,
input logic StallW,
input logic MemReadM,
input logic [1:0] PreLSURWM,
output logic [1:0] LSURWM,
input logic [1:0] LSUAtomicM,
input logic [`PA_BITS-1:0] PAdrM, // from mmu to dcache
output logic SquashSCW
module lrsc(
input logic clk, reset,
input logic StallW,
input logic MemReadM,
input logic [1:0] PreLSURWM,
output logic [1:0] LSURWM,
input logic [1:0] LSUAtomicM,
input logic [`PA_BITS-1:0] PAdrM, // from mmu to dcache
output logic SquashSCW
);
// Handle atomic load reserved / store conditional
logic [`PA_BITS-1:2] ReservationPAdrW;
logic ReservationValidM, ReservationValidW;
logic lrM, scM, WriteAdrMatchM;
logic SquashSCM;
logic [`PA_BITS-1:2] ReservationPAdrW;
logic ReservationValidM, ReservationValidW;
logic lrM, scM, WriteAdrMatchM;
logic SquashSCM;
assign lrM = MemReadM & LSUAtomicM[0];
assign scM = PreLSURWM[0] & LSUAtomicM[0];
@ -55,6 +55,7 @@ module lrsc
else if (scM) ReservationValidM = 0; // clear valid on store to same address or any sc
else ReservationValidM = ReservationValidW; // otherwise don't change valid
end
flopenr #(`PA_BITS-2) resadrreg(clk, reset, lrM & ~StallW, PAdrM[`PA_BITS-1:2], ReservationPAdrW); // could drop clear on this one but not valid
flopenr #(1) resvldreg(clk, reset, ~StallW, ReservationValidM, ReservationValidW);
flopenr #(1) squashreg(clk, reset, ~StallW, SquashSCM, SquashSCW);

View file

@ -32,63 +32,63 @@
`include "wally-config.vh"
module lsu (
input logic clk, reset,
input logic StallM, FlushM, StallW, FlushW,
output logic LSUStallM, // LSU stalls pipeline during a multicycle operation.
// connected to cpu (controls)
input logic [1:0] MemRWM, // Read/Write control
input logic [2:0] Funct3M, // Size of memory operation
input logic [6:0] Funct7M, // Atomic memory operation function
input logic [1:0] AtomicM, // Atomic memory operation
input logic FlushDCacheM, // Flush D cache to next level of memory
output logic CommittedM, // Delay interrupts while memory operation in flight
output logic SquashSCW, // Store conditional failed disable write to GPR
output logic DCacheMiss, // D cache miss for performance counters
output logic DCacheAccess, // D cache memory access for performance counters
// address and write data
input logic [`XLEN-1:0] IEUAdrE, // Execution stage memory address
(* mark_debug = "true" *) output logic [`XLEN-1:0] IEUAdrM, // Memory stage memory address
(* mark_debug = "true" *) input logic [`XLEN-1:0] WriteDataM, // Write data from IEU
output logic [`LLEN-1:0] ReadDataW, // Read data to IEU or FPU
// cpu privilege
input logic [1:0] PrivilegeModeW, // Current privilege mode
input logic BigEndianM, // Swap byte order to big endian
input logic sfencevmaM, // Virtual memory address fence
// fpu
input logic [`FLEN-1:0] FWriteDataM, // Write data from FPU
input logic FpLoadStoreM, // Selects FPU as store for write data
// faults
output logic LoadPageFaultM, StoreAmoPageFaultM, // Page fault exceptions
output logic LoadMisalignedFaultM, // Load address misaligned fault
output logic LoadAccessFaultM, // Load access fault (PMA)
output logic HPTWInstrAccessFaultM, // HPTW generated access fault during instruction fetch
// cpu hazard unit (trap)
output logic StoreAmoMisalignedFaultM, // Store or AMO address misaligned fault
output logic StoreAmoAccessFaultM, // Store or AMO access fault
// connect to ahb
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] LSUHADDR, // Bus address from LSU to EBU
(* mark_debug = "true" *) input logic [`XLEN-1:0] HRDATA, // Bus read data from LSU to EBU
(* mark_debug = "true" *) output logic [`XLEN-1:0] LSUHWDATA, // Bus write data from LSU to EBU
(* mark_debug = "true" *) input logic LSUHREADY, // Bus ready from LSU to EBU
(* mark_debug = "true" *) output logic LSUHWRITE, // Bus write operation from LSU to EBU
(* mark_debug = "true" *) output logic [2:0] LSUHSIZE, // Bus operation size from LSU to EBU
(* mark_debug = "true" *) output logic [2:0] LSUHBURST, // Bus burst from LSU to EBU
(* mark_debug = "true" *) output logic [1:0] LSUHTRANS, // Bus transaction type from LSU to EBU
(* mark_debug = "true" *) output logic [`XLEN/8-1:0] LSUHWSTRB, // Bus byte write enables from LSU to EBU
// page table walker
input logic [`XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege
input logic [1:0] STATUS_MPP, // Machine previous privilege mode
input logic [`XLEN-1:0] PCF, // Fetch PC
input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk
input logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits
output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB
output logic [1:0] PageType, // Type of page table entry to write to ITLB
output logic ITLBWriteF, // Write PTE to ITLB
output logic SelHPTW, // During a HPTW walk the effective privilege mode becomes S_MODE
input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // PMP address from privileged unit
);
input logic clk, reset,
input logic StallM, FlushM, StallW, FlushW,
output logic LSUStallM, // LSU stalls pipeline during a multicycle operation.
// connected to cpu (controls)
input logic [1:0] MemRWM, // Read/Write control
input logic [2:0] Funct3M, // Size of memory operation
input logic [6:0] Funct7M, // Atomic memory operation function
input logic [1:0] AtomicM, // Atomic memory operation
input logic FlushDCacheM, // Flush D cache to next level of memory
output logic CommittedM, // Delay interrupts while memory operation in flight
output logic SquashSCW, // Store conditional failed disable write to GPR
output logic DCacheMiss, // D cache miss for performance counters
output logic DCacheAccess, // D cache memory access for performance counters
// address and write data
input logic [`XLEN-1:0] IEUAdrE, // Execution stage memory address
(* mark_debug = "true" *) output logic [`XLEN-1:0] IEUAdrM, // Memory stage memory address
(* mark_debug = "true" *) input logic [`XLEN-1:0] WriteDataM, // Write data from IEU
output logic [`LLEN-1:0] ReadDataW, // Read data to IEU or FPU
// cpu privilege
input logic [1:0] PrivilegeModeW, // Current privilege mode
input logic BigEndianM, // Swap byte order to big endian
input logic sfencevmaM, // Virtual memory address fence
// fpu
input logic [`FLEN-1:0] FWriteDataM, // Write data from FPU
input logic FpLoadStoreM, // Selects FPU as store for write data
// faults
output logic LoadPageFaultM, StoreAmoPageFaultM, // Page fault exceptions
output logic LoadMisalignedFaultM, // Load address misaligned fault
output logic LoadAccessFaultM, // Load access fault (PMA)
output logic HPTWInstrAccessFaultM, // HPTW generated access fault during instruction fetch
// cpu hazard unit (trap)
output logic StoreAmoMisalignedFaultM, // Store or AMO address misaligned fault
output logic StoreAmoAccessFaultM, // Store or AMO access fault
// connect to ahb
(* mark_debug = "true" *) output logic [`PA_BITS-1:0] LSUHADDR, // Bus address from LSU to EBU
(* mark_debug = "true" *) input logic [`XLEN-1:0] HRDATA, // Bus read data from LSU to EBU
(* mark_debug = "true" *) output logic [`XLEN-1:0] LSUHWDATA, // Bus write data from LSU to EBU
(* mark_debug = "true" *) input logic LSUHREADY, // Bus ready from LSU to EBU
(* mark_debug = "true" *) output logic LSUHWRITE, // Bus write operation from LSU to EBU
(* mark_debug = "true" *) output logic [2:0] LSUHSIZE, // Bus operation size from LSU to EBU
(* mark_debug = "true" *) output logic [2:0] LSUHBURST, // Bus burst from LSU to EBU
(* mark_debug = "true" *) output logic [1:0] LSUHTRANS, // Bus transaction type from LSU to EBU
(* mark_debug = "true" *) output logic [`XLEN/8-1:0] LSUHWSTRB, // Bus byte write enables from LSU to EBU
// page table walker
input logic [`XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR
input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege
input logic [1:0] STATUS_MPP, // Machine previous privilege mode
input logic [`XLEN-1:0] PCF, // Fetch PC
input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk
input logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits
output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB
output logic [1:0] PageType, // Type of page table entry to write to ITLB
output logic ITLBWriteF, // Write PTE to ITLB
output logic SelHPTW, // During a HPTW walk the effective privilege mode becomes S_MODE
input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit
input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // PMP address from privileged unit
);
logic [`XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer
logic [`XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer

View file

@ -34,7 +34,7 @@ module subwordread
input logic FpLoadStoreM,
input logic BigEndianM,
output logic [`LLEN-1:0] ReadDataM
);
);
logic [7:0] ByteM;
logic [15:0] HalfwordM;

View file

@ -29,7 +29,8 @@
module subwordwrite (
input logic [2:0] LSUFunct3M,
input logic [`LLEN-1:0] IMAFWriteDataM,
output logic [`LLEN-1:0] LittleEndianWriteDataM);
output logic [`LLEN-1:0] LittleEndianWriteDataM
);
// Replicate data for subword writes
if (`LLEN == 128) begin:sww

View file

@ -29,7 +29,8 @@
module swbytemask #(parameter WORDLEN = `XLEN)(
input logic [2:0] Size,
input logic [$clog2(WORDLEN/8)-1:0] Adr,
output logic [WORDLEN/8-1:0] ByteMask);
output logic [WORDLEN/8-1:0] ByteMask
);
assign ByteMask = ((2**(2**Size))-1) << Adr;

View file

@ -11,7 +11,7 @@
//
// Compatible with most of PC16550D with the following known exceptions:
// Generates 2 rather than 1.5 stop bits when 5-bit word length is selected and LCR[2] = 1
// Timeout not ye implemented***
// Timeout not yet implemented***
//
// Documentation: RISC-V System on Chip Design Chapter 15
//
@ -141,15 +141,15 @@ module uartPC16550D(
MCR <= #1 5'b0;
LSR <= #1 8'b01100000;
MSR <= #1 4'b0;
DLL <= #1 8'd1; // this cannot be zero with DLM also zer0.
DLM <= #1 8'b0;
DLL <= #1 8'd1; // this cannot be zero with DLM also zer0.
DLM <= #1 8'b0;
SCR <= #1 8'b0; // not strictly necessary to reset
end else begin
if (~MEMWb) begin
/* verilator lint_off CASEINCOMPLETE */
case (A)
3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section
3'b001: if (DLAB) DLM <= #1 Din; else IER <= #1 Din[3:0];
3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section
3'b001: if (DLAB) DLM <= #1 Din; else IER <= #1 Din[3:0];
3'b010: FCR <= #1 {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing
3'b011: LCR <= #1 Din;
3'b100: MCR <= #1 Din[4:0];