diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv index 68cdfcedb..af159878f 100644 --- a/pipelined/src/fpu/fclassify.sv +++ b/pipelined/src/fpu/fclassify.sv @@ -28,41 +28,41 @@ `include "wally-config.vh" module fclassify ( - input logic Xs, // sign bit - input logic XNaN, // is NaN - input logic XSNaN, // is signaling NaN - input logic XSubnorm, // is Subnormal - input logic XZero, // is zero - input logic XInf, // is infinity - output logic [`XLEN-1:0] ClassRes // classify result + input logic Xs, // sign bit + input logic XNaN, // is NaN + input logic XSNaN, // is signaling NaN + input logic XSubnorm, // is Subnormal + input logic XZero, // is zero + input logic XInf, // is infinity + output logic [`XLEN-1:0] ClassRes // classify result ); - logic PInf, PZero, PNorm, PSubnorm; // is the input a positive infinity/zero/normal/subnormal - logic NInf, NZero, NNorm, NSubnorm; // is the input a negitive infinity/zero/normal/subnormal - logic XNorm; // is the input normal - - // determine the sub categories - assign XNorm= ~(XNaN | XInf| XSubnorm| XZero); - assign PInf = ~Xs&XInf; - assign NInf = Xs&XInf; - assign PNorm = ~Xs&XNorm; - assign NNorm = Xs&XNorm; - assign PSubnorm = ~Xs&XSubnorm; - assign NSubnorm = Xs&XSubnorm; - assign PZero = ~Xs&XZero; - assign NZero = Xs&XZero; + logic PInf, PZero, PNorm, PSubnorm; // is the input a positive infinity/zero/normal/subnormal + logic NInf, NZero, NNorm, NSubnorm; // is the input a negitive infinity/zero/normal/subnormal + logic XNorm; // is the input normal + + // determine the sub categories + assign XNorm= ~(XNaN | XInf| XSubnorm| XZero); + assign PInf = ~Xs&XInf; + assign NInf = Xs&XInf; + assign PNorm = ~Xs&XNorm; + assign NNorm = Xs&XNorm; + assign PSubnorm = ~Xs&XSubnorm; + assign NSubnorm = Xs&XSubnorm; + assign PZero = ~Xs&XZero; + assign NZero = Xs&XZero; - // determine sub category and combine into the result - // bit 0 - -Inf - // bit 1 - -Norm - // bit 2 - -Subnorm - // bit 3 - -Zero 
- // bit 4 - +Zero - // bit 5 - +Subnorm - // bit 6 - +Norm - // bit 7 - +Inf - // bit 8 - signaling NaN - // bit 9 - quiet NaN - assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf}; + // determine sub category and combine into the result + // bit 0 - -Inf + // bit 1 - -Norm + // bit 2 - -Subnorm + // bit 3 - -Zero + // bit 4 - +Zero + // bit 5 - +Subnorm + // bit 6 - +Norm + // bit 7 - +Inf + // bit 8 - signaling NaN + // bit 9 - quiet NaN + assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PSubnorm, PZero, NZero, NSubnorm, NNorm, NInf}; endmodule diff --git a/pipelined/src/fpu/fcmp.sv b/pipelined/src/fpu/fcmp.sv index e749e9576..3be33d997 100755 --- a/pipelined/src/fpu/fcmp.sv +++ b/pipelined/src/fpu/fcmp.sv @@ -37,124 +37,124 @@ // 011 less than or equal module fcmp ( - input logic [`FMTBITS-1:0] Fmt, // format of fp number - input logic [2:0] OpCtrl, // see above table - input logic Xs, Ys, // input signs - input logic [`NE-1:0] Xe, Ye, // input exponents - input logic [`NF:0] Xm, Ym, // input mantissa - input logic XZero, YZero, // is zero - input logic XNaN, YNaN, // is NaN - input logic XSNaN, YSNaN, // is signaling NaN - input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker) - output logic CmpNV, // invalid flag - output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result - output logic [`XLEN-1:0] CmpIntRes // compare integer result - ); + input logic [`FMTBITS-1:0] Fmt, // format of fp number + input logic [2:0] OpCtrl, // see above table + input logic Xs, Ys, // input signs + input logic [`NE-1:0] Xe, Ye, // input exponents + input logic [`NF:0] Xm, Ym, // input mantissa + input logic XZero, YZero, // is zero + input logic XNaN, YNaN, // is NaN + input logic XSNaN, YSNaN, // is signaling NaN + input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker) + output logic CmpNV, // invalid flag + output logic [`FLEN-1:0] CmpFpRes, // compare 
floating-point result + output logic [`XLEN-1:0] CmpIntRes // compare integer result +); - logic LTabs, LT, EQ; // is X < or > or = Y - logic [`FLEN-1:0] NaNRes; // NaN result - logic BothZero; // are both inputs zero - logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN - - assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers - assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison - assign EQ = (X == Y); + logic LTabs, LT, EQ; // is X < or > or = Y + logic [`FLEN-1:0] NaNRes; // NaN result + logic BothZero; // are both inputs zero + logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN + + assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers + assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison + assign EQ = (X == Y); - assign BothZero = XZero&YZero; - assign EitherNaN = XNaN|YNaN; - assign EitherSNaN = XSNaN|YSNaN; + assign BothZero = XZero&YZero; + assign EitherNaN = XNaN|YNaN; + assign EitherSNaN = XSNaN|YSNaN; - // flags - // Min/Max - if an input is a signaling NaN set invalid flag - // LT/LE - signaling - sets invalid if NaN input - // EQ - quiet - sets invalid if signaling NaN input - always_comb begin - case (OpCtrl[2:0]) - 3'b110: CmpNV = EitherSNaN;//min - 3'b101: CmpNV = EitherSNaN;//max - 3'b010: CmpNV = EitherSNaN;//equal - 3'b001: CmpNV = EitherNaN;//less than - 3'b011: CmpNV = EitherNaN;//less than or equal - default: CmpNV = 1'bx; - endcase - end + // flags + // Min/Max - if an input is a signaling NaN set invalid flag + // LT/LE - signaling - sets invalid if NaN input + // EQ - quiet - sets invalid if signaling NaN input + always_comb begin + case (OpCtrl[2:0]) + 3'b110: CmpNV = EitherSNaN;//min + 3'b101: CmpNV = EitherSNaN;//max + 3'b010: CmpNV = EitherSNaN;//equal + 3'b001: CmpNV = EitherNaN;//less than + 3'b011: CmpNV = EitherNaN;//less than or equal + 
default: CmpNV = 1'bx; + endcase + end - // fmin/fmax of two NaNs returns a quiet NaN of the appropriate size - // for IEEE, return the payload of X - // for RISC-V, return the canonical NaN + // fmin/fmax of two NaNs returns a quiet NaN of the appropriate size + // for IEEE, return the payload of X + // for RISC-V, return the canonical NaN - // select the NaN result - if (`FPSIZES == 1) - if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + // select the NaN result + if (`FPSIZES == 1) + if(`IEEE754) assign NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + else assign NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - else if (`FPSIZES == 2) - if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - else assign NaNRes = Fmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - - else if (`FPSIZES == 3) - always_comb - case (Fmt) - `FMT: - if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - `FMT1: - if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - `FMT2: - if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; - else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - default: NaNRes = {`FLEN{1'bx}}; - endcase + else if (`FPSIZES == 2) + if(`IEEE754) assign NaNRes = Fmt ? {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + else assign NaNRes = Fmt ? 
{1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + + else if (`FPSIZES == 3) + always_comb + case (Fmt) + `FMT: + if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + `FMT1: + if(`IEEE754) NaNRes = {{`FLEN-`LEN1{1'b1}}, Xs, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + else NaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + `FMT2: + if(`IEEE754) NaNRes = {{`FLEN-`LEN2{1'b1}}, Xs, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; + else NaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + default: NaNRes = {`FLEN{1'bx}}; + endcase - else if (`FPSIZES == 4) - always_comb - case (Fmt) - 2'h3: - if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - 2'h1: - if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; - else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - 2'h0: - if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; - else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - 2'h2: - if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; - else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - endcase + else if (`FPSIZES == 4) + always_comb + case (Fmt) + 2'h3: + if(`IEEE754) NaNRes = {Xs, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + else NaNRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + 2'h1: + if(`IEEE754) NaNRes = {{`FLEN-`D_LEN{1'b1}}, Xs, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; + else NaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + 2'h0: + if(`IEEE754) NaNRes = {{`FLEN-`S_LEN{1'b1}}, Xs, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; + else NaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + 2'h2: + 
if(`IEEE754) NaNRes = {{`FLEN-`H_LEN{1'b1}}, Xs, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; + else NaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + endcase - // Min/Max - // - outputs the min/max of X and Y - // - -0 < 0 - // - if both are NaN return quiet X - // - if one is a NaN output the non-NaN - always_comb - if(OpCtrl[0]) // MAX - if(XNaN) - if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN - else CmpFpRes = Y; // X = NaN Y != NaN - else - if(YNaN) CmpFpRes = X; // X != NaN Y = NaN - else // X,Y != NaN - if(LT) CmpFpRes = Y; // X < Y - else CmpFpRes = X; // X > Y - else // MIN - if(XNaN) - if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN - else CmpFpRes = Y; // X = NaN Y != NaN - else - if(YNaN) CmpFpRes = X; // X != NaN Y = NaN - else // X,Y != NaN - if(LT) CmpFpRes = X; // X < Y - else CmpFpRes = Y; // X > Y - - // LT/LE/EQ - // - -0 = 0 - // - inf = inf and -inf = -inf - // - return 0 if comparison with NaN (unordered) - assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN}; - + // Min/Max + // - outputs the min/max of X and Y + // - -0 < 0 + // - if both are NaN return quiet X + // - if one is a NaN output the non-NaN + always_comb + if(OpCtrl[0]) // MAX + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = Y; // X < Y + else CmpFpRes = X; // X > Y + else // MIN + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = X; // X < Y + else CmpFpRes = Y; // X > Y + + // LT/LE/EQ + // - -0 = 0 + // - inf = inf and -inf = -inf + // - return 0 if comparison with NaN (unordered) + assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN}; + endmodule diff --git 
a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index e70947526..2f121a75a 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -30,208 +30,206 @@ `include "wally-config.vh" module fcvt ( - input logic Xs, // input's sign - input logic [`NE-1:0] Xe, // input's exponent - input logic [`NF:0] Xm, // input's fraction - input logic [`XLEN-1:0] Int, // integer input - from IEU - input logic [2:0] OpCtrl, // choose which opperation (look below for values) - input logic ToInt, // is fp->int (since it's writting to the integer register) - input logic XZero, // is the input zero - input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) - output logic [`NE:0] Ce, // the calculated expoent - output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by - output logic ResSubnormUf,// does the result underflow or is subnormal - output logic Cs, // the result's sign - output logic IntZero, // is the integer zero? - output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder) - ); + input logic Xs, // input's sign + input logic [`NE-1:0] Xe, // input's exponent + input logic [`NF:0] Xm, // input's fraction + input logic [`XLEN-1:0] Int, // integer input - from IEU + input logic [2:0] OpCtrl, // choose which opperation (look below for values) + input logic ToInt, // is fp->int (since it's writting to the integer register) + input logic XZero, // is the input zero + input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) + output logic [`NE:0] Ce, // the calculated expoent + output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by + output logic ResSubnormUf,// does the result underflow or is subnormal + output logic Cs, // the result's sign + output logic IntZero, // is the integer zero? 
+ output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder) + ); - // OpCtrls: - // fp->fp conversions: {0, output precision} - only one of the operations writes to the int register - // half - 10 - // single - 00 - // double - 01 - // quad - 11 - // int<->fp conversions: {is int->fp?, is the integer 64-bit?, is the integer signed?} - // bit 2 bit 1 bit 0 - // for example: signed long -> single floating point has the OpCode 101 + // OpCtrls: + // fp->fp conversions: {0, output precision} - only one of the operations writes to the int register + // half - 10 + // single - 00 + // double - 01 + // quad - 11 + // int<->fp conversions: {is int->fp?, is the integer 64-bit?, is the integer signed?} + // bit 2 bit 1 bit 0 + // for example: signed long -> single floating point has the OpCode 101 + + logic [`FMTBITS-1:0] OutFmt; // format of the output + logic [`XLEN-1:0] PosInt; // the positive integer input + logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size + logic [`NE-2:0] NewBias; // the bias of the final result + logic [`NE-1:0] OldExp; // the old exponent + logic Signed; // is the opperation with a signed integer? + logic Int64; // is the integer 64 bits? + logic IntToFp; // is the opperation an int->fp conversion? + logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder) + logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC + // seperate OpCtrl for code readability + assign Signed = OpCtrl[0]; + assign Int64 = OpCtrl[1]; + assign IntToFp = OpCtrl[2]; - logic [`FMTBITS-1:0] OutFmt; // format of the output - logic [`XLEN-1:0] PosInt; // the positive integer input - logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size - logic [`NE-2:0] NewBias; // the bias of the final result - logic [`NE-1:0] OldExp; // the old exponent - logic Signed; // is the opperation with a signed integer? - logic Int64; // is the integer 64 bits? 
- logic IntToFp; // is the opperation an int->fp conversion? - logic [`CVTLEN:0] LzcInFull; // input to the Leading Zero Counter (priority encoder) - logic [`LOGCVTLEN-1:0] LeadingZeros; // output from the LZC + // choose the ouptut format depending on the opperation + // - fp -> fp: OpCtrl contains the percision of the output + // - int -> fp: Fmt contains the percision of the output + if (`FPSIZES == 2) + assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT); + else if (`FPSIZES == 3 | `FPSIZES == 4) + assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0]; - // seperate OpCtrl for code readability - assign Signed = OpCtrl[0]; - assign Int64 = OpCtrl[1]; - assign IntToFp = OpCtrl[2]; + /////////////////////////////////////////////////////////////////////////// + // negation + /////////////////////////////////////////////////////////////////////////// + // 1) negate the input if the input is a negitive singed integer + // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed) - // choose the ouptut format depending on the opperation - // - fp -> fp: OpCtrl contains the percision of the output - // - int -> fp: Fmt contains the percision of the output - if (`FPSIZES == 2) - assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT); - else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0]; + assign PosInt = Cs ? -Int : Int; + assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt; + assign IntZero = ~|TrimInt; + + /////////////////////////////////////////////////////////////////////////// + // lzc + /////////////////////////////////////////////////////////////////////////// + + // choose the input to the leading zero counter i.e. priority encoder + // int -> fp : | positive integer | 00000... (if needed) | + // fp -> fp : | fraction | 00000... (if needed) | + assign LzcInFull = IntToFp ? 
{TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} : + {Xm, {`CVTLEN-`NF{1'b0}}}; + + // used as shifter input in postprocessor + assign LzcIn = LzcInFull[`CVTLEN-1:0]; + + lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros)); + + /////////////////////////////////////////////////////////////////////////// + // exp calculations + /////////////////////////////////////////////////////////////////////////// + + // Select the bias of the output + // fp -> int : select 1 + // ??? -> fp : pick the new bias depending on the output format + if (`FPSIZES == 1) begin + assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS); + + end else if (`FPSIZES == 2) begin + logic [`NE-2:0] NewBiasToFp; + assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; + + end else if (`FPSIZES == 3) begin + logic [`NE-2:0] NewBiasToFp; + always_comb + case (OutFmt) + `FMT: NewBiasToFp = (`NE-1)'(`BIAS); + `FMT1: NewBiasToFp = (`NE-1)'(`BIAS1); + `FMT2: NewBiasToFp = (`NE-1)'(`BIAS2); + default: NewBiasToFp = {`NE-1{1'bx}}; + endcase + assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; + + end else if (`FPSIZES == 4) begin + logic [`NE-2:0] NewBiasToFp; + always_comb + case (OutFmt) + 2'h3: NewBiasToFp = (`NE-1)'(`Q_BIAS); + 2'h1: NewBiasToFp = (`NE-1)'(`D_BIAS); + 2'h0: NewBiasToFp = (`NE-1)'(`S_BIAS); + 2'h2: NewBiasToFp = (`NE-1)'(`H_BIAS); + endcase + assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; + end - /////////////////////////////////////////////////////////////////////////// - // negation - /////////////////////////////////////////////////////////////////////////// - // 1) negate the input if the input is a negitive singed integer - // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed) - - assign PosInt = Cs ? 
-Int : Int; - assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt; - assign IntZero = ~|TrimInt; - - /////////////////////////////////////////////////////////////////////////// - // lzc - /////////////////////////////////////////////////////////////////////////// - - // choose the input to the leading zero counter i.e. priority encoder - // int -> fp : | positive integer | 00000... (if needed) | - // fp -> fp : | fraction | 00000... (if needed) | - assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} : - {Xm, {`CVTLEN-`NF{1'b0}}}; - - // used as shifter input in postprocessor - assign LzcIn = LzcInFull[`CVTLEN-1:0]; - - lzc #(`CVTLEN+1) lzc (.num(LzcInFull), .ZeroCnt(LeadingZeros)); - - /////////////////////////////////////////////////////////////////////////// - // exp calculations - /////////////////////////////////////////////////////////////////////////// - - // Select the bias of the output - // fp -> int : select 1 - // ??? -> fp : pick the new bias depending on the output format - if (`FPSIZES == 1) begin - assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS); - - end else if (`FPSIZES == 2) begin - logic [`NE-2:0] NewBiasToFp; - assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); - assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; - - end else if (`FPSIZES == 3) begin - logic [`NE-2:0] NewBiasToFp; - always_comb - case (OutFmt) - `FMT: NewBiasToFp = (`NE-1)'(`BIAS); - `FMT1: NewBiasToFp = (`NE-1)'(`BIAS1); - `FMT2: NewBiasToFp = (`NE-1)'(`BIAS2); - default: NewBiasToFp = {`NE-1{1'bx}}; - endcase - assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; - - end else if (`FPSIZES == 4) begin - logic [`NE-2:0] NewBiasToFp; - always_comb - case (OutFmt) - 2'h3: NewBiasToFp = (`NE-1)'(`Q_BIAS); - 2'h1: NewBiasToFp = (`NE-1)'(`D_BIAS); - 2'h0: NewBiasToFp = (`NE-1)'(`S_BIAS); - 2'h2: NewBiasToFp = (`NE-1)'(`H_BIAS); - endcase - assign NewBias = ToInt ? 
(`NE-1)'(1) : NewBiasToFp; - end + // select the old exponent + // int -> fp : largest bias + XLEN-1 + // fp -> ??? : XExp + assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe; + + // calculate CalcExp + // fp -> fp : + // - XExp - Largest bias + new bias - (LeadingZeros+1) + // only do ^ if the input was subnormal + // - convert the expoenent to the final preciaion (Exp - oldBias + newBias) + // - correct the expoent when there is a normalization shift ( + LeadingZeros+1) + // - the plus 1 is built into the leading zeros by counting the leading zeroes in the mantissa rather than the fraction + // fp -> int : XExp - Largest Bias + 1 - (LeadingZeros+1) + // | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp + // process: + // - start + // | `XLEN zeros | Mantissa | 0's if nessisary | + // + // - shift left 1 (1) + // | `XLEN-1 zeros |bit| frac | 0's if nessisary | + // . <- binary point + // + // - shift left till unbiased exponent is 0 (XExp - Largest Bias) + // | 0's | Mantissa | 0's if nessisary | + // | keep | + // + // - if the input is subnormal then we dont shift... so the "- LeadingZeros" is just leftovers from other options + // int -> fp : largest bias + XLEN-1 - Largest bias + new bias - LeadingZeros = XLEN-1 + NewBias - LeadingZeros + // Process: + // |XLEN|.0000 + // - shifted right by XLEN (XLEN) + // 000000.|XLEN| + // - shift left to normilize (-LeadingZeros) + // 000000.1... + // - shift left 1 to normalize + // 000001.stuff + // - newBias to make the biased exponent + // + // oldexp - biasold - LeadingZeros + newbias + assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias}; - // select the old exponent - // int -> fp : largest bias + XLEN-1 - // fp -> ??? : XExp - assign OldExp = IntToFp ? 
(`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe; - - // calculate CalcExp - // fp -> fp : - // - XExp - Largest bias + new bias - (LeadingZeros+1) - // only do ^ if the input was subnormal - // - convert the expoenent to the final preciaion (Exp - oldBias + newBias) - // - correct the expoent when there is a normalization shift ( + LeadingZeros+1) - // - the plus 1 is built into the leading zeros by counting the leading zeroes in the mantissa rather than the fraction - // fp -> int : XExp - Largest Bias + 1 - (LeadingZeros+1) - // | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp - // process: - // - start - // | `XLEN zeros | Mantissa | 0's if nessisary | - // - // - shift left 1 (1) - // | `XLEN-1 zeros |bit| frac | 0's if nessisary | - // . <- binary point - // - // - shift left till unbiased exponent is 0 (XExp - Largest Bias) - // | 0's | Mantissa | 0's if nessisary | - // | keep | - // - // - if the input is subnormal then we dont shift... so the "- LeadingZeros" is just leftovers from other options - // int -> fp : largest bias + XLEN-1 - Largest bias + new bias - LeadingZeros = XLEN-1 + NewBias - LeadingZeros - // Process: - // |XLEN|.0000 - // - shifted right by XLEN (XLEN) - // 000000.|XLEN| - // - shift left to normilize (-LeadingZeros) - // 000000.1... 
- // - shift left 1 to normalize - // 000001.stuff - // - newBias to make the biased exponent - // - // oldexp - biasold - LeadingZeros + newbias - assign Ce = {1'b0, OldExp} - (`NE+1)'(`BIAS) - {{`NE-`LOGCVTLEN+1{1'b0}}, (LeadingZeros)} + {2'b0, NewBias}; + // find if the result is dnormal or underflows + // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) + // - can't underflow an integer to Fp conversion + assign ResSubnormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp; - // find if the result is dnormal or underflows - // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) - // - can't underflow an integer to Fp conversion - assign ResSubnormUf = (~|Ce | Ce[`NE])&~XZero&~IntToFp; + /////////////////////////////////////////////////////////////////////////// + // shifter + /////////////////////////////////////////////////////////////////////////// + // kill the shift if it's negitive + // select the amount to shift by + // fp -> int: + // - shift left by CalcExp - essentially shifting until the unbiased exponent = 0 + // - don't shift if supposed to shift right (underflowed or Subnorm input) + // subnormal/undeflowed result fp -> fp: + // - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0 + // ??? -> fp: + // - shift left by LeadingZeros - to shift till the result is normalized + // - only shift fp -> fp if the intital value is subnormal + // - this is a problem because the input to the lzc was the fraction rather than the mantissa + // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? 
+ always_comb + if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}}; + else if (ResSubnormUf) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0]; + else ShiftAmt = LeadingZeros; - /////////////////////////////////////////////////////////////////////////// - // shifter - /////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////////////////////////////// + // sign + /////////////////////////////////////////////////////////////////////////// - // kill the shift if it's negitive - // select the amount to shift by - // fp -> int: - // - shift left by CalcExp - essentially shifting until the unbiased exponent = 0 - // - don't shift if supposed to shift right (underflowed or Subnorm input) - // subnormal/undeflowed result fp -> fp: - // - shift left by NF-1+CalcExp - to shift till the biased expoenent is 0 - // ??? -> fp: - // - shift left by LeadingZeros - to shift till the result is normalized - // - only shift fp -> fp if the intital value is subnormal - // - this is a problem because the input to the lzc was the fraction rather than the mantissa - // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? 
- always_comb - if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}}; - else if (ResSubnormUf) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0]; - else ShiftAmt = LeadingZeros; - - - /////////////////////////////////////////////////////////////////////////// - // sign - /////////////////////////////////////////////////////////////////////////// - - // determine the sign of the result - // - if int -> fp - // - if 64-bit : check the msb of the 64-bit integer input and if it's signed - // - if 32-bit : check the msb of the 32-bit integer input and if it's signed - // - otherwise: the floating point input's sign - always_comb - if(IntToFp) - if(Int64) Cs = Int[`XLEN-1]&Signed; - else Cs = Int[31]&Signed; - else Cs = Xs; + // determine the sign of the result + // - if int -> fp + // - if 64-bit : check the msb of the 64-bit integer input and if it's signed + // - if 32-bit : check the msb of the 32-bit integer input and if it's signed + // - otherwise: the floating point input's sign + always_comb + if(IntToFp) + if(Int64) Cs = Int[`XLEN-1]&Signed; + else Cs = Int[31]&Signed; + else Cs = Xs; endmodule diff --git a/pipelined/src/fpu/fhazard.sv b/pipelined/src/fpu/fhazard.sv index 68c801048..dadbf6d4f 100644 --- a/pipelined/src/fpu/fhazard.sv +++ b/pipelined/src/fpu/fhazard.sv @@ -29,14 +29,14 @@ `include "wally-config.vh" module fhazard( - input logic [4:0] Adr1D, Adr2D, Adr3D, // read data adresses - input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses - input logic FRegWriteE, FRegWriteM, FRegWriteW, // is the fp register being written to - input logic [4:0] RdE, RdM, RdW, // the adress being written to - input logic [1:0] FResSelM, // the result being selected - input logic XEnD, YEnD, ZEnD, // are the inputs needed - output logic FPUStallD, // stall the decode stage - output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value + input logic [4:0] Adr1D, Adr2D, Adr3D, // read data adresses + input logic [4:0] Adr1E, Adr2E, 
Adr3E, // read data adresses + input logic FRegWriteE, FRegWriteM, FRegWriteW, // is the fp register being written to + input logic [4:0] RdE, RdM, RdW, // the adress being written to + input logic [1:0] FResSelM, // the result being selected + input logic XEnD, YEnD, ZEnD, // are the inputs needed + output logic FPUStallD, // stall the decode stage + output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value ); logic MatchDE; // is a value needed in decode stage being worked on in execute stage @@ -73,7 +73,6 @@ module fhazard( if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM // if the needed value is in the writeback stage end else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FResult64W - end endmodule diff --git a/pipelined/src/fpu/fma/fma.sv b/pipelined/src/fpu/fma/fma.sv index b4add41dc..437c698ec 100644 --- a/pipelined/src/fpu/fma/fma.sv +++ b/pipelined/src/fpu/fma/fma.sv @@ -29,69 +29,69 @@ `include "wally-config.vh" module fma( - input logic Xs, Ys, Zs, // input's signs - input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format - input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format - input logic XZero, YZero, ZZero, // is the input zero - input logic [2:0] OpCtrl, // operation control - output logic ASticky, // sticky bit that is calculated during alignment - output logic [3*`NF+3:0] Sm, // the positive sum's significand - output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) - output logic As, // the aligned addend's sign (modified Z sign for other opperations) - output logic Ps, // the product's sign - output logic Ss, // the sum's sign - output logic [`NE+1:0] Se, // the sum's exponent - output logic [$clog2(3*`NF+5)-1:0] SCnt // normalization shift count + input logic Xs, Ys, Zs, // input's signs + input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format + input logic [`NF:0] Xm, Ym, Zm, // input's significands in 
U(0.NF) format + input logic XZero, YZero, ZZero, // is the input zero + input logic [2:0] OpCtrl, // operation control + output logic ASticky, // sticky bit that is calculated during alignment + output logic [3*`NF+3:0] Sm, // the positive sum's significand + output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) + output logic As, // the aligned addend's sign (modified Z sign for other opperations) + output logic Ps, // the product's sign + output logic Ss, // the sum's sign + output logic [`NE+1:0] Se, // the sum's exponent + output logic [$clog2(3*`NF+5)-1:0] SCnt // normalization shift count ); - // OpCtrl: - // Fma: {not multiply-add?, negate prod?, negate Z?} - // 000 - fmadd - // 001 - fmsub - // 010 - fnmsub - // 011 - fnmadd - // 100 - mul - // 110 - add - // 111 - sub + // OpCtrl: + // Fma: {not multiply-add?, negate prod?, negate Z?} + // 000 - fmadd + // 001 - fmsub + // 010 - fnmsub + // 011 - fnmadd + // 100 - mul + // 110 - add + // 111 - sub - logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format - logic [3*`NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF) - logic [3*`NF+3:0] AmInv; // aligned addend's mantissa possibly inverted - logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf) - logic KillProd; // set the product to zero before addition if the product is too small to matter - logic [`NE+1:0] Pe; // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign + logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format + logic [3*`NF+3:0] Am; // addend aligned's mantissa for addition in U(NF+4.2NF) + logic [3*`NF+3:0] AmInv; // aligned addend's mantissa possibly inverted + logic [2*`NF+1:0] PmKilled; // the product's mantissa possibly killed U(2.2Nf) + logic KillProd; // set the product to zero before addition if the product is too small to matter + logic [`NE+1:0] Pe; // the product's exponent B(NE+2.0) format; 
adds 2 bits to allow for size of number and negative sign - /////////////////////////////////////////////////////////////////////////////// - // Calculate the product - // - When multipliying two fp numbers, add the exponents - // - Subtract the bias (XExp + YExp has two biases, one from each exponent) - // - If the product is zero then kill the exponent - // - Multiply the mantissas - /////////////////////////////////////////////////////////////////////////////// - + /////////////////////////////////////////////////////////////////////////////// + // Calculate the product + // - When multipliying two fp numbers, add the exponents + // - Subtract the bias (XExp + YExp has two biases, one from each exponent) + // - If the product is zero then kill the exponent + // - Multiply the mantissas + /////////////////////////////////////////////////////////////////////////////// + - // calculate the product's exponent - fmaexpadd expadd(.Xe, .Ye, .XZero, .YZero, .Pe); + // calculate the product's exponent + fmaexpadd expadd(.Xe, .Ye, .XZero, .YZero, .Pe); - // multiplication of the mantissa's - fmamult mult(.Xm, .Ym, .Pm); - - // calculate the signs and take the opperation into account - fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA); + // multiplication of the mantissa's + fmamult mult(.Xm, .Ym, .Pm); + + // calculate the signs and take the opperation into account + fmasign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As, .InvA); - /////////////////////////////////////////////////////////////////////////////// - // Alignment shifter - /////////////////////////////////////////////////////////////////////////////// - fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd); - - // /////////////////////////////////////////////////////////////////////////////// - // // Addition/LZA - // /////////////////////////////////////////////////////////////////////////////// - - fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, 
.Sm, .Se, .Ss); + /////////////////////////////////////////////////////////////////////////////// + // Alignment shifter + /////////////////////////////////////////////////////////////////////////////// + fmaalign align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ASticky, .KillProd); + + // /////////////////////////////////////////////////////////////////////////////// + // // Addition/LZA + // /////////////////////////////////////////////////////////////////////////////// + + fmaadd add(.Am, .Pm, .Ze, .Pe, .Ps, .KillProd, .ASticky, .AmInv, .PmKilled, .InvA, .Sm, .Se, .Ss); - fmalza #(3*`NF+4) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt); - + fmalza #(3*`NF+4) lza(.A(AmInv), .Pm(PmKilled), .Cin(InvA & (~ASticky | KillProd)), .sub(InvA), .SCnt); + endmodule diff --git a/pipelined/src/fpu/fma/fmaadd.sv b/pipelined/src/fpu/fma/fmaadd.sv index 20899cc83..b8b61bd6e 100644 --- a/pipelined/src/fpu/fma/fmaadd.sv +++ b/pipelined/src/fpu/fma/fmaadd.sv @@ -29,48 +29,49 @@ `include "wally-config.vh" module fmaadd( - input logic [3*`NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) - input logic [`NE-1:0] Ze, // exponent of Z - input logic Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other opperations) - input logic [`NE+1:0] Pe, // product's exponet - input logic [2*`NF+1:0] Pm, // the product's mantissa - input logic InvA, // invert the aligned addend - input logic KillProd, // should the product be set to 0 - input logic ASticky, // Alighed addend's sticky bit - output logic [3*`NF+3:0] AmInv, // aligned addend possibly inverted - output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed - output logic Ss, // sum's sign - output logic [`NE+1:0] Se, // sum's exponent - output logic [3*`NF+3:0] Sm // the positive sum + input logic [3*`NF+3:0] Am, // aligned addend's mantissa for addition in U(NF+5.2NF+1) + input logic [`NE-1:0] Ze, // exponent of Z + input 
logic Ps, // the product sign and the alligend addeded's sign (Modified Z sign for other opperations) + input logic [`NE+1:0] Pe, // product's exponet + input logic [2*`NF+1:0] Pm, // the product's mantissa + input logic InvA, // invert the aligned addend + input logic KillProd, // should the product be set to 0 + input logic ASticky, // Alighed addend's sticky bit + output logic [3*`NF+3:0] AmInv, // aligned addend possibly inverted + output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed + output logic Ss, // sum's sign + output logic [`NE+1:0] Se, // sum's exponent + output logic [3*`NF+3:0] Sm // the positive sum ); - logic [3*`NF+3:0] PreSum, NegPreSum; // possibly negitive sum - logic NegSum; // was the sum negitive - /////////////////////////////////////////////////////////////////////////////// - // Addition - /////////////////////////////////////////////////////////////////////////////// - - // Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition - assign AmInv = {3*`NF+4{InvA}}^Am; - // Kill the product if the product is too small to effect the addition (determined in fma1.sv) - assign PmKilled = {2*`NF+2{~KillProd}}&Pm; - // Do the addition - // - calculate a positive and negitive sum in parallel - // if there was a small negitive number killed in the alignment stage one needs to be subtracted from the sum - // prod - addend where some of the addend is put into the sticky bit then don't add +1 from negation - // ie ~(InvA&ASticky&~KillProd)&InvA = (~ASticky|KillProd)&InvA - // addend - prod where product is killed (and not exactly zero) then don't add +1 from negation - // ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA - // in this case this result is only ever selected when InvA=1 so we can remove &InvA - assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*`NF+4{1'b0}}, (~ASticky|KillProd)&InvA}; - assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 1'b0} + 
{(3*`NF+2)'(0), ~ASticky|~KillProd, 1'b0}; - - // Choose the positive sum and accompanying LZA result. - assign Sm = NegSum ? NegPreSum : PreSum; - // is the result negitive - // if p - z is the Sum negitive - // if -p + z is the Sum positive - // if -p - z then the Sum is negitive - assign Ss = NegSum^Ps; - assign Se = KillProd ? {2'b0, Ze} : Pe; + logic [3*`NF+3:0] PreSum, NegPreSum; // possibly negitive sum + logic NegSum; // was the sum negitive + + /////////////////////////////////////////////////////////////////////////////// + // Addition + /////////////////////////////////////////////////////////////////////////////// + + // Choose an inverted or non-inverted addend. Put carry into adder/LZA for addition + assign AmInv = {3*`NF+4{InvA}}^Am; + // Kill the product if the product is too small to effect the addition (determined in fma1.sv) + assign PmKilled = {2*`NF+2{~KillProd}}&Pm; + // Do the addition + // - calculate a positive and negitive sum in parallel + // if there was a small negitive number killed in the alignment stage one needs to be subtracted from the sum + // prod - addend where some of the addend is put into the sticky bit then don't add +1 from negation + // ie ~(InvA&ASticky&~KillProd)&InvA = (~ASticky|KillProd)&InvA + // addend - prod where product is killed (and not exactly zero) then don't add +1 from negation + // ie ~(InvA&ASticky&KillProd)&InvA = (~ASticky|~KillProd)&InvA + // in this case this result is only ever selected when InvA=1 so we can remove &InvA + assign {NegSum, PreSum} = {{`NF+2{1'b0}}, PmKilled, 1'b0} + {InvA, AmInv} + {{3*`NF+4{1'b0}}, (~ASticky|KillProd)&InvA}; + assign NegPreSum = Am + {{`NF+1{1'b1}}, ~PmKilled, 1'b0} + {(3*`NF+2)'(0), ~ASticky|~KillProd, 1'b0}; + + // Choose the positive sum and accompanying LZA result. + assign Sm = NegSum ? 
NegPreSum : PreSum; + // is the result negitive + // if p - z is the Sum negitive + // if -p + z is the Sum positive + // if -p - z then the Sum is negitive + assign Ss = NegSum^Ps; + assign Se = KillProd ? {2'b0, Ze} : Pe; endmodule diff --git a/pipelined/src/fpu/fma/fmaalign.sv b/pipelined/src/fpu/fma/fmaalign.sv index 0f1764b29..b51c2a7f1 100644 --- a/pipelined/src/fpu/fma/fmaalign.sv +++ b/pipelined/src/fpu/fma/fmaalign.sv @@ -30,70 +30,68 @@ `include "wally-config.vh" module fmaalign( - input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format - input logic [`NF:0] Zm, // significand in U(0.NF) format] - input logic XZero, YZero, ZZero,// is the input zero - output logic [3*`NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1) - output logic ASticky, // Sticky bit calculated from the aliged addend - output logic KillProd // should the product be set to zero + input logic [`NE-1:0] Xe, Ye, Ze, // biased exponents in B(NE.0) format + input logic [`NF:0] Zm, // significand in U(0.NF) format] + input logic XZero, YZero, ZZero,// is the input zero + output logic [3*`NF+3:0] Am, // addend aligned for addition in U(NF+5.2NF+1) + output logic ASticky, // Sticky bit calculated from the aliged addend + output logic KillProd // should the product be set to zero ); - logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format - logic [4*`NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) - logic [4*`NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) - logic KillZ; // should the addend be killed + logic [`NE+1:0] ACnt; // how far to shift the addend to align with the product in Q(NE+2.0) format + logic [4*`NF+3:0] ZmShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) + logic [4*`NF+3:0] ZmPreshifted; // input to the alignment shifter U(NF+5.3NF+1) + logic KillZ; // should the addend be killed - 
/////////////////////////////////////////////////////////////////////////////// - // Alignment shifter - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Alignment shifter + /////////////////////////////////////////////////////////////////////////////// - // determine the shift count for alignment - // - negitive means Z is larger, so shift Z left - // - positive means the product is larger, so shift Z right - // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed - assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze}; + // determine the shift count for alignment + // - negitive means Z is larger, so shift Z left + // - positive means the product is larger, so shift Z right + // This could have been done using Pe, but ACnt is on the critical path so we replicate logic for speed + assign ACnt = {2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)} + (`NE+2)'(`NF+2) - {2'b0, Ze}; + + // Defualt Addition with only inital left shift + // | 53'b0 | 106'b(product) | 1'b0 | + // | addnend | + + assign ZmPreshifted = {Zm,(3*`NF+3)'(0)}; + + assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; + assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(3)); + + always_comb begin + // If the product is too small to effect the sum, kill the product - // Defualt Addition with only inital left shift // | 53'b0 | 106'b(product) | 1'b0 | - // | addnend | + // | addnend | + if (KillProd) begin + ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+1)'(0)}; + ASticky = ~(XZero|YZero); - assign ZmPreshifted = {Zm,(3*`NF+3)'(0)}; - - assign KillProd = (ACnt[`NE+1]&~ZZero)|XZero|YZero; - assign KillZ = $signed(ACnt)>$signed((`NE+2)'(3)*(`NE+2)'(`NF)+(`NE+2)'(3)); + // If the addend is too small to effect the addition + // - The addend has to shift two past the end of the product to be 
considered too small + // - The 2 extra bits are needed for rounding - always_comb - begin - - // If the product is too small to effect the sum, kill the product + // | 53'b0 | 106'b(product) | 1'b0 | + // | addnend | + end else if (KillZ) begin + ZmShifted = 0; + ASticky = ~ZZero; - // | 53'b0 | 106'b(product) | 1'b0 | - // | addnend | - if (KillProd) begin - ZmShifted = {(`NF+2)'(0), Zm, (2*`NF+1)'(0)}; - ASticky = ~(XZero|YZero); + // If the Addend is shifted right + // | 53'b0 | 106'b(product) | 1'b0 | + // | addnend | + end else begin + ZmShifted = ZmPreshifted >> ACnt; + ASticky = |(ZmShifted[`NF-1:0]); - // If the addend is too small to effect the addition - // - The addend has to shift two past the end of the product to be considered too small - // - The 2 extra bits are needed for rounding - - // | 53'b0 | 106'b(product) | 1'b0 | - // | addnend | - end else if (KillZ) begin - ZmShifted = 0; - ASticky = ~ZZero; - - // If the Addend is shifted right - // | 53'b0 | 106'b(product) | 1'b0 | - // | addnend | - end else begin - ZmShifted = ZmPreshifted >> ACnt; - ASticky = |(ZmShifted[`NF-1:0]); - - end end + end - assign Am = ZmShifted[4*`NF+3:`NF]; + assign Am = ZmShifted[4*`NF+3:`NF]; endmodule diff --git a/pipelined/src/fpu/fma/fmaexpadd.sv b/pipelined/src/fpu/fma/fmaexpadd.sv index dfafa410e..3c615274f 100644 --- a/pipelined/src/fpu/fma/fmaexpadd.sv +++ b/pipelined/src/fpu/fma/fmaexpadd.sv @@ -29,15 +29,15 @@ `include "wally-config.vh" module fmaexpadd( - input logic [`NE-1:0] Xe, Ye, // input's exponents - input logic XZero, YZero, // are the inputs zero - output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2 + input logic [`NE-1:0] Xe, Ye, // input's exponents + input logic XZero, YZero, // are the inputs zero + output logic [`NE+1:0] Pe // product's exponent B^(1023)NE+2 ); - logic PZero; // is the product zero - - // kill the exponent if the product is zero - either X or Y is 0 - assign PZero = XZero | YZero; - assign Pe = PZero ? 
'0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)}); + logic PZero; // is the product zero? + + // kill the exponent if the product is zero - either X or Y is 0 + assign PZero = XZero | YZero; + assign Pe = PZero ? '0 : ({2'b0, Xe} + {2'b0, Ye} - {2'b0, (`NE)'(`BIAS)}); endmodule diff --git a/pipelined/src/fpu/fma/fmalza.sv b/pipelined/src/fpu/fma/fmalza.sv index 0dad87d84..640e4ed80 100644 --- a/pipelined/src/fpu/fma/fmalza.sv +++ b/pipelined/src/fpu/fma/fmalza.sv @@ -7,6 +7,7 @@ // Purpose: Leading Zero Anticipator // // Documentation: RISC-V System on Chip Design Chapter 13 (Figure 13.14) +// See also [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] // // A component of the CORE-V-WALLY configurable RISC-V project. // @@ -28,32 +29,33 @@ `include "wally-config.vh" -module fmalza #(WIDTH) ( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] - input logic [WIDTH-1:0] A, // addend - input logic [2*`NF+1:0] Pm, // product - input logic Cin, // carry in - input logic sub, // subtraction - output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result - ); +module fmalza #(WIDTH) ( + input logic [WIDTH-1:0] A, // addend + input logic [2*`NF+1:0] Pm, // product + input logic Cin, // carry in + input logic sub, // subtraction + output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the positive result +); - logic [WIDTH:0] F; - logic [WIDTH-1:0] B, P, Guard, K; - logic [WIDTH-1:0] Pp1, Gm1, Km1; + logic [WIDTH:0] F; // most significant bit of F indicates leading digit + logic [WIDTH-1:0] B; // zero-extended product with same size as aligned A + logic [WIDTH-1:0] P, G, K; // propagate, generate, kill for each column + logic [WIDTH-1:0] Pp1, Gm1, Km1; // propagate shifted right by 1, generate/kill shifted left 1 - assign B = {{(`NF+1){1'b0}}, Pm, 1'b0}; // Zero extend product + assign B = {{(`NF+1){1'b0}}, Pm, 1'b0}; // 
Zero extend product - assign P = A^B; - assign Guard = A&B; - assign K= ~A&~B; + assign P = A^B; + assign G = A&B; + assign K= ~A&~B; - assign Pp1 = {sub, P[WIDTH-1:1]}; - assign Gm1 = {Guard[WIDTH-2:0], Cin}; - assign Km1 = {K[WIDTH-2:0], ~Cin}; - - // Apply function to determine Leading pattern - // - note: the paper linked above uses the numbering system where 0 is the most significant bit - assign F[WIDTH] = ~sub&P[WIDTH-1]; - assign F[WIDTH-1:0] = (Pp1&(Guard&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | Guard&~Gm1)); + assign Pp1 = {sub, P[WIDTH-1:1]}; // shift P right by 1 (for P_i+1) , use subtract flag in most significant bit + assign Gm1 = {G[WIDTH-2:0], Cin}; // shift G left by 1 (for G_i-1) and bring in Cin + assign Km1 = {K[WIDTH-2:0], ~Cin}; // shift K left by 1 (for K_i-1) and bring in Cin + + // Apply function to determine Leading pattern + // - note: Schmookler01 uses the numbering system where 0 is the most significant bit + assign F[WIDTH] = ~sub&P[WIDTH-1]; + assign F[WIDTH-1:0] = (Pp1&(G&~Km1 | K&~Gm1)) | (~Pp1&(K&~Km1 | G&~Gm1)); - lzc #(WIDTH+1) lzc (.num(F), .ZeroCnt(SCnt)); + lzc #(WIDTH+1) lzc (.num(F), .ZeroCnt(SCnt)); endmodule diff --git a/pipelined/src/fpu/fma/fmamult.sv b/pipelined/src/fpu/fma/fmamult.sv index 62fa5bc11..541ba3687 100644 --- a/pipelined/src/fpu/fma/fmamult.sv +++ b/pipelined/src/fpu/fma/fmamult.sv @@ -29,9 +29,10 @@ `include "wally-config.vh" module fmamult( - input logic [`NF:0] Xm, Ym, // x and y significand - output logic [2*`NF+1:0] Pm // product's significand + input logic [`NF:0] Xm, Ym, // x and y significand + output logic [2*`NF+1:0] Pm // product's significand ); - assign Pm = Xm * Ym; + + assign Pm = Xm * Ym; endmodule diff --git a/pipelined/src/fpu/fma/fmasign.sv b/pipelined/src/fpu/fma/fmasign.sv index 2bf0ee660..a2308da7f 100644 --- a/pipelined/src/fpu/fma/fmasign.sv +++ b/pipelined/src/fpu/fma/fmasign.sv @@ -29,19 +29,14 @@ `include "wally-config.vh" module fmasign( - input logic [2:0] OpCtrl, // opperation contol 
- input logic Xs, Ys, Zs, // sign of the inputs - output logic Ps, // the product's sign - takes opperation into account - output logic As, // aligned addend sign used in fma - takes opperation into account - output logic InvA // Effective subtraction: invert addend + input logic [2:0] OpCtrl, // opperation contol + input logic Xs, Ys, Zs, // sign of the inputs + output logic Ps, // the product's sign - takes opperation into account + output logic As, // aligned addend sign used in fma - takes opperation into account + output logic InvA // Effective subtraction: invert addend ); - // Calculate the product's sign - // Negate product's sign if FNMADD or FNMSUB - // flip is negation opperation - assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); - // flip addend sign for subtraction - assign As = Zs^OpCtrl[0]; - // Effective subtraction when product and addend have opposite signs - assign InvA = As ^ Ps; + assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); // product sign. Negate for FMNADD or FNMSUB + assign As = Zs^OpCtrl[0]; // flip addend sign for subtraction + assign InvA = As ^ Ps; // Effective subtraction when product and addend have opposite signs endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 66e97f54f..6abb76b0b 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -29,327 +29,327 @@ `include "wally-config.vh" module fpu ( - input logic clk, - input logic reset, - // Hazards - input logic StallE, StallM, StallW, // stall signals (from HZU) - input logic FlushE, FlushM, FlushW, // flush signals (from HZU) - output logic FPUStallD, // Stall the decode stage (To HZU) - output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU) - // CSRs - input logic [1:0] STATUS_FS, // Is floating-point enabled? 
(From privileged unit) - input logic [2:0] FRM_REGW, // Rounding mode (from CSR) - // Decode stage - input logic [31:0] InstrD, // instruction (from IFU) - // Execute stage - input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations - input logic IntDivE, W64E, // Integer division on FPU - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU) - input logic [4:0] RdE, // which FP register to write to (from IEU) - output logic FWriteIntE, // integer register write enable (to IEU) - output logic FCvtIntE, // Convert to int (to IEU) - // Memory stage - input logic [2:0] Funct3M, // Funct fields of instruction specify type of operations - input logic [4:0] RdM, // which FP register to write to (from IEU) - output logic FRegWriteM, // FP register write enable (to privileged unit) - output logic FpLoadStoreM, // Fp load instruction? (to LSU) - output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU) - output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) - output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit) - output logic [4:0] SetFflagsM, // FPU flags (to privileged unit) - // Writeback stage - input logic [4:0] RdW, // which FP register to write to (from IEU) - input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU) - output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) - output logic FCvtIntW, // select FCvtIntRes (to IEU) - output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU) + input logic clk, + input logic reset, + // Hazards + input logic StallE, StallM, StallW, // stall signals (from HZU) + input logic FlushE, FlushM, FlushW, // flush signals (from HZU) + output logic FPUStallD, // Stall the decode stage (To HZU) + output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to 
HZU) + // CSRs + input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit) + input logic [2:0] FRM_REGW, // Rounding mode (from CSR) + // Decode stage + input logic [31:0] InstrD, // instruction (from IFU) + // Execute stage + input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations + input logic IntDivE, W64E, // Integer division on FPU + input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU) + input logic [4:0] RdE, // which FP register to write to (from IEU) + output logic FWriteIntE, // integer register write enable (to IEU) + output logic FCvtIntE, // Convert to int (to IEU) + // Memory stage + input logic [2:0] Funct3M, // Funct fields of instruction specify type of operations + input logic [4:0] RdM, // which FP register to write to (from IEU) + output logic FRegWriteM, // FP register write enable (to privileged unit) + output logic FpLoadStoreM, // Fp load instruction? 
(to LSU) + output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU) + output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) + output logic IllegalFPUInstrM, // Is the instruction an illegal fpu instruction (to privileged unit) + output logic [4:0] SetFflagsM, // FPU flags (to privileged unit) + // Writeback stage + input logic [4:0] RdW, // which FP register to write to (from IEU) + input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU) + output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) + output logic FCvtIntW, // select FCvtIntRes (to IEU) + output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU) ); - // RISC-V FPU specifics: - // - multiprecision support uses NAN-boxing, putting 1's in unused msbs - // - RISC-V detects underflow after rounding + // RISC-V FPU specifics: + // - multiprecision support uses NAN-boxing, putting 1's in unused msbs + // - RISC-V detects underflow after rounding - // control signals - logic FRegWriteW; // FP register write enable - logic [2:0] FrmM; // FP rounding mode - logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double - logic FDivStartE, IDivStartE; // Start division or squareroot - logic FWriteIntM; // Write to integer register - logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals - logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelE, FResSelM, FResSelW; // Select one of the results that finish in the memory stage - logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit - logic [4:0] Adr1D, Adr2D, Adr3D; // register adresses of each input - logic [4:0] Adr1E, Adr2E, Adr3E; // register adresses of each input - logic XEnD, YEnD, ZEnD; // X, Y, Z inputs used for current operation - logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation - logic FRegWriteE; // Write 
floating-point register + // control signals + logic FRegWriteW; // FP register write enable + logic [2:0] FrmM; // FP rounding mode + logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double + logic FDivStartE, IDivStartE; // Start division or squareroot + logic FWriteIntM; // Write to integer register + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals + logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelE, FResSelM, FResSelW; // Select one of the results that finish in the memory stage + logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit + logic [4:0] Adr1D, Adr2D, Adr3D; // register adresses of each input + logic [4:0] Adr1E, Adr2E, Adr3E; // register adresses of each input + logic XEnD, YEnD, ZEnD; // X, Y, Z inputs used for current operation + logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation + logic FRegWriteE; // Write floating-point register - // regfile signals - logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding) - logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) - logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding) - logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding) + // regfile signals + logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding) + logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) + logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding) + logic [`FLEN-1:0] PreZE, ZE; // Input 3 to 
the various units (after forwarding) - // unpacking signals - logic XsE, YsE, ZsE; // input's sign - execute stage - logic XsM, YsM; // input's sign - memory stage - logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage - logic [`NE-1:0] ZeM; // input's exponent - memory stage - logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage - logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XSubnormE; // is the input subnormal - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM; // is the input zero - memory stage - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XExpMaxE; // is the exponent all ones (max value) + // unpacking signals + logic XsE, YsE, ZsE; // input's sign - execute stage + logic XsM, YsM; // input's sign - memory stage + logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage + logic [`NE-1:0] ZeM; // input's exponent - memory stage + logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage + logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage + logic XSubnormE; // is the input subnormal + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM; // is the input zero - memory stage + logic XInfE, YInfE, ZInfE; // is the input 
infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XExpMaxE; // is the exponent all ones (max value) - // Fma Signals - logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting - logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying - logic [3*`NF+3:0] SmE, SmM; // Sum significand - logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output - logic [`NE+1:0] SeE,SeM; // Sum exponent - logic InvAE, InvAM; // Invert addend - logic AsE, AsM; // Addend sign - logic PsE, PsM; // Product sign - logic SsE, SsM; // Sum sign - logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count - - // Cvt Signals - logic [`NE:0] CeE, CeM; // convert intermediate expoent - logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by - logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal - logic CsE, CsM; // convert result sign - logic IntZeroE, IntZeroM; // is the integer zero? 
- logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) - logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU) - - // divide signals - logic [`DIVb:0] QmM; // fdivsqrt signifcand - logic [`NE+1:0] QeM; // fdivsqrt exponent - logic DivStickyM; // fdivsqrt sticky bit - logic FDivDoneE, IFDivStartE; // fdivsqrt control signals - logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU) + // Fma Signals + logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting + logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying + logic [3*`NF+3:0] SmE, SmM; // Sum significand + logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output + logic [`NE+1:0] SeE,SeM; // Sum exponent + logic InvAE, InvAM; // Invert addend + logic AsE, AsM; // Addend sign + logic PsE, PsM; // Product sign + logic SsE, SsM; // Sum sign + logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count + + // Cvt Signals + logic [`NE:0] CeE, CeM; // convert intermediate expoent + logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal + logic CsE, CsM; // convert result sign + logic IntZeroE, IntZeroM; // is the integer zero? 
+ logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU) + + // divide signals + logic [`DIVb:0] QmM; // fdivsqrt signifcand + logic [`NE+1:0] QeM; // fdivsqrt exponent + logic DivStickyM; // fdivsqrt sticky bit + logic FDivDoneE, IFDivStartE; // fdivsqrt control signals + logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU) - // result and flag signals - logic [`XLEN-1:0] ClassResE; // classify result - logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max) - logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le) - logic CmpNVE; // compare invalid flag (Not Valid) - logic [`FLEN-1:0] SgnResE; // sign injection result - logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move) - logic [`FLEN-1:0] PostProcResM; // Postprocessor output - logic [4:0] PostProcFlgM; // Postprocessor flags - logic PreNVE, PreNVM; // selected flag that is ready in the memory stage - logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result - logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage - logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register + // result and flag signals + logic [`XLEN-1:0] ClassResE; // classify result + logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max) + logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le) + logic CmpNVE; // compare invalid flag (Not Valid) + logic [`FLEN-1:0] SgnResE; // sign injection result + logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move) + logic [`FLEN-1:0] PostProcResM; // Postprocessor output + logic [4:0] PostProcFlgM; // Postprocessor flags + logic PreNVE, PreNVM; // selected flag that is ready in the memory stage + logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result + logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected 
result that is ready in the memory stage + logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register - // other signals - logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv - logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed - logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed - logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt - logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer + // other signals + logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv + logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed + logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed + logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt + logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer - ////////////////////////////////////////////////////////////////////////////////////////// - // Decode Stage: fctrl decoder, read register file - ////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////// + // Decode Stage: fctrl decoder, read register file + ////////////////////////////////////////////////////////////////////////////////////////// - // calculate FP control signals - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), - .Funct3E, .IntDivE, .InstrD, - .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, - .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, - .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM, - .IllegalFPUInstrM, .XEnD, .YEnD, .ZEnD, 
.XEnE, .YEnE, .ZEnE, - .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW, - .Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E); + // calculate FP control signals + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), + .Funct3E, .IntDivE, .InstrD, + .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, + .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, + .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM, + .IllegalFPUInstrM, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE, + .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .FCvtIntW, + .Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E); - // FP register file - fregfile fregfile (.clk, .reset, .we4(FRegWriteW), - .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), - .a4(RdW), .wd4(FResultW), - .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); + // FP register file + fregfile fregfile (.clk, .reset, .we4(FRegWriteW), + .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), + .a4(RdW), .wd4(FResultW), + .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); - // D/E pipeline registers - flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + // D/E pipeline registers + flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - ////////////////////////////////////////////////////////////////////////////////////////// - // Execute Stage: hazards, forwarding, unpacking, execution units - ////////////////////////////////////////////////////////////////////////////////////////// + 
////////////////////////////////////////////////////////////////////////////////////////// + // Execute Stage: hazards, forwarding, unpacking, execution units + ////////////////////////////////////////////////////////////////////////////////////////// - // Hazard unit for FPU: determines if any forwarding or stalls are needed - fhazard fhazard(.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E, - .FRegWriteE, .FRegWriteM, .FRegWriteW, .RdE, .RdM, .RdW, .FResSelM, - .XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE); + // Hazard unit for FPU: determines if any forwarding or stalls are needed + fhazard fhazard(.Adr1D, .Adr2D, .Adr3D, .Adr1E, .Adr2E, .Adr3E, + .FRegWriteE, .FRegWriteM, .FRegWriteW, .RdE, .RdM, .RdW, .FResSelM, + .XEnD, .YEnD, .ZEnD, .FPUStallD, .ForwardXE, .ForwardYE, .ForwardZE); - // forwarding muxs - mux3 #(`FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE); - mux3 #(`FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE); - mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE); + // forwarding muxs + mux3 #(`FLEN) fxemux (FRD1E, FResultW, PreFpResM, ForwardXE, XE); + mux3 #(`FLEN) fyemux (FRD2E, FResultW, PreFpResM, ForwardYE, PreYE); + mux3 #(`FLEN) fzemux (FRD3E, FResultW, PreFpResM, ForwardZE, PreZE); - // Select NAN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z - generate - if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}; - else if(`FPSIZES == 2) - mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes - else if(`FPSIZES == 3 | `FPSIZES == 4) - mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)}, - {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)}, - {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)}, - {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes - endgenerate - assign FmaAddSubE = 
OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10); - mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract - - // Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z - // For add and subtract, Z comes from second source operand - generate - if(`FPSIZES == 1) assign BoxedZeroE = 0; - else if(`FPSIZES == 2) - mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes - else if(`FPSIZES == 3 | `FPSIZES == 4) - mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}}, - {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}, - {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}, - (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes - endgenerate - assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}; - mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE); + // Select NAN-boxed value of Y = 1.0 in proper format for fma to add/subtract X*Y+Z + generate + if(`FPSIZES == 1) assign BoxedOneE = {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}; + else if(`FPSIZES == 2) + mux2 #(`FLEN) fonemux ({{`FLEN-`LEN1{1'b1}}, 2'b0, {`NE1-1{1'b1}}, (`NF1)'(0)}, {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes + else if(`FPSIZES == 3 | `FPSIZES == 4) + mux4 #(`FLEN) fonemux ({{`FLEN-`S_LEN{1'b1}}, 2'b0, {`S_NE-1{1'b1}}, (`S_NF)'(0)}, + {{`FLEN-`D_LEN{1'b1}}, 2'b0, {`D_NE-1{1'b1}}, (`D_NF)'(0)}, + {{`FLEN-`H_LEN{1'b1}}, 2'b0, {`H_NE-1{1'b1}}, (`H_NF)'(0)}, + {2'b0, {`NE-1{1'b1}}, (`NF)'(0)}, FmtE, BoxedOneE); // NaN boxing zeroes + endgenerate + assign FmaAddSubE = OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10); + mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, FmaAddSubE, YE); // Force Y to be 1 for add/subtract + + // Select NAN-boxed value of Z = 0.0 in proper format for FMA for multiply X*Y+Z + // For add and subtract, Z comes from second source operand + generate + if(`FPSIZES == 1) assign BoxedZeroE = 0; + else if(`FPSIZES == 
2) + mux2 #(`FLEN) fmulzeromux ({{`FLEN-`LEN1{1'b1}}, {`LEN1{1'b0}}}, (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes + else if(`FPSIZES == 3 | `FPSIZES == 4) + mux4 #(`FLEN) fmulzeromux ({{`FLEN-`S_LEN{1'b1}}, {`S_LEN{1'b0}}}, + {{`FLEN-`D_LEN{1'b1}}, {`D_LEN{1'b0}}}, + {{`FLEN-`H_LEN{1'b1}}, {`H_LEN{1'b0}}}, + (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes + endgenerate + assign FmaZSelE = {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}; + mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, FmaZSelE, ZE); - // unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infifnity - unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), - .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE), - .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), - .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE), - .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), - .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE)); - - // fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub - fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), - .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .OpCtrl(OpCtrlE), - .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE)); + // unpack unit: splits FP inputs into their parts and classifies SNaN, NaN, Subnorm, Norm, Zero, Infifnity + unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), + .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE), + .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), + .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE), + .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), + .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE)); + + // fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub + fma fma (.Xs(XsE), 
.Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), + .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .OpCtrl(OpCtrlE), + .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), .Sm(SmE), .InvA(InvAE), .SCnt(SCntE), .ASticky(FmaAStickyE)); - // divide and square root: fdiv, fsqrt, optionally integer division - fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), - .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, - .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, - .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, - .QmM, .FIntDivResultM); + // divide and square root: fdiv, fsqrt, optionally integer division + fdivsqrt fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), + .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, + .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, + .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .QeM, + .QmM, .FIntDivResultM); - // compare: fmin/fmax, flt/fle/feq - fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), - .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), - .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), - .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); + // compare: fmin/fmax, flt/fle/feq + fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), + .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), + .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), + .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); - // sign injection: fsgnj/fsgnjx/fsgnjn - fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE)); + // sign injection: fsgnj/fsgnjx/fsgnjn + fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), 
.X(XE), .Fmt(FmtE), .SgnRes(SgnResE)); - // classify: fclass - fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE), - .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE)); + // classify: fclass + fclassify fclassify (.Xs(XsE), .XSubnorm(XSubnormE), .XZero(XZeroE), .XNaN(XNaNE), + .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE)); - // convert: fcvt.*.* - fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), - .ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE), - .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); + // convert: fcvt.*.* + fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), + .ToInt(FWriteIntE), .XZero(XZeroE), .Fmt(FmtE), .Ce(CeE), .ShiftAmt(CvtShiftAmtE), + .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); - // NaN Box SrcA to convert integer to requested FP size - generate - if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}; - else if(`FPSIZES == 2) - mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); - else if(`FPSIZES == 3 | `FPSIZES == 4) - mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]}, - {{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]}, - {{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]}, - {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes - endgenerate + // NaN Box SrcA to convert integer to requested FP size + generate + if(`FPSIZES == 1) assign AlignedSrcAE = {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}; + else if(`FPSIZES == 2) + mux2 #(`FLEN) SrcAMux ({{`FLEN-`LEN1{1'b1}}, ForwardedSrcAE[`LEN1-1:0]}, {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); + else if(`FPSIZES == 3 | `FPSIZES == 4) + mux4 #(`FLEN) SrcAMux ({{`FLEN-`S_LEN{1'b1}}, ForwardedSrcAE[`S_LEN-1:0]}, + 
{{`FLEN-`D_LEN{1'b1}}, ForwardedSrcAE[`D_LEN-1:0]}, + {{`FLEN-`H_LEN{1'b1}}, ForwardedSrcAE[`H_LEN-1:0]}, + {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes + endgenerate - // select a result that may be written to the FP register - mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); - assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); + // select a result that may be written to the FP register + mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); + assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); - // select the result that may be written to the integer register - to IEU - generate - if(`FPSIZES == 1) - assign SgnExtXE = XE; - else if(`FPSIZES == 2) - mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{XsE}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE); - else if(`FPSIZES == 3 | `FPSIZES == 4) - mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{XsE}}, XE[`H_LEN-1:0]}, - {{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]}, - {{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]}, - XE, FmtE, SgnExtXE); - endgenerate - if (`FLEN>`XLEN) - assign IntSrcXE = SgnExtXE[`XLEN-1:0]; - else - assign IntSrcXE = {{`XLEN-`FLEN{XsE}}, SgnExtXE}; - mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); + // select the result that may be written to the integer register - to IEU + generate + if(`FPSIZES == 1) + assign SgnExtXE = XE; + else if(`FPSIZES == 2) + mux2 #(`FLEN) sgnextmux ({{`FLEN-`LEN1{XsE}}, XE[`LEN1-1:0]}, XE, FmtE, SgnExtXE); + else if(`FPSIZES == 3 | `FPSIZES == 4) + mux4 #(`FLEN) fmulzeromux ({{`FLEN-`H_LEN{XsE}}, XE[`H_LEN-1:0]}, + {{`FLEN-`S_LEN{XsE}}, XE[`S_LEN-1:0]}, + {{`FLEN-`D_LEN{XsE}}, XE[`D_LEN-1:0]}, + XE, FmtE, SgnExtXE); + endgenerate + if (`FLEN>`XLEN) + assign IntSrcXE = SgnExtXE[`XLEN-1:0]; + else + assign IntSrcXE = {{`XLEN-`FLEN{XsE}}, SgnExtXE}; + mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); - 
// E/M pipe registers + // E/M pipe registers - // Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources - assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE); + // Need to stall during divsqrt iterations to avoid capturing bad flags from stale forwarded sources + assign StallUnpackedM = StallM | (FDivBusyE & ~IFDivStartE | FDivDoneE); - flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM); - flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM); - flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM}); - flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); - flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); - flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM, - {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, - {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); - flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); - flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); - flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, - {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, - {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); - flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, - {CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE}, - {CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM}); - flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM); + flopenrc #(`NF+1) EMFpReg2 (clk, reset, FlushM, ~StallM, XmE, XmM); + flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YmE, YmM); + flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM}); + flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); + flopenrc #(`FLEN) EMFpReg7 (clk, reset, 
FlushM, ~StallM, PreFpResE, PreFpResM); + flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM, + {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, + {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); + flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); + flopenrc #(3*`NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); + flopenrc #($clog2(3*`NF+5)+7+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, + {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, + {FmaAStickyM, InvAM, SCntM, AsM, PsM, SsM, SeM}); + flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, + {CeE, CvtShiftAmtE, CvtResSubnormUfE, CsE, IntZeroE, CvtLzcInE}, + {CeM, CvtShiftAmtM, CvtResSubnormUfM, CsM, IntZeroM, CvtLzcInM}); + flopenrc #(`FLEN) FWriteDataMReg (clk, reset, FlushM, ~StallM, YE, FWriteDataM); - ////////////////////////////////////////////////////////////////////////////////////////// - // Memory Stage: postprocessor and result muxes - ////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////// + // Memory Stage: postprocessor and result muxes + ////////////////////////////////////////////////////////////////////////////////////////// - postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), - .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), - .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), - .FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), - .CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), - .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), - 
.PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); + postprocess postprocess(.Xs(XsM), .Ys(YsM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), + .FmaASticky(FmaAStickyM), .XZero(XZeroM), .YZero(YZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), + .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), + .FmaSm(SmM), .DivQe(QeM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), + .CvtCe(CeM), .CvtResSubnormUf(CvtResSubnormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), + .ToInt(FWriteIntM), .DivSticky(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), + .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); - // FPU flag selection - to privileged - mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM); - mux2 #(`FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM); + // FPU flag selection - to privileged + mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM); + mux2 #(`FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM); - // M/W pipe registers - flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); - flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); - flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW); + // M/W pipe registers + flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); + flopenrc #(`XLEN) MWRegIntCvtRes(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); + flopenrc #(`XLEN) MWRegIntDivRes(clk, reset, FlushW, ~StallW, FIntDivResultM, FIntDivResultW); - ////////////////////////////////////////////////////////////////////////////////////////// - // Writeback Stage: result mux - 
////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////// + // Writeback Stage: result mux + ////////////////////////////////////////////////////////////////////////////////////////// - // select the result to be written to the FP register - mux2 #(`FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW); + // select the result to be written to the FP register + mux2 #(`FLEN) FResultMux (FpResW, ReadDataW, FResSelW[1], FResultW); endmodule // fpu diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv index 0348426a2..d5568cabe 100755 --- a/pipelined/src/fpu/fsgninj.sv +++ b/pipelined/src/fpu/fsgninj.sv @@ -50,33 +50,29 @@ module fsgninj ( // - uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s - if (`FPSIZES == 1) + if (`FPSIZES == 1) assign SgnRes = {ResSgn, X[`FLEN-2:0]}; - - else if (`FPSIZES == 2) + else if (`FPSIZES == 2) assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? 
X[`LEN1-1] : ResSgn, X[`LEN1-2:0]}; - - else if (`FPSIZES == 3) begin + else if (`FPSIZES == 3) begin logic [2:0] SgnBits; - always_comb - case (Fmt) - `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]}; - `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]}; - `FMT2: SgnBits = {2'b11, ResSgn}; - default: SgnBits = {3{1'bx}}; - endcase + always_comb + case (Fmt) + `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]}; + `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]}; + `FMT2: SgnBits = {2'b11, ResSgn}; + default: SgnBits = {3{1'bx}}; + endcase assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]}; - - end else if (`FPSIZES == 4) begin logic [3:0] SgnBits; - always_comb - case (Fmt) - `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]}; - `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]}; - `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]}; - `H_FMT: SgnBits = {3'b111, ResSgn}; - endcase + always_comb + case (Fmt) + `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]}; + `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]}; + `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]}; + `H_FMT: SgnBits = {3'b111, ResSgn}; + endcase assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]}; end diff --git a/pipelined/src/fpu/postproc/cvtshiftcalc.sv b/pipelined/src/fpu/postproc/cvtshiftcalc.sv index 6c7516fa3..7297824f3 100644 --- a/pipelined/src/fpu/postproc/cvtshiftcalc.sv +++ b/pipelined/src/fpu/postproc/cvtshiftcalc.sv @@ -29,79 +29,79 @@ `include "wally-config.vh" module cvtshiftcalc( - input logic XZero, // is the input zero? - input logic ToInt, // to integer conversion? - input logic IntToFp, // interger to floating point conversion? 
- input logic [`FMTBITS-1:0] OutFmt, // output format - input logic [`NE:0] CvtCe, // the calculated expoent - input logic [`NF:0] Xm, // input mantissas - input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb) - input logic CvtResSubnormUf, // is the conversion result subnormal or underlows - output logic CvtResUf, // does the cvt result unerflow - output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted + input logic XZero, // is the input zero? + input logic ToInt, // to integer conversion? + input logic IntToFp, // interger to floating point conversion? + input logic [`FMTBITS-1:0] OutFmt, // output format + input logic [`NE:0] CvtCe, // the calculated expoent + input logic [`NF:0] Xm, // input mantissas + input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb) + input logic CvtResSubnormUf, // is the conversion result subnormal or underlows + output logic CvtResUf, // does the cvt result unerflow + output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted ); - logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF) + + logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF) + + /////////////////////////////////////////////////////////////////////////// + // shifter + /////////////////////////////////////////////////////////////////////////// + + // seclect the input to the shifter + // fp -> int: + // | `XLEN zeros | mantissa | 0's if nessisary | + // . + // Other problems: + // - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding) + // - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1 + // - ex: for the case 0010000.... (double) + // ??? -> fp: + // - if result is subnormal or underflowed then we want to shift right i.e. shift right then shift left: + // | `NF-1 zeros | mantissa | 0's if nessisary | + // . 
+ // - otherwise: + // | LzcInM | 0's if nessisary | + // . + // change to int shift to the left one + always_comb + // get rid of round bit if needed + // | add sticky bit if needed + // | | + if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}}; + else if (CvtResSubnormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}}; + else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}}; + + // choose the negative of the fraction size + if (`FPSIZES == 1) begin + assign ResNegNF = -($clog2(`NF)+1)'(`NF); + + end else if (`FPSIZES == 2) begin + assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1); + + end else if (`FPSIZES == 3) begin + always_comb + case (OutFmt) + `FMT: ResNegNF = -($clog2(`NF)+1)'(`NF); + `FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1); + `FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2); + default: ResNegNF = 1'bx; + endcase + + end else if (`FPSIZES == 4) begin + always_comb + case (OutFmt) + 2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF); + 2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF); + 2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF); + 2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF); + endcase + end - /////////////////////////////////////////////////////////////////////////// - // shifter - /////////////////////////////////////////////////////////////////////////// - - // seclect the input to the shifter - // fp -> int: - // | `XLEN zeros | mantissa | 0's if nessisary | - // . - // Other problems: - // - if shifting to the right (neg CalcExp) then don't a 1 in the round bit (to prevent an incorrect plus 1 later durring rounding) - // - we do however want to keep the one in the sticky bit so set one of bits in the sticky bit area to 1 - // - ex: for the case 0010000.... (double) - // ??? -> fp: - // - if result is subnormal or underflowed then we want to shift right i.e. shift right then shift left: - // | `NF-1 zeros | mantissa | 0's if nessisary | - // . 
- // - otherwise: - // | LzcInM | 0's if nessisary | - // . - // change to int shift to the left one - always_comb - // get rid of round bit if needed - // | add sticky bit if needed - // | | - if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}}; - else if (CvtResSubnormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}}; - else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}}; - - // choose the negative of the fraction size - if (`FPSIZES == 1) begin - assign ResNegNF = -($clog2(`NF)+1)'(`NF); - - end else if (`FPSIZES == 2) begin - assign ResNegNF = OutFmt ? -($clog2(`NF)+1)'(`NF) : -($clog2(`NF)+1)'(`NF1); - - end else if (`FPSIZES == 3) begin - always_comb - case (OutFmt) - `FMT: ResNegNF = -($clog2(`NF)+1)'(`NF); - `FMT1: ResNegNF = -($clog2(`NF)+1)'(`NF1); - `FMT2: ResNegNF = -($clog2(`NF)+1)'(`NF2); - default: ResNegNF = 1'bx; - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (OutFmt) - 2'h3: ResNegNF = -($clog2(`NF)+1)'(`Q_NF); - 2'h1: ResNegNF = -($clog2(`NF)+1)'(`D_NF); - 2'h0: ResNegNF = -($clog2(`NF)+1)'(`S_NF); - 2'h2: ResNegNF = -($clog2(`NF)+1)'(`H_NF); - endcase - end - - - - // determine if the result underflows ??? -> fp - // - if the first 1 is shifted out of the result then the result underflows - // - can't underflow an integer to fp conversions - assign CvtResUf = ($signed(CvtCe) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp; - + + // determine if the result underflows ??? 
-> fp + // - if the first 1 is shifted out of the result then the result underflows + // - can't underflow an integer to fp conversions + assign CvtResUf = ($signed(CvtCe) < $signed({{`NE-$clog2(`NF){1'b1}}, ResNegNF}))&~XZero&~IntToFp; + endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/postproc/divshiftcalc.sv b/pipelined/src/fpu/postproc/divshiftcalc.sv index a8f00b46f..76668516c 100644 --- a/pipelined/src/fpu/postproc/divshiftcalc.sv +++ b/pipelined/src/fpu/postproc/divshiftcalc.sv @@ -29,45 +29,46 @@ `include "wally-config.vh" module divshiftcalc( - input logic [`DIVb:0] DivQm, // divsqrt significand - input logic [`NE+1:0] DivQe, // divsqrt exponent - output logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount - output logic [`NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input - output logic DivResSubnorm, // is the divsqrt result subnormal - output logic DivSubnormShiftPos // is the subnormal shift amount positive + input logic [`DIVb:0] DivQm, // divsqrt significand + input logic [`NE+1:0] DivQe, // divsqrt exponent + output logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt, // divsqrt shift amount + output logic [`NORMSHIFTSZ-1:0] DivShiftIn, // divsqrt shift input + output logic DivResSubnorm, // is the divsqrt result subnormal + output logic DivSubnormShiftPos // is the subnormal shift amount positive ); - logic [`LOGNORMSHIFTSZ-1:0] NormShift; // normalized result shift amount - logic [`LOGNORMSHIFTSZ-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negitive) - logic [`NE+1:0] DivSubnormShift; // subnormal result shift amount - - // is the result subnormal - // if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes - assign DivResSubnorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]); - // if the result is subnormal - // 00000000x.xxxxxx... Exp = DivQe - // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 - // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 - // .0000xxxxxxxxxxx... 
>> 1 Exp = 1 - // Left shift amount = DivQe+NF+1-1 - assign DivSubnormShift = (`NE+2)'(`NF)+DivQe; - assign DivSubnormShiftPos = ~DivSubnormShift[`NE+1]; + logic [`LOGNORMSHIFTSZ-1:0] NormShift; // normalized result shift amount + logic [`LOGNORMSHIFTSZ-1:0] DivSubnormShiftAmt; // subnormal result shift amount (killed if negitive) + logic [`NE+1:0] DivSubnormShift; // subnormal result shift amount - // if the result is normalized - // 00000000x.xxxxxx... Exp = DivQe - // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 - // 00000000.xxxxxxx... << NF Exp = DivQe+1 - // 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards) - // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after) - // inital Left shift amount = NF - // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit - assign NormShift = (`LOGNORMSHIFTSZ)'(`NF); + // is the result subnormal + // if the exponent is 1 then the result needs to be normalized then the result is Subnormalizes + assign DivResSubnorm = DivQe[`NE+1]|(~|DivQe[`NE+1:0]); - // if the shift amount is negitive then don't shift (keep sticky bit) - // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) - assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[`LOGNORMSHIFTSZ-1:0] : '0; - assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift; + // if the result is subnormal + // 00000000x.xxxxxx... Exp = DivQe + // .00000000xxxxxxx... >> NF+1 Exp = DivQe+NF+1 + // .00xxxxxxxxxxxxx... << DivQe+NF+1 Exp = +1 + // .0000xxxxxxxxxxx... >> 1 Exp = 1 + // Left shift amount = DivQe+NF+1-1 + assign DivSubnormShift = (`NE+2)'(`NF)+DivQe; + assign DivSubnormShiftPos = ~DivSubnormShift[`NE+1]; - // pre-shift the divider result for normalization - assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb-1-`NF{1'b0}}}; + // if the result is normalized + // 00000000x.xxxxxx... Exp = DivQe + // .00000000xxxxxxx... 
>> NF+1 Exp = DivQe+NF+1 + // 00000000.xxxxxxx... << NF Exp = DivQe+1 + // 00000000x.xxxxxx... << NF Exp = DivQe (extra shift done afterwards) + // 00000000xx.xxxxx... << 1? Exp = DivQe-1 (determined after) + // inital Left shift amount = NF + // shift one more if the it's a minimally redundent radix 4 - one entire cycle needed for integer bit + assign NormShift = (`LOGNORMSHIFTSZ)'(`NF); + + // if the shift amount is negitive then don't shift (keep sticky bit) + // need to multiply the early termination shift by LOGR*DIVCOPIES = left shift of log2(LOGR*DIVCOPIES) + assign DivSubnormShiftAmt = DivSubnormShiftPos ? DivSubnormShift[`LOGNORMSHIFTSZ-1:0] : '0; + assign DivShiftAmt = DivResSubnorm ? DivSubnormShiftAmt : NormShift; + + // pre-shift the divider result for normalization + assign DivShiftIn = {{`NF{1'b0}}, DivQm, {`NORMSHIFTSZ-`DIVb-1-`NF{1'b0}}}; endmodule diff --git a/pipelined/src/fpu/postproc/flags.sv b/pipelined/src/fpu/postproc/flags.sv index c1dcab858..701cf0524 100644 --- a/pipelined/src/fpu/postproc/flags.sv +++ b/pipelined/src/fpu/postproc/flags.sv @@ -28,185 +28,186 @@ `include "wally-config.vh" module flags( - input logic Xs, // X sign - input logic [`FMTBITS-1:0] OutFmt, // output format - input logic InfIn, // is a Inf input being used - input logic XInf, YInf, ZInf, // inputs are infinity - input logic NaNIn, // is a NaN input being used - input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs - input logic XZero, YZero, // inputs are zero - input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow - input logic [`NE+1:0] Me, // exponent of the normalized sum - // rounding - input logic Plus1, // do you add one for rounding - input logic Round, Guard, Sticky, // bits used to determine rounding - input logic UfPlus1, // do you add one for rounding for the unbounded exponent result - // convert - input logic CvtOp, // conversion opperation? 
- input logic ToInt, // convert to integer - input logic IntToFp, // convert integer to floating point - input logic Int64, // convert to 64 bit integer - input logic Signed, // convert to a signed integer - input logic [`NE:0] CvtCe, // the calculated expoent - Cvt - input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits - // divsqrt - input logic DivOp, // conversion opperation? - input logic Sqrt, // Sqrt? - // fma - input logic FmaOp, // Fma opperation? - input logic FmaAs, FmaPs, // the product and modified Z signs - // flags - output logic DivByZero, // divide by zero flag - output logic Overflow, // overflow flag to select result - output logic Invalid, // invalid flag to select the result - output logic IntInvalid, // invalid integer result to select - output logic [4:0] PostProcFlg // flags + input logic Xs, // X sign + input logic [`FMTBITS-1:0] OutFmt, // output format + input logic InfIn, // is a Inf input being used + input logic XInf, YInf, ZInf, // inputs are infinity + input logic NaNIn, // is a NaN input being used + input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs + input logic XZero, YZero, // inputs are zero + input logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow + input logic [`NE+1:0] Me, // exponent of the normalized sum + // rounding + input logic Plus1, // do you add one for rounding + input logic Round, Guard, Sticky, // bits used to determine rounding + input logic UfPlus1, // do you add one for rounding for the unbounded exponent result + // convert + input logic CvtOp, // conversion opperation? 
+ input logic ToInt, // convert to integer + input logic IntToFp, // convert integer to floating point + input logic Int64, // convert to 64 bit integer + input logic Signed, // convert to a signed integer + input logic [`NE:0] CvtCe, // the calculated expoent - Cvt + input logic [1:0] CvtNegResMsbs, // the negitive integer result's most significant bits + // divsqrt + input logic DivOp, // conversion opperation? + input logic Sqrt, // Sqrt? + // fma + input logic FmaOp, // Fma opperation? + input logic FmaAs, FmaPs, // the product and modified Z signs + // flags + output logic DivByZero, // divide by zero flag + output logic Overflow, // overflow flag to select result + output logic Invalid, // invalid flag to select the result + output logic IntInvalid, // invalid integer result to select + output logic [4:0] PostProcFlg // flags ); - logic SigNaN; // is an input a signaling NaN - logic Inexact; // final inexact flag - logic FpInexact; // floating point inexact flag - logic IntInexact; // integer inexact flag - logic FmaInvalid; // integer invalid flag - logic DivInvalid; // integer invalid flag - logic Underflow; // Underflow flag - logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent - logic ShiftGtIntSz; // is the shift greater than the the integer size (use Re to account for possible roundning "shift") - /////////////////////////////////////////////////////////////////////////////// - // Overflow - /////////////////////////////////////////////////////////////////////////////// + logic SigNaN; // is an input a signaling NaN + logic Inexact; // final inexact flag + logic FpInexact; // floating point inexact flag + logic IntInexact; // integer inexact flag + logic FmaInvalid; // integer invalid flag + logic DivInvalid; // integer invalid flag + logic Underflow; // Underflow flag + logic ResExpGteMax; // is the result greater than or equal to the maximum floating point expoent + logic ShiftGtIntSz; // is the shift 
greater than the the integer size (use Re to account for possible roundning "shift") - // determine if the result exponent is greater than or equal to the maximum exponent or - // the shift amount is greater than the integers size (for cvt to int) - // ShiftGtIntSz calculation: - // a left shift of intlen+1 is still in range but any more than that is an overflow - // inital: | 64 0's | XLEN | - // | 64 0's | XLEN | << 64 - // | XLEN | 00000... | - // 65 = ...0 0 0 0 0 1 0 0 0 0 0 1 - // | or | | or | - // 33 = ...0 0 0 0 0 0 1 0 0 0 0 1 - // | or | | or | - // larger or equal if: - // - any of the bits after the most significan 1 is one - // - the most signifcant in 65 or 33 is still a one in the number and - // one of the later bits is one - if (`FPSIZES == 1) begin - assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; - assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); + /////////////////////////////////////////////////////////////////////////////// + // Overflow + /////////////////////////////////////////////////////////////////////////////// - end else if (`FPSIZES == 2) begin - assign ResExpGteMax = OutFmt ? &FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); + // determine if the result exponent is greater than or equal to the maximum exponent or + // the shift amount is greater than the integers size (for cvt to int) + // ShiftGtIntSz calculation: + // a left shift of intlen+1 is still in range but any more than that is an overflow + // inital: | 64 0's | XLEN | + // | 64 0's | XLEN | << 64 + // | XLEN | 00000... 
| + // 65 = ...0 0 0 0 0 1 0 0 0 0 0 1 + // | or | | or | + // 33 = ...0 0 0 0 0 0 1 0 0 0 0 1 + // | or | | or | + // larger or equal if: + // - any of the bits after the most significan 1 is one + // - the most signifcant in 65 or 33 is still a one in the number and + // one of the later bits is one + if (`FPSIZES == 1) begin + assign ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); - assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); - end else if (`FPSIZES == 3) begin - always_comb - case (OutFmt) - `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; - `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); - `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]); - default: ResExpGteMax = 1'bx; - endcase - assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); + end else if (`FPSIZES == 2) begin + assign ResExpGteMax = OutFmt ? 
&FullRe[`NE-1:0] | FullRe[`NE] : &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); - end else if (`FPSIZES == 4) begin - always_comb - case (OutFmt) - `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE]; - `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]); - `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]); - `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]); - endcase - assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); - end + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); + end else if (`FPSIZES == 3) begin + always_comb + case (OutFmt) + `FMT: ResExpGteMax = &FullRe[`NE-1:0] | FullRe[`NE]; + `FMT1: ResExpGteMax = &FullRe[`NE1-1:0] | (|FullRe[`NE:`NE1]); + `FMT2: ResExpGteMax = &FullRe[`NE2-1:0] | (|FullRe[`NE:`NE2]); + default: ResExpGteMax = 1'bx; + endcase + assign ShiftGtIntSz = (|FullRe[`NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); + + end else if (`FPSIZES == 4) begin + always_comb + case (OutFmt) + `Q_FMT: ResExpGteMax = &FullRe[`Q_NE-1:0] | FullRe[`Q_NE]; + `D_FMT: ResExpGteMax = &FullRe[`D_NE-1:0] | (|FullRe[`Q_NE:`D_NE]); + `S_FMT: ResExpGteMax = &FullRe[`S_NE-1:0] | (|FullRe[`Q_NE:`S_NE]); + `H_FMT: ResExpGteMax = &FullRe[`H_NE-1:0] | (|FullRe[`Q_NE:`H_NE]); + endcase + assign ShiftGtIntSz = (|FullRe[`Q_NE:7]|(FullRe[6]&~Int64)) | ((|FullRe[4:0]|(FullRe[5]&Int64))&((FullRe[5]&~Int64) | FullRe[6]&Int64)); + end - // calulate overflow flag: - // if the result is greater than or equal to the max exponent(not taking into account sign) - // | and the exponent isn't negitive - // | | if the input isnt infinity or NaN - // | | | - assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero); + // calulate overflow flag: + // if the result is greater than or equal to the 
max exponent(not taking into account sign) + // | and the exponent isn't negitive + // | | if the input isnt infinity or NaN + // | | | + assign Overflow = ResExpGteMax & ~FullRe[`NE+1]&~(InfIn|NaNIn|DivByZero); - /////////////////////////////////////////////////////////////////////////////// - // Underflow - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Underflow + /////////////////////////////////////////////////////////////////////////////// - // calculate underflow flag: detecting tininess after rounding - // the exponent is negitive - // | the result is subnormal - // | | the result is normal and rounded from a Subnorm - // | | | and if given an unbounded exponent the result does not round - // | | | | and if the result is not exact - // | | | | | and if the input isnt infinity or NaN - // | | | | | | - assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid); + // calculate underflow flag: detecting tininess after rounding + // the exponent is negitive + // | the result is subnormal + // | | the result is normal and rounded from a Subnorm + // | | | and if given an unbounded exponent the result does not round + // | | | | and if the result is not exact + // | | | | | and if the input isnt infinity or NaN + // | | | | | | + assign Underflow = ((FullRe[`NE+1] | (FullRe == 0) | ((FullRe == 1) & (Me == 0) & ~(UfPlus1&Guard)))&(Round|Sticky|Guard))&~(InfIn|NaNIn|DivByZero|Invalid); - /////////////////////////////////////////////////////////////////////////////// - // Inexact - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Inexact + /////////////////////////////////////////////////////////////////////////////// - // Set Inexact 
flag if the result is diffrent from what would be outputed given infinite precision - // - Don't set the underflow flag if an underflowed res isn't outputed - assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid); - //assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero); + // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision + // - Don't set the underflow flag if an underflowed res isn't outputed + assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid); + //assign FpInexact = (Sticky|Guard|Overflow|Round)&~(InfIn|NaNIn|DivByZero|Invalid|XZero); - // if the res is too small to be represented and not 0 - // | and if the res is not invalid (outside the integer bounds) - // | | - assign IntInexact = ((CvtCe[`NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid; + // if the res is too small to be represented and not 0 + // | and if the res is not invalid (outside the integer bounds) + // | | + assign IntInexact = ((CvtCe[`NE]&~XZero)|Sticky|Round|Guard)&~IntInvalid; - // select the inexact flag to output - assign Inexact = ToInt ? IntInexact : FpInexact; + // select the inexact flag to output + assign Inexact = ToInt ? 
IntInexact : FpInexact; - /////////////////////////////////////////////////////////////////////////////// - // Invalid - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Invalid + /////////////////////////////////////////////////////////////////////////////// - // Set Invalid flag for following cases: - // 1) any input is a signaling NaN - // 2) Inf - Inf (unless x or y is NaN) - // 3) 0 * Inf + // Set Invalid flag for following cases: + // 1) any input is a signaling NaN + // 2) Inf - Inf (unless x or y is NaN) + // 3) 0 * Inf - // invalid flag for integer result - // if the input is NaN or infinity - // | if the integer res overflows (out of range) - // | | if the input was negitive but ouputing to a unsigned number - // | | | the res doesn't round to zero - // | | | | or the res rounds up out of bounds - // | | | | and the res didn't underflow - // | | | | | - assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); - // | - // or when the positive res rounds up out of range - - assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp); - - // invalid flag for fma - assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf); - - //invalid flag for division - assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero); + // invalid flag for integer result + // if the input is NaN or infinity + // | if the integer res overflows (out of range) + // | | if the input was negitive but ouputing to a unsigned number + // | | | the res doesn't round to zero + // | | | | or the res rounds up out of bounds + // | | | | and the res didn't underflow + // | | | | | + assign IntInvalid = 
NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); + // | + // or when the positive res rounds up out of range + + assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp); + + // invalid flag for fma + assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf); + + //invalid flag for division + assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt&~NaNIn&~XZero); - assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp); + assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp); - /////////////////////////////////////////////////////////////////////////////// - // Divide by Zero - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Divide by Zero + /////////////////////////////////////////////////////////////////////////////// - // if dividing by zero and not 0/0 - // - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator) - assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn); + // if dividing by zero and not 0/0 + // - don't set flag if an input is NaN or Inf(IEEE says has to be a finite numerator) + assign DivByZero = YZero&DivOp&~Sqrt&~(XZero|NaNIn|InfIn); - /////////////////////////////////////////////////////////////////////////////// - // final flags - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // final flags + /////////////////////////////////////////////////////////////////////////////// - // Combine flags - // - to integer results do not set the underflow or overflow flags - assign PostProcFlg = {Invalid|(IntInvalid&CvtOp&ToInt), DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact}; + // Combine 
flags + // - to integer results do not set the underflow or overflow flags + assign PostProcFlg = {Invalid|(IntInvalid&CvtOp&ToInt), DivByZero, Overflow&~(ToInt&CvtOp), Underflow&~(ToInt&CvtOp), Inexact}; endmodule diff --git a/pipelined/src/fpu/postproc/fmashiftcalc.sv b/pipelined/src/fpu/postproc/fmashiftcalc.sv index f861d9162..dabf0dfde 100644 --- a/pipelined/src/fpu/postproc/fmashiftcalc.sv +++ b/pipelined/src/fpu/postproc/fmashiftcalc.sv @@ -29,119 +29,109 @@ `include "wally-config.vh" module fmashiftcalc( - input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single - input logic [`NE+1:0] FmaSe, // sum's exponent - input logic [3*`NF+3:0] FmaSm, // the positive sum - input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // normalization shift count - output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results - output logic FmaSZero, // is the result subnormal - calculated before LZA corection - output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection - output logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt, // normalization shift count - output logic [3*`NF+5:0] FmaShiftIn // is the sum zero + input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single + input logic [`NE+1:0] FmaSe, // sum's exponent + input logic [3*`NF+3:0] FmaSm, // the positive sum + input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // normalization shift count + output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results + output logic FmaSZero, // is the result subnormal - calculated before LZA corection + output logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection + output logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt, // normalization shift count + output logic [3*`NF+5:0] FmaShiftIn // is the sum zero ); - logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias - logic [`NE+1:0] 
BiasCorr; // correction for bias + logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias + logic [`NE+1:0] BiasCorr; // correction for bias - /////////////////////////////////////////////////////////////////////////////// - // Normalization - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Normalization + /////////////////////////////////////////////////////////////////////////////// - // Determine if the sum is zero - assign FmaSZero = ~(|FmaSm); + // Determine if the sum is zero + assign FmaSZero = ~(|FmaSm); - // calculate the sum's exponent - assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+5)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3); - - //convert the sum's exponent into the proper percision - if (`FPSIZES == 1) begin - assign NormSumExp = PreNormSumExp; - - end else if (`FPSIZES == 2) begin - assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS); - assign NormSumExp = PreNormSumExp+BiasCorr; - - end else if (`FPSIZES == 3) begin - always_comb begin - case (Fmt) - `FMT: BiasCorr = '0; - `FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS); - `FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS); - default: BiasCorr = 'x; - endcase - end - assign NormSumExp = PreNormSumExp+BiasCorr; - - end else if (`FPSIZES == 4) begin - always_comb begin - case (Fmt) - 2'h3: BiasCorr = '0; - 2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS); - 2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS); - 2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS); - endcase - end - assign NormSumExp = PreNormSumExp+BiasCorr; + // calculate the sum's exponent + assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+5)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+3); + //convert the sum's exponent into the proper percision + if (`FPSIZES == 1) begin + assign NormSumExp = PreNormSumExp; + end else if (`FPSIZES == 2) begin + assign BiasCorr = Fmt ? 
(`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS); + assign NormSumExp = PreNormSumExp+BiasCorr; + end else if (`FPSIZES == 3) begin + always_comb begin + case (Fmt) + `FMT: BiasCorr = '0; + `FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS); + `FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS); + default: BiasCorr = 'x; + endcase end - - // determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) & ~FmaSZero - if (`FPSIZES == 1) begin - logic Sum0LEZ, Sum0GEFL; - assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; - assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; - - end else if (`FPSIZES == 2) begin - logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; - assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; - assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); - assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; - assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? 
Sum0GEFL : Sum1GEFL) & ~FmaSZero; - - end else if (`FPSIZES == 3) begin - logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; - assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; - assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); - assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; - assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2)); - assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp; - always_comb begin - case (Fmt) - `FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; - `FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; - `FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; - default: FmaPreResultSubnorm = 1'bx; - endcase - end - - end else if (`FPSIZES == 4) begin - logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL; - assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; - assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS)); - assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp; - assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS)); - assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp; - assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS)); - assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp; - always_comb begin - case (Fmt) - 2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; - 2'h1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; - 2'h0: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; - 2'h2: FmaPreResultSubnorm = Sum3LEZ & Sum3GEFL & 
~FmaSZero; - endcase - end - + assign NormSumExp = PreNormSumExp+BiasCorr; + end else if (`FPSIZES == 4) begin + always_comb begin + case (Fmt) + 2'h3: BiasCorr = '0; + 2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS); + 2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS); + 2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS); + endcase end + assign NormSumExp = PreNormSumExp+BiasCorr; + end + + // determine if the result is subnormal: (NormSumExp <= 0) & (NormSumExp >= -FracLen) & ~FmaSZero + if (`FPSIZES == 1) begin + logic Sum0LEZ, Sum0GEFL; + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + end else if (`FPSIZES == 2) begin + logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; + assign FmaPreResultSubnorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? 
Sum0GEFL : Sum1GEFL) & ~FmaSZero; + end else if (`FPSIZES == 3) begin + logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; + assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2)); + assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp; + always_comb begin + case (Fmt) + `FMT: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + `FMT1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + `FMT2: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; + default: FmaPreResultSubnorm = 1'bx; + endcase + end + end else if (`FPSIZES == 4) begin + logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL; + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp; + assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS)); + assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp; + assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS)); + assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp; + always_comb begin + case (Fmt) + 2'h3: FmaPreResultSubnorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; + 2'h1: FmaPreResultSubnorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; + 2'h0: FmaPreResultSubnorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; + 2'h2: FmaPreResultSubnorm = Sum3LEZ & Sum3GEFL & ~FmaSZero; 
+ endcase + end + end - // set and calculate the shift input and amount - // - shift once if killing a product and the result is subnormal - assign FmaShiftIn = {2'b0, FmaSm}; - if (`FPSIZES == 1) - assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1; - else - assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1; + // set and calculate the shift input and amount + // - shift once if killing a product and the result is subnormal + assign FmaShiftIn = {2'b0, FmaSm}; + if (`FPSIZES == 1) assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2): FmaSCnt+1; + else assign FmaShiftAmt = FmaPreResultSubnorm ? FmaSe[$clog2(3*`NF+5)-1:0]+($clog2(3*`NF+5))'(`NF+2)+BiasCorr[$clog2(3*`NF+5)-1:0]: FmaSCnt+1; endmodule diff --git a/pipelined/src/fpu/postproc/normshift.sv b/pipelined/src/fpu/postproc/normshift.sv index 8c4405962..44469e316 100644 --- a/pipelined/src/fpu/postproc/normshift.sv +++ b/pipelined/src/fpu/postproc/normshift.sv @@ -73,10 +73,10 @@ // . 
module normshift( - input logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt, // shift amount - input logic [`NORMSHIFTSZ-1:0] ShiftIn, // number to be shifted - output logic [`NORMSHIFTSZ-1:0] Shifted // shifted result + input logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt, // shift amount + input logic [`NORMSHIFTSZ-1:0] ShiftIn, // number to be shifted + output logic [`NORMSHIFTSZ-1:0] Shifted // shifted result ); - assign Shifted = ShiftIn << ShiftAmt; - + + assign Shifted = ShiftIn << ShiftAmt; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/postproc/postprocess.sv b/pipelined/src/fpu/postproc/postprocess.sv index f15214457..07723d7f1 100644 --- a/pipelined/src/fpu/postproc/postprocess.sv +++ b/pipelined/src/fpu/postproc/postprocess.sv @@ -29,198 +29,198 @@ `include "wally-config.vh" module postprocess ( - // general signals - input logic Xs, Ys, // input signs - input logic [`NF:0] Xm, Ym, Zm, // input mantissas - input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single - input logic [2:0] OpCtrl, // choose which opperation (look below for values) - input logic XZero, YZero, // inputs are zero - input logic XInf, YInf, ZInf, // inputs are infinity - input logic XNaN, YNaN, ZNaN, // inputs are NaN - input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs - input logic [1:0] PostProcSel, // select result to be written to fp register - //fma signals - input logic FmaAs, // the modified Z sign - depends on instruction - input logic FmaPs, // the product's sign - input logic FmaSs, // Sum sign - input logic [`NE+1:0] FmaSe, // the sum's exponent - input logic [3*`NF+3:0] FmaSm, // the positive sum - input logic FmaASticky, // sticky bit that is calculated during alignment - input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // the normalization shift count - //divide signals - 
input logic DivSticky, // divider sticky bit - input logic [`NE+1:0] DivQe, // divsqrt exponent - input logic [`DIVb:0] DivQm, // divsqrt significand - // conversion signals - input logic CvtCs, // the result's sign - input logic [`NE:0] CvtCe, // the calculated expoent - input logic CvtResSubnormUf, // the convert result is subnormal or underflows - input logic [`LOGCVTLEN-1:0] CvtShiftAmt,// how much to shift by - input logic ToInt, // is fp->int (since it's writting to the integer register) - input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb) - input logic IntZero, // is the integer input zero - // final results - output logic [`FLEN-1:0] PostProcRes,// postprocessor final result - output logic [4:0] PostProcFlg,// postprocesser flags - output logic [`XLEN-1:0] FCvtIntRes // the integer conversion result - ); - - // general signals - logic Rs; // result sign - logic [`NF-1:0] Rf; // Result fraction - logic [`NE-1:0] Re; // Result exponent - logic Ms; // norMalized sign - logic [`CORRSHIFTSZ-1:0] Mf; // norMalized fraction - logic [`NE+1:0] Me; // normalized exponent - logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow - logic UfPlus1; // do you add one (for determining underflow flag) - logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount - logic [`NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift - logic [`NORMSHIFTSZ-1:0] Shifted; // the ouput of the normalized shifter (before shift correction) - logic Plus1; // add one to the final result? 
- logic Overflow; // overflow flag used to select results - logic Invalid; // invalid flag used to select results - logic Guard, Round, Sticky; // bits needed to determine rounding - logic [`FMTBITS-1:0] OutFmt; // output format - // fma signals - logic [`NE+1:0] FmaMe; // exponent of the normalized sum - logic FmaSZero; // is the sum zero - logic [3*`NF+5:0] FmaShiftIn; // fma shift input - logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results - logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA corection - logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma - // division singals - logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shif amount - logic [`NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input - logic [`NE+1:0] Qe; // divsqrt corrected exponent after corretion shift - logic DivByZero; // divide by zero flag - logic DivResSubnorm; // is the divsqrt result subnormal - logic DivSubnormShiftPos; // is the divsqrt subnorm shift amout positive (not underflowed) - // conversion signals - logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted for converter - logic [1:0] CvtNegResMsbs; // most significant bits of possibly negated int result - logic [`XLEN+1:0] CvtNegRes; // possibly negated integer result - logic CvtResUf; // did the convert result underflow - logic IntInvalid; // invalid integer flag - // readability signals - logic Mult; // multiply opperation - logic Sqrt; // is the divsqrt opperation sqrt - logic Int64; // is the integer 64 bits? - logic Signed; // is the opperation with a signed integer? - logic IntToFp; // is the opperation an int->fp conversion? 
- logic CvtOp; // convertion opperation - logic FmaOp; // fma opperation - logic DivOp; // divider opperation - logic InfIn; // are any of the inputs infinity - logic NaNIn; // are any of the inputs NaN + // general signals + input logic Xs, Ys, // input signs + input logic [`NF:0] Xm, Ym, Zm, // input mantissas + input logic [2:0] Frm, // rounding mode 000 = round to nearest, ties to even 001 = round towards zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude + input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single + input logic [2:0] OpCtrl, // choose which operation (look below for values) + input logic XZero, YZero, // inputs are zero + input logic XInf, YInf, ZInf, // inputs are infinity + input logic XNaN, YNaN, ZNaN, // inputs are NaN + input logic XSNaN, YSNaN, ZSNaN, // inputs are signaling NaNs + input logic [1:0] PostProcSel, // select result to be written to fp register + //fma signals + input logic FmaAs, // the modified Z sign - depends on instruction + input logic FmaPs, // the product's sign + input logic FmaSs, // Sum sign + input logic [`NE+1:0] FmaSe, // the sum's exponent + input logic [3*`NF+3:0] FmaSm, // the positive sum + input logic FmaASticky, // sticky bit that is calculated during alignment + input logic [$clog2(3*`NF+5)-1:0] FmaSCnt, // the normalization shift count + //divide signals + input logic DivSticky, // divider sticky bit + input logic [`NE+1:0] DivQe, // divsqrt exponent + input logic [`DIVb:0] DivQm, // divsqrt significand + // conversion signals + input logic CvtCs, // the result's sign + input logic [`NE:0] CvtCe, // the calculated exponent + input logic CvtResSubnormUf, // the convert result is subnormal or underflows + input logic [`LOGCVTLEN-1:0] CvtShiftAmt,// how much to shift by + input logic ToInt, // is fp->int (since it's writing to the integer register) + input logic [`CVTLEN-1:0] CvtLzcIn, // input to the Leading Zero Counter (without msb) + input logic IntZero, // is 
the integer input zero + // final results + output logic [`FLEN-1:0] PostProcRes,// postprocessor final result + output logic [4:0] PostProcFlg,// postprocessor flags + output logic [`XLEN-1:0] FCvtIntRes // the integer conversion result + ); + + // general signals + logic Rs; // result sign + logic [`NF-1:0] Rf; // Result fraction + logic [`NE-1:0] Re; // Result exponent + logic Ms; // norMalized sign + logic [`CORRSHIFTSZ-1:0] Mf; // norMalized fraction + logic [`NE+1:0] Me; // normalized exponent + logic [`NE+1:0] FullRe; // Re with bits to determine sign and overflow + logic UfPlus1; // do you add one (for determining underflow flag) + logic [`LOGNORMSHIFTSZ-1:0] ShiftAmt; // normalization shift amount + logic [`NORMSHIFTSZ-1:0] ShiftIn; // input to normalization shift + logic [`NORMSHIFTSZ-1:0] Shifted; // the output of the normalized shifter (before shift correction) + logic Plus1; // add one to the final result? + logic Overflow; // overflow flag used to select results + logic Invalid; // invalid flag used to select results + logic Guard, Round, Sticky; // bits needed to determine rounding + logic [`FMTBITS-1:0] OutFmt; // output format + // fma signals + logic [`NE+1:0] FmaMe; // exponent of the normalized sum + logic FmaSZero; // is the sum zero + logic [3*`NF+5:0] FmaShiftIn; // fma shift input + logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account Subnormal or zero results + logic FmaPreResultSubnorm; // is the result subnormal - calculated before LZA correction + logic [$clog2(3*`NF+5)-1:0] FmaShiftAmt;// normalization shift amount for fma + // division signals + logic [`LOGNORMSHIFTSZ-1:0] DivShiftAmt; // divsqrt shift amount + logic [`NORMSHIFTSZ-1:0] DivShiftIn; // divsqrt shift input + logic [`NE+1:0] Qe; // divsqrt corrected exponent after correction shift + logic DivByZero; // divide by zero flag + logic DivResSubnorm; // is the divsqrt result subnormal + logic DivSubnormShiftPos; // is the divsqrt subnorm shift amount 
positive (not underflowed) + // conversion signals + logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted for converter + logic [1:0] CvtNegResMsbs; // most significant bits of possibly negated int result + logic [`XLEN+1:0] CvtNegRes; // possibly negated integer result + logic CvtResUf; // did the convert result underflow + logic IntInvalid; // invalid integer flag + // readability signals + logic Mult; // multiply operation + logic Sqrt; // is the divsqrt operation sqrt + logic Int64; // is the integer 64 bits? + logic Signed; // is the operation with a signed integer? + logic IntToFp; // is the operation an int->fp conversion? + logic CvtOp; // conversion operation + logic FmaOp; // fma operation + logic DivOp; // divider operation + logic InfIn; // are any of the inputs infinity + logic NaNIn; // are any of the inputs NaN - // signals to help readability - assign Signed = OpCtrl[0]; - assign Int64 = OpCtrl[1]; - assign IntToFp = OpCtrl[2]; - assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0]; - assign CvtOp = (PostProcSel == 2'b00); - assign FmaOp = (PostProcSel == 2'b10); - assign DivOp = (PostProcSel == 2'b01); - assign Sqrt = OpCtrl[0]; + // signals to help readability + assign Signed = OpCtrl[0]; + assign Int64 = OpCtrl[1]; + assign IntToFp = OpCtrl[2]; + assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0]; + assign CvtOp = (PostProcSel == 2'b00); + assign FmaOp = (PostProcSel == 2'b10); + assign DivOp = (PostProcSel == 2'b01); + assign Sqrt = OpCtrl[0]; - // is there an input of infinity or NaN being used - assign InfIn = XInf|YInf|ZInf; - assign NaNIn = XNaN|YNaN|ZNaN; + // is there an input of infinity or NaN being used + assign InfIn = XInf|YInf|ZInf; + assign NaNIn = XNaN|YNaN|ZNaN; - // choose the ouptut format depending on the opperation - // - fp -> fp: OpCtrl contains the percision of the output - // - otherwise: Fmt contains the percision of the output - if (`FPSIZES == 2) - assign OutFmt = IntToFp|~CvtOp ? 
Fmt : (OpCtrl[1:0] == `FMT); - else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; + // choose the output format depending on the operation (NOTE(review): neither branch below drives OutFmt when FPSIZES is 1; confirm that case is intentionally left unassigned) + // - fp -> fp: OpCtrl contains the precision of the output + // - otherwise: Fmt contains the precision of the output + if (`FPSIZES == 2) + assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT); + else if (`FPSIZES == 3 | `FPSIZES == 4) + assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; - /////////////////////////////////////////////////////////////////////////////// - // Normalization - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Normalization + /////////////////////////////////////////////////////////////////////////////// - // final claulations before shifting - cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn, - .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); + // final calculations before shifting + cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResSubnormUf, .Xm, .CvtLzcIn, + .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe, - .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn); + fmashiftcalc fmashiftcalc(.FmaSm, .FmaSCnt, .Fmt, .NormSumExp, .FmaSe, + .FmaSZero, .FmaPreResultSubnorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); + divshiftcalc divshiftcalc(.DivQe, .DivQm, .DivResSubnorm, .DivSubnormShiftPos, .DivShiftAmt, .DivShiftIn); - // select which unit's output to shift - always_comb - case(PostProcSel) - 2'b10: begin // fma - ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt}; - ShiftIn = {FmaShiftIn, 
{`NORMSHIFTSZ-(3*`NF+6){1'b0}}}; - end - 2'b00: begin // cvt - ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, 
CvtShiftAmt}; - ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; - end - 2'b01: begin //divsqrt - ShiftAmt = DivShiftAmt; - ShiftIn = DivShiftIn; - end - default: begin - ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}}; - ShiftIn = {`NORMSHIFTSZ{1'bx}}; - end - endcase - - // main normalization shift - normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); + // select which unit's output to shift + always_comb + case(PostProcSel) + 2'b10: begin // fma + ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(3*`NF+5){1'b0}}, FmaShiftAmt}; + ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+6){1'b0}}}; + end + 2'b00: begin // cvt + ShiftAmt = {{`LOGNORMSHIFTSZ-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmt}; + ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; + end + 2'b01: begin //divsqrt + ShiftAmt = DivShiftAmt; + ShiftIn = DivShiftIn; + end + default: begin + ShiftAmt = {`LOGNORMSHIFTSZ{1'bx}}; + ShiftIn = {`NORMSHIFTSZ{1'bx}}; + end + endcase + + // main normalization shift + normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - // correct for LZA/divsqrt error - shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp, - .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf); + // correct for LZA/divsqrt error + shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultSubnorm, .NormSumExp, + .DivResSubnorm, .DivSubnormShiftPos, .DivOp, .DivQe, .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf); - /////////////////////////////////////////////////////////////////////////////// - // Rounding - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Rounding + /////////////////////////////////////////////////////////////////////////////// - // round to nearest even - // round to zero - // round to -infinity - // round to infinity - // round to nearest max magnitude + // round to nearest even + // round to zero + // round to -infinity + // round 
to infinity + // round to nearest max magnitude - // calulate result sign used in rounding unit - roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); + // calculate result sign used in rounding unit + roundsign roundsign(.FmaOp, .DivOp, .CvtOp, .Sqrt, .FmaSs, .Xs, .Ys, .CvtCs, .Ms); - round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe, - .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf, - .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me); + round round(.OutFmt, .Frm, .FmaASticky, .Plus1, .PostProcSel, .CvtCe, .Qe, + .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResSubnormUf, .Mf, .ToInt, .CvtResUf, + .DivSticky, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .Sticky, .Round, .Guard, .Me); - /////////////////////////////////////////////////////////////////////////////// - // Sign calculation - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Sign calculation + /////////////////////////////////////////////////////////////////////////////// - resultsign resultsign(.Frm, .FmaPs, .FmaAs, .Round, .Sticky, .Guard, - .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Rs); + resultsign resultsign(.Frm, .FmaPs, .FmaAs, .Round, .Sticky, .Guard, + .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Rs); - /////////////////////////////////////////////////////////////////////////////// - // Flags - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Flags + /////////////////////////////////////////////////////////////////////////////// - flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, - .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, - .NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero, - .Guard, .Sticky, .UfPlus1, .CvtOp, 
.DivOp, .FmaOp, .FullRe, .Plus1, - .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); + flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, + .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, + .NaNIn, .FmaAs, .FmaPs, .Round, .IntInvalid, .DivByZero, + .Guard, .Sticky, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, + .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); - /////////////////////////////////////////////////////////////////////////////// - // Select the result - /////////////////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////// + // Select the result + /////////////////////////////////////////////////////////////////////////////// - negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); + negateintres negateintres(.Xs, .Shifted, .Signed, .Int64, .Plus1, .CvtNegResMsbs, .CvtNegRes); - specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, - .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, - .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, - .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes); + specialcase specialcase(.Xs, .Xm, .Ym, .Zm, .XZero, .IntInvalid, + .IntZero, .Frm, .OutFmt, .XNaN, .YNaN, .ZNaN, .CvtResUf, + .NaNIn, .IntToFp, .Int64, .Signed, .CvtOp, .FmaOp, .Plus1, .Invalid, .Overflow, .InfIn, .CvtNegRes, + .XInf, .YInf, .DivOp, .DivByZero, .FullRe, .CvtCe, .Rs, .Re, .Rf, .PostProcRes, .FCvtIntRes); endmodule diff --git a/pipelined/src/fpu/postproc/resultsign.sv b/pipelined/src/fpu/postproc/resultsign.sv index 2ac0ae3c0..7eeba9e8a 100644 --- a/pipelined/src/fpu/postproc/resultsign.sv +++ b/pipelined/src/fpu/postproc/resultsign.sv @@ -29,52 +29,52 @@ `include "wally-config.vh" module resultsign( - input logic [2:0] Frm, // rounding mode - input 
logic FmaOp, // is the operation an Fma - input logic Mult, // is the fma opperation multipy - input logic ZInf, // is Z infinity - input logic InfIn, // are any of the inputs infinity - input logic FmaSZero, // is the fma sum zero - input logic Ms, // normalized result sign - input logic FmaPs, // product's sign - input logic FmaAs, // aligned addend's sign - input logic Guard, // guard bit for rounding - input logic Round, // round bit for rounding - input logic Sticky, // sticky bit for rounding - output logic Rs // result sign + input logic [2:0] Frm, // rounding mode + input logic FmaOp, // is the operation an Fma + input logic Mult, // is the fma operation multiply + input logic ZInf, // is Z infinity + input logic InfIn, // are any of the inputs infinity + input logic FmaSZero, // is the fma sum zero + input logic Ms, // normalized result sign + input logic FmaPs, // product's sign + input logic FmaAs, // aligned addend's sign + input logic Guard, // guard bit for rounding + input logic Round, // round bit for rounding + input logic Sticky, // sticky bit for rounding + output logic Rs // result sign ); - logic Zeros; // zero result sign - logic Infs; // infinity result sign + logic Zeros; // zero result sign + logic Infs; // infinity result sign - // determine the sign for a result of 0 - // The IEEE754-2019 standard specifies: - // - the sign of an exact zero sum (with operands of diffrent signs) should be positive unless rounding toward negitive infinity - // - when the exact result of an FMA opperation is non-zero, but is zero due to rounding, use the sign of the exact result - // - if x = +0 or -0 then x+x=x and x-(-x)=x - // - the sign of a product is the exclisive or or the opperand's signs - // Zero sign will only be selected if: - // - P=Z and a cancelation occurs - exact zero - // - Z is zero and P is zero - exact zero - // - P is killed and Z is zero - Psgn - // - Z is killed and P is zero - impossible - // Zero sign calculation: - // - if a 
multiply opperation is done, then use the products sign(Ps) - // - if the zero sum is not exactly zero i.e. Round|Sticky use the sign of the exact result (which is the product's sign) - // - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign - assign Zeros = (FmaPs^FmaAs)&~(Round|Guard|Sticky)&~Mult ? Frm[1:0] == 2'b10 : FmaPs; + // determine the sign for a result of 0 + // The IEEE754-2019 standard specifies: + // - the sign of an exact zero sum (with operands of different signs) should be positive unless rounding toward negative infinity + // - when the exact result of an FMA operation is non-zero, but is zero due to rounding, use the sign of the exact result + // - if x = +0 or -0 then x+x=x and x-(-x)=x + // - the sign of a product is the exclusive or of the operands' signs + // Zero sign will only be selected if: + // - P=Z and a cancellation occurs - exact zero + // - Z is zero and P is zero - exact zero + // - P is killed and Z is zero - Psgn + // - Z is killed and P is zero - impossible + // Zero sign calculation: + // - if a multiply operation is done, then use the product's sign (FmaPs) + // - if the zero sum is not exactly zero i.e. Round|Guard|Sticky use the sign of the exact result (which is the product's sign) + // - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign + assign Zeros = (FmaPs^FmaAs)&~(Round|Guard|Sticky)&~Mult ? Frm[1:0] == 2'b10 : FmaPs; - // determine the sign of an infinity result - // is the result negitive - // if p - z is the Sum negitive - // if -p + z is the Sum positive - // if -p - z then the Sum is negitive - assign Infs = ZInf ? FmaAs : FmaPs; + // determine the sign of an infinity result + // is the result negative + // if p - z is the Sum negative + // if -p + z is the Sum positive + // if -p - z then the Sum is negative + assign Infs = ZInf ? 
FmaAs : FmaPs; - // select the result sign - always_comb - if(InfIn&FmaOp) Rs = Infs; - else if(FmaSZero&FmaOp) Rs = Zeros; - else Rs = Ms; + // select the result sign + always_comb + if(InfIn&FmaOp) Rs = Infs; + else if(FmaSZero&FmaOp) Rs = Zeros; + else Rs = Ms; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/postproc/round.sv b/pipelined/src/fpu/postproc/round.sv index 820b1a92d..ee5a44751 100644 --- a/pipelined/src/fpu/postproc/round.sv +++ b/pipelined/src/fpu/postproc/round.sv @@ -37,294 +37,295 @@ `define XLENPOS ((`XLEN>`NF) ? 1 : (`XLEN>`NF1) ? 2 : 3) module round( - input logic [`FMTBITS-1:0] OutFmt, // output format - input logic [2:0] Frm, // rounding mode - input logic [1:0] PostProcSel, // select the postprocessor output - input logic Ms, // normalized sign - input logic [`CORRSHIFTSZ-1:0] Mf, // normalized fraction - // fma - input logic FmaOp, // is an fma opperation being done? - input logic [`NE+1:0] FmaMe, // exponent of the normalized sum for fma - input logic FmaASticky, // addend's sticky bit - // divsqrt - input logic DivOp, // is a division opperation being done - input logic DivSticky, // divsqrt sticky bit - input logic [`NE+1:0] Qe, // the divsqrt calculated expoent - // cvt - input logic CvtOp, // is a convert opperation being done - input logic ToInt, // is the cvt op a cvt to integer - input logic CvtResSubnormUf, // is the cvt result subnormal or underflow - input logic CvtResUf, // does the cvt result underflow - input logic [`NE:0] CvtCe, // the cvt calculated expoent - // outputs - output logic [`NE+1:0] Me, // normalied fraction - output logic UfPlus1, // do you add one to the result if given an unbounded exponent - output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow - output logic [`NE-1:0] Re, // Result exponent - output logic [`NF-1:0] Rf, // Result fractionNormS - output logic Sticky, // sticky bit - output logic Plus1, // do you add one to the final result - output logic Round, 
Guard // bits needed to calculate rounding + input logic [`FMTBITS-1:0] OutFmt, // output format + input logic [2:0] Frm, // rounding mode + input logic [1:0] PostProcSel, // select the postprocessor output + input logic Ms, // normalized sign + input logic [`CORRSHIFTSZ-1:0] Mf, // normalized fraction + // fma + input logic FmaOp, // is an fma opperation being done? + input logic [`NE+1:0] FmaMe, // exponent of the normalized sum for fma + input logic FmaASticky, // addend's sticky bit + // divsqrt + input logic DivOp, // is a division opperation being done + input logic DivSticky, // divsqrt sticky bit + input logic [`NE+1:0] Qe, // the divsqrt calculated expoent + // cvt + input logic CvtOp, // is a convert opperation being done + input logic ToInt, // is the cvt op a cvt to integer + input logic CvtResSubnormUf, // is the cvt result subnormal or underflow + input logic CvtResUf, // does the cvt result underflow + input logic [`NE:0] CvtCe, // the cvt calculated expoent + // outputs + output logic [`NE+1:0] Me, // normalied fraction + output logic UfPlus1, // do you add one to the result if given an unbounded exponent + output logic [`NE+1:0] FullRe, // Re with bits to determine sign and overflow + output logic [`NE-1:0] Re, // Result exponent + output logic [`NF-1:0] Rf, // Result fractionNormS + output logic Sticky, // sticky bit + output logic Plus1, // do you add one to the final result + output logic Round, Guard // bits needed to calculate rounding ); - logic UfCalcPlus1; // calculated plus one for unbounded exponent - logic NormSticky; // normalized sum's sticky bit - logic [`NF-1:0] RoundFrac; // rounded fraction - logic FpRes; // is the result a floating point - logic IntRes; // is the result an integer - logic FpGuard, FpRound; // floating point round/guard bits - logic FpLsbRes; // least significant bit of floating point result - logic LsbRes; // lsb of result - logic CalcPlus1; // calculated plus1 - logic FpPlus1; // do you add one to the fp result - 
logic [`FLEN:0] RoundAdd; // how much to add to the result - /////////////////////////////////////////////////////////////////////////////// - // Rounding - /////////////////////////////////////////////////////////////////////////////// + logic UfCalcPlus1; // calculated plus one for unbounded exponent + logic NormSticky; // normalized sum's sticky bit + logic [`NF-1:0] RoundFrac; // rounded fraction + logic FpRes; // is the result a floating point + logic IntRes; // is the result an integer + logic FpGuard, FpRound; // floating point round/guard bits + logic FpLsbRes; // least significant bit of floating point result + logic LsbRes; // lsb of result + logic CalcPlus1; // calculated plus1 + logic FpPlus1; // do you add one to the fp result + logic [`FLEN:0] RoundAdd; // how much to add to the result - // round to nearest even - // {Round, Sticky} - // 0x - do nothing - // 10 - tie - Plus1 if result is odd (LSBNormSum = 1) - // - don't add 1 if a small number was supposed to be subtracted - // 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) - // - plus 1 otherwise + /////////////////////////////////////////////////////////////////////////////// + // Rounding + /////////////////////////////////////////////////////////////////////////////// - // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 + // round to nearest even + // {Round, Sticky} + // 0x - do nothing + // 10 - tie - Plus1 if result is odd (LSBNormSum = 1) + // - don't add 1 if a small number was supposed to be subtracted + // 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) + // - plus 1 otherwise - // round to -infinity - // - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0 - // - subtract 1 if a small number was supposed to be subtracted from a 
positive result with guard and round bits of 0 + // round to zero - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 - // round to infinity - // - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0 - // - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 + // round to -infinity + // - Plus1 if negative unless a small number was supposed to be subtracted from a result with guard and round bits of 0 + // - subtract 1 if a small number was supposed to be subtracted from a positive result with guard and round bits of 0 - // round to nearest max magnitude - // {Guard, Round, Sticky} - // 0x - do nothing - // 10 - tie - Plus1 - // - don't add 1 if a small number was supposed to be subtracted - // 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) - // - Plus 1 otherwise + // round to infinity + // - Plus1 if positive unless a small number was supposed to be subtracted from a result with guard and round bits of 0 + // - subtract 1 if a small number was supposed to be subtracted from a negative result with guard and round bits of 0 + + // round to nearest max magnitude + // {Guard, Round, Sticky} + // 0x - do nothing + // 10 - tie - Plus1 + // - don't add 1 if a small number was supposed to be subtracted + // 11 - do nothing if a small number was supposed to subtracted (the sticky bit was set by the small number) + // - Plus 1 otherwise - // determine what format the final result is in: int or fp - assign IntRes = CvtOp & ToInt; - assign FpRes = ~IntRes; + // determine what format the final result is in: int or fp + assign IntRes = CvtOp & ToInt; + assign FpRes = ~IntRes; - // sticky bit calculation - if (`FPSIZES == 1) begin + // sticky bit calculation + if (`FPSIZES == 1) begin - // 1: XLEN > NF - // | XLEN | - // | NF |1|1| - // 
^ ^ if floating point result - // ^ if not an FMA result - if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); - // 2: NF > XLEN - if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | - (|Mf[`CORRSHIFTSZ-`NF-2:0]); + // 1: XLEN > NF + // | XLEN | + // | NF |1|1| + // ^ ^ if floating point result + // ^ if not an FMA result + if (`XLENPOS == 1)assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); + // 2: NF > XLEN + if (`XLENPOS == 2)assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&IntRes) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); - end else if (`FPSIZES == 2) begin - // XLEN is either 64 or 32 - // so half and single are always smaller then XLEN + end else if (`FPSIZES == 2) begin + // XLEN is either 64 or 32 + // so half and single are always smaller then XLEN - // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | - (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); - // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | - (|Mf[`CORRSHIFTSZ-`NF-2:0]); - // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | - (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | - (|Mf[`CORRSHIFTSZ-`NF-2:0]); + // 1: XLEN > NF > NF1 + if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~OutFmt) | + (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); + // 2: NF > XLEN > NF1 + if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~OutFmt) | + 
(|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~OutFmt)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); + // 3: NF > NF1 > XLEN + if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&IntRes) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~OutFmt|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); - end else if (`FPSIZES == 3) begin - // 1: XLEN > NF > NF1 - if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | - (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); - // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | - (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | - (|Mf[`CORRSHIFTSZ-`NF-2:0]); - // 3: NF > NF1 > XLEN - if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | - (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | - (|Mf[`CORRSHIFTSZ-`NF-2:0]); + end else if (`FPSIZES == 3) begin + // 1: XLEN > NF > NF1 + if (`XLENPOS == 1) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&FpRes&~(OutFmt==`FMT)) | + (|Mf[`CORRSHIFTSZ-`NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:0]); + // 2: NF > XLEN > NF1 + if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`NF1-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`FMT)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF-1]&(IntRes|~(OutFmt==`FMT))) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); + // 3: NF > NF1 > XLEN + if (`XLENPOS == 3) assign 
NormSticky = (|Mf[`CORRSHIFTSZ-`NF2-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&(OutFmt==`FMT1)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`NF1-1]&((OutFmt==`FMT1)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF1-2:`CORRSHIFTSZ-`NF-1]&(~(OutFmt==`FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`NF-2:0]); - end else if (`FPSIZES == 4) begin - // Quad precision will always be greater than XLEN - // 2: NF > XLEN > NF1 - if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); - // 3: NF > NF1 > XLEN - // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer - if (`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | - (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | - (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | - (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | - (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); + end else if (`FPSIZES == 4) begin + // Quad precision will always be greater than XLEN + // 2: NF > XLEN > NF1 + if (`XLENPOS == 2) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | + (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`D_NF-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | + (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&~(OutFmt==`Q_FMT)) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); + // 3: NF > NF1 > XLEN + // The extra XLEN bit will be ored later when caculating the final sticky bit - the ufplus1 not needed for integer + if 
(`XLENPOS == 3) assign NormSticky = (|Mf[`CORRSHIFTSZ-`H_NF-2:`CORRSHIFTSZ-`S_NF-1]&FpRes&(OutFmt==`H_FMT)) | + (|Mf[`CORRSHIFTSZ-`S_NF-2:`CORRSHIFTSZ-`XLEN-1]&FpRes&((OutFmt==`S_FMT)|(OutFmt==`H_FMT))) | + (|Mf[`CORRSHIFTSZ-`XLEN-2:`CORRSHIFTSZ-`D_NF-1]&((OutFmt==`S_FMT)|(OutFmt==`H_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`D_NF-2:`CORRSHIFTSZ-`Q_NF-1]&(~(OutFmt==`Q_FMT)|IntRes)) | + (|Mf[`CORRSHIFTSZ-`Q_NF-2:0]); - end - + end + - // only add the Addend sticky if doing an FMA opperation - // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp; - + // only add the Addend sticky if doing an FMA opperation + // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) + assign Sticky = FmaASticky&FmaOp | NormSticky | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivSticky&DivOp; + - // determine round and LSB of the rounded value - // - underflow round bit is used to determint the underflow flag - if (`FPSIZES == 1) begin - assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1]; - assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF]; - assign FpRound = Mf[`CORRSHIFTSZ-`NF-2]; + // determine round and LSB of the rounded value + // - underflow round bit is used to determint the underflow flag + if (`FPSIZES == 1) begin + assign FpGuard = Mf[`CORRSHIFTSZ-`NF-1]; + assign FpLsbRes = Mf[`CORRSHIFTSZ-`NF]; + assign FpRound = Mf[`CORRSHIFTSZ-`NF-2]; - end else if (`FPSIZES == 2) begin - assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; - assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; - assign FpRound = OutFmt ? Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; + end else if (`FPSIZES == 2) begin + assign FpGuard = OutFmt ? Mf[`CORRSHIFTSZ-`NF-1] : Mf[`CORRSHIFTSZ-`NF1-1]; + assign FpLsbRes = OutFmt ? Mf[`CORRSHIFTSZ-`NF] : Mf[`CORRSHIFTSZ-`NF1]; + assign FpRound = OutFmt ? 
Mf[`CORRSHIFTSZ-`NF-2] : Mf[`CORRSHIFTSZ-`NF1-2]; - end else if (`FPSIZES == 3) begin - always_comb - case (OutFmt) - `FMT: begin - FpGuard = Mf[`CORRSHIFTSZ-`NF-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`NF]; - FpRound = Mf[`CORRSHIFTSZ-`NF-2]; - end - `FMT1: begin - FpGuard = Mf[`CORRSHIFTSZ-`NF1-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`NF1]; - FpRound = Mf[`CORRSHIFTSZ-`NF1-2]; - end - `FMT2: begin - FpGuard = Mf[`CORRSHIFTSZ-`NF2-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`NF2]; - FpRound = Mf[`CORRSHIFTSZ-`NF2-2]; - end - default: begin - FpGuard = 1'bx; - FpLsbRes = 1'bx; - FpRound = 1'bx; - end - endcase - end else if (`FPSIZES == 4) begin - always_comb - case (OutFmt) - 2'h3: begin - FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF]; - FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2]; - end - 2'h1: begin - FpGuard = Mf[`CORRSHIFTSZ-`D_NF-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF]; - FpRound = Mf[`CORRSHIFTSZ-`D_NF-2]; - end - 2'h0: begin - FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF]; - FpRound = Mf[`CORRSHIFTSZ-`S_NF-2]; - end - 2'h2: begin - FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1]; - FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF]; - FpRound = Mf[`CORRSHIFTSZ-`H_NF-2]; - end - endcase - end + end else if (`FPSIZES == 3) begin + always_comb + case (OutFmt) + `FMT: begin + FpGuard = Mf[`CORRSHIFTSZ-`NF-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`NF]; + FpRound = Mf[`CORRSHIFTSZ-`NF-2]; + end + `FMT1: begin + FpGuard = Mf[`CORRSHIFTSZ-`NF1-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`NF1]; + FpRound = Mf[`CORRSHIFTSZ-`NF1-2]; + end + `FMT2: begin + FpGuard = Mf[`CORRSHIFTSZ-`NF2-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`NF2]; + FpRound = Mf[`CORRSHIFTSZ-`NF2-2]; + end + default: begin + FpGuard = 1'bx; + FpLsbRes = 1'bx; + FpRound = 1'bx; + end + endcase + end else if (`FPSIZES == 4) begin + always_comb + case (OutFmt) + 2'h3: begin + FpGuard = Mf[`CORRSHIFTSZ-`Q_NF-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`Q_NF]; + FpRound = Mf[`CORRSHIFTSZ-`Q_NF-2]; + end + 2'h1: begin + FpGuard = 
Mf[`CORRSHIFTSZ-`D_NF-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`D_NF]; + FpRound = Mf[`CORRSHIFTSZ-`D_NF-2]; + end + 2'h0: begin + FpGuard = Mf[`CORRSHIFTSZ-`S_NF-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`S_NF]; + FpRound = Mf[`CORRSHIFTSZ-`S_NF-2]; + end + 2'h2: begin + FpGuard = Mf[`CORRSHIFTSZ-`H_NF-1]; + FpLsbRes = Mf[`CORRSHIFTSZ-`H_NF]; + FpRound = Mf[`CORRSHIFTSZ-`H_NF-2]; + end + endcase + end - assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard; - assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes; - assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound; + assign Guard = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-1] : FpGuard; + assign LsbRes = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN] : FpLsbRes; + assign Round = ToInt&CvtOp ? Mf[`CORRSHIFTSZ-`XLEN-2] : FpRound; - always_comb begin - // Determine if you add 1 - case (Frm) - 3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even - 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = Ms;//round down - 3'b011: CalcPlus1 = ~Ms;//round up - 3'b100: CalcPlus1 = Guard;//round to nearest max magnitude - default: CalcPlus1 = 1'bx; - endcase - // Determine if you add 1 (for underflow flag) - case (Frm) - 3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even - 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = Ms;//round down - 3'b011: UfCalcPlus1 = ~Ms;//round up - 3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude - default: UfCalcPlus1 = 1'bx; - endcase - - end + always_comb begin + // Determine if you add 1 + case (Frm) + 3'b000: CalcPlus1 = Guard & (Round|Sticky|LsbRes);//round to nearest even + 3'b001: CalcPlus1 = 0;//round to zero + 3'b010: CalcPlus1 = Ms;//round down + 3'b011: CalcPlus1 = ~Ms;//round up + 3'b100: CalcPlus1 = Guard;//round to nearest max magnitude + default: CalcPlus1 = 1'bx; + endcase + // Determine if you add 1 (for underflow flag) + case (Frm) + 3'b000: UfCalcPlus1 = Round & (Sticky|Guard);//round to nearest even + 
3'b001: UfCalcPlus1 = 0;//round to zero + 3'b010: UfCalcPlus1 = Ms;//round down + 3'b011: UfCalcPlus1 = ~Ms;//round up + 3'b100: UfCalcPlus1 = Round;//round to nearest max magnitude + default: UfCalcPlus1 = 1'bx; + endcase + + end - // If an answer is exact don't round - assign Plus1 = CalcPlus1 & (Sticky|Round|Guard); - assign FpPlus1 = Plus1&~(ToInt&CvtOp); - assign UfPlus1 = UfCalcPlus1 & (Sticky|Round); + // If an answer is exact don't round + assign Plus1 = CalcPlus1 & (Sticky|Round|Guard); + assign FpPlus1 = Plus1&~(ToInt&CvtOp); + assign UfPlus1 = UfCalcPlus1 & (Sticky|Round); - // place Plus1 into the proper position for the format - if (`FPSIZES == 1) begin - assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; + // place Plus1 into the proper position for the format + if (`FPSIZES == 1) begin + assign RoundAdd = {{`FLEN{1'b0}}, FpPlus1}; - end else if (`FPSIZES == 2) begin - // \/FLEN+1 - // | NE+2 | NF | - // '-NE+2-^----NF1----^ - // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 - assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt}; + end else if (`FPSIZES == 2) begin + // \/FLEN+1 + // | NE+2 | NF | + // '-NE+2-^----NF1----^ + // `FLEN+1-`NE-2-`NF1 = FLEN-1-NE-NF1 + assign RoundAdd = {(`NE+1+`NF1)'(0), FpPlus1&~OutFmt, (`NF-`NF1-1)'(0), FpPlus1&OutFmt}; - end else if (`FPSIZES == 3) begin - assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)}; + end else if (`FPSIZES == 3) begin + assign RoundAdd = {(`NE+1+`NF2)'(0), FpPlus1&(OutFmt==`FMT2), (`NF1-`NF2-1)'(0), FpPlus1&(OutFmt==`FMT1), (`NF-`NF1-1)'(0), FpPlus1&(OutFmt==`FMT)}; - end else if (`FPSIZES == 4) - assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)}; + end else if (`FPSIZES == 4) + assign RoundAdd = {(`Q_NE+1+`H_NF)'(0), 
FpPlus1&(OutFmt==`H_FMT), (`S_NF-`H_NF-1)'(0), FpPlus1&(OutFmt==`S_FMT), (`D_NF-`S_NF-1)'(0), FpPlus1&(OutFmt==`D_FMT), (`Q_NF-`D_NF-1)'(0), FpPlus1&(OutFmt==`Q_FMT)}; - // trim unneeded bits from fraction - assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; - + // trim unneeded bits from fraction + assign RoundFrac = Mf[`CORRSHIFTSZ-1:`CORRSHIFTSZ-`NF]; + - // select the exponent - always_comb - case(PostProcSel) - 2'b10: Me = FmaMe; // fma - 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt - // 2'b01: Me = DivDone ? Qe : '0; // divide - 2'b01: Me = Qe; // divide - default: Me = '0; - endcase + // select the exponent + always_comb + case(PostProcSel) + 2'b10: Me = FmaMe; // fma + 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResSubnormUf|CvtResUf}}; // cvt + // 2'b01: Me = DivDone ? Qe : '0; // divide + 2'b01: Me = Qe; // divide + default: Me = '0; + endcase - // round the result - // - if the fraction overflows one should be added to the exponent - assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd; - assign Re = FullRe[`NE-1:0]; + // round the result + // - if the fraction overflows one should be added to the exponent + assign {FullRe, Rf} = {Me, RoundFrac} + RoundAdd; + assign Re = FullRe[`NE-1:0]; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/postproc/roundsign.sv b/pipelined/src/fpu/postproc/roundsign.sv index 40231ecd3..8809f0a4b 100644 --- a/pipelined/src/fpu/postproc/roundsign.sv +++ b/pipelined/src/fpu/postproc/roundsign.sv @@ -28,23 +28,23 @@ `include "wally-config.vh" module roundsign( - input logic Xs, // x sign - input logic Ys, // y sign - input logic CvtCs, // convert result sign - input logic FmaSs, // fma sum sign - input logic Sqrt, // sqrt oppertion? 
(when using divsqrt unit) - input logic FmaOp, // is fma opperation - input logic DivOp, // is divsqrt opperation - input logic CvtOp, // is cvt opperation - output logic Ms // normalized result sign + input logic Xs, // x sign + input logic Ys, // y sign + input logic CvtCs, // convert result sign + input logic FmaSs, // fma sum sign + input logic Sqrt, // sqrt oppertion? (when using divsqrt unit) + input logic FmaOp, // is fma opperation + input logic DivOp, // is divsqrt opperation + input logic CvtOp, // is cvt opperation + output logic Ms // normalized result sign ); - logic Qs; // divsqrt result sign + logic Qs; // divsqrt result sign - // calculate divsqrt sign - assign Qs = Xs^(Ys&~Sqrt); + // calculate divsqrt sign + assign Qs = Xs^(Ys&~Sqrt); - // Select sign for rounding calulation - assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); + // Select sign for rounding calulation + assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/postproc/shiftcorrection.sv b/pipelined/src/fpu/postproc/shiftcorrection.sv index b7003cf56..2fb0b5d7e 100644 --- a/pipelined/src/fpu/postproc/shiftcorrection.sv +++ b/pipelined/src/fpu/postproc/shiftcorrection.sv @@ -25,67 +25,69 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. 
//////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" module shiftcorrection( - input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction - // divsqrt - input logic DivOp, // is it a divsqrt opperation - input logic DivResSubnorm, // is the divsqrt result subnormal - input logic [`NE+1:0] DivQe, // the divsqrt result's exponent - input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed) - //fma - input logic FmaOp, // is it an fma opperation - input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results - input logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection - input logic FmaSZero, - // output - output logic [`NE+1:0] FmaMe, // exponent of the normalized sum - output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction - output logic [`NE+1:0] Qe // corrected exponent for divider + input logic [`NORMSHIFTSZ-1:0] Shifted, // the shifted sum before LZA correction + // divsqrt + input logic DivOp, // is it a divsqrt opperation + input logic DivResSubnorm, // is the divsqrt result subnormal + input logic [`NE+1:0] DivQe, // the divsqrt result's exponent + input logic DivSubnormShiftPos, // is the subnorm divider shift amount positive (ie not underflowed) + //fma + input logic FmaOp, // is it an fma opperation + input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account Subnormal or zero results + input logic FmaPreResultSubnorm, // is the result subnormal - calculated before LZA corection + input logic FmaSZero, + // output + output logic [`NE+1:0] FmaMe, // exponent of the normalized sum + output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction + output logic [`NE+1:0] Qe // corrected exponent for divider ); - logic [3*`NF+3:0] CorrSumShifted; // the shifted sum after 
LZA correction - logic [`CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted - logic [`CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift - logic ResSubnorm; // is the result Subnormal - logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction - logic LeftShiftQm; // should the divsqrt result be shifted one to the left + logic [3*`NF+3:0] CorrSumShifted; // the shifted sum after LZA correction + logic [`CORRSHIFTSZ-1:0] CorrQm0, CorrQm1; // portions of Shifted to select for CorrQmShifted + logic [`CORRSHIFTSZ-1:0] CorrQmShifted; // the shifted divsqrt result after one bit shift + logic ResSubnorm; // is the result Subnormal + logic LZAPlus1; // add one or two to the sum's exponent due to LZA correction + logic LeftShiftQm; // should the divsqrt result be shifted one to the left - // LZA correction - assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1]; + // LZA correction + assign LZAPlus1 = Shifted[`NORMSHIFTSZ-1]; - // correct the shifting error caused by the LZA - // - the only possible mantissa for a plus two is all zeroes - // - a one has to propigate all the way through a sum. so we can leave the bottom statement alone - mux2 #(`NORMSHIFTSZ-2) lzacorrmux(Shifted[`NORMSHIFTSZ-3:0], Shifted[`NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted); + // correct the shifting error caused by the LZA + // - the only possible mantissa for a plus two is all zeroes + // - a one has to propigate all the way through a sum. 
so we can leave the bottom statement alone + mux2 #(`NORMSHIFTSZ-2) lzacorrmux(Shifted[`NORMSHIFTSZ-3:0], Shifted[`NORMSHIFTSZ-2:1], LZAPlus1, CorrSumShifted); - // correct the shifting of the divsqrt caused by producing a result in (2, .5] range - // condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm) - assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1)); - assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; - assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1]; - mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted); + // correct the shifting of the divsqrt caused by producing a result in (2, .5] range + // condition: if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Subnorm) + assign LeftShiftQm = (LZAPlus1|(DivQe==1&~LZAPlus1)); + assign CorrQm0 = Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; + assign CorrQm1 = Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1]; + mux2 #(`CORRSHIFTSZ) divcorrmux(CorrQm0, CorrQm1, LeftShiftQm, CorrQmShifted); + + // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits + always_comb + if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}}; + else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted; + else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; - // if the result of the divider was calculated to be subnormal, then the result was correctly normalized, so select the top shifted bits - always_comb - if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+4){1'b0}}}; - else if (DivOp&~DivResSubnorm) Mf = CorrQmShifted; - else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; - // Determine sum's exponent - // main exponent issues: - // - LZA was one too large - // - LZA was two too large - // - if the result was calulated to be subnorm but it's norm and the LZA was off by 1 - // - if the result was 
calulated to be subnorm but it's norm and the LZA was off by 2 - // if plus1 If plus2 kill if the result Zero or actually subnormal - // | | | - assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}}; - - // recalculate if the result is subnormal after LZA correction - assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1]; + // Determine sum's exponent + // main exponent issues: + // - LZA was one too large + // - LZA was two too large + // - if the result was calulated to be subnorm but it's norm and the LZA was off by 1 + // - if the result was calulated to be subnorm but it's norm and the LZA was off by 2 + // if plus1 If plus2 kill if the result Zero or actually subnormal + // | | | + assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1} +{{`NE+1{1'b0}}, FmaPreResultSubnorm}) & {`NE+2{~(FmaSZero|ResSubnorm)}}; + + // recalculate if the result is subnormal after LZA correction + assign ResSubnorm = FmaPreResultSubnorm&~Shifted[`NORMSHIFTSZ-2]&~Shifted[`NORMSHIFTSZ-1]; - // the quotent is in the range [.5,2) if there is no early termination - // if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift - assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? '0 : DivQe - {(`NE+1)'(0), ~LZAPlus1}; + // the quotent is in the range [.5,2) if there is no early termination + // if the quotent < 1 and not Subnormal then subtract 1 to account for the normalization shift + assign Qe = (DivResSubnorm & DivSubnormShiftPos) ? 
'0 : DivQe - {(`NE+1)'(0), ~LZAPlus1}; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/postproc/specialcase.sv b/pipelined/src/fpu/postproc/specialcase.sv index f87c42f02..6b2985c07 100644 --- a/pipelined/src/fpu/postproc/specialcase.sv +++ b/pipelined/src/fpu/postproc/specialcase.sv @@ -29,291 +29,281 @@ `include "wally-config.vh" module specialcase( - input logic Xs, // X sign - input logic [`NF:0] Xm, Ym, Zm, // input significand's - input logic XNaN, YNaN, ZNaN, // are the inputs NaN - input logic [2:0] Frm, // rounding mode - input logic [`FMTBITS-1:0] OutFmt, // output format - input logic InfIn, // are any inputs infinity - input logic NaNIn, // are any input NaNs - input logic XInf, YInf, // are X or Y inifnity - input logic XZero, // is X zero - input logic Plus1, // do you add one for rounding - input logic Rs, // the result's sign - input logic Invalid, Overflow, // flags to choose the result - input logic [`NE-1:0] Re, // Result exponent - input logic [`NE+1:0] FullRe, // Result full exponent - input logic [`NF-1:0] Rf, // Result fraction - // fma - input logic FmaOp, // is it a fma opperation - // divsqrt - input logic DivOp, // is it a divsqrt opperation - input logic DivByZero, // divide by zero flag - // cvt - input logic CvtOp, // is it a conversion opperation - input logic IntZero, // is the integer input zero - input logic IntToFp, // is cvt int -> fp opperation - input logic Int64, // is the integer 64 bits - input logic Signed, // is the integer signed - input logic [`NE:0] CvtCe, // the calculated expoent for cvt - input logic IntInvalid, // integer invalid flag to choose the result - input logic CvtResUf, // does the convert result underflow - input logic [`XLEN+1:0] CvtNegRes, // the possibly negated of the integer result - // outputs - output logic [`FLEN-1:0] PostProcRes,// final result - output logic [`XLEN-1:0] FCvtIntRes // final integer result + input logic Xs, // X sign + input logic [`NF:0] Xm, Ym, Zm, // input 
significand's + input logic XNaN, YNaN, ZNaN, // are the inputs NaN + input logic [2:0] Frm, // rounding mode + input logic [`FMTBITS-1:0] OutFmt, // output format + input logic InfIn, // are any inputs infinity + input logic NaNIn, // are any input NaNs + input logic XInf, YInf, // are X or Y inifnity + input logic XZero, // is X zero + input logic Plus1, // do you add one for rounding + input logic Rs, // the result's sign + input logic Invalid, Overflow, // flags to choose the result + input logic [`NE-1:0] Re, // Result exponent + input logic [`NE+1:0] FullRe, // Result full exponent + input logic [`NF-1:0] Rf, // Result fraction + // fma + input logic FmaOp, // is it a fma opperation + // divsqrt + input logic DivOp, // is it a divsqrt opperation + input logic DivByZero, // divide by zero flag + // cvt + input logic CvtOp, // is it a conversion opperation + input logic IntZero, // is the integer input zero + input logic IntToFp, // is cvt int -> fp opperation + input logic Int64, // is the integer 64 bits + input logic Signed, // is the integer signed + input logic [`NE:0] CvtCe, // the calculated expoent for cvt + input logic IntInvalid, // integer invalid flag to choose the result + input logic CvtResUf, // does the convert result underflow + input logic [`XLEN+1:0] CvtNegRes, // the possibly negated of the integer result + // outputs + output logic [`FLEN-1:0] PostProcRes,// final result + output logic [`XLEN-1:0] FCvtIntRes // final integer result ); - logic [`FLEN-1:0] XNaNRes; // X is NaN result - logic [`FLEN-1:0] YNaNRes; // Y is NaN result - logic [`FLEN-1:0] ZNaNRes; // Z is NaN result - logic [`FLEN-1:0] InvalidRes; // Invalid result result - logic [`FLEN-1:0] UfRes; // underflowed result result - logic [`FLEN-1:0] OfRes; // overflowed result result - logic [`FLEN-1:0] NormRes; // normal result - logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output - logic OfResMax; // does the of result output maximum norm fp number - logic 
KillRes; // kill the result for underflow - logic SelOfRes; // should the overflow result be selected + + logic [`FLEN-1:0] XNaNRes; // X is NaN result + logic [`FLEN-1:0] YNaNRes; // Y is NaN result + logic [`FLEN-1:0] ZNaNRes; // Z is NaN result + logic [`FLEN-1:0] InvalidRes; // Invalid result result + logic [`FLEN-1:0] UfRes; // underflowed result result + logic [`FLEN-1:0] OfRes; // overflowed result result + logic [`FLEN-1:0] NormRes; // normal result + logic [`XLEN-1:0] OfIntRes; // the overflow result for integer output + logic OfResMax; // does the of result output maximum norm fp number + logic KillRes; // kill the result for underflow + logic SelOfRes; // should the overflow result be selected - // does the overflow result output the maximum normalized floating point number - // output infinity if the input is infinity - assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs)); + // does the overflow result output the maximum normalized floating point number + // output infinity if the input is infinity + assign OfResMax = (~InfIn|(IntToFp&CvtOp))&~DivByZero&((Frm[1:0]==2'b01) | (Frm[1:0]==2'b10&~Rs) | (Frm[1:0]==2'b11&Rs)); + // select correct outputs for special cases + if (`FPSIZES == 1) begin + //NaN res selection depending on standard + if(`IEEE754) begin + assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end - // select correct outputs for special cases - if (`FPSIZES == 1) begin - //NaN res selection depending on standard - if(`IEEE754) begin - assign XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - assign YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - assign ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; 
- assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - assign InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end + assign OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; + assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = {Rs, Re, Rf}; - assign OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; - assign UfRes = {Rs, {`FLEN-2{1'b0}}, Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = {Rs, Re, Rf}; + end else if (`FPSIZES == 2) begin + if(`IEEE754) begin + assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; + assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; + assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end else begin + assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end - end else if (`FPSIZES == 2) begin - if(`IEEE754) begin - assign XNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - assign YNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; - assign ZNaNRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; - assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end else begin - assign InvalidRes = OutFmt ? 
{1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end + always_comb + if(OutFmt) + if(OfResMax) OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}}; + else OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; + else + if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}}; + else OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)}; + assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; - always_comb - if(OutFmt) - if(OfResMax) OfRes = {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}}; - else OfRes = {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; - else - if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}}; - else OfRes = {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)}; - assign UfRes = OutFmt ? {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - assign NormRes = OutFmt ? {Rs, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + end else if (`FPSIZES == 3) begin + always_comb + case (OutFmt) + `FMT: begin + if(`IEEE754) begin + XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + OfRes = OfResMax ? 
{Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; + UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {Rs, Re, Rf}; + end + `FMT1: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; + YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; + ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; + InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; + end + OfRes = OfResMax ? {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)}; + UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; + end + `FMT2: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; + YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]}; + ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]}; + InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; + end + + OfRes = OfResMax ? 
{{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)}; + UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; + end + default: begin + if(`IEEE754) begin + XNaNRes = (`FLEN)'(0); + YNaNRes = (`FLEN)'(0); + ZNaNRes = (`FLEN)'(0); + InvalidRes = (`FLEN)'(0); + end else begin + InvalidRes = (`FLEN)'(0); + end + OfRes = (`FLEN)'(0); + UfRes = (`FLEN)'(0); + NormRes = (`FLEN)'(0); + end + endcase - end else if (`FPSIZES == 3) begin - always_comb - case (OutFmt) - `FMT: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - OfRes = OfResMax ? {Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {Rs, Re, Rf}; - end - `FMT1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF1]}; - YNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF1]}; - ZNaNRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF1]}; - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; - end - OfRes = OfResMax ? 
{{`FLEN-`LEN1{1'b1}}, Rs, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Rs, {`NE1{1'b1}}, (`NF1)'(0)}; - UfRes = {{`FLEN-`LEN1{1'b1}}, Rs, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN1{1'b1}}, Rs, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; - end - `FMT2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Xm[`NF-2:`NF-`NF2]}; - YNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Ym[`NF-2:`NF-`NF2]}; - ZNaNRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, Zm[`NF-2:`NF-`NF2]}; - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`LEN2{1'b1}}, 1'b0, {`NE2{1'b1}}, 1'b1, (`NF2-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, Rs, {`NE2{1'b1}}, (`NF2)'(0)}; - UfRes = {{`FLEN-`LEN2{1'b1}}, Rs, (`LEN2-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`LEN2{1'b1}}, Rs, Re[`NE2-1:0], Rf[`NF-1:`NF-`NF2]}; - end - default: begin - if(`IEEE754) begin - XNaNRes = (`FLEN)'(0); - YNaNRes = (`FLEN)'(0); - ZNaNRes = (`FLEN)'(0); - InvalidRes = (`FLEN)'(0); - end else begin - InvalidRes = (`FLEN)'(0); - end - OfRes = (`FLEN)'(0); - UfRes = (`FLEN)'(0); - NormRes = (`FLEN)'(0); - end - endcase + end else if (`FPSIZES == 4) begin + always_comb + case (OutFmt) + 2'h3: begin + if(`IEEE754) begin + XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; + YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; + ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end else begin + InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; + end + + OfRes = OfResMax ? 
{Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; + UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {Rs, Re, Rf}; + end + 2'h1: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; + YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]}; + ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]}; + InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; + end + OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)}; + UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; + end + 2'h0: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; + YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]}; + ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]}; + InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; + end + + OfRes = OfResMax ? 
{{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)}; + UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; + end + 2'h2: begin + if(`IEEE754) begin + XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; + YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]}; + ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]}; + InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + end else begin + InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; + end + + OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)}; + // zero is exact if dividing by infinity so don't add 1 + UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; + NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; + end + endcase + end - end else if (`FPSIZES == 4) begin - always_comb - case (OutFmt) - 2'h3: begin - if(`IEEE754) begin - XNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Xm[`NF-2:0]}; - YNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Ym[`NF-2:0]}; - ZNaNRes = {1'b0, {`NE{1'b1}}, 1'b1, Zm[`NF-2:0]}; - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end else begin - InvalidRes = {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}}; - end - - OfRes = OfResMax ? 
{Rs, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Rs, {`NE{1'b1}}, {`NF{1'b0}}}; - UfRes = {Rs, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {Rs, Re, Rf}; - end - 2'h1: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`D_NF]}; - YNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`D_NF]}; - ZNaNRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`D_NF]}; - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`D_LEN{1'b1}}, 1'b0, {`D_NE{1'b1}}, 1'b1, (`D_NF-1)'(0)}; - end - OfRes = OfResMax ? {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, Rs, {`D_NE{1'b1}}, (`D_NF)'(0)}; - UfRes = {{`FLEN-`D_LEN{1'b1}}, Rs, (`D_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`D_LEN{1'b1}}, Rs, Re[`D_NE-1:0], Rf[`NF-1:`NF-`D_NF]}; - end - 2'h0: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`S_NF]}; - YNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`S_NF]}; - ZNaNRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`S_NF]}; - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`S_LEN{1'b1}}, 1'b0, {`S_NE{1'b1}}, 1'b1, (`S_NF-1)'(0)}; - end - - OfRes = OfResMax ? 
{{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, Rs, {`S_NE{1'b1}}, (`S_NF)'(0)}; - UfRes = {{`FLEN-`S_LEN{1'b1}}, Rs, (`S_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`S_LEN{1'b1}}, Rs, Re[`S_NE-1:0], Rf[`NF-1:`NF-`S_NF]}; - end - 2'h2: begin - if(`IEEE754) begin - XNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Xm[`NF-2:`NF-`H_NF]}; - YNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Ym[`NF-2:`NF-`H_NF]}; - ZNaNRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, Zm[`NF-2:`NF-`H_NF]}; - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - end else begin - InvalidRes = {{`FLEN-`H_LEN{1'b1}}, 1'b0, {`H_NE{1'b1}}, 1'b1, (`H_NF-1)'(0)}; - end - - OfRes = OfResMax ? {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, Rs, {`H_NE{1'b1}}, (`H_NF)'(0)}; - // zero is exact if dividing by infinity so don't add 1 - UfRes = {{`FLEN-`H_LEN{1'b1}}, Rs, (`H_LEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; - NormRes = {{`FLEN-`H_LEN{1'b1}}, Rs, Re[`H_NE-1:0], Rf[`NF-1:`NF-`H_NF]}; - end - endcase - end + // determine if you shoould kill the res - Cvt + // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 + // - dont set to zero if fp input is zero but not using the fp input + // - dont set to zero if int input is zero but not using the int input + assign KillRes = CvtOp ? 
(CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1); + + // calculate if the overflow result should be selected + assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); + + // output infinity with result sign if divide by zero + if(`IEEE754) + always_comb + if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes; + else if(YNaN&~CvtOp) PostProcRes = YNaNRes; + else if(ZNaN&FmaOp) PostProcRes = ZNaNRes; + else if(Invalid) PostProcRes = InvalidRes; + else if(SelOfRes) PostProcRes = OfRes; + else if(KillRes) PostProcRes = UfRes; + else PostProcRes = NormRes; + else + always_comb + if(NaNIn|Invalid) PostProcRes = InvalidRes; + else if(SelOfRes) PostProcRes = OfRes; + else if(KillRes) PostProcRes = UfRes; + else PostProcRes = NormRes; - + /////////////////////////////////////////////////////////////////////////////////////// + // integer result selection + /////////////////////////////////////////////////////////////////////////////////////// - - - // determine if you shoould kill the res - Cvt - // - do so if the res underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 - // - dont set to zero if fp input is zero but not using the fp input - // - dont set to zero if int input is zero but not using the int input - assign KillRes = CvtOp ? 
(CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResSubnorm & (Re!=1); - - // calculate if the overflow result should be selected - assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); - - - // output infinity with result sign if divide by zero - if(`IEEE754) - always_comb - if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes; - else if(YNaN&~CvtOp) PostProcRes = YNaNRes; - else if(ZNaN&FmaOp) PostProcRes = ZNaNRes; - else if(Invalid) PostProcRes = InvalidRes; - else if(SelOfRes) PostProcRes = OfRes; - else if(KillRes) PostProcRes = UfRes; - else PostProcRes = NormRes; + // select the overflow integer res + // - negitive infinity and out of range negitive input + // | int | long | + // signed | -2^31 | -2^63 | + // unsigned | 0 | 0 | + // + // - positive infinity and out of range positive input and NaNs + // | int | long | + // signed | 2^31-1 | 2^63-1 | + // unsigned | 2^32-1 | 2^64-1 | + // + // other: 32 bit unsinged res should be sign extended as if it were a signed number + always_comb + if(Signed) + if(Xs&~NaNIn) // signed negitive + if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}}; + else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}}; + else // signed positive + if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}}; + else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}}; else - always_comb - if(NaNIn|Invalid) PostProcRes = InvalidRes; - else if(SelOfRes) PostProcRes = OfRes; - else if(KillRes) PostProcRes = UfRes; - else PostProcRes = NormRes; + if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive + else OfIntRes = {`XLEN{1'b1}}; // unsigned positive - - - - - /////////////////////////////////////////////////////////////////////////////////////// - // integer result selection - /////////////////////////////////////////////////////////////////////////////////////// - - // select the overflow integer res - // - negitive infinity and out of range negitive input - // | int | long | - // signed | -2^31 | 
-2^63 | - // unsigned | 0 | 0 | - // - // - positive infinity and out of range positive input and NaNs - // | int | long | - // signed | 2^31-1 | 2^63-1 | - // unsigned | 2^32-1 | 2^64-1 | - // - // other: 32 bit unsinged res should be sign extended as if it were a signed number - always_comb - if(Signed) - if(Xs&~NaNIn) // signed negitive - if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}}; - else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}}; - else // signed positive - if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}}; - else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}}; - else - if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive - else OfIntRes = {`XLEN{1'b1}}; // unsigned positive - - - // select the integer output - // - if the input is invalid (out of bounds NaN or Inf) then output overflow res - // - if the input underflows - // - if rounding and signed opperation and negitive input, output -1 - // - otherwise output a rounded 0 - // - otherwise output the normal res (trmined and sign extended if nessisary) - always_comb - if(IntInvalid) FCvtIntRes = OfIntRes; - else if(CvtCe[`NE]) - if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}}; - else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1}; - else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0]; - else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; + // select the integer output + // - if the input is invalid (out of bounds NaN or Inf) then output overflow res + // - if the input underflows + // - if rounding and signed opperation and negitive input, output -1 + // - otherwise output a rounded 0 + // - otherwise output the normal res (trmined and sign extended if nessisary) + always_comb + if(IntInvalid) FCvtIntRes = OfIntRes; + else if(CvtCe[`NE]) + if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}}; + else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1}; + else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0]; + else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; endmodule \ No newline at end of file diff 
--git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 8e6ba2559..13addc2e3 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -28,35 +28,35 @@ `include "wally-config.vh" module unpack ( - input logic [`FLEN-1:0] X, Y, Z, // inputs from register file - input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half - input logic XEn, YEn, ZEn, // input enables - output logic Xs, Ys, Zs, // sign bits of XYZ - output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) - output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) - output logic XNaN, YNaN, ZNaN, // is XYZ a NaN - output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN - output logic XSubnorm, // is X subnormal - output logic XZero, YZero, ZZero, // is XYZ zero - output logic XInf, YInf, ZInf, // is XYZ infinity - output logic XExpMax // does X have the maximum exponent (NaN or Inf) + input logic [`FLEN-1:0] X, Y, Z, // inputs from register file + input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half + input logic XEn, YEn, ZEn, // input enables + output logic Xs, Ys, Zs, // sign bits of XYZ + output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) + output logic XNaN, YNaN, ZNaN, // is XYZ a NaN + output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN + output logic XSubnorm, // is X subnormal + output logic XZero, YZero, ZZero, // is XYZ zero + output logic XInf, YInf, ZInf, // is XYZ infinity + output logic XExpMax // does X have the maximum exponent (NaN or Inf) ); - - logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero - logic XFracZero, YFracZero, ZFracZero; // is the fraction zero - logic YExpMax, ZExpMax; // is the exponent all 1s - - 
unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), - .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), - .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero)); - unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), - .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero), - .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero)); + logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero + logic XFracZero, YFracZero, ZFracZero; // is the fraction zero + logic YExpMax, ZExpMax; // is the exponent all 1s + + unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), + .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), + .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero)); - unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), - .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), - .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero)); - // is the input subnormal - assign XSubnorm = ~XExpNonZero & ~XFracZero; - endmodule \ No newline at end of file + unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), + .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero), + .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero)); + + unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), + .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), + .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero)); + // is the input subnormal + assign XSubnorm = ~XExpNonZero & ~XFracZero; +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv index 9d2b0b6ab..34663bd8c 100644 --- a/pipelined/src/fpu/unpackinput.sv +++ b/pipelined/src/fpu/unpackinput.sv @@ -28,243 +28,243 @@ `include "wally-config.vh" module unpackinput ( - input logic [`FLEN-1:0] In, // inputs from register file - input logic En, 
// enable the input - input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half - output logic Sgn, // sign bits of XYZ - output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) - output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision) - output logic NaN, // is XYZ a NaN - output logic SNaN, // is XYZ a signaling NaN - output logic Zero, // is XYZ zero - output logic Inf, // is XYZ infinity - output logic ExpNonZero, // is the exponent not zero - output logic FracZero, // is the fraction zero - output logic ExpMax // does In have the maximum exponent (NaN or Inf) + input logic [`FLEN-1:0] In, // inputs from register file + input logic En, // enable the input + input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half + output logic Sgn, // sign bits of XYZ + output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision) + output logic NaN, // is XYZ a NaN + output logic SNaN, // is XYZ a signaling NaN + output logic Zero, // is XYZ zero + output logic Inf, // is XYZ infinity + output logic ExpNonZero, // is the exponent not zero + output logic FracZero, // is the fraction zero + output logic ExpMax // does In have the maximum exponent (NaN or Inf) ); - - logic [`NF-1:0] Frac; // Fraction of XYZ - logic BadNaNBox; // is the NaN boxing bad - - if (`FPSIZES == 1) begin // if there is only one floating point format supported - assign BadNaNBox = 0; - assign Sgn = In[`FLEN-1]; // sign bit - assign Frac = In[`NF-1:0]; // fraction (no assumed 1) - assign ExpNonZero = |In[`FLEN-2:`NF]; // is the exponent non-zero - assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. 
subnormal numbers have effective biased exponent of 1 - assign ExpMax = &In[`FLEN-2:`NF]; // is the exponent all 1's - - end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported - //***need better names for these constants - // largest format | smaller format - //---------------------------------- - // `FLEN | `LEN1 length of floating point number - // `NE | `NE1 length of exponent - // `NF | `NF1 length of fraction - // `BIAS | `BIAS1 exponent's bias value - // `FMT | `FMT1 precision's format value - Q=11 D=01 Sticky=00 H=10 - // Possible combinantions specified by spec: - // double and single - // single and half + logic [`NF-1:0] Frac; // Fraction of XYZ + logic BadNaNBox; // is the NaN boxing bad + + if (`FPSIZES == 1) begin // if there is only one floating point format supported + assign BadNaNBox = 0; + assign Sgn = In[`FLEN-1]; // sign bit + assign Frac = In[`NF-1:0]; // fraction (no assumed 1) + assign ExpNonZero = |In[`FLEN-2:`NF]; // is the exponent non-zero + assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; // exponent. 
subnormal numbers have effective biased exponent of 1 + assign ExpMax = &In[`FLEN-2:`NF]; // is the exponent all 1's + + end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported + //***need better names for these constants + // largest format | smaller format + //---------------------------------- + // `FLEN | `LEN1 length of floating point number + // `NE | `NE1 length of exponent + // `NF | `NF1 length of fraction + // `BIAS | `BIAS1 exponent's bias value + // `FMT | `FMT1 precision's format value - Q=11 D=01 Sticky=00 H=10 - // Not needed but can also handle: - // quad and double - // quad and single - // quad and half - // double and half + // Possible combinantions specified by spec: + // double and single + // single and half - assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing + // Not needed but can also handle: + // quad and double + // quad and single + // quad and half + // double and half - // choose sign bit depending on format - 1=larger precsion 0=smaller precision - assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1]; + assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing - // extract the fraction, add trailing zeroes to the mantissa if nessisary - assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; + // choose sign bit depending on format - 1=larger precsion 0=smaller precision + assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1]; - // is the exponent non-zero - assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; + // extract the fraction, add trailing zeroes to the mantissa if nessisary + assign Frac = Fmt ? 
In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; - // example double to single conversion: - // 1023 = 0011 1111 1111 - // 127 = 0000 0111 1111 (subtract this) - // 896 = 0011 1000 0000 - // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b - // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/Subnorm/inf/NaN values + // is the exponent non-zero + assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; - // extract the exponent, converting the smaller exponent into the larger precision if nessisary - // - if the original precision had a Subnormal number convert the exponent value 1 - assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; - - // is the exponent all 1's - assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; - + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/Subnorm/inf/NaN values - end else if (`FPSIZES == 3) begin // three floating point precsions supported + // extract the exponent, converting the smaller exponent into the larger precision if nessisary + // - if the original precision had a Subnormal number convert the exponent value 1 + assign Exp = Fmt ? 
{In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; - //***need better names for these constants - // largest format | larger format | smallest format - //--------------------------------------------------- - // `FLEN | `LEN1 | `LEN2 length of floating point number - // `NE | `NE1 | `NE2 length of exponent - // `NF | `NF1 | `NF2 length of fraction - // `BIAS | `BIAS1 | `BIAS2 exponent's bias value - // `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 Sticky=00 H=10 + // is the exponent all 1's + assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; + - // Possible combinantions specified by spec: - // quad and double and single - // double and single and half + end else if (`FPSIZES == 3) begin // three floating point precsions supported - // Not needed but can also handle: - // quad and double and half - // quad and single and half + //***need better names for these constants + // largest format | larger format | smallest format + //--------------------------------------------------- + // `FLEN | `LEN1 | `LEN2 length of floating point number + // `NE | `NE1 | `NE2 length of exponent + // `NF | `NF1 | `NF2 length of fraction + // `BIAS | `BIAS1 | `BIAS2 exponent's bias value + // `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 Sticky=00 H=10 - // Check NaN boxing - always_comb - case (Fmt) - `FMT: BadNaNBox = 0; - `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1]; - `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2]; - default: BadNaNBox = 1'bx; - endcase + // Possible combinantions specified by spec: + // quad and double and single + // double and single and half - // extract the sign bit - always_comb - case (Fmt) - `FMT: Sgn = In[`FLEN-1]; - `FMT1: Sgn = In[`LEN1-1]; - `FMT2: Sgn = In[`LEN2-1]; - default: Sgn = 1'bx; - endcase + // Not needed but can also handle: + // quad and double and half + // quad and single and half - // extract the fraction - always_comb - case (Fmt) - `FMT: Frac 
= In[`NF-1:0]; - `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)}; - `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)}; - default: Frac = {`NF{1'bx}}; - endcase + // Check NaN boxing + always_comb + case (Fmt) + `FMT: BadNaNBox = 0; + `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1]; + `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2]; + default: BadNaNBox = 1'bx; + endcase - // is the exponent non-zero - always_comb - case (Fmt) - `FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double) - `FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) - `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) - default: ExpNonZero = 1'bx; - endcase - - // example double to single conversion: - // 1023 = 0011 1111 1111 - // 127 = 0000 0111 1111 (subtract this) - // 896 = 0011 1000 0000 - // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b - // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/Subnorm/inf/NaN values + // extract the sign bit + always_comb + case (Fmt) + `FMT: Sgn = In[`FLEN-1]; + `FMT1: Sgn = In[`LEN1-1]; + `FMT2: Sgn = In[`LEN2-1]; + default: Sgn = 1'bx; + endcase - // convert the larger precision's exponent to use the largest precision's bias - always_comb - case (Fmt) - `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; - `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; - `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; - default: Exp = {`NE{1'bx}}; - endcase + // extract the fraction + always_comb + case (Fmt) + `FMT: Frac = In[`NF-1:0]; + `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)}; + `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)}; + default: Frac = {`NF{1'bx}}; + endcase - // is the exponent all 1's - always_comb - case (Fmt) - `FMT: ExpMax = &In[`FLEN-2:`NF]; - `FMT1: ExpMax = &In[`LEN1-2:`NF1]; - `FMT2: ExpMax = &In[`LEN2-2:`NF2]; - default: 
ExpMax = 1'bx; - endcase + // is the exponent non-zero + always_comb + case (Fmt) + `FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double) + `FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) + `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) + default: ExpNonZero = 1'bx; + endcase + + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/Subnorm/inf/NaN values - end else if (`FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half - - // quad | double | single | half - //------------------------------------------------------------------- - // `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number - // `Q_NE | `D_NE | `S_NE | `H_NE length of exponent - // `Q_NF | `D_NF | `S_NF | `H_NF length of fraction - // `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value - // `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 Sticky=00 H=10 + // convert the larger precision's exponent to use the largest precision's bias + always_comb + case (Fmt) + `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; + `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; + `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; + default: Exp = {`NE{1'bx}}; + endcase - // Check NaN boxing - always_comb - case (Fmt) - 2'b11: BadNaNBox = 0; - 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN]; - 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN]; - 2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN]; - endcase + // is the exponent all 1's + always_comb + case (Fmt) + `FMT: ExpMax = &In[`FLEN-2:`NF]; + `FMT1: ExpMax = 
&In[`LEN1-2:`NF1]; + `FMT2: ExpMax = &In[`LEN2-2:`NF2]; + default: ExpMax = 1'bx; + endcase - // extract sign bit - always_comb - case (Fmt) - 2'b11: Sgn = In[`Q_LEN-1]; - 2'b01: Sgn = In[`D_LEN-1]; - 2'b00: Sgn = In[`S_LEN-1]; - 2'b10: Sgn = In[`H_LEN-1]; - endcase - + end else if (`FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half + + // quad | double | single | half + //------------------------------------------------------------------- + // `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number + // `Q_NE | `D_NE | `S_NE | `H_NE length of exponent + // `Q_NF | `D_NF | `S_NF | `H_NF length of fraction + // `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value + // `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 Sticky=00 H=10 - // extract the fraction - always_comb - case (Fmt) - 2'b11: Frac = In[`Q_NF-1:0]; - 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; - 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; - 2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; - endcase + // Check NaN boxing + always_comb + case (Fmt) + 2'b11: BadNaNBox = 0; + 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN]; + 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN]; + 2'b10: BadNaNBox = ~&In[`Q_LEN-1:`H_LEN]; + endcase - // is the exponent non-zero - always_comb - case (Fmt) - 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF]; - 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF]; - 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; - 2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF]; - endcase + // extract sign bit + always_comb + case (Fmt) + 2'b11: Sgn = In[`Q_LEN-1]; + 2'b01: Sgn = In[`D_LEN-1]; + 2'b00: Sgn = In[`S_LEN-1]; + 2'b10: Sgn = In[`H_LEN-1]; + endcase + + + // extract the fraction + always_comb + case (Fmt) + 2'b11: Frac = In[`Q_NF-1:0]; + 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; + 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; + 2'b10: Frac = {In[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; + endcase + + // is the exponent non-zero + always_comb + 
case (Fmt) + 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF]; + 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF]; + 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; + 2'b10: ExpNonZero = |In[`H_LEN-2:`H_NF]; + endcase - // example double to single conversion: - // 1023 = 0011 1111 1111 - // 127 = 0000 0111 1111 (subtract this) - // 896 = 0011 1000 0000 - // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b - // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/Subnorm/inf/NaN values - - // convert the double precsion exponent into quad precsion - // 1 is added to the exponent if the input is zero or subnormal - always_comb - case (Fmt) - 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero}; - 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero}; - 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero}; - 2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero}; - endcase + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/Subnorm/inf/NaN values + + // convert the double precsion exponent into quad precsion + // 1 is added to the exponent if the input is zero or subnormal + always_comb + case (Fmt) + 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero}; + 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero}; + 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero}; + 2'b10: Exp = {In[`H_LEN-2], {`Q_NE-`H_NE{~In[`H_LEN-2]}}, In[`H_LEN-3:`H_NF+1], In[`H_NF]|~ExpNonZero}; + endcase - // is the exponent all 1's - always_comb - case (Fmt) - 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF]; - 2'b01: ExpMax = &In[`D_LEN-2:`D_NF]; 
- 2'b00: ExpMax = &In[`S_LEN-2:`S_NF]; - 2'b10: ExpMax = &In[`H_LEN-2:`H_NF]; - endcase + // is the exponent all 1's + always_comb + case (Fmt) + 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF]; + 2'b01: ExpMax = &In[`D_LEN-2:`D_NF]; + 2'b00: ExpMax = &In[`S_LEN-2:`S_NF]; + 2'b10: ExpMax = &In[`H_LEN-2:`H_NF]; + endcase - end + end - // Output logic - assign FracZero = ~|Frac; // is the fraction zero? - assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if Subnormal or zero) to create the significand - assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN? - assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN? - assign Inf = ExpMax & FracZero &En; // is the input infinity? - assign Zero = ~ExpNonZero & FracZero; // is the input zero? + // Output logic + assign FracZero = ~|Frac; // is the fraction zero? + assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if Subnormal or zero) to create the significand + assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN? + assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN? + assign Inf = ExpMax & FracZero &En; // is the input infinity? + assign Zero = ~ExpNonZero & FracZero; // is the input zero? 
endmodule \ No newline at end of file diff --git a/pipelined/src/lsu/atomic.sv b/pipelined/src/lsu/atomic.sv index 62a4e945e..64c1dc4bf 100644 --- a/pipelined/src/lsu/atomic.sv +++ b/pipelined/src/lsu/atomic.sv @@ -39,16 +39,18 @@ module atomic ( input logic IgnoreRequest, output logic [`XLEN-1:0] IMAWriteDataM, output logic SquashSCW, - output logic [1:0] LSURWM); + output logic [1:0] LSURWM +); - logic [`XLEN-1:0] AMOResult; - logic MemReadM; + logic [`XLEN-1:0] AMOResult; + logic MemReadM; amoalu amoalu(.srca(ReadDataM), .srcb(IHWriteDataM), .funct(LSUFunct7M), .width(LSUFunct3M[1:0]), .result(AMOResult)); + mux2 #(`XLEN) wdmux(IHWriteDataM, AMOResult, LSUAtomicM[1], IMAWriteDataM); assign MemReadM = PreLSURWM[1] & ~IgnoreRequest; - lrsc lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .PAdrM, - .SquashSCW, .LSURWM); + + lrsc lrsc(.clk, .reset, .StallW, .MemReadM, .PreLSURWM, .LSUAtomicM, .PAdrM, .SquashSCW, .LSURWM); endmodule diff --git a/pipelined/src/lsu/dtim.sv b/pipelined/src/lsu/dtim.sv index 3ad107ae0..d4fe7a081 100644 --- a/pipelined/src/lsu/dtim.sv +++ b/pipelined/src/lsu/dtim.sv @@ -35,10 +35,10 @@ module dtim( output logic [`LLEN-1:0] ReadDataWordM ); - logic we; + logic we; localparam ADDR_WDITH = $clog2(`DTIM_RANGE/8); - localparam OFFSET = $clog2(`LLEN/8); + localparam OFFSET = $clog2(`LLEN/8); assign we = MemRWM[0] & ~FlushW; // have to ignore write if Trap. 
diff --git a/pipelined/src/lsu/endianswap.sv b/pipelined/src/lsu/endianswap.sv index b4978e7f7..e1fa19637 100644 --- a/pipelined/src/lsu/endianswap.sv +++ b/pipelined/src/lsu/endianswap.sv @@ -29,49 +29,50 @@ `include "wally-config.vh" module endianswap #(parameter LEN=`XLEN) ( - input logic BigEndianM, - input logic [LEN-1:0] a, - output logic [LEN-1:0] y); + input logic BigEndianM, + input logic [LEN-1:0] a, + output logic [LEN-1:0] y +); if(LEN == 128) begin always_comb - if (BigEndianM) begin // swap endianness - y[127:120] = a[7:0]; - y[119:112] = a[15:8]; - y[111:104] = a[23:16]; - y[103:96] = a[31:24]; - y[95:88] = a[39:32]; - y[87:80] = a[47:40]; - y[79:72] = a[55:48]; - y[71:64] = a[63:56]; - y[63:56] = a[71:64]; - y[55:48] = a[79:72]; - y[47:40] = a[87:80]; - y[39:32] = a[95:88]; - y[31:24] = a[103:96]; - y[23:16] = a[111:104]; - y[15:8] = a[119:112]; - y[7:0] = a[127:120]; - end else y = a; + if (BigEndianM) begin // swap endianness + y[127:120] = a[7:0]; + y[119:112] = a[15:8]; + y[111:104] = a[23:16]; + y[103:96] = a[31:24]; + y[95:88] = a[39:32]; + y[87:80] = a[47:40]; + y[79:72] = a[55:48]; + y[71:64] = a[63:56]; + y[63:56] = a[71:64]; + y[55:48] = a[79:72]; + y[47:40] = a[87:80]; + y[39:32] = a[95:88]; + y[31:24] = a[103:96]; + y[23:16] = a[111:104]; + y[15:8] = a[119:112]; + y[7:0] = a[127:120]; + end else y = a; end else if(LEN == 64) begin always_comb - if (BigEndianM) begin // swap endianness - y[63:56] = a[7:0]; - y[55:48] = a[15:8]; - y[47:40] = a[23:16]; - y[39:32] = a[31:24]; - y[31:24] = a[39:32]; - y[23:16] = a[47:40]; - y[15:8] = a[55:48]; - y[7:0] = a[63:56]; - end else y = a; + if (BigEndianM) begin // swap endianness + y[63:56] = a[7:0]; + y[55:48] = a[15:8]; + y[47:40] = a[23:16]; + y[39:32] = a[31:24]; + y[31:24] = a[39:32]; + y[23:16] = a[47:40]; + y[15:8] = a[55:48]; + y[7:0] = a[63:56]; + end else y = a; end else begin always_comb if (BigEndianM) begin - y[31:24] = a[7:0]; - y[23:16] = a[15:8]; - y[15:8] = a[23:16]; - y[7:0] = 
a[31:24]; + y[31:24] = a[7:0]; + y[23:16] = a[15:8]; + y[15:8] = a[23:16]; + y[7:0] = a[31:24]; end else y = a; end endmodule diff --git a/pipelined/src/lsu/lrsc.sv b/pipelined/src/lsu/lrsc.sv index b14261161..80b584dce 100644 --- a/pipelined/src/lsu/lrsc.sv +++ b/pipelined/src/lsu/lrsc.sv @@ -27,22 +27,22 @@ `include "wally-config.vh" -module lrsc - ( - input logic clk, reset, - input logic StallW, - input logic MemReadM, - input logic [1:0] PreLSURWM, - output logic [1:0] LSURWM, - input logic [1:0] LSUAtomicM, - input logic [`PA_BITS-1:0] PAdrM, // from mmu to dcache - output logic SquashSCW +module lrsc( + input logic clk, reset, + input logic StallW, + input logic MemReadM, + input logic [1:0] PreLSURWM, + output logic [1:0] LSURWM, + input logic [1:0] LSUAtomicM, + input logic [`PA_BITS-1:0] PAdrM, // from mmu to dcache + output logic SquashSCW ); + // Handle atomic load reserved / store conditional - logic [`PA_BITS-1:2] ReservationPAdrW; - logic ReservationValidM, ReservationValidW; - logic lrM, scM, WriteAdrMatchM; - logic SquashSCM; + logic [`PA_BITS-1:2] ReservationPAdrW; + logic ReservationValidM, ReservationValidW; + logic lrM, scM, WriteAdrMatchM; + logic SquashSCM; assign lrM = MemReadM & LSUAtomicM[0]; assign scM = PreLSURWM[0] & LSUAtomicM[0]; @@ -55,6 +55,7 @@ module lrsc else if (scM) ReservationValidM = 0; // clear valid on store to same address or any sc else ReservationValidM = ReservationValidW; // otherwise don't change valid end + flopenr #(`PA_BITS-2) resadrreg(clk, reset, lrM & ~StallW, PAdrM[`PA_BITS-1:2], ReservationPAdrW); // could drop clear on this one but not valid flopenr #(1) resvldreg(clk, reset, ~StallW, ReservationValidM, ReservationValidW); flopenr #(1) squashreg(clk, reset, ~StallW, SquashSCM, SquashSCW); diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index d6278176d..e69fbcb8b 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -32,63 +32,63 @@ `include "wally-config.vh" module lsu ( - 
input logic clk, reset, - input logic StallM, FlushM, StallW, FlushW, - output logic LSUStallM, // LSU stalls pipeline during a multicycle operation. - // connected to cpu (controls) - input logic [1:0] MemRWM, // Read/Write control - input logic [2:0] Funct3M, // Size of memory operation - input logic [6:0] Funct7M, // Atomic memory operation function - input logic [1:0] AtomicM, // Atomic memory operation - input logic FlushDCacheM, // Flush D cache to next level of memory - output logic CommittedM, // Delay interrupts while memory operation in flight - output logic SquashSCW, // Store conditional failed disable write to GPR - output logic DCacheMiss, // D cache miss for performance counters - output logic DCacheAccess, // D cache memory access for performance counters - // address and write data - input logic [`XLEN-1:0] IEUAdrE, // Execution stage memory address - (* mark_debug = "true" *) output logic [`XLEN-1:0] IEUAdrM, // Memory stage memory address - (* mark_debug = "true" *) input logic [`XLEN-1:0] WriteDataM, // Write data from IEU - output logic [`LLEN-1:0] ReadDataW, // Read data to IEU or FPU - // cpu privilege - input logic [1:0] PrivilegeModeW, // Current privilege mode - input logic BigEndianM, // Swap byte order to big endian - input logic sfencevmaM, // Virtual memory address fence - // fpu - input logic [`FLEN-1:0] FWriteDataM, // Write data from FPU - input logic FpLoadStoreM, // Selects FPU as store for write data - // faults - output logic LoadPageFaultM, StoreAmoPageFaultM, // Page fault exceptions - output logic LoadMisalignedFaultM, // Load address misaligned fault - output logic LoadAccessFaultM, // Load access fault (PMA) - output logic HPTWInstrAccessFaultM, // HPTW generated access fault during instruction fetch - // cpu hazard unit (trap) - output logic StoreAmoMisalignedFaultM, // Store or AMO address misaligned fault - output logic StoreAmoAccessFaultM, // Store or AMO access fault - // connect to ahb - (* mark_debug = "true" *) 
output logic [`PA_BITS-1:0] LSUHADDR, // Bus address from LSU to EBU - (* mark_debug = "true" *) input logic [`XLEN-1:0] HRDATA, // Bus read data from LSU to EBU - (* mark_debug = "true" *) output logic [`XLEN-1:0] LSUHWDATA, // Bus write data from LSU to EBU - (* mark_debug = "true" *) input logic LSUHREADY, // Bus ready from LSU to EBU - (* mark_debug = "true" *) output logic LSUHWRITE, // Bus write operation from LSU to EBU - (* mark_debug = "true" *) output logic [2:0] LSUHSIZE, // Bus operation size from LSU to EBU - (* mark_debug = "true" *) output logic [2:0] LSUHBURST, // Bus burst from LSU to EBU - (* mark_debug = "true" *) output logic [1:0] LSUHTRANS, // Bus transaction type from LSU to EBU - (* mark_debug = "true" *) output logic [`XLEN/8-1:0] LSUHWSTRB, // Bus byte write enables from LSU to EBU - // page table walker - input logic [`XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR - input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege - input logic [1:0] STATUS_MPP, // Machine previous privilege mode - input logic [`XLEN-1:0] PCF, // Fetch PC - input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk - input logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits - output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB - output logic [1:0] PageType, // Type of page table entry to write to ITLB - output logic ITLBWriteF, // Write PTE to ITLB - output logic SelHPTW, // During a HPTW walk the effective privilege mode becomes S_MODE - input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // PMP address from privileged unit - ); + input logic clk, reset, + input logic StallM, FlushM, StallW, FlushW, + output logic LSUStallM, // LSU stalls pipeline during a multicycle operation. 
+ // connected to cpu (controls) + input logic [1:0] MemRWM, // Read/Write control + input logic [2:0] Funct3M, // Size of memory operation + input logic [6:0] Funct7M, // Atomic memory operation function + input logic [1:0] AtomicM, // Atomic memory operation + input logic FlushDCacheM, // Flush D cache to next level of memory + output logic CommittedM, // Delay interrupts while memory operation in flight + output logic SquashSCW, // Store conditional failed disable write to GPR + output logic DCacheMiss, // D cache miss for performance counters + output logic DCacheAccess, // D cache memory access for performance counters + // address and write data + input logic [`XLEN-1:0] IEUAdrE, // Execution stage memory address + (* mark_debug = "true" *) output logic [`XLEN-1:0] IEUAdrM, // Memory stage memory address + (* mark_debug = "true" *) input logic [`XLEN-1:0] WriteDataM, // Write data from IEU + output logic [`LLEN-1:0] ReadDataW, // Read data to IEU or FPU + // cpu privilege + input logic [1:0] PrivilegeModeW, // Current privilege mode + input logic BigEndianM, // Swap byte order to big endian + input logic sfencevmaM, // Virtual memory address fence + // fpu + input logic [`FLEN-1:0] FWriteDataM, // Write data from FPU + input logic FpLoadStoreM, // Selects FPU as store for write data + // faults + output logic LoadPageFaultM, StoreAmoPageFaultM, // Page fault exceptions + output logic LoadMisalignedFaultM, // Load address misaligned fault + output logic LoadAccessFaultM, // Load access fault (PMA) + output logic HPTWInstrAccessFaultM, // HPTW generated access fault during instruction fetch + // cpu hazard unit (trap) + output logic StoreAmoMisalignedFaultM, // Store or AMO address misaligned fault + output logic StoreAmoAccessFaultM, // Store or AMO access fault + // connect to ahb + (* mark_debug = "true" *) output logic [`PA_BITS-1:0] LSUHADDR, // Bus address from LSU to EBU + (* mark_debug = "true" *) input logic [`XLEN-1:0] HRDATA, // Bus read data from 
LSU to EBU + (* mark_debug = "true" *) output logic [`XLEN-1:0] LSUHWDATA, // Bus write data from LSU to EBU + (* mark_debug = "true" *) input logic LSUHREADY, // Bus ready from LSU to EBU + (* mark_debug = "true" *) output logic LSUHWRITE, // Bus write operation from LSU to EBU + (* mark_debug = "true" *) output logic [2:0] LSUHSIZE, // Bus operation size from LSU to EBU + (* mark_debug = "true" *) output logic [2:0] LSUHBURST, // Bus burst from LSU to EBU + (* mark_debug = "true" *) output logic [1:0] LSUHTRANS, // Bus transaction type from LSU to EBU + (* mark_debug = "true" *) output logic [`XLEN/8-1:0] LSUHWSTRB, // Bus byte write enables from LSU to EBU + // page table walker + input logic [`XLEN-1:0] SATP_REGW, // SATP (supervisor address translation and protection) CSR + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, // STATUS CSR bits: make executable readable, supervisor user memory, machine privilege + input logic [1:0] STATUS_MPP, // Machine previous privilege mode + input logic [`XLEN-1:0] PCF, // Fetch PC + input logic ITLBMissF, // ITLB miss causes HPTW (hardware pagetable walker) walk + input logic InstrDAPageFaultF, // ITLB hit needs to update dirty or access bits + output logic [`XLEN-1:0] PTE, // Page table entry write to ITLB + output logic [1:0] PageType, // Type of page table entry to write to ITLB + output logic ITLBWriteF, // Write PTE to ITLB + output logic SelHPTW, // During a HPTW walk the effective privilege mode becomes S_MODE + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], // PMP configuration from privileged unit + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // PMP address from privileged unit +); logic [`XLEN+1:0] IEUAdrExtM; // Memory stage address zero-extended to PA_BITS or XLEN whichever is longer logic [`XLEN+1:0] IEUAdrExtE; // Execution stage address zero-extended to PA_BITS or XLEN whichever is longer diff --git a/pipelined/src/lsu/subwordread.sv b/pipelined/src/lsu/subwordread.sv index 
0a31408ad..ade4d3d43 100644 --- a/pipelined/src/lsu/subwordread.sv +++ b/pipelined/src/lsu/subwordread.sv @@ -34,7 +34,7 @@ module subwordread input logic FpLoadStoreM, input logic BigEndianM, output logic [`LLEN-1:0] ReadDataM - ); +); logic [7:0] ByteM; logic [15:0] HalfwordM; diff --git a/pipelined/src/lsu/subwordwrite.sv b/pipelined/src/lsu/subwordwrite.sv index 85b6bc50b..ae18b8402 100644 --- a/pipelined/src/lsu/subwordwrite.sv +++ b/pipelined/src/lsu/subwordwrite.sv @@ -29,7 +29,8 @@ module subwordwrite ( input logic [2:0] LSUFunct3M, input logic [`LLEN-1:0] IMAFWriteDataM, - output logic [`LLEN-1:0] LittleEndianWriteDataM); + output logic [`LLEN-1:0] LittleEndianWriteDataM +); // Replicate data for subword writes if (`LLEN == 128) begin:sww diff --git a/pipelined/src/lsu/swbytemask.sv b/pipelined/src/lsu/swbytemask.sv index 3cbde9544..6167e4ea2 100644 --- a/pipelined/src/lsu/swbytemask.sv +++ b/pipelined/src/lsu/swbytemask.sv @@ -29,7 +29,8 @@ module swbytemask #(parameter WORDLEN = `XLEN)( input logic [2:0] Size, input logic [$clog2(WORDLEN/8)-1:0] Adr, - output logic [WORDLEN/8-1:0] ByteMask); + output logic [WORDLEN/8-1:0] ByteMask +); assign ByteMask = ((2**(2**Size))-1) << Adr; diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv index d629368f0..3666a4493 100644 --- a/pipelined/src/uncore/uartPC16550D.sv +++ b/pipelined/src/uncore/uartPC16550D.sv @@ -11,7 +11,7 @@ // // Compatible with most of PC16550D with the following known exceptions: // Generates 2 rather than 1.5 stop bits when 5-bit word length is slected and LCR[2] = 1 -// Timeout not ye implemented*** +// Timeout not yet implemented*** // // Documentation: RISC-V System on Chip Design Chapter 15 // @@ -141,15 +141,15 @@ module uartPC16550D( MCR <= #1 5'b0; LSR <= #1 8'b01100000; MSR <= #1 4'b0; - DLL <= #1 8'd1; // this cannot be zero with DLM also zer0. - DLM <= #1 8'b0; + DLL <= #1 8'd1; // this cannot be zero with DLM also zer0. 
+ DLM <= #1 8'b0; SCR <= #1 8'b0; // not strictly necessary to reset end else begin if (~MEMWb) begin /* verilator lint_off CASEINCOMPLETE */ case (A) - 3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section - 3'b001: if (DLAB) DLM <= #1 Din; else IER <= #1 Din[3:0]; + 3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section + 3'b001: if (DLAB) DLM <= #1 Din; else IER <= #1 Din[3:0]; 3'b010: FCR <= #1 {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing 3'b011: LCR <= #1 Din; 3'b100: MCR <= #1 Din[4:0];