From efb972c6d30c9037184f0902bf0437cc7fe08857 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 27 May 2022 08:23:46 -0700 Subject: [PATCH 01/19] Removed guard bit from fma rounding --- pipelined/src/fpu/fma.sv | 138 +++++++++++++-------------------------- 1 file changed, 46 insertions(+), 92 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 97735c5f7..30b352f0c 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -465,7 +465,7 @@ module fma2( logic ResultSgn, ResultSgnTmp; // Result sign logic [`NE+1:0] SumExp; // exponent of the normalized sum logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow - logic [`NF+2:0] NormSum; // normalized sum + logic [`NF+1:0] NormSum; // normalized sum logic NormSumSticky; // sticky bit calulated from the normalized sum logic SumZero; // is the sum zero logic ResultDenorm; // is the result denormalized @@ -582,7 +582,7 @@ module normalize( input logic KillProdM, // is the product set to zero input logic ZOrigDenormM, input logic AddendStickyM, // the sticky bit caclulated from the aligned addend - output logic [`NF+2:0] NormSum, // normalized sum + output logic [`NF+1:0] NormSum, // normalized sum output logic SumZero, // is the sum zero output logic NormSumSticky, UfSticky, // sticky bits output logic [`NE+1:0] SumExp, // exponent of the normalized sum @@ -707,27 +707,27 @@ module normalize( assign LZAPlus2 = SumShifted[3*`NF+8]; // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; - assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3]; + assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4]; // Calculate the sticky bit if (`FPSIZES == 1) begin - assign NormSumSticky = |CorrSumShifted[2*`NF+2:0]; + assign NormSumSticky = |CorrSumShifted[2*`NF+3:0]; end else if (`FPSIZES == 2) begin // 3*NF+5 - NF1 - 3 - assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | - (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM); + assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | + (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM); end else if (`FPSIZES == 3) begin - assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | - (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) | - (|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2)); + assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | + (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) | + (|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2)); end else if (`FPSIZES == 4) begin - assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | - (|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | - (|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) | - (|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2)); + assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | + (|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | + (|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) | + (|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2)); end @@ -745,7 +745,7 @@ module fmaround( input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single input logic [2:0] FrmM, // rounding mode input logic UfSticky, // sticky bit for underlow calculation - input logic [`NF+2:0] NormSum, // normalized sum + input logic [`NF+1:0] NormSum, // normalized sum input logic AddendStickyM, // addend's sticky bit input logic NormSumSticky, // normalized sum's sticky bit input logic ZZeroM, // is Z zero @@ -799,83 +799,53 @@ module fmaround( if (`FPSIZES == 1) begin // determine guard, round, and least significant bit of the result - assign Guard = NormSum[2]; assign Round = NormSum[1]; - assign LSBNormSum = NormSum[3]; + assign LSBNormSum = NormSum[2]; // used to determine underflow flag - assign UfGuard = NormSum[1]; assign UfRound = NormSum[0]; - assign UfLSBNormSum = NormSum[2]; - - // determine sticky - assign Sticky = UfSticky | NormSum[0]; end else if (`FPSIZES == 2) begin // \/-------------NF---------------, - // | NF1 | 3 | | + // | NF1 | 2 | | // '-------NF1------^ // determine guard, round, and least significant bit of the result - assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2]; assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1]; - assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3]; + assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2]; // used to determine underflow flag - assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1]; assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1]; - assign UfLSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2]; - // determine sticky - assign Sticky = UfSticky | (FmtM ? NormSum[0] : NormSum[`NF-`NF1]); end else if (`FPSIZES == 3) begin always_comb begin case (FmtM) `FMT: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[2]; Round = NormSum[1]; - LSBNormSum = NormSum[3]; + LSBNormSum = NormSum[2]; // used to determine underflow flag - UfGuard = NormSum[1]; UfRound = NormSum[0]; - UfLSBNormSum = NormSum[2]; - // determine sticky - Sticky = UfSticky | NormSum[0]; end `FMT1: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`NF1+2]; Round = NormSum[`NF-`NF1+1]; - LSBNormSum = NormSum[`NF-`NF1+3]; + LSBNormSum = NormSum[`NF-`NF1+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`NF1+1]; UfRound = NormSum[`NF-`NF1]; - UfLSBNormSum = NormSum[`NF-`NF1+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`NF1]; end `FMT2: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`NF2+2]; Round = NormSum[`NF-`NF2+1]; - LSBNormSum = NormSum[`NF-`NF2+3]; + LSBNormSum = NormSum[`NF-`NF2+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`NF2+1]; UfRound = NormSum[`NF-`NF2]; - UfLSBNormSum = NormSum[`NF-`NF2+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`NF2]; end default: begin - Guard = 1'bx; Round = 1'bx; LSBNormSum = 1'bx; - UfGuard = 1'bx; UfRound = 1'bx; - UfLSBNormSum = 1'bx; - Sticky = 1'bx; end endcase end @@ -885,56 +855,40 @@ module fmaround( case (FmtM) 2'h3: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[2]; Round = NormSum[1]; - LSBNormSum = NormSum[3]; + LSBNormSum = NormSum[2]; // used to determine underflow flag - UfGuard = NormSum[1]; UfRound = NormSum[0]; - UfLSBNormSum = NormSum[2]; - // determine sticky - Sticky = UfSticky | NormSum[0]; end 2'h1: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`D_NF+2]; Round = NormSum[`NF-`D_NF+1]; - LSBNormSum = NormSum[`NF-`D_NF+3]; + LSBNormSum = NormSum[`NF-`D_NF+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`D_NF+1]; UfRound = NormSum[`NF-`D_NF]; - UfLSBNormSum = NormSum[`NF-`D_NF+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`D_NF]; end 2'h0: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`S_NF+2]; Round = NormSum[`NF-`S_NF+1]; - LSBNormSum = NormSum[`NF-`S_NF+3]; + LSBNormSum = NormSum[`NF-`S_NF+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`S_NF+1]; UfRound = NormSum[`NF-`S_NF]; - UfLSBNormSum = NormSum[`NF-`S_NF+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`S_NF]; end 2'h2: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`H_NF+2]; Round = NormSum[`NF-`H_NF+1]; - LSBNormSum = NormSum[`NF-`H_NF+3]; + LSBNormSum = NormSum[`NF-`H_NF+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`H_NF+1]; UfRound = NormSum[`NF-`H_NF]; - UfLSBNormSum = NormSum[`NF-`H_NF+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`H_NF]; end endcase end end + // used to determine underflow flag + assign UfLSBNormSum = Round; + // determine sticky + assign Sticky = UfSticky | UfRound; // Deterimine if a small number was supposed to be subtrated @@ -944,28 +898,28 @@ module fmaround( always_comb begin // Determine if you add 1 case (FrmM) - 3'b000: CalcPlus1 = Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&LSBNormSum&~SubBySmallNum));//round to nearest even + 3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round down - 3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round up - 3'b100: CalcPlus1 = (Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&~SubBySmallNum)));//round to nearest max magnitude + 3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down + 3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up + 3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (FrmM) - 3'b000: UfCalcPlus1 = UfGuard & (UfRound | (UfSticky&UfRound|~UfSubBySmallNum) | (~Sticky&UfLSBNormSum&~UfSubBySmallNum));//round to nearest even + 3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down - 3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up - 3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (UfSticky&~(~UfRound&UfSubBySmallNum)) | (~Sticky&~UfSubBySmallNum)));//round to nearest max magnitude + 3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down + 3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up + 3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase // Determine if you subtract 1 case (FrmM) 3'b000: CalcMinus1 = 0;//round to nearest even - 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero - 3'b010: CalcMinus1 = ~ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round down - 3'b011: CalcMinus1 = ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round up + 3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero + 3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down + 3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up 3'b100: CalcMinus1 = 0;//round to nearest max magnitude default: CalcMinus1 = 1'bx; endcase @@ -973,9 +927,9 @@ module fmaround( end // If an answer is exact don't round - assign Plus1 = CalcPlus1 & (Sticky | Guard | Round); - assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);//UfRound is part of sticky - assign Minus1 = CalcMinus1 & (Sticky | Guard | Round); + assign Plus1 = CalcPlus1 & (Sticky | Round); + assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky + assign Minus1 = CalcMinus1 & (Sticky | Round); // Compute rounded result if (`FPSIZES == 1) begin @@ -1011,7 +965,7 @@ module fmaround( end - assign NormSumTruncated = NormSum[`NF+2:3]; + assign NormSumTruncated = NormSum[`NF+1:2]; assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; assign ResultExp = FullResultExp[`NE-1:0]; @@ -1083,12 +1037,12 @@ module fmaflags( // Set Underflow flag if the number is too small to be represented in normal numbers // - Don't set the underflow flag if the result is exact - assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // exp is negitive result is denorm exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal - assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed result isn't outputed - assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign Inexact = (Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // Combine flags // - FMA can't set the Divide by zero flag From b288f812ab282848cbc5c4933e656b825b9299ea Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 27 May 2022 09:04:02 -0700 Subject: [PATCH 02/19] moved lzc to generic and small optimizations on fcvt --- pipelined/regression/fp.do | 2 +- pipelined/src/fpu/fcvt.sv | 39 +++++++++++++----------------------- pipelined/src/fpu/fma.sv | 14 +------------ pipelined/src/generic/lzc.sv | 13 ++++++++++++ 4 files changed, 29 insertions(+), 39 deletions(-) create mode 100644 pipelined/src/generic/lzc.sv diff --git a/pipelined/regression/fp.do b/pipelined/regression/fp.do index 208118fc6..68c240c8a 100644 --- a/pipelined/regression/fp.do +++ b/pipelined/regression/fp.do @@ -32,7 +32,7 @@ vlib work # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals # $num = the added words after the call -vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv -suppress 2583,7063,8607,2697 +vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 vsim -voptargs=+acc work.testbenchfp -G TEST=$2 diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 55e6706c5..dfe98a793 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -39,9 +39,10 @@ module fcvt ( logic [`FPSIZES/3:0] OutFmt; // format of the output logic [`XLEN-1:0] PosInt; // the positive integer input + logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder) logic [`NE:0] CalcExp; // the calculated expoent - logic [$clog2(`LGLEN):0] ShiftAmt; // how much to shift by + logic [$clog2(`LGLEN)-1:0] ShiftAmt; // how much to shift by logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted logic ResDenormUf;// does the result underflow or is denormalized logic ResUf; // does the result underflow @@ -71,6 +72,7 @@ module fcvt ( logic Int64; // is the integer 64 bits? logic IntToFp; // is the opperation an int->fp conversion? logic ToInt; // is the opperation an fp->int conversion? + logic [$clog2(`LGLEN)-1:0] ZeroCnt; // output from the LZC // seperate OpCtrl for code readability @@ -91,18 +93,11 @@ module fcvt ( /////////////////////////////////////////////////////////////////////////// // negation /////////////////////////////////////////////////////////////////////////// - // negate the input if the input is a negitive singed integer - // - remove leading ones if the input is a unsigned 32-bit integer - // - // Negitive input - // 64-bit input : negate the input - // 32-bit input : trim to 32-bits and negate the input - // Positive input - // 64-bit input : do nothing - // 32-bit input : trim to 32-bits + // 1) negate the input if the input is a negitive singed integer + // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed) - assign PosInt = ResSgn ? Int64 ? -ForwardedSrcAE : {{`XLEN-32{1'b0}}, -ForwardedSrcAE[31:0]} : - Int64 ? ForwardedSrcAE : {{`XLEN-32{1'b0}}, ForwardedSrcAE[31:0]}; + assign PosInt = ResSgn ? -ForwardedSrcAE : ForwardedSrcAE; + assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt; /////////////////////////////////////////////////////////////////////////// // lzc @@ -111,16 +106,10 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign LzcIn = IntToFp ? {PosInt, {`LGLEN-`XLEN{1'b0}}} : // I->F - {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; // F->F + assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} : + {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; - // lglen is the largest possible value of ZeroCnt (NF or XLEN) hence normcnt must be log2(lglen) bits - logic [$clog2(`LGLEN):0] i, ZeroCnt; - always_comb begin - i = 0; - while (~LzcIn[`LGLEN-1-i] & i <= `LGLEN-1) i = i+1; // search for leading one - ZeroCnt = i; - end + lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt); /////////////////////////////////////////////////////////////////////////// @@ -154,9 +143,9 @@ module fcvt ( // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN):0]&{$clog2(`LGLEN)+1{~CalcExp[`NE]}} : - ResDenormUf&~IntToFp ? ($clog2(`LGLEN)+1)'(`NF-1)+CalcExp[$clog2(`LGLEN):0] : - (ZeroCnt+1)&{$clog2(`LGLEN)+1{XOrigDenormE|IntToFp}}; + assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN)-1:0]&{$clog2(`LGLEN){~CalcExp[`NE]}} : + ResDenormUf&~IntToFp ? ($clog2(`LGLEN))'(`NF-1)+CalcExp[$clog2(`LGLEN)-1:0] : + (ZeroCnt+1)&{$clog2(`LGLEN){XOrigDenormE|IntToFp}}; // shift // fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp @@ -568,7 +557,7 @@ module fcvt ( // - do so if the result underflows, is zero (the exp doesnt calculate correctly). or the integer input is 0 // - dont set to zero if fp input is zero but not using the fp input // - dont set to zero if int input is zero but not using the int input - assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|PosInt&IntToFp)); + assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp)); if (`FPSIZES == 1) begin // IEEE sends a payload while Riscv says to send a canonical quiet NaN diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 30b352f0c..179bc264b 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -409,22 +409,10 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098 - lzc lzc(.f, .NormCntE); + lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE)); endmodule -module lzc( - input logic [3*`NF+6:0] f, - output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift -); - - logic [$clog2(3*`NF+7)-1:0] i; - always_comb begin - i = 0; - while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned($clog2(3*`NF+7)'(3)*($clog2(3*`NF+7))'(`NF)+($clog2(3*`NF+7))'(6))) i = i+1; // search for leading one - NormCntE = i; - end -endmodule diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv new file mode 100644 index 000000000..78ac99e50 --- /dev/null +++ b/pipelined/src/generic/lzc.sv @@ -0,0 +1,13 @@ +//leading zero counter i.e. priority encoder +module lzc #(parameter WIDTH=1) ( + input logic [WIDTH-1:0] num, + output logic [$clog2(WIDTH)-1:0] ZeroCnt +); + + logic [$clog2(WIDTH)-1:0] i; + always_comb begin + i = 0; + while (~num[WIDTH-1-i] & $unsigned(i) <= $unsigned(WIDTH-1)) i = i+1; // search for leading one + ZeroCnt = i; + end +endmodule From 3c63db9554b81a1d35aa46b189412437e4ae56dd Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 27 May 2022 11:36:04 -0700 Subject: [PATCH 03/19] some optimizations in unpacker --- addins/riscv-arch-test | 2 +- pipelined/src/fpu/fcvt.sv | 10 ++++---- pipelined/src/fpu/fma.sv | 2 +- pipelined/src/fpu/unpack.sv | 36 ++++++++++++++--------------- pipelined/src/generic/lzc.sv | 4 +++- pipelined/testbench/testbench-fp.sv | 6 ++--- 6 files changed, 31 insertions(+), 29 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 307c77b26..ad04e119a 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 +Subproject commit ad04e119a5d846a1c11159786ad3382cf5ad3649 diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index dfe98a793..74a2829a0 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -17,9 +17,9 @@ module fcvt ( input logic XSNaNE, // is the input a signaling NaN input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [`FPSIZES/3:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half) - output logic [`FLEN-1:0] CvtResE, // the fp to fp conversion's result - output logic [`XLEN-1:0] CvtIntResE, // the fp to fp conversion's result - output logic [4:0] CvtFlgE // the fp to fp conversion's flags + output logic [`FLEN-1:0] CvtResE, // the fp conversion result + output logic [`XLEN-1:0] CvtIntResE, // the int conversion result + output logic [4:0] CvtFlgE // the conversion's flags ); // OpCtrls: @@ -261,7 +261,7 @@ module fcvt ( // - shift left to normilize (-1-ZeroCnt) // - newBias to make the biased exponent // - assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XOrigDenormE|IntToFp} - {{`NE-$clog2(`LGLEN){1'b0}}, (ZeroCnt&{$clog2(`LGLEN)+1{XOrigDenormE|IntToFp}})}; + assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XOrigDenormE|IntToFp} - {{`NE-$clog2(`LGLEN)+1{1'b0}}, (ZeroCnt&{$clog2(`LGLEN){XOrigDenormE|IntToFp}})}; // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion @@ -744,7 +744,7 @@ module fcvt ( NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}}; end // determine the infinity result - // - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign + // - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign // - otherwise: output infinity with the correct sign // - kill the infinity singal if the input isn't fp InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 179bc264b..fca4930cd 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -587,7 +587,7 @@ module normalize( /////////////////////////////////////////////////////////////////////////////// // Normalization /////////////////////////////////////////////////////////////////////////////// - //*** insert bias-bias simplification in fcvt.sv/phone pictures/ whiteboard... if still there + //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero assign SumZero = ~(|SumM); diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 44ffc2838..06ceff56b 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -96,9 +96,9 @@ module unpack ( // extract the exponent, converting the smaller exponent into the larger precision if nessisary // - if the original precision had a denormal number convert the exponent value 1 - assign XExpE = FmtE ? X[`FLEN-2:`NF] : XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; - assign YExpE = FmtE ? Y[`FLEN-2:`NF] : YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; - assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; + assign XExpE = FmtE ? X[`FLEN-2:`NF] : XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1]}; + assign YExpE = FmtE ? Y[`FLEN-2:`NF] : YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1]}; + assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1]}; // is the input (in it's original format) denormalized assign XOrigDenormE = FmtE ? 0 : ~|XLen1[`LEN1-2:`NF1] & ~XFracZero; @@ -257,9 +257,9 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the larger precision's exponent to use the largest precision's bias - XExpE = XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; - YExpE = YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; - ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; + XExpE = XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]}}, XLen1[`LEN1-3:`NF1]}; + YExpE = YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]}}, YLen1[`LEN1-3:`NF1]}; + ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]}}, ZLen1[`LEN1-3:`NF1]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)}; @@ -282,9 +282,9 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the smallest precision's exponent to use the largest precision's bias - XExpE = XOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]}; - YExpE = YOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]}; - ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]}; + XExpE = XOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]}}, XLen2[`LEN2-3:`NF2]}; + YExpE = YOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]}}, YLen2[`LEN2-3:`NF2]}; + ZExpE = ZOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]}}, ZLen2[`LEN2-3:`NF2]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)}; @@ -447,9 +447,9 @@ module unpack ( // convert the double precsion exponent into quad precsion - XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]}; - YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]}; - ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]}; + XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]}}, XLen1[`D_LEN-3:`D_NF]}; + YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]}}, YLen1[`D_LEN-3:`D_NF]}; + ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]}}, ZLen1[`D_LEN-3:`D_NF]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; @@ -471,9 +471,9 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the single precsion exponent into quad precsion - XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]}; - YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]}; - ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]}; + XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]}}, XLen2[`S_LEN-3:`S_NF]}; + YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]}}, YLen2[`S_LEN-3:`S_NF]}; + ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]}}, ZLen2[`S_LEN-3:`S_NF]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; @@ -495,9 +495,9 @@ module unpack ( // also need to take into account possible zero/denorm/inf/NaN values // convert the half precsion exponent into quad precsion - XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]}; - YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]}; - ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]}; + XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]}}, XLen3[`H_LEN-3:`H_NF]}; + YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]}}, YLen3[`H_LEN-3:`H_NF]}; + ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]}}, ZLen3[`H_LEN-3:`H_NF]}; // extract the fraction and add the nessesary trailing zeros XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv index 78ac99e50..5b1c22f91 100644 --- a/pipelined/src/generic/lzc.sv +++ b/pipelined/src/generic/lzc.sv @@ -3,11 +3,13 @@ module lzc #(parameter WIDTH=1) ( input logic [WIDTH-1:0] num, output logic [$clog2(WIDTH)-1:0] ZeroCnt ); +/* verilator lint_off CMPCONST */ logic [$clog2(WIDTH)-1:0] i; always_comb begin i = 0; - while (~num[WIDTH-1-i] & $unsigned(i) <= $unsigned(WIDTH-1)) i = i+1; // search for leading one + while (~num[WIDTH-1-(32)'(i)] & $unsigned(i) <= $unsigned(($clog2(WIDTH))'(WIDTH-1))) i = i+1; // search for leading one ZeroCnt = i; end +/* verilator lint_on CMPCONST */ endmodule diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 3e90aeaf4..cb214ce8f 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -1174,13 +1174,13 @@ end /////////////////////////////////////////////////////////////////////////////////////////////// // check if the non-fma test is correct - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); $stop; end - + // TestFloat sets the result to all 1's when there is an invalid result, however in // http://www.jhauser.us/arithmetic/TestFloat-3/doc/TestFloat-general.html it says // for an unsigned integer result 0 is also okay @@ -1470,7 +1470,7 @@ module readvectors ( Ans = TestVector[8]; end 2'b10: begin // half - X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+3*(`H_LEN)-1:12+(`H_LEN)]}; + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+2*(`H_LEN)-1:12+(`H_LEN)]}; Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+(`H_LEN)-1:12]}; Ans = TestVector[8]; end From fa453986ad42a485e25db0adc26ba2b4a0bfcbc3 Mon Sep 17 00:00:00 2001 From: Madeleine Masser-Frye <51804758+mmasserfrye@users.noreply.github.com> Date: Fri, 27 May 2022 20:59:23 +0000 Subject: [PATCH 04/19] plotting updates, normalization --- synthDC/ppaAnalyze.py | 119 +++++++++++++++++++++++++++++++----------- synthDC/ppaData.csv | 34 ++++++------ 2 files changed, 107 insertions(+), 46 deletions(-) diff --git a/synthDC/ppaAnalyze.py b/synthDC/ppaAnalyze.py index d7f161802..14ab01ab4 100755 --- a/synthDC/ppaAnalyze.py +++ b/synthDC/ppaAnalyze.py @@ -5,6 +5,7 @@ from operator import index import subprocess import csv import re +from matplotlib.cbook import flatten import matplotlib.pyplot as plt import matplotlib.lines as lines import matplotlib.axes as axes @@ -29,7 +30,7 @@ def synthsintocsv(): ''' writes a CSV with one line for every available synthesis each line contains the module, tech, width, target freq, and resulting metrics ''' - + print("This takes a moment...") bashCommand = "find . -path '*runs/ppa*rv32e*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) allSynths = output.decode("utf-8").split('\n')[:-1] @@ -91,7 +92,7 @@ def cleanup(): def getVals(tech, module, var, freq=None): ''' for a specified tech, module, and variable/metric returns a list of values for that metric in ascending width order with the appropriate units - works at a specified target frequency or if none is given, uses the synthesis with the min delay for each width + works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width ''' if (var == 'delay'): @@ -101,11 +102,12 @@ def getVals(tech, module, var, freq=None): elif (var == 'lpower'): units = " (nW)" elif (var == 'denergy'): - units = " (pJ)" + units = " (nJ)" global widths metric = [] widthL = [] + if (freq != None): for oneSynth in allSynths: if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module): @@ -118,7 +120,7 @@ def getVals(tech, module, var, freq=None): m = 100000 # large number to start for oneSynth in allSynths: if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == module): - if (oneSynth.delay < m): + if (oneSynth.delay < m) & (1000/oneSynth.delay > oneSynth.freq): m = oneSynth.delay osdict = oneSynth._asdict() met = osdict[var] @@ -127,6 +129,8 @@ def getVals(tech, module, var, freq=None): if ('flop' in module) & (var == 'area'): metric = [m/2 for m in metric] # since two flops in each module + if (var == 'denergy'): + metric = [m*1000 for m in metric] # more practical units for regression coefs return metric, units @@ -177,9 +181,20 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn'): fullLeg = [] global techcolors global widths + + metrics = ['delay', 'area', 'lpower', 'denergy'] + ind1 = metrics.index(var) + techs = ['sky90', 'gf32', 'tsmc28'] + global norms + for combo in techcolors: tech, c, m = combo metric, units = getVals(tech, module, var, freq=freq) + + ind2 = techs.index(tech) + norm = norms[ind1][ind2] + metric = [m/norm for m in metric] # comment out to not normalize + if len(metric) == 5: xp, pred, leg = regress(widths, metric, combo, fits) fullLeg += leg @@ -191,10 +206,13 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn'): ax.set_xticks(widths) ax.set_xlabel("Width (bits)") - ax.set_ylabel(str.title(var) + units) + + ylabeldic = {"lpower": "Leakage Power", "denergy": "Dynamic Energy", "area": "Area", "delay": "Delay"} + + ax.set_ylabel(ylabeldic[var] + units) if singlePlot: - titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best delay)" + titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)" ax.set_title(module + titleStr) plt.show() @@ -343,31 +361,69 @@ def squareAreaDelay(tech, mod, width): delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] - fig = plt.figure() - ax = fig.add_subplot(111) + f, (ax1) = plt.subplots(1, 1) + ax2 = ax1.twinx() for ind in [0,1]: areas = areasL[ind] delays = delaysL[ind] - freqs = freqsL[ind] - + targets = freqsL[ind] + targets = [1000/f for f in targets] + if ('flop' in mod): areas = [m/2 for m in areas] # since two flops in each module - freqs, delays, areas = noOutliers(freqs, delays, areas) # comment out to see all syntheses + targets, delays, areas = noOutliers(targets, delays, areas) # comment out to see all + + if not ind: + achievedDelays = delays c = 'blue' if ind else 'green' - plt.scatter(delays, areas, color=c) - - legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='o', label='timing achieved'), - lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')] - - plt.legend(handles=legend_elements) + ax1.scatter(targets, delays, marker='^', color=c) + ax2.scatter(targets, areas, marker='s', color=c) - plt.xlabel("Delay Achieved (ns)") - plt.ylabel('Area (sq microns)') - plt.title(mod + '_' + str(width)) - ax.set_aspect(1./ax.get_data_ratio()) + bestAchieved = min(achievedDelays) + + legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='^', label='delay (timing achieved)'), + lines.Line2D([0], [0], color='green', ls='', marker='s', label='area (timing achieved)'), + lines.Line2D([0], [0], color='blue', ls='', marker='^', label='delay (timing violated)'), + lines.Line2D([0], [0], color='blue', ls='', marker='s', label='area (timing violated)')] + + ax2.legend(handles=legend_elements, loc='upper left') + + ax1.set_xlabel("Delay Targeted (ns)") + ax1.set_ylabel("Delay Achieved (ns)") + ax2.set_ylabel('Area (sq microns)') + ax1.set_title(mod + '_' + str(width)) + + squarify(f) + + xvals = np.array(ax1.get_xlim()) + frac = (min(flatten(delaysL))-xvals[0])/(xvals[1]-xvals[0]) + areaLowerLim = min(flatten(areasL))-100 + areaUpperLim = max(flatten(areasL))/frac + areaLowerLim + ax2.set_ylim([areaLowerLim, areaUpperLim]) + ax1.plot(xvals, xvals, ls="--", c=".3") + ax1.hlines(y=bestAchieved, xmin=xvals[0], xmax=xvals[1], color="black", ls='--') + plt.show() +def squarify(fig): + ''' helper function for squareAreaDelay() + forces matplotlib figure to be a square + ''' + w, h = fig.get_size_inches() + if w > h: + t = fig.subplotpars.top + b = fig.subplotpars.bottom + axs = h*(t-b) + l = (1.-axs/w)/2 + fig.subplots_adjust(left=l, right=1-l) + else: + t = fig.subplotpars.right + b = fig.subplotpars.left + axs = w*(t-b) + l = (1.-axs/h)/2 + fig.subplots_adjust(bottom=l, top=1-l) + def adprodpow(areas, delays, pow): ''' for each value in [areas] returns area*delay^pow helper function for freqPlot''' @@ -380,15 +436,15 @@ def adprodpow(areas, delays, pow): def plotPPA(mod, freq=None): ''' for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits - if no freq specified, uses the synthesis with min delay for each width + if no freq specified, uses the synthesis with best achievable delay for each width overlays data from both techs ''' fig, axs = plt.subplots(2, 2) oneMetricPlot(mod, 'delay', ax=axs[0,0], fits='cg', freq=freq) - oneMetricPlot(mod, 'area', ax=axs[0,1], fits='s', freq=freq) - oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits='s', freq=freq) - oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits='s', freq=freq) - titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best delay)" + oneMetricPlot(mod, 'area', ax=axs[0,1], fits='cl', freq=freq) + oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits='cl', freq=freq) + oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits='cl', freq=freq) + titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)" plt.suptitle(mod + titleStr) plt.show() @@ -398,13 +454,16 @@ if __name__ == '__main__': Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy") techcolors = [['sky90', 'green', 'o'], ['tsmc28', 'blue', '^']] # add another list here for gf32 widths = [8, 16, 32, 64, 128] + norms = [[43.2, 15, 12.2], [1.96, .351, .252], [1.98, .3116, 1.09], [1, 1, 1]] # [sky, gf, tsmc][fo4, invx1area, leakage, energy] # synthsintocsv() # slow, run only when new synth runs to add to csv synthsfromcsv('ppaData.csv') # your csv here! ### examples - # oneMetricPlot('add', 'delay') - # freqPlot('sky90', 'comparator', 16) - # plotPPA('add') - squareAreaDelay('sky90', 'comparator', 16) \ No newline at end of file + # freqPlot('tsmc28', 'add', 16) + squareAreaDelay('sky90', 'add', 32) + squareAreaDelay('sky90', 'mult', 32) + squareAreaDelay('sky90', 'comparator', 32) + plotPPA('add') + plotPPA('comparator') \ No newline at end of file diff --git a/synthDC/ppaData.csv b/synthDC/ppaData.csv index b5b0435cc..1c240ad24 100644 --- a/synthDC/ppaData.csv +++ b/synthDC/ppaData.csv @@ -13,6 +13,7 @@ flopenr,sky90,64,5619,0.204566,4385.500035,2100.0,4.961134631999999 comparator,sky90,128,10,0.842074,1997.240039,243.506,0.001300162256 add,sky90,8,6896,0.144869,331.240005,219.731,0.060410373 shiftleft,sky90,128,3484,0.313597,11188.660188,8590.0,2.418146467 +add,sky90,32,3120,0.320213,1107.40002,307.68,0.18700439200000005 flop,sky90,128,8476,0.070789,4264.959961,2070.0,3.6420232610000003 flopr,sky90,8,11879,0.11919,400.820003,214.285,0.662589129 add,tsmc28,64,3000,0.312507,227.052001,1070.0,0.0621263916 @@ -41,7 +42,6 @@ shiftleft,sky90,8,10222,0.097799,394.940007,435.049,0.06836150099999999 flopenr,sky90,64,4723,0.18608,4327.680086,2230.0,3.9400579199999997 flop,sky90,128,15539,0.070789,4264.959961,2070.0,6.676960058000001 alu,sky90,16,10000,0.304,3555.440059,2890.0,2.593728 -add,sky90,32,4320,0.254861,1716.960028,866.723,0.373881087 add,tsmc28,32,21130,0.080875,367.668003,1860.0,0.15414775 flop,sky90,8,14409,0.070789,266.559998,129.629,0.3870813309 comparator,sky90,64,4636,0.215691,2072.700029,1840.0,0.345752673 @@ -50,6 +50,7 @@ add,tsmc28,8,9092,0.108452,21.42,108.14,0.0057154204 add,sky90,16,4174,0.239287,549.780011,304.811,0.103371984 alu,sky90,16,3524,0.29417,3599.540061,2670.0,0.90839696 priorityonehot,sky90,8,21600,0.054084,157.780003,56.585,0.0190267512 +add,sky90,32,4368,0.268519,1731.660029,883.74,0.399824791 shiftleft,sky90,32,6375,0.159792,3330.040049,3530.0,0.627343392 priorityonehot,sky90,128,5185,0.274609,2437.260036,1210.0,0.250718017 add,tsmc28,128,6900,0.144862,733.320004,3010.0,0.22192858399999998 @@ -59,8 +60,8 @@ csa,sky90,128,16929,0.060643,4264.960083,3260.0,1.3935761400000002 shiftleft,sky90,32,6250,0.159977,2964.500038,3130.0,0.547281317 flopr,sky90,64,6988,0.11201,2728.319991,1360.0,2.4349853899999996 flop,sky90,64,19777,0.070789,2132.47998,1040.0,4.249180514000001 -add,sky90,32,3680,0.271527,1465.100024,591.825,0.289176255 priorityonehot,sky90,8,22800,0.054084,157.780003,56.585,0.0200976144 +add,sky90,32,3744,0.29863,1565.060028,830.413,0.31117246000000004 floprasync,sky90,8,15000,0.071444,362.600007,161.167,0.40944556400000004 mult,sky90,64,657,1.52205,69763.260863,23900.0,57.09818369999999 decoder,sky90,8,26064,0.037953,49.980001,39.023,0.0030893742000000003 @@ -145,7 +146,6 @@ add,sky90,16,4595,0.221986,817.320014,742.91,0.15871998999999998 flopenr,sky90,16,5285,0.169538,1127.000031,688.586,0.8848188220000001 priorityencoder,sky90,128,7500,0.113763,1058.400021,117.974,0.040499627999999996 priorityencoder,sky90,8,10131,0.104625,85.260002,26.481,0.0075225375 -add,sky90,32,4800,0.258491,1955.100033,1070.0,0.5043159410000001 add,tsmc28,8,7880,0.123121,20.538,106.097,0.0054665724 decoder,sky90,8,30334,0.032475,70.560001,88.439,0.006699592499999999 add,tsmc28,16,6443,0.138825,50.274,244.477,0.012882959999999999 @@ -190,6 +190,7 @@ mult,sky90,32,944,1.085045,32407.620517,26800.0,28.648443135 shiftleft,sky90,16,10769,0.131174,1153.460019,1350.0,0.26549617600000003 add,tsmc28,16,3000,0.32096,41.202,203.505,0.0116572672 add,tsmc28,128,8400,0.119042,1050.084009,4830.0,0.29831925200000003 +add,sky90,32,3978,0.280475,1768.90003,1000.0,0.34245997499999997 mult,sky90,32,4000,1.091389,31262.980534,24900.0,123.890113724 priorityencoder,sky90,8,9176,0.104625,85.260002,26.481,0.006821550000000001 floprasync,sky90,128,7500,0.071444,5785.920113,2580.0,3.3043564439999997 @@ -219,7 +220,6 @@ add,sky90,8,6355,0.157048,343.980005,234.605,0.064546728 csa,sky90,64,22360,0.060643,2195.200043,1740.0,0.937237565 priorityencoder,sky90,32,9184,0.111067,293.020006,53.82,0.015460526399999999 decoder,sky90,16,28542,0.039572,499.800013,875.782,0.058249984000000005 -add,sky90,32,4160,0.253175,2031.540036,1240.0,0.41900462499999996 shiftleft,sky90,8,11111,0.091007,491.960005,678.321,0.07371567000000001 alu,sky90,16,2073,0.481803,1688.540032,395.679,0.278963937 priorityonehot,sky90,128,3407,0.293484,1910.02003,670.082,0.107415144 @@ -253,6 +253,7 @@ add,tsmc28,8,7500,0.131988,20.916,106.321,0.0055698936 flopr,sky90,16,9317,0.10124,776.160012,486.897,0.78248396 priorityencoder,sky90,8,9749,0.104625,85.260002,26.481,0.0072505124999999995 csa,sky90,8,15971,0.062613,203.840004,117.131,0.0491950341 +add,sky90,32,4680,0.257118,1882.58003,1100.0,0.439157544 mult,sky90,128,584,1.712328,298800.044147,115000.0,257.92111732800004 priorityonehot,sky90,8,18400,0.054629,109.760001,31.371,0.009920626399999998 comparator,sky90,8,10909,0.11361,387.1,565.114,0.0965685 @@ -274,6 +275,7 @@ comparator,sky90,16,7200,0.15891,771.260013,1090.0,0.12331416 shiftleft,sky90,128,2968,0.33687,9142.420162,5660.0,1.7459972099999999 flop,sky90,32,13279,0.070789,1066.23999,518.516,1.4265894803 decoder,sky90,32,7500,0.115541,147.000003,15.758,0.006470296 +add,sky90,32,3588,0.278585,1182.860022,345.668,0.22342517000000003 decoder,sky90,128,7658,0.130462,549.78001,153.219,0.041225991999999996 mult,sky90,16,1122,0.891172,6478.780105,3540.0,4.677761828 shifter,sky90,16,5000,0.209586,2120.720031,2150.0,0.46528091999999993 @@ -307,8 +309,10 @@ flopr,sky90,128,15000,0.125811,5740.839996,3160.0,11.995198173 flopr,sky90,64,12112,0.101659,2816.520013,1550.0,3.8755460570000007 add,sky90,128,2615,0.390136,6662.040117,2450.0,1.2094216 flop,sky90,128,13561,0.070789,4264.959961,2070.0,5.826996535 +add,sky90,32,3900,0.280206,1679.720027,892.235,0.337928436 comparator,sky90,64,4727,0.225291,2499.000023,2710.0,0.465000624 add,sky90,8,7708,0.161451,407.680008,375.802,0.084923226 +add,sky90,32,4056,0.253823,1918.840034,1040.0,0.38657242900000005 add,tsmc28,16,16300,0.067336,189.63,1050.0,0.04902060799999999 priorityencoder,sky90,32,10000,0.111067,293.020006,53.82,0.016882183999999998 decoder,sky90,8,29973,0.032971,66.640001,78.184,0.0064062653 @@ -337,6 +341,7 @@ add,tsmc28,128,8232,0.121475,945.504008,4240.0,0.27429055 shiftleft,sky90,8,7500,0.132768,218.540002,147.871,0.034785216 priorityencoder,sky90,64,9782,0.112447,546.840011,77.149,0.028561538 add,tsmc28,64,7202,0.138773,305.424001,1310.0,0.09256159100000001 +add,sky90,32,3333,0.299576,1153.460022,384.333,0.20880447200000002 add,tsmc28,128,6720,0.148758,707.742004,2940.0,0.21629413200000003 mult,sky90,32,852,1.173643,23514.120391,12700.0,21.016425201 mult,sky90,32,741,1.349466,17389.120212,4650.0,10.286979318 @@ -380,7 +385,6 @@ priorityonehot,sky90,16,15000,0.086192,739.900005,1110.0,0.11920353600000001 shiftleft,sky90,128,3355,0.309977,11750.200195,9570.0,2.415650761 add,sky90,8,7437,0.151519,495.880011,457.493,0.09409329899999999 flop,sky90,64,14974,0.070789,2132.47998,1040.0,3.217289261 -add,sky90,32,3920,0.273454,2044.280039,1330.0,0.41154826999999994 csa,sky90,64,15971,0.062613,1630.720032,943.002,0.39320964 alu,sky90,16,2764,0.361248,2302.020041,1050.0,0.497438496 add,sky90,16,6307,0.225596,1023.12002,1010.0,0.281769404 @@ -419,7 +423,6 @@ add,tsmc28,8,14791,0.06639,27.468,134.31,0.007946883 flopenr,sky90,16,20000,0.189692,1098.580025,591.454,4.502529312 shiftleft,sky90,64,4348,0.23035,5490.940094,4500.0,1.0674419000000002 flop,sky90,8,14126,0.070789,266.559998,129.629,0.37948567120000004 -add,sky90,32,3200,0.312424,1121.120021,296.836,0.203700448 shiftleft,sky90,16,8154,0.128748,1062.320016,1070.0,0.17020485600000002 shiftleft,sky90,8,11333,0.092595,545.860006,815.115,0.089168985 priorityonehot,sky90,16,10667,0.09706,282.240005,85.616,0.025555897999999997 @@ -431,6 +434,7 @@ csa,sky90,16,17249,0.060643,533.12001,432.126,0.178714921 shiftleft,sky90,8,11778,0.091769,674.240011,1040.0,0.101037669 add,sky90,128,2718,0.407908,7287.280117,3350.0,1.463573904 floprasync,sky90,128,15000,0.071444,5785.920113,2580.0,6.602568704 +add,sky90,32,3666,0.278178,1498.420028,715.058,0.276508932 alu,sky90,32,3128,0.389409,5641.860104,2720.0,1.566592407 priorityonehot,sky90,32,7067,0.141491,1078.980015,1580.0,0.14389634700000004 floprasync,sky90,8,10000,0.071444,362.600007,161.167,0.2729375132 @@ -500,6 +504,7 @@ flopr,sky90,8,10947,0.11919,403.760003,218.217,0.60977604 add,sky90,16,10,2.032906,221.479998,55.29,0.0012902854382000001 flopr,sky90,128,10947,0.172973,5340.020018,2310.0,10.278747551999999 shiftleft,sky90,64,4261,0.234657,5289.060089,3950.0,0.980396946 +add,sky90,32,3822,0.282243,1657.18003,864.512,0.31752337500000005 priorityonehot,sky90,16,13333,0.077249,976.080015,1550.0,0.164694868 flopenr,sky90,128,6637,0.228828,8134.980007,3210.0,11.399295648 shiftleft,sky90,64,6087,0.227478,6715.940117,5940.0,1.7761482240000002 @@ -583,6 +588,7 @@ add,tsmc28,128,5040,0.197577,488.502002,2230.0,0.143045748 add,sky90,16,4144,0.240621,555.660011,274.571,0.092639085 alu,sky90,64,2409,0.452715,12468.540233,6180.0,2.755676205 add,sky90,16,2609,0.375085,405.720008,52.28,0.050598966499999995 +add,sky90,32,4134,0.25292,1966.860033,1110.0,0.40720119999999993 alu,sky90,128,2061,0.515343,27812.400516,13300.0,6.941154867 priorityonehot,sky90,64,4762,0.212289,1107.400013,650.606,0.09828980699999999 mult,sky90,8,1709,0.599356,2453.920037,2010.0,1.442649892 @@ -594,7 +600,6 @@ csa,sky90,32,16929,0.060643,1066.240021,827.644,0.348818536 csa,sky90,32,22360,0.060643,1097.600021,868.175,0.468891676 add,tsmc28,32,15394,0.081095,348.768003,1770.0,0.110694675 add,tsmc28,16,25000,0.066258,202.608001,1140.0,0.082027404 -add,sky90,32,2400,0.41509,958.440019,151.083,0.1286779 csa,sky90,16,12777,0.067531,329.280006,134.949,0.0528362544 decoder,sky90,8,39096,0.030694,184.240003,330.692,0.021700658 add,sky90,128,2359,0.423881,5520.340104,1490.0,0.846490357 @@ -615,7 +620,6 @@ alu,sky90,32,10000,0.384364,6083.84011,3640.0,5.60018348 flopr,sky90,32,20000,0.085865,1540.560029,1070.0,2.7735253650000002 flop,sky90,8,13279,0.070789,266.559998,129.629,0.35677656 csa,sky90,8,18207,0.060643,266.560005,213.306,0.0942877364 -add,sky90,32,6000,0.271774,1746.36003,955.901,0.5761608800000001 flop,sky90,64,13844,0.070789,2132.47998,1040.0,2.974482991 csa,sky90,128,16820,0.060643,4264.960083,3260.0,1.384661619 floprasync,sky90,8,8398,0.071444,362.600007,161.167,0.229263796 @@ -660,15 +664,16 @@ add,sky90,64,2788,0.358537,2637.180048,758.693,0.45928589700000005 flop,sky90,128,14126,0.070789,4264.959961,2070.0,6.069802805000001 flop,sky90,32,12996,0.070789,1066.23999,518.516,1.3962139204 decoder,sky90,8,35838,0.030694,237.160005,420.74,0.025291855999999998 -add,sky90,32,3840,0.291206,1547.420027,784.112,0.299650974 add,tsmc28,64,7500,0.133293,307.944001,1320.0,0.09437144399999998 alu,sky90,32,2398,0.416982,5257.700098,2000.0,1.094160768 add,tsmc28,128,7728,0.129394,854.910008,3690.0,0.25193011800000004 +add,sky90,32,3432,0.290785,1156.400022,335.133,0.20762049 flopenr,sky90,64,2892,0.298899,3245.75997,644.425,1.6744321980000003 priorityonehot,sky90,128,4000,0.253946,2661.680036,1330.0,0.210521234 floprasync,sky90,32,13997,0.071444,1446.480028,643.984,1.539332424 csa,sky90,32,16291,0.060643,1066.240021,825.615,0.33547707600000004 flopenr,sky90,64,4627,0.20887,3954.300054,1660.0,3.0662116000000004 +add,sky90,32,2340,0.42591,958.440019,152.032,0.12734709000000002 mux2,sky90,1,10,0.060639,6.86,1.19,3.1229084999999996e-07 flop,sky90,64,10000,0.070789,2132.47998,1040.0,2.1485735702000004 decoder,sky90,8,33883,0.030694,263.620004,439.421,0.027102802 @@ -695,6 +700,7 @@ decoder,sky90,16,25538,0.039572,265.580003,416.038,0.028729272 flopenr,sky90,64,5836,0.198621,4564.840035,2580.0,4.922821485 shiftleft,sky90,8,10444,0.095384,335.160004,328.601,0.060759608 add,sky90,8,5409,0.182541,209.720004,99.155,0.041436807000000006 +add,sky90,32,4212,0.276372,1701.280028,896.35,0.33496286399999997 add,tsmc28,32,1000,0.912322,67.157999,231.062,0.0220781924 flopenr,sky90,32,5764,0.185375,2024.679996,668.031,1.3873465 flop,sky90,32,15000,0.070789,1066.23999,518.516,1.6115399006000002 @@ -707,6 +713,7 @@ mult,sky90,16,976,1.024406,4960.760064,1320.0,2.087739428 add,tsmc28,16,6443,0.138825,50.274,244.477,0.012882959999999999 csa,sky90,128,18139,0.060643,4264.960083,3260.0,1.492970017 comparator,sky90,64,4364,0.229142,1709.120026,1020.0,0.276803536 +add,sky90,32,5460,0.27667,1690.500029,859.028,0.45456881 alu,sky90,32,2659,0.384337,6206.340103,3560.0,1.485846842 flopenr,sky90,64,5079,0.203824,4340.420085,2230.0,4.60947976 add,tsmc28,8,9056,0.108551,21.42,107.887,0.0057749132 @@ -718,7 +725,6 @@ decoder,sky90,8,24773,0.04026,44.100001,23.272,0.002604822 mult,sky90,8,1855,0.605444,2332.40004,1740.0,1.4470111599999997 flopenr,sky90,64,5013,0.228449,4007.220058,1760.0,3.779231807 add,sky90,8,25000,0.151154,660.520013,864.531,0.39103539800000003 -add,sky90,32,10,4.160501,456.679995,112.161,0.005429453805000001 shiftleft,sky90,16,7231,0.138234,1233.820018,1400.0,0.21619797600000001 add,tsmc28,64,9413,0.106226,423.108003,1900.0,0.12534668 decoder,sky90,8,10000,0.085629,37.240001,2.355,0.0012364827599999997 @@ -853,6 +859,7 @@ add,tsmc28,32,14791,0.079295,378.630002,1900.0,0.11220242500000001 decoder,sky90,8,25279,0.038956,48.020001,35.206,0.0031047931999999994 add,tsmc28,64,7732,0.129331,331.128002,1450.0,0.102042159 flopr,sky90,64,10714,0.17183,2815.540026,1390.0,5.43756035 +add,sky90,32,2631,0.379925,977.060019,169.107,0.140952175 priorityonehot,sky90,16,10000,0.099923,281.260004,117.94,0.02398152 mult,sky90,8,10,2.076433,1009.399998,211.637,0.005689426420000001 decoder,sky90,128,17868,0.101057,1072.12001,985.334,0.202922456 @@ -938,7 +945,6 @@ mult,sky90,8,1091,0.915221,1167.180013,211.892,0.30293815099999993 add,sky90,64,3636,0.330032,3266.340054,1220.0,0.79537712 flop,sky90,128,14692,0.070789,4264.959961,2070.0,6.313033809 add,tsmc28,8,15000,0.06579,28.728,137.18,0.008302698 -add,sky90,32,4080,0.256294,1991.360031,1240.0,0.408532636 shiftleft,sky90,16,10000,0.128994,1192.660017,1420.0,0.242379726 mult,sky90,64,10,14.7933,46798.920227,5460.0,2.7101325599999995 floprasync,sky90,16,14557,0.071444,723.240014,321.992,0.798601032 @@ -948,7 +954,6 @@ mult,sky90,128,10,29.334627,180734.540854,18000.0,22.264981893 flop,sky90,64,15539,0.070789,2132.47998,1040.0,3.3386216070000003 add,tsmc28,8,12074,0.081502,23.31,115.92,0.0062838042000000005 flopr,sky90,128,12811,0.174211,5123.439977,1890.0,10.893762252 -add,sky90,32,5000,0.2505,1933.540033,1030.0,0.4726935 mult,sky90,64,714,1.400528,87215.101373,43900.0,85.31176259200001 alu,sky90,64,2496,0.442869,12618.480223,6700.0,2.9570363129999997 priorityencoder,sky90,8,10323,0.104625,85.260002,26.481,0.0076690125 @@ -977,7 +982,6 @@ alu,sky90,64,2365,0.452964,12152.980222,6200.0,2.5982015040000004 priorityonehot,sky90,16,11111,0.089821,300.860005,305.978,0.029281646 comparator,sky90,32,5474,0.192304,1188.740012,1430.0,0.20691910400000002 flopenr,sky90,32,4803,0.217601,2179.52003,1080.0,2.520907585 -add,sky90,32,4240,0.268332,1829.660028,1090.0,0.373518144 csa,sky90,32,17568,0.060643,1066.240021,827.644,0.36203871 comparator,sky90,128,4000,0.268954,4027.800041,3660.0,0.679377804 decoder,sky90,8,31928,0.031295,106.82,190.81,0.010796775 @@ -988,7 +992,6 @@ add,tsmc28,32,8620,0.115079,146.538001,644.995,0.045571284000000004 flop,sky90,16,16104,0.070789,533.119995,259.258,0.8651406846 add,tsmc28,64,5043,0.178584,231.210001,1080.0,0.06107572799999999 priorityonehot,sky90,16,15556,0.088601,610.540002,811.656,0.097726903 -add,sky90,32,5600,0.254525,1871.800028,877.446,0.50039615 shiftleft,sky90,128,3871,0.303026,12747.840208,11600.0,3.235408602 flop,sky90,8,12996,0.070789,266.559998,129.629,0.34911011129999997 shiftleft,sky90,64,4435,0.24668,5129.320094,4030.0,1.0940258000000003 @@ -1084,7 +1087,6 @@ floprasync,sky90,128,13437,0.071444,5785.920113,2580.0,5.9146344279999985 shiftleft,sky90,128,3032,0.329767,9579.500162,6250.0,1.8898946769999998 flopr,sky90,32,12578,0.101547,1445.500023,882.979,2.039469948 alu,sky90,128,2217,0.514448,27540.940502,14000.0,7.25886128 -add,sky90,32,3760,0.278449,1689.520028,834.387,0.323279289 csa,sky90,128,16610,0.060643,4264.960083,3260.0,1.3673783640000001 floprasync,sky90,64,14557,0.071444,2892.960056,1290.0,3.203048852 flopenr,sky90,16,4228,0.180729,842.799992,176.142,0.3973688523 @@ -1093,6 +1095,7 @@ priorityonehot,sky90,32,7200,0.143094,1101.520018,1470.0,0.16956639 comparator,sky90,8,8545,0.116724,205.800003,165.947,0.041670467999999995 mult,sky90,128,528,1.893939,255011.682875,66500.0,175.06625146500002 shiftleft,sky90,64,10000,0.23373,6486.620108,6060.0,3.09762369 +add,sky90,32,2857,0.349019,998.620019,202.848,0.164736968 flop,sky90,16,13561,0.070789,533.119995,259.258,0.7285179146000001 priorityonehot,sky90,16,5000,0.196212,130.340003,29.8,0.005788254 mult,sky90,8,5000,0.552339,4261.040075,5050.0,5.394142674 @@ -1102,7 +1105,6 @@ comparator,sky90,16,8000,0.158838,801.640006,1190.0,0.15169029 flopenr,sky90,8,9518,0.148606,636.020015,366.016,0.9204655639999999 alu,sky90,16,3455,0.289435,3445.680058,2290.0,0.80289269 add,tsmc28,8,21130,0.050365,90.846,513.587,0.020700015 -add,sky90,32,4000,0.280842,1730.680031,849.828,0.358635234 priorityonehot,sky90,16,10222,0.097791,313.600004,134.808,0.026892525000000004 flopr,sky90,128,12112,0.177282,5399.800033,2390.0,11.989758942 flopr,sky90,8,12112,0.11919,400.820003,214.285,0.675533163 From d5c249bf71c0bc3cace64218bae3827a88ed7584 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Fri, 27 May 2022 14:37:10 -0700 Subject: [PATCH 05/19] unpacker adds 1 to denorm expoents --- .../build/Linux-x86_64-GCC/softfloat.a | Bin 513510 -> 571590 bytes pipelined/src/fpu/fcvt.sv | 6 +- pipelined/src/fpu/fma.sv | 67 ++++----- pipelined/src/fpu/fpu.sv | 9 +- pipelined/src/fpu/unpack.sv | 132 +++++++++--------- pipelined/testbench/testbench-fp.sv | 46 +++--- 6 files changed, 119 insertions(+), 141 deletions(-) diff --git a/addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a b/addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a index 442544836bc5c22fe7bfc3bbe1541edbac4d77ad..69cd932a8c39140eb3b940cb0e8e364b76562fb6 100644 GIT binary patch literal 571590 zcmeFa4SZC^y*ECa_W;d_tuIuqEG@RAw}m9c2v%hm6FiF>OBF1SZ$KbHDguQKVxLw* zlPIT{%5Al^_x857|Jy#+r?>U#y%*72F&mV(wi-xP)O#(y6cbW0XbTaQ{J+0BGiPUK zcal9T)}yk zEbtk|#{HDBPfufP;y%XC`vzm5yNa>!nT${iD1!ryl}W3PRM zu|H2|?8pcf2voB`(U~mpp%+--v}r7G*6A$p@f;TT)DtXFel-hJPhf$IKF$KujpJajm1%LiIEO5hq7FddWtC9a!;IG@w0^dgZ509|GPo8FhU%bHr|B3X?Utxi5 zzhHsi-_HWieUk-VLH@o17WgyDefI^HGxF;!=alPN&N&yboO4T9&V+iFGx--RXX*%+ zGvk{q=ZY09=h~mLoJCi&od5luDAL;T)qmh;o+SkAv)%yRzy z2+MhRGs}s8gXL_S&T@WtKg;?3Dwgvc(qBZGJ)dSduN`GMZ|-L~M=oc%fe*9X!lf*? zWCqJU?UO9`tY5L*vA40@Pwi*96JB7s=Uu>ZFGRjezQl58+{$vl@GX`*2k?2HX1R-i zxAb9_yJ|Dby>%zcz3l+YZNGx$-rdM@|GAUp{^SQN_vbTM?ynB9++WwT+|G?G_qQup z?$f8U+&|pUa`&OU*QT-DH;=O1BfnyKfnTt^qWf81Nh8ZU?P`{H_D@;fC;o}$efkBK zS6RvOru=~Ag+I*l>~FHXnO|Xfb8cgK^N@d03Cmk{Cd+GD#q!o%!SdRUvb;M0|4sqR z`##F~>FF%*mndfw%6;?#miIW)fBP`Y>t4$8o?F54UWETD%0KW)miL#hv%Gg!vHaYJ zS^mfpmjB^zu>3PGXZd4aV)6YMEPu)YmLIx;<=a=W{L6pE^5^`5<-H!kAkINVFjQ=!B=Onf|jRQ!TNeu@Q)K%!FRvG3jSpuEBM!Gtl<71u!8@0 zCM$UGJFH;y2v+det*l@t%HQ=#R`3k)|K|WJcx44E=xbyJZ(Pj^22lUtG*+0ukrj?Q zlNEk=6)POQn-z|kzzWa(F)RE`2`jAp3M-s)D=Q4W!3r-uzzRQK&kC>F$_lSv!3rBM zW`#?yW`(POcPrwzpUw*JL^=O_IV=1z@PBrQ75>M5R`_2}v%+nNcO(4|C~qIiIgrB& z|AO-0euEX|Ze>Ly|A`fyaw{u3<2hDzPBkkU_YGEr`B7AH1uHrq=@(zXifVqrie?;P zMPHcCioWa_#Eaiupt4F||#Wf%Mcu&~}+VsWPc}B}HpoQp%7;>P=EG zhigscdDV4FdloEfS_&%9U$}VbqN^KLEL*9L)BJ@ClWPJ%!(Z7{pPU5%EzA7n%U3L0 zeMQ59#mnbb+*q+vsXaNV+>kV>a&AR>q$|I?A~r;sBH2_fC2M7`n7?>s!}KNd7pYBY ztf-zHnYHYaWjEclZ0Y6mFIU>KK*DA%n@+iuBy@WD{6+I)4ZzUT8Yfj|QX=Itm8hjP zR##Z9Zg zYMaipnYyMYuE4BcIBVJLNYbetTg|7%z;Wl3}+sTC~oK zH&QobUW!msbycEt5$2_fFfZK=d!?&UwFq9iz~iMWO_xEGErTdp1Vzb7u{xQ^TP<jqyS-!?}++&3(*b>4!zq z!o{mNXhp+KZdQnY9%%S7_m(uco&?TaDJ7stp8Msd#SL!il9&?FWF@+optKBAU)?aL zvncTUSN(bNrdN{e6us!=+D zrcl+xPlX{VG0)w6oYEpl)7uS7N~{4I#YGsj7a@>e zhLlug5M*kDAia!B_9EhGN25*~4iWeI%HxRvaihtp>4wBBMyIkYFFibURCxZxu((lg zT-<1Kb6oE3_PI{w@Dh|5oI3tgNO)p+>i8oe;fVo4{=^8?(c>ZWcw`geE-^9#Eyc1ag)nYNKaC z!rkt9Q7{EJtUL!iD`!1~@5)mIIP)P00-XC$*cGV=aQ?&dyFwKp;y?r;7Q}t@{IWEsn#6(AUnk2OwQB#uVyHc1=Ls?Gp$jAcIKqbL>7D;g)5i^Q+ z-c9vdq*1-j<@mf)lJ0dL;dw|>-ks)d2PEyq6nBAw`ZQ-n%_IYO(J2FbAPMIRMHRg?2IXAK1Cf4yLESs1FmP>eP;e2_{oKi~*ccx@?bcm!RRAudK zQl&&vfYnuLfu=H)nw3HTH6hSynOj5ZTATr1QEf<&9Y`rjb^;G?b=9<6_}Xg2!bz30 zmU-<(62h~VCEAm;I(OMhw@w8J##NC5q(!y46=`x)f`{QP4JQ`d+>-DdC9U?zj*6u> zvCQT|Q&!kS5!&KfY7vwnO%Vw&_~J0FpbTk>kl=>2%V;iwn>KX$m@ItpEVT+X#eG_n zS)_|WDMQyVo##j`UxjO{5wxJ?B5=m#EmyfPZ7F2BoTSNx@HME^Lgd8N3sK=$dP{43 z*(kLTImz`xRJd1&OUHCQa#PZFN=<@@q(wLt1g5Qnl6w!Dr>l`@p0++O!Fyg-#;~GH zdATTqLEpe{%!VYrV~O%; zF)Mv>jpFGMd6T;*fO-!FSSs{N`=supfJ~&|+8jNHiM#@&5`%+MGoe2zXlW_t6`7{{ zvTS2gTB30*JYib>cy4q^N*jsR`~h_ds2XW1ZhCSj^h`CcN^mM(POzjtB@(nA$pn4k zcyL30xBb)(o-=vUaFE^y4osa21QtW24WE*drV(j}&O^~>yO$v7E*q{7fSWH})02ts z=2J<(5?sK2kX*f9sPR0ao=d8(at|d{Ogg{X%{gzxn%d~}oIrp{7ho5&L`*^%o0pRa zhj=i=MzN9XP3xQ^_LOI=yE^PCPgr+#j_umQF1_TE3xeZk*JFFSDL7?9)r9iTRy6Tg z#jTa)6UwV5RFt`J#_~{BDSr9*U3&R!hH5$pmWv;`Q(cb*zH$W)AC(5q8dX@d0SzSh z*&^*D!XL_`KIO54gX!|*rtLAcLFu%DnCrDp0V0^odyx#ISS~9_1#)oGi{tYq|!%Srp7S4~&XA|nN z0W^WO${Vm+KVj)IXfPP2&q9}3aHEJXTG}*$^!bJrv0K=L6%9+~PiVMdZsUsiH#LaL z--CA+aB1vx3Wh|P9vwuS=922hB0~6bKFPEzWkB;Ouj)R9ND+S7;^XKaufJR^6k%CE zrK?WXFa7h7D8p*LHEKi&>v?ODMzbtw>=AFqk6e)dC;q6fs=E#mDpSY5S7g%rrQ^>* zKAO!TkzaayMMFmmKrwu3r^J`=bafL-7yZHYyB@ljf4@#v(#v zuRmv3tC^HA-Z)f-6I+*d9hy}y+_oSEsx$V>d%%(%ZDig9!-Z)p_pf?KfljB`dcS?v^HgV^!?DI zJ>CQ!%_l(;kLG8TdEG(Lpp!gW5JGs8ME7$a4?k;nA8_JPr^D`k)pm~9#c%jq^>F)4 zcF}v)tIu_d8uFjS=<%#qpMmzPYad>p(`{3${5xe6+a*)b^|*SV4EYJ8Z+r9+{mfL~ zZV>r(PuEvXygXBUlfEtSv(>l%QE#}2>+P=vyb!;nu}AzKkv}MW>FN5iN#EWi5VF;` zOGSgS)wg>@{n_fdJUl2ntMYK~Oi_9! zE|UpW;?poILw&;VaM?d}kGo8H_&kwc_w;Aj#KSYiH}P+Lu3@C_n= zP%9@>9{zy9D<1}b4${-DYZr;hm@Ni8s z$-|GWKJFZ}w{;A{frZ|B>uzY{uUi{C?BW4`Z2Va8@K313HiT&mj%OZDYb7cV z_xhZ|!`<_rLvV5TdbglcQ1~Y*FZbvq`dKP`>5WBz=7zrhy+CHreEnHA@$yXZO}t#< zXUog))Y>hhdi(!YdHH+Q$M=d3zgJ#9MG{WqNIg* zKMeYK#SXjekac(5F5XV6`RmyzERJWMJ_hYqdAir<^gO+mpWGjg zvGZIm)q<|ab#!G4oG^O2M=#OOOnG~Q$gg|)vu)z-nc|yxyTs3yw;%6oEmF_TR!=`x zYb}>(6F?t-6K|iKsh(a@F4n3BVDh!4%@k;A1@ zP!Nlzfie|N7#{D@N%S*Q9{*L5U-$H9+{EKE#W(SIiJvWxzfW(ti0kdI1-uZyq_Ia_ z;s+5{U8ZX>+XYg&ij1(1Z(56y`fs*8en8Zlt^O|0&wNg6zfM0Bj}HlgE*b`ZV#TBi zbungm9-nwSWu|8gHz|V~;_XTB_=vNEc)2rRJI@?`w&lgC*cGj7+YyEqUu$jL5iNew zb|%}-VSqW$-%l&<#Yb#sd8zHpEb_Hjb1Zm$jYiLO(c_)h2hk~&*L!_7@cK*?doX-Y zv#0XO(X>#e#tFmoJ$mt8y8HAZ$i1=nr#X_V0ugT2;xei~^Cq63DZYv4OZ;qk{sXeX zny6cgKy&bQZLSy=kq~D-Tk6|zw7V)f>1;s)#>yz@%)(? z^Ze?hMVsMy{;^$uw_A>$bMraQL1*Cbv)DvE*w%sdcfS0N)pu)5GxfcqqiqmSU;Lf$ z^6)FgFCV{b?*!@d2Sueg8k6IB?dCMJU*+*$pNYpSZ!o~+z+_E6Dec7HOZDg?W&q7C z)s01j=7P@4m#dkSu>LHYczLGyCSES_v*qRASK6!w_4b>1`Mm-us3IerDKGC3c;&<3 zn|S$qU4QQv^-A9_FW)N&HBoE7PCpYbm-ff-`nx<&p6MFRq)NYQH1{Yb!3p!7~MP|@^%BwmPPtO$J#M32ywmkhOY9p1f z-hLBLzfa%<6;Xu*fF)z1e+{n_$#xh9h>PcP3@Kd&r5?}J>Ev9WpD za%AewXVLNW?)}8qUnSm+tyO+N#u^i16}#=0w?eTVaP*LM_cop!vNn>=?)wZ44a>3M z@t2D69m=D*emwJd{Tz+g=M)~FZdFE6-m9y;^%$a8kpAFKRG#nA$?Yb$5k-)DWARUO zL+AOotC^HA=WL-*^o z6+3X&k_scd4GV7`}pj*09roj2oT5`Q%2$FuA>9qm`we7rtuvLki-R*@Ut z&h3dWGd%i;@h=s=^u{7Ub3*6&Ps$9Muk#F(%*YhqBr_y_wld>wt=%%Jx8Ec)I8lHU zl!(I5RAvN4McK-X)q>AuYbVSU^=B(HLZbc(t^YdxOfqAGz+W^B{=~}j{H^1kcb@OH zpHgLVhj@DuGNV>(+&&pT{A{FUVCv1&BF-BS4q7Zhuysoph$Ng!qMUv z?CzK7$j%@(aEq|V8+B%u+QmCT%l2VZ|s1YDjSyC?Wvy~;17iTL==85{Vl_k=C$o3qNJYPAyESaos0}ua>gvsS1 zf97XBi-^ez7vg?GmOyfZXGh}u3&OKzUm0tPv>eF`Z=(El*7VMBduXuM*+TcT+N0Zw zV4d_8S!3{OvhA@^(e*`kOGgpT48>jtR<3p186Y{9uw3+tj>Zl4w!Ao1hkSv^l#Zs? zDRZ9PG9qxzb>VBn*U3HS+KLWdrM-Sk%f6{iliTgViVhq?vYoR#sjd&(>#s%aC`CY; z4!BKpc16q3v{9XX8)$9JIT&%Cudz064?BN`;0quA?^@?~SR6YLb@tGeGkW{O9=m0G zz@GAK)T+hV*4F~Dx#5;qrlLCF+v{W7ifWt}ZRbt9`(@jCHd_3w?TqSldU@U52O`d! z;VF+dJriyZAGOwG=@hnUAcT;ehP-it+%3sZI+_B-G-Q*++^8XWe4bP*G=x3%fbTdtqG)@Pb zWN4=NCK)R6vz4Ly^@fVR=wOo!mFJ3r8Vxd)p`}70n>h^r9HbM=4r%$;2^%7787k_} zT84`Hvwb(ky#jw&89J%lpFLJJ#rL}@MyIjIM&b>oMTG?g`EYr{y4qtUwI>^)Nl=$U)}WsagKDGl-e4!e8>+YQn)cna^mNRmus<=m97$0T3kNKN*4 zV0Xnb_pQ)AUI?Aidj9~jSE-ApcR=eUN^(f}Sb?uRc04I0Gibh!Z;~mQ;!By5WD}!K z4_~V=h&0ua&P7?uRUpD4fj68?x=7$_=xm8ruT0m$bx0`0PjCNnkufL{g|Evb%BJH| z)(rw*$^?49sGoN@H;Y2Eeb>c*3jBH07{%%RpE#*9>17wHk>!TnfA9VJ<^E-YeAg*8 zBgEShf@Z^u|)DH`05!qug~7# zs~aiqd?3^y{lSr0FHCzLf4^AbU;A5U9v$3XWqkhma*<#6GUbh#;*$+XW$L_9;#0cn zWc|`N*@$0Jk(98+lOU6g*eo&xWk%uaHsW=FQ<+I~2l0FHBd7OE$0z-XBy|Ws)$J7x zRc%Rn8YI4ir>mRF=@NzY3*YZO#tu>c#{|Cg1XxC~rkr{Xk`m?l=|x^nH=yJ5$5VWR zoUGJgdGj&;MniPHWv?%Ih{%d#08y}MCnqu`9$h~Q@Vt_+_1(@|tF{vfcr8g+G=Mi# z<&Cgg;y4cQR6uypc;^lH5UGi+d-$dhsiaPs6y?%MQJO?mFYrjrE6EDy7PCAaS zG}V8=dUn}%d{fx|=rev_0hrz`aA==uWO@`yUy>Cmi`vx)26 zt-E98byMPD>pPv$xeKG~^9rD7KNrQNk+OKSjLyI2&^cebWgytJ3nzAKa5R|mf2*_a z37;F?$AZWIFKwQti_(xD&pck&zr8+l9?vI{#^nD#^ly(2V$exnP~BKWXzX?U`+I68 zB`h9&4`Sl+nc|yxyu{Cz$3O11Fgc*N-^Am$3$$P|ddB*9kHE{8$4mV+TOL1JbTnHY zUnCZXvemys0)M!*l!;a4;-25h{eE?-eL#C{H#IG(u9|ksmGoM@hJ}+VXDuV!=x_Yg z(aHUOmxrtKhr&yAfQVKck61Q3?;kI)~S-XjO&B+rq9 zq0f1ftrwCAny=%VY(0set*v*G=eUF)PS2TGRXN4AvNE-`Ci{MGms}g97P;ekQqM8# zIp8_7mWjow_v`eIJ2&3z|4Du4#MXB{tW6M|cboK``V94*iB-uTQXbaE^6r6TZey)e zj8MsY=)Up6{?9!9qepN4`H1g7-`=+N;Z>iz>Gun-z3%^AwQ=U`%Wj!|%CyG@H}@}i zdD%1n_S^o>(;p7}>bv)E{r|r5FF*b6h2N@dzjfUitv;9R`PnGaA5HtR8mYY~k@Edt zgQp#ZRA;`MR+yR@kaYJ;=sOMSechnIEEjMYmp*L0`OBBDSho6#h6Rh4&#ky@WzxI8 zlhSUiSjko{Ym7B6SvEhGjCN!BN`JTFKA79RC(hkC3;760wtJ0<)KX1xw-_Ne1EIo`@MTaLod_n z*V}L6!JiNVGH)3CiB;7?pUY7HJHNvBJNT9>3wa^lo)G&G8^zwV&N<>7!P{Efi7PT^ zFuwn+c<;!Tmn`eS9oB<+Gi~daal4(i@tF1Csb8?;J*Sp+S`Y399A8lDvg0q6+VR&5 z4?hFIHl&WW1D*6KHpHA~5DyA?xgF^8b;6NqB)|6Zq{_J!Cf-kJM%F)!RA5ZLlZhS?I)nBO z-XcYAyl5c7DUXB)6?hV#8{*x5D;k!}pAc(U9b*&R*GAN@T*)T*xO;u957?s*)DN0# zrgurmaYGjF^sys-pPq>jPfh%75=1E%AfhSNBWI9Y)BmJ^>ofNk0#Em-4Aqe=pfaC(i}p3FMi~L{6Fz0=_eue2vC_i zzHF4pPn5*@3N5Z2y6hJ>O^rK$Rz9J zgjnL`bOSp6#Hz{u);cCt3zH;sdt-w#I3eDQ5O~Q5_LHAX+duY>&u+bQ^_u&~FWz|9 zwrkI8`RNre#V($4^oD=9$Ua=w{Kl{ zQ+!EI!FE#PviDE$J4TC@NyoxGc>(1xP z@5E)!i#I8XLWs9Vu{-+hc3PWY@t(6<_Lc+&?0c*SM@@6ya1O=yk8*8{ogxctk7;(i z_tdhT)`Q&qzSMoC#P}Aw#VmTJ}%6&wWVI0kc z*CS3@g^2J{CkuQ@f0>};o9vfz!RMu&;@5t;LAGD`e(e{z$Iv7jE__j( z2Q|q|TeN>z`{lez{_K}5zOf{2d1`csw>%*;Lq)XO%6!SCVX!A( z@k>`XRlQx*FZXx+%8V{SsOh2{>FM+{$_&x|B@$8iI{w7-s{OswyW0Ob=9$Xm4Dog( zWX6@WALG1zF1wY!U-73>`y%#Ld<cQyCUYUeNs`TJ@`|pbU6JTMB@NEo2M{# zJ^8gIr7bz1F9|k|C@Bq>6xGt;58-G`j%V4Ve|L}50DU6fl1?^VYJkIDYOJW99htT4 zl4UpDv~20+^Dj@@;L#J=+)JaMpB9eJ{rvRlSJuv&J1aabT02*fW{QYImF!CTgCm{e z@6rB|<6+!&DVJm$r>tCVHX@DQ+$&wlla-nvYlGH zWmIp!NtSg9grGzezNDW7*CRk>3UW)Z_dbk}2XO;BzRCXCo@V?ASK<#R%Z}Cl&9z$l zb^4iP*Q?I zeB#YiW;uSUll2eByG`%MyG<=j3G3~zMH<<33nST~cK`J@(7$1qA4Gw% zQwRi`r1`fw^rgKTGDZ3o0PK{l+)jB+w^M*eriv(UN8c~RH(rHZ)5`PrP(%Cv*^f0t z^N5~rzjh2gV^W6vE5c;Q+(vfH3fM6gJ7+);rCcBqucWywd9~!hCOhX9S)u0ZypEnTDpQ~Fnc|!5 z9Em@ith@kq>usVYm!<=z$!oH6Bz}+zQ=Fuq1l1!zRVB?q#HF2K%Z$R;@lAHlctKD( zPBeamEAb^fUENgGYSI6o@cpjY-6J}5Jr$-nQJe=c$;yyu{}L~!8_@A5R!>r|!VLfJ zf%j|8j&-aNYj&)8jlO0##$B^3OIfoUoO*K2E@^~62pOl_Pjb!@Z%HT1hFY_mR5`?& zotEfN!kt{Rlj9;Mj6OCdS$1;GPEst{*LkT)mURiVU^2QVOC;DN%lbuzkjyB2DT^f7 zB+LG8*6gZ%e!Hux!nfTg&tDMDL!9vG*6hY$&8`e?9F*6w~r zk)|`SdN;U+#VoOUM_fO|uG=Ek?qqZW*6zq&8Dj0uEpHpG-qkQVlR){^wYz`$K-cbK zShBmcq$teS?hu$Z^kD=VTmP~p-Rz?|Nl!(p^S}QiXMA#5>Gqf2ueG}rVU)CX$EA_4 z(+DTm?vlqcB%=SC3{J5{P4>>owL6V)BC7Lhlf83t?an2bZg1=KGs()#*Y2kH+~+D6 zvNiL2O*bg)IK;b^*ypl0x&0higOhh}+Yh_3{x$a40lt(sr7Jd`&fdje9;KYWn@Wp# z>k67b8E{YEHM;v;o7{Wxh0Jc-7Tck3Z}DSz|L#ZlxsBmcqZDDkcGNiF$o(sR*8cN! z4sTXTu)#ftcj?fgkLxwZv+UE?^yGbmY}WLutA<$9(-Qrz>3vBVXLa8yDBz@a3f0qv z3}MF|kNZ<{Tts&>mT~FUgor;$?MT`arv0h^loe{e&f~~lqcRmX!Z9+%r};)@>UNXF zAI=^$?N7bPTao&wx8Ee=dIV}vD?-vwg6k2WGLz;Y;_@ta2w~Of_+ zZxpTR2{|{g1`%INdo+&O$Rc%3kL1#Dj%es#_UJRzt>tfFP0#Cd@|qq=4&$00B}zI4 zMSyyk)XO1c!SS@F=g~*>ljfM}#v(#*$-h-Q z#cI6+I0<`3&C_qgj}P51oNrrW&=$oWvRdaOrX1nB+qWf@hyG@eqw{KSEU&hKaiBxHb*Q}gc2lPK#IvbPDSKp95`Q?JZnB&1l?7?O-hPwaB=Lh9 zemb5^WhTuX#5)9@-Y*@04)T#66B7BQH(C@Zc{-h0D8x_VOL)4vsf>R5gOfhV1S&(v zCzw9=0I|J-P$Y9zr_=8gB&NeJ7wsSREX4U!{GH7{#_!7x&%#Bfi1W^sVCt}rTK+WJ z25ZMPrH@DAf3ocMv#afG(>T#==ju}HZhUn3vC-Fruf@$J2HwB#;uX(GGlqEo*4TDo z?##kPC8ug!4-wB^PqFn?>H`y;eqCj>0 z2v_1uc-p#=&$GWoJN()&a-WC#r#exb2QtYzIU$yKIo*JcKXGz}&-X1<`aa9BK^dG7 zZ-!#OY&^qWe`%@xkZ5NIKKNkawW;>nom}`8Z}(-g@KUmsQKtf1uDOv8Ih-R}`jMP8qi_Iql%5o6x+k?D*DMSrM=Nxd9GCLB-uJsy2TKXrLM7I2yqIxp7s zO@S_1l#DVepZH6*_{6KJ{&M_OC+i=Mmz(UCZ>xnVVX8}YCcEVxktwKVMp*Jv2_{}l zWhTuX#5aiydcSo1Imk!Pen{k(Ua2Th@@&6$OOGh1U-*9QmOX+{q~obhr=N+Jj~DHi z<0#9}@h48M@@2PF;xmNq{!r%U(n|AX?UK%k{hsg_!&gO|L*dyG=iTru7%;QLYhU49 z+<6Ko)qZ0;yVkxaOq!y|+IP53Gu>YM77x@#ru=rrDfqkjYmq50tSG#BRbvL5b-t<({_lPZj=61nXB-ox;TQdHR{F;*T+X^l#ncPx1 zvm~@+M9JhYl$3`{#!o8=BI7h}_eDnLT#kgxOUi4K6YQZ4LaNfg4Awa;3W56|>_L6) zkkb%tnS!4#U#N|rH^8;`R7jzmbWTIv;*uHsT7(Z!Bj9Mh(8$Q|Cyf@&NQYl#fd6j; zJcu&M5z7;7kH?4lO6u`wSITjAJC{1vCcAPx5g2YB|99W#OQe*tP|7&dzS!>s-37Ym z>0r_os7(Dy$rRsYS4#ZhbPL(E}Z9vMc4J3JN^wN&3lnJpxo_(i}uQC;-g} zt4_x^*_G?W_{ni1U4n2WzJ#Z%o9eq))E^Wb_G?#mhyh(lg(*&_pGg0I8XG|eMOt|;)?ZpQ z^3uGKh1eqVWo!~Dh)0u3bm`@@X|8k-Og=MYPmcb*uOw z>`a=|>i(G5=SC=?X?DlbfvNRD`h%mMCH3wEu`fNk@Ls0Mgt36rT#)liWTTfYr*)K9 zbtd~VQ+(ptRA#waD8dqdIG%2@FYnXJlTp3>Ci_z22PLBLB~O)LlYO~eWYGJiU8>vB@c}kuT9CU9(;n3iusPn3O|A{^wTONtOQfe2!jaQIQjXDiQ>uT|G zzw1h`X>MQvn04~JpnL9*_S;ZuaewT;62?{EFZ~--*~$ibhR){wX7oCB#%9t!Sxw34 z+j7595^Tx42|HzYr5|?6hQ0|!WA9&P=$yrE$OmB;oq?)VnZaoRSQh90NSv5=$x>4N zSwyQ{5^=tu+CxDB_am#0M>|H2i|B5O&1$k^_6Pw^x&b-jc{FDw&y{eK9kZE`(m0*x z(ep%Q>N7i2e3Km`@rRRnCOc+-nzj>2Z~qLy3h@)}q$tb=PnmYEBSR^LI z5-+D4(D5fusqpu0jVieYoB5jRy~^N(crz0FWA^&bms;2(!zTsfFX1Xw4@BCF<#usr zLKsp6CJax@X1k2@u2R(dUhJ4^mwZG16K(y=B!xIi=^un0qst_(&q@1b)48}wT6jx( zVL1sSw&~uP5u^)>wgf-q5nh@%{L$0Bz`H$GeZ&W`Jw^OlgFjy36Dy`?07Vp~~TC+*Nz(bq-C12L+rSdGXl0kRfE~@n~ns@e