diff --git a/.gitignore b/.gitignore index 1e986c3bd..6e4868820 100644 --- a/.gitignore +++ b/.gitignore @@ -104,3 +104,5 @@ pipelined/config/rv64ic_noPriv pipelined/config/rv64ic_orig synthDC/Summary.csv pipelined/srt/exptestgen +pipelined/srt/testgen +pipelined/srt/qst2 diff --git a/addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a b/addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a index 442544836..69cd932a8 100644 Binary files a/addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a and b/addins/SoftFloat-3e/build/Linux-x86_64-GCC/softfloat.a differ diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 307c77b26..be67c99bd 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 307c77b26e070ae85ffea665ad9b642b40e33c86 +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/pipelined/regression/fp.do b/pipelined/regression/fp.do index 208118fc6..68c240c8a 100644 --- a/pipelined/regression/fp.do +++ b/pipelined/regression/fp.do @@ -32,7 +32,7 @@ vlib work # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals # $num = the added words after the call -vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv -suppress 2583,7063,8607,2697 +vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 vsim -voptargs=+acc work.testbenchfp -G TEST=$2 diff --git a/pipelined/regression/sim-fp-batch b/pipelined/regression/sim-fp-batch index 7e2c6a341..ca0ba3d38 100755 --- a/pipelined/regression/sim-fp-batch +++ b/pipelined/regression/sim-fp-batch @@ -7,4 +7,4 @@ # sqrt - test square root # all - test everything -vsim -c -do "do fp.do rv64fp fma" \ No newline at end of file +vsim -c -do "do fp.do rv64fp cvtfp" \ No newline at end of file diff --git a/pipelined/regression/wally-pipelined.do 
b/pipelined/regression/wally-pipelined.do index 0dadea94b..700c129dc 100644 --- a/pipelined/regression/wally-pipelined.do +++ b/pipelined/regression/wally-pipelined.do @@ -48,7 +48,11 @@ if {$2 eq "buildroot" || $2 eq "buildroot-checkpoint"} { } elseif {$2 eq "buildroot-no-trace"} { vlog -lint -work work_${1}_${2} +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-linux.sv ../testbench/common/*.sv ../src/*/*.sv ../src/*/*/*.sv -suppress 2583 # start and run simulation +<<<<<<< HEAD vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=0 -G INSTR_WAVEON=0 -G CHECKPOINT=0 -G NO_IE_MTIME_CHECKPOINT=1 -G DEBUG_TRACE=0 -o testbenchopt +======= + vopt +acc work_${1}_${2}.testbench -work work_${1}_${2} -G RISCV_DIR=$3 -G INSTR_LIMIT=0 -G INSTR_WAVEON=0 -G CHECKPOINT=0 -G NO_IE_MTIME_CHECKPOINT=1 -o testbenchopt +>>>>>>> parent of 9eb374b6... Changed NO_IE_MTIME_CHECKPOINT so it uses the new parameter name vsim -lib work_${1}_${2} testbenchopt -suppress 8852,12070,3084,3829 #-- Run the Simulation diff --git a/pipelined/src/fpu/adderparts.sv b/pipelined/src/fpu/adderparts.sv deleted file mode 100644 index 045a8accc..000000000 --- a/pipelined/src/fpu/adderparts.sv +++ /dev/null @@ -1,758 +0,0 @@ -// The following module make up the basic building blocks that -// are used by the cla64, cla_sub64, and cla52. 
- -module INVBLOCK ( GIN, GOUT ); - - input GIN; - output GOUT; - - assign GOUT = ~ GIN; - -endmodule // INVBLOCK - - -module XXOR1 ( A, B, GIN, SUM ); - - input A; - input B; - input GIN; - output SUM; - - assign SUM = ( ~ (A ^ B)) ^ GIN; - -endmodule // XXOR1 - - -module BLOCK0 ( A, B, POUT, GOUT ); - - input A; - input B; - output POUT; - output GOUT; - - assign POUT = ~ (A | B); - assign GOUT = ~ (A & B); - -endmodule // BLOCK0 - - -module BLOCK1 ( PIN1, PIN2, GIN1, GIN2, POUT, GOUT ); - - input PIN1; - input PIN2; - input GIN1; - input GIN2; - output POUT; - output GOUT; - - assign POUT = ~ (PIN1 | PIN2); - assign GOUT = ~ (GIN2 & (PIN2 | GIN1)); - -endmodule // BLOCK1 - - -module BLOCK2 ( PIN1, PIN2, GIN1, GIN2, POUT, GOUT ); - - input PIN1; - input PIN2; - input GIN1; - input GIN2; - output POUT; - output GOUT; - - assign POUT = ~ (PIN1 & PIN2); - assign GOUT = ~ (GIN2 | (PIN2 & GIN1)); - -endmodule // BLOCK2 - - -module BLOCK1A ( PIN2, GIN1, GIN2, GOUT ); - - input PIN2; - input GIN1; - input GIN2; - output GOUT; - - assign GOUT = ~ (GIN2 & (PIN2 | GIN1)); - -endmodule // BLOCK1A - - -module BLOCK2A ( PIN2, GIN1, GIN2, GOUT ); - - input PIN2; - input GIN1; - input GIN2; - output GOUT; - - assign GOUT = ~ (GIN2 | (PIN2 & GIN1)); - -endmodule -//***KEP all 0:63, 0:64 ect changed - changed due to lint warning -module PRESTAGE_64 ( A, B, CIN, POUT, GOUT ); - - input [63:0] A; - input [63:0] B; - input CIN; - - output [63:0] POUT; - output [64:0] GOUT; - - BLOCK0 U10 (A[0] , B[0] , POUT[0] , GOUT[1] ); - BLOCK0 U11 (A[1] , B[1] , POUT[1] , GOUT[2] ); - BLOCK0 U12 (A[2] , B[2] , POUT[2] , GOUT[3] ); - BLOCK0 U13 (A[3] , B[3] , POUT[3] , GOUT[4] ); - BLOCK0 U14 (A[4] , B[4] , POUT[4] , GOUT[5] ); - BLOCK0 U15 (A[5] , B[5] , POUT[5] , GOUT[6] ); - BLOCK0 U16 (A[6] , B[6] , POUT[6] , GOUT[7] ); - BLOCK0 U17 (A[7] , B[7] , POUT[7] , GOUT[8] ); - BLOCK0 U18 (A[8] , B[8] , POUT[8] , GOUT[9] ); - BLOCK0 U19 (A[9] , B[9] , POUT[9] , GOUT[10] ); - BLOCK0 U110 (A[10] , 
B[10] , POUT[10] , GOUT[11] ); - BLOCK0 U111 (A[11] , B[11] , POUT[11] , GOUT[12] ); - BLOCK0 U112 (A[12] , B[12] , POUT[12] , GOUT[13] ); - BLOCK0 U113 (A[13] , B[13] , POUT[13] , GOUT[14] ); - BLOCK0 U114 (A[14] , B[14] , POUT[14] , GOUT[15] ); - BLOCK0 U115 (A[15] , B[15] , POUT[15] , GOUT[16] ); - BLOCK0 U116 (A[16] , B[16] , POUT[16] , GOUT[17] ); - BLOCK0 U117 (A[17] , B[17] , POUT[17] , GOUT[18] ); - BLOCK0 U118 (A[18] , B[18] , POUT[18] , GOUT[19] ); - BLOCK0 U119 (A[19] , B[19] , POUT[19] , GOUT[20] ); - BLOCK0 U120 (A[20] , B[20] , POUT[20] , GOUT[21] ); - BLOCK0 U121 (A[21] , B[21] , POUT[21] , GOUT[22] ); - BLOCK0 U122 (A[22] , B[22] , POUT[22] , GOUT[23] ); - BLOCK0 U123 (A[23] , B[23] , POUT[23] , GOUT[24] ); - BLOCK0 U124 (A[24] , B[24] , POUT[24] , GOUT[25] ); - BLOCK0 U125 (A[25] , B[25] , POUT[25] , GOUT[26] ); - BLOCK0 U126 (A[26] , B[26] , POUT[26] , GOUT[27] ); - BLOCK0 U127 (A[27] , B[27] , POUT[27] , GOUT[28] ); - BLOCK0 U128 (A[28] , B[28] , POUT[28] , GOUT[29] ); - BLOCK0 U129 (A[29] , B[29] , POUT[29] , GOUT[30] ); - BLOCK0 U130 (A[30] , B[30] , POUT[30] , GOUT[31] ); - BLOCK0 U131 (A[31] , B[31] , POUT[31] , GOUT[32] ); - BLOCK0 U132 (A[32] , B[32] , POUT[32] , GOUT[33] ); - BLOCK0 U133 (A[33] , B[33] , POUT[33] , GOUT[34] ); - BLOCK0 U134 (A[34] , B[34] , POUT[34] , GOUT[35] ); - BLOCK0 U135 (A[35] , B[35] , POUT[35] , GOUT[36] ); - BLOCK0 U136 (A[36] , B[36] , POUT[36] , GOUT[37] ); - BLOCK0 U137 (A[37] , B[37] , POUT[37] , GOUT[38] ); - BLOCK0 U138 (A[38] , B[38] , POUT[38] , GOUT[39] ); - BLOCK0 U139 (A[39] , B[39] , POUT[39] , GOUT[40] ); - BLOCK0 U140 (A[40] , B[40] , POUT[40] , GOUT[41] ); - BLOCK0 U141 (A[41] , B[41] , POUT[41] , GOUT[42] ); - BLOCK0 U142 (A[42] , B[42] , POUT[42] , GOUT[43] ); - BLOCK0 U143 (A[43] , B[43] , POUT[43] , GOUT[44] ); - BLOCK0 U144 (A[44] , B[44] , POUT[44] , GOUT[45] ); - BLOCK0 U145 (A[45] , B[45] , POUT[45] , GOUT[46] ); - BLOCK0 U146 (A[46] , B[46] , POUT[46] , GOUT[47] ); - BLOCK0 U147 (A[47] , 
B[47] , POUT[47] , GOUT[48] ); - BLOCK0 U148 (A[48] , B[48] , POUT[48] , GOUT[49] ); - BLOCK0 U149 (A[49] , B[49] , POUT[49] , GOUT[50] ); - BLOCK0 U150 (A[50] , B[50] , POUT[50] , GOUT[51] ); - BLOCK0 U151 (A[51] , B[51] , POUT[51] , GOUT[52] ); - BLOCK0 U152 (A[52] , B[52] , POUT[52] , GOUT[53] ); - BLOCK0 U153 (A[53] , B[53] , POUT[53] , GOUT[54] ); - BLOCK0 U154 (A[54] , B[54] , POUT[54] , GOUT[55] ); - BLOCK0 U155 (A[55] , B[55] , POUT[55] , GOUT[56] ); - BLOCK0 U156 (A[56] , B[56] , POUT[56] , GOUT[57] ); - BLOCK0 U157 (A[57] , B[57] , POUT[57] , GOUT[58] ); - BLOCK0 U158 (A[58] , B[58] , POUT[58] , GOUT[59] ); - BLOCK0 U159 (A[59] , B[59] , POUT[59] , GOUT[60] ); - BLOCK0 U160 (A[60] , B[60] , POUT[60] , GOUT[61] ); - BLOCK0 U161 (A[61] , B[61] , POUT[61] , GOUT[62] ); - BLOCK0 U162 (A[62] , B[62] , POUT[62] , GOUT[63] ); - BLOCK0 U163 (A[63] , B[63] , POUT[63] , GOUT[64] ); - INVBLOCK U2 (CIN , GOUT[0] ); - -endmodule // PRESTAGE_64 - - -module DBLC_0_64 ( PIN, GIN, POUT, GOUT ); - - input [63:0] PIN; - input [64:0] GIN; - - output [62:0] POUT; - output [64:0] GOUT; - - INVBLOCK U10 (GIN[0] , GOUT[0] ); - BLOCK1A U21 (PIN[0] , GIN[0] , GIN[1] , GOUT[1] ); - BLOCK1 U32 (PIN[0] , PIN[1] , GIN[1] , GIN[2] , POUT[0] , GOUT[2] ); - BLOCK1 U33 (PIN[1] , PIN[2] , GIN[2] , GIN[3] , POUT[1] , GOUT[3] ); - BLOCK1 U34 (PIN[2] , PIN[3] , GIN[3] , GIN[4] , POUT[2] , GOUT[4] ); - BLOCK1 U35 (PIN[3] , PIN[4] , GIN[4] , GIN[5] , POUT[3] , GOUT[5] ); - BLOCK1 U36 (PIN[4] , PIN[5] , GIN[5] , GIN[6] , POUT[4] , GOUT[6] ); - BLOCK1 U37 (PIN[5] , PIN[6] , GIN[6] , GIN[7] , POUT[5] , GOUT[7] ); - BLOCK1 U38 (PIN[6] , PIN[7] , GIN[7] , GIN[8] , POUT[6] , GOUT[8] ); - BLOCK1 U39 (PIN[7] , PIN[8] , GIN[8] , GIN[9] , POUT[7] , GOUT[9] ); - BLOCK1 U310 (PIN[8] , PIN[9] , GIN[9] , GIN[10] , POUT[8] , GOUT[10] ); - BLOCK1 U311 (PIN[9] , PIN[10] , GIN[10] , GIN[11] , POUT[9] , GOUT[11] ); - BLOCK1 U312 (PIN[10] , PIN[11] , GIN[11] , GIN[12] , POUT[10] , GOUT[12] ); - BLOCK1 U313 
(PIN[11] , PIN[12] , GIN[12] , GIN[13] , POUT[11] , GOUT[13] ); - BLOCK1 U314 (PIN[12] , PIN[13] , GIN[13] , GIN[14] , POUT[12] , GOUT[14] ); - BLOCK1 U315 (PIN[13] , PIN[14] , GIN[14] , GIN[15] , POUT[13] , GOUT[15] ); - BLOCK1 U316 (PIN[14] , PIN[15] , GIN[15] , GIN[16] , POUT[14] , GOUT[16] ); - BLOCK1 U317 (PIN[15] , PIN[16] , GIN[16] , GIN[17] , POUT[15] , GOUT[17] ); - BLOCK1 U318 (PIN[16] , PIN[17] , GIN[17] , GIN[18] , POUT[16] , GOUT[18] ); - BLOCK1 U319 (PIN[17] , PIN[18] , GIN[18] , GIN[19] , POUT[17] , GOUT[19] ); - BLOCK1 U320 (PIN[18] , PIN[19] , GIN[19] , GIN[20] , POUT[18] , GOUT[20] ); - BLOCK1 U321 (PIN[19] , PIN[20] , GIN[20] , GIN[21] , POUT[19] , GOUT[21] ); - BLOCK1 U322 (PIN[20] , PIN[21] , GIN[21] , GIN[22] , POUT[20] , GOUT[22] ); - BLOCK1 U323 (PIN[21] , PIN[22] , GIN[22] , GIN[23] , POUT[21] , GOUT[23] ); - BLOCK1 U324 (PIN[22] , PIN[23] , GIN[23] , GIN[24] , POUT[22] , GOUT[24] ); - BLOCK1 U325 (PIN[23] , PIN[24] , GIN[24] , GIN[25] , POUT[23] , GOUT[25] ); - BLOCK1 U326 (PIN[24] , PIN[25] , GIN[25] , GIN[26] , POUT[24] , GOUT[26] ); - BLOCK1 U327 (PIN[25] , PIN[26] , GIN[26] , GIN[27] , POUT[25] , GOUT[27] ); - BLOCK1 U328 (PIN[26] , PIN[27] , GIN[27] , GIN[28] , POUT[26] , GOUT[28] ); - BLOCK1 U329 (PIN[27] , PIN[28] , GIN[28] , GIN[29] , POUT[27] , GOUT[29] ); - BLOCK1 U330 (PIN[28] , PIN[29] , GIN[29] , GIN[30] , POUT[28] , GOUT[30] ); - BLOCK1 U331 (PIN[29] , PIN[30] , GIN[30] , GIN[31] , POUT[29] , GOUT[31] ); - BLOCK1 U332 (PIN[30] , PIN[31] , GIN[31] , GIN[32] , POUT[30] , GOUT[32] ); - BLOCK1 U333 (PIN[31] , PIN[32] , GIN[32] , GIN[33] , POUT[31] , GOUT[33] ); - BLOCK1 U334 (PIN[32] , PIN[33] , GIN[33] , GIN[34] , POUT[32] , GOUT[34] ); - BLOCK1 U335 (PIN[33] , PIN[34] , GIN[34] , GIN[35] , POUT[33] , GOUT[35] ); - BLOCK1 U336 (PIN[34] , PIN[35] , GIN[35] , GIN[36] , POUT[34] , GOUT[36] ); - BLOCK1 U337 (PIN[35] , PIN[36] , GIN[36] , GIN[37] , POUT[35] , GOUT[37] ); - BLOCK1 U338 (PIN[36] , PIN[37] , GIN[37] , GIN[38] , POUT[36] 
, GOUT[38] ); - BLOCK1 U339 (PIN[37] , PIN[38] , GIN[38] , GIN[39] , POUT[37] , GOUT[39] ); - BLOCK1 U340 (PIN[38] , PIN[39] , GIN[39] , GIN[40] , POUT[38] , GOUT[40] ); - BLOCK1 U341 (PIN[39] , PIN[40] , GIN[40] , GIN[41] , POUT[39] , GOUT[41] ); - BLOCK1 U342 (PIN[40] , PIN[41] , GIN[41] , GIN[42] , POUT[40] , GOUT[42] ); - BLOCK1 U343 (PIN[41] , PIN[42] , GIN[42] , GIN[43] , POUT[41] , GOUT[43] ); - BLOCK1 U344 (PIN[42] , PIN[43] , GIN[43] , GIN[44] , POUT[42] , GOUT[44] ); - BLOCK1 U345 (PIN[43] , PIN[44] , GIN[44] , GIN[45] , POUT[43] , GOUT[45] ); - BLOCK1 U346 (PIN[44] , PIN[45] , GIN[45] , GIN[46] , POUT[44] , GOUT[46] ); - BLOCK1 U347 (PIN[45] , PIN[46] , GIN[46] , GIN[47] , POUT[45] , GOUT[47] ); - BLOCK1 U348 (PIN[46] , PIN[47] , GIN[47] , GIN[48] , POUT[46] , GOUT[48] ); - BLOCK1 U349 (PIN[47] , PIN[48] , GIN[48] , GIN[49] , POUT[47] , GOUT[49] ); - BLOCK1 U350 (PIN[48] , PIN[49] , GIN[49] , GIN[50] , POUT[48] , GOUT[50] ); - BLOCK1 U351 (PIN[49] , PIN[50] , GIN[50] , GIN[51] , POUT[49] , GOUT[51] ); - BLOCK1 U352 (PIN[50] , PIN[51] , GIN[51] , GIN[52] , POUT[50] , GOUT[52] ); - BLOCK1 U353 (PIN[51] , PIN[52] , GIN[52] , GIN[53] , POUT[51] , GOUT[53] ); - BLOCK1 U354 (PIN[52] , PIN[53] , GIN[53] , GIN[54] , POUT[52] , GOUT[54] ); - BLOCK1 U355 (PIN[53] , PIN[54] , GIN[54] , GIN[55] , POUT[53] , GOUT[55] ); - BLOCK1 U356 (PIN[54] , PIN[55] , GIN[55] , GIN[56] , POUT[54] , GOUT[56] ); - BLOCK1 U357 (PIN[55] , PIN[56] , GIN[56] , GIN[57] , POUT[55] , GOUT[57] ); - BLOCK1 U358 (PIN[56] , PIN[57] , GIN[57] , GIN[58] , POUT[56] , GOUT[58] ); - BLOCK1 U359 (PIN[57] , PIN[58] , GIN[58] , GIN[59] , POUT[57] , GOUT[59] ); - BLOCK1 U360 (PIN[58] , PIN[59] , GIN[59] , GIN[60] , POUT[58] , GOUT[60] ); - BLOCK1 U361 (PIN[59] , PIN[60] , GIN[60] , GIN[61] , POUT[59] , GOUT[61] ); - BLOCK1 U362 (PIN[60] , PIN[61] , GIN[61] , GIN[62] , POUT[60] , GOUT[62] ); - BLOCK1 U363 (PIN[61] , PIN[62] , GIN[62] , GIN[63] , POUT[61] , GOUT[63] ); - BLOCK1 U364 (PIN[62] , PIN[63] , 
GIN[63] , GIN[64] , POUT[62] , GOUT[64] ); - -endmodule // DBLC_0_64 - - -module DBLC_1_64 ( PIN, GIN, POUT, GOUT ); - - input [62:0] PIN; - input [64:0] GIN; - - output [60:0] POUT; - output [64:0] GOUT; - - INVBLOCK U10 (GIN[0] , GOUT[0] ); - INVBLOCK U11 (GIN[1] , GOUT[1] ); - BLOCK2A U22 (PIN[0] , GIN[0] , GIN[2] , GOUT[2] ); - BLOCK2A U23 (PIN[1] , GIN[1] , GIN[3] , GOUT[3] ); - BLOCK2 U34 (PIN[0] , PIN[2] , GIN[2] , GIN[4] , POUT[0] , GOUT[4] ); - BLOCK2 U35 (PIN[1] , PIN[3] , GIN[3] , GIN[5] , POUT[1] , GOUT[5] ); - BLOCK2 U36 (PIN[2] , PIN[4] , GIN[4] , GIN[6] , POUT[2] , GOUT[6] ); - BLOCK2 U37 (PIN[3] , PIN[5] , GIN[5] , GIN[7] , POUT[3] , GOUT[7] ); - BLOCK2 U38 (PIN[4] , PIN[6] , GIN[6] , GIN[8] , POUT[4] , GOUT[8] ); - BLOCK2 U39 (PIN[5] , PIN[7] , GIN[7] , GIN[9] , POUT[5] , GOUT[9] ); - BLOCK2 U310 (PIN[6] , PIN[8] , GIN[8] , GIN[10] , POUT[6] , GOUT[10] ); - BLOCK2 U311 (PIN[7] , PIN[9] , GIN[9] , GIN[11] , POUT[7] , GOUT[11] ); - BLOCK2 U312 (PIN[8] , PIN[10] , GIN[10] , GIN[12] , POUT[8] , GOUT[12] ); - BLOCK2 U313 (PIN[9] , PIN[11] , GIN[11] , GIN[13] , POUT[9] , GOUT[13] ); - BLOCK2 U314 (PIN[10] , PIN[12] , GIN[12] , GIN[14] , POUT[10] , GOUT[14] ); - BLOCK2 U315 (PIN[11] , PIN[13] , GIN[13] , GIN[15] , POUT[11] , GOUT[15] ); - BLOCK2 U316 (PIN[12] , PIN[14] , GIN[14] , GIN[16] , POUT[12] , GOUT[16] ); - BLOCK2 U317 (PIN[13] , PIN[15] , GIN[15] , GIN[17] , POUT[13] , GOUT[17] ); - BLOCK2 U318 (PIN[14] , PIN[16] , GIN[16] , GIN[18] , POUT[14] , GOUT[18] ); - BLOCK2 U319 (PIN[15] , PIN[17] , GIN[17] , GIN[19] , POUT[15] , GOUT[19] ); - BLOCK2 U320 (PIN[16] , PIN[18] , GIN[18] , GIN[20] , POUT[16] , GOUT[20] ); - BLOCK2 U321 (PIN[17] , PIN[19] , GIN[19] , GIN[21] , POUT[17] , GOUT[21] ); - BLOCK2 U322 (PIN[18] , PIN[20] , GIN[20] , GIN[22] , POUT[18] , GOUT[22] ); - BLOCK2 U323 (PIN[19] , PIN[21] , GIN[21] , GIN[23] , POUT[19] , GOUT[23] ); - BLOCK2 U324 (PIN[20] , PIN[22] , GIN[22] , GIN[24] , POUT[20] , GOUT[24] ); - BLOCK2 U325 (PIN[21] , 
PIN[23] , GIN[23] , GIN[25] , POUT[21] , GOUT[25] ); - BLOCK2 U326 (PIN[22] , PIN[24] , GIN[24] , GIN[26] , POUT[22] , GOUT[26] ); - BLOCK2 U327 (PIN[23] , PIN[25] , GIN[25] , GIN[27] , POUT[23] , GOUT[27] ); - BLOCK2 U328 (PIN[24] , PIN[26] , GIN[26] , GIN[28] , POUT[24] , GOUT[28] ); - BLOCK2 U329 (PIN[25] , PIN[27] , GIN[27] , GIN[29] , POUT[25] , GOUT[29] ); - BLOCK2 U330 (PIN[26] , PIN[28] , GIN[28] , GIN[30] , POUT[26] , GOUT[30] ); - BLOCK2 U331 (PIN[27] , PIN[29] , GIN[29] , GIN[31] , POUT[27] , GOUT[31] ); - BLOCK2 U332 (PIN[28] , PIN[30] , GIN[30] , GIN[32] , POUT[28] , GOUT[32] ); - BLOCK2 U333 (PIN[29] , PIN[31] , GIN[31] , GIN[33] , POUT[29] , GOUT[33] ); - BLOCK2 U334 (PIN[30] , PIN[32] , GIN[32] , GIN[34] , POUT[30] , GOUT[34] ); - BLOCK2 U335 (PIN[31] , PIN[33] , GIN[33] , GIN[35] , POUT[31] , GOUT[35] ); - BLOCK2 U336 (PIN[32] , PIN[34] , GIN[34] , GIN[36] , POUT[32] , GOUT[36] ); - BLOCK2 U337 (PIN[33] , PIN[35] , GIN[35] , GIN[37] , POUT[33] , GOUT[37] ); - BLOCK2 U338 (PIN[34] , PIN[36] , GIN[36] , GIN[38] , POUT[34] , GOUT[38] ); - BLOCK2 U339 (PIN[35] , PIN[37] , GIN[37] , GIN[39] , POUT[35] , GOUT[39] ); - BLOCK2 U340 (PIN[36] , PIN[38] , GIN[38] , GIN[40] , POUT[36] , GOUT[40] ); - BLOCK2 U341 (PIN[37] , PIN[39] , GIN[39] , GIN[41] , POUT[37] , GOUT[41] ); - BLOCK2 U342 (PIN[38] , PIN[40] , GIN[40] , GIN[42] , POUT[38] , GOUT[42] ); - BLOCK2 U343 (PIN[39] , PIN[41] , GIN[41] , GIN[43] , POUT[39] , GOUT[43] ); - BLOCK2 U344 (PIN[40] , PIN[42] , GIN[42] , GIN[44] , POUT[40] , GOUT[44] ); - BLOCK2 U345 (PIN[41] , PIN[43] , GIN[43] , GIN[45] , POUT[41] , GOUT[45] ); - BLOCK2 U346 (PIN[42] , PIN[44] , GIN[44] , GIN[46] , POUT[42] , GOUT[46] ); - BLOCK2 U347 (PIN[43] , PIN[45] , GIN[45] , GIN[47] , POUT[43] , GOUT[47] ); - BLOCK2 U348 (PIN[44] , PIN[46] , GIN[46] , GIN[48] , POUT[44] , GOUT[48] ); - BLOCK2 U349 (PIN[45] , PIN[47] , GIN[47] , GIN[49] , POUT[45] , GOUT[49] ); - BLOCK2 U350 (PIN[46] , PIN[48] , GIN[48] , GIN[50] , POUT[46] , GOUT[50] 
); - BLOCK2 U351 (PIN[47] , PIN[49] , GIN[49] , GIN[51] , POUT[47] , GOUT[51] ); - BLOCK2 U352 (PIN[48] , PIN[50] , GIN[50] , GIN[52] , POUT[48] , GOUT[52] ); - BLOCK2 U353 (PIN[49] , PIN[51] , GIN[51] , GIN[53] , POUT[49] , GOUT[53] ); - BLOCK2 U354 (PIN[50] , PIN[52] , GIN[52] , GIN[54] , POUT[50] , GOUT[54] ); - BLOCK2 U355 (PIN[51] , PIN[53] , GIN[53] , GIN[55] , POUT[51] , GOUT[55] ); - BLOCK2 U356 (PIN[52] , PIN[54] , GIN[54] , GIN[56] , POUT[52] , GOUT[56] ); - BLOCK2 U357 (PIN[53] , PIN[55] , GIN[55] , GIN[57] , POUT[53] , GOUT[57] ); - BLOCK2 U358 (PIN[54] , PIN[56] , GIN[56] , GIN[58] , POUT[54] , GOUT[58] ); - BLOCK2 U359 (PIN[55] , PIN[57] , GIN[57] , GIN[59] , POUT[55] , GOUT[59] ); - BLOCK2 U360 (PIN[56] , PIN[58] , GIN[58] , GIN[60] , POUT[56] , GOUT[60] ); - BLOCK2 U361 (PIN[57] , PIN[59] , GIN[59] , GIN[61] , POUT[57] , GOUT[61] ); - BLOCK2 U362 (PIN[58] , PIN[60] , GIN[60] , GIN[62] , POUT[58] , GOUT[62] ); - BLOCK2 U363 (PIN[59] , PIN[61] , GIN[61] , GIN[63] , POUT[59] , GOUT[63] ); - BLOCK2 U364 (PIN[60] , PIN[62] , GIN[62] , GIN[64] , POUT[60] , GOUT[64] ); - -endmodule // DBLC_1_64 - - -module DBLC_2_64 ( PIN, GIN, POUT, GOUT ); - - input [60:0] PIN; - input [64:0] GIN; - - output [56:0] POUT; - output [64:0] GOUT; - - INVBLOCK U10 (GIN[0] , GOUT[0] ); - INVBLOCK U11 (GIN[1] , GOUT[1] ); - INVBLOCK U12 (GIN[2] , GOUT[2] ); - INVBLOCK U13 (GIN[3] , GOUT[3] ); - BLOCK1A U24 (PIN[0] , GIN[0] , GIN[4] , GOUT[4] ); - BLOCK1A U25 (PIN[1] , GIN[1] , GIN[5] , GOUT[5] ); - BLOCK1A U26 (PIN[2] , GIN[2] , GIN[6] , GOUT[6] ); - BLOCK1A U27 (PIN[3] , GIN[3] , GIN[7] , GOUT[7] ); - BLOCK1 U38 (PIN[0] , PIN[4] , GIN[4] , GIN[8] , POUT[0] , GOUT[8] ); - BLOCK1 U39 (PIN[1] , PIN[5] , GIN[5] , GIN[9] , POUT[1] , GOUT[9] ); - BLOCK1 U310 (PIN[2] , PIN[6] , GIN[6] , GIN[10] , POUT[2] , GOUT[10] ); - BLOCK1 U311 (PIN[3] , PIN[7] , GIN[7] , GIN[11] , POUT[3] , GOUT[11] ); - BLOCK1 U312 (PIN[4] , PIN[8] , GIN[8] , GIN[12] , POUT[4] , GOUT[12] ); - BLOCK1 U313 
(PIN[5] , PIN[9] , GIN[9] , GIN[13] , POUT[5] , GOUT[13] ); - BLOCK1 U314 (PIN[6] , PIN[10] , GIN[10] , GIN[14] , POUT[6] , GOUT[14] ); - BLOCK1 U315 (PIN[7] , PIN[11] , GIN[11] , GIN[15] , POUT[7] , GOUT[15] ); - BLOCK1 U316 (PIN[8] , PIN[12] , GIN[12] , GIN[16] , POUT[8] , GOUT[16] ); - BLOCK1 U317 (PIN[9] , PIN[13] , GIN[13] , GIN[17] , POUT[9] , GOUT[17] ); - BLOCK1 U318 (PIN[10] , PIN[14] , GIN[14] , GIN[18] , POUT[10] , GOUT[18] ); - BLOCK1 U319 (PIN[11] , PIN[15] , GIN[15] , GIN[19] , POUT[11] , GOUT[19] ); - BLOCK1 U320 (PIN[12] , PIN[16] , GIN[16] , GIN[20] , POUT[12] , GOUT[20] ); - BLOCK1 U321 (PIN[13] , PIN[17] , GIN[17] , GIN[21] , POUT[13] , GOUT[21] ); - BLOCK1 U322 (PIN[14] , PIN[18] , GIN[18] , GIN[22] , POUT[14] , GOUT[22] ); - BLOCK1 U323 (PIN[15] , PIN[19] , GIN[19] , GIN[23] , POUT[15] , GOUT[23] ); - BLOCK1 U324 (PIN[16] , PIN[20] , GIN[20] , GIN[24] , POUT[16] , GOUT[24] ); - BLOCK1 U325 (PIN[17] , PIN[21] , GIN[21] , GIN[25] , POUT[17] , GOUT[25] ); - BLOCK1 U326 (PIN[18] , PIN[22] , GIN[22] , GIN[26] , POUT[18] , GOUT[26] ); - BLOCK1 U327 (PIN[19] , PIN[23] , GIN[23] , GIN[27] , POUT[19] , GOUT[27] ); - BLOCK1 U328 (PIN[20] , PIN[24] , GIN[24] , GIN[28] , POUT[20] , GOUT[28] ); - BLOCK1 U329 (PIN[21] , PIN[25] , GIN[25] , GIN[29] , POUT[21] , GOUT[29] ); - BLOCK1 U330 (PIN[22] , PIN[26] , GIN[26] , GIN[30] , POUT[22] , GOUT[30] ); - BLOCK1 U331 (PIN[23] , PIN[27] , GIN[27] , GIN[31] , POUT[23] , GOUT[31] ); - BLOCK1 U332 (PIN[24] , PIN[28] , GIN[28] , GIN[32] , POUT[24] , GOUT[32] ); - BLOCK1 U333 (PIN[25] , PIN[29] , GIN[29] , GIN[33] , POUT[25] , GOUT[33] ); - BLOCK1 U334 (PIN[26] , PIN[30] , GIN[30] , GIN[34] , POUT[26] , GOUT[34] ); - BLOCK1 U335 (PIN[27] , PIN[31] , GIN[31] , GIN[35] , POUT[27] , GOUT[35] ); - BLOCK1 U336 (PIN[28] , PIN[32] , GIN[32] , GIN[36] , POUT[28] , GOUT[36] ); - BLOCK1 U337 (PIN[29] , PIN[33] , GIN[33] , GIN[37] , POUT[29] , GOUT[37] ); - BLOCK1 U338 (PIN[30] , PIN[34] , GIN[34] , GIN[38] , POUT[30] , GOUT[38] 
); - BLOCK1 U339 (PIN[31] , PIN[35] , GIN[35] , GIN[39] , POUT[31] , GOUT[39] ); - BLOCK1 U340 (PIN[32] , PIN[36] , GIN[36] , GIN[40] , POUT[32] , GOUT[40] ); - BLOCK1 U341 (PIN[33] , PIN[37] , GIN[37] , GIN[41] , POUT[33] , GOUT[41] ); - BLOCK1 U342 (PIN[34] , PIN[38] , GIN[38] , GIN[42] , POUT[34] , GOUT[42] ); - BLOCK1 U343 (PIN[35] , PIN[39] , GIN[39] , GIN[43] , POUT[35] , GOUT[43] ); - BLOCK1 U344 (PIN[36] , PIN[40] , GIN[40] , GIN[44] , POUT[36] , GOUT[44] ); - BLOCK1 U345 (PIN[37] , PIN[41] , GIN[41] , GIN[45] , POUT[37] , GOUT[45] ); - BLOCK1 U346 (PIN[38] , PIN[42] , GIN[42] , GIN[46] , POUT[38] , GOUT[46] ); - BLOCK1 U347 (PIN[39] , PIN[43] , GIN[43] , GIN[47] , POUT[39] , GOUT[47] ); - BLOCK1 U348 (PIN[40] , PIN[44] , GIN[44] , GIN[48] , POUT[40] , GOUT[48] ); - BLOCK1 U349 (PIN[41] , PIN[45] , GIN[45] , GIN[49] , POUT[41] , GOUT[49] ); - BLOCK1 U350 (PIN[42] , PIN[46] , GIN[46] , GIN[50] , POUT[42] , GOUT[50] ); - BLOCK1 U351 (PIN[43] , PIN[47] , GIN[47] , GIN[51] , POUT[43] , GOUT[51] ); - BLOCK1 U352 (PIN[44] , PIN[48] , GIN[48] , GIN[52] , POUT[44] , GOUT[52] ); - BLOCK1 U353 (PIN[45] , PIN[49] , GIN[49] , GIN[53] , POUT[45] , GOUT[53] ); - BLOCK1 U354 (PIN[46] , PIN[50] , GIN[50] , GIN[54] , POUT[46] , GOUT[54] ); - BLOCK1 U355 (PIN[47] , PIN[51] , GIN[51] , GIN[55] , POUT[47] , GOUT[55] ); - BLOCK1 U356 (PIN[48] , PIN[52] , GIN[52] , GIN[56] , POUT[48] , GOUT[56] ); - BLOCK1 U357 (PIN[49] , PIN[53] , GIN[53] , GIN[57] , POUT[49] , GOUT[57] ); - BLOCK1 U358 (PIN[50] , PIN[54] , GIN[54] , GIN[58] , POUT[50] , GOUT[58] ); - BLOCK1 U359 (PIN[51] , PIN[55] , GIN[55] , GIN[59] , POUT[51] , GOUT[59] ); - BLOCK1 U360 (PIN[52] , PIN[56] , GIN[56] , GIN[60] , POUT[52] , GOUT[60] ); - BLOCK1 U361 (PIN[53] , PIN[57] , GIN[57] , GIN[61] , POUT[53] , GOUT[61] ); - BLOCK1 U362 (PIN[54] , PIN[58] , GIN[58] , GIN[62] , POUT[54] , GOUT[62] ); - BLOCK1 U363 (PIN[55] , PIN[59] , GIN[59] , GIN[63] , POUT[55] , GOUT[63] ); - BLOCK1 U364 (PIN[56] , PIN[60] , GIN[60] , 
GIN[64] , POUT[56] , GOUT[64] ); - -endmodule // DBLC_2_64 - - -module DBLC_3_64 ( PIN, GIN, POUT, GOUT ); - - input [56:0] PIN; - input [64:0] GIN; - - output [48:0] POUT; - output [64:0] GOUT; - - INVBLOCK U10 (GIN[0] , GOUT[0] ); - INVBLOCK U11 (GIN[1] , GOUT[1] ); - INVBLOCK U12 (GIN[2] , GOUT[2] ); - INVBLOCK U13 (GIN[3] , GOUT[3] ); - INVBLOCK U14 (GIN[4] , GOUT[4] ); - INVBLOCK U15 (GIN[5] , GOUT[5] ); - INVBLOCK U16 (GIN[6] , GOUT[6] ); - INVBLOCK U17 (GIN[7] , GOUT[7] ); - BLOCK2A U28 (PIN[0] , GIN[0] , GIN[8] , GOUT[8] ); - BLOCK2A U29 (PIN[1] , GIN[1] , GIN[9] , GOUT[9] ); - BLOCK2A U210 (PIN[2] , GIN[2] , GIN[10] , GOUT[10] ); - BLOCK2A U211 (PIN[3] , GIN[3] , GIN[11] , GOUT[11] ); - BLOCK2A U212 (PIN[4] , GIN[4] , GIN[12] , GOUT[12] ); - BLOCK2A U213 (PIN[5] , GIN[5] , GIN[13] , GOUT[13] ); - BLOCK2A U214 (PIN[6] , GIN[6] , GIN[14] , GOUT[14] ); - BLOCK2A U215 (PIN[7] , GIN[7] , GIN[15] , GOUT[15] ); - BLOCK2 U316 (PIN[0] , PIN[8] , GIN[8] , GIN[16] , POUT[0] , GOUT[16] ); - BLOCK2 U317 (PIN[1] , PIN[9] , GIN[9] , GIN[17] , POUT[1] , GOUT[17] ); - BLOCK2 U318 (PIN[2] , PIN[10] , GIN[10] , GIN[18] , POUT[2] , GOUT[18] ); - BLOCK2 U319 (PIN[3] , PIN[11] , GIN[11] , GIN[19] , POUT[3] , GOUT[19] ); - BLOCK2 U320 (PIN[4] , PIN[12] , GIN[12] , GIN[20] , POUT[4] , GOUT[20] ); - BLOCK2 U321 (PIN[5] , PIN[13] , GIN[13] , GIN[21] , POUT[5] , GOUT[21] ); - BLOCK2 U322 (PIN[6] , PIN[14] , GIN[14] , GIN[22] , POUT[6] , GOUT[22] ); - BLOCK2 U323 (PIN[7] , PIN[15] , GIN[15] , GIN[23] , POUT[7] , GOUT[23] ); - BLOCK2 U324 (PIN[8] , PIN[16] , GIN[16] , GIN[24] , POUT[8] , GOUT[24] ); - BLOCK2 U325 (PIN[9] , PIN[17] , GIN[17] , GIN[25] , POUT[9] , GOUT[25] ); - BLOCK2 U326 (PIN[10] , PIN[18] , GIN[18] , GIN[26] , POUT[10] , GOUT[26] ); - BLOCK2 U327 (PIN[11] , PIN[19] , GIN[19] , GIN[27] , POUT[11] , GOUT[27] ); - BLOCK2 U328 (PIN[12] , PIN[20] , GIN[20] , GIN[28] , POUT[12] , GOUT[28] ); - BLOCK2 U329 (PIN[13] , PIN[21] , GIN[21] , GIN[29] , POUT[13] , GOUT[29] ); - 
BLOCK2 U330 (PIN[14] , PIN[22] , GIN[22] , GIN[30] , POUT[14] , GOUT[30] ); - BLOCK2 U331 (PIN[15] , PIN[23] , GIN[23] , GIN[31] , POUT[15] , GOUT[31] ); - BLOCK2 U332 (PIN[16] , PIN[24] , GIN[24] , GIN[32] , POUT[16] , GOUT[32] ); - BLOCK2 U333 (PIN[17] , PIN[25] , GIN[25] , GIN[33] , POUT[17] , GOUT[33] ); - BLOCK2 U334 (PIN[18] , PIN[26] , GIN[26] , GIN[34] , POUT[18] , GOUT[34] ); - BLOCK2 U335 (PIN[19] , PIN[27] , GIN[27] , GIN[35] , POUT[19] , GOUT[35] ); - BLOCK2 U336 (PIN[20] , PIN[28] , GIN[28] , GIN[36] , POUT[20] , GOUT[36] ); - BLOCK2 U337 (PIN[21] , PIN[29] , GIN[29] , GIN[37] , POUT[21] , GOUT[37] ); - BLOCK2 U338 (PIN[22] , PIN[30] , GIN[30] , GIN[38] , POUT[22] , GOUT[38] ); - BLOCK2 U339 (PIN[23] , PIN[31] , GIN[31] , GIN[39] , POUT[23] , GOUT[39] ); - BLOCK2 U340 (PIN[24] , PIN[32] , GIN[32] , GIN[40] , POUT[24] , GOUT[40] ); - BLOCK2 U341 (PIN[25] , PIN[33] , GIN[33] , GIN[41] , POUT[25] , GOUT[41] ); - BLOCK2 U342 (PIN[26] , PIN[34] , GIN[34] , GIN[42] , POUT[26] , GOUT[42] ); - BLOCK2 U343 (PIN[27] , PIN[35] , GIN[35] , GIN[43] , POUT[27] , GOUT[43] ); - BLOCK2 U344 (PIN[28] , PIN[36] , GIN[36] , GIN[44] , POUT[28] , GOUT[44] ); - BLOCK2 U345 (PIN[29] , PIN[37] , GIN[37] , GIN[45] , POUT[29] , GOUT[45] ); - BLOCK2 U346 (PIN[30] , PIN[38] , GIN[38] , GIN[46] , POUT[30] , GOUT[46] ); - BLOCK2 U347 (PIN[31] , PIN[39] , GIN[39] , GIN[47] , POUT[31] , GOUT[47] ); - BLOCK2 U348 (PIN[32] , PIN[40] , GIN[40] , GIN[48] , POUT[32] , GOUT[48] ); - BLOCK2 U349 (PIN[33] , PIN[41] , GIN[41] , GIN[49] , POUT[33] , GOUT[49] ); - BLOCK2 U350 (PIN[34] , PIN[42] , GIN[42] , GIN[50] , POUT[34] , GOUT[50] ); - BLOCK2 U351 (PIN[35] , PIN[43] , GIN[43] , GIN[51] , POUT[35] , GOUT[51] ); - BLOCK2 U352 (PIN[36] , PIN[44] , GIN[44] , GIN[52] , POUT[36] , GOUT[52] ); - BLOCK2 U353 (PIN[37] , PIN[45] , GIN[45] , GIN[53] , POUT[37] , GOUT[53] ); - BLOCK2 U354 (PIN[38] , PIN[46] , GIN[46] , GIN[54] , POUT[38] , GOUT[54] ); - BLOCK2 U355 (PIN[39] , PIN[47] , GIN[47] , 
GIN[55] , POUT[39] , GOUT[55] ); - BLOCK2 U356 (PIN[40] , PIN[48] , GIN[48] , GIN[56] , POUT[40] , GOUT[56] ); - BLOCK2 U357 (PIN[41] , PIN[49] , GIN[49] , GIN[57] , POUT[41] , GOUT[57] ); - BLOCK2 U358 (PIN[42] , PIN[50] , GIN[50] , GIN[58] , POUT[42] , GOUT[58] ); - BLOCK2 U359 (PIN[43] , PIN[51] , GIN[51] , GIN[59] , POUT[43] , GOUT[59] ); - BLOCK2 U360 (PIN[44] , PIN[52] , GIN[52] , GIN[60] , POUT[44] , GOUT[60] ); - BLOCK2 U361 (PIN[45] , PIN[53] , GIN[53] , GIN[61] , POUT[45] , GOUT[61] ); - BLOCK2 U362 (PIN[46] , PIN[54] , GIN[54] , GIN[62] , POUT[46] , GOUT[62] ); - BLOCK2 U363 (PIN[47] , PIN[55] , GIN[55] , GIN[63] , POUT[47] , GOUT[63] ); - BLOCK2 U364 (PIN[48] , PIN[56] , GIN[56] , GIN[64] , POUT[48] , GOUT[64] ); - -endmodule // DBLC_3_64 - - -module DBLC_4_64 ( PIN, GIN, POUT, GOUT ); - - input [48:0] PIN; - input [64:0] GIN; - - output [32:0] POUT; - output [64:0] GOUT; - - INVBLOCK U10 (GIN[0] , GOUT[0] ); - INVBLOCK U11 (GIN[1] , GOUT[1] ); - INVBLOCK U12 (GIN[2] , GOUT[2] ); - INVBLOCK U13 (GIN[3] , GOUT[3] ); - INVBLOCK U14 (GIN[4] , GOUT[4] ); - INVBLOCK U15 (GIN[5] , GOUT[5] ); - INVBLOCK U16 (GIN[6] , GOUT[6] ); - INVBLOCK U17 (GIN[7] , GOUT[7] ); - INVBLOCK U18 (GIN[8] , GOUT[8] ); - INVBLOCK U19 (GIN[9] , GOUT[9] ); - INVBLOCK U110 (GIN[10] , GOUT[10] ); - INVBLOCK U111 (GIN[11] , GOUT[11] ); - INVBLOCK U112 (GIN[12] , GOUT[12] ); - INVBLOCK U113 (GIN[13] , GOUT[13] ); - INVBLOCK U114 (GIN[14] , GOUT[14] ); - INVBLOCK U115 (GIN[15] , GOUT[15] ); - BLOCK1A U216 (PIN[0] , GIN[0] , GIN[16] , GOUT[16] ); - BLOCK1A U217 (PIN[1] , GIN[1] , GIN[17] , GOUT[17] ); - BLOCK1A U218 (PIN[2] , GIN[2] , GIN[18] , GOUT[18] ); - BLOCK1A U219 (PIN[3] , GIN[3] , GIN[19] , GOUT[19] ); - BLOCK1A U220 (PIN[4] , GIN[4] , GIN[20] , GOUT[20] ); - BLOCK1A U221 (PIN[5] , GIN[5] , GIN[21] , GOUT[21] ); - BLOCK1A U222 (PIN[6] , GIN[6] , GIN[22] , GOUT[22] ); - BLOCK1A U223 (PIN[7] , GIN[7] , GIN[23] , GOUT[23] ); - BLOCK1A U224 (PIN[8] , GIN[8] , GIN[24] , GOUT[24] ); - 
BLOCK1A U225 (PIN[9] , GIN[9] , GIN[25] , GOUT[25] ); - BLOCK1A U226 (PIN[10] , GIN[10] , GIN[26] , GOUT[26] ); - BLOCK1A U227 (PIN[11] , GIN[11] , GIN[27] , GOUT[27] ); - BLOCK1A U228 (PIN[12] , GIN[12] , GIN[28] , GOUT[28] ); - BLOCK1A U229 (PIN[13] , GIN[13] , GIN[29] , GOUT[29] ); - BLOCK1A U230 (PIN[14] , GIN[14] , GIN[30] , GOUT[30] ); - BLOCK1A U231 (PIN[15] , GIN[15] , GIN[31] , GOUT[31] ); - BLOCK1 U332 (PIN[0] , PIN[16] , GIN[16] , GIN[32] , POUT[0] , GOUT[32] ); - BLOCK1 U333 (PIN[1] , PIN[17] , GIN[17] , GIN[33] , POUT[1] , GOUT[33] ); - BLOCK1 U334 (PIN[2] , PIN[18] , GIN[18] , GIN[34] , POUT[2] , GOUT[34] ); - BLOCK1 U335 (PIN[3] , PIN[19] , GIN[19] , GIN[35] , POUT[3] , GOUT[35] ); - BLOCK1 U336 (PIN[4] , PIN[20] , GIN[20] , GIN[36] , POUT[4] , GOUT[36] ); - BLOCK1 U337 (PIN[5] , PIN[21] , GIN[21] , GIN[37] , POUT[5] , GOUT[37] ); - BLOCK1 U338 (PIN[6] , PIN[22] , GIN[22] , GIN[38] , POUT[6] , GOUT[38] ); - BLOCK1 U339 (PIN[7] , PIN[23] , GIN[23] , GIN[39] , POUT[7] , GOUT[39] ); - BLOCK1 U340 (PIN[8] , PIN[24] , GIN[24] , GIN[40] , POUT[8] , GOUT[40] ); - BLOCK1 U341 (PIN[9] , PIN[25] , GIN[25] , GIN[41] , POUT[9] , GOUT[41] ); - BLOCK1 U342 (PIN[10] , PIN[26] , GIN[26] , GIN[42] , POUT[10] , GOUT[42] ); - BLOCK1 U343 (PIN[11] , PIN[27] , GIN[27] , GIN[43] , POUT[11] , GOUT[43] ); - BLOCK1 U344 (PIN[12] , PIN[28] , GIN[28] , GIN[44] , POUT[12] , GOUT[44] ); - BLOCK1 U345 (PIN[13] , PIN[29] , GIN[29] , GIN[45] , POUT[13] , GOUT[45] ); - BLOCK1 U346 (PIN[14] , PIN[30] , GIN[30] , GIN[46] , POUT[14] , GOUT[46] ); - BLOCK1 U347 (PIN[15] , PIN[31] , GIN[31] , GIN[47] , POUT[15] , GOUT[47] ); - BLOCK1 U348 (PIN[16] , PIN[32] , GIN[32] , GIN[48] , POUT[16] , GOUT[48] ); - BLOCK1 U349 (PIN[17] , PIN[33] , GIN[33] , GIN[49] , POUT[17] , GOUT[49] ); - BLOCK1 U350 (PIN[18] , PIN[34] , GIN[34] , GIN[50] , POUT[18] , GOUT[50] ); - BLOCK1 U351 (PIN[19] , PIN[35] , GIN[35] , GIN[51] , POUT[19] , GOUT[51] ); - BLOCK1 U352 (PIN[20] , PIN[36] , GIN[36] , GIN[52] , 
POUT[20] , GOUT[52] ); - BLOCK1 U353 (PIN[21] , PIN[37] , GIN[37] , GIN[53] , POUT[21] , GOUT[53] ); - BLOCK1 U354 (PIN[22] , PIN[38] , GIN[38] , GIN[54] , POUT[22] , GOUT[54] ); - BLOCK1 U355 (PIN[23] , PIN[39] , GIN[39] , GIN[55] , POUT[23] , GOUT[55] ); - BLOCK1 U356 (PIN[24] , PIN[40] , GIN[40] , GIN[56] , POUT[24] , GOUT[56] ); - BLOCK1 U357 (PIN[25] , PIN[41] , GIN[41] , GIN[57] , POUT[25] , GOUT[57] ); - BLOCK1 U358 (PIN[26] , PIN[42] , GIN[42] , GIN[58] , POUT[26] , GOUT[58] ); - BLOCK1 U359 (PIN[27] , PIN[43] , GIN[43] , GIN[59] , POUT[27] , GOUT[59] ); - BLOCK1 U360 (PIN[28] , PIN[44] , GIN[44] , GIN[60] , POUT[28] , GOUT[60] ); - BLOCK1 U361 (PIN[29] , PIN[45] , GIN[45] , GIN[61] , POUT[29] , GOUT[61] ); - BLOCK1 U362 (PIN[30] , PIN[46] , GIN[46] , GIN[62] , POUT[30] , GOUT[62] ); - BLOCK1 U363 (PIN[31] , PIN[47] , GIN[47] , GIN[63] , POUT[31] , GOUT[63] ); - BLOCK1 U364 (PIN[32] , PIN[48] , GIN[48] , GIN[64] , POUT[32] , GOUT[64] ); - -endmodule // DBLC_4_64 - - -module DBLC_5_64 ( PIN, GIN, POUT, GOUT ); - - input [32:0] PIN; - input [64:0] GIN; - - output [0:0] POUT; - output [64:0] GOUT; - - INVBLOCK U10 (GIN[0] , GOUT[0] ); - INVBLOCK U11 (GIN[1] , GOUT[1] ); - INVBLOCK U12 (GIN[2] , GOUT[2] ); - INVBLOCK U13 (GIN[3] , GOUT[3] ); - INVBLOCK U14 (GIN[4] , GOUT[4] ); - INVBLOCK U15 (GIN[5] , GOUT[5] ); - INVBLOCK U16 (GIN[6] , GOUT[6] ); - INVBLOCK U17 (GIN[7] , GOUT[7] ); - INVBLOCK U18 (GIN[8] , GOUT[8] ); - INVBLOCK U19 (GIN[9] , GOUT[9] ); - INVBLOCK U110 (GIN[10] , GOUT[10] ); - INVBLOCK U111 (GIN[11] , GOUT[11] ); - INVBLOCK U112 (GIN[12] , GOUT[12] ); - INVBLOCK U113 (GIN[13] , GOUT[13] ); - INVBLOCK U114 (GIN[14] , GOUT[14] ); - INVBLOCK U115 (GIN[15] , GOUT[15] ); - INVBLOCK U116 (GIN[16] , GOUT[16] ); - INVBLOCK U117 (GIN[17] , GOUT[17] ); - INVBLOCK U118 (GIN[18] , GOUT[18] ); - INVBLOCK U119 (GIN[19] , GOUT[19] ); - INVBLOCK U120 (GIN[20] , GOUT[20] ); - INVBLOCK U121 (GIN[21] , GOUT[21] ); - INVBLOCK U122 (GIN[22] , GOUT[22] ); - INVBLOCK 
U123 (GIN[23] , GOUT[23] ); - INVBLOCK U124 (GIN[24] , GOUT[24] ); - INVBLOCK U125 (GIN[25] , GOUT[25] ); - INVBLOCK U126 (GIN[26] , GOUT[26] ); - INVBLOCK U127 (GIN[27] , GOUT[27] ); - INVBLOCK U128 (GIN[28] , GOUT[28] ); - INVBLOCK U129 (GIN[29] , GOUT[29] ); - INVBLOCK U130 (GIN[30] , GOUT[30] ); - INVBLOCK U131 (GIN[31] , GOUT[31] ); - BLOCK2A U232 (PIN[0] , GIN[0] , GIN[32] , GOUT[32] ); - BLOCK2A U233 (PIN[1] , GIN[1] , GIN[33] , GOUT[33] ); - BLOCK2A U234 (PIN[2] , GIN[2] , GIN[34] , GOUT[34] ); - BLOCK2A U235 (PIN[3] , GIN[3] , GIN[35] , GOUT[35] ); - BLOCK2A U236 (PIN[4] , GIN[4] , GIN[36] , GOUT[36] ); - BLOCK2A U237 (PIN[5] , GIN[5] , GIN[37] , GOUT[37] ); - BLOCK2A U238 (PIN[6] , GIN[6] , GIN[38] , GOUT[38] ); - BLOCK2A U239 (PIN[7] , GIN[7] , GIN[39] , GOUT[39] ); - BLOCK2A U240 (PIN[8] , GIN[8] , GIN[40] , GOUT[40] ); - BLOCK2A U241 (PIN[9] , GIN[9] , GIN[41] , GOUT[41] ); - BLOCK2A U242 (PIN[10] , GIN[10] , GIN[42] , GOUT[42] ); - BLOCK2A U243 (PIN[11] , GIN[11] , GIN[43] , GOUT[43] ); - BLOCK2A U244 (PIN[12] , GIN[12] , GIN[44] , GOUT[44] ); - BLOCK2A U245 (PIN[13] , GIN[13] , GIN[45] , GOUT[45] ); - BLOCK2A U246 (PIN[14] , GIN[14] , GIN[46] , GOUT[46] ); - BLOCK2A U247 (PIN[15] , GIN[15] , GIN[47] , GOUT[47] ); - BLOCK2A U248 (PIN[16] , GIN[16] , GIN[48] , GOUT[48] ); - BLOCK2A U249 (PIN[17] , GIN[17] , GIN[49] , GOUT[49] ); - BLOCK2A U250 (PIN[18] , GIN[18] , GIN[50] , GOUT[50] ); - BLOCK2A U251 (PIN[19] , GIN[19] , GIN[51] , GOUT[51] ); - BLOCK2A U252 (PIN[20] , GIN[20] , GIN[52] , GOUT[52] ); - BLOCK2A U253 (PIN[21] , GIN[21] , GIN[53] , GOUT[53] ); - BLOCK2A U254 (PIN[22] , GIN[22] , GIN[54] , GOUT[54] ); - BLOCK2A U255 (PIN[23] , GIN[23] , GIN[55] , GOUT[55] ); - BLOCK2A U256 (PIN[24] , GIN[24] , GIN[56] , GOUT[56] ); - BLOCK2A U257 (PIN[25] , GIN[25] , GIN[57] , GOUT[57] ); - BLOCK2A U258 (PIN[26] , GIN[26] , GIN[58] , GOUT[58] ); - BLOCK2A U259 (PIN[27] , GIN[27] , GIN[59] , GOUT[59] ); - BLOCK2A U260 (PIN[28] , GIN[28] , GIN[60] , GOUT[60] 
); - BLOCK2A U261 (PIN[29] , GIN[29] , GIN[61] , GOUT[61] ); - BLOCK2A U262 (PIN[30] , GIN[30] , GIN[62] , GOUT[62] ); - BLOCK2A U263 (PIN[31] , GIN[31] , GIN[63] , GOUT[63] ); - BLOCK2 U364 (PIN[0] , PIN[32] , GIN[32] , GIN[64] , POUT[0] , GOUT[64] ); - -endmodule // DBLC_5_64 - - -module XORSTAGE_64 ( A, B, PBIT, CARRY, SUM, COUT ); - - input [63:0] A; - input [63:0] B; - input PBIT; - input [64:0] CARRY; - - output [63:0] SUM; - output COUT; - - XXOR1 U20 (A[0] , B[0] , CARRY[0] , SUM[0] ); - XXOR1 U21 (A[1] , B[1] , CARRY[1] , SUM[1] ); - XXOR1 U22 (A[2] , B[2] , CARRY[2] , SUM[2] ); - XXOR1 U23 (A[3] , B[3] , CARRY[3] , SUM[3] ); - XXOR1 U24 (A[4] , B[4] , CARRY[4] , SUM[4] ); - XXOR1 U25 (A[5] , B[5] , CARRY[5] , SUM[5] ); - XXOR1 U26 (A[6] , B[6] , CARRY[6] , SUM[6] ); - XXOR1 U27 (A[7] , B[7] , CARRY[7] , SUM[7] ); - XXOR1 U28 (A[8] , B[8] , CARRY[8] , SUM[8] ); - XXOR1 U29 (A[9] , B[9] , CARRY[9] , SUM[9] ); - XXOR1 U210 (A[10] , B[10] , CARRY[10] , SUM[10] ); - XXOR1 U211 (A[11] , B[11] , CARRY[11] , SUM[11] ); - XXOR1 U212 (A[12] , B[12] , CARRY[12] , SUM[12] ); - XXOR1 U213 (A[13] , B[13] , CARRY[13] , SUM[13] ); - XXOR1 U214 (A[14] , B[14] , CARRY[14] , SUM[14] ); - XXOR1 U215 (A[15] , B[15] , CARRY[15] , SUM[15] ); - XXOR1 U216 (A[16] , B[16] , CARRY[16] , SUM[16] ); - XXOR1 U217 (A[17] , B[17] , CARRY[17] , SUM[17] ); - XXOR1 U218 (A[18] , B[18] , CARRY[18] , SUM[18] ); - XXOR1 U219 (A[19] , B[19] , CARRY[19] , SUM[19] ); - XXOR1 U220 (A[20] , B[20] , CARRY[20] , SUM[20] ); - XXOR1 U221 (A[21] , B[21] , CARRY[21] , SUM[21] ); - XXOR1 U222 (A[22] , B[22] , CARRY[22] , SUM[22] ); - XXOR1 U223 (A[23] , B[23] , CARRY[23] , SUM[23] ); - XXOR1 U224 (A[24] , B[24] , CARRY[24] , SUM[24] ); - XXOR1 U225 (A[25] , B[25] , CARRY[25] , SUM[25] ); - XXOR1 U226 (A[26] , B[26] , CARRY[26] , SUM[26] ); - XXOR1 U227 (A[27] , B[27] , CARRY[27] , SUM[27] ); - XXOR1 U228 (A[28] , B[28] , CARRY[28] , SUM[28] ); - XXOR1 U229 (A[29] , B[29] , CARRY[29] , SUM[29] ); - XXOR1 
U230 (A[30] , B[30] , CARRY[30] , SUM[30] ); - XXOR1 U231 (A[31] , B[31] , CARRY[31] , SUM[31] ); - XXOR1 U232 (A[32] , B[32] , CARRY[32] , SUM[32] ); - XXOR1 U233 (A[33] , B[33] , CARRY[33] , SUM[33] ); - XXOR1 U234 (A[34] , B[34] , CARRY[34] , SUM[34] ); - XXOR1 U235 (A[35] , B[35] , CARRY[35] , SUM[35] ); - XXOR1 U236 (A[36] , B[36] , CARRY[36] , SUM[36] ); - XXOR1 U237 (A[37] , B[37] , CARRY[37] , SUM[37] ); - XXOR1 U238 (A[38] , B[38] , CARRY[38] , SUM[38] ); - XXOR1 U239 (A[39] , B[39] , CARRY[39] , SUM[39] ); - XXOR1 U240 (A[40] , B[40] , CARRY[40] , SUM[40] ); - XXOR1 U241 (A[41] , B[41] , CARRY[41] , SUM[41] ); - XXOR1 U242 (A[42] , B[42] , CARRY[42] , SUM[42] ); - XXOR1 U243 (A[43] , B[43] , CARRY[43] , SUM[43] ); - XXOR1 U244 (A[44] , B[44] , CARRY[44] , SUM[44] ); - XXOR1 U245 (A[45] , B[45] , CARRY[45] , SUM[45] ); - XXOR1 U246 (A[46] , B[46] , CARRY[46] , SUM[46] ); - XXOR1 U247 (A[47] , B[47] , CARRY[47] , SUM[47] ); - XXOR1 U248 (A[48] , B[48] , CARRY[48] , SUM[48] ); - XXOR1 U249 (A[49] , B[49] , CARRY[49] , SUM[49] ); - XXOR1 U250 (A[50] , B[50] , CARRY[50] , SUM[50] ); - XXOR1 U251 (A[51] , B[51] , CARRY[51] , SUM[51] ); - XXOR1 U252 (A[52] , B[52] , CARRY[52] , SUM[52] ); - XXOR1 U253 (A[53] , B[53] , CARRY[53] , SUM[53] ); - XXOR1 U254 (A[54] , B[54] , CARRY[54] , SUM[54] ); - XXOR1 U255 (A[55] , B[55] , CARRY[55] , SUM[55] ); - XXOR1 U256 (A[56] , B[56] , CARRY[56] , SUM[56] ); - XXOR1 U257 (A[57] , B[57] , CARRY[57] , SUM[57] ); - XXOR1 U258 (A[58] , B[58] , CARRY[58] , SUM[58] ); - XXOR1 U259 (A[59] , B[59] , CARRY[59] , SUM[59] ); - XXOR1 U260 (A[60] , B[60] , CARRY[60] , SUM[60] ); - XXOR1 U261 (A[61] , B[61] , CARRY[61] , SUM[61] ); - XXOR1 U262 (A[62] , B[62] , CARRY[62] , SUM[62] ); - XXOR1 U263 (A[63] , B[63] , CARRY[63] , SUM[63] ); - BLOCK1A U1 (PBIT , CARRY[0] , CARRY[64] , COUT ); - -endmodule // XORSTAGE_64 - - -module DBLCTREE_64 ( PIN, GIN, GOUT, POUT ); - - input [63:0] PIN; - input [64:0] GIN; - - output [64:0] GOUT; - output 
[0:0] POUT; - - wire [62:0] INTPROP_0; - wire [64:0] INTGEN_0; - wire [60:0] INTPROP_1; - wire [64:0] INTGEN_1; - wire [56:0] INTPROP_2; - wire [64:0] INTGEN_2; - wire [48:0] INTPROP_3; - wire [64:0] INTGEN_3; - wire [32:0] INTPROP_4; - wire [64:0] INTGEN_4; - - DBLC_0_64 U_0 (.PIN(PIN) , .GIN(GIN) , .POUT(INTPROP_0) , .GOUT(INTGEN_0) ); - DBLC_1_64 U_1 (.PIN(INTPROP_0) , .GIN(INTGEN_0) , .POUT(INTPROP_1) , .GOUT(INTGEN_1) ); - DBLC_2_64 U_2 (.PIN(INTPROP_1) , .GIN(INTGEN_1) , .POUT(INTPROP_2) , .GOUT(INTGEN_2) ); - DBLC_3_64 U_3 (.PIN(INTPROP_2) , .GIN(INTGEN_2) , .POUT(INTPROP_3) , .GOUT(INTGEN_3) ); - DBLC_4_64 U_4 (.PIN(INTPROP_3) , .GIN(INTGEN_3) , .POUT(INTPROP_4) , .GOUT(INTGEN_4) ); - DBLC_5_64 U_5 (.PIN(INTPROP_4) , .GIN(INTGEN_4) , .POUT(POUT) , .GOUT(GOUT) ); - -endmodule // DBLCTREE_64 - - -module DBLCADDER_64_64 ( OPA, OPB, CIN, SUM, COUT ); - - input [63:0] OPA; - input [63:0] OPB; - input CIN; - - output [63:0] SUM; - output COUT; - - wire [63:0] INTPROP; - wire [64:0] INTGEN; - wire [0:0] PBIT; - wire [64:0] CARRY; - - PRESTAGE_64 U1 (OPA , OPB , CIN , INTPROP , INTGEN ); - DBLCTREE_64 U2 (INTPROP , INTGEN , CARRY , PBIT ); - XORSTAGE_64 U3 (OPA[63:0] , OPB[63:0] , PBIT[0] , CARRY[64:0] , SUM , COUT ); - -endmodule diff --git a/pipelined/src/fpu/cla12.sv b/pipelined/src/fpu/cla12.sv deleted file mode 100644 index b098228c8..000000000 --- a/pipelined/src/fpu/cla12.sv +++ /dev/null @@ -1,332 +0,0 @@ -// This module implements a 12-bit carry lookahead adder. It is used -// for rounding in the floating point adder. 
- -module cla12 (S, CO, X, Y); - - input [11:0] X; - input [11:0] Y; - - output [11:0] S; - output CO; - - wire [63:0] A,B,Q;//***KEP was 0:63 - changed due to lint warning - wire LOGIC0; - wire CIN; - wire CO_64; - - assign LOGIC0 = 0; - assign CIN = 0; - - DBLCADDER_64_64 U1 (A , B , CIN, Q , CO_64); - - assign A[0] = X[0]; - assign B[0] = Y[0]; - assign A[1] = X[1]; - assign B[1] = Y[1]; - assign A[2] = X[2]; - assign B[2] = Y[2]; - assign A[3] = X[3]; - assign B[3] = Y[3]; - assign A[4] = X[4]; - assign B[4] = Y[4]; - assign A[5] = X[5]; - assign B[5] = Y[5]; - assign A[6] = X[6]; - assign B[6] = Y[6]; - assign A[7] = X[7]; - assign B[7] = Y[7]; - assign A[8] = X[8]; - assign B[8] = Y[8]; - assign A[9] = X[9]; - assign B[9] = Y[9]; - assign A[10] = X[10]; - assign B[10] = Y[10]; - assign A[11] = X[11]; - assign B[11] = Y[11]; - assign A[12] = LOGIC0; - assign B[12] = LOGIC0; - assign A[13] = LOGIC0; - assign B[13] = LOGIC0; - assign A[14] = LOGIC0; - assign B[14] = LOGIC0; - assign A[15] = LOGIC0; - assign B[15] = LOGIC0; - assign A[16] = LOGIC0; - assign B[16] = LOGIC0; - assign A[17] = LOGIC0; - assign B[17] = LOGIC0; - assign A[18] = LOGIC0; - assign B[18] = LOGIC0; - assign A[19] = LOGIC0; - assign B[19] = LOGIC0; - assign A[20] = LOGIC0; - assign B[20] = LOGIC0; - assign A[21] = LOGIC0; - assign B[21] = LOGIC0; - assign A[22] = LOGIC0; - assign B[22] = LOGIC0; - assign A[23] = LOGIC0; - assign B[23] = LOGIC0; - assign A[24] = LOGIC0; - assign B[24] = LOGIC0; - assign A[25] = LOGIC0; - assign B[25] = LOGIC0; - assign A[26] = LOGIC0; - assign B[26] = LOGIC0; - assign A[27] = LOGIC0; - assign B[27] = LOGIC0; - assign A[28] = LOGIC0; - assign B[28] = LOGIC0; - assign A[29] = LOGIC0; - assign B[29] = LOGIC0; - assign A[30] = LOGIC0; - assign B[30] = LOGIC0; - assign A[31] = LOGIC0; - assign B[31] = LOGIC0; - assign A[32] = LOGIC0; - assign B[32] = LOGIC0; - assign A[33] = LOGIC0; - assign B[33] = LOGIC0; - assign A[34] = LOGIC0; - assign B[34] = LOGIC0; - 
assign A[35] = LOGIC0; - assign B[35] = LOGIC0; - assign A[36] = LOGIC0; - assign B[36] = LOGIC0; - assign A[37] = LOGIC0; - assign B[37] = LOGIC0; - assign A[38] = LOGIC0; - assign B[38] = LOGIC0; - assign A[39] = LOGIC0; - assign B[39] = LOGIC0; - assign A[40] = LOGIC0; - assign B[40] = LOGIC0; - assign A[41] = LOGIC0; - assign B[41] = LOGIC0; - assign A[42] = LOGIC0; - assign B[42] = LOGIC0; - assign A[43] = LOGIC0; - assign B[43] = LOGIC0; - assign A[44] = LOGIC0; - assign B[44] = LOGIC0; - assign A[45] = LOGIC0; - assign B[45] = LOGIC0; - assign A[46] = LOGIC0; - assign B[46] = LOGIC0; - assign A[47] = LOGIC0; - assign B[47] = LOGIC0; - assign A[48] = LOGIC0; - assign B[48] = LOGIC0; - assign A[49] = LOGIC0; - assign B[49] = LOGIC0; - assign A[50] = LOGIC0; - assign B[50] = LOGIC0; - assign A[51] = LOGIC0; - assign B[51] = LOGIC0; - assign A[52] = LOGIC0; - assign B[52] = LOGIC0; - assign A[53] = LOGIC0; - assign B[53] = LOGIC0; - assign A[54] = LOGIC0; - assign B[54] = LOGIC0; - assign A[55] = LOGIC0; - assign B[55] = LOGIC0; - assign A[56] = LOGIC0; - assign B[56] = LOGIC0; - assign A[57] = LOGIC0; - assign B[57] = LOGIC0; - assign A[58] = LOGIC0; - assign B[58] = LOGIC0; - assign A[59] = LOGIC0; - assign B[59] = LOGIC0; - assign A[60] = LOGIC0; - assign B[60] = LOGIC0; - assign A[61] = LOGIC0; - assign B[61] = LOGIC0; - assign A[62] = LOGIC0; - assign B[62] = LOGIC0; - assign A[63] = LOGIC0; - assign B[63] = LOGIC0; - - assign S[0] = Q[0]; - assign S[1] = Q[1]; - assign S[2] = Q[2]; - assign S[3] = Q[3]; - assign S[4] = Q[4]; - assign S[5] = Q[5]; - assign S[6] = Q[6]; - assign S[7] = Q[7]; - assign S[8] = Q[8]; - assign S[9] = Q[9]; - assign S[10] = Q[10]; - assign S[11] = Q[11]; - assign CO = Q[12]; - -endmodule //cla52 - -// This module implements a 12-bit carry lookahead subtractor. It is used -// for rounding in the floating point adder. 
- -module cla_sub12 (S, X, Y); - - input [11:0] X; - input [11:0] Y; - - output [11:0] S; - - wire [63:0] A,B,Q,Bbar;//***KEP was 0:63 - changed due to lint warning - wire CO; - wire LOGIC0; - wire VDD; - logic CO_12; - - assign Bbar = ~B; - assign LOGIC0 = 0; - assign VDD = 1; - - DBLCADDER_64_64 U1 (A , Bbar , VDD, Q , CO); - - assign A[0] = X[0]; - assign B[0] = Y[0]; - assign A[1] = X[1]; - assign B[1] = Y[1]; - assign A[2] = X[2]; - assign B[2] = Y[2]; - assign A[3] = X[3]; - assign B[3] = Y[3]; - assign A[4] = X[4]; - assign B[4] = Y[4]; - assign A[5] = X[5]; - assign B[5] = Y[5]; - assign A[6] = X[6]; - assign B[6] = Y[6]; - assign A[7] = X[7]; - assign B[7] = Y[7]; - assign A[8] = X[8]; - assign B[8] = Y[8]; - assign A[9] = X[9]; - assign B[9] = Y[9]; - assign A[10] = X[10]; - assign B[10] = Y[10]; - assign A[11] = X[11]; - assign B[11] = Y[11]; - assign A[12] = LOGIC0; - assign B[12] = LOGIC0; - assign A[13] = LOGIC0; - assign B[13] = LOGIC0; - assign A[14] = LOGIC0; - assign B[14] = LOGIC0; - assign A[15] = LOGIC0; - assign B[15] = LOGIC0; - assign A[16] = LOGIC0; - assign B[16] = LOGIC0; - assign A[17] = LOGIC0; - assign B[17] = LOGIC0; - assign A[18] = LOGIC0; - assign B[18] = LOGIC0; - assign A[19] = LOGIC0; - assign B[19] = LOGIC0; - assign A[20] = LOGIC0; - assign B[20] = LOGIC0; - assign A[21] = LOGIC0; - assign B[21] = LOGIC0; - assign A[22] = LOGIC0; - assign B[22] = LOGIC0; - assign A[23] = LOGIC0; - assign B[23] = LOGIC0; - assign A[24] = LOGIC0; - assign B[24] = LOGIC0; - assign A[25] = LOGIC0; - assign B[25] = LOGIC0; - assign A[26] = LOGIC0; - assign B[26] = LOGIC0; - assign A[27] = LOGIC0; - assign B[27] = LOGIC0; - assign A[28] = LOGIC0; - assign B[28] = LOGIC0; - assign A[29] = LOGIC0; - assign B[29] = LOGIC0; - assign A[30] = LOGIC0; - assign B[30] = LOGIC0; - assign A[31] = LOGIC0; - assign B[31] = LOGIC0; - assign A[32] = LOGIC0; - assign B[32] = LOGIC0; - assign A[33] = LOGIC0; - assign B[33] = LOGIC0; - assign A[34] = LOGIC0; - assign 
B[34] = LOGIC0; - assign A[35] = LOGIC0; - assign B[35] = LOGIC0; - assign A[36] = LOGIC0; - assign B[36] = LOGIC0; - assign A[37] = LOGIC0; - assign B[37] = LOGIC0; - assign A[38] = LOGIC0; - assign B[38] = LOGIC0; - assign A[39] = LOGIC0; - assign B[39] = LOGIC0; - assign A[40] = LOGIC0; - assign B[40] = LOGIC0; - assign A[41] = LOGIC0; - assign B[41] = LOGIC0; - assign A[42] = LOGIC0; - assign B[42] = LOGIC0; - assign A[43] = LOGIC0; - assign B[43] = LOGIC0; - assign A[44] = LOGIC0; - assign B[44] = LOGIC0; - assign A[45] = LOGIC0; - assign B[45] = LOGIC0; - assign A[46] = LOGIC0; - assign B[46] = LOGIC0; - assign A[47] = LOGIC0; - assign B[47] = LOGIC0; - assign A[48] = LOGIC0; - assign B[48] = LOGIC0; - assign A[49] = LOGIC0; - assign B[49] = LOGIC0; - assign A[50] = LOGIC0; - assign B[50] = LOGIC0; - assign A[51] = LOGIC0; - assign B[51] = LOGIC0; - assign A[52] = LOGIC0; - assign B[52] = LOGIC0; - assign A[53] = LOGIC0; - assign B[53] = LOGIC0; - assign A[54] = LOGIC0; - assign B[54] = LOGIC0; - assign A[55] = LOGIC0; - assign B[55] = LOGIC0; - assign A[56] = LOGIC0; - assign B[56] = LOGIC0; - assign A[57] = LOGIC0; - assign B[57] = LOGIC0; - assign A[58] = LOGIC0; - assign B[58] = LOGIC0; - assign A[59] = LOGIC0; - assign B[59] = LOGIC0; - assign A[60] = LOGIC0; - assign B[60] = LOGIC0; - assign A[61] = LOGIC0; - assign B[61] = LOGIC0; - assign A[62] = LOGIC0; - assign B[62] = LOGIC0; - assign A[63] = LOGIC0; - assign B[63] = LOGIC0; - - assign S[0] = Q[0]; - assign S[1] = Q[1]; - assign S[2] = Q[2]; - assign S[3] = Q[3]; - assign S[4] = Q[4]; - assign S[5] = Q[5]; - assign S[6] = Q[6]; - assign S[7] = Q[7]; - assign S[8] = Q[8]; - assign S[9] = Q[9]; - assign S[10] = Q[10]; - assign S[11] = Q[11]; - assign CO_12 = Q[12]; - -endmodule //cla_sub52 diff --git a/pipelined/src/fpu/cla52.sv b/pipelined/src/fpu/cla52.sv deleted file mode 100644 index 5f818fbd8..000000000 --- a/pipelined/src/fpu/cla52.sv +++ /dev/null @@ -1,409 +0,0 @@ -// This module implements a 
52-bit carry lookahead adder. It is used -// for rounding in the floating point adder. - -module cla52 (S, CO, X, Y); - - input [51:0] X; - input [51:0] Y; - - output [51:0] S; - output CO; - - wire [63:0] A,B,Q;//***KEP was 0:63 - changed due to lint warning - wire LOGIC0; - wire CIN; - wire CO_64; - - assign LOGIC0 = 0; - assign CIN = 0; - DBLCADDER_64_64 U1 (A , B , CIN, Q , CO_64); - assign A[0] = X[0]; - assign B[0] = Y[0]; - assign A[1] = X[1]; - assign B[1] = Y[1]; - assign A[2] = X[2]; - assign B[2] = Y[2]; - assign A[3] = X[3]; - assign B[3] = Y[3]; - assign A[4] = X[4]; - assign B[4] = Y[4]; - assign A[5] = X[5]; - assign B[5] = Y[5]; - assign A[6] = X[6]; - assign B[6] = Y[6]; - assign A[7] = X[7]; - assign B[7] = Y[7]; - assign A[8] = X[8]; - assign B[8] = Y[8]; - assign A[9] = X[9]; - assign B[9] = Y[9]; - assign A[10] = X[10]; - assign B[10] = Y[10]; - assign A[11] = X[11]; - assign B[11] = Y[11]; - assign A[12] = X[12]; - assign B[12] = Y[12]; - assign A[13] = X[13]; - assign B[13] = Y[13]; - assign A[14] = X[14]; - assign B[14] = Y[14]; - assign A[15] = X[15]; - assign B[15] = Y[15]; - assign A[16] = X[16]; - assign B[16] = Y[16]; - assign A[17] = X[17]; - assign B[17] = Y[17]; - assign A[18] = X[18]; - assign B[18] = Y[18]; - assign A[19] = X[19]; - assign B[19] = Y[19]; - assign A[20] = X[20]; - assign B[20] = Y[20]; - assign A[21] = X[21]; - assign B[21] = Y[21]; - assign A[22] = X[22]; - assign B[22] = Y[22]; - assign A[23] = X[23]; - assign B[23] = Y[23]; - assign A[24] = X[24]; - assign B[24] = Y[24]; - assign A[25] = X[25]; - assign B[25] = Y[25]; - assign A[26] = X[26]; - assign B[26] = Y[26]; - assign A[27] = X[27]; - assign B[27] = Y[27]; - assign A[28] = X[28]; - assign B[28] = Y[28]; - assign A[29] = X[29]; - assign B[29] = Y[29]; - assign A[30] = X[30]; - assign B[30] = Y[30]; - assign A[31] = X[31]; - assign B[31] = Y[31]; - assign A[32] = X[32]; - assign B[32] = Y[32]; - assign A[33] = X[33]; - assign B[33] = Y[33]; - assign A[34] = 
X[34]; - assign B[34] = Y[34]; - assign A[35] = X[35]; - assign B[35] = Y[35]; - assign A[36] = X[36]; - assign B[36] = Y[36]; - assign A[37] = X[37]; - assign B[37] = Y[37]; - assign A[38] = X[38]; - assign B[38] = Y[38]; - assign A[39] = X[39]; - assign B[39] = Y[39]; - assign A[40] = X[40]; - assign B[40] = Y[40]; - assign A[41] = X[41]; - assign B[41] = Y[41]; - assign A[42] = X[42]; - assign B[42] = Y[42]; - assign A[43] = X[43]; - assign B[43] = Y[43]; - assign A[44] = X[44]; - assign B[44] = Y[44]; - assign A[45] = X[45]; - assign B[45] = Y[45]; - assign A[46] = X[46]; - assign B[46] = Y[46]; - assign A[47] = X[47]; - assign B[47] = Y[47]; - assign A[48] = X[48]; - assign B[48] = Y[48]; - assign A[49] = X[49]; - assign B[49] = Y[49]; - assign A[50] = X[50]; - assign B[50] = Y[50]; - assign A[51] = X[51]; - assign B[51] = Y[51]; - assign A[52] = LOGIC0; - assign B[52] = LOGIC0; - assign A[53] = LOGIC0; - assign B[53] = LOGIC0; - assign A[54] = LOGIC0; - assign B[54] = LOGIC0; - assign A[55] = LOGIC0; - assign B[55] = LOGIC0; - assign A[56] = LOGIC0; - assign B[56] = LOGIC0; - assign A[57] = LOGIC0; - assign B[57] = LOGIC0; - assign A[58] = LOGIC0; - assign B[58] = LOGIC0; - assign A[59] = LOGIC0; - assign B[59] = LOGIC0; - assign A[60] = LOGIC0; - assign B[60] = LOGIC0; - assign A[61] = LOGIC0; - assign B[61] = LOGIC0; - assign A[62] = LOGIC0; - assign B[62] = LOGIC0; - assign A[63] = LOGIC0; - assign B[63] = LOGIC0; - assign S[0] = Q[0]; - assign S[1] = Q[1]; - assign S[2] = Q[2]; - assign S[3] = Q[3]; - assign S[4] = Q[4]; - assign S[5] = Q[5]; - assign S[6] = Q[6]; - assign S[7] = Q[7]; - assign S[8] = Q[8]; - assign S[9] = Q[9]; - assign S[10] = Q[10]; - assign S[11] = Q[11]; - assign S[12] = Q[12]; - assign S[13] = Q[13]; - assign S[14] = Q[14]; - assign S[15] = Q[15]; - assign S[16] = Q[16]; - assign S[17] = Q[17]; - assign S[18] = Q[18]; - assign S[19] = Q[19]; - assign S[20] = Q[20]; - assign S[21] = Q[21]; - assign S[22] = Q[22]; - assign S[23] = 
Q[23]; - assign S[24] = Q[24]; - assign S[25] = Q[25]; - assign S[26] = Q[26]; - assign S[27] = Q[27]; - assign S[28] = Q[28]; - assign S[29] = Q[29]; - assign S[30] = Q[30]; - assign S[31] = Q[31]; - assign S[32] = Q[32]; - assign S[33] = Q[33]; - assign S[34] = Q[34]; - assign S[35] = Q[35]; - assign S[36] = Q[36]; - assign S[37] = Q[37]; - assign S[38] = Q[38]; - assign S[39] = Q[39]; - assign S[40] = Q[40]; - assign S[41] = Q[41]; - assign S[42] = Q[42]; - assign S[43] = Q[43]; - assign S[44] = Q[44]; - assign S[45] = Q[45]; - assign S[46] = Q[46]; - assign S[47] = Q[47]; - assign S[48] = Q[48]; - assign S[49] = Q[49]; - assign S[50] = Q[50]; - assign S[51] = Q[51]; - assign CO = Q[52]; - -endmodule //cla52 - -// This module implements a 52-bit carry lookahead subtractor. It is used -// for rounding in the floating point adder. - -module cla_sub52 (S, X, Y); - - input [51:0] X; - input [51:0] Y; - - output [51:0] S; - - wire [63:0] A,B,Q,Bbar;//***KEP was 0:63 - changed due to lint warning - wire LOGIC0; - wire CIN; - wire CO_52; - wire CO_64; - - assign Bbar = ~B; - assign LOGIC0 = 0; - assign CIN = 0; - - DBLCADDER_64_64 U1 (A , Bbar , CIN, Q , CO_64); - - assign A[0] = X[0]; - assign B[0] = Y[0]; - assign A[1] = X[1]; - assign B[1] = Y[1]; - assign A[2] = X[2]; - assign B[2] = Y[2]; - assign A[3] = X[3]; - assign B[3] = Y[3]; - assign A[4] = X[4]; - assign B[4] = Y[4]; - assign A[5] = X[5]; - assign B[5] = Y[5]; - assign A[6] = X[6]; - assign B[6] = Y[6]; - assign A[7] = X[7]; - assign B[7] = Y[7]; - assign A[8] = X[8]; - assign B[8] = Y[8]; - assign A[9] = X[9]; - assign B[9] = Y[9]; - assign A[10] = X[10]; - assign B[10] = Y[10]; - assign A[11] = X[11]; - assign B[11] = Y[11]; - assign A[12] = X[12]; - assign B[12] = Y[12]; - assign A[13] = X[13]; - assign B[13] = Y[13]; - assign A[14] = X[14]; - assign B[14] = Y[14]; - assign A[15] = X[15]; - assign B[15] = Y[15]; - assign A[16] = X[16]; - assign B[16] = Y[16]; - assign A[17] = X[17]; - assign B[17] = 
Y[17]; - assign A[18] = X[18]; - assign B[18] = Y[18]; - assign A[19] = X[19]; - assign B[19] = Y[19]; - assign A[20] = X[20]; - assign B[20] = Y[20]; - assign A[21] = X[21]; - assign B[21] = Y[21]; - assign A[22] = X[22]; - assign B[22] = Y[22]; - assign A[23] = X[23]; - assign B[23] = Y[23]; - assign A[24] = X[24]; - assign B[24] = Y[24]; - assign A[25] = X[25]; - assign B[25] = Y[25]; - assign A[26] = X[26]; - assign B[26] = Y[26]; - assign A[27] = X[27]; - assign B[27] = Y[27]; - assign A[28] = X[28]; - assign B[28] = Y[28]; - assign A[29] = X[29]; - assign B[29] = Y[29]; - assign A[30] = X[30]; - assign B[30] = Y[30]; - assign A[31] = X[31]; - assign B[31] = Y[31]; - assign A[32] = X[32]; - assign B[32] = Y[32]; - assign A[33] = X[33]; - assign B[33] = Y[33]; - assign A[34] = X[34]; - assign B[34] = Y[34]; - assign A[35] = X[35]; - assign B[35] = Y[35]; - assign A[36] = X[36]; - assign B[36] = Y[36]; - assign A[37] = X[37]; - assign B[37] = Y[37]; - assign A[38] = X[38]; - assign B[38] = Y[38]; - assign A[39] = X[39]; - assign B[39] = Y[39]; - assign A[40] = X[40]; - assign B[40] = Y[40]; - assign A[41] = X[41]; - assign B[41] = Y[41]; - assign A[42] = X[42]; - assign B[42] = Y[42]; - assign A[43] = X[43]; - assign B[43] = Y[43]; - assign A[44] = X[44]; - assign B[44] = Y[44]; - assign A[45] = X[45]; - assign B[45] = Y[45]; - assign A[46] = X[46]; - assign B[46] = Y[46]; - assign A[47] = X[47]; - assign B[47] = Y[47]; - assign A[48] = X[48]; - assign B[48] = Y[48]; - assign A[49] = X[49]; - assign B[49] = Y[49]; - assign A[50] = X[50]; - assign B[50] = Y[50]; - assign A[51] = X[51]; - assign B[51] = Y[51]; - assign A[52] = LOGIC0; - assign B[52] = LOGIC0; - assign A[53] = LOGIC0; - assign B[53] = LOGIC0; - assign A[54] = LOGIC0; - assign B[54] = LOGIC0; - assign A[55] = LOGIC0; - assign B[55] = LOGIC0; - assign A[56] = LOGIC0; - assign B[56] = LOGIC0; - assign A[57] = LOGIC0; - assign B[57] = LOGIC0; - assign A[58] = LOGIC0; - assign B[58] = LOGIC0; - assign 
A[59] = LOGIC0; - assign B[59] = LOGIC0; - assign A[60] = LOGIC0; - assign B[60] = LOGIC0; - assign A[61] = LOGIC0; - assign B[61] = LOGIC0; - assign A[62] = LOGIC0; - assign B[62] = LOGIC0; - assign A[63] = LOGIC0; - assign B[63] = LOGIC0; - - assign S[0] = Q[0]; - assign S[1] = Q[1]; - assign S[2] = Q[2]; - assign S[3] = Q[3]; - assign S[4] = Q[4]; - assign S[5] = Q[5]; - assign S[6] = Q[6]; - assign S[7] = Q[7]; - assign S[8] = Q[8]; - assign S[9] = Q[9]; - assign S[10] = Q[10]; - assign S[11] = Q[11]; - assign S[12] = Q[12]; - assign S[13] = Q[13]; - assign S[14] = Q[14]; - assign S[15] = Q[15]; - assign S[16] = Q[16]; - assign S[17] = Q[17]; - assign S[18] = Q[18]; - assign S[19] = Q[19]; - assign S[20] = Q[20]; - assign S[21] = Q[21]; - assign S[22] = Q[22]; - assign S[23] = Q[23]; - assign S[24] = Q[24]; - assign S[25] = Q[25]; - assign S[26] = Q[26]; - assign S[27] = Q[27]; - assign S[28] = Q[28]; - assign S[29] = Q[29]; - assign S[30] = Q[30]; - assign S[31] = Q[31]; - assign S[32] = Q[32]; - assign S[33] = Q[33]; - assign S[34] = Q[34]; - assign S[35] = Q[35]; - assign S[36] = Q[36]; - assign S[37] = Q[37]; - assign S[38] = Q[38]; - assign S[39] = Q[39]; - assign S[40] = Q[40]; - assign S[41] = Q[41]; - assign S[42] = Q[42]; - assign S[43] = Q[43]; - assign S[44] = Q[44]; - assign S[45] = Q[45]; - assign S[46] = Q[46]; - assign S[47] = Q[47]; - assign S[48] = Q[48]; - assign S[49] = Q[49]; - assign S[50] = Q[50]; - assign S[51] = Q[51]; - assign CO_52 = Q[52]; - -endmodule //cla_sub52 diff --git a/pipelined/src/fpu/cla64.sv b/pipelined/src/fpu/cla64.sv deleted file mode 100755 index 6d28be10f..000000000 --- a/pipelined/src/fpu/cla64.sv +++ /dev/null @@ -1,420 +0,0 @@ -// This module implements a 64-bit carry lookehead adder/subtractor. 
-// It is used to perform the primary addition in the floating point -// adder - -module cla64 (S, X, Y, Sub); - - input [63:0] X; - input [63:0] Y; - input Sub; - output [63:0] S; - wire CO; - wire [63:0] A,B,Q, Bbar; //***KEP was 0:63 - changed due to lint warning - - DBLCADDER_64_64 U1 (A , Bbar , Sub , Q , CO ); - assign A[0] = X[0]; - assign B[0] = Y[0]; - assign A[1] = X[1]; - assign B[1] = Y[1]; - assign A[2] = X[2]; - assign B[2] = Y[2]; - assign A[3] = X[3]; - assign B[3] = Y[3]; - assign A[4] = X[4]; - assign B[4] = Y[4]; - assign A[5] = X[5]; - assign B[5] = Y[5]; - assign A[6] = X[6]; - assign B[6] = Y[6]; - assign A[7] = X[7]; - assign B[7] = Y[7]; - assign A[8] = X[8]; - assign B[8] = Y[8]; - assign A[9] = X[9]; - assign B[9] = Y[9]; - assign A[10] = X[10]; - assign B[10] = Y[10]; - assign A[11] = X[11]; - assign B[11] = Y[11]; - assign A[12] = X[12]; - assign B[12] = Y[12]; - assign A[13] = X[13]; - assign B[13] = Y[13]; - assign A[14] = X[14]; - assign B[14] = Y[14]; - assign A[15] = X[15]; - assign B[15] = Y[15]; - assign A[16] = X[16]; - assign B[16] = Y[16]; - assign A[17] = X[17]; - assign B[17] = Y[17]; - assign A[18] = X[18]; - assign B[18] = Y[18]; - assign A[19] = X[19]; - assign B[19] = Y[19]; - assign A[20] = X[20]; - assign B[20] = Y[20]; - assign A[21] = X[21]; - assign B[21] = Y[21]; - assign A[22] = X[22]; - assign B[22] = Y[22]; - assign A[23] = X[23]; - assign B[23] = Y[23]; - assign A[24] = X[24]; - assign B[24] = Y[24]; - assign A[25] = X[25]; - assign B[25] = Y[25]; - assign A[26] = X[26]; - assign B[26] = Y[26]; - assign A[27] = X[27]; - assign B[27] = Y[27]; - assign A[28] = X[28]; - assign B[28] = Y[28]; - assign A[29] = X[29]; - assign B[29] = Y[29]; - assign A[30] = X[30]; - assign B[30] = Y[30]; - assign A[31] = X[31]; - assign B[31] = Y[31]; - assign A[32] = X[32]; - assign B[32] = Y[32]; - assign A[33] = X[33]; - assign B[33] = Y[33]; - assign A[34] = X[34]; - assign B[34] = Y[34]; - assign A[35] = X[35]; - assign B[35] = 
Y[35]; - assign A[36] = X[36]; - assign B[36] = Y[36]; - assign A[37] = X[37]; - assign B[37] = Y[37]; - assign A[38] = X[38]; - assign B[38] = Y[38]; - assign A[39] = X[39]; - assign B[39] = Y[39]; - assign A[40] = X[40]; - assign B[40] = Y[40]; - assign A[41] = X[41]; - assign B[41] = Y[41]; - assign A[42] = X[42]; - assign B[42] = Y[42]; - assign A[43] = X[43]; - assign B[43] = Y[43]; - assign A[44] = X[44]; - assign B[44] = Y[44]; - assign A[45] = X[45]; - assign B[45] = Y[45]; - assign A[46] = X[46]; - assign B[46] = Y[46]; - assign A[47] = X[47]; - assign B[47] = Y[47]; - assign A[48] = X[48]; - assign B[48] = Y[48]; - assign A[49] = X[49]; - assign B[49] = Y[49]; - assign A[50] = X[50]; - assign B[50] = Y[50]; - assign A[51] = X[51]; - assign B[51] = Y[51]; - assign A[52] = X[52]; - assign B[52] = Y[52]; - assign A[53] = X[53]; - assign B[53] = Y[53]; - assign A[54] = X[54]; - assign B[54] = Y[54]; - assign A[55] = X[55]; - assign B[55] = Y[55]; - assign A[56] = X[56]; - assign B[56] = Y[56]; - assign A[57] = X[57]; - assign B[57] = Y[57]; - assign A[58] = X[58]; - assign B[58] = Y[58]; - assign A[59] = X[59]; - assign B[59] = Y[59]; - assign A[60] = X[60]; - assign B[60] = Y[60]; - assign A[61] = X[61]; - assign B[61] = Y[61]; - assign A[62] = X[62]; - assign B[62] = Y[62]; - assign A[63] = X[63]; - assign B[63] = Y[63]; - assign S[0] = Q[0]; - assign S[1] = Q[1]; - assign S[2] = Q[2]; - assign S[3] = Q[3]; - assign S[4] = Q[4]; - assign S[5] = Q[5]; - assign S[6] = Q[6]; - assign S[7] = Q[7]; - assign S[8] = Q[8]; - assign S[9] = Q[9]; - assign S[10] = Q[10]; - assign S[11] = Q[11]; - assign S[12] = Q[12]; - assign S[13] = Q[13]; - assign S[14] = Q[14]; - assign S[15] = Q[15]; - assign S[16] = Q[16]; - assign S[17] = Q[17]; - assign S[18] = Q[18]; - assign S[19] = Q[19]; - assign S[20] = Q[20]; - assign S[21] = Q[21]; - assign S[22] = Q[22]; - assign S[23] = Q[23]; - assign S[24] = Q[24]; - assign S[25] = Q[25]; - assign S[26] = Q[26]; - assign S[27] = 
Q[27]; - assign S[28] = Q[28]; - assign S[29] = Q[29]; - assign S[30] = Q[30]; - assign S[31] = Q[31]; - assign S[32] = Q[32]; - assign S[33] = Q[33]; - assign S[34] = Q[34]; - assign S[35] = Q[35]; - assign S[36] = Q[36]; - assign S[37] = Q[37]; - assign S[38] = Q[38]; - assign S[39] = Q[39]; - assign S[40] = Q[40]; - assign S[41] = Q[41]; - assign S[42] = Q[42]; - assign S[43] = Q[43]; - assign S[44] = Q[44]; - assign S[45] = Q[45]; - assign S[46] = Q[46]; - assign S[47] = Q[47]; - assign S[48] = Q[48]; - assign S[49] = Q[49]; - assign S[50] = Q[50]; - assign S[51] = Q[51]; - assign S[52] = Q[52]; - assign S[53] = Q[53]; - assign S[54] = Q[54]; - assign S[55] = Q[55]; - assign S[56] = Q[56]; - assign S[57] = Q[57]; - assign S[58] = Q[58]; - assign S[59] = Q[59]; - assign S[60] = Q[60]; - assign S[61] = Q[61]; - assign S[62] = Q[62]; - assign S[63] = Q[63]; - assign Bbar = B ^ {64{Sub}}; - -endmodule // cla64 - -// This module performs 64-bit subtraction. It is used to get the two's complement -// of main addition or subtraction in the floating point adder. 
- -module cla_sub64 (S, X, Y); - - input [63:0] X; - input [63:0] Y; - - output [63:0] S; - - wire CO; - wire VDD = 1'b1; - wire [63:0] A,B,Q, Bbar; //***KEP was 0:63 - changed due to lint warning - - DBLCADDER_64_64 U1 (A , Bbar , VDD, Q , CO ); - assign A[0] = X[0]; - assign B[0] = Y[0]; - assign A[1] = X[1]; - assign B[1] = Y[1]; - assign A[2] = X[2]; - assign B[2] = Y[2]; - assign A[3] = X[3]; - assign B[3] = Y[3]; - assign A[4] = X[4]; - assign B[4] = Y[4]; - assign A[5] = X[5]; - assign B[5] = Y[5]; - assign A[6] = X[6]; - assign B[6] = Y[6]; - assign A[7] = X[7]; - assign B[7] = Y[7]; - assign A[8] = X[8]; - assign B[8] = Y[8]; - assign A[9] = X[9]; - assign B[9] = Y[9]; - assign A[10] = X[10]; - assign B[10] = Y[10]; - assign A[11] = X[11]; - assign B[11] = Y[11]; - assign A[12] = X[12]; - assign B[12] = Y[12]; - assign A[13] = X[13]; - assign B[13] = Y[13]; - assign A[14] = X[14]; - assign B[14] = Y[14]; - assign A[15] = X[15]; - assign B[15] = Y[15]; - assign A[16] = X[16]; - assign B[16] = Y[16]; - assign A[17] = X[17]; - assign B[17] = Y[17]; - assign A[18] = X[18]; - assign B[18] = Y[18]; - assign A[19] = X[19]; - assign B[19] = Y[19]; - assign A[20] = X[20]; - assign B[20] = Y[20]; - assign A[21] = X[21]; - assign B[21] = Y[21]; - assign A[22] = X[22]; - assign B[22] = Y[22]; - assign A[23] = X[23]; - assign B[23] = Y[23]; - assign A[24] = X[24]; - assign B[24] = Y[24]; - assign A[25] = X[25]; - assign B[25] = Y[25]; - assign A[26] = X[26]; - assign B[26] = Y[26]; - assign A[27] = X[27]; - assign B[27] = Y[27]; - assign A[28] = X[28]; - assign B[28] = Y[28]; - assign A[29] = X[29]; - assign B[29] = Y[29]; - assign A[30] = X[30]; - assign B[30] = Y[30]; - assign A[31] = X[31]; - assign B[31] = Y[31]; - assign A[32] = X[32]; - assign B[32] = Y[32]; - assign A[33] = X[33]; - assign B[33] = Y[33]; - assign A[34] = X[34]; - assign B[34] = Y[34]; - assign A[35] = X[35]; - assign B[35] = Y[35]; - assign A[36] = X[36]; - assign B[36] = Y[36]; - assign A[37] = 
X[37]; - assign B[37] = Y[37]; - assign A[38] = X[38]; - assign B[38] = Y[38]; - assign A[39] = X[39]; - assign B[39] = Y[39]; - assign A[40] = X[40]; - assign B[40] = Y[40]; - assign A[41] = X[41]; - assign B[41] = Y[41]; - assign A[42] = X[42]; - assign B[42] = Y[42]; - assign A[43] = X[43]; - assign B[43] = Y[43]; - assign A[44] = X[44]; - assign B[44] = Y[44]; - assign A[45] = X[45]; - assign B[45] = Y[45]; - assign A[46] = X[46]; - assign B[46] = Y[46]; - assign A[47] = X[47]; - assign B[47] = Y[47]; - assign A[48] = X[48]; - assign B[48] = Y[48]; - assign A[49] = X[49]; - assign B[49] = Y[49]; - assign A[50] = X[50]; - assign B[50] = Y[50]; - assign A[51] = X[51]; - assign B[51] = Y[51]; - assign A[52] = X[52]; - assign B[52] = Y[52]; - assign A[53] = X[53]; - assign B[53] = Y[53]; - assign A[54] = X[54]; - assign B[54] = Y[54]; - assign A[55] = X[55]; - assign B[55] = Y[55]; - assign A[56] = X[56]; - assign B[56] = Y[56]; - assign A[57] = X[57]; - assign B[57] = Y[57]; - assign A[58] = X[58]; - assign B[58] = Y[58]; - assign A[59] = X[59]; - assign B[59] = Y[59]; - assign A[60] = X[60]; - assign B[60] = Y[60]; - assign A[61] = X[61]; - assign B[61] = Y[61]; - assign A[62] = X[62]; - assign B[62] = Y[62]; - assign A[63] = X[63]; - assign B[63] = Y[63]; - assign S[0] = Q[0]; - assign S[1] = Q[1]; - assign S[2] = Q[2]; - assign S[3] = Q[3]; - assign S[4] = Q[4]; - assign S[5] = Q[5]; - assign S[6] = Q[6]; - assign S[7] = Q[7]; - assign S[8] = Q[8]; - assign S[9] = Q[9]; - assign S[10] = Q[10]; - assign S[11] = Q[11]; - assign S[12] = Q[12]; - assign S[13] = Q[13]; - assign S[14] = Q[14]; - assign S[15] = Q[15]; - assign S[16] = Q[16]; - assign S[17] = Q[17]; - assign S[18] = Q[18]; - assign S[19] = Q[19]; - assign S[20] = Q[20]; - assign S[21] = Q[21]; - assign S[22] = Q[22]; - assign S[23] = Q[23]; - assign S[24] = Q[24]; - assign S[25] = Q[25]; - assign S[26] = Q[26]; - assign S[27] = Q[27]; - assign S[28] = Q[28]; - assign S[29] = Q[29]; - assign S[30] = 
Q[30]; - assign S[31] = Q[31]; - assign S[32] = Q[32]; - assign S[33] = Q[33]; - assign S[34] = Q[34]; - assign S[35] = Q[35]; - assign S[36] = Q[36]; - assign S[37] = Q[37]; - assign S[38] = Q[38]; - assign S[39] = Q[39]; - assign S[40] = Q[40]; - assign S[41] = Q[41]; - assign S[42] = Q[42]; - assign S[43] = Q[43]; - assign S[44] = Q[44]; - assign S[45] = Q[45]; - assign S[46] = Q[46]; - assign S[47] = Q[47]; - assign S[48] = Q[48]; - assign S[49] = Q[49]; - assign S[50] = Q[50]; - assign S[51] = Q[51]; - assign S[52] = Q[52]; - assign S[53] = Q[53]; - assign S[54] = Q[54]; - assign S[55] = Q[55]; - assign S[56] = Q[56]; - assign S[57] = Q[57]; - assign S[58] = Q[58]; - assign S[59] = Q[59]; - assign S[60] = Q[60]; - assign S[61] = Q[61]; - assign S[62] = Q[62]; - assign S[63] = Q[63]; - assign Bbar = ~B; - -endmodule // cla_sub64 \ No newline at end of file diff --git a/pipelined/src/fpu/exception.sv b/pipelined/src/fpu/exception.sv deleted file mode 100755 index bccfa01f4..000000000 --- a/pipelined/src/fpu/exception.sv +++ /dev/null @@ -1,83 +0,0 @@ -// Exception logic for the floating point adder. Note: We may -// actually want to move to where the result is computed. 
- -module exception ( - - input logic [2:0] op_type, // Function opcode - input logic XSgnE, YSgnE, - // input logic [52:0] XManE, YManE, - input logic XDenormE, YDenormE, - input logic XNormE, YNormE, - input logic XZeroE, YZeroE, - input logic XInfE, YInfE, - input logic XNaNE, YNaNE, - input logic XSNaNE, YSNaNE, - output logic [3:0] Ztype, // Indicates type of result (Z) - output logic Invalid, // Invalid operation exception - output logic Denorm, // Denormalized logic - output logic Sub // The effective operation is subtraction -); - wire ZQNaN; // '1' if result Z is a quiet NaN - wire ZPInf; // '1' if result Z positive infnity - wire ZNInf; // '1' if result Z negative infnity - wire add_sub; // '1' if operation is add or subtract - wire converts; // See if there are any converts - - - - // Is this instruction a convert - assign converts = op_type[1]; - - - - // An "Invalid Operation" exception occurs if (A or B is a signalling NaN) - // or (A and B are both Infinite and the "effective operation" is - // subtraction). - assign add_sub = ~op_type[1]; - assign Invalid = (XSNaNE | YSNaNE | (add_sub & XInfE & YInfE & (XSgnE^YSgnE^op_type[0]))) & ~converts; - - // The Denorm flag is set if (A is denormlized and the operation is not integer - // conversion ) or (if B is normalized and the operation is addition or subtraction). - assign Denorm = XDenormE | YDenormE & add_sub; - - // The result is a quiet NaN if (an "Invalid Operation" exception occurs) - // or (A is a NaN) or (B is a NaN and the operation uses B). - assign ZQNaN = Invalid | XNaNE | (YNaNE & add_sub); - - // The result is +Inf if ((A is +Inf) or (B is -Inf and the operation is - // subtraction) or (B is +Inf and the operation is addition)) and (the - // result is not a quiet NaN). 
- assign ZPInf = (XInfE&XSgnE | add_sub&YInfE&(~YSgnE^op_type[0]))&~ZQNaN; - - // The result is -Inf if ((A is -Inf) or (B is +Inf and the operation is - // subtraction) or (B is -Inf and the operation is addition)) and the - // result is not a quiet NaN. - assign ZNInf = (XInfE&~XSgnE | add_sub&YInfE&(YSgnE^op_type[0]))&~ZQNaN; - - // Set the type of the result as follows: - // (needs optimization - got lazy or was late) - // Ztype Result - // 0000 Normal - // 0001 Quiet NaN - // 0010 Negative Infinity - // 0011 Positive Infinity - // 0100 +Bzero and +Azero (and vice-versa) - // 0101 +Bzero and -Azero (and vice-versa) - // 1000 Convert SP to DP (and vice-versa) - - assign Ztype[0] = (ZQNaN | ZPInf) | - ((XZeroE & YZeroE & (XSgnE^YSgnE^op_type[0])) - & ~converts); - assign Ztype[1] = (ZNInf | ZPInf) | - (((XZeroE & YZeroE & XSgnE & YSgnE & ~op_type[0]) | - (XZeroE & YZeroE & XSgnE & ~YSgnE & op_type[0])) - & ~converts); - assign Ztype[2] = ((XZeroE & YZeroE & ~op_type[1]) - & ~converts); - assign Ztype[3] = (op_type[1] & ~op_type[0]); - - // Determine if the effective operation is subtraction - assign Sub = add_sub & (XSgnE^YSgnE^op_type[0]); - -endmodule // exception - diff --git a/pipelined/src/fpu/faddcvt.sv b/pipelined/src/fpu/faddcvt.sv deleted file mode 100755 index e5dddb7e9..000000000 --- a/pipelined/src/fpu/faddcvt.sv +++ /dev/null @@ -1,426 +0,0 @@ -// -// File name : fpadd -// Title : Floating-Point Adder/Subtractor -// project : FPU -// Library : fpadd -// Author(s) : James E. Stine, Jr., Brett Mathis -// Purpose : definition of main unit to floating-point add/sub -// notes : -// -// Copyright Oklahoma State University -// Copyright AFRL -// -// Basic and Denormalized Operations -// -// Step 1: Load operands, set flags, and convert SP to DP -// Step 2: Check for special inputs ( +/- Infinity, NaN) -// Step 3: Compare exponents. 
Swap the operands of exp1 < exp2 -// or of (exp1 = exp2 AND mnt1 < mnt2) -// Step 4: Shift the mantissa corresponding to the smaller exponent, -// and extend precision by three bits to the right. -// Step 5: Add or subtract the mantissas. -// Step 6: Normalize the result.// -// Shift left until normalized. Normalized when the value to the -// left of the binrary point is 1. -// Step 7: Round the result.// -// Step 8: Put sum onto output. -// - -module faddcvt( - input logic clk, - input logic reset, - input logic FlushM, // flush the memory stage - input logic StallM, // stall the memory stage - input logic [63:0] FSrcXE, // 1st input operand (A) - input logic [63:0] FSrcYE, // 2nd input operand (B) - input logic [2:0] FOpCtrlE, FOpCtrlM, // Function opcode - input logic FmtE, FmtM, // Result Precision (0 for double, 1 for single) - input logic [2:0] FrmM, // Rounding mode - specify values - input logic XSgnE, YSgnE, - input logic [52:0] XManE, YManE, - input logic [10:0] XExpE, YExpE, - input logic XSgnM, YSgnM, - input logic [52:0] XManM, YManM, - input logic [10:0] XExpM, YExpM, - input logic XDenormE, YDenormE, - input logic XNormE, YNormE, - input logic XNormM, YNormM, - input logic XZeroE, YZeroE, - input logic XInfE, YInfE, - input logic XNaNE, YNaNE, - input logic XSNaNE, YSNaNE, - output logic [63:0] FAddResM, // Result of operation - output logic [4:0] FAddFlgM); // IEEE exception flags - - logic [63:0] AddSumE, AddSumM; - logic [63:0] AddSumTcE, AddSumTcM; - logic [3:0] AddSelInvE, AddSelInvM; - logic [10:0] AddExpPostSumE,AddExpPostSumM; - logic AddCorrSignE, AddCorrSignM; - logic AddOpANormE, AddOpANormM; - logic AddOpBNormE, AddOpBNormM; - logic AddInvalidE, AddInvalidM; - logic AddDenormInE, AddDenormInM; - logic AddSwapE, AddSwapM; - logic AddSignAE, AddSignAM; - logic [11:0] AddExp1DenormE, AddExp2DenormE, AddExp1DenormM, AddExp2DenormM; - logic [10:0] AddExponentE, AddExponentM; - - - fpuaddcvt1 fpadd1 (.FOpCtrlE, .FmtE, .AddExponentE, - 
.AddExpPostSumE, .AddExp1DenormE, .AddExp2DenormE, .AddSumE, .AddSumTcE, .AddSelInvE, - .XSgnE, .YSgnE,.XManE, .YManE, .XExpE, .YExpE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, - .AddCorrSignE, .AddSignAE, .AddOpANormE, .AddOpBNormE, .AddInvalidE, - .AddDenormInE, .AddSwapE); - - // E/M pipeline registers - flopenrc #(64) EMRegAdd1(clk, reset, FlushM, ~StallM, AddSumE, AddSumM); - flopenrc #(64) EMRegAdd2(clk, reset, FlushM, ~StallM, AddSumTcE, AddSumTcM); - flopenrc #(11) EMRegAdd3(clk, reset, FlushM, ~StallM, AddExpPostSumE, AddExpPostSumM); - flopenrc #(12) EMRegAdd6(clk, reset, FlushM, ~StallM, AddExp1DenormE, AddExp1DenormM); - flopenrc #(12) EMRegAdd7(clk, reset, FlushM, ~StallM, AddExp2DenormE, AddExp2DenormM); - flopenrc #(11) EMRegAdd8(clk, reset, FlushM, ~StallM, AddExponentE, AddExponentM); - flopenrc #(11) EMRegAdd9(clk, reset, FlushM, ~StallM, - {AddSelInvE, AddCorrSignE, AddOpANormE, AddOpBNormE, AddInvalidE, AddDenormInE, AddSwapE, AddSignAE}, - {AddSelInvM, AddCorrSignM, AddOpANormM, AddOpBNormM, AddInvalidM, AddDenormInM, AddSwapM, AddSignAM}); - - - fpuaddcvt2 fpadd2 (.FrmM, .FOpCtrlM, .FmtM, .AddSumM, .AddSumTcM, .XNormM, .YNormM, - .AddExp1DenormM, .AddExp2DenormM, .AddExponentM, .AddExpPostSumM, .AddSelInvM, .XSgnM, .YSgnM, .XManM, .YManM, .XExpM, .YExpM, - .AddOpANormM, .AddOpBNormM, .AddInvalidM, .AddDenormInM, - .AddSignAM, .AddCorrSignM, .AddSwapM, .FAddResM, .FAddFlgM); -endmodule - -module fpuaddcvt1 ( - input logic [2:0] FOpCtrlE, // Function opcode - input logic FmtE, // Result Precision (1 for double, 0 for single) - input logic XSgnE, YSgnE, - input logic [10:0] XExpE, YExpE, - input logic [52:0] XManE, YManE, - input logic XDenormE, YDenormE, - input logic XNormE, YNormE, - input logic XZeroE, YZeroE, - input logic XInfE, YInfE, - input logic XNaNE, YNaNE, - input logic XSNaNE, YSNaNE, - - output logic [10:0] AddExponentE, - output logic [10:0] AddExpPostSumE, - output 
logic [11:0] AddExp1DenormE, AddExp2DenormE,//KEP used to be [10:0] - output logic [63:0] AddSumE, AddSumTcE, - output logic [3:0] AddSelInvE, - output logic AddCorrSignE, - output logic AddSignAE, - output logic AddOpANormE, AddOpBNormE, - output logic AddInvalidE, - output logic AddDenormInE, - output logic AddSwapE - ); - - logic [5:0] ZP_mantissaA; - logic [5:0] ZP_mantissaB; - wire ZV_mantissaA; - wire ZV_mantissaB; - - wire P; - assign P = ~(FmtE^FOpCtrlE[1]); - - wire [63:0] IntValue; - wire [11:0] exp1, exp2; - wire [11:0] exp_diff1, exp_diff2; - wire [11:0] exp_shift; - wire [51:0] mantissaA; - wire [56:0] mantissaA1; - wire [63:0] mantissaA3; - wire [51:0] mantissaB; - wire [56:0] mantissaB1, mantissaB2; - wire [63:0] mantissaB3; - wire exp_gt63; - wire Sticky_out; - wire sub; - wire zeroB; - wire [5:0] align_shift; - - // Test for exceptions and return the "Invalid Operation" and - // "Denormalized" Input Flags. The "AddSelInvE" is used in - // the third pipeline stage to select the result. Also, AddOp1NormE - // and AddOp2NormE are one if FSrcXE and FSrcYE are not zero or denormalized. - // sub is one if the effective operation is subtaction. - - exception exc1 (.Ztype(AddSelInvE), .Invalid(AddInvalidE), .Denorm(AddDenormInE), .Sub(sub), - .XSgnE, .YSgnE, .XDenormE, .YDenormE, .XNormE, .YNormE, .XZeroE, .YZeroE, .XInfE, .YInfE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, - .op_type(FOpCtrlE)); - - // Perform Exponent Subtraction (used for alignment). For performance - // both exponent subtractions are performed in parallel. This was - // changed to a behavior level to allow the tools to try to optimize - // the two parallel additions. The input values are zero-extended to 12 - // bits prior to performing the addition. - - assign exp1 = {1'b0, XExpE}; - assign exp2 = {1'b0, YExpE}; - assign exp_diff1 = exp1 - exp2; - assign exp_diff2 = AddDenormInE ? 
({YSgnE, YExpE} - {XSgnE, XExpE}): exp2 - exp1; - - // The second operand (B) should be set to zero, if FOpCtrlE does not - // specify addition or subtraction - assign zeroB = FOpCtrlE[1]; - - // Swapped operands if zeroB is not one and exp1 < exp2. - // Swapping causes exp2 to be used for the result exponent. - // Only the exponent of the larger operand is used to determine - // the final result. - assign AddSwapE = exp_diff1[11] & ~zeroB; - assign AddExponentE = AddSwapE ? YExpE : XExpE; - assign AddExpPostSumE = AddSwapE ? YExpE : XExpE; - assign mantissaA = AddSwapE ? YManE[51:0] : XManE[51:0]; - assign mantissaB = AddSwapE ? XManE[51:0] : YManE[51:0]; - assign AddSignAE = AddSwapE ? YSgnE : XSgnE; - - // Leading-Zero Detector. Determine the size of the shift needed for - // normalization. If sum_corrected is all zeros, the exp_valid is - // zero; otherwise, it is one. - // modified to 52 bits to detect leading zeroes on denormalized mantissas - // lz52 lz_norm_1 (ZP_mantissaA, ZV_mantissaA, mantissaA); - // lz52 lz_norm_2 (ZP_mantissaB, ZV_mantissaB, mantissaB); - logic [8:0] i; - logic [8:0] j; - always_comb begin - i = 0; - while (~mantissaA[52-i] & $unsigned(i) <= $unsigned(52)) i = i+1; // search for leading one - ZP_mantissaA = i; - end - always_comb begin - j = 0; - while (~mantissaB[52-j] & $unsigned(j) <= $unsigned(52)) j = j+1; // search for leading one - ZP_mantissaB = j; - end - - // Denormalized exponents created by subtracting the leading zeroes from the original exponents - assign AddExp1DenormE = AddSwapE ? (exp1 - {6'b0, ZP_mantissaB}) : (exp1 - {6'b0, ZP_mantissaA}); //KEP extended ZP_mantissa - assign AddExp2DenormE = AddSwapE ? (exp2 - {6'b0, ZP_mantissaA}) : (exp2 - {6'b0, ZP_mantissaB}); - - // Determine the alignment shift and limit it to 63. If any bit from - // exp_shift[6] to exp_shift[11] is one, then shift is set to all ones. - assign exp_shift = AddSwapE ? 
exp_diff2 : exp_diff1; - assign exp_gt63 = exp_shift[11] | exp_shift[10] | exp_shift[9] - | exp_shift[8] | exp_shift[7] | exp_shift[6]; - assign align_shift = exp_shift[5:0] | {6{exp_gt63}}; //KEP used to be all of exp_shift - - // Unpack the 52-bit mantissas to 57-bit numbers of the form. - // 001.M[51]M[50] ... M[1]M[0]00 - // Unless the number has an exponent of zero, in which case it - // is unpacked as - // 000.00 ... 00 - // This effectively flushes denormalized values to zero. - // The three bits of to the left of the binary point prevent overflow - // and loss of sign information. The two bits to the right of the - // original mantissa form the "guard" and "round" bits that are used - // to round the result. - assign AddOpANormE = AddSwapE ? YNormE : XNormE; - assign AddOpBNormE = AddSwapE ? XNormE : YNormE; - assign mantissaA1 = {2'h0, AddOpANormE, mantissaA[51:0]&{52{AddOpANormE}}, 2'h0}; - assign mantissaB1 = {2'h0, AddOpBNormE, mantissaB[51:0]&{52{AddOpBNormE}}, 2'h0}; - - // Perform mantissa alignment using a 57-bit barrel shifter - // If any of the bits shifted out are one, Sticky_out is set. - // The size of the barrel shifter could be reduced by two bits - // by not adding the leading two zeros until after the shift. - barrel_shifter_r57 bs1 (mantissaB2, Sticky_out, mantissaB1, align_shift); - - // Place either the sign-extened 32-bit value or the original 64-bit value - // into IntValue (to be used for integer to floating point conversion) - // assign IntValue [31:0] = FSrcXE[31:0]; - // assign IntValue [63:32] = FOpCtrlE[0] ? {32{FSrcXE[31]}} : FSrcXE[63:32]; - - // If doing an integer to floating point conversion, mantissaA3 is set to - // IntVal and the prenomalized exponent is set to 1084. Otherwise, - // mantissaA3 is simply extended to 64-bits by setting the 7 LSBs to zero, - // and the exponent value is left unchanged. - // Under denormalized cases, the exponent before the rounder is set to 1 - // if the normal shift value is 11. 
- assign mantissaA3 = AddDenormInE ? ({12'h0, mantissaA}) : {mantissaA1, 7'h0}; - - // Put zero in for mantissaB3, if zeroB is one. Otherwise, B is extended to - // 64-bits by setting the 7 LSBs to the Sticky_out bit followed by six - // zeros. - assign mantissaB3[63:7] = AddDenormInE ? {12'h0, mantissaB[51:7]} : mantissaB2 & {57{~zeroB}}; - assign mantissaB3[6] = AddDenormInE ? mantissaB[6] : Sticky_out & ~zeroB; - assign mantissaB3[5:0] = AddDenormInE ? mantissaB[5:0] : 6'h0; - - // The sign of the result needs to be corrected if the true - // operation is subtraction and the input operands were swapped. - assign AddCorrSignE = ~FOpCtrlE[1]&FOpCtrlE[0]&AddSwapE; - - // 64-bit Mantissa Adder/Subtractor - cla64 add1 (AddSumE, mantissaA3, mantissaB3, sub); //***adder - - // 64-bit Mantissa Subtractor - to get the two's complement of the - // result when the sign from the adder/subtractor is negative. - cla_sub64 sub1 (AddSumTcE, mantissaB3, mantissaA3); //***adder - - // Finds normal underflow result to determine whether to round final exponent down - //***KEP used to be (AddSumE == 16'h0) I am unsure what it's supposed to be - // assign AddNormOvflowE = (AddDenormInE & (AddSumE == 64'h0) & (AddOpANormE | AddOpBNormE) & ~FOpCtrlE[0]) ? 1'b1 : (AddSumE[63] ? AddSumTcE[52] : AddSumE[52]); - -endmodule // fpadd - - -// -// File name : fpadd -// Title : Floating-Point Adder/Subtractor -// project : FPU -// Library : fpadd -// Author(s) : James E. Stine, Jr., Brett Mathis -// Purpose : definition of main unit to floating-point add/sub -// notes : -// -// Copyright Oklahoma State University -// Copyright AFRL -// -// Basic and Denormalized Operations -// -// Step 1: Load operands, set flags, and AddConvertM SP to DP -// Step 2: Check for special inputs ( +/- Infinity, NaN) -// Step 3: Compare exponents. 
Swap the operands of exp1 < exp2 -// or of (exp1 = exp2 AND mnt1 < mnt2) -// Step 4: Shift the mantissa corresponding to the smaller AddExponentM, -// and extend precision by three bits to the right. -// Step 5: Add or subtract the mantissas. -// Step 6: Normalize the result.// -// Shift left until normalized. Normalized when the value to the -// left of the binrary point is 1. -// Step 7: Round the result.// -// Step 8: Put AddSumM onto output. -// - - -module fpuaddcvt2 ( - input logic [2:0] FrmM, // Rounding mode - specify values - input logic [2:0] FOpCtrlM, // Function opcode - input logic FmtM, // Result Precision (0 for double, 1 for single) - input logic [63:0] AddSumM, AddSumTcM, - input logic [11:0] AddExp1DenormM, AddExp2DenormM, - input logic [10:0] AddExponentM, AddExpPostSumM, - input logic [3:0] AddSelInvM, - input logic XSgnM, YSgnM, - input logic [52:0] XManM, YManM, - input logic [10:0] XExpM, YExpM, - input logic XNormM, YNormM, - input logic AddOpANormM, AddOpBNormM, - input logic AddInvalidM, - input logic AddDenormInM, - input logic AddSignAM, - input logic AddCorrSignM, - input logic AddSwapM, - - output logic [63:0] FAddResM, // Result of operation - output logic [4:0] FAddFlgM // IEEE exception flags -); - wire AddDenormM; // AddDenormM on input or output - - wire P; - assign P = ~(FmtM^FOpCtrlM[1]); - - wire [10:0] exp_pre; - wire [63:0] Result; - wire [63:0] sum_norm, sum_norm_w_bypass; - wire [5:0] norm_shift, norm_shift_denorm; - wire exp_valid; - wire DenormIO; - wire [4:0] FlagsIn; - wire Sticky_out; - wire sign_corr; - wire zeroB; - wire mantissa_comp; - wire mantissa_comp_sum; - wire mantissa_comp_sum_tc; - wire Float1_sum_comp; - wire Float2_sum_comp; - wire Float1_sum_tc_comp; - wire Float2_sum_tc_comp; - wire normal_underflow; - wire [63:0] sum_corr; - logic AddNormOvflowM; - - - logic AddOvEnM; // Overflow trap enabled - logic AddUnEnM; // Underflow trap enabled - - assign AddOvEnM = 1'b1; - assign AddUnEnM = 1'b1; - 
//AddExponentM value pre-rounding with considerations for denormalized - //cases/conversion cases - assign exp_pre = AddDenormInM ? - ((norm_shift == 6'b001011) ? 11'b00000000001 : (AddSwapM ? AddExp2DenormM[10:0] : AddExp1DenormM[10:0])) - : AddExponentM; - - - // Finds normal underflow result to determine whether to round final AddExponentM down - // Comparison between each float and the resulting AddSumM of the primary cla adder/subtractor and cla subtractor - assign Float1_sum_comp = ~(XManM[51:0] > AddSumM[51:0]); - assign Float2_sum_comp = ~(YManM[51:0] > AddSumM[51:0]); - assign Float1_sum_tc_comp = ~(XManM[51:0] > AddSumTcM[51:0]); - assign Float2_sum_tc_comp = ~(YManM[51:0] > AddSumTcM[51:0]); - - // Determines the correct Float value to compare based on AddSwapM result - assign mantissa_comp_sum = AddSwapM ? Float2_sum_comp : Float1_sum_comp; - assign mantissa_comp_sum_tc = AddSwapM ? Float2_sum_tc_comp : Float1_sum_tc_comp; - - // Determines the correct comparison result based on operation and sign of resulting AddSumM - assign mantissa_comp = (FOpCtrlM[0] ^ AddSumM[63]) ? mantissa_comp_sum_tc : mantissa_comp_sum; - - // If the signs are different and both operands aren't denormalized - // the normal underflow bit is needed and therefore updated. - assign normal_underflow = ((XSgnM ^ YSgnM) & (AddOpANormM | AddOpBNormM)) ? mantissa_comp : 1'b0; - - // Determine the correct sign of the result - assign sign_corr = (AddCorrSignM ^ AddSignAM) ^ AddSumM[63]; - - // If the AddSumM is negative, use its two complement instead. - // This value has to be 64-bits to correctly handle the - // case 10...00 - assign sum_corr = (AddDenormInM & (AddOpANormM | AddOpBNormM) & ( ( (XSgnM ~^ YSgnM) & FOpCtrlM[0] ) | ((XSgnM ^ YSgnM) & ~FOpCtrlM[0]) )) - ? (AddSumM[63] ? AddSumM : AddSumTcM) : (AddSumM[63] ? 
AddSumTcM : AddSumM); - - // Finds normal underflow result to determine whether to round final AddExponentM down - //KEP used to be (AddSumM == 16'h0) not sure what it is supposed to be - assign AddNormOvflowM = (AddDenormInM & (AddSumM == 64'h0) & (AddOpANormM | AddOpBNormM) & ~FOpCtrlM[0]) ? 1'b1 : (AddSumM[63] ? AddSumTcM[52] : AddSumM[52]); - - // Leading-Zero Detector. Determine the size of the shift needed for - // normalization. If sum_corrected is all zeros, the exp_valid is - // zero; otherwise, it is one. - lz64 lzd1 (norm_shift, exp_valid, sum_corr); - - assign norm_shift_denorm = (AddDenormInM & ( (~AddOpANormM & ~AddOpBNormM) | normal_underflow)) ? (6'h00) : (norm_shift); - - // Barell shifter used for normalization. It takes as inputs the - // the corrected AddSumM and the amount by which the AddSumM should - // be right shifted. It outputs the normalized AddSumM. - barrel_shifter_l64 bs2 (sum_norm, sum_corr, norm_shift_denorm); - - assign sum_norm_w_bypass = sum_norm; - - // Round the mantissa to a 52-bit value, with the leading one - // removed. If the result is a single precision number, the actual - // mantissa is in the upper 23 bits and the lower 29 bits are zero. - // At this point, normalization has already been performed, so we know - // exactly where the rounding point is. The rounding units also - // handles special cases and set the exception flags. - - // Changed DenormIO -> AddDenormM and FlagsIn -> FAddFlgM in order to - // help in processor reservation station detection of load/stores. In - // other words, the processor would like to know ahead of time that - // if the result is an exception then don't load or store. 
- rounder round1 (.Result, .DenormIO, .Flags(FlagsIn), .rm(FrmM), .P, .OvEn(AddOvEnM), .UnEn(AddUnEnM), .exp_valid, - .sel_inv(AddSelInvM), .Invalid(AddInvalidM), .DenormIn(AddDenormInM), .Asign(sign_corr), .Aexp(exp_pre), .norm_shift, .A(sum_norm_w_bypass), - .exponent_postsum(AddExpPostSumM), .A_Norm(XNormM), .B_Norm(YNormM), .exp_A_unmodified({XSgnM, XExpM}), .exp_B_unmodified({YSgnM, YExpM}), - .normal_overflow(AddNormOvflowM), .normal_underflow, .swap(AddSwapM), .op_type(FOpCtrlM), .sum(AddSumM)); - - // Store the final result and the exception flags in registers. - assign FAddResM = Result; - assign {AddDenormM, FAddFlgM} = {DenormIO, FlagsIn}; - -endmodule // fpadd - - diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 55e6706c5..227b0098e 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -11,15 +11,15 @@ module fcvt ( input logic [2:0] FOpCtrlE, // choose which opperation (look below for values) input logic FWriteIntE, // is fp->int (since it's writting to the integer register) input logic XZeroE, // is the input zero - input logic XOrigDenormE, // is the input denormalized + input logic XDenormE, // is the input denormalized input logic XInfE, // is the input infinity input logic XNaNE, // is the input a NaN input logic XSNaNE, // is the input a signaling NaN input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [`FPSIZES/3:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half) - output logic [`FLEN-1:0] CvtResE, // the fp to fp conversion's result - output logic [`XLEN-1:0] CvtIntResE, // the fp to fp conversion's result - output logic [4:0] CvtFlgE // the fp to fp conversion's flags + output logic [`FLEN-1:0] CvtResE, // the fp conversion result + output logic [`XLEN-1:0] CvtIntResE, // the int conversion result + output logic [4:0] CvtFlgE // the 
conversion's flags ); // OpCtrls: @@ -39,9 +39,10 @@ module fcvt ( logic [`FPSIZES/3:0] OutFmt; // format of the output logic [`XLEN-1:0] PosInt; // the positive integer input + logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size logic [`LGLEN-1:0] LzcIn; // input to the Leading Zero Counter (priority encoder) logic [`NE:0] CalcExp; // the calculated expoent - logic [$clog2(`LGLEN):0] ShiftAmt; // how much to shift by + logic [$clog2(`LGLEN+1)-1:0] ShiftAmt; // how much to shift by logic [`LGLEN+`NF:0] ShiftIn; // number to be shifted logic ResDenormUf;// does the result underflow or is denormalized logic ResUf; // does the result underflow @@ -71,6 +72,7 @@ module fcvt ( logic Int64; // is the integer 64 bits? logic IntToFp; // is the opperation an int->fp conversion? logic ToInt; // is the opperation an fp->int conversion? + logic [$clog2(`LGLEN+1)-1:0] ZeroCnt; // output from the LZC // seperate OpCtrl for code readability @@ -91,18 +93,11 @@ module fcvt ( /////////////////////////////////////////////////////////////////////////// // negation /////////////////////////////////////////////////////////////////////////// - // negate the input if the input is a negitive singed integer - // - remove leading ones if the input is a unsigned 32-bit integer - // - // Negitive input - // 64-bit input : negate the input - // 32-bit input : trim to 32-bits and negate the input - // Positive input - // 64-bit input : do nothing - // 32-bit input : trim to 32-bits + // 1) negate the input if the input is a negitive singed integer + // 2) trim the input to the proper size (kill the 32 most significant zeroes if needed) - assign PosInt = ResSgn ? Int64 ? -ForwardedSrcAE : {{`XLEN-32{1'b0}}, -ForwardedSrcAE[31:0]} : - Int64 ? ForwardedSrcAE : {{`XLEN-32{1'b0}}, ForwardedSrcAE[31:0]}; + assign PosInt = ResSgn ? 
-ForwardedSrcAE : ForwardedSrcAE; + assign TrimInt = {{`XLEN-32{Int64}}, {32{1'b1}}} & PosInt; /////////////////////////////////////////////////////////////////////////// // lzc @@ -111,16 +106,10 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign LzcIn = IntToFp ? {PosInt, {`LGLEN-`XLEN{1'b0}}} : // I->F - {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; // F->F + assign LzcIn = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} : + {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; - // lglen is the largest possible value of ZeroCnt (NF or XLEN) hence normcnt must be log2(lglen) bits - logic [$clog2(`LGLEN):0] i, ZeroCnt; - always_comb begin - i = 0; - while (~LzcIn[`LGLEN-1-i] & i <= `LGLEN-1) i = i+1; // search for leading one - ZeroCnt = i; - end + lzc #(`LGLEN) lzc (.num(LzcIn), .ZeroCnt); /////////////////////////////////////////////////////////////////////////// @@ -154,9 +143,9 @@ module fcvt ( // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN):0]&{$clog2(`LGLEN)+1{~CalcExp[`NE]}} : - ResDenormUf&~IntToFp ? ($clog2(`LGLEN)+1)'(`NF-1)+CalcExp[$clog2(`LGLEN):0] : - (ZeroCnt+1)&{$clog2(`LGLEN)+1{XOrigDenormE|IntToFp}}; + assign ShiftAmt = ToInt ? CalcExp[$clog2(`LGLEN+1)-1:0]&{$clog2(`LGLEN+1){~CalcExp[`NE]}} : + ResDenormUf&~IntToFp ? ($clog2(`LGLEN+1))'(`NF-1)+CalcExp[$clog2(`LGLEN+1)-1:0] : + (ZeroCnt+1)&{$clog2(`LGLEN+1){XDenormE|IntToFp}}; // shift // fp -> int: | `XLEN zeros | Mantissa | 0's if nessisary | << CalcExp @@ -266,13 +255,13 @@ module fcvt ( // | keep | // // - if the input is denormalized then we dont shift... 
so the "- (ZeroCnt+1)" is just leftovers from other options - // int -> fp : largest bias XLEN - Largest bias + new bias - 1 - ZeroCnt = XLEN + NewBias - 1 - ZeroCnt + // int -> fp : largest bias + XLEN - Largest bias + new bias - 1 - ZeroCnt = XLEN + NewBias - 1 - ZeroCnt // Process: // - shifted right by XLEN (XLEN) // - shift left to normilize (-1-ZeroCnt) // - newBias to make the biased exponent // - assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XOrigDenormE|IntToFp} - {{`NE-$clog2(`LGLEN){1'b0}}, (ZeroCnt&{$clog2(`LGLEN)+1{XOrigDenormE|IntToFp}})}; + assign CalcExp = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-$clog2(`LGLEN+1)+1{1'b0}}, (ZeroCnt&{$clog2(`LGLEN+1){XDenormE|IntToFp}})}; // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion @@ -568,7 +557,7 @@ module fcvt ( // - do so if the result underflows, is zero (the exp doesnt calculate correctly). 
or the integer input is 0 // - dont set to zero if fp input is zero but not using the fp input // - dont set to zero if int input is zero but not using the int input - assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|PosInt&IntToFp)); + assign KillRes = (ResUf|(XZeroE&~IntToFp)|(~|TrimInt&IntToFp)); if (`FPSIZES == 1) begin // IEEE sends a payload while Riscv says to send a canonical quiet NaN @@ -755,7 +744,7 @@ module fcvt ( NaNRes = {{`Q_LEN-`H_LEN{1'b1}}, 1'b0, {`H_NE+1{1'b1}}, {`H_NF-1{1'b0}}}; end // determine the infinity result - // - if the input was infinity or rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign + // - if the input overflows in rounding mode RZ, RU, RD (and not rounding the value) then output the maximum normalized floating point number with the correct sign // - otherwise: output infinity with the correct sign // - kill the infinity singal if the input isn't fp InfRes = (~XInfE|IntToFp)&((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~ResSgn) | (FrmE[1:0]==2'b11&ResSgn)) ? 
{{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`Q_LEN-`H_LEN{1'b1}}, ResSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; diff --git a/pipelined/src/fpu/fcvtfp.sv b/pipelined/src/fpu/fcvtfp.sv deleted file mode 100644 index f43d15661..000000000 --- a/pipelined/src/fpu/fcvtfp.sv +++ /dev/null @@ -1,190 +0,0 @@ - -`include "wally-config.vh" -module fcvtfp ( - input logic [10:0] XExpE, // input's exponent - input logic [52:0] XManE, // input's mantissa - input logic XSgnE, // input's sign - input logic XZeroE, // is the input zero - input logic XDenormE, // is the input denormalized - input logic XInfE, // is the input infinity - input logic XNaNE, // is the input a NaN - input logic XSNaNE, // is the input a signaling NaN - input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic FmtE, // the input's precision (1 = double 0 = single) - output logic [63:0] CvtFpResE, // the fp to fp conversion's result - output logic [4:0] CvtFpFlgE); // the fp to fp conversion's flags - - logic [12:0] DSExp; // double to single precision exponent - logic Denorm; // is the double to single precision result denormalized - logic Shift; // do you shift the double precision exponent (if single precision result is denormalized) - logic [51:0] SDFrac; // single to double precision fraction - logic [25:0] DSFrac; // double to single precision fraction - logic [77:0] DSFracShifted; // single precision fraction shifted for double precision - logic Sticky, UfSticky, Guard, Round, LSBFrac, UfGuard, UfRound, UfLSBFrac; // rounding bits - logic CalcPlus1, UfCalcPlus1, Plus1, UfPlus1; // do you add one to the result - logic [12:0] DSExpFull; // full double to single exponent - logic [22:0] DSResFrac; // final double to single fraction - logic [7:0] DSResExp; // final double to single exponent - logic [10:0] SDExp; // final single to double precision 
exponent - logic Overflow, Underflow, Inexact; // flags - logic [31:0] DSRes; // double to single precision result - - - // add support for all formats - // consider reordering code blocks so upconverting is in one region of the file - // and downconverting is in the other region. - - /////////////////////////////////////////////////////////////////////////////// - // LZC: Leading Zero Counter - /////////////////////////////////////////////////////////////////////////////// - - // *** consider sharing this with fcvtint - // *** emphasize parallel structure between the two - // *** add a priorityencoder module to generic (similar to priorityonehot) and use it - - // LZC - find the first 1 in the input's mantissa - logic [8:0] i,NormCnt; - always_comb begin - i = 0; - while (~XManE[52-i] & i <= 52) i = i+1; // search for leading one - NormCnt = i; - end - - - /////////////////////////////////////////////////////////////////////////////// - // Expoents - /////////////////////////////////////////////////////////////////////////////// - - // convert the single precion exponent to single precision. 
- // - subtract the double precision exponent (1023) and add the - // single precsision exponent (127) - // - if the input is zero then kill the exponent - - assign DSExp = ({2'b0,XExpE}-13'd1023+13'd127)&{13{~XZeroE}}; - - // is the converted double to single precision exponent in the denormalized range - assign Denorm = $signed(DSExp) <= 0 & $signed(DSExp) > $signed(-(13'd23)); - - - // caluculate the final single to double precsion exponent - // - subtract the single precision bias (127) and add the double - // precision bias (127) - // - if the result is zero or denormalized, kill the exponent - assign SDExp = XExpE-({2'b0,NormCnt&{9{~XZeroE}}})+({11{XDenormE}}&1024-127); //*** seems ineffecient - - - - /////////////////////////////////////////////////////////////////////////////// - // Fraction - /////////////////////////////////////////////////////////////////////////////// - - - // normalize the single precision fraction for double precsion - // - needed for denormal single precsion values - assign SDFrac = XManE[51:0] << NormCnt; - - // check if the double precision mantissa needs to be shifted - // - the mantissa needs to be shifted if the single precision result is denormal - assign Shift = Denorm | (($signed(DSExp) > $signed(-(13'd25))) & DSExp[12]); - // shift the mantissa - assign DSFracShifted = {XManE, 25'b0} >> ((-DSExp+1)&{13{Shift}}); //***might be some optimization here - assign DSFrac = DSFracShifted[76:51]; - - - - /////////////////////////////////////////////////////////////////////////////// - // Rounder - /////////////////////////////////////////////////////////////////////////////// - - // used to determine underflow flag - assign UfSticky = |DSFracShifted[50:0]; - assign UfGuard = DSFrac[1]; - assign UfRound = DSFrac[0]; - assign UfLSBFrac = DSFrac[2]; - - - assign Sticky = UfSticky | UfRound; - assign Guard = DSFrac[2]; - assign Round = DSFrac[1]; - assign LSBFrac = DSFrac[3]; - - - always_comb begin // ***remove guard bit - // Determine 
if you add 1 - case (FrmE) - 3'b000: CalcPlus1 = Guard & (Round | (Sticky) | (~Round&~Sticky&LSBFrac));//round to nearest even - 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = XSgnE;//round down - 3'b011: CalcPlus1 = ~XSgnE;//round up - 3'b100: CalcPlus1 = (Guard & (Round | (Sticky) | (~Round&~Sticky)));//round to nearest max magnitude - default: CalcPlus1 = 1'bx; - endcase - // Determine if you add 1 (for underflow flag) - case (FrmE) - 3'b000: UfCalcPlus1 = UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky&UfLSBFrac));//round to nearest even - 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = XSgnE;//round down - 3'b011: UfCalcPlus1 = ~XSgnE;//round up - 3'b100: UfCalcPlus1 = (UfGuard & (UfRound | UfSticky | (~UfRound&~UfSticky)));//round to nearest max magnitude - default: UfCalcPlus1 = 1'bx; - endcase - - end - - // if an answer is exact don't round - assign Plus1 = CalcPlus1 & (Sticky | UfGuard | Guard | Round); - assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard); - - - - // round the double to single precision result - assign {DSExpFull, DSResFrac} = {DSExp&{13{~Denorm}}, DSFrac[25:3]} + {35'b0,Plus1}; - assign DSResExp = DSExpFull[7:0]; - - - /////////////////////////////////////////////////////////////////////////////// - // Flags - /////////////////////////////////////////////////////////////////////////////// - - // calculate the flags - // - overflow, underflow and inexact can only be set by the double to single precision opperation - // - don't set underflow or overflow if the input is NaN or Infinity - // - don't set the inexact flag if the input is NaN - assign Overflow = $signed(DSExpFull) >= $signed({5'b0, {8{1'b1}}}) & ~(XNaNE|XInfE); - assign Underflow = (($signed(DSExpFull) <= 0) & ((Sticky|Guard|Round) | (XManE[52]&~|DSFrac) | (|DSFrac&~Denorm)) | ((DSExpFull == 1) & Denorm & ~(UfPlus1&UfLSBFrac))) & ~(XNaNE|XInfE); - assign Inexact = (Sticky|Guard|Round|Underflow|Overflow) &~(XNaNE); - - // pack the flags together 
and choose the result based on the opperation - assign CvtFpFlgE = FmtE ? {XSNaNE, 1'b0, Overflow, Underflow, Inexact} : {XSNaNE, 4'b0}; - - - - /////////////////////////////////////////////////////////////////////////////// - // Result Selection - /////////////////////////////////////////////////////////////////////////////// - - if(`IEEE754) begin - // select the double to single precision result - assign DSRes = XNaNE ? {XSgnE, {8{1'b1}}, 1'b1, XManE[50:29]} : - Underflow & ~Denorm ? {XSgnE, 30'b0, CalcPlus1&(|FrmE[1:0]|Shift)} : - Overflow | XInfE ? ((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~XSgnE) | (FrmE[1:0]==2'b11&XSgnE)) & ~XInfE ? {XSgnE, 8'hfe, {23{1'b1}}} : - {XSgnE, 8'hff, 23'b0} : - {XSgnE, DSResExp, DSResFrac}; - - // select the final result based on the opperation - //*** in al units before putting into : ? put in a seperate signal - assign CvtFpResE = FmtE ? {{32{1'b1}},DSRes} : {XSgnE, SDExp, SDFrac[51]|XNaNE, SDFrac[50:0]}; - end else begin - // select the double to single precision result - assign DSRes = XNaNE ? {1'b0, {8{1'b1}}, 1'b1, 22'b0} : - Underflow & ~Denorm ? {XSgnE, 30'b0, CalcPlus1&(|FrmE[1:0]|Shift)} : - Overflow | XInfE ? ((FrmE[1:0]==2'b01) | (FrmE[1:0]==2'b10&~XSgnE) | (FrmE[1:0]==2'b11&XSgnE)) & ~XInfE ? {XSgnE, 8'hfe, {23{1'b1}}} : - {XSgnE, 8'hff, 23'b0} : - {XSgnE, DSResExp, DSResFrac}; - - // select the final result based on the opperation - assign CvtFpResE = FmtE ? 
{{32{1'b1}},DSRes} : {XSgnE&~XNaNE, SDExp, SDFrac[51]|XNaNE, SDFrac[50:0]&{51{~XNaNE}}}; - end -endmodule // fpadd - - diff --git a/pipelined/src/fpu/fcvtint.sv b/pipelined/src/fpu/fcvtint.sv deleted file mode 100644 index 97007d660..000000000 --- a/pipelined/src/fpu/fcvtint.sv +++ /dev/null @@ -1,190 +0,0 @@ - -`include "wally-config.vh" -// `include "../../config/rv64icfd/wally-config.vh" -// `define XLEN 64 -module fcvtint ( - input logic XSgnE, // X's sign - input logic [10:0] XExpE, // X's exponent - input logic [52:0] XManE, // X's fraction - input logic XZeroE, // is X zero - input logic XNaNE, // is X NaN - input logic XInfE, // is X infinity - input logic XDenormE, // is X denormalized - input logic [`XLEN-1:0] ForwardedSrcAE, // integer input - input logic [2:0] FOpCtrlE, // chooses which instruction is done (full list below) - input logic [2:0] FrmE, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - input logic FmtE, // precision 1 = double 0 = single - output logic [63:0] CvtResE, // convert final result - output logic [4:0] CvtFlgE); // convert flags {invalid, divide by zero, overflow, underflow, inexact} - - logic ResSgn; // FP result's sign - logic [10:0] ResExp,TmpExp; // FP result's exponent - logic [51:0] ResFrac; // FP result's fraction - logic [6:0] LZResP; // lz output - logic [7:0] Bits; // how many bits are in the integer result - logic [7:0] SubBits; // subtract these bits from the exponent (FP result) - logic [64+51:0] ShiftedManTmp; // Shifted mantissa - logic [64+51:0] ShiftVal; // value being shifted (to int - XMan, to FP - |integer input|) - logic [64+1:0] ShiftedMan; // shifted mantissa truncated - logic [64:0] RoundedTmp; // full size rounded result - in case of overfow - logic [63:0] Rounded; // rounded result - logic [12:0] ExpVal; // unbiased X exponent - logic [12:0] ShiftCnt; // how much is the mantissa shifted - logic 
[64-1:0] IntIn; // trimed integer input - logic [64-1:0] PosInt; // absolute value of the integer input - logic [63:0] CvtIntRes; // interger result from the fp -> int instructions - logic [63:0] CvtFPRes; // floating point result from the int -> fp instructions - logic Of, Uf; // did the integer result underflow or overflow - logic Guard, Round, LSB, Sticky; // bits used to determine rounding - logic Plus1,CalcPlus1; // do you add one for rounding - logic SgnRes; // sign of the floating point result - logic Res64, In64; // is the result or input 64 bits - logic RoundMSB; // most significant bit of the fraction - logic RoundSgn; // sign of the rounded result - logic Invalid, Inexact; // flags - - // FOpCtrlE: - // fcvt.w.s = 001 - // fcvt.wu.s = 011 - // fcvt.s.w = 000 - // fcvt.s.wu = 010 - // fcvt.l.s = 101 - // fcvt.lu.s = 111 - // fcvt.s.l = 100 - // fcvt.s.lu = 110 - // fcvt.w.d = 001 - // fcvt.wu.d = 011 - // fcvt.d.w = 000 - // fcvt.d.wu = 010 - // fcvt.l.d = 101 - // fcvt.lu.d = 111 - // fcvt.d.l = 100 - // fcvt.d.lu = 110 - // {long, unsigned, to int} - - // *** revisit this module, explain in more depth - // should the int to fp and fp to int paths be separated? - // add support for all formats - - // calculate signals based off the input and output's size - assign Res64 = (FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&~FOpCtrlE[0]); - assign In64 = (~FOpCtrlE[0]&FOpCtrlE[2]) | (FmtE&FOpCtrlE[0]); - assign SubBits = In64 ? 8'd64 : 8'd32; - assign Bits = Res64 ? 8'd64 : 8'd32; - - // calulate the unbiased exponent - assign ExpVal = {1'b0,XExpE} - {1'b0, (11)'(`BIAS)} + {12'b0, XDenormE}; - -//////////////////////////////////////////////////////// - - // position the input in the most significant bits - assign IntIn = FOpCtrlE[2] ? {ForwardedSrcAE, {64-`XLEN{1'b0}}} : {ForwardedSrcAE[31:0], 32'b0}; - // make the integer positive - assign PosInt = IntIn[64-1]&~FOpCtrlE[1] ? 
-IntIn : IntIn; - // determine the integer's sign - assign ResSgn = ~FOpCtrlE[1]&IntIn[64-1]; - - // Leading one detector - logic [8:0] i; - always_comb begin - i = 0; - while (~PosInt[64-1-i] & i < `XLEN) i = i+1; // search for leading one - LZResP = i[5:0]+1; // compute shift count - end - - // if no one was found set to zero otherwise calculate the exponent - assign TmpExp = i==`XLEN ? 0 : FmtE ? 11'd1023 + {3'b0, SubBits} - {4'b0, LZResP} : 11'd127 + {3'b0, SubBits} - {4'b0, LZResP}; - - - - -//////////////////////////////////////////// - - - // select the shift value and amount based on operation (to fp or int) - assign ShiftCnt = FOpCtrlE[0] ? ExpVal : {6'b0, LZResP}; - assign ShiftVal = FOpCtrlE[0] ? {{64-1{1'b0}}, XManE} : {PosInt, 52'b0}; - - // if shift = -1 then shift one bit right for gaurd bit (right shifting twice never rounds) - // if the shift is negitive add a bit for sticky bit calculation - // otherwise shift left - assign ShiftedManTmp = &ShiftCnt ? {{64{1'b0}}, XManE[52:1]} : ShiftCnt[12] ? {{64+51{1'b0}}, ~XZeroE} : ShiftVal << ShiftCnt; - - // truncate the shifted mantissa - assign ShiftedMan = ShiftedManTmp[64+51:50]; - - // calculate sticky bit - // - take into account the possible right shift from before - // - the sticky bit calculation covers three diffrent sizes depending on the opperation - assign Sticky = |ShiftedManTmp[49:0] | &ShiftCnt&XManE[0] | (~FOpCtrlE[0]&|ShiftedManTmp[62:50]) | (~FOpCtrlE[0]&~FmtE&|ShiftedManTmp[91:63]); - - - // determine guard, round, and least significant bit of the result - assign Guard = FOpCtrlE[0] ? ShiftedMan[1] : FmtE ? ShiftedMan[13] : ShiftedMan[42]; - assign Round = FOpCtrlE[0] ? ShiftedMan[0] : FmtE ? ShiftedMan[12] : ShiftedMan[41]; - assign LSB = FOpCtrlE[0] ? ShiftedMan[2] : FmtE ? 
ShiftedMan[14] : ShiftedMan[43]; - - always_comb begin//*** remove guard bit - // Determine if you add 1 - case (FrmE) - 3'b000: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky&LSB));//round to nearest even - 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = (XSgnE&FOpCtrlE[0]) | (ResSgn&~FOpCtrlE[0]);//round down - 3'b011: CalcPlus1 = (~XSgnE&FOpCtrlE[0]) | (~ResSgn&~FOpCtrlE[0]);//round up - 3'b100: CalcPlus1 = Guard & (Round | Sticky | (~Round&~Sticky));//round to nearest max magnitude - default: CalcPlus1 = 1'bx; - endcase - end - - // dont tound if the result is exact - assign Plus1 = CalcPlus1 & (Guard|Round|Sticky)&~(XZeroE&FOpCtrlE[0]); - - // round the shifted mantissa - assign RoundedTmp = ShiftedMan[64+1:2] + {64'b0, Plus1}; - assign {ResExp, ResFrac} = FmtE ? {TmpExp, ShiftedMan[64+1:14]} + {62'b0, Plus1} : {{TmpExp, ShiftedMan[64+1:43]} + {33'b0,Plus1}, 29'b0} ; - - // fit the rounded result into the appropriate size and take the 2's complement if needed - assign Rounded = Res64 ? XSgnE&FOpCtrlE[0] ? -RoundedTmp[63:0] : RoundedTmp[63:0] : - XSgnE ? {{32{1'b1}}, -RoundedTmp[31:0]} : {32'b0, RoundedTmp[31:0]}; - - // extract the MSB and Sign for later use (will be used to determine underflow and overflow) - assign RoundMSB = Res64 ? RoundedTmp[64] : RoundedTmp[32]; - assign RoundSgn = Res64 ? Rounded[63] : Rounded[31]; - - - // check if the result overflows - assign Of = (~XSgnE&($signed(ShiftCnt) >= $signed({{5{Bits[7]}}, Bits}))) | (~XSgnE&RoundSgn&~FOpCtrlE[1]) | (RoundMSB&(ShiftCnt==({{5{Bits[7]}}, Bits}-1))) | (~XSgnE&XInfE) | XNaNE; - - // check if the result underflows (this calculation changes if the result is signed or unsigned) - assign Uf = FOpCtrlE[1] ? XSgnE&~XZeroE | (XSgnE&XInfE) | (XSgnE&~XZeroE&(~ShiftCnt[12]|CalcPlus1)) | (ShiftCnt[12]&Plus1) : (XSgnE&XInfE) | (XSgnE&($signed(ShiftCnt) >= $signed({{5{Bits[7]}}, Bits}))) | (XSgnE&~RoundSgn&~ShiftCnt[12]); // assign CvtIntRes = (XSgnE | ShiftCnt[12]) ? 
{64{1'b0}} : (ShiftCnt >= 64) ? {64{1'b1}} : Rounded; - - // calculate the result's sign - assign SgnRes = ~FOpCtrlE[2] & FOpCtrlE[0]; - - // select the integer result - assign CvtIntRes = Of ? FOpCtrlE[1] ? {64{1'b1}} : SgnRes ? {33'b0, {31{1'b1}}}: {1'b0, {63{1'b1}}} : - Uf ? FOpCtrlE[1] ? {63'b0, Plus1&~XSgnE} : SgnRes ? {{33{1'b1}}, 31'b0} : {1'b1, 63'b0} : - |RoundedTmp ? Rounded[64-1:0] : 64'b0; - - // select the floating point result - assign CvtFPRes = FmtE ? {ResSgn, ResExp, ResFrac} : {{32{1'b1}}, ResSgn, ResExp[7:0], ResFrac[51:29]}; - - // select the result - assign CvtResE = FOpCtrlE[0] ? CvtIntRes : CvtFPRes; - - // calculate the flags - // - only set invalid flag for out-of-range vales - // - set inexact if in representable range and not exact - - if(`IEEE754) begin // checks before rounding - assign Invalid = (Of | Uf)&FOpCtrlE[0]; - assign Inexact = (Guard|Round|Sticky)&~(&FOpCtrlE[1:0]&(XSgnE|Of))&~((Of|Uf)&~FOpCtrlE[1]&FOpCtrlE[0]); - assign CvtFlgE = {Invalid&~Inexact, 3'b0, Inexact}; - end else begin // RISC-V checks if the result is in range after rounding - assign Invalid = (Of | Uf)&FOpCtrlE[0]; - assign Inexact = (Guard|Round|Sticky)&~(&FOpCtrlE[1:0]&((XSgnE&~(ShiftCnt[12]&~Plus1))|Of))&~((Of|Uf)&~FOpCtrlE[1]&FOpCtrlE[0]); - assign CvtFlgE = {Invalid&~Inexact, 3'b0, Inexact}; - end -endmodule // fpadd - - diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 97735c5f7..5d16ccc51 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -43,8 +43,7 @@ module fma( input logic XSgnM, YSgnM, // input signs - memory stage input logic [`NE-1:0] ZExpM, // input exponents - memory stage input logic [`NF:0] XManM, YManM, ZManM, // input mantissa - memory stage - input logic ZOrigDenormE, // is the original precision denormalized - input logic XDenormE, YDenormE, ZDenormE, // is denorm + input logic ZDenormE, // is denorm input logic XZeroE, YZeroE, ZZeroE, // is zero - execute stage input logic XNaNM, YNaNM, ZNaNM, // 
is NaN input logic XSNaNM, YSNaNM, ZSNaNM, // is signaling NaN @@ -73,10 +72,10 @@ module fma( logic PSgnE, PSgnM; logic [$clog2(3*`NF+7)-1:0] NormCntE, NormCntM; logic Mult; - logic ZOrigDenormM; + logic ZDenormM; fma1 fma1 (.XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, + .XZeroE, .YZeroE, .ZZeroE, .FOpCtrlE, .FmtE, .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE); @@ -84,10 +83,10 @@ module fma( flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); flopenrc #(13) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); flopenrc #($clog2(3*`NF+7)+8) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZOrigDenormE}, - {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZOrigDenormM}); + {AddendStickyE, KillProdE, InvZE, NormCntE, NegSumE, ZSgnEffE, PSgnE, FOpCtrlE[2]&~FOpCtrlE[1]&~FOpCtrlE[0], ZDenormE}, + {AddendStickyM, KillProdM, InvZM, NormCntM, NegSumM, ZSgnEffM, PSgnM, Mult, ZDenormM}); - fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM, + fma2 fma2(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .SumM, .NegSumM, .InvZM, .NormCntM, .ZSgnEffM, .PSgnM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .Mult, .FMAResM, .FMAFlgM); @@ -101,7 +100,6 @@ module fma1( input logic XSgnE, YSgnE, ZSgnE, // input's signs input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format input logic [`NF:0] XManE, YManE, ZManE, // fractions in U(0.NF) format - input logic XDenormE, YDenormE, ZDenormE, // is the input denormal input logic XZeroE, YZeroE, ZZeroE, // is the input zero input logic [2:0] FOpCtrlE, // 000 = fmadd (X*Y)+Z, 001 = fmsub 
(X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [`FPSIZES/3:0] FmtE, // precision 1 = double 0 = single @@ -116,13 +114,11 @@ module fma1( output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift cnt ); - logic [`NE-1:0] Denorm; // value of a denormaized number based on precision logic [2*`NF+1:0] ProdManE; // 1.X frac * 1.Y frac in U(2.2Nf) format logic [3*`NF+5:0] AlignedAddendE; // Z aligned for addition in U(NF+5.2NF+1) logic [3*`NF+6:0] AlignedAddendInv; // aligned addend possibly inverted logic [2*`NF+1:0] ProdManKilled; // the product's mantissa possibly killed logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum - logic [`NE-1:0] XExpVal, YExpVal; // exponent value after taking into accound denormals /////////////////////////////////////////////////////////////////////////////// // Calculate the product // - When multipliying two fp numbers, add the exponents @@ -133,8 +129,8 @@ module fma1( // calculate the product's exponent - expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, .XDenormE, .YDenormE, .XExpVal, .YExpVal, - .Denorm, .ProdExpE); + expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, + .ProdExpE); // multiplication of the mantissa's mult mult(.XManE, .YManE, .ProdManE); @@ -143,7 +139,7 @@ module fma1( // Alignment shifter /////////////////////////////////////////////////////////////////////////////// - align align(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm, .XExpVal, .YExpVal, + align align(.ZExpE, .ZManE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .XExpE, .YExpE, .AlignedAddendE, .AddendStickyE, .KillProdE); // calculate the signs and take the opperation into account @@ -167,51 +163,12 @@ endmodule module expadd( input logic [`FPSIZES/3:0] FmtE, // precision input logic [`NE-1:0] XExpE, YExpE, // input exponents - input logic XDenormE, YDenormE, // are the inputs denormalized input logic XZeroE, YZeroE, // are the inputs zero - output 
logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals - output logic [`NE-1:0] Denorm, // value of denormalized exponent output logic [`NE+1:0] ProdExpE // product's exponent B^(1023)NE+2 ); - - // denormalized numbers have diffrent values depending on which precison it is. - // FLEN - 1 - // Other - BIAS - other bias + 1 - - if (`FPSIZES == 1) begin - assign Denorm = 1; - - end else if (`FPSIZES == 2) begin - assign Denorm = FmtE ? (`NE)'(1) : (`NE)'(`BIAS)-(`NE)'(`BIAS1)+(`NE)'(1); - - end else if (`FPSIZES == 3) begin - always_comb begin - case (FmtE) - `FMT: Denorm = 1; - `FMT1: Denorm = `BIAS-`BIAS1+1; - `FMT2: Denorm = `BIAS-`BIAS2+1; - default: Denorm = 1'bx; - endcase - end - - end else if (`FPSIZES == 4) begin - always_comb begin - case (FmtE) - 2'h3: Denorm = 1; - 2'h1: Denorm = `BIAS-`D_BIAS+1; - 2'h0: Denorm = `BIAS-`S_BIAS+1; - 2'h2: Denorm = `BIAS-`H_BIAS+1; - endcase - end - - end - - // pick denormalized value or exponent - assign XExpVal = XDenormE ? Denorm : XExpE; - assign YExpVal = YDenormE ? 
Denorm : YExpE; // kill the exponent if the product is zero - either X or Y is 0 - assign ProdExpE = ({2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}}; + assign ProdExpE = ({2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)})&{`NE+2{~(XZeroE|YZeroE)}}; endmodule @@ -258,13 +215,10 @@ endmodule module align( - input logic [`NE-1:0] ZExpE, // biased exponents in B(NE.0) format + input logic [`NE-1:0] XExpE, YExpE, ZExpE, // biased exponents in B(NE.0) format input logic [`NF:0] ZManE, // fractions in U(0.NF) format] - input logic ZDenormE, // is the input denormal input logic XZeroE, YZeroE, ZZeroE, // is the input zero - input logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals input logic [`NE+1:0] ProdExpE, // the product's exponent - input logic [`NE-1:0] Denorm, // the biased value of a denormalized number output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1) output logic AddendStickyE, // Sticky bit calculated from the aliged addend output logic KillProdE // should the product be set to zero @@ -273,7 +227,6 @@ module align( logic [`NE+1:0] AlignCnt; // how far to shift the addend to align with the product in Q(NE+2.0) format logic [4*`NF+5:0] ZManShifted; // output of the alignment shifter including sticky bits U(NF+5.3NF+1) logic [4*`NF+5:0] ZManPreShifted; // input to the alignment shifter U(NF+5.3NF+1) - logic [`NE-1:0] ZExpVal; // Exponent value after taking into account denormals /////////////////////////////////////////////////////////////////////////////// // Alignment shifter @@ -282,11 +235,9 @@ module align( // determine the shift count for alignment // - negitive means Z is larger, so shift Z left // - positive means the product is larger, so shift Z right - // - Denormal numbers have a diffrent exponent value depending on the precision - assign ZExpVal = ZDenormE ? 
Denorm : ZExpE; - // assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3); // *** can we use ProdExpE instead of XExp/YExp to save an adder? DH 5/12/22 - assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpVal}; + // KP- yes we used ProdExpE originally but we did this for timing + assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpE} + {2'b0, YExpE} - {2'b0, (`NE)'(`BIAS)} + `NF+3 - {2'b0, ZExpE}; // Defualt Addition without shifting // | 54'b0 | 106'b(product) | 2'b0 | @@ -409,22 +360,10 @@ module loa( //https://ieeexplore.ieee.org/abstract/document/930098 - lzc lzc(.f, .NormCntE); + lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NormCntE)); endmodule -module lzc( - input logic [3*`NF+6:0] f, - output logic [$clog2(3*`NF+7)-1:0] NormCntE // normalization shift -); - - logic [$clog2(3*`NF+7)-1:0] i; - always_comb begin - i = 0; - while (~f[3*`NF+6-i] & $unsigned(i) <= $unsigned($clog2(3*`NF+7)'(3)*($clog2(3*`NF+7))'(`NF)+($clog2(3*`NF+7))'(6))) i = i+1; // search for leading one - NormCntE = i; - end -endmodule @@ -450,7 +389,7 @@ module fma2( input logic [3*`NF+5:0] SumM, // the positive sum input logic NegSumM, // was the sum negitive input logic InvZM, // do you invert Z - input logic ZOrigDenormM, // is the original precision denormalized + input logic ZDenormM, // is the original precision denormalized input logic ZSgnEffM, // the modified Z sign - depends on instruction input logic PSgnM, // the product's sign input logic Mult, // multiply opperation @@ -465,7 +404,7 @@ module fma2( logic ResultSgn, ResultSgnTmp; // Result sign logic [`NE+1:0] SumExp; // exponent of the normalized sum logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow - logic [`NF+2:0] NormSum; // normalized sum + logic [`NF+1:0] NormSum; // normalized sum logic NormSumSticky; // sticky bit calulated from the normalized sum logic SumZero; // is the sum zero logic ResultDenorm; // is the result 
denormalized @@ -486,7 +425,7 @@ module fma2( /////////////////////////////////////////////////////////////////////////////// normalize normalize(.SumM, .ZExpM, .ProdExpM, .NormCntM, .FmtM, .KillProdM, .AddendStickyM, .NormSum, - .ZOrigDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm); + .ZDenormM, .SumZero, .NormSumSticky, .UfSticky, .SumExp, .ResultDenorm); @@ -533,7 +472,7 @@ module fma2( // Select the result /////////////////////////////////////////////////////////////////////////////// - resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZOrigDenormM, + resultselect resultselect(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .ZDenormM, .FrmM, .FmtM, .AddendStickyM, .KillProdM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .RoundAdd, .ZSgnEffM, .PSgnM, .ResultSgn, .CalcPlus1, .Invalid, .Overflow, .Underflow, .ResultDenorm, .ResultExp, .ResultFrac, .FMAResM); @@ -580,9 +519,9 @@ module normalize( input logic [$clog2(3*`NF+7)-1:0] NormCntM, // normalization shift count input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single input logic KillProdM, // is the product set to zero - input logic ZOrigDenormM, + input logic ZDenormM, input logic AddendStickyM, // the sticky bit caclulated from the aligned addend - output logic [`NF+2:0] NormSum, // normalized sum + output logic [`NF+1:0] NormSum, // normalized sum output logic SumZero, // is the sum zero output logic NormSumSticky, UfSticky, // sticky bits output logic [`NE+1:0] SumExp, // exponent of the normalized sum @@ -599,12 +538,12 @@ module normalize( /////////////////////////////////////////////////////////////////////////////// // Normalization /////////////////////////////////////////////////////////////////////////////// - //*** insert bias-bias simplification in fcvt.sv/phone pictures/ whiteboard... 
if still there + //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero assign SumZero = ~(|SumM); // calculate the sum's exponent - assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZOrigDenormM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); + assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); //convert the sum's exponent into the propper percision if (`FPSIZES == 1) begin @@ -707,27 +646,27 @@ module normalize( assign LZAPlus2 = SumShifted[3*`NF+8]; // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; - assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3]; + assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+4]; // Calculate the sticky bit if (`FPSIZES == 1) begin - assign NormSumSticky = |CorrSumShifted[2*`NF+2:0]; + assign NormSumSticky = |CorrSumShifted[2*`NF+3:0]; end else if (`FPSIZES == 2) begin // 3*NF+5 - NF1 - 3 - assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | - (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&~FmtM); + assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | + (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&~FmtM); end else if (`FPSIZES == 3) begin - assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | - (|CorrSumShifted[3*`NF+2-`NF1:2*`NF+3]&((FmtM==`FMT1)|(FmtM==`FMT2))) | - (|CorrSumShifted[3*`NF+2-`NF2:3*`NF+3-`NF1]&(FmtM==`FMT2)); + assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | + (|CorrSumShifted[3*`NF+3-`NF1:2*`NF+4]&((FmtM==`FMT1)|(FmtM==`FMT2))) | + (|CorrSumShifted[3*`NF+3-`NF2:3*`NF+4-`NF1]&(FmtM==`FMT2)); end else if (`FPSIZES == 4) begin - assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | - (|CorrSumShifted[3*`NF+2-`D_NF:2*`NF+3]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | - 
(|CorrSumShifted[3*`NF+2-`S_NF:3*`NF+3-`D_NF]&((FmtM==0)|(FmtM==2))) | - (|CorrSumShifted[3*`NF+2-`H_NF:3*`NF+3-`S_NF]&(FmtM==2)); + assign NormSumSticky = (|CorrSumShifted[2*`NF+3:0]) | + (|CorrSumShifted[3*`NF+3-`D_NF:2*`NF+4]&((FmtM==1)|(FmtM==0)|(FmtM==2))) | + (|CorrSumShifted[3*`NF+3-`S_NF:3*`NF+4-`D_NF]&((FmtM==0)|(FmtM==2))) | + (|CorrSumShifted[3*`NF+3-`H_NF:3*`NF+4-`S_NF]&(FmtM==2)); end @@ -745,7 +684,7 @@ module fmaround( input logic [`FPSIZES/3:0] FmtM, // precision 1 = double 0 = single input logic [2:0] FrmM, // rounding mode input logic UfSticky, // sticky bit for underlow calculation - input logic [`NF+2:0] NormSum, // normalized sum + input logic [`NF+1:0] NormSum, // normalized sum input logic AddendStickyM, // addend's sticky bit input logic NormSumSticky, // normalized sum's sticky bit input logic ZZeroM, // is Z zero @@ -799,83 +738,53 @@ module fmaround( if (`FPSIZES == 1) begin // determine guard, round, and least significant bit of the result - assign Guard = NormSum[2]; assign Round = NormSum[1]; - assign LSBNormSum = NormSum[3]; + assign LSBNormSum = NormSum[2]; // used to determine underflow flag - assign UfGuard = NormSum[1]; assign UfRound = NormSum[0]; - assign UfLSBNormSum = NormSum[2]; - - // determine sticky - assign Sticky = UfSticky | NormSum[0]; end else if (`FPSIZES == 2) begin // \/-------------NF---------------, - // | NF1 | 3 | | + // | NF1 | 2 | | // '-------NF1------^ // determine guard, round, and least significant bit of the result - assign Guard = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2]; assign Round = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1]; - assign LSBNormSum = FmtM ? NormSum[3] : NormSum[`NF-`NF1+3]; + assign LSBNormSum = FmtM ? NormSum[2] : NormSum[`NF-`NF1+2]; // used to determine underflow flag - assign UfGuard = FmtM ? NormSum[1] : NormSum[`NF-`NF1+1]; assign UfRound = FmtM ? NormSum[0] : NormSum[`NF-`NF1]; - assign UfLSBNormSum = FmtM ? 
NormSum[2] : NormSum[`NF-`NF1+2]; - // determine sticky - assign Sticky = UfSticky | (FmtM ? NormSum[0] : NormSum[`NF-`NF1]); end else if (`FPSIZES == 3) begin always_comb begin case (FmtM) `FMT: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[2]; Round = NormSum[1]; - LSBNormSum = NormSum[3]; + LSBNormSum = NormSum[2]; // used to determine underflow flag - UfGuard = NormSum[1]; UfRound = NormSum[0]; - UfLSBNormSum = NormSum[2]; - // determine sticky - Sticky = UfSticky | NormSum[0]; end `FMT1: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`NF1+2]; Round = NormSum[`NF-`NF1+1]; - LSBNormSum = NormSum[`NF-`NF1+3]; + LSBNormSum = NormSum[`NF-`NF1+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`NF1+1]; UfRound = NormSum[`NF-`NF1]; - UfLSBNormSum = NormSum[`NF-`NF1+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`NF1]; end `FMT2: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`NF2+2]; Round = NormSum[`NF-`NF2+1]; - LSBNormSum = NormSum[`NF-`NF2+3]; + LSBNormSum = NormSum[`NF-`NF2+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`NF2+1]; UfRound = NormSum[`NF-`NF2]; - UfLSBNormSum = NormSum[`NF-`NF2+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`NF2]; end default: begin - Guard = 1'bx; Round = 1'bx; LSBNormSum = 1'bx; - UfGuard = 1'bx; UfRound = 1'bx; - UfLSBNormSum = 1'bx; - Sticky = 1'bx; end endcase end @@ -885,56 +794,40 @@ module fmaround( case (FmtM) 2'h3: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[2]; Round = NormSum[1]; - LSBNormSum = NormSum[3]; + LSBNormSum = NormSum[2]; // used to determine underflow flag - UfGuard = NormSum[1]; UfRound = NormSum[0]; - UfLSBNormSum = NormSum[2]; - // determine sticky - Sticky = UfSticky | NormSum[0]; end 2'h1: begin // determine guard, round, and least significant bit of the result - 
Guard = NormSum[`NF-`D_NF+2]; Round = NormSum[`NF-`D_NF+1]; - LSBNormSum = NormSum[`NF-`D_NF+3]; + LSBNormSum = NormSum[`NF-`D_NF+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`D_NF+1]; UfRound = NormSum[`NF-`D_NF]; - UfLSBNormSum = NormSum[`NF-`D_NF+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`D_NF]; end 2'h0: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`S_NF+2]; Round = NormSum[`NF-`S_NF+1]; - LSBNormSum = NormSum[`NF-`S_NF+3]; + LSBNormSum = NormSum[`NF-`S_NF+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`S_NF+1]; UfRound = NormSum[`NF-`S_NF]; - UfLSBNormSum = NormSum[`NF-`S_NF+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`S_NF]; end 2'h2: begin // determine guard, round, and least significant bit of the result - Guard = NormSum[`NF-`H_NF+2]; Round = NormSum[`NF-`H_NF+1]; - LSBNormSum = NormSum[`NF-`H_NF+3]; + LSBNormSum = NormSum[`NF-`H_NF+2]; // used to determine underflow flag - UfGuard = NormSum[`NF-`H_NF+1]; UfRound = NormSum[`NF-`H_NF]; - UfLSBNormSum = NormSum[`NF-`H_NF+2]; - // determine sticky - Sticky = UfSticky | NormSum[`NF-`H_NF]; end endcase end end + // used to determine underflow flag + assign UfLSBNormSum = Round; + // determine sticky + assign Sticky = UfSticky | UfRound; // Deterimine if a small number was supposed to be subtrated @@ -944,28 +837,28 @@ module fmaround( always_comb begin // Determine if you add 1 case (FrmM) - 3'b000: CalcPlus1 = Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&LSBNormSum&~SubBySmallNum));//round to nearest even + 3'b000: CalcPlus1 = Round & ((Sticky| LSBNormSum)&~SubBySmallNum);//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round down - 3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round up - 3'b100: CalcPlus1 = (Guard & (Round | 
((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&~SubBySmallNum)));//round to nearest max magnitude + 3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Round);//round down + 3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Round);//round up + 3'b100: CalcPlus1 = Round & ~SubBySmallNum;//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase // Determine if you add 1 (for underflow flag) case (FrmM) - 3'b000: UfCalcPlus1 = UfGuard & (UfRound | (UfSticky&UfRound|~UfSubBySmallNum) | (~Sticky&UfLSBNormSum&~UfSubBySmallNum));//round to nearest even + 3'b000: UfCalcPlus1 = UfRound & ((UfSticky| UfLSBNormSum)&~UfSubBySmallNum);//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down - 3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up - 3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (UfSticky&~(~UfRound&UfSubBySmallNum)) | (~Sticky&~UfSubBySmallNum)));//round to nearest max magnitude + 3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round down + 3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfRound);//round up + 3'b100: UfCalcPlus1 = UfRound & ~UfSubBySmallNum;//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase // Determine if you subtract 1 case (FrmM) 3'b000: CalcMinus1 = 0;//round to nearest even - 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero - 3'b010: CalcMinus1 = ~ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round down - 3'b011: CalcMinus1 = ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round up + 3'b001: CalcMinus1 = SubBySmallNum & ~Round;//round to zero + 3'b010: CalcMinus1 = ~ResultSgnTmp & ~Round & SubBySmallNum;//round down + 3'b011: CalcMinus1 = ResultSgnTmp & ~Round & SubBySmallNum;//round up 3'b100: CalcMinus1 = 0;//round to nearest max magnitude default: CalcMinus1 = 1'bx; endcase @@ -973,9 +866,9 @@ module 
fmaround( end // If an answer is exact don't round - assign Plus1 = CalcPlus1 & (Sticky | Guard | Round); - assign UfPlus1 = UfCalcPlus1 & (Sticky | UfGuard);//UfRound is part of sticky - assign Minus1 = CalcMinus1 & (Sticky | Guard | Round); + assign Plus1 = CalcPlus1 & (Sticky | Round); + assign UfPlus1 = UfCalcPlus1 & (Sticky | UfRound);//UfRound is part of sticky + assign Minus1 = CalcMinus1 & (Sticky | Round); // Compute rounded result if (`FPSIZES == 1) begin @@ -1011,7 +904,7 @@ module fmaround( end - assign NormSumTruncated = NormSum[`NF+2:3]; + assign NormSumTruncated = NormSum[`NF+1:2]; assign {FullResultExp, ResultFrac} = {SumExp, NormSumTruncated} + RoundAdd; assign ResultExp = FullResultExp[`NE-1:0]; @@ -1083,12 +976,12 @@ module fmaflags( // Set Underflow flag if the number is too small to be represented in normal numbers // - Don't set the underflow flag if the result is exact - assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Guard|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign Underflow = (SumExp[`NE+1] | ((SumExp == 0) & (Round|Sticky)))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // exp is negitive result is denorm exp was denorm but rounded to norm and if given an unbounded exponent it would stay denormal - assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Guard|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign UnderflowFlag = (FullResultExp[`NE+1] | ((FullResultExp == 0) | ((FullResultExp == 1) & (SumExp == 0) & ~(UfPlus1&UfLSBNormSum)))&(Round|Sticky))&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // Set Inexact flag if the result is diffrent from what would be outputed given infinite precision // - Don't set the underflow flag if an underflowed result isn't outputed - assign Inexact = (Sticky|Overflow|Guard|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); + assign Inexact = 
(Sticky|Overflow|Round|Underflow)&~(XNaNM|YNaNM|ZNaNM|XInfM|YInfM|ZInfM); // Combine flags // - FMA can't set the Divide by zero flag @@ -1108,7 +1001,7 @@ module resultselect( input logic KillProdM, // set the product to zero before addition if the product is too small to matter input logic XInfM, YInfM, ZInfM, // inputs are infinity input logic XNaNM, YNaNM, ZNaNM, // inputs are NaN - input logic ZOrigDenormM, // is the original precision denormalized + input logic ZDenormM, // is the original precision denormalized input logic ZSgnEffM, // the modified Z sign - depends on instruction input logic PSgnM, // the product's sign input logic ResultSgn, // the result's sign @@ -1134,7 +1027,7 @@ module resultselect( end assign OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - assign KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; + assign KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; assign UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))}; assign InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)}; assign NormResult = {ResultSgn, ResultExp, ResultFrac}; @@ -1153,7 +1046,7 @@ module resultselect( {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} : ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)}; - assign KillProdResult = FmtM ? 
{ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; + assign KillProdResult = FmtM ? {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; assign InfResult = FmtM ? {InfSgn, {`NE{1'b1}}, (`NF)'(0)} : {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)}; assign NormResult = FmtM ? {ResultSgn, ResultExp, ResultFrac} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]}; @@ -1173,7 +1066,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; + KillProdResult = {ResultSgn, {ZExpM[`NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))}; InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)}; NormResult = {ResultSgn, ResultExp, ResultFrac}; @@ -1189,7 +1082,7 @@ module resultselect( end OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, ResultSgn, {`NE1{1'b1}}, (`NF1)'(0)}; - KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE1-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF1]} + (RoundAdd[`NF-`NF1+`LEN1-2:`NF-`NF1]&{`LEN1-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`LEN1{1'b1}}, {ResultSgn, (`LEN1-1)'(0)} + {(`LEN1-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`LEN1{1'b1}}, InfSgn, {`NE1{1'b1}}, (`NF1)'(0)}; NormResult = {{`FLEN-`LEN1{1'b1}}, ResultSgn, ResultExp[`NE1-1:0], ResultFrac[`NF-1:`NF-`NF1]}; @@ -1206,7 +1099,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2-1{1'b1}}, 1'b0, {`NF2{1'b1}}} : {{`FLEN-`LEN2{1'b1}}, ResultSgn, {`NE2{1'b1}}, (`NF2)'(0)}; - KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`NE2-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`NF2]} + (RoundAdd[`NF-`NF2+`LEN2-2:`NF-`NF2]&{`LEN2-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`LEN2{1'b1}}, {ResultSgn, (`LEN2-1)'(0)} + {(`LEN2-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`LEN2{1'b1}}, InfSgn, {`NE2{1'b1}}, (`NF2)'(0)}; NormResult = {{`FLEN-`LEN2{1'b1}}, ResultSgn, ResultExp[`NE2-1:0], ResultFrac[`NF-1:`NF-`NF2]}; @@ -1244,7 +1137,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}}; - KillProdResult = {ResultSgn, {ZExpM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; + KillProdResult = {ResultSgn, {ZExpM[`Q_NE-1:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:0]} + (RoundAdd[`FLEN-2:0]&{`FLEN-1{AddendStickyM}})}; UnderflowResult = {ResultSgn, {`FLEN-1{1'b0}}} + {(`FLEN-1)'(0),(CalcPlus1&(AddendStickyM|FrmM[1]))}; InfResult = {InfSgn, {`NE{1'b1}}, (`NF)'(0)}; NormResult = {ResultSgn, ResultExp, ResultFrac}; @@ -1260,7 +1153,7 @@ module resultselect( end OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE-1{1'b1}}, 1'b0, {`D_NF{1'b1}}} : {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {`D_NE{1'b1}}, (`D_NF)'(0)}; - KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`D_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`D_NF]} + (RoundAdd[`NF-`D_NF+`D_LEN-2:`NF-`D_NF]&{`D_LEN-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`D_LEN{1'b1}}, {ResultSgn, (`D_LEN-1)'(0)} + {(`D_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`D_LEN{1'b1}}, InfSgn, {`D_NE{1'b1}}, (`D_NF)'(0)}; NormResult = {{`FLEN-`D_LEN{1'b1}}, ResultSgn, ResultExp[`D_NE-1:0], ResultFrac[`NF-1:`NF-`D_NF]}; @@ -1277,7 +1170,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? 
{{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE-1{1'b1}}, 1'b0, {`S_NF{1'b1}}} : {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {`S_NE{1'b1}}, (`S_NF)'(0)}; - KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`S_NE-2:1], ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`S_NF]} + (RoundAdd[`NF-`S_NF+`S_LEN-2:`NF-`S_NF]&{`S_LEN-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`S_LEN{1'b1}}, {ResultSgn, (`S_LEN-1)'(0)} + {(`S_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`S_LEN{1'b1}}, InfSgn, {`S_NE{1'b1}}, (`S_NF)'(0)}; NormResult = {{`FLEN-`S_LEN{1'b1}}, ResultSgn, ResultExp[`S_NE-1:0], ResultFrac[`NF-1:`NF-`S_NF]}; @@ -1295,7 +1188,7 @@ module resultselect( OverflowResult = ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE-1{1'b1}}, 1'b0, {`H_NF{1'b1}}} : {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; - KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZOrigDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})}; + KillProdResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, {ZExpM[`NE-1], ZExpM[`H_NE-2:1],ZExpM[0]&~ZDenormM, ZManM[`NF-1:`NF-`H_NF]} + (RoundAdd[`NF-`H_NF+`H_LEN-2:`NF-`H_NF]&{`H_LEN-1{AddendStickyM}})}; UnderflowResult = {{`FLEN-`H_LEN{1'b1}}, {ResultSgn, (`H_LEN-1)'(0)} + {(`H_LEN-1)'(0), (CalcPlus1&(AddendStickyM|FrmM[1]))}}; InfResult = {{`FLEN-`H_LEN{1'b1}}, InfSgn, {`H_NE{1'b1}}, (`H_NF)'(0)}; NormResult = {{`FLEN-`H_LEN{1'b1}}, ResultSgn, ResultExp[`H_NE-1:0], ResultFrac[`NF-1:`NF-`H_NF]}; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index c876a26ae..053d3dd5c 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -104,7 
+104,6 @@ module fpu ( logic XInfQ, YInfQ; // is the input infinity - divide logic XExpMaxE; // is the exponent all ones (max value) logic XNormE; // is normal - logic ZOrigDenormE, XOrigDenormE; logic FmtQ; logic FOpCtrlQ; @@ -176,7 +175,7 @@ module fpu ( // unpack unit // - splits FP inputs into their various parts // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) - unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, .ZOrigDenormE, .XOrigDenormE, + unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); @@ -188,11 +187,11 @@ module fpu ( // - handles FMA and multiply instructions fma fma (.clk, .reset, .FlushM, .StallM, .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, + .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, - .FOpCtrlE, .ZOrigDenormE, + .FOpCtrlE, .FmtE, .FmtM, .FrmM, .FMAFlgM, .FMAResM); @@ -215,9 +214,9 @@ module fpu ( // other FP execution units fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpResE); - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE); + fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE); fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, .XSNaNE, .ClassResE); - fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XOrigDenormE, + fcvt fcvt (.XSgnE, .XExpE, .XManE, .ForwardedSrcAE, .FOpCtrlE, .FWriteIntE, .XZeroE, .XDenormE, 
.XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtResE, .CvtIntResE, .CvtFlgE); // data to be stored in memory - to IEU @@ -235,6 +234,8 @@ module fpu ( // select the result that may be written to the integer register - to IEU mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], CvtIntResE, FIntResSelE, FIntResE); + // *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok + // *** make sure the fpu matches the chapter diagram // E/M pipe registers diff --git a/pipelined/src/fpu/fsgn.sv b/pipelined/src/fpu/fsgn.sv deleted file mode 100755 index b95fd078f..000000000 --- a/pipelined/src/fpu/fsgn.sv +++ /dev/null @@ -1,29 +0,0 @@ -//performs the fsgnj/fsgnjn/fsgnjx RISCV instructions - -module fsgn ( - input logic XSgnE, YSgnE, // X and Y sign bits - input logic [63:0] FSrcXE, // X - input logic FmtE, // precision 1 = double 0 = single - input logic [1:0] SgnOpCodeE, // operation control - output logic [63:0] SgnResE // result - ); - - logic ResSgn; - - //op code designation: - // - //00 - fsgnj - directly copy over sign value of FSrcYE - //01 - fsgnjn - negate sign value of FSrcYE - //10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE - // - - // calculate the result's sign - assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]); - - // format final result based on precision - // - uses NaN-blocking format - // - if there are any unsused bits the most significant bits are filled with 1s - assign SgnResE = FmtE ? 
{ResSgn, FSrcXE[62:0]} : {FSrcXE[63:32], ResSgn, FSrcXE[30:0]}; - - -endmodule diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv new file mode 100755 index 000000000..8474fdff6 --- /dev/null +++ b/pipelined/src/fpu/fsgninj.sv @@ -0,0 +1,56 @@ +/////////////////////////////////////////// +// +// Written: Katherine Parry +// Modified: 6/23/2021 +// +// Purpose: FPU Sign Injection instructions +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module fsgninj ( + input logic XSgnE, YSgnE, // X and Y sign bits + input logic [63:0] FSrcXE, // X + input logic FmtE, // precision 1 = double 0 = single + input logic [1:0] SgnOpCodeE, // operation control + output logic [63:0] SgnResE // result +); + + logic ResSgn; + + //op code designation: + // + //00 - fsgnj - directly copy over sign value of FSrcYE + //01 - fsgnjn - negate sign value of FSrcYE + //10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE + // + + // calculate the result's sign + assign ResSgn = SgnOpCodeE[1] ? (XSgnE ^ YSgnE) : (YSgnE ^ SgnOpCodeE[0]); + + // format final result based on precision + // - uses NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + assign SgnResE = FmtE ? {ResSgn, FSrcXE[62:0]} : {FSrcXE[63:32], ResSgn, FSrcXE[30:0]}; + + +endmodule diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 44ffc2838..9e691e27f 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -12,7 +12,6 @@ module unpack ( output logic XDenormE, YDenormE, ZDenormE, // is XYZ denormalized output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XInfE, YInfE, ZInfE, // is XYZ infinity - output logic XOrigDenormE, ZOrigDenormE, // is the original precision denormalized output logic XExpMaxE // does X have the maximum exponent (NaN or Inf) ); @@ -22,535 +21,20 @@ module unpack ( logic XExpZero, YExpZero, ZExpZero; // is the exponent zero logic YExpMaxE, ZExpMaxE; // is the exponent all 1s - if (`FPSIZES == 1) begin // if there is only one floating point format supported + unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), + .NaN(XNaNE), .SNaN(XSNaNE), .Denorm(XDenormE), + .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .ExpZero(XExpZero)); - // sign bit - assign XSgnE = X[`FLEN-1]; - assign YSgnE = Y[`FLEN-1]; - assign ZSgnE = 
Z[`FLEN-1]; + unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), + .NaN(YNaNE), .SNaN(YSNaNE), .Denorm(YDenormE), + .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .ExpZero(YExpZero)); - // exponent - assign XExpE = X[`FLEN-2:`NF]; - assign YExpE = Y[`FLEN-2:`NF]; - assign ZExpE = Z[`FLEN-2:`NF]; - - // fraction (no assumed 1) - assign XFracE = X[`NF-1:0]; - assign YFracE = Y[`NF-1:0]; - assign ZFracE = Z[`NF-1:0]; - - // is the exponent non-zero - assign XExpNonzero = |XExpE; - assign YExpNonzero = |YExpE; - assign ZExpNonzero = |ZExpE; - - // is the exponent all 1's - assign XExpMaxE = &XExpE; - assign YExpMaxE = &YExpE; - assign ZExpMaxE = &ZExpE; - - assign XOrigDenormE = 1'b0; - assign ZOrigDenormE = 1'b0; - - - end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported - //***need better names for these constants - // largest format | smaller format - //---------------------------------- - // `FLEN | `LEN1 length of floating point number - // `NE | `NE1 length of exponent - // `NF | `NF1 length of fraction - // `BIAS | `BIAS1 exponent's bias value - // `FMT | `FMT1 precision's format value - Q=11 D=01 S=00 H=10 - - // Possible combinantions specified by spec: - // double and single - // single and half - - // Not needed but can also handle: - // quad and double - // quad and single - // quad and half - // double and half - - logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed - logic YOrigDenormE; // the original value of XYZ is denormalized - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; - assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; - assign ZLen1 = &Z[`FLEN-1:`LEN1] ? 
Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; - - // choose sign bit depending on format - 1=larger precsion 0=smaller precision - assign XSgnE = FmtE ? X[`FLEN-1] : XLen1[`LEN1-1]; - assign YSgnE = FmtE ? Y[`FLEN-1] : YLen1[`LEN1-1]; - assign ZSgnE = FmtE ? Z[`FLEN-1] : ZLen1[`LEN1-1]; - - // example double to single conversion: - // 1023 = 0011 1111 1111 - // 127 = 0000 0111 1111 (subtract this) - // 896 = 0011 1000 0000 - // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b - // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/denorm/inf/NaN values - - // extract the exponent, converting the smaller exponent into the larger precision if nessisary - // - if the original precision had a denormal number convert the exponent value 1 - assign XExpE = FmtE ? X[`FLEN-2:`NF] : XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; - assign YExpE = FmtE ? Y[`FLEN-2:`NF] : YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; - assign ZExpE = FmtE ? Z[`FLEN-2:`NF] : ZOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; - - // is the input (in it's original format) denormalized - assign XOrigDenormE = FmtE ? 0 : ~|XLen1[`LEN1-2:`NF1] & ~XFracZero; - assign YOrigDenormE = FmtE ? 0 : ~|YLen1[`LEN1-2:`NF1] & ~YFracZero; - assign ZOrigDenormE = FmtE ? 0 : ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero; - - // extract the fraction, add trailing zeroes to the mantissa if nessisary - assign XFracE = FmtE ? X[`NF-1:0] : {XLen1[`NF1-1:0], (`NF-`NF1)'(0)}; - assign YFracE = FmtE ? Y[`NF-1:0] : {YLen1[`NF1-1:0], (`NF-`NF1)'(0)}; - assign ZFracE = FmtE ? Z[`NF-1:0] : {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)}; - - // is the exponent non-zero - assign XExpNonzero = FmtE ? 
|X[`FLEN-2:`NF] : |XLen1[`LEN1-2:`NF1]; - assign YExpNonzero = FmtE ? |Y[`FLEN-2:`NF] : |YLen1[`LEN1-2:`NF1]; - assign ZExpNonzero = FmtE ? |Z[`FLEN-2:`NF] : |ZLen1[`LEN1-2:`NF1]; - - // is the exponent all 1's - assign XExpMaxE = FmtE ? &X[`FLEN-2:`NF] : &XLen1[`LEN1-2:`NF1]; - assign YExpMaxE = FmtE ? &Y[`FLEN-2:`NF] : &YLen1[`LEN1-2:`NF1]; - assign ZExpMaxE = FmtE ? &Z[`FLEN-2:`NF] : &ZLen1[`LEN1-2:`NF1]; - - - end else if (`FPSIZES == 3) begin // three floating point precsions supported - - //***need better names for these constants - // largest format | larger format | smallest format - //--------------------------------------------------- - // `FLEN | `LEN1 | `LEN2 length of floating point number - // `NE | `NE1 | `NE2 length of exponent - // `NF | `NF1 | `NF2 length of fraction - // `BIAS | `BIAS1 | `BIAS2 exponent's bias value - // `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 S=00 H=10 - - // Possible combinantions specified by spec: - // quad and double and single - // double and single and half - - // Not needed but can also handle: - // quad and double and half - // quad and single and half - - logic [`LEN1-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision - logic [`LEN2-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision - logic YOrigDenormE; // the original value of XYZ is denormalized - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision - assign XLen1 = &X[`FLEN-1:`LEN1] ? X[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; - assign YLen1 = &Y[`FLEN-1:`LEN1] ? Y[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; - assign ZLen1 = &Z[`FLEN-1:`LEN1] ? Z[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for smaller precision - assign XLen2 = &X[`FLEN-1:`LEN2] ? 
X[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; - assign YLen2 = &Y[`FLEN-1:`LEN2] ? Y[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; - assign ZLen2 = &Z[`FLEN-1:`LEN2] ? Z[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; - - // There are 2 case statements - // - one for other singals and one for sgn/exp/frac - // - need two for the dependencies in the expoenent calculation - always_comb begin - case (FmtE) - `FMT: begin // if input is largest precision (`FLEN - ie quad or double) - - // This is the original format so set OrigDenorm to 0 - XOrigDenormE = 1'b0; - YOrigDenormE = 1'b0; - ZOrigDenormE = 1'b0; - - // is the exponent non-zero - XExpNonzero = |X[`FLEN-2:`NF]; - YExpNonzero = |Y[`FLEN-2:`NF]; - ZExpNonzero = |Z[`FLEN-2:`NF]; - - // is the exponent all 1's - XExpMaxE = &X[`FLEN-2:`NF]; - YExpMaxE = &Y[`FLEN-2:`NF]; - ZExpMaxE = &Z[`FLEN-2:`NF]; - end - `FMT1: begin // if input is larger precsion (`LEN1 - double or single) - - // is the input (in it's original format) denormalized - XOrigDenormE = ~|XLen1[`LEN1-2:`NF1] & ~XFracZero; - YOrigDenormE = ~|YLen1[`LEN1-2:`NF1] & ~YFracZero; - ZOrigDenormE = ~|ZLen1[`LEN1-2:`NF1] & ~ZFracZero; - - // is the exponent non-zero - XExpNonzero = |XLen1[`LEN1-2:`NF1]; - YExpNonzero = |YLen1[`LEN1-2:`NF1]; - ZExpNonzero = |ZLen1[`LEN1-2:`NF1]; - - // is the exponent all 1's - XExpMaxE = &XLen1[`LEN1-2:`NF1]; - YExpMaxE = &YLen1[`LEN1-2:`NF1]; - ZExpMaxE = &ZLen1[`LEN1-2:`NF1]; - end - `FMT2: begin // if input is smallest precsion (`LEN2 - single or half) - - // is the input (in it's original format) denormalized - XOrigDenormE = ~|XLen2[`LEN2-2:`NF2] & ~XFracZero; - YOrigDenormE = ~|YLen2[`LEN2-2:`NF2] & ~YFracZero; - ZOrigDenormE = ~|ZLen2[`LEN2-2:`NF2] & ~ZFracZero; - - // is the exponent non-zero - XExpNonzero = |XLen2[`LEN2-2:`NF2]; - YExpNonzero = |YLen2[`LEN2-2:`NF2]; - ZExpNonzero = |ZLen2[`LEN2-2:`NF2]; - - // is the exponent all 1's - XExpMaxE = &XLen2[`LEN2-2:`NF2]; - YExpMaxE = &YLen2[`LEN2-2:`NF2]; 
- ZExpMaxE = &ZLen2[`LEN2-2:`NF2]; - end - default: begin - XOrigDenormE = 0; - YOrigDenormE = 0; - ZOrigDenormE = 0; - XExpNonzero = 0; - YExpNonzero = 0; - ZExpNonzero = 0; - XExpMaxE = 0; - YExpMaxE = 0; - ZExpMaxE = 0; - end - endcase - end - always_comb begin - case (FmtE) - `FMT: begin // if input is largest precision (`FLEN - ie quad or double) - // extract the sign bit - XSgnE = X[`FLEN-1]; - YSgnE = Y[`FLEN-1]; - ZSgnE = Z[`FLEN-1]; - - // extract the exponent - XExpE = X[`FLEN-2:`NF]; - YExpE = Y[`FLEN-2:`NF]; - ZExpE = Z[`FLEN-2:`NF]; - - // extract the fraction - XFracE = X[`NF-1:0]; - YFracE = Y[`NF-1:0]; - ZFracE = Z[`NF-1:0]; - end - `FMT1: begin // if input is larger precsion (`LEN1 - double or single) - - // extract the sign bit - XSgnE = XLen1[`LEN1-1]; - YSgnE = YLen1[`LEN1-1]; - ZSgnE = ZLen1[`LEN1-1]; - - // example double to single conversion: - // 1023 = 0011 1111 1111 - // 127 = 0000 0111 1111 (subtract this) - // 896 = 0011 1000 0000 - // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b - // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/denorm/inf/NaN values - - // convert the larger precision's exponent to use the largest precision's bias - XExpE = XOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {XLen1[`LEN1-2], {`NE-`NE1{~XLen1[`LEN1-2]&~XExpZero|XExpMaxE}}, XLen1[`LEN1-3:`NF1]}; - YExpE = YOrigDenormE ? {1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {YLen1[`LEN1-2], {`NE-`NE1{~YLen1[`LEN1-2]&~YExpZero|YExpMaxE}}, YLen1[`LEN1-3:`NF1]}; - ZExpE = ZOrigDenormE ? 
{1'b0, {`NE-`NE1{1'b1}}, (`NE1-1)'(1)} : {ZLen1[`LEN1-2], {`NE-`NE1{~ZLen1[`LEN1-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`LEN1-3:`NF1]}; - - // extract the fraction and add the nessesary trailing zeros - XFracE = {XLen1[`NF1-1:0], (`NF-`NF1)'(0)}; - YFracE = {YLen1[`NF1-1:0], (`NF-`NF1)'(0)}; - ZFracE = {ZLen1[`NF1-1:0], (`NF-`NF1)'(0)}; - end - `FMT2: begin // if input is smallest precsion (`LEN2 - single or half) - - // exctract the sign bit - XSgnE = XLen2[`LEN2-1]; - YSgnE = YLen2[`LEN2-1]; - ZSgnE = ZLen2[`LEN2-1]; - - // example double to single conversion: - // 1023 = 0011 1111 1111 - // 127 = 0000 0111 1111 (subtract this) - // 896 = 0011 1000 0000 - // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b - // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/denorm/inf/NaN values - - // convert the smallest precision's exponent to use the largest precision's bias - XExpE = XOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {XLen2[`LEN2-2], {`NE-`NE2{~XLen2[`LEN2-2]&~XExpZero|XExpMaxE}}, XLen2[`LEN2-3:`NF2]}; - YExpE = YOrigDenormE ? {1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {YLen2[`LEN2-2], {`NE-`NE2{~YLen2[`LEN2-2]&~YExpZero|YExpMaxE}}, YLen2[`LEN2-3:`NF2]}; - ZExpE = ZOrigDenormE ? 
{1'b0, {`NE-`NE2{1'b1}}, (`NE2-1)'(1)} : {ZLen2[`LEN2-2], {`NE-`NE2{~ZLen2[`LEN2-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`LEN2-3:`NF2]}; - - // extract the fraction and add the nessesary trailing zeros - XFracE = {XLen2[`NF2-1:0], (`NF-`NF2)'(0)}; - YFracE = {YLen2[`NF2-1:0], (`NF-`NF2)'(0)}; - ZFracE = {ZLen2[`NF2-1:0], (`NF-`NF2)'(0)}; - end - default: begin - XSgnE = 0; - YSgnE = 0; - ZSgnE = 0; - XExpE = 0; - YExpE = 0; - ZExpE = 0; - XFracE = 0; - YFracE = 0; - ZFracE = 0; - end - endcase - end - - end else if (`FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half - - // quad | double | single | half - //------------------------------------------------------------------- - // `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number - // `Q_NE | `D_NE | `S_NE | `H_NE length of exponent - // `Q_NF | `D_NF | `S_NF | `H_NF length of fraction - // `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value - // `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10 - - - logic [`D_LEN-1:0] XLen1, YLen1, ZLen1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision - logic [`S_LEN-1:0] XLen2, YLen2, ZLen2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision - logic [`H_LEN-1:0] XLen3, YLen3, ZLen3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision - logic YOrigDenormE; // the original value of XYZ is denormalized - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision - assign XLen1 = &X[`Q_LEN-1:`D_LEN] ? X[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; - assign YLen1 = &Y[`Q_LEN-1:`D_LEN] ? Y[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; - assign ZLen1 = &Z[`Q_LEN-1:`D_LEN] ? 
Z[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for single precision - assign XLen2 = &X[`Q_LEN-1:`S_LEN] ? X[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; - assign YLen2 = &Y[`Q_LEN-1:`S_LEN] ? Y[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; - assign ZLen2 = &Z[`Q_LEN-1:`S_LEN] ? Z[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; - - // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for half precision - assign XLen3 = &X[`Q_LEN-1:`H_LEN] ? X[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; - assign YLen3 = &Y[`Q_LEN-1:`H_LEN] ? Y[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; - assign ZLen3 = &Z[`Q_LEN-1:`H_LEN] ? Z[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; - - - // There are 2 case statements - // - one for other singals and one for sgn/exp/frac - // - need two for the dependencies in the expoenent calculation - always_comb begin - case (FmtE) - 2'b11: begin // if input is quad percision - - // This is the original format so set OrigDenorm to 0 - XOrigDenormE = 1'b0; - YOrigDenormE = 1'b0; - ZOrigDenormE = 1'b0; - - // is the exponent non-zero - XExpNonzero = |X[`Q_LEN-2:`Q_NF]; - YExpNonzero = |Y[`Q_LEN-2:`Q_NF]; - ZExpNonzero = |Z[`Q_LEN-2:`Q_NF]; - - // is the exponent all 1's - XExpMaxE = &X[`Q_LEN-2:`Q_NF]; - YExpMaxE = &Y[`Q_LEN-2:`Q_NF]; - ZExpMaxE = &Z[`Q_LEN-2:`Q_NF]; - end - 2'b01: begin // if input is double percision - - // is the exponent all 1's - XExpMaxE = &XLen1[`D_LEN-2:`D_NF]; - YExpMaxE = &YLen1[`D_LEN-2:`D_NF]; - ZExpMaxE = &ZLen1[`D_LEN-2:`D_NF]; - - // is the input (in it's original format) denormalized - XOrigDenormE = ~|XLen1[`D_LEN-2:`D_NF] & ~XFracZero; - YOrigDenormE = ~|YLen1[`D_LEN-2:`D_NF] & ~YFracZero; - ZOrigDenormE = ~|ZLen1[`D_LEN-2:`D_NF] & ~ZFracZero; - - // is the exponent non-zero - XExpNonzero = |XLen1[`D_LEN-2:`D_NF]; - YExpNonzero = 
|YLen1[`D_LEN-2:`D_NF]; - ZExpNonzero = |ZLen1[`D_LEN-2:`D_NF]; - end - 2'b00: begin // if input is single percision - - // is the exponent all 1's - XExpMaxE = &XLen2[`S_LEN-2:`S_NF]; - YExpMaxE = &YLen2[`S_LEN-2:`S_NF]; - ZExpMaxE = &ZLen2[`S_LEN-2:`S_NF]; - - // is the input (in it's original format) denormalized - XOrigDenormE = ~|XLen2[`S_LEN-2:`S_NF] & ~XFracZero; - YOrigDenormE = ~|YLen2[`S_LEN-2:`S_NF] & ~YFracZero; - ZOrigDenormE = ~|ZLen2[`S_LEN-2:`S_NF] & ~ZFracZero; - - // is the exponent non-zero - XExpNonzero = |XLen2[`S_LEN-2:`S_NF]; - YExpNonzero = |YLen2[`S_LEN-2:`S_NF]; - ZExpNonzero = |ZLen2[`S_LEN-2:`S_NF]; - end - 2'b10: begin // if input is half percision - - // is the exponent all 1's - XExpMaxE = &XLen3[`H_LEN-2:`H_NF]; - YExpMaxE = &YLen3[`H_LEN-2:`H_NF]; - ZExpMaxE = &ZLen3[`H_LEN-2:`H_NF]; - - // is the input (in it's original format) denormalized - XOrigDenormE = ~|XLen3[`H_LEN-2:`H_NF] & ~XFracZero; - YOrigDenormE = ~|YLen3[`H_LEN-2:`H_NF] & ~YFracZero; - ZOrigDenormE = ~|ZLen3[`H_LEN-2:`H_NF] & ~ZFracZero; - - // is the exponent non-zero - XExpNonzero = |XLen3[`H_LEN-2:`H_NF]; - YExpNonzero = |YLen3[`H_LEN-2:`H_NF]; - ZExpNonzero = |ZLen3[`H_LEN-2:`H_NF]; - end - endcase - end - - always_comb begin - case (FmtE) - 2'b11: begin // if input is quad percision - // extract sign bit - XSgnE = X[`Q_LEN-1]; - YSgnE = Y[`Q_LEN-1]; - ZSgnE = Z[`Q_LEN-1]; - - // extract the exponent - XExpE = X[`Q_LEN-2:`Q_NF]; - YExpE = Y[`Q_LEN-2:`Q_NF]; - ZExpE = Z[`Q_LEN-2:`Q_NF]; - - // extract the fraction - XFracE = X[`Q_NF-1:0]; - YFracE = Y[`Q_NF-1:0]; - ZFracE = Z[`Q_NF-1:0]; - end - 2'b01: begin // if input is double percision - // extract sign bit - XSgnE = XLen1[`D_LEN-1]; - YSgnE = YLen1[`D_LEN-1]; - ZSgnE = ZLen1[`D_LEN-1]; - - // example double to single conversion: - // 1023 = 0011 1111 1111 - // 127 = 0000 0111 1111 (subtract this) - // 896 = 0011 1000 0000 - // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b - // dexp = 0bdd dbbb bbbb - // 
also need to take into account possible zero/denorm/inf/NaN values - - // convert the double precsion exponent into quad precsion - - XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {XLen1[`D_LEN-2], {`Q_NE-`D_NE{~XLen1[`D_LEN-2]&~XExpZero|XExpMaxE}}, XLen1[`D_LEN-3:`D_NF]}; - YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {YLen1[`D_LEN-2], {`Q_NE-`D_NE{~YLen1[`D_LEN-2]&~YExpZero|YExpMaxE}}, YLen1[`D_LEN-3:`D_NF]}; - ZExpE = ZOrigDenormE ? {1'b0, {`Q_NE-`D_NE{1'b1}}, (`D_NE-1)'(1)} : {ZLen1[`D_LEN-2], {`Q_NE-`D_NE{~ZLen1[`D_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen1[`D_LEN-3:`D_NF]}; - - // extract the fraction and add the nessesary trailing zeros - XFracE = {XLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; - YFracE = {YLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; - ZFracE = {ZLen1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; - end - 2'b00: begin // if input is single percision - // extract sign bit - XSgnE = XLen2[`S_LEN-1]; - YSgnE = YLen2[`S_LEN-1]; - ZSgnE = ZLen2[`S_LEN-1]; - - // example double to single conversion: - // 1023 = 0011 1111 1111 - // 127 = 0000 0111 1111 (subtract this) - // 896 = 0011 1000 0000 - // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b - // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/denorm/inf/NaN values - - // convert the single precsion exponent into quad precsion - XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {XLen2[`S_LEN-2], {`Q_NE-`S_NE{~XLen2[`S_LEN-2]&~XExpZero|XExpMaxE}}, XLen2[`S_LEN-3:`S_NF]}; - YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {YLen2[`S_LEN-2], {`Q_NE-`S_NE{~YLen2[`S_LEN-2]&~YExpZero|YExpMaxE}}, YLen2[`S_LEN-3:`S_NF]}; - ZExpE = ZOrigDenormE ? 
{1'b0, {`Q_NE-`S_NE{1'b1}}, (`S_NE-1)'(1)} : {ZLen2[`S_LEN-2], {`Q_NE-`S_NE{~ZLen2[`S_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen2[`S_LEN-3:`S_NF]}; - - // extract the fraction and add the nessesary trailing zeros - XFracE = {XLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; - YFracE = {YLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; - ZFracE = {ZLen2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; - end - 2'b10: begin // if input is half percision - // extract sign bit - XSgnE = XLen3[`H_LEN-1]; - YSgnE = YLen3[`H_LEN-1]; - ZSgnE = ZLen3[`H_LEN-1]; - - // example double to single conversion: - // 1023 = 0011 1111 1111 - // 127 = 0000 0111 1111 (subtract this) - // 896 = 0011 1000 0000 - // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b - // dexp = 0bdd dbbb bbbb - // also need to take into account possible zero/denorm/inf/NaN values - - // convert the half precsion exponent into quad precsion - XExpE = XOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {XLen3[`H_LEN-2], {`Q_NE-`H_NE{~XLen3[`H_LEN-2]&~XExpZero|XExpMaxE}}, XLen3[`H_LEN-3:`H_NF]}; - YExpE = YOrigDenormE ? {1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {YLen3[`H_LEN-2], {`Q_NE-`H_NE{~YLen3[`H_LEN-2]&~YExpZero|YExpMaxE}}, YLen3[`H_LEN-3:`H_NF]}; - ZExpE = ZOrigDenormE ? 
{1'b0, {`Q_NE-`H_NE{1'b1}}, (`H_NE-1)'(1)} : {ZLen3[`H_LEN-2], {`Q_NE-`H_NE{~ZLen3[`H_LEN-2]&~ZExpZero|ZExpMaxE}}, ZLen3[`H_LEN-3:`H_NF]}; - - // extract the fraction and add the nessesary trailing zeros - XFracE = {XLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; - YFracE = {YLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; - ZFracE = {ZLen3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; - end - endcase - end - - end - - // is the exponent all 0's - assign XExpZero = ~XExpNonzero; - assign YExpZero = ~YExpNonzero; - assign ZExpZero = ~ZExpNonzero; - - // is the fraction zero - assign XFracZero = ~|XFracE; - assign YFracZero = ~|YFracE; - assign ZFracZero = ~|ZFracE; - - // add the assumed one (or zero if denormal or zero) to create the mantissa - assign XManE = {XExpNonzero, XFracE}; - assign YManE = {YExpNonzero, YFracE}; - assign ZManE = {ZExpNonzero, ZFracE}; + unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), + .NaN(ZNaNE), .SNaN(ZSNaNE), .Denorm(ZDenormE), + .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .ExpZero(ZExpZero)); + // is X normalized assign XNormE = ~(XExpMaxE|XExpZero); - // is the input a NaN - // - force to be a NaN if it isn't properly Nan Boxed - assign XNaNE = XExpMaxE & ~XFracZero; - assign YNaNE = YExpMaxE & ~YFracZero; - assign ZNaNE = ZExpMaxE & ~ZFracZero; - - // is the input a singnaling NaN - assign XSNaNE = XNaNE&~XFracE[`NF-1]; - assign YSNaNE = YNaNE&~YFracE[`NF-1]; - assign ZSNaNE = ZNaNE&~ZFracE[`NF-1]; - - // is the input denormalized - assign XDenormE = XExpZero & ~XFracZero; - assign YDenormE = YExpZero & ~YFracZero; - assign ZDenormE = ZExpZero & ~ZFracZero; - - // is the input infinity - assign XInfE = XExpMaxE & XFracZero; - assign YInfE = YExpMaxE & YFracZero; - assign ZInfE = ZExpMaxE & ZFracZero; - - // is the input zero - assign XZeroE = XExpZero & XFracZero; - assign YZeroE = YExpZero & YFracZero; - assign ZZeroE = ZExpZero & ZFracZero; - endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/unpackinput.sv 
b/pipelined/src/fpu/unpackinput.sv new file mode 100644 index 000000000..0b944b645 --- /dev/null +++ b/pipelined/src/fpu/unpackinput.sv @@ -0,0 +1,301 @@ +`include "wally-config.vh" + +module unpackinput ( + input logic [`FLEN-1:0] In, // inputs from register file + input logic [`FPSIZES/3:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half + output logic Sgn, // sign bits of XYZ + output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision) + output logic NaN, // is XYZ a NaN + output logic SNaN, // is XYZ a signaling NaN + output logic Denorm, // is XYZ denormalized + output logic Zero, // is XYZ zero + output logic Inf, // is XYZ infinity + output logic ExpMax, // does In have the maximum exponent (NaN or Inf) + output logic ExpZero // is the exponent zero +); + + logic [`NF-1:0] Frac; //Fraction of XYZ + logic ExpNonZero; // is the exponent of XYZ non-zero + logic FracZero; // is the fraction zero + + if (`FPSIZES == 1) begin // if there is only one floating point format supported + + // sign bit + assign Sgn = In[`FLEN-1]; + + // fraction (no assumed 1) + assign Frac = In[`NF-1:0]; + + // is the fraction zero + assign FracZero = ~|Frac; + + // is the exponent non-zero - read the raw exponent field, not Exp: Exp[0] depends on Denorm, which depends on ExpNonZero (combinational loop) + assign ExpNonZero = |In[`FLEN-2:`NF]; + + // is the input (in it's original format) denormalized + assign Denorm = ~ExpNonZero & ~FracZero; + + // exponent + assign Exp = {In[`FLEN-2:`NF+1], In[`NF]|Denorm}; + + + // is the exponent all 1's + assign ExpMax = &Exp; + + + end else if (`FPSIZES == 2) begin // if there are 2 floating point formats supported + //***need better names for these constants + // largest format | smaller format + //---------------------------------- + // `FLEN | `LEN1 length of floating point number + // `NE | `NE1 length of exponent + // `NF | `NF1 length of fraction + // `BIAS | `BIAS1 exponent's bias value + // `FMT | `FMT1 precision's format
value - Q=11 D=01 S=00 H=10 + + // Possible combinantions specified by spec: + // double and single + // single and half + + // Not needed but can also handle: + // quad and double + // quad and single + // quad and half + // double and half + + logic [`LEN1-1:0] Len1; // Remove NaN boxing or NaN, if not properly NaN boxed + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN + assign Len1 = &In[`FLEN-1:`LEN1] ? In[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; + + // choose sign bit depending on format - 1=larger precsion 0=smaller precision + assign Sgn = FmtE ? In[`FLEN-1] : Len1[`LEN1-1]; + + // extract the fraction, add trailing zeroes to the mantissa if nessisary + assign Frac = FmtE ? In[`NF-1:0] : {Len1[`NF1-1:0], (`NF-`NF1)'(0)}; + + // is the fraction zero + assign FracZero = ~|Frac; + + // is the exponent non-zero + assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |Len1[`LEN1-2:`NF1]; + + // is the input (in it's original format) denormalized + assign Denorm = ~ExpNonZero & ~FracZero; + + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/denorm/inf/NaN values + + // extract the exponent, converting the smaller exponent into the larger precision if nessisary + // - if the original precision had a denormal number convert the exponent value 1 + assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|Denorm} : {Len1[`LEN1-2], {`NE-`NE1{~Len1[`LEN1-2]}}, Len1[`LEN1-3:`NF1+1], Len1[`NF1]|Denorm}; + + + + // is the exponent all 1's + assign ExpMax = FmtE ? 
&In[`FLEN-2:`NF] : &Len1[`LEN1-2:`NF1]; + + + end else if (`FPSIZES == 3) begin // three floating point precsions supported + + //***need better names for these constants + // largest format | larger format | smallest format + //--------------------------------------------------- + // `FLEN | `LEN1 | `LEN2 length of floating point number + // `NE | `NE1 | `NE2 length of exponent + // `NF | `NF1 | `NF2 length of fraction + // `BIAS | `BIAS1 | `BIAS2 exponent's bias value + // `FMT | `FMT1 | `FMT2 precision's format value - Q=11 D=01 S=00 H=10 + + // Possible combinantions specified by spec: + // quad and double and single + // double and single and half + + // Not needed but can also handle: + // quad and double and half + // quad and single and half + + logic [`LEN1-1:0] Len1; // Remove NaN boxing or NaN, if not properly NaN boxed for larger percision + logic [`LEN2-1:0] Len2; // Remove NaN boxing or NaN, if not properly NaN boxed for smallest precision + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for larger precision + assign Len1 = &In[`FLEN-1:`LEN1] ? In[`LEN1-1:0] : {1'b0, {`NE1+1{1'b1}}, (`NF1-1)'(0)}; + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for smaller precision + assign Len2 = &In[`FLEN-1:`LEN2] ? 
In[`LEN2-1:0] : {1'b0, {`NE2+1{1'b1}}, (`NF2-1)'(0)}; + + + // extract the sign bit + always_comb + case (FmtE) + `FMT: Sgn = In[`FLEN-1]; + `FMT1: Sgn = Len1[`LEN1-1]; + `FMT2: Sgn = Len2[`LEN2-1]; + default: Sgn = 0; + endcase + + // extract the fraction + always_comb + case (FmtE) + `FMT: Frac = In[`NF-1:0]; + `FMT1: Frac = {Len1[`NF1-1:0], (`NF-`NF1)'(0)}; + `FMT2: Frac = {Len2[`NF2-1:0], (`NF-`NF2)'(0)}; + default: Frac = 0; + endcase + + // is the fraction zero + assign FracZero = ~|Frac; + + + // is the exponent non-zero + always_comb + case (FmtE) + `FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double) + `FMT1: ExpNonZero = |Len1[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) + `FMT2: ExpNonZero = |Len2[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) + default: ExpNonZero = 0; + endcase + + // is the input (in it's original format) denormalized + assign Denorm = ~ExpNonZero & ~FracZero; + + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/denorm/inf/NaN values + + // convert the larger precision's exponent to use the largest precision's bias + always_comb + case (FmtE) + `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|Denorm}; + `FMT1: Exp = {Len1[`LEN1-2], {`NE-`NE1{~Len1[`LEN1-2]}}, Len1[`LEN1-3:`NF1+1], Len1[`NF1]|Denorm}; + `FMT2: Exp = {Len2[`LEN2-2], {`NE-`NE2{~Len2[`LEN2-2]}}, Len2[`LEN2-3:`NF2+1], Len2[`NF2]|Denorm}; + default: Exp = 0; + endcase + + // is the exponent all 1's + always_comb + case (FmtE) + `FMT: ExpMax = &In[`FLEN-2:`NF]; + `FMT1: ExpMax = &Len1[`LEN1-2:`NF1]; + `FMT2: ExpMax = &Len2[`LEN2-2:`NF2]; + default: ExpMax = 0; + endcase + + end else if (`FPSIZES == 4) begin // if all precsisons are supported - quad, double, single, and half 
+ + // quad | double | single | half + //------------------------------------------------------------------- + // `Q_LEN | `D_LEN | `S_LEN | `H_LEN length of floating point number + // `Q_NE | `D_NE | `S_NE | `H_NE length of exponent + // `Q_NF | `D_NF | `S_NF | `H_NF length of fraction + // `Q_BIAS | `D_BIAS | `S_BIAS | `H_BIAS exponent's bias value + // `Q_FMT | `D_FMT | `S_FMT | `H_FMT precision's format value - Q=11 D=01 S=00 H=10 + + + logic [`D_LEN-1:0] Len1; // Remove NaN boxing or NaN, if not properly NaN boxed for double percision + logic [`S_LEN-1:0] Len2; // Remove NaN boxing or NaN, if not properly NaN boxed for single percision + logic [`H_LEN-1:0] Len3; // Remove NaN boxing or NaN, if not properly NaN boxed for half percision + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for double precision + assign Len1 = &In[`Q_LEN-1:`D_LEN] ? In[`D_LEN-1:0] : {1'b0, {`D_NE+1{1'b1}}, (`D_NF-1)'(0)}; + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for single precision + assign Len2 = &In[`Q_LEN-1:`S_LEN] ? In[`S_LEN-1:0] : {1'b0, {`S_NE+1{1'b1}}, (`S_NF-1)'(0)}; + + // Check NaN boxing, If the value is not properly NaN boxed, set the value to a quiet NaN - for half precision + assign Len3 = &In[`Q_LEN-1:`H_LEN] ? 
In[`H_LEN-1:0] : {1'b0, {`H_NE+1{1'b1}}, (`H_NF-1)'(0)}; + + // extract sign bit + always_comb + case (FmtE) + 2'b11: Sgn = In[`Q_LEN-1]; + 2'b01: Sgn = Len1[`D_LEN-1]; + 2'b00: Sgn = Len2[`S_LEN-1]; + 2'b10: Sgn = Len3[`H_LEN-1]; + endcase + + + // extract the fraction + always_comb + case (FmtE) + 2'b11: Frac = In[`Q_NF-1:0]; + 2'b01: Frac = {Len1[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; + 2'b00: Frac = {Len2[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; + 2'b10: Frac = {Len3[`H_NF-1:0], (`Q_NF-`H_NF)'(0)}; + endcase + + // is the fraction zero + assign FracZero = ~|Frac; + + // is the exponent non-zero + always_comb + case (FmtE) + 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF]; + 2'b01: ExpNonZero = |Len1[`D_LEN-2:`D_NF]; + 2'b00: ExpNonZero = |Len2[`S_LEN-2:`S_NF]; + 2'b10: ExpNonZero = |Len3[`H_LEN-2:`H_NF]; + endcase + + // is the input (in it's original format) denormalized + assign Denorm = ~ExpNonZero & ~FracZero; + + + // example double to single conversion: + // 1023 = 0011 1111 1111 + // 127 = 0000 0111 1111 (subtract this) + // 896 = 0011 1000 0000 + // sexp = 0000 bbbb bbbb (add this) b = bit d = ~b + // dexp = 0bdd dbbb bbbb + // also need to take into account possible zero/denorm/inf/NaN values + + // convert the double precsion exponent into quad precsion + always_comb + case (FmtE) + 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|Denorm}; + 2'b01: Exp = {Len1[`D_LEN-2], {`Q_NE-`D_NE{~Len1[`D_LEN-2]}}, Len1[`D_LEN-3:`D_NF+1], Len1[`D_NF]|Denorm}; + 2'b00: Exp = {Len2[`S_LEN-2], {`Q_NE-`S_NE{~Len2[`S_LEN-2]}}, Len2[`S_LEN-3:`S_NF+1], Len2[`S_NF]|Denorm}; + 2'b10: Exp = {Len3[`H_LEN-2], {`Q_NE-`H_NE{~Len3[`H_LEN-2]}}, Len3[`H_LEN-3:`H_NF+1], Len3[`H_NF]|Denorm}; + endcase + + + // is the exponent all 1's + always_comb + case (FmtE) + 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF]; + 2'b01: ExpMax = &Len1[`D_LEN-2:`D_NF]; + 2'b00: ExpMax = &Len2[`S_LEN-2:`S_NF]; + 2'b10: ExpMax = &Len3[`H_LEN-2:`H_NF]; + endcase + + end + + // is the exponent all 0's + assign ExpZero = ~ExpNonZero; + + // 
add the assumed one (or zero if denormal or zero) to create the mantissa + assign Man = {ExpNonZero, Frac}; + + // is the input a NaN + // - force to be a NaN if it isn't properly Nan Boxed + assign NaN = ExpMax & ~FracZero; + + // is the input a singnaling NaN + assign SNaN = NaN&~Frac[`NF-1]; + + // is the input infinity + assign Inf = ExpMax & FracZero; + + // is the input zero + assign Zero = ExpZero & FracZero; + +endmodule \ No newline at end of file diff --git a/pipelined/src/generic/lzc.sv b/pipelined/src/generic/lzc.sv new file mode 100644 index 000000000..1ce082475 --- /dev/null +++ b/pipelined/src/generic/lzc.sv @@ -0,0 +1,15 @@ +//leading zero counter i.e. priority encoder +module lzc #(parameter WIDTH=1) ( + input logic [WIDTH-1:0] num, + output logic [$clog2(WIDTH+1)-1:0] ZeroCnt +); +/* verilator lint_off CMPCONST */ + + logic [$clog2(WIDTH+1)-1:0] i; + always_comb begin + i = 0; + while (~num[WIDTH-1-(32)'(i)] & $unsigned(i) <= $unsigned(($clog2(WIDTH+1))'(WIDTH-1))) i = i+1; // search for leading one + ZeroCnt = i; + end +/* verilator lint_on CMPCONST */ +endmodule diff --git a/pipelined/src/hazard/hazard.sv b/pipelined/src/hazard/hazard.sv index 159ee10ec..cf4cdaa8b 100644 --- a/pipelined/src/hazard/hazard.sv +++ b/pipelined/src/hazard/hazard.sv @@ -65,7 +65,7 @@ module hazard( assign StallFCause = CSRWritePendingDEM & ~(TrapM | RetM | BPPredWrongE); // stall in decode if instruction is a load/mul/csr dependent on previous assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FPUStallD | FStallD) & ~(TrapM | RetM | BPPredWrongE); - assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM); + assign StallECause = (DivBusyE | FDivBusyE) & ~(TrapM); // *** can we move to decode stage (KP?) // WFI terminates if any enabled interrupt is pending, even if global interrupts are disabled. 
It could also terminate with TW trap assign StallMCause = wfiM & (~TrapM & ~IntPendingM); assign StallWCause = LSUStallM | IFUStallF; diff --git a/pipelined/src/ieu/comparator.sv b/pipelined/src/ieu/comparator.sv index cc2eb1723..61a17ad9f 100644 --- a/pipelined/src/ieu/comparator.sv +++ b/pipelined/src/ieu/comparator.sv @@ -30,6 +30,15 @@ `include "wally-config.vh" +module donedet #(parameter WIDTH=64) ( + input logic [WIDTH-1:0] a, b, + output logic eq); + + //assign eq = (a+b == 0); // gives good speed but 3x necessary area + // See CMOS VLSI Design 4th Ed. p. 463 K = A+B for K = 0: a+b == 0 iff (a ^ b) == ((a | b) << 1); the OR is parenthesized because == binds tighter than | + assign eq = ((a ^ b) == ({a[WIDTH-2:0], 1'b0} | {b[WIDTH-2:0], 1'b0})); + endmodule + module comparator_sub #(parameter WIDTH=64) ( input logic [WIDTH-1:0] a, b, output logic [2:0] flags); diff --git a/pipelined/src/privileged/privileged.sv b/pipelined/src/privileged/privileged.sv index 43e2f8f04..a65039bf4 100644 --- a/pipelined/src/privileged/privileged.sv +++ b/pipelined/src/privileged/privileged.sv @@ -99,13 +99,14 @@ module privileged ( logic STATUS_MIE, STATUS_SIE; logic [11:0] MIP_REGW, MIE_REGW; logic [1:0] NextPrivilegeModeM; + logic DelegateM; /////////////////////////////////////////// // track the current privilege level /////////////////////////////////////////// - privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .InterruptM, .CauseM, - .MEDELEG_REGW, .MIDELEG_REGW, .STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW); + privmode privmode(.clk, .reset, .StallW, .TrapM, .mretM, .sretM, .DelegateM, + .STATUS_MPP, .STATUS_SPP, .NextPrivilegeModeM, .PrivilegeModeW); /////////////////////////////////////////// // decode privileged instructions @@ -158,11 +159,11 @@ module privileged ( .LoadPageFaultM, .StoreAmoPageFaultM, .mretM, .sretM, .PrivilegeModeW, - .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, + .MIP_REGW, .MIE_REGW, .MIDELEG_REGW, .MEDELEG_REGW, .STATUS_MIE, .STATUS_SIE, .InstrValidM, .CommittedM, .TrapM, .RetM, - .InterruptM, .IntPendingM, +
.InterruptM, .IntPendingM, .DelegateM, .CauseM); endmodule diff --git a/pipelined/src/privileged/privmode.sv b/pipelined/src/privileged/privmode.sv index 9446c08ce..768d8a364 100644 --- a/pipelined/src/privileged/privmode.sv +++ b/pipelined/src/privileged/privmode.sv @@ -33,25 +33,18 @@ module privmode ( input logic clk, reset, - input logic StallW, TrapM, mretM, sretM, InterruptM, - input logic [`LOG_XLEN-1:0] CauseM, - input logic [`XLEN-1:0] MEDELEG_REGW, - input logic [11:0] MIDELEG_REGW, + input logic StallW, TrapM, mretM, sretM, + input logic DelegateM, input logic [1:0] STATUS_MPP, input logic STATUS_SPP, output logic [1:0] NextPrivilegeModeM, PrivilegeModeW ); if (`U_SUPPORTED) begin:privmode - logic md; - - // get bits of DELEG registers based on CAUSE - assign md = InterruptM ? MIDELEG_REGW[CauseM[3:0]] : MEDELEG_REGW[CauseM]; - // PrivilegeMode FSM always_comb begin if (TrapM) begin // Change privilege based on DELEG registers (see 3.1.8) - if (`S_SUPPORTED & md & (PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE)) + if (`S_SUPPORTED & DelegateM) NextPrivilegeModeM = `S_MODE; else NextPrivilegeModeM = `M_MODE; end else if (mretM) NextPrivilegeModeM = STATUS_MPP; diff --git a/pipelined/src/privileged/trap.sv b/pipelined/src/privileged/trap.sv index 6225b9c05..7bee52a01 100644 --- a/pipelined/src/privileged/trap.sv +++ b/pipelined/src/privileged/trap.sv @@ -39,11 +39,12 @@ module trap ( (* mark_debug = "true" *) input logic LoadPageFaultM, StoreAmoPageFaultM, (* mark_debug = "true" *) input logic mretM, sretM, input logic [1:0] PrivilegeModeW, - (* mark_debug = "true" *) input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW, + (* mark_debug = "true" *) input logic [11:0] MIP_REGW, MIE_REGW, MIDELEG_REGW, + input logic [`XLEN-1:0] MEDELEG_REGW, input logic STATUS_MIE, STATUS_SIE, input logic InstrValidM, CommittedM, output logic TrapM, RetM, - output logic InterruptM, IntPendingM, + output logic InterruptM, IntPendingM, DelegateM, output logic 
[`LOG_XLEN-1:0] CauseM ); @@ -63,6 +64,8 @@ module trap ( assign IntPendingM = |PendingIntsM; assign ValidIntsM = {12{MIntGlobalEnM}} & PendingIntsM & ~MIDELEG_REGW | {12{SIntGlobalEnM}} & PendingIntsM & MIDELEG_REGW; assign InterruptM = (|ValidIntsM) && InstrValidM && ~(CommittedM); // *** RT. CommittedM is a temporary hack to prevent integer division from having an interrupt during divide. + assign DelegateM = (InterruptM ? MIDELEG_REGW[CauseM[3:0]] : MEDELEG_REGW[CauseM]) & + (PrivilegeModeW == `U_MODE | PrivilegeModeW == `S_MODE); /////////////////////////////////////////// // Trigger Traps and RET diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile index f8343f584..6789c234c 100644 --- a/pipelined/srt/Makefile +++ b/pipelined/srt/Makefile @@ -1,13 +1,16 @@ -all: sqrttestgen testgen +all: sqrttestgen testgen qst2 sqrttestgen: sqrttestgen.c gcc sqrttestgen.c -lm -o sqrttestgen - + testgen: testgen.c + gcc testgen.c -lm -o testgen + +qst2: qst2.c + gcc qst2.c -lm -o qst2 gcc -lm -o testgen testgen.c ./testgen - exptestgen: exptestgen.c gcc -lm -o exptestgen exptestgen.c ./exptestgen diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 3e90aeaf4..892e76373 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -111,7 +111,6 @@ module testbenchfp; logic FmaRdXZero, FmaRdYZero, FmaRdZZero; logic FmaRnmXZero, FmaRnmYZero, FmaRnmZZero; logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones - logic ZOrigDenorm, FmaRneZOrigDenorm, FmaRzZOrigDenorm, FmaRuZOrigDenorm, FmaRdZOrigDenorm, FmaRnmZOrigDenorm; // is the original precision dnormalized // in-between FMA signals logic Mult; @@ -682,7 +681,7 @@ module testbenchfp; .XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn), .XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp), .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), - .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN), 
.ZOrigDenormE(FmaRneZOrigDenorm), + .XNaNE(FmaRneXNaN), .YNaNE(FmaRneYNaN), .ZNaNE(FmaRneZNaN), .XSNaNE(FmaRneXSNaN), .YSNaNE(FmaRneYSNaN), .ZSNaNE(FmaRneZSNaN), .XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm), .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero), @@ -692,7 +691,7 @@ module testbenchfp; .XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .FmaModFmt, .XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp), .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), - .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN), .ZOrigDenormE(FmaRzZOrigDenorm), + .XNaNE(FmaRzXNaN), .YNaNE(FmaRzYNaN), .ZNaNE(FmaRzZNaN), .XSNaNE(FmaRzXSNaN), .YSNaNE(FmaRzYSNaN), .ZSNaNE(FmaRzZSNaN), .XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm), .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero), @@ -702,7 +701,7 @@ module testbenchfp; .XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .FmaModFmt, .XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp), .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), - .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN), .ZOrigDenormE(FmaRuZOrigDenorm), + .XNaNE(FmaRuXNaN), .YNaNE(FmaRuYNaN), .ZNaNE(FmaRuZNaN), .XSNaNE(FmaRuXSNaN), .YSNaNE(FmaRuYSNaN), .ZSNaNE(FmaRuZSNaN), .XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm), .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero), @@ -712,7 +711,7 @@ module testbenchfp; .XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .FmaModFmt, .XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp), .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), - .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN), .ZOrigDenormE(FmaRdZOrigDenorm), + .XNaNE(FmaRdXNaN), .YNaNE(FmaRdYNaN), .ZNaNE(FmaRdZNaN), .XSNaNE(FmaRdXSNaN), .YSNaNE(FmaRdYSNaN), .ZSNaNE(FmaRdZSNaN), .XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm), .XZeroE(FmaRdXZero), 
.YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero), @@ -721,7 +720,7 @@ module testbenchfp; readfmavectors readfmarnmvectors (.clk, .TestVector(FmaRnmVectors[VectorNum]), .Ans(FmaRnmAns), .AnsFlg(FmaRnmAnsFlg), .XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .FmaModFmt, .XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp), - .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan), .ZOrigDenormE(FmaRnmZOrigDenorm), + .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan), .XNaNE(FmaRnmXNaN), .YNaNE(FmaRnmYNaN), .ZNaNE(FmaRnmZNaN), .XSNaNE(FmaRnmXSNaN), .YSNaNE(FmaRnmYSNaN), .ZSNaNE(FmaRnmZSNaN), .XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm), @@ -731,7 +730,7 @@ module testbenchfp; readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal), .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal), - .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .ZOrigDenormE(ZOrigDenorm), .XOrigDenormE(XOrigDenorm), + .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), .XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm), @@ -757,13 +756,12 @@ module testbenchfp; fma1 fma1rne(.XSgnE(FmaRneXSgn), .YSgnE(FmaRneYSgn), .ZSgnE(FmaRneZSgn), .XExpE(FmaRneXExp), .YExpE(FmaRneYExp), .ZExpE(FmaRneZExp), .XManE(FmaRneXMan), .YManE(FmaRneYMan), .ZManE(FmaRneZMan), - .XDenormE(FmaRneXDenorm), .YDenormE(FmaRneYDenorm), .ZDenormE(FmaRneZDenorm), .XZeroE(FmaRneXZero), .YZeroE(FmaRneYZero), .ZZeroE(FmaRneZZero), .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRneSum), .NegSumE(FmaRneNegSum), .InvZE(FmaRneInvZ), .NormCntE(FmaRneNormCnt), .ZSgnEffE(FmaRneZSgnEff), .PSgnE(FmaRnePSgn), .ProdExpE(FmaRneProdExp), .AddendStickyE(FmaRneAddendSticky), .KillProdE(FmaRneSumKillProd)); fma2 fma2rne(.XSgnM(FmaRneXSgn), .YSgnM(FmaRneYSgn), - 
.ZExpM(FmaRneZExp), .ZOrigDenormM(FmaRneZOrigDenorm), + .ZExpM(FmaRneZExp), .ZDenormM(FmaRneZDenorm), .XManM(FmaRneXMan), .YManM(FmaRneYMan), .ZManM(FmaRneZMan), .XNaNM(FmaRneXNaN), .YNaNM(FmaRneYNaN), .ZNaNM(FmaRneZNaN), .XZeroM(FmaRneXZero), .YZeroM(FmaRneYZero), .ZZeroM(FmaRneZZero), @@ -776,13 +774,12 @@ module testbenchfp; fma1 fma1rz(.XSgnE(FmaRzXSgn), .YSgnE(FmaRzYSgn), .ZSgnE(FmaRzZSgn), .XExpE(FmaRzXExp), .YExpE(FmaRzYExp), .ZExpE(FmaRzZExp), .XManE(FmaRzXMan), .YManE(FmaRzYMan), .ZManE(FmaRzZMan), - .XDenormE(FmaRzXDenorm), .YDenormE(FmaRzYDenorm), .ZDenormE(FmaRzZDenorm), .XZeroE(FmaRzXZero), .YZeroE(FmaRzYZero), .ZZeroE(FmaRzZZero), .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRzSum), .NegSumE(FmaRzNegSum), .InvZE(FmaRzInvZ), .NormCntE(FmaRzNormCnt), .ZSgnEffE(FmaRzZSgnEff), .PSgnE(FmaRzPSgn), .ProdExpE(FmaRzProdExp), .AddendStickyE(FmaRzAddendSticky), .KillProdE(FmaRzSumKillProd)); fma2 fma2rz(.XSgnM(FmaRzXSgn), .YSgnM(FmaRzYSgn), - .ZExpM(FmaRzZExp), .ZOrigDenormM(FmaRzZOrigDenorm), + .ZExpM(FmaRzZExp), .ZDenormM(FmaRzZDenorm), .XManM(FmaRzXMan), .YManM(FmaRzYMan), .ZManM(FmaRzZMan), .XNaNM(FmaRzXNaN), .YNaNM(FmaRzYNaN), .ZNaNM(FmaRzZNaN), .XZeroM(FmaRzXZero), .YZeroM(FmaRzYZero), .ZZeroM(FmaRzZZero), @@ -795,13 +792,12 @@ module testbenchfp; fma1 fma1ru(.XSgnE(FmaRuXSgn), .YSgnE(FmaRuYSgn), .ZSgnE(FmaRuZSgn), .XExpE(FmaRuXExp), .YExpE(FmaRuYExp), .ZExpE(FmaRuZExp), .XManE(FmaRuXMan), .YManE(FmaRuYMan), .ZManE(FmaRuZMan), - .XDenormE(FmaRuXDenorm), .YDenormE(FmaRuYDenorm), .ZDenormE(FmaRuZDenorm), .XZeroE(FmaRuXZero), .YZeroE(FmaRuYZero), .ZZeroE(FmaRuZZero), .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRuSum), .NegSumE(FmaRuNegSum), .InvZE(FmaRuInvZ), .NormCntE(FmaRuNormCnt), .ZSgnEffE(FmaRuZSgnEff), .PSgnE(FmaRuPSgn), .ProdExpE(FmaRuProdExp), .AddendStickyE(FmaRuAddendSticky), .KillProdE(FmaRuSumKillProd)); fma2 fma2ru(.XSgnM(FmaRuXSgn), .YSgnM(FmaRuYSgn), - .ZExpM(FmaRuZExp), .ZOrigDenormM(FmaRuZOrigDenorm), + .ZExpM(FmaRuZExp), 
.ZDenormM(FmaRuZDenorm), .XManM(FmaRuXMan), .YManM(FmaRuYMan), .ZManM(FmaRuZMan), .XNaNM(FmaRuXNaN), .YNaNM(FmaRuYNaN), .ZNaNM(FmaRuZNaN), .XZeroM(FmaRuXZero), .YZeroM(FmaRuYZero), .ZZeroM(FmaRuZZero), @@ -813,14 +809,13 @@ module testbenchfp; .FMAFlgM(FmaRuResFlg), .FMAResM(FmaRuRes), .Mult(1'b0)); fma1 fma1rd(.XSgnE(FmaRdXSgn), .YSgnE(FmaRdYSgn), .ZSgnE(FmaRdZSgn), .XExpE(FmaRdXExp), .YExpE(FmaRdYExp), .ZExpE(FmaRdZExp), - .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), - .XDenormE(FmaRdXDenorm), .YDenormE(FmaRdYDenorm), .ZDenormE(FmaRdZDenorm), + .XManE(FmaRdXMan), .YManE(FmaRdYMan), .ZManE(FmaRdZMan), .XZeroE(FmaRdXZero), .YZeroE(FmaRdYZero), .ZZeroE(FmaRdZZero), .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRdSum), .NegSumE(FmaRdNegSum), .InvZE(FmaRdInvZ), .NormCntE(FmaRdNormCnt), .ZSgnEffE(FmaRdZSgnEff), .PSgnE(FmaRdPSgn), .ProdExpE(FmaRdProdExp), .AddendStickyE(FmaRdAddendSticky), .KillProdE(FmaRdSumKillProd)); fma2 fma2rd(.XSgnM(FmaRdXSgn), .YSgnM(FmaRdYSgn), - .ZExpM(FmaRdZExp), .ZOrigDenormM(FmaRdZOrigDenorm), + .ZExpM(FmaRdZExp), .ZDenormM(FmaRdZDenorm), .XManM(FmaRdXMan), .YManM(FmaRdYMan), .ZManM(FmaRdZMan), .XNaNM(FmaRdXNaN), .YNaNM(FmaRdYNaN), .ZNaNM(FmaRdZNaN), .XZeroM(FmaRdXZero), .YZeroM(FmaRdYZero), .ZZeroM(FmaRdZZero), @@ -833,13 +828,12 @@ module testbenchfp; fma1 fma1rnm(.XSgnE(FmaRnmXSgn), .YSgnE(FmaRnmYSgn), .ZSgnE(FmaRnmZSgn), .XExpE(FmaRnmXExp), .YExpE(FmaRnmYExp), .ZExpE(FmaRnmZExp), .XManE(FmaRnmXMan), .YManE(FmaRnmYMan), .ZManE(FmaRnmZMan), - .XDenormE(FmaRnmXDenorm), .YDenormE(FmaRnmYDenorm), .ZDenormE(FmaRnmZDenorm), .XZeroE(FmaRnmXZero), .YZeroE(FmaRnmYZero), .ZZeroE(FmaRnmZZero), .FOpCtrlE(3'b0), .FmtE(FmaModFmt), .SumE(FmaRnmSum), .NegSumE(FmaRnmNegSum), .InvZE(FmaRnmInvZ), .NormCntE(FmaRnmNormCnt), .ZSgnEffE(FmaRnmZSgnEff), .PSgnE(FmaRnmPSgn), .ProdExpE(FmaRnmProdExp), .AddendStickyE(FmaRnmAddendSticky), .KillProdE(FmaRnmSumKillProd)); fma2 fma2rnm(.XSgnM(FmaRnmXSgn), .YSgnM(FmaRnmYSgn), - .ZExpM(FmaRnmZExp), 
.ZOrigDenormM(FmaRnmZOrigDenorm), + .ZExpM(FmaRnmZExp), .ZDenormM(FmaRnmZDenorm), .XManM(FmaRnmXMan), .YManM(FmaRnmYMan), .ZManM(FmaRnmZMan), .XNaNM(FmaRnmXNaN), .YNaNM(FmaRnmYNaN), .ZNaNM(FmaRnmZNaN), .XZeroM(FmaRnmXZero), .YZeroM(FmaRnmYZero), .ZZeroM(FmaRnmZZero), @@ -852,12 +846,11 @@ module testbenchfp; fma1 fma1(.XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .XManE(XMan), .YManE(YMan), .ZManE(ZMan), - .XDenormE(XDenorm), .YDenormE(YDenorm), .ZDenormE(ZDenorm), .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero), .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .NormCntE, .ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE); fma2 fma2(.XSgnM(XSgn), .YSgnM(YSgn), - .ZExpM(ZExp), .ZOrigDenormM(ZOrigDenorm), + .ZExpM(ZExp), .ZDenormM(ZDenorm), .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), @@ -870,7 +863,7 @@ module testbenchfp; // .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtFpResE(CvtFpRes), .CvtFpFlgE(CvtFpFlg)); fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), - .XZeroE(XZero), .XOrigDenormE(XOrigDenorm), .FOpCtrlE(OpCtrlVal), + .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .XInfE(XInf), .XNaNE(XNaN), .XSNaNE(XSNaN), .FrmE(FrmVal), .FmtE(ModFmt), .CvtResE(CvtRes), .CvtIntResE(CvtIntRes), .CvtFlgE(CvtFlg)); fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), @@ -1174,13 +1167,13 @@ end /////////////////////////////////////////////////////////////////////////////////////////////// // check if the non-fma test is correct - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 
1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); $stop; end - + // TestFloat sets the result to all 1's when there is an invalid result, however in // http://www.jhauser.us/arithmetic/TestFloat-3/doc/TestFloat-general.html it says // for an unsigned integer result 0 is also okay @@ -1295,7 +1288,6 @@ module readfmavectors ( input logic [1:0] FmaFmt, // the format of the FMA inputs input logic [`FLEN*4+7:0] TestVector, // the test vector output logic [`FLEN-1:0] Ans, // the correct answer - output logic ZOrigDenormE, // is z denormalized in it's original precision output logic [4:0] AnsFlg, // the correct flag output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) @@ -1309,7 +1301,6 @@ module readfmavectors ( ); logic XNormE, XExpMaxE; // signals the unpacker outputs but isn't used in FMA - logic XOrigDenormE; // apply test vectors on rising edge of clk // Format of vectors Inputs(1/2/3)_AnsFlg always @(posedge clk) begin @@ -1343,10 +1334,10 @@ module readfmavectors ( endcase end - unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XOrigDenormE, + unpack unpack(.X, .Y, .Z, .FmtE(FmaModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XDenormE, .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, - .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, - .XExpMaxE, .ZOrigDenormE); + .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, + .XExpMaxE, .ZDenormE); endmodule @@ -1386,7 +1377,6 @@ module readvectors ( output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XInfE, YInfE, ZInfE, // is XYZ infinity output logic XNormE, XExpMaxE, - output logic ZOrigDenormE, XOrigDenormE, output logic [`FLEN-1:0] X, Y, Z ); @@ -1470,7 +1460,7 @@ 
module readvectors ( Ans = TestVector[8]; end 2'b10: begin // half - X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+3*(`H_LEN)-1:12+(`H_LEN)]}; + X = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+2*(`H_LEN)-1:12+(`H_LEN)]}; Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[12+(`H_LEN)-1:12]}; Ans = TestVector[8]; end @@ -1672,5 +1662,5 @@ module readvectors ( unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, .XNormE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, - .XExpMaxE, .ZOrigDenormE, .XOrigDenormE); + .XExpMaxE); endmodule \ No newline at end of file diff --git a/pipelined/testbench/tests-fp.vh b/pipelined/testbench/tests-fp.vh index 15abb6525..3db466c9a 100644 --- a/pipelined/testbench/tests-fp.vh +++ b/pipelined/testbench/tests-fp.vh @@ -1,29 +1,3 @@ -/////////////////////////////////////////// -// tests.vh -// -// Written: David_Harris@hmc.edu 7 October 2021 -// Modified: -// -// Purpose: List of tests to apply -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - - `define PATH "../../tests/fp/vectors/" `define ADD_OPCTRL 3'b110 `define MUL_OPCTRL 3'b100 diff --git a/synthDC/ppaAnalyze.py b/synthDC/ppaAnalyze.py index d7f161802..97a024b5d 100755 --- a/synthDC/ppaAnalyze.py +++ b/synthDC/ppaAnalyze.py @@ -5,6 +5,7 @@ from operator import index import subprocess import csv import re +from matplotlib.cbook import flatten import matplotlib.pyplot as plt import matplotlib.lines as lines import matplotlib.axes as axes @@ -13,6 +14,7 @@ from collections import namedtuple def synthsfromcsv(filename): + Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy") with open(filename, newline='') as csvfile: csvreader = csv.reader(csvfile) global allSynths @@ -29,7 +31,7 @@ def synthsintocsv(): ''' writes a CSV with one line for every available synthesis each line contains the module, tech, width, target freq, and resulting metrics ''' - + print("This takes a moment...") bashCommand = "find . 
-path '*runs/ppa*rv32e*' -prune" output = subprocess.check_output(['bash','-c', bashCommand]) allSynths = output.decode("utf-8").split('\n')[:-1] @@ -90,22 +92,14 @@ def cleanup(): def getVals(tech, module, var, freq=None): ''' for a specified tech, module, and variable/metric - returns a list of values for that metric in ascending width order with the appropriate units - works at a specified target frequency or if none is given, uses the synthesis with the min delay for each width + returns a list of values for that metric in ascending width order + works at a specified target frequency or if none is given, uses the synthesis with the best achievable delay for each width ''' - - if (var == 'delay'): - units = " (ns)" - elif (var == 'area'): - units = " (sq microns)" - elif (var == 'lpower'): - units = " (nW)" - elif (var == 'denergy'): - units = " (pJ)" global widths metric = [] widthL = [] + if (freq != None): for oneSynth in allSynths: if (oneSynth.freq == freq) & (oneSynth.tech == tech) & (oneSynth.module == module): @@ -118,7 +112,7 @@ def getVals(tech, module, var, freq=None): m = 100000 # large number to start for oneSynth in allSynths: if (oneSynth.width == w) & (oneSynth.tech == tech) & (oneSynth.module == module): - if (oneSynth.delay < m): + if (oneSynth.delay < m) & (1000/oneSynth.delay > oneSynth.freq): m = oneSynth.delay osdict = oneSynth._asdict() met = osdict[var] @@ -127,10 +121,12 @@ def getVals(tech, module, var, freq=None): if ('flop' in module) & (var == 'area'): metric = [m/2 for m in metric] # since two flops in each module + if (var == 'denergy'): + metric = [m*1000 for m in metric] # more practical units for regression coefs - return metric, units + return metric -def genLegend(fits, coefs, r2, techcolor): +def genLegend(fits, coefs, r2, spec): ''' generates a list of two legend elements labels line with fit equation and dots with tech and r squared of the fit ''' @@ -155,12 +151,11 @@ def genLegend(fits, coefs, r2, techcolor): eq += " + " 
+ coefsr[ind] + "*Nlog2(N)" ind += 1 - tech, c, m = techcolor - legend_elements = [lines.Line2D([0], [0], color=c, label=eq), - lines.Line2D([0], [0], color=c, ls='', marker=m, label=tech +' $R^2$='+ str(round(r2, 4)))] + legend_elements = [lines.Line2D([0], [0], color=spec.color, label=eq), + lines.Line2D([0], [0], color=spec.color, ls='', marker=spec.shape, label=spec.tech +' $R^2$='+ str(round(r2, 4)))] return legend_elements -def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn'): +def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn', norm=True, color=None): ''' module: string module name freq: int freq (MHz) var: string delay, area, lpower, or denergy @@ -175,30 +170,45 @@ def oneMetricPlot(module, var, freq=None, ax=None, fits='clsgn'): singlePlot = False fullLeg = [] - global techcolors + global techSpecs global widths - for combo in techcolors: - tech, c, m = combo - metric, units = getVals(tech, module, var, freq=freq) - if len(metric) == 5: - xp, pred, leg = regress(widths, metric, combo, fits) - fullLeg += leg - ax.scatter(widths, metric, color=c, marker=m) + global norms + + for spec in techSpecs: + metric = getVals(spec.tech, module, var, freq=freq) + + if norm: + techdict = spec._asdict() + norm = techdict[var] + metric = [m/norm for m in metric] # comment out to not normalize + + if len(metric) == 5: + xp, pred, leg = regress(widths, metric, spec, fits) + fullLeg += leg + c = color if color else spec.color + ax.scatter(widths, metric, color=c, marker=spec.shape) ax.plot(xp, pred, color=c) ax.legend(handles=fullLeg) ax.set_xticks(widths) ax.set_xlabel("Width (bits)") - ax.set_ylabel(str.title(var) + units) + + if norm: + ylabeldic = {"lpower": "Normalized Leakage Power", "denergy": "Normalized Dynamic Energy", "area": "INVx1 Areas", "delay": "FO4 Delays"} + else: + ylabeldic = {"lpower": "Leakage Power (nW)", "denergy": "Dynamic Energy (nJ)", "area": "Area (sq microns)", "delay": "Delay (ns)"} + + ax.set_ylabel(ylabeldic[var]) if 
singlePlot: - titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best delay)" + titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)" ax.set_title(module + titleStr) - plt.show() + plt.savefig('./plots/PPA/'+ module + '_' + var + '.png') + # plt.show() -def regress(widths, var, techcolor, fits='clsgn'): +def regress(widths, var, spec, fits='clsgn'): ''' fits a curve to the given points returns lists of x and y values to plot that curve and legend elements with the equation ''' @@ -217,9 +227,9 @@ def regress(widths, var, techcolor, fits='clsgn'): coefs = coefsResid[0] try: resid = coefsResid[1][0] + r2 = 1 - resid / (y.size * y.var()) except: - resid = 0 - r2 = 1 - resid / (y.size * y.var()) + r2 = 0 xp = np.linspace(8, 140, 200) pred = [] @@ -227,7 +237,7 @@ def regress(widths, var, techcolor, fits='clsgn'): n = [func(x) for func in funcArr] pred += [sum(np.multiply(coefs, n))] - leg = genLegend(fits, coefs, r2, techcolor) + leg = genLegend(fits, coefs, r2, spec) return xp, pred, leg @@ -244,7 +254,7 @@ def makeCoefTable(tech): for comb in [['delay', 5000], ['area', 5000], ['area', 10]]: var = comb[0] freq = comb[1] - metric, units = getVals(tech, mod, freq, var) + metric = getVals(tech, mod, freq, var) global widths coefs, r2, funcArr = regress(widths, metric) row = [mod] + comb + np.ndarray.tolist(coefs) + [r2] @@ -329,7 +339,8 @@ def freqPlot(tech, mod, width): ax3.set_ylabel('Area * Delay') ax4.set_ylabel('Area * $Delay^2$') ax1.set_title(mod + '_' + str(width)) - plt.show() + plt.savefig('./plots/freqBuckshot/' + mod + '/' + str(width) + '.png') + # plt.show() def squareAreaDelay(tech, mod, width): ''' plots delay, area, area*delay, and area*delay^2 for syntheses with specified tech, module, width @@ -343,30 +354,69 @@ def squareAreaDelay(tech, mod, width): delaysL[ind] += [oneSynth.delay] areasL[ind] += [oneSynth.area] - fig = plt.figure() - ax = fig.add_subplot(111) + f, (ax1) = plt.subplots(1, 1) + ax2 = 
ax1.twinx() for ind in [0,1]: areas = areasL[ind] delays = delaysL[ind] - freqs = freqsL[ind] - + targets = freqsL[ind] + targets = [1000/f for f in targets] + if ('flop' in mod): areas = [m/2 for m in areas] # since two flops in each module - freqs, delays, areas = noOutliers(freqs, delays, areas) # comment out to see all syntheses + targets, delays, areas = noOutliers(targets, delays, areas) # comment out to see all + + if not ind: + achievedDelays = delays c = 'blue' if ind else 'green' - plt.scatter(delays, areas, color=c) - - legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='o', label='timing achieved'), - lines.Line2D([0], [0], color='blue', ls='', marker='o', label='slack violated')] - - plt.legend(handles=legend_elements) + ax1.scatter(targets, delays, marker='^', color=c) + ax2.scatter(targets, areas, marker='s', color=c) - plt.xlabel("Delay Achieved (ns)") - plt.ylabel('Area (sq microns)') - plt.title(mod + '_' + str(width)) - ax.set_aspect(1./ax.get_data_ratio()) - plt.show() + bestAchieved = min(achievedDelays) + + legend_elements = [lines.Line2D([0], [0], color='green', ls='', marker='^', label='delay (timing achieved)'), + lines.Line2D([0], [0], color='green', ls='', marker='s', label='area (timing achieved)'), + lines.Line2D([0], [0], color='blue', ls='', marker='^', label='delay (timing violated)'), + lines.Line2D([0], [0], color='blue', ls='', marker='s', label='area (timing violated)')] + + ax2.legend(handles=legend_elements, loc='upper left') + + ax1.set_xlabel("Delay Targeted (ns)") + ax1.set_ylabel("Delay Achieved (ns)") + ax2.set_ylabel('Area (sq microns)') + ax1.set_title(mod + '_' + str(width)) + + squarify(f) + + xvals = np.array(ax1.get_xlim()) + frac = (min(flatten(delaysL))-xvals[0])/(xvals[1]-xvals[0]) + areaLowerLim = min(flatten(areasL))-100 + areaUpperLim = max(flatten(areasL))/frac + areaLowerLim + ax2.set_ylim([areaLowerLim, areaUpperLim]) + ax1.plot(xvals, xvals, ls="--", c=".3") + ax1.hlines(y=bestAchieved, 
xmin=xvals[0], xmax=xvals[1], color="black", ls='--') + + plt.savefig('./plots/squareareadelay_' + mod + '_' + str(width) + '.png') + # plt.show() + +def squarify(fig): + ''' helper function for squareAreaDelay() + forces matplotlib figure to be a square + ''' + w, h = fig.get_size_inches() + if w > h: + t = fig.subplotpars.top + b = fig.subplotpars.bottom + axs = h*(t-b) + l = (1.-axs/w)/2 + fig.subplots_adjust(left=l, right=1-l) + else: + t = fig.subplotpars.right + b = fig.subplotpars.left + axs = w*(t-b) + l = (1.-axs/h)/2 + fig.subplots_adjust(bottom=l, top=1-l) def adprodpow(areas, delays, pow): ''' for each value in [areas] returns area*delay^pow @@ -378,33 +428,63 @@ def adprodpow(areas, delays, pow): return result -def plotPPA(mod, freq=None): +def plotPPA(mod, freq=None, norm=True): ''' for the module specified, plots width vs delay, area, leakage power, and dynamic energy with fits - if no freq specified, uses the synthesis with min delay for each width + if no freq specified, uses the synthesis with best achievable delay for each width overlays data from both techs ''' fig, axs = plt.subplots(2, 2) - oneMetricPlot(mod, 'delay', ax=axs[0,0], fits='cg', freq=freq) - oneMetricPlot(mod, 'area', ax=axs[0,1], fits='s', freq=freq) - oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits='s', freq=freq) - oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits='s', freq=freq) - titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best delay)" + global fitDict + modFit = fitDict[mod] + oneMetricPlot(mod, 'delay', ax=axs[0,0], fits=modFit[0], freq=freq, norm=norm) + oneMetricPlot(mod, 'area', ax=axs[0,1], fits=modFit[1], freq=freq, norm=norm) + oneMetricPlot(mod, 'lpower', ax=axs[1,0], fits=modFit[1], freq=freq, norm=norm) + oneMetricPlot(mod, 'denergy', ax=axs[1,1], fits=modFit[1], freq=freq, norm=norm) + titleStr = " (target " + str(freq)+ "MHz)" if freq != None else " (best achievable delay)" + n = 'normalized' if norm else 'unnormalized' + saveStr = 
'./plots/PPA/'+ n + '/' + mod + '.png' plt.suptitle(mod + titleStr) - plt.show() + plt.savefig(saveStr) + # plt.show() + +def plotBestAreas(): + global fitDict + fig, axs = plt.subplots(1, 1) + mods = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop'] + colors = ['red', 'orange', 'yellow', 'green', 'blue', 'purple'] + legend_elements = [] + for i in range(len(mods)): + oneMetricPlot(mods[i], 'area', ax=axs, freq=10, norm=False, color=colors[i]) + legend_elements += [lines.Line2D([0], [0], color=colors[i], ls='', marker='o', label=mods[i])] + plt.suptitle('Optimized Areas (target freq 10MHz)') + plt.legend(handles=legend_elements) + plt.savefig('./plots/bestareas.png') + # plt.show() if __name__ == '__main__': # set up stuff, global variables - Synth = namedtuple("Synth", "module tech width freq delay area lpower denergy") - techcolors = [['sky90', 'green', 'o'], ['tsmc28', 'blue', '^']] # add another list here for gf32 widths = [8, 16, 32, 64, 128] + # fitDict in progress + fitDict = {'add': ['gl', 'lg'], 'mult': ['clg', 's'], 'comparator': ['clsgn', 'clsgn'], 'csa': ['clsgn', 'clsgn'], 'shiftleft': ['clsgn', 'clsgn'], 'flop': ['cl', 'cl'], 'priorityencoder': ['clsgn', 'clsgn']} + TechSpec = namedtuple("TechSpec", "tech color shape delay area lpower denergy") + techSpecs = [['sky90', 'green', 'o', 43.2e-3, 1.96, 1.98, 1], ['gf32', 'purple', 's', 15e-3, .351, .3116, 1], ['tsmc28', 'blue', '^', 12.2e-3, .252, 1.09, 1]] + techSpecs = [TechSpec(*t) for t in techSpecs] + # cleanup() # synthsintocsv() # slow, run only when new synth runs to add to csv synthsfromcsv('ppaData.csv') # your csv here! 
- ### examples + # ### examples + + # squareAreaDelay('sky90', 'add', 32) # oneMetricPlot('add', 'delay') - # freqPlot('sky90', 'comparator', 16) - # plotPPA('add') - squareAreaDelay('sky90', 'comparator', 16) \ No newline at end of file + plotBestAreas() + + mods = ['priorityencoder', 'add', 'csa', 'shiftleft', 'comparator', 'flop', 'mult'] + for mod in mods: + plotPPA(mod, norm=False) + plotPPA(mod) + for w in [8, 16, 32, 64, 128]: + freqPlot('sky90', mod, w) \ No newline at end of file diff --git a/synthDC/ppaData.csv b/synthDC/ppaData.csv index b5b0435cc..70661af31 100644 --- a/synthDC/ppaData.csv +++ b/synthDC/ppaData.csv @@ -1,6 +1,5 @@ Module,Tech,Width,Target Freq,Delay,Area,L Power (nW),D energy (mJ) csa,sky90,16,19165,0.060643,533.12001,412.98,0.19745360800000003 -comparator,sky90,32,10,0.765874,495.88001,66.41,0.00030558372600000003 csa,sky90,64,18207,0.060643,2132.480042,1660.0,0.751063555 csa,sky90,32,15332,0.062613,815.360016,471.256,0.18859035600000001 flop,sky90,64,14692,0.070789,2132.47998,1040.0,3.156623088 @@ -10,9 +9,12 @@ flopenr,sky90,16,15000,0.189692,1110.34003,620.677,3.390175424 priorityonehot,sky90,128,3852,0.271881,2556.820035,1370.0,0.21451410899999995 comparator,sky90,64,5000,0.219296,2738.120023,2950.0,0.6076692159999999 flopenr,sky90,64,5619,0.204566,4385.500035,2100.0,4.961134631999999 -comparator,sky90,128,10,0.842074,1997.240039,243.506,0.001300162256 +shiftleft,sky90,16,7938,0.125973,1027.040017,1010.0,0.160363629 add,sky90,8,6896,0.144869,331.240005,219.731,0.060410373 +add,tsmc28,8,21210,0.048259,109.368,602.641,0.023791687000000002 shiftleft,sky90,128,3484,0.313597,11188.660188,8590.0,2.418146467 +flop,sky90,16,10,0.070789,533.119995,259.258,0.0005372531155000001 +add,sky90,32,3120,0.320213,1107.40002,307.68,0.18700439200000005 flop,sky90,128,8476,0.070789,4264.959961,2070.0,3.6420232610000003 flopr,sky90,8,11879,0.11919,400.820003,214.285,0.662589129 add,tsmc28,64,3000,0.312507,227.052001,1070.0,0.0621263916 @@ -36,56 +38,58 
@@ floprasync,sky90,64,15397,0.071444,2892.960056,1290.0,3.3878030359999998 flopenr,sky90,32,5296,0.181427,2215.780039,1120.0,2.085866219 floprasync,sky90,128,20000,0.071444,5785.920113,2580.0,8.803472567999998 flopr,sky90,128,9317,0.163642,4973.499976,1710.0,6.7845973200000005 +priorityencoder,sky90,16,10,0.104403,159.740003,39.177,1.0335896999999999e-05 flopr,sky90,16,20000,0.085865,868.280017,712.532,1.40973157 shiftleft,sky90,8,10222,0.097799,394.940007,435.049,0.06836150099999999 flopenr,sky90,64,4723,0.18608,4327.680086,2230.0,3.9400579199999997 flop,sky90,128,15539,0.070789,4264.959961,2070.0,6.676960058000001 +priorityencoder,sky90,32,7160,0.111067,293.020006,53.82,0.012050769499999999 alu,sky90,16,10000,0.304,3555.440059,2890.0,2.593728 -add,sky90,32,4320,0.254861,1716.960028,866.723,0.373881087 add,tsmc28,32,21130,0.080875,367.668003,1860.0,0.15414775 flop,sky90,8,14409,0.070789,266.559998,129.629,0.3870813309 comparator,sky90,64,4636,0.215691,2072.700029,1840.0,0.345752673 -shiftleft,sky90,16,4615,0.215535,446.880008,113.608,0.0446804055 add,tsmc28,8,9092,0.108452,21.42,108.14,0.0057154204 add,sky90,16,4174,0.239287,549.780011,304.811,0.103371984 alu,sky90,16,3524,0.29417,3599.540061,2670.0,0.90839696 priorityonehot,sky90,8,21600,0.054084,157.780003,56.585,0.0190267512 -shiftleft,sky90,32,6375,0.159792,3330.040049,3530.0,0.627343392 +shiftleft,sky90,32,5796,0.172483,2653.840044,2450.0,0.446558487 +add,sky90,32,4368,0.268519,1731.660029,883.74,0.399824791 priorityonehot,sky90,128,5185,0.274609,2437.260036,1210.0,0.250718017 add,tsmc28,128,6900,0.144862,733.320004,3010.0,0.22192858399999998 csa,sky90,128,22360,0.060643,4390.400085,3430.0,1.868835331 decoder,sky90,32,16234,0.061497,250.880004,167.484,0.025275267000000004 csa,sky90,128,16929,0.060643,4264.960083,3260.0,1.3935761400000002 -shiftleft,sky90,32,6250,0.159977,2964.500038,3130.0,0.547281317 flopr,sky90,64,6988,0.11201,2728.319991,1360.0,2.4349853899999996 
flop,sky90,64,19777,0.070789,2132.47998,1040.0,4.249180514000001 -add,sky90,32,3680,0.271527,1465.100024,591.825,0.289176255 +add,tsmc28,64,10560,0.101353,652.302004,3270.0,0.180104281 priorityonehot,sky90,8,22800,0.054084,157.780003,56.585,0.0200976144 +add,sky90,32,3744,0.29863,1565.060028,830.413,0.31117246000000004 floprasync,sky90,8,15000,0.071444,362.600007,161.167,0.40944556400000004 +priorityencoder,sky90,16,10176,0.104403,159.740003,39.177,0.0105133821 mult,sky90,64,657,1.52205,69763.260863,23900.0,57.09818369999999 +shiftleft,sky90,32,6048,0.166361,2766.540045,2640.0,0.48893497900000005 decoder,sky90,8,26064,0.037953,49.980001,39.023,0.0030893742000000003 flopr,sky90,16,10000,0.109984,712.459999,354.561,0.8683236799999999 priorityencoder,sky90,128,12306,0.113763,1058.400021,117.974,0.066437592 add,tsmc28,64,7052,0.141424,298.368001,1290.0,0.090794208 floprasync,sky90,128,10000,0.071444,5785.920113,2580.0,4.401807728 +add,tsmc28,64,11880,0.103723,567.756005,2760.0,0.18887958300000002 add,sky90,128,2667,0.394304,7494.060127,3580.0,1.460502016 csa,sky90,128,17479,0.060643,4264.960083,3260.0,1.4386338889999999 -priorityencoder,sky90,16,10345,0.104403,159.740003,39.177,0.0106908672 flop,sky90,16,14409,0.070789,533.119995,259.258,0.7740847939000001 decoder,sky90,8,26290,0.037953,49.980001,39.023,0.0031159413 flopr,sky90,16,16305,0.085865,774.200015,526.252,1.125432555 decoder,sky90,64,10000,0.098226,291.060005,96.679,0.0224151732 flopenr,sky90,128,7500,0.224974,8193.78002,3440.0,11.796736664 -priorityencoder,sky90,32,9004,0.111067,293.020006,53.82,0.015149538799999999 priorityonehot,sky90,32,25000,0.140143,613.480007,367.99,0.221986512 csa,sky90,128,15171,0.062613,3261.440063,1790.0,0.73632888 csa,sky90,32,15971,0.062613,815.360016,471.256,0.19641698100000002 priorityonehot,sky90,8,24000,0.054084,159.740003,61.953,0.0216336 add,tsmc28,8,10607,0.08931,22.806,114.681,0.005930184 alu,sky90,8,3911,0.255676,1453.340022,1010.0,0.358713428 
+priorityencoder,sky90,16,10752,0.104403,159.740003,39.177,0.0111084792 flopenr,sky90,32,3842,0.216622,2029.58005,1030.0,1.5562774346 add,sky90,16,4865,0.222829,915.320019,765.596,0.176480568 -comparator,sky90,32,6000,0.2012,1248.520016,1480.0,0.2631696 csa,sky90,8,7500,0.10878,125.440002,23.259,0.015490272 csa,sky90,32,9583,0.080832,721.280014,239.708,0.09247180799999999 flop,sky90,64,14409,0.070789,2132.47998,1040.0,3.0958153370000003 @@ -97,12 +101,11 @@ add,tsmc28,128,7896,0.12664,894.096008,3950.0,0.26607064 decoder,sky90,128,20000,0.078354,1161.30001,1130.0,0.208656702 floprasync,sky90,64,15000,0.071444,2892.960056,1290.0,3.3004984679999994 flop,sky90,128,16104,0.070789,4264.959961,2070.0,6.9197663280000015 -comparator,sky90,32,7368,0.194845,1391.600021,1660.0,0.44034969999999996 floprasync,sky90,32,10000,0.071444,1446.480028,643.984,1.099737492 csa,sky90,32,17249,0.060643,1066.240021,875.508,0.357429842 mult,sky90,64,729,1.371734,93726.221523,53500.0,91.980251636 flop,sky90,8,14692,0.070789,266.559998,129.629,0.39466991170000004 -shiftleft,sky90,16,9231,0.113513,1695.400019,2270.0,0.27016094 +shiftleft,sky90,32,6426,0.161489,3746.540056,4530.0,0.750439383 mult,sky90,8,1455,0.687251,1615.04003,680.207,0.697559765 flop,sky90,32,14974,0.070789,1066.23999,518.516,1.6087012617 alu,sky90,32,3650,0.388358,5959.380106,3090.0,2.021015032 @@ -116,13 +119,12 @@ shifter,sky90,32,10,1.906335,1656.200032,118.773,0.0044989506 priorityonehot,sky90,8,19200,0.05415,142.100001,48.939,0.014501370000000001 decoder,sky90,16,20030,0.049718,95.060001,70.279,0.007512389800000001 csa,sky90,8,12777,0.067531,164.640003,67.475,0.0264181272 -shiftleft,sky90,32,6625,0.155982,3619.14005,4140.0,0.7033228380000001 floprasync,sky90,32,13717,0.071444,1446.480028,643.984,1.50854006 decoder,sky90,16,25037,0.039899,247.94,388.045,0.027490411 comparator,sky90,16,6133,0.16297,441.000006,363.571,0.06795849000000001 decoder,sky90,64,20000,0.057083,1052.520018,1550.0,0.13180464700000002 
add,sky90,64,4242,0.328234,3507.420063,1570.0,0.8537366340000001 -shiftleft,sky90,16,5000,0.198416,468.440009,148.45,0.051389744 +comparator,sky90,32,5406,0.199677,1189.720013,1280.0,0.243805617 flopr,sky90,64,12811,0.101659,2816.520013,1550.0,4.099195857 priorityonehot,sky90,16,20000,0.088596,668.36001,947.549,0.1528281 mult,sky90,64,686,1.457722,78293.181181,31800.0,70.41526121 @@ -130,7 +132,12 @@ priorityonehot,sky90,32,15000,0.140665,681.100009,546.147,0.154872165 csa,sky90,16,9583,0.080832,360.640007,116.433,0.045338668799999995 flop,sky90,32,14126,0.070789,1066.23999,518.516,1.5176453710000002 add,tsmc28,32,16300,0.078586,414.036002,2090.0,0.129902658 +shiftleft,sky90,32,5040,0.198332,2187.360036,1640.0,0.37940911600000005 +comparator,sky90,32,4998,0.205372,919.240014,858.878,0.130821964 flop,sky90,128,12996,0.070789,4264.959961,2070.0,5.584190265000001 +shiftleft,sky90,16,8100,0.124607,1131.900019,1250.0,0.18454296699999997 +comparator,sky90,32,6120,0.2012,1261.260016,1500.0,0.2724248 +shiftleft,sky90,16,7776,0.128537,1104.460019,1210.0,0.166326878 floprasync,sky90,128,12877,0.071444,5785.920113,2580.0,5.668152628 flopenr,sky90,64,4434,0.215203,4025.840082,1940.0,3.5934596940000003 csa,sky90,16,18207,0.060643,533.12001,412.98,0.187568799 @@ -140,12 +147,10 @@ flop,sky90,16,14692,0.070789,533.119995,259.258,0.7892690344000002 csa,sky90,64,19165,0.060643,2132.480042,1660.0,0.790602791 floprasync,sky90,32,20000,0.071444,1446.480028,643.984,2.199474984 priorityonehot,sky90,128,2222,0.449659,1317.120025,366.819,0.0552181252 -priorityencoder,sky90,16,15000,0.104403,159.740003,39.177,0.015482964899999998 add,sky90,16,4595,0.221986,817.320014,742.91,0.15871998999999998 flopenr,sky90,16,5285,0.169538,1127.000031,688.586,0.8848188220000001 priorityencoder,sky90,128,7500,0.113763,1058.400021,117.974,0.040499627999999996 priorityencoder,sky90,8,10131,0.104625,85.260002,26.481,0.0075225375 -add,sky90,32,4800,0.258491,1955.100033,1070.0,0.5043159410000001 
add,tsmc28,8,7880,0.123121,20.538,106.097,0.0054665724 decoder,sky90,8,30334,0.032475,70.560001,88.439,0.006699592499999999 add,tsmc28,16,6443,0.138825,50.274,244.477,0.012882959999999999 @@ -164,19 +169,21 @@ mux2,sky90,1,10,0.060639,6.86,1.19,3.1229084999999996e-07 flopr,sky90,64,20000,0.085865,2979.200057,1950.0,5.5286756200000005 csa,sky90,8,15332,0.062613,203.840004,117.131,0.0472477698 decoder,sky90,16,27040,0.039572,339.079998,606.117,0.041471456000000004 +comparator,sky90,64,10,0.561562,1008.42002,127.626,0.00045205740999999995 comparator,sky90,8,9091,0.10991,297.920001,343.785,0.057922569999999986 decoder,sky90,8,7007,0.085629,37.240001,2.355,0.0008657091900000001 flopenr,sky90,32,10000,0.212211,2240.280013,1110.0,4.800849453 add,tsmc28,128,8904,0.112309,1220.184006,5770.0,0.339285489 comparator,sky90,128,3769,0.27069,3741.640049,2910.0,0.5822541899999999 flopr,sky90,32,13277,0.098535,1412.179996,720.493,2.046670485 -shiftleft,sky90,16,7538,0.132481,971.180015,992.057,0.15526773200000002 +priorityencoder,sky90,16,9024,0.104403,159.740003,39.177,0.0093336282 +add,tsmc28,64,9680,0.103297,464.310003,2200.0,0.13149708100000002 priorityencoder,sky90,64,9605,0.112447,546.840011,77.149,0.027999303 comparator,sky90,8,8364,0.119528,210.700003,172.337,0.040400464000000004 alu,sky90,64,7500,0.456689,12146.120232,5380.0,8.588950023 alu,sky90,16,3662,0.281321,4508.000078,4380.0,1.0875869859999998 priorityonehot,sky90,8,20000,0.054151,141.120002,52.37,0.0157362806 -priorityencoder,sky90,32,5402,0.111067,293.020006,53.82,0.0090963873 +priorityencoder,sky90,16,9984,0.104403,159.740003,39.177,0.0103150164 flopr,sky90,64,13975,0.085865,2986.060057,1970.0,3.863839135 shiftleft,sky90,128,3226,0.320064,10597.720193,7050.0,2.011282176 comparator,sky90,128,4077,0.262622,4638.340054,5120.0,0.8020475880000001 @@ -186,25 +193,30 @@ comparator,sky90,16,6000,0.166568,422.380007,301.506,0.068792584 add,sky90,16,4696,0.227412,866.320016,645.684,0.173287944 
alu,sky90,8,5952,0.247589,2113.860033,2120.0,0.7343489740000001 add,tsmc28,32,12074,0.082822,277.956002,1370.0,0.08091709400000001 +priorityencoder,sky90,128,10,0.113763,1058.400021,117.974,5.3923662e-05 mult,sky90,32,944,1.085045,32407.620517,26800.0,28.648443135 -shiftleft,sky90,16,10769,0.131174,1153.460019,1350.0,0.26549617600000003 add,tsmc28,16,3000,0.32096,41.202,203.505,0.0116572672 add,tsmc28,128,8400,0.119042,1050.084009,4830.0,0.29831925200000003 +add,sky90,32,3978,0.280475,1768.90003,1000.0,0.34245997499999997 mult,sky90,32,4000,1.091389,31262.980534,24900.0,123.890113724 priorityencoder,sky90,8,9176,0.104625,85.260002,26.481,0.006821550000000001 floprasync,sky90,128,7500,0.071444,5785.920113,2580.0,3.3043564439999997 +priorityencoder,sky90,32,8234,0.111067,293.020006,53.82,0.0138611616 shifter,sky90,128,10,2.758726,9722.580189,720.698,0.041491239039999996 flopenr,sky90,32,6724,0.173729,2310.840003,1080.0,2.681159657 alu,sky90,8,4506,0.242351,2197.160032,2200.0,0.609755116 priorityonehot,sky90,64,5238,0.210273,1174.040018,697.959,0.10534677299999999 -shiftleft,sky90,32,6500,0.158323,3294.760046,3490.0,0.6141349169999999 shiftleft,sky90,64,3478,0.287377,3864.140062,2250.0,0.688267915 +add,tsmc28,32,13650,0.079681,353.934001,1810.0,0.096573372 flopenr,sky90,64,15000,0.22943,4798.080035,2490.0,16.436594630000002 decoder,sky90,32,12025,0.081513,166.600003,59.7,0.0123736734 flopr,sky90,16,12112,0.133182,746.760008,381.108,1.3699100519999998 comparator,sky90,128,5000,0.260142,5215.56005,6000.0,1.3779721740000002 add,sky90,16,4415,0.22649,827.120015,595.953,0.17054697 +shiftleft,sky90,16,8424,0.118698,1146.600015,1250.0,0.177334812 +shiftleft,sky90,32,6552,0.161811,3542.700057,3960.0,0.711806589 decoder,sky90,128,12763,0.079353,1086.820012,959.985,0.126091917 +add,sky90,16,10,2.032906,221.479998,55.29,0.0012902854382000001 flopenr,sky90,32,20000,0.208206,2408.840056,1820.0,9.997635708 flopenr,sky90,32,4515,0.205972,2350.040062,1640.0,1.8533360559999998 
flopenr,sky90,16,4968,0.182266,1088.780029,605.907,1.061426051 @@ -217,19 +229,20 @@ csa,sky90,64,16929,0.060643,2132.480042,1660.0,0.69800093 shiftleft,sky90,8,8889,0.112426,236.180002,193.721,0.039798804 add,sky90,8,6355,0.157048,343.980005,234.605,0.064546728 csa,sky90,64,22360,0.060643,2195.200043,1740.0,0.937237565 -priorityencoder,sky90,32,9184,0.111067,293.020006,53.82,0.015460526399999999 decoder,sky90,16,28542,0.039572,499.800013,875.782,0.058249984000000005 -add,sky90,32,4160,0.253175,2031.540036,1240.0,0.41900462499999996 shiftleft,sky90,8,11111,0.091007,491.960005,678.321,0.07371567000000001 alu,sky90,16,2073,0.481803,1688.540032,395.679,0.278963937 priorityonehot,sky90,128,3407,0.293484,1910.02003,670.082,0.107415144 +add,tsmc28,64,9860,0.101401,540.036002,2590.0,0.146828648 priorityencoder,sky90,8,9558,0.104625,85.260002,26.481,0.0070935749999999995 flopenr,sky90,8,5439,0.167649,552.720005,270.402,0.560785905 -comparator,sky90,32,5053,0.197891,805.560012,561.888,0.115964126 +csa,sky90,16,10,0.209074,235.199997,56.871,0.000105164222 flopenr,sky90,8,7751,0.148606,636.020015,364.803,0.7494646397999999 floprasync,sky90,128,15677,0.071444,5785.920113,2580.0,6.900633071999999 +add,tsmc28,64,11220,0.097771,721.602002,3630.0,0.20395030599999997 +add,tsmc28,8,20790,0.050528,86.184,484.731,0.01945328 +add,tsmc28,32,12350,0.081819,348.390002,1820.0,0.09229183200000002 flopr,sky90,64,12345,0.101659,2816.520013,1550.0,3.950163763 -shiftleft,sky90,16,8308,0.12432,1199.520016,1300.0,0.19257168 add,tsmc28,64,8909,0.112235,400.806002,1820.0,0.119081335 comparator,sky90,8,12727,0.113615,488.039998,768.445,0.12384035 decoder,sky90,128,11997,0.083125,926.100008,787.251,0.095344375 @@ -239,7 +252,6 @@ flopr,sky90,16,11646,0.133182,746.760008,381.108,1.31716998 comparator,sky90,64,4455,0.224454,1899.240032,1340.0,0.32994738 add,sky90,8,7031,0.145062,385.140007,332.65,0.07543224 comparator,sky90,128,3538,0.282712,3158.540057,1600.0,0.45997242400000005 
-priorityencoder,sky90,16,11494,0.104403,159.740003,39.177,0.0118706211 alu,sky90,16,3317,0.301347,3143.840056,2170.0,0.7211233709999999 alu,sky90,128,1867,0.535525,25061.540475,9020.0,4.929507625 mult,sky90,16,1268,0.802449,9789.220166,8800.0,6.815199357 @@ -252,18 +264,22 @@ mult,sky90,128,337,2.963253,201889.800086,26700.0,81.08349183899999 add,tsmc28,8,7500,0.131988,20.916,106.321,0.0055698936 flopr,sky90,16,9317,0.10124,776.160012,486.897,0.78248396 priorityencoder,sky90,8,9749,0.104625,85.260002,26.481,0.0072505124999999995 +comparator,sky90,32,4080,0.245094,652.680013,144.113,0.074508576 +add,tsmc28,16,14140,0.070718,118.062001,647.697,0.032671715999999996 csa,sky90,8,15971,0.062613,203.840004,117.131,0.0491950341 +add,sky90,32,4680,0.257118,1882.58003,1100.0,0.439157544 mult,sky90,128,584,1.712328,298800.044147,115000.0,257.92111732800004 priorityonehot,sky90,8,18400,0.054629,109.760001,31.371,0.009920626399999998 comparator,sky90,8,10909,0.11361,387.1,565.114,0.0965685 decoder,sky90,32,17000,0.06201,655.62001,900.063,0.09729369 add,tsmc28,16,11276,0.088457,65.016001,305.664,0.020433566999999996 -shiftleft,sky90,32,3750,0.266551,1173.060021,319.774,0.129277235 +csa,sky90,8,10,0.209074,117.599998,28.436,5.2686648e-05 flopr,sky90,64,11646,0.101365,2830.240013,1510.0,3.7213118799999996 priorityonehot,sky90,32,8000,0.145441,1137.780016,1520.0,0.143259385 priorityonehot,sky90,128,3926,0.258274,2524.480033,1280.0,0.198096158 mult,sky90,64,429,2.326205,53642.260108,7400.0,20.6101763 csa,sky90,8,17249,0.060643,266.560005,209.477,0.0889996668 +add,tsmc28,32,12870,0.083324,361.368003,1900.0,0.09682248799999998 decoder,sky90,8,25784,0.0384,48.020001,31.121,0.00284544 floprasync,sky90,16,15000,0.071444,723.240014,321.992,0.822891992 alu,sky90,128,1944,0.514379,26616.800496,11800.0,5.97708398 @@ -274,10 +290,12 @@ comparator,sky90,16,7200,0.15891,771.260013,1090.0,0.12331416 shiftleft,sky90,128,2968,0.33687,9142.420162,5660.0,1.7459972099999999 
flop,sky90,32,13279,0.070789,1066.23999,518.516,1.4265894803 decoder,sky90,32,7500,0.115541,147.000003,15.758,0.006470296 +add,sky90,32,3588,0.278585,1182.860022,345.668,0.22342517000000003 decoder,sky90,128,7658,0.130462,549.78001,153.219,0.041225991999999996 mult,sky90,16,1122,0.891172,6478.780105,3540.0,4.677761828 shifter,sky90,16,5000,0.209586,2120.720031,2150.0,0.46528091999999993 priorityonehot,sky90,16,12222,0.095549,368.480004,319.793,0.043379245999999996 +shiftleft,sky90,32,10,0.924571,882.980017,61.087,0.0008635493140000001 csa,sky90,128,18799,0.060643,4264.960083,3260.0,1.547306145 decoder,sky90,64,19270,0.055769,1076.040022,1560.0,0.12520140500000002 csa,sky90,16,22360,0.060643,548.800011,433.245,0.23414262300000002 @@ -291,26 +309,27 @@ csa,sky90,32,12777,0.067531,658.560013,269.898,0.10555095299999999 priorityonehot,sky90,64,4381,0.22809,942.760013,344.503,0.0638652 decoder,sky90,8,23762,0.041662,42.140001,19.278,0.0024080635999999996 add,sky90,8,9465,0.14904,637.980011,790.447,0.15276599999999999 +priorityencoder,sky90,16,9216,0.104403,159.740003,39.177,0.0095319939 flopr,sky90,32,9317,0.139384,1276.939993,443.486,1.4999112240000003 comparator,sky90,128,4154,0.257245,4649.120047,5100.0,0.849165745 -priorityencoder,sky90,32,8283,0.111067,293.020006,53.82,0.013950015199999999 alu,sky90,8,4081,0.250986,1530.76002,1160.0,0.36518463 csa,sky90,128,17809,0.060643,4264.960083,3260.0,1.4658019530000002 add,sky90,16,4435,0.22545,666.400011,419.709,0.1460916 mult,sky90,8,1891,0.605341,2405.90004,1930.0,1.5000349979999998 add,sky90,16,4000,0.249839,551.74001,302.479,0.100685117 mult,sky90,64,700,1.428547,82949.161302,39200.0,80.650049432 -priorityencoder,sky90,16,10536,0.104403,159.740003,39.177,0.0108892329 mult,sky90,8,1782,0.582418,2549.960043,2140.0,1.531176922 alu,sky90,8,4591,0.23242,2612.680037,3030.0,0.6451979199999999 flopr,sky90,128,15000,0.125811,5740.839996,3160.0,11.995198173 flopr,sky90,64,12112,0.101659,2816.520013,1550.0,3.8755460570000007 
add,sky90,128,2615,0.390136,6662.040117,2450.0,1.2094216 flop,sky90,128,13561,0.070789,4264.959961,2070.0,5.826996535 +add,sky90,32,3900,0.280206,1679.720027,892.235,0.337928436 comparator,sky90,64,4727,0.225291,2499.000023,2710.0,0.465000624 add,sky90,8,7708,0.161451,407.680008,375.802,0.084923226 +add,tsmc28,64,13200,0.103072,587.916003,2960.0,0.212946752 +add,sky90,32,4056,0.253823,1918.840034,1040.0,0.38657242900000005 add,tsmc28,16,16300,0.067336,189.63,1050.0,0.04902060799999999 -priorityencoder,sky90,32,10000,0.111067,293.020006,53.82,0.016882183999999998 decoder,sky90,8,29973,0.032971,66.640001,78.184,0.0064062653 flop,sky90,128,15257,0.070789,4264.959961,2070.0,6.555698501 decoder,sky90,8,35390,0.030694,237.160005,420.74,0.024954221999999998 @@ -328,16 +347,16 @@ flopr,sky90,64,11180,0.17183,2838.080032,1420.0,5.795997730000001 csa,sky90,32,19165,0.060643,1066.240021,827.644,0.39496785900000003 comparator,sky90,64,6364,0.223965,2547.020023,2940.0,0.73236555 floprasync,sky90,64,20000,0.071444,2892.960056,1290.0,4.400664623999999 -priorityencoder,sky90,32,8824,0.111067,293.020006,53.82,0.014849657899999999 add,tsmc28,8,8031,0.119581,20.538,105.945,0.0053931031 -comparator,sky90,8,10,0.29577,118.580002,16.053,3.2505123000000005e-05 csa,sky90,64,7500,0.10878,1003.52002,186.07,0.12357407999999999 priorityencoder,sky90,128,10000,0.113763,1058.400021,117.974,0.053923662 add,tsmc28,128,8232,0.121475,945.504008,4240.0,0.27429055 shiftleft,sky90,8,7500,0.132768,218.540002,147.871,0.034785216 priorityencoder,sky90,64,9782,0.112447,546.840011,77.149,0.028561538 add,tsmc28,64,7202,0.138773,305.424001,1310.0,0.09256159100000001 +add,sky90,32,3333,0.299576,1153.460022,384.333,0.20880447200000002 add,tsmc28,128,6720,0.148758,707.742004,2940.0,0.21629413200000003 +add,tsmc28,64,11660,0.098856,687.582004,3420.0,0.203346792 mult,sky90,32,852,1.173643,23514.120391,12700.0,21.016425201 mult,sky90,32,741,1.349466,17389.120212,4650.0,10.286979318 
floprasync,sky90,128,11198,0.071444,5785.920113,2580.0,4.929064447999999 @@ -354,18 +373,16 @@ alu,sky90,64,3066,0.448988,12350.940228,6740.0,3.467534324 add,sky90,128,3077,0.387515,7712.60013,2930.0,1.6446136599999999 flopenr,sky90,128,1000,0.951754,6483.679942,1260.0,3.6541643076 add,sky90,16,4087,0.243761,503.720009,183.936,0.08702267699999999 -shiftleft,sky90,32,6125,0.163188,2892.960045,2740.0,0.4977234 +comparator,sky90,32,10,0.765874,495.88001,66.41,0.00030558372600000003 csa,sky90,128,12777,0.067531,2634.240051,1080.0,0.42227134299999997 mult,sky90,32,556,1.796075,14371.700056,2210.0,5.0721158 decoder,sky90,8,34534,0.030694,237.160005,420.74,0.024340341999999997 comparator,sky90,64,5455,0.221407,2929.220025,3360.0,0.700753155 comparator,sky90,8,8909,0.11208,261.660004,251.629,0.05402256 -shiftleft,sky90,32,6000,0.169263,2872.380041,2880.0,0.542995704 priorityencoder,sky90,8,10896,0.104625,85.260002,26.481,0.0080875125 flop,sky90,64,14126,0.070789,2132.47998,1040.0,3.0350783750000003 priorityonehot,sky90,8,28000,0.054102,177.380002,118.676,0.02732151 add,tsmc28,16,13885,0.072003,111.762,603.843,0.031897328999999995 -priorityencoder,sky90,16,10153,0.104403,159.740003,39.177,0.0104925015 priorityencoder,sky90,128,20000,0.113763,1058.400021,117.974,0.10796108700000001 flopr,sky90,8,6988,0.110829,342.999999,168.133,0.29852899439999997 priorityonehot,sky90,32,5333,0.186576,407.680007,135.997,0.0276878784 @@ -380,13 +397,13 @@ priorityonehot,sky90,16,15000,0.086192,739.900005,1110.0,0.11920353600000001 shiftleft,sky90,128,3355,0.309977,11750.200195,9570.0,2.415650761 add,sky90,8,7437,0.151519,495.880011,457.493,0.09409329899999999 flop,sky90,64,14974,0.070789,2132.47998,1040.0,3.217289261 -add,sky90,32,3920,0.273454,2044.280039,1330.0,0.41154826999999994 csa,sky90,64,15971,0.062613,1630.720032,943.002,0.39320964 alu,sky90,16,2764,0.361248,2302.020041,1050.0,0.497438496 add,sky90,16,6307,0.225596,1023.12002,1010.0,0.281769404 
decoder,sky90,128,13273,0.100672,959.420012,753.194,0.141041472 mult,sky90,32,5000,1.092153,31497.200524,25800.0,150.56748903899998 priorityonehot,sky90,64,2857,0.34852,702.660012,180.97,0.033179104 +add,sky90,128,10,17.100851,1867.879976,465.925,0.09453350432799999 decoder,sky90,32,10000,0.099725,147.980003,44.83,0.010152005 priorityencoder,sky90,64,9071,0.112447,546.840011,77.149,0.026425045 decoder,sky90,8,45612,0.030694,218.540003,382.667,0.029957343999999997 @@ -394,7 +411,6 @@ priorityonehot,sky90,32,6800,0.152882,730.100008,561.099,0.09157631799999999 decoder,sky90,8,33231,0.030694,201.880003,347.106,0.021639270000000002 decoder,sky90,8,15167,0.061083,37.240001,1.317,0.00140857398 shiftleft,sky90,8,10889,0.098154,548.800008,801.248,0.095013072 -shiftleft,sky90,32,7500,0.166296,3306.520048,3700.0,0.7544849520000001 priorityonehot,sky90,16,12000,0.093589,291.060006,116.96,0.030510014000000002 priorityonehot,sky90,128,3778,0.264659,2299.080036,975.931,0.15614880999999997 flopenr,sky90,32,2882,0.284333,1641.499985,327.027,0.8121119146 @@ -404,12 +420,12 @@ add,tsmc28,16,7571,0.124163,51.282,247.578,0.013844174499999999 flop,sky90,32,14409,0.070789,1066.23999,518.516,1.5480209309000001 add,sky90,16,6000,0.225754,1120.140018,1010.0,0.29099690600000006 add,tsmc28,64,11766,0.100257,659.358006,3280.0,0.197907318 -priorityencoder,sky90,32,12605,0.111067,293.020006,53.82,0.0212471171 +priorityencoder,sky90,32,9487,0.111067,293.020006,53.82,0.015971434599999997 +shiftleft,sky90,64,10,1.219378,2116.800041,153.287,0.0032435454800000003 mult,sky90,16,10,4.730546,3869.040009,641.517,0.07147855005999999 flopr,sky90,32,16305,0.085865,1540.560029,1070.0,2.261083045 flopenr,sky90,8,6799,0.152384,635.040013,414.486,0.6784440448 decoder,sky90,16,24536,0.040593,204.82,314.572,0.021676662 -priorityencoder,sky90,16,8812,0.104403,159.740003,39.177,0.0091039416 alu,sky90,8,4251,0.245524,1844.360033,1560.0,0.47975389599999996 
decoder,sky90,8,10744,0.085629,37.240001,2.355,0.0013281057899999999 csa,sky90,16,15971,0.062613,407.680008,235.173,0.098239797 @@ -419,8 +435,6 @@ add,tsmc28,8,14791,0.06639,27.468,134.31,0.007946883 flopenr,sky90,16,20000,0.189692,1098.580025,591.454,4.502529312 shiftleft,sky90,64,4348,0.23035,5490.940094,4500.0,1.0674419000000002 flop,sky90,8,14126,0.070789,266.559998,129.629,0.37948567120000004 -add,sky90,32,3200,0.312424,1121.120021,296.836,0.203700448 -shiftleft,sky90,16,8154,0.128748,1062.320016,1070.0,0.17020485600000002 shiftleft,sky90,8,11333,0.092595,545.860006,815.115,0.089168985 priorityonehot,sky90,16,10667,0.09706,282.240005,85.616,0.025555897999999997 shiftleft,sky90,128,4516,0.309266,12621.420203,11200.0,3.7210885119999997 @@ -431,6 +445,7 @@ csa,sky90,16,17249,0.060643,533.12001,432.126,0.178714921 shiftleft,sky90,8,11778,0.091769,674.240011,1040.0,0.101037669 add,sky90,128,2718,0.407908,7287.280117,3350.0,1.463573904 floprasync,sky90,128,15000,0.071444,5785.920113,2580.0,6.602568704 +add,sky90,32,3666,0.278178,1498.420028,715.058,0.276508932 alu,sky90,32,3128,0.389409,5641.860104,2720.0,1.566592407 priorityonehot,sky90,32,7067,0.141491,1078.980015,1580.0,0.14389634700000004 floprasync,sky90,8,10000,0.071444,362.600007,161.167,0.2729375132 @@ -438,11 +453,11 @@ alu,sky90,32,2607,0.389198,5684.000094,2890.0,1.325608388 priorityonehot,sky90,8,10000,0.099885,59.780001,9.529,0.0024871364999999998 flop,sky90,16,14974,0.070789,533.119995,259.258,0.8043895648 decoder,sky90,16,26038,0.039572,282.240004,451.429,0.032330324 -add,sky90,8,10,0.940062,103.879999,24.765,0.0002515605912 floprasync,sky90,16,15397,0.071444,723.240014,321.992,0.8446824119999999 add,sky90,16,5217,0.22222,824.180016,601.276,0.16622056 mult,sky90,8,1745,0.589521,2771.440043,2580.0,1.480876752 flop,sky90,8,16952,0.070789,266.559998,129.629,0.45542811040000003 +shiftleft,sky90,16,7614,0.131331,1255.380019,1500.0,0.221424066 
flop,sky90,32,19777,0.070789,1066.23999,518.516,2.1247318350000004 csa,sky90,8,22360,0.060643,274.400005,215.78,0.11718046890000001 priorityonehot,sky90,16,7500,0.131703,194.040003,81.795,0.015909722399999996 @@ -450,22 +465,24 @@ csa,sky90,8,16929,0.060643,266.560005,213.306,0.08754423480000001 alu,sky90,128,1556,0.642542,20580.98039,4540.0,3.342503484 add,sky90,16,4505,0.221872,731.080013,463.35,0.143773056 mult,sky90,128,5000,1.78322,314617.244472,163000.0,3044.1491277600003 +shiftleft,sky90,16,9072,0.121429,1437.660021,1910.0,0.246258012 flopenr,sky90,128,4551,0.238398,7704.760055,2750.0,6.324222143999999 alu,sky90,128,1983,0.507617,27966.260505,13900.0,6.350288669999999 floprasync,sky90,32,15397,0.071444,1446.480028,643.984,1.6932228 alu,sky90,8,7500,0.236938,2625.420042,2970.0,1.2223631420000003 add,sky90,64,2909,0.343753,2800.840049,852.781,0.4953480729999999 +priorityencoder,sky90,8,10,0.104625,85.260002,26.481,7.260975e-06 alu,sky90,64,2015,0.496274,10743.740201,3960.0,1.889811392 flop,sky90,16,14126,0.070789,533.119995,259.258,0.7588934745 add,sky90,8,7167,0.145559,710.500014,879.277,0.113244902 -comparator,sky90,32,5158,0.197393,1203.440015,1310.0,0.20765743600000003 add,tsmc28,64,8068,0.123942,337.932002,1480.0,0.10782954000000002 flopenr,sky90,16,10000,0.189228,1106.42003,616.676,2.251623972 decoder,sky90,8,11445,0.085629,37.240001,2.355,0.0014145910799999999 +shiftleft,sky90,16,8586,0.119249,1425.900021,1750.0,0.226811598 priorityonehot,sky90,128,5000,0.276002,2397.080033,1140.0,0.24039774200000003 add,sky90,8,6220,0.16068,294.000005,218.154,0.05495255999999999 comparator,tsmc28,128,7500,0.132804,374.597997,1260.0,0.08605699199999998 -comparator,sky90,32,4947,0.2021,882.980013,601.459,0.1513729 +add,tsmc28,32,12610,0.081004,370.440003,1860.0,0.09436966000000001 add,sky90,16,4685,0.227412,924.140018,742.859,0.17874583200000002 csa,sky90,64,15332,0.062613,1630.720032,893.318,0.37192122 csa,sky90,32,18207,0.060643,1066.240021,827.644,0.37519824100000004 
@@ -480,7 +497,6 @@ decoder,sky90,16,22809,0.04375,201.880002,199.593,0.0188125 priorityencoder,sky90,8,7646,0.104625,85.260002,26.481,0.00567799875 add,tsmc28,32,3000,0.315207,102.186001,500.273,0.0276121332 alu,sky90,128,7500,0.514295,28689.500518,15300.0,28.687375099999997 -add,sky90,128,10,17.100851,1867.879976,465.925,0.09453350432799999 flop,sky90,8,14974,0.070789,266.559998,129.629,0.4022655714 flop,sky90,16,16952,0.070789,533.119995,259.258,0.9107004850000001 priorityonehot,sky90,8,20400,0.054151,145.040002,58.857,0.0167380741 @@ -493,48 +509,45 @@ add,tsmc28,8,18111,0.054999,42.21,235.546,0.011324294099999998 csa,sky90,8,19165,0.060643,266.560005,213.306,0.09920588370000001 flopr,sky90,128,10000,0.172584,5487.020036,2740.0,10.022643215999999 flopenr,sky90,8,7479,0.148606,636.020015,364.803,0.7232505414 +add,tsmc28,16,14700,0.068045,148.932,822.457,0.03864956 flopr,sky90,16,13277,0.133182,746.760008,381.108,1.50162705 -priorityencoder,sky90,32,9544,0.111067,293.020006,53.82,0.0160713949 decoder,sky90,8,27301,0.036011,53.900001,45.061,0.0038315704 flopr,sky90,8,10947,0.11919,403.760003,218.217,0.60977604 -add,sky90,16,10,2.032906,221.479998,55.29,0.0012902854382000001 flopr,sky90,128,10947,0.172973,5340.020018,2310.0,10.278747551999999 +shiftleft,sky90,16,7452,0.133951,878.080014,796.863,0.121627508 shiftleft,sky90,64,4261,0.234657,5289.060089,3950.0,0.980396946 +add,sky90,32,3822,0.282243,1657.18003,864.512,0.31752337500000005 priorityonehot,sky90,16,13333,0.077249,976.080015,1550.0,0.164694868 flopenr,sky90,128,6637,0.228828,8134.980007,3210.0,11.399295648 shiftleft,sky90,64,6087,0.227478,6715.940117,5940.0,1.7761482240000002 add,tsmc28,64,4501,0.187403,237.384001,1110.0,0.058469735999999994 decoder,sky90,16,21208,0.047148,119.560002,121.799,0.013107144000000001 -comparator,sky90,32,5684,0.203736,1218.140014,1420.0,0.256503624 flopr,sky90,8,12578,0.11919,400.820003,214.285,0.70155234 +comparator,sky90,32,5100,0.196617,1081.920011,1140.0,0.158276685 
flopr,sky90,128,12578,0.177282,5403.720033,2390.0,12.481716492 flop,sky90,16,13844,0.070789,533.119995,259.258,0.7437092339999999 priorityonehot,sky90,8,19600,0.054151,189.14,207.102,0.021552098 priorityencoder,sky90,8,9940,0.104625,85.260002,26.481,0.007386525 add,tsmc28,8,1000,0.238199,15.75,58.809,0.001238158402 decoder,sky90,8,23256,0.041662,42.140001,17.364,0.0022872438 -priorityencoder,sky90,16,7500,0.104403,159.740003,39.177,0.007757142900000001 priorityonehot,sky90,64,6667,0.226349,1288.700018,1120.0,0.17836301200000002 comparator,sky90,16,6533,0.152969,508.620009,432.277,0.09820609799999999 priorityonehot,sky90,32,7500,0.15352,670.320007,335.87,0.07691352 csa,sky90,64,12777,0.067531,1317.120026,539.796,0.21103437499999997 priorityonehot,sky90,32,4000,0.248804,332.220006,108.841,0.0181875724 +add,tsmc28,64,10120,0.10213,648.144004,3210.0,0.17178266 flopr,sky90,32,12112,0.101547,1445.500023,882.979,1.9622942280000002 -comparator,sky90,64,10,0.561562,1008.42002,127.626,0.00045205740999999995 -priorityencoder,sky90,32,10804,0.111067,293.020006,53.82,0.0182260947 -priorityencoder,sky90,32,15000,0.111067,293.020006,53.82,0.0252677425 flopenr,sky90,64,4820,0.185072,3846.500004,1260.0,2.1616409599999997 floprasync,sky90,16,20000,0.071444,723.240014,321.992,1.097236952 priorityencoder,sky90,64,8359,0.112447,546.840011,77.149,0.024344775500000002 -shiftleft,sky90,32,5875,0.169973,2781.240046,2630.0,0.49360159200000003 mult,sky90,16,1707,0.829615,8563.24013,6780.0,8.81548899 add,tsmc28,32,5000,0.173613,110.880001,525.554,0.0278301639 priorityencoder,sky90,64,8182,0.112447,546.840011,77.149,0.023906232200000002 flopr,sky90,128,20000,0.085865,5959.380113,3930.0,11.063447654999997 comparator,sky90,8,10000,0.1136,496.86,810.074,0.09383360000000002 flop,sky90,64,13279,0.070789,2132.47998,1040.0,2.8530090670000003 -shiftleft,sky90,32,6750,0.156124,3323.180043,3580.0,0.585465 add,sky90,8,6761,0.147641,621.32001,747.563,0.10334869999999999 
+shiftleft,sky90,32,7560,0.155216,3953.320055,4920.0,0.848565872 comparator,sky90,64,4000,0.249905,1437.660027,558.66,0.20292286 csa,sky90,128,15000,0.062613,3261.440063,1790.0,0.7280013510000001 add,sky90,8,7302,0.152957,551.740009,717.81,0.09682178100000001 @@ -554,6 +567,7 @@ flopenr,sky90,8,15000,0.148606,636.020015,366.09,1.4506917719999999 add,sky90,64,5000,0.334061,3798.480071,2180.0,1.167543195 mult,sky90,8,1964,0.585681,2746.940044,2480.0,1.659234273 alu,sky90,8,3996,0.250188,1360.240021,835.922,0.32849684400000007 +shiftleft,sky90,16,8262,0.120995,1111.320016,1180.0,0.167094095 flopr,sky90,32,11646,0.101914,1441.580023,867.77,1.893969776 decoder,sky90,128,7500,0.13242,552.72001,163.224,0.04263924 floprasync,sky90,16,10000,0.071444,723.240014,321.992,0.548547032 @@ -566,12 +580,12 @@ add,sky90,64,3273,0.311119,3816.120062,1960.0,0.7441966480000001 decoder,sky90,64,16117,0.061996,696.780014,775.245,0.07501516 mult,sky90,32,926,1.101021,31000.340484,24600.0,26.166865085999998 flopenr,sky90,64,7565,0.199522,4367.860033,2100.0,6.878720472 -priorityencoder,sky90,32,10264,0.111067,293.020006,53.82,0.017259811799999997 alu,sky90,32,2086,0.479314,4204.200078,1080.0,0.725202082 +add,tsmc28,32,13390,0.077735,458.136002,2370.0,0.11978963499999999 add,sky90,64,2424,0.412474,2298.100044,453.413,0.33616630999999997 priorityencoder,sky90,64,5336,0.112447,546.840011,77.149,0.0155626648 -priorityencoder,sky90,32,8643,0.111067,293.020006,53.82,0.014549777 flopr,sky90,64,15000,0.085865,2982.140057,1960.0,4.148567475 +priorityencoder,sky90,16,11520,0.104403,159.740003,39.177,0.011901942 mult,sky90,16,1317,0.805748,10366.440177,10100.0,7.202581372 shifter,sky90,8,10,0.622998,244.020005,26.943,0.00022801726800000002 flop,sky90,16,13279,0.070789,533.119995,259.258,0.7133336741 @@ -583,6 +597,7 @@ add,tsmc28,128,5040,0.197577,488.502002,2230.0,0.143045748 add,sky90,16,4144,0.240621,555.660011,274.571,0.092639085 alu,sky90,64,2409,0.452715,12468.540233,6180.0,2.755676205 
add,sky90,16,2609,0.375085,405.720008,52.28,0.050598966499999995 +add,sky90,32,4134,0.25292,1966.860033,1110.0,0.40720119999999993 alu,sky90,128,2061,0.515343,27812.400516,13300.0,6.941154867 priorityonehot,sky90,64,4762,0.212289,1107.400013,650.606,0.09828980699999999 mult,sky90,8,1709,0.599356,2453.920037,2010.0,1.442649892 @@ -594,7 +609,6 @@ csa,sky90,32,16929,0.060643,1066.240021,827.644,0.348818536 csa,sky90,32,22360,0.060643,1097.600021,868.175,0.468891676 add,tsmc28,32,15394,0.081095,348.768003,1770.0,0.110694675 add,tsmc28,16,25000,0.066258,202.608001,1140.0,0.082027404 -add,sky90,32,2400,0.41509,958.440019,151.083,0.1286779 csa,sky90,16,12777,0.067531,329.280006,134.949,0.0528362544 decoder,sky90,8,39096,0.030694,184.240003,330.692,0.021700658 add,sky90,128,2359,0.423881,5520.340104,1490.0,0.846490357 @@ -602,20 +616,19 @@ decoder,sky90,8,19548,0.04935,40.180001,11.498,0.00198387 decoder,sky90,128,15315,0.079077,1283.800018,1260.0,0.19468757399999997 flopenr,sky90,32,15000,0.216654,2190.300023,1020.0,6.633512172 add,tsmc28,32,9056,0.110392,148.176001,654.803,0.046364640000000006 -comparator,sky90,32,6316,0.2012,1239.700017,1450.0,0.27966799999999997 comparator,sky90,128,5385,0.267095,4787.300045,5300.0,1.402515845 mult,sky90,16,1146,0.87258,7193.200125,4570.0,5.5426281600000005 flop,sky90,128,11301,0.070789,4264.959961,2070.0,4.855913033000001 flopenr,sky90,64,10000,0.221498,4647.160022,2330.0,9.885677238 add,tsmc28,8,15696,0.063682,28.224,139.342,0.008195873400000001 csa,sky90,128,19788,0.060643,4264.960083,3260.0,1.628689051 +priorityencoder,sky90,32,9129,0.111067,293.020006,53.82,0.015371672800000003 mult,sky90,32,889,1.124838,26822.600434,18000.0,24.957905544 flopr,sky90,64,13277,0.085865,2974.300056,1950.0,3.66540512 alu,sky90,32,10000,0.384364,6083.84011,3640.0,5.60018348 flopr,sky90,32,20000,0.085865,1540.560029,1070.0,2.7735253650000002 flop,sky90,8,13279,0.070789,266.559998,129.629,0.35677656 
csa,sky90,8,18207,0.060643,266.560005,213.306,0.0942877364 -add,sky90,32,6000,0.271774,1746.36003,955.901,0.5761608800000001 flop,sky90,64,13844,0.070789,2132.47998,1040.0,2.974482991 csa,sky90,128,16820,0.060643,4264.960083,3260.0,1.384661619 floprasync,sky90,8,8398,0.071444,362.600007,161.167,0.229263796 @@ -633,6 +646,7 @@ add,tsmc28,128,3000,0.310001,473.634002,2200.0,0.12927041700000003 priorityonehot,sky90,32,6667,0.149833,623.280007,316.846,0.056037541999999996 csa,sky90,128,16490,0.060643,4264.960083,3250.0,1.356038123 alu,sky90,64,2102,0.475621,10732.960202,3830.0,1.9433874060000003 +add,tsmc28,16,14420,0.069317,140.868001,791.342,0.038401618000000005 flopenr,sky90,64,6052,0.211118,4590.320021,2610.0,5.780199722 alu,sky90,64,1314,0.76041,8106.560156,1160.0,1.1322504899999999 csa,sky90,8,15000,0.062613,203.840004,117.131,0.0461770875 @@ -643,6 +657,7 @@ flop,sky90,64,20000,0.070789,2132.47998,1040.0,4.297104667 add,tsmc28,32,18111,0.079248,413.154003,2110.0,0.145340832 shiftleft,sky90,64,2609,0.382901,2559.760048,666.022,0.302874691 add,tsmc28,8,5000,0.161025,19.026,93.122,0.00446522325 +shiftleft,sky90,32,6174,0.161964,3180.100048,3410.0,0.58145076 csa,sky90,8,15013,0.062613,203.840004,117.131,0.0462459618 shiftleft,sky90,8,13333,0.085966,939.82001,1560.0,0.16333540000000002 shiftleft,sky90,64,5000,0.239464,5848.640098,4780.0,1.380749424 @@ -655,24 +670,27 @@ flopenr,sky90,16,5813,0.189228,1106.42003,616.649,1.3088143848000002 flop,sky90,128,15000,0.070789,4264.959961,2070.0,6.445267661000001 flopenr,sky90,64,3242,0.26181,3387.859995,909.793,1.92220902 priorityonehot,sky90,16,12667,0.085601,696.78001,1080.0,0.08388898 +priorityencoder,sky90,32,8592,0.111067,293.020006,53.82,0.0144609234 add,tsmc28,64,8405,0.118964,347.004003,1530.0,0.11016066399999999 add,sky90,64,2788,0.358537,2637.180048,758.693,0.45928589700000005 flop,sky90,128,14126,0.070789,4264.959961,2070.0,6.069802805000001 flop,sky90,32,12996,0.070789,1066.23999,518.516,1.3962139204 
decoder,sky90,8,35838,0.030694,237.160005,420.74,0.025291855999999998 -add,sky90,32,3840,0.291206,1547.420027,784.112,0.299650974 add,tsmc28,64,7500,0.133293,307.944001,1320.0,0.09437144399999998 alu,sky90,32,2398,0.416982,5257.700098,2000.0,1.094160768 add,tsmc28,128,7728,0.129394,854.910008,3690.0,0.25193011800000004 +add,sky90,32,3432,0.290785,1156.400022,335.133,0.20762049 flopenr,sky90,64,2892,0.298899,3245.75997,644.425,1.6744321980000003 priorityonehot,sky90,128,4000,0.253946,2661.680036,1330.0,0.210521234 floprasync,sky90,32,13997,0.071444,1446.480028,643.984,1.539332424 csa,sky90,32,16291,0.060643,1066.240021,825.615,0.33547707600000004 flopenr,sky90,64,4627,0.20887,3954.300054,1660.0,3.0662116000000004 +add,sky90,32,2340,0.42591,958.440019,152.032,0.12734709000000002 mux2,sky90,1,10,0.060639,6.86,1.19,3.1229084999999996e-07 +priorityencoder,sky90,32,8771,0.111067,293.020006,53.82,0.014771911 flop,sky90,64,10000,0.070789,2132.47998,1040.0,2.1485735702000004 decoder,sky90,8,33883,0.030694,263.620004,439.421,0.027102802 -priorityencoder,sky90,16,10919,0.104403,159.740003,39.177,0.011275523999999999 +add,tsmc28,64,10340,0.099305,644.490002,3180.0,0.17010946500000002 mult,sky90,32,1111,1.092041,31649.100517,25300.0,33.716765875 flop,sky90,128,14974,0.070789,4264.959961,2070.0,6.4341537880000015 comparator,sky90,8,8727,0.124671,264.600002,278.768,0.053109846 @@ -680,21 +698,22 @@ alu,sky90,32,7500,0.383575,6553.260121,4050.0,4.620928025 csa,sky90,16,17568,0.060643,533.12001,412.98,0.181019355 decoder,sky90,16,20000,0.049981,94.080001,66.328,0.0074221785 alu,sky90,8,4166,0.240197,1719.900028,1340.0,0.42274671999999996 +add,tsmc28,64,12320,0.097568,648.018003,3250.0,0.20167305600000002 add,sky90,128,3590,0.386891,6860.000114,2620.0,1.744491519 csa,sky90,128,16160,0.060643,4264.960083,3250.0,1.328930702 flop,sky90,32,16104,0.070789,1066.23999,518.516,1.7301327123 -priorityencoder,sky90,32,8463,0.111067,293.020006,53.82,0.0142498961 
add,tsmc28,64,8102,0.123413,337.554002,1480.0,0.10761613600000002 mult,sky90,64,4000,1.411752,93087.261425,60500.0,556.283934576 +priorityencoder,sky90,32,10740,0.111067,293.020006,53.82,0.0180817076 csa,sky90,8,14693,0.067531,164.640003,67.475,0.0304159624 comparator,sky90,64,3636,0.275001,1323.000026,357.28,0.165550602 -priorityencoder,sky90,32,9724,0.111067,293.020006,53.82,0.0163712758 floprasync,sky90,16,14837,0.071444,723.240014,321.992,0.813961492 add,tsmc28,32,15998,0.081128,345.618001,1760.0,0.11601304 decoder,sky90,16,25538,0.039572,265.580003,416.038,0.028729272 flopenr,sky90,64,5836,0.198621,4564.840035,2580.0,4.922821485 shiftleft,sky90,8,10444,0.095384,335.160004,328.601,0.060759608 add,sky90,8,5409,0.182541,209.720004,99.155,0.041436807000000006 +add,sky90,32,4212,0.276372,1701.280028,896.35,0.33496286399999997 add,tsmc28,32,1000,0.912322,67.157999,231.062,0.0220781924 flopenr,sky90,32,5764,0.185375,2024.679996,668.031,1.3873465 flop,sky90,32,15000,0.070789,1066.23999,518.516,1.6115399006000002 @@ -703,36 +722,45 @@ alu,sky90,16,3801,0.273329,3920.00006,3090.0,1.040016845 add,tsmc28,8,20000,0.049999,69.426001,394.007,0.016149677 flopenr,sky90,64,5302,0.227516,4116.98001,1590.0,4.126230176000001 add,tsmc28,8,7880,0.123121,20.538,106.097,0.0054665724 +csa,sky90,128,10,0.209074,1881.599976,469.596,0.0008718385800000001 mult,sky90,16,976,1.024406,4960.760064,1320.0,2.087739428 add,tsmc28,16,6443,0.138825,50.274,244.477,0.012882959999999999 csa,sky90,128,18139,0.060643,4264.960083,3260.0,1.492970017 comparator,sky90,64,4364,0.229142,1709.120026,1020.0,0.276803536 +add,sky90,32,5460,0.27667,1690.500029,859.028,0.45456881 alu,sky90,32,2659,0.384337,6206.340103,3560.0,1.485846842 flopenr,sky90,64,5079,0.203824,4340.420085,2230.0,4.60947976 add,tsmc28,8,9056,0.108551,21.42,107.887,0.0057749132 add,tsmc28,32,4618,0.189997,108.990001,518.291,0.0276635632 +shiftleft,sky90,32,6300,0.162938,3300.640047,3910.0,0.651752 
csa,sky90,8,15652,0.062613,203.840004,117.131,0.048186964799999996 flopr,sky90,64,9317,0.172725,2896.880051,1590.0,5.1119691 mult,sky90,16,1220,0.81966,8829.800131,6950.0,6.09499176 decoder,sky90,8,24773,0.04026,44.100001,23.272,0.002604822 mult,sky90,8,1855,0.605444,2332.40004,1740.0,1.4470111599999997 flopenr,sky90,64,5013,0.228449,4007.220058,1760.0,3.779231807 +priorityencoder,sky90,64,10,0.112447,546.840011,77.149,2.9123773000000003e-05 add,sky90,8,25000,0.151154,660.520013,864.531,0.39103539800000003 -add,sky90,32,10,4.160501,456.679995,112.161,0.005429453805000001 -shiftleft,sky90,16,7231,0.138234,1233.820018,1400.0,0.21619797600000001 +comparator,sky90,32,4794,0.208426,719.320014,301.75,0.106714112 add,tsmc28,64,9413,0.106226,423.108003,1900.0,0.12534668 +add,tsmc28,64,9950,0.103506,577.458005,2830.0,0.15536250599999998 decoder,sky90,8,10000,0.085629,37.240001,2.355,0.0012364827599999997 +add,sky90,8,10,0.940062,103.879999,24.765,0.0002515605912 priorityonehot,sky90,128,2963,0.337291,1562.120028,493.695,0.0711009428 add,sky90,16,6087,0.226225,857.500013,678.287,0.24771637500000002 flopenr,sky90,128,3000,0.27393,6483.679942,1300.0,3.1685483100000003 +comparator,sky90,32,4692,0.213099,736.960014,293.544,0.102500619 flopr,sky90,8,15000,0.085865,373.380007,241.917,0.517680085 add,sky90,16,4261,0.234402,607.60001,368.742,0.120013824 floprasync,sky90,128,5000,0.071444,5785.920113,2580.0,2.20619072 priorityonehot,sky90,64,10000,0.209855,1194.620015,760.611,0.23293904999999998 +add,tsmc28,8,22050,0.049896,90.216001,502.211,0.021754656 csa,sky90,128,9583,0.080832,2885.120056,975.935,0.37400966399999996 flopenr,sky90,32,4611,0.212058,1968.820014,750.904,1.3703187959999998 -shiftleft,sky90,16,7385,0.135404,937.860017,965.452,0.14786116800000002 priorityonehot,sky90,8,17200,0.057703,95.060001,27.191,0.0077148911 +flop,sky90,64,10,0.070789,2132.47998,1040.0,0.0021504140842 +add,tsmc28,16,13300,0.075179,96.642001,532.778,0.027289976999999997 
+shiftleft,sky90,128,10,1.792955,4984.280097,311.169,0.013196148800000002 csa,sky90,64,17568,0.060643,2132.480042,1660.0,0.724744493 add,sky90,16,4609,0.221986,815.360013,735.998,0.157388074 flop,sky90,32,15539,0.070789,1066.23999,518.516,1.6694523815000002 @@ -741,6 +769,9 @@ add,tsmc28,8,10000,0.099158,22.554,114.184,0.006296533 add,sky90,64,3030,0.331556,3202.640054,1280.0,0.586191008 flopenr,sky90,32,5091,0.177419,2188.340035,1120.0,1.631367705 add,sky90,8,8113,0.139058,664.440013,736.234,0.11917270599999999 +comparator,sky90,8,10,0.29577,118.580002,16.053,3.2505123000000005e-05 +priorityencoder,sky90,32,7876,0.111067,293.020006,53.82,0.0132613998 +shiftleft,sky90,16,7128,0.140285,1061.340013,1110.0,0.17311168999999998 add,tsmc28,8,15394,0.064922,28.602,137.546,0.0084268756 csa,sky90,32,15000,0.062613,815.360016,471.256,0.184520511 shiftleft,sky90,128,7500,0.32019,11850.160206,9180.0,5.50278534 @@ -750,12 +781,15 @@ comparator,sky90,16,6800,0.146926,723.240009,925.474,0.136935032 add,tsmc28,16,9020,0.107948,57.834,272.583,0.016623992 alu,sky90,16,3939,0.283216,4117.960074,3430.0,1.102559888 flopr,sky90,128,11180,0.171962,5301.800014,2250.0,10.207836282 +add,tsmc28,16,13860,0.072103,113.400001,618.303,0.031725319999999994 floprasync,sky90,128,13997,0.071444,5785.920113,2580.0,6.161187672 decoder,sky90,16,12005,0.08179,78.400002,12.174,0.0043675860000000006 +add,tsmc28,64,15400,0.100198,670.950005,3310.0,0.268430442 alu,sky90,32,2972,0.388258,6001.52011,3400.0,1.6136002479999998 flopr,sky90,128,6988,0.112133,5853.53999,3790.0,5.033313971 csa,sky90,64,10000,0.080832,1442.560028,301.524,0.146467584 add,sky90,16,2703,0.363987,405.720008,52.464,0.0510309774 +priorityencoder,sky90,32,10024,0.111067,293.020006,53.82,0.0169043974 decoder,sky90,64,7500,0.131244,264.600005,64.81,0.0141612276 add,tsmc28,32,15696,0.081641,339.192002,1700.0,0.11437904100000001 add,tsmc28,8,4546,0.218872,16.128,61.042,0.0056315765600000005 @@ -763,14 +797,12 @@ 
csa,sky90,32,16610,0.060643,1066.240021,884.851,0.34481609800000007 flopenr,sky90,16,5599,0.187288,1208.340028,815.816,1.258762648 decoder,sky90,16,35052,0.039572,518.420012,914.948,0.075859524 comparator,sky90,16,5333,0.186933,318.500006,100.145,0.0372744402 +priorityencoder,sky90,16,7680,0.104403,159.740003,39.177,0.007934627999999999 priorityonehot,sky90,32,7600,0.145454,656.600009,371.544,0.07199973 floprasync,sky90,8,15957,0.071444,362.600007,161.167,0.43552262399999997 -comparator,sky90,32,4842,0.206449,781.060011,485.75,0.110863113 alu,sky90,8,4421,0.235607,2200.100037,2210.0,0.575116687 -shiftleft,sky90,16,8000,0.124837,968.240013,940.706,0.15092793300000001 flopr,sky90,16,10947,0.133182,746.760008,381.108,1.2380598719999998 flopr,sky90,128,13277,0.174211,5125.399977,1890.0,11.290789121 -priorityencoder,sky90,16,9195,0.104403,159.740003,39.177,0.009500673 flopr,sky90,8,13277,0.11919,400.820003,214.285,0.7405036320000001 priorityencoder,sky90,128,9493,0.113763,1058.400021,117.974,0.051193350000000006 alu,sky90,128,1906,0.524631,25815.160489,9940.0,5.398977620999999 @@ -799,28 +831,28 @@ flop,sky90,32,13561,0.070789,1066.23999,518.516,1.4568942512 priorityencoder,sky90,8,8984,0.104625,85.260002,26.481,0.006675074999999999 floprasync,sky90,64,13437,0.071444,2892.960056,1290.0,2.9566384959999996 comparator,sky90,128,2308,0.406531,2810.640055,437.781,0.244731662 +add,tsmc28,8,19950,0.050114,70.308,400.586,0.016126685199999997 add,sky90,16,4235,0.235896,600.740011,361.949,0.122901816 add,tsmc28,64,9077,0.110157,400.176003,1840.0,0.117978147 add,sky90,16,4348,0.22992,610.540011,364.173,0.11610960000000001 add,sky90,128,2769,0.431383,6941.340124,2860.0,1.297600064 add,sky90,8,18000,0.147907,580.16001,689.26,0.27555074100000004 -comparator,sky90,32,5579,0.192149,1206.380012,1440.0,0.25094659399999997 add,tsmc28,16,7893,0.109936,52.164,250.533,0.013016422400000002 flopenr,sky90,32,4899,0.176011,2065.840024,891.448,1.2759037389999999 
flop,sky90,16,20000,0.070789,533.119995,259.258,1.074435442 -shiftleft,sky90,16,7077,0.141279,1079.960019,1180.0,0.18154351499999996 csa,sky90,128,15830,0.062613,3261.440063,1790.0,0.768324123 flop,sky90,8,10000,0.070789,266.559998,129.629,0.26862797353 csa,sky90,8,16291,0.060643,266.560005,205.51,0.08390565479999999 mult,sky90,16,732,1.36399,4043.480026,624.48,1.23577494 +add,tsmc28,8,20370,0.049186,82.530001,483.34,0.018543122 floprasync,sky90,16,15957,0.071444,723.240014,321.992,0.875403332 priorityonehot,sky90,64,4476,0.223289,1068.200015,670.986,0.087305999 comparator,sky90,128,3077,0.324985,2559.760047,659.43,0.28566181500000004 shiftleft,sky90,128,2581,0.387267,7361.76014,2470.0,0.958098558 priorityonehot,sky90,64,5048,0.220929,1048.600015,648.313,0.10251105599999999 -comparator,sky90,32,5000,0.205372,919.240014,840.47,0.128562872 comparator,sky90,16,6667,0.150575,691.880011,816.855,0.1210623 -shiftleft,sky90,16,7500,0.133331,1031.940019,1060.0,0.17413028600000002 +add,tsmc28,32,13130,0.078731,426.132002,2160.0,0.108018932 +shiftleft,sky90,32,5922,0.168873,2672.46004,2300.0,0.45578822699999993 add,tsmc28,16,8537,0.109936,52.164,250.533,0.0140828016 alu,sky90,16,4146,0.296664,3496.640061,2610.0,1.021117488 flopr,sky90,16,6988,0.110749,689.919998,364.27,0.5984765211 @@ -838,6 +870,7 @@ add,tsmc28,64,8403,0.118982,347.886003,1540.0,0.110177332 decoder,sky90,16,15022,0.065338,78.400002,28.061,0.006069900199999999 csa,sky90,32,14693,0.067531,658.560013,269.898,0.121353207 flopenr,sky90,128,15000,0.224053,8643.600023,4030.0,27.361800466000002 +priorityencoder,sky90,16,9408,0.104403,159.740003,39.177,0.0097303596 flop,sky90,8,20000,0.070789,266.559998,129.629,0.5373239045 alu,sky90,8,5102,0.241901,2059.96003,2060.0,0.674419988 add,sky90,16,4955,0.220767,802.620015,561.649,0.168445221 @@ -851,9 +884,12 @@ csa,sky90,128,9894,0.080832,2885.120056,603.047,0.28986355199999997 add,sky90,16,3604,0.277242,442.960009,136.766,0.078736728 
add,tsmc28,32,14791,0.079295,378.630002,1900.0,0.11220242500000001 decoder,sky90,8,25279,0.038956,48.020001,35.206,0.0031047931999999994 +comparator,sky90,32,4896,0.204247,795.760012,528.548,0.114174073 add,tsmc28,64,7732,0.129331,331.128002,1450.0,0.102042159 flopr,sky90,64,10714,0.17183,2815.540026,1390.0,5.43756035 +add,sky90,32,2631,0.379925,977.060019,169.107,0.140952175 priorityonehot,sky90,16,10000,0.099923,281.260004,117.94,0.02398152 +priorityencoder,sky90,32,8950,0.111067,293.020006,53.82,0.0150717919 mult,sky90,8,10,2.076433,1009.399998,211.637,0.005689426420000001 decoder,sky90,128,17868,0.101057,1072.12001,985.334,0.202922456 csa,sky90,32,15652,0.062613,815.360016,471.256,0.19247236200000004 @@ -861,16 +897,18 @@ decoder,sky90,32,20000,0.060737,1096.620017,1730.0,0.188649122 decoder,sky90,8,20223,0.04935,40.180001,11.498,0.0020480249999999998 mult,sky90,128,517,1.934229,243417.302347,56700.0,150.428857788 comparator,sky90,128,3846,0.273602,4038.58005,3610.0,0.65117276 -comparator,sky90,32,5263,0.195832,1060.360011,1060.0,0.168611352 flopenr,sky90,64,4971,0.187689,3756.339987,1020.0,2.031358047 shiftleft,sky90,64,4087,0.244635,4460.960079,2810.0,0.74124405 priorityonehot,sky90,16,11333,0.088202,338.100002,367.782,0.034486982 +comparator,sky90,32,5508,0.200847,1061.340014,1040.0,0.23619607199999998 priorityonehot,sky90,32,6533,0.153004,593.88001,232.761,0.049573296 +shiftleft,sky90,32,7056,0.161457,3370.220042,3560.0,0.710087886 +add,sky90,64,10,8.474034,927.079988,230.083,0.023015476344 comparator,tsmc28,32,7500,0.133257,80.261999,259.856,0.0200551785 comparator,sky90,16,4000,0.249312,280.280005,55.248,0.027324595200000003 mult,sky90,64,1000,1.350119,103523.281624,73000.0,141.54647596 csa,sky90,128,17568,0.060643,4264.960083,3260.0,1.445971692 -comparator,sky90,32,10000,0.194087,1451.380013,1850.0,0.596041177 +comparator,sky90,16,10,0.576329,252.840005,31.402,0.0001368781375 alu,sky90,16,3248,0.307875,3183.040048,2030.0,0.804477375 
add,tsmc28,8,3000,0.238199,15.75,58.809,0.00370637644 decoder,sky90,32,19000,0.059976,951.580016,1480.0,0.141903216 @@ -888,13 +926,12 @@ add,tsmc28,128,7650,0.130714,800.856007,3420.0,0.242343756 decoder,sky90,32,15332,0.06516,314.580003,249.747,0.033036119999999995 alu,sky90,128,2139,0.516409,28213.2205,14200.0,7.374836929000001 mult,sky90,64,857,1.336163,107976.401664,79500.0,121.17127781800001 +flop,sky90,128,10,0.070789,4264.959961,2070.0,0.004293777584000001 +csa,sky90,64,10,0.209074,940.799988,233.753,0.00043278318 alu,sky90,16,7500,0.289423,4254.180065,3860.0,2.3205936140000003 floprasync,sky90,32,15117,0.071444,1446.480028,643.984,1.662430436 -comparator,sky90,32,3158,0.304333,684.040013,135.532,0.0640925298 -priorityencoder,sky90,16,9004,0.104403,159.740003,39.177,0.0093023073 mult,sky90,128,551,1.814879,274624.423573,87300.0,215.616699595 mult,sky90,8,1927,0.574177,3273.200051,3430.0,1.827605391 -shiftleft,sky90,32,10000,0.15971,3675.98006,4090.0,1.0643074399999999 alu,sky90,8,10000,0.235219,2419.620038,2600.0,1.5159864550000002 comparator,sky90,64,6000,0.221138,2341.220025,2590.0,0.45222721 priorityonehot,sky90,16,6667,0.147215,152.880003,35.496,0.007802395000000001 @@ -907,10 +944,10 @@ add,tsmc28,16,4832,0.194121,47.124,234.075,0.012462568200000001 floprasync,sky90,8,13437,0.071444,362.600007,161.167,0.366793496 add,tsmc28,8,14489,0.068305,26.46,130.305,0.0076569904999999995 alu,sky90,16,4837,0.301919,3701.460057,3010.0,1.3909408329999997 +add,tsmc28,64,11440,0.096855,786.240003,3940.0,0.21724576500000004 flop,sky90,128,13844,0.070789,4264.959961,2070.0,5.948541248 decoder,sky90,16,30044,0.039572,495.880012,908.129,0.06470022 alu,sky90,64,2190,0.463611,11599.280214,4980.0,2.276793621 -shiftleft,sky90,16,7846,0.127358,935.900016,874.844,0.153211674 decoder,sky90,8,13313,0.05554,38.220001,2.007,0.0010152712 csa,sky90,8,16610,0.060643,266.560005,222.649,0.0865254324 flopenr,sky90,16,5390,0.187272,1099.560027,590.987,1.1700380015999998 @@ -919,6 +956,7 @@ 
add,tsmc28,32,15000,0.078769,420.714004,2150.0,0.119807649 flopr,sky90,64,10947,0.17183,2816.520026,1390.0,5.555951220000001 decoder,sky90,8,32580,0.030694,148.960001,268.119,0.016052962 mult,sky90,32,963,1.089271,32490.92054,27000.0,29.452798569000006 +add,tsmc28,8,21630,0.04887,86.814001,481.026,0.019743479999999997 add,tsmc28,8,25000,0.051315,80.892,454.024,0.023040435 flopr,sky90,16,11413,0.133182,746.760008,381.108,1.290799944 flopenr,sky90,32,5986,0.190611,2119.739996,800.403,1.90611 @@ -930,25 +968,26 @@ csa,sky90,32,10000,0.080832,721.280014,150.762,0.073233792 comparator,sky90,128,3692,0.270828,3380.020055,2000.0,0.505365048 mult,sky90,8,2182,0.550085,4360.02008,5200.0,2.4209240850000002 csa,sky90,64,16610,0.060643,2132.480042,1660.0,0.6849626850000001 +priorityencoder,sky90,32,9308,0.111067,293.020006,53.82,0.0156715537 flopenr,sky90,64,4916,0.20176,3790.640003,1130.0,2.4461382400000002 floprasync,sky90,16,8398,0.071444,723.240014,321.992,0.46074235599999996 flopenr,sky90,16,7398,0.189228,1106.42003,616.649,1.6656794700000002 -comparator,sky90,32,4000,0.24995,608.580012,130.613,0.0684863 +add,tsmc28,64,10780,0.098083,652.932003,3240.0,0.17939380700000002 mult,sky90,8,1091,0.915221,1167.180013,211.892,0.30293815099999993 +comparator,sky90,32,5304,0.195831,1117.200012,1210.0,0.179577027 add,sky90,64,3636,0.330032,3266.340054,1220.0,0.79537712 flop,sky90,128,14692,0.070789,4264.959961,2070.0,6.313033809 add,tsmc28,8,15000,0.06579,28.728,137.18,0.008302698 -add,sky90,32,4080,0.256294,1991.360031,1240.0,0.408532636 -shiftleft,sky90,16,10000,0.128994,1192.660017,1420.0,0.242379726 mult,sky90,64,10,14.7933,46798.920227,5460.0,2.7101325599999995 +shiftleft,sky90,16,10,0.596128,350.840007,23.053,0.00019374159999999997 +comparator,sky90,32,5202,0.196011,989.800013,915.96,0.17052957000000002 floprasync,sky90,16,14557,0.071444,723.240014,321.992,0.798601032 flop,sky90,8,16104,0.070789,266.559998,129.629,0.43264113130000004 
-comparator,sky90,32,5368,0.199678,1110.340013,1120.0,0.206067696 mult,sky90,128,10,29.334627,180734.540854,18000.0,22.264981893 flop,sky90,64,15539,0.070789,2132.47998,1040.0,3.3386216070000003 add,tsmc28,8,12074,0.081502,23.31,115.92,0.0062838042000000005 +priorityencoder,sky90,32,9666,0.111067,293.020006,53.82,0.016271315499999998 flopr,sky90,128,12811,0.174211,5123.439977,1890.0,10.893762252 -add,sky90,32,5000,0.2505,1933.540033,1030.0,0.4726935 mult,sky90,64,714,1.400528,87215.101373,43900.0,85.31176259200001 alu,sky90,64,2496,0.442869,12618.480223,6700.0,2.9570363129999997 priorityencoder,sky90,8,10323,0.104625,85.260002,26.481,0.0076690125 @@ -956,6 +995,7 @@ flopr,sky90,8,20000,0.085865,597.800001,677.746,0.746080985 mult,sky90,128,539,1.855281,259737.242949,71800.0,184.947397047 csa,sky90,16,16291,0.060643,533.12001,412.352,0.16773853800000002 flopenr,sky90,64,5109,0.194025,4256.140049,1930.0,4.038630375 +comparator,sky90,32,5712,0.203736,1218.140014,1420.0,0.25772604 flopenr,sky90,32,8059,0.19172,2358.860018,1330.0,3.6465144 csa,sky90,64,15000,0.062613,1630.720032,893.318,0.363844143 mult,sky90,16,6000,0.831308,8594.600132,7150.0,29.671045136 @@ -963,6 +1003,7 @@ priorityonehot,sky90,16,8889,0.11233,198.940003,56.451,0.013827823 alu,sky90,8,4336,0.230485,2084.460033,1910.0,0.5681455249999999 priorityonehot,sky90,8,16000,0.061645,82.320002,24.568,0.006065868 alu,sky90,32,2868,0.38931,5940.760105,3200.0,1.55490414 +priorityencoder,sky90,16,8832,0.104403,159.740003,39.177,0.0091352625 comparator,sky90,64,4818,0.214579,2591.120026,2620.0,0.545245239 alu,sky90,128,1167,0.85624,18358.340355,2460.0,2.59611968 alu,sky90,64,2628,0.45202,12977.160225,6830.0,3.3114985200000002 @@ -975,9 +1016,7 @@ mult,sky90,128,787,1.735561,317542.544465,166000.0,461.80154200199996 comparator,tsmc28,16,7500,0.12946,29.736,99.737,0.006913163999999999 alu,sky90,64,2365,0.452964,12152.980222,6200.0,2.5982015040000004 
priorityonehot,sky90,16,11111,0.089821,300.860005,305.978,0.029281646 -comparator,sky90,32,5474,0.192304,1188.740012,1430.0,0.20691910400000002 flopenr,sky90,32,4803,0.217601,2179.52003,1080.0,2.520907585 -add,sky90,32,4240,0.268332,1829.660028,1090.0,0.373518144 csa,sky90,32,17568,0.060643,1066.240021,827.644,0.36203871 comparator,sky90,128,4000,0.268954,4027.800041,3660.0,0.679377804 decoder,sky90,8,31928,0.031295,106.82,190.81,0.010796775 @@ -988,14 +1027,12 @@ add,tsmc28,32,8620,0.115079,146.538001,644.995,0.045571284000000004 flop,sky90,16,16104,0.070789,533.119995,259.258,0.8651406846 add,tsmc28,64,5043,0.178584,231.210001,1080.0,0.06107572799999999 priorityonehot,sky90,16,15556,0.088601,610.540002,811.656,0.097726903 -add,sky90,32,5600,0.254525,1871.800028,877.446,0.50039615 shiftleft,sky90,128,3871,0.303026,12747.840208,11600.0,3.235408602 flop,sky90,8,12996,0.070789,266.559998,129.629,0.34911011129999997 shiftleft,sky90,64,4435,0.24668,5129.320094,4030.0,1.0940258000000003 priorityonehot,sky90,64,7500,0.224494,1243.620017,948.965,0.182513622 shiftleft,sky90,64,4000,0.249988,4733.400082,3490.0,0.8394597039999999 comparator,sky90,16,10000,0.146177,1065.260009,1610.0,0.28387573400000005 -shiftleft,sky90,16,7692,0.130257,1033.900012,1060.0,0.17128795500000002 priorityonehot,sky90,32,6133,0.162922,442.960006,148.282,0.030596751600000006 priorityonehot,sky90,128,3704,0.276108,2448.040034,1370.0,0.18526846800000002 csa,sky90,128,15652,0.062613,3261.440063,1790.0,0.759683529 @@ -1008,7 +1045,6 @@ flop,sky90,8,15539,0.070789,266.559998,129.629,0.41745689080000004 decoder,sky90,16,25000,0.039941,245.0,388.765,0.027399525999999997 mult,sky90,8,2545,0.564127,4034.66007,4580.0,2.9069464310000006 flopenr,sky90,16,7147,0.189228,1106.42003,616.649,1.6091002979999998 -shiftleft,sky90,32,5750,0.173824,2582.30004,2290.0,0.43681971199999997 flopr,sky90,128,16305,0.085865,5959.380113,3930.0,9.01943133 flopr,sky90,8,16305,0.085865,373.380007,241.917,0.562673345 
mult,sky90,16,1293,0.813903,9702.000166,8740.0,6.423322476 @@ -1018,6 +1054,7 @@ flop,sky90,64,15000,0.070789,2132.47998,1040.0,3.222810803 add,sky90,64,3152,0.328164,3804.360061,1890.0,0.7199918160000001 add,sky90,16,3478,0.287131,443.940009,126.253,0.074941191 flopenr,sky90,64,5495,0.222369,4167.940028,1850.0,4.343756046 +add,tsmc28,64,11000,0.098535,679.392002,3420.0,0.183570705 flopr,sky90,8,9317,0.101851,389.060005,211.043,0.4071595576 csa,sky90,128,14693,0.067531,2634.240051,1080.0,0.48561542099999994 add,tsmc28,128,4500,0.205985,498.204002,2290.0,0.13512616 @@ -1025,10 +1062,11 @@ csa,sky90,8,9583,0.080832,180.320004,58.216,0.022754207999999998 shiftleft,sky90,64,4696,0.2291,6340.600105,5970.0,1.429584 add,tsmc28,64,1000,0.998735,187.110001,917.222,0.053831816500000004 mult,sky90,64,671,1.490298,74604.461058,28900.0,66.73703473799999 -shiftleft,sky90,16,6154,0.162492,802.620013,641.83,0.118131684 +flop,sky90,8,10,0.070789,266.559998,129.629,0.00026898191853 add,sky90,16,4775,0.224325,926.100016,875.917,0.185068125 mult,sky90,128,596,1.71139,312992.404301,144000.0,294.25126243 flopenr,sky90,16,6342,0.173049,1137.780011,533.637,1.114089462 +add,sky90,32,10,4.160501,456.679995,112.161,0.005429453805000001 flop,sky90,32,10000,0.070789,1066.23999,518.516,1.0743575741 flopenr,sky90,16,3171,0.203444,841.819993,171.726,0.3346043468 add,tsmc28,128,7350,0.136053,766.962005,3210.0,0.234963531 @@ -1036,15 +1074,18 @@ decoder,sky90,8,31276,0.031874,81.340001,118.105,0.0081629314 flopenr,sky90,64,5205,0.223461,4116.000022,1710.0,4.376260224 add,sky90,64,6000,0.328457,3749.480066,1770.0,1.403496761 comparator,sky90,16,6267,0.168782,502.740008,498.843,0.08050901399999999 +add,tsmc28,64,8800,0.113635,395.640004,1780.0,0.11738495499999999 decoder,sky90,8,18000,0.055416,37.240001,6.065,0.0016181472 priorityonehot,sky90,32,5000,0.199515,362.600007,102.444,0.019392858000000002 mult,sky90,32,1296,1.097292,30544.640517,23700.0,38.819996376 
flop,sky90,128,20000,0.070789,4264.959961,2070.0,8.593784600000001 +shiftleft,sky90,32,6804,0.158954,3534.86005,4050.0,0.68906559 alu,sky90,16,3593,0.302131,3612.280059,2790.0,0.9477849469999999 alu,sky90,32,1564,0.638329,3728.900073,535.987,0.541941321 flop,sky90,16,15539,0.070789,533.119995,259.258,0.8347651247000001 flop,sky90,64,12996,0.070789,2132.47998,1040.0,2.7922721050000003 flopenr,sky90,32,6217,0.174192,2356.900034,1490.0,2.579260944 +comparator,sky90,128,10,0.842074,1997.240039,243.506,0.001300162256 add,sky90,128,2410,0.414767,5600.700103,1570.0,0.893408118 decoder,sky90,8,28818,0.034594,64.680001,76.04,0.0056768754 comparator,sky90,64,2727,0.333026,1392.580027,202.012,0.12122146399999999 @@ -1056,6 +1097,7 @@ csa,sky90,64,16291,0.060643,2132.480042,1630.0,0.669620006 csa,sky90,8,10000,0.080832,180.320004,37.69,0.018348864 priorityonehot,sky90,8,20800,0.054084,154.840002,56.302,0.017934254400000002 csa,sky90,16,7500,0.10878,250.880005,46.518,0.030991421999999998 +shiftleft,sky90,16,6480,0.15429,848.680007,840.985,0.15645006 shiftleft,sky90,128,3097,0.322855,8849.400141,5950.0,1.7169428899999999 add,tsmc28,32,10775,0.092794,189.630002,873.487,0.059944924000000004 flop,sky90,32,20000,0.070789,1066.23999,518.516,2.1487222271 @@ -1069,14 +1111,18 @@ csa,sky90,16,16610,0.060643,533.12001,441.468,0.172589978 flop,sky90,128,10000,0.070789,4264.959961,2070.0,4.2968923 csa,sky90,128,15501,0.062613,3261.440063,1790.0,0.7523578080000001 add,tsmc28,16,8054,0.109936,52.164,250.533,0.0132912624 +csa,sky90,32,10,0.209074,470.399994,115.832,0.00021534622000000004 add,sky90,8,6491,0.157933,443.940009,467.006,0.07580783999999999 priorityonehot,sky90,64,5143,0.220683,1064.280016,459.708,0.088714566 flopenr,sky90,64,4531,0.216814,3957.240066,1770.0,3.501112472 csa,sky90,16,15000,0.062613,407.680008,235.173,0.09227277810000001 +priorityencoder,sky90,16,9792,0.104403,159.740003,39.177,0.0101166507 +shiftleft,sky90,8,10,0.368289,130.340003,10.712,4.552052040000001e-05 
decoder,sky90,8,25000,0.039559,46.060001,27.261,0.0028205567 comparator,sky90,16,6933,0.168782,607.600006,799.51,0.094180356 add,sky90,64,3212,0.336436,3593.660062,1720.0,0.6964225200000002 add,tsmc28,128,9408,0.117481,1300.95001,6200.0,0.38028599700000004 +comparator,sky90,32,4488,0.222664,705.600013,279.131,0.088174944 decoder,sky90,128,13784,0.080668,1300.460014,1370.0,0.18795644 flop,sky90,16,15257,0.070789,533.119995,259.258,0.8196445943 csa,sky90,16,15013,0.062613,407.680008,235.173,0.0923416524 @@ -1084,15 +1130,14 @@ floprasync,sky90,128,13437,0.071444,5785.920113,2580.0,5.9146344279999985 shiftleft,sky90,128,3032,0.329767,9579.500162,6250.0,1.8898946769999998 flopr,sky90,32,12578,0.101547,1445.500023,882.979,2.039469948 alu,sky90,128,2217,0.514448,27540.940502,14000.0,7.25886128 -add,sky90,32,3760,0.278449,1689.520028,834.387,0.323279289 csa,sky90,128,16610,0.060643,4264.960083,3260.0,1.3673783640000001 floprasync,sky90,64,14557,0.071444,2892.960056,1290.0,3.203048852 flopenr,sky90,16,4228,0.180729,842.799992,176.142,0.3973688523 -priorityencoder,sky90,32,9364,0.111067,293.020006,53.82,0.0157604073 priorityonehot,sky90,32,7200,0.143094,1101.520018,1470.0,0.16956639 comparator,sky90,8,8545,0.116724,205.800003,165.947,0.041670467999999995 mult,sky90,128,528,1.893939,255011.682875,66500.0,175.06625146500002 shiftleft,sky90,64,10000,0.23373,6486.620108,6060.0,3.09762369 +add,sky90,32,2857,0.349019,998.620019,202.848,0.164736968 flop,sky90,16,13561,0.070789,533.119995,259.258,0.7285179146000001 priorityonehot,sky90,16,5000,0.196212,130.340003,29.8,0.005788254 mult,sky90,8,5000,0.552339,4261.040075,5050.0,5.394142674 @@ -1100,12 +1145,15 @@ flop,sky90,32,11301,0.070789,1066.23999,518.516,1.214102139 mult,sky90,64,5000,1.404875,94040.801492,61600.0,723.34484975 comparator,sky90,16,8000,0.158838,801.640006,1190.0,0.15169029 flopenr,sky90,8,9518,0.148606,636.020015,366.016,0.9204655639999999 +add,tsmc28,64,9900,0.101353,573.048003,2800.0,0.155374149 
alu,sky90,16,3455,0.289435,3445.680058,2290.0,0.80289269 add,tsmc28,8,21130,0.050365,90.846,513.587,0.020700015 -add,sky90,32,4000,0.280842,1730.680031,849.828,0.358635234 priorityonehot,sky90,16,10222,0.097791,313.600004,134.808,0.026892525000000004 +shiftleft,sky90,32,5544,0.180365,2727.340043,2630.0,0.51043295 +shiftleft,sky90,16,8748,0.118907,1426.880021,1760.0,0.233414441 flopr,sky90,128,12112,0.177282,5399.800033,2390.0,11.989758942 flopr,sky90,8,12112,0.11919,400.820003,214.285,0.675533163 +priorityencoder,sky90,16,10368,0.104403,159.740003,39.177,0.0107117478 decoder,sky90,16,18000,0.052159,98.980002,39.029,0.0068328289999999995 decoder,sky90,8,11912,0.067612,37.240001,2.814,0.0011845622400000002 shiftleft,sky90,64,4522,0.23827,5915.280105,5100.0,1.2318559 @@ -1116,6 +1164,7 @@ comparator,sky90,8,7273,0.13643,147.980003,61.898,0.021364937999999997 mult,sky90,16,1195,0.836814,7685.16012,5330.0,5.5187883300000005 csa,sky90,32,7500,0.10878,501.76001,93.035,0.061863186 add,tsmc28,64,7952,0.12526,319.536001,1390.0,0.09645020000000001 +add,tsmc28,64,6600,0.151266,296.100001,1280.0,0.090305802 csa,sky90,8,17568,0.060643,266.560005,213.306,0.09095237140000001 flop,sky90,8,15257,0.070789,266.559998,129.629,0.40986123110000006 alu,sky90,8,3401,0.29399,1119.160018,535.517,0.22813624 @@ -1123,6 +1172,7 @@ shifter,sky90,16,10,1.237745,681.100013,52.029,0.001189472945 add,sky90,16,5406,0.22338,993.720015,916.992,0.24750504 add,sky90,128,2564,0.436395,6456.240111,2270.0,1.102770165 decoder,sky90,32,25000,0.058416,905.52001,1340.0,0.177117312 +shiftleft,sky90,32,6678,0.161895,3698.520061,4310.0,0.79911372 add,sky90,16,5135,0.222202,789.880013,544.462,0.15798562200000002 shiftleft,sky90,128,10000,0.313996,12023.620188,9230.0,7.595249244 decoder,sky90,8,37141,0.030694,188.160004,322.82,0.0214858 @@ -1133,33 +1183,33 @@ priorityonehot,sky90,64,4571,0.220784,1016.260015,474.392,0.07948224 mult,sky90,64,743,1.345895,95943.961579,56200.0,96.32301336 
csa,sky90,16,14693,0.067531,329.280006,134.949,0.0607643938 flopr,sky90,32,10714,0.107015,1436.680023,864.0,1.8148673849999999 -comparator,sky90,32,4211,0.237004,654.640013,145.103,0.072997232 comparator,tsmc28,64,7500,0.13289,163.547999,522.847,0.04039856 alu,sky90,8,4847,0.227576,2652.860044,3100.0,0.695017104 -shiftleft,sky90,32,8750,0.164673,3752.420048,4460.0,1.0595060820000002 priorityonehot,sky90,128,3630,0.27774,2218.720036,971.079,0.14692446 flopr,sky90,128,11646,0.177282,5376.280021,2320.0,11.488760009999998 alu,sky90,8,2551,0.390589,784.980015,188.056,0.12030141200000001 flopr,sky90,8,11646,0.11919,400.820003,214.285,0.649525905 +flop,sky90,32,10,0.070789,1066.23999,518.516,0.0010716675921 flopenr,sky90,32,4707,0.208408,2050.160023,905.261,1.8560816479999998 mult,sky90,8,1818,0.581954,2672.460046,2200.0,1.6195779819999998 +add,tsmc28,16,13580,0.073608,110.88,614.344,0.031357008000000006 csa,sky90,64,14693,0.067531,1317.120026,539.796,0.24263888299999997 flopr,sky90,128,12345,0.177282,5403.720033,2390.0,12.250540763999998 +shiftleft,sky90,16,9720,0.110329,1628.760017,2190.0,0.296895339 flopr,sky90,8,12345,0.11919,400.820003,214.285,0.688608306 mult,sky90,8,1673,0.611485,2094.260033,1390.0,1.0994500299999999 add,tsmc28,16,35000,0.067289,190.764001,1060.0,0.107124088 csa,sky90,16,15652,0.062613,407.680008,235.173,0.09629879400000001 alu,sky90,64,2146,0.465831,11271.960215,4250.0,2.0906495279999997 -priorityencoder,sky90,32,7203,0.111067,293.020006,53.82,0.0121285164 shiftleft,sky90,8,5000,0.198975,154.840003,31.052,0.0137491725 flopenr,sky90,8,6663,0.152384,635.040013,414.486,0.664927584 -comparator,sky90,16,10,0.576329,252.840005,31.402,0.0001368781375 add,tsmc28,64,6902,0.144657,298.242001,1280.0,0.09069993900000001 priorityonehot,sky90,8,22000,0.054084,157.780003,56.585,0.019383705600000002 mult,sky90,128,607,1.707473,305974.624156,138000.0,291.971053108 mult,sky90,32,907,1.102529,29124.620481,20800.0,25.612851199000005 
floprasync,sky90,32,19596,0.071444,1446.480028,643.984,2.155036816 flopenr,sky90,128,10000,0.229286,8959.160147,4960.0,19.608768006 +priorityencoder,sky90,32,10,0.111067,293.020006,53.82,1.6882184000000002e-05 add,tsmc28,16,1000,0.459597,32.886,116.238,0.005354305049999999 decoder,sky90,64,18920,0.069176,905.520014,1070.0,0.15114956000000002 comparator,sky90,64,4909,0.213022,2891.980026,3400.0,0.6002959959999999 @@ -1169,13 +1219,10 @@ add,sky90,128,2051,0.486762,4951.940095,885.884,0.68390061 mult,sky90,32,6000,1.084816,33519.920555,29100.0,195.28315224 mult,sky90,64,571,1.751186,58587.340388,11000.0,27.973445163999997 flop,sky90,16,15000,0.070789,533.119995,259.258,0.8058053448 -priorityencoder,sky90,16,7663,0.104403,159.740003,39.177,0.0079241877 flop,sky90,32,8476,0.070789,1066.23999,518.516,0.910629696 -add,sky90,64,10,8.474034,927.079988,230.083,0.023015476344 decoder,sky90,16,24035,0.041561,176.400002,223.236,0.019034938 alu,sky90,64,1752,0.570589,8920.940172,1800.0,1.428184267 priorityonehot,sky90,8,25000,0.054084,158.760003,59.967,0.022390775999999998 -priorityencoder,sky90,32,9904,0.111067,293.020006,53.82,0.016671156700000002 priorityencoder,sky90,8,10000,0.104625,85.260002,26.481,0.007428375 decoder,sky90,64,10511,0.094204,302.820005,116.69,0.024681448 add,sky90,16,5000,0.228259,924.140017,641.631,0.18762889800000002 @@ -1196,9 +1243,9 @@ decoder,sky90,8,24268,0.040971,42.140001,19.257,0.0023886093000000004 alu,sky90,128,2722,0.513268,27566.420501,13200.0,8.80767888 priorityencoder,sky90,128,7032,0.113763,1058.400021,117.974,0.037996841999999996 flop,sky90,8,15000,0.070789,266.559998,129.629,0.4029734614 +priorityencoder,sky90,32,8413,0.111067,293.020006,53.82,0.0141610425 flopenr,sky90,128,20000,0.210945,8479.94003,3830.0,34.87173984 priorityonehot,sky90,16,11556,0.090809,382.200008,391.295,0.039774342 -shiftleft,sky90,32,5000,0.199946,2419.620024,2110.0,0.5004648380000001 priorityencoder,sky90,64,7500,0.112447,546.840011,77.149,0.0218596968 
priorityonehot,sky90,8,15000,0.065937,73.500001,15.316,0.004219968 csa,sky90,64,15013,0.062613,1630.720032,893.318,0.364157208