mirror of
https://github.com/openhwgroup/cvw.git
synced 2025-04-24 13:57:07 -04:00
commit
8235f66af8
13 changed files with 123 additions and 113 deletions
|
@ -98,7 +98,8 @@ localparam LOGR = $clog2(RADIX); // r = log(R
|
|||
localparam RK = LOGR*DIVCOPIES; // r*k bits per cycle generated
|
||||
|
||||
// intermediate division parameters not directly used in fdivsqrt hardware
|
||||
localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit because square root could be shifted right *** explain better
|
||||
localparam FPDIVMINb = NF + 3; // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit to allow sqrt being shifted right
|
||||
//localparam FPDIVMINb = NF + 2 + (RADIX == 2); // minimum length of fractional part: Nf result bits + guard and round bits + 1 extra bit for preshifting radix2 square root right, if radix4 doesn't use a right shift. This version saves one cycle on double-precision with R=4,k=4. However, it doesn't work yet because C is too short, so k is incorrectly calculated as a 1 in the lsb after the last step.
|
||||
localparam DIVMINb = ((FPDIVMINb<XLEN) & IDIV_ON_FPU) ? XLEN : FPDIVMINb; // minimum fractional bits b = max(XLEN, FPDIVMINb)
|
||||
localparam RESBITS = DIVMINb + LOGR; // number of bits in a result: r integer + b fractional
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
|||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic SqrtE,
|
||||
input logic IntDivE,
|
||||
input logic [P.DIVBLEN-1:0] IntResultBitsE,
|
||||
input logic [P.DIVBLEN-1:0] IntResultBitsE,
|
||||
output logic [P.DURLEN-1:0] CyclesE
|
||||
);
|
||||
|
||||
|
@ -66,12 +66,12 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) (
|
|||
// P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk.
|
||||
// Integer division needs p fractional + r integer result bits
|
||||
// FP Division needs at least Nf fractional bits + 2 guard/round bits and one integer digit (LOG R integer bits) = Nf + 2 + r bits
|
||||
// FP Sqrt needs at least Nf fractional bits, 2 guard/round bits, and *** shift bits
|
||||
// FP Sqrt needs at least Nf fractional bits and 2 guard/round bits. The integer bit is always initialized to 1 and does not need a cycle.
|
||||
// The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk)
|
||||
|
||||
always_comb begin
|
||||
if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard; integer bit implicit
|
||||
else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs
|
||||
if (SqrtE) FPResultBitsE = Nf + 2 + 0; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1
|
||||
else FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard + integer bits
|
||||
|
||||
if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? IntResultBitsE : FPResultBitsE;
|
||||
else ResultBitsE = FPResultBitsE;
|
||||
|
|
|
@ -28,17 +28,19 @@
|
|||
|
||||
module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [P.FMTBITS-1:0] Fmt,
|
||||
input logic [P.NE-1:0] Xe, Ye,
|
||||
input logic [P.NE-1:0] Xe, Ye, // input exponents
|
||||
input logic Sqrt,
|
||||
input logic XZero,
|
||||
input logic [P.DIVBLEN-1:0] ell, m,
|
||||
output logic [P.NE+1:0] Ue
|
||||
input logic [P.DIVBLEN-1:0] ell, m, // number of leading 0s in the significands of X and Y
|
||||
output logic [P.NE+1:0] Ue // result exponent
|
||||
);
|
||||
|
||||
logic [P.NE-2:0] Bias;
|
||||
logic [P.NE+1:0] SXExp;
|
||||
logic [P.NE+1:0] SExp;
|
||||
logic [P.NE+1:0] DExp;
|
||||
|
||||
// Determine exponent bias according to the format
|
||||
|
||||
if (P.FPSIZES == 1) begin
|
||||
assign Bias = (P.NE-1)'(P.BIAS);
|
||||
|
|
|
@ -28,12 +28,12 @@
|
|||
|
||||
module fdivsqrtfgen2 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic up, uz,
|
||||
input logic [P.DIVb+3:0] C, U, UM,
|
||||
output logic [P.DIVb+3:0] F
|
||||
input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms)
|
||||
output logic [P.DIVb+3:0] F // Q4.DIVb
|
||||
);
|
||||
logic [P.DIVb+3:0] FP, FN, FZ;
|
||||
logic [P.DIVb+3:0] FP, FN, FZ; // Q4.DIVb
|
||||
|
||||
// Generate for both positive and negative bits
|
||||
// Generate for both positive and negative quotient digits
|
||||
assign FP = ~(U << 1) & C;
|
||||
assign FN = (UM << 1) | (C & ~(C << 2));
|
||||
assign FZ = '0;
|
||||
|
|
|
@ -27,14 +27,14 @@
|
|||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtfgen4 import cvw::*; #(parameter cvw_t P) (
|
||||
input logic [3:0] udigit,
|
||||
input logic [P.DIVb+3:0] C, U, UM,
|
||||
output logic [P.DIVb+3:0] F
|
||||
input logic [3:0] udigit, // {2, 1, -1, -2}; all cold for zero
|
||||
input logic [P.DIVb+3:0] C, U, UM, // Q4.DIVb (extended from shorter forms)
|
||||
output logic [P.DIVb+3:0] F // Q4.DIVb
|
||||
);
|
||||
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2;
|
||||
logic [P.DIVb+3:0] F2, F1, F0, FN1, FN2; // Q4.DIVb
|
||||
|
||||
// Generate for both positive and negative bits
|
||||
assign F2 = (~U << 2) & (C << 2);
|
||||
// Generate for both positive and negative digits
|
||||
assign F2 = (~U << 2) & (C << 2); //
|
||||
assign F1 = ~(U << 1) & C;
|
||||
assign F0 = '0;
|
||||
assign FN1 = (UM << 1) | (C & ~(C << 3));
|
||||
|
|
|
@ -57,7 +57,7 @@ module fdivsqrtfsm import cvw::*; #(parameter cvw_t P) (
|
|||
// terminate immediately on special cases
|
||||
assign FSpecialCaseE = XZeroE | XInfE | XNaNE | (XsE&SqrtE) | (YZeroE | YInfE | YNaNE)&~SqrtE;
|
||||
if (P.IDIV_ON_FPU) assign SpecialCaseE = IntDivE ? ISpecialCaseE : FSpecialCaseE;
|
||||
else assign SpecialCaseE = FSpecialCaseE;
|
||||
else assign SpecialCaseE = FSpecialCaseE;
|
||||
flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
|
|
|
@ -104,14 +104,14 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
|
|||
for(i=0; $unsigned(i)<P.DIVCOPIES; i++) begin : iterations
|
||||
if (P.RADIX == 2) begin: stage
|
||||
fdivsqrtstage2 #(P) fdivsqrtstage(.D, .DBar, .SqrtE,
|
||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||
end else begin: stage
|
||||
logic j1;
|
||||
assign j1 = (i == 0 & ~C[0][P.DIVb-1]);
|
||||
fdivsqrtstage4 #(P) fdivsqrtstage(.D, .DBar, .D2, .DBar2, .SqrtE, .j1,
|
||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||
.WS(WS[i]), .WC(WC[i]), .WSNext(WSNext[i]), .WCNext(WCNext[i]),
|
||||
.C(C[i]), .U(U[i]), .UM(UM[i]), .CNext(C[i+1]), .UNext(UNext[i]), .UMNext(UMNext[i]), .un(un[i]));
|
||||
end
|
||||
assign WS[i+1] = WSNext[i];
|
||||
assign WC[i+1] = WCNext[i];
|
||||
|
|
|
@ -29,17 +29,18 @@
|
|||
module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
||||
input logic clk,
|
||||
input logic IFDivStartE,
|
||||
input logic [P.NF:0] Xm, Ym,
|
||||
input logic [P.NE-1:0] Xe, Ye,
|
||||
input logic [P.NF:0] Xm, Ym, // Floating-point significands
|
||||
input logic [P.NE-1:0] Xe, Ye, // Floating-point exponents
|
||||
input logic [P.FMTBITS-1:0] FmtE,
|
||||
input logic SqrtE,
|
||||
input logic XZeroE,
|
||||
input logic [2:0] Funct3E,
|
||||
output logic [P.NE+1:0] UeM,
|
||||
output logic [P.DIVb+3:0] X, D,
|
||||
output logic [P.NE+1:0] UeM, // biased exponent of result
|
||||
output logic [P.DIVb+3:0] X, D, // Q4.DIVb
|
||||
// Int-specific
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B
|
||||
input logic [P.XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // U(XLEN.0) inputs from IEU
|
||||
input logic IntDivE, W64E,
|
||||
// Outputs
|
||||
output logic ISpecialCaseE,
|
||||
output logic [P.DURLEN-1:0] CyclesE,
|
||||
output logic [P.DIVBLEN-1:0] IntNormShiftM,
|
||||
|
@ -49,7 +50,6 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||
);
|
||||
|
||||
logic [P.DIVb:0] Xnorm, Dnorm;
|
||||
logic [P.DIVb:0] PreSqrtX;
|
||||
logic [P.DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed
|
||||
logic [P.NE+1:0] UeE; // Result Exponent (FP only)
|
||||
logic [P.DIVb:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input
|
||||
|
@ -60,7 +60,8 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||
logic SignedDivE; // signed division
|
||||
logic AsE, BsE; // Signs of integer inputs
|
||||
logic [P.XLEN-1:0] AE; // input A after W64 adjustment
|
||||
logic ALTBE;
|
||||
logic ALTBE;
|
||||
logic EvenExp;
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Integer Preprocessing
|
||||
|
@ -152,9 +153,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||
// shift square root to be in range [1/4, 1)
|
||||
// Normalized numbers are shifted right by 1 if the exponent is odd
|
||||
// Subnormal numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd.
|
||||
// NOTE: there might be a discrepancy that X is never right shifted by 2. However
|
||||
// it comes out in the wash and gives the right answer. Investigate later if possible. ***
|
||||
//////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////
|
||||
|
||||
assign DivX = {3'b000, Xnorm}; // Zero-extend numerator for division
|
||||
|
||||
|
@ -164,13 +163,39 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
|
|||
// Next X is shifted right by 1 or 2 bits to range [1/4, 1) and exponent will be adjusted accordingly to be even
|
||||
// Now (X-1) is negative. Formed by placing all 1s in all four integer bits (in Q4.b form), keeping X in fraction bits
|
||||
// Then multiply by R is left shift by r (1 or 2 for radix 2 or 4)
|
||||
// For Radix 2, this gives 3 leading 1s, followed by the fraction bits
|
||||
// For Radix 4, this gives 2 leading 1s, followed by the fraction bits (and a zero in the lsb)
|
||||
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, (Xe[0] ^ ell[0]), PreSqrtX);
|
||||
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX};
|
||||
else assign SqrtX = {2'b11, PreSqrtX, 1'b0};
|
||||
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
||||
|
||||
// This is optimized in hardware by first right shifting by 0 or 1 bit (instead of 1 or 2), then left shifting by (r-1), then subtracting 2 or 4
|
||||
// Subtracting 2 is equivalent to adding 1110. Subtracting 4 is equivalent to adding 1100. Prepend leading 1s to do a free subtraction.
|
||||
// This also means only one extra fractional bit is needed because we never shift right by more than 1.
|
||||
// Radix Exponent odd Exponent Even
|
||||
// 2 x-2 = 2(x/2 - 1) x/2 - 2 = 2(x/4 - 1)
|
||||
// 4 2(x)-4 = 4(x/2 - 1) 2(x/2)-4 = 4(x/4 - 1)
|
||||
// Summary: SqrtX = r(x/2or4 - 1)
|
||||
|
||||
logic [P.DIVb:0] PreSqrtX;
|
||||
assign EvenExp = Xe[0] ^ ell[0]; // effective unbiased exponent after normalization is even
|
||||
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
||||
if (P.RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
|
||||
else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; // 2PreSqrtX - 4 = 4(PreSqrtX/2 - 1)
|
||||
|
||||
/*
|
||||
// Attempt to optimize radix 4 to use a left shift by 1 or zero initially, followed by no more left shift
|
||||
// This saves one bit in DIVb because there is no initial right shift.
|
||||
// However, C needs to be extended further, lest it create a k with a 1 in the lsb when C is all 1s.
|
||||
// That is an optimization for another day.
|
||||
if (P.RADIX == 2) begin
|
||||
logic [P.DIVb:0] PreSqrtX; // U1.DIVb
|
||||
mux2 #(P.DIVb+1) sqrtxmux(Xnorm, {1'b0, Xnorm[P.DIVb:1]}, EvenExp, PreSqrtX); // X if exponent odd, X/2 if exponent even
|
||||
assign SqrtX = {3'b111, PreSqrtX}; // PreSqrtX - 2 = 2(PreSqrtX/2 - 1)
|
||||
end else begin
|
||||
logic [P.DIVb+1:0] PreSqrtX; // U2.DIVb
|
||||
mux2 #(P.DIVb+2) sqrtxmux({Xnorm, 1'b0}, {1'b0, Xnorm}, EvenExp, PreSqrtX); // 2X if exponent odd, X if exponent even
|
||||
assign SqrtX = {2'b11, PreSqrtX}; // PreSqrtX - 4 = 4(PreSqrtX/4 - 1)
|
||||
end
|
||||
*/
|
||||
|
||||
// Initialize X for division or square root
|
||||
mux2 #(P.DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX);
|
||||
|
||||
//////////////////////////////////////////////////////
|
||||
// Select integer or floating-point operands
|
||||
//////////////////////////////////////////////////////
|
||||
|
|
|
@ -33,8 +33,8 @@ module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
|
|||
input logic [P.DIVb:0] U, UM, // U1.DIVb
|
||||
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
|
||||
input logic [P.DIVb+1:0] C, // Q2.DIVb
|
||||
input logic SqrtE,
|
||||
output logic un,
|
||||
input logic SqrtE,
|
||||
output logic un,
|
||||
output logic [P.DIVb+1:0] CNext, // Q2.DIVb
|
||||
output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb
|
||||
output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb
|
||||
|
@ -42,20 +42,14 @@ module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
|
|||
/* verilator lint_on UNOPTFLAT */
|
||||
|
||||
logic [P.DIVb+3:0] Dsel; // Q4.DIVb
|
||||
logic up, uz;
|
||||
logic up, uz;
|
||||
logic [P.DIVb+3:0] F; // Q4.DIVb
|
||||
logic [P.DIVb+3:0] AddIn; // Q4.DIVb
|
||||
logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb
|
||||
|
||||
// Qmient Selection logic
|
||||
// Quotient Selection logic
|
||||
// Given partial remainder, select digit of +1, 0, or -1 (up, uz, un)
|
||||
// q encoding:
|
||||
// 1000 = +2
|
||||
// 0100 = +1
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
fdivsqrtqsel2 qsel2(WS[P.DIVb+3:P.DIVb], WC[P.DIVb+3:P.DIVb], up, uz, un);
|
||||
fdivsqrtuslc2 uslc2(.WS(WS[P.DIVb+3:P.DIVb]), .WC(WC[P.DIVb+3:P.DIVb]), .up, .uz, .un);
|
||||
|
||||
// Sqrt F generation. Extend C, U, UM to Q4.k
|
||||
fdivsqrtfgen2 #(P) fgen2(.up, .uz, .C({2'b11, CNext}), .U({3'b000, U}), .UM({3'b000, UM}), .F);
|
||||
|
@ -66,7 +60,7 @@ module fdivsqrtstage2 import cvw::*; #(parameter cvw_t P) (
|
|||
else if (uz) Dsel = '0;
|
||||
else Dsel = D; // un
|
||||
|
||||
// Partial Product Generation
|
||||
// Residual Update
|
||||
// WSA, WCA = WS + WC - qD
|
||||
mux2 #(P.DIVb+4) addinmux(Dsel, F, SqrtE, AddIn);
|
||||
csa #(P.DIVb+4) csa(WS, WC, AddIn, up&~SqrtE, WSA, WCA);
|
||||
|
|
|
@ -31,36 +31,29 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
|
|||
input logic [P.DIVb:0] U,UM, // U1.DIVb
|
||||
input logic [P.DIVb+3:0] WS, WC, // Q4.DIVb
|
||||
input logic [P.DIVb+1:0] C, // Q2.DIVb
|
||||
input logic SqrtE, j1,
|
||||
input logic SqrtE, j1,
|
||||
output logic [P.DIVb+1:0] CNext, // Q2.DIVb
|
||||
output logic un,
|
||||
output logic un,
|
||||
output logic [P.DIVb:0] UNext, UMNext, // U1.DIVb
|
||||
output logic [P.DIVb+3:0] WSNext, WCNext // Q4.DIVb
|
||||
);
|
||||
|
||||
logic [P.DIVb+3:0] Dsel; // Q4.DIVb
|
||||
logic [3:0] udigit;
|
||||
logic [3:0] udigit; // {+2, +1, -1, -2} or 0000 for 0
|
||||
logic [P.DIVb+3:0] F; // Q4.DIVb
|
||||
logic [P.DIVb+3:0] AddIn; // Q4.DIVb
|
||||
logic [4:0] Smsbs;
|
||||
logic [2:0] Dmsbs;
|
||||
logic [7:0] WCmsbs, WSmsbs;
|
||||
logic CarryIn;
|
||||
logic [4:0] Smsbs; // U1.4
|
||||
logic [2:0] Dmsbs; // U0.3 drop leading 1 from D
|
||||
logic [7:0] WCmsbs, WSmsbs; // U4.4
|
||||
logic CarryIn;
|
||||
logic [P.DIVb+3:0] WSA, WCA; // Q4.DIVb
|
||||
|
||||
// Digit Selection logic
|
||||
// u encoding:
|
||||
// 1000 = +2
|
||||
// 0100 = +1
|
||||
// 0000 = 0
|
||||
// 0010 = -1
|
||||
// 0001 = -2
|
||||
assign Smsbs = U[P.DIVb:P.DIVb-4]; // U1.4 most significant bits of square root
|
||||
assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; // U0.3 most significant fractional bits of divisor after leading 1
|
||||
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual
|
||||
assign WSmsbs = WS[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual
|
||||
|
||||
fdivsqrtqsel4cmp qsel4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
|
||||
fdivsqrtuslc4cmp uslc4(.Dmsbs, .Smsbs, .WSmsbs, .WCmsbs, .SqrtE, .j1, .udigit);
|
||||
assign un = 1'b0; // unused for radix 4
|
||||
|
||||
// F generation logic
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
///////////////////////////////////////////
|
||||
// fdivsqrtqsel2.sv
|
||||
// fdivsqrtuslc2.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Radix 2 Quotient Digit Selection
|
||||
// Purpose: Radix 2 Unified Quotient/Square Root Digit Selection
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
|
@ -18,7 +18,7 @@
|
|||
// except in compliance with the License, or, at your option, the Apache License version 2.0. You
|
||||
// may obtain a copy of the License at
|
||||
//
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
// https://solderpad.org/licenses/SHL-2.1/
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, any work distributed under the
|
||||
// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
|
@ -26,31 +26,26 @@
|
|||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtqsel2 (
|
||||
input logic [3:0] ps, pc,
|
||||
output logic up, uz, un
|
||||
module fdivsqrtuslc2 (
|
||||
input logic [3:0] WS, WC, // Q4.0 most significant bits of redundant residual
|
||||
output logic up, uz, un // {+1, 0, -1}
|
||||
);
|
||||
|
||||
logic [3:0] p, g;
|
||||
logic magnitude, sign;
|
||||
logic sign;
|
||||
|
||||
// Carry chain logic determines if W = WS + WC = -1, < -1, > -1 to choose 0, -1, 1 respectively
|
||||
|
||||
// The quotient selection logic is presented for simplicity, not
|
||||
// for efficiency. You can probably optimize your logic to
|
||||
// select the proper divisor with less delay.
|
||||
//if p2 * p1 * p0, W = -1 and choose digit of 0
|
||||
assign uz = ((WS[2]^WC[2]) & (WS[1]^WC[1]) &
|
||||
(WS[0]^WC[0]));
|
||||
|
||||
// Quotient equations from EE371 lecture notes 13-20
|
||||
assign p = ps ^ pc;
|
||||
assign g = ps & pc;
|
||||
|
||||
assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) &
|
||||
(ps[0]^pc[0]));
|
||||
assign sign = (ps[3]^pc[3])^
|
||||
(ps[2] & pc[2] | ((ps[2]^pc[2]) &
|
||||
(ps[1]&pc[1] | ((ps[1]^pc[1]) &
|
||||
(ps[0]&pc[0])))));
|
||||
// Otherwise determine sign using carry chain: sign = p3 ^ g_2:0
|
||||
assign sign = (WS[3]^WC[3])^
|
||||
(WS[2] & WC[2] | ((WS[2]^WC[2]) &
|
||||
(WS[1]&WC[1] | ((WS[1]^WC[1]) &
|
||||
(WS[0]&WC[0])))));
|
||||
|
||||
// Produce digit = +1, 0, or -1
|
||||
assign up = magnitude & ~sign;
|
||||
assign uz = ~magnitude;
|
||||
assign un = magnitude & sign;
|
||||
assign up = ~uz & ~sign;
|
||||
assign un = ~uz & sign;
|
||||
endmodule
|
|
@ -1,10 +1,10 @@
|
|||
///////////////////////////////////////////
|
||||
// fdivsqrtqsel4.sv
|
||||
// fdivsqrtuslc4.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Radix 4 Quotient Digit Selection
|
||||
// Purpose: Table-based Radix 4 Unified Quotient/Square Root Digit Selection
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
|
@ -26,25 +26,25 @@
|
|||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtqsel4 (
|
||||
input logic [2:0] Dmsbs,
|
||||
input logic [4:0] Smsbs,
|
||||
input logic [7:0] WSmsbs, WCmsbs,
|
||||
module fdivsqrtuslc4 (
|
||||
input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1
|
||||
input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation
|
||||
input logic [7:0] WSmsbs, WCmsbs, // Q4.4 redundant residual most significant bits
|
||||
input logic Sqrt, j1,
|
||||
output logic [3:0] udigit
|
||||
output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot
|
||||
);
|
||||
logic [6:0] Wmsbs;
|
||||
logic [7:0] PreWmsbs;
|
||||
logic [2:0] A;
|
||||
logic [7:0] PreWmsbs; // Q4.4 nonredundant residual msbs
|
||||
logic [6:0] Wmsbs; // Q4.3 truncated nonredundant residual
|
||||
logic [2:0] A; // U0.3 upper bits of D or Smsbs, discarding integer bit
|
||||
|
||||
assign PreWmsbs = WCmsbs + WSmsbs;
|
||||
assign Wmsbs = PreWmsbs[7:1];
|
||||
assign PreWmsbs = WCmsbs + WSmsbs; // add redundant residual to find msbs
|
||||
assign Wmsbs = PreWmsbs[7:1]; // truncate least significant bit to Q4.3 to index table
|
||||
// D = 0001.xxx...
|
||||
// Dmsbs = | |
|
||||
// W = xxxx.xxx...
|
||||
// Wmsbs = | |
|
||||
|
||||
logic [3:0] USel4[1023:0];
|
||||
logic [3:0] USel4[1023:0]; // 1024-entry table of 4-bit digits, indexed with 3 bits of A and 7 bits of Wmsbs
|
||||
|
||||
// Prepopulate selection table; this is constant at compile time
|
||||
always_comb begin
|
||||
|
@ -101,10 +101,10 @@ module fdivsqrtqsel4 (
|
|||
// Select A
|
||||
always_comb
|
||||
if (Sqrt) begin
|
||||
if (j1) A = 3'b101;
|
||||
else if (Smsbs == 5'b10000) A = 3'b111;
|
||||
else A = Smsbs[2:0];
|
||||
end else A = Dmsbs;
|
||||
if (j1) A = 3'b101; // on first sqrt iteration A = .101
|
||||
else if (Smsbs == 5'b10000) A = 3'b111; // if S = 1.0, use A = .111
|
||||
else A = Smsbs[2:0]; // otherwise use A = 2S (in U0.3 format)
|
||||
end else A = Dmsbs; // for division, A = D (in U0.3 format, dropping leading 1)
|
||||
|
||||
// Select quotient digit from lookup table based on A and W
|
||||
assign udigit = USel4[{A,Wmsbs}];
|
|
@ -1,10 +1,10 @@
|
|||
///////////////////////////////////////////
|
||||
// fdivsqrtqsel4cmp.sv
|
||||
// fdivsqrtuslc4cmp.sv
|
||||
//
|
||||
// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu
|
||||
// Modified:13 January 2022
|
||||
//
|
||||
// Purpose: Comparator-based Radix 4 Quotient Digit Selection
|
||||
// Purpose: Comparator-based Radix 4 Unified Quotient/Square Root Digit Selection
|
||||
//
|
||||
// Documentation: RISC-V System on Chip Design Chapter 13
|
||||
//
|
||||
|
@ -26,12 +26,12 @@
|
|||
// and limitations under the License.
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
module fdivsqrtqsel4cmp (
|
||||
module fdivsqrtuslc4cmp (
|
||||
input logic [2:0] Dmsbs, // U0.3 fractional bits after implicit leading 1
|
||||
input logic [4:0] Smsbs, // U1.4 leading bits of square root approximation
|
||||
input logic [7:0] WSmsbs, WCmsbs, // Q4.4
|
||||
input logic [7:0] WSmsbs, WCmsbs, // Q4.4 residual most significant bits
|
||||
input logic SqrtE, j1,
|
||||
output logic [3:0] udigit
|
||||
output logic [3:0] udigit // {2, 1, -1, -2} digit is 0 if none are hot
|
||||
);
|
||||
logic [6:0] Wmsbs;
|
||||
logic [7:0] PreWmsbs;
|
Loading…
Add table
Add a link
Reference in a new issue