diff --git a/bin/regression-wally b/bin/regression-wally index fbfc6eece..4e72fae66 100755 --- a/bin/regression-wally +++ b/bin/regression-wally @@ -125,19 +125,19 @@ derivconfigtests = [ ["div_4_2_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], ["div_4_2i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], ["div_4_4_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], - ["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], + ["div_4_4i_rv64gc", ["arch64f_divsqrt", "arch64d_divsqrt", "arch64m"]], # fpu permutations - ["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma"]], - ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt"]], - ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt"]], - ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i"]], - ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i"]], - ["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma"]], - ["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt"]], - ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt"]], - ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i"]], - ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q"]], + ["f_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfaf"]], + ["fh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32zfaf"]], + ["fdh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", 
"arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32zfaf", "arch32zfad"]], + ["fdq_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32i", "arch32zfaf", "arch32zfad"]], + ["fdqh_rv32gc", ["arch32f", "arch32f_divsqrt", "arch32f_fma", "arch32d", "arch32d_divsqrt", "arch32d_fma", "arch32zfh", "arch32zfh_divsqrt", "arch32i", "arch32zfaf", "arch32zfad"]], + ["f_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfaf"]], + ["fh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64zfaf"]], + ["fdh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64zfaf", "arch64zfad"]], + ["fdq_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64i", "arch64zfaf", "arch64zfad"]], + ["fdqh_rv64gc", ["arch64f", "arch64f_divsqrt", "arch64f_fma", "arch64d", "arch64d_divsqrt", "arch64d_fma", "arch64zfh", "arch64zfh_divsqrt", "arch64i", "wally64q", "arch64zfaf", "arch64zfad"]], ] bpredtests = [ diff --git a/config/shared/config-shared.vh b/config/shared/config-shared.vh index 5cf6049c8..bf42b9506 100644 --- a/config/shared/config-shared.vh +++ b/config/shared/config-shared.vh @@ -75,6 +75,7 @@ localparam NE = Q_SUPPORTED ? Q_NE : D_SUPPORTED ? D_NE : S_NE; localparam NF = Q_SUPPORTED ? Q_NF : D_SUPPORTED ? D_NF : S_NF; localparam FMT = Q_SUPPORTED ? 2'd3 : D_SUPPORTED ? 2'd1 : 2'd0; localparam BIAS = Q_SUPPORTED ? Q_BIAS : D_SUPPORTED ? 
D_BIAS : S_BIAS; +localparam LOGFLEN = $clog2(FLEN); // Floating point constants needed for FPU paramerterization // LEN1/NE1/NF1/FNT1 is the size of the second longest supported format @@ -124,7 +125,8 @@ localparam LOGCVTLEN = $unsigned($clog2(CVTLEN+1)); // because NORMSHIFTSZ becomes limited by convert rather than divider // Figure out why extra two bits are needed for convert (and only in testbench_fp, not Wally) // Might be a testbench_fp issue -localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (3*NF+6)); +//localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1+2), (DIVb + 1 + NF + 1)), (3*NF+6)); +localparam NORMSHIFTSZ = `max(`max((CVTLEN+NF+1), (DIVb + 1 + NF + 1)), (3*NF+6)); localparam LOGNORMSHIFTSZ = ($clog2(NORMSHIFTSZ)); // log_2(NORMSHIFTSZ) localparam CORRSHIFTSZ = NORMSHIFTSZ-2; // Drop leading 2 integer bits diff --git a/config/shared/parameter-defs.vh b/config/shared/parameter-defs.vh index 1aa6da5d3..96440490c 100644 --- a/config/shared/parameter-defs.vh +++ b/config/shared/parameter-defs.vh @@ -173,6 +173,7 @@ localparam cvw_t P = '{ H_BIAS : H_BIAS, H_FMT : H_FMT, FLEN : FLEN, + LOGFLEN : LOGFLEN, NE : NE , NF : NF , FMT : FMT , diff --git a/src/cvw.sv b/src/cvw.sv index cba95c0fa..1f8e0a1c1 100644 --- a/src/cvw.sv +++ b/src/cvw.sv @@ -260,7 +260,8 @@ typedef struct packed { logic [1:0] H_FMT; // Floating point length FLEN and number of exponent (NE) and fraction (NF) bits - int FLEN; + int FLEN; + int LOGFLEN; int NE ; int NF ; logic [1:0] FMT ; diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index e9efc8e76..2d456aeee 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -48,7 +48,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic XEnE, YEnE, ZEnE, // enable inputs // operation mux selections output logic FCvtIntE, FCvtIntW, // convert to integer operation - output logic [2:0] FrmM, // FP rounding mode + output logic [2:0] FrmE, FrmM, // FP rounding mode output logic [P.FMTBITS-1:0] FmtE, FmtM, // FP 
format output logic [2:0] OpCtrlE, OpCtrlM, // Select which operation to do in each component output logic FpLoadStoreM, // FP load or store instruction @@ -56,6 +56,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage output logic FPUActiveE, // FP instruction being executed output logic ZfaE, ZfaM, // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod) + output logic ZfaFRoundNXE, // Zfa froundnx instruction // register control signals output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable output logic FWriteIntE, FWriteIntM, // Write to integer register @@ -66,7 +67,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( output logic FDivStartE, IDivStartE // Start division or squareroot ); - `define FCTRLW 13 + `define FCTRLW 14 logic [`FCTRLW-1:0] ControlsD; // control signals logic FRegWriteD; // FP register write enable @@ -75,13 +76,14 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( logic [2:0] OpCtrlD; // Select which operation to do in each component logic [1:0] PostProcSelD; // select result in the post processing unit logic [1:0] FResSelD; // Select one of the results that finish in the memory stage - logic [2:0] FrmD, FrmE; // FP rounding mode + logic [2:0] FrmD; // FP rounding mode logic [P.FMTBITS-1:0] FmtD; // FP format logic [1:0] Fmt, Fmt2; // format - before possible reduction logic SupportedFmt; // is the format supported logic SupportedFmt2; // is the source format supported for fp -> fp logic FCvtIntD, FCvtIntM; // convert to integer operation logic ZfaD; // Zfa variants of instructions + logic ZfaFRoundNXD; // Zfa froundnx instruction // FPU Instruction Decoder assign Fmt = Funct7D[1:0]; @@ -93,156 +95,156 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( (Fmt2 == 2'b10 & P.ZFH_SUPPORTED) | (Fmt2 == 2'b11 & P.Q_SUPPORTED)); // decode the instruction - 
// FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt_Zfa + // FRegWrite_FWriteInt_FResSel_PostProcSel_FOpCtrl_FDivStart_IllegalFPUInstr_FCvtInt_Zfa_FroundNX always_comb if (STATUS_FS == 2'b00) // FPU instructions are illegal when FPU is disabled - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0_0; else if (OpD != 7'b0000111 & OpD != 7'b0100111 & ~SupportedFmt) - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // for anything other than loads and stores, check for supported format + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0_0; // for anything other than loads and stores, check for supported format else begin - ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0; // default: non-implemented instruction + ControlsD = `FCTRLW'b0_0_00_00_000_0_1_0_0_0; // default: non-implemented instruction /* verilator lint_off CASEINCOMPLETE */ // default value above has priority so no other default needed case(OpD) 7'b0000111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flw - 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // fld - 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flq - 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0; // flh + 3'b010: ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // flw + 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // fld + 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // flq + 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b1_0_10_00_0xx_0_0_0_0_0; // flh endcase 7'b0100111: case(Funct3D) - 3'b010: ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsw - 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsd - 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsq - 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0; // fsh + 3'b010: ControlsD = 
`FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsw + 3'b011: if (P.D_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsd + 3'b100: if (P.Q_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsq + 3'b001: if (P.ZFH_SUPPORTED) ControlsD = `FCTRLW'b0_0_10_00_0xx_0_0_0_0_0; // fsh endcase - 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0_0; // fmadd - 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0_0; // fmsub - 7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0_0; // fnmsub - 7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0_0; // fnmadd + 7'b1000011: ControlsD = `FCTRLW'b1_0_01_10_000_0_0_0_0_0; // fmadd + 7'b1000111: ControlsD = `FCTRLW'b1_0_01_10_001_0_0_0_0_0; // fmsub + 7'b1001011: ControlsD = `FCTRLW'b1_0_01_10_010_0_0_0_0_0; // fnmsub + 7'b1001111: ControlsD = `FCTRLW'b1_0_01_10_011_0_0_0_0_0; // fnmadd 7'b1010011: casez(Funct7D) - 7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0_0; // fadd - 7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0_0; // fsub - 7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0_0; // fmul - 7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0_0; // fdiv - 7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0_0; // fsqrt + 7'b00000??: ControlsD = `FCTRLW'b1_0_01_10_110_0_0_0_0_0; // fadd + 7'b00001??: ControlsD = `FCTRLW'b1_0_01_10_111_0_0_0_0_0; // fsub + 7'b00010??: ControlsD = `FCTRLW'b1_0_01_10_100_0_0_0_0_0; // fmul + 7'b00011??: ControlsD = `FCTRLW'b1_0_01_01_xx0_1_0_0_0_0; // fdiv + 7'b01011??: if (Rs2D == 5'b0000) ControlsD = `FCTRLW'b1_0_01_01_xx1_1_0_0_0_0; // fsqrt 7'b00100??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0_0; // fsgnj - 3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0_0; // fsgnjn - 3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0_0; // fsgnjx + 3'b000: ControlsD = `FCTRLW'b1_0_00_00_000_0_0_0_0_0; // fsgnj + 3'b001: ControlsD = `FCTRLW'b1_0_00_00_001_0_0_0_0_0; // fsgnjn + 3'b010: ControlsD = `FCTRLW'b1_0_00_00_010_0_0_0_0_0; // fsgnjx 
endcase 7'b00101??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_0; // fmin - 3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_0; // fmax - 3'b010: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_1; // fminm (Zfa) - 3'b011: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_1; // fmaxm (Zfa) + 3'b000: ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_0_0; // fmin + 3'b001: ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_0_0; // fmax + 3'b010: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_110_0_0_0_1_0; // fminm (Zfa) + 3'b011: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b1_0_00_00_101_0_0_0_1_0; // fmaxm (Zfa) endcase 7'b10100??: case(Funct3D) - 3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_0; // fle - 3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_0; // flt - 3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0_0; // feq - 3'b100: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_1; // fleq (Zfa) - 3'b101: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_1; // fltq (Zfa) + 3'b000: ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_0_0; // fle + 3'b001: ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_0_0; // flt + 3'b010: ControlsD = `FCTRLW'b0_1_00_00_010_0_0_0_0_0; // feq + 3'b100: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_011_0_0_0_1_0; // fleq (Zfa) + 3'b101: if (P.ZFA_SUPPORTED) ControlsD = `FCTRLW'b0_1_00_00_001_0_0_0_1_0; // fltq (Zfa) endcase 7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0_0; // fclass + ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0_0_0; // fclass else if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0; // fmv.x.w/d/h/q fp to int register + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_0_0; // fmv.x.w/d/h/q fp to int register else if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct7D[1:0] == 2'b01 & Funct3D == 3'b000 & Rs2D == 5'b00001) - ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.d (Zfa) + 
ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1_0; // fmvh.x.d (Zfa) // Q not supported in RV64GC // coverage off else if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct7D[1:0] == 2'b11 & Funct3D == 3'b000 & Rs2D == 5'b00001) - ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1; // fmvh.x.q (Zfa) + ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0_1_0; // fmvh.x.q (Zfa) // coverage on 7'b11110??: if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0; // fmv.w/d/h/q.x int to fp reg + ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0_0_0; // fmv.w/d/h/q.x int to fp reg else if (P.ZFA_SUPPORTED & Funct3D == 3'b000 & Rs2D == 5'b00001) - ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0_1; // fli (Zfa) + ControlsD = `FCTRLW'b1_0_00_00_111_0_0_0_1_0; // fli (Zfa) 7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_0; // fcvt.s.(d/q/h) + ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_0_0; // fcvt.s.(d/q/h) else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.s (Zfa) *** needs ctrl for all rounds + ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.s (Zfa) else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.s (Zfa) *** needs ctrl for all rounds + ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_1; // froundnx.s (Zfa): FroundNX bit set 7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) - ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0_0; // fcvt.d.(s/h/q) + ControlsD = `FCTRLW'b1_0_01_00_001_0_0_0_0_0; // fcvt.d.(s/h/q) else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.d (Zfa) + ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.d (Zfa) else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.d (Zfa) + ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_1; // froundnx.d (Zfa): FroundNX bit set 7'b0100010: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & 
Rs2D[1:0] != 2'b10) - ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0_0; // fcvt.h.(s/d/q) + ControlsD = `FCTRLW'b1_0_01_00_010_0_0_0_0_0; // fcvt.h.(s/d/q) else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.h (Zfa) + ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.h (Zfa) else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.h (Zfa) + ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_1; // froundnx.h (Zfa): FroundNX bit set // coverage off // Not covered in testing because rv64gc does not support quad precision 7'b0100011: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b11) - ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0_0; // fcvt.q.(s/h/d) + ControlsD = `FCTRLW'b1_0_01_00_011_0_0_0_0_0; // fcvt.q.(s/h/d) else if (Rs2D == 5'b00100 & P.ZFA_SUPPORTED) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // fround.q (Zfa) + ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_0; // fround.q (Zfa) else if (Rs2D == 5'b00101 & P.ZFA_SUPPORTED) - ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0_1; // froundnx.q (Zfa) + ControlsD = `FCTRLW'b1_0_00_00_100_0_0_0_1_1; // froundnx.q (Zfa): FroundNX bit set // coverage on 7'b1101000: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.s.w w->s - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.s.wu wu->s - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.s.l l->s - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.s.lu lu->s + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.s.w w->s + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.s.wu wu->s + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.s.l l->s + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.s.lu lu->s endcase 7'b1100000: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.s s->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.s s->wu - 5'b00010: ControlsD = 
`FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.s s->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.s s->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.s s->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.s s->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.s s->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.s s->lu endcase 7'b1101001: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.d.w w->d - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.d.wu wu->d - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.d.l l->d - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.d.lu lu->d + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.d.w w->d + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.d.wu wu->d + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.d.l l->d + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.d.lu lu->d endcase 7'b1100001: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.d d->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.d d->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.d d->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.d d->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.d d->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.d d->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.d d->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.d d->lu 5'b01000: if (P.ZFA_SUPPORTED & P.D_SUPPORTED & Funct3D == 3'b001) - ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_1; // fcvtmod.w.d (Zfa) + ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_1_0; // fcvtmod.w.d (Zfa) endcase 7'b1101010: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.h.w w->h - 
5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.h.wu wu->h - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.h.l l->h - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.h.lu lu->h + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.h.w w->h + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.h.wu wu->h + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.h.l l->h + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.h.lu lu->h endcase 7'b1100010: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.h h->w - 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.h h->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.h h->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.h h->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.h h->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.h h->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.h h->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.h h->lu endcase // Not covered in testing because rv64gc does not support quad precision // coverage off 7'b1101011: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fcvt.q.w w->q - 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0; // fcvt.q.wu wu->q - 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0; // fcvt.q.l l->q - 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0; // fcvt.q.lu lu->q + 5'b00000: ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fcvt.q.w w->q + 5'b00001: ControlsD = `FCTRLW'b1_0_01_00_100_0_0_0_0_0; // fcvt.q.wu wu->q + 5'b00010: ControlsD = `FCTRLW'b1_0_01_00_111_0_0_0_0_0; // fcvt.q.l l->q + 5'b00011: ControlsD = `FCTRLW'b1_0_01_00_110_0_0_0_0_0; // fcvt.q.lu lu->q endcase 7'b1100011: case(Rs2D) - 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0; // fcvt.w.q q->w - 5'b00001: 
ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0; // fcvt.wu.q q->wu - 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0; // fcvt.l.q q->l - 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0; // fcvt.lu.q q->lu + 5'b00000: ControlsD = `FCTRLW'b0_1_01_00_001_0_0_1_0_0; // fcvt.w.q q->w + 5'b00001: ControlsD = `FCTRLW'b0_1_01_00_000_0_0_1_0_0; // fcvt.wu.q q->wu + 5'b00010: ControlsD = `FCTRLW'b0_1_01_00_011_0_0_1_0_0; // fcvt.l.q q->l + 5'b00011: ControlsD = `FCTRLW'b0_1_01_00_010_0_0_1_0_0; // fcvt.lu.q q->lu endcase // coverage off // Not covered in testing because rv64gc is not RV64Q or RV32D 7'b1011001: if (P.ZFA_SUPPORTED & P.XLEN == 32 & P.D_SUPPORTED & Funct3D == 3'b000) - ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.d.x (Zfa) *** untested, controls could be wrong + ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fmvp.d.x (Zfa) *** untested, controls could be wrong 7'b1011011: if (P.ZFA_SUPPORTED & P.XLEN == 64 & P.Q_SUPPORTED & Funct3D == 3'b000) - ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0; // fmvp.q.x (Zfa) + ControlsD = `FCTRLW'b1_0_01_00_101_0_0_0_0_0; // fmvp.q.x (Zfa) // coverage on endcase endcase @@ -250,7 +252,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( /* verilator lint_on CASEINCOMPLETE */ // unswizzle control bits - assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD, ZfaD} = ControlsD; + assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, FDivStartD, IllegalFPUInstrD, FCvtIntD, ZfaD, ZfaFRoundNXD} = ControlsD; // rounding modes: // 000 - round to nearest, ties to even @@ -259,7 +261,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( // 011 - round up - round twords positive infinity // 100 - round to nearest, ties to max magnitude - round to nearest, ties away from zero // 111 - dynamic - choose FRM_REGW as rounding mode - assign FrmD = &Funct3D ? FRM_REGW : Funct3D; + assign FrmD = (Funct3D == 3'b111) ? 
FRM_REGW : Funct3D; // Precision // 00 - single @@ -269,7 +271,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( if (P.FPSIZES == 1) assign FmtD = 1'b0; - else if (P.FPSIZES == 2)begin + else if (P.FPSIZES == 2) begin logic [1:0] FmtTmp; assign FmtTmp = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : (~OpD[6]&(&OpD[2:0])) ? {~Funct3D[1], ~(Funct3D[1]^Funct3D[0])} : Funct7D[1:0]; assign FmtD = (P.FMT == FmtTmp); @@ -313,6 +315,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( // 011 - mv to fp 01 // 110 - min 10 // 101 - max 10 + // 100 - fround 11 // 111 - fli 11 // OpCtrl: @@ -350,9 +353,9 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( assign Adr3D = InstrD[31:27]; // D/E pipleine register - flopenrc #(15+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ZfaD, ~IllegalFPUInstrD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE, FPUActiveE}); + flopenrc #(`FCTRLW+2+P.FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD, FCvtIntD, ZfaD, ZfaFRoundNXD, ~IllegalFPUInstrD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE, ZfaFRoundNXE, FPUActiveE}); flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {Adr1D, Adr2D, Adr3D}, {Adr1E, Adr2E, Adr3E}); flopenrc #(1) DEFDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); flopenrc #(3) DEEnReg(clk, reset, FlushE, ~StallE, {XEnD, YEnD, ZEnD}, {XEnE, YEnE, ZEnE}); @@ -365,7 +368,7 @@ module fctrl import cvw::*; #(parameter cvw_t P) ( flopenrc #(14+int'(P.FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE, FCvtIntE, ZfaE}, {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM, FCvtIntM, ZfaM}); - + // renameing for readability assign FpLoadStoreM = FResSelM[1]; @@ -373,5 +376,5 @@ 
module fctrl import cvw::*; #(parameter cvw_t P) ( flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW, {FRegWriteM, FResSelM, FCvtIntM}, {FRegWriteW, FResSelW, FCvtIntW}); - + endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 1d44cef5d..dba69267a 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -37,6 +37,8 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, + input logic [P.NE-2:0] BiasE, // Bias of exponent + input logic [P.LOGFLEN-1:0] NfE, // Number of fractional bits in selected format input logic FDivStartE, IDivStartE, input logic StallM, input logic FlushE, @@ -75,7 +77,7 @@ module fdivsqrt import cvw::*; #(parameter cvw_t P) ( fdivsqrtpreproc #(P) fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .FmtE, .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, + .FmtE, .Bias(BiasE), .Nf(NfE), .SqrtE, .XZeroE, .Funct3E, .UeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, .BZeroM, .IntNormShiftM, .AM, diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 72fe04249..9e2489eb3 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -29,39 +29,14 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( input logic [P.FMTBITS-1:0] FmtE, + input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format input logic SqrtE, input logic IntDivE, input logic [P.DIVBLEN-1:0] IntResultBitsE, output logic [P.DURLEN-1:0] CyclesE ); - logic [P.DIVBLEN-1:0] Nf, FPResultBitsE, ResultBitsE; // number of fractional (result) bits - - /* verilator lint_off WIDTH */ - if (P.FPSIZES == 1) - assign Nf = P.NF; - else if (P.FPSIZES == 2) - always_comb - case (FmtE) - 1'b0: Nf = P.NF1; - 1'b1: Nf = P.NF; - endcase - else if (P.FPSIZES == 3) - 
always_comb - case (FmtE) - P.FMT: Nf = P.NF; - P.FMT1: Nf = P.NF1; - P.FMT2: Nf = P.NF2; - default: Nf = 'x; // shouldn't happen - endcase - else if (P.FPSIZES == 4) - always_comb - case(FmtE) - P.S_FMT: Nf = P.S_NF; - P.D_FMT: Nf = P.D_NF; - P.H_FMT: Nf = P.H_NF; - P.Q_FMT: Nf = P.Q_NF; - endcase + logic [P.DIVBLEN-1:0] FPResultBitsE, ResultBitsE; // number of fractional (result) bits // Cycle logic // P.DIVCOPIES = k. P.LOGR = log(R) = r. P.RK = rk. @@ -70,6 +45,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( // FP Sqrt needs at least Nf fractional bits and 2 guard/round bits. The integer bit is always initialized to 1 and does not need a cycle. // The datapath produces rk bits per cycle, so Cycles = ceil (ResultBitsE / rk) + /* verilator lint_off WIDTH */ always_comb begin FPResultBitsE = Nf + 2 + P.LOGR; // Nf + two fractional bits for round/guard; integer bit implicit because starting at n=1 diff --git a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv index a20d1871d..03d144263 100644 --- a/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtexpcalc.sv @@ -28,49 +28,21 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module fdivsqrtexpcalc import cvw::*; #(parameter cvw_t P) ( - input logic [P.FMTBITS-1:0] Fmt, + input logic [P.NE-2:0] Bias, // Bias of exponent input logic [P.NE-1:0] Xe, Ye, // input exponents input logic Sqrt, input logic [P.DIVBLEN-1:0] ell, m, // number of leading 0s in Xe and Ye output logic [P.NE+1:0] Ue // result exponent ); - - logic [P.NE-2:0] Bias; + logic [P.NE+1:0] SXExp; logic [P.NE+1:0] SExp; logic [P.NE+1:0] DExp; - // Determine exponent bias according to the format - - if (P.FPSIZES == 1) begin - assign Bias = (P.NE-1)'(P.BIAS); - - end else if (P.FPSIZES == 2) begin - assign Bias = Fmt ? 
(P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1); - - end else if (P.FPSIZES == 3) begin - always_comb - case (Fmt) - P.FMT: Bias = (P.NE-1)'(P.BIAS); - P.FMT1: Bias = (P.NE-1)'(P.BIAS1); - P.FMT2: Bias = (P.NE-1)'(P.BIAS2); - default: Bias = 'x; - endcase - - end else if (P.FPSIZES == 4) begin - always_comb - case (Fmt) - 2'h3: Bias = (P.NE-1)'(P.Q_BIAS); - 2'h1: Bias = (P.NE-1)'(P.D_BIAS); - 2'h0: Bias = (P.NE-1)'(P.S_BIAS); - 2'h2: Bias = (P.NE-1)'(P.H_BIAS); - endcase - end - // Square root exponent = (Xe - l - bias) / 2 + bias; l accounts for subnorms assign SXExp = {2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - (P.NE+2)'(P.BIAS); assign SExp = {SXExp[P.NE+1], SXExp[P.NE+1:1]} + {2'b0, Bias}; - + // division exponent = (Xe-l) - (Ye-m) + bias; l and m account for subnorms assign DExp = ({2'b0, Xe} - {{(P.NE+1-P.DIVBLEN){1'b0}}, ell} - {2'b0, Ye} + {{(P.NE+1-P.DIVBLEN){1'b0}}, m} + {3'b0, Bias}); diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 45d50dac3..802ac92dc 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -33,6 +33,8 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( input logic [P.NF:0] Xm, Ym, // Floating-point significands input logic [P.NE-1:0] Xe, Ye, // Floating-point exponents input logic [P.FMTBITS-1:0] FmtE, + input logic [P.NE-2:0] Bias, // Bias of exponent + input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format input logic SqrtE, input logic XZeroE, input logic [2:0] Funct3E, @@ -209,11 +211,11 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( flopen #(P.DIVb+4) dreg(clk, IFDivStartE, {3'b000, Dnorm}, D); // Floating-point exponent - fdivsqrtexpcalc #(P) expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE)); + fdivsqrtexpcalc #(P) expcalc(.Bias, .Xe, .Ye, .Sqrt(SqrtE), .ell, .m(mE), .Ue(UeE)); flopen #(P.NE+2) expreg(clk, IFDivStartE, UeE, UeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles 
#(P) cyclecalc(.FmtE, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE); + fdivsqrtcycles #(P) cyclecalc(.FmtE, .Nf, .SqrtE, .IntDivE, .IntResultBitsE, .CyclesE); if (P.IDIV_ON_FPU) begin:intpipelineregs logic [P.DIVBLEN-1:0] IntDivNormShiftE, IntRemNormShiftE, IntNormShiftE; diff --git a/src/fpu/fmtparams.sv b/src/fpu/fmtparams.sv new file mode 100644 index 000000000..d83dfd782 --- /dev/null +++ b/src/fpu/fmtparams.sv @@ -0,0 +1,86 @@ + +/////////////////////////////////////////// +// fmtparams.sv +// +// Written: David_Harris@hmc.edu +// Modified: 5/11/24 +// +// Purpose: Look up bias of exponent and number of fractional bits for the selected format +// +// Documentation: RISC-V System on Chip Design Chapter 13 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +module fmtparams import cvw::*; #(parameter cvw_t P) ( + input logic [P.FMTBITS-1:0] Fmt, + output logic [P.NE-2:0] Bias, + output logic [P.LOGFLEN-1:0] Nf +); + + if (P.FPSIZES == 1) begin + assign Bias = (P.NE-1)'(P.BIAS); + end else if (P.FPSIZES == 2) begin + assign Bias = Fmt ? 
(P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1); + end else if (P.FPSIZES == 3) begin + always_comb + case (Fmt) + P.FMT: Bias = (P.NE-1)'(P.BIAS); + P.FMT1: Bias = (P.NE-1)'(P.BIAS1); + P.FMT2: Bias = (P.NE-1)'(P.BIAS2); + default: Bias = 'x; + endcase + end else if (P.FPSIZES == 4) begin + always_comb + case (Fmt) + 2'h3: Bias = (P.NE-1)'(P.Q_BIAS); + 2'h1: Bias = (P.NE-1)'(P.D_BIAS); + 2'h0: Bias = (P.NE-1)'(P.S_BIAS); + 2'h2: Bias = (P.NE-1)'(P.H_BIAS); + endcase + end + + /* verilator lint_off WIDTH */ + if (P.FPSIZES == 1) + assign Nf = P.NF; + else if (P.FPSIZES == 2) + always_comb + case (Fmt) + 1'b0: Nf = P.NF1; + 1'b1: Nf = P.NF; + endcase + else if (P.FPSIZES == 3) + always_comb + case (Fmt) + P.FMT: Nf = P.NF; + P.FMT1: Nf = P.NF1; + P.FMT2: Nf = P.NF2; + default: Nf = 'x; // shouldn't happen + endcase + else if (P.FPSIZES == 4) + always_comb + case(Fmt) + P.S_FMT: Nf = P.S_NF; + P.D_FMT: Nf = P.D_NF; + P.H_FMT: Nf = P.H_NF; + P.Q_FMT: Nf = P.Q_NF; + endcase + /* verilator lint_on WIDTH */ + +endmodule diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 22c650ed8..dc3d353fb 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -70,7 +70,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( // control signals logic FRegWriteW; // FP register write enable - logic [2:0] FrmM; // FP rounding mode + logic [2:0] FrmE, FrmM; // FP rounding mode logic [P.FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double logic FDivStartE, IDivStartE; // Start division or squareroot logic FWriteIntM; // Write to integer register @@ -85,6 +85,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic FRegWriteE; // Write floating-point register logic FPUActiveE; // FP instruction being executed logic ZfaE, ZfaM; // Zfa variants of instructions (fli, fminm, fmaxm, fround, froundnx, fleq, fltq, fmvh, fmvp, fcvtmod.w.d) + logic ZfaFRoundNXE; // Zfa froundnx variant // regfile signals logic [P.FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -112,6 +113,8 
@@ module fpu import cvw::*; #(parameter cvw_t P) ( logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage logic XExpMaxE; // is the exponent all ones (max value) logic [P.FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. Needed for 1-input operations + logic [P.NE-2:0] BiasE; // Bias of exponent + logic [P.LOGFLEN-1:0] NfE; // Number of fractional bits // Fma Signals logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting @@ -150,7 +153,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic [P.XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move) logic [P.FLEN-1:0] PostProcResM; // Postprocessor output logic [4:0] PostProcFlgM; // Postprocessor flags - logic PreNVE, PreNVM; // selected flag that is ready in the memory stage + logic PreNVE, PreNVM; // selected invalid flag that is ready in the memory stage + logic PreNXE, PreNXM; // selected inexact flag that is ready in the memory stage logic [P.FLEN-1:0] FpResM, FpResW; // FPU preliminary result logic [P.FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage logic [P.FLEN-1:0] FResultW; // final FP result being written to the FP register @@ -162,9 +166,8 @@ module fpu import cvw::*; #(parameter cvw_t P) ( logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt logic [P.FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer logic mvsgn; // sign bit for extending move - logic [P.FLEN-1:0] FliResE; // Zfa Floating-point load immediate value - logic [P.FLEN-1:0] FRoundE; // Zfa fround output - logic [4:0] FRoundFlagsE; // Zfa fround flags + logic [P.FLEN-1:0] ZfaResE; // Result of Zfa fli or fround instruction + logic FRoundNVE, FRoundNXE; // Zfa fround invalid and inexact flags ////////////////////////////////////////////////////////////////////////////////////////// // Decode Stage: fctrl decoder, read register file @@ -174,7 +177,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( fctrl #(P) fctrl 
(.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .IntDivE, .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, - .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .ZfaM, .FrmM, .FmtE, .FmtM, + .reset, .clk, .FRegWriteE, .FRegWriteM, .FRegWriteW, .ZfaE, .ZfaM, .ZfaFRoundNXE, .FrmE, .FrmM, .FmtE, .FmtM, .FDivStartE, .IDivStartE, .FWriteIntE, .FCvtIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .FpLoadStoreM, .IllegalFPUInstrD, .XEnD, .YEnD, .ZEnD, .XEnE, .YEnE, .ZEnE, .FResSelE, .FResSelM, .FResSelW, .FPUActiveE, .PostProcSelE, .PostProcSelM, .FCvtIntW, @@ -237,7 +240,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XSubnorm(XSubnormE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), - .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE)); + .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE), .XPostBox(XPostBoxE), .Bias(BiasE), .Nf(NfE)); // fused multiply add: fadd/sub, fmul, fmadd/fnmadd/fmsub/fnmsub fma #(P) fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), @@ -246,7 +249,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( // divide and square root: fdiv, fsqrt, optionally integer division fdivsqrt #(P) fdivsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), - .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .IDivStartE, .XsE, + .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .BiasE, .NfE, .FDivStartE, .IDivStartE, .XsE, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .Funct3M, .IntDivE, .W64E, .StallM, .FlushE, .DivStickyM, .FDivBusyE, .IFDivStartE, .FDivDoneE, .UeM, .UmM, .FIntDivResultM); @@ -270,23 +273,26 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .ResSubnormUf(CvtResSubnormUfE), .Cs(CsE), .IntZero(IntZeroE), 
.LzcIn(CvtLzcInE)); // ZFA: fround and floating-point load immediate fli - if (P.ZFA_SUPPORTED) begin + if (P.ZFA_SUPPORTED) begin:Zfa logic [4:0] Rs1E; logic [1:0] Fmt2E; // Two-bit format field from instruction + logic [P.FLEN-1:0] FRoundE; // Zfa fround output + logic [P.FLEN-1:0] FliResE; // Zfa Floating-point load immediate value // fround - fround #(P) fround(.Xs(XsE), .Xe(XeE), .Xm(XmE), - .XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE), - .FRound(FRoundE), .FRoundFlags(FRoundFlagsE)); + fround #(P) fround(.X(XE), .Xs(XsE), .Xe(XeE), .Xm(XmE), + .XNaN(XNaNE), .XSNaN(XSNaNE), .XZero(XZeroE), .Fmt(FmtE), .Frm(FrmE), .Nf(NfE), + .ZfaFRoundNX(ZfaFRoundNXE), + .FRound(FRoundE), .FRoundNV(FRoundNVE), .FRoundNX(FRoundNXE)); // fli flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, InstrD[19:15], Rs1E); flopenrc #(2) Fmt2EReg(clk, reset, FlushE, ~StallE, InstrD[26:25], Fmt2E); fli #(P) fli(.Rs1(Rs1E), .Fmt(Fmt2E), .Imm(FliResE)); + mux2 #(P.FLEN) ZfaResMux(FRoundE, FliResE, OpCtrlE[0], ZfaResE); end else begin - assign FRoundE = '0; - assign FRoundFlagsE = '0; - assign FliResE = '0; + assign {FRoundNXE, FRoundNVE} = '0; + assign ZfaResE = 'x; end // fmv.*.x: NaN Box SrcA to extend integer to requested FP size @@ -311,8 +317,9 @@ module fpu import cvw::*; #(parameter cvw_t P) ( else assign IntSrcE = PreIntSrcE; // select a result that may be written to the FP register - mux4 #(P.FLEN) FResMux(SgnResE, IntSrcE, CmpFpResE, FliResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); - assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); + mux4 #(P.FLEN) FResMux(SgnResE, IntSrcE, CmpFpResE, ZfaResE, {OpCtrlE[2], &OpCtrlE[1:0] | (OpCtrlE == 3'b100) & ZfaE}, PreFpResE); + assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE) | FRoundNVE & (OpCtrlE == 3'b100) & ZfaE; + assign PreNXE = FRoundNXE & (OpCtrlE == 3'b100); // fmv.x.*: select the result that may be written to the integer register if(P.FPSIZES == 1) begin @@ -350,7 +357,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( 
flopenr #(13) EMFpReg5 (clk, reset, ~StallUnpackedM, {XsE, YsE, XZeroE, YZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, {XsM, YsM, XZeroM, YZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); - flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); + flopenrc #(2) EMRegCmpFlg (clk, reset, FlushM, ~StallM, {PreNVE, PreNXE}, {PreNVM, PreNXM}); flopenrc #(3*P.NF+4) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); flopenrc #($clog2(3*P.NF+5)+7+P.NE) EMRegFma4(clk, reset, FlushM, ~StallM, {FmaAStickyE, InvAE, SCntE, AsE, PsE, SsE, SeE}, @@ -373,8 +380,7 @@ module fpu import cvw::*; #(parameter cvw_t P) ( .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged - //mux2 #(5) FPUFlgMux({PreNVM&~FResSelM[1], 4'b0}, PostProcFlgM, ~FResSelM[1]&FResSelM[0], SetFflagsM); - mux2 #(5) FPUFlgMux({PreNVM, 4'b0}, PostProcFlgM, (FResSelM == 2'b01), SetFflagsM); + mux2 #(5) FPUFlgMux({PreNVM, 3'b0, PreNXM}, PostProcFlgM, (FResSelM == 2'b01), SetFflagsM); mux2 #(P.FLEN) FPUResMux(PreFpResM, PostProcResM, FResSelM[0], FpResM); // M/W pipe registers diff --git a/src/fpu/fround.sv b/src/fpu/fround.sv index 180f99605..fb4911253 100644 --- a/src/fpu/fround.sv +++ b/src/fpu/fround.sv @@ -28,60 +28,34 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module fround import cvw::*; #(parameter cvw_t P) ( + input logic [P.FLEN-1:0] X, // input before unpacking input logic Xs, // input's sign input logic [P.NE-1:0] Xe, // input's exponent - input logic [P.NF:0] Xm, // input's fraction + input logic [P.NF:0] Xm, // input's fraction with leading integer bit (U1.NF) input logic XNaN, // X is NaN input logic XSNaN, // X is Signalling NaN input logic XZero, // X is Zero input logic [P.FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) + input logic [2:0] Frm, // 
rounding mode + input logic [P.LOGFLEN-1:0] Nf, // Number of fractional bits in selected format + input logic ZfaFRoundNX, // froundnx instruction can set inexact flag output logic [P.FLEN-1:0] FRound, // Rounded result - output logic [4:0] FRoundFlags // Rounder flags + output logic FRoundNV, // fround invalid + output logic FRoundNX // fround inexact ); - logic [P.NE-2:0] Bias; - logic [P.NE-1:0] E; - logic [P.NF:0] Imask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd; - logic Lnonneg, Lp, Rnonneg, Rp, Tp; - - ////////////////////////////////////////// - // Determine exponent bias according to the format - ////////////////////////////////////////// - // *** replicated from fdivsqrt; find a way to share - - if (P.FPSIZES == 1) begin - assign Bias = (P.NE-1)'(P.BIAS); - - end else if (P.FPSIZES == 2) begin - assign Bias = Fmt ? (P.NE-1)'(P.BIAS) : (P.NE-1)'(P.BIAS1); - - end else if (P.FPSIZES == 3) begin - always_comb - case (Fmt) - P.FMT: Bias = (P.NE-1)'(P.BIAS); - P.FMT1: Bias = (P.NE-1)'(P.BIAS1); - P.FMT2: Bias = (P.NE-1)'(P.BIAS2); - default: Bias = 'x; - endcase - - end else if (P.FPSIZES == 4) begin - always_comb - case (Fmt) - 2'h3: Bias = (P.NE-1)'(P.Q_BIAS); - 2'h1: Bias = (P.NE-1)'(P.D_BIAS); - 2'h0: Bias = (P.NE-1)'(P.S_BIAS); - 2'h2: Bias = (P.NE-1)'(P.H_BIAS); - endcase - end - -/* + logic [P.NE-1:0] E, Xep1, EminusNf; + logic [P.NF:0] IMask, Tmasknonneg, Tmaskneg, Tmask, HotE, HotEP1, Trunc, Rnd; + logic [P.FLEN-1:0] W, PackedW; + logic Elt0, Eeqm1, Lnonneg, Lp, Rnonneg, Rp, Tp, RoundUp, Two, EgeNf, Exact; // Unbiased exponent - assign E = Xe - Bias; + assign E = Xe - P.BIAS[P.NE-1:0]; + assign Xep1 = Xe + 1; ////////////////////////////////////////// // Compute LSB L', rounding bit R' and Sticky bit T' - // if (E < 0) // negative exponents round to 0 or 1. + // if (E < 0) // negative exponents round to 0 or 1. // L' = 0 // LSB = 0 // if (E = -1) R' = 1, TMask = 0.1111...111 // if (E = -1) 0.5  X < 1. 
Round bit is 1 // else R' = 0; TMask = 1.1111...111 // if (E < -1), X < 0.5. Round bit is 0 @@ -100,19 +74,19 @@ module fround import cvw::*; #(parameter cvw_t P) ( ////////////////////////////////////////// // Check if exponent is negative and -1 - assign Elt0 = (E < 0); - assign Eeqm1 = (E == -1); + assign Elt0 = E[P.NE-1]; // (E < 0); + assign Eeqm1 = ($signed(E) == -1); // Logic for nonnegative mask and rounding bits - assign Imask = {1'b1, {P.NF{1'b0}}} >>> E; + assign IMask = {1'b1, {P.NF{1'b0}}} >>> E; assign Tmasknonneg = ~(IMask >>> 1'b1); - assign HotE = IMask & !(IMask << 1'b1); + assign HotE = IMask & ~(IMask << 1'b1); assign HotEP1 = HotE >> 1'b1; assign Lnonneg = |(Xm & HotE); assign Rnonneg = |(Xm & HotEP1); - assign Trunc = Xm & Imask; - assign Rnd = Trunc + HotE; - + assign Trunc = Xm & IMask; + assign {Two, Rnd} = Trunc + HotE; // Two means result is 10.000000 = 2.0 + // mux and AND-OR logic to select final rounding bits mux2 #(1) Lmux(Lnonneg, 1'b0, Elt0, Lp); mux2 #(1) Rmux(Rnonneg, Eeqm1, Elt0, Rp); @@ -120,7 +94,6 @@ module fround import cvw::*; #(parameter cvw_t P) ( mux2 #(P.NF+1) Tmaskmux(Tmasknonneg, Tmaskneg, Elt0, Tmask); assign Tp = |(Xm & Tmask); - /////////////////////////// // Rounding, flags, special Cases // Flags = 0 // unless overridden later @@ -144,11 +117,15 @@ module fround import cvw::*; #(parameter cvw_t P) ( /////////////////////////// // Exact logic - assign Exact = (E >= Nf | XZero); // result will be exact; no need to round + /* verilator lint_off WIDTH */ + assign EminusNf = E - Nf; + /* verilator lint_on WIDTH */ + assign EgeNf = ~EminusNf[P.NE-1] & (~E[P.NE-1] | E[P.NE-2:0] == '0); // E >= Nf if MSB of E-Nf is 0 and E was positive + assign Exact = (EgeNf | XZero) & ~XNaN; // result will be exact; no need to round // Rounding logic: determine whether to round up in magnitude - always_comb - case (Rm) // *** make sure this includes dynamic + always_comb begin + case (Frm) // Frm is either specified in the instruction 
or is the dynamic rounding mode 3'b000: RoundUp = Rp & (Lp | Tp); // RNE 3'b001: RoundUp = 0; // RZ 3'b010: RoundUp = Xs & (Rp | Tp); // RN @@ -157,22 +134,23 @@ module fround import cvw::*; #(parameter cvw_t P) ( default: RoundUp = 0; // should never happen endcase - // output logic - if (XNaN) W = CanonicalNan; // *** - else if (Exact) W = X; - else if (Elt0) - if (RoundUp) W = {Xs, bias, {P.NF}} // *** format conversions + // If result is not exact, select output in unpacked FLEN format initially + if (XNaN) W = {1'b0, {P.NE{1'b1}}, 1'b1, {(P.NF-1){1'b0}}}; // Canonical NaN + else if (Elt0) // 0 <= |X| < 1 rounds to 0 or 1 + if (RoundUp) W = {Xs, P.BIAS[P.NE-1:0], {P.NF{1'b0}}}; // round to +/- 1 + else W = {Xs, {(P.FLEN-1){1'b0}}}; // round to +/- 0 + else begin // |X| > 1 rounds to an integer + if (RoundUp & Two) W = {Xs, Xep1, {(P.NF){1'b0}}}; // Round up to 2.0 + else if (RoundUp) W = {Xs, Xe, Rnd[P.NF-1:0]}; // Round up to Rnd + else W = {Xs, Xe, Trunc[P.NF-1:0]}; // Round down to Trunc + end + end - *** may not need to round to infinity; update docs and pseudocode above - - always_comb + packoutput #(P) packoutput(W, Fmt, PackedW); // pack and NaN-box based on selected format. 
+ mux2 #(P.FLEN) resultmux(PackedW, X, Exact, FRound); // Flags - assign Invalid = XSNaN; - assign Inexact = FRoundNX & ~(XNaN | Exact) & (Rp | T'); - */ - - assign FRound = '0; - assign FRoundFlags = '0; + assign FRoundNV = XSNaN; // invalid if input is signaling NaN + assign FRoundNX = ZfaFRoundNX & ~(XNaN | Exact) & (Rp | Tp); // Inexact if Round or Sticky bit set for FRoundNX instruction endmodule diff --git a/src/fpu/packoutput.sv b/src/fpu/packoutput.sv new file mode 100644 index 000000000..332c0ed52 --- /dev/null +++ b/src/fpu/packoutput.sv @@ -0,0 +1,101 @@ + +/////////////////////////////////////////// +// packoutput.sv +// +// Written: David_Harris@hmc.edu +// Modified: 5/11/24 +// +// Purpose: Pack the output of the FPU +// +// Documentation: RISC-V System on Chip Design Chapter 13 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// https://github.com/openhwgroup/cvw +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +module packoutput import cvw::*; #(parameter cvw_t P) ( + input logic [P.FLEN-1:0] Unpacked, + input logic [P.FMTBITS-1:0] Fmt, + output logic [P.FLEN-1:0] Packed +); + + logic Sign; + logic [P.NE1-1:0] Exp1; + logic [P.NF1-1:0] Fract1; + logic [P.NE2-1:0] Exp2; + logic [P.NF2-1:0] Fract2; + logic [P.H_NE-1:0] Exp3; + logic [P.H_NF-1:0] Fract3; + + // Pack exponent and fraction, with NaN-boxing to full FLEN + + assign Sign = Unpacked[P.FLEN-1]; + if (P.FPSIZES == 1) begin + assign Packed = Unpacked; + end else if (P.FPSIZES == 2) begin + int NF = P.NF; + int NE1 = P.NE1; + int top = P.NF + P.NE1-2; + int bot = P.NF - P.NF1; + always_comb + case (Fmt) + 1'b1: Packed = Unpacked; + 1'b0: begin + Exp1 = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE1-2:P.NF]}; + Fract1 = Unpacked[P.NF-1:P.NF-P.NF1]; + Packed = {{(P.FLEN-P.LEN1){1'b1}}, Sign, Exp1, Fract1}; + end + endcase + end else if (P.FPSIZES == 3) begin + always_comb + case (Fmt) + P.FMT: Packed = Unpacked; + P.FMT1: begin + Exp1 = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE1-2:P.NF]}; + Fract1 = Unpacked[P.NF-1:P.NF-P.NF1]; + Packed = {{(P.FLEN-P.LEN1){1'b1}}, Sign, Exp1, Fract1}; + end + P.FMT2: begin + Exp2 = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE2-2:P.NF]}; + Fract2 = Unpacked[P.NF-1:P.NF-P.NF2]; + Packed = {{(P.FLEN-P.LEN2){1'b1}}, Sign, Exp2, Fract2}; + end + default: Packed = 'x; + endcase + end else if (P.FPSIZES == 4) begin + always_comb + case (Fmt) + 2'h3: Packed = Unpacked; // Quad + 2'h1: begin // double + Exp1 = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE1-2:P.NF]}; + Fract1 = Unpacked[P.NF-1:P.NF-P.NF1]; + Packed = {{(P.FLEN-P.LEN1){1'b1}}, Sign, Exp1, Fract1}; + end + 2'h0: begin // float + Exp2 = {Unpacked[P.FLEN-2], Unpacked[P.NF+P.NE2-2:P.NF]}; + Fract2 = Unpacked[P.NF-1:P.NF-P.NF2]; + Packed = {{(P.FLEN-P.LEN2){1'b1}}, Sign, Exp2, Fract2}; + end + 2'h2: begin // half + Exp3 = {Unpacked[P.FLEN-2], 
Unpacked[P.NF+P.H_NE-2:P.NF]}; + Fract3 = Unpacked[P.NF-1:P.NF-P.H_NF]; + Packed = {{(P.FLEN-P.H_LEN){1'b1}}, Sign, Exp3, Fract3}; + end + endcase + end +endmodule \ No newline at end of file diff --git a/src/fpu/unpack.sv b/src/fpu/unpack.sv index eab224dd9..2e87d17fc 100644 --- a/src/fpu/unpack.sv +++ b/src/fpu/unpack.sv @@ -41,13 +41,15 @@ module unpack import cvw::*; #(parameter cvw_t P) ( output logic XZero, YZero, ZZero, // is XYZ zero output logic XInf, YInf, ZInf, // is XYZ infinity output logic XExpMax, // does X have the maximum exponent (NaN or Inf) - output logic [P.FLEN-1:0] XPostBox // X after being properly NaN-boxed + output logic [P.FLEN-1:0] XPostBox, // X after being properly NaN-boxed + output logic [P.NE-2:0] Bias, // Exponent bias + output logic [P.LOGFLEN-1:0] Nf // Number of fractional bits ); logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero logic XFracZero, YFracZero, ZFracZero; // is the fraction zero logic YExpMax, ZExpMax; // is the exponent all 1s - + unpackinput #(P) unpackinputX (.A(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), .FPUActive, .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero), @@ -63,4 +65,7 @@ module unpack import cvw::*; #(parameter cvw_t P) ( .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero), .Subnorm(), .PostBox()); + // look up bias and fractional bits for the given format + fmtparams #(P) fmtparams(Fmt, Bias, Nf); + endmodule diff --git a/testbench/tests.vh b/testbench/tests.vh index 49a454c43..0386dba6e 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -1641,7 +1641,7 @@ string imperas32f[] = '{ string arch64d[] = '{ `RISCVARCHTEST, // for speed - "rv64i_m/D/src/fadd.d_b10-01.S", + "rv64i_m/D/src/fadd.d_b10-01.S", "rv64i_m/D/src/fadd.d_b1-01.S", "rv64i_m/D/src/fadd.d_b11-01.S", "rv64i_m/D/src/fadd.d_b12-01.S", @@ -2278,6 +2278,7 @@ string arch64zknh[] = '{ string arch32zfaf[] = '{ 
//`RISCVARCHTEST, `WALLYTEST, + "rv32i_m/F_Zfa/src/fround_b1-01.S", "rv32i_m/F_Zfa/src/fleq_b1-01.S", "rv32i_m/F_Zfa/src/fleq_b19-01.S", "rv32i_m/F_Zfa/src/fli.s-01.S", @@ -2289,12 +2290,12 @@ string arch64zknh[] = '{ "rv32i_m/F_Zfa/src/fminm_b19-01.S", "rv32i_m/F_Zfa/src/fmaxm_b1-01.S", "rv32i_m/F_Zfa/src/fmaxm_b19-01.S" -/* "rv32i_m/F_Zfa/src/fround_b1-01.S" */ }; string arch32zfad[] = '{ //`RISCVARCHTEST, `WALLYTEST, + "rv32i_m/D_Zfa/src/fround_b1-01.S", "rv32i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S", "rv32i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S", "rv32i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S", @@ -2326,12 +2327,12 @@ string arch64zknh[] = '{ "rv32i_m/D_Zfa/src/fmvh.x.d_b27-01.S", "rv32i_m/D_Zfa/src/fmvh.x.d_b28-01.S", "rv32i_m/D_Zfa/src/fmvh.x.d_b29-01.S" -/* "rv32i_m/D_Zfa/src/fround_b1-01.S" */ }; string arch64zfaf[] = '{ //`RISCVARCHTEST, `WALLYTEST, + "rv64i_m/F_Zfa/src/fround_b1-01.S", "rv64i_m/F_Zfa/src/fleq_b1-01.S", "rv64i_m/F_Zfa/src/fleq_b19-01.S", "rv64i_m/F_Zfa/src/fli.s-01.S", @@ -2341,12 +2342,12 @@ string arch64zknh[] = '{ "rv64i_m/F_Zfa/src/fminm_b19-01.S", "rv64i_m/F_Zfa/src/fmaxm_b1-01.S", "rv64i_m/F_Zfa/src/fmaxm_b19-01.S" -/* "rv64i_m/F_Zfa/src/fround_b1-01.S" */ }; string arch64zfad[] = '{ //`RISCVARCHTEST, `WALLYTEST, + "rv64i_m/D_Zfa/src/fround_b1-01.S", "rv64i_m/D_Zfa/src/fcvtmod.w.d_b1-01.S", "rv64i_m/D_Zfa/src/fcvtmod.w.d_b22-01.S", "rv64i_m/D_Zfa/src/fcvtmod.w.d_b23-01.S", @@ -2363,7 +2364,7 @@ string arch64zknh[] = '{ "rv64i_m/D_Zfa/src/fminm_b19-01.S", "rv64i_m/D_Zfa/src/fmaxm_b1-01.S", "rv64i_m/D_Zfa/src/fmaxm_b19-01.S" -/* "rv64i_m/D_Zfa/src/fround_b1-01.S" */ + }; string arch32d_fma[] = '{