From 03f573351a5335866bd6dc79bd902aa5b16378a4 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 17 Jul 2022 16:40:58 +0000 Subject: [PATCH 01/36] Rewrote convert shift calculation with always for ease of reading --- pipelined/src/fpu/cvtshiftcalc.sv | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pipelined/src/fpu/cvtshiftcalc.sv b/pipelined/src/fpu/cvtshiftcalc.sv index 4d346d970..aef894f90 100644 --- a/pipelined/src/fpu/cvtshiftcalc.sv +++ b/pipelined/src/fpu/cvtshiftcalc.sv @@ -60,10 +60,11 @@ module cvtshiftcalc( // - otherwise: // | LzcInM | 0's if nessisary | // change to int shift to the left one - assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : - CvtResDenormUf ? {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}} : - {CvtLzcIn, {`NF+1{1'b0}}}; - + + always_comb + if (ToInt) CvtShiftIn = {{`XLEN{1'b0}}, Xm[`NF]&~CvtCe[`NE], Xm[`NF-1]|(CvtCe[`NE]&Xm[`NF]), Xm[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}}; + else if (CvtResDenormUf) CvtShiftIn = {{`NF-1{1'b0}}, Xm, {`CVTLEN-`NF+1{1'b0}}}; + else CvtShiftIn = {CvtLzcIn, {`NF+1{1'b0}}}; // choose the negative of the fraction size if (`FPSIZES == 1) begin From 5a38a1522558b64b2034a69fa1aa3c1e1e94ec8d Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Mon, 18 Jul 2022 13:30:35 -0700 Subject: [PATCH 02/36] added additional changes to coremark to support rv32 --- benchmarks/coremark/riscv64-baremetal/core_portme.c | 11 ++++++++--- benchmarks/coremark/riscv64-baremetal/core_portme.h | 6 ++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.c b/benchmarks/coremark/riscv64-baremetal/core_portme.c index 57b7993ad..017bef74e 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.c +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.c @@ -114,7 +114,12 @@ void portable_free(void *p) { #define read_csr(reg) ({ unsigned long __tmp; \ asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ __tmp; }) - #define GETMYTIME(_t) (_t = *(volatile unsigned long long*)0x0200BFF8) + // #if (XLEN==64) + // typedef unsigned long long ee_ptr_int; + // #else + // typedef unsigned long ee_ptr_int; + // #endif + #define GETMYTIME(_t) (_t = *(volatile ee_ptr_int*)0x0200BFF8) #define MYTIMEDIFF(fin,ini) ((fin)-(ini)) // Changing TIMER_RES_DIVIDER to 1000000 sets EE_TICKS_PER_SEC to 1000 (now counting ticks per ms) #define TIMER_RES_DIVIDER 10000 @@ -196,8 +201,8 @@ void stop_time(void) { CORE_TICKS get_time(void) { CORE_TICKS elapsed=(CORE_TICKS)(MYTIMEDIFF(stop_time_val, start_time_val)); unsigned long instructions = minstretDiff(); - long long cm100 = 1000000000 / elapsed; // coremark score * 100 - long long cpi100 = elapsed*100/instructions; // CPI * 100 + ee_ptr_int cm100 = 1000000000 / elapsed; // coremark score * 100 + ee_ptr_int cpi100 = elapsed*100/instructions; // CPI * 100 ee_printf(" WALLY CoreMark Results (from get_time)\n"); ee_printf(" Elapsed MTIME: %u\n", elapsed); ee_printf(" Elapsed MINSTRET: %lu\n", instructions); diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.h b/benchmarks/coremark/riscv64-baremetal/core_portme.h index 33768b0f1..3146d3ec0 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.h +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h @@ -69,14 +69,16 @@ typedef clock_t CORE_TICKS; // #elif (XLEN==32) // #include // typedef ee_u32 CORE_TICKS; -#else /* Configuration: size_t and clock_t Note these need to match the size of the clock output and the xLen the processor supports */ +#elif (XLEN==64) typedef unsigned long int size_t; typedef unsigned long int clock_t; -typedef clock_t CORE_TICKS; +#else +#include #endif +typedef clock_t CORE_TICKS; /* Definitions: COMPILER_VERSION, COMPILER_FLAGS, MEM_LOCATION Initialize these strings per platform From cce5fb8dfdb9e05039af2a7a39014c5e7a1e0383 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Mon, 18 Jul 2022 20:48:56 +0000 Subject: [PATCH 03/36] moved Ss to execute stage --- pipelined/regression/sim-wally-batch | 2 +- pipelined/src/fpu/fma.sv | 9 ++++++- pipelined/src/fpu/fpu.sv | 36 +++++----------------------- pipelined/src/fpu/postprocess.sv | 3 ++- pipelined/src/fpu/roundsign.sv | 12 ++-------- pipelined/testbench/testbench-fp.sv | 5 ++-- 6 files changed, 22 insertions(+), 45 deletions(-) diff --git a/pipelined/regression/sim-wally-batch b/pipelined/regression/sim-wally-batch index 7e821e584..8b5b5d628 100755 --- a/pipelined/regression/sim-wally-batch +++ b/pipelined/regression/sim-wally-batch @@ -1 +1 @@ -vsim -c -do "do wally-pipelined-batch.do rv32gc wally32d" +vsim -c -do "do wally-pipelined-batch.do rv32gc wally32periph" diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 44cd3616a..3cd128301 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -44,6 +44,7 @@ module fma( output logic InvA, // Was A inverted for effective subtraction (P-A or -P+A) output logic As, // the aligned addend's sign (modified Z sign for other opperations) output logic Ps, // the product's sign + output logic Ss, // the sum's sign output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count ); @@ -81,7 +82,7 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm); + add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Ss); loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule @@ -226,6 +227,7 @@ module add( output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend + output logic Ss, output logic [3*`NF+5:0] Sm // the positive sum ); logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum @@ -257,6 +259,11 @@ module add( // Choose the positive sum and accompanying LZA result. assign Sm = NegSum ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; + // is the result negitive + // if p - z is the Sum negitive + // if -p + z is the Sum positive + // if -p - z then the Sum is negitive + assign Ss = NegSum^Ps; //*** move to execute stage endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 65be29972..e7888551c 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -114,6 +114,7 @@ module fpu ( logic NegSumE, NegSumM; logic ZSgnEffE, ZSgnEffM; logic PSgnE, PSgnM; + logic SsE, SsM; logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; // Cvt Signals @@ -255,36 +256,11 @@ module fpu ( .Xm(XManE), .Ym(YManE), .Zm(ZManE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .FOpCtrl(FOpCtrlE), .Fmt(FmtE), - .As(ZSgnEffE), .Ps(PSgnE), + .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Sm(SumE), .Pe(ProdExpE), .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), .ZmSticky(AddendStickyE), .KillProd(KillProdE)); - // // fpdivsqrt using Goldschmidt's iteration - // if(`FLEN == 64) begin - // flopenrc #(64) reg_input1 (.d({FSrcXE[63:0]}), .q(DivInput1E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // flopenrc #(64) reg_input2 (.d({FSrcYE[63:0]}), .q(DivInput2E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // end - // else if (`FLEN == 32) begin - // flopenrc #(64) reg_input1 (.d({32'b0, FSrcXE[31:0]}), .q(DivInput1E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // flopenrc #(64) reg_input2 (.d({32'b0, FSrcYE[31:0]}), .q(DivInput2E), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // end - // flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE[0], FOpCtrlE[0]}), - // .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), - // .clear(FDivSqrtDoneE), .en(load_preload), - // .reset(reset), .clk(clk)); - // fpdiv_pipe fdivsqrt (.op1(DivInput1E[63:0]), .op2(DivInput2E[63:0]), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), - // .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - // .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, - // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal @@ -359,9 +335,9 @@ module fpu ( {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE}, - {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM}); + flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE}, + {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); @@ -381,7 +357,7 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), - .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), + .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index f9ccd2553..7e741abbb 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -52,6 +52,7 @@ module postprocess ( input logic FmaKillProd, // set the product to zero before addition if the product is too small to matter input logic FmaNegSum, // was the sum negitive input logic FmaInvA, // do you invert Z + input logic FmaSs, input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, @@ -197,7 +198,7 @@ module postprocess ( roundsign roundsign(.FmaPs, .FmaAs, .FmaInvA, .FmaOp, .DivOp, .CvtOp, .FmaNegSum, - .Xs, .Ys, .CvtCs, .Ms); + .FmaSs, .Xs, .Ys, .CvtCs, .Ms); round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, diff --git a/pipelined/src/fpu/roundsign.sv b/pipelined/src/fpu/roundsign.sv index 55e322bc3..acecb5947 100644 --- a/pipelined/src/fpu/roundsign.sv +++ b/pipelined/src/fpu/roundsign.sv @@ -38,23 +38,15 @@ module roundsign( input logic DivOp, input logic CvtOp, input logic CvtCs, + input logic FmaSs, output logic Ms ); - logic FmaResSgnTmp; logic Qs; - // is the result negitive - // if p - z is the Sum negitive - // if -p + z is the Sum positive - // if -p - z then the Sum is negitive - assign FmaResSgnTmp = FmaNegSum^FmaPs; //*** move to execute stage - - // assign FmaResSgnTmp = FmaInvA&(FmaAs)&FmaNegSum | FmaInvA&FmaPs&~FmaNegSum | (FmaAs&FmaPs); - assign Qs = Xs^Ys; // Sign for rounding calulation - assign Ms = (FmaResSgnTmp&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); + assign Ms = (FmaSs&FmaOp) | (CvtCs&CvtOp) | (Qs&DivOp); endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index b90c3d3de..be5114e95 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -94,6 +94,7 @@ module testbenchfp; // in-between FMA signals logic Mult; + logic Ss; logic [`NE+1:0] Pe; logic ZmSticky; logic KillProd; @@ -674,13 +675,13 @@ module testbenchfp; fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), .Xe(XExp), .Ye(YExp), .Ze(ZExp), .Xm(XMan), .Ym(YMan), .Zm(ZMan), - .XZero, .YZero, .ZZero, + .XZero, .YZero, .ZZero, .Ss, .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps, .Pe, .ZmSticky, .KillProd); postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), - .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), + .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss), .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), From 1e876733210f687163845bf6694a8b0fc59fe75c Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 18 Jul 2022 23:10:22 +0000 Subject: [PATCH 04/36] Cleaned up Coremark makefile --- benchmarks/coremark/Makefile | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 2db418aa3..d6cdc529e 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -1,6 +1,8 @@ +# Wally Coremark Makefile +# Daniel Torres & David Harris 28 July 2022 + PORT_DIR = $(CURDIR)/riscv64-baremetal cmbase=../../addins/coremark -# cmbase= ../riscv-coremark/coremark work_dir= ../../benchmarks/coremark/work XLEN ?=64 sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ @@ -14,17 +16,7 @@ PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign- -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ - -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN) - -# flags that cause build errors mcmodel=medlow - -# -static -mcmodel=medlow -mtune=sifive-7-series \ -# -O3 -falign-functions=16 -funroll-all-loops -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -# -finline-functions -falign-jumps=4 \ -# -nostdlib -nostartfiles -ffreestanding -mstrict-align \ -# -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \ -# -DPERFORMANCE_RUN=1 -# "-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " + -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN) -DITERATIONS=10 all: $(work_dir)/coremark.bare.riscv.elf.memfile @@ -38,10 +30,8 @@ $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv extractFunctionRadix.sh $<.elf.objdump $(work_dir)/coremark.bare.riscv: $(sources) Makefile - # These flags were used by WD on CoreMark make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="$(PORT_CFLAGS)" - # -fno-toplevel-reorder --param=max-inline-insns-size=128 " # adding this bit caused a compiler error - mkdir -p $(work_dir) + mkdir -p $(work_dir) mv $(cmbase)/coremark.bare.riscv $(work_dir) .PHONY: clean @@ -49,15 +39,4 @@ $(work_dir)/coremark.bare.riscv: $(sources) Makefile clean: rm -f $(work_dir)/* - - -# # PORT_CFLAGS = -g -march=$(XLEN)im -mabi=$(ABI) -static -mcmodel=medlow -mtune=sifive-3-series \ -# # -O3 -falign-functions=16 -funroll-all-loops \ -# # -finline-functions -falign-jumps=4 \ -# # -nostdlib -nostartfiles -ffreestanding -mstrict-align \ -# # -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 \ -# # -DPERFORMANCE_RUN=1 - -# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv$(XLEN)im -mabi=$(ABI) -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " -# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=/opt/riscv/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fno-toplevel-reorder --param=max-inline-insns-size=128 -fipa-pta" -# make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="-march=rv64imd -mabi=lp64d -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-7-series -Ofast -funroll-all-loops -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta " \ No newline at end of file + \ No newline at end of file From 630110e73ed0cb72e303eb3a9eedad62aa5f1d8d Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 18 Jul 2022 16:48:13 -0700 Subject: [PATCH 05/36] Coremark cleanup --- benchmarks/coremark/Makefile | 6 +++--- benchmarks/coremark/riscv64-baremetal/core_portme.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index d6cdc529e..3f94a7b8b 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -16,7 +16,7 @@ PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign- -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ - -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DXLEN=$(XLEN) -DITERATIONS=10 + -DTOTAL_DATA_SIZE=2000 -DMAIN_HAS_NOARGC=1 -DPERFORMANCE_RUN=1 -DITERATIONS=10 -DXLEN=$(XLEN) all: $(work_dir)/coremark.bare.riscv.elf.memfile @@ -31,7 +31,7 @@ $(work_dir)/coremark.bare.riscv.elf.memfile: $(work_dir)/coremark.bare.riscv $(work_dir)/coremark.bare.riscv: $(sources) Makefile make -C $(cmbase) PORT_DIR=$(PORT_DIR) compile RISCV=$(RISCV)/riscv-gnu-toolchain XCFLAGS="$(PORT_CFLAGS)" - mkdir -p $(work_dir) + mkdir -p $(work_dir) mv $(cmbase)/coremark.bare.riscv $(work_dir) .PHONY: clean @@ -39,4 +39,4 @@ $(work_dir)/coremark.bare.riscv: $(sources) Makefile clean: rm -f $(work_dir)/* - \ No newline at end of file + diff --git a/benchmarks/coremark/riscv64-baremetal/core_portme.h b/benchmarks/coremark/riscv64-baremetal/core_portme.h index 3146d3ec0..ce88f7239 100755 --- a/benchmarks/coremark/riscv64-baremetal/core_portme.h +++ b/benchmarks/coremark/riscv64-baremetal/core_portme.h @@ -94,7 +94,7 @@ typedef clock_t CORE_TICKS; #define COMPILER_FLAGS FLAGS_STR /* "Please put compiler flags here (e.g. -o3)" */ #endif #ifndef MEM_LOCATION - #define MEM_LOCATION "Please put data memory location here\n\t\t\t(e.g. code in flash, data on heap etc)" + #define MEM_LOCATION "Code and Data in external RAM" #define MEM_LOCATION_UNSPEC 1 #endif From 64b3e4117b3aa0b7d20e45ef713214c86eea0385 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 19 Jul 2022 00:04:24 +0000 Subject: [PATCH 06/36] reworked fmashiftcalc to match book --- pipelined/src/fpu/fmashiftcalc.sv | 69 +++++++++++++++++-------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index a6c1a1c60..6d1c40bc7 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -43,6 +43,7 @@ module fmashiftcalc( ); logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later logic [`NE+1:0] NormSumExp; // the exponent of the normalized sum with the `FLEN bias + logic [`NE+1:0] BiasCorr; /////////////////////////////////////////////////////////////////////////////// // Normalization @@ -50,37 +51,40 @@ module fmashiftcalc( //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); - + logic [`NE+1:0] FmaSe; + assign FmaSe = FmaKillProd ? {2'b0, Ze} : FmaPe; // calculate the sum's exponent - // ProdExp - NormCnt - 1 + NF+4 = ProdExp + ~NormCnt + 1 - 1 + NF+4 = ProdExp + ~NormCnt + NF+4 - assign NormSumExp = (FmaKillProd ? {2'b0, Ze} : FmaPe) + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); + assign NormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin assign FmaNe = NormSumExp; end else if (`FPSIZES == 2) begin - assign FmaNe = Fmt ? NormSumExp : (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; - + assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS); + assign FmaNe = NormSumExp+BiasCorr; + end else if (`FPSIZES == 3) begin always_comb begin case (Fmt) - `FMT: FmaNe = NormSumExp; - `FMT1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS1))&{`NE+2{|NormSumExp}}; - `FMT2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`BIAS2))&{`NE+2{|NormSumExp}}; - default: FmaNe = {`NE+2{1'bx}}; + `FMT: BiasCorr = '0; + `FMT1: BiasCorr = (`NE+2)'(`BIAS1-`BIAS); + `FMT2: BiasCorr = (`NE+2)'(`BIAS2-`BIAS); + default: BiasCorr = 'x; endcase end + assign FmaNe = NormSumExp+BiasCorr; end else if (`FPSIZES == 4) begin always_comb begin case (Fmt) - 2'h3: FmaNe = NormSumExp; - 2'h1: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`D_BIAS))&{`NE+2{|NormSumExp}}; - 2'h0: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`S_BIAS))&{`NE+2{|NormSumExp}}; - 2'h2: FmaNe = (NormSumExp-(`NE+2)'(`BIAS)+(`NE+2)'(`H_BIAS))&{`NE+2{|NormSumExp}}; + 2'h3: BiasCorr = '0; + 2'h1: BiasCorr = (`NE+2)'(`D_BIAS-`Q_BIAS); + 2'h0: BiasCorr = (`NE+2)'(`S_BIAS-`Q_BIAS); + 2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS); endcase end + assign FmaNe = NormSumExp+BiasCorr; end @@ -89,25 +93,25 @@ module fmashiftcalc( if (`FPSIZES == 1) begin logic Sum0LEZ, Sum0GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); + assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; end else if (`FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; + assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); + assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp; assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; end else if (`FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF)-(`NE+2)'(2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF1+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS1)) | ~|NormSumExp; - assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)); - assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF2+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`BIAS2)) | ~|NormSumExp; + assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); + assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp; + assign Sum2LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2)); + assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|NormSumExp; always_comb begin case (Fmt) `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; @@ -120,20 +124,20 @@ module fmashiftcalc( end else if (`FPSIZES == 4) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL; assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`NF )-(`NE+2)'(2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`D_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`D_BIAS)) | ~|NormSumExp; - assign Sum2LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)); - assign Sum2GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`S_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`S_BIAS)) | ~|NormSumExp; - assign Sum3LEZ = $signed(NormSumExp) <= $signed( (`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)); - assign Sum3GEFL = $signed(NormSumExp) >= $signed(-(`NE+2)'(`H_NF+2)+(`NE+2)'(`BIAS)-(`NE+2)'(`H_BIAS)) | ~|NormSumExp; + assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS)); + assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|NormSumExp; + assign Sum2LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS)); + assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|NormSumExp; + assign Sum3LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS)); + assign Sum3GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|NormSumExp; always_comb begin case (Fmt) 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; 2'h1: FmaPreResultDenorm = Sum1LEZ & Sum1GEFL & ~FmaSZero; 2'h0: FmaPreResultDenorm = Sum2LEZ & Sum2GEFL & ~FmaSZero; 2'h2: FmaPreResultDenorm = Sum3LEZ & Sum3GEFL & ~FmaSZero; - endcase // *** remove checking to see if it's underflowed and only check for less than zero for denorm checking + endcase end end @@ -152,5 +156,8 @@ module fmashiftcalc( // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; - assign FmaShiftAmt = FmaNCnt+DenormShift; + if (`FPSIZES == 1) + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1; + else + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1; endmodule From 514674417ed91fb5b0c94c1750d156b3b1d626b6 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 19 Jul 2022 01:10:10 +0000 Subject: [PATCH 07/36] moved Se into execute stage --- pipelined/src/fpu/fma.sv | 7 +++- pipelined/src/fpu/fmashiftcalc.sv | 61 ++++++++++++++-------------- pipelined/src/fpu/fpu.sv | 11 ++--- pipelined/src/fpu/postprocess.sv | 15 +++---- pipelined/src/fpu/resultsign.sv | 4 +- pipelined/src/fpu/round.sv | 6 +-- pipelined/src/fpu/shiftcorrection.sv | 6 +-- pipelined/testbench/testbench-fp.sv | 5 ++- 8 files changed, 61 insertions(+), 54 deletions(-) diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 3cd128301..3f4cc2ac5 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -45,6 +45,7 @@ module fma( output logic As, // the aligned addend's sign (modified Z sign for other opperations) output logic Ps, // the product's sign output logic Ss, // the sum's sign + output logic [`NE+1:0] Se, output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count ); @@ -82,7 +83,7 @@ module fma( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.Am, .Pm, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Ss); + add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); endmodule @@ -223,11 +224,14 @@ module add( input logic Ps, As,// the product sign and the alligend addeded's sign (Modified Z sign for other opperations) input logic KillProd, // should the product be set to 0 input logic ZmSticky, + input logic [`NE-1:0] Ze, + input logic [`NE+1:0] Pe, output logic [3*`NF+6:0] AmInv, // aligned addend possibly inverted output logic [2*`NF+1:0] PmKilled, // the product's mantissa possibly killed output logic NegSum, // was the sum negitive output logic InvA, // do you invert the aligned addend output logic Ss, + output logic [`NE+1:0] Se, output logic [3*`NF+5:0] Sm // the positive sum ); logic [3*`NF+6:0] PreSum, NegPreSum; // possibly negitive sum @@ -264,6 +268,7 @@ module add( // if -p + z is the Sum positive // if -p - z then the Sum is negitive assign Ss = NegSum^Ps; //*** move to execute stage + assign Se = KillProd ? {2'b0, Ze} : Pe; endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 6d1c40bc7..79953b212 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -35,14 +35,15 @@ module fmashiftcalc( input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic FmaKillProd, // is the product set to zero - output logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results + input logic [`NE+1:0] FmaSe, + output logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results output logic FmaSZero, // is the result denormalized - calculated before LZA corection output logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [3*`NF+8:0] FmaShiftIn // is the sum zero ); logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later - logic [`NE+1:0] NormSumExp; // the exponent of the normalized sum with the `FLEN bias + logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias logic [`NE+1:0] BiasCorr; /////////////////////////////////////////////////////////////////////////////// @@ -51,18 +52,16 @@ module fmashiftcalc( //*** insert bias-bias simplification in fcvt.sv/phone pictures // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); - logic [`NE+1:0] FmaSe; - assign FmaSe = FmaKillProd ? {2'b0, Ze} : FmaPe; // calculate the sum's exponent - assign NormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); + assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin - assign FmaNe = NormSumExp; + assign NormSumExp = PreNormSumExp; end else if (`FPSIZES == 2) begin assign BiasCorr = Fmt ? (`NE+2)'(0) : (`NE+2)'(`BIAS1-`BIAS); - assign FmaNe = NormSumExp+BiasCorr; + assign NormSumExp = PreNormSumExp+BiasCorr; end else if (`FPSIZES == 3) begin always_comb begin @@ -73,7 +72,7 @@ module fmashiftcalc( default: BiasCorr = 'x; endcase end - assign FmaNe = NormSumExp+BiasCorr; + assign NormSumExp = PreNormSumExp+BiasCorr; end else if (`FPSIZES == 4) begin always_comb begin @@ -84,7 +83,7 @@ module fmashiftcalc( 2'h2: BiasCorr = (`NE+2)'(`H_BIAS-`Q_BIAS); endcase end - assign FmaNe = NormSumExp+BiasCorr; + assign NormSumExp = PreNormSumExp+BiasCorr; end @@ -92,26 +91,26 @@ module fmashiftcalc( if (`FPSIZES == 1) begin logic Sum0LEZ, Sum0GEFL; - assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); assign FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; end else if (`FPSIZES == 2) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL; - assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp; + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; assign FmaPreResultDenorm = (Fmt ? Sum0LEZ : Sum1LEZ) & (Fmt ? Sum0GEFL : Sum1GEFL) & ~FmaSZero; end else if (`FPSIZES == 3) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL; - assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|NormSumExp; - assign Sum2LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2)); - assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|NormSumExp; + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS1)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF1-2+`BIAS-`BIAS1)) | ~|PreNormSumExp; + assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`BIAS2)); + assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF2-2+`BIAS-`BIAS2)) | ~|PreNormSumExp; always_comb begin case (Fmt) `FMT: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; @@ -123,14 +122,14 @@ module fmashiftcalc( end else if (`FPSIZES == 4) begin logic Sum0LEZ, Sum0GEFL, Sum1LEZ, Sum1GEFL, Sum2LEZ, Sum2GEFL, Sum3LEZ, Sum3GEFL; - assign Sum0LEZ = NormSumExp[`NE+1] | ~|NormSumExp; - assign Sum0GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`NF-2)); - assign Sum1LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS)); - assign Sum1GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|NormSumExp; - assign Sum2LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS)); - assign Sum2GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|NormSumExp; - assign Sum3LEZ = $signed(NormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS)); - assign Sum3GEFL = $signed(NormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|NormSumExp; + assign Sum0LEZ = PreNormSumExp[`NE+1] | ~|PreNormSumExp; + assign Sum0GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`NF-2)); + assign Sum1LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`D_BIAS)); + assign Sum1GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`D_NF-2+`BIAS-`D_BIAS)) | ~|PreNormSumExp; + assign Sum2LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`S_BIAS)); + assign Sum2GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`S_NF-2+`BIAS-`S_BIAS)) | ~|PreNormSumExp; + assign Sum3LEZ = $signed(PreNormSumExp) <= $signed((`NE+2)'(`BIAS-`H_BIAS)); + assign Sum3GEFL = $signed(PreNormSumExp) >= $signed((`NE+2)'(-`H_NF-2+`BIAS-`H_BIAS)) | ~|PreNormSumExp; always_comb begin case (Fmt) 2'h3: FmaPreResultDenorm = Sum0LEZ & Sum0GEFL & ~FmaSZero; @@ -148,11 +147,11 @@ module fmashiftcalc( // - if kill prod dont add to exp // Determine if the result is denormal - // assign FmaPreResultDenorm = $signed(FmaNe)<=0 & ($signed(FmaNe)>=$signed(-FracLen)) & ~FmaSZero; + // assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero; // Determine the shift needed for denormal results // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = FmaPreResultDenorm ? FmaNe[$clog2(3*`NF+7)-1:0] : 1; + assign DenormShift = FmaPreResultDenorm ? NormSumExp[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index e7888551c..a9c0ac247 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -109,6 +109,7 @@ module fpu ( logic [3*`NF+5:0] SumE, SumM; logic [`NE+1:0] ProdExpE, ProdExpM; logic AddendStickyE, AddendStickyM; + logic [`NE+1:0] SeE,SeM; logic KillProdE, KillProdM; logic InvAE, InvAM; logic NegSumE, NegSumM; @@ -256,7 +257,7 @@ module fpu ( .Xm(XManE), .Ym(YManE), .Zm(ZManE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .FOpCtrl(FOpCtrlE), .Fmt(FmtE), - .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), + .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE), .Sm(SumE), .Pe(ProdExpE), .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), .ZmSticky(AddendStickyE), .KillProd(KillProdE)); @@ -335,9 +336,9 @@ module fpu ( {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); - flopenrc #($clog2(3*`NF+7)+7) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE}, - {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM}); + flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, + {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE}, + {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM, SeM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); @@ -359,7 +360,7 @@ module fpu ( postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM), .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), - .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), + .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM), .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 7e741abbb..de3c4f30c 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -46,6 +46,7 @@ module postprocess ( //fma signals input logic FmaAs, // the modified Z sign - depends on instruction input logic FmaPs, // the product's sign + input logic [`NE+1:0] FmaSe, input logic [`NE+1:0] FmaPe, // Product exponent input logic [3*`NF+5:0] FmaSm, // the positive sum input logic FmaZmS, // sticky bit that is calculated during alignment @@ -93,10 +94,10 @@ module postprocess ( logic UfL; logic [`FMTBITS-1:0] OutFmt; // fma signals - logic [`NE+1:0] FmaSe; // exponent of the normalized sum + logic [`NE+1:0] FmaMe; // exponent of the normalized sum logic FmaSZero; // is the sum zero logic [3*`NF+8:0] FmaShiftIn; // shift input - logic [`NE+1:0] FmaNe; // exponent of the normalized sum not taking into account denormal or zero results + logic [`NE+1:0] NormSumExp; // exponent of the normalized sum not taking into account denormal or zero results logic FmaPreResultDenorm; // is the result denormalized - calculated before LZA corection logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count // division singals @@ -151,7 +152,7 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .FmaNe, + fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); @@ -182,9 +183,9 @@ module postprocess ( normshift normshift (.ShiftIn, .ShiftAmt, .Shifted); - shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .FmaNe, + shiftcorrection shiftcorrection(.FmaOp, .FmaPreResultDenorm, .NormSumExp, .DivResDenorm, .DivDenormShift, .DivOp, .DivQe, - .Qe, .FmaSZero, .Shifted, .FmaSe, .Mf); + .Qe, .FmaSZero, .Shifted, .FmaMe, .Mf); /////////////////////////////////////////////////////////////////////////////// // Rounding @@ -201,7 +202,7 @@ module postprocess ( .FmaSs, .Xs, .Ys, .CvtCs, .Ms); round round(.OutFmt, .Frm, .S, .FmaZmS, .Plus1, .PostProcSel, .CvtCe, .Qe, - .Ms, .FmaSe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, + .Ms, .FmaMe, .FmaOp, .CvtOp, .CvtResDenormUf, .Mf, .ToInt, .CvtResUf, .DivS, .DivDone, .DivOp, .UfPlus1, .FullRe, .Rf, .Re, .R, .UfL, .Me); @@ -209,7 +210,7 @@ module postprocess ( // Sign calculation /////////////////////////////////////////////////////////////////////////////// - resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaSe, .R, .S, + resultsign resultsign(.Frm, .FmaPs, .FmaAs, .FmaMe, .R, .S, .FmaOp, .ZInf, .InfIn, .FmaSZero, .Mult, .Ms, .Ws); /////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index e1ea5e410..b8019b98b 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -34,7 +34,7 @@ module resultsign( input logic ZInf, input logic InfIn, input logic FmaOp, - input logic [`NE+1:0] FmaSe, + input logic [`NE+1:0] FmaMe, input logic FmaSZero, input logic Mult, input logic R, @@ -50,7 +50,7 @@ module resultsign( // if cancelation then 0 unless round to -infinity // if multiply then Psgn // otherwise psign - assign Zeros = (FmaPs^FmaAs)&~(FmaSe[`NE+1] | ((FmaSe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs; + assign Zeros = (FmaPs^FmaAs)&~(FmaMe[`NE+1] | ((FmaMe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // is the result negitive diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 6132dba4a..0943413bd 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -48,7 +48,7 @@ module round( input logic CvtResUf, input logic [`CORRSHIFTSZ-1:0] Mf, input logic FmaZmS, // addend's sticky bit - input logic [`NE+1:0] FmaSe, // exponent of the normalized sum + input logic [`NE+1:0] FmaMe, // exponent of the normalized sum input logic Ms, // the result's sign input logic [`NE:0] CvtCe, // the calculated expoent input logic [`NE+1:0] Qe, // the calculated expoent @@ -176,7 +176,7 @@ module round( // only add the Addend sticky if doing an FMA opperation // - the shifter shifts too far left when there's an underflow (shifting out all possible sticky bits) - assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaSe[`NE+1]&FmaOp | DivS&DivOp; + assign UfS = FmaZmS&FmaOp | NormS | CvtResUf&CvtOp | FmaMe[`NE+1]&FmaOp | DivS&DivOp; // determine round and LSB of the rounded value // - underflow round bit is used to determint the underflow flag @@ -299,7 +299,7 @@ module round( always_comb case(PostProcSel) - 2'b10: Me = FmaSe; // fma + 2'b10: Me = FmaMe; // fma 2'b00: Me = {CvtCe[`NE], CvtCe}&{`NE+2{~CvtResDenormUf|CvtResUf}}; // cvt 2'b01: Me = DivDone ? Qe : '0; // divide default: Me = '0; diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index 71a2393a6..50cffb078 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -35,12 +35,12 @@ module shiftcorrection( input logic DivResDenorm, input logic [`NE+1:0] DivQe, input logic [`NE+1:0] DivDenormShift, - input logic [`NE+1:0] FmaNe, // exponent of the normalized sum not taking into account denormal or zero results + input logic [`NE+1:0] NormSumExp, // exponent of the normalized sum not taking into account denormal or zero results input logic FmaPreResultDenorm, // is the result denormalized - calculated before LZA corection input logic FmaSZero, output logic [`CORRSHIFTSZ-1:0] Mf, // the shifted sum before LZA correction output logic [`NE+1:0] Qe, - output logic [`NE+1:0] FmaSe // exponent of the normalized sum + output logic [`NE+1:0] FmaMe // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction logic [`CORRSHIFTSZ-1:0] CorrQuotShifted; @@ -58,7 +58,7 @@ module shiftcorrection( assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 - assign FmaSe = (FmaNe+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &FmaNe&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; + assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; // recalculate if the result is denormalized assign ResDenorm = FmaPreResultDenorm&~Shifted[`NORMSHIFTSZ-3]&~Shifted[`NORMSHIFTSZ-2]; diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index be5114e95..91ce82616 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -96,6 +96,7 @@ module testbenchfp; logic Mult; logic Ss; logic [`NE+1:0] Pe; + logic [`NE+1:0] Se; logic ZmSticky; logic KillProd; logic [$clog2(3*`NF+7)-1:0] NCnt; @@ -675,7 +676,7 @@ module testbenchfp; fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), .Xe(XExp), .Ye(YExp), .Ze(ZExp), .Xm(XMan), .Ym(YMan), .Zm(ZMan), - .XZero, .YZero, .ZZero, .Ss, + .XZero, .YZero, .ZZero, .Ss, .Se, .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps, .Pe, .ZmSticky, .KillProd); @@ -686,7 +687,7 @@ module testbenchfp; .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, - .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, + .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSe(Se), .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); From b13b49658bb6906378910ab7ca95beebee54d8b1 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 19 Jul 2022 02:58:07 +0000 Subject: [PATCH 08/36] Removed duplicate -march from CoreMark makefile --- benchmarks/coremark/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 3f94a7b8b..1b619d80e 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -10,8 +10,8 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) -ARCH := rv$(XLEN)im -PORT_CFLAGS = -g -march=rv$(XLEN)im -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ +ARCH := rv$(XLEN)i +PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -funroll-all-loops -finline-functions -falign-jumps=4 \ -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ From 37bf837d48deb9cef6839d13d6405cfb128ea5c7 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Tue, 19 Jul 2022 08:59:16 -0700 Subject: [PATCH 09/36] fixed GPIO test by adding a new function to clear PLIC interrupts --- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 39 +++++++++++++++++++ .../rv32i_m/privilege/src/WALLY-gpio-01.S | 4 ++ 2 files changed, 43 insertions(+) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index 0caad5d0b..7e112c917 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -973,6 +973,45 @@ readsip_test: // read the MIP into the signature addi a6, a6, 4 j test_loop // go to next test case +claim_m_plic_interrupts: // clears one non-pending PLIC interrupt + li t2, 0x0C002000 + li t3, 0x0C200004 + li t4, 0xFFF + lw t6, 0(t2) // save current enable status + sw t4, 0(t2) // enable all relevant interrupts on PLIC + lw t5, 0(t3) // make PLIC claim + sw t5, 0(t3) // complete claim made + sw t6, 0(t2) // restore saved enable status + j test_loop + +claim_s_plic_interrupts: // clears one non-pending PLIC interrupt + li t2, 0x0C002080 + li t3, 0x0C201004 + li t4, 0xFFF + lw t6, 0(t2) // save current enable status + sw t4, 0(t2) // enable all relevant interrupts on PLIC + lw t5, 0(t3) // make PLIC claim + sw t5, 0(t3) // complete claim made + sw t6, 0(t2) // restore saved enable status + j test_loop + +uart_data_wait: + li t2, 0x10000005 // LSR + li t3, 0x10000002 // IIR + lb t4, 0(t3) // save IIR before potential clear + lb t5, 0(t2) + andi t5, t5, 1 // only care if data is ready + li t6, 1 + beq t5, t6, uart_data_ready + j uart_data_wait + +uart_data_ready: + sb t4, 0(t1) + sb t5, 1(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop + goto_s_mode: // return to address in t3, li a0, 3 // Trap handler behavior (go to supervisor mode) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index 1b3bbdb47..8fb801feb 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -140,18 +140,22 @@ SETUP_PLIC .4byte high_ie, 0x00020000, write32_test # enable high interrupt on bit 17, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised .4byte high_ie, 0x00000000, write32_test # disable high interrupt on bit 17 +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear PLIC pending interrupts .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte low_ie, 0x00010000, write32_test # enable low interrupt on bit 16, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised .4byte low_ie, 0x00000000, write32_test # disable low interrupt on bit 16 +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear PLIC pending interrupts .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte rise_ie, 0x00200000, write32_test # enable rise interrupt on bit 21, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised .4byte rise_ie, 0x00000000, write32_test # disable rise interrupt on bit 21, which is pending +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear PLIC pending interrupts .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte fall_ie, 0x01000000, write32_test # enable high interrupt on bit 24, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised .4byte fall_ie, 0x00000000, write32_test # disable high interrupt on bit 24, which is pending +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear PLIC pending interrupts .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte 0x0, 0x0, terminate_test # terminate tests From 0668659ac9dedc3990e3cfc37db9365d7b59e16c Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Tue, 19 Jul 2022 13:17:02 -0700 Subject: [PATCH 10/36] made changes to makefile, now builds fastest version (RV64im) by default. Also removed redundent CFLAG funroll-all-loops (was duplicated) --- benchmarks/coremark/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/coremark/Makefile b/benchmarks/coremark/Makefile index 1b619d80e..e41865986 100644 --- a/benchmarks/coremark/Makefile +++ b/benchmarks/coremark/Makefile @@ -10,9 +10,9 @@ sources=$(cmbase)/core_main.c $(cmbase)/core_list_join.c $(cmbase)/coremark.h \ $(PORT_DIR)/core_portme.h $(PORT_DIR)/core_portme.c $(PORT_DIR)/core_portme.mak \ $(PORT_DIR)/crt.S $(PORT_DIR)/encoding.h $(PORT_DIR)/util.h $(PORT_DIR)/syscalls.c ABI := $(if $(findstring "64","$(XLEN)"),lp64,ilp32) -ARCH := rv$(XLEN)i +ARCH := rv$(XLEN)im PORT_CFLAGS = -g -mabi=$(ABI) -march=$(ARCH) -static -falign-functions=16 \ - -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -funroll-all-loops -finline-functions -falign-jumps=4 \ + -mbranch-cost=1 -DSKIP_DEFAULT_MEMSET -mtune=sifive-3-series -O3 -finline-functions -falign-jumps=4 \ -fno-delete-null-pointer-checks -fno-rename-registers --param=loop-max-datarefs-for-datadeps=0 \ -funroll-all-loops --param=uninlined-function-insns=8 -fno-tree-vrp -fwrapv -fipa-pta \ -nostdlib -nostartfiles -ffreestanding -mstrict-align \ From 5b1adc7a675f2ddaa85e57c64a19ce3df96b4e03 Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Tue, 19 Jul 2022 13:36:18 -0700 Subject: [PATCH 11/36] commented out embench 2.0 tests --- pipelined/testbench/tests.vh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 4b1b9a160..8a2765701 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -49,24 +49,24 @@ string tvpaths[] = '{ `EMBENCH, "bd_speedopt_speed/src/aha-mont64/aha-mont64", "bd_speedopt_speed/src/crc32/crc32", - "bd_speedopt_speed/src/cubic/cubic", + "bd_speedopt_speed/src/cubic/cubic", // cubic is likely going to removed when embench 2.0 launches "bd_speedopt_speed/src/edn/edn", "bd_speedopt_speed/src/huffbench/huffbench", "bd_speedopt_speed/src/matmult-int/matmult-int", - "bd_speedopt_speed/src/md5sum/md5sum", + // "bd_speedopt_speed/src/md5sum/md5sum", //commenting out tests from embench 2.0. When embench 2.0 launches stabilty, add these tests back "bd_speedopt_speed/src/minver/minver", "bd_speedopt_speed/src/nbody/nbody", "bd_speedopt_speed/src/nettle-aes/nettle-aes", "bd_speedopt_speed/src/nettle-sha256/nettle-sha256", "bd_speedopt_speed/src/nsichneu/nsichneu", "bd_speedopt_speed/src/picojpeg/picojpeg", - "bd_speedopt_speed/src/primecount/primecount", + // "bd_speedopt_speed/src/primecount/primecount", "bd_speedopt_speed/src/qrduino/qrduino", "bd_speedopt_speed/src/sglib-combined/sglib-combined", "bd_speedopt_speed/src/slre/slre", "bd_speedopt_speed/src/st/st", "bd_speedopt_speed/src/statemate/statemate", - "bd_speedopt_speed/src/tarfind/tarfind", + // "bd_speedopt_speed/src/tarfind/tarfind", "bd_speedopt_speed/src/ud/ud", "bd_speedopt_speed/src/wikisort/wikisort", "bd_sizeopt_speed/src/aha-mont64/aha-mont64", @@ -75,7 +75,7 @@ string tvpaths[] = '{ "bd_sizeopt_speed/src/edn/edn", "bd_sizeopt_speed/src/huffbench/huffbench", "bd_sizeopt_speed/src/matmult-int/matmult-int", - "bd_sizeopt_speed/src/md5sum/md5sum", + // "bd_sizeopt_speed/src/md5sum/md5sum", "bd_sizeopt_speed/src/minver/minver", "bd_sizeopt_speed/src/nbody/nbody", "bd_sizeopt_speed/src/nettle-aes/nettle-aes", @@ -88,7 +88,7 @@ string tvpaths[] = '{ "bd_sizeopt_speed/src/slre/slre", "bd_sizeopt_speed/src/st/st", "bd_sizeopt_speed/src/statemate/statemate", - "bd_sizeopt_speed/src/tarfind/tarfind", + // "bd_sizeopt_speed/src/tarfind/tarfind", "bd_sizeopt_speed/src/ud/ud", "bd_sizeopt_speed/src/wikisort/wikisort" }; From 20800b2714d9d95daa4c8b1f81ec5a29767fa0cc Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Tue, 19 Jul 2022 15:16:12 -0700 Subject: [PATCH 12/36] embench no longer launches run automatiacally, need to use make run --- benchmarks/embench/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index 255ea8be5..630864ded 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -4,7 +4,8 @@ embench_dir = ../../addins/embench-iot -all: build sim size +all: build +run: sim size allClean: clean all From d61f84e7510aa98ad3aae1af8d45994d532ad74f Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 19 Jul 2022 23:44:37 +0000 Subject: [PATCH 13/36] oprimized zeros and replaced complex ?: with always_comb --- pipelined/src/fpu/fcvt.sv | 21 ++++++--- pipelined/src/fpu/fmashiftcalc.sv | 4 -- pipelined/src/fpu/negateintres.sv | 9 +++- pipelined/src/fpu/resultsign.sv | 25 ++++++++--- pipelined/src/fpu/shiftcorrection.sv | 5 ++- pipelined/src/fpu/specialcase.sv | 65 ++++++++++++++++++---------- 6 files changed, 85 insertions(+), 44 deletions(-) diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index b9932523a..9d7f2d62d 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -103,7 +103,7 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign LzcInFull = IntToFp ? {1'b0, TrimInt, {`CVTLEN-`XLEN{1'b0}}} : + assign LzcInFull = IntToFp ? {TrimInt, {`CVTLEN-`XLEN+1{1'b0}}} : {Xm, {`CVTLEN-`NF{1'b0}}}; assign LzcIn = LzcInFull[`CVTLEN-1:0]; @@ -125,9 +125,10 @@ module fcvt ( // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - assign ShiftAmt = ToInt ? Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}} : - ResDenormUf&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0] : - (LeadingZeros); + always_comb + if(ToInt) ShiftAmt = Ce[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~Ce[`NE]}}; + else if (ResDenormUf&~IntToFp) ShiftAmt = (`LOGCVTLEN)'(`NF-1)+Ce[`LOGCVTLEN-1:0]; + else ShiftAmt = LeadingZeros; /////////////////////////////////////////////////////////////////////////// // exp calculations @@ -150,7 +151,9 @@ module fcvt ( assign NewBias = ToInt ? (`NE-1)'(1) : (`NE-1)'(`BIAS); end else if (`FPSIZES == 2) begin - assign NewBias = ToInt ? (`NE-1)'(1) : OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + logic [`NE-2:0] NewBiasToFp; + assign NewBiasToFp = OutFmt ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); + assign NewBias = ToInt ? (`NE-1)'(1) : NewBiasToFp; end else if (`FPSIZES == 3) begin logic [`NE-2:0] NewBiasToFp; @@ -177,7 +180,7 @@ module fcvt ( // select the old exponent // int -> fp : largest bias + XLEN // fp -> ??? : XExp - assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN) : Xe; + assign OldExp = IntToFp ? (`NE)'(`BIAS)+(`NE)'(`XLEN-1) : Xe; // calculate CalcExp // fp -> fp : @@ -222,7 +225,11 @@ module fcvt ( // - if 64-bit : check the msb of the 64-bit integer input and if it's signed // - if 32-bit : check the msb of the 32-bit integer input and if it's signed // - otherwise: the floating point input's sign - assign Cs = IntToFp ? Int64 ? Int[`XLEN-1]&Signed : Int[31]&Signed : Xs; + always_comb + if(IntToFp) + if(Int64) Cs = Int[`XLEN-1]&Signed; + else Cs = Int[31]&Signed; + else Cs = Xs; endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index 79953b212..d598efb7e 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -42,7 +42,6 @@ module fmashiftcalc( output logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt, // normalization shift count output logic [3*`NF+8:0] FmaShiftIn // is the sum zero ); - logic [$clog2(3*`NF+7)-1:0] DenormShift; // right shift if the result is denormalized //***change this later logic [`NE+1:0] PreNormSumExp; // the exponent of the normalized sum with the `FLEN bias logic [`NE+1:0] BiasCorr; @@ -149,9 +148,6 @@ module fmashiftcalc( // Determine if the result is denormal // assign FmaPreResultDenorm = $signed(NormSumExp)<=0 & ($signed(NormSumExp)>=$signed(-FracLen)) & ~FmaSZero; - // Determine the shift needed for denormal results - // - if not denorm add 1 to shift out the leading 1 - assign DenormShift = FmaPreResultDenorm ? NormSumExp[$clog2(3*`NF+7)-1:0] : 1; // set and calculate the shift input and amount // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; diff --git a/pipelined/src/fpu/negateintres.sv b/pipelined/src/fpu/negateintres.sv index dde515b94..7a696b379 100644 --- a/pipelined/src/fpu/negateintres.sv +++ b/pipelined/src/fpu/negateintres.sv @@ -42,7 +42,12 @@ module negateintres( // round and negate the positive res if needed assign CvtNegRes = Xs ? -({2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}) : {2'b0, Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`XLEN]}+{{`XLEN+1{1'b0}}, Plus1}; - assign CvtNegResMsbs = Signed ? Int64 ? CvtNegRes[`XLEN:`XLEN-1] : CvtNegRes[32:31] : - Int64 ? CvtNegRes[`XLEN+1:`XLEN] : CvtNegRes[33:32]; + always_comb + if(Signed) + if(Int64) CvtNegResMsbs = CvtNegRes[`XLEN:`XLEN-1]; + else CvtNegResMsbs = CvtNegRes[32:31]; + else + if(Int64) CvtNegResMsbs = CvtNegRes[`XLEN+1:`XLEN]; + else CvtNegResMsbs = CvtNegRes[33:32]; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index b8019b98b..c2912ece7 100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -46,11 +46,21 @@ module resultsign( logic Zeros; logic Infs; - // Determine the sign if the sum is zero - // if cancelation then 0 unless round to -infinity - // if multiply then Psgn - // otherwise psign - assign Zeros = (FmaPs^FmaAs)&~(FmaMe[`NE+1] | ((FmaMe == 0) & (R|S)))&~Mult ? Frm[1:0] == 2'b10 : FmaPs; + // The IEEE754-2019 standard specifies: + // - the sign of an exact zero sum (with operands of diffrent signs) should be positive unless rounding toward negitive infinity + // - when the exact result of an FMA opperation is non-zero, but is zero due to rounding, use the sign of the exact result + // - if x = +0 or -0 then x+x=x and x-(-x)=x + // - the sign of a product is the exclisive or or the opperand's signs + // Zero sign will only be selected if: + // - P=Z and a cancelation occurs - exact zero + // - Z is zero and P is zero - exact zero + // - P is killed and Z is zero - Psgn + // - Z is killed and P is zero - impossible + // Zero sign calculation: + // - if a multiply opperation is done, then use the products sign(Ps) + // - if the zero sum is not exactly zero i.e. R|S use the sign of the exact result (which is the product's sign) + // - if an effective addition occurs (P+A or -P+-A or P--A) then use the product's sign + assign Zeros = (FmaPs^FmaAs)&~(R|S)&~Mult ? Frm[1:0] == 2'b10 : FmaPs; // is the result negitive @@ -58,6 +68,9 @@ module resultsign( // if -p + z is the Sum positive // if -p - z then the Sum is negitive assign Infs = ZInf ? FmaAs : FmaPs; - assign Ws = InfIn&FmaOp ? Infs : FmaSZero&FmaOp ? Zeros : Ms; + always_comb + if(InfIn&FmaOp) Ws = Infs; + else if(FmaSZero&FmaOp) Ws = Zeros; + else Ws = Ms; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index 50cffb078..514edbee1 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -55,7 +55,10 @@ module shiftcorrection( // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits - assign Mf = FmaOp ? {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}} : DivOp&~DivResDenorm ? CorrQuotShifted : Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; + always_comb + if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}}; + else if (DivOp&~DivResDenorm) Mf = CorrQuotShifted; + else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 assign FmaMe = (NormSumExp+{{`NE+1{1'b0}}, LZAPlus1}+{{`NE{1'b0}}, LZAPlus2, 1'b0}+{{`NE+1{1'b0}}, ~ResDenorm&FmaPreResultDenorm}+{{`NE+1{1'b0}}, &NormSumExp&Shifted[3*`NF+6]}) & {`NE+2{~(FmaSZero|ResDenorm)}}; diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv index 3c28eae2e..6014962a1 100644 --- a/pipelined/src/fpu/specialcase.sv +++ b/pipelined/src/fpu/specialcase.sv @@ -95,9 +95,14 @@ module specialcase( end else begin assign InvalidRes = OutFmt ? {1'b0, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{`FLEN-`LEN1{1'b1}}, 1'b0, {`NE1{1'b1}}, 1'b1, (`NF1-1)'(0)}; end - - assign OfRes = OutFmt ? OfResMax ? {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {Ws, {`NE{1'b1}}, {`NF{1'b0}}} : - OfResMax ? {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}} : {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; + + always_comb + if(OutFmt) + if(OfResMax) OfRes = {Ws, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}}; + else OfRes = {Ws, {`NE{1'b1}}, {`NF{1'b0}}}; + else + if(OfResMax) OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1-1{1'b1}}, 1'b0, {`NF1{1'b1}}}; + else OfRes = {{`FLEN-`LEN1{1'b1}}, Ws, {`NE1{1'b1}}, (`NF1)'(0)}; assign UfRes = OutFmt ? {Ws, (`FLEN-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)} : {{`FLEN-`LEN1{1'b1}}, Ws, (`LEN1-2)'(0), Plus1&Frm[1]&~(DivOp&YInf)}; assign NormRes = OutFmt ? {Ws, Re, Rf} : {{`FLEN-`LEN1{1'b1}}, Ws, Re[`NE1-1:0], Rf[`NF-1:`NF-`NF1]}; @@ -234,20 +239,21 @@ module specialcase( assign KillRes = CvtOp ? (CvtResUf|(XZero&~IntToFp)|(IntZero&IntToFp)) : FullRe[`NE+1] | (((YInf&~XInf)|XZero)&DivOp);//Underflow & ~ResDenorm & (Re!=1); assign SelOfRes = Overflow|DivByZero|(InfIn&~(YInf&DivOp)); // output infinity with result sign if divide by zero - if(`IEEE754) begin - assign PostProcRes = XNaN&~(IntToFp&CvtOp) ? XNaNRes : - YNaN&~CvtOp ? YNaNRes : - ZNaN&FmaOp ? ZNaNRes : - Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end else begin - assign PostProcRes = NaNIn|Invalid ? InvalidRes : - SelOfRes ? OfRes : - KillRes ? UfRes : - NormRes; - end + if(`IEEE754) + always_comb + if(XNaN&~(IntToFp&CvtOp)) PostProcRes = XNaNRes; + else if(YNaN&~CvtOp) PostProcRes = YNaNRes; + else if(ZNaN&FmaOp) PostProcRes = ZNaNRes; + else if(Invalid) PostProcRes = InvalidRes; + else if(SelOfRes) PostProcRes = OfRes; + else if(KillRes) PostProcRes = UfRes; + else PostProcRes = NormRes; + else + always_comb + if(NaNIn|Invalid) PostProcRes = InvalidRes; + else if(SelOfRes) PostProcRes = OfRes; + else if(KillRes) PostProcRes = UfRes; + else PostProcRes = NormRes; /////////////////////////////////////////////////////////////////////////////////////// // @@ -272,10 +278,17 @@ module specialcase( // unsigned | 2^32-1 | 2^64-1 | // // other: 32 bit unsinged res should be sign extended as if it were a signed number - assign OfIntRes = Signed ? Xs&~XNaN ? Int64 ? {1'b1, {`XLEN-1{1'b0}}} : {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}} : // signed negitive - Int64 ? {1'b0, {`XLEN-1{1'b1}}} : {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}} : // signed positive - Xs&~XNaN ? {`XLEN{1'b0}} : // unsigned negitive - {`XLEN{1'b1}};// unsigned positive + always_comb + if(Signed) + if(Xs&~XNaN) // signed negitive + if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}}; + else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}}; + else // signed positive + if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}}; + else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}}; + else + if(Xs&~XNaN) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive + else OfIntRes = {`XLEN{1'b1}}; // unsigned positive // select the integer output @@ -284,7 +297,11 @@ module specialcase( // - if rounding and signed opperation and negitive input, output -1 // - otherwise output a rounded 0 // - otherwise output the normal res (trmined and sign extended if nessisary) - assign FCvtIntRes = IntInvalid ? OfIntRes : - CvtCe[`NE] ? Xs&Signed&Plus1 ? {{`XLEN{1'b1}}} : {{`XLEN-1{1'b0}}, Plus1} : //CalcExp has to come after invalid ***swap to actual mux at some point?? - Int64 ? CvtNegRes[`XLEN-1:0] : {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; + always_comb + if(IntInvalid) FCvtIntRes = OfIntRes; + else if(CvtCe[`NE]) + if(Xs&Signed&Plus1) FCvtIntRes = {{`XLEN{1'b1}}}; + else FCvtIntRes = {{`XLEN-1{1'b0}}, Plus1}; + else if(Int64) FCvtIntRes = CvtNegRes[`XLEN-1:0]; + else FCvtIntRes = {{`XLEN-32{CvtNegRes[31]}}, CvtNegRes[31:0]}; endmodule \ No newline at end of file From 0f94177765c6a47ff7c468d2136f180803de623d Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 20 Jul 2022 01:36:25 +0000 Subject: [PATCH 14/36] small changes --- pipelined/src/fpu/divsqrt.sv | 2 +- pipelined/src/fpu/srt.sv | 12 ++++++------ pipelined/srt/srt.sv | 29 +++++++++++++++++------------ 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index cbf7f95f0..ffc60026b 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -41,7 +41,7 @@ module divsqrt( input logic XNaNE, YNaNE, input logic DivStartE, input logic StallM, - input logic StallE, + input logic StallE, output logic DivStickyM, output logic DivBusy, output logic DivDone, diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 9e0315113..ee5ae9a39 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -34,18 +34,18 @@ module srt( input logic clk, input logic DivStart, input logic DivBusy, - input logic [`FMTBITS-1:0] FmtE, + input logic [`FMTBITS-1:0] FmtE, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, + input logic [`DIVLEN-1:0] X, + input logic [`DIVLEN-1:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, output logic [`QLEN-1-(`RADIX/4):0] Quot, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] StickyWSA, output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] DivCalcExpM, + output logic [`NE+1:0] DivCalcExpM, output logic [`XLEN-1:0] Rem ); diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 949335bf0..13a59d848 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -55,7 +55,7 @@ module srt ( logic qp, qz, qn; // quotient is +1, 0, or -1 logic [`NE-1:0] calcExp; logic calcSign; - logic [`DIVLEN+3:0] X, Dpreproc, C, F, AddIn; + logic [`DIVLEN+3:0] X, Dpreproc, C, F, S, SM, AddIn; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; logic intSign; @@ -90,8 +90,9 @@ module srt ( // If only implementing division, use divide otfc // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot); // otherwise use sotfc - creg sotfcC(clk, Start, C); - sotfc2 sotfc2(clk, Start, qp, qn, C, Quot, F); + creg sotfcC(clk, Start, Sqrt, C); + sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, Quot, S, SM); + fsel2 fsel(qp, qn, C, S, SM, F); // Adder input selection assign AddIn = Sqrt ? F : Dsel; @@ -214,11 +215,16 @@ module fsel2 ( // Generate for both positive and negative bits assign FP = ~S & C; assign FN = SM | (C & (~C << 2)); - assign FZ = {(`DIVLEN+4){1'b0}}; + assign FZ = '0; // Choose which adder input will be used - assign F = sp ? FP : (sn ? FN : FZ); + always_comb + if (sp) F = FP; + else if (sn) F = FN; + else F = FZ; + + // assign F = sp ? FP : (sn ? FN : FZ); endmodule @@ -266,17 +272,18 @@ module sotfc2( input logic clk, input logic Start, input logic sp, sn, + input logic Sqrt, input logic [`DIVLEN+3:0] C, output logic [`DIVLEN-2:0] Sq, - output logic [`DIVLEN+3:0] F + output logic [`DIVLEN+3:0] S, SM ); // The on-the-fly converter transfers the square root // bits to the quotient as they come. // Use this otfc for division and square root. - logic [`DIVLEN+3:0] S, SM, SNext, SMNext, SMux; + logic [`DIVLEN+3:0] SNext, SMNext, SMux; flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); - mux2 #(`DIVLEN+4) Smux(SNext, {4'b0001, {(`DIVLEN){1'b0}}}, Start, SMux); + mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); flop #(`DIVLEN+4) Sreg(clk, SMux, S); always_comb begin @@ -292,9 +299,6 @@ module sotfc2( end end assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; - - fsel2 fsel(sp, sn, C, S, SM, F); - endmodule ////////////////////////// @@ -302,11 +306,12 @@ endmodule ////////////////////////// module creg(input logic clk, input logic Start, + input logic Sqrt, output logic [`DIVLEN+3:0] C ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {6'b111111, {(`DIVLEN-2){1'b0}}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule From 36bd17984bf50b9da89424a69ba32d788ba45c49 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 20 Jul 2022 01:49:33 +0000 Subject: [PATCH 15/36] Reordered embench Makefile to run size tests first --- benchmarks/embench/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/embench/Makefile b/benchmarks/embench/Makefile index 630864ded..21c30cf6e 100644 --- a/benchmarks/embench/Makefile +++ b/benchmarks/embench/Makefile @@ -5,7 +5,7 @@ embench_dir = ../../addins/embench-iot all: build -run: sim size +run: size sim allClean: clean all From c3a4a2abdf14068e85653f22a6e8a9221775ba44 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 20 Jul 2022 02:00:43 +0000 Subject: [PATCH 16/36] New radix-2 algorithm implemented and working --- pipelined/srt/srt.sv | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 13a59d848..ed55ddd8d 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -75,7 +75,7 @@ module srt ( // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) - qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN-1], WC[`DIVLEN+3:`DIVLEN-1], Sqrt, qp, qz, qn); + qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], Sqrt, qp, qz, qn); flopen #(`NE) expflop(clk, Start, calcExp, rExp); flopen #(1) signflop(clk, Start, calcSign, rsign); @@ -154,7 +154,7 @@ module srtpreproc ( // Selecting correct divider inputs assign DivX = Int ? PreprocA : PreprocX; - assign SqrtX = XExp[0] ? {4'b0000, SrcXFrac, 1'b0} : {5'b11111, SrcXFrac}; + assign SqrtX = XExp[0] ? {5'b11101, SrcXFrac} : {4'b1111, SrcXFrac, 1'b0}; assign X = Sqrt ? {SqrtX, {(`EXTRAFRACBITS-1){1'b0}}} : {4'b0001, DivX}; assign D = {4'b0001, Int ? PreprocB : PreprocY}; @@ -169,13 +169,13 @@ endmodule ///////////////////////////////// // Quotient Selection, Radix 2 // ///////////////////////////////// -module qsel2 ( // *** eventually just change to 4 bits - input logic [`DIVLEN+3:`DIVLEN-1] ps, pc, +module qsel2 ( + input logic [`DIVLEN+3:`DIVLEN] ps, pc, input logic Sqrt, output logic qp, qz, qn ); - logic [`DIVLEN+3:`DIVLEN-1] p, g; + logic [`DIVLEN+3:`DIVLEN] p, g; logic magnitude, sign, cout; // The quotient selection logic is presented for simplicity, not @@ -186,8 +186,8 @@ module qsel2 ( // *** eventually just change to 4 bits assign p = ps ^ pc; assign g = ps & pc; - assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN-1]); - assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN] | (Sqrt & (p[`DIVLEN] & g[`DIVLEN-1]))))); + assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); + assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & (g[`DIVLEN]))); assign #1 sign = p[`DIVLEN+3] ^ cout; /* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & (ps[52]^pc[52])); @@ -283,7 +283,7 @@ module sotfc2( logic [`DIVLEN+3:0] SNext, SMNext, SMux; flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); - mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); + mux2 #(`DIVLEN+4) Smux(SNext, {2'b00, Sqrt, {(`DIVLEN+1){1'b0}}}, Start, SMux); flop #(`DIVLEN+4) Sreg(clk, SMux, S); always_comb begin @@ -298,7 +298,7 @@ module sotfc2( SMNext = SM | ((C << 1) & ~(C << 2)); end end - assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; + assign Sq = S[`DIVLEN+1] ? S[`DIVLEN:2] : S[`DIVLEN-1:1]; endmodule ////////////////////////// @@ -311,7 +311,7 @@ module creg(input logic clk, ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule From cce57fdcc569fd7631c37b27dcbf3a1d1ff928b5 Mon Sep 17 00:00:00 2001 From: cturek Date: Wed, 20 Jul 2022 02:04:20 +0000 Subject: [PATCH 17/36] divsqrt working for floating point --- pipelined/srt/srt.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index ed55ddd8d..27fac324e 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -311,7 +311,7 @@ module creg(input logic clk, ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b11111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule From b26297e87490c5f6e986967a4d89d36d6ccb901a Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 20 Jul 2022 02:27:39 +0000 Subject: [PATCH 18/36] moved ctrl signal registers into fctrl, also a lot of code cleaning --- pipelined/src/fpu/divsqrt.sv | 20 +- pipelined/src/fpu/fclassify.sv | 38 +-- pipelined/src/fpu/fcmp.sv | 138 ++++++----- pipelined/src/fpu/fctrl.sv | 51 +++- pipelined/src/fpu/fcvt.sv | 12 +- pipelined/src/fpu/fhazard.sv | 28 +-- pipelined/src/fpu/fma.sv | 18 +- pipelined/src/fpu/fmashiftcalc.sv | 8 +- pipelined/src/fpu/fpu.sv | 336 +++++++++++++-------------- pipelined/src/fpu/fsgninj.sv | 45 ++-- pipelined/src/fpu/otfc.sv | 2 +- pipelined/src/fpu/postprocess.sv | 20 +- pipelined/src/fpu/qsel.sv | 2 +- pipelined/src/fpu/shiftcorrection.sv | 6 +- pipelined/src/fpu/srt.sv | 26 +-- pipelined/src/fpu/srtfsm.sv | 6 +- pipelined/src/fpu/unpack.sv | 45 ++-- pipelined/src/fpu/unpackinput.sv | 38 +-- 18 files changed, 439 insertions(+), 400 deletions(-) diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index ffc60026b..a2f0ba8e3 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -34,20 +34,20 @@ module divsqrt( input logic clk, input logic reset, input logic [`FMTBITS-1:0] FmtE, - input logic [`NF:0] XManE, YManE, - input logic [`NE-1:0] XExpE, YExpE, + input logic [`NF:0] XmE, YmE, + input logic [`NE-1:0] XeE, YeE, input logic XInfE, YInfE, input logic XZeroE, YZeroE, input logic XNaNE, YNaNE, input logic DivStartE, input logic StallM, - input logic StallE, - output logic DivStickyM, + input logic StallE, + output logic DivSM, output logic DivBusy, output logic DivDone, - output logic [`NE+1:0] DivCalcExpM, + output logic [`NE+1:0] QeM, output logic [`DURLEN-1:0] EarlyTermShiftM, - output logic [`QLEN-1-(`RADIX/4):0] QuotM + output logic [`QLEN-1-(`RADIX/4):0] QmM // output logic [`XLEN-1:0] RemM, ); @@ -60,10 +60,10 @@ module divsqrt( logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.Xm(XManE), .Dur, .Ym(YManE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); - srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivStickyE(DivStickyM), .XNaNE, .YNaNE, + srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XExpE), .Ye(YExpE), .XZeroE, .YZeroE, - .StickyWSA, .DivBusy, .Quot(QuotM), .Rem(), .DivCalcExpM); + srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fclassify.sv b/pipelined/src/fpu/fclassify.sv index 6c7ab451f..6aaec00a4 100644 --- a/pipelined/src/fpu/fclassify.sv +++ b/pipelined/src/fpu/fclassify.sv @@ -29,29 +29,29 @@ `include "wally-config.vh" module fclassify ( - input logic XSgnE, // sign bit - input logic XNaNE, // is NaN - input logic XSNaNE, // is signaling NaN - input logic XDenormE, // is denormal - input logic XZeroE, // is zero - input logic XInfE, // is infinity - output logic [`XLEN-1:0] ClassResE // classify result - ); + input logic Xs, // sign bit + input logic XNaN, // is NaN + input logic XSNaN, // is signaling NaN + input logic XDenorm,// is denormal + input logic XZero, // is zero + input logic XInf, // is infinity + output logic [`XLEN-1:0] ClassRes// classify result +); logic PInf, PZero, PNorm, PDenorm; logic NInf, NZero, NNorm, NDenorm; - logic XNormE; + logic XNorm; // determine the sub categories - assign XNormE = ~(XNaNE | XInfE | XDenormE | XZeroE); - assign PInf = ~XSgnE&XInfE; - assign NInf = XSgnE&XInfE; - assign PNorm = ~XSgnE&XNormE; - assign NNorm = XSgnE&XNormE; - assign PDenorm = ~XSgnE&XDenormE; - assign NDenorm = XSgnE&XDenormE; - assign PZero = ~XSgnE&XZeroE; - assign NZero = XSgnE&XZeroE; + assign XNorm= ~(XNaN | XInf| XDenorm| XZero); + assign PInf = ~Xs&XInf; + assign NInf = Xs&XInf; + assign PNorm = ~Xs&XNorm; + assign NNorm = Xs&XNorm; + assign PDenorm = ~Xs&XDenorm; + assign NDenorm = Xs&XDenorm; + assign PZero = ~Xs&XZero; + assign NZero = Xs&XZero; // determine sub category and combine into the result // bit 0 - -Inf @@ -64,6 +64,6 @@ module fclassify ( // bit 7 - +Inf // bit 8 - signaling NaN // bit 9 - quiet NaN - assign ClassResE = {{`XLEN-10{1'b0}}, XNaNE&~XSNaNE, XSNaNE, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; + assign ClassRes = {{`XLEN-10{1'b0}}, XNaN&~XSNaN, XSNaN, PInf, PNorm, PDenorm, PZero, NZero, NDenorm, NNorm, NInf}; endmodule diff --git a/pipelined/src/fpu/fcmp.sv b/pipelined/src/fpu/fcmp.sv index 9c6757848..48ff536f6 100755 --- a/pipelined/src/fpu/fcmp.sv +++ b/pipelined/src/fpu/fcmp.sv @@ -27,9 +27,10 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // OR OTHER DEALINGS IN THE SOFTWARE. //////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" -// FOpCtrlE values +// OpCtrl values // 110 min // 101 max // 010 equal @@ -37,36 +38,32 @@ // 011 less than or equal module fcmp ( - input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single - input logic [2:0] FOpCtrlE, // see above table - input logic XSgnE, YSgnE, // input signs - input logic [`NE-1:0] XExpE, YExpE, // input exponents - input logic [`NF:0] XManE, YManE, // input mantissa - input logic XZeroE, YZeroE, // is zero - input logic XNaNE, YNaNE, // is NaN - input logic XSNaNE, YSNaNE, // is signaling NaN - input logic [`FLEN-1:0] FSrcXE, FSrcYE, // original, non-converted to double, inputs - output logic CmpNVE, // invalid flag - output logic [`FLEN-1:0] CmpFpResE, // compare resilt - output logic [`XLEN-1:0] CmpIntResE // compare resilt + input logic [`FMTBITS-1:0] Fmt, // format of fp number + input logic [2:0] OpCtrl, // see above table + input logic Xs, Ys, // input signs + input logic [`NE-1:0] Xe, Ye, // input exponents + input logic [`NF:0] Xm, Ym, // input mantissa + input logic XZero, YZero, // is zero + input logic XNaN, YNaN, // is NaN + input logic XSNaN, YSNaN, // is signaling NaN + input logic [`FLEN-1:0] X, Y, // original inputs (before unpacker) + output logic CmpNV, // invalid flag + output logic [`FLEN-1:0] CmpFpRes, // compare floating-point result + output logic [`XLEN-1:0] CmpIntRes // compare integer result ); - logic LTabs, LT, EQ; // is X < or > or = Y - logic [`FLEN-1:0] NaNRes; - logic BothZero, EitherNaN, EitherSNaN; + logic LTabs, LT, EQ; // is X < or > or = Y + logic [`FLEN-1:0] NaNRes; // NaN result + logic BothZero; // are both inputs zero + logic EitherNaN, EitherSNaN; // are either input a (signaling) NaN - assign LTabs= {1'b0, XExpE, XManE} < {1'b0, YExpE, YManE}; // unsigned comparison, treating FP as integers - assign LT = (XSgnE & ~YSgnE) | (XSgnE & YSgnE & ~LTabs & ~EQ) | (~XSgnE & ~YSgnE & LTabs); - // assign LT = {~XSgnE, XExpE, XManE[`NF-1:0]} < {~YSgnE, YExpE, YManE[`NF-1:0]}; // *** James look at whether we can simplify to this, but it fails regression + assign LTabs= {1'b0, Xe, Xm} < {1'b0, Ye, Ym}; // unsigned comparison, treating FP as integers + assign LT = (Xs & ~Ys) | (Xs & Ys & ~LTabs & ~EQ) | (~Xs & ~Ys & LTabs); // signed comparison + assign EQ = (X == Y); - //assign LT = $signed({XSgnE, XExpE, XManE[`NF-1:0]}) < $signed({YSgnE, YExpE, YManE[`NF-1:0]}); - //assign LT = XInt < YInt; -// assign LT = XSgnE^YSgnE ? XSgnE : XExpE==YExpE ? ((XManE Y + else // MIN + if(XNaN) + if(YNaN) CmpFpRes = NaNRes; // X = NaN Y = NaN + else CmpFpRes = Y; // X = NaN Y != NaN + else + if(YNaN) CmpFpRes = X; // X != NaN Y = NaN + else // X,Y != NaN + if(LT) CmpFpRes = X; // X < Y + else CmpFpRes = Y; // X > Y + + // LT/LE/EQ + // - -0 = 0 + // - inf = inf and -inf = -inf + // - return 0 if comparison with NaN (unordered) + assign CmpIntRes = {(`XLEN-1)'(0), (((EQ|BothZero)&OpCtrl[1])|(LT&OpCtrl[0]&~BothZero))&~EitherNaN}; endmodule diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 5c553e864..85047248d 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -29,25 +29,41 @@ `include "wally-config.vh" module fctrl ( + input logic clk, + input logic reset, + input logic StallE, StallM, StallW, // stall signals + input logic FlushE, FlushM, FlushW, // flush signals + input logic [31:0] InstrD, input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision input logic [6:0] OpD, // bits 6:0 of instruction input logic [4:0] Rs2D, // bits 24:20 of instruction input logic [2:0] Funct3D, // bits 14:12 of instruction - may contain rounding mode input logic [2:0] FRM_REGW, // rounding mode from CSR input logic [1:0] STATUS_FS, // is FPU enabled? + input logic FDivBusyE, // is the divider busy output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction - output logic FRegWriteD, // FP register write enable - output logic FDivStartD, // Start division or squareroot - output logic [1:0] FResSelD, // select result to be written to fp register - output logic [2:0] FOpCtrlD, // chooses which opperation to do - specifics shown at bottom of module and in each unit - output logic [1:0] PostProcSelD, - output logic [`FMTBITS-1:0] FmtD, // precision - single-0 double-1 - output logic [2:0] FrmD, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude - output logic FWriteIntD // is the result written to the integer register + output logic FRegWriteM, FRegWriteW, // FP register write enable + output logic [2:0] FrmM, // FP rounding mode + output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format + output logic DivStartE, // Start division or squareroot + output logic FWriteIntE, FWriteIntM, // Write to integer register + output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component + output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage + output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit + output logic [4:0] Adr1E, Adr2E, Adr3E // adresses of each input ); `define FCTRLW 11 logic [`FCTRLW-1:0] ControlsD; + logic FRegWriteD; // FP register write enable + logic DivStartD; // integer register write enable + logic FWriteIntD; // integer register write enable + logic FRegWriteE; // FP register write enable + logic [2:0] OpCtrlD; // Select which opperation to do in each component + logic [1:0] PostProcSelD; // select result in the post processing unit + logic [1:0] FResSelD; // Select one of the results that finish in the memory stage + logic [2:0] FrmD, FrmE; // FP rounding mode + logic [`FMTBITS-1:0] FmtD; // FP format //*** will putting x for don't cares reduce area in synthisis??? // FPU Instruction Decoder always_comb @@ -130,7 +146,7 @@ module fctrl ( endcase // unswizzle control bits - assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, FOpCtrlD, FDivStartD, IllegalFPUInstrD} = ControlsD; + assign {FRegWriteD, FWriteIntD, FResSelD, PostProcSelD, OpCtrlD, DivStartD, IllegalFPUInstrD} = ControlsD; // rounding modes: // 000 - round to nearest, ties to even @@ -168,7 +184,7 @@ module fctrl ( // 10 fma // Other Sel: -// Ctrl signal = {FOpCtrl[2], &FOpctrl[1:0]} +// Ctrl signal = {OpCtrl[2], &FOpctrl[1:0]} // 000 - sign 00 // 001 - negate sign 00 // 010 - xor sign 00 @@ -205,5 +221,20 @@ module fctrl ( // 01 - negate sign // 10 - xor sign + // D/E pipleine register + flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, OpCtrlD, FWriteIntD}, + {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, OpCtrlE, FWriteIntE}); + flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE); + // E/M pipleine register + flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, + {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE}, + {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM}); + // M/W pipleine register + flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FRegWriteM, FResSelM}, + {FRegWriteW, FResSelW}); endmodule diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index 9d7f2d62d..d2967887f 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -35,7 +35,7 @@ module fcvt ( input logic [`NE-1:0] Xe, // input's exponent input logic [`NF:0] Xm, // input's fraction input logic [`XLEN-1:0] Int, // integer input - from IEU - input logic [2:0] FOpCtrl, // choose which opperation (look below for values) + input logic [2:0] OpCtrl, // choose which opperation (look below for values) input logic ToInt, // is fp->int (since it's writting to the integer register) input logic XZero, // is the input zero input logic XDenorm, // is the input denormalized @@ -73,17 +73,17 @@ module fcvt ( // seperate OpCtrl for code readability - assign Signed = FOpCtrl[0]; - assign Int64 = FOpCtrl[1]; - assign IntToFp = FOpCtrl[2]; + assign Signed = OpCtrl[0]; + assign Int64 = OpCtrl[1]; + assign IntToFp = OpCtrl[2]; // choose the ouptut format depending on the opperation // - fp -> fp: OpCtrl contains the percision of the output // - int -> fp: Fmt contains the percision of the output if (`FPSIZES == 2) - assign OutFmt = IntToFp ? Fmt : (FOpCtrl[1:0] == `FMT); + assign OutFmt = IntToFp ? Fmt : (OpCtrl[1:0] == `FMT); else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp ? Fmt : FOpCtrl[1:0]; + assign OutFmt = IntToFp ? Fmt : OpCtrl[1:0]; /////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/fpu/fhazard.sv b/pipelined/src/fpu/fhazard.sv index ca31d904f..36a0ff82f 100644 --- a/pipelined/src/fpu/fhazard.sv +++ b/pipelined/src/fpu/fhazard.sv @@ -31,20 +31,20 @@ `include "wally-config.vh" module fhazard( - input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses - input logic FRegWriteM, FRegWriteW, // is the fp register being written to - input logic [4:0] RdM, RdW, // the adress being written to - input logic [1:0] FResSelM, // the result being selected + input logic [4:0] Adr1E, Adr2E, Adr3E, // read data adresses + input logic FRegWriteM, FRegWriteW, // is the fp register being written to + input logic [4:0] RdM, RdW, // the adress being written to + input logic [1:0] FResSelM, // the result being selected output logic FStallD, // stall the decode stage - output logic [1:0] FForwardXE, FForwardYE, FForwardZE // select a forwarded value + output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value ); always_comb begin // set defaults - FForwardXE = 2'b00; // choose FRD1E - FForwardYE = 2'b00; // choose FRD2E - FForwardZE = 2'b00; // choose FRD3E + ForwardXE = 2'b00; // choose FRD1E + ForwardYE = 2'b00; // choose FRD2E + ForwardZE = 2'b00; // choose FRD3E FStallD = 0; //*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait @@ -52,28 +52,28 @@ module fhazard( // if the needed value is in the memory stage - input 1 if ((Adr1E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) FForwardXE = 2'b10; // choose FResM + if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage - else if ((Adr1E == RdW) & FRegWriteW) FForwardXE = 2'b01; // choose FPUResult64W + else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W // if the needed value is in the memory stage - input 2 if ((Adr2E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) FForwardYE = 2'b10; // choose FResM + if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage - else if ((Adr2E == RdW) & FRegWriteW) FForwardYE = 2'b01; // choose FPUResult64W + else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W // if the needed value is in the memory stage - input 3 if ((Adr3E == RdM) & FRegWriteM) // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) FForwardZE = 2'b10; // choose FResM + if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM else FStallD = 1; // otherwise stall // if the needed value is in the writeback stage - else if ((Adr3E == RdW) & FRegWriteW) FForwardZE = 2'b01; // choose FPUResult64W + else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W end diff --git a/pipelined/src/fpu/fma.sv b/pipelined/src/fpu/fma.sv index 3f4cc2ac5..067147ee6 100644 --- a/pipelined/src/fpu/fma.sv +++ b/pipelined/src/fpu/fma.sv @@ -34,7 +34,7 @@ module fma( input logic [`NE-1:0] Xe, Ye, Ze, // input's biased exponents in B(NE.0) format input logic [`NF:0] Xm, Ym, Zm, // input's significands in U(0.NF) format input logic XZero, YZero, ZZero, // is the input zero - input logic [2:0] FOpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) + input logic [2:0] OpCtrl, // 000 = fmadd (X*Y)+Z, 001 = fmsub (X*Y)-Z, 010 = fnmsub -(X*Y)+Z, 011 = fnmadd -(X*Y)-Z, 100 = fmul (X*Y) input logic [`FMTBITS-1:0] Fmt, // format of the result single double half or quad output logic [`NE+1:0] Pe, // the product's exponent B(NE+2.0) format; adds 2 bits to allow for size of number and negative sign output logic ZmSticky, // sticky bit that is calculated during alignment @@ -46,7 +46,7 @@ module fma( output logic Ps, // the product's sign output logic Ss, // the sum's sign output logic [`NE+1:0] Se, - output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count ); logic [2*`NF+1:0] Pm; // the product's significand in U(2.2Nf) format @@ -72,7 +72,7 @@ module fma( // Alignment shifter /////////////////////////////////////////////////////////////////////////////// // calculate the signs and take the opperation into account - sign sign(.FOpCtrl, .Xs, .Ys, .Zs, .Ps, .As); + sign sign(.OpCtrl, .Xs, .Ys, .Zs, .Ps, .As); align align(.Ze, .Zm, .XZero, .YZero, .ZZero, .Xe, .Ye, .Am, .ZmSticky, .KillProd); @@ -85,7 +85,7 @@ module fma( add add(.Am, .Pm, .Ze, .Pe, .Ps, .As, .KillProd, .ZmSticky, .AmInv, .PmKilled, .NegSum, .InvA, .Sm, .Se, .Ss); - loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .NCnt); + loa loa(.A(AmInv+{(3*`NF+6)'(0),InvA&~((ZmSticky&~KillProd))}), .P({PmKilled, 1'b0, InvA&Ps&ZmSticky&KillProd}), .SCnt); endmodule @@ -120,7 +120,7 @@ endmodule module sign( - input logic [2:0] FOpCtrl, // opperation contol + input logic [2:0] OpCtrl, // opperation contol input logic Xs, Ys, Zs, // sign of the inputs output logic Ps, // the product's sign - takes opperation into account output logic As // aligned addend sign used in fma - takes opperation into account @@ -130,9 +130,9 @@ module sign( // Negate product's sign if FNMADD or FNMSUB // flip is negation opperation - assign Ps = Xs ^ Ys ^ (FOpCtrl[1]&~FOpCtrl[2]); + assign Ps = Xs ^ Ys ^ (OpCtrl[1]&~OpCtrl[2]); // flip if subtraction - assign As = Zs^FOpCtrl[0]; + assign As = Zs^OpCtrl[0]; endmodule @@ -275,7 +275,7 @@ endmodule module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEEE Sym. Computer Arithmetic, 2001] input logic [3*`NF+6:0] A, // addend input logic [2*`NF+3:0] P, // product - output logic [$clog2(3*`NF+7)-1:0] NCnt // normalization shift count for the positive result + output logic [$clog2(3*`NF+7)-1:0] SCnt // normalization shift count for the positive result ); logic [3*`NF+6:0] T; @@ -300,6 +300,6 @@ module loa( // [Schmookler & Nowka, Leading zero anticipation and detection, IEE - lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(NCnt)); + lzc #(3*`NF+7) lzc (.num(f), .ZeroCnt(SCnt)); endmodule diff --git a/pipelined/src/fpu/fmashiftcalc.sv b/pipelined/src/fpu/fmashiftcalc.sv index d598efb7e..7464149f6 100644 --- a/pipelined/src/fpu/fmashiftcalc.sv +++ b/pipelined/src/fpu/fmashiftcalc.sv @@ -32,7 +32,7 @@ module fmashiftcalc( input logic [3*`NF+5:0] FmaSm, // the positive sum input logic [`NE-1:0] Ze, // exponent of Z input logic [`NE+1:0] FmaPe, // X exponent + Y exponent - bias - input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // normalization shift count + input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // normalization shift count input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single input logic FmaKillProd, // is the product set to zero input logic [`NE+1:0] FmaSe, @@ -52,7 +52,7 @@ module fmashiftcalc( // Determine if the sum is zero assign FmaSZero = ~(|FmaSm); // calculate the sum's exponent - assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaNCnt} + (`NE+2)'(`NF+4); + assign PreNormSumExp = FmaSe + {{`NE+2-$unsigned($clog2(3*`NF+7)){1'b1}}, ~FmaSCnt} + (`NE+2)'(`NF+4); //convert the sum's exponent into the proper percision if (`FPSIZES == 1) begin @@ -152,7 +152,7 @@ module fmashiftcalc( // - shift once if killing a product and the result is denormalized assign FmaShiftIn = {3'b0, FmaSm}; if (`FPSIZES == 1) - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaNCnt+1; + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3): FmaSCnt+1; else - assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaNCnt+1; + assign FmaShiftAmt = FmaPreResultDenorm ? FmaSe[$clog2(3*`NF+7)-1:0]+($clog2(3*`NF+7))'(`NF+3)+BiasCorr[$clog2(3*`NF+7)-1:0]: FmaSCnt+1; endmodule diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index a9c0ac247..6d9b9cf47 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -30,28 +30,28 @@ `include "wally-config.vh" module fpu ( - input logic clk, - input logic reset, - input logic [2:0] FRM_REGW, // Rounding mode from CSR - input logic [31:0] InstrD, // instruction from IFU - input logic [`FLEN-1:0] ReadDataW,// Read data from memory - input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input being processed (from IEU) - input logic StallE, StallM, StallW, // stall signals from HZU - input logic FlushE, FlushM, FlushW, // flush signals from HZU - input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) - input logic [1:0] STATUS_FS, // Is floating-point enabled? - output logic FRegWriteM, // FP register write enable - output logic FpLoadStoreM, // Fp load instruction? - output logic FStore2, - output logic FStallD, // Stall the decode stage - output logic FWriteIntE, // integer register write enables - output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory - output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory - output logic [`XLEN-1:0] FIntResM, // data to be written to integer register - output logic [`XLEN-1:0] FCvtIntResW, // data to be written to integer register - output logic [1:0] FResSelW, - output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + input logic clk, + input logic reset, + input logic [2:0] FRM_REGW, // Rounding mode (from CSR) + input logic [31:0] InstrD, // instruction (from IFU) + input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU) + input logic [`XLEN-1:0] ForwardedSrcAE, // Integer input (from IEU) + input logic StallE, StallM, StallW, // stall signals (from HZU) + input logic FlushE, FlushM, FlushW, // flush signals (from HZU) + input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) + input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit) + output logic FRegWriteM, // FP register write enable (to privileged unit) + output logic FpLoadStoreM, // Fp load instruction? (to LSU) + output logic FStore2, // store two words into memory (to LSU) + output logic FStallD, // Stall the decode stage (To HZU) + output logic FWriteIntE, // integer register write enable (to IEU) + output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory (to IEU) - only used if `XLEN >`FLEN + output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to IEU) - only used if `XLEN <`FLEN + output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) + output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) + output logic [1:0] FResSelW, // final result selection (to IEU) + output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU) + output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to privileged unit) output logic [4:0] SetFflagsM // FPU flags (to privileged unit) ); @@ -62,99 +62,88 @@ module fpu ( // - sets the underflow after rounding // control signals - logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic [`FMTBITS-1:0] FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division or squareroot - logic FWriteIntD; // Write to integer register - logic FWriteIntM; // Write to integer register - logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals - logic [2:0] FOpCtrlD, FOpCtrlE, FOpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelD, FResSelE, FResSelM; // Select one of the results that finish in the memory stage - logic [1:0] PostProcSelD, PostProcSelE, PostProcSelM; // select result in the post processing unit - logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + logic FRegWriteW; // FP register write enable + logic [2:0] FrmM; // FP rounding mode + logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double + logic DivStartE; // Start division or squareroot + logic FWriteIntM; // Write to integer register + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals + logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage + logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit + logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input // regfile signals - logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`FLEN-1:0] FSrcXE; // Input 1 to the various units (after forwarding) - logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) - logic [`FLEN-1:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) - logic [`FLEN-1:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) + logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding) + logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) + logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding) + logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding) // unpacking signals - logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM, YSgnM; // input's sign - memory stage - logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage - logic [`NE-1:0] ZExpM; // input's exponent - memory stage - logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage - logic [`NF:0] XManM, YManM, ZManM; // input's fraction - memory stage - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XNaNQ, YNaNQ; // is the input a NaN - divide - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage - logic XZeroQ, YZeroQ; // is the input zero - divide - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XInfQ, YInfQ; // is the input infinity - divide - logic XExpMaxE; // is the exponent all ones (max value) - logic FmtQ; - logic FOpCtrlQ; + logic XsE, YsE, ZsE; // input's sign - execute stage + logic XsM, YsM; // input's sign - memory stage + logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage + logic [`NE-1:0] ZeM; // input's exponent - memory stage + logic [`NF:0] XmE, YmE, ZmE; // input's fraction - execute stage + logic [`NF:0] XmM, YmM, ZmM; // input's fraction - memory stage + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XNaNQ, YNaNQ; // is the input a NaN - divide + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage + logic XDenormE, ZDenormE, ZDenormM; // is the input denormalized + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage + logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XExpMaxE; // is the exponent all ones (max value) // Fma Signals - logic [3*`NF+5:0] SumE, SumM; - logic [`NE+1:0] ProdExpE, ProdExpM; - logic AddendStickyE, AddendStickyM; - logic [`NE+1:0] SeE,SeM; - logic KillProdE, KillProdM; - logic InvAE, InvAM; - logic NegSumE, NegSumM; - logic ZSgnEffE, ZSgnEffM; - logic PSgnE, PSgnM; - logic SsE, SsM; - logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; + logic [3*`NF+5:0] SmE, SmM; + logic [`NE+1:0] PeE, PeM; + logic ZmStickyE, ZmStickyM; + logic [`NE+1:0] SeE,SeM; + logic KillProdE, KillProdM; + logic InvAE, InvAM; + logic NegSumE, NegSumM; + logic AsE, AsM; + logic PsE, PsM; + logic SsE, SsM; + logic [$clog2(3*`NF+7)-1:0] SCntE, SCntM; // Cvt Signals - logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent - logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic [`NE:0] CeE, CeM; // the calculated expoent + logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized - logic CvtResSgnE, CvtResSgnM; // the result's sign + logic CsE, CsM; // the result's sign logic IntZeroE, IntZeroM; // is the integer zero? - logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`QLEN-1-(`RADIX/4):0] QuotM; - logic [`NE+1:0] DivCalcExpE, DivCalcExpM; - logic DivStickyE, DivStickyM; - logic DivDoneM; - logic [`DURLEN-1:0] EarlyTermShiftM; + logic [`QLEN-1-(`RADIX/4):0] QmM; + logic [`NE+1:0] QeE, QeM; + logic DivSE, DivSM; + logic DivDoneM; + logic [`DURLEN-1:0] EarlyTermShiftM; // result and flag signals - logic [63:0] FDivResM, FDivResW; // divide/squareroot result - logic [4:0] FDivFlgM; // divide/squareroot flags - logic [`FLEN-1:0] ReadResW; // read result (load instruction) - logic [`XLEN-1:0] ClassResE; // classify result - logic [`XLEN-1:0] FIntResE; // classify result - logic [`FLEN-1:0] FpResM, FpResW; // classify result - logic [`FLEN-1:0] PostProcResM; // classify result - logic [4:0] PostProcFlgM; // classify result + logic [`XLEN-1:0] ClassResE; // classify result + logic [`XLEN-1:0] FIntResE; // classify result + logic [`FLEN-1:0] FpResM, FpResW; // classify result + logic [`FLEN-1:0] PostProcResM; // classify result + logic [4:0] PostProcFlgM; // classify result logic [`XLEN-1:0] FCvtIntResM; - logic [`FLEN-1:0] CmpFpResE; // compare result - logic [`XLEN-1:0] CmpIntResE; // compare result - logic CmpNVE; // compare invalid flag (Not Valid) - logic [`FLEN-1:0] SgnResE; // sign injection result - logic [`FLEN-1:0] PreFpResE, PreFpResM, PreFpResW; // selected result that is ready in the memory stage - logic PreNVE, PreNVM; // selected flag that is ready in the memory stage - logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register + logic [`FLEN-1:0] CmpFpResE; // compare result + logic [`XLEN-1:0] CmpIntResE; // compare result + logic CmpNVE; // compare invalid flag (Not Valid) + logic [`FLEN-1:0] SgnResE; // sign injection result + logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage + logic PreNVE, PreNVM; // selected flag that is ready in the memory stage + logic [`FLEN-1:0] FPUResultW; // final FP result being written to the FP register // other signals - logic FDivSqrtDoneE; // is divide done - logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit - logic load_preload; // enable for FF on fpdivsqrt - logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format + logic [`FLEN-1:0] AlignedSrcAE; // align SrcA to the floating point format logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed logic [`FLEN-1:0] BoxedOneE; // Zero value for Z for multiplication, with NaN boxing if needed @@ -171,9 +160,11 @@ module fpu ( ////////////////////////////////////////////////////////////////////////////////////////// // calculate FP control signals - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, .STATUS_FS, - .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResSelD, .FOpCtrlD, .PostProcSelD, - .FmtD, .FrmD, .FWriteIntD); + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD, + .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, + .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, + .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, + .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E); // FP register file fregfile fregfile (.clk, .reset, .we4(FRegWriteW), @@ -185,12 +176,6 @@ module fpu ( flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); flopenrc #(`FLEN) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); flopenrc #(`FLEN) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(12+`FMTBITS) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, PostProcSelD, FResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD}, - {FRegWriteE, PostProcSelE, FResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}); - flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, FDivStartD, FDivStartE); // EXECUTION STAGE @@ -207,12 +192,12 @@ module fpu ( // Hazard unit for FPU // - determines if any forwarding or stalls are needed fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, - .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); + .FStallD, .ForwardXE, .ForwardYE, .ForwardZE); // forwarding muxs - mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, FForwardXE, FSrcXE); - mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, FForwardYE, FPreSrcYE); - mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, FForwardZE, FPreSrcZE); + mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE); + mux3 #(`FLEN) fyemux (FRD2E, FPUResultW, PreFpResM, ForwardYE, PreYE); + mux3 #(`FLEN) fzemux (FRD3E, FPUResultW, PreFpResM, ForwardZE, PreZE); generate @@ -227,7 +212,7 @@ module fpu ( endgenerate - mux2 #(`FLEN) fyaddmux (FPreSrcYE, BoxedOneE, FOpCtrlE[2]&FOpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), FSrcYE); // Force Z to be 0 for multiply instructions + mux2 #(`FLEN) fyaddmux (PreYE, BoxedOneE, OpCtrlE[2]&OpCtrlE[1]&(FResSelE==2'b01)&(PostProcSelE==2'b10), YE); // Force Z to be 0 for multiply instructions // Force Z to be 0 for multiply instructions generate @@ -241,55 +226,76 @@ module fpu ( (`FLEN)'(0), FmtE, BoxedZeroE); // NaN boxing zeroes endgenerate - mux3 #(`FLEN) fzmulmux (FPreSrcZE, BoxedZeroE, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); + mux3 #(`FLEN) fzmulmux (PreZE, BoxedZeroE, PreYE, {OpCtrlE[2]&OpCtrlE[1], OpCtrlE[2]&~OpCtrlE[1]}, ZE); // unpack unit // - splits FP inputs into their various parts // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) - unpack unpack (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FmtE, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, .XExpMaxE); + unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), + .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), + .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), + .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE), + .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), + .ZInf(ZInfE), .XExpMax(XExpMaxE)); - // fma - does multiply, add, and multiply-add instructions - fma fma (.Xs(XSgnE), .Ys(YSgnE), .Zs(ZSgnE), - .Xe(XExpE), .Ye(YExpE), .Ze(ZExpE), - .Xm(XManE), .Ym(YManE), .Zm(ZManE), + // fused multiply add + // - fadd/fsub + // - fmul + // - fmadd/fnmadd/fmsub/fnmsub + fma fma (.Xs(XsE), .Ys(YsE), .Zs(ZsE), + .Xe(XeE), .Ye(YeE), .Ze(ZeE), + .Xm(XmE), .Ym(YmE), .Zm(ZmE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), - .FOpCtrl(FOpCtrlE), .Fmt(FmtE), - .As(ZSgnEffE), .Ps(PSgnE), .Ss(SsE), .Se(SeE), - .Sm(SumE), .Pe(ProdExpE), - .NegSum(NegSumE), .InvA(InvAE), .NCnt(FmaNormCntE), - .ZmSticky(AddendStickyE), .KillProd(KillProdE)); + .OpCtrl(OpCtrlE), .Fmt(FmtE), + .As(AsE), .Ps(PsE), .Ss(SsE), .Se(SeE), + .Sm(SmE), .Pe(PeE), + .NegSum(NegSumE), .InvA(InvAE), .SCnt(SCntE), + .ZmSticky(ZmStickyE), .KillProd(KillProdE)); - divsqrt divsqrt(.clk, .reset, .FmtE, .XManE, .YManE, .XExpE, .YExpE, - .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(FDivStartE), - .StallE, .StallM, .DivStickyM, .DivBusy(FDivBusyE), .DivCalcExpM, //***change divbusyE to M signal - .EarlyTermShiftM, .QuotM, .DivDone(DivDoneM)); - // other FP execution units - fcmp fcmp (.FmtE, .FOpCtrlE, .XSgnE, .YSgnE, .XExpE, .YExpE, .XManE, .YManE, - .XZeroE, .YZeroE, .XNaNE, .YNaNE, .XSNaNE, .YSNaNE, .FSrcXE, .FSrcYE, .CmpNVE, .CmpFpResE, .CmpIntResE); - fsgninj fsgninj(.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .SgnResE); - fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XSNaNE, .ClassResE); + // divide and squareroot + // - fdiv + // - fsqrt + // *** add other opperations + divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, + .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), + .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal + .EarlyTermShiftM, .QmM, .DivDone(DivDoneM)); + // compare + // - fmin/fmax + // - flt/fle/feq + fcmp fcmp (.Fmt(FmtE), .OpCtrl(OpCtrlE), .Xs(XsE), .Ys(YsE), .Xe(XeE), .Ye(YeE), + .Xm(XmE), .Ym(YmE), .XZero(XZeroE), .YZero(YZeroE), .XNaN(XNaNE), .YNaN(YNaNE), + .XSNaN(XSNaNE), .YSNaN(YSNaNE), .X(XE), .Y(YE), .CmpNV(CmpNVE), + .CmpFpRes(CmpFpResE), .CmpIntRes(CmpIntResE)); + // sign injection + // - fsgnj/fsgnjx/fsgnjn + fsgninj fsgninj(.OpCtrl(OpCtrlE[1:0]), .Xs(XsE), .Ys(YsE), .X(XE), .Fmt(FmtE), .SgnRes(SgnResE)); - fcvt fcvt (.Xs(XSgnE), .Xe(XExpE), .Xm(XManE), .Int(ForwardedSrcAE), .FOpCtrl(FOpCtrlE), - .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CvtCalcExpE), - .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .IntZero(IntZeroE), + // classify + // - fclass + fclassify fclassify (.Xs(XsE), .XDenorm(XDenormE), .XZero(XZeroE), .XNaN(XNaNE), + .XInf(XInfE), .XSNaN(XSNaNE), .ClassRes(ClassResE)); + + // convert + // - fcvt.*.* + fcvt fcvt (.Xs(XsE), .Xe(XeE), .Xm(XmE), .Int(ForwardedSrcAE), .OpCtrl(OpCtrlE), + .ToInt(FWriteIntE), .XZero(XZeroE), .XDenorm(XDenormE), .Fmt(FmtE), .Ce(CeE), + .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CsE), .IntZero(IntZeroE), .LzcIn(CvtLzcInE)); // data to be stored in memory - to IEU // - FP uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s if (`LLEN==`XLEN) begin - assign FWriteDataE = FSrcYE[`XLEN-1:0]; + assign FWriteDataE = YE[`XLEN-1:0]; end else begin logic [`FLEN-1:0] FWriteDataE; if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT; else assign FStore2 = FmtM; - if (`FPSIZES==1) assign FWriteDataE = FSrcYE; - else if (`FPSIZES==2) assign FWriteDataE = FmtE ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; - else assign FWriteDataE = FmtE == `FMT ? FSrcYE : {2{FSrcYE[`LEN1-1:0]}}; + if (`FPSIZES==1) assign FWriteDataE = YE; + else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}}; + else assign FWriteDataE = FmtE == `FMT ? YE : {2{YE[`LEN1-1:0]}}; flopenrc #(`FLEN) EMWriteDataReg (clk, reset, FlushM, ~StallM, FWriteDataE, FWriteDataM); end @@ -306,14 +312,14 @@ module fpu ( {{`FLEN-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); // NaN boxing zeroes endgenerate // select a result that may be written to the FP register - mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {FOpCtrlE[2], &FOpCtrlE[1:0]}, PreFpResE); - assign PreNVE = CmpNVE&(FOpCtrlE[2]|FWriteIntE); + mux3 #(`FLEN) FResMux(SgnResE, AlignedSrcAE, CmpFpResE, {OpCtrlE[2], &OpCtrlE[1:0]}, PreFpResE); + assign PreNVE = CmpNVE&(OpCtrlE[2]|FWriteIntE); // select the result that may be written to the integer register - to IEU if (`FLEN>`XLEN) - assign IntSrcXE = FSrcXE[`XLEN-1:0]; + assign IntSrcXE = XE[`XLEN-1:0]; else - assign IntSrcXE = {{`XLEN-`FLEN{FSrcXE[`FLEN-1:0]}}, FSrcXE}; + assign IntSrcXE = {{`XLEN-`FLEN{XE[`FLEN-1:0]}}, XE}; mux3 #(`XLEN) IntResMux (ClassResE, IntSrcXE, CmpIntResE, {~FResSelE[1], FResSelE[0]}, FIntResE); // *** DH 5/25/22: CvtRes will move to mem stage. Premux in execute to save area, then make sure stalls are ok @@ -321,27 +327,24 @@ module fpu ( // E/M pipe registers - // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); - flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM}); - flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM}); - flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); + // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, XE, FSrcXM); + flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XsE,XmE}, {XsM,XmM}); + flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YsE,YmE}, {YsM,YmM}); + flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZeE,ZmE}, {ZeM,ZmM}); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); flopenrc #(13) EMFpReg5 (clk, reset, FlushM, ~StallM, {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE, ZDenormE}, {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM, ZDenormM}); flopenrc #(1) EMRegCmpFlg (clk, reset, FlushM, ~StallM, PreNVE, PreNVM); - flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, - {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE}, - {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, FOpCtrlM, FWriteIntM}); - flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SumE, SumM); - flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, ProdExpE, ProdExpM); + flopenrc #(3*`NF+6) EMRegFma2(clk, reset, FlushM, ~StallM, SmE, SmM); + flopenrc #(`NE+2) EMRegFma3(clk, reset, FlushM, ~StallM, PeE, PeM); flopenrc #($clog2(3*`NF+7)+9+`NE) EMRegFma4(clk, reset, FlushM, ~StallM, - {AddendStickyE, KillProdE, InvAE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE, SsE, SeE}, - {AddendStickyM, KillProdM, InvAM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM, SsM, SeM}); + {ZmStickyE, KillProdE, InvAE, SCntE, NegSumE, AsE, PsE, SsE, SeE}, + {ZmStickyM, KillProdM, InvAM, SCntM, NegSumM, AsM, PsM, SsM, SeM}); flopenrc #(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, - {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, - {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); + {CeE, CvtShiftAmtE, CvtResDenormUfE, CsE, IntZeroE, CvtLzcInE}, + {CeM, CvtShiftAmtM, CvtResDenormUfM, CsM, IntZeroM, CvtLzcInM}); // BEGIN MEMORY STAGE @@ -357,11 +360,11 @@ module fpu ( assign FpLoadStoreM = FResSelM[1]; - postprocess postprocess(.Xs(XSgnM), .Ys(YSgnM), .Ze(ZExpM), .Xm(XManM), .Ym(YManM), .Zm(ZManM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(ProdExpM), .DivEarlyTermShift(EarlyTermShiftM), - .FmaZmS(AddendStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QuotM), .FmaSs(SsM), - .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SumM), .DivQe(DivCalcExpM), .DivDone(DivDoneM), - .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(ZSgnEffM), .FmaPs(PSgnM), .FOpCtrl(FOpCtrlM), .FmaNCnt(FmaNormCntM), .FmaSe(SeM), - .CvtCe(CvtCalcExpM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CvtResSgnM), .ToInt(FWriteIntM), .DivS(DivStickyM), + postprocess postprocess(.Xs(XsM), .Ys(YsM), .Ze(ZeM), .Xm(XmM), .Ym(YmM), .Zm(ZmM), .Frm(FrmM), .Fmt(FmtM), .FmaPe(PeM), .DivEarlyTermShift(EarlyTermShiftM), + .FmaZmS(ZmStickyM), .FmaKillProd(KillProdM), .XZero(XZeroM), .YZero(YZeroM), .ZZero(ZZeroM), .XInf(XInfM), .YInf(YInfM), .DivQm(QmM), .FmaSs(SsM), + .ZInf(ZInfM), .XNaN(XNaNM), .YNaN(YNaNM), .ZNaN(ZNaNM), .XSNaN(XSNaNM), .YSNaN(YSNaNM), .ZSNaN(ZSNaNM), .FmaSm(SmM), .DivQe(QeM), .DivDone(DivDoneM), + .FmaNegSum(NegSumM), .FmaInvA(InvAM), .ZDenorm(ZDenormM), .FmaAs(AsM), .FmaPs(PsM), .OpCtrl(OpCtrlM), .FmaSCnt(SCntM), .FmaSe(SeM), + .CvtCe(CeM), .CvtResDenormUf(CvtResDenormUfM),.CvtShiftAmt(CvtShiftAmtM), .CvtCs(CsM), .ToInt(FWriteIntM), .DivS(DivSM), .CvtLzcIn(CvtLzcInM), .IntZero(IntZeroM), .PostProcSel(PostProcSelM), .PostProcRes(PostProcResM), .PostProcFlg(PostProcFlgM), .FCvtIntRes(FCvtIntResM)); // FPU flag selection - to privileged @@ -371,9 +374,6 @@ module fpu ( // M/W pipe registers flopenrc #(`FLEN) MWRegFp(clk, reset, FlushW, ~StallW, FpResM, FpResW); flopenrc #(`XLEN) MWRegInt(clk, reset, FlushW, ~StallW, FCvtIntResM, FCvtIntResW); - flopenrc #(4+int'(`FMTBITS-1)) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResSelM, FmtM}, - {FRegWriteW, FResSelW, FmtW}); // BEGIN WRITEBACK STAGE diff --git a/pipelined/src/fpu/fsgninj.sv b/pipelined/src/fpu/fsgninj.sv index 17d15669f..a5b7e7742 100755 --- a/pipelined/src/fpu/fsgninj.sv +++ b/pipelined/src/fpu/fsgninj.sv @@ -26,60 +26,59 @@ // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE // OR OTHER DEALINGS IN THE SOFTWARE. //////////////////////////////////////////////////////////////////////////////////////////////// + `include "wally-config.vh" module fsgninj ( - input logic XSgnE, YSgnE, // X and Y sign bits - input logic [`FLEN-1:0] FSrcXE, // X - input logic [`FMTBITS-1:0] FmtE, // precision 1 = double 0 = single - input logic [1:0] SgnOpCodeE, // operation control - output logic [`FLEN-1:0] SgnResE // result + input logic Xs, Ys, // X and Y sign bits + input logic [`FLEN-1:0] X, // X + input logic [`FMTBITS-1:0] Fmt, // format + input logic [1:0] OpCtrl, // operation control + output logic [`FLEN-1:0] SgnRes // result ); logic ResSgn; - //op code designation: - // - //00 - fsgnj - directly copy over sign value of FSrcYE - //01 - fsgnjn - negate sign value of FSrcYE - //10 - fsgnjx - XOR sign values of FSrcXE & FSrcYE - // + // OpCtrl: + // 00 - fsgnj - directly copy over sign value of Y + // 01 - fsgnjn - negate sign value of Y + // 10 - fsgnjx - XOR sign values of X and Y // calculate the result's sign - assign ResSgn = (SgnOpCodeE[1] ? XSgnE : SgnOpCodeE[0]) ^ YSgnE; + assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys; // format final result based on precision // - uses NaN-blocking format // - if there are any unsused bits the most significant bits are filled with 1s if (`FPSIZES == 1) - assign SgnResE = {ResSgn, FSrcXE[`FLEN-2:0]}; + assign SgnRes = {ResSgn, X[`FLEN-2:0]}; else if (`FPSIZES == 2) - assign SgnResE = {~FmtE|ResSgn, FSrcXE[`FLEN-2:`LEN1], FmtE ? FSrcXE[`LEN1-1] : ResSgn, FSrcXE[`LEN1-2:0]}; + assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]}; else if (`FPSIZES == 3) begin logic [2:0] SgnBits; always_comb - case (FmtE) - `FMT: SgnBits = {ResSgn, FSrcXE[`LEN1-1], FSrcXE[`LEN2-1]}; - `FMT1: SgnBits = {1'b1, ResSgn, FSrcXE[`LEN2-1]}; + case (Fmt) + `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]}; + `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]}; `FMT2: SgnBits = {2'b11, ResSgn}; default: SgnBits = {3{1'bx}}; endcase - assign SgnResE = {SgnBits[2], FSrcXE[`FLEN-2:`LEN1], SgnBits[1], FSrcXE[`LEN1-2:`LEN2], SgnBits[0], FSrcXE[`LEN2-2:0]}; + assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]}; end else if (`FPSIZES == 4) begin logic [3:0] SgnBits; always_comb - case (FmtE) - `Q_FMT: SgnBits = {ResSgn, FSrcXE[`D_LEN-1], FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]}; - `D_FMT: SgnBits = {1'b1, ResSgn, FSrcXE[`S_LEN-1], FSrcXE[`H_LEN-1]}; - `S_FMT: SgnBits = {2'b11, ResSgn, FSrcXE[`H_LEN-1]}; + case (Fmt) + `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]}; + `D_FMT: SgnBits = {1'b1, ResSgn, X[`S_LEN-1], X[`H_LEN-1]}; + `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]}; `H_FMT: SgnBits = {3'b111, ResSgn}; endcase - assign SgnResE = {SgnBits[3], FSrcXE[`Q_LEN-2:`D_LEN], SgnBits[2], FSrcXE[`D_LEN-2:`S_LEN], SgnBits[1], FSrcXE[`S_LEN-2:`H_LEN], SgnBits[0], FSrcXE[`H_LEN-2:0]}; + assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]}; end endmodule diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 8d11273a2..66af5b3c5 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -107,6 +107,6 @@ module otfc4 ( QMNext = {QMR, 2'b11}; end end - // Final Quoteint is in the range [.5, 2) + // Final Qmeint is in the range [.5, 2) endmodule diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index de3c4f30c..d3169d471 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -36,7 +36,7 @@ module postprocess ( input logic [`NF:0] Xm, Ym, Zm, // input mantissas input logic [2:0] Frm, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude input logic [`FMTBITS-1:0] Fmt, // precision 1 = double 0 = single - input logic [2:0] FOpCtrl, // choose which opperation (look below for values) + input logic [2:0] OpCtrl, // choose which opperation (look below for values) input logic XZero, YZero, ZZero, // inputs are zero input logic XInf, YInf, ZInf, // inputs are infinity input logic XNaN, YNaN, ZNaN, // inputs are NaN @@ -54,7 +54,7 @@ module postprocess ( input logic FmaNegSum, // was the sum negitive input logic FmaInvA, // do you invert Z input logic FmaSs, - input logic [$clog2(3*`NF+7)-1:0] FmaNCnt, // the normalization shift count + input logic [$clog2(3*`NF+7)-1:0] FmaSCnt, // the normalization shift count //divide signals input logic [`DURLEN-1:0] DivEarlyTermShift, input logic DivS, @@ -125,14 +125,14 @@ module postprocess ( logic Sqrt; // signals to help readability - assign Signed = FOpCtrl[0]; - assign Int64 = FOpCtrl[1]; - assign IntToFp = FOpCtrl[2]; - assign Mult = FOpCtrl[2]&~FOpCtrl[1]&~FOpCtrl[0]; + assign Signed = OpCtrl[0]; + assign Int64 = OpCtrl[1]; + assign IntToFp = OpCtrl[2]; + assign Mult = OpCtrl[2]&~OpCtrl[1]&~OpCtrl[0]; assign CvtOp = (PostProcSel == 2'b00); assign FmaOp = (PostProcSel == 2'b10); assign DivOp = (PostProcSel == 2'b01)&DivDone; - assign Sqrt = FOpCtrl[0]; + assign Sqrt = OpCtrl[0]; // is there an input of infinity or NaN being used assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); @@ -142,9 +142,9 @@ module postprocess ( // - fp -> fp: OpCtrl contains the percision of the output // - otherwise: Fmt contains the percision of the output if (`FPSIZES == 2) - assign OutFmt = IntToFp|~CvtOp ? Fmt : (FOpCtrl[1:0] == `FMT); + assign OutFmt = IntToFp|~CvtOp ? Fmt : (OpCtrl[1:0] == `FMT); else if (`FPSIZES == 3 | `FPSIZES == 4) - assign OutFmt = IntToFp|~CvtOp ? Fmt : FOpCtrl[1:0]; + assign OutFmt = IntToFp|~CvtOp ? Fmt : OpCtrl[1:0]; /////////////////////////////////////////////////////////////////////////////// // Normalization @@ -152,7 +152,7 @@ module postprocess ( cvtshiftcalc cvtshiftcalc(.ToInt, .CvtCe, .CvtResDenormUf, .Xm, .CvtLzcIn, .XZero, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); - fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaNCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, + fmashiftcalc fmashiftcalc(.FmaSm, .Ze, .FmaPe, .FmaSCnt, .Fmt, .FmaKillProd, .NormSumExp, .FmaSe, .FmaSZero, .FmaPreResultDenorm, .FmaShiftAmt, .FmaShiftIn); divshiftcalc divshiftcalc(.Fmt, .DivQe, .DivQm, .DivEarlyTermShift, .DivResDenorm, .DivDenormShift, .DivShiftAmt, .DivShiftIn); diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 396ca7761..202b3ee81 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -42,7 +42,7 @@ module qsel2 ( // *** eventually just change to 4 bits // for efficiency. You can probably optimize your logic to // select the proper divisor with less delay. - // Quotient equations from EE371 lecture notes 13-20 + // Qmient equations from EE371 lecture notes 13-20 assign p = ps ^ pc; assign g = ps & pc; diff --git a/pipelined/src/fpu/shiftcorrection.sv b/pipelined/src/fpu/shiftcorrection.sv index 514edbee1..6329ffe28 100644 --- a/pipelined/src/fpu/shiftcorrection.sv +++ b/pipelined/src/fpu/shiftcorrection.sv @@ -43,7 +43,7 @@ module shiftcorrection( output logic [`NE+1:0] FmaMe // exponent of the normalized sum ); logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction - logic [`CORRSHIFTSZ-1:0] CorrQuotShifted; + logic [`CORRSHIFTSZ-1:0] CorrQmShifted; logic ResDenorm; // is the result denormalized logic LZAPlus1, LZAPlus2; // add one or two to the sum's exponent due to LZA correction @@ -53,11 +53,11 @@ module shiftcorrection( // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone assign CorrSumShifted = LZAPlus1 ? Shifted[`NORMSHIFTSZ-3:1] : Shifted[`NORMSHIFTSZ-4:0]; // if the msb is 1 or the exponent was one, but the shifted quotent was < 1 (Denorm) - assign CorrQuotShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; + assign CorrQmShifted = (LZAPlus2|(DivQe==1&~LZAPlus2)) ? Shifted[`NORMSHIFTSZ-2:`NORMSHIFTSZ-`CORRSHIFTSZ-1] : Shifted[`NORMSHIFTSZ-3:`NORMSHIFTSZ-`CORRSHIFTSZ-2]; // if the result of the divider was calculated to be denormalized, then the result was correctly normalized, so select the top shifted bits always_comb if(FmaOp) Mf = {CorrSumShifted, {`CORRSHIFTSZ-(3*`NF+6){1'b0}}}; - else if (DivOp&~DivResDenorm) Mf = CorrQuotShifted; + else if (DivOp&~DivResDenorm) Mf = CorrQmShifted; else Mf = Shifted[`NORMSHIFTSZ-1:`NORMSHIFTSZ-`CORRSHIFTSZ]; // Determine sum's exponent // if plus1 If plus2 if said denorm but norm plus 1 if said denorm but norm plus 2 diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index ee5ae9a39..7e9f9922a 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -37,15 +37,15 @@ module srt( input logic [`FMTBITS-1:0] FmtE, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, - output logic [`QLEN-1-(`RADIX/4):0] Quot, + input logic [`DIVLEN-1:0] X, + input logic [`DIVLEN-1:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, + output logic [`QLEN-1-(`RADIX/4):0] Qm, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] StickyWSA, output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] DivCalcExpM, + output logic [`NE+1:0] QeM, output logic [`XLEN-1:0] Rem ); @@ -62,7 +62,7 @@ module srt( /* verilator lint_on UNOPTFLAT */ logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2; - logic [`NE+1:0] DivCalcExp; + logic [`NE+1:0] Qe; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; logic [`QLEN-1:0] QMMux; @@ -88,7 +88,7 @@ module srt( mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); - flopen #(`NE+2) expflop(clk, DivStart, DivCalcExp, DivCalcExpM); + flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM); // Divisor Selections @@ -123,7 +123,7 @@ module srt( flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - assign Quot = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; + assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; assign FirstWS = WS[0]; assign FirstWC = WC[0]; if(`RADIX==2) @@ -132,7 +132,7 @@ module srt( else assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; - expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .DivCalcExp); + expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe); endmodule @@ -155,7 +155,7 @@ module divinteration ( logic [3:0] q; logic qp, qz;//, qn; - // Quotient Selection logic + // Qmient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) // q encoding: // 1000 = +2 @@ -226,7 +226,7 @@ module expcalc( input logic [`NE-1:0] Xe, Ye, input logic XZeroE, input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`NE+1:0] DivCalcExp + output logic [`NE+1:0] Qe ); logic [`NE-2:0] Bias; @@ -255,5 +255,5 @@ module expcalc( endcase end // correct exponent for denormalized input's normalization shifts - assign DivCalcExp = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; + assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtfsm.sv b/pipelined/src/fpu/srtfsm.sv index 634ecc1d3..597f96cd7 100644 --- a/pipelined/src/fpu/srtfsm.sv +++ b/pipelined/src/fpu/srtfsm.sv @@ -43,7 +43,7 @@ module srtfsm( input logic [`DIVLEN+3:0] StickyWSA, input logic [`DURLEN-1:0] Dur, output logic [`DURLEN-1:0] EarlyTermShiftE, - output logic DivStickyE, + output logic DivSE, output logic DivDone, output logic NegSticky, output logic DivBusy @@ -65,9 +65,9 @@ module srtfsm( // this is only a problem on radix 2 (and pssibly maximally redundant 4) since minimally redundant // radix-4 division can't create a QM that continually adds 0's if (`RADIX == 2) - assign DivStickyE = |W&~(StickyWSA == WS); + assign DivSE = |W&~(StickyWSA == WS); else - assign DivStickyE = |W; + assign DivSE = |W; assign DivDone = (state == DONE); assign W = WC+WS; assign NegSticky = W[`DIVLEN+3]; //*** is there a better way to do this??? diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 71cad1872..050839c2f 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -30,35 +30,34 @@ module unpack ( input logic [`FLEN-1:0] X, Y, Z, // inputs from register file - input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half - output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ - output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) - output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) - output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN - output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN - output logic XDenormE, ZDenormE, // is XYZ denormalized - output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero - output logic XInfE, YInfE, ZInfE, // is XYZ infinity - output logic XExpMaxE // does X have the maximum exponent (NaN or Inf) + input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half + output logic Xs, Ys, Zs, // sign bits of XYZ + output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) + output logic XNaN, YNaN, ZNaN, // is XYZ a NaN + output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN + output logic XDenorm, ZDenorm, // is XYZ denormalized + output logic XZero, YZero, ZZero, // is XYZ zero + output logic XInf, YInf, ZInf, // is XYZ infinity + output logic XExpMax // does X have the maximum exponent (NaN or Inf) ); - logic [`NF-1:0] XFracE, YFracE, ZFracE; //Fraction of XYZ logic XExpNonZero, YExpNonZero, ZExpNonZero; // is the exponent of XYZ non-zero logic XFracZero, YFracZero, ZFracZero; // is the fraction zero - logic YExpMaxE, ZExpMaxE; // is the exponent all 1s + logic YExpMax, ZExpMax; // is the exponent all 1s - unpackinput unpackinputX (.In(X), .FmtE, .Sgn(XSgnE), .Exp(XExpE), .Man(XManE), - .NaN(XNaNE), .SNaN(XSNaNE), .ExpNonZero(XExpNonZero), - .Zero(XZeroE), .Inf(XInfE), .ExpMax(XExpMaxE), .FracZero(XFracZero)); + unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), + .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), + .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero)); - unpackinput unpackinputY (.In(Y), .FmtE, .Sgn(YSgnE), .Exp(YExpE), .Man(YManE), - .NaN(YNaNE), .SNaN(YSNaNE), .ExpNonZero(YExpNonZero), - .Zero(YZeroE), .Inf(YInfE), .ExpMax(YExpMaxE), .FracZero(YFracZero)); + unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), + .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero), + .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero)); - unpackinput unpackinputZ (.In(Z), .FmtE, .Sgn(ZSgnE), .Exp(ZExpE), .Man(ZManE), - .NaN(ZNaNE), .SNaN(ZSNaNE), .ExpNonZero(ZExpNonZero), - .Zero(ZZeroE), .Inf(ZInfE), .ExpMax(ZExpMaxE), .FracZero(ZFracZero)); + unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), + .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), + .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero)); // is the input denormalized - assign XDenormE = ~XExpNonZero & ~XFracZero; - assign ZDenormE = ~ZExpNonZero & ~ZFracZero; + assign XDenorm = ~XExpNonZero & ~XFracZero; + assign ZDenorm = ~ZExpNonZero & ~ZFracZero; endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv index 2b078cc6c..7be922508 100644 --- a/pipelined/src/fpu/unpackinput.sv +++ b/pipelined/src/fpu/unpackinput.sv @@ -30,7 +30,7 @@ module unpackinput ( input logic [`FLEN-1:0] In, // inputs from register file - input logic [`FMTBITS-1:0] FmtE, // format signal 00 - single 01 - double 11 - quad 10 - half + input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half output logic Sgn, // sign bits of XYZ output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) output logic [`NF:0] Man, // mantissas of XYZ (converted to largest supported precision) @@ -74,16 +74,16 @@ module unpackinput ( // quad and half // double and half - assign BadNaNBox = ~(FmtE|(&In[`FLEN-1:`LEN1])); // Check NaN boxing + assign BadNaNBox = ~(Fmt|(&In[`FLEN-1:`LEN1])); // Check NaN boxing // choose sign bit depending on format - 1=larger precsion 0=smaller precision - assign Sgn = FmtE ? In[`FLEN-1] : In[`LEN1-1]; + assign Sgn = Fmt ? In[`FLEN-1] : In[`LEN1-1]; // extract the fraction, add trailing zeroes to the mantissa if nessisary - assign Frac = FmtE ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; + assign Frac = Fmt ? In[`NF-1:0] : {In[`NF1-1:0], (`NF-`NF1)'(0)}; // is the exponent non-zero - assign ExpNonZero = FmtE ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; + assign ExpNonZero = Fmt ? |In[`FLEN-2:`NF] : |In[`LEN1-2:`NF1]; // example double to single conversion: // 1023 = 0011 1111 1111 @@ -95,10 +95,10 @@ module unpackinput ( // extract the exponent, converting the smaller exponent into the larger precision if nessisary // - if the original precision had a denormal number convert the exponent value 1 - assign Exp = FmtE ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; + assign Exp = Fmt ? {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero} : {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; // is the exponent all 1's - assign ExpMax = FmtE ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; + assign ExpMax = Fmt ? &In[`FLEN-2:`NF] : &In[`LEN1-2:`NF1]; end else if (`FPSIZES == 3) begin // three floating point precsions supported @@ -122,7 +122,7 @@ module unpackinput ( // Check NaN boxing always_comb - case (FmtE) + case (Fmt) `FMT: BadNaNBox = 0; `FMT1: BadNaNBox = ~&In[`FLEN-1:`LEN1]; `FMT2: BadNaNBox = ~&In[`FLEN-1:`LEN2]; @@ -131,7 +131,7 @@ module unpackinput ( // extract the sign bit always_comb - case (FmtE) + case (Fmt) `FMT: Sgn = In[`FLEN-1]; `FMT1: Sgn = In[`LEN1-1]; `FMT2: Sgn = In[`LEN2-1]; @@ -140,7 +140,7 @@ module unpackinput ( // extract the fraction always_comb - case (FmtE) + case (Fmt) `FMT: Frac = In[`NF-1:0]; `FMT1: Frac = {In[`NF1-1:0], (`NF-`NF1)'(0)}; `FMT2: Frac = {In[`NF2-1:0], (`NF-`NF2)'(0)}; @@ -149,7 +149,7 @@ module unpackinput ( // is the exponent non-zero always_comb - case (FmtE) + case (Fmt) `FMT: ExpNonZero = |In[`FLEN-2:`NF]; // if input is largest precision (`FLEN - ie quad or double) `FMT1: ExpNonZero = |In[`LEN1-2:`NF1]; // if input is larger precsion (`LEN1 - double or single) `FMT2: ExpNonZero = |In[`LEN2-2:`NF2]; // if input is smallest precsion (`LEN2 - single or half) @@ -166,7 +166,7 @@ module unpackinput ( // convert the larger precision's exponent to use the largest precision's bias always_comb - case (FmtE) + case (Fmt) `FMT: Exp = {In[`FLEN-2:`NF+1], In[`NF]|~ExpNonZero}; `FMT1: Exp = {In[`LEN1-2], {`NE-`NE1{~In[`LEN1-2]}}, In[`LEN1-3:`NF1+1], In[`NF1]|~ExpNonZero}; `FMT2: Exp = {In[`LEN2-2], {`NE-`NE2{~In[`LEN2-2]}}, In[`LEN2-3:`NF2+1], In[`NF2]|~ExpNonZero}; @@ -175,7 +175,7 @@ module unpackinput ( // is the exponent all 1's always_comb - case (FmtE) + case (Fmt) `FMT: ExpMax = &In[`FLEN-2:`NF]; `FMT1: ExpMax = &In[`LEN1-2:`NF1]; `FMT2: ExpMax = &In[`LEN2-2:`NF2]; @@ -194,7 +194,7 @@ module unpackinput ( // Check NaN boxing always_comb - case (FmtE) + case (Fmt) 2'b11: BadNaNBox = 0; 2'b01: BadNaNBox = ~&In[`Q_LEN-1:`D_LEN]; 2'b00: BadNaNBox = ~&In[`Q_LEN-1:`S_LEN]; @@ -203,7 +203,7 @@ module unpackinput ( // extract sign bit always_comb - case (FmtE) + case (Fmt) 2'b11: Sgn = In[`Q_LEN-1]; 2'b01: Sgn = In[`D_LEN-1]; 2'b00: Sgn = In[`S_LEN-1]; @@ -213,7 +213,7 @@ module unpackinput ( // extract the fraction always_comb - case (FmtE) + case (Fmt) 2'b11: Frac = In[`Q_NF-1:0]; 2'b01: Frac = {In[`D_NF-1:0], (`Q_NF-`D_NF)'(0)}; 2'b00: Frac = {In[`S_NF-1:0], (`Q_NF-`S_NF)'(0)}; @@ -222,7 +222,7 @@ module unpackinput ( // is the exponent non-zero always_comb - case (FmtE) + case (Fmt) 2'b11: ExpNonZero = |In[`Q_LEN-2:`Q_NF]; 2'b01: ExpNonZero = |In[`D_LEN-2:`D_NF]; 2'b00: ExpNonZero = |In[`S_LEN-2:`S_NF]; @@ -240,7 +240,7 @@ module unpackinput ( // convert the double precsion exponent into quad precsion always_comb - case (FmtE) + case (Fmt) 2'b11: Exp = {In[`Q_LEN-2:`Q_NF+1], In[`Q_NF]|~ExpNonZero}; 2'b01: Exp = {In[`D_LEN-2], {`Q_NE-`D_NE{~In[`D_LEN-2]}}, In[`D_LEN-3:`D_NF+1], In[`D_NF]|~ExpNonZero}; 2'b00: Exp = {In[`S_LEN-2], {`Q_NE-`S_NE{~In[`S_LEN-2]}}, In[`S_LEN-3:`S_NF+1], In[`S_NF]|~ExpNonZero}; @@ -250,7 +250,7 @@ module unpackinput ( // is the exponent all 1's always_comb - case (FmtE) + case (Fmt) 2'b11: ExpMax = &In[`Q_LEN-2:`Q_NF]; 2'b01: ExpMax = &In[`D_LEN-2:`D_NF]; 2'b00: ExpMax = &In[`S_LEN-2:`S_NF]; From a30d9c6bd8783761b9d9c57f3efd51c82eda4ab6 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Wed, 20 Jul 2022 21:57:23 +0000 Subject: [PATCH 19/36] turn off 2 word store durring non-fp instructions --- pipelined/src/fpu/fctrl.sv | 26 +++++++++++++++++++++++++- pipelined/src/fpu/fpu.sv | 7 ++++--- pipelined/src/fpu/unpackinput.sv | 1 + 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 85047248d..ea98651b7 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -41,7 +41,7 @@ module fctrl ( input logic [2:0] FRM_REGW, // rounding mode from CSR input logic [1:0] STATUS_FS, // is FPU enabled? input logic FDivBusyE, // is the divider busy - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + output logic IllegalFPUInstrD, IllegalFPUInstrM, // Is the instruction an illegal fpu instruction output logic FRegWriteM, FRegWriteW, // FP register write enable output logic [2:0] FrmM, // FP rounding mode output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format @@ -55,6 +55,7 @@ module fctrl ( `define FCTRLW 11 logic [`FCTRLW-1:0] ControlsD; + logic IllegalFPUInstrE; logic FRegWriteD; // FP register write enable logic DivStartD; // integer register write enable logic FWriteIntD; // integer register write enable @@ -171,6 +172,25 @@ module fctrl ( else if (`FPSIZES == 3|`FPSIZES == 4) assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; + +// // signals to help readability +// assign IntToFp = OpCtrl[2]; +// assign CvtOp = (PostProcSelE == 2'b00)&(FResSelE == 2'b01); +// assign FmaOp = (PostProcSelE == 2'b10)&(FResSelE == 2'b01); +// assign Sqrt = OpCtrl[0]; + +// // is there an input of infinity or NaN being used +// assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); +// assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp); + +// // enables: +// // X - all except int->fp, store, load, mv int->fp +// // Y - all except cvt, mv, load, class +// // Z - fma ops only +// assign XEnE = ; +// assign YEnE = ~((FResSel==2'b10)); +// assign ZEnE = FmaOp&~OpCtrlE[2]; + // Final Res Sel: // fp int // 00 other cmp @@ -228,10 +248,14 @@ module fctrl ( flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, {Adr1E, Adr2E, Adr3E}); flopenrc #(1) DEDivStartReg(clk, reset, FlushE, ~StallE|FDivBusyE, DivStartD, DivStartE); + if(`FLEN>`XLEN) + flopenrc #(1) DEIllegalReg(clk, reset, FlushE, ~StallE, IllegalFPUInstrD, IllegalFPUInstrE); // E/M pipleine register flopenrc #(12+int'(`FMTBITS)) EMCtrlReg (clk, reset, FlushM, ~StallM, {FRegWriteE, FResSelE, PostProcSelE, FrmE, FmtE, OpCtrlE, FWriteIntE}, {FRegWriteM, FResSelM, PostProcSelM, FrmM, FmtM, OpCtrlM, FWriteIntM}); + if(`FLEN>`XLEN) + flopenrc #(1) EMIllegalReg(clk, reset, FlushM, ~StallM, IllegalFPUInstrE, IllegalFPUInstrM); // M/W pipleine register flopenrc #(3) MWCtrlReg(clk, reset, FlushW, ~StallW, {FRegWriteM, FResSelM}, diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 6d9b9cf47..84f8e04df 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -72,6 +72,7 @@ module fpu ( logic [1:0] FResSelE, FResSelM; // Select one of the results that finish in the memory stage logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input + logic IllegalFPUInstrM; // regfile signals logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -163,7 +164,7 @@ module fpu ( fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, - .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, + .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E); // FP register file @@ -290,8 +291,8 @@ module fpu ( assign FWriteDataE = YE[`XLEN-1:0]; end else begin logic [`FLEN-1:0] FWriteDataE; - if(`FMTBITS == 2) assign FStore2 = FmtM == `FMT; - else assign FStore2 = FmtM; + if(`FMTBITS == 2) assign FStore2 = (FmtM == `FMT)&~IllegalFPUInstrM; + else assign FStore2 = FmtM&~IllegalFPUInstrM; if (`FPSIZES==1) assign FWriteDataE = YE; else if (`FPSIZES==2) assign FWriteDataE = FmtE ? YE : {2{YE[`LEN1-1:0]}}; diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv index 7be922508..699d88958 100644 --- a/pipelined/src/fpu/unpackinput.sv +++ b/pipelined/src/fpu/unpackinput.sv @@ -30,6 +30,7 @@ module unpackinput ( input logic [`FLEN-1:0] In, // inputs from register file + // input logic En, // enable the input input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half output logic Sgn, // sign bits of XYZ output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) From 7950a675ea77fe516515a15d2639df330648d2c3 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 21 Jul 2022 01:20:06 +0000 Subject: [PATCH 20/36] added input enables and improved forwarding --- pipelined/src/fpu/fctrl.sv | 29 ++++----- pipelined/src/fpu/fhazard.sv | 42 +++++++------ pipelined/src/fpu/flags.sv | 5 +- pipelined/src/fpu/fpu.sv | 14 +++-- pipelined/src/fpu/postprocess.sv | 6 +- pipelined/src/fpu/specialcase.sv | 4 +- pipelined/src/fpu/unpack.sv | 7 ++- pipelined/src/fpu/unpackinput.sv | 6 +- pipelined/testbench/testbench-fp.sv | 95 +++++++++++++++-------------- 9 files changed, 107 insertions(+), 101 deletions(-) diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index ea98651b7..5b6b22ef0 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -46,6 +46,8 @@ module fctrl ( output logic [2:0] FrmM, // FP rounding mode output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format output logic DivStartE, // Start division or squareroot + output logic XEnE, YEnE, ZEnE, + output logic YEnForwardE, ZEnForwardE, output logic FWriteIntE, FWriteIntM, // Write to integer register output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage @@ -173,23 +175,18 @@ module fctrl ( assign FmtD = ((Funct7D[6:3] == 4'b0100)&OpD[4]) ? Rs2D[1:0] : Funct7D[1:0]; -// // signals to help readability -// assign IntToFp = OpCtrl[2]; -// assign CvtOp = (PostProcSelE == 2'b00)&(FResSelE == 2'b01); -// assign FmaOp = (PostProcSelE == 2'b10)&(FResSelE == 2'b01); -// assign Sqrt = OpCtrl[0]; -// // is there an input of infinity or NaN being used -// assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); -// assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp); - -// // enables: -// // X - all except int->fp, store, load, mv int->fp -// // Y - all except cvt, mv, load, class -// // Z - fma ops only -// assign XEnE = ; -// assign YEnE = ~((FResSel==2'b10)); -// assign ZEnE = FmaOp&~OpCtrlE[2]; +// enables: +// X - all except int->fp, store, load, mv int->fp +// Y - all except cvt, mv, load, class +// Z - fma ops only +// load/store mv int->fp cvt int->fp + assign XEnE = ~(((FResSelE==2'b10)&~FWriteIntE)|((FResSelE==2'b11)&FRegWriteE)|((FResSelE==2'b01)&(PostProcSelE==2'b00)&OpCtrlE[2])); +// load/class mv cvt + assign YEnE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&(PostProcSelE==2'b00))); + assign ZEnE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&(~OpCtrlE[2]|OpCtrlE[1]); + assign YEnForwardE = ~(((FResSelE==2'b10)&(FWriteIntE|FRegWriteE))|(FResSelE==2'b11)|((FResSelE==2'b01)&(PostProcSelE==2'b00))); + assign ZEnForwardE = (PostProcSelE==2'b10)&(FResSelE==2'b01)&~OpCtrlE[2]; // Final Res Sel: // fp int diff --git a/pipelined/src/fpu/fhazard.sv b/pipelined/src/fpu/fhazard.sv index 36a0ff82f..690e04ebb 100644 --- a/pipelined/src/fpu/fhazard.sv +++ b/pipelined/src/fpu/fhazard.sv @@ -35,6 +35,7 @@ module fhazard( input logic FRegWriteM, FRegWriteW, // is the fp register being written to input logic [4:0] RdM, RdW, // the adress being written to input logic [1:0] FResSelM, // the result being selected + input logic XEnE, YEnE, ZEnE, output logic FStallD, // stall the decode stage output logic [1:0] ForwardXE, ForwardYE, ForwardZE // select a forwarded value ); @@ -47,33 +48,34 @@ module fhazard( ForwardZE = 2'b00; // choose FRD3E FStallD = 0; - //*** this hazard unit is waiting for all three inputs, change so that if an input isnt used then don't wait - // if the needed value is in the memory stage - input 1 - if ((Adr1E == RdM) & FRegWriteM) - // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM - else FStallD = 1; // otherwise stall - // if the needed value is in the writeback stage - else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W + if(XEnE) + if ((Adr1E == RdM) & FRegWriteM) + // if the result will be FResM (can be taken from the memory stage) + if(FResSelM == 2'b00) ForwardXE = 2'b10; // choose FResM + else FStallD = 1; // otherwise stall + // if the needed value is in the writeback stage + else if ((Adr1E == RdW) & FRegWriteW) ForwardXE = 2'b01; // choose FPUResult64W // if the needed value is in the memory stage - input 2 - if ((Adr2E == RdM) & FRegWriteM) - // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM - else FStallD = 1; // otherwise stall - // if the needed value is in the writeback stage - else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W + if(YEnE) + if ((Adr2E == RdM) & FRegWriteM) + // if the result will be FResM (can be taken from the memory stage) + if(FResSelM == 2'b00) ForwardYE = 2'b10; // choose FResM + else FStallD = 1; // otherwise stall + // if the needed value is in the writeback stage + else if ((Adr2E == RdW) & FRegWriteW) ForwardYE = 2'b01; // choose FPUResult64W // if the needed value is in the memory stage - input 3 - if ((Adr3E == RdM) & FRegWriteM) - // if the result will be FResM (can be taken from the memory stage) - if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM - else FStallD = 1; // otherwise stall - // if the needed value is in the writeback stage - else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W + if(ZEnE) + if ((Adr3E == RdM) & FRegWriteM) + // if the result will be FResM (can be taken from the memory stage) + if(FResSelM == 2'b00) ForwardZE = 2'b10; // choose FResM + else FStallD = 1; // otherwise stall + // if the needed value is in the writeback stage + else if ((Adr3E == RdW) & FRegWriteW) ForwardZE = 2'b01; // choose FPUResult64W end diff --git a/pipelined/src/fpu/flags.sv b/pipelined/src/fpu/flags.sv index 6b1bc6381..c169ab2fa 100644 --- a/pipelined/src/fpu/flags.sv +++ b/pipelined/src/fpu/flags.sv @@ -37,7 +37,6 @@ module flags( input logic NaNIn, // is a NaN input being used input logic [`FMTBITS-1:0] OutFmt, // output format input logic XZero, YZero, // inputs are zero - input logic XNaN, YNaN, // inputs are NaN input logic Sqrt, // Sqrt? input logic ToInt, // convert to integer input logic IntToFp, // convert integer to floating point @@ -153,11 +152,11 @@ module flags( // | | | | or the res rounds up out of bounds // | | | | and the res didn't underflow // | | | | | - assign IntInvalid = XNaN|XInf|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); + assign IntInvalid = NaNIn|InfIn|(ShiftGtIntSz&~FullRe[`NE+1])|((Xs&~Signed)&(~((CvtCe[`NE]|(~|CvtCe))&~Plus1)))|(CvtNegResMsbs[1]^CvtNegResMsbs[0]); // | // or when the positive res rounds up out of range assign SigNaN = (XSNaN&~(IntToFp&CvtOp)) | (YSNaN&~CvtOp) | (ZSNaN&FmaOp); - assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~XNaN & ~YNaN) | (XZero & YInf) | (YZero & XInf); + assign FmaInvalid = ((XInf | YInf) & ZInf & (FmaPs ^ FmaAs) & ~NaNIn) | (XZero & YInf) | (YZero & XInf); assign DivInvalid = ((XInf & YInf) | (XZero & YZero))&~Sqrt | (Xs&Sqrt); assign Invalid = SigNaN | (FmaInvalid&FmaOp) | (DivInvalid&DivOp); diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index 84f8e04df..cfa46b657 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -73,6 +73,8 @@ module fpu ( logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input logic IllegalFPUInstrM; + logic XEnE, YEnE, ZEnE; + logic YEnForwardE, ZEnForwardE; // regfile signals logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage @@ -163,8 +165,8 @@ module fpu ( // calculate FP control signals fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .InstrD, .StallE, .StallM, .StallW, .FlushE, .FlushM, .FlushW, .FRM_REGW, .STATUS_FS, .FDivBusyE, - .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, - .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, + .reset, .clk, .IllegalFPUInstrD, .FRegWriteM, .FRegWriteW, .FrmM, .FmtE, .FmtM, .YEnForwardE, .ZEnForwardE, + .DivStartE, .FWriteIntE, .FWriteIntM, .OpCtrlE, .OpCtrlM, .IllegalFPUInstrM, .XEnE, .YEnE, .ZEnE, .FResSelE, .FResSelM, .FResSelW, .PostProcSelE, .PostProcSelM, .Adr1E, .Adr2E, .Adr3E); // FP register file @@ -193,7 +195,7 @@ module fpu ( // Hazard unit for FPU // - determines if any forwarding or stalls are needed fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResSelM, - .FStallD, .ForwardXE, .ForwardYE, .ForwardZE); + .XEnE, .YEnE(YEnForwardE), .ZEnE(ZEnForwardE), .FStallD, .ForwardXE, .ForwardYE, .ForwardZE); // forwarding muxs mux3 #(`FLEN) fxemux (FRD1E, FPUResultW, PreFpResM, ForwardXE, XE); @@ -233,11 +235,11 @@ module fpu ( // - splits FP inputs into their various parts // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) unpack unpack (.X(XE), .Y(YE), .Z(ZE), .Fmt(FmtE), .Xs(XsE), .Ys(YsE), .Zs(ZsE), - .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), - .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), + .Xe(XeE), .Ye(YeE), .Ze(ZeE), .Xm(XmE), .Ym(YmE), .Zm(ZmE), .YEn(YEnE), + .XNaN(XNaNE), .YNaN(YNaNE), .ZNaN(ZNaNE), .XSNaN(XSNaNE), .XEn(XEnE), .YSNaN(YSNaNE), .ZSNaN(ZSNaNE), .XDenorm(XDenormE), .ZDenorm(ZDenormE), .XZero(XZeroE), .YZero(YZeroE), .ZZero(ZZeroE), .XInf(XInfE), .YInf(YInfE), - .ZInf(ZInfE), .XExpMax(XExpMaxE)); + .ZEn(ZEnE), .ZInf(ZInfE), .XExpMax(XExpMaxE)); // fused multiply add // - fadd/fsub diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index d3169d471..4d9dc310f 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -135,8 +135,8 @@ module postprocess ( assign Sqrt = OpCtrl[0]; // is there an input of infinity or NaN being used - assign InfIn = (XInf&~(IntToFp&CvtOp))|(YInf&~CvtOp)|(ZInf&FmaOp); - assign NaNIn = (XNaN&~(IntToFp&CvtOp))|(YNaN&~CvtOp)|(ZNaN&FmaOp); + assign InfIn = XInf|YInf|ZInf; + assign NaNIn = XNaN|YNaN|ZNaN; // choose the ouptut format depending on the opperation // - fp -> fp: OpCtrl contains the percision of the output @@ -219,7 +219,7 @@ module postprocess ( flags flags(.XSNaN, .YSNaN, .ZSNaN, .XInf, .YInf, .ZInf, .InfIn, .XZero, .YZero, .Xs, .Sqrt, .ToInt, .IntToFp, .Int64, .Signed, .OutFmt, .CvtCe, - .XNaN, .YNaN, .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, + .NaNIn, .FmaAs, .FmaPs, .R, .IntInvalid, .DivByZero, .UfL, .S, .UfPlus1, .CvtOp, .DivOp, .FmaOp, .FullRe, .Plus1, .Me, .CvtNegResMsbs, .Invalid, .Overflow, .PostProcFlg); diff --git a/pipelined/src/fpu/specialcase.sv b/pipelined/src/fpu/specialcase.sv index 6014962a1..41e75110f 100644 --- a/pipelined/src/fpu/specialcase.sv +++ b/pipelined/src/fpu/specialcase.sv @@ -280,14 +280,14 @@ module specialcase( // other: 32 bit unsinged res should be sign extended as if it were a signed number always_comb if(Signed) - if(Xs&~XNaN) // signed negitive + if(Xs&~NaNIn) // signed negitive if(Int64) OfIntRes = {1'b1, {`XLEN-1{1'b0}}}; else OfIntRes = {{`XLEN-32{1'b1}}, 1'b1, {31{1'b0}}}; else // signed positive if(Int64) OfIntRes = {1'b0, {`XLEN-1{1'b1}}}; else OfIntRes = {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}}; else - if(Xs&~XNaN) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive + if(Xs&~NaNIn) OfIntRes = {`XLEN{1'b0}}; // unsigned negitive else OfIntRes = {`XLEN{1'b1}}; // unsigned positive diff --git a/pipelined/src/fpu/unpack.sv b/pipelined/src/fpu/unpack.sv index 050839c2f..4053cba13 100644 --- a/pipelined/src/fpu/unpack.sv +++ b/pipelined/src/fpu/unpack.sv @@ -31,6 +31,7 @@ module unpack ( input logic [`FLEN-1:0] X, Y, Z, // inputs from register file input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half + input logic XEn, YEn, ZEn, output logic Xs, Ys, Zs, // sign bits of XYZ output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) @@ -46,15 +47,15 @@ module unpack ( logic XFracZero, YFracZero, ZFracZero; // is the fraction zero logic YExpMax, ZExpMax; // is the exponent all 1s - unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), + unpackinput unpackinputX (.In(X), .Fmt, .Sgn(Xs), .Exp(Xe), .Man(Xm), .En(XEn), .NaN(XNaN), .SNaN(XSNaN), .ExpNonZero(XExpNonZero), .Zero(XZero), .Inf(XInf), .ExpMax(XExpMax), .FracZero(XFracZero)); - unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), + unpackinput unpackinputY (.In(Y), .Fmt, .Sgn(Ys), .Exp(Ye), .Man(Ym), .En(YEn), .NaN(YNaN), .SNaN(YSNaN), .ExpNonZero(YExpNonZero), .Zero(YZero), .Inf(YInf), .ExpMax(YExpMax), .FracZero(YFracZero)); - unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), + unpackinput unpackinputZ (.In(Z), .Fmt, .Sgn(Zs), .Exp(Ze), .Man(Zm), .En(ZEn), .NaN(ZNaN), .SNaN(ZSNaN), .ExpNonZero(ZExpNonZero), .Zero(ZZero), .Inf(ZInf), .ExpMax(ZExpMax), .FracZero(ZFracZero)); // is the input denormalized diff --git a/pipelined/src/fpu/unpackinput.sv b/pipelined/src/fpu/unpackinput.sv index 699d88958..4e43768c4 100644 --- a/pipelined/src/fpu/unpackinput.sv +++ b/pipelined/src/fpu/unpackinput.sv @@ -30,7 +30,7 @@ module unpackinput ( input logic [`FLEN-1:0] In, // inputs from register file - // input logic En, // enable the input + input logic En, // enable the input input logic [`FMTBITS-1:0] Fmt, // format signal 00 - single 01 - double 11 - quad 10 - half output logic Sgn, // sign bits of XYZ output logic [`NE-1:0] Exp, // exponents of XYZ (converted to largest supported precision) @@ -263,8 +263,8 @@ module unpackinput ( // Output logic assign FracZero = ~|Frac; // is the fraction zero? assign Man = {ExpNonZero, Frac}; // add the assumed one (or zero if denormal or zero) to create the significand - assign NaN = (ExpMax & ~FracZero)|BadNaNBox; // is the input a NaN? + assign NaN = ((ExpMax & ~FracZero)|BadNaNBox)&En; // is the input a NaN? assign SNaN = NaN&~Frac[`NF-1]&~BadNaNBox; // is the input a singnaling NaN? - assign Inf = ExpMax & FracZero; // is the input infinity? + assign Inf = ExpMax & FracZero &En; // is the input infinity? assign Zero = ~ExpNonZero & FracZero; // is the input zero? endmodule \ No newline at end of file diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 91ce82616..19b637478 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -66,9 +66,9 @@ module testbenchfp; logic [`XLEN-1:0] IntRes, CmpRes; // Results from each unit logic [4:0] FmaFlg, CvtFlg, DivFlg, CmpFlg; // Outputed flags logic AnsNaN, ResNaN, NaNGood; - logic XSgn, YSgn, ZSgn; // sign of the inputs - logic [`NE-1:0] XExp, YExp, ZExp; // exponent of the inputs - logic [`NF:0] XMan, YMan, ZMan; // mantissas of the inputs + logic Xs, Ys, Zs; // sign of the inputs + logic [`NE-1:0] Xe, Ye, Ze; // exponent of the inputs + logic [`NF:0] Xm, Ym, Zm; // mantissas of the inputs logic XNaN, YNaN, ZNaN; // is the input NaN logic XSNaN, YSNaN, ZSNaN; // is the input a signaling NaN logic XDenorm, ZDenorm; // is the input denormalized @@ -99,7 +99,7 @@ module testbenchfp; logic [`NE+1:0] Se; logic ZmSticky; logic KillProd; - logic [$clog2(3*`NF+7)-1:0] NCnt; + logic [$clog2(3*`NF+7)-1:0] SCnt; logic [3*`NF+5:0] Sm; logic InvA; logic NegSum; @@ -650,14 +650,14 @@ module testbenchfp; // extract the inputs (X, Y, Z, SrcA) and the output (Ans, AnsFlg) from the current test vector readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, - .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal), - .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, .OpCtrl(OpCtrlVal), - .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart, - .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN), - .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), - .XDenormE(XDenorm), .ZDenormE(ZDenorm), - .XZeroE(XZero), .YZeroE(YZero), .ZZeroE(ZZero), - .XInfE(XInf), .YInfE(YInf), .ZInfE(ZInf), .XExpMaxE(XExpMax), + .Xs, .Ys, .Zs, .Unit(UnitVal), + .Xe, .Ye, .Ze, .TestNum, .OpCtrl(OpCtrlVal), + .Xm, .Ym, .Zm, .DivStart, + .XNaN, .YNaN, .ZNaN, + .XSNaN, .YSNaN, .ZSNaN, + .XDenorm, .ZDenorm, + .XZero, .YZero, .ZZero, + .XInf, .YInf, .ZInf, .XExpMax, .X, .Y, .Z); @@ -673,34 +673,34 @@ module testbenchfp; /////////////////////////////////////////////////////////////////////////////////////////////// // instantiate devices under test - fma fma(.Xs(XSgn), .Ys(YSgn), .Zs(ZSgn), - .Xe(XExp), .Ye(YExp), .Ze(ZExp), - .Xm(XMan), .Ym(YMan), .Zm(ZMan), + fma fma(.Xs(Xs), .Ys(Ys), .Zs(Zs), + .Xe(Xe), .Ye(Ye), .Ze(Ze), + .Xm(Xm), .Ym(Ym), .Zm(Zm), .XZero, .YZero, .ZZero, .Ss, .Se, - .FOpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .NCnt, .As, .Ps, + .OpCtrl(OpCtrlVal), .Fmt(ModFmt), .Sm, .NegSum, .InvA, .SCnt, .As, .Ps, .Pe, .ZmSticky, .KillProd); - postprocess postprocess(.Xs(XSgn), .Ys(YSgn), .PostProcSel(UnitVal[1:0]), - .Ze(ZExp), .ZDenorm(ZDenorm), .FOpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), - .Xm(XMan), .Ym(YMan), .Zm(ZMan), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss), + postprocess postprocess(.Xs(Xs), .Ys(Ys), .PostProcSel(UnitVal[1:0]), + .Ze(Ze), .ZDenorm(ZDenorm), .OpCtrl(OpCtrlVal), .DivQm(Quot), .DivQe(DivCalcExp), + .Xm(Xm), .Ym(Ym), .Zm(Zm), .CvtCe(CvtCalcExpE), .DivS(DivSticky), .FmaSs(Ss), .XNaN(XNaN), .YNaN(YNaN), .ZNaN(ZNaN), .CvtResDenormUf(CvtResDenormUfE), .XZero(XZero), .YZero(YZero), .ZZero(ZZero), .CvtShiftAmt(CvtShiftAmtE), .XInf(XInf), .YInf(YInf), .ZInf(ZInf), .CvtCs(CvtResSgnE), .ToInt(WriteIntVal), .XSNaN(XSNaN), .YSNaN(YSNaN), .ZSNaN(ZSNaN), .CvtLzcIn(CvtLzcInE), .IntZero, .FmaKillProd(KillProd), .FmaZmS(ZmSticky), .FmaPe(Pe), .DivDone, .FmaSe(Se), - .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaNCnt(NCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), + .FmaSm(Sm), .FmaNegSum(NegSum), .FmaInvA(InvA), .FmaSCnt(SCnt), .DivEarlyTermShift(EarlyTermShift), .FmaAs(As), .FmaPs(Ps), .Fmt(ModFmt), .Frm(FrmVal), .PostProcFlg(Flg), .PostProcRes(FpRes), .FCvtIntRes(IntRes)); - fcvt fcvt (.Xs(XSgn), .Xe(XExp), .Xm(XMan), .Int(SrcA), .ToInt(WriteIntVal), - .XZero(XZero), .XDenorm(XDenorm), .FOpCtrl(OpCtrlVal), .IntZero, + fcvt fcvt (.Xs(Xs), .Xe(Xe), .Xm(Xm), .Int(SrcA), .ToInt(WriteIntVal), + .XZero(XZero), .XDenorm(XDenorm), .OpCtrl(OpCtrlVal), .IntZero, .Fmt(ModFmt), .Ce(CvtCalcExpE), .ShiftAmt(CvtShiftAmtE), .ResDenormUf(CvtResDenormUfE), .Cs(CvtResSgnE), .LzcIn(CvtLzcInE)); - fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), - .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), - .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XManE(XMan), .YManE(YMan), .XExpE(XExp), .YExpE(YExp), + fcmp fcmp (.Fmt(ModFmt), .OpCtrl(OpCtrlVal), .Xs, .Ys, .Xe, .Ye, + .Xm, .Ym, .XZero, .YZero, .CmpIntRes(CmpRes), + .XNaN, .YNaN, .XSNaN, .YSNaN, .X, .Y, .CmpNV(CmpFlg[4]), .CmpFpRes(FpCmpRes)); + divsqrt divsqrt(.clk, .reset, .FmtE(ModFmt), .XmE(Xm), .YmE(Ym), .XeE(Xe), .YeE(Ye), .XInfE(XInf), .YInfE(YInf), .XZeroE(XZero), .YZeroE(YZero), .XNaNE(XNaN), .YNaNE(YNaN), .DivStartE(DivStart), - .StallE(1'b0), .StallM(1'b0), .DivStickyM(DivSticky), .DivBusy, .DivCalcExpM(DivCalcExp), - .EarlyTermShiftM(EarlyTermShift), .QuotM(Quot), .DivDone); + .StallE(1'b0), .StallM(1'b0), .DivSM(DivSticky), .DivBusy, .QeM(DivCalcExp), + .EarlyTermShiftM(EarlyTermShift), .QmM(Quot), .DivDone); assign CmpFlg[3:0] = 0; @@ -868,10 +868,10 @@ end // Testfloat outputs 800... for both the largest integer values for both positive and negitive numbers but // the riscv spec specifies 2^31-1 for positive values out of range and NaNs ie 7fff... - else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&(Res[`XLEN-1:0] === (`XLEN)'(0))) | - (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) | - (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~XSgn|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | - (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&XSgn&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin + else if ((UnitVal === `CVTINTUNIT) & ~(((WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&(Res[`XLEN-1:0] === (`XLEN)'(0))) | + (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&OpCtrlVal[1]&(Res[`XLEN-1:0] === {1'b0, {`XLEN-1{1'b1}}})) | + (WriteIntVal&OpCtrlVal[0]&AnsFlg[4]&(~Xs|XNaN)&~OpCtrlVal[1]&(Res[`XLEN-1:0] === {{`XLEN-32{1'b0}}, 1'b0, {31{1'b1}}})) | + (~(WriteIntVal&~OpCtrlVal[0]&AnsFlg[4]&Xs&~XNaN)&(Res === Ans | NaNGood | NaNGood === 1'bx))) & (ResFlg === AnsFlg | AnsFlg === 5'bx))) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); @@ -924,18 +924,19 @@ module readvectors ( output logic [`FLEN-1:0] Ans, output logic [`XLEN-1:0] SrcA, output logic [4:0] AnsFlg, - output logic XSgnE, YSgnE, ZSgnE, // sign bits of XYZ - output logic [`NE-1:0] XExpE, YExpE, ZExpE, // exponents of XYZ (converted to largest supported precision) - output logic [`NF:0] XManE, YManE, ZManE, // mantissas of XYZ (converted to largest supported precision) - output logic XNaNE, YNaNE, ZNaNE, // is XYZ a NaN - output logic XSNaNE, YSNaNE, ZSNaNE, // is XYZ a signaling NaN - output logic XDenormE, ZDenormE, // is XYZ denormalized - output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero - output logic XInfE, YInfE, ZInfE, // is XYZ infinity - output logic XExpMaxE, + output logic Xs, Ys, Zs, // sign bits of XYZ + output logic [`NE-1:0] Xe, Ye, Ze, // exponents of XYZ (converted to largest supported precision) + output logic [`NF:0] Xm, Ym, Zm, // mantissas of XYZ (converted to largest supported precision) + output logic XNaN, YNaN, ZNaN, // is XYZ a NaN + output logic XSNaN, YSNaN, ZSNaN, // is XYZ a signaling NaN + output logic XDenorm, ZDenorm, // is XYZ denormalized + output logic XZero, YZero, ZZero, // is XYZ zero + output logic XInf, YInf, ZInf, // is XYZ infinity + output logic XExpMax, output logic DivStart, output logic [`FLEN-1:0] X, Y, Z ); + logic XEn, YEn, ZEn; // apply test vectors on rising edge of clk // Format of vectors Inputs(1/2/3)_AnsFlg @@ -1257,8 +1258,12 @@ module readvectors ( endcase end - unpack unpack(.X, .Y, .Z, .FmtE(ModFmt), .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, - .XManE, .YManE, .ZManE, .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, - .XDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .XInfE, .YInfE, .ZInfE, - .XExpMaxE); + assign XEn = ~((Unit == `CVTINTUNIT)&OpCtrl[2]); + assign YEn = ~((Unit == `CVTINTUNIT)|(Unit == `CVTFPUNIT)); + assign ZEn = (Unit == `FMAUNIT); + + unpack unpack(.X, .Y, .Z, .Fmt(ModFmt), .Xs, .Ys, .Zs, .Xe, .Ye, .Ze, + .Xm, .Ym, .Zm, .XNaN, .YNaN, .ZNaN, .XSNaN, .YSNaN, .ZSNaN, + .XDenorm, .ZDenorm, .XZero, .YZero, .ZZero, .XInf, .YInf, .ZInf, + .XEn, .YEn, .ZEn, .XExpMax); endmodule \ No newline at end of file From e46e96e0805ba4a5306517720cd0ab202cec7949 Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Thu, 21 Jul 2022 10:14:05 -0700 Subject: [PATCH 21/36] changed the default branch of embench --- .gitmodules | 5 +++++ addins/embench-iot | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 81ed2d5f4..bf7fdaefd 100644 --- a/.gitmodules +++ b/.gitmodules @@ -17,6 +17,11 @@ [submodule "addins/embench-iot"] path = addins/embench-iot url = https://github.com/embench/embench-iot + branch = embench-1.0-branch [submodule "addins/coremark"] path = addins/coremark url = https://github.com/eembc/coremark +[submodule "embench"] + branch = embench-1.0-branch +[submodule "embench-iot"] + branch = embench-1.0-branch diff --git a/addins/embench-iot b/addins/embench-iot index 261a65e0a..58ffa0c68 160000 --- a/addins/embench-iot +++ b/addins/embench-iot @@ -1 +1 @@ -Subproject commit 261a65e0a2d3e8d62d81b1d8fe7e309a096bc6a9 +Subproject commit 58ffa0c68c52f291d12c5902fc787d2bca94ddf9 From 16e4260ddab1e7b038bb11cbf0531c1792767eca Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Thu, 21 Jul 2022 10:15:13 -0700 Subject: [PATCH 22/36] fixed gitmodules --- .gitmodules | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.gitmodules b/.gitmodules index bf7fdaefd..ab45d3f96 100644 --- a/.gitmodules +++ b/.gitmodules @@ -21,7 +21,3 @@ [submodule "addins/coremark"] path = addins/coremark url = https://github.com/eembc/coremark -[submodule "embench"] - branch = embench-1.0-branch -[submodule "embench-iot"] - branch = embench-1.0-branch From 4793267bd7619c5d9433406882859cbdd2597ec4 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 21 Jul 2022 17:36:21 +0000 Subject: [PATCH 23/36] Updated Radix2 Sqrt to follow new algorithm --- pipelined/srt/srt.sv | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 27fac324e..157be2e7f 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -213,8 +213,8 @@ module fsel2 ( logic [`DIVLEN+3:0] FP, FN, FZ; // Generate for both positive and negative bits - assign FP = ~S & C; - assign FN = SM | (C & (~C << 2)); + assign FP = ~(S << 1) & C; + assign FN = (SM << 1) | (C & (~C << 2)); assign FZ = '0; // Choose which adder input will be used @@ -283,22 +283,22 @@ module sotfc2( logic [`DIVLEN+3:0] SNext, SMNext, SMux; flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); - mux2 #(`DIVLEN+4) Smux(SNext, {2'b00, Sqrt, {(`DIVLEN+1){1'b0}}}, Start, SMux); + mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); flop #(`DIVLEN+4) Sreg(clk, SMux, S); always_comb begin if (sp) begin - SNext = S | ((C << 1) & ~(C << 2)); + SNext = S | (C & ~(C << 1)); SMNext = S; end else if (sn) begin - SNext = SM | ((C << 1) & ~(C << 2)); + SNext = SM | (C & ~(C << 1)); SMNext = SM; end else begin // If sp and sn are not true, then sz is SNext = S; - SMNext = SM | ((C << 1) & ~(C << 2)); + SMNext = SM | (C & ~(C << 1)); end end - assign Sq = S[`DIVLEN+1] ? S[`DIVLEN:2] : S[`DIVLEN-1:1]; + assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; endmodule ////////////////////////// @@ -311,7 +311,7 @@ module creg(input logic clk, ); logic [`DIVLEN+3:0] CMux; - mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b11111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux); + mux2 #(`DIVLEN+4) Cmux({1'b1, C[`DIVLEN+3:1]}, {4'b1111, Sqrt, {(`DIVLEN-1){1'b0}}}, Start, CMux); flop #(`DIVLEN+4) cflop(clk, CMux, C); endmodule From 86ebdd05f0753693a1d86e27ea48f7e897e4fe90 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 21 Jul 2022 17:59:10 +0000 Subject: [PATCH 24/36] Division working too --- pipelined/srt/srt.sv | 2 +- pipelined/srt/testbench.sv | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 157be2e7f..a7216b9ff 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -2,7 +2,7 @@ // srt.sv // // Written: David_Harris@hmc.edu 13 January 2022 -// Modified: cturek@hmc.edu June 2022 +// Modified: cturek@hmc.edu July 2022 // // Purpose: Combined Divide and Square Root Floating Point and Integer Unit // diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 39696af44..7a4e1897b 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -72,7 +72,7 @@ module testbench; // Equip Int test or Sqrt test assign Int = 1'b0; - assign Sqrt = 1'b1; + assign Sqrt = 1'b0; // Divider srt srt(.clk, .Start(req), @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("sqrttestvectors", Tests); + $readmemh ("testvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; From fbe8bb2298413d731f70306e100e8cc3881222ec Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 21 Jul 2022 19:38:06 +0000 Subject: [PATCH 25/36] radix-4 division integrated into srt - not tested --- addins/embench-iot | 2 +- pipelined/config/shared/wally-shared.vh | 2 +- pipelined/regression/wave-fpu.do | 2 +- pipelined/src/fpu/divsqrt.sv | 13 +-- pipelined/src/fpu/fctrl.sv | 4 +- pipelined/src/fpu/fpu.sv | 2 +- pipelined/src/fpu/otfc.sv | 70 ++++++++++++++++ pipelined/src/fpu/qsel.sv | 89 +++++++++++++++++---- pipelined/src/fpu/srt.sv | 102 +++++++++++------------- pipelined/src/fpu/srtpreproc.sv | 78 +++++++++++++++--- pipelined/testbench/testbench-fp.sv | 2 +- 11 files changed, 271 insertions(+), 95 deletions(-) diff --git a/addins/embench-iot b/addins/embench-iot index 58ffa0c68..261a65e0a 160000 --- a/addins/embench-iot +++ b/addins/embench-iot @@ -1 +1 @@ -Subproject commit 58ffa0c68c52f291d12c5902fc787d2bca94ddf9 +Subproject commit 261a65e0a2d3e8d62d81b1d8fe7e309a096bc6a9 diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index 015ef2611..b2abdff7b 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -101,7 +101,7 @@ `define CORRSHIFTSZ ((`DIVRESLEN+`NF) > (3*`NF+8) ? (`DIVRESLEN+`NF) : (3*`NF+6)) // division constants -`define RADIX 32'h2 +`define RADIX 32'h4 `define DIVCOPIES 32'h1 `define DIVLEN ((`NF < `XLEN) ? (`XLEN) : (`NF + 3)) `define EXTRAFRACBITS ((`NF<(`XLEN)) ? (`XLEN - `NF) : 3) diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 98c72f170..b71207e09 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -33,7 +33,7 @@ add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/intera # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/otfc/otfc2/* # add wave -group {Divide} -group inter0 -noupdate /testbenchfp/divsqrt/srt/interations[0]/divinteration/qsel/qsel2/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtpreproc/* -add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/* +# add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srt/expcalc/* add wave -group {Divide} -noupdate /testbenchfp/divsqrt/srtfsm/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/divsqrt.sv b/pipelined/src/fpu/divsqrt.sv index a2f0ba8e3..7ba44a953 100644 --- a/pipelined/src/fpu/divsqrt.sv +++ b/pipelined/src/fpu/divsqrt.sv @@ -41,7 +41,8 @@ module divsqrt( input logic XNaNE, YNaNE, input logic DivStartE, input logic StallM, - input logic StallE, + input logic StallE, + input logic SqrtE, SqrtM, output logic DivSM, output logic DivBusy, output logic DivDone, @@ -55,15 +56,15 @@ module divsqrt( logic [`DIVLEN+3:0] WS, WC; logic [`DIVLEN+3:0] StickyWSA; logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt; - logic [`DIVLEN-1:0] X; - logic [`DIVLEN-1:0] Dpreproc; + logic [`DIVLEN+3:0] X; + logic [`DIVLEN+3:0] Dpreproc; logic [`DURLEN-1:0] Dur; logic NegSticky; - srtpreproc srtpreproc(.Xm(XmE), .Dur, .Ym(YmE), .X,.Dpreproc, .XZeroCnt, .YZeroCnt); + srtpreproc srtpreproc(.clk, .DivStart(DivStartE), .Xm(XmE), .QeM, .Xe(XeE), .Fmt(FmtE), .Ye(YeE), .Sqrt(SqrtE), .Dur, .Ym(YmE), .XZero(XZeroE), .X, .Dpreproc, .XZeroCnt, .YZeroCnt); srtfsm srtfsm(.reset, .NextWSN, .NextWCN, .WS, .WC, .Dur, .DivBusy, .clk, .DivStart(DivStartE),.StallE, .StallM, .DivDone, .XZeroE, .YZeroE, .DivSE(DivSM), .XNaNE, .YNaNE, .StickyWSA, .XInfE, .YInfE, .NegSticky(NegSticky), .EarlyTermShiftE(EarlyTermShiftM)); - srt srt(.clk, .FmtE, .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, - .StickyWSA, .DivBusy, .Qm(QmM), .Rem(), .QeM); + srt srt(.clk, .Sqrt(SqrtM), .X,.Dpreproc, .NegSticky, .XZeroCnt, .YZeroCnt, .FirstWS(WS), .FirstWC(WC), .NextWSN, .NextWCN, .DivStart(DivStartE), .Xe(XeE), .Ye(YeE), .XZeroE, .YZeroE, + .StickyWSA, .DivBusy, .Qm(QmM), .Rem()); endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/fctrl.sv b/pipelined/src/fpu/fctrl.sv index 5b6b22ef0..20e4a0099 100755 --- a/pipelined/src/fpu/fctrl.sv +++ b/pipelined/src/fpu/fctrl.sv @@ -219,8 +219,8 @@ module fctrl ( // 110 - add // 111 - sub // Div: -// 0 - ??? -// 1 - ??? +// 0 - div +// 1 - sqrt // Cvt Int: {Int to Fp?, 64 bit int?, signed int?} // Cvt Fp: output format // 10 - to half diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index cfa46b657..3e214b0f1 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -259,7 +259,7 @@ module fpu ( // - fdiv // - fsqrt // *** add other opperations - divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, + divsqrt divsqrt(.clk, .reset, .FmtE, .XmE, .YmE, .XeE, .YeE, .SqrtE(OpCtrlE[0]), .SqrtM(OpCtrlM[0]), .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .DivStartE(DivStartE), .StallE, .StallM, .DivSM, .DivBusy(FDivBusyE), .QeM, //***change divbusyE to M signal .EarlyTermShiftM, .QmM, .DivDone(DivDoneM)); diff --git a/pipelined/src/fpu/otfc.sv b/pipelined/src/fpu/otfc.sv index 66af5b3c5..7ecb823e6 100644 --- a/pipelined/src/fpu/otfc.sv +++ b/pipelined/src/fpu/otfc.sv @@ -58,6 +58,41 @@ module otfc2 ( endmodule +/////////////////////////////// +// Square Root OTFC, Radix 2 // +/////////////////////////////// +module sotfc2( + input logic clk, + input logic Start, + input logic sp, sn, + input logic Sqrt, + input logic [`DIVLEN+3:0] C, + output logic [`DIVLEN-2:0] Sq, + output logic [`DIVLEN+3:0] S, SM +); + // The on-the-fly converter transfers the square root + // bits to the quotient as they come. + // Use this otfc for division and square root. + logic [`DIVLEN+3:0] SNext, SMNext, SMux; + + flopr #(`DIVLEN+4) SMreg(clk, Start, SMNext, SM); + mux2 #(`DIVLEN+4) Smux(SNext, {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, Start, SMux); + flop #(`DIVLEN+4) Sreg(clk, SMux, S); + + always_comb begin + if (sp) begin + SNext = S | (C & ~(C << 1)); + SMNext = S; + end else if (sn) begin + SNext = SM | (C & ~(C << 1)); + SMNext = SM; + end else begin // If sp and sn are not true, then sz is + SNext = S; + SMNext = SM | (C & ~(C << 1)); + end + end + assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; +endmodule module otfc4 ( input logic [3:0] q, @@ -110,3 +145,38 @@ module otfc4 ( // Final Qmeint is in the range [.5, 2) endmodule + +/////////////////////////////// +// Square Root OTFC, Radix 4 // +/////////////////////////////// +module sotfc4( + input logic [3:0] s, + input logic Sqrt, + input logic [`DIVLEN+3:0] S, SM, + input logic [`DIVLEN+3:0] C, + output logic [`DIVLEN+3:0] SNext, SMNext +); + // The on-the-fly converter transfers the square root + // bits to the quotient as they come. + // Use this otfc for division and square root. + + always_comb begin + if (s[3]) begin + SNext = S | ((C << 1)&~(C << 2)); + SMNext = S | (C&~(C << 1)); + end else if (s[2]) begin + SNext = S | (C&~(C << 1)); + SMNext = S; + end else if (s[1]) begin + SNext = SM | (C&~(C << 2)); + SMNext = SM | ((C << 1)&~(C << 2)); + end else if (s[0]) begin + SNext = SM | ((C << 1)&~(C << 2)); + SMNext = SM | (C&~(C << 1)); + end else begin // If sp and sn are not true, then sz is + SNext = S; + SMNext = SM | (C & ~(C << 2)); + end + end + +endmodule diff --git a/pipelined/src/fpu/qsel.sv b/pipelined/src/fpu/qsel.sv index 202b3ee81..87c6a4b25 100644 --- a/pipelined/src/fpu/qsel.sv +++ b/pipelined/src/fpu/qsel.sv @@ -62,9 +62,36 @@ module qsel2 ( // *** eventually just change to 4 bits // assign #1 qn = magnitude & sign; endmodule +//////////////////////////////////// +// Adder Input Generation, Radix 2 // +//////////////////////////////////// +module fgen2 ( + input logic sp, sn, + input logic [`DIVLEN+3:0] C, S, SM, + output logic [`DIVLEN+3:0] F +); + logic [`DIVLEN+3:0] FP, FN, FZ; + + // Generate for both positive and negative bits + assign FP = ~(S << 1) & C; + assign FN = (SM << 1) | (C & (~C << 2)); + assign FZ = '0; + + // Choose which adder input will be used + + always_comb + if (sp) F = FP; + else if (sn) F = FN; + else F = FZ; + + // assign F = sp ? FP : (sn ? FN : FZ); + +endmodule + module qsel4 ( input logic [`DIVLEN+3:0] D, input logic [`DIVLEN+3:0] WS, WC, + input logic Sqrt, output logic [3:0] q ); logic [6:0] Wmsbs; @@ -91,45 +118,77 @@ module qsel4 ( else if(w2>=4) QSel4[i] = 4'b0100; else if(w2>=-4) QSel4[i] = 4'b0000; else if(w2>=-13) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else QSel4[i] = 4'b0001; 1: if(w2>=14) QSel4[i] = 4'b1000; else if(w2>=4) QSel4[i] = 4'b0100; - else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-15) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-5) QSel4[i] = 4'b0000; // was -6 + else if(~Sqrt&(w2>=-15)) QSel4[i] = 4'b0010; // divide case + else if( Sqrt&(w2>=-14)) QSel4[i] = 4'b0010; // sqrt case + else QSel4[i] = 4'b0001; 2: if(w2>=15) QSel4[i] = 4'b1000; else if(w2>=4) QSel4[i] = 4'b0100; else if(w2>=-6) QSel4[i] = 4'b0000; else if(w2>=-16) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else QSel4[i] = 4'b0001; 3: if(w2>=16) QSel4[i] = 4'b1000; else if(w2>=4) QSel4[i] = 4'b0100; else if(w2>=-6) QSel4[i] = 4'b0000; - else if(w2>=-18) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-17) QSel4[i] = 4'b0010; // was -18 + else QSel4[i] = 4'b0001; 4: if(w2>=18) QSel4[i] = 4'b1000; else if(w2>=6) QSel4[i] = 4'b0100; - else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-6) QSel4[i] = 4'b0000; // was -8 + else if(~Sqrt&(w2>=-20)) QSel4[i] = 4'b0010; // divide case + else if( Sqrt&(w2>=-18)) QSel4[i] = 4'b0010; // sqrt case + else QSel4[i] = 4'b0001; 5: if(w2>=20) QSel4[i] = 4'b1000; else if(w2>=6) QSel4[i] = 4'b0100; else if(w2>=-8) QSel4[i] = 4'b0000; else if(w2>=-20) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else QSel4[i] = 4'b0001; 6: if(w2>=20) QSel4[i] = 4'b1000; else if(w2>=8) QSel4[i] = 4'b0100; else if(w2>=-8) QSel4[i] = 4'b0000; else if(w2>=-22) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; - 7: if(w2>=24) QSel4[i] = 4'b1000; + else QSel4[i] = 4'b0001; + 7: if(w2>=22) QSel4[i] = 4'b1000; // was 24 else if(w2>=8) QSel4[i] = 4'b0100; else if(w2>=-8) QSel4[i] = 4'b0000; - else if(w2>=-24) QSel4[i] = 4'b0010; - else QSel4[i] = 4'b0001; + else if(w2>=-23) QSel4[i] = 4'b0010; // was -24 + else QSel4[i] = 4'b0001; endcase end end assign q = QSel4[{Dmsbs,Wmsbs}]; endmodule + +//////////////////////////////////// +// Adder Input Generation, Radix 4 // +//////////////////////////////////// +module fgen4 ( + input logic [3:0] s, + input logic [`DIVLEN+3:0] C, S, SM, + output logic [`DIVLEN+3:0] F +); + logic [`DIVLEN+3:0] F2, F1, F0, FN1, FN2; + + // Generate for both positive and negative bits + assign F2 = (~S << 2) & (C << 2); + assign F1 = ~(S << 1) & C; + assign F0 = '0; + assign FN1 = (SM << 1) | (C & ~(C << 2)); + assign FN2 = (SM << 2) | ((C << 2)&~(C <<4)); + + // Choose which adder input will be used + + always_comb + if (s[3]) F = F2; + else if (s[2]) F = F1; + else if (s[1]) F = FN1; + else if (s[0]) F = FN2; + else F = F0; + + // assign F = sp ? FP : (sn ? FN : FZ); + +endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srt.sv b/pipelined/src/fpu/srt.sv index 7e9f9922a..633ac1787 100644 --- a/pipelined/src/fpu/srt.sv +++ b/pipelined/src/fpu/srt.sv @@ -34,18 +34,17 @@ module srt( input logic clk, input logic DivStart, input logic DivBusy, - input logic [`FMTBITS-1:0] FmtE, input logic [`NE-1:0] Xe, Ye, input logic XZeroE, YZeroE, - input logic [`DIVLEN-1:0] X, - input logic [`DIVLEN-1:0] Dpreproc, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - input logic NegSticky, + input logic Sqrt, + input logic [`DIVLEN+3:0] X, + input logic [`DIVLEN+3:0] Dpreproc, + input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, + input logic NegSticky, output logic [`QLEN-1-(`RADIX/4):0] Qm, output logic [`DIVLEN+3:0] NextWSN, NextWCN, output logic [`DIVLEN+3:0] StickyWSA, output logic [`DIVLEN+3:0] FirstWS, FirstWC, - output logic [`NE+1:0] QeM, output logic [`XLEN-1:0] Rem ); @@ -59,13 +58,19 @@ module srt( logic [`QLEN-1:0] QM[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QNext[`DIVCOPIES-1:0]; logic [`QLEN-1:0] QMNext[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] S[`DIVCOPIES-1:0]; //***change to QLEN??? + logic [`DIVLEN+3:0] SM[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] SNext[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] SMNext[`DIVCOPIES-1:0]; + logic [`DIVLEN+3:0] C[`DIVCOPIES-1:0]; /* verilator lint_on UNOPTFLAT */ logic [`DIVLEN+3:0] WSN, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2; - logic [`NE+1:0] Qe; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; logic [`QLEN-1:0] QMMux; + logic [`DIVLEN+3:0] CMux; + logic [`DIVLEN+3:0] SMux; // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -83,13 +88,13 @@ module srt( assign NextWCN = {WCA[`DIVCOPIES-1][`DIVLEN+1:0], 2'b0}; end - mux2 #(`DIVLEN+4) wsmux(NextWSN, {3'b000, ~XZeroE, X}, DivStart, WSN); + mux2 #(`DIVLEN+4) wsmux(NextWSN, X, DivStart, WSN); flopen #(`DIVLEN+4) wsflop(clk, DivStart|DivBusy, WSN, WS[0]); mux2 #(`DIVLEN+4) wcmux(NextWCN, {`DIVLEN+4{1'b0}}, DivStart, WCN); flopen #(`DIVLEN+4) wcflop(clk, DivStart|DivBusy, WCN, WC[0]); - flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); - flopen #(`NE+2) expflop(clk, DivStart, Qe, QeM); - + flopen #(`DIVLEN+4) dflop(clk, DivStart, Dpreproc, D); + mux2 #(`DIVLEN+4) Cmux({2'b11, C[`DIVCOPIES-1][`DIVLEN+3:2]}, {5'b11111, Sqrt, {(`DIVLEN-2){1'b0}}}, DivStart, CMux); + flop #(`DIVLEN+4) cflop(clk, CMux, C[0]); // Divisor Selections // - choose the negitive version of what's being selected @@ -102,8 +107,9 @@ module srt( genvar i; generate for(i=0; $unsigned(i)<`DIVCOPIES; i++) begin : interations - divinteration divinteration(.D, .DBar, .D2, .DBar2, - .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i])); + divinteration divinteration(.D, .DBar, .D2, .DBar2, .Sqrt, + .WS(WS[i]), .WC(WC[i]), .WSA(WSA[i]), .WCA(WCA[i]), .Q(Q[i]), .QM(QM[i]), .QNext(QNext[i]), .QMNext(QMNext[i]), + .C(C[i]), .S(S[i]), .SM(SM[i]), .SNext(SNext[i]), .SMNext(SMNext[i])); if(i<(`DIVCOPIES-1)) begin if (`RADIX==2)begin assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 1'b0}; @@ -111,9 +117,12 @@ module srt( end else begin assign WS[i+1] = {WSA[i][`DIVLEN+1:0], 2'b0}; assign WC[i+1] = {WCA[i][`DIVLEN+1:0], 2'b0}; + assign C[i+1] = {2'b11, C[i][`DIVLEN+3:2]}; end assign Q[i+1] = QNext[i]; assign QM[i+1] = QMNext[i]; + assign S[i+1] = SNext[i]; + assign SM[i+1] = SMNext[i]; end end endgenerate @@ -123,16 +132,27 @@ module srt( flopenr #(`QLEN) Qreg(clk, DivStart, DivBusy, QNext[`DIVCOPIES-1], Q[0]); flopen #(`QLEN) QMreg(clk, DivBusy, QMMux, QM[0]); - assign Qm = NegSticky ? QM[0][`QLEN-1-(`RADIX/4):0] : Q[0][`QLEN-1-(`RADIX/4):0]; + flopr #(`DIVLEN+4) SMreg(clk, DivStart, SMNext[`DIVCOPIES-1], SM[0]); + mux2 #(`DIVLEN+4) Smux(SNext[`DIVCOPIES-1], {3'b000, Sqrt, {(`DIVLEN){1'b0}}}, DivStart, SMux); + flop #(`DIVLEN+4) Sreg(clk, SMux, S[0]); + + always_comb + if(Sqrt) + if(NegSticky) Qm = SM[0][`QLEN-1-(`RADIX/4):0]; + else Qm = S[0][`QLEN-1-(`RADIX/4):0]; + else + if(NegSticky) Qm = QM[0][`QLEN-1-(`RADIX/4):0]; + else Qm = Q[0][`QLEN-1-(`RADIX/4):0]; + assign FirstWS = WS[0]; assign FirstWC = WC[0]; + if(`RADIX==2) if (`DIVCOPIES == 1) assign StickyWSA = {WSA[0][`DIVLEN+2:0], 1'b0}; else assign StickyWSA = {WSA[1][`DIVLEN+2:0], 1'b0}; - expcalc expcalc(.FmtE, .Xe, .Ye, .XZeroE, .XZeroCnt, .YZeroCnt, .Qe); endmodule @@ -145,8 +165,12 @@ module divinteration ( input logic [`DIVLEN+3:0] D, input logic [`DIVLEN+3:0] DBar, D2, DBar2, input logic [`QLEN-1:0] Q, QM, + input logic [`DIVLEN+3:0] S, SM, input logic [`DIVLEN+3:0] WS, WC, + input logic [`DIVLEN+3:0] C, + input logic Sqrt, output logic [`QLEN-1:0] QNext, QMNext, + output logic [`DIVLEN+3:0] SNext, SMNext, output logic [`DIVLEN+3:0] WSA, WCA ); /* verilator lint_on UNOPTFLAT */ @@ -154,6 +178,8 @@ module divinteration ( logic [`DIVLEN+3:0] Dsel; logic [3:0] q; logic qp, qz;//, qn; + logic [`DIVLEN+3:0] F; + logic [`DIVLEN+3:0] AddIn; // Qmient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) @@ -166,7 +192,8 @@ module divinteration ( if(`RADIX == 2) begin : qsel qsel2 qsel2(WS[`DIVLEN+3:`DIVLEN], WC[`DIVLEN+3:`DIVLEN], qp, qz);//, qn); end else begin - qsel4 qsel4(.D, .WS, .WC, .q); + qsel4 qsel4(.D, .WS, .WC, .Sqrt, .q); + fgen4 fgen4(.s(q), .C, .S, .SM, .F); end if(`RADIX == 2) begin : dsel @@ -184,16 +211,18 @@ module divinteration ( end // Partial Product Generation // WSA, WCA = WS + WC - qD + assign AddIn = Sqrt ? F : Dsel; if (`RADIX == 2) begin : csa - csa #(`DIVLEN+4) csa(WS, WC, Dsel, qp, WSA, WCA); + csa #(`DIVLEN+4) csa(WS, WC, AddIn, qp, WSA, WCA); end else begin - csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); + csa #(`DIVLEN+4) csa(WS, WC, AddIn, |q[3:2], WSA, WCA); end if (`RADIX == 2) begin : otfc otfc2 otfc2(.qp, .qz, .Q, .QM, .QNext, .QMNext); end else begin otfc4 otfc4(.q, .Q, .QM, .QNext, .QMNext); + sotfc4 sotfc4(.s(q), .Sqrt, .C, .S, .SM, .SNext, .SMNext); end endmodule @@ -220,40 +249,3 @@ module csa #(parameter N=69) ( assign out2 = {in1[N-2:0] & (in2[N-2:0] | in3[N-2:0]) | (in2[N-2:0] & in3[N-2:0]), cin}; endmodule - -module expcalc( - input logic [`FMTBITS-1:0] FmtE, - input logic [`NE-1:0] Xe, Ye, - input logic XZeroE, - input logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, - output logic [`NE+1:0] Qe - ); - logic [`NE-2:0] Bias; - - if (`FPSIZES == 1) begin - assign Bias = (`NE-1)'(`BIAS); - - end else if (`FPSIZES == 2) begin - assign Bias = FmtE ? (`NE-1)'(`BIAS) : (`NE-1)'(`BIAS1); - - end else if (`FPSIZES == 3) begin - always_comb - case (FmtE) - `FMT: Bias = (`NE-1)'(`BIAS); - `FMT1: Bias = (`NE-1)'(`BIAS1); - `FMT2: Bias = (`NE-1)'(`BIAS2); - default: Bias = 'x; - endcase - - end else if (`FPSIZES == 4) begin - always_comb - case (FmtE) - 2'h3: Bias = (`NE-1)'(`Q_BIAS); - 2'h1: Bias = (`NE-1)'(`D_BIAS); - 2'h0: Bias = (`NE-1)'(`S_BIAS); - 2'h2: Bias = (`NE-1)'(`H_BIAS); - endcase - end - // correct exponent for denormalized input's normalization shifts - assign Qe = ({2'b0, Xe} - {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, XZeroCnt} - {2'b0, Ye} + {{`NE+1-$unsigned($clog2(`NF+2)){1'b0}}, YZeroCnt} + {3'b0, Bias})&{`NE+2{~XZeroE}}; - endmodule \ No newline at end of file diff --git a/pipelined/src/fpu/srtpreproc.sv b/pipelined/src/fpu/srtpreproc.sv index b9fb8bb82..4d2609179 100644 --- a/pipelined/src/fpu/srtpreproc.sv +++ b/pipelined/src/fpu/srtpreproc.sv @@ -31,16 +31,25 @@ `include "wally-config.vh" module srtpreproc ( + input logic clk, + input logic DivStart, input logic [`NF:0] Xm, Ym, - output logic [`DIVLEN-1:0] X, - output logic [`DIVLEN-1:0] Dpreproc, + input logic [`NE-1:0] Xe, Ye, + input logic [`FMTBITS-1:0] Fmt, + input logic Sqrt, + input logic XZero, + output logic [`NE+1:0] QeM, + output logic [`DIVLEN+3:0] X, + output logic [`DIVLEN+3:0] Dpreproc, output logic [$clog2(`NF+2)-1:0] XZeroCnt, YZeroCnt, output logic [`DURLEN-1:0] Dur ); // logic [`XLEN-1:0] PosA, PosB; // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; - logic [`DIVLEN-1:0] PreprocA, PreprocX; - logic [`DIVLEN-1:0] PreprocB, PreprocY; + logic [`NF-1:0] PreprocA, PreprocX; + logic [`NF-1:0] PreprocB, PreprocY; + logic [`NF+3:0] SqrtX; + logic [`NE+1:0] Qe; // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; // assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; @@ -49,23 +58,22 @@ module srtpreproc ( // ***can probably merge X LZC with conversion // cout the number of leading zeros - lzc #(`NF+1) lzcA (Xm, XZeroCnt); - lzc #(`NF+1) lzcB (Ym, YZeroCnt); + lzc #(`NF+1) lzcX (Xm, XZeroCnt); + lzc #(`NF+1) lzcY (Ym, YZeroCnt); // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; // assign PreprocA = ExtraA << zeroCntA; // assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {Xm[`NF-1:0]< Date: Thu, 21 Jul 2022 12:47:51 -0700 Subject: [PATCH 26/36] removed ugly /ref/Ref from tests.vh, added back d_fsd-align-01.S and d_fld-align-01.S tests to tests.vh, updated makefile to fix the riscof issues and fix fld fsd tests, updated testbench.sv for comptability with changes --- pipelined/testbench/testbench.sv | 29 +- pipelined/testbench/tests.vh | 1152 ++++++++++---------- tests/riscof/Makefile | 8 +- tests/riscof/sail_cSim/riscof_sail_cSim.py | 4 +- 4 files changed, 607 insertions(+), 586 deletions(-) diff --git a/pipelined/testbench/testbench.sv b/pipelined/testbench/testbench.sv index 0fb5f5e60..30c62865e 100644 --- a/pipelined/testbench/testbench.sv +++ b/pipelined/testbench/testbench.sv @@ -195,13 +195,19 @@ logic [3:0] dummy; /* if (tests[0] == `IMPERASTEST) pathname = tvpaths[0]; else pathname = tvpaths[1]; */ - memfilename = {pathname, tests[test], ".elf.memfile"}; + if (riscofTest) memfilename = {pathname, tests[test], "/ref/ref.elf.memfile"}; + else memfilename = {pathname, tests[test], ".elf.memfile"}; if (`IMEM == `MEM_TIM) $readmemh(memfilename, dut.core.ifu.irom.irom.ram.memory.RAM); else $readmemh(memfilename, dut.uncore.ram.ram.memory.RAM); if (`DMEM == `MEM_TIM) $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.memory.RAM); - ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"}; - ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"}; + if (riscofTest) begin + ProgramAddrMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.addr"}; + ProgramLabelMapFile = {pathname, tests[test], "/ref/ref.elf.objdump.lab"}; + end else begin + ProgramAddrMapFile = {pathname, tests[test], ".elf.objdump.addr"}; + ProgramLabelMapFile = {pathname, tests[test], ".elf.objdump.lab"}; + end // declare memory labels that interest us, the updateProgramAddrLabelArray task will find the addr of each label and fill the array // to expand, add more elements to this array and initialize them to zero (also initilaize them to zero at the start of the next test) updateProgramAddrLabelArray(ProgramAddrMapFile, ProgramLabelMapFile, ProgramAddrLabelArray); @@ -241,7 +247,8 @@ logic [3:0] dummy; // this contains instret and cycles for start and end of test run, used by embench python speed script to calculate embench speed score // also begin_signature contains the results of the self checking mechanism, which will be read by the python script for error checking $display("Embench Benchmark: %s is done.", tests[test]); - outputfile = {pathname, tests[test], ".sim.output"}; + if (riscofTest) outputfile = {pathname, tests[test], "/ref/ref.sim.output"}; + else outputfile = {pathname, tests[test], ".sim.output"}; outputFilePointer = $fopen(outputfile); i = 0; while ($unsigned(i) < $unsigned(5'd5)) begin @@ -256,7 +263,7 @@ logic [3:0] dummy; for(i=0; i config$(XLEN).ini -build_arch: +fsd_fld_tempfix: + # this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests + # https://github.com/riscv-non-isa/riscv-arch-test/issues/266 + find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fld*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} + find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fsd*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} + +build_arch: fsd_fld_tempfix riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser rm -rf $(arch_workdir)/rv$(XLEN)i_m mv -f $(work_dir)/rv$(XLEN)i_m $(arch_workdir)/ diff --git a/tests/riscof/sail_cSim/riscof_sail_cSim.py b/tests/riscof/sail_cSim/riscof_sail_cSim.py index b86f62b55..dc3033ab3 100644 --- a/tests/riscof/sail_cSim/riscof_sail_cSim.py +++ b/tests/riscof/sail_cSim/riscof_sail_cSim.py @@ -90,7 +90,7 @@ class sail_cSim(pluginTemplate): test_dir = testentry['work_dir'] test_name = test.rsplit('/',1)[1][:-2] - elf = 'Ref.elf' + elf = 'ref.elf' execute = "@cd "+testentry['work_dir']+";" @@ -98,7 +98,7 @@ class sail_cSim(pluginTemplate): compile_cmd = cmd + ' -D' + " -D".join(testentry['macros']) execute+=compile_cmd+";" - execute += self.objdump_cmd.format(elf, self.xlen, 'Ref.elf.objdump') + execute += self.objdump_cmd.format(elf, self.xlen, 'ref.elf.objdump') sig_file = os.path.join(test_dir, self.name[:-1] + ".signature") execute += self.sail_exe[self.xlen] + ' -z268435455 --test-signature={0} {1} > {2}.log 2>&1;'.format(sig_file, elf, test_name) From 635a02cf6a8796e8ec106b3cdc4f4b0afad2967a Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Thu, 21 Jul 2022 12:50:02 -0700 Subject: [PATCH 27/36] made makefile more specific, just incase future additions --- tests/riscof/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index 259eb3f83..a9a442d38 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -20,8 +20,8 @@ root: fsd_fld_tempfix: # this is a temporary fix, there's a typo on the rv64i_m/D/src/d_fsd-align-01.S and rv64i_m/D/src/d_fld-align-01.S tests # https://github.com/riscv-non-isa/riscv-arch-test/issues/266 - find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fld*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} - find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*fsd*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} + find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fld-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} + find ../../addins/riscv-arch-test/riscv-test-suite -type f -name "*d_fsd-align*.S" | xargs -I{} sed -i 's,regex(\.\*32\.\*),regex(\.\*64\.\*),g' {} build_arch: fsd_fld_tempfix riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser From c7e84f8e40f0cdb6a61410651b307fdfa2e805d8 Mon Sep 17 00:00:00 2001 From: cturek Date: Thu, 21 Jul 2022 20:45:08 +0000 Subject: [PATCH 28/36] Renamed variables, moved output handling to postprocessor, added remainder handling --- pipelined/srt/sqrttestgen | Bin 22792 -> 22792 bytes pipelined/srt/srt.sv | 100 +++++++++++++++++++++++++++++-------- pipelined/srt/testbench.sv | 10 ++-- 3 files changed, 85 insertions(+), 25 deletions(-) diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen index 06615165395cb6abb37c9d60152d5fe70f15e11a..d2cb80d445718cfa8efc5daa24fefc6b5d462149 100755 GIT binary patch delta 695 zcmXxiO=uHA6ae6PsoR()?atZ;R#1#kNU;Y?6~!LxR$N>x6e;8qJj5P~^8llk64tG4?8ZfL&qtv#acVY(I2f8`~B(tn-8&JVB^% zLv8Hs>7 zZo0!)K%q3#VTtNPnpg^0EzqcJH-al(U&QYCp+v(e(*50h`-IMcWAT14@lJdYhTV_x zS%6{8wr4@O&)OwKzBjPkaU>cP3sJ|YK8;uPk)hllrHb?=Rafc)P0_QC=V$eJPOpa(G7U#@UhaWGd`dBa z?w9-@{Yv3#VQdkGFkP@FH8_nI?P>7bbvv)a aS)9o1k8M?W_#ksQW=gABWV&B6>6T65Z3JZi delta 715 zcmXw%L1+^}6o%(DjY&w8oduP8uw6xMwP1rvN=3~IUEPx+nj*a^9-sd^$XE8 z%=wj`)&Wn)BW^HXVD^~LF|RS7VgACLXWn2w%G_c;#N6rWN_gN9FZi6Wn_1}SLRsb{ z^A2Xo+~WrfGXG_^n13^mGXG@G=_^_{9bdfQR-KOf=zd4s9d#ea z7j{{~t$uuefO@(LB=rYD;!8@b2iTgYS<~4HTHbzxRyv>jG3NMPk2NOma_G8&7p1r!Mb&bjm`dZJDyX14^J@N+m zFrBzTKJZU#L3daux5$&!L!0~_X5`r6S5#%3vcyzEO0pb{541)N3?q2Z6pa0u{$5Y!oJbYc8NLkVjH(Ba?aU}L1h0y`7 diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index a7216b9ff..7cbcd0daa 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -35,33 +35,35 @@ module srt ( input logic Start, input logic Stall, // *** multiple pipe stages input logic Flush, // *** multiple pipe stages - // Floating Point Inputs - // later add exponents, signs, special cases + // Floating Point input logic XSign, YSign, input logic [`NE-1:0] XExp, YExp, input logic [`NF-1:0] SrcXFrac, SrcYFrac, + // Integer input logic [`XLEN-1:0] SrcA, SrcB, + // Customization input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit input logic W64, // 32-bit ints on XLEN=64 + // Selection input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic rsign, done, - output logic [`DIVLEN-2:0] Rem, Quot, // *** later handle integers + output logic [`DIVLEN-1:0] Rem, Result, output logic [`NE-1:0] rExp, output logic [3:0] Flags ); - logic qp, qz, qn; // quotient is +1, 0, or -1 + logic qp, qz, qn; // result bits are +1, 0, or -1 logic [`NE-1:0] calcExp; logic calcSign; logic [`DIVLEN+3:0] X, Dpreproc, C, F, S, SM, AddIn; logic [`DIVLEN+3:0] WS, WSA, WSN, WC, WCA, WCN, D, Db, Dsel; - logic [$clog2(`XLEN+1)-1:0] intExp, dur, calcDur; + logic [$clog2(`XLEN+1)-1:0] zeroCntD, intExp, dur, calcDur; logic intSign; logic cin; - srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, calcDur, intSign); + srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign); // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -91,7 +93,7 @@ module srt ( // otfc2 #(`DIVLEN) otfc2(clk, Start, qp, qz, qn, Quot); // otherwise use sotfc creg sotfcC(clk, Start, Sqrt, C); - sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, Quot, S, SM); + sotfc2 sotfc2(clk, Start, qp, qn, Sqrt, C, S, SM); fsel2 fsel(qp, qn, C, S, SM, F); // Adder input selection @@ -103,7 +105,7 @@ module srt ( expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); - signcalc signcalc(.XSign, .YSign, .calcSign); + srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Result, .Rem, .calcSign); endmodule //////////////// @@ -123,11 +125,11 @@ module srtpreproc ( input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic [`DIVLEN+3:0] X, D, - output logic [$clog2(`XLEN+1)-1:0] intExp, dur, // Quotient integer exponent + output logic [$clog2(`XLEN+1)-1:0] zeroCntB, intExp, dur, // Quotient integer exponent output logic intSign // Quotient integer sign ); - logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; + logic [$clog2(`XLEN+1)-1:0] zeroCntA; logic [`XLEN-1:0] PosA, PosB; logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY, DivX; logic [`NF+4:0] SqrtX; @@ -235,7 +237,7 @@ module otfc2 #(parameter N=66) ( input logic clk, input logic Start, input logic qp, qz, qn, - output logic [N-3:0] r + output logic [N-3:0] Result ); // The on-the-fly converter transfers the quotient // bits to the quotient as they come. @@ -261,7 +263,7 @@ module otfc2 #(parameter N=66) ( QMNext = {QMR, 1'b0}; end end - assign r = Q[N] ? Q[N-1:2] : Q[N-2:1]; + assign Result = Q[N] ? Q[N-1:2] : Q[N-2:1]; endmodule @@ -274,7 +276,6 @@ module sotfc2( input logic sp, sn, input logic Sqrt, input logic [`DIVLEN+3:0] C, - output logic [`DIVLEN-2:0] Sq, output logic [`DIVLEN+3:0] S, SM ); // The on-the-fly converter transfers the square root @@ -298,7 +299,6 @@ module sotfc2( SMNext = SM | (C & ~(C << 1)); end end - assign Sq = S[`DIVLEN] ? S[`DIVLEN-1:1] : S[`DIVLEN-2:0]; endmodule ////////////////////////// @@ -395,14 +395,74 @@ module expcalc( endmodule -////////////// -// signcalc // -////////////// -module signcalc( - input logic XSign, YSign, +module srtpostproc( + input logic [`DIVLEN+3:0] WS, WC, X, D, S, SM, + input logic [$clog2(`XLEN+1)-1:0] dur, zeroCntD, + input logic XSign, YSign, Signed, Int, + output logic [`DIVLEN-1:0] Result, Rem, output logic calcSign ); + logic [`DIVLEN+3:0] W, shiftRem, intRem, intS; + logic [`DIVLEN-1:0] floatRes, intRes; + logic WSign; + assign W = WS + WC; + assign WSign = W[`DIVLEN+3]; + // Remainder handling + always_comb begin + if (zeroCntD == ($clog2(`XLEN+1))'(`XLEN)) begin + intRem = X; + intS = -1; + end + else if (~Signed) begin + if (WSign) begin + intRem = W + D; + intS = SM; + end else begin + intRem = W; + intS = S; + end + end + else case ({YSign, XSign, WSign}) + 3'b000: begin + intRem = W; + intS = S; + end + 3'b001: begin + intRem = W + D; + intS = SM; + end + 3'b010: begin + intRem = W - D; + intS = ~S; + end + 3'b011: begin + intRem = W; + intS = ~SM; + end + 3'b100: begin + intRem = W; + intS = ~SM; + end + 3'b101: begin + intRem = W + D; + intS = ~SM + 1; + end + 3'b110: begin + intRem = W - D; + intS = S + 1; + end + 3'b111: begin + intRem = W; + intS = S; + end + endcase + end + assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0]; + assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0]; + assign Result = Int ? intRes : floatRes; assign calcSign = XSign ^ YSign; + assign shiftRem = intRem >>> dur; + assign Rem = shiftRem[`DIVLEN-1:0]; +endmodule -endmodule \ No newline at end of file diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 7a4e1897b..b72ffb42e 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -49,7 +49,7 @@ module testbench; logic asign, bsign; logic [`NF-1:0] r; logic [`XLEN-1:0] rInt; - logic [`DIVLEN-2:0] Quot; + logic [`DIVLEN-1:0] Quot, Rem; // Test parameters parameter MEM_SIZE = 40000; @@ -72,7 +72,7 @@ module testbench; // Equip Int test or Sqrt test assign Int = 1'b0; - assign Sqrt = 1'b0; + assign Sqrt = 1'b1; // Divider srt srt(.clk, .Start(req), @@ -82,7 +82,7 @@ module testbench; .SrcXFrac(afrac), .SrcYFrac(bfrac), .SrcA(a), .SrcB(b), .Fmt(2'b00), .W64(1'b1), .Signed(1'b0), .Int, .Sqrt, - .Quot, .Rem(), .Flags(), .done); + .Result(Quot), .Rem, .Flags(), .done); // Counter // counter counter(clk, req, done); @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("testvectors", Tests); + $readmemh ("sqrttestvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; @@ -117,7 +117,7 @@ module testbench; always @(posedge clk) begin r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)]; - rInt = {1'b1, Quot}; + rInt = Quot; if (done) begin if (~Int & ~Sqrt) begin req <= #5 1; From 8bfb23320468af2c4e814999f3508ba6d6e5b208 Mon Sep 17 00:00:00 2001 From: cturek Date: Fri, 22 Jul 2022 01:27:08 +0000 Subject: [PATCH 29/36] Changed testbench to operate on two inputs and one output, changed all test generators, changed srt module to return only one output and take in Mod as a signal to compute integer remainder --- pipelined/srt/Makefile | 8 +++- pipelined/srt/exptestgen.c | 4 -- pipelined/srt/inttestgen | Bin 18496 -> 18496 bytes pipelined/srt/inttestgen.c | 23 +++--------- pipelined/srt/modtestgen | Bin 0 -> 18496 bytes pipelined/srt/modtestgen.c | 73 ++++++++++++++++++++++++++++++++++++ pipelined/srt/sqrttestgen | Bin 22792 -> 22792 bytes pipelined/srt/sqrttestgen.c | 4 -- pipelined/srt/srt-waves.do | 1 + pipelined/srt/srt.sv | 22 ++++++----- pipelined/srt/testbench.sv | 40 ++++++++++---------- 11 files changed, 118 insertions(+), 57 deletions(-) create mode 100755 pipelined/srt/modtestgen create mode 100644 pipelined/srt/modtestgen.c diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile index 5d7898b17..ee249139d 100644 --- a/pipelined/srt/Makefile +++ b/pipelined/srt/Makefile @@ -1,4 +1,4 @@ -all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen +all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen modtestgen sqrttestgen: sqrttestgen.c gcc sqrttestgen.c -o sqrttestgen -lm @@ -28,6 +28,10 @@ inttestgen: inttestgen.c gcc -lm -o inttestgen inttestgen.c ./inttestgen +modtestgen: modtestgen.c + gcc -lm -o modtestgen modtestgen.c + ./modtestgen + clean: - rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen + rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen modtestgen diff --git a/pipelined/srt/exptestgen.c b/pipelined/srt/exptestgen.c index d6bebb774..61fe74aa4 100644 --- a/pipelined/srt/exptestgen.c +++ b/pipelined/srt/exptestgen.c @@ -96,10 +96,6 @@ void output(FILE *fptr, int aSign, int aExp, double aFrac, int bSign, int bExp, // Print r in standard double format fprintf(fptr, "%03x", rExp|(rSign<<11)); printhex(fptr, rFrac); - fprintf(fptr, "_"); - - // Spacing for testbench, value doesn't matter - fprintf(fptr, "%016x", 0); fprintf(fptr, "\n"); } diff --git a/pipelined/srt/inttestgen b/pipelined/srt/inttestgen index 2f7e6819945a6b827154fdc1c264eb7d77a29312..9e65cf4a031da28d084759b76e92bd9fd7e4bfc3 100755 GIT binary patch delta 1043 zcmYLIO-vI(6rR~a*(oh;5h#?VK&t#SLP35?lt>A!SwvKj5;gvCk(;I$;(>sTC6ZFq z>Kw$xgI-Kbh)VW?2W^ok#e?BMh{Q_|Hfe%lZGvf}u)b}#?Ig3``}yAY-t6oK{W$2y z{&k{JqSVqn+6ri0inQZW*iwdzzQW&2*13_1h2)Ki=j+=;4-Ioa-jLt#`FX~e$Hs^a zajVO2{)4;*%gdoEpr?_7dfZHlSbg~=pCif5b3XegH|KRkR~-pab-R<=Sfg{`8|QOI zIp27g^EqOicQtB)q{orKp2-B6`si^a!Ag>;7n%ICeL?WAtvESvg!BBY=Dd>$&N~jJ zAmbeX8E1^fSi{kqz)Nd8fUqe_cY-v?fcrUc9qeddniHm-#K0Wcrz6D34Z{3DUV=P| zvbko_F0kWjb1<&fs1QaduD-TBrmhcxpD!LMuD-Z@b!DtcT{SJ%(g3L_8_ugnPVRoA zh!@T@h!eBXb99G%ZF##V2=l;eP6~nxa1>B1EQ2;dz`6rL2m`(b6x)5LzZ56TOHgHj zD6$u5Gyu)8Hz1ZT#$X+WZHKMdqeF$YJ!e5w3&p6XNOO{V4XiUX13Bn&Qx>ZUNaPO%_7frnix8&^-{=oDF zXZ;AFvz2CLCLIA99j&ZXGh(UeiEB4a%DF%~U%bJ0)lJKlyIGPAJvNmqaz5Q@JV}uB zndZ|)%OM)ZCH$49E?kkFF4Z9YkRdf6o<2)Zj)oIq=W6Zp~qUgg^3aRul*Ex!rD^o8QRy;4X=>S zc08!iNTX+TWO9VGsTo_WI+&r)dZ!_EeL(1kTrpB1qx3c!rJ`Xe-p~KBT9nvHYwQ+9 i=J3$xvb}?_QXiH+wOdK*O}Q}M>lQr2Y>J<6?E5h-KC^f+=*){7`Y&7<&$rT4@vdl53E$< zS}3%;CTs zp4%3Bj3|D3=J(ce{bB<6MOXadr_b=$yc@$ywn)`Y^29vV`sV!htBt`SO7B8seS$`RJ}dpC9QC6jWHERtX2 z8c{sk#5WGsy-R!^jrhib#3zS3ln7BawF;>4$`R}xj|daXUO5W4s4y|hC@g>PcPuuy zwc#PkKJosr5oH|8#vlcql1@sxy!Yi^ty6v%;Hfogl56=Kx74cI?p|4>3Dt#!cXZPLn-#F ziv|lvLngvGMfGg45Ys!gTNR_B=(A0;*v zsgRF}94x6&rEWRbt$}T=`v;gd6h?{;Td<);P@s)mH?i9eSQ@llv22)9B$>RXh~_P+ zmN?eF@9z7^_a5KNf&lB5g=drB-S7Kzci;Va_l|e^WMXI_7zhXsLGil+Q*$jb38`-( z<~agrO!SI2_`XHFSF}L7Nn-FNY0N>F$u!ee$rU056!m(83g`{SWQ6I>3KmScgh-o- zr$~%aibmR1lBp1y1W*sly0k_nfgY6+rfDk23{lp@B%*@ z2$i2HRabQ7vd4CA@5-e+bJ=`(x^udBXXnoC(NZD0P1YOjqwKx|qcSIKZ+0z~`Fi*u zhWzV)J$$ZwD)v`jUA)qH-`Bb_fBpw2I#3pylG{y2$OPiME}!JC0jc_M4M18wEbayD zS`Xax!B6?%pZ39T2V8@XI~V|X<@Zt|zHwn8j)9F;K~8`zgPEZT4$L&#gQ z;r8*gY{O}2N>l7toRiumqQ2IXzgIz$`#o|r8;<>-!)6<9pWnJ|INIXSYs0H;3e1=d z2S-$cZo_eGaQOF*TVB&=UuxA)Hb2*<3H{UaR&eEMef9^f=cUNXj$Z=2vgvvF?${WE z7^WwXcWGe-!lr+~G`jZE(=z=Grm@PG=4JXvn8vDJdQ7Ijk7;!Ir8$}Y4yLinmrl#{ zUt=0we`yBNSiYWL4_1D1k6!tuKKuOANOJJP{8C#?=oik-NOIxCDk#^w4WRcQr&0cV zCfc#_5wIY{Y5mmh`+%XJYW@V0UCY)wsOpQYs;X@(3mqG0Q1CoakdI6G9d{zRE{;r=z*v7pIo)B2Lso*9_bB@4t%^RjMbXFO ziq4NKdT|Bl_tvkDi6=I|*JA1F%8Fh|Ea|fkEs64Gs6z_tP&xM<4FxgRC%RGa6Cn5m zCKpE_b|G;EqT*L~pG;iQ&*&%npPT_Jix(2h67D^jSgyef`pLwC{$>3)4=w7I(O>8% z_rqs&Q9oHm&XQiau=svZn@ubSW*=H^d-(gai7ODh()RG5G5>3CtgM{=;d$kdmo&Zd z(6U}BgDRM*><2TWOCSo^&(Ye)!J0}w^#snai<6+PvaHJVx@88H3>Jy);M0ih;OJi5 z;2IS+Uq!6JGof#4mSjnZMi-S%VYK<&JyKWjZ!rgmg2y>~hE9 zb4P3)6x%=kvBApU$L|`fyc!>kSKf#xhfZxuHfds1Kh=r!(9j*wJ6>vC9KHgzJNHJ& z+Sv0f_1B@wYeSV+`ztTUSFZm@efE4n-}O)Bf5q|p`$ys*j~|KO6F2OAPL2;K2U0oi zX{6(PY7Y;H$wJyPOV<77m{lm2n76l3&ZYCWTbi7~HJgN%$>z+}VpDg|&RlMq<&n;> z{BumDm-Ym13VjgQE^Y96?DdtEGeEncT2nxe0>xo_1Nd+r=FM z2qwboKOahjwNp)r@aE4o>*4MbEqb_jcGbRcEFbQThr8q9&3$35FTB1l9O( z)n(W~beZ0_aQt9k{wr5k3Yb0(hBv!AwBG2BXt%blds|QUj-Kt> z=7VNh(^HnhJA1nCKzVFKoRhH=?u*|R_`p<8_qJU^{2ke3x)EhCD7ofhIiJtw$F=dX zF;_-HxMfrMeBoF&?g~pvkJ#Rb&i+ixm9*!m#xBN)-p0hsI@UwfcG*J-ziCdQbaZOf}^jIz8}GNchEmU?cl~|^ZiYDapHe;5^Dn&jFx?hWA^=>KL}1+&^`fd4crt_fQOe>w=j!#eaq9J7pgR_p$y5_SIznckP9}rF}J2 z{B7>5`K%*l`MFJBpf-Ig>qxiGo7rc(+-Kg3eYcKZf8_l@rj7pi_V#&y+x@Z8o^R>B z;~g`U)M$J}j#1eH$YLtKU`+--qDsh0llE#NmAPT5-`+{@Ut! z4X6Bdg3oC!uefQp+em4W2fwc|A`O&Z6EwuAN+M6d?T)wJnHo!AN&a6?)pAL zIKOvueZK*?S3mg{$@4oA%m0vYo?jV%j&QgC|I-IwhYv50dTsN;5BcB)AN&hG_@h4f zlZ3nL`#jUc!C~-R0lc=5e)%XxFGTR zdHX)H^QG39)R1RgAvO(oMBFHnu6h2GQr=}8lEa7x>z{Xj+0=_Mr;2;60WU{6|J6VK}6|rpOMXHt>~CLZJ2Ol$t4q= zC|GU`<-G7{IiJhkYvzu7D)=#@^0HNV%2UaQ7nPT;Tt$_|axy^O%8nUDGglb1*Vl+R$@>*?gx`@zZg@P|`C8mRY+Q>flQPQEk(P|>pc&?)2cmy;|(i>c#K zO{;iZsDriX^5o=kP_d9jEV?>-RUKe*S) zQGPQFID>5@bh4oL9~8#Eq2a#xkTEZvQTPE15g$0x1SF`!ma(Fu;pliKjK$iorxXuNN*(@Kp~YCK<#9xG?z zl6Dp*5=>3t0rY74cpmI2Y84gdezRD@^XF2G5(e<#S&>2oicRI<@hANnwajUV%MKGQ z7G!sbniJHuC(@wHDW#$MxYFP_7*0)Q;mJFWk5%}J$_$h-Xde-U(KZPqNR>-gCYrLW zV)j@W?w;`?;O$equL4W8>g_k)uj0Q;!1q-+2b8sk_d^2jh~R4<2Hw}=d;gk5YhS!> zWTXcoc75LevdG|VEJ)O?-w*h!aHoXxKNE~e$ull}UT0RpluShf7~!5E|j z?5jy4da1&&{E~iReZKE<7vS=hQHfXg-IfuA@J&Uk-){fi101{~!{z7wxvx`2`Th`> zpXHd|3;KBHhSR){H&6N+_$@2>wNq~Aw!yf1i|6dtwd z<8P*J{m%o2KEd+I&=Mo62;ZktDOFaEBdx&4uCI~4Mh0h4$QI<80Z23bV~E@J=SY8! z^hc>3*#zrx`YSGd-nU$)i&1M>kZ6-DJqKybpZ(AKo2_)wYr-YZ{$m*ZYuD#}Q2zTF zNfspP&i~skecms +#include +#include + +/* Constants */ + +#define ENTRIES 10 +#define RANDOM_VECS 500 + +/* Prototypes */ + +void output(FILE *fptr, long a, long b, long rem); +void printhex(FILE *fptr, long x); +double random_input(void); + +/* Main */ + +void main(void) +{ + FILE *fptr; + long a, b, rem; + long list[ENTRIES] = {1, 3, 5, 18, 25, 33, 42, 65, 103, 255}; + int i, j; + + if ((fptr = fopen("modtestvectors","w")) == NULL) { + fprintf(stderr, "Couldn't write testvectors file\n"); + exit(1); + } + + for (i=0; iS@dfGW*6;)pR$3B;vl1Fb|V94`HlJm(ND)e8+rKp=J)G)=H#`G zo$YJs~8&&+{Z~M0Ng7L0i#%Qq~qfYzlY>gQ41$-}chVq=T zq|&7q(!bnZkC(0&@FDL6RSvFy(8XB#MqXbPE8b4py~X3OOK#$W@QmC!IP>3()tGTy zU#%K{W3~JEhcj2NB#fzB#e?=fI_AZy60TQLhH^9R#6{1(0h!4S(+T8No zKbDmfz5D9aw_JU4wd+gC>tk#)hCUZ@xT-eubLNWbl_3=f!<|NhN6np0(?Nd}uSb*^ zEOqz!=VV8%X5*vZ@-+tG&mEmor89_AV61af9bOeOe?fFl#2=&n9k8_b0Vwc#^YuNuIgcWPZiV$?9oks=4%yl0;bs9=R!o<+&CTav zdtZpR!v4NxY`4Ti4VBGQX7DO(xnvfEr{Xx$9Ep25-wiM8X`X=CJ%!ea2~t>#*{4*V;Wt gp(WAd@1DC%W^pcCgdNmOX4+bBa z^Pvh2ZG|+3GoKplgIZ0)A5A3P2OCMaBQ?D?P1D0PrP`QY+BCU}B*;roq+P$+{lGfO z&g}1-Z)SF9Cp&ze51;4pv?uCiR*8P{Yr>mUY$TPSb(4fie!FDvM_+cGY5Z<$-^8uW zFD?FZbn4yJo5q;qkgFhA9AtWGE)9>}3j=ptjIAi%lR?uoR}|H?K-S6E$(`gBIYLg5 zqvRjSN60^r`^n?v_igvNzOn<_pjiD3VOZcZLR=tD7!x>6_*&o;VM5?IVOroA;etR6 zW<7y{*X_{B>#V;Rhqz0ULjYK{~%YB?~-?+Q(Df;^!|#O8 zi;nVhAW7wX9BQQsJ_QG)n(lMrkc^}*h!d4vTD5jd%jl=I97eO^pxO7A5^su(8O#uY zpAc+z>dC>=)TNJHN)(FRQTRrx+c(1*>rooCOzhs`Q}}_j3|4ZzL+sa~IXse9K+9l` z%ZJg}?Uc4AW?$^Fva?;F$QAq`?3C*cPCvQeZ-1NSkhbO=`e3c`!%yQ^E{64y``Li? z4z)?K3ZqqelR;-?Kw9EO9tmH0WY%7Mo$4i1qbsL{C zuBdJqmdkan8_!gXmU?vD4pBN`zOmhtmThT9`QEF1gH7;PQ=PQl*@Q!2ux?8+ZVh8X zqG(pmhD1*P(>ILn<`elIZ6jyCam)V&)hz)}C-sM6zI8KXc!_bW<$2C^WNDtlBG~$P z5_quG>1BS^a0GYe@igpi_i+slv@hU&RmfMizPR-~SaCey%ptQTcqN3HMsLW?c{jYL z#c;9*S_2P4S;xzK33@wPak0scL|K%cWPOV5`HXp)c!cBoeYngQ`$x-eNI|RnIwi$0 yXA_gzw?>;jF7dbF!D|g0`>I1#xY-k-X(MnL_21w3Y`D45uiDe$d)*Ch&-)idgXL=g diff --git a/pipelined/srt/sqrttestgen.c b/pipelined/srt/sqrttestgen.c index 76c6a6649..7a45449fd 100644 --- a/pipelined/srt/sqrttestgen.c +++ b/pipelined/srt/sqrttestgen.c @@ -86,10 +86,6 @@ void output(FILE *fptr, int aExp, double aFrac, int rExp, double rFrac) // Print r in standard double format fprintf(fptr, "%03x", rExp); printhex(fptr, rFrac); - fprintf(fptr, "_"); - - // Spacing for testbench, value doesn't matter - fprintf(fptr, "%016x", 0); fprintf(fptr, "\n"); } diff --git a/pipelined/srt/srt-waves.do b/pipelined/srt/srt-waves.do index 1e0c3f281..2fbf40c18 100644 --- a/pipelined/srt/srt-waves.do +++ b/pipelined/srt/srt-waves.do @@ -2,4 +2,5 @@ add wave -noupdate /testbench/* add wave -noupdate /testbench/srt/* add wave -noupdate /testbench/srt/sotfc2/* add wave -noupdate /testbench/srt/preproc/* +add wave -noupdate /testbench/srt/postproc/* add wave -noupdate /testbench/srt/divcounter/* diff --git a/pipelined/srt/srt.sv b/pipelined/srt/srt.sv index 7cbcd0daa..1b61bc14f 100644 --- a/pipelined/srt/srt.sv +++ b/pipelined/srt/srt.sv @@ -47,9 +47,10 @@ module srt ( // Selection input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs + input logic Mod, // perform remainder calculation (modulo) instead of divide input logic Sqrt, // perform square root, not divide output logic rsign, done, - output logic [`DIVLEN-1:0] Rem, Result, + output logic [`DIVLEN-1:0] Result, output logic [`NE-1:0] rExp, output logic [3:0] Flags ); @@ -63,7 +64,7 @@ module srt ( logic intSign; logic cin; - srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign); + srtpreproc preproc(SrcA, SrcB, SrcXFrac, SrcYFrac, XExp, Fmt, W64, Signed, Int, Mod, Sqrt, X, Dpreproc, zeroCntD, intExp, calcDur, intSign); // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -105,7 +106,7 @@ module srt ( expcalc expcalc(.XExp, .YExp, .calcExp, .Sqrt); - srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Result, .Rem, .calcSign); + srtpostproc postproc(.WS, .WC, .X, .D, .S, .SM, .dur, .zeroCntD, .XSign, .YSign, .Signed, .Int, .Mod, .Result, .calcSign); endmodule //////////////// @@ -123,6 +124,7 @@ module srtpreproc ( input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs + input logic Mod, // perform remainder calculation (modulo) instead of divide input logic Sqrt, // perform square root, not divide output logic [`DIVLEN+3:0] X, D, output logic [$clog2(`XLEN+1)-1:0] zeroCntB, intExp, dur, // Quotient integer exponent @@ -161,7 +163,7 @@ module srtpreproc ( assign D = {4'b0001, Int ? PreprocB : PreprocY}; // Integer exponent and sign calculations - assign intExp = zeroCntB - zeroCntA + 1; + assign intExp = zeroCntB - zeroCntA - Mod + (PreprocA >= PreprocB); assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); // Number of cycles of divider @@ -398,8 +400,8 @@ endmodule module srtpostproc( input logic [`DIVLEN+3:0] WS, WC, X, D, S, SM, input logic [$clog2(`XLEN+1)-1:0] dur, zeroCntD, - input logic XSign, YSign, Signed, Int, - output logic [`DIVLEN-1:0] Result, Rem, + input logic XSign, YSign, Signed, Int, Mod, + output logic [`DIVLEN-1:0] Result, output logic calcSign ); logic [`DIVLEN+3:0] W, shiftRem, intRem, intS; @@ -460,9 +462,11 @@ module srtpostproc( end assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0]; assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0]; - assign Result = Int ? intRes : floatRes; + assign shiftRem = (intRem >>> (`DIVLEN - dur + 2)); + always_comb + if (Int) Result = intRes >> (`DIVLEN - dur); + else if (Mod) Result = shiftRem[`DIVLEN-1:0]; + else Result = floatRes; assign calcSign = XSign ^ YSign; - assign shiftRem = intRem >>> dur; - assign Rem = shiftRem[`DIVLEN-1:0]; endmodule diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index b72ffb42e..513305b26 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -42,24 +42,23 @@ module testbench; logic clk; logic req; logic done; - logic Int; + logic Int, Sqrt, Mod; logic [`XLEN-1:0] a, b; logic [`NF-1:0] afrac, bfrac; logic [`NE-1:0] aExp, bExp; logic asign, bsign; logic [`NF-1:0] r; logic [`XLEN-1:0] rInt; - logic [`DIVLEN-1:0] Quot, Rem; + logic [`DIVLEN-1:0] Quot; // Test parameters parameter MEM_SIZE = 40000; parameter MEM_WIDTH = 64+64+64+64; // Test sizes - `define memrem 63:0 - `define memr 127:64 - `define memb 191:128 - `define mema 255:192 + `define memr 63:0 + `define memb 127:64 + `define mema 191:128 // Test logicisters logic [MEM_WIDTH-1:0] Tests [0:MEM_SIZE]; // Space for input file @@ -70,9 +69,10 @@ module testbench; logic rsign; integer testnum, errors; - // Equip Int test or Sqrt test - assign Int = 1'b0; - assign Sqrt = 1'b1; + // Equip Int, Sqrt, or IntMod test + assign Int = 1'b1; + assign Mod = 1'b0; + assign Sqrt = 1'b0; // Divider srt srt(.clk, .Start(req), @@ -81,8 +81,8 @@ module testbench; .XSign(asign), .YSign(bsign), .rsign, .SrcXFrac(afrac), .SrcYFrac(bfrac), .SrcA(a), .SrcB(b), .Fmt(2'b00), - .W64(1'b1), .Signed(1'b0), .Int, .Sqrt, - .Result(Quot), .Rem, .Flags(), .done); + .W64(1'b1), .Signed(1'b0), .Int, .Mod, .Sqrt, + .Result(Quot), .Flags(), .done); // Counter // counter counter(clk, req, done); @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("sqrttestvectors", Tests); + $readmemh ("inttestvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a; @@ -109,7 +109,7 @@ module testbench; {bsign, bExp, bfrac} = b; nextr = Vec[`memr]; r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)]; - rInt = {1'b1, Quot}; + rInt = Quot; req <= #5 1; end @@ -119,7 +119,7 @@ module testbench; r = Quot[(`DIVLEN - 2):(`DIVLEN - `NF - 1)]; rInt = Quot; if (done) begin - if (~Int & ~Sqrt) begin + if (~Int & ~Sqrt) begin // This test case checks floating point division req <= #5 1; diffp = correctr[51:0] - r; diffn = r - correctr[51:0]; @@ -135,23 +135,21 @@ module testbench; $display("%d Tests completed successfully", testnum); $stop; end - end else if (~Sqrt) begin + end else if (~Sqrt) begin // This test case works for both integer divide and integer modulo req <= #5 1; diffp = correctr[63:0] - rInt; - diffn = rInt - correctr[63:0]; - if (($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + if (($signed(diffp) != 0) | (diffp === 64'bx)) // check if accurate to 1 ulp begin errors = errors+1; - $display("result was %h, should be %h %h %h\n", rInt, correctr, diffn, diffp); + $display("result was %h, should be %h %h\n", rInt, correctr, diffp); $display("failed\n"); - $stop; end if (afrac === 52'hxxxxxxxxxxxxx) begin - $display("%d Tests completed successfully", testnum); + $display("%d Tests completed successfully", testnum - errors); $stop; end - end else begin + end else begin // This test case verifies square root req <= #5 1; diffp = correctr[51:0] - r; diffn = r - correctr[51:0]; From 3d2c6683d8d4331134ea8f54a4b318812e38e97f Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 21 Jul 2022 20:35:46 -0700 Subject: [PATCH 30/36] Fixed UART bug related to parity and MSR/LSR --- pipelined/src/uncore/uartPC16550D.sv | 50 ++++++++++++++++------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv index 524a63454..89bdc837d 100644 --- a/pipelined/src/uncore/uartPC16550D.sv +++ b/pipelined/src/uncore/uartPC16550D.sv @@ -165,6 +165,7 @@ module uartPC16550D( SCR <= #1 8'b0; // not strictly necessary to reset end else begin if (~MEMWb) begin + /* verilator lint_off CASEINCOMPLETE */ case (A) /* -----\/----- EXCLUDED -----\/----- 3'b000: if (DLAB) DLL <= #1 Din; // else TXHR <= #1 Din; // TX handled in TX register/FIFO section @@ -177,34 +178,40 @@ module uartPC16550D( // freq /baud / 16 = div //3'b000: if (DLAB) DLL <= #1 8'd38; //else TXHR <= #1 Din; // TX handled in TX register/FIFO section //3'b000: if (DLAB) DLL <= #1 8'd11; //else TXHR <= #1 Din; // TX handled in - 3'b000: if (DLAB) DLL <= #1 8'd8; //else TXHR <= #1 Din; // TX handled in + 3'b000: if (DLAB) DLL <= #1 8'd8; //else TXHR <= #1 Din; // TX handled in 3'b001: if (DLAB) DLM <= #1 8'b0; else IER <= #1 Din[3:0]; - 3'b010: FCR <= #1 {Din[7:6], 2'b0, Din[3], 2'b0, Din[0]}; // Write only FIFO Control Register; 4:5 reserved and 2:1 self-clearing 3'b011: LCR <= #1 Din; 3'b100: MCR <= #1 Din[4:0]; - 3'b101: LSR[6:1] <= #1 Din[6:1]; // recommended only for test, see 8.6.3 - 3'b110: MSR <= #1 Din[3:0]; 3'b111: SCR <= #1 Din; endcase + /* verilator lint_on CASEINCOMPLETE */ end - + // Line Status Register (8.6.3) - // Ben 6/9/21 I don't like how this is a register. A lot of the individual bits have clocked components, so this just adds unecessary delay. - LSR[0] <= #1 rxdataready; // Data ready - LSR[1] <= #1 (LSR[1] | RXBR[10]) & ~squashRXerrIP;; // overrun error - LSR[2] <= #1 (LSR[2] | RXBR[9]) & ~squashRXerrIP; // parity error - LSR[3] <= #1 (LSR[3] | RXBR[8]) & ~squashRXerrIP; // framing error - LSR[4] <= #1 (LSR[4] | rxbreak) & ~squashRXerrIP; // break indicator - LSR[5] <= #1 THRE; // THRE - LSR[6] <= #1 ~txsrfull & THRE; // TEMT - if (rxfifohaserr) LSR[7] <= #1 1; // any bits in FIFO have error + // Ben 6/9/21 I don't like how this is a register. A lot of the individual bits have clocked components, so this just adds unecessary delay. + if (~MEMWb & (A == 3'b101)) + LSR[6:1] <= #1 Din[6:1]; // recommended only for test, see 8.6.3 + else begin + LSR[0] <= #1 rxdataready; // Data ready + LSR[1] <= #1 (LSR[1] | RXBR[10]) & ~squashRXerrIP;; // overrun error + LSR[2] <= #1 (LSR[2] | RXBR[9]) & ~squashRXerrIP; // parity error + LSR[3] <= #1 (LSR[3] | RXBR[8]) & ~squashRXerrIP; // framing error + LSR[4] <= #1 (LSR[4] | rxbreak) & ~squashRXerrIP; // break indicator + LSR[5] <= #1 THRE; // THRE + LSR[6] <= #1 ~txsrfull & THRE; // TEMT + if (rxfifohaserr) LSR[7] <= #1 1; // any bits in FIFO have error + end // Modem Status Register (8.6.8) - MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send - MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready - MSR[2] <= #1 MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator - MSR[3] <= #1 MSR[3] | DCDb2 ^ DCDbsync; // Delta Data Carrier Detect + if (~MEMWb & (A == 3'b110)) + MSR <= #1 Din[3:0]; + else begin + MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send + MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready + MSR[2] <= #1 MSR[2] | (~RIb2 & RIbsync); // Trailing Edge of Ring Indicator + MSR[3] <= #1 MSR[3] | DCDb2 ^ DCDbsync; // Delta Data Carrier Detect + end end always_comb if (~MEMRb) @@ -215,7 +222,8 @@ module uartPC16550D( 3'b011: Dout = LCR; 3'b100: Dout = {3'b000, MCR}; 3'b101: Dout = LSR; - 3'b110: Dout = {~CTSbsync, ~DSRbsync, ~RIbsync, ~DCDbsync, MSR[3:0]}; + // 3'b110: Dout = {~CTSbsync, ~DSRbsync, ~RIbsync, ~DCDbsync, MSR[3:0]}; + 3'b110: Dout = {~DCDbsync, ~RIbsync, ~DSRbsync, ~CTSbsync, MSR[3:0]}; 3'b111: Dout = SCR; endcase else Dout = 8'b0; @@ -304,7 +312,7 @@ module uartPC16550D( // ERROR CONDITIONS assign rxparity = ^rxdata; - assign rxparityerr = rxparity ^ rxparitybit ^ ~evenparitysel; // Check even/odd parity (*** check if LCR needs to be inverted) + assign rxparityerr = (rxparity ^ rxparitybit ^ ~evenparitysel) & LCR[3]; // Check even/odd parity (*** check if LCR needs to be inverted) assign rxoverrunerr = fifoenabled ? (rxfifoentries == 15) : rxdataready; // overrun if FIFO or receive buffer register full assign rxframingerr = ~rxstopbit; // framing error if no stop bit assign rxbreak = rxframingerr & (rxdata9 == 9'b0); // break when 0 for start + data + parity + stop time @@ -405,7 +413,7 @@ module uartPC16550D( txstate <= #1 UART_IDLE; end - assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) + assign txbitsexpected = 4'd1 + (4'd5 + {2'b00, LCR[1:0]}) + {3'b000, LCR[3]} + 4'd1 + {3'b000, LCR[2]} - 4'd1; // start bit + data bits + (parity bit) + stop bit(s) - 1 // *** explain; is this necessary? if (`QEMU) assign txnextbit = txbaudpulse & (txoversampledcnt[1:0] == 2'b00); // implies txstate = UART_ACTIVE else assign txnextbit = txbaudpulse & (txoversampledcnt == 4'b0000); // implies txstate = UART_ACTIVE From 8dcb794bbb713e144ef420c3cf008408cfc6d47e Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Thu, 21 Jul 2022 20:58:58 -0700 Subject: [PATCH 31/36] added support for new version of riscof and arch tests, now supports tests that can be compiled for both rv32 and rv64 --- pipelined/testbench/tests.vh | 346 ++++++++++----------- tests/riscof/Makefile | 5 +- tests/riscof/sail_cSim/riscof_sail_cSim.py | 2 +- tests/riscof/spike/riscof_spike.py | 2 +- 4 files changed, 178 insertions(+), 177 deletions(-) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index df06eb010..b10bb951b 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1057,176 +1057,176 @@ string imperas32f[] = '{ string arch64d[] = '{ `RISCVARCHTEST, - "rv64i_m/D/src/d_fadd_b10-01.S", - "rv64i_m/D/src/d_fadd_b1-01.S", - "rv64i_m/D/src/d_fadd_b11-01.S", - "rv64i_m/D/src/d_fadd_b12-01.S", - "rv64i_m/D/src/d_fadd_b13-01.S", - "rv64i_m/D/src/d_fadd_b2-01.S", - "rv64i_m/D/src/d_fadd_b3-01.S", - "rv64i_m/D/src/d_fadd_b4-01.S", - "rv64i_m/D/src/d_fadd_b5-01.S", - "rv64i_m/D/src/d_fadd_b7-01.S", - "rv64i_m/D/src/d_fadd_b8-01.S", - "rv64i_m/D/src/d_fclass_b1-01.S", - "rv64i_m/D/src/d_fcvt.d.l_b25-01.S", - "rv64i_m/D/src/d_fcvt.d.l_b26-01.S", - "rv64i_m/D/src/d_fcvt.d.lu_b25-01.S", - "rv64i_m/D/src/d_fcvt.d.lu_b26-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b1-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b22-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b23-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b24-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b27-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b28-01.S", - "rv64i_m/D/src/d_fcvt.d.s_b29-01.S", - "rv64i_m/D/src/d_fcvt.d.w_b25-01.S", - "rv64i_m/D/src/d_fcvt.d.w_b26-01.S", - "rv64i_m/D/src/d_fcvt.d.wu_b25-01.S", - "rv64i_m/D/src/d_fcvt.d.wu_b26-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b1-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b22-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b23-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b24-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b27-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b28-01.S", - "rv64i_m/D/src/d_fcvt.l.d_b29-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b1-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b22-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b23-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b24-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b27-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b28-01.S", - "rv64i_m/D/src/d_fcvt.lu.d_b29-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b1-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b22-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b23-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b24-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b27-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b28-01.S", - "rv64i_m/D/src/d_fcvt.s.d_b29-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b1-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b22-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b23-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b24-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b27-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b28-01.S", - "rv64i_m/D/src/d_fcvt.w.d_b29-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b1-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b22-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b23-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b24-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b27-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b28-01.S", - "rv64i_m/D/src/d_fcvt.wu.d_b29-01.S", - "rv64i_m/D/src/d_fdiv_b1-01.S", - "rv64i_m/D/src/d_fdiv_b20-01.S", - "rv64i_m/D/src/d_fdiv_b2-01.S", - "rv64i_m/D/src/d_fdiv_b21-01.S", - "rv64i_m/D/src/d_fdiv_b3-01.S", - "rv64i_m/D/src/d_fdiv_b4-01.S", - "rv64i_m/D/src/d_fdiv_b5-01.S", - "rv64i_m/D/src/d_fdiv_b6-01.S", - "rv64i_m/D/src/d_fdiv_b7-01.S", - "rv64i_m/D/src/d_fdiv_b8-01.S", - "rv64i_m/D/src/d_fdiv_b9-01.S", - "rv64i_m/D/src/d_feq_b1-01.S", - "rv64i_m/D/src/d_feq_b19-01.S", - "rv64i_m/D/src/d_fle_b1-01.S", - "rv64i_m/D/src/d_fle_b19-01.S", - "rv64i_m/D/src/d_flt_b1-01.S", - "rv64i_m/D/src/d_flt_b19-01.S", - "rv64i_m/D/src/d_fld-align-01.S", - "rv64i_m/D/src/d_fsd-align-01.S", - "rv64i_m/D/src/d_fmadd_b14-01.S", - "rv64i_m/D/src/d_fmadd_b16-01.S", - "rv64i_m/D/src/d_fmadd_b17-01.S", - "rv64i_m/D/src/d_fmadd_b18-01.S", - "rv64i_m/D/src/d_fmadd_b2-01.S", - "rv64i_m/D/src/d_fmadd_b3-01.S", - "rv64i_m/D/src/d_fmadd_b4-01.S", - "rv64i_m/D/src/d_fmadd_b5-01.S", - "rv64i_m/D/src/d_fmadd_b6-01.S", - "rv64i_m/D/src/d_fmadd_b7-01.S", - "rv64i_m/D/src/d_fmadd_b8-01.S", - "rv64i_m/D/src/d_fmax_b1-01.S", - "rv64i_m/D/src/d_fmax_b19-01.S", - "rv64i_m/D/src/d_fmin_b1-01.S", - "rv64i_m/D/src/d_fmin_b19-01.S", - "rv64i_m/D/src/d_fmsub_b14-01.S", - "rv64i_m/D/src/d_fmsub_b16-01.S", - "rv64i_m/D/src/d_fmsub_b17-01.S", - "rv64i_m/D/src/d_fmsub_b18-01.S", - "rv64i_m/D/src/d_fmsub_b2-01.S", - "rv64i_m/D/src/d_fmsub_b3-01.S", - "rv64i_m/D/src/d_fmsub_b4-01.S", - "rv64i_m/D/src/d_fmsub_b5-01.S", - "rv64i_m/D/src/d_fmsub_b6-01.S", - "rv64i_m/D/src/d_fmsub_b7-01.S", - "rv64i_m/D/src/d_fmsub_b8-01.S", - "rv64i_m/D/src/d_fmul_b1-01.S", - "rv64i_m/D/src/d_fmul_b2-01.S", - "rv64i_m/D/src/d_fmul_b3-01.S", - "rv64i_m/D/src/d_fmul_b4-01.S", - "rv64i_m/D/src/d_fmul_b5-01.S", - "rv64i_m/D/src/d_fmul_b6-01.S", - "rv64i_m/D/src/d_fmul_b7-01.S", - "rv64i_m/D/src/d_fmul_b8-01.S", - "rv64i_m/D/src/d_fmul_b9-01.S", - "rv64i_m/D/src/d_fmv.d.x_b25-01.S", - "rv64i_m/D/src/d_fmv.d.x_b26-01.S", - "rv64i_m/D/src/d_fmv.x.d_b1-01.S", - "rv64i_m/D/src/d_fmv.x.d_b22-01.S", - "rv64i_m/D/src/d_fmv.x.d_b23-01.S", - "rv64i_m/D/src/d_fmv.x.d_b24-01.S", - "rv64i_m/D/src/d_fmv.x.d_b27-01.S", - "rv64i_m/D/src/d_fmv.x.d_b28-01.S", - "rv64i_m/D/src/d_fmv.x.d_b29-01.S", - "rv64i_m/D/src/d_fnmadd_b14-01.S", - "rv64i_m/D/src/d_fnmadd_b16-01.S", - "rv64i_m/D/src/d_fnmadd_b17-01.S", - "rv64i_m/D/src/d_fnmadd_b18-01.S", - "rv64i_m/D/src/d_fnmadd_b2-01.S", - "rv64i_m/D/src/d_fnmadd_b3-01.S", - "rv64i_m/D/src/d_fnmadd_b4-01.S", - "rv64i_m/D/src/d_fnmadd_b5-01.S", - "rv64i_m/D/src/d_fnmadd_b6-01.S", - "rv64i_m/D/src/d_fnmadd_b7-01.S", - "rv64i_m/D/src/d_fnmadd_b8-01.S", - "rv64i_m/D/src/d_fnmsub_b14-01.S", - "rv64i_m/D/src/d_fnmsub_b16-01.S", - "rv64i_m/D/src/d_fnmsub_b17-01.S", - "rv64i_m/D/src/d_fnmsub_b18-01.S", - "rv64i_m/D/src/d_fnmsub_b2-01.S", - "rv64i_m/D/src/d_fnmsub_b3-01.S", - "rv64i_m/D/src/d_fnmsub_b4-01.S", - "rv64i_m/D/src/d_fnmsub_b5-01.S", - "rv64i_m/D/src/d_fnmsub_b6-01.S", - "rv64i_m/D/src/d_fnmsub_b7-01.S", - "rv64i_m/D/src/d_fnmsub_b8-01.S", - "rv64i_m/D/src/d_fsgnj_b1-01.S", - "rv64i_m/D/src/d_fsgnjn_b1-01.S", - "rv64i_m/D/src/d_fsgnjx_b1-01.S", - // "rv64i_m/D/src/d_fsqrt_b1-01.S", - // "rv64i_m/D/src/d_fsqrt_b20-01.S", - // "rv64i_m/D/src/d_fsqrt_b2-01.S", - // "rv64i_m/D/src/d_fsqrt_b3-01.S", - // "rv64i_m/D/src/d_fsqrt_b4-01.S", - // "rv64i_m/D/src/d_fsqrt_b5-01.S", - // "rv64i_m/D/src/d_fsqrt_b7-01.S", - // "rv64i_m/D/src/d_fsqrt_b8-01.S", - // "rv64i_m/D/src/d_fsqrt_b9-01.S", - "rv64i_m/D/src/d_fsub_b10-01.S", - "rv64i_m/D/src/d_fsub_b1-01.S", - "rv64i_m/D/src/d_fsub_b11-01.S", - "rv64i_m/D/src/d_fsub_b12-01.S", - "rv64i_m/D/src/d_fsub_b13-01.S", - "rv64i_m/D/src/d_fsub_b2-01.S", - "rv64i_m/D/src/d_fsub_b3-01.S", - "rv64i_m/D/src/d_fsub_b4-01.S", - "rv64i_m/D/src/d_fsub_b5-01.S", - "rv64i_m/D/src/d_fsub_b7-01.S", - "rv64i_m/D/src/d_fsub_b8-01.S" + "rv64i_m/D/src/fadd.d_b10-01.S", + "rv64i_m/D/src/fadd.d_b1-01.S", + "rv64i_m/D/src/fadd.d_b11-01.S", + "rv64i_m/D/src/fadd.d_b12-01.S", + "rv64i_m/D/src/fadd.d_b13-01.S", + "rv64i_m/D/src/fadd.d_b2-01.S", + "rv64i_m/D/src/fadd.d_b3-01.S", + "rv64i_m/D/src/fadd.d_b4-01.S", + "rv64i_m/D/src/fadd.d_b5-01.S", + "rv64i_m/D/src/fadd.d_b7-01.S", + "rv64i_m/D/src/fadd.d_b8-01.S", + "rv64i_m/D/src/fclass.d_b1-01.S", + "rv64i_m/D/src/fcvt.d.l_b25-01.S", + "rv64i_m/D/src/fcvt.d.l_b26-01.S", + "rv64i_m/D/src/fcvt.d.lu_b25-01.S", + "rv64i_m/D/src/fcvt.d.lu_b26-01.S", + "rv64i_m/D/src/fcvt.d.s_b1-01.S", + "rv64i_m/D/src/fcvt.d.s_b22-01.S", + "rv64i_m/D/src/fcvt.d.s_b23-01.S", + "rv64i_m/D/src/fcvt.d.s_b24-01.S", + "rv64i_m/D/src/fcvt.d.s_b27-01.S", + "rv64i_m/D/src/fcvt.d.s_b28-01.S", + "rv64i_m/D/src/fcvt.d.s_b29-01.S", + "rv64i_m/D/src/fcvt.d.w_b25-01.S", + "rv64i_m/D/src/fcvt.d.w_b26-01.S", + "rv64i_m/D/src/fcvt.d.wu_b25-01.S", + "rv64i_m/D/src/fcvt.d.wu_b26-01.S", + "rv64i_m/D/src/fcvt.l.d_b1-01.S", + "rv64i_m/D/src/fcvt.l.d_b22-01.S", + "rv64i_m/D/src/fcvt.l.d_b23-01.S", + "rv64i_m/D/src/fcvt.l.d_b24-01.S", + "rv64i_m/D/src/fcvt.l.d_b27-01.S", + "rv64i_m/D/src/fcvt.l.d_b28-01.S", + "rv64i_m/D/src/fcvt.l.d_b29-01.S", + "rv64i_m/D/src/fcvt.lu.d_b1-01.S", + "rv64i_m/D/src/fcvt.lu.d_b22-01.S", + "rv64i_m/D/src/fcvt.lu.d_b23-01.S", + "rv64i_m/D/src/fcvt.lu.d_b24-01.S", + "rv64i_m/D/src/fcvt.lu.d_b27-01.S", + "rv64i_m/D/src/fcvt.lu.d_b28-01.S", + "rv64i_m/D/src/fcvt.lu.d_b29-01.S", + "rv64i_m/D/src/fcvt.s.d_b1-01.S", + "rv64i_m/D/src/fcvt.s.d_b22-01.S", + "rv64i_m/D/src/fcvt.s.d_b23-01.S", + "rv64i_m/D/src/fcvt.s.d_b24-01.S", + "rv64i_m/D/src/fcvt.s.d_b27-01.S", + "rv64i_m/D/src/fcvt.s.d_b28-01.S", + "rv64i_m/D/src/fcvt.s.d_b29-01.S", + "rv64i_m/D/src/fcvt.w.d_b1-01.S", + "rv64i_m/D/src/fcvt.w.d_b22-01.S", + "rv64i_m/D/src/fcvt.w.d_b23-01.S", + "rv64i_m/D/src/fcvt.w.d_b24-01.S", + "rv64i_m/D/src/fcvt.w.d_b27-01.S", + "rv64i_m/D/src/fcvt.w.d_b28-01.S", + "rv64i_m/D/src/fcvt.w.d_b29-01.S", + "rv64i_m/D/src/fcvt.wu.d_b1-01.S", + "rv64i_m/D/src/fcvt.wu.d_b22-01.S", + "rv64i_m/D/src/fcvt.wu.d_b23-01.S", + "rv64i_m/D/src/fcvt.wu.d_b24-01.S", + "rv64i_m/D/src/fcvt.wu.d_b27-01.S", + "rv64i_m/D/src/fcvt.wu.d_b28-01.S", + "rv64i_m/D/src/fcvt.wu.d_b29-01.S", + "rv64i_m/D/src/fdiv.d_b1-01.S", + "rv64i_m/D/src/fdiv.d_b20-01.S", + "rv64i_m/D/src/fdiv.d_b2-01.S", + "rv64i_m/D/src/fdiv.d_b21-01.S", + "rv64i_m/D/src/fdiv.d_b3-01.S", + "rv64i_m/D/src/fdiv.d_b4-01.S", + "rv64i_m/D/src/fdiv.d_b5-01.S", + "rv64i_m/D/src/fdiv.d_b6-01.S", + "rv64i_m/D/src/fdiv.d_b7-01.S", + "rv64i_m/D/src/fdiv.d_b8-01.S", + "rv64i_m/D/src/fdiv.d_b9-01.S", + "rv64i_m/D/src/feq.d_b1-01.S", + "rv64i_m/D/src/feq.d_b19-01.S", + "rv64i_m/D/src/fle.d_b1-01.S", + "rv64i_m/D/src/fle.d_b19-01.S", + "rv64i_m/D/src/flt.d_b1-01.S", + "rv64i_m/D/src/flt.d_b19-01.S", + // "rv64i_m/D/src/fld-align-01.S", //missing right now from top of tree, should be returned when it comes back + // "rv64i_m/D/src/fsd-align-01.S", //https://github.com/riscv-non-isa/riscv-arch-test/issues/266 + "rv64i_m/D/src/fmadd.d_b14-01.S", + "rv64i_m/D/src/fmadd.d_b16-01.S", + "rv64i_m/D/src/fmadd.d_b17-01.S", + "rv64i_m/D/src/fmadd.d_b18-01.S", + "rv64i_m/D/src/fmadd.d_b2-01.S", + "rv64i_m/D/src/fmadd.d_b3-01.S", + "rv64i_m/D/src/fmadd.d_b4-01.S", + "rv64i_m/D/src/fmadd.d_b5-01.S", + "rv64i_m/D/src/fmadd.d_b6-01.S", + "rv64i_m/D/src/fmadd.d_b7-01.S", + "rv64i_m/D/src/fmadd.d_b8-01.S", + "rv64i_m/D/src/fmax.d_b1-01.S", + "rv64i_m/D/src/fmax.d_b19-01.S", + "rv64i_m/D/src/fmin.d_b1-01.S", + "rv64i_m/D/src/fmin.d_b19-01.S", + "rv64i_m/D/src/fmsub.d_b14-01.S", + "rv64i_m/D/src/fmsub.d_b16-01.S", + "rv64i_m/D/src/fmsub.d_b17-01.S", + "rv64i_m/D/src/fmsub.d_b18-01.S", + "rv64i_m/D/src/fmsub.d_b2-01.S", + "rv64i_m/D/src/fmsub.d_b3-01.S", + "rv64i_m/D/src/fmsub.d_b4-01.S", + "rv64i_m/D/src/fmsub.d_b5-01.S", + "rv64i_m/D/src/fmsub.d_b6-01.S", + "rv64i_m/D/src/fmsub.d_b7-01.S", + "rv64i_m/D/src/fmsub.d_b8-01.S", + "rv64i_m/D/src/fmul.d_b1-01.S", + "rv64i_m/D/src/fmul.d_b2-01.S", + "rv64i_m/D/src/fmul.d_b3-01.S", + "rv64i_m/D/src/fmul.d_b4-01.S", + "rv64i_m/D/src/fmul.d_b5-01.S", + "rv64i_m/D/src/fmul.d_b6-01.S", + "rv64i_m/D/src/fmul.d_b7-01.S", + "rv64i_m/D/src/fmul.d_b8-01.S", + "rv64i_m/D/src/fmul.d_b9-01.S", + "rv64i_m/D/src/fmv.d.x_b25-01.S", + "rv64i_m/D/src/fmv.d.x_b26-01.S", + "rv64i_m/D/src/fmv.x.d_b1-01.S", + "rv64i_m/D/src/fmv.x.d_b22-01.S", + "rv64i_m/D/src/fmv.x.d_b23-01.S", + "rv64i_m/D/src/fmv.x.d_b24-01.S", + "rv64i_m/D/src/fmv.x.d_b27-01.S", + "rv64i_m/D/src/fmv.x.d_b28-01.S", + "rv64i_m/D/src/fmv.x.d_b29-01.S", + "rv64i_m/D/src/fnmadd.d_b14-01.S", + "rv64i_m/D/src/fnmadd.d_b16-01.S", + "rv64i_m/D/src/fnmadd.d_b17-01.S", + "rv64i_m/D/src/fnmadd.d_b18-01.S", + "rv64i_m/D/src/fnmadd.d_b2-01.S", + "rv64i_m/D/src/fnmadd.d_b3-01.S", + "rv64i_m/D/src/fnmadd.d_b4-01.S", + "rv64i_m/D/src/fnmadd.d_b5-01.S", + "rv64i_m/D/src/fnmadd.d_b6-01.S", + "rv64i_m/D/src/fnmadd.d_b7-01.S", + "rv64i_m/D/src/fnmadd.d_b8-01.S", + "rv64i_m/D/src/fnmsub.d_b14-01.S", + "rv64i_m/D/src/fnmsub.d_b16-01.S", + "rv64i_m/D/src/fnmsub.d_b17-01.S", + "rv64i_m/D/src/fnmsub.d_b18-01.S", + "rv64i_m/D/src/fnmsub.d_b2-01.S", + "rv64i_m/D/src/fnmsub.d_b3-01.S", + "rv64i_m/D/src/fnmsub.d_b4-01.S", + "rv64i_m/D/src/fnmsub.d_b5-01.S", + "rv64i_m/D/src/fnmsub.d_b6-01.S", + "rv64i_m/D/src/fnmsub.d_b7-01.S", + "rv64i_m/D/src/fnmsub.d_b8-01.S", + "rv64i_m/D/src/fsgnj.d_b1-01.S", + "rv64i_m/D/src/fsgnjn.d_b1-01.S", + "rv64i_m/D/src/fsgnjx.d_b1-01.S", + // "rv64i_m/D/src/fsqrt.d_b1-01.S", + // "rv64i_m/D/src/fsqrt.d_b20-01.S", + // "rv64i_m/D/src/fsqrt.d_b2-01.S", + // "rv64i_m/D/src/fsqrt.d_b3-01.S", + // "rv64i_m/D/src/fsqrt.d_b4-01.S", + // "rv64i_m/D/src/fsqrt.d_b5-01.S", + // "rv64i_m/D/src/fsqrt.d_b7-01.S", + // "rv64i_m/D/src/fsqrt.d_b8-01.S", + // "rv64i_m/D/src/fsqrt.d_b9-01.S", + "rv64i_m/D/src/fssub.d_b10-01.S", + "rv64i_m/D/src/fssub.d_b1-01.S", + "rv64i_m/D/src/fssub.d_b11-01.S", + "rv64i_m/D/src/fssub.d_b12-01.S", + "rv64i_m/D/src/fssub.d_b13-01.S", + "rv64i_m/D/src/fssub.d_b2-01.S", + "rv64i_m/D/src/fssub.d_b3-01.S", + "rv64i_m/D/src/fssub.d_b4-01.S", + "rv64i_m/D/src/fssub.d_b5-01.S", + "rv64i_m/D/src/fssub.d_b7-01.S", + "rv64i_m/D/src/fssub.d_b8-01.S" }; string arch32priv[] = '{ @@ -1310,7 +1310,7 @@ string imperas32f[] = '{ "rv32i_m/F/src/fle_b19-01.S", "rv32i_m/F/src/flt_b1-01.S", "rv32i_m/F/src/flt_b19-01.S", - "rv32i_m/F/src/flw-align-01.S", + // "rv32i_m/F/src/flw-align-01.S", "rv32i_m/F/src/fmadd_b1-01.S", "rv32i_m/F/src/fmadd_b14-01.S", // "rv32i_m/F/src/fmadd_b15-01.S", @@ -1407,8 +1407,8 @@ string imperas32f[] = '{ "rv32i_m/F/src/fsub_b4-01.S", "rv32i_m/F/src/fsub_b5-01.S", "rv32i_m/F/src/fsub_b7-01.S", - "rv32i_m/F/src/fsub_b8-01.S", - "rv32i_m/F/src/fsw-align-01.S" + "rv32i_m/F/src/fsub_b8-01.S" + // "rv32i_m/F/src/fsw-align-01.S" }; diff --git a/tests/riscof/Makefile b/tests/riscof/Makefile index a9a442d38..fffa0e454 100644 --- a/tests/riscof/Makefile +++ b/tests/riscof/Makefile @@ -26,10 +26,11 @@ fsd_fld_tempfix: build_arch: fsd_fld_tempfix riscof run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(arch_dir)/riscv-test-suite/ --env=$(arch_dir)/riscv-test-suite/env --no-browser rm -rf $(arch_workdir)/rv$(XLEN)i_m - mv -f $(work_dir)/rv$(XLEN)i_m $(arch_workdir)/ + rsync -a $(work_dir)/rv32i_m/ $(arch_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" + rsync -a $(work_dir)/rv64i_m/ $(arch_workdir)/rv$(XLEN)i_m/ || echo "error suppressed" build_wally: - riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run + riscof --verbose debug run --work-dir=$(work_dir) --config=config$(XLEN).ini --suite=$(wally_dir)/riscv-test-suite/ --env=$(wally_dir)/riscv-test-suite/env --no-browser --no-dut-run 2>&1 | tee log.txt rm -rf $(wally_workdir)/rv$(XLEN)i_m mv -f $(work_dir)/rv$(XLEN)i_m $(wally_workdir)/ diff --git a/tests/riscof/sail_cSim/riscof_sail_cSim.py b/tests/riscof/sail_cSim/riscof_sail_cSim.py index dc3033ab3..683b816b3 100644 --- a/tests/riscof/sail_cSim/riscof_sail_cSim.py +++ b/tests/riscof/sail_cSim/riscof_sail_cSim.py @@ -94,7 +94,7 @@ class sail_cSim(pluginTemplate): execute = "@cd "+testentry['work_dir']+";" - cmd = self.compile_cmd.format(testentry['isa'].lower(), self.xlen) + ' ' + test + ' -o ' + elf + cmd = self.compile_cmd.format(testentry['isa'].lower().replace('zicsr', ' ', 1), self.xlen) + ' ' + test + ' -o ' + elf compile_cmd = cmd + ' -D' + " -D".join(testentry['macros']) execute+=compile_cmd+";" diff --git a/tests/riscof/spike/riscof_spike.py b/tests/riscof/spike/riscof_spike.py index fd4293954..4f74c72f4 100644 --- a/tests/riscof/spike/riscof_spike.py +++ b/tests/riscof/spike/riscof_spike.py @@ -151,7 +151,7 @@ class spike(pluginTemplate): # substitute all variables in the compile command that we created in the initialize # function - cmd = self.compile_cmd.format(testentry['isa'].lower(), self.xlen, test, elf, compile_macros) + cmd = self.compile_cmd.format(testentry['isa'].lower().replace('zicsr', ' ', 2), self.xlen, test, elf, compile_macros) # if the user wants to disable running the tests and only compile the tests, then # the "else" clause is executed below assigning the sim command to simple no action From c29a60c198cda7d861e34945a719cac3b7bac6bf Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Thu, 21 Jul 2022 21:10:15 -0700 Subject: [PATCH 32/36] changed gitignore, updated version of arch tests on main build --- .gitignore | 4 ---- addins/riscv-arch-test | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 9c6691eb3..0c8a1655a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,12 +7,8 @@ __pycache__/ .vscode/ #External repos -addins addins/riscv-arch-test/Makefile.include addins/riscv-tests/target -addins/coremark/work/* -addins/embench/bd_speed/* -addins/embench/bd_size/* benchmarks/embench/wally*.json #vsim work files to ignore diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index be67c99bd..e5020bf7b 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 +Subproject commit e5020bf7b345f8efb96c6c939de3162525b7f545 From d22587090bfcab76ef7572ddfc5eb0a1dc8bd105 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 22 Jul 2022 04:29:27 +0000 Subject: [PATCH 33/36] Reset MSR on read --- pipelined/src/uncore/uartPC16550D.sv | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipelined/src/uncore/uartPC16550D.sv b/pipelined/src/uncore/uartPC16550D.sv index 89bdc837d..5eabe60c7 100644 --- a/pipelined/src/uncore/uartPC16550D.sv +++ b/pipelined/src/uncore/uartPC16550D.sv @@ -206,6 +206,8 @@ module uartPC16550D( // Modem Status Register (8.6.8) if (~MEMWb & (A == 3'b110)) MSR <= #1 Din[3:0]; + else if (~MEMRb & (A == 3'b110)) + MSR <= #1 4'b0; // Reading MSR clears the flags in MSR bits 3:0 else begin MSR[0] <= #1 MSR[0] | CTSb2 ^ CTSbsync; // Delta Clear to Send MSR[1] <= #1 MSR[1] | DSRb2 ^ DSRbsync; // Delta Data Set Ready From df568fd202bf53be651e13183810dd3153546e1f Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 22 Jul 2022 07:10:39 -0700 Subject: [PATCH 34/36] Added PLIC and UART tests and new functions to the test library --- addins/riscv-arch-test | 2 +- pipelined/testbench/tests.vh | 6 +- .../rv32i_m/privilege/Makefrag | 2 + .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 81 +++++++++++++++++-- 4 files changed, 81 insertions(+), 10 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index e5020bf7b..be67c99bd 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit e5020bf7b345f8efb96c6c939de3162525b7f545 +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index b10bb951b..a2145835d 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1603,9 +1603,9 @@ string wally32i[] = '{ string wally32periph[] = '{ `WALLYTEST, - "rv32i_m/privilege/WALLY-gpio-01", - "rv32i_m/privilege/WALLY-clint-01" - // "rv32i_m/privilege/WALLY-plic-01" + // "rv32i_m/privilege/WALLY-gpio-01", + // "rv32i_m/privilege/WALLY-clint-01" + "rv32i_m/privilege/WALLY-plic-01" // "rv32i_m/privilege/WALLY-uart-01" }; diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag index 56b3bc01f..23806cf67 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/Makefrag @@ -55,6 +55,8 @@ target_tests_nosim = \ WALLY-status-tw-01 \ WALLY-gpio-01 \ WALLY-clint-01 \ + WALLY-plic-01 \ + WALLY-uart-01 \ rv32i_tests = $(addsuffix .elf, $(rv32i_sc_tests)) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index 7e112c917..17ff15c02 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -954,6 +954,17 @@ read08_test: // address to read in t3, expected 8 bit value in t4 (unused, but there for your perusal). li t2, 0xBAD // bad value that will be overwritten on good reads. lb t2, 0(t3) + andi t2, t2, 0xFF // mask to lower 8 bits + sw t2, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop // go to next test case + +read04_test: + // address to read in t3, expected 8 bit value in t4 (unused, but there for your perusal). + li t2, 0xBAD // bad value that will be overwritten on good reads. + lb t2, 0(t3) + andi t2, t2, 15 // mask lower 4 bits sw t2, 0(t1) addi t1, t1, 4 addi a6, a6, 4 @@ -974,6 +985,18 @@ readsip_test: // read the MIP into the signature j test_loop // go to next test case claim_m_plic_interrupts: // clears one non-pending PLIC interrupt + li t2, 0x0C00000C // GPIO priority + li t3, 7 + lw t4, 0(t2) + sw t3, 0(t2) + sw t4, -4(sp) + addi sp, sp, -4 + li t2, 0x0C000028 // UART priority + li t3, 7 + lw t4, 0(t2) + sw t3, 0(t2) + sw t4, -4(sp) + addi sp, sp, -4 li t2, 0x0C002000 li t3, 0x0C200004 li t4, 0xFFF @@ -982,9 +1005,28 @@ claim_m_plic_interrupts: // clears one non-pending PLIC interrupt lw t5, 0(t3) // make PLIC claim sw t5, 0(t3) // complete claim made sw t6, 0(t2) // restore saved enable status + li t2, 0x0C00000C // GPIO priority + li t3, 0x0C000028 // UART priority + lw t4, 4(sp) // load stored GPIO and UART priority + lw t5, 0(sp) + addi sp, sp, 8 // restore stack pointer + sw t4, 0(t2) + sw t5, 0(t3) j test_loop claim_s_plic_interrupts: // clears one non-pending PLIC interrupt + li t2, 0x0C00000C // GPIO priority + li t3, 7 + lw t4, 0(t2) + sw t3, 0(t2) + sw t4, -4(sp) + addi sp, sp, -4 + li t2, 0x0C000028 // UART priority + li t3, 7 + lw t4, 0(t2) + sw t3, 0(t2) + sw t4, -4(sp) + addi sp, sp, -4 li t2, 0x0C002080 li t3, 0x0C201004 li t4, 0xFFF @@ -993,25 +1035,52 @@ claim_s_plic_interrupts: // clears one non-pending PLIC interrupt lw t5, 0(t3) // make PLIC claim sw t5, 0(t3) // complete claim made sw t6, 0(t2) // restore saved enable status + li t2, 0x0C00000C // GPIO priority + li t3, 0x0C000028 // UART priority + lw t4, 4(sp) // load stored GPIO and UART priority + lw t5, 0(sp) + addi sp, sp, 8 // restore stack pointer + sw t4, 0(t2) + sw t5, 0(t3) + j test_loop + +uart_lsr_intr_wait: // waits for interrupts to be ready + li t2, 0x10000002 // IIR + li t4, 0x6 +uart_lsr_intr_loop: + lb t3, 0(t2) + andi t3, t3, 0x7 + bne t3, t4, uart_lsr_intr_loop + sw t3, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 j test_loop uart_data_wait: li t2, 0x10000005 // LSR li t3, 0x10000002 // IIR + li a4, 0x61 +uart_read_LSR_IIR: lb t4, 0(t3) // save IIR before potential clear lb t5, 0(t2) - andi t5, t5, 1 // only care if data is ready - li t6, 1 - beq t5, t6, uart_data_ready - j uart_data_wait + andi t6, t5, 0x61 // only care if all transmissions are done + bne a4, t6, uart_read_LSR_IIR uart_data_ready: - sb t4, 0(t1) - sb t5, 1(t1) + li t2, 0 + sw t2, 0(t1) // clear entry deadbeef from memory + andi t5, t5, 0x9F // mask THRE and TEMT from signature + sb t4, 1(t1) // IIR + sb t5, 0(t1) // LSR addi t1, t1, 4 addi a6, a6, 4 j test_loop +uart_clearmodemintr: + li t2, 0x10000006 + lb t2, 0(t2) + j test_loop + goto_s_mode: // return to address in t3, li a0, 3 // Trap handler behavior (go to supervisor mode) From d38369e8bf3543276555736b9d20d8b0514c8970 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Fri, 22 Jul 2022 07:12:55 -0700 Subject: [PATCH 35/36] Added new PLIC and UART tests --- .../references/WALLY-plic-01.reference_output | 249 +++++ .../WALLY-plic-s-01.reference_output | 0 .../references/WALLY-uart-01.reference_output | 33 + .../rv32i_m/privilege/src/WALLY-plic-01.S | 951 ++++++++++++++++++ .../rv32i_m/privilege/src/WALLY-plic-s-01.S | 493 +++++++++ .../rv32i_m/privilege/src/WALLY-uart-01.S | 140 +++ 6 files changed, 1866 insertions(+) create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-s-01.reference_output create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S create mode 100644 tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output new file mode 100644 index 000000000..8c6b8ef6a --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-01.reference_output @@ -0,0 +1,249 @@ +00000000 # read empty MIP (1.0.0) +00000008 # check GPIO interrupt pending on intPending0 +00000000 # Claim gives no interrupt due to zero priority +00000008 # interrupt still pending due to no claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.0.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000000 # read empty MIP (1.0.2) +00000000 # check GPIO interrupt pending on intPending0 +00000000 # claim gives no interrupt due to no intPending +00000000 # no interrupts pending after clear +00000000 # still no interrupts after clear +00000000 # read empty MIP (1.1.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # interrupt still pending due to no claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.1.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.2.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.2.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.3.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.3.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.4.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.4.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.5.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.5.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.6.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set due to PLIC priority (1.6.1) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000000 # read empty MIP (1.7.0) +00000008 # check GPIO interrupt pending on intPending0 +00000003 # Claim gives 3 for ID of GPIO +00000000 # check no interrupts pending after claim +00000000 # check no interrupts pending +00000800 # MEIP set (2.0) +00000408 # gpio and uart pending +00000003 # claim gpio +00000400 # gpio no longer pending +00000000 # no interrupts pending +00000800 # MEIP set (2.1) +00000408 # gpio and uart pending +00000003 # claim gpio +00000400 # gpio no longer pending +00000000 # no interrupts pending +00000800 # MEIP set (2.2) +00000408 # gpio and uart pending +0000000A # claim uart +00000008 # uart no longer pending +00000000 # no interrupts pending +00000800 # MEIP set (2.3) +00000408 # gpio and uart pending +0000000A # claim uart +00000008 # uart no longer pending +00000000 # no interrupts pending +00000000 # MEIP empty (2.4) +00000408 # gpio and uart pending +0000000A # claim none +00000008 # gpio and uart still pending +00000000 # no interrupts pending +00000A00 # MEIP and SEIP set (3.0) +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000200 # SEIP set (3.1) +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000200 # SEIP set (3.2) +00000408 # check GPIO and UART interrupt pending on intPending0 +0000000A # claim UART +00000008 # GPIO interrupt pending after UART claimcomp +00000000 # check no interrupts pending +00000000 # read empty MIP (3.3) +00000408 # check GPIO and UART interrupt pending on intPending0 +0000000A # claim UART +00000008 # check UART interrupt pending cleared after claim +00000000 # check no interrupts pending +00000A00 # MEIP and SEIP set (4.0) +00000400 # UART interrupt pending +0000000A # claim UART +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000200 # SEIP set (4.1) +00000400 # UART interrupt pending +00000000 # nothing in claim register +00000400 # UART interrupt pending +00000000 # check no interrupts pending +00000A00 # MEIP and SEIP set (4.2) +00000400 # UART interrupt pending +0000000A # claim UART +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000800 # MEIP set (4.3) +00000400 # UART interrupt pending +0000000A # claim UART +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000800 # MEIP set (4.4) +00000400 # UART interrupt pending +0000000A # claim UART +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000200 # SEIP set (4.5) +00000400 # UART interrupt pending +00000000 # nothing in claim register +00000400 # UART interrupt pending +00000000 # check no interrupts pending +00000000 # All disabled (4.6) +00000400 # UART interrupt pending +00000000 # nothing in claim register +00000400 # UART interrupt pending +00000000 # check no interrupts pending +00000200 # SEIP set (5.0) +00000008 # GPIO interrupt pending +00000000 # nothing in claim register +00000008 # GPIO interrupt pending +00000000 # check no interrupts pending +00000A00 # MEIP and SEIP set (5.1) +00000008 # GPIO interrupt pending +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000800 # MEIP set (5.2) +00000008 # GPIO interrupt pending +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000A00 # MEIP and SEIP set (5.3) +00000008 # GPIO interrupt pending +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000800 # MEIP set (5.4) +00000008 # GPIO interrupt pending +00000003 # claim gives 3 for ID of GPIO +00000000 # check no interrupts pending +00000000 # check no interrupts pending +00000200 # SEIP set (5.5) +00000008 # GPIO interrupt pending +00000000 # nothing in claim register +00000008 # GPIO interrupt pending +00000000 # check no interrupts pending +00000000 # read empty MIP (5.6) +00000008 # GPIO interrupt pending +00000000 # nothing in claim register +00000008 # GPIO interrupt pending +00000000 # check no interrupts pending +0000000b # written due to goto_s_mode +00000200 # read sip (7.0) +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000200 # read sip (7.1) +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000200 # read sip (7.2) +00000408 # check GPIO and UART interrupt pending on intPending0 +0000000A # claim UART +00000008 # GPIO interrupt pending after UART claimcomp +00000000 # check no interrupts pending +00000200 # read sip (7.3) +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000000 # read sip +00000408 # check GPIO and UART interrupt pending on intPending0 +00000000 # nothing in claim register +00000408 # check GPIO and UART interrupt pending on intPending0 +00000000 # check no interrupts pending +00000200 # read sip +00000408 # check GPIO and UART interrupt pending on intPending0 +00000003 # claim gives 3 for ID of GPIO +00000400 # check GPIO interrupt pending cleared after claim +00000000 # check no interrupts pending +00000000 # read sip +00000408 # check GPIO and UART interrupt pending on intPending0 +00000000 # nothing in claim register +00000408 # check GPIO and UART interrupt pending on intPending0 +00000000 # check no interrupts pending +00000009 # output from ecall in supervisor mode +00000800 # MEIP set +00000408 # check GPIO and UART interrupt pending on intPending0 +0000000A # claim UART +00000008 # GPIO interrupt pending after UART claimcomp +00000003 # claim gives 3 for ID of GPIO +00000000 # read empty MIP # no interrupts, meip is low +00000000 # check no interrupts pending +00000800 # MEIP set +00000008 # GPIO interrupt pending after complete +00000003 # claim gives 3 for ID of GPIO +00000000 # read empty MIP # meip is zeroed +00000000 # check no interrupts pending +00000800 # MEIP set +00000400 # check GPIO interrupt pending cleared after claim +00000800 # MEIP set +00000408 # check GPIO and UART interrupt pending on intPending0 +0000000A # claim UART diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-s-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-plic-s-01.reference_output new file mode 100644 index 000000000..e69de29bb diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output new file mode 100644 index 000000000..457bc35c8 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-uart-01.reference_output @@ -0,0 +1,33 @@ +00000000 # Reset tests +00000001 # 00000000 *** commented because LCR should reset to zero but resets to 3 due to the FPGA +00000000 +00000060 +00000000 +00000060 # read-write test +00000020 # transmitter register empty but shift register not +00000101 # transmitter is not empty when done transmitting 5 bits +00000000 +00000060 +00000101 # Multi-bit transmission: 5 bits +00000015 +00000101 # Transmit 6 bits +0000002A +00000101 # Transmit 7 bits +0000007F +00000101 # Transmit 8 bits +00000080 +00000002 # Transmission interrupt tests +00000401 # Interrupt generated by finished transmission +00000004 +00000006 # IIR return LSR intr and LSR has an overflow error +00000063 +00000004 +00000001 +00000001 # MODEM interrupt tests +00000000 +00000011 +00000001 + + + +0000000b # ecall from test termination diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S new file mode 100644 index 000000000..81a48a23f --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-01.S @@ -0,0 +1,951 @@ +/////////////////////////////////////////// +// +// WALLY-plic +// +// Author: David_Harris@hmc.edu and Nicholas Lucio +// +// Created 2022-06-16 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +#include "WALLY-TEST-LIB-32.h" + +INIT_TESTS + +TRAP_HANDLER m + +j run_test_loop // begin test loop/table tests instead of executing inline code. + +INIT_TEST_TABLE + +END_TESTS + +TEST_STACK_AND_DATA + +.align 2 +test_cases: +# --------------------------------------------------------------------------------------------- +# Test Contents +# +# Here is where the actual tests are held, or rather, what the actual tests do. +# each entry consists of 3 values that will be read in as follows: +# +# '.4byte [x28 Value], [x29 Value], [x30 value]' +# or +# '.4byte [address], [value], [test type]' +# +# The encoding for x30 test type values can be found in the test handler in the framework file +# +# --------------------------------------------------------------------------------------------- + +# =========== Define PLIC registers =========== + +.equ PLIC, 0x0C000000 +.equ PLIC_INTPRI_GPIO, (PLIC+0x00000C) # GPIO is interrupt 3 +.equ PLIC_INTPRI_UART, (PLIC+0x000028) # UART is interrupt 10 +.equ PLIC_INTPENDING0, (PLIC+0x001000) # intPending0 register +.equ PLIC_INTEN00, (PLIC+0x002000) # interrupt enables for context 0 (machine mode) sources 31:1 +.equ PLIC_INTEN10, (PLIC+0x002080) # interrupt enables for context 1 (supervisor mode) sources 31:1 +.equ PLIC_THRESH0, (PLIC+0x200000) # Priority threshold for context 0 (machine mode) +.equ PLIC_CLAIM0, (PLIC+0x200004) # Claim/Complete register for context 0 +.equ PLIC_THRESH1, (PLIC+0x201000) # Priority threshold for context 1 (supervisor mode) +.equ PLIC_CLAIM1, (PLIC+0x201004) # Claim/Complete register for context 1 + +# =========== Define GPIO registers =========== + +.equ GPIO, 0x10060000 +.equ input_val, (GPIO+0x00) +.equ input_en, (GPIO+0x04) +.equ output_en, (GPIO+0x08) +.equ output_val, (GPIO+0x0C) +.equ rise_ie, (GPIO+0x18) +.equ rise_ip, (GPIO+0x1C) +.equ fall_ie, (GPIO+0x20) +.equ fall_ip, (GPIO+0x24) +.equ high_ie, (GPIO+0x28) +.equ high_ip, (GPIO+0x2C) +.equ low_ie, (GPIO+0x30) +.equ low_ip, (GPIO+0x34) +.equ iof_en, (GPIO+0x38) +.equ iof_sel, (GPIO+0x3C) +.equ out_xor, (GPIO+0x40) + +# =========== Define UART registers =========== + +.equ UART, 0x10000000 +.equ UART_IER, (UART+0x01) +.equ UART_MCR, (UART+0x04) +.equ UART_MSR, (UART+0x06) + +# =========== Initialize UART and GPIO =========== + +# GPIO Initialization +.4byte input_en, 0x00000001, write32_test # enable bit 0 of input_en +.4byte output_en, 0x00000001, write32_test # enable bit 0 of output_en +.4byte output_val, 0x00000000, write32_test # make sure output_val is 0 +.4byte rise_ie, 0x00000001, write32_test # enable rise interrupts + +# =========== Initialize relevant PLIC registers =========== + +.4byte PLIC_INTPRI_GPIO, 0x00000000, write32_test # set GPIO priority to zero +.4byte PLIC_INTPRI_UART, 0x00000000, write32_test # set UART priority to zero +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000007, write32_test # set s-mode threshold to max + +# =========== Machine-Mode Priority Testing (1.T.X) =========== + +# Test 1.0.0: GPIO int lacks priority (0 = 0) +.4byte PLIC_THRESH0, 0x00000000, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.0.1: GPIO int has priority (1 > 0) +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.0.2: meip and c/c clear without interrupt pending +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.1.0: GPIO lacks priority (1 = 1) +.4byte PLIC_THRESH0, 0x00000001, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.1.1: GPIO int has priority (2 > 1) +.4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.2.0: GPIO int lacks priority (2 = 2) +.4byte PLIC_THRESH0, 0x00000002, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.2.1: GPIO int has priority (3 > 2) +.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.3.0: GPIO int lacks priority (3 = 3) +.4byte PLIC_THRESH0, 0x00000003, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.3.1: GPIO int has priority (4 > 3) +.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.4.0: GPIO int lacks priority (4 = 4) +.4byte PLIC_THRESH0, 0x00000004, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.4.1: GPIO int has priority (5 > 4) +.4byte PLIC_INTPRI_GPIO, 0x00000005, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.5.0: GPIO int lacks priority (5 = 5) +.4byte PLIC_THRESH0, 0x00000005, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.5.1: GPIO int has priority (6 > 5) +.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.6.0: GPIO int lacks priority (6 = 6) +.4byte PLIC_THRESH0, 0x00000006, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.6.1: GPIO int has priority (7 > 6) +.4byte PLIC_INTPRI_GPIO, 0x00000007, write32_test # let GPIO cause interrupts +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# Test 1.7.0: GPIO int lacks priority (7 = 7) +.4byte PLIC_THRESH0, 0x00000007, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim from earlier +.4byte 0x0, 0x00000000, claim_m_plic_interrupts # clear interrupt one +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending + +# =========== UART vs GPIO priority (2.X) =========== + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000007, write32_test # set s-mode threshold to max +# UART Initialization +.4byte UART_IER, 0x08, write08_test # enable modem status interrupts from CTS +.4byte UART_MCR, 0x10, write08_test # enable loopback mode, RTS = 0 +.4byte UART_MSR, 0x00, write08_test # disable UART interrupt + +# Test 2.0: GPIO Priority = UART Priority + +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIOPriority = 1 +.4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UARTPriority = 1 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 2.1: GPIO Priority > UART Priority + +.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test # GPIOPriority = 3 +.4byte PLIC_INTPRI_UART, 0x00000002, write32_test # UARTPriority = 2 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 2.2: GPIO Priority < UART Priority + +.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test # GPIOPriority = 4 +.4byte PLIC_INTPRI_UART, 0x00000005, write32_test # UARTPriority = 5 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for UART +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 2.3: GPIO Priority < UART Priority + +.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # GPIOPriority = 6 +.4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UARTPriority = 7 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for UART +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 2.4: Interrupts don't have enough priority + +.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test # GPIOPriority = 4 +.4byte PLIC_INTPRI_UART, 0x00000005, write32_test # UARTPriority = 5 +.4byte PLIC_THRESH0, 0x00000006, write32_test # set m-mode threshold to 6 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# =========== SEIP tests (3.X) =========== + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 + +# Test 3.0: Cause machine and supervisor interrupts + +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIOPriority = 1 +.4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UARTPriority = 1 +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000A00, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 3.1: Suppress machine mode interrupts + +.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test # GPIOPriority = 3 +.4byte PLIC_INTPRI_UART, 0x00000002, write32_test # UARTPriority = 2 +.4byte PLIC_THRESH0, 0x00000007, write32_test # set m-mode threshold to 7 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 3.2: Cause SEIP with UART first + +.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # GPIOPriority = 6 +.4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UARTPriority = 7 +.4byte PLIC_THRESH0, 0x00000007, write32_test # set m-mode threshold to 7 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 3.3: Low SEIP due to insufficient priority + +.4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test # GPIOPriority = 2 +.4byte PLIC_INTPRI_UART, 0x00000003, write32_test # UARTPriority = 3 +.4byte PLIC_THRESH0, 0x00000004, write32_test # set m-mode threshold to 4 +.4byte PLIC_THRESH1, 0x00000005, write32_test # set s-mode threshold to 5 +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# =========== UART interrupt enable tests (4.X) =========== + +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIO Priority = 1 +.4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UART Priority = 1 + +# Test 4.0: GPIO m-mode disabled + +.4byte PLIC_INTEN00, 0x00000400, write32_test # disable GPIO m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000A00, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.1: UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000008, write32_test # disable UART m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.2: GPIO s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000400, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000A00, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.3: UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000008, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.4: GPIO and UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.5: GPIO and UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 4.6: GPIO and UART fully disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# =========== GPIO interrupt enable tests (5.X) =========== + +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIO Priority = 1 +.4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UART Priority = 1 + +# Test 5.0: GPIO m-mode disabled + +.4byte PLIC_INTEN00, 0x00000400, write32_test # disable GPIO m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.1: UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000008, write32_test # disable UART m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000A00, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.2: GPIO s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000400, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.3: UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000008, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000A00, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.4: GPIO and UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.5: GPIO and UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000200, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 5.6: GPIO and UART fully disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM0, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending for GPIO and UART +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte PLIC_CLAIM0, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_m_plic_interrupts # clear interrupt two +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x0, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# =========== S-mode enable tests (7.X) =========== + +.4byte 0x0, 0x0, goto_s_mode # go to s-mode. 0xb written to output +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIO Priority = 1 +.4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UART Priority = 1 + +# Test 7.0: GPIO m-mode disabled + +.4byte PLIC_INTEN00, 0x00000400, write32_test # disable GPIO m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.1: UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000008, write32_test # disable UART m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.2: GPIO s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000400, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x0000000A, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x0000000A, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.3: UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000008, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.4: GPIO and UART s-mode disabled + +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000000, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.5: GPIO and UART m-mode disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000200, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000003, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000003, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# Test 7.6: GPIO and UART fully disabled + +.4byte PLIC_INTEN00, 0x00000000, write32_test # disable GPIO interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000000, readsip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +.4byte PLIC_CLAIM1, 0x00000000, read32_test # read claim register +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending cleared for GPIO +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000001, write32_test # clear GPIO interrupt +.4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt +.4byte PLIC_CLAIM1, 0x00000000, write32_test # complete claim made earlier +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt one +.4byte 0x0, 0x00000000, claim_s_plic_interrupts # clear interrupt two +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupts pending + +# =========== Special claim tests (8) =========== + +.4byte 0x0, 0x0, goto_m_mode # write 0x9 to output + +.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # GPIO Priority = 6 +.4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UART Priority = 7 +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable all m-mode interrupts +.4byte PLIC_INTEN10, 0x00000000, write32_test # enable all s-mode interrupts +.4byte PLIC_THRESH0, 0x00000005, write32_test # set m-mode threshold to 5 + +# Test 8 + +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte UART_MSR, 0x0F, write08_test # cause UART interrupt +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # read interrupt pending +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # claim UART +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # UART interrupt cleared +.4byte PLIC_CLAIM0, 0x00000003, read32_test # claim GPIO +.4byte 0x0, 0x00000000, readmip_test # no interrupts, meip is low +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # both interrupts claimed +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete GPIO +.4byte 0x0, 0x00000800, readmip_test # GPIO interrupt sets MEIP +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # GPIO bit is set +.4byte PLIC_CLAIM0, 0x00000003, read32_test # claim GPIO again +.4byte 0x0, 0x00000000, readmip_test # meip is zeroed +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # both interrupts claimed +.4byte PLIC_CLAIM0, 0x0000000A, write32_test # complete UART claim +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000400, read32_test # UART pending +.4byte PLIC_CLAIM0, 0x00000003, write32_test # complete GPIO claim +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000408, read32_test # GPIO and UART pending +.4byte PLIC_CLAIM0, 0x0000000A, read32_test # claim UART + +.4byte 0x0, 0x0, terminate_test # terminate tests diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S new file mode 100644 index 000000000..45b87c4b1 --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-plic-s-01.S @@ -0,0 +1,493 @@ +/////////////////////////////////////////// +// +// WALLY-gpio +// +// Author: David_Harris@hmc.edu and Nicholas Lucio +// +// Created 2022-06-16 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +#include "WALLY-TEST-LIB-32.h" + +INIT_TESTS + +TRAP_HANDLER m + +j run_test_loop // begin test loop/table tests instead of executing inline code. + +INIT_TEST_TABLE + +END_TESTS + +TEST_STACK_AND_DATA + +.align 2 +test_cases: +# --------------------------------------------------------------------------------------------- +# Test Contents +# +# Here is where the actual tests are held, or rather, what the actual tests do. +# each entry consists of 3 values that will be read in as follows: +# +# '.4byte [x28 Value], [x29 Value], [x30 value]' +# or +# '.4byte [address], [value], [test type]' +# +# The encoding for x30 test type values can be found in the test handler in the framework file +# +# --------------------------------------------------------------------------------------------- + +# =========== Define PLIC registers =========== + +.equ PLIC, 0x0C000000 +.equ PLIC_INTPRI_GPIO, (PLIC+0x00000C) # GPIO is interrupt 3 +.equ PLIC_INTPRI_UART, (PLIC+0x000028) # UART is interrupt 10 +.equ PLIC_INTPENDING0, (PLIC+0x001000) # intPending0 register +.equ PLIC_INTEN00, (PLIC+0x002000) # interrupt enables for context 0 (machine mode) sources 31:1 +.equ PLIC_INTEN10, (PLIC+0x002080) # interrupt enables for context 1 (supervisor mode) sources 31:1 +.equ PLIC_THRESH0, (PLIC+0x200000) # Priority threshold for context 0 (machine mode) +.equ PLIC_CLAIM0, (PLIC+0x200004) # Claim/Complete register for context 0 +.equ PLIC_THRESH1, (PLIC+0x201000) # Priority threshold for context 1 (supervisor mode) +.equ PLIC_CLAIM1, (PLIC+0x201004) # Claim/Complete register for context 1 + +# =========== Define GPIO registers =========== + +.equ GPIO, 0x10060000 +.equ input_val, (GPIO+0x00) +.equ input_en, (GPIO+0x04) +.equ output_en, (GPIO+0x08) +.equ output_val, (GPIO+0x0C) +.equ rise_ie, (GPIO+0x18) +.equ rise_ip, (GPIO+0x1C) +.equ fall_ie, (GPIO+0x20) +.equ fall_ip, (GPIO+0x24) +.equ high_ie, (GPIO+0x28) +.equ high_ip, (GPIO+0x2C) +.equ low_ie, (GPIO+0x30) +.equ low_ip, (GPIO+0x34) +.equ iof_en, (GPIO+0x38) +.equ iof_sel, (GPIO+0x3C) +.equ out_xor, (GPIO+0x40) + +# =========== Define UART registers =========== + +.equ UART, 0x10000000 +.equ UART_IER, (UART+0x01) +.equ UART_MCR, (UART+0x04) +.equ UART_MSR, (UART+0x06) + +# =========== Initialize UART and GPIO =========== + +# GPIO Initialization +.4byte input_en, 0x00000001, write32_test # enable bit 0 of input_en +.4byte output_en, 0x00000001, write32_test # enable bit 0 of output_en +.4byte output_val, 0x00000000, write32_test # make sure output_val is 0 +.4byte rise_ie, 0x00000001, write32_test # enable rise interrupts + +# UART Initialization +.4byte UART_IER, 0x08, write08_test # enable modem status interrupts from CTS +.4byte UART_MCR, 0x10, write08_test # enable loopback mode, RTS = 0 +.4byte UART_MSR, 0x00, write08_test # disable UART interrupt + +# =========== Initialize relevant PLIC registers =========== + +.4byte PLIC_INTPRI_GPIO, 0x00000000, write32_test # set GPIO priority to zero +.4byte PLIC_INTPRI_UART, 0x00000000, write32_test # set UART priority to zero +.4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +.4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +.4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +.4byte PLIC_THRESH1, 0x00000007, write32_test # set s-mode threshold to max + +# =========== Machine-Mode Priority Testing (1.T.X) =========== + +# Test 1.0.0: GPIO int lacks priority (0 = 0) +.4byte PLIC_THRESH0, 0x00000000, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect no interrupt pending *** pending bug????? +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.0.1: GPIO int has priority (1 > 0) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.0.2: meip and c/c clear without interrupt pending +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete + +# Test 1.1.0: GPIO lacks priority (1 = 1) +.4byte PLIC_THRESH0, 0x00000001, write32_test # change threshold +.4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.1.1: GPIO int has priority (2 > 1) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.2.0: GPIO int lacks priority (2 = 2) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000002, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.2.1: GPIO int has priority (3 > 2) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.3.0: GPIO int lacks priority (3 = 3) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000003, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.3.1: GPIO int has priority (4 > 3) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000004, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.4.0: GPIO int lacks priority (4 = 4) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000004, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.4.1: GPIO int has priority (5 > 4) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000005, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.5.0: GPIO int lacks priority (5 = 5) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000005, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.5.1: GPIO int has priority (6 > 5) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.6.0: GPIO int lacks priority (6 = 6) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000006, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.6.1: GPIO int has priority (7 > 6) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_INTPRI_GPIO, 0x00000007, write32_test # let GPIO cause interrupts +.4byte 0x0, 0x00000800, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000008, read32_test # expect interrupt pending on bit 3 +.4byte 0x0, 0x00000003, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# Test 1.7.0: GPIO int lacks priority (7 = 7) +.4byte output_val, 0x00000001, write32_test # set GPIO rise_ip high +.4byte PLIC_THRESH0, 0x00000007, write32_test # change threshold +.4byte 0x0, 0x00000000, readmip_test # read mip +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # expect no interrupt pending +.4byte 0x0, 0x00000000, readmclaimcomplete_test # read and clear claimcomplete +.4byte PLIC_INTPENDING0, 0x00000000, read32_test # interrupt pending was cleared +.4byte output_val, 0x00000000, write32_test # clear output_val +.4byte rise_ip, 0x00000000, write32_test # clear interrupt + +# # =========== UART vs GPIO priority (2.X) =========== *** + +# .4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +# .4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +# .4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +# .4byte PLIC_THRESH1, 0x00000007, write32_test # set s-mode threshold to max + +# # Test 2.0: GPIO Priority = UART Priority + +# .4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIOPriority = 1 +# .4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UARTPriority = 1 +# .4byte 0x0, 0x00000800, readmip_test # read mip +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # GPIO claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000200, read32_test # GPIO interrupt was cleared +# .4byte 0x0, 0x00000800, readmip_test # expect mip to remain high +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # UART claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # UART interrupt pending was cleared +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 2.1: GPIO Priority < UART Priority + +# .4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test # GPIO Priority = 2 +# .4byte PLIC_INTPRI_UART, 0x00000003, write32_test # UART Priority = 3 +# .4byte 0x0, 0x00000800, readmip_test # read mip +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for UART and GPIO +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # UART claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000008, read32_test # UART interrupt was cleared +# .4byte 0x0, 0x00000800, readmip_test # expect mip to remain high +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # GPIO claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # GPIO interrupt pending was cleared +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 2.2: GPIO Priority > UART Priority + +# .4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000005, write32_test # GPIO Priority = 5 +# .4byte PLIC_INTPRI_UART, 0x00000004, write32_test # UART Priority = 4 +# .4byte 0x0, 0x00000800, readmip_test # read mip +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # GPIO claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000200, read32_test # GPIO interrupt was cleared +# .4byte 0x0, 0x00000800, readmip_test # expect mip to remain high +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # UART claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # UART interrupt pending was cleared +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 2.3: GPIO Priority < UART Priority (2) + +# .4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # GPIO Priority = 6 +# .4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UART Priority = 7 +# .4byte 0x0, 0x00000800, readmip_test # read mip +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for UART and GPIO +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # UART claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000008, read32_test # UART interrupt was cleared +# .4byte 0x0, 0x00000800, readmip_test # expect mip to remain high +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # GPIO claim/complete process +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # GPIO interrupt pending was cleared +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 2.4: Interrupts disabled (Thresh0 = 7) + +# .4byte output_val, 0x00000001, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_THRESH0, 0x00000007, write32_test # Disable m-mode interrupts +# .4byte PLIC_INTPRI_GPIO, 0x00000007, write32_test # GPIO Priority = 7 +# .4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UART Priority = 7 +# .4byte 0x0, 0x00000000, readmip_test, # read mip +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending +# .4byte 0x0, 0x00000000, readmclaimcomplete_test # no interrupt pending +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # =========== SEIP tests (3.X) =========== + +# .4byte PLIC_INTEN00, 0x00000408, write32_test # enable m-mode interrupts +# .4byte PLIC_INTEN10, 0x00000408, write32_test # enable s-mode interrupts +# .4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +# .4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 + +# # Test 3.0: Cause machine and supervisor interrupts + +# .4byte output_val, 0x00000000, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIO Priority = 1 +# .4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UART Priority = 1 +# .4byte 0x0, 0x00000A00, readmip_test # Expect high on MEIP and SEIP +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # Expect GPIO on claim/complete +# .4byte 0x0, 0x00000A00, readmip_test # Still expect high on MEIP and SEIP +# .4byte PLIC_INTPENDING0, 0x00000200, read32_test # GPIO interrupt was cleared +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # Expect UART on claim/complete +# .4byte 0x0, 0x00000000, readmip_test # all interrupts were cleared +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # Pending should also be clear +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 3.1: Suppress machine mode interrupts + +# .4byte output_val, 0x00000000, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000003, write32_test # GPIO Priority = 3 +# .4byte PLIC_INTPRI_UART, 0x00000002, write32_test # UART Priority = 2 +# .4byte PLIC_THRESH0, 0x00000007, write32_test # set m-mode threshold to 7 +# .4byte 0x0, 0x00000200, readmip_test # Expect high on SEIP only +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # Expect GPIO on claim/complete +# .4byte 0x0, 0x00000200, readmip_test # Expect high on SEIP only +# .4byte PLIC_INTPENDING0, 0x00000200, read32_test # GPIO interrupt was cleared +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # Expect UART on claim/complete +# .4byte 0x0, 0x00000000, readmip_test # all interrupts were cleared +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # Pending should also be clear +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 3.2: Cause SEIP with UART first + +# .4byte output_val, 0x00000000, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000006, write32_test # GPIO Priority = 6 +# .4byte PLIC_INTPRI_UART, 0x00000007, write32_test # UART Priority = 7 +# .4byte 0x0, 0x00000200, readmip_test # Expect high on SEIP only +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # Expect UART on claim/complete +# .4byte 0x0, 0x00000200, readmip_test # Expect high on SEIP only +# .4byte PLIC_INTPENDING0, 0x00000008, read32_test # UART interrupt was cleared +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # Expect GPIO on claim/complete +# .4byte 0x0, 0x00000000, readmip_test # all interrupts were cleared +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # Pending should also be clear +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # Test 3.3: Low SEIP due to insufficient priority + +# .4byte output_val, 0x00000000, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTPRI_GPIO, 0x00000002, write32_test # GPIO Priority = 2 +# .4byte PLIC_INTPRI_UART, 0x00000003, write32_test # UART Priority = 3 +# .4byte PLIC_THRESH0, 0x00000004, write32_test # set m-mode threshold to 7 +# .4byte PLIC_THRESH1, 0x00000005, write32_test # set s-mode threshold to 7 +# .4byte 0x0, 0x00000000, readmip_test # no interrupt pending +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # no interrupt pending +# .4byte 0x0, 0x00000000, readmclaimcomplete_test # Expect nothing on claim/complete +# .4byte output_val, 0x00000000, write32_test # clear output_val +# .4byte rise_ip, 0x00000000, write32_test # clear GPIO interrupt +# .4byte UART_MSR, 0x00000000, write08_test # clear UART interrupt + +# # =========== UART interrupt enable tests (4.X) =========== + +# .4byte PLIC_THRESH0, 0x00000000, write32_test # set m-mode threshold to 0 +# .4byte PLIC_THRESH1, 0x00000000, write32_test # set s-mode threshold to 0 +# .4byte PLIC_INTPRI_GPIO, 0x00000001, write32_test # GPIO Priority = 1 +# .4byte PLIC_INTPRI_UART, 0x00000001, write32_test # UART Priority = 1 + +# # Test 4.0: GPIO m-mode disabled + +# .4byte output_val, 0x00000000, write32_test # cause rise_ip to go high +# .4byte UART_MSR, 0x00000010, write08_test # step 1 of UART interrupt +# .4byte UART_MSR, 0x00000012, write08_test # step 2 of UART interrupt +# .4byte PLIC_INTEN00, 0x000000200, write32_test # GPIO m-mode interrupt disabled +# .4byte PLIC_INTEN00, 0x000000208, write32_test # No s-mode interrupt disabled +# .4byte 0x0, 0x00000A00, readmip_test # Expect high on MEIP from UART and SEIP from GPIO +# .4byte PLIC_INTPENDING0, 0x00000408, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x00000003, readmclaimcomplete_test # Expect GPIO on claim/complete +# .4byte 0x0, 0x00000200, readmip_test # Expect high on MEIP and SEIP from UART +# .4byte PLIC_INTPENDING0, 0x00000200, read32_test # interrupt pending for GPIO and UART +# .4byte 0x0, 0x0000000A, readmclaimcomplete_test # Expect UART on claim/complete +# .4byte 0x0, 0x00000000, readmip_test # all interrupts were cleared +# .4byte PLIC_INTPENDING0, 0x00000000, read32_test # Pending should also be clear + +# Test 4.1: UART m-mode disabled + + + +# Test 4.2: GPIO s-mode disabled + + + +# Test 4.3: UART s-mode disabled + + + +# Test 4.4: GPIO and UART s-mode disabled + + + +# Test 4.5: GPIO and UART m-mode disabled + + + +# Test 4.6: GPIO and UART fully disabled + +.4byte 0x0, 0x0, terminate_test # terminate tests diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S new file mode 100644 index 000000000..08b1dc25e --- /dev/null +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-uart-01.S @@ -0,0 +1,140 @@ +/////////////////////////////////////////// +// +// WALLY-uart +// +// Author: David_Harris@hmc.edu and Nicholas Lucio +// +// Created 2022-06-16 +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, +// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT +// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////// + +#include "WALLY-TEST-LIB-32.h" + +INIT_TESTS + +TRAP_HANDLER m + +j run_test_loop // begin test loop/table tests instead of executing inline code. + +INIT_TEST_TABLE + +END_TESTS + +TEST_STACK_AND_DATA + +.align 2 + +.equ UART, 0x10000000 +.equ UART_RBR, (UART) +.equ UART_THR, (UART) +.equ UART_IER, (UART+0x01) +.equ UART_IIR, (UART+0x02) +.equ UART_FCR, (UART+0x02) +.equ UART_LCR, (UART+0x03) +.equ UART_MCR, (UART+0x04) +.equ UART_LSR, (UART+0x05) +.equ UART_MSR, (UART+0x06) +.equ UART_Scr, (UART+0x07) + +test_cases: +# --------------------------------------------------------------------------------------------- +# Test Contents +# +# Here is where the actual tests are held, or rather, what the actual tests do. +# each entry consists of 3 values that will be read in as follows: +# +# '.4byte [x28 Value], [x29 Value], [x30 value]' +# or +# '.4byte [address], [value], [test type]' +# +# The encoding for x30 test type values can be found in the test handler in the framework file +# +# --------------------------------------------------------------------------------------------- + +# =========== UART resets to correct values on master reset =========== + +.4byte UART_IER, 0x00, read08_test +.4byte UART_IIR, 0x01, read08_test # IIR resets to 1 +# .4byte UART_LCR, 0x00, read08_test *** commented out because LCR should reset to zero but resets to 3 +.4byte UART_MCR, 0x00, read08_test +.4byte UART_LSR, 0x60, read08_test # LSR resets with transmit status bits set +.4byte UART_MSR, 0x00, read04_test + +# =========== Basic read-write =========== + +.4byte UART_LCR, 0x00, write08_test # set LCR to reset value *** remove if UART resets to correct value +.4byte UART_MCR, 0x10, write08_test # put UART into loopback for MSR test +.4byte UART_LSR, 0x60, read08_test +.4byte UART_THR, 0x00, write08_test # write value to UART +.4byte UART_LSR, 0x00, read08_test # data not ready and transmitter is not empty +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and LSR +.4byte UART_RBR, 0x00, read08_test # read written value +.4byte UART_LSR, 0x60, read08_test # read LSR + +# =========== Different size read-write =========== + +# Transmit 5 bits + +.4byte UART_LCR, 0x00, write08_test # set LCR to transmit 5 bits +.4byte UART_THR, 0x55, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x15, read08_test # read written value without bits 5-7 + +# Transmit 6 bits + +.4byte UART_LCR, 0x01, write08_test # set LCR to transmit six bits +.4byte UART_THR, 0xAA, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x2A, read08_test # read written value without bits 6 & 7 + +# Transmit 7 bits + +.4byte UART_LCR, 0x02, write08_test # set LCR to transmit seven bits +.4byte UART_THR, 0xFF, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x7F, read08_test # read written value without bit 7 + +# Transmit 8 bits + +.4byte UART_LCR, 0x03, write08_test # set LCR to transmit seven bits +.4byte UART_THR, 0x80, write08_test # write value to UART +.4byte 0x0, 0x0101, uart_data_wait # wait for data to become ready then output IIR and then LSR +.4byte UART_RBR, 0x80, read08_test # read full written value + sign extension + +# =========== Transmit-related interrupts =========== + +.4byte UART_IER, 0x07, write08_test # enable data available, buffer empty, and line status interrupts +.4byte UART_IIR, 0x02, read08_test # buffer should be empty, causing interrupt +.4byte UART_THR, 0x00, write08_test # write zeroes to transmitter +.4byte 0x0, 0x0401, uart_data_wait # IIR should have data ready interrupt +.4byte UART_THR, 0x01, write08_test # write 1 to transmitter buffer +.4byte UART_IIR, 0x04, read08_test # data interrupt should still be high +.4byte 0x0, 0x06, uart_lsr_intr_wait # wait for transmission to complete, IIR should throw error due to overrun error. +.4byte UART_LSR, 0x63, read08_test # read overrun error from LSR +.4byte UART_IIR, 0x04, read08_test # check that LSR interrupt was cleared +.4byte UART_RBR, 0x01, read08_test # read previous value from UART + +# =========== MODEM interrupts =========== + +.4byte UART_MSR, 0x00, write08_test # clear MSR +.4byte UART_IER, 0x08, write08_test # enable MODEM Status interrupts +.4byte UART_IIR, 0x01, read08_test # no interrupts pending +.4byte UART_MCR, 0x02, write08_test # Cause DCTS interrupt +.4byte UART_IIR, 0x00, read08_test # MODEM interrupt +.4byte UART_MSR, 0x11, read08_test # Read MSR to clear interrupt +.4byte UART_IIR, 0x01, read08_test # interrupt cleared by reading MSR + +.4byte 0x0, 0x0, terminate_test \ No newline at end of file From e2691c02b75b37815c62a4cc971206302e1290d8 Mon Sep 17 00:00:00 2001 From: cturek Date: Fri, 22 Jul 2022 16:45:19 +0000 Subject: [PATCH 36/36] Square root negative exponent handling --- pipelined/srt/sqrttestgen | Bin 22792 -> 22792 bytes pipelined/srt/sqrttestgen.c | 33 ++++++++++++++++++++++++--------- pipelined/srt/srt-waves.do | 1 + pipelined/srt/srt.sv | 22 ++++++++++++---------- pipelined/srt/testbench.sv | 8 ++++---- 5 files changed, 41 insertions(+), 23 deletions(-) diff --git a/pipelined/srt/sqrttestgen b/pipelined/srt/sqrttestgen index 45fc6e7868a87d7e5f3b9308e340b3600a392273..7c6efb9f9085d76783d4192e98ba9cd8c44b8d24 100755 GIT binary patch delta 1737 zcmY+FYfKzf6vyw~T^q z&i&tW&OLjR9lk_|FVSGs+36p|kV!+QwHtdatqe_tAad?tOwB zX?aW+a%MJbA=|;UOmB{tD3PfsyPYu`2o@6(7p1g?G;ZBPUeh!=Uqx1sXOY{HBgh@d zVdPHakC6`}UqtRg{xHRXp`++HopQ*1Jo_nxVGi#g1UUo|LL5dA&Tu%6Fvg)DVVpw` z!UTtI1l=$Gp~nMo<*o(Ov;fveVa-+hQ>vjpo5$hj!ck+YC5BioRp$hYwPJaQa4hWsnCfxMXF4MQv7h{y@kPRQh8cp2dr z+4Zc_Kp!lPvq{qoQ+QN{3af~mskHR zG~t4dfn&*<-qGNd8K0uwFk~o7omQh$iJ~dPO{e=8%>W@~;DL7PJa;zcUhCH?+$An{ zOg%H>uM~?0`NfG?#4ft>Jr1du_>>bnxP9``vKz|4Z<)r;gtp-NidJ{tG|dobixZ~V z1-2b5Kb;PB>0o!k?-T?37gz#am}WnS50{O>sVeZ2<_t*!vlG70zz46@8e3s*hEHb= z()N;ih@Oe@WM|mi<{|BZLzl zvZYxkuIak>8|^^dV7gOY*UZ%v+)b>seyg(`_kC<`_XZZD8?~PLM<`VQ^Xww&0PCST zquxA+n{1Lao43+VqpZ5gLuJ<1bQRVm&7S=b_nSIf1~$(by=IFR9Lw0qHml1 EKl{>MKmY&$ delta 1709 zcmZ9NZ){Ul6u|Fo_op4Nd#~F{-T!uO^RXi9oMf94tgdf+=swIq1rr?XOCvEc^@FJ~ zy3TCX)gjDW6XqAF`au?ASZJb)CTk~RC&r8!5~lc}L5OdHVfF(SEIiL`PaE(i@1FBJ z=iGblIrpZWIzy+<(3uNnkCo(x=fhv()~J|=sOZTlQ8q^}*9^&@PMkWl^zuzrR*_F54db&tX5p z0*Ac_3BPzVkq$x0^Y?T;2cR#2&ZC~lM}-r}k0CE3cOYLtZb4o{u0u{CS0QW27096t zG(1z1?|%k4jZCmTeZbG)Hxu|vtE#eGX@ zacd%?T>ZaSD*X~NYODY9&=(a=8-xsgRUAvG5oIl1ElSFo_8oZrqP)y+Bx7X9H@WK@ zA^C)or4w&wCGp5}aCmiE*-#@%ZHV32)KMAck2{iF;3w7guvWo2&;q+aQ08lHTYUfi z06V(W$oKIBe8sIhx<2jO;Ngo8Z1oqL7M%1O_J!cGC!r;Nu~SI}f4{yN&ZeYbvh=Z= zf^GA|210_Du~paoIj=Ux>MTukFYB|oUYNN*e;K5MYD%D*?y-+%emLt>BiFQHutNV5 zd{+VXON%2*g*Naj;B9^Btbd!>JjqKUVzY;>SZeL9l7slv4<6-m_t2JW>ckC`X1^WN zEkDt&@qcuE7J@w)T@Qc^faJ^DZ|iyjB=3I;r0K4%6DaT!yDL7qWf@j=f|Yp7B?*KZ zzN=t^=bIa?Fni%ku~F#}=LqcSgli>FS4!_1`YmO~2BT1JA`M_V4Br7ZCp8;LmYtWH zs!l^f7Z{7d{@5MqmcRm52is^=sT}KTI%+e^&@VpRY_+@JJa5TUTdeW%A88S7?CE4V0#_8>Ob72Bnr5U9e|_jDHfFEgjF$J~qQdf6yY Vq2eg}#Me`i!T%qq_lmFG_#ek*L> 1) + (`NE+2)'(`BIAS); + assign DExp = {2'b00, XExp} - {2'b00, YExp} + (`NE+2)'(`BIAS); + assign calcExp = Sqrt ? SExp[`NE-1:0] : DExp[`NE-1:0]; endmodule @@ -462,11 +462,13 @@ module srtpostproc( end assign floatRes = S[`DIVLEN] ? S[`DIVLEN:1] : S[`DIVLEN-1:0]; assign intRes = intS[`DIVLEN] ? intS[`DIVLEN:1] : intS[`DIVLEN-1:0]; - assign shiftRem = (intRem >>> (`DIVLEN - dur + 2)); - always_comb - if (Int) Result = intRes >> (`DIVLEN - dur); - else if (Mod) Result = shiftRem[`DIVLEN-1:0]; - else Result = floatRes; + assign shiftRem = (intRem >> (zeroCntD)); + always_comb begin + if (Int) begin + if (Mod) Result = shiftRem[`DIVLEN-1:0]; + else Result = intRes >> (`DIVLEN - dur); + end else Result = floatRes; + end assign calcSign = XSign ^ YSign; endmodule diff --git a/pipelined/srt/testbench.sv b/pipelined/srt/testbench.sv index 513305b26..1b40c673a 100644 --- a/pipelined/srt/testbench.sv +++ b/pipelined/srt/testbench.sv @@ -53,7 +53,7 @@ module testbench; // Test parameters parameter MEM_SIZE = 40000; - parameter MEM_WIDTH = 64+64+64+64; + parameter MEM_WIDTH = 64+64+64; // Test sizes `define memr 63:0 @@ -70,9 +70,9 @@ module testbench; integer testnum, errors; // Equip Int, Sqrt, or IntMod test - assign Int = 1'b1; + assign Int = 1'b0; assign Mod = 1'b0; - assign Sqrt = 1'b0; + assign Sqrt = 1'b1; // Divider srt srt(.clk, .Start(req), @@ -101,7 +101,7 @@ module testbench; begin testnum = 0; errors = 0; - $readmemh ("inttestvectors", Tests); + $readmemh ("sqrttestvectors", Tests); Vec = Tests[testnum]; a = Vec[`mema]; {asign, aExp, afrac} = a;