From 065f3f3f6df62d13b0cad84712b1942797c3d7b3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 12 Nov 2023 20:23:14 -0800 Subject: [PATCH 1/4] DivStickyM no longer mysteriously needs to be gated with SqrtM after divider improvements --- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index cb1f56db7..0b358909a 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -86,9 +86,10 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////// // If the result is not exact, the sticky should be set - assign DivStickyM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide +// assign DivStickyM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide + assign DivStickyM = ~WZeroM & ~(SpecialCaseM); - // Determine if sticky bit is negative // *** look for ways to optimize this. Shift shouldn't be needed. 
+ // Determine if sticky bit is negative assign Sum = WC + WS; assign NegStickyM = Sum[P.DIVb+3]; mux2 #(P.DIVb+1) preummux(FirstU, FirstUM, NegStickyM, PreUmM); // Select U or U-1 depending on negative sticky bit From c44ae93e22aeb0842ed21e18208d52a43c04bdab Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 12 Nov 2023 20:23:27 -0800 Subject: [PATCH 2/4] DivStickyM no longer mysteriously needs to be gated with SqrtM after divider improvements --- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index 0b358909a..5a40a3bdc 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -86,8 +86,7 @@ module fdivsqrtpostproc import cvw::*; #(parameter cvw_t P) ( ////////////////////////// // If the result is not exact, the sticky should be set -// assign DivStickyM = ~WZeroM & ~(SpecialCaseM & SqrtM); // ***unsure why SpecialCaseM has to be gated by SqrtM, but otherwise fails regression on divide - assign DivStickyM = ~WZeroM & ~(SpecialCaseM); + assign DivStickyM = ~WZeroM & ~SpecialCaseM; // Determine if sticky bit is negative assign Sum = WC + WS; From 121f685fa27c451e535d8d25c65b23260470649c Mon Sep 17 00:00:00 2001 From: David Harris Date: Mon, 13 Nov 2023 07:23:15 -0800 Subject: [PATCH 3/4] Removed assign statement inside always block --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index e8a430a91..1e6eda56c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -76,7 +76,7 @@ module fdivsqrtcycles import cvw::*; #(parameter cvw_t P) ( if (P.IDIV_ON_FPU) ResultBitsE = IntDivE ? 
IntResultBitsE : FPResultBitsE; else ResultBitsE = FPResultBitsE; - assign CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk) + CyclesE = (ResultBitsE-1)/(P.RK) + 1; // ceil (ResultBitsE/rk) end /* verilator lint_on WIDTH */ From 8ba0336c6f231eca478244f51121678786c0803d Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 14 Nov 2023 11:01:58 -0800 Subject: [PATCH 4/4] Removed unused addins, cleaned up configuration to support half precision on RV64gc, gate unused hazard inputs to reduce critical path in rv32e --- .gitmodules | 3 --- addins/embench-iot | 2 +- addins/riscv-arch-test | 2 +- addins/riscv-tests | 1 - config/rv64gc/config.vh | 2 +- src/hazard/hazard.sv | 37 +++++++++++++++++++++++++-------- src/wally/wallypipelinedcore.sv | 2 +- 7 files changed, 32 insertions(+), 17 deletions(-) delete mode 160000 addins/riscv-tests diff --git a/.gitmodules b/.gitmodules index 9a4c7fbb8..1e56898c8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -8,9 +8,6 @@ [submodule "addins/imperas-riscv-tests"] path = addins/imperas-riscv-tests url = https://github.com/riscv-ovpsim/imperas-riscv-tests -[submodule "addins/riscv-tests"] - path = addins/riscv-tests - url = https://github.com/riscv-software-src/riscv-tests [submodule "addins/riscv-dv"] path = addins/riscv-dv url = https://github.com/google/riscv-dv diff --git a/addins/embench-iot b/addins/embench-iot index 1480febc3..4c5eb8798 160000 --- a/addins/embench-iot +++ b/addins/embench-iot @@ -1 +1 @@ -Subproject commit 1480febc3ace5f471baeee4b1ae0d8fea16e4762 +Subproject commit 4c5eb87983f51ca7fcf7855306877b3d1c3aabf1 diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 197179fdc..2c5675d7a 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 197179fdc9dfeeca821e848f373c897a3fdae86c +Subproject commit 2c5675d7a58e98d47bef3a6cf5a8373397b0d0be diff --git a/addins/riscv-tests b/addins/riscv-tests deleted file mode 160000 index cf04274f5..000000000 --- 
a/addins/riscv-tests +++ /dev/null @@ -1 +0,0 @@ -Subproject commit cf04274f50621fd9ef9147793cca6dd1657985c7 diff --git a/config/rv64gc/config.vh b/config/rv64gc/config.vh index 8decf60d5..564b32f5d 100644 --- a/config/rv64gc/config.vh +++ b/config/rv64gc/config.vh @@ -42,7 +42,7 @@ localparam ZIFENCEI_SUPPORTED = 1; localparam COUNTERS = 12'd32; localparam ZICNTR_SUPPORTED = 1; localparam ZIHPM_SUPPORTED = 1; -localparam ZFH_SUPPORTED = 0; +localparam ZFH_SUPPORTED = 1; localparam SSTC_SUPPORTED = 1; localparam ZICBOM_SUPPORTED = 1; localparam ZICBOZ_SUPPORTED = 1; diff --git a/src/hazard/hazard.sv b/src/hazard/hazard.sv index cb70605c0..028dbf61d 100644 --- a/src/hazard/hazard.sv +++ b/src/hazard/hazard.sv @@ -26,7 +26,7 @@ // and limitations under the License. //////////////////////////////////////////////////////////////////////////////////////////////// -module hazard ( +module hazard import cvw::*; #(parameter cvw_t P) ( // Detect hazards input logic BPWrongE, CSRWriteFenceM, RetM, TrapM, input logic LoadStallD, StoreStallD, MDUStallD, CSRRdStallD, @@ -46,9 +46,28 @@ module hazard ( logic WFIStallM, WFIInterruptedM; + logic ValidWfiM, ValidTrapM, ValidRetM, ValidCSRWriteFenceM, ValidCSRRdStallD; + logic ValidFPUStallD, ValidFCvtIntStallD, ValidFDivBusyE, ValidMDUStallD, ValidDivBusyE; + + // Gate Stall/Flush sources with supported features + // This is not logically necessary because the original signals are already 0 when the feature is unsupported + // However, synthesis does not propagate the constant 0 across modules + // By gating these signals, synthesis eliminates unnecessary stall/flush logic, saving about 10% cycle time for rv32e + // These lines of code gating with a compile-time constant generate no hardware. 
+ assign ValidWfiM = wfiM & P.ZICSR_SUPPORTED; + assign ValidTrapM = TrapM & P.ZICSR_SUPPORTED; + assign ValidRetM = RetM & P.ZICSR_SUPPORTED; + assign ValidCSRWriteFenceM = CSRWriteFenceM & P.ZICSR_SUPPORTED; + assign ValidCSRRdStallD = CSRRdStallD & P.ZICSR_SUPPORTED; + assign ValidFPUStallD = FPUStallD & P.F_SUPPORTED; + assign ValidFCvtIntStallD = FCvtIntStallD & P.F_SUPPORTED; + assign ValidFDivBusyE = FDivBusyE & P.F_SUPPORTED; + assign ValidMDUStallD = MDUStallD & P.M_SUPPORTED; + assign ValidDivBusyE = DivBusyE & P.M_SUPPORTED; + // WFI logic - assign WFIStallM = wfiM & ~IntPendingM; // WFI waiting for an interrupt or timeout - assign WFIInterruptedM = wfiM & IntPendingM; // WFI detects a pending interrupt. Retire WFI; trap if interrupt is enabled. + assign WFIStallM = ValidWfiM & ~IntPendingM; // WFI waiting for an interrupt or timeout + assign WFIInterruptedM = ValidWfiM & IntPendingM; // WFI detects a pending interrupt. Retire WFI; trap if interrupt is enabled. // stalls and flushes // loads: stall for one cycle if the subsequent instruction depends on the load @@ -70,10 +89,10 @@ module hazard ( // Branch misprediction is found in the Execute stage and must flush the next two instructions. 
// However, an active division operation resides in the Execute stage, and when the BP incorrectly mispredicts the divide as a taken branch, the divde must still complete // When a WFI is interrupted and causes a trap, it flushes the rest of the pipeline but not the W stage, because the WFI needs to commit - assign FlushDCause = TrapM | RetM | CSRWriteFenceM | BPWrongE; - assign FlushECause = TrapM | RetM | CSRWriteFenceM |(BPWrongE & ~(DivBusyE | FDivBusyE)); - assign FlushMCause = TrapM | RetM | CSRWriteFenceM; - assign FlushWCause = TrapM & ~WFIInterruptedM; + assign FlushDCause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM | BPWrongE; + assign FlushECause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM |(BPWrongE & ~(ValidDivBusyE | ValidFDivBusyE)); + assign FlushMCause = ValidTrapM | ValidRetM | ValidCSRWriteFenceM; + assign FlushWCause = ValidTrapM & ~WFIInterruptedM; // Stall causes // Most data depenency stalls are identified in the decode stage @@ -84,8 +103,8 @@ module hazard ( // The IFU stalls the entire pipeline rather than just Fetch to avoid complications with instructions later in the pipeline causing Exceptions // A trap could be asserted at the start of a IFU/LSU stall, and should flush the memory operation assign StallFCause = '0; - assign StallDCause = (LoadStallD | StoreStallD | MDUStallD | CSRRdStallD | FCvtIntStallD | FPUStallD) & ~FlushDCause; - assign StallECause = (DivBusyE | FDivBusyE) & ~FlushECause; + assign StallDCause = (LoadStallD | StoreStallD | ValidMDUStallD | ValidCSRRdStallD | ValidFCvtIntStallD | ValidFPUStallD) & ~FlushDCause; + assign StallECause = (ValidDivBusyE | ValidFDivBusyE) & ~FlushECause; assign StallMCause = WFIStallM & ~FlushMCause; // Need to gate IFUStallF when the equivalent FlushFCause = FlushDCause = 1. 
// assign StallWCause = ((IFUStallF & ~FlushDCause) | LSUStallM) & ~FlushWCause; diff --git a/src/wally/wallypipelinedcore.sv b/src/wally/wallypipelinedcore.sv index 00b348660..46ffcac09 100644 --- a/src/wally/wallypipelinedcore.sv +++ b/src/wally/wallypipelinedcore.sv @@ -264,7 +264,7 @@ module wallypipelinedcore import cvw::*; #(parameter cvw_t P) ( end // global stall and flush control - hazard hzu( + hazard #(P) hzu( .BPWrongE, .CSRWriteFenceM, .RetM, .TrapM, .LoadStallD, .StoreStallD, .MDUStallD, .CSRRdStallD, .LSUStallM, .IFUStallF,