From 5e45fef8384fd810c75ed1cdb57830e14d46e0cf Mon Sep 17 00:00:00 2001 From: Miles Cook Date: Mon, 17 Apr 2023 18:35:03 -0700 Subject: [PATCH 01/35] Increase of TLB coverage in IFU --- tests/coverage/ifuCamlineWrite.S | 146 +++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 tests/coverage/ifuCamlineWrite.S diff --git a/tests/coverage/ifuCamlineWrite.S b/tests/coverage/ifuCamlineWrite.S new file mode 100644 index 000000000..4c11bf183 --- /dev/null +++ b/tests/coverage/ifuCamlineWrite.S @@ -0,0 +1,146 @@ +/////////////////////////////////////////// +// ifuCamlineWrite.S +// +// Written: Miles Cook and Kevin Box 4/17 +// +// Acknowledgements: The pagetable and outline for this test was written by Manuel Mendoza +// and Noah Limpert. +// +// Purpose: Test coverage for TLBCamlines in IFU +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +// load code to initialize stack, handle interrupts, terminate + +#include "WALLY-init-lib.h" + +# run-elf.bash find this in project description +main: + # Page table root address at 0x80010000 + li t5, 0x9000000000080010 + csrw satp, t5 + + # switch to supervisor mode + li a0, 1 + ecall + + li t0, 0x80015000 # base addr + + li t2, 0 # i = 0 + li t3, 33 # loop bound: 33 iterations (i = 0..32) + +loop: bge t2, t3, finished # exit loop if i >= loops + li t4, 0x1000 + li t1, 0x00008067 # load in jalr + sw t1, 0 (t0) + fence.I + jalr t0 + add t0, t0, t4 + addi t2, t2, 1 + j loop + +finished: + j done + +.data + +.align 16 +# Page table situated at 0x80010000 +pagetable: + .8byte 0x200044C1 // old page table was 200040 which just pointed to itself! wrong + +.align 12 + .8byte 0x0000000000000000 + .8byte 0x00000000200048C1 + .8byte 0x00000000200048C1 + + +.align 12 + .8byte 0x0000000020004CC1 + //.8byte 0x00000200800CF// ADD IN THE MEGAPAGE should 3 nibbles of zeros be removed? 
+ +.align 12 + #80000000 + .8byte 0x200000CF + .8byte 0x200004CF + .8byte 0x200008CF + .8byte 0x20000CCF + + .8byte 0x200010CF + .8byte 0x200014CF + .8byte 0x200018CF + .8byte 0x20001CCF + + .8byte 0x200020CF + .8byte 0x200024CF + .8byte 0x200028CF + .8byte 0x20002CCF + + .8byte 0x200030CF + .8byte 0x200034CF + .8byte 0x200038CF + .8byte 0x20003CCF + + .8byte 0x200040CF + .8byte 0x200044CF + .8byte 0x200048CF + .8byte 0x20004CCF + + .8byte 0x200050CF + .8byte 0x200054CF + .8byte 0x200058CF + .8byte 0x20005CCF + + .8byte 0x200060CF + .8byte 0x200064CF + .8byte 0x200068CF + .8byte 0x20006CCF + + .8byte 0x200070CF + .8byte 0x200074CF + .8byte 0x200078CF + .8byte 0x20007CCF + + .8byte 0x200080CF + .8byte 0x200084CF + .8byte 0x200088CF + .8byte 0x20008CCF + + .8byte 0x200090CF + .8byte 0x200094CF + .8byte 0x200098CF + .8byte 0x20009CCF + + .8byte 0x200100CF + .8byte 0x200104CF + .8byte 0x200108CF + .8byte 0x20010CCF + + .8byte 0x200110CF + .8byte 0x200114CF + .8byte 0x200118CF + .8byte 0x20011CCF + + .8byte 0x200120CF + .8byte 0x200124CF + .8byte 0x200128CF + .8byte 0x20012CCF + + .8byte 0x200130CF + .8byte 0x200134CF From 914baf6bb1d030e406c0788db1028fb1bf5d332f Mon Sep 17 00:00:00 2001 From: Cedar Turek Date: Tue, 18 Apr 2023 15:14:17 -0700 Subject: [PATCH 02/35] moved D flop to preproc --- src/fpu/fdivsqrt/fdivsqrt.sv | 9 ++++----- src/fpu/fdivsqrt/fdivsqrtiter.sv | 6 +----- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 7 +++++-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 92f64cbdc..e8708c6c4 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -57,7 +57,7 @@ module fdivsqrt( logic [`DIVb+3:0] WS, WC; // Partial remainder components logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend) - logic [`DIVb-1:0] DPreproc, D; // Iterator Divisor + logic [`DIVb-1:0] D; // Iterator Divisor logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values logic 
[`DIVb+1:0] FirstC; // Step tracker logic Firstun; // Quotient selection @@ -75,8 +75,7 @@ module fdivsqrt( fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E, - .QeM, .X, .DPreproc, + .Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E, .QeM, .X, .D, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, .nE, .BZeroM, .nM, .mM, .AM, @@ -90,8 +89,8 @@ module fdivsqrt( .IDivStartE, .ISpecialCaseE, .nE, .IntDivE); fdivsqrtiter fdivsqrtiter( // CSA Iterator - .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .DPreproc, - .D, .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC)); + .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, + .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC)); fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index ec15423e4..f3048c8b6 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -34,8 +34,7 @@ module fdivsqrtiter( input logic FDivBusyE, input logic SqrtE, input logic [`DIVb+3:0] X, - input logic [`DIVb-1:0] DPreproc, - output logic [`DIVb-1:0] D, + input logic [`DIVb-1:0] D, output logic [`DIVb:0] FirstU, FirstUM, output logic [`DIVb+1:0] FirstC, output logic Firstun, @@ -95,9 +94,6 @@ module fdivsqrtiter( mux2 #(`DIVb+2) cmux(C[`DIVCOPIES], initC, IFDivStartE, NextC); flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]); - // Divisior register - flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D); - // Divisor Selections // - choose the negitive version of what's being selected // - D is a 0.b mantissa diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index cf8a055ef..4af1d786c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -39,7 
+39,7 @@ module fdivsqrtpreproc ( input logic [2:0] Funct3E, output logic [`NE+1:0] QeM, output logic [`DIVb+3:0] X, - output logic [`DIVb-1:0] DPreproc, + output logic [`DIVb-1:0] D, // Int-specific input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, @@ -50,7 +50,7 @@ module fdivsqrtpreproc ( output logic [`XLEN-1:0] AM ); - logic [`DIVb-1:0] XPreproc; + logic [`DIVb-1:0] XPreproc, DPreproc; logic [`DIVb:0] PreSqrtX; logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [`NE+1:0] QeE; // Quotient Exponent (FP only) @@ -173,5 +173,8 @@ module fdivsqrtpreproc ( // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); + + // Divisior register + flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D); endmodule From b1dd1a627ffccb1fb6f75eb2bc31468b7e5a5a80 Mon Sep 17 00:00:00 2001 From: Cedar Turek Date: Tue, 18 Apr 2023 15:41:04 -0700 Subject: [PATCH 03/35] gave integer bits to D instead of adding manually everywhere --- src/fpu/fdivsqrt/fdivsqrt.sv | 2 +- src/fpu/fdivsqrt/fdivsqrtiter.sv | 11 ++++------- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 9 ++++----- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 10 ++++------ src/fpu/fdivsqrt/fdivsqrtstage2.sv | 5 ++--- src/fpu/fdivsqrt/fdivsqrtstage4.sv | 5 ++--- 6 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index e8708c6c4..1e05aee16 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -57,7 +57,7 @@ module fdivsqrt( logic [`DIVb+3:0] WS, WC; // Partial remainder components logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend) - logic [`DIVb-1:0] D; // Iterator Divisor + logic [`DIVb+3:0] D; // Iterator Divisor logic [`DIVb:0] FirstU, FirstUM; // 
Intermediate result values logic [`DIVb+1:0] FirstC; // Step tracker logic Firstun; // Quotient selection diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index f3048c8b6..aeb4bcc4d 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -33,8 +33,7 @@ module fdivsqrtiter( input logic IFDivStartE, input logic FDivBusyE, input logic SqrtE, - input logic [`DIVb+3:0] X, - input logic [`DIVb-1:0] D, + input logic [`DIVb+3:0] X, D, output logic [`DIVb:0] FirstU, FirstUM, output logic [`DIVb+1:0] FirstC, output logic Firstun, @@ -95,12 +94,10 @@ module fdivsqrtiter( flopen #(`DIVb+2) creg(clk, FDivBusyE, NextC, C[0]); // Divisor Selections - // - choose the negitive version of what's being selected - // - D is a 0.b mantissa - assign DBar = {3'b111, 1'b0, ~D}; + assign DBar = ~D; // for -D if(`RADIX == 4) begin : d2 - assign DBar2 = {2'b11, 1'b0, ~D, 1'b1}; - assign D2 = {2'b0, 1'b1, D, 1'b0}; + assign D2 = D << 1; // for 2D, only used in R4 + assign DBar2 = ~D2; // for -2D, only used in R4 end // k=DIVCOPIES of the recurrence logic diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index b8575f7fe..1009cd227 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -32,7 +32,7 @@ module fdivsqrtpostproc( input logic clk, reset, input logic StallM, input logic [`DIVb+3:0] WS, WC, - input logic [`DIVb-1:0] D, + input logic [`DIVb+3:0] D, input logic [`DIVb:0] FirstU, FirstUM, input logic [`DIVb+1:0] FirstC, input logic SqrtE, @@ -46,7 +46,7 @@ module fdivsqrtpostproc( output logic [`XLEN-1:0] FIntDivResultM ); - logic [`DIVb+3:0] W, Sum, DM; + logic [`DIVb+3:0] W, Sum; logic [`DIVb:0] PreQmM; logic NegStickyM; logic weq0E, WZeroM; @@ -67,7 +67,7 @@ module fdivsqrtpostproc( assign FirstK = ({1'b1, FirstC} & ~({1'b1, FirstC} << 1)); assign FZeroSqrtE = {FirstUM[`DIVb], FirstUM, 2'b0} | {FirstK,1'b0}; // F for square root - assign 
FZeroDivE = {3'b001,D,1'b0}; // F for divide + assign FZeroDivE = D << 1; // F for divide mux2 #(`DIVb+4) fzeromux(FZeroDivE, FZeroSqrtE, SqrtE, FZeroE); csa #(`DIVb+4) fadd(WS, WC, FZeroE, 1'b0, WSF, WCF); // compute {WCF, WSF} = {WS + WC + FZero}; aplusbeq0 #(`DIVb+4) wcfpluswsfeq0(WCF, WSF, wfeq0E); @@ -102,11 +102,10 @@ module fdivsqrtpostproc( logic signed [`DIVb+3:0] PreResultM, PreIntResultM; assign W = $signed(Sum) >>> `LOGR; - assign DM = {4'b0001, D}; assign UnsignedQuotM = {3'b000, PreQmM}; // Integer remainder: sticky and sign correction muxes - mux2 #(`DIVb+4) normremdmux(W, W+DM, NegStickyM, NormRemDM); + mux2 #(`DIVb+4) normremdmux(W, W+D, NegStickyM, NormRemDM); mux2 #(`DIVb+4) normremsmux(NormRemDM, -NormRemDM, AsM, NormRemM); mux2 #(`DIVb+4) quotresmux(UnsignedQuotM, -UnsignedQuotM, NegQuotM, NormQuotM); diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 4af1d786c..9d375a267 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -38,8 +38,7 @@ module fdivsqrtpreproc ( input logic XZeroE, input logic [2:0] Funct3E, output logic [`NE+1:0] QeM, - output logic [`DIVb+3:0] X, - output logic [`DIVb-1:0] D, + output logic [`DIVb+3:0] X, D, // Int-specific input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, @@ -111,7 +110,9 @@ module fdivsqrtpreproc ( // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. 
mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); assign DivX = {3'b000, ~NumerZeroE, XPreproc}; - // *** CT 4/13/23 Create D output here with leading 1 appended as well, use in the other modules + + // Divisior register + flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); // ***CT: factor out fdivsqrtcycles if (`IDIV_ON_FPU) begin:intrightshift // Int Supported @@ -173,8 +174,5 @@ module fdivsqrtpreproc ( // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); - - // Divisior register - flopen #(`DIVb) dreg(clk, IFDivStartE, DPreproc, D); endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/src/fpu/fdivsqrt/fdivsqrtstage2.sv index 53c1711cb..be62f8aa6 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -30,8 +30,7 @@ /* verilator lint_off UNOPTFLAT */ module fdivsqrtstage2 ( - input logic [`DIVb-1:0] D, - input logic [`DIVb+3:0] DBar, + input logic [`DIVb+3:0] D, DBar, input logic [`DIVb:0] U, UM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, @@ -66,7 +65,7 @@ module fdivsqrtstage2 ( always_comb if (up) Dsel = DBar; else if (uz) Dsel = '0; - else Dsel = {4'b0001, D}; // un + else Dsel = D; // un // Partial Product Generation // WSA, WCA = WS + WC - qD diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv index f2ff3734b..9464e6a88 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -29,8 +29,7 @@ `include "wally-config.vh" module fdivsqrtstage4 ( - input logic [`DIVb-1:0] D, - input logic [`DIVb+3:0] DBar, D2, DBar2, + input logic [`DIVb+3:0] D, DBar, D2, DBar2, input logic [`DIVb:0] U,UM, input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, @@ -75,7 +74,7 @@ module fdivsqrtstage4 ( 4'b1000: Dsel = DBar2; 4'b0100: Dsel = DBar; 4'b0000: Dsel = 
'0; - 4'b0010: Dsel = {3'b0, 1'b1, D}; + 4'b0010: Dsel = D; 4'b0001: Dsel = D2; default: Dsel = 'x; endcase From db0ca8695a8e7051a9454dc631c29d506cdd4f98 Mon Sep 17 00:00:00 2001 From: Kevin Thomas Date: Tue, 18 Apr 2023 17:57:56 -0500 Subject: [PATCH 04/35] Add PR#252 test file to coverage --- testbench/tests.vh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/testbench/tests.vh b/testbench/tests.vh index 6a0f80276..19adb818c 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -53,7 +53,8 @@ string tvpaths[] = '{ "lsu", "vm64check", "pmp", - "tlbKP" + "tlbKP", + "ifuCamlineWrite" }; string coremark[] = '{ From 49356aa4ca4553364bcaf51248c2872b5b13ce19 Mon Sep 17 00:00:00 2001 From: Cedar Turek Date: Tue, 18 Apr 2023 16:14:45 -0700 Subject: [PATCH 05/35] created fdivsqrtcycles, moved cycles calculation from FSM to preproc --- src/fpu/fdivsqrt/fdivsqrt.sv | 25 +++++----- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 76 +++++++++++++++++++++++++++++ src/fpu/fdivsqrt/fdivsqrtfsm.sv | 76 ++++++----------------------- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 16 +++--- 4 files changed, 114 insertions(+), 79 deletions(-) create mode 100644 src/fpu/fdivsqrt/fdivsqrtcycles.sv diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index 1e05aee16..f4d465012 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -57,42 +57,43 @@ module fdivsqrt( logic [`DIVb+3:0] WS, WC; // Partial remainder components logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend) - logic [`DIVb+3:0] D; // Iterator Divisor + logic [`DIVb+3:0] D; // Iterator Divisor logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values logic [`DIVb+1:0] FirstC; // Step tracker logic Firstun; // Quotient selection logic WZeroE; // Early termination flag + logic [`DURLEN-1:0] cycles; // FSM cycles logic SpecialCaseM; // Divide by zero, square root of negative, etc. 
logic DivStartE; // Enable signal for flops during stall // Integer div/rem signals logic BZeroM; // Denominator is zero logic IntDivM; // Integer operation - logic [`DIVBLEN:0] nE, nM, mM; // Shift amounts + logic [`DIVBLEN:0] nM, mM; // Shift amounts logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor logic [`XLEN-1:0] AM; // Original Numerator for postprocessor logic ISpecialCaseE; // Integer div/remainder special cases - fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor - .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .Fmt(FmtE), .Sqrt(SqrtE), .XZeroE, .Funct3E, .QeM, .X, .D, + fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor + .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), + .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, - .nE, .BZeroM, .nM, .mM, .AM, + .BZeroM, .nM, .mM, .AM, .IntDivM, .W64M, .NegQuotM, .ALTBM, .AsM); - fdivsqrtfsm fdivsqrtfsm( // FSM - .clk, .reset, .FmtE, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, + fdivsqrtfsm fdivsqrtfsm( // FSM + .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, - .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, + .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles, // Int-specific - .IDivStartE, .ISpecialCaseE, .nE, .IntDivE); + .IDivStartE, .ISpecialCaseE, .IntDivE); - fdivsqrtiter fdivsqrtiter( // CSA Iterator + fdivsqrtiter fdivsqrtiter( // CSA Iterator .clk, .IFDivStartE, .FDivBusyE, .SqrtE, .X, .D, .FirstU, .FirstUM, .FirstC, .Firstun, .FirstWS(WS), .FirstWC(WC)); - fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor + fdivsqrtpostproc fdivsqrtpostproc( // Postprocessor .clk, .reset, .StallM, .WS, .WC, .D, .FirstU, .FirstUM, .FirstC, .SqrtE, .Firstun, .SqrtM, .SpecialCaseM, .QmM, .WZeroE, .DivStickyM, diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv new 
file mode 100644 index 000000000..f1ad32cd8 --- /dev/null +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -0,0 +1,76 @@ +/////////////////////////////////////////// +// fdivsqrtcycles.sv +// +// Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu +// Modified: 18 April 2022 +// +// Purpose: Determine the number of FSM cycles for the combined divide / square root unit +// +// Documentation: RISC-V System on Chip Design Chapter 13 +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module fdivsqrtcycles( + input logic [`FMTBITS-1:0] FmtE, + input logic SqrtE, + input logic IntDivE, + input logic [`DIVBLEN:0] nE, + output logic [`DURLEN-1:0] cycles +); + logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits + // DIVN = `NF+3 + // NS = NF + 1 + // N = NS or NS+2 for div/sqrt. 
+ + /* verilator lint_off WIDTH */ + if (`FPSIZES == 1) + assign Nf = `NF; + else if (`FPSIZES == 2) + always_comb + case (FmtE) + 1'b0: Nf = `NF1; + 1'b1: Nf = `NF; + endcase + else if (`FPSIZES == 3) + always_comb + case (FmtE) + `FMT: Nf = `NF; + `FMT1: Nf = `NF1; + `FMT2: Nf = `NF2; + endcase + else if (`FPSIZES == 4) + always_comb + case(FmtE) + `S_FMT: Nf = `S_NF; + `D_FMT: Nf = `D_NF; + `H_FMT: Nf = `H_NF; + `Q_FMT: Nf = `Q_NF; + endcase + + always_comb begin + if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 + else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs + if (`IDIV_ON_FPU) cycles = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + end + /* verilator lint_on WIDTH */ + +endmodule \ No newline at end of file diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 0793346bf..5332087ad 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -29,32 +29,27 @@ `include "wally-config.vh" module fdivsqrtfsm( - input logic clk, - input logic reset, - input logic [`FMTBITS-1:0] FmtE, - input logic XInfE, YInfE, - input logic XZeroE, YZeroE, - input logic XNaNE, YNaNE, - input logic FDivStartE, IDivStartE, - input logic XsE, - input logic SqrtE, - input logic StallM, - input logic FlushE, - input logic WZeroE, - input logic IntDivE, - input logic [`DIVBLEN:0] nE, - input logic ISpecialCaseE, - output logic IFDivStartE, - output logic FDivBusyE, FDivDoneE, - output logic SpecialCaseM + input logic clk, reset, + input logic XInfE, YInfE, + input logic XZeroE, YZeroE, + input logic XNaNE, YNaNE, + input logic FDivStartE, IDivStartE, + input logic XsE, WZeroE, + input logic SqrtE, + input logic StallM, FlushE, + input logic IntDivE, + input logic ISpecialCaseE, + input logic 
[`DURLEN-1:0] cycles, + output logic IFDivStartE, + output logic FDivBusyE, FDivDoneE, + output logic SpecialCaseM ); typedef enum logic [1:0] {IDLE, BUSY, DONE} statetype; statetype state; - logic [`DURLEN-1:0] step; - logic [`DURLEN-1:0] cycles; logic SpecialCaseE, FSpecialCaseE; + logic [`DURLEN-1:0] step; // FDivStartE and IDivStartE come from fctrl, reflecitng the start of floating-point and possibly integer division assign IFDivStartE = (FDivStartE | (IDivStartE & `IDIV_ON_FPU)) & (state == IDLE) & ~StallM; @@ -67,47 +62,6 @@ module fdivsqrtfsm( else assign SpecialCaseE = FSpecialCaseE; flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc -// DIVN = `NF+3 -// NS = NF + 1 -// N = NS or NS+2 for div/sqrt. - -// *** CT 4/13/23 move cycles calculation back to preprocesor -/* verilator lint_off WIDTH */ - logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits - if (`FPSIZES == 1) - assign Nf = `NF; - else if (`FPSIZES == 2) - always_comb - case (FmtE) - 1'b0: Nf = `NF1; - 1'b1: Nf = `NF; - endcase - else if (`FPSIZES == 3) - always_comb - case (FmtE) - `FMT: Nf = `NF; - `FMT1: Nf = `NF1; - `FMT2: Nf = `NF2; - endcase - else if (`FPSIZES == 4) - always_comb - case(FmtE) - `S_FMT: Nf = `S_NF; - `D_FMT: Nf = `D_NF; - `H_FMT: Nf = `H_NF; - `Q_FMT: Nf = `Q_NF; - endcase - - - always_comb begin - if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 - else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (`IDIV_ON_FPU) cycles = IntDivE ? 
((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); - else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); - end - - /* verilator lint_on WIDTH */ - always_ff @(posedge clk) begin if (reset | FlushE) begin state <= #1 IDLE; diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 9d375a267..43a5e42b2 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -33,8 +33,8 @@ module fdivsqrtpreproc ( input logic IFDivStartE, input logic [`NF:0] Xm, Ym, input logic [`NE-1:0] Xe, Ye, - input logic [`FMTBITS-1:0] Fmt, - input logic Sqrt, + input logic [`FMTBITS-1:0] FmtE, + input logic SqrtE, input logic XZeroE, input logic [2:0] Funct3E, output logic [`NE+1:0] QeM, @@ -43,7 +43,8 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, output logic ISpecialCaseE, - output logic [`DIVBLEN:0] nE, nM, mM, + output logic [`DURLEN-1:0] cycles, + output logic [`DIVBLEN:0] nM, mM, output logic NegQuotM, ALTBM, IntDivM, W64M, output logic AsM, BZeroM, output logic [`XLEN-1:0] AM @@ -54,7 +55,7 @@ module fdivsqrtpreproc ( logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [`NE+1:0] QeE; // Quotient Exponent (FP only) logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs + logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division logic signedDiv; // signed division @@ -169,10 +170,13 @@ module fdivsqrtpreproc ( // Sqrt is initialized on step one as R(X-1), so depends on Radix if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; - 
mux2 #(`DIVb+4) prexmux(DivX, SqrtX, Sqrt, PreShiftX); + mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); // Floating-point exponent - fdivsqrtexpcalc expcalc(.Fmt, .Xe, .Ye, .Sqrt, .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); + fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); + + // Number of FSM cycles (to FSM) + fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles); endmodule From 1bdae2285d3276dd7e22b1c8e582c77d1326bb2f Mon Sep 17 00:00:00 2001 From: Kevin Wan Date: Tue, 18 Apr 2023 18:43:50 -0700 Subject: [PATCH 06/35] PMPCFG_ARRAY_REGW cases --- testbench/tests.vh | 2 ++ tests/coverage/pmpcfg.S | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 tests/coverage/pmpcfg.S diff --git a/testbench/tests.vh b/testbench/tests.vh index 6a0f80276..8e327cafb 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -53,7 +53,9 @@ string tvpaths[] = '{ "lsu", "vm64check", "pmp", + "pmpcfg", "tlbKP" + }; string coremark[] = '{ diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S new file mode 100644 index 000000000..bb6961526 --- /dev/null +++ b/tests/coverage/pmpcfg.S @@ -0,0 +1,33 @@ +// pmpcfg supplemental + +#include "WALLY-init-lib.h" +main: + li t0, 0x8800000000000000 + csrw pmpcfg2, t0 + li t0, 0x88000000000000 + csrw pmpcfg2, t0 + li t0, 0x880000000000 + csrw pmpcfg2, t0 + li t0, 0x8800000000 + csrw pmpcfg2, t0 + li t0, 0x88000000 + csrw pmpcfg2, t0 + li t0, 0x880000 + csrw pmpcfg2, t0 + li t0, 0x8800 + csrw pmpcfg2, t0 + li t0, 0x8800000000000000 + csrw pmpcfg0, t0 + li t0, 0x88000000000000 + csrw pmpcfg0, t0 + li t0, 0x880000000000 + csrw pmpcfg0, t0 + li t0, 0x8800000000 + csrw pmpcfg0, t0 + li t0, 0x88000000 + csrw pmpcfg0, t0 + li t0, 0x880000 + csrw pmpcfg0, t0 + li t0, 0x8800 + csrw pmpcfg0, t0 + j done From 771124e265e0ce6b4f9fe8861de861fb9df24a96 Mon Sep 17 00:00:00 2001 From: Kevin Wan 
Date: Tue, 18 Apr 2023 21:50:48 -0700 Subject: [PATCH 07/35] Completely covers all PMPCFG_ARRAY_REGW cases --- testbench/tests.vh | 4 +++- tests/coverage/pmpcfg.S | 8 ++++++- tests/coverage/pmpcfg1.S | 48 ++++++++++++++++++++++++++++++++++++++++ tests/coverage/pmpcfg2.S | 12 ++++++++++ tests/coverage/priv.S | 1 + 5 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 tests/coverage/pmpcfg1.S create mode 100644 tests/coverage/pmpcfg2.S diff --git a/testbench/tests.vh b/testbench/tests.vh index 8e327cafb..49c946802 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -54,8 +54,10 @@ string tvpaths[] = '{ "vm64check", "pmp", "pmpcfg", + "pmpcfg1", + "pmpcfg2", "tlbKP" - + }; string coremark[] = '{ diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S index bb6961526..d65f810e5 100644 --- a/tests/coverage/pmpcfg.S +++ b/tests/coverage/pmpcfg.S @@ -1,4 +1,7 @@ -// pmpcfg supplemental +// pmpcfg part 1 +// Kevin Wan, kewan@hmc.edu, 4/18/2023 +// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. +// See the next part in pmpcfg1.S #include "WALLY-init-lib.h" main: @@ -16,6 +19,8 @@ main: csrw pmpcfg2, t0 li t0, 0x8800 csrw pmpcfg2, t0 + li t0, 0x88 + csrw pmpcfg2, t0 li t0, 0x8800000000000000 csrw pmpcfg0, t0 li t0, 0x88000000000000 @@ -30,4 +35,5 @@ main: csrw pmpcfg0, t0 li t0, 0x8800 csrw pmpcfg0, t0 + j done diff --git a/tests/coverage/pmpcfg1.S b/tests/coverage/pmpcfg1.S new file mode 100644 index 000000000..96264c55f --- /dev/null +++ b/tests/coverage/pmpcfg1.S @@ -0,0 +1,48 @@ +// another set of pmpcfg tests. A new file is made because pmpcfg register fields are +// locked forever after writing 1 to the lock bit for the first time. + +// Kevin Wan, kewan@hmc.edu, 4/13/2023 +// This set tests locking the pmpXcfg fields in descending order again, without setting the TOR bits. 
+// for the other part of the tests, see pmpcfg.S + +#include "WALLY-init-lib.h" +main: + li t0, 0x800 + csrw pmpcfg0, t0 + li t0, 0x8000000 + csrw pmpcfg0, t0 + + li t0, 0x8000000000000000 + csrw pmpcfg2, t0 + li t0, 0x80000000000000 + csrw pmpcfg2, t0 + li t0, 0x800000000000 + csrw pmpcfg2, t0 + li t0, 0x8000000000 + csrw pmpcfg2, t0 + li t0, 0x80000000 + csrw pmpcfg2, t0 + li t0, 0x800000 + csrw pmpcfg2, t0 + li t0, 0x8000 + csrw pmpcfg2, t0 + li t0, 0x80 + csrw pmpcfg2, t0 + li t0, 0x8000000000000000 + csrw pmpcfg0, t0 + li t0, 0x80000000000000 + csrw pmpcfg0, t0 + li t0, 0x800000000000 + csrw pmpcfg0, t0 + li t0, 0x8000000000 + csrw pmpcfg0, t0 + li t0, 0x80000000 + csrw pmpcfg0, t0 + li t0, 0x800000 + csrw pmpcfg0, t0 + li t0, 0x8000 + csrw pmpcfg0, t0 + + + + j done \ No newline at end of file diff --git a/tests/coverage/pmpcfg2.S b/tests/coverage/pmpcfg2.S new file mode 100644 index 000000000..5966e3cdc --- /dev/null +++ b/tests/coverage/pmpcfg2.S @@ -0,0 +1,12 @@ +// pmpcfg part 3 +// Kevin Wan, kewan@hmc.edu, 4/18/2023 +// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
+// See the next part in pmpcfg1.S + +#include "WALLY-init-lib.h" +main: + li t0, 0x80 + csrw pmpcfg0, t0 + + + j done \ No newline at end of file diff --git a/tests/coverage/priv.S b/tests/coverage/priv.S index 94b7cd0ef..5e187866b 100644 --- a/tests/coverage/priv.S +++ b/tests/coverage/priv.S @@ -189,6 +189,7 @@ main: li t1, -1 csrw mcounteren, t1 + # Go to supervisor mode li a0, 1 ecall From b5a3ff2d2d6bdceca8388645b88c8e6e08de21c4 Mon Sep 17 00:00:00 2001 From: Kevin Wan Date: Tue, 18 Apr 2023 22:09:50 -0700 Subject: [PATCH 08/35] a --- tests/coverage/pmpcfg2.S | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 tests/coverage/pmpcfg2.S diff --git a/tests/coverage/pmpcfg2.S b/tests/coverage/pmpcfg2.S deleted file mode 100644 index 5966e3cdc..000000000 --- a/tests/coverage/pmpcfg2.S +++ /dev/null @@ -1,12 +0,0 @@ -// pmpcfg part 3 -// Kevin Wan, kewan@hmc.edu, 4/18/2023 -// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. -// See the next part in pmpcfg1.S - -#include "WALLY-init-lib.h" -main: - li t0, 0x80 - csrw pmpcfg0, t0 - - - j done \ No newline at end of file From d74768ce04293ccc3019719825c8f5c1e9704d4f Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 18 Apr 2023 23:06:52 -0700 Subject: [PATCH 09/35] Add test cases for pmpcfg.S --- tests/coverage/pmpcfg.S | 39 ++++++++++++++++++++++++++++++++ tests/coverage/pmpcfg1.S | 48 ++++++++++++++++++++++++++++++++++++++++ tests/coverage/pmpcfg2.S | 12 ++++++++++ 3 files changed, 99 insertions(+) create mode 100644 tests/coverage/pmpcfg.S create mode 100644 tests/coverage/pmpcfg1.S create mode 100644 tests/coverage/pmpcfg2.S diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S new file mode 100644 index 000000000..387a8a726 --- /dev/null +++ b/tests/coverage/pmpcfg.S @@ -0,0 +1,39 @@ +// pmpcfg part 1 +// Kevin Wan, kewan@hmc.edu, 4/18/2023 +// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
+// See the next part in pmpcfg1.S + +#include "WALLY-init-lib.h" +main: + li t0, 0x8800000000000000 + csrw pmpcfg2, t0 + li t0, 0x88000000000000 + csrw pmpcfg2, t0 + li t0, 0x880000000000 + csrw pmpcfg2, t0 + li t0, 0x8800000000 + csrw pmpcfg2, t0 + li t0, 0x88000000 + csrw pmpcfg2, t0 + li t0, 0x880000 + csrw pmpcfg2, t0 + li t0, 0x8800 + csrw pmpcfg2, t0 + li t0, 0x88 + csrw pmpcfg2, t0 + li t0, 0x8800000000000000 + csrw pmpcfg0, t0 + li t0, 0x88000000000000 + csrw pmpcfg0, t0 + li t0, 0x880000000000 + csrw pmpcfg0, t0 + li t0, 0x8800000000 + csrw pmpcfg0, t0 + li t0, 0x88000000 + csrw pmpcfg0, t0 + li t0, 0x880000 + csrw pmpcfg0, t0 + li t0, 0x8800 + csrw pmpcfg0, t0 + + j done \ No newline at end of file diff --git a/tests/coverage/pmpcfg1.S b/tests/coverage/pmpcfg1.S new file mode 100644 index 000000000..96264c55f --- /dev/null +++ b/tests/coverage/pmpcfg1.S @@ -0,0 +1,48 @@ +// another set of pmpcfg tests. A new file is made because pmpcfg register fields are +// locked forever after writing 1 to the lock bit for the first time. + +// Kevin Wan, kewan@hmc.edu, 4/13/2023 +// This set tests locking the pmpXcfg fields in descending order again, without setting the TOR bits. 
+// for the other part of the tests, see pmpcfg.S + +#include "WALLY-init-lib.h" +main: + li t0, 0x800 + csrw pmpcfg0, t0 + li t0, 0x8000000 + csrw pmpcfg0, t0 + + li t0, 0x8000000000000000 + csrw pmpcfg2, t0 + li t0, 0x80000000000000 + csrw pmpcfg2, t0 + li t0, 0x800000000000 + csrw pmpcfg2, t0 + li t0, 0x8000000000 + csrw pmpcfg2, t0 + li t0, 0x80000000 + csrw pmpcfg2, t0 + li t0, 0x800000 + csrw pmpcfg2, t0 + li t0, 0x8000 + csrw pmpcfg2, t0 + li t0, 0x80 + csrw pmpcfg2, t0 + li t0, 0x8000000000000000 + csrw pmpcfg0, t0 + li t0, 0x80000000000000 + csrw pmpcfg0, t0 + li t0, 0x800000000000 + csrw pmpcfg0, t0 + li t0, 0x8000000000 + csrw pmpcfg0, t0 + li t0, 0x80000000 + csrw pmpcfg0, t0 + li t0, 0x800000 + csrw pmpcfg0, t0 + li t0, 0x8000 + csrw pmpcfg0, t0 + + + + j done \ No newline at end of file diff --git a/tests/coverage/pmpcfg2.S b/tests/coverage/pmpcfg2.S new file mode 100644 index 000000000..5966e3cdc --- /dev/null +++ b/tests/coverage/pmpcfg2.S @@ -0,0 +1,12 @@ +// pmpcfg part 3 +// Kevin Wan, kewan@hmc.edu, 4/18/2023 +// locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
+// See the next part in pmpcfg1.S + +#include "WALLY-init-lib.h" +main: + li t0, 0x80 + csrw pmpcfg0, t0 + + + j done \ No newline at end of file From 9b72d6ac37c6cacd3eee7f42f061e8184cb324a3 Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 18 Apr 2023 23:15:47 -0700 Subject: [PATCH 10/35] Update tests.vh --- testbench/tests.vh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/testbench/tests.vh b/testbench/tests.vh index 6a0f80276..f777dbf17 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -53,7 +53,11 @@ string tvpaths[] = '{ "lsu", "vm64check", "pmp", - "tlbKP" + "tlbKP", + "pmpcfg", + "pmpcfg1", + "pmpcfg2" + }; string coremark[] = '{ From 9ef85c547b35c0cdae84631e5efbd5a1e3255cab Mon Sep 17 00:00:00 2001 From: Alec Vercruysse Date: Mon, 17 Apr 2023 14:12:58 -0700 Subject: [PATCH 11/35] fix unhit exclusion in fdivsqrtfsm --- sim/coverage-exclusions-rv64gc.do | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 754d57db6..41345e6e6 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -35,7 +35,7 @@ do GetLineNum.do coverage exclude -srcfile lzc.sv # FDIVSQRT has -coverage exclude -scope /core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state DONE->BUSY +coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state DONE->BUSY ### Exclude D$ states and logic for the I$ instance # This is cleaner than trying to set an I$-specific pragma in cachefsm.sv (which would exclude it for the D$ instance too) From cd803bfa443c50739a363028448adbaf72375460 Mon Sep 17 00:00:00 2001 From: Alec Vercruysse Date: Wed, 19 Apr 2023 01:19:25 -0700 Subject: [PATCH 12/35] Cover CacheWay edge case: CacheDataMem we=1 while ce=0. This test basically triggers an i$ miss during a d$ (hit) store operation. It requires some tricky timing (e.g. a flushD right before the relevant store). I use a script to generate the test. 
--- testbench/tests.vh | 3 +- tests/coverage/dcache1.S | 83 +++++++++++++++++++++++++++++++++++++ tests/coverage/dcache1.py | 86 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 tests/coverage/dcache1.S create mode 100644 tests/coverage/dcache1.py diff --git a/testbench/tests.vh b/testbench/tests.vh index 6a0f80276..d2b8a9347 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -53,7 +53,8 @@ string tvpaths[] = '{ "lsu", "vm64check", "pmp", - "tlbKP" + "tlbKP", + "dcache1", }; string coremark[] = '{ diff --git a/tests/coverage/dcache1.S b/tests/coverage/dcache1.S new file mode 100644 index 000000000..4a9b3de15 --- /dev/null +++ b/tests/coverage/dcache1.S @@ -0,0 +1,83 @@ + #include "WALLY-init-lib.h" +main: + // start way test #1 + li t0, 0x80100000 +.align 6 + // i$ boundary, way test #1 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + sd zero, 0(t0) + sd zero, 0(t0) + .word 0x00000013 + .word 0x00000013 + // start way test #2 + li t0, 0x80101000 +.align 6 + // i$ boundary, way test #2 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + sd zero, 0(t0) + sd zero, 0(t0) + .word 0x00000013 + .word 0x00000013 + // start way test #3 + li t0, 0x80102000 +.align 6 + // i$ boundary, way test #3 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + sd zero, 0(t0) + sd zero, 0(t0) + .word 0x00000013 + .word 0x00000013 + // start way test #4 + li t0, 0x80103000 +.align 6 + // i$ 
boundary, way test #4 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + .word 0x00000013 + sd zero, 0(t0) + sd zero, 0(t0) + .word 0x00000013 + .word 0x00000013 + j done diff --git a/tests/coverage/dcache1.py b/tests/coverage/dcache1.py new file mode 100644 index 000000000..59259567b --- /dev/null +++ b/tests/coverage/dcache1.py @@ -0,0 +1,86 @@ +#################### +# dcache1.py +# +# Written: avercruysse@hmc.edu 18 April 2023 +# +# Purpose: Test Coverage for D$ +# (For each way, trigger a CacheDataMem write enable while chip enable is low) +# +# A component of the CORE-V-WALLY configurable RISC-V project. +# +# Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +# +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +# +# Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +# except in compliance with the License, or, at your option, the Apache License version 2.0. You +# may obtain a copy of the License at +# +# https://solderpad.org/licenses/SHL-2.1/ +# +# Unless required by applicable law or agreed to in writing, any work distributed under the +# License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. See the License for the specific language governing permissions +# and limitations under the License. +################################################ + +import os + +test_name = "dcache1.S" +dcache_num_ways = 4 +dcache_way_size_in_bytes = 4096 +# warning i$ line size is not currently parameterized. + +# arbitrary start location of where I send stores to. 
+mem_start_addr = 0x80100000 + +# pointer to the start of unused memory (strictly increasing) +mem_addr = mem_start_addr + + +def wl(line="", comment=None, fname=test_name): + with open(fname, "a") as f: + instr = False if (":" in line or + ".align" in line or + "# include" in line) else True + indent = 6 if instr else 0 + comment = "// " + comment if comment is not None else "" + to_write = " " * indent + line + comment + "\n" + f.write(to_write) + + +def write_repro_instrs(): + """ + Assumes that the store location has been fetched to d$, and is in t0. + """ + for i in range(16): # write a whole cache set. + if i == 12: + wl('sd zero, 0(t0)') # D$ write to set PCM = PCF + 8 for proper alignment (stallD will happen). + elif i == 13: + # the store in question happens here, at addresses 0x34, 0x74 + wl('sd zero, 0(t0)') # it should hit this time + else: + # can't be a NOP or anything else that is encoded as compressed. + # this is because the branch predictor will use the wrong address + # so the IFU cache miss will come late. + wl('.word 0x00000013') # addi x0, x0, 0 (canonical NOP, uncompressed). +if __name__ == "__main__": + if os.path.exists(test_name): + os.remove(test_name) + # os.rename(test_name, test_name + ".old") + wl(comment="This file is generated by dcache1.py (run that script manually)") + wl('#include "WALLY-init-lib.h"') + wl('main:') + + # exercise all 4 D$ ways. If they're not all full, it uses the first empty. + # So we are sure all 4 ways are exercised. + for i in range(dcache_num_ways): + wl(comment=f"start way test #{i+1}") + wl(f'li t0, {hex(mem_addr)}') + wl(f'.align 6') # start at i$ set boundary. 6 lsb bits are zero. + wl(comment=f"i$ boundary, way test #{i+1}") + write_repro_instrs() + mem_addr += dcache_way_size_in_bytes # so that we exercise a new D$ way.
+ + wl("j done") From b3a3af8ed364919db5b4c13fca6d66fde2e5d904 Mon Sep 17 00:00:00 2001 From: Alec Vercruysse Date: Wed, 19 Apr 2023 01:21:57 -0700 Subject: [PATCH 13/35] add D$ test case to trigger a FlushStage while SetDirtyWay=1 This hits some conditional coverage in each cacheway. A cache store hit happens at the same time as a StoreAmoMisalignedFault. --- testbench/tests.vh | 1 + tests/coverage/dcache2.S | 49 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 tests/coverage/dcache2.S diff --git a/testbench/tests.vh b/testbench/tests.vh index d2b8a9347..fd48d6dc5 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -55,6 +55,7 @@ string tvpaths[] = '{ "pmp", "tlbKP", "dcache1", + "dcache2" }; string coremark[] = '{ diff --git a/tests/coverage/dcache2.S b/tests/coverage/dcache2.S new file mode 100644 index 000000000..58f97a2e4 --- /dev/null +++ b/tests/coverage/dcache2.S @@ -0,0 +1,49 @@ +/////////////////////////////////////////// +// dcache2.S +// +// Written: avercruysse@hmc.edu 18 April 2023 +// +// Purpose: Test Coverage for D$ +// (for all 4 cache ways, trigger a FlushStage while SetDirtyWay=1) +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +#include "WALLY-init-lib.h" +main: + // way 0 + li t0, 0x80100770 + sd zero, 0(t0) + sd zero, 1(t0) + + // way 1 + li t0, 0x80101770 + sd zero, 0(t0) + sd zero, 1(t0) + + // way 2 + li t0, 0x80102770 + sd zero, 0(t0) + sd zero, 1(t0) + + // way 3 + li t0, 0x80103770 + sd zero, 0(t0) + sd zero, 1(t0) + + j done From de93bd6937d31362fc2ef286c6dc83037cd1d436 Mon Sep 17 00:00:00 2001 From: Alec Vercruysse Date: Wed, 19 Apr 2023 01:28:45 -0700 Subject: [PATCH 14/35] D$ scope-specific coverage exclusions (I$ logic that never fires) The InvalidateCache signal in the D$ is for I$ only, which causes some coverage issues that need exclusion. Another manual exclusion is due to the fact that D$ writeback, flush, write_line, or flush_writeback states can't be cancelled by a flush, so those transitions are excluded. There is some other small stuff to review (logic simplification, or an exclusion pragma if removing the redundant logic would make it harder to understand the code, as is the case in the FlushAdrCntEn assign statement, in my opinion).
--- sim/coverage-exclusions-rv64gc.do | 15 ++++++++++++++- src/cache/cachefsm.sv | 8 +++++--- src/cache/cacheway.sv | 2 +- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 41345e6e6..38c04231c 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -52,7 +52,7 @@ set end [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag-end: icache case"] coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange $start-$end coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache WRITEBACKStatement"] # exclude Atomic Operation logic -coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache storeAMO"] -item e 1 -fecexprrow 6 +coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: cache AnyMiss"] -item e 1 -fecexprrow 6 coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache storeAMO1"] -item e 1 -fecexprrow 2-4 coverage exclude -scope /dut/core/ifu/bus/icache/icache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: icache AnyUpdateHit"] -item e 1 -fecexprrow 2 # cache write logic @@ -77,6 +77,19 @@ for {set i 0} {$i < $numcacheways} {incr i} { coverage exclude -scope /dut/core/ifu/bus/icache/icache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: icache SetValidEN"] -item e 1 -fecexprrow 4 } +## D$ Exclusions. 
+# InvalidateCache is I$ only: +coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: dcache InvalidateCheck"] -item b 2 +coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: dcache InvalidateCheck"] -item s 1 +coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: dcache CacheEn"] -item e 1 -fecexprrow 12 +coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -linerange [GetLineNum ../src/cache/cachefsm.sv "exclusion-tag: cache AnyMiss"] -item e 1 -fecexprrow 4 +set numcacheways 4 +for {set i 0} {$i < $numcacheways} {incr i} { + coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/CacheWays[$i] -linerange [GetLineNum ../src/cache/cacheway.sv "exclusion-tag: dcache invalidateway"] -item be 1 -fecexprrow 4 +} +# D$ writeback, flush, write_line, or flush_writeback states can't be cancelled by a flush +coverage exclude -scope /dut/core/lsu/bus/dcache/dcache/cachefsm -ftrans CurrState STATE_WRITEBACK->STATE_READY STATE_FLUSH->STATE_READY STATE_WRITE_LINE->STATE_READY STATE_FLUSH_WRITEBACK->STATE_READY + # Excluding peripherals as sources of instructions for the ifu coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker/adrdecs/clintdec diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 90d8eaad8..7cd8240c4 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -110,10 +110,10 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( always_comb begin NextState = STATE_READY; case (CurrState) // exclusion-tag: icache state-case - STATE_READY: if(InvalidateCache) NextState = STATE_READY; + STATE_READY: if(InvalidateCache) NextState = STATE_READY; // exclusion-tag: dcache InvalidateCheck else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = 
STATE_FETCH; // exclusion-tag: icache FETCHStatement - else if(AnyMiss & LineDirty) NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement + else if(AnyMiss) /* & LineDirty */ NextState = STATE_WRITEBACK; // exclusion-tag: icache WRITEBACKStatement else NextState = STATE_READY; STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; else NextState = STATE_FETCH; @@ -160,6 +160,8 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( assign SelFlush = (CurrState == STATE_READY & FlushCache) | (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_WRITEBACK); + // coverage off -item e -fecexprrow 1 + // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck) assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | (CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty); assign FlushWayCntEn = (CurrState == STATE_FLUSH & ~LineDirty) | @@ -181,6 +183,6 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( (CurrState == STATE_WRITE_LINE) | resetDelay; assign SelFetchBuffer = CurrState == STATE_WRITE_LINE | CurrState == STATE_READ_HOLD; - assign CacheEn = (~Stall | FlushCache | AnyMiss) | (CurrState != STATE_READY) | reset | InvalidateCache; + assign CacheEn = (~Stall | FlushCache | AnyMiss) | (CurrState != STATE_READY) | reset | InvalidateCache; // exclusion-tag: dcache CacheEn endmodule // cachefsm diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index 79ec65e64..368c7b587 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -155,7 +155,7 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, if (reset) ValidBits <= #1 '0; if(CacheEn) begin ValidWay <= #1 ValidBits[CacheSet]; - if(InvalidateCache) ValidBits <= #1 '0; + if(InvalidateCache) ValidBits <= #1 '0; // exclusion-tag: dcache invalidateway else if (SetValidEN) ValidBits[CacheSet] <= #1 SetValidWay; end end From faaf26655861423bb07180b21573f82217146c99 Mon Sep 17 00:00:00 2001 From: Alec Vercruysse Date: Wed, 
19 Apr 2023 01:32:43 -0700 Subject: [PATCH 15/35] CacheFSM logic simplification for AMO operations Ran this by Ross. --- src/cache/cachefsm.sv | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 7cd8240c4..34f1778f5 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -69,7 +69,7 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( ); logic resetDelay; - logic AMO, StoreAMO; + logic StoreAMO; logic AnyUpdateHit, AnyHit; logic AnyMiss; logic FlushFlag; @@ -86,16 +86,15 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( statetype CurrState, NextState; - assign AMO = CacheAtomic[1] & (&CacheRW); - assign StoreAMO = AMO | CacheRW[0]; + assign StoreAMO = CacheRW[0]; // AMO operations assert CacheRW[0] - assign AnyMiss = (StoreAMO | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: icache storeAMO + assign AnyMiss = (StoreAMO | CacheRW[1]) & ~CacheHit & ~InvalidateCache; // exclusion-tag: cache AnyMiss assign AnyUpdateHit = (StoreAMO) & CacheHit; // exclusion-tag: icache storeAMO1 assign AnyHit = AnyUpdateHit | (CacheRW[1] & CacheHit); // exclusion-tag: icache AnyUpdateHit assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. - assign CacheAccess = (AMO | CacheRW[1] | CacheRW[0]) & CurrState == STATE_READY; // exclusion-tag: icache CacheW + assign CacheAccess = (|CacheRW) & CurrState == STATE_READY; // exclusion-tag: icache CacheW assign CacheMiss = CacheAccess & ~CacheHit; // special case on reset. 
When the fsm first exists reset the From 6e612a1693e682de51c3741400d13eb9f8b64e0e Mon Sep 17 00:00:00 2001 From: David Harris <74973295+davidharrishmc@users.noreply.github.com> Date: Wed, 19 Apr 2023 06:23:05 -0700 Subject: [PATCH 16/35] Update tests.vh Missing comma from merge --- testbench/tests.vh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testbench/tests.vh b/testbench/tests.vh index 93a406109..e2d4e5ad1 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -54,7 +54,7 @@ string tvpaths[] = '{ "vm64check", "pmp", "dcache1", - "dcache2" + "dcache2", "pmpcfg", "pmpcfg1", "pmpcfg2", From 4f57dca0dcf2da99990169788ea84c0ea982c4e4 Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 19 Apr 2023 11:58:22 -0700 Subject: [PATCH 17/35] Add pmpcfg test cases increasing IFU coverage --- tests/coverage/pmpcfg.S | 46 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S index 74181ab62..5b3e37b56 100644 --- a/tests/coverage/pmpcfg.S +++ b/tests/coverage/pmpcfg.S @@ -1,10 +1,52 @@ // pmpcfg part 1 // Kevin Wan, kewan@hmc.edu, 4/18/2023 +// Liam Chalk, lchalk@hmc.edu, 4/19/2023 // locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. 
// See the next part in pmpcfg1.S #include "WALLY-init-lib.h" main: + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00000017 + csrw pmpcfg0, t0 + + li t0, 0x90000000 + csrw pmpaddr2, t0 + li t0, 0x00000017 + csrw pmpcfg2, t0 + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00000017 + csrw pmpcfg1, t0 + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00000017 + csrw pmpcfg2, t0 + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00000017 + csrw pmpcfg3, t0 + + li t0, 0x90000000 + csrw pmpaddr1, t0 + li t0, 0x00000017 + csrw pmpcfg1, t0 + + li t0, 0x90000000 + csrw pmpaddr1, t0 + li t0, 0x00000017 + csrw pmpcfg2, t0 + + li t0, 0x90000000 + csrw pmpaddr1, t0 + li t0, 0x00000017 + csrw pmpcfg3, t0 + li t0, 0x8800000000000000 csrw pmpcfg2, t0 li t0, 0x88000000000000 @@ -36,6 +78,4 @@ main: li t0, 0x8800 csrw pmpcfg0, t0 - - j done - + j done \ No newline at end of file From ea9639435ec331c0058e5abd3df8b2e332c40992 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 19 Apr 2023 13:07:07 -0700 Subject: [PATCH 18/35] Added -fp flag to run arch64d/f tests in coverage --- sim/regression-wally | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sim/regression-wally b/sim/regression-wally index c70177206..fa112731a 100755 --- a/sim/regression-wally +++ b/sim/regression-wally @@ -28,6 +28,7 @@ regressionDir = os.path.dirname(os.path.abspath(__file__)) os.chdir(regressionDir) coverage = '-coverage' in sys.argv +fp = '-fp' in sys.argv TestCase = namedtuple("TestCase", ['name', 'variant', 'cmd', 'grepstr']) # name: the name of this test configuration (used in printing human-readable @@ -140,6 +141,9 @@ if (coverage): # delete all but 64gc tests when running coverage "arch64zi", "wally64a", "wally64periph", "wally64priv", "arch64zba", "arch64zbb", "arch64zbc", "arch64zbs", "imperas64f", "imperas64d", "imperas64c", "imperas64i"] + if (fp): + tests64gc.append("arch64f") + tests64gc.append("arch64d") coverStr = '-coverage' else: coverStr = '' From 
65c04489f1dbe5b3998685ba51e34136d1093aa0 Mon Sep 17 00:00:00 2001 From: David Harris <74973295+davidharrishmc@users.noreply.github.com> Date: Thu, 20 Apr 2023 14:09:32 -0700 Subject: [PATCH 19/35] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9cb56de0d..ff76f72ff 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # core-v-wally Configurable RISC-V Processor -Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, incluidng RV32/64, A, C, F, D, and M extensions, FENCE.I, and the various privileged modes and CSRs. It is written in SystemVerilog. It passes the RISC-V Arch Tests and boots Linux on an FPGA. +Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, incluidng RV32/64, A, C, F, D, Q, M, and Zb* extensions, FENCE.I, and the various privileged modes and CSRs. It is written in SystemVerilog. It passes the RISC-V Arch Tests and boots Linux on an FPGA. ![Wally block diagram](wallyriscvTopAll.png) From 2bd8b65a2bb065b94061e06d67a2c421f57763b3 Mon Sep 17 00:00:00 2001 From: David Harris <74973295+davidharrishmc@users.noreply.github.com> Date: Thu, 20 Apr 2023 14:15:34 -0700 Subject: [PATCH 20/35] Update README.md --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ff76f72ff..b73aecdb8 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ # core-v-wally -Configurable RISC-V Processor -Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, incluidng RV32/64, A, C, F, D, Q, M, and Zb* extensions, FENCE.I, and the various privileged modes and CSRs. It is written in SystemVerilog. It passes the RISC-V Arch Tests and boots Linux on an FPGA. 
+Wally is a 5-stage pipelined processor configurable to support all the standard RISC-V options, including RV32/64, A, C, F, D, Q, M, and Zb* extensions, virtual memory, PMP, and the various privileged modes and CSRs. It provides optional caches, branch prediction, and standard RISC-V peripherals (CLINT, PLIC, UART, GPIO). Wally is written in SystemVerilog. It passes the RISC-V Arch Tests and boots Linux on an FPGA. Configurations range from a minimal RV32E core to a fully featured RV64GC application processor. ![Wally block diagram](wallyriscvTopAll.png) Wally is described in an upcoming textbook, *RISC-V System-on-Chip Design*, by Harris, Stine, Thompson, and Harris. Users should follow the setup instructions below. A system administrator must install CAD tools using the directions further down. +# Verification + +Wally is presently at Technology Readiness Level 4, passing the RISC-V compatibility test suite and custom tests, and booting Linux in simulation and on an FPGA. See the [Test Plan](docs/testplan.md) for details. + # New User Setup New users may wish to do the following setup to access the server via a GUI and use a text editor. From 7ca44de126c1e3c3b2ff37b71f7d9f0c6b50eeff Mon Sep 17 00:00:00 2001 From: Noah Limpert Date: Thu, 20 Apr 2023 14:38:13 -0700 Subject: [PATCH 21/35] Committing changes to add coverage to ASID, Global, Megapage size checks.
--- testbench/tests.vh | 3 + tests/coverage/tlbASID.S | 133 ++++++++++++++++++++++++++++++++ tests/coverage/tlbGLB.S | 134 ++++++++++++++++++++++++++++++++ tests/coverage/tlbMP.S | 163 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 433 insertions(+) create mode 100644 tests/coverage/tlbASID.S create mode 100644 tests/coverage/tlbGLB.S create mode 100644 tests/coverage/tlbMP.S diff --git a/testbench/tests.vh b/testbench/tests.vh index e2d4e5ad1..b86756b4e 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -59,6 +59,9 @@ string tvpaths[] = '{ "pmpcfg1", "pmpcfg2", "tlbKP", + "tlbMP", + "tlbASID", + "tlbGLB", "ifuCamlineWrite" }; diff --git a/tests/coverage/tlbASID.S b/tests/coverage/tlbASID.S new file mode 100644 index 000000000..bf71c0491 --- /dev/null +++ b/tests/coverage/tlbASID.S @@ -0,0 +1,133 @@ +/////////////////////////////////////////// +// tlbASID.S +// +// Written: mmendozamanriquez@hmc.edu 4 April 2023 +// nlimpert@hmc.edu +// +// Purpose: Test coverage for LSU +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +// load code to initialize stack, handle interrupts, terminate + +#include "WALLY-init-lib.h" + +# run-elf.bash find this in project description +main: + # Page table root address at 0x80080000 + li t5, 0x9000000000080080 // try making asid = 0. + csrw satp, t5 + + # sfence.vma x0, x0 + + # switch to supervisor mode + li a0, 1 + ecall + + li t0, 0xC0000000 + + li t2, 0 # i = 0 + li t5, 0 # j = 0 // now use as a counter for new asid loop + li t3, 32 # Max amount of Loops = 32 + +loop: bge t2, t3, nASID # exit loop if i >= loops + lw t1, 0(t0) + li t4, 0x1000 + add t0, t0, t4 + addi t2, t2, 1 + j loop + +nASID: bne t5, zero, finished + li a0, 3 // go + ecall + li t5, 0x9000100000080080 // try making asid = 1 + csrw satp, t5 + li a0, 1 + ecall + li t2, 0 + li t0, 0xC0000000 + li t5, 1 // make this not zero. + j loop + + +finished: + j done + +.data +.align 19 +# level 3 Page table situated at 0x8008 0000, should point to 8008,1000 +pagetable: + .8byte 0x200204C1 + +.align 12 // level 2 page table, contains direction to a gigapage + .8byte 0x0 + .8byte 0x0 + .8byte 0x200000CF // gigapage that starts at 8000 0000 goes to C000 0000 + .8byte 0x200208C1 // pointer to next page table entry at 8008 2000 + +.align 12 // level 1 page table, points to level 0 page table + .8byte 0x20020CC1 + +.align 12 // level 0 page table, points to address C000 0000 // FOR NOW ALL OF THESE GO TO 8 instead of C cause they start with 2 + .8byte 0x200000CF // access xC000 0000 + .8byte 0x200004CF // access xC000 1000 + .8byte 0x200008CF // access xC000 2000 + .8byte 0x20000CCF // access xC000 3000 + + .8byte 0x200010CF // access xC000 4000 + .8byte 0x200014CF + .8byte 0x200018CF + .8byte 0x20001CCF + + .8byte 0x200020CF // access xC000 8000 + .8byte 0x200024CF + .8byte 0x200028CF + .8byte 0x20002CCF + + .8byte 0x200030CF // access xC000 C000 + .8byte 0x200034CF + .8byte 0x200038CF + .8byte
0x20003CCF + + .8byte 0x200040CF // access xC001 0000 + .8byte 0x200044CF + .8byte 0x200048CF + .8byte 0x20004CCF + + .8byte 0x200050CF // access xC001 4000 + .8byte 0x200054CF + .8byte 0x200058CF + .8byte 0x20005CCF + + .8byte 0x200060CF // access xC001 8000 + .8byte 0x200064CF + .8byte 0x200068CF + .8byte 0x20006CCF + + .8byte 0x200070CF // access xC001 C000 + .8byte 0x200074CF + .8byte 0x200078CF + .8byte 0x20007CCF + + .8byte 0x200080CF // access xC002 0000 + .8byte 0x200084CF + .8byte 0x200088CF + .8byte 0x20008CCF + + \ No newline at end of file diff --git a/tests/coverage/tlbGLB.S b/tests/coverage/tlbGLB.S new file mode 100644 index 000000000..77e3a31c1 --- /dev/null +++ b/tests/coverage/tlbGLB.S @@ -0,0 +1,134 @@ +/////////////////////////////////////////// +// tlbGLB.S +// +// Written: mmendozamanriquez@hmc.edu 4 April 2023 +// nlimpert@hmc.edu +// +// Purpose: coverage for the global check. +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + + +// load code to initalize stack, handle interrupts, terminate + +#include "WALLY-init-lib.h" + +# run-elf.bash find this in project description +main: + # Page table root address at 0x80010000 + li t5, 0x9000000000080080 // try making asid = 0. + csrw satp, t5 + + # sfence.vma x0, x0 + + # switch to supervisor mode + li a0, 1 + ecall + + li t0, 0xC0000000 + + li t2, 0 # i = 0 + li t5, 0 # j = 0 // now use as a counter for new asid loop + li t3, 32 # Max amount of Loops = 32 + +loop: bge t2, t3, nASID # exit loop if i >= loops + lw t1, 0(t0) + li t4, 0x1000 + add t0, t0, t4 + addi t2, t2, 1 + j loop + +nASID: bne t5, zero, finished + li a0, 3 // go + ecall + li t5, 0x9000100000080080 // try making asid = 1 + csrw satp, t5 + li a0, 1 + ecall + li t2, 0 + li t0, 0xC0000000 + li t5, 1 // make this not zero. + j loop + + +finished: + j done + +.data +.align 19 +# level 3 Page table situated at 0x8008 0000, should point to 8008,1000 +pagetable: + .8byte 0x200204C1 + +.align 12 // level 2 page table, contains direction to a gigapageg + .8byte 0x0 + .8byte 0x0 + .8byte 0x200000CF // gigapage that starts at 8000 0000 goes to C000 0000 + .8byte 0x200208C1 // pointer to next page table entry at 8008 2000 + +.align 12 // level 1 page table, points to level 0 page table + .8byte 0x20020CE1 + +.align 12 // level 0 page table, points to address C000 0000 // FOR NOW ALL OF THESE GO TO 8 instead of C cause they start with 2 + .8byte 0x200000CF // access xC000 0000 + .8byte 0x200004CF // access xC000 1000 + .8byte 0x200008CF // access xC000 2000 + .8byte 0x20000CCF // access xC000 3000 + + .8byte 0x200010EF // access xC000 4000 + .8byte 0x200014EF + .8byte 0x200018EF + .8byte 0x20001CEF + + .8byte 0x200020EF // access xC000 8000 + .8byte 0x200024EF + .8byte 0x200028EF + .8byte 0x20002CEF + + .8byte 0x200030EF // access xC000 C000 + .8byte 0x200034EF + .8byte 0x200038EF + .8byte 
0x20003CEF + + .8byte 0x200040EF // access xC001 0000 + .8byte 0x200044EF + .8byte 0x200048EF + .8byte 0x20004CEF + + .8byte 0x200050EF // access xC001 4000 + .8byte 0x200054EF + .8byte 0x200058EF + .8byte 0x20005CEF + + .8byte 0x200060EF // access xC001 8000 + .8byte 0x200064EF + .8byte 0x200068EF + .8byte 0x20006CEF + + .8byte 0x200070EF // access xC001 C000 + .8byte 0x200074eF + .8byte 0x200078EF + .8byte 0x20007CEF + + .8byte 0x200080EF // access xC002 0000 + .8byte 0x200084EF + .8byte 0x200088EF + .8byte 0x20008CEF + + \ No newline at end of file diff --git a/tests/coverage/tlbMP.S b/tests/coverage/tlbMP.S new file mode 100644 index 000000000..6981d1f36 --- /dev/null +++ b/tests/coverage/tlbMP.S @@ -0,0 +1,163 @@ +/////////////////////////////////////////// +// tlbMP.S +// +// Written: mmendozamanriquez@hmc.edu 4 April 2023 +// nlimpert@hmc.edu +// +// Purpose: Test coverage for LSU +// +// A component of the CORE-V-WALLY configurable RISC-V project. +// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +// load code to initalize stack, handle interrupts, terminate + +#include "WALLY-init-lib.h" + +# run-elf.bash find this in project description +main: + # Page table root address at 0x80010000 + li t5, 0x9000000000080010 + csrw satp, t5 + + # sfence.vma x0, x0 + + # switch to supervisor mode + li a0, 1 + ecall + li t5, 0 + li t0, 0x84000000 // go to first megapage + li t4, 0x1000 // put this outside the loop. + li t2, 0 # i = 0 + li t3, 32 # Max amount of Loops = 16 + +loop: bge t2, t3, lKP # exit loop if i >= loops + lw t1, 0(t0) + add t0, t0, t4 + addi t2, t2, 1 + j loop + +lKP: bne t5, zero, finished + li t0, 0x80000000 + slli t4, t4, 9 + addi t5, t5, 1 + li t2, 0 + j loop + +finished: + j done + +.data + +.align 16 +# Page table situated at 0x80010000 +pagetable: + .8byte 0x200044C1 + +.align 12 + .8byte 0x00000000200048C1 + .8byte 0x00000000200048C1 + .8byte 0x00000000200048C1 + + +.align 12 // megapages starting at 8000 0000 going to 8480 0000 (32*2 MiB beyond that) + + .8byte 0x200000CF // access 8000,0000 + .8byte 0x200800CF // access 8020,0000 + .8byte 0x201000CF // acesss 8040,0000 + .8byte 0x201800CF // acesss 8060,0000 + + .8byte 0x202000CF // access 8080,0000 + .8byte 0x202800CF // access 80A0,0000 + .8byte 0x203000CF // access 80C0,0000 + .8byte 0x203800CF // access 80E0,0000 + + .8byte 0x204000CF // access 8100,0000 + .8byte 0x204800CF + .8byte 0x205000CF + .8byte 0x205800CF + + .8byte 0x206000CF // access 8180,0000 + .8byte 0x206800CF + .8byte 0x207000CF + .8byte 0x207800CF + + .8byte 0x208000CF // access 8200,0000 + .8byte 0x208800CF + .8byte 0x209000CF + .8byte 0x209800CF + + .8byte 0x20A000CF // access 8280,0000 + .8byte 0x20A800CF + .8byte 0x20B000CF + .8byte 0x20B800CF + + .8byte 0x20C000CF // access 8300,0000 + .8byte 0x20C800CF + .8byte 0x20D000CF + .8byte 0x20D800CF + + .8byte 0x20E000CF // access 8380,0000 + .8byte 0x20E800CF + .8byte 
0x20F000CF + .8byte 0x20F800CF + + .8byte 0x20004CC1 + // Kilopage entry, for addresses from 8400, 0000 to 841F, FFFF + // point to ... + +.align 12 // should start at 84000000 + .8byte 0x210000CF + .8byte 0x210004CF + .8byte 0x210008CF + .8byte 0x21000CCF + + .8byte 0x210010CF + .8byte 0x210014CF + .8byte 0x210018CF + .8byte 0x21001CCF + + .8byte 0x210020CF + .8byte 0x210024CF + .8byte 0x210028CF + .8byte 0x21002CCF + + .8byte 0x210030CF + .8byte 0x210034CF + .8byte 0x210038CF + .8byte 0x21003CCF + + .8byte 0x210040CF + .8byte 0x210044CF + .8byte 0x210048CF + .8byte 0x21004CCF + + .8byte 0x210050CF + .8byte 0x210054CF + .8byte 0x210058CF + .8byte 0x21005CCF + + .8byte 0x210060CF + .8byte 0x210064CF + .8byte 0x210068CF + .8byte 0x21006CCF + + .8byte 0x210070CF + .8byte 0x210074CF + .8byte 0x210078CF + .8byte 0x21007CCF + From a0e71c26cbaae1cfcc11e0a55641bc1d6e8bd65b Mon Sep 17 00:00:00 2001 From: Noah Limpert Date: Thu, 20 Apr 2023 14:50:06 -0700 Subject: [PATCH 22/35] Add in a test that makes match 3 = 0 for all tlb lines --- testbench/tests.vh | 1 + tests/coverage/tlbM3.S | 155 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 tests/coverage/tlbM3.S diff --git a/testbench/tests.vh b/testbench/tests.vh index b86756b4e..a450b057d 100644 --- a/testbench/tests.vh +++ b/testbench/tests.vh @@ -60,6 +60,7 @@ string tvpaths[] = '{ "pmpcfg2", "tlbKP", "tlbMP", + "tlbM3", "tlbASID", "tlbGLB", "ifuCamlineWrite" diff --git a/tests/coverage/tlbM3.S b/tests/coverage/tlbM3.S new file mode 100644 index 000000000..ececa1f34 --- /dev/null +++ b/tests/coverage/tlbM3.S @@ -0,0 +1,155 @@ +/////////////////////////////////////////// +// tlbKP.S +// +// Written: mmendozamanriquez@hmc.edu 4 April 2023 +// nlimpert@hmc.edu +// +// Purpose: Test coverage for LSU +// +// A component of the CORE-V-WALLY configurable RISC-V project. 
+// +// Copyright (C) 2021-23 Harvey Mudd College & Oklahoma State University +// +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the “License”); you may not use this file +// except in compliance with the License, or, at your option, the Apache License version 2.0. You +// may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work distributed under the +// License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +//////////////////////////////////////////////////////////////////////////////////////////////// + +// load code to initalize stack, handle interrupts, terminate + +#include "WALLY-init-lib.h" + +# run-elf.bash find this in project description +main: + # Page table root address at 0x80010000 + li t5, 0x9000000000080010 + csrw satp, t5 + + # sfence.vma x0, x0 + + # switch to supervisor mode + li a0, 1 + ecall + + li t0, 0x1000 + + li t2, 0 # i = 0 + li t3, 64 # Max amount of Loops = 32 + li t4, 0x1000 + +loop: bge t2, t3, interim # exit loop if i >= loops + lw t1, 0(t0) + # sfence.vma x0, x0 + add t0, t0, t4 + addi t2, t2, 1 + j loop + +interim: + li t0, 0xFFFFFFFF000 + li t2, 0 # i = 0 + + +loop2:bge t2, t3, finished # exit loop if i >= loops + lw t1, 0(t0) + add t0, t0, t4 + addi t2, t2, 1 + j loop2 + +finished: + j done + +.data + +.align 16 +# Page table situated at 0x80010000 +pagetable: + .8byte 0x200044C1 // old page table was 200040 which just pointed to itself! wrong + +.align 12 + .8byte 0x00000000200048C1 + .8byte 0x00000000200048C1 + .8byte 0x00000000200048C1 + + +.align 12 + .8byte 0x0000000020004CC1 + //.8byte 0x00000200800CF// ADD IN THE MEGAPAGE should 3 nibbles of zeros be removed? 
+ +.align 12 + #80000000 + .8byte 0x200000CF + .8byte 0x200004CF + .8byte 0x200008CF + .8byte 0x20000CCF + + .8byte 0x200010CF + .8byte 0x200014CF + .8byte 0x200018CF + .8byte 0x20001CCF + + .8byte 0x200020CF + .8byte 0x200024CF + .8byte 0x200028CF + .8byte 0x20002CCF + + .8byte 0x200030CF + .8byte 0x200034CF + .8byte 0x200038CF + .8byte 0x20003CCF + + .8byte 0x200040CF + .8byte 0x200044CF + .8byte 0x200048CF + .8byte 0x20004CCF + + .8byte 0x200050CF + .8byte 0x200054CF + .8byte 0x200058CF + .8byte 0x20005CCF + + .8byte 0x200060CF + .8byte 0x200064CF + .8byte 0x200068CF + .8byte 0x20006CCF + + .8byte 0x200070CF + .8byte 0x200074CF + .8byte 0x200078CF + .8byte 0x20007CCF + + .8byte 0x200080CF + .8byte 0x200084CF + .8byte 0x200088CF + .8byte 0x20008CCF + + .8byte 0x200090CF + .8byte 0x200094CF + .8byte 0x200098CF + .8byte 0x20009CCF + + .8byte 0x2000A0CF + .8byte 0x2000A4CF + .8byte 0x2000A8CF + .8byte 0x2000ACCF + + .8byte 0x2000B0CF + .8byte 0x2000B4CF + .8byte 0x2000B8CF + .8byte 0x2000BCCF + + .8byte 0x2000C0CF + .8byte 0x2000C4CF + .8byte 0x2000C8CF + .8byte 0x2000CCCF + + .8byte 0x2000D0CF + .8byte 0x2000D4CF From f2ae770e1783a770948ed4b0b44521644400974c Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 16:24:58 -0700 Subject: [PATCH 23/35] Fmv h/q comments in controller --- src/fpu/fctrl.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index b9584bc9e..206cefbb4 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -138,10 +138,10 @@ module fctrl ( endcase 7'b11100??: if (Funct3D == 3'b001 & Rs2D == 5'b00000) ControlsD = `FCTRLW'b0_1_10_00_000_0_0_0; // fclass - else if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w / fmv.x.d to int register - 7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000) - ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w.x / fmv.d.x to fp reg + else if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) 
+ ControlsD = `FCTRLW'b0_1_11_00_000_0_0_0; // fmv.x.w/d/h/q fp to int register + 7'b111100?: if (Funct3D == 3'b000 & Rs2D == 5'b00000 & SupportedFmt) + ControlsD = `FCTRLW'b1_0_00_00_011_0_0_0; // fmv.w/d/h/q.x int to fp reg 7'b0100000: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b00) ControlsD = `FCTRLW'b1_0_01_00_000_0_0_0; // fcvt.s.(d/q/h) 7'b0100001: if (Rs2D[4:2] == 3'b000 & SupportedFmt2 & Rs2D[1:0] != 2'b01) From 2c47268f50c159d78451973711208d2a840e1a83 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 16:25:19 -0700 Subject: [PATCH 24/35] Started fdivsqrtpreproc flow organization --- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 4 +- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 57 ++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index f1ad32cd8..4025a30cb 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -1,10 +1,10 @@ /////////////////////////////////////////// -// fdivsqrt.sv +// fdivsqrtcycles.sv // // Written: David_Harris@hmc.edu, me@KatherineParry.com, cturek@hmc.edu, amaiuolo@hmc.edu // Modified: 18 April 2022 // -// Purpose: Combined Divide and Square Root Floating Point and Integer Unit +// Purpose: Determine number of cycles for divsqrt // // Documentation: RISC-V System on Chip Design Chapter 13 // diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 43a5e42b2..04739ee88 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -63,6 +63,10 @@ module fdivsqrtpreproc ( logic AsE, BsE; // Signs of integer inputs logic [`XLEN-1:0] AE; // input A after W64 adjustment + ////////////////////////////////////////////////////// + // Integer Preprocessing + ////////////////////////////////////////////////////// + if (`IDIV_ON_FPU) begin:intpreproc // Int Supported logic [`XLEN-1:0] BE, PosA, PosB; @@ -90,13 
+94,17 @@ module fdivsqrtpreproc ( // Select integer or floating point inputs mux2 #(`DIVb) ifxmux({Xm, {(`DIVb-`NF-1){1'b0}}}, {PosA, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFX); mux2 #(`DIVb) ifdmux({Ym, {(`DIVb-`NF-1){1'b0}}}, {PosB, {(`DIVb-`XLEN){1'b0}}}, IntDivE, IFD); - - + mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE); end else begin // Int not supported assign IFX = {Xm, {(`DIVb-`NF-1){1'b0}}}; assign IFD = {Ym, {(`DIVb-`NF-1){1'b0}}}; + assign NumerZeroE = XZeroE; end + ////////////////////////////////////////////////////// + // Integer & FP leading zero and normalization shift + ////////////////////////////////////////////////////// + // count leading zeros for Subnorm FP and to normalize integer inputs lzc #(`DIVb) lzcX (IFX, ell); lzc #(`DIVb) lzcY (IFD, mE); @@ -105,17 +113,10 @@ module fdivsqrtpreproc ( assign XPreproc = (IFX << ell) << 1; assign DPreproc = (IFD << mE) << 1; - // append leading 1 (for nonzero inputs) - // shift square root to be in range [1/4, 1) - // Normalized numbers are shifted right by 1 if the exponent is odd - // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. 
- mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); - assign DivX = {3'b000, ~NumerZeroE, XPreproc}; + ////////////////////////////////////////////////////// + // Integer Right Shift to digit boundary + ////////////////////////////////////////////////////// - // Divisior register - flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); - - // ***CT: factor out fdivsqrtcycles if (`IDIV_ON_FPU) begin:intrightshift // Int Supported logic [`DIVBLEN:0] ZeroDiff, p; logic ALTBE; @@ -146,11 +147,6 @@ module fdivsqrtpreproc ( assign DivXShifted = DivX; end /* verilator lint_on WIDTH */ - - // Selet integer or floating-point operands - mux2 #(1) numzmux(XZeroE, AZeroE, IntDivE, NumerZeroE); - mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); - // pipeline registers flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); @@ -163,14 +159,39 @@ module fdivsqrtpreproc ( if (`XLEN==64) flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end else begin - assign NumerZeroE = XZeroE; assign X = PreShiftX; + assign ISpecialCaseE = 0; end + ////////////////////////////////////////////////////// + // Floating-Point Preprocessing + // append leading 1 (for nonzero inputs) + // shift square root to be in range [1/4, 1) + // Normalized numbers are shifted right by 1 if the exponent is odd + // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. 
+ ////////////////////////////////////////////////////// + + mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + assign DivX = {3'b000, ~NumerZeroE, XPreproc}; + // Sqrt is initialized on step one as R(X-1), so depends on Radix if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); + + ////////////////////////////////////////////////////// + // Selet integer or floating-point operands + ////////////////////////////////////////////////////// + + mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); + + // Divisior register + flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); + + + + + // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); From 33c0f644576cb74df808b78abdf98f83a3ab7b71 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 16:38:47 -0700 Subject: [PATCH 25/35] Reordered fdivsqrtpreproc to follow logic --- src/fpu/fdivsqrt/fdivsqrt.sv | 6 ++-- src/fpu/fdivsqrt/fdivsqrtcycles.sv | 6 ++-- src/fpu/fdivsqrt/fdivsqrtfsm.sv | 4 +-- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 53 ++++++++++++++++------------- 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index f4d465012..f7a443639 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -62,7 +62,7 @@ module fdivsqrt( logic [`DIVb+1:0] FirstC; // Step tracker logic Firstun; // Quotient selection logic WZeroE; // Early termination flag - logic [`DURLEN-1:0] cycles; // FSM cycles + logic [`DURLEN-1:0] CyclesE; // FSM cycles logic SpecialCaseM; // Divide by zero, square root of negative, etc. 
logic DivStartE; // Enable signal for flops during stall @@ -76,7 +76,7 @@ module fdivsqrt( fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), - .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .cycles, + .FmtE, .SqrtE, .XZeroE, .Funct3E, .QeM, .X, .D, .CyclesE, // Int-specific .ForwardedSrcAE, .ForwardedSrcBE, .IntDivE, .W64E, .ISpecialCaseE, .BZeroM, .nM, .mM, .AM, @@ -85,7 +85,7 @@ module fdivsqrt( fdivsqrtfsm fdivsqrtfsm( // FSM .clk, .reset, .XInfE, .YInfE, .XZeroE, .YZeroE, .XNaNE, .YNaNE, .FDivStartE, .XsE, .SqrtE, .WZeroE, .FlushE, .StallM, - .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .cycles, + .FDivBusyE, .IFDivStartE, .FDivDoneE, .SpecialCaseM, .CyclesE, // Int-specific .IDivStartE, .ISpecialCaseE, .IntDivE); diff --git a/src/fpu/fdivsqrt/fdivsqrtcycles.sv b/src/fpu/fdivsqrt/fdivsqrtcycles.sv index 4025a30cb..2e17cc25b 100644 --- a/src/fpu/fdivsqrt/fdivsqrtcycles.sv +++ b/src/fpu/fdivsqrt/fdivsqrtcycles.sv @@ -33,7 +33,7 @@ module fdivsqrtcycles( input logic SqrtE, input logic IntDivE, input logic [`DIVBLEN:0] nE, - output logic [`DURLEN-1:0] cycles + output logic [`DURLEN-1:0] CyclesE ); logic [`DURLEN+1:0] Nf, fbits; // number of fractional bits // DIVN = `NF+3 @@ -68,8 +68,8 @@ module fdivsqrtcycles( always_comb begin if (SqrtE) fbits = Nf + 2 + 2; // Nf + two fractional bits for round/guard + 2 for right shift by up to 2 else fbits = Nf + 2 + `LOGR; // Nf + two fractional bits for round/guard + integer bits - try this when placing results in msbs - if (`IDIV_ON_FPU) cycles = IntDivE ? ((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); - else cycles = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + if (`IDIV_ON_FPU) CyclesE = IntDivE ? 
((nE + 1)/`DIVCOPIES) : (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); + else CyclesE = (fbits + (`LOGR*`DIVCOPIES)-1)/(`LOGR*`DIVCOPIES); end /* verilator lint_on WIDTH */ diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 5332087ad..75010f74c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -39,7 +39,7 @@ module fdivsqrtfsm( input logic StallM, FlushE, input logic IntDivE, input logic ISpecialCaseE, - input logic [`DURLEN-1:0] cycles, + input logic [`DURLEN-1:0] CyclesE, output logic IFDivStartE, output logic FDivBusyE, FDivDoneE, output logic SpecialCaseM @@ -67,7 +67,7 @@ module fdivsqrtfsm( state <= #1 IDLE; end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE // end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE - step <= cycles; + step <= CyclesE; if (SpecialCaseE) state <= #1 DONE; else state <= #1 BUSY; end else if (state == BUSY) begin diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 04739ee88..a63fad82c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -43,7 +43,7 @@ module fdivsqrtpreproc ( input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B input logic IntDivE, W64E, output logic ISpecialCaseE, - output logic [`DURLEN-1:0] cycles, + output logic [`DURLEN-1:0] CyclesE, output logic [`DIVBLEN:0] nM, mM, output logic NegQuotM, ALTBM, IntDivM, W64M, output logic AsM, BZeroM, @@ -62,6 +62,7 @@ module fdivsqrtpreproc ( logic NegQuotE; // Integer quotient is negative logic AsE, BsE; // Signs of integer inputs logic [`XLEN-1:0] AE; // input A after W64 adjustment + logic ALTBE; ////////////////////////////////////////////////////// // Integer Preprocessing @@ -113,13 +114,16 @@ module fdivsqrtpreproc ( assign XPreproc = (IFX << ell) << 1; assign DPreproc 
= (IFD << mE) << 1; + // *** CT: move to fdivsqrtintpreshift + ////////////////////////////////////////////////////// // Integer Right Shift to digit boundary + // Determine DivXShifted (X shifted to digit boundary) + // and nE (number of fractional digits) ////////////////////////////////////////////////////// if (`IDIV_ON_FPU) begin:intrightshift // Int Supported logic [`DIVBLEN:0] ZeroDiff, p; - logic ALTBE; // calculate number of fractional bits p assign ZeroDiff = mE - ell; // Difference in number of leading zeros @@ -129,37 +133,24 @@ module fdivsqrtpreproc ( // Integer special cases (terminate immediately) assign ISpecialCaseE = BZeroE | ALTBE; - /* verilator lint_off WIDTH */ // calculate number of fractional digits nE and right shift amount RightShiftX to complete in discrete number of steps if (`LOGRK > 0) begin // more than 1 bit per cycle logic [`LOGRK-1:0] IntTrunc, RightShiftX; logic [`DIVBLEN:0] TotalIntBits, IntSteps; - + /* verilator lint_off WIDTH */ assign TotalIntBits = `LOGR + p; // Total number of result bits (r integer bits plus p fractional bits) assign IntTrunc = TotalIntBits % `RK; // Truncation check for ceiling operator assign IntSteps = (TotalIntBits >> `LOGRK) + |IntTrunc; // Number of steps for int div assign nE = (IntSteps * `DIVCOPIES) - 1; // Fractional digits assign RightShiftX = `RK - 1 - ((TotalIntBits - 1) % `RK); // Right shift amount assign DivXShifted = DivX >> RightShiftX; // shift X by up to R*K-1 to complete in nE steps + /* verilator lint_on WIDTH */ end else begin // radix 2 1 copy doesn't require shifting assign nE = p; assign DivXShifted = DivX; end - /* verilator lint_on WIDTH */ - // pipeline registers - flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); - flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); - flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); - flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); - flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); - flopen #(`DIVBLEN+1) nreg(clk, 
IFDivStartE, nE, nM); - flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); - flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); - if (`XLEN==64) - flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); end else begin - assign X = PreShiftX; assign ISpecialCaseE = 0; end @@ -183,21 +174,35 @@ module fdivsqrtpreproc ( // Selet integer or floating-point operands ////////////////////////////////////////////////////// - mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); + if (`IDIV_ON_FPU) begin + mux2 #(`DIVb+4) xmux(PreShiftX, DivXShifted, IntDivE, X); + end else begin + assign X = PreShiftX; + end // Divisior register flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); - - - - - // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); flopen #(`NE+2) expreg(clk, IFDivStartE, QeE, QeM); // Number of FSM cycles (to FSM) - fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .cycles); + fdivsqrtcycles cyclecalc(.FmtE, .SqrtE, .IntDivE, .nE, .CyclesE); + + if (`IDIV_ON_FPU) begin:intpipelineregs + // pipeline registers + flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); + flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); + flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); + flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); + flopen #(1) asignreg(clk, IFDivStartE, AsE, AsM); + flopen #(`DIVBLEN+1) nreg(clk, IFDivStartE, nE, nM); + flopen #(`DIVBLEN+1) mreg(clk, IFDivStartE, mE, mM); + flopen #(`XLEN) srcareg(clk, IFDivStartE, AE, AM); + if (`XLEN==64) + flopen #(1) w64reg(clk, IFDivStartE, W64E, W64M); + end + endmodule From 86107e613667c6301c784d34ac26742c30fe2df7 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 16:48:23 -0700 Subject: [PATCH 26/35] continued cleanup --- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv 
b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index a63fad82c..b3c97c27f 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -55,10 +55,10 @@ module fdivsqrtpreproc ( logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [`NE+1:0] QeE; // Quotient Exponent (FP only) logic [`DIVb-1:0] IFX, IFD; // Correctly-sized inputs for iterator, selected from int or fp input - logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs + logic [`DIVBLEN:0] mE, nE, ell; // Leading zeros of inputs logic NumerZeroE; // Numerator is zero (X or A) logic AZeroE, BZeroE; // A or B is Zero for integer division - logic signedDiv; // signed division + logic SignedDivE; // signed division logic NegQuotE; // Integer quotient is negative logic AsE, BsE; // Signs of integer inputs logic [`XLEN-1:0] AE; // input A after W64 adjustment @@ -72,20 +72,20 @@ module fdivsqrtpreproc ( logic [`XLEN-1:0] BE, PosA, PosB; // Extract inputs, signs, zero, depending on W64 mode if applicable - assign signedDiv = ~Funct3E[0]; + assign SignedDivE = ~Funct3E[0]; // Source handling if (`XLEN==64) begin // 64-bit, supports W64 - mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & signedDiv}}, ForwardedSrcAE[31:0]}, W64E, AE); - mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & signedDiv}}, ForwardedSrcBE[31:0]}, W64E, BE); + mux2 #(64) amux(ForwardedSrcAE, {{32{ForwardedSrcAE[31] & SignedDivE}}, ForwardedSrcAE[31:0]}, W64E, AE); + mux2 #(64) bmux(ForwardedSrcBE, {{32{ForwardedSrcBE[31] & SignedDivE}}, ForwardedSrcBE[31:0]}, W64E, BE); end else begin // 32 bits only assign AE = ForwardedSrcAE; assign BE = ForwardedSrcBE; end assign AZeroE = ~(|AE); assign BZeroE = ~(|BE); - assign AsE = AE[`XLEN-1] & signedDiv; - assign BsE = BE[`XLEN-1] & signedDiv; + assign AsE = AE[`XLEN-1] & SignedDivE; + assign BsE = BE[`XLEN-1] & SignedDivE; assign NegQuotE = AsE ^ BsE; // Integer Quotient is negative // Force integer inputs to be 
postiive @@ -162,10 +162,10 @@ module fdivsqrtpreproc ( // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. ////////////////////////////////////////////////////// - mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); assign DivX = {3'b000, ~NumerZeroE, XPreproc}; // Sqrt is initialized on step one as R(X-1), so depends on Radix + mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); @@ -192,7 +192,7 @@ module fdivsqrtpreproc ( if (`IDIV_ON_FPU) begin:intpipelineregs // pipeline registers - flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); + flopen #(1) mdureg(clk, IFDivStartE, IntDivE, IntDivM); flopen #(1) altbreg(clk, IFDivStartE, ALTBE, ALTBM); flopen #(1) negquotreg(clk, IFDivStartE, NegQuotE, NegQuotM); flopen #(1) bzeroreg(clk, IFDivStartE, BZeroE, BZeroM); From 8a59a4ce94baa225c6851b60514cee1aecb2685d Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 20 Apr 2023 17:35:01 -0700 Subject: [PATCH 27/35] fdivsqrt cleanup --- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index b3c97c27f..3de4b252e 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -50,7 +50,7 @@ module fdivsqrtpreproc ( output logic [`XLEN-1:0] AM ); - logic [`DIVb-1:0] XPreproc, DPreproc; + logic [`DIVb-1:0] Xfract, Dfract; logic [`DIVb:0] PreSqrtX; logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed logic [`NE+1:0] QeE; // Quotient Exponent (FP only) @@ -111,8 +111,8 @@ module fdivsqrtpreproc ( lzc #(`DIVb) 
lzcY (IFD, mE); // Normalization shift: shift off leading one - assign XPreproc = (IFX << ell) << 1; - assign DPreproc = (IFD << mE) << 1; + assign Xfract = (IFX << ell) << 1; + assign Dfract = (IFD << mE) << 1; // *** CT: move to fdivsqrtintpreshift @@ -154,6 +154,8 @@ module fdivsqrtpreproc ( assign ISpecialCaseE = 0; end + // CT *** fdivsqrtfplead1 + ////////////////////////////////////////////////////// // Floating-Point Preprocessing // append leading 1 (for nonzero inputs) @@ -162,10 +164,10 @@ module fdivsqrtpreproc ( // Denormalized numbers have Xe = 0 and an unbiased exponent of 1-BIAS. They are shifted right if the number of leading zeros is odd. ////////////////////////////////////////////////////// - assign DivX = {3'b000, ~NumerZeroE, XPreproc}; + assign DivX = {3'b000, ~NumerZeroE, Xfract}; // Sqrt is initialized on step one as R(X-1), so depends on Radix - mux2 #(`DIVb+1) sqrtxmux({~XZeroE, XPreproc}, {1'b0, ~XZeroE, XPreproc[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); + mux2 #(`DIVb+1) sqrtxmux({~XZeroE, Xfract}, {1'b0, ~XZeroE, Xfract[`DIVb-1:1]}, (Xe[0] ^ ell[0]), PreSqrtX); if (`RADIX == 2) assign SqrtX = {3'b111, PreSqrtX}; else assign SqrtX = {2'b11, PreSqrtX, 1'b0}; mux2 #(`DIVb+4) prexmux(DivX, SqrtX, SqrtE, PreShiftX); @@ -181,7 +183,7 @@ module fdivsqrtpreproc ( end // Divisior register - flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, DPreproc}, D); + flopen #(`DIVb+4) dreg(clk, IFDivStartE, {4'b0001, Dfract}, D); // Floating-point exponent fdivsqrtexpcalc expcalc(.Fmt(FmtE), .Xe, .Ye, .Sqrt(SqrtE), .XZero(XZeroE), .ell, .m(mE), .Qe(QeE)); From c2f441724b1e3b165d6798d823b6c92a18dba451 Mon Sep 17 00:00:00 2001 From: Liam Date: Fri, 21 Apr 2023 20:43:37 -0700 Subject: [PATCH 28/35] pmpcfg test cases Increased IFU coverage from 83.37% to 83.53% and LSU coverage from 93.14% to 93.28%. 
--- tests/coverage/pmpcfg.S | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S index 5b3e37b56..fd838041d 100644 --- a/tests/coverage/pmpcfg.S +++ b/tests/coverage/pmpcfg.S @@ -1,6 +1,6 @@ // pmpcfg part 1 // Kevin Wan, kewan@hmc.edu, 4/18/2023 -// Liam Chalk, lchalk@hmc.edu, 4/19/2023 +// Liam Chalk, lchalk@hmc.edu, 4/21/2023 // locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. // See the next part in pmpcfg1.S @@ -19,32 +19,37 @@ main: li t0, 0x90000000 csrw pmpaddr0, t0 - li t0, 0x00000017 + li t0, 0x00001700 + csrw pmpcfg0, t0 + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00001700 csrw pmpcfg1, t0 li t0, 0x90000000 csrw pmpaddr0, t0 - li t0, 0x00000017 + li t0, 0x00001700 csrw pmpcfg2, t0 li t0, 0x90000000 csrw pmpaddr0, t0 - li t0, 0x00000017 + li t0, 0x00001700 csrw pmpcfg3, t0 li t0, 0x90000000 csrw pmpaddr1, t0 - li t0, 0x00000017 + li t0, 0x00001700 csrw pmpcfg1, t0 li t0, 0x90000000 - csrw pmpaddr1, t0 - li t0, 0x00000017 + csrw pmpaddr2, t0 + li t0, 0x00001700 csrw pmpcfg2, t0 li t0, 0x90000000 - csrw pmpaddr1, t0 - li t0, 0x00000017 + csrw pmpaddr3, t0 + li t0, 0x00001700 csrw pmpcfg3, t0 li t0, 0x8800000000000000 From 69cc0b8bf3ab3590ee98d5a7bbf897063e2899cc Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 22 Apr 2023 09:38:14 -0700 Subject: [PATCH 29/35] test plan update --- docs/testplans/testplan.md | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/testplans/testplan.md b/docs/testplans/testplan.md index a25b3a189..37390a632 100644 --- a/docs/testplans/testplan.md +++ b/docs/testplans/testplan.md @@ -1,6 +1,29 @@ -# CORE-V Wally Test Plan +# CORE-V Wally Design Verification Test Plan + +CORE-V Wally is functionally tested in the following ways. 
Each test is run in lock-step against ImperasDV to ensure all architectural state is correct after each instruction. + +| Functions | Coverage Method | Status | +| ----------- | ----------- |----| +| Instructions | riscv-arch-test | Pass | +| Privileged Unit | wally-riscv-arch-test | Pass | +| Virtual Memory | wally-riscv-arch-test | Pass | +| PMP | wally-riscv-arch-test | Pass | +| Peripherals | wally-riscv-arch-test | Pass | +| Floating-Point | TestFloat | Pass | +| General | Code Coverage | 91% | +| General | Boot Linux in Sim | Pass | +| General | Boot Linux on FPGA | Pass | + + +The following performance validation is also run: +| Function | Method | Status | +| --- | --- | --- | +| Overall Performance | embench | Pass | +| Overall Performance | coremark | Pass | +| Branch Predictor | *** | Pass | +| Cache Miss Rate | *** | Pass | + -CORE-V Wally is tested in the following ways: * Run [RISC-V Architecture Compatibility Tests](https://github.com/riscv-non-isa/riscv-arch-test) in lock-step against the ImperasDV reference model. * Run custom tests to cover virtual memory, PMP, privileged unit, and peripherals in lock step against ImperasDV. From 063e41806ec40ceb3eca410d08499f90153310c5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 22 Apr 2023 10:07:48 -0700 Subject: [PATCH 30/35] Fixed syntax error in exclusion. 
Arbitrarily picked -e 1; fix if this isn't right --- src/cache/cachefsm.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 34f1778f5..544e3454e 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -159,7 +159,7 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( assign SelFlush = (CurrState == STATE_READY & FlushCache) | (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_WRITEBACK); - // coverage off -item e -fecexprrow 1 + // coverage off -item e 1 -fecexprrow 1 // (state is always FLUSH_WRITEBACK when FlushWayFlag & CacheBusAck) assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | (CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty); From 086556310cdd61748a9a0ef9533fdebde28ea82e Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 22 Apr 2023 12:22:45 -0700 Subject: [PATCH 31/35] Attempted to cause interrupt during fdivsqrt. Fixed enabling fpu in fpu.S. Fdivsqrt exclusions for coverage. --- sim/coverage-exclusions-rv64gc.do | 7 +++++-- src/fpu/fdivsqrt/fdivsqrtfsm.sv | 3 ++- tests/coverage/WALLY-init-lib.h | 3 +++ tests/coverage/fpu.S | 24 ++++++++++++++++++++++-- 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 4f90333a9..45d98a726 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -31,11 +31,14 @@ do GetLineNum.do # LZA (i<64) statement confuses coverage tool -# This is ugly to exlcude the whole file - is there a better option? // coverage off isn't working +# DH 4/22/23: Exclude all LZAs coverage exclude -srcfile lzc.sv -# FDIVSQRT has +# DH 4/22/23: FDIVSQRT can't go directly from done to busy again coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state DONE->BUSY +# DH 4/22/23: The busy->idle transition only occurs if a FlushE occurs while the divider is busy. 
The flush is caused by a trap or return, +# which won't happen while the divider is busy. +coverage exclude -scope /dut/core/fpu/fpu/fdivsqrt/fdivsqrtfsm -ftrans state BUSY->IDLE ### Exclude D$ states and logic for the I$ instance # This is cleaner than trying to set an I$-specific pragma in cachefsm.sv (which would exclude it for the D$ instance too) diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index 75010f74c..d1d9dda10 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -63,10 +63,11 @@ module fdivsqrtfsm( flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc always_ff @(posedge clk) begin + // coverage off: dh 4/22/23 FlushE doesn't seem to happen while fdivsqrt is busy if (reset | FlushE) begin + // coverage on state <= #1 IDLE; end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE -// end else if ((state == IDLE) & IFDivStartE) begin // IFDivStartE implies stat is IDLE step <= CyclesE; if (SpecialCaseE) state <= #1 DONE; else state <= #1 BUSY; diff --git a/tests/coverage/WALLY-init-lib.h b/tests/coverage/WALLY-init-lib.h index 6b6dd6dd9..ec179a0dd 100644 --- a/tests/coverage/WALLY-init-lib.h +++ b/tests/coverage/WALLY-init-lib.h @@ -63,6 +63,9 @@ trap_handler: bgez t0, exception # if msb is clear, it is an exception interrupt: # must be a timer interrupt + li t0, -1 # set mtimecmp to biggest number so it doesnt interrupt again + li t1, 0x02004000 # MTIMECMP in CLINT + sd t0, 0(t1) j trap_return # clean up and return exception: diff --git a/tests/coverage/fpu.S b/tests/coverage/fpu.S index b2a52be06..879980899 100644 --- a/tests/coverage/fpu.S +++ b/tests/coverage/fpu.S @@ -28,7 +28,7 @@ main: - #bseti t0, zero, 14 # turn on FPU + bseti t0, zero, 14 # turn on FPU csrs mstatus, t0 #Pull denormalized FP number from memory and pass it to fclass.S for coverage @@ -105,6 +105,25 @@ main: # fcvt.w.q a0, 
ft0 # fcvt.q.d ft3, ft0 + // fdivsqrt: test busy->idle transition caused by a FlushE while divider is busy (when interrupt arrives) + // This code doesn't actually trigger a busy->idle transition because the pending timer interrupt doesn't occur until the division finishes. + li t0, 0x3F812345 # random value slightly bigger than 1 + li t1, 0x3F823456 + fmv.w.x ft0, t0 # move int to fp register + fmv.w.x ft1, t1 + li t0, -1 # set mtimecmp to biggest number so it doesnt interrupt again + li t1, 0x02004000 # MTIMECMP in CLINT + sd t0, 0(t1) + csrsi mstatus, 0b1000 # enable interrupts with mstatus.MIE + li t1, 0x0200bff8 # read MTIME in CLINT + ld t0, 0(t1) + addi t0, t0, 11 + li t1, 0x02004000 # MTIMECMP in CLINT + sd t0, 0(t1) # write mtime+11 to cause interrupt soon. This is very touchy timing and is sensitive to cache line fetch latency + nop + fdiv.s ft2, ft1, ft0 # should get interrupted, triggering a flush + csrci mstatus, 0b1000 # disable interrupts with mstatus.MIE + # Completing branch coverage in fctrl.sv .word 0x38007553 // Testing the all False case for 119 - funct7 under, op = 101 0011 .word 0x40000053 // Line 145 All False Test case - illegal instruction? 
@@ -145,4 +164,5 @@ TestData2: .word 0x7f800000 #INF .int 0xbf800000 #FP -1.0 .int 0x7fa00000 #SNaN -.int 0x3fffffff #OverFlow Test \ No newline at end of file +.int 0x3fffffff #OverFlow Test +DivTestData: From 3b299fb77ad56a0b8e9c8561eb26a9b4f47faba9 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 22 Apr 2023 15:27:05 -0700 Subject: [PATCH 32/35] Removed unproven fdivsqrt exclusion --- src/fpu/fdivsqrt/fdivsqrtfsm.sv | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/fpu/fdivsqrt/fdivsqrtfsm.sv b/src/fpu/fdivsqrt/fdivsqrtfsm.sv index d1d9dda10..ba0758ee6 100644 --- a/src/fpu/fdivsqrt/fdivsqrtfsm.sv +++ b/src/fpu/fdivsqrt/fdivsqrtfsm.sv @@ -63,9 +63,7 @@ module fdivsqrtfsm( flopenr #(1) SpecialCaseReg(clk, reset, IFDivStartE, SpecialCaseE, SpecialCaseM); // save SpecialCase for checking in fdivsqrtpostproc always_ff @(posedge clk) begin - // coverage off: dh 4/22/23 FlushE doesn't seem to happen while fdivsqrt is busy if (reset | FlushE) begin - // coverage on state <= #1 IDLE; end else if (IFDivStartE) begin // IFDivStartE implies stat is IDLE step <= CyclesE; From 52f49ed24d3237338050ed3c21aba64426a42dd6 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 22 Apr 2023 15:32:39 -0700 Subject: [PATCH 33/35] Fault on writes to odd-numbered PMPCFG in RV64 --- src/privileged/csrm.sv | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/privileged/csrm.sv b/src/privileged/csrm.sv index f0e5f00db..fb519be37 100644 --- a/src/privileged/csrm.sv +++ b/src/privileged/csrm.sv @@ -171,7 +171,8 @@ module csrm #(parameter IllegalCSRMAccessM = !(`S_SUPPORTED) & (CSRAdrM == MEDELEG | CSRAdrM == MIDELEG); // trap on DELEG register access when no S or N-mode if (CSRAdrM >= PMPADDR0 & CSRAdrM < PMPADDR0 + `PMP_ENTRIES) // reading a PMP entry CSRMReadValM = {{(`XLEN-(`PA_BITS-2)){1'b0}}, PMPADDR_ARRAY_REGW[CSRAdrM - PMPADDR0]}; - else if (CSRAdrM >= PMPCFG0 & CSRAdrM < PMPCFG0 + `PMP_ENTRIES/4) begin + else if (CSRAdrM >= PMPCFG0 & CSRAdrM < PMPCFG0 + 
`PMP_ENTRIES/4 & (`XLEN==32 | CSRAdrM[0] == 0)) begin + // only even-numbered PMPCFG entries exist in RV64 if (`XLEN==64) begin entry = ({CSRAdrM[11:1], 1'b0} - PMPCFG0)*4; // disregard odd entries in RV64 CSRMReadValM = {PMPCFG_ARRAY_REGW[entry+7],PMPCFG_ARRAY_REGW[entry+6],PMPCFG_ARRAY_REGW[entry+5],PMPCFG_ARRAY_REGW[entry+4], From d29dc30288fbeedbaed4389b8a460b81949c56c1 Mon Sep 17 00:00:00 2001 From: Diego Herrera Vicioso Date: Mon, 24 Apr 2023 02:06:53 -0700 Subject: [PATCH 34/35] Excluded coverage for impossible cases in wficountreg and status.MPRV --- sim/coverage-exclusions-rv64gc.do | 6 +++++- src/privileged/csrsr.sv | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sim/coverage-exclusions-rv64gc.do b/sim/coverage-exclusions-rv64gc.do index 45d98a726..b4441e5b1 100644 --- a/sim/coverage-exclusions-rv64gc.do +++ b/sim/coverage-exclusions-rv64gc.do @@ -127,4 +127,8 @@ coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$lin set line [GetLineNum ../src/mmu/pmachecker.sv "WriteAccessM \\| ExecuteAccessF"] coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-5 set line [GetLineNum ../src/mmu/pmachecker.sv "ReadAccessM \\| ExecuteAccessF"] -coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-3 \ No newline at end of file +coverage exclude -scope /dut/core/ifu/immu/immu/pmachecker -linerange $line-$line -item e 1 -fecexprrow 1-3 + +# Excluding reset and clear for impossible case in the wficountreg in privdec +set line [GetLineNum ../src/generic/flop/floprc.sv "reset \\| clear"] +coverage exclude -scope /dut/core/priv/priv/pmd/wfi/wficountreg -linerange $line-$line -item c 1 -feccondrow 2 diff --git a/src/privileged/csrsr.sv b/src/privileged/csrsr.sv index 60968a68b..61a6f3247 100644 --- a/src/privileged/csrsr.sv +++ b/src/privileged/csrsr.sv @@ -122,7 +122,10 @@ module csrsr ( logic [1:0] EndiannessPrivMode; 
always_comb begin if (SelHPTW) EndiannessPrivMode = `S_MODE; + //coverage off -item c 1 -feccondrow 1 + // status.MPRV always gets reset upon leaving machine mode, so MPRV will never be high when out of machine mode else if (PrivilegeModeW == `M_MODE & STATUS_MPRV) EndiannessPrivMode = STATUS_MPP; + //coverage on else EndiannessPrivMode = PrivilegeModeW; case (EndiannessPrivMode) From 7bf2ee54187dd13847794d6919f54b293da331c3 Mon Sep 17 00:00:00 2001 From: Liam Date: Tue, 25 Apr 2023 15:37:04 -0700 Subject: [PATCH 35/35] pmpaddr0 and pmpaddr2 test cases Writing 0x00170000 and 0x17000000 to pmpaddr0 and pmpaddr2. Increased IFU coverage from 83.53% to 83.68% and LSU coverage from 93.29% to 93.45%. --- tests/coverage/pmpcfg.S | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/coverage/pmpcfg.S b/tests/coverage/pmpcfg.S index fd838041d..bcc8f3950 100644 --- a/tests/coverage/pmpcfg.S +++ b/tests/coverage/pmpcfg.S @@ -1,6 +1,6 @@ // pmpcfg part 1 // Kevin Wan, kewan@hmc.edu, 4/18/2023 -// Liam Chalk, lchalk@hmc.edu, 4/21/2023 +// Liam Chalk, lchalk@hmc.edu, 4/25/2023 // locks each pmpXcfg bit field in order, from X = 15 to X = 0, with the A[1:0] field set to TOR. // See the next part in pmpcfg1.S @@ -52,6 +52,26 @@ main: li t0, 0x00001700 csrw pmpcfg3, t0 + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x00170000 + csrw pmpcfg0, t0 + + li t0, 0x90000000 + csrw pmpaddr2, t0 + li t0, 0x00170000 + csrw pmpcfg2, t0 + + li t0, 0x90000000 + csrw pmpaddr0, t0 + li t0, 0x17000000 + csrw pmpcfg0, t0 + + li t0, 0x90000000 + csrw pmpaddr2, t0 + li t0, 0x17000000 + csrw pmpcfg2, t0 + li t0, 0x8800000000000000 csrw pmpcfg2, t0 li t0, 0x88000000000000