From f6e52c7f0876ab91ac1bfef00bd4c1d82bb23d53 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 14 Jun 2022 22:04:38 +0000 Subject: [PATCH 01/26] Removed testbench.sv.bak --- pipelined/testbench/testbench.sv.bak | 473 --------------------------- 1 file changed, 473 deletions(-) delete mode 100644 pipelined/testbench/testbench.sv.bak diff --git a/pipelined/testbench/testbench.sv.bak b/pipelined/testbench/testbench.sv.bak deleted file mode 100644 index 8fdde9326..000000000 --- a/pipelined/testbench/testbench.sv.bak +++ /dev/null @@ -1,473 +0,0 @@ -/////////////////////////////////////////// -// testbench.sv -// -// Written: David_Harris@hmc.edu 9 January 2021 -// Modified: -// -// Purpose: Wally Testbench and helper modules -// Applies test programs from the riscv-arch-test and Imperas suites -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// MIT LICENSE -// Permission is hereby granted, free of charge, to any person obtaining a copy of this -// software and associated documentation files (the "Software"), to deal in the Software -// without restriction, including without limitation the rights to use, copy, modify, merge, -// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons -// to whom the Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or -// substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -// PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. -//////////////////////////////////////////////////////////////////////////////////////////////// - -`include "wally-config.vh" -`include "tests.vh" - -module testbench; - parameter TESTSPERIPH = 0; // set to 0 for regression - parameter TESTSPRIV = 0; // set to 0 for regression - parameter DEBUG=0; - parameter TEST="none"; - - logic clk; - logic reset_ext, reset; - - parameter SIGNATURESIZE = 5000000; - - int test, i, errors, totalerrors; - logic [31:0] sig32[0:SIGNATURESIZE]; - logic [`XLEN-1:0] signature[0:SIGNATURESIZE]; - logic [`XLEN-1:0] testadr; - string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; - logic [31:0] InstrW; - -string tests[]; -logic [3:0] dummy; - - string ProgramAddrMapFile, ProgramLabelMapFile; - logic [`AHBW-1:0] HRDATAEXT; - logic HREADYEXT, HRESPEXT; - logic [31:0] HADDR; - logic [`AHBW-1:0] HWDATA; - logic HWRITE; - logic [2:0] HSIZE; - logic [2:0] HBURST; - logic [3:0] HPROT; - logic [1:0] HTRANS; - logic HMASTLOCK; - logic HCLK, HRESETn; - logic [`XLEN-1:0] PCW; - - logic DCacheFlushDone, DCacheFlushStart; - - flopenr #(`XLEN) PCWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.PCM, PCW); - flopenr #(32) InstrWReg(clk, reset, ~dut.core.ieu.dp.StallW, dut.core.ifu.InstrM, InstrW); - - // check assertions for a legal configuration - riscvassertions riscvassertions(); - - // pick tests based on modes supported - initial begin - $display("TEST is %s", TEST); - //tests = '{}; - if (`XLEN == 64) begin // RV64 - case (TEST) - "arch64i": tests = arch64i; - "arch64priv": tests = arch64priv; - "arch64c": if (`C_SUPPORTED) - if (`ZICSR_SUPPORTED) tests = {arch64c, arch64cpriv}; - else tests = {arch64c}; - "arch64m": if (`M_SUPPORTED) tests = arch64m; - 
"arch64d": if (`D_SUPPORTED) tests = arch64d; - "imperas64i": tests = imperas64i; - "imperas64p": tests = imperas64p; -// "imperas64mmu": if (`VIRTMEM_SUPPORTED) tests = imperas64mmu; - "imperas64f": if (`F_SUPPORTED) tests = imperas64f; - "imperas64d": if (`D_SUPPORTED) tests = imperas64d; - "imperas64m": if (`M_SUPPORTED) tests = imperas64m; - "imperas64a": if (`A_SUPPORTED) tests = imperas64a; - "imperas64c": if (`C_SUPPORTED) tests = imperas64c; - else tests = imperas64iNOc; - "testsBP64": tests = testsBP64; - "wally64i": tests = wally64i; // *** redo - "wally64priv": tests = wally64priv;// *** redo - "imperas64periph": tests = imperas64periph; - "coremark": tests = coremark; - endcase - end else begin // RV32 - case (TEST) - "arch32i": tests = arch32i; - "arch32priv": tests = arch32priv; - "arch32c": if (`C_SUPPORTED) - if (`ZICSR_SUPPORTED) tests = {arch32c, arch32cpriv}; - else tests = {arch32c}; - "arch32m": if (`M_SUPPORTED) tests = arch32m; - "arch32f": if (`F_SUPPORTED) tests = arch32f; - "imperas32i": tests = imperas32i; - "imperas32p": tests = imperas32p; -// "imperas32mmu": if (`VIRTMEM_SUPPORTED) tests = imperas32mmu; - "imperas32f": if (`F_SUPPORTED) tests = imperas32f; - "imperas32m": if (`M_SUPPORTED) tests = imperas32m; - "imperas32a": if (`A_SUPPORTED) tests = imperas32a; - "imperas32c": if (`C_SUPPORTED) tests = imperas32c; - else tests = imperas32iNOc; - "wally32i": tests = wally32i; // *** redo - "wally32e": tests = wally32e; - "wally32priv": tests = wally32priv; // *** redo - "imperas32periph": tests = imperas32periph; - endcase - end - if (tests.size() == 0) begin - $display("TEST %s not supported in this configuration", TEST); - $stop; - end - end - - string signame, memfilename, pathname; - - logic [31:0] GPIOPinsIn, GPIOPinsOut, GPIOPinsEn; - logic UARTSin, UARTSout; - - logic SDCCLK; - logic SDCCmdIn; - logic SDCCmdOut; - logic SDCCmdOE; - logic [3:0] SDCDatIn; - - logic HREADY; - logic HSELEXT; - - - // instantiate device to be tested 
- assign GPIOPinsIn = 0; - assign UARTSin = 1; - assign HREADYEXT = 1; - assign HRESPEXT = 0; - assign HRDATAEXT = 0; - - wallypipelinedsoc dut(.clk, .reset_ext, .reset, .HRDATAEXT,.HREADYEXT, .HRESPEXT,.HSELEXT, - .HCLK, .HRESETn, .HADDR, .HWDATA, .HWRITE, .HSIZE, .HBURST, .HPROT, - .HTRANS, .HMASTLOCK, .HREADY, .TIMECLK(1'b0), .GPIOPinsIn, .GPIOPinsOut, .GPIOPinsEn, - .UARTSin, .UARTSout, .SDCCmdIn, .SDCCmdOut, .SDCCmdOE, .SDCDatIn, .SDCCLK); - - // Track names of instructions - instrTrackerTB it(clk, reset, dut.core.ieu.dp.FlushE, - dut.core.ifu.FinalInstrRawF[31:0], - dut.core.ifu.InstrD, dut.core.ifu.InstrE, - dut.core.ifu.InstrM, InstrW, - InstrFName, InstrDName, InstrEName, InstrMName, InstrWName); - - // initialize tests - localparam integer MemStartAddr = `RAM_BASE>>(1+`XLEN/32); - localparam integer MemEndAddr = (`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32); - - initial - begin - test = 1; - totalerrors = 0; - testadr = 0; - // fill memory with defined values to reduce Xs in simulation - // Quick note the memory will need to be initialized. The C library does not - // guarantee the initialized reads. For example a strcmp can read 6 byte - // strings, but uses a load double to read them in. If the last 2 bytes are - // not initialized the compare results in an 'x' which propagates through - // the design. 
- if (TEST == "coremark") - for (i=MemStartAddr; i= 128 | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be at least 128 when caches are enabled"); - assert (`DCACHE_LINELENINBITS < `DCACHE_WAYSIZEINBYTES*8) else $error("DCACHE_LINELENINBITS must be smaller than way size"); - assert (`ICACHE_WAYSIZEINBYTES <= 4096 | (`IMEM != `MEM_CACHE) | `VIRTMEM_SUPPORTED == 0) else $error("ICACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); - assert (`ICACHE_LINELENINBITS >= 32 | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be at least 32 when caches are enabled"); - assert (`ICACHE_LINELENINBITS < `ICACHE_WAYSIZEINBYTES*8) else $error("ICACHE_LINELENINBITS must be smaller than way size"); - assert (2**$clog2(`DCACHE_LINELENINBITS) == `DCACHE_LINELENINBITS | (`DMEM != `MEM_CACHE)) else $error("DCACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`DCACHE_WAYSIZEINBYTES) == `DCACHE_WAYSIZEINBYTES | (`DMEM != `MEM_CACHE)) else $error("DCACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ICACHE_LINELENINBITS) == `ICACHE_LINELENINBITS | (`IMEM != `MEM_CACHE)) else $error("ICACHE_LINELENINBITS must be a power of 2"); - assert (2**$clog2(`ICACHE_WAYSIZEINBYTES) == `ICACHE_WAYSIZEINBYTES | (`IMEM != `MEM_CACHE)) else $error("ICACHE_WAYSIZEINBYTES must be a power of 2"); - assert (2**$clog2(`ITLB_ENTRIES) == `ITLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("ITLB_ENTRIES must be a power of 2"); - assert (2**$clog2(`DTLB_ENTRIES) == `DTLB_ENTRIES | `VIRTMEM_SUPPORTED==0) else $error("DTLB_ENTRIES must be a power of 2"); - assert (`RAM_RANGE >= 56'h07FFFFFF) else $warning("Some regression tests will fail if RAM_RANGE is less than 56'h07FFFFFF"); - assert (`ZICSR_SUPPORTED == 1 | (`PMP_ENTRIES == 0 & `VIRTMEM_SUPPORTED == 0)) else $error("PMP_ENTRIES and VIRTMEM_SUPPORTED must be zero if ZICSR not supported."); - assert (`ZICSR_SUPPORTED == 1 | (`S_SUPPORTED == 0 & 
`U_SUPPORTED == 0)) else $error("S and U modes not supported if ZISR not supported"); - assert (`U_SUPPORTED | (`S_SUPPORTED == 0)) else $error ("S mode only supported if U also is supported"); -// assert (`MEM_DCACHE == 0 | `MEM_DTIM == 0) else $error("Can't simultaneously have a data cache and TIM"); - assert (`DMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs dcache"); - assert (`IMEM == `MEM_CACHE | `VIRTMEM_SUPPORTED ==0) else $error("Virtual memory needs icache"); - end -endmodule - - -/* verilator lint_on STMTDLY */ -/* verilator lint_on WIDTH */ - -module DCacheFlushFSM - (input logic clk, - input logic reset, - input logic start, - output logic done); - - genvar adr; - - logic [`XLEN-1:0] ShadowRAM[`RAM_BASE>>(1+`XLEN/32):(`RAM_RANGE+`RAM_BASE)>>1+(`XLEN/32)]; - - if(`DMEM == `MEM_CACHE) begin - localparam integer numlines = testbench.dut.core.lsu.bus.dcache.dcache.NUMLINES; - localparam integer numways = testbench.dut.core.lsu.bus.dcache.dcache.NUMWAYS; - localparam integer linebytelen = testbench.dut.core.lsu.bus.dcache.dcache.LINEBYTELEN; - localparam integer numwords = testbench.dut.core.lsu.bus.dcache.dcache.LINELEN/`XLEN; - localparam integer lognumlines = $clog2(numlines); - localparam integer loglinebytelen = $clog2(linebytelen); - localparam integer lognumways = $clog2(numways); - localparam integer tagstart = lognumlines + loglinebytelen; - - - - genvar index, way, cacheWord; - logic [`XLEN-1:0] CacheData [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic [`XLEN-1:0] CacheTag [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic CacheValid [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic CacheDirty [numways-1:0] [numlines-1:0] [numwords-1:0]; - logic [`PA_BITS-1:0] CacheAdr [numways-1:0] [numlines-1:0] [numwords-1:0]; - for(index = 0; index < numlines; index++) begin - for(way = 0; way < numways; way++) begin - for(cacheWord = 0; cacheWord < numwords; cacheWord++) begin - copyShadow #(.tagstart(tagstart), - 
.loglinebytelen(loglinebytelen)) - copyShadow(.clk, - .start, - .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.StoredData[index]), - .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), - .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), - .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].CacheDataMem.StoredData[index]), - .index(index), - .cacheWord(cacheWord), - .CacheData(CacheData[way][index][cacheWord]), - .CacheAdr(CacheAdr[way][index][cacheWord]), - .CacheTag(CacheTag[way][index][cacheWord]), - .CacheValid(CacheValid[way][index][cacheWord]), - .CacheDirty(CacheDirty[way][index][cacheWord])); - end - end - end - - integer i, j, k; - - always @(posedge clk) begin - if (start) begin #1 - #1 - for(i = 0; i < numlines; i++) begin - for(j = 0; j < numways; j++) begin - for(k = 0; k < numwords; k++) begin - if (CacheValid[j][i][k] & CacheDirty[j][i][k]) begin - ShadowRAM[CacheAdr[j][i][k] >> $clog2(`XLEN/8)] = CacheData[j][i][k]; - end - end - end - end - end - end - - - end - flop #(1) doneReg(.clk, .d(start), .q(done)); -endmodule - -module copyShadow - #(parameter tagstart, loglinebytelen) - (input logic clk, - input logic start, - input logic [`PA_BITS-1:tagstart] tag, - input logic valid, dirty, - input logic [`XLEN-1:0] data, - input logic [32-1:0] index, - input logic [32-1:0] cacheWord, - output logic [`XLEN-1:0] CacheData, - output logic [`PA_BITS-1:0] CacheAdr, - output logic [`XLEN-1:0] CacheTag, - output logic CacheValid, - output logic CacheDirty); - - - always_ff @(posedge clk) begin - if(start) begin - CacheTag = tag; - CacheValid = valid; - CacheDirty = dirty; - CacheData = data; - CacheAdr = (tag << tagstart) + (index << loglinebytelen) + (cacheWord << $clog2(`XLEN/8)); - end - end - -endmodule - From 1fab7605f52c4ba2c9858b2cdf840adb50f0c778 Mon Sep 17 00:00:00 2001 From: Daniel Torres Date: Tue, 21 Jun 2022 15:39:04 -0700 Subject: 
[PATCH 02/26] fixed issue where the unused spike elf files were being used to find objdump files that didn't exist causing makefile-memfile to fail prematurely --- pipelined/regression/makefile-memfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelined/regression/makefile-memfile b/pipelined/regression/makefile-memfile index 892e6db9b..c41963864 100644 --- a/pipelined/regression/makefile-memfile +++ b/pipelined/regression/makefile-memfile @@ -8,8 +8,9 @@ IMPERASDIR := $(ROOT)/tests/imperas-riscv-tests ALLDIRS := $(ARCHDIR)/$(SUFFIX) $(WALLYDIR)/$(SUFFIX) ELFFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf") +OBJDUMPFILES ?= $(shell find $(ALLDIRS) -type f -regex ".*\.elf.objdump") MEMFILES ?= $(ELFFILES:.elf=.elf.memfile) -ADDRFILES ?= $(ELFFILES:.elf=.elf.objdump.addr) +ADDRFILES ?= $(OBJDUMPFILES:.objdump=.objdump.addr) print: echo "files in $(ALLDIRS) are $(ELFFILES)." From 03b9878005a495382de7c472d29e4a735efb7bf4 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 21 Jun 2022 15:48:47 -0700 Subject: [PATCH 03/26] removed rv64fp from lint --- pipelined/regression/lint-wally | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/regression/lint-wally b/pipelined/regression/lint-wally index 2b5288d51..750486c4e 100755 --- a/pipelined/regression/lint-wally +++ b/pipelined/regression/lint-wally @@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/ verilator=`which verilator` basepath=$(dirname $0)/.. -for config in rv64fp rv64fpquad rv32e rv64gc rv32gc rv32ic; do +for config in rv32e rv64gc rv32gc rv32ic rv64fpquad; do echo "$config linting..." 
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then echo "Exiting after $config lint due to errors or warnings" From 6001956bd8422669ea31e1cb2c6f7e75bc29af49 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Tue, 21 Jun 2022 15:49:52 -0700 Subject: [PATCH 04/26] using memread for quotent select --- pipelined/srt/srt-radix4.do | 2 +- pipelined/srt/srt-radix4.sv | 142 ++++++++++++++++-------------- pipelined/srt/testbench-radix4.sv | 48 +++------- 3 files changed, 86 insertions(+), 106 deletions(-) diff --git a/pipelined/srt/srt-radix4.do b/pipelined/srt/srt-radix4.do index b213aa994..07dedfbfe 100644 --- a/pipelined/srt/srt-radix4.do +++ b/pipelined/srt/srt-radix4.do @@ -17,7 +17,7 @@ if [file exists work] { } vlib work -vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv qsel4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv +vlog +incdir+../config/rv64gc +incdir+../config/shared srt-radix4.sv testbench-radix4.sv ../src/generic/flop/flop*.sv ../src/generic/mux.sv ../src/generic/lzc.sv vopt +acc work.testbenchradix4 -o workopt vsim workopt diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index ccb6453c0..6c9cd0fa7 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -34,29 +34,24 @@ module srtradix4 ( input logic clk, - input logic Start, - input logic Stall, // *** multiple pipe stages - input logic Flush, // *** multiple pipe stages - // Floating Point Inputs - // later add exponents, signs, special cases - input logic XSign, YSign, - input logic [`NE-1:0] XExp, YExp, + input logic DivStart, + input logic XSgnE, YSgnE, + input logic [`NE-1:0] XExpE, YExpE, input logic [`NF-1:0] XFrac, YFrac, input logic [`XLEN-1:0] SrcA, SrcB, - input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit 
input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide - output logic rsign, + output logic DivDone, + output logic DivSgn, output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers - output logic [`NE-1:0] rExp, - output logic [3:0] Flags + output logic [`NE-1:0] DivExp ); // logic qp, qz, qm; // quotient is +1, 0, or -1 logic [3:0] q; - logic [`NE-1:0] calcExp; + logic [`NE-1:0] DivCalcExp; logic calcSign; logic [`DIVLEN-1:0] X, Dpreproc; logic [`DIVLEN+3:0] WS, WSA, WSN; @@ -65,7 +60,7 @@ module srtradix4 ( logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; - srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, Fmt, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign); + srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign); // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -77,11 +72,11 @@ module srtradix4 ( // - otherwise load WSA into the flipflop // *** what does N and A stand for? 
// *** change shift amount for radix4 - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, Start, WSN); + mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, DivStart, WSN); flop #(`DIVLEN+4) wsflop(clk, WSN, WS); - mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, Start, WCN); + mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); flop #(`DIVLEN+4) wcflop(clk, WCN, WC); - flopen #(`DIVLEN+4) dflop(clk, Start, {4'b0001, Dpreproc}, D); + flopen #(`DIVLEN+4) dflop(clk, DivStart, {4'b0001, Dpreproc}, D); // Quotient Selection logic // Given partial remainder, select quotient of +1, 0, or -1 (qp, qz, pm) @@ -94,9 +89,9 @@ module srtradix4 ( // 0001 = -2 qsel4 qsel4(.D, .WS, .WC, .q); - // Store the expoenent and sign until division is done - flopen #(`NE) expflop(clk, Start, calcExp, rExp); - flopen #(1) signflop(clk, Start, calcSign, rsign); + // Store the expoenent and sign until division is DivDone + flopen #(`NE) expflop(clk, DivStart, DivCalcExp, DivExp); + flopen #(1) signflop(clk, DivStart, calcSign, DivSgn); // Divisor Selection logic // *** radix 4 change to choose -2 to 2 @@ -120,11 +115,13 @@ module srtradix4 ( csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); //*** change for radix 4 - otfc4 #(`DIVLEN) otfc4(clk, Start, q, Quot); + otfc4 #(`DIVLEN) otfc4(clk, DivStart, q, Quot); - expcalc expcalc(.XExp, .YExp, .calcExp); + expcalc expcalc(.XExpE, .YExpE, .DivCalcExp); - signcalc signcalc(.XSign, .YSign, .calcSign); + signcalc signcalc(.XSgnE, .YSgnE, .calcSign); + + counter counter(clk, DivStart, DivDone); endmodule @@ -132,13 +129,58 @@ endmodule // Submodules // //////////////// +///////////// +// counter // +///////////// +module counter(input logic clk, + input logic DivStart, + output logic DivDone); + + logic [5:0] count; + + // This block of control logic sequences the divider + // through its iterations. 
You may modify it if you + // build a divider which completes in fewer iterations. + // You are not responsible for the (trivial) circuit + // design of the block. + + always @(posedge clk) + begin + if (count == `DIVLEN/2+1) DivDone <= #1 1; + else if (DivDone | DivStart) DivDone <= #1 0; + if (DivStart) count <= #1 0; + else count <= #1 count+1; + end +endmodule + +module qsel4 ( + input logic [`DIVLEN+3:0] D, + input logic [`DIVLEN+3:0] WS, WC, + output logic [3:0] q +); + logic [6:0] Wmsbs; + logic [7:0] PreWmsbs; + logic [2:0] Dmsbs; + assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; + assign Wmsbs = PreWmsbs[7:1]; + assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + // D = 0001.xxx... + // Dmsbs = | | + // W = xxxx.xxx... + // Wmsbs = | | + + logic [3:0] QSel4[1023:0]; + initial $readmemh("qslc_r4a2b.tv", QSel4); + assign q = QSel4[{Dmsbs,Wmsbs}]; + +endmodule + /////////////////// // Preprocessing // /////////////////// module srtpreproc ( input logic [`XLEN-1:0] SrcA, SrcB, input logic [`NF-1:0] XFrac, YFrac, - input logic [1:0] Fmt, // Floats: 00 = 16 bit, 01 = 32 bit, 10 = 64 bit, 11 = 128 bit input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs @@ -173,48 +215,12 @@ module srtpreproc ( assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); endmodule -///////////////////////////////// -// Quotient Selection, Radix 2 // -///////////////////////////////// -module qsel2 ( // *** eventually just change to 4 bits - input logic [`DIVLEN+3:`DIVLEN] ps, pc, - output logic qp, qz, qm -); - - logic [`DIVLEN+3:`DIVLEN] p, g; - logic magnitude, sign, cout; - - // The quotient selection logic is presented for simplicity, not - // for efficiency. You can probably optimize your logic to - // select the proper divisor with less delay. 
- - // Quotient equations from EE371 lecture notes 13-20 - assign p = ps ^ pc; - assign g = ps & pc; - - assign #1 magnitude = ~(&p[`DIVLEN+2:`DIVLEN]); - assign #1 cout = g[`DIVLEN+2] | (p[`DIVLEN+2] & (g[`DIVLEN+1] | p[`DIVLEN+1] & g[`DIVLEN])); - assign #1 sign = p[`DIVLEN+3] ^ cout; -/* assign #1 magnitude = ~((ps[54]^pc[54]) & (ps[53]^pc[53]) & - (ps[52]^pc[52])); - assign #1 sign = (ps[55]^pc[55])^ - (ps[54] & pc[54] | ((ps[54]^pc[54]) & - (ps[53]&pc[53] | ((ps[53]^pc[53]) & - (ps[52]&pc[52]))))); */ - - // Produce quotient = +1, 0, or -1 - assign #1 qp = magnitude & ~sign; - assign #1 qz = ~magnitude; - assign #1 qm = magnitude & sign; -endmodule - - /////////////////////////////////// // On-The-Fly Converter, Radix 2 // /////////////////////////////////// module otfc4 #(parameter N=65) ( input logic clk, - input logic Start, + input logic DivStart, input logic [3:0] q, output logic [N-1:0] r ); @@ -234,8 +240,8 @@ module otfc4 #(parameter N=65) ( // discard the r most significant bits of Q and QM. logic [N:0] QR, QMR; // if starting a new divison set Q to 0 and QM to -1 - mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, Start, QMux); - mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, Start, QMMux); + mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, DivStart, QMux); + mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, DivStart, QMMux); flop #(N+3) Qreg(clk, QMux, Q); flop #(N+3) QMreg(clk, QMMux, QM); @@ -287,7 +293,7 @@ module csa #(parameter N=69) ( // This block adds in1, in2, in3, and cin to produce // a result out1 / out2 in carry-save redundant form. // cin is just added to the least significant bit and - // is required to handle adding a negative divisor. + // is Startuired to handle adding a negative divisor. // Fortunately, the carry (out2) is shifted left by one // bit, leaving room in the least significant bit to // insert cin. 
@@ -302,11 +308,11 @@ endmodule // expcalc // ////////////// module expcalc( - input logic [`NE-1:0] XExp, YExp, - output logic [`NE-1:0] calcExp + input logic [`NE-1:0] XExpE, YExpE, + output logic [`NE-1:0] DivCalcExp ); - assign calcExp = XExp - YExp + (`NE)'(`BIAS); + assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS); endmodule @@ -314,10 +320,10 @@ endmodule // signcalc // ////////////// module signcalc( - input logic XSign, YSign, + input logic XSgnE, YSgnE, output logic calcSign ); - assign calcSign = XSign ^ YSign; + assign calcSign = XSgnE ^ YSgnE; endmodule \ No newline at end of file diff --git a/pipelined/srt/testbench-radix4.sv b/pipelined/srt/testbench-radix4.sv index 6ac616ed6..0cea8059c 100644 --- a/pipelined/srt/testbench-radix4.sv +++ b/pipelined/srt/testbench-radix4.sv @@ -2,30 +2,6 @@ `include "wally-config.vh" `define DIVLEN ((`NF<`XLEN) ? `XLEN : `NF) -///////////// -// counter // -///////////// -module counter(input logic clk, - input logic req, - output logic done); - - logic [5:0] count; - - // This block of control logic sequences the divider - // through its iterations. You may modify it if you - // build a divider which completes in fewer iterations. - // You are not responsible for the (trivial) circuit - // design of the block. 
- - always @(posedge clk) - begin - if (count == `DIVLEN/2+1) done <= #1 1; - else if (done | req) done <= #1 0; - if (req) count <= #1 0; - else count <= #1 count+1; - end -endmodule - /////////// // clock // /////////// @@ -43,7 +19,7 @@ endmodule module testbenchradix4; logic clk; logic req; - logic done; + logic DivDone; logic [63:0] a, b; logic [51:0] afrac, bfrac; logic [10:0] aExp, bExp; @@ -65,22 +41,20 @@ module testbenchradix4; logic [MEM_WIDTH-1:0] Vec; // Verilog doesn't allow direct access to a // bit field of an array logic [63:0] correctr, nextr, diffn, diffp; - logic [10:0] rExp; - logic rsign; + logic [10:0] DivExp; + logic DivSgn; integer testnum, errors; // Divider - srtradix4 srtradix4(.clk, .Start(req), - .Stall(1'b0), .Flush(1'b0), - .XExp(aExp), .YExp(bExp), .rExp, - .XSign(asign), .YSign(bsign), .rsign, + srtradix4 srtradix4(.clk, .DivStart(req), + .XExpE(aExp), .YExpE(bExp), .DivExp, + .XSgnE(asign), .YSgnE(bsign), .DivSgn, .XFrac(afrac), .YFrac(bfrac), .SrcA('0), .SrcB('0), .Fmt(2'b00), - .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), - .Quot, .Rem(), .Flags()); + .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone, + .Quot, .Rem()); // Counter - counter counter(clk, req, done); initial @@ -112,14 +86,14 @@ module testbenchradix4; always @(posedge clk) begin r = Quot[`DIVLEN-1:`DIVLEN - 52]; - if (done) begin + if (DivDone) begin req <= 1; diffp = correctr[51:0] - r; diffn = r - correctr[51:0]; - if ((rsign !== correctr[63]) | (rExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp + if ((DivSgn !== correctr[63]) | (DivExp !== correctr[62:52]) | ($signed(diffn) > 1) | ($signed(diffp) > 1) | (diffn === 64'bx) | (diffp === 64'bx)) // check if accurate to 1 ulp begin errors = errors+1; - $display("result was %h_%h, should be %h %h %h\n", rExp, r, correctr, diffn, diffp); + $display("result was %h_%h, should be %h %h %h\n", DivExp, r, 
correctr, diffn, diffp); $display("failed\n"); $stop; end From 6ba3a7615c899c1db0be785b8c9defcd4f95ad84 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Tue, 21 Jun 2022 15:54:24 -0700 Subject: [PATCH 05/26] added individual makes for arch and wally tests as well as memfiles to Makefile. run using make archtests/wallytests/memfiles --- pipelined/regression/Makefile | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/pipelined/regression/Makefile b/pipelined/regression/Makefile index 0a9e7d993..5cf4d4083 100644 --- a/pipelined/regression/Makefile +++ b/pipelined/regression/Makefile @@ -8,22 +8,16 @@ make clean: # make allclean -C ../../tests/imperas-riscv-tests make all: + make archtests + make wallytests + make memfiles # *** Build old tests/imperas-riscv-tests for now; # Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test # DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired #make -C ../../tests/imperas-riscv-tests --jobs #make -C ../../tests/imperas-riscv-tests XLEN=64 --jobs - - # Build riscv-arch-test 64 and 32-bit versions - make -C ../../tests/riscof/ --jobs - make -C ../../tests/riscof/ XLEN=32 --jobs - # Build wally-riscv-arch-test - make -C ../../tests/wally-riscv-arch-test/ --jobs - make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs -# build the memfiles and address files. - make -f makefile-memfile wally-sim-files --jobs # Only compile Imperas tests if they are installed locally. 
# They are usually a symlink to $RISCV/imperas-riscv-tests and only @@ -36,4 +30,15 @@ make all: # Link Linux test vectors (fix this later***) #cd ../../tests/linux-testgen/linux-testvectors/;./tvLinker.sh - +make archtests: + # Build riscv-arch-test 64 and 32-bit versions + make -C ../../tests/riscof/ --jobs + make -C ../../tests/riscof/ XLEN=32 --jobs + +make wallytests: + # Build wally-riscv-arch-test + make -C ../../tests/wally-riscv-arch-test/ --jobs + make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs + +make memfiles: + make -f makefile-memfile wally-sim-files --jobs From ac5dfc41f17b7e736b94483976944502a501cd29 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 21 Jun 2022 22:56:01 +0000 Subject: [PATCH 06/26] Trimmed lint-wally --- pipelined/regression/lint-wally | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/regression/lint-wally b/pipelined/regression/lint-wally index 2b5288d51..750486c4e 100755 --- a/pipelined/regression/lint-wally +++ b/pipelined/regression/lint-wally @@ -5,7 +5,7 @@ export PATH=$PATH:/usr/local/bin/ verilator=`which verilator` basepath=$(dirname $0)/.. -for config in rv64fp rv64fpquad rv32e rv64gc rv32gc rv32ic; do +for config in rv32e rv64gc rv32gc rv32ic rv64fpquad; do echo "$config linting..." 
if !($verilator --lint-only "$@" --top-module wallypipelinedsoc "-I$basepath/config/shared" "-I$basepath/config/$config" $basepath/src/*/*.sv $basepath/src/*/*/*.sv --relative-includes); then echo "Exiting after $config lint due to errors or warnings" From 09a633d7d189bc2286eaafd91a8fd870eef8f86a Mon Sep 17 00:00:00 2001 From: slmnemo Date: Tue, 21 Jun 2022 16:10:18 -0700 Subject: [PATCH 07/26] changed order of makefiles and fixed warnings when running makes --- pipelined/regression/Makefile | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/pipelined/regression/Makefile b/pipelined/regression/Makefile index 5cf4d4083..5ad721722 100644 --- a/pipelined/regression/Makefile +++ b/pipelined/regression/Makefile @@ -1,24 +1,9 @@ -make allclean: - make clean - make all - -make clean: - make clean -C ../../tests/riscof - make clean -C ../../tests/wally-riscv-arch-test -# make allclean -C ../../tests/imperas-riscv-tests - -make all: - make archtests - make wallytests - make memfiles +all: archtests wallytests memfiles # *** Build old tests/imperas-riscv-tests for now; # Delete this part when the privileged tests transition over to tests/wally-riscv-arch-test # DH: 2/27/22 temporarily commented out imperas-riscv-tests because license expired #make -C ../../tests/imperas-riscv-tests --jobs #make -C ../../tests/imperas-riscv-tests XLEN=64 --jobs - - - # Only compile Imperas tests if they are installed locally. 
# They are usually a symlink to $RISCV/imperas-riscv-tests and only # get compiled there manually during installation @@ -30,15 +15,22 @@ make all: # Link Linux test vectors (fix this later***) #cd ../../tests/linux-testgen/linux-testvectors/;./tvLinker.sh -make archtests: +allclean: clean all + +clean: + make clean -C ../../tests/riscof + make clean -C ../../tests/wally-riscv-arch-test +# make allclean -C ../../tests/imperas-riscv-tests + +archtests: # Build riscv-arch-test 64 and 32-bit versions make -C ../../tests/riscof/ --jobs make -C ../../tests/riscof/ XLEN=32 --jobs -make wallytests: +wallytests: # Build wally-riscv-arch-test make -C ../../tests/wally-riscv-arch-test/ --jobs make -C ../../tests/wally-riscv-arch-test/ XLEN=32 --jobs -make memfiles: +memfiles: make -f makefile-memfile wally-sim-files --jobs From 1612daa2944b90bcda8f8af0799cb168aba4342e Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 00:07:34 +0000 Subject: [PATCH 08/26] Testfloat running division - not passing --- pipelined/config/shared/wally-shared.vh | 9 +- pipelined/regression/testfloat.do | 2 +- pipelined/regression/wave-fpu.do | 15 + pipelined/src/fpu/cvtshiftcalc.sv | 8 +- pipelined/src/fpu/fcvt.sv | 20 +- pipelined/src/fpu/fpu.sv | 12 +- pipelined/src/fpu/postprocess.sv | 21 +- pipelined/src/fpu/resultsign.sv | 10 +- pipelined/srt/qsel4.dat | 1024 +++++++++++++++++++++++ pipelined/srt/qsel4.sv | 2 +- pipelined/srt/srt-radix4.sv | 58 +- pipelined/srt/testbench-radix4.sv | 2 +- pipelined/testbench/testbench-fp.sv | 129 +-- 13 files changed, 1173 insertions(+), 139 deletions(-) create mode 100644 pipelined/srt/qsel4.dat diff --git a/pipelined/config/shared/wally-shared.vh b/pipelined/config/shared/wally-shared.vh index afe822f46..3c2699da0 100644 --- a/pipelined/config/shared/wally-shared.vh +++ b/pipelined/config/shared/wally-shared.vh @@ -94,11 +94,12 @@ `define BIAS2 ((`F_SUPPORTED & (`LEN1 != `S_LEN)) ? 
`S_BIAS : `H_BIAS) // largest length in IEU/FPU -`define LGLEN ((`NF<`XLEN) ? `XLEN : `NF) +`define CVTLEN ((`NF<`XLEN) ? `XLEN : `NF) `define LLEN ((`FLEN<`XLEN) ? `XLEN : `FLEN) -`define LOGLGLEN $unsigned($clog2(`LGLEN+1)) -`define NORMSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+9)) -`define CORRSHIFTSZ ((`LGLEN+`NF) > (3*`NF+8) ? (`LGLEN+`NF+1) : (3*`NF+6)) +`define LOGCVTLEN $unsigned($clog2(`CVTLEN+1)) +`define NORMSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+9)) +`define CORRSHIFTSZ ((`CVTLEN+`NF) > (3*`NF+8) ? (`CVTLEN+`NF+1) : (3*`NF+6)) +`define DIVLEN ((`NF < `XLEN) ? `XLEN : `NF) // Disable spurious Verilator warnings diff --git a/pipelined/regression/testfloat.do b/pipelined/regression/testfloat.do index 68c240c8a..db6948699 100644 --- a/pipelined/regression/testfloat.do +++ b/pipelined/regression/testfloat.do @@ -32,7 +32,7 @@ vlib work # start and run simulation # remove +acc flag for faster sim during regressions if there is no need to access internal signals # $num = the added words after the call -vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../src/generic/*.sv -suppress 2583,7063,8607,2697 +vlog +incdir+../config/$1 +incdir+../config/shared ../testbench/testbench-fp.sv ../src/fpu/*.sv ../srt/srt-radix4.sv ../src/generic/*.sv ../src/generic/flop/*.sv -suppress 2583,7063,8607,2697 vsim -voptargs=+acc work.testbenchfp -G TEST=$2 diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 61b35a51b..906eb2560 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -7,3 +7,18 @@ add wave -noupdate /testbenchfp/Y add wave -noupdate /testbenchfp/Z add wave -noupdate /testbenchfp/Res add wave -noupdate /testbenchfp/Ans +add wave -noupdate /testbenchfp/DivStart +add wave -noupdate /testbenchfp/DivDone +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/* +add wave -group {PostProc} -noupdate 
/testbenchfp/postprocess/resultselect/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/flags/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/normshift/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/lzacorrection/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/resultsign/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/round/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/fmashiftcalc/* +add wave -group {PostProc} -noupdate /testbenchfp/postprocess/cvtshiftcalc/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/* diff --git a/pipelined/src/fpu/cvtshiftcalc.sv b/pipelined/src/fpu/cvtshiftcalc.sv index 899dffb77..ab054342f 100644 --- a/pipelined/src/fpu/cvtshiftcalc.sv +++ b/pipelined/src/fpu/cvtshiftcalc.sv @@ -7,10 +7,10 @@ module cvtshiftcalc( input logic [`NE:0] CvtCalcExpM, // the calculated expoent input logic [`NF:0] XManM, // input mantissas input logic [`FMTBITS-1:0] OutFmt, // output format - input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) + input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) input logic CvtResDenormUfM, output logic CvtResUf, - output logic [`LGLEN+`NF:0] CvtShiftIn // number to be shifted + output logic [`CVTLEN+`NF:0] CvtShiftIn // number to be shifted ); logic [$clog2(`NF):0] ResNegNF; // the result's fraction length negated (-NF) @@ -31,8 +31,8 @@ module cvtshiftcalc( // | `NF-1 zeros | Mantissa | 0's if nessisary | // - otherwise: // | LzcInM | 0's if nessisary | - assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`LGLEN-`XLEN{1'b0}}} : - CvtResDenormUfM ? 
{{`NF-1{1'b0}}, XManM, {`LGLEN-`NF+1{1'b0}}} : + assign CvtShiftIn = ToInt ? {{`XLEN{1'b0}}, XManM[`NF]&~CvtCalcExpM[`NE], XManM[`NF-1]|(CvtCalcExpM[`NE]&XManM[`NF]), XManM[`NF-2:0], {`CVTLEN-`XLEN{1'b0}}} : + CvtResDenormUfM ? {{`NF-1{1'b0}}, XManM, {`CVTLEN-`NF+1{1'b0}}} : {CvtLzcInM, {`NF+1{1'b0}}}; diff --git a/pipelined/src/fpu/fcvt.sv b/pipelined/src/fpu/fcvt.sv index a76122804..26ca7dd83 100644 --- a/pipelined/src/fpu/fcvt.sv +++ b/pipelined/src/fpu/fcvt.sv @@ -12,11 +12,11 @@ module fcvt ( input logic XDenormE, // is the input denormalized input logic [`FMTBITS-1:0] FmtE, // the input's precision (11=quad 01=double 00=single 10=half) output logic [`NE:0] CvtCalcExpE, // the calculated expoent - output logic [`LOGLGLEN-1:0] CvtShiftAmtE, // how much to shift by + output logic [`LOGCVTLEN-1:0] CvtShiftAmtE, // how much to shift by output logic CvtResDenormUfE,// does the result underflow or is denormalized output logic CvtResSgnE, // the result's sign output logic IntZeroE, // is the integer zero? - output logic [`LGLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder) + output logic [`CVTLEN-1:0] CvtLzcInE // input to the Leading Zero Counter (priority encoder) ); // OpCtrls: @@ -43,7 +43,7 @@ module fcvt ( logic Int64; // is the integer 64 bits? logic IntToFp; // is the opperation an int->fp conversion? logic ToInt; // is the opperation an fp->int conversion? - logic [`LOGLGLEN-1:0] ZeroCnt; // output from the LZC + logic [`LOGCVTLEN-1:0] ZeroCnt; // output from the LZC // seperate OpCtrl for code readability @@ -78,10 +78,10 @@ module fcvt ( // choose the input to the leading zero counter i.e. priority encoder // int -> fp : | positive integer | 00000... (if needed) | // fp -> fp : | fraction | 00000... (if needed) | - assign CvtLzcInE = IntToFp ? {TrimInt, {`LGLEN-`XLEN{1'b0}}} : - {XManE[`NF-1:0], {`LGLEN-`NF{1'b0}}}; + assign CvtLzcInE = IntToFp ? 
{TrimInt, {`CVTLEN-`XLEN{1'b0}}} : + {XManE[`NF-1:0], {`CVTLEN-`NF{1'b0}}}; - lzc #(`LGLEN) lzc (.num(CvtLzcInE), .ZeroCnt); + lzc #(`CVTLEN) lzc (.num(CvtLzcInE), .ZeroCnt); /////////////////////////////////////////////////////////////////////////// // shifter @@ -99,9 +99,9 @@ module fcvt ( // - only shift fp -> fp if the intital value is denormalized // - this is a problem because the input to the lzc was the fraction rather than the mantissa // - rather have a few and-gates than an extra bit in the priority encoder??? *** is this true? - assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGLGLEN-1:0]&{`LOGLGLEN{~CvtCalcExpE[`NE]}} : - CvtResDenormUfE&~IntToFp ? (`LOGLGLEN)'(`NF-1)+CvtCalcExpE[`LOGLGLEN-1:0] : - (ZeroCnt+1)&{`LOGLGLEN{XDenormE|IntToFp}}; + assign CvtShiftAmtE = ToInt ? CvtCalcExpE[`LOGCVTLEN-1:0]&{`LOGCVTLEN{~CvtCalcExpE[`NE]}} : + CvtResDenormUfE&~IntToFp ? (`LOGCVTLEN)'(`NF-1)+CvtCalcExpE[`LOGCVTLEN-1:0] : + (ZeroCnt+1)&{`LOGCVTLEN{XDenormE|IntToFp}}; /////////////////////////////////////////////////////////////////////////// // exp calculations @@ -180,7 +180,7 @@ module fcvt ( // - shift left to normilize (-1-ZeroCnt) // - newBias to make the biased exponent // - assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGLGLEN+1{1'b0}}, (ZeroCnt&{`LOGLGLEN{XDenormE|IntToFp}})}; + assign CvtCalcExpE = {1'b0, OldExp} - (`NE+1)'(`BIAS) + {2'b0, NewBias} - {{`NE{1'b0}}, XDenormE|IntToFp} - {{`NE-`LOGCVTLEN+1{1'b0}}, (ZeroCnt&{`LOGCVTLEN{XDenormE|IntToFp}})}; // find if the result is dnormal or underflows // - if Calculated expoenent is 0 or negitive (and the input/result is not exactaly 0) // - can't underflow an integer to Fp conversion diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index be73e9e7a..b8a2e1918 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -82,7 +82,7 @@ module fpu ( // unpacking signals logic XSgnE, YSgnE, ZSgnE; // input's sign - 
execute stage - logic XSgnM; // input's sign - memory stage + logic XSgnM, YSgnM; // input's sign - memory stage logic [`NE-1:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage logic [`NE-1:0] ZExpM; // input's exponent - memory stage logic [`NF:0] XManE, YManE, ZManE; // input's fraction - execute stage @@ -116,11 +116,11 @@ module fpu ( // Cvt Signals logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent - logic [`LOGLGLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized logic CvtResSgnE, CvtResSgnM; // the result's sign logic IntZeroE, IntZeroM; // is the integer zero? - logic [`LGLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -317,7 +317,7 @@ module fpu ( // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); flopenrc #(`NF+2) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XManE}, {XSgnM,XManM}); - flopenrc #(`NF+1) EMFpReg3 (clk, reset, FlushM, ~StallM, YManE, YManM); + flopenrc #(`NF+2) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YManE}, {YSgnM,YManM}); flopenrc #(`FLEN) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); flopenrc #(`XLEN) EMFpReg6 (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); flopenrc #(`FLEN) EMFpReg7 (clk, reset, FlushM, ~StallM, PreFpResE, PreFpResM); @@ -333,7 +333,7 @@ module fpu ( flopenrc #($clog2(3*`NF+7)+6) EMRegFma4(clk, reset, FlushM, ~StallM, {AddendStickyE, KillProdE, InvZE, FmaNormCntE, NegSumE, ZSgnEffE, PSgnE}, {AddendStickyM, KillProdM, InvZM, FmaNormCntM, NegSumM, ZSgnEffM, PSgnM}); - flopenrc #(`NE+`LOGLGLEN+`LGLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, + flopenrc 
#(`NE+`LOGCVTLEN+`CVTLEN+4) EMRegCvt(clk, reset, FlushM, ~StallM, {CvtCalcExpE, CvtShiftAmtE, CvtResDenormUfE, CvtResSgnE, IntZeroE, CvtLzcInE}, {CvtCalcExpM, CvtShiftAmtM, CvtResDenormUfM, CvtResSgnM, IntZeroM, CvtLzcInM}); @@ -351,7 +351,7 @@ module fpu ( assign FpLoadM = FResSelM[1]; - postprocess postprocess(.XSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, + postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 267647346..c53920554 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" module postprocess( - input logic XSgnM, // input signs + input logic XSgnM, YSgnM, // input signs input logic [`NE-1:0] ZExpM, // input exponents input logic [`NF:0] XManM, YManM, ZManM, // input mantissas input logic [2:0] FrmM, // rounding mode 000 = rount to nearest, ties to even 001 = round twords zero 010 = round down 011 = round up 100 = round to nearest, ties to max magnitude @@ -52,12 +52,13 @@ module postprocess( input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count input logic [`NE:0] CvtCalcExpM, // the calculated expoent input logic CvtResDenormUfM, - input logic [`LOGLGLEN-1:0] CvtShiftAmtM, // how much to shift by + input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by input logic CvtResSgnM, // the result's sign input logic FWriteIntM, // is fp->int (since it's writting to the integer register) - input logic [`LGLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) + input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) input logic IntZeroM, 
// is the input zero input logic [1:0] PostProcSelM, // select result to be written to fp register + input logic [`DIVLEN-1:0] Quot, output logic [`FLEN-1:0] PostProcResM, // FMA final result output logic [4:0] PostProcFlgM, output logic [`XLEN-1:0] FCvtIntResM // the int conversion result @@ -75,7 +76,7 @@ module postprocess( logic [3*`NF+8:0] FmaShiftIn; // is the sum zero logic UfPlus1; // do you add one (for determining underflow flag) logic Round; // bits needed to determine rounding - logic [`LGLEN+`NF:0] CvtShiftIn; // number to be shifted + logic [`CVTLEN+`NF:0] CvtShiftIn; // number to be shifted logic Mult; // multiply opperation logic [`FLEN:0] RoundAdd; // how much to add to the result logic [`NE+1:0] ConvNormSumExp; // exponent of the normalized sum not taking into account denormal or zero results @@ -143,12 +144,12 @@ module postprocess( ShiftIn = {FmaShiftIn, {`NORMSHIFTSZ-(3*`NF+9){1'b0}}}; end 2'b00: begin // cvt - ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`LGLEN+1){1'b0}}, CvtShiftAmtM}; - ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`LGLEN-`NF-1{1'b0}}}; + ShiftAmt = {{$clog2(`NORMSHIFTSZ)-$clog2(`CVTLEN+1){1'b0}}, CvtShiftAmtM}; + ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; end - 2'b01: begin //div - ShiftAmt = 0;//{DivShiftAmt}; - ShiftIn = 0;//{{`NORMSHIFTSZ-(3*`NF+8){1'b0}}, DivShiftIn}; + 2'b01: begin //div ***prob can take out + ShiftAmt = 1'b0;//{DivShiftAmt}; + ShiftIn = {Quot, {`NORMSHIFTSZ-`DIVLEN{1'b0}}}; end default: begin ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; @@ -181,7 +182,7 @@ module postprocess( resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .InvZM, .SumExp, .Round, .Sticky, .FmaOp, .DivOp, .CvtOp, .ZInfM, .InfIn, .NegSumM, .SumZero, .Mult, - .CvtResSgnM, .RoundSgn, .ResSgn); + .XSgnM, .YSgnM, .CvtResSgnM, .RoundSgn, .ResSgn); /////////////////////////////////////////////////////////////////////////////// // Flags diff --git a/pipelined/src/fpu/resultsign.sv b/pipelined/src/fpu/resultsign.sv index c8862ff94..9a76cf8f3 
100644 --- a/pipelined/src/fpu/resultsign.sv +++ b/pipelined/src/fpu/resultsign.sv @@ -4,6 +4,8 @@ module resultsign( input logic [2:0] FrmM, input logic PSgnM, ZSgnEffM, input logic InvZM, + input logic XSgnM, + input logic YSgnM, input logic ZInfM, input logic InfIn, input logic NegSumM, @@ -25,6 +27,7 @@ module resultsign( logic FmaResSgn; logic FmaResSgnTmp; logic Underflow; + logic DivSgn; // logic ResultSgnTmp; // Determine the sign if the sum is zero @@ -43,9 +46,10 @@ module resultsign( assign InfSgn = ZInfM ? ZSgnEffM : PSgnM; assign FmaResSgn = InfIn ? InfSgn : SumZero ? ZeroSgn : FmaResSgnTmp; - // Sign for rounding calulation - assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp); + assign DivSgn = XSgnM^YSgnM; - assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (1'b0&DivOp); + // Sign for rounding calulation + assign RoundSgn = (FmaResSgnTmp&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp); + assign ResSgn = (FmaResSgn&FmaOp) | (CvtResSgnM&CvtOp) | (DivSgn&DivOp); endmodule \ No newline at end of file diff --git a/pipelined/srt/qsel4.dat b/pipelined/srt/qsel4.dat new file mode 100644 index 000000000..b92d81e8e --- /dev/null +++ b/pipelined/srt/qsel4.dat @@ -0,0 +1,1024 @@ +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 
+0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +4 +8 +8 +8 
+8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +8 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/pipelined/srt/qsel4.sv b/pipelined/srt/qsel4.sv index 069f4268c..70b8b92d2 100644 --- a/pipelined/srt/qsel4.sv +++ b/pipelined/srt/qsel4.sv @@ -11,7 +11,7 @@ module qsel4 ( logic [2:0] Dmsbs; assign PreWmsbs = WC[`DIVLEN+3:`DIVLEN-4] + WS[`DIVLEN+3:`DIVLEN-4]; assign Wmsbs = PreWmsbs[7:1]; - assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; + assign Dmsbs = D[`DIVLEN-1:`DIVLEN-3]; // D = 0001.xxx... // Dmsbs = | | // W = xxxx.xxx... diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index 6c9cd0fa7..671c63500 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -30,12 +30,9 @@ `include "wally-config.vh" -`define DIVLEN ((`NF<(`XLEN)) ? (`XLEN) : `NF) - module srtradix4 ( input logic clk, input logic DivStart, - input logic XSgnE, YSgnE, input logic [`NE-1:0] XExpE, YExpE, input logic [`NF-1:0] XFrac, YFrac, input logic [`XLEN-1:0] SrcA, SrcB, @@ -44,8 +41,8 @@ module srtradix4 ( input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic DivDone, - output logic DivSgn, - output logic [`DIVLEN-1:0] Quot, Rem, // *** later handle integers + output logic [`DIVLEN-1:0] Quot, + output logic [`XLEN-1:0] Rem, // *** later handle integers output logic [`NE-1:0] DivExp ); @@ -91,7 +88,6 @@ module srtradix4 ( // Store the expoenent and sign until division is DivDone flopen #(`NE) expflop(clk, DivStart, DivCalcExp, DivExp); - flopen #(1) signflop(clk, DivStart, calcSign, DivSgn); // Divisor Selection logic // *** radix 4 change to choose -2 to 2 @@ -115,13 +111,11 @@ module srtradix4 ( csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); //*** change for radix 4 - otfc4 #(`DIVLEN) 
otfc4(clk, DivStart, q, Quot); + otfc4 otfc4(clk, DivStart, q, Quot); expcalc expcalc(.XExpE, .YExpE, .DivCalcExp); - signcalc signcalc(.XSgnE, .YSgnE, .calcSign); - - counter counter(clk, DivStart, DivDone); + divcounter divcounter(clk, DivStart, DivDone); endmodule @@ -132,7 +126,7 @@ endmodule ///////////// // counter // ///////////// -module counter(input logic clk, +module divcounter(input logic clk, input logic DivStart, output logic DivDone); @@ -146,6 +140,7 @@ module counter(input logic clk, always @(posedge clk) begin + DivDone = 0; if (count == `DIVLEN/2+1) DivDone <= #1 1; else if (DivDone | DivStart) DivDone <= #1 0; if (DivStart) count <= #1 0; @@ -170,7 +165,7 @@ module qsel4 ( // Wmsbs = | | logic [3:0] QSel4[1023:0]; - initial $readmemh("qslc_r4a2b.tv", QSel4); + initial $readmemh("../srt/qsel4.dat", QSel4); assign q = QSel4[{Dmsbs,Wmsbs}]; endmodule @@ -218,11 +213,11 @@ endmodule /////////////////////////////////// // On-The-Fly Converter, Radix 2 // /////////////////////////////////// -module otfc4 #(parameter N=65) ( +module otfc4 ( input logic clk, input logic DivStart, input logic [3:0] q, - output logic [N-1:0] r + output logic [`DIVLEN-1:0] Quot ); // The on-the-fly converter transfers the quotient @@ -230,20 +225,20 @@ module otfc4 #(parameter N=65) ( // // This code follows the psuedocode presented in the // floating point chapter of the book. Right now, - // it is written for Radix-2 division. + // it is written for Radix-4 division. // // QM is Q-1. It allows us to write negative bits // without using a costly CPA. - logic [N+2:0] Q, QM, QNext, QMNext, QMux, QMMux; + logic [`DIVLEN+2:0] Q, QM, QNext, QMNext, QMux, QMMux; // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. 
- logic [N:0] QR, QMR; + logic [`DIVLEN:0] QR, QMR; // if starting a new divison set Q to 0 and QM to -1 - mux2 #(N+3) Qmux(QNext, {N+3{1'b0}}, DivStart, QMux); - mux2 #(N+3) QMmux(QMNext, {N+3{1'b1}}, DivStart, QMMux); - flop #(N+3) Qreg(clk, QMux, Q); - flop #(N+3) QMreg(clk, QMMux, QM); + mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux); + mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux); + flop #(`DIVLEN+3) Qreg(clk, QMux, Q); + flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); // shift Q (quotent) and QM (quotent-1) // if q = 2 Q = {Q, 10} QM = {Q, 01} @@ -253,11 +248,9 @@ module otfc4 #(parameter N=65) ( // else if q = -2 Q = {QM, 10} QM = {QM, 01} // *** how does the 0 concatination numbers work? - - always_comb begin - QR = Q[N:0]; - QMR = QM[N:0]; // Shift Q and QM + QR = Q[`DIVLEN:0]; + QMR = QM[`DIVLEN:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; QMNext = {QR, 2'b01}; @@ -275,7 +268,8 @@ module otfc4 #(parameter N=65) ( QMNext = {QMR, 2'b11}; end end - assign r = Q[N+2] ? Q[N+1:2] : Q[N:1]; + // Quot is in the range [.5, 2) so normalize the result if nesissary + assign Quot = Q[`DIVLEN+2] ? 
Q[`DIVLEN+1:2] : Q[`DIVLEN:1]; endmodule @@ -315,15 +309,3 @@ module expcalc( assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS); endmodule - -////////////// -// signcalc // -////////////// -module signcalc( - input logic XSgnE, YSgnE, - output logic calcSign -); - - assign calcSign = XSgnE ^ YSgnE; - -endmodule \ No newline at end of file diff --git a/pipelined/srt/testbench-radix4.sv b/pipelined/srt/testbench-radix4.sv index 0cea8059c..434ef74b0 100644 --- a/pipelined/srt/testbench-radix4.sv +++ b/pipelined/srt/testbench-radix4.sv @@ -50,7 +50,7 @@ module testbenchradix4; .XExpE(aExp), .YExpE(bExp), .DivExp, .XSgnE(asign), .YSgnE(bsign), .DivSgn, .XFrac(afrac), .YFrac(bfrac), - .SrcA('0), .SrcB('0), .Fmt(2'b00), + .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(1'b0), .DivDone, .Quot, .Rem()); diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 4bae7d106..748670b46 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -48,13 +48,13 @@ module testbenchfp; logic XInf, YInf, ZInf; // is the input infinity logic XZero, YZero, ZZero; // is the input zero logic XExpMax, YExpMax, ZExpMax; // is the input's exponent all ones - logic [`LGLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) + logic [`CVTLEN-1:0] CvtLzcInE; // input to the Leading Zero Counter (priority encoder) logic IntZeroE; logic CvtResSgnE; - logic [`XLEN-1:0] Empty1,Empty2,Empty3,Empty4,Empty5; logic [`NE:0] CvtCalcExpE; // the calculated expoent - logic [`LOGLGLEN-1:0] CvtShiftAmtE; // how much to shift by + logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by logic CvtResDenormUfE; + logic DivStart, DivDone; // in-between FMA signals @@ -68,6 +68,9 @@ module testbenchfp; logic NegSumE; logic ZSgnEffE; logic PSgnE; + logic DivSgn; + logic [`DIVLEN-1:0] Quot; + logic [`NE-1:0] DivExp; 
/////////////////////////////////////////////////////////////////////////////////////////////// @@ -205,16 +208,16 @@ module testbenchfp; Fmt = {Fmt, 2'b11}; end end - // if (TEST === "div" | TEST === "all") begin // if division is being tested - // // add the divide tests/op-ctrls/unit/fmt - // Tests = {Tests, f128div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b11}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the divide tests/op-ctrls/unit/fmt + Tests = {Tests, f128div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b11}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tested // // add the square-root tests/op-ctrls/unit/fmt // Tests = {Tests, f128sqrt}; @@ -332,16 +335,16 @@ module testbenchfp; Fmt = {Fmt, 2'b01}; end end - // if (TEST === "div" | TEST === "all") begin // if division is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f64div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b01}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f64div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b01}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if square-root is being tessted // // add the correct tests/op-ctrls/unit/fmt to their lists // Tests = {Tests, f64sqrt}; @@ -443,16 +446,16 @@ module testbenchfp; Fmt = {Fmt, 2'b00}; end end - // if (TEST === "div" | TEST === "all") 
begin // if division is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f32div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b00}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f32div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b00}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // // add the correct tests/op-ctrls/unit/fmt to their lists // Tests = {Tests, f32sqrt}; @@ -536,16 +539,16 @@ module testbenchfp; Fmt = {Fmt, 2'b10}; end end - // if (TEST === "div" | TEST === "all") begin // if division is being tested - // // add the correct tests/op-ctrls/unit/fmt to their lists - // Tests = {Tests, f16div}; - // OpCtrl = {OpCtrl, `DIV_OPCTRL}; - // WriteInt = {WriteInt, 1'b0}; - // for(int i = 0; i<5; i++) begin - // Unit = {Unit, `DIVUNIT}; - // Fmt = {Fmt, 2'b10}; - // end - // end + if (TEST === "div" | TEST === "all") begin // if division is being tested + // add the correct tests/op-ctrls/unit/fmt to their lists + Tests = {Tests, f16div}; + OpCtrl = {OpCtrl, `DIV_OPCTRL}; + WriteInt = {WriteInt, 1'b0}; + for(int i = 0; i<5; i++) begin + Unit = {Unit, `DIVUNIT}; + Fmt = {Fmt, 2'b10}; + end + end // if (TEST === "sqrt" | TEST === "all") begin // if sqrt is being tested // // add the correct tests/op-ctrls/unit/fmt to their lists // Tests = {Tests, f16sqrt}; @@ -611,7 +614,7 @@ module testbenchfp; readvectors readvectors (.clk, .Fmt(FmtVal), .ModFmt, .TestVector(TestVectors[VectorNum]), .VectorNum, .Ans(Ans), .AnsFlg(AnsFlg), .SrcA, .XSgnE(XSgn), .YSgnE(YSgn), .ZSgnE(ZSgn), .Unit (UnitVal), .XExpE(XExp), .YExpE(YExp), .ZExpE(ZExp), .TestNum, 
.OpCtrl(OpCtrlVal), - .XManE(XMan), .YManE(YMan), .ZManE(ZMan), + .XManE(XMan), .YManE(YMan), .ZManE(ZMan), .DivStart, .XNaNE(XNaN), .YNaNE(YNaN), .ZNaNE(ZNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .ZSNaNE(ZSNaN), .XDenormE(XDenorm), .ZDenormE(ZDenorm), @@ -639,8 +642,8 @@ module testbenchfp; .FOpCtrlE(OpCtrlVal), .FmtE(ModFmt), .SumE, .NegSumE, .InvZE, .FmaNormCntE, .ZSgnEffE, .PSgnE, .ProdExpE, .AddendStickyE, .KillProdE); - postprocess postprocess(.XSgnM(XSgn), .PostProcSelM(UnitVal[1:0]), - .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), + postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]), + .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE), .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE), .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE), @@ -650,21 +653,16 @@ module testbenchfp; .SumM(SumE), .NegSumM(NegSumE), .InvZM(InvZE), .FmaNormCntM(FmaNormCntE), .ZSgnEffM(ZSgnEffE), .PSgnM(PSgnE), .FmtM(ModFmt), .FrmM(FrmVal), .PostProcFlgM(Flg), .PostProcResM(FpRes), .FCvtIntResM(IntRes)); -fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), + fcvt fcvt (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .ForwardedSrcAE(SrcA), .FWriteIntE(WriteIntVal), .XZeroE(XZero), .XDenormE(XDenorm), .FOpCtrlE(OpCtrlVal), .IntZeroE, .FmtE(ModFmt), .CvtCalcExpE, .CvtShiftAmtE, .CvtResDenormUfE, .CvtResSgnE, .CvtLzcInE); fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - // fcvtint fcvtint (.XSgnE(XSgn), .XExpE(XExp), .XManE(XMan), .XZeroE(XZero), .XNaNE(XNaN), .XInfE(XInf), - // .XDenormE(XDenorm), 
.ForwardedSrcAE(SrcA), .FOpCtrlE, .FmtE(ModFmt), .FrmE(Frmal), - // .CvtRes, .CvtFlgE); - // *** integrade divide and squareroot - // fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmVal[1:0]), .op_type(FOpCtrlQ), - // .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - // .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, - // .FDivBusyE, .done(FDivSqrtDoneE), .AS_Res(FDivRes), .Flg(FDivFlg)); - + srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivExp, + .XFrac(XMan[`NF-1:0]), .YFrac(YMan[`NF-1:0]), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), + .DivDone, .Quot, .Rem()); + assign CmpFlg[3:0] = 0; // produce clock @@ -817,7 +815,7 @@ end /////////////////////////////////////////////////////////////////////////////////////////////// // check if the non-fma test is correct - if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin + if(~((Res === Ans | NaNGood | NaNGood === 1'bx) & (ResFlg === AnsFlg | AnsFlg === 5'bx))&(DivDone&(UnitVal == `DIVUNIT))&(UnitVal !== `CVTINTUNIT)&(UnitVal !== `CMPUNIT)) begin errors += 1; $display("There is an error in %s", Tests[TestNum]); $display("inputs: %h %h %h\nSrcA: %h\n Res: %h %h\n Ans: %h %h", X, Y, Z, SrcA, Res, ResFlg, Ans, AnsFlg); @@ -840,8 +838,7 @@ end $stop; end - - VectorNum += 1; // increment the vector + if(DivDone|(UnitVal != `DIVUNIT)) VectorNum += 1; // increment the vector if (TestVectors[VectorNum][0] === 1'bx & Tests[TestNum] !== "") begin // if reached the end of file @@ -895,15 +892,17 @@ module readvectors ( output logic XDenormE, ZDenormE, // is XYZ denormalized output logic XZeroE, YZeroE, ZZeroE, // is XYZ zero output logic XInfE, YInfE, ZInfE, // is XYZ infinity - output logic XExpMaxE, + output logic XExpMaxE, + output logic DivStart, output logic [`FLEN-1:0] X, Y, Z ); // apply test vectors on rising edge 
of clk // Format of vectors Inputs(1/2/3)_AnsFlg - always @(posedge clk) begin + always @(TestNum) begin #1; AnsFlg = TestVector[4:0]; + DivStart = 1'b0; case (Unit) `FMAUNIT: case (Fmt) @@ -972,21 +971,29 @@ module readvectors ( X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; Ans = TestVector[8+(`Q_LEN-1):8]; + DivStart = 1'b1; #10 // one clk cycle + DivStart = 1'b0; end 2'b01: begin // double X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + DivStart = 1'b1; #10 + DivStart = 1'b0; end 2'b00: begin // single X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + DivStart = 1'b1; #10 + DivStart = 1'b0; end 2'b10: begin // half X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + DivStart = 1'b1; #10 + DivStart = 1'b0; end endcase `CMPUNIT: From d7a363aaa78823dfd45b9e82355de8b551fd3fb4 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 16:11:50 +0000 Subject: [PATCH 09/26] fixt lint error --- pipelined/src/fpu/fpu.sv | 38 ++++++++++++++++------------- pipelined/src/fpu/postprocess.sv | 5 ++-- pipelined/src/fpu/round.sv | 3 ++- pipelined/srt/srt-radix4.sv | 9 +++---- pipelined/testbench/testbench-fp.sv | 6 ++--- 5 files changed, 33 insertions(+), 28 deletions(-) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index b8a2e1918..da46d73e5 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -104,23 +104,27 @@ module fpu ( logic FOpCtrlQ; // Fma Signals - logic [3*`NF+5:0] SumE, SumM; - logic [`NE+1:0] ProdExpE, ProdExpM; - logic AddendStickyE, 
AddendStickyM; - logic KillProdE, KillProdM; - logic InvZE, InvZM; - logic NegSumE, NegSumM; - logic ZSgnEffE, ZSgnEffM; - logic PSgnE, PSgnM; - logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; + logic [3*`NF+5:0] SumE, SumM; + logic [`NE+1:0] ProdExpE, ProdExpM; + logic AddendStickyE, AddendStickyM; + logic KillProdE, KillProdM; + logic InvZE, InvZM; + logic NegSumE, NegSumM; + logic ZSgnEffE, ZSgnEffM; + logic PSgnE, PSgnM; + logic [$clog2(3*`NF+7)-1:0] FmaNormCntE, FmaNormCntM; // Cvt Signals - logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent - logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by - logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized - logic CvtResSgnE, CvtResSgnM; // the result's sign - logic IntZeroE, IntZeroM; // is the integer zero? - logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [`NE:0] CvtCalcExpE, CvtCalcExpM; // the calculated expoent + logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic CvtResDenormUfE, CvtResDenormUfM;// does the result underflow or is denormalized + logic CvtResSgnE, CvtResSgnM; // the result's sign + logic IntZeroE, IntZeroM; // is the integer zero? 
+ logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + + //divide signals + logic [`DIVLEN-1:0] Quot; + logic [`NE:0] DivCalcExpM; // result and flag signals logic [63:0] FDivResM, FDivResW; // divide/squareroot result @@ -352,8 +356,8 @@ module fpu ( assign FpLoadM = FResSelM[1]; postprocess postprocess(.XSgnM, .YSgnM, .ZExpM, .XManM, .YManM, .ZManM, .FrmM, .FmtM, .ProdExpM, - .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, - .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, + .AddendStickyM, .KillProdM, .XZeroM, .YZeroM, .ZZeroM, .XInfM, .YInfM, .Quot, + .ZInfM, .XNaNM, .YNaNM, .ZNaNM, .XSNaNM, .YSNaNM, .ZSNaNM, .SumM, .DivCalcExpM, .NegSumM, .InvZM, .ZDenormM, .ZSgnEffM, .PSgnM, .FOpCtrlM, .FmaNormCntM, .CvtCalcExpM, .CvtResDenormUfM,.CvtShiftAmtM, .CvtResSgnM, .FWriteIntM, .CvtLzcInM, .IntZeroM, .PostProcSelM, .PostProcResM, .PostProcFlgM, .FCvtIntResM); diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index c53920554..4b2870da4 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -51,6 +51,7 @@ module postprocess( input logic [2:0] FOpCtrlM, // choose which opperation (look below for values) input logic [$clog2(3*`NF+7)-1:0] FmaNormCntM, // the normalization shift count input logic [`NE:0] CvtCalcExpM, // the calculated expoent + input logic [`NE:0] DivCalcExpM, // the calculated expoent input logic CvtResDenormUfM, input logic [`LOGCVTLEN-1:0] CvtShiftAmtM, // how much to shift by input logic CvtResSgnM, // the result's sign @@ -148,7 +149,7 @@ module postprocess( ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; end 2'b01: begin //div ***prob can take out - ShiftAmt = 1'b0;//{DivShiftAmt}; + ShiftAmt = {$clog2(`NORMSHIFTSZ){1'b0}};//{DivShiftAmt}; ShiftIn = {Quot, {`NORMSHIFTSZ-`DIVLEN{1'b0}}}; end default: begin @@ -172,7 +173,7 @@ module postprocess( // round to infinity // round to nearest max 
magnitude - round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, + round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .DivCalcExpM, .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 92f1d4c27..8e3b9fe4a 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -23,6 +23,7 @@ module round( input logic [`NE+1:0] SumExp, // exponent of the normalized sum input logic RoundSgn, // the result's sign input logic [`NE:0] CvtCalcExpM, // the calculated expoent + input logic [`NE:0] DivCalcExpM, // the calculated expoent output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow output logic [`NF-1:0] ResFrac, // Result fraction @@ -303,7 +304,7 @@ module round( case(PostProcSelM) 2'b10: RoundExp = SumExp; // fma 2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt - 2'b01: RoundExp = 0; // divide + 2'b01: RoundExp = {DivCalcExpM[`NE], DivCalcExpM[`NE:0]}; // divide default: RoundExp = 0; endcase diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index 671c63500..6894a0f9c 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -43,13 +43,12 @@ module srtradix4 ( output logic DivDone, output logic [`DIVLEN-1:0] Quot, output logic [`XLEN-1:0] Rem, // *** later handle integers - output logic [`NE-1:0] DivExp + output logic [`NE:0] DivCalcExpE ); // logic qp, qz, qm; // quotient is +1, 0, or -1 logic [3:0] q; - logic [`NE-1:0] DivCalcExp; - logic calcSign; + logic [`NE:0] DivCalcExp; logic [`DIVLEN-1:0] X, Dpreproc; logic [`DIVLEN+3:0] WS, WSA, WSN; logic [`DIVLEN+3:0] WC, WCA, WCN; @@ -87,7 
+86,7 @@ module srtradix4 ( qsel4 qsel4(.D, .WS, .WC, .q); // Store the expoenent and sign until division is DivDone - flopen #(`NE) expflop(clk, DivStart, DivCalcExp, DivExp); + flopen #(`NE+1) expflop(clk, DivStart, DivCalcExp, DivCalcExpE); // Divisor Selection logic // *** radix 4 change to choose -2 to 2 @@ -303,7 +302,7 @@ endmodule ////////////// module expcalc( input logic [`NE-1:0] XExpE, YExpE, - output logic [`NE-1:0] DivCalcExp + output logic [`NE:0] DivCalcExp ); assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS); diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 748670b46..70787b3cb 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -70,7 +70,7 @@ module testbenchfp; logic PSgnE; logic DivSgn; logic [`DIVLEN-1:0] Quot; - logic [`NE-1:0] DivExp; + logic [`NE:0] DivCalcExp; /////////////////////////////////////////////////////////////////////////////////////////////// @@ -643,7 +643,7 @@ module testbenchfp; .ProdExpE, .AddendStickyE, .KillProdE); postprocess postprocess(.XSgnM(XSgn), .YSgnM(YSgn), .PostProcSelM(UnitVal[1:0]), - .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, + .ZExpM(ZExp), .ZDenormM(ZDenorm), .FOpCtrlM(OpCtrlVal), .Quot, .DivCalcExpM(DivCalcExp), .XManM(XMan), .YManM(YMan), .ZManM(ZMan), .CvtCalcExpM(CvtCalcExpE), .XNaNM(XNaN), .YNaNM(YNaN), .ZNaNM(ZNaN), .CvtResDenormUfM(CvtResDenormUfE), .XZeroM(XZero), .YZeroM(YZero), .ZZeroM(ZZero), .CvtShiftAmtM(CvtShiftAmtE), @@ -659,7 +659,7 @@ module testbenchfp; fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), .XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivExp, + srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), 
.YExpE(YExp), .DivCalcExpE(DivCalcExp), .XFrac(XMan[`NF-1:0]), .YFrac(YMan[`NF-1:0]), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .DivDone, .Quot, .Rem()); From fe1b7a67cbbd9fa8c906b9050424074a2e5f6333 Mon Sep 17 00:00:00 2001 From: James Stine Date: Thu, 23 Jun 2022 11:46:44 -0500 Subject: [PATCH 10/26] Add sqrt qlsc table generator --- pipelined/srt/Makefile | 8 +- pipelined/srt/qslc_sqrt_r4a2 | Bin 0 -> 16152 bytes pipelined/srt/qslc_sqrt_r4a2.c | 198 ++++++ pipelined/srt/qslc_sqrt_r4a2.sv | 1026 +++++++++++++++++++++++++++++++ 4 files changed, 1230 insertions(+), 2 deletions(-) create mode 100755 pipelined/srt/qslc_sqrt_r4a2 create mode 100644 pipelined/srt/qslc_sqrt_r4a2.c create mode 100644 pipelined/srt/qslc_sqrt_r4a2.sv diff --git a/pipelined/srt/Makefile b/pipelined/srt/Makefile index 63146339c..49b21be7a 100644 --- a/pipelined/srt/Makefile +++ b/pipelined/srt/Makefile @@ -1,4 +1,4 @@ -all: exptestgen testgen qslc_r4a2 qslc_r4a2b +all: exptestgen testgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 sqrttestgen: sqrttestgen.c gcc sqrttestgen.c -o sqrttestgen -lm @@ -19,5 +19,9 @@ qslc_r4a2b: qslc_r4a2b.c gcc qslc_r4a2b.c -o qslc_r4a2b -lm ./qslc_r4a2b > qslc_r4a2b.tv +qslc_sqrt_r4a2: qslc_sqrt_r4a2.c + gcc qslc_sqrt_r4a2.c -o qslc_sqrt_r4a2 -lm + ./qslc_sqrt_r4a2 > qslc_sqrt_r4a2.sv + clean: - rm -f testgen exptestgen qslc_r4a2 + rm -f testgen exptestgen qslc_r4a2 qslc_r4a2b qslc_sqrt_r4a2 diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2 new file mode 100755 index 0000000000000000000000000000000000000000..047de1ba3bf6e4421c0a8c69adbcce1eb2e9b293 GIT binary patch literal 16152 zcmeHOdvsLA8J`V=BnWIkP(X$CfdY~xfrKIw36F)12b4lM;`3&c-6VTVvg_`Khf0V> zl$;n#ZL6{NwAACX=&4n!^yraAlt)inqqR1D8WlYotr7xiM0ET6X6D;WZWG(n_OEv5 zoO{2S@B96}nfdPAduMj%ZuP8Op5br^PQ%0{f=cmRr76*d_21ALh!QbhjK+DYm?B1? 
zJY4FOy+jFERqD(bD6_SG7$or~Qf3f&rcw)*+(MGX8!BaIDGN)X`$4=+GU>8izCBX$ zSh77;pG(<=O+H8fB$g(RVzVPpn#GRdfwD=*WxJNlqayKkYTi!GV<{M~CC8IuLdSd^ zPrJ;AjS@>cuSWCia+#tDmQ)9&weQ9J|4x0YHLu+z-3Yr}sMLZb*Y{2EC@%kM;>)~7 z*EiI42 z;-u4I6d6sb%>KwWUfT&@R#aN{!kY(16mP!!(^-?3JpY`3-+aPQyNN>>O5~3(Ih84& zgM%+qrZ^k+fu!eLLdpm5oh zC|`=>oI&)$DlJ=O`2&$abubzWM9Nkztqs)&%6t{I0byCyb)kAI8uLYBmZhMB0>hz= z09S7ESyjP$Uv2P4;$kFJYHpn`STDj2u_&D?YkUzAjs)vtRRWkg8XWT|SE+IZyCK=)}0weMBhwuBHW0k&LKVMj1uJzd^%FcQ&cP>(T8hhrr>@xIx z4-hh8==GynI_NR(F}D!MtK811x1kZ}U|aDd zaJ_Ar<49c87n^{tzX=d^Jtx`aoZ3XVeOdzFO~$sOcSy|m%G=W8-TUz(@7}&)UdMj# zYhT62!@)z^L3XmM$~hJNVm`&EshG^7;lg#^=Hgo^>uq^2Hp<&td^-w>?yr)`gdf4# zpScBP$F<-N;CF9?ksPf94;L%H*E!acXxVbOrJ<|Wnd?a$b2g5CdW;ZN&a%B^>umLO zeb!m!JQDA9&WxjZ4@=|pgH>Ka83i+G{uaEs^ohhz5S zqmU>EuGw?k(ds#Vv8Su?gYL(nC9j}(5*ude3Ot8V!?FQ2EC&-UTb1Z_u9q!JnC{Er za6t3229R^OdEORJAN8+K)i+AD>X%<8lOImnQ$Q_z5qTMbUgtH0MOcgmw;$SPL@n3( zZ00%mRi(MBZ+qq>WSq}sj)SP0mUXhKB{K_IjEa+^=;|Cu#NhdW=Oii--K|)39PsqX z{H4eP_Mw99CS9TiSU7(LYu4tjEa$U`&yhojJcYaQ;EROWL$`p6{#3);x;)85pQNX4d|}Nm=`fRPD`uS#R5%i}D+GarCm(V9n=tzRz1}J>oXBv)v0mJpm zN2%fC4B4`)Oz3`smMG{H1FAYvXhQEN=mZ5lHnYFKpURevF`;3Cet=ac`g_oT9+uF= zNFy{Jg6;tnop{4E%p$|X%ExVnkEyZ~-!q{Og5IQ{R~k^&iI53>^8}!)71V7&kIR;o zn$TSYou;6}3}}~xo@+vz2zmk|8J%#NhH5gr4FdwPXGVX_R3|#Hj!zAzg#>+8LBDH2 zRVRL8LdO!cRY8LWRCVGu6Z-z=fR-!hB?k1bvJ=;t(B}zS1SmRjscCqC3}-4IC#Lsz z;#_$pPBEby33?Q-7xed41A2;tzK7p4QvF>?(ESSfpaE5#*keM^Cg}YNy4isKS$5(+ z6Z&Z%p!Aa^ICMgwur967+rbi2VXj$y6ZLlHu1d zAP~C@9~a7&y+ala$n8sPE` z-0Kpz+QdCUxS3$n`yy4#y`&uvt=rVz0xfh?lrFIbMO5(Wv{ZbWyS6sR9XLDU9esEW zBG)$l74(Z@@Cb19s?F)Z%l7*yG<*6S&0G4mI`4WM*BkH8ZKUkVk}k54X?Y3CuvGTt zbZpNINYY7=PG&jpx*jC7=I%_{?!?QVB$M)mb!eZQQ_4AQY8FdvNT+lJ(h*2UARPfS z0`xzUnQ|(9(ZKYZmR()6VD7cEuBeMvM7PWkIXMOS7gUG^t{Ys%u0rGr^7HdWKF$Ie zIu*!VfmpwOy|`H91nT|dNQfUDOD6rtlgSx)K7WkkX}mQy;eA(($9D!MJUVSP+kkwD zd-5HR;FXE*~@TQs*ffvzx@C$qQ!A zJ?|V+6aN|<^MR8@QX-K)h9e*P5=c%|*PS>PqpkB$zwnI7y**>;=n=#69B7hk%i~aA zfj%6^#G>E+VHVlbDII}y1kw>mM<5-6bOh27NJk(Yfpi4^cO$_2@^~K}Eg2~B0*T8l 
zN-gkLjY@hkGFlhkn5;6BbeZ@6ouSLTvd}QW`v7UZLW%94e3cAQ`APgRP6ncjLG@^9665>%^G3yh$ zVchPx#^GmsnYjKby39EIry~1@xJniNoRj5t?GNiyS>|?O87Iq)+KwfxEOS4!`~R9q z?fbTQ%-7euM9bA$mTPH)|I=c2iGr+Hx^#hS`nrmS`dEXjpuj!Hou5~5QG+ZO%+Q*z z@kZlwxHy`$L+Cmy&}rtv$B>5RddOy-yWIndah5dcJS8T9hK= z5R*l2iPG@>L%XXT;1Vvyc{Y3K zmxKA)FY%`f`#5-2(oRk7pJy-cNc_aq{lxk{WX2*6<4crA-@f7`ii7P3&qbgH)8|3& zQZ5FZTcsXPo00))Q96X5tDw&nlTz1(S-v6gVbW)e75rRd{DvX$E!w}mJ@-KGVzMD0 zf_`kqB*FIqo1P*)MM9G}ZhN86MLZ{#FjC2HK|cN%Sr*_)?9D&}&P7RrZ2iG}9nX~6XKQ04r58M4E)>cMh1?~{` zz56PHR?Jr|poqpAs;bJUjDE9=77EuNMWU?fZrEGC83S|>WhSog8qu2 zUoR^rxccq@tr++LgXNEgtQudvpFVCVy&Nciu-tpy;cKK|h5U_!-2&rqSgP!X4dQ7xPs>#v)4Aa1P%@1R`PKt`Efm?&|sm zcQ_IX2O_b}hN_|=h_7pceyv)(q%@C4n}pZ+qBX+p-&~I>l#E4`WkVnm4TkCm6fD>x zfm$CCbS_*Q6K>f|H?r>P5Q>zE1}cR+7TAO$4L_hm@~XH4HG06+_~DI9%7+?xiotnQ z<*N(AnW7`y!i^zchmp*;*ncExo`zc*?;@4#-^uYjO`9c_^CZv3L31PK5A{75VJY!^ zNR@<`hm6U$e-}o-mz#;7i>#yfuF2>3K1&w^wY2lEfF6Gpgt+}Y|76Ma^Ze7^ejoTW z_hdfLPg(ZpdZ~>RXXf)fF$f(^RhZB7U6wq*1w_7FM(eE2xhiEslt(+(M0~{vbrFks5|IE?mR5g#-_+brT$@ro8PuloRn!vJziCVHh zmcOv^@^_qeJeOG>1;=Udd41cS zN8b9!t19)Eng1mWG`C_i-+wu16N4uY>ehp{0g8ApCG%7<_N~Ywoqb%w? 
yUpwUIFn#|?Q(tZmKYwVw$Zc0lb +#include + +#define DIVISOR_SIZE 3 +#define CARRY_SIZE 7 +#define SUM_SIZE 7 +#define TOT_SIZE 7 + +void disp_binary(double, int, int); + +struct bits { + unsigned int divisor : DIVISOR_SIZE; + int tot : TOT_SIZE; +} pla; + +/* + + Function: disp_binary + Description: This function displays a Double-Precision number into + four 16 bit integers using the global union variable + dp_number + Argument List: double x The value to be converted + int bits_to_left Number of bits left of radix point + int bits_to_right Number of bits right of radix point + Return value: none + +*/ +void disp_binary(double x, int bits_to_left, int bits_to_right) { + int i; + double diff; + + if (fabs(x) < pow(2.0, ((double) -bits_to_right)) ) { + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + printf("0"); + } + if (i == bits_to_right+1) + ; + + return; + } + + if (x < 0.0) + x = pow(2.0, ((double) bits_to_left)) + x; + + for (i = -bits_to_left + 1; i <= bits_to_right; i++) { + diff = pow(2.0, ((double) -i) ); + if (x < diff) + printf("0"); + else { + printf("1"); + x -= diff; + } + if (i == 0) + ; + + } + +} + +int main() { + int m; + int n; + int o; + pla.divisor = 0; + pla.tot = 0; + printf("\tcase({D[5:3],Wmsbs})\n"); + for (o=0; o < pow(2.0, DIVISOR_SIZE); o++) { + for (m=0; m < pow(2.0, TOT_SIZE); m++) { + printf("\t\t10'b"); + disp_binary((double) pla.divisor, DIVISOR_SIZE, 0); + printf("_"); + disp_binary((double) pla.tot, TOT_SIZE, 0); + printf(": q = 4'b"); + + /* + 4 bits for Radix 4 (a=2) + 1000 = +2 + 0100 = +1 + 0000 = 0 + 0010 = -1 + 0001 = -2 + */ + switch (pla.divisor) { + case 0: + if ((pla.tot) >= 24) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -8) + printf("0000"); + else if ((pla.tot) >= -26) + printf("0010"); + else + printf("0001"); + break; + case 1: + if ((pla.tot) >= 28) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -10) + printf("0000"); + else 
if ((pla.tot) >= -28) + printf("0010"); + else + printf("0001"); + break; + case 2: + if ((pla.tot) >= 32) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -32) + printf("0010"); + else + printf("0001"); + break; + case 3: + if ((pla.tot) >= 32) + printf("1000"); + else if ((pla.tot) >= 8) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -34) + printf("0010"); + else + printf("0001"); + break; + case 4: + if ((pla.tot) >= 36) + printf("1000"); + else if ((pla.tot) >= 12) + printf("0100"); + else if ((pla.tot) >= -12) + printf("0000"); + else if ((pla.tot) >= -36) + printf("0010"); + else + printf("0001"); + break; + case 5: + if ((pla.tot) >= 40) + printf("1000"); + else if ((pla.tot) >= 12) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -40) + printf("0010"); + else + printf("0001"); + break; + case 6: + if ((pla.tot) >= 40) + printf("1000"); + else if ((pla.tot) >= 16) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -44) + printf("0010"); + else + printf("0001"); + break; + case 7: + if ((pla.tot) >= 44) + printf("1000"); + else if ((pla.tot) >= 16) + printf("0100"); + else if ((pla.tot) >= -16) + printf("0000"); + else if ((pla.tot) >= -46) + printf("0010"); + else + printf("0001"); + break; + default: printf ("XXX"); + + } + + printf(";\n"); + (pla.tot)++; + } + (pla.divisor)++; + } + printf("\tendcase\n"); + +} diff --git a/pipelined/srt/qslc_sqrt_r4a2.sv b/pipelined/srt/qslc_sqrt_r4a2.sv new file mode 100644 index 000000000..be4e3e392 --- /dev/null +++ b/pipelined/srt/qslc_sqrt_r4a2.sv @@ -0,0 +1,1026 @@ + case({D[5:3],Wmsbs}) + 10'b000_0000000: q = 4'b0000; + 10'b000_0000001: q = 4'b0000; + 10'b000_0000010: q = 4'b0000; + 10'b000_0000011: q = 4'b0000; + 10'b000_0000100: q = 4'b0000; + 10'b000_0000101: q = 4'b0000; + 10'b000_0000110: q = 4'b0000; + 
10'b000_0000111: q = 4'b0000; + 10'b000_0001000: q = 4'b0100; + 10'b000_0001001: q = 4'b0100; + 10'b000_0001010: q = 4'b0100; + 10'b000_0001011: q = 4'b0100; + 10'b000_0001100: q = 4'b0100; + 10'b000_0001101: q = 4'b0100; + 10'b000_0001110: q = 4'b0100; + 10'b000_0001111: q = 4'b0100; + 10'b000_0010000: q = 4'b0100; + 10'b000_0010001: q = 4'b0100; + 10'b000_0010010: q = 4'b0100; + 10'b000_0010011: q = 4'b0100; + 10'b000_0010100: q = 4'b0100; + 10'b000_0010101: q = 4'b0100; + 10'b000_0010110: q = 4'b0100; + 10'b000_0010111: q = 4'b0100; + 10'b000_0011000: q = 4'b1000; + 10'b000_0011001: q = 4'b1000; + 10'b000_0011010: q = 4'b1000; + 10'b000_0011011: q = 4'b1000; + 10'b000_0011100: q = 4'b1000; + 10'b000_0011101: q = 4'b1000; + 10'b000_0011110: q = 4'b1000; + 10'b000_0011111: q = 4'b1000; + 10'b000_0100000: q = 4'b1000; + 10'b000_0100001: q = 4'b1000; + 10'b000_0100010: q = 4'b1000; + 10'b000_0100011: q = 4'b1000; + 10'b000_0100100: q = 4'b1000; + 10'b000_0100101: q = 4'b1000; + 10'b000_0100110: q = 4'b1000; + 10'b000_0100111: q = 4'b1000; + 10'b000_0101000: q = 4'b1000; + 10'b000_0101001: q = 4'b1000; + 10'b000_0101010: q = 4'b1000; + 10'b000_0101011: q = 4'b1000; + 10'b000_0101100: q = 4'b1000; + 10'b000_0101101: q = 4'b1000; + 10'b000_0101110: q = 4'b1000; + 10'b000_0101111: q = 4'b1000; + 10'b000_0110000: q = 4'b1000; + 10'b000_0110001: q = 4'b1000; + 10'b000_0110010: q = 4'b1000; + 10'b000_0110011: q = 4'b1000; + 10'b000_0110100: q = 4'b1000; + 10'b000_0110101: q = 4'b1000; + 10'b000_0110110: q = 4'b1000; + 10'b000_0110111: q = 4'b1000; + 10'b000_0111000: q = 4'b1000; + 10'b000_0111001: q = 4'b1000; + 10'b000_0111010: q = 4'b1000; + 10'b000_0111011: q = 4'b1000; + 10'b000_0111100: q = 4'b1000; + 10'b000_0111101: q = 4'b1000; + 10'b000_0111110: q = 4'b1000; + 10'b000_0111111: q = 4'b1000; + 10'b000_1000000: q = 4'b0001; + 10'b000_1000001: q = 4'b0001; + 10'b000_1000010: q = 4'b0001; + 10'b000_1000011: q = 4'b0001; + 10'b000_1000100: q = 4'b0001; + 
10'b000_1000101: q = 4'b0001; + 10'b000_1000110: q = 4'b0001; + 10'b000_1000111: q = 4'b0001; + 10'b000_1001000: q = 4'b0001; + 10'b000_1001001: q = 4'b0001; + 10'b000_1001010: q = 4'b0001; + 10'b000_1001011: q = 4'b0001; + 10'b000_1001100: q = 4'b0001; + 10'b000_1001101: q = 4'b0001; + 10'b000_1001110: q = 4'b0001; + 10'b000_1001111: q = 4'b0001; + 10'b000_1010000: q = 4'b0001; + 10'b000_1010001: q = 4'b0001; + 10'b000_1010010: q = 4'b0001; + 10'b000_1010011: q = 4'b0001; + 10'b000_1010100: q = 4'b0001; + 10'b000_1010101: q = 4'b0001; + 10'b000_1010110: q = 4'b0001; + 10'b000_1010111: q = 4'b0001; + 10'b000_1011000: q = 4'b0001; + 10'b000_1011001: q = 4'b0001; + 10'b000_1011010: q = 4'b0001; + 10'b000_1011011: q = 4'b0001; + 10'b000_1011100: q = 4'b0001; + 10'b000_1011101: q = 4'b0001; + 10'b000_1011110: q = 4'b0001; + 10'b000_1011111: q = 4'b0001; + 10'b000_1100000: q = 4'b0001; + 10'b000_1100001: q = 4'b0001; + 10'b000_1100010: q = 4'b0001; + 10'b000_1100011: q = 4'b0001; + 10'b000_1100100: q = 4'b0001; + 10'b000_1100101: q = 4'b0001; + 10'b000_1100110: q = 4'b0010; + 10'b000_1100111: q = 4'b0010; + 10'b000_1101000: q = 4'b0010; + 10'b000_1101001: q = 4'b0010; + 10'b000_1101010: q = 4'b0010; + 10'b000_1101011: q = 4'b0010; + 10'b000_1101100: q = 4'b0010; + 10'b000_1101101: q = 4'b0010; + 10'b000_1101110: q = 4'b0010; + 10'b000_1101111: q = 4'b0010; + 10'b000_1110000: q = 4'b0010; + 10'b000_1110001: q = 4'b0010; + 10'b000_1110010: q = 4'b0010; + 10'b000_1110011: q = 4'b0010; + 10'b000_1110100: q = 4'b0010; + 10'b000_1110101: q = 4'b0010; + 10'b000_1110110: q = 4'b0010; + 10'b000_1110111: q = 4'b0010; + 10'b000_1111000: q = 4'b0000; + 10'b000_1111001: q = 4'b0000; + 10'b000_1111010: q = 4'b0000; + 10'b000_1111011: q = 4'b0000; + 10'b000_1111100: q = 4'b0000; + 10'b000_1111101: q = 4'b0000; + 10'b000_1111110: q = 4'b0000; + 10'b000_1111111: q = 4'b0000; + 10'b001_0000000: q = 4'b0000; + 10'b001_0000001: q = 4'b0000; + 10'b001_0000010: q = 4'b0000; + 
10'b001_0000011: q = 4'b0000; + 10'b001_0000100: q = 4'b0000; + 10'b001_0000101: q = 4'b0000; + 10'b001_0000110: q = 4'b0000; + 10'b001_0000111: q = 4'b0000; + 10'b001_0001000: q = 4'b0100; + 10'b001_0001001: q = 4'b0100; + 10'b001_0001010: q = 4'b0100; + 10'b001_0001011: q = 4'b0100; + 10'b001_0001100: q = 4'b0100; + 10'b001_0001101: q = 4'b0100; + 10'b001_0001110: q = 4'b0100; + 10'b001_0001111: q = 4'b0100; + 10'b001_0010000: q = 4'b0100; + 10'b001_0010001: q = 4'b0100; + 10'b001_0010010: q = 4'b0100; + 10'b001_0010011: q = 4'b0100; + 10'b001_0010100: q = 4'b0100; + 10'b001_0010101: q = 4'b0100; + 10'b001_0010110: q = 4'b0100; + 10'b001_0010111: q = 4'b0100; + 10'b001_0011000: q = 4'b0100; + 10'b001_0011001: q = 4'b0100; + 10'b001_0011010: q = 4'b0100; + 10'b001_0011011: q = 4'b0100; + 10'b001_0011100: q = 4'b1000; + 10'b001_0011101: q = 4'b1000; + 10'b001_0011110: q = 4'b1000; + 10'b001_0011111: q = 4'b1000; + 10'b001_0100000: q = 4'b1000; + 10'b001_0100001: q = 4'b1000; + 10'b001_0100010: q = 4'b1000; + 10'b001_0100011: q = 4'b1000; + 10'b001_0100100: q = 4'b1000; + 10'b001_0100101: q = 4'b1000; + 10'b001_0100110: q = 4'b1000; + 10'b001_0100111: q = 4'b1000; + 10'b001_0101000: q = 4'b1000; + 10'b001_0101001: q = 4'b1000; + 10'b001_0101010: q = 4'b1000; + 10'b001_0101011: q = 4'b1000; + 10'b001_0101100: q = 4'b1000; + 10'b001_0101101: q = 4'b1000; + 10'b001_0101110: q = 4'b1000; + 10'b001_0101111: q = 4'b1000; + 10'b001_0110000: q = 4'b1000; + 10'b001_0110001: q = 4'b1000; + 10'b001_0110010: q = 4'b1000; + 10'b001_0110011: q = 4'b1000; + 10'b001_0110100: q = 4'b1000; + 10'b001_0110101: q = 4'b1000; + 10'b001_0110110: q = 4'b1000; + 10'b001_0110111: q = 4'b1000; + 10'b001_0111000: q = 4'b1000; + 10'b001_0111001: q = 4'b1000; + 10'b001_0111010: q = 4'b1000; + 10'b001_0111011: q = 4'b1000; + 10'b001_0111100: q = 4'b1000; + 10'b001_0111101: q = 4'b1000; + 10'b001_0111110: q = 4'b1000; + 10'b001_0111111: q = 4'b1000; + 10'b001_1000000: q = 4'b0001; + 
10'b001_1000001: q = 4'b0001; + 10'b001_1000010: q = 4'b0001; + 10'b001_1000011: q = 4'b0001; + 10'b001_1000100: q = 4'b0001; + 10'b001_1000101: q = 4'b0001; + 10'b001_1000110: q = 4'b0001; + 10'b001_1000111: q = 4'b0001; + 10'b001_1001000: q = 4'b0001; + 10'b001_1001001: q = 4'b0001; + 10'b001_1001010: q = 4'b0001; + 10'b001_1001011: q = 4'b0001; + 10'b001_1001100: q = 4'b0001; + 10'b001_1001101: q = 4'b0001; + 10'b001_1001110: q = 4'b0001; + 10'b001_1001111: q = 4'b0001; + 10'b001_1010000: q = 4'b0001; + 10'b001_1010001: q = 4'b0001; + 10'b001_1010010: q = 4'b0001; + 10'b001_1010011: q = 4'b0001; + 10'b001_1010100: q = 4'b0001; + 10'b001_1010101: q = 4'b0001; + 10'b001_1010110: q = 4'b0001; + 10'b001_1010111: q = 4'b0001; + 10'b001_1011000: q = 4'b0001; + 10'b001_1011001: q = 4'b0001; + 10'b001_1011010: q = 4'b0001; + 10'b001_1011011: q = 4'b0001; + 10'b001_1011100: q = 4'b0001; + 10'b001_1011101: q = 4'b0001; + 10'b001_1011110: q = 4'b0001; + 10'b001_1011111: q = 4'b0001; + 10'b001_1100000: q = 4'b0001; + 10'b001_1100001: q = 4'b0001; + 10'b001_1100010: q = 4'b0001; + 10'b001_1100011: q = 4'b0001; + 10'b001_1100100: q = 4'b0010; + 10'b001_1100101: q = 4'b0010; + 10'b001_1100110: q = 4'b0010; + 10'b001_1100111: q = 4'b0010; + 10'b001_1101000: q = 4'b0010; + 10'b001_1101001: q = 4'b0010; + 10'b001_1101010: q = 4'b0010; + 10'b001_1101011: q = 4'b0010; + 10'b001_1101100: q = 4'b0010; + 10'b001_1101101: q = 4'b0010; + 10'b001_1101110: q = 4'b0010; + 10'b001_1101111: q = 4'b0010; + 10'b001_1110000: q = 4'b0010; + 10'b001_1110001: q = 4'b0010; + 10'b001_1110010: q = 4'b0010; + 10'b001_1110011: q = 4'b0010; + 10'b001_1110100: q = 4'b0010; + 10'b001_1110101: q = 4'b0010; + 10'b001_1110110: q = 4'b0000; + 10'b001_1110111: q = 4'b0000; + 10'b001_1111000: q = 4'b0000; + 10'b001_1111001: q = 4'b0000; + 10'b001_1111010: q = 4'b0000; + 10'b001_1111011: q = 4'b0000; + 10'b001_1111100: q = 4'b0000; + 10'b001_1111101: q = 4'b0000; + 10'b001_1111110: q = 4'b0000; + 
10'b001_1111111: q = 4'b0000; + 10'b010_0000000: q = 4'b0000; + 10'b010_0000001: q = 4'b0000; + 10'b010_0000010: q = 4'b0000; + 10'b010_0000011: q = 4'b0000; + 10'b010_0000100: q = 4'b0000; + 10'b010_0000101: q = 4'b0000; + 10'b010_0000110: q = 4'b0000; + 10'b010_0000111: q = 4'b0000; + 10'b010_0001000: q = 4'b0100; + 10'b010_0001001: q = 4'b0100; + 10'b010_0001010: q = 4'b0100; + 10'b010_0001011: q = 4'b0100; + 10'b010_0001100: q = 4'b0100; + 10'b010_0001101: q = 4'b0100; + 10'b010_0001110: q = 4'b0100; + 10'b010_0001111: q = 4'b0100; + 10'b010_0010000: q = 4'b0100; + 10'b010_0010001: q = 4'b0100; + 10'b010_0010010: q = 4'b0100; + 10'b010_0010011: q = 4'b0100; + 10'b010_0010100: q = 4'b0100; + 10'b010_0010101: q = 4'b0100; + 10'b010_0010110: q = 4'b0100; + 10'b010_0010111: q = 4'b0100; + 10'b010_0011000: q = 4'b0100; + 10'b010_0011001: q = 4'b0100; + 10'b010_0011010: q = 4'b0100; + 10'b010_0011011: q = 4'b0100; + 10'b010_0011100: q = 4'b0100; + 10'b010_0011101: q = 4'b0100; + 10'b010_0011110: q = 4'b0100; + 10'b010_0011111: q = 4'b0100; + 10'b010_0100000: q = 4'b1000; + 10'b010_0100001: q = 4'b1000; + 10'b010_0100010: q = 4'b1000; + 10'b010_0100011: q = 4'b1000; + 10'b010_0100100: q = 4'b1000; + 10'b010_0100101: q = 4'b1000; + 10'b010_0100110: q = 4'b1000; + 10'b010_0100111: q = 4'b1000; + 10'b010_0101000: q = 4'b1000; + 10'b010_0101001: q = 4'b1000; + 10'b010_0101010: q = 4'b1000; + 10'b010_0101011: q = 4'b1000; + 10'b010_0101100: q = 4'b1000; + 10'b010_0101101: q = 4'b1000; + 10'b010_0101110: q = 4'b1000; + 10'b010_0101111: q = 4'b1000; + 10'b010_0110000: q = 4'b1000; + 10'b010_0110001: q = 4'b1000; + 10'b010_0110010: q = 4'b1000; + 10'b010_0110011: q = 4'b1000; + 10'b010_0110100: q = 4'b1000; + 10'b010_0110101: q = 4'b1000; + 10'b010_0110110: q = 4'b1000; + 10'b010_0110111: q = 4'b1000; + 10'b010_0111000: q = 4'b1000; + 10'b010_0111001: q = 4'b1000; + 10'b010_0111010: q = 4'b1000; + 10'b010_0111011: q = 4'b1000; + 10'b010_0111100: q = 4'b1000; + 
10'b010_0111101: q = 4'b1000; + 10'b010_0111110: q = 4'b1000; + 10'b010_0111111: q = 4'b1000; + 10'b010_1000000: q = 4'b0001; + 10'b010_1000001: q = 4'b0001; + 10'b010_1000010: q = 4'b0001; + 10'b010_1000011: q = 4'b0001; + 10'b010_1000100: q = 4'b0001; + 10'b010_1000101: q = 4'b0001; + 10'b010_1000110: q = 4'b0001; + 10'b010_1000111: q = 4'b0001; + 10'b010_1001000: q = 4'b0001; + 10'b010_1001001: q = 4'b0001; + 10'b010_1001010: q = 4'b0001; + 10'b010_1001011: q = 4'b0001; + 10'b010_1001100: q = 4'b0001; + 10'b010_1001101: q = 4'b0001; + 10'b010_1001110: q = 4'b0001; + 10'b010_1001111: q = 4'b0001; + 10'b010_1010000: q = 4'b0001; + 10'b010_1010001: q = 4'b0001; + 10'b010_1010010: q = 4'b0001; + 10'b010_1010011: q = 4'b0001; + 10'b010_1010100: q = 4'b0001; + 10'b010_1010101: q = 4'b0001; + 10'b010_1010110: q = 4'b0001; + 10'b010_1010111: q = 4'b0001; + 10'b010_1011000: q = 4'b0001; + 10'b010_1011001: q = 4'b0001; + 10'b010_1011010: q = 4'b0001; + 10'b010_1011011: q = 4'b0001; + 10'b010_1011100: q = 4'b0001; + 10'b010_1011101: q = 4'b0001; + 10'b010_1011110: q = 4'b0001; + 10'b010_1011111: q = 4'b0001; + 10'b010_1100000: q = 4'b0010; + 10'b010_1100001: q = 4'b0010; + 10'b010_1100010: q = 4'b0010; + 10'b010_1100011: q = 4'b0010; + 10'b010_1100100: q = 4'b0010; + 10'b010_1100101: q = 4'b0010; + 10'b010_1100110: q = 4'b0010; + 10'b010_1100111: q = 4'b0010; + 10'b010_1101000: q = 4'b0010; + 10'b010_1101001: q = 4'b0010; + 10'b010_1101010: q = 4'b0010; + 10'b010_1101011: q = 4'b0010; + 10'b010_1101100: q = 4'b0010; + 10'b010_1101101: q = 4'b0010; + 10'b010_1101110: q = 4'b0010; + 10'b010_1101111: q = 4'b0010; + 10'b010_1110000: q = 4'b0010; + 10'b010_1110001: q = 4'b0010; + 10'b010_1110010: q = 4'b0010; + 10'b010_1110011: q = 4'b0010; + 10'b010_1110100: q = 4'b0000; + 10'b010_1110101: q = 4'b0000; + 10'b010_1110110: q = 4'b0000; + 10'b010_1110111: q = 4'b0000; + 10'b010_1111000: q = 4'b0000; + 10'b010_1111001: q = 4'b0000; + 10'b010_1111010: q = 4'b0000; + 
10'b010_1111011: q = 4'b0000; + 10'b010_1111100: q = 4'b0000; + 10'b010_1111101: q = 4'b0000; + 10'b010_1111110: q = 4'b0000; + 10'b010_1111111: q = 4'b0000; + 10'b011_0000000: q = 4'b0000; + 10'b011_0000001: q = 4'b0000; + 10'b011_0000010: q = 4'b0000; + 10'b011_0000011: q = 4'b0000; + 10'b011_0000100: q = 4'b0000; + 10'b011_0000101: q = 4'b0000; + 10'b011_0000110: q = 4'b0000; + 10'b011_0000111: q = 4'b0000; + 10'b011_0001000: q = 4'b0100; + 10'b011_0001001: q = 4'b0100; + 10'b011_0001010: q = 4'b0100; + 10'b011_0001011: q = 4'b0100; + 10'b011_0001100: q = 4'b0100; + 10'b011_0001101: q = 4'b0100; + 10'b011_0001110: q = 4'b0100; + 10'b011_0001111: q = 4'b0100; + 10'b011_0010000: q = 4'b0100; + 10'b011_0010001: q = 4'b0100; + 10'b011_0010010: q = 4'b0100; + 10'b011_0010011: q = 4'b0100; + 10'b011_0010100: q = 4'b0100; + 10'b011_0010101: q = 4'b0100; + 10'b011_0010110: q = 4'b0100; + 10'b011_0010111: q = 4'b0100; + 10'b011_0011000: q = 4'b0100; + 10'b011_0011001: q = 4'b0100; + 10'b011_0011010: q = 4'b0100; + 10'b011_0011011: q = 4'b0100; + 10'b011_0011100: q = 4'b0100; + 10'b011_0011101: q = 4'b0100; + 10'b011_0011110: q = 4'b0100; + 10'b011_0011111: q = 4'b0100; + 10'b011_0100000: q = 4'b1000; + 10'b011_0100001: q = 4'b1000; + 10'b011_0100010: q = 4'b1000; + 10'b011_0100011: q = 4'b1000; + 10'b011_0100100: q = 4'b1000; + 10'b011_0100101: q = 4'b1000; + 10'b011_0100110: q = 4'b1000; + 10'b011_0100111: q = 4'b1000; + 10'b011_0101000: q = 4'b1000; + 10'b011_0101001: q = 4'b1000; + 10'b011_0101010: q = 4'b1000; + 10'b011_0101011: q = 4'b1000; + 10'b011_0101100: q = 4'b1000; + 10'b011_0101101: q = 4'b1000; + 10'b011_0101110: q = 4'b1000; + 10'b011_0101111: q = 4'b1000; + 10'b011_0110000: q = 4'b1000; + 10'b011_0110001: q = 4'b1000; + 10'b011_0110010: q = 4'b1000; + 10'b011_0110011: q = 4'b1000; + 10'b011_0110100: q = 4'b1000; + 10'b011_0110101: q = 4'b1000; + 10'b011_0110110: q = 4'b1000; + 10'b011_0110111: q = 4'b1000; + 10'b011_0111000: q = 4'b1000; + 
10'b011_0111001: q = 4'b1000; + 10'b011_0111010: q = 4'b1000; + 10'b011_0111011: q = 4'b1000; + 10'b011_0111100: q = 4'b1000; + 10'b011_0111101: q = 4'b1000; + 10'b011_0111110: q = 4'b1000; + 10'b011_0111111: q = 4'b1000; + 10'b011_1000000: q = 4'b0001; + 10'b011_1000001: q = 4'b0001; + 10'b011_1000010: q = 4'b0001; + 10'b011_1000011: q = 4'b0001; + 10'b011_1000100: q = 4'b0001; + 10'b011_1000101: q = 4'b0001; + 10'b011_1000110: q = 4'b0001; + 10'b011_1000111: q = 4'b0001; + 10'b011_1001000: q = 4'b0001; + 10'b011_1001001: q = 4'b0001; + 10'b011_1001010: q = 4'b0001; + 10'b011_1001011: q = 4'b0001; + 10'b011_1001100: q = 4'b0001; + 10'b011_1001101: q = 4'b0001; + 10'b011_1001110: q = 4'b0001; + 10'b011_1001111: q = 4'b0001; + 10'b011_1010000: q = 4'b0001; + 10'b011_1010001: q = 4'b0001; + 10'b011_1010010: q = 4'b0001; + 10'b011_1010011: q = 4'b0001; + 10'b011_1010100: q = 4'b0001; + 10'b011_1010101: q = 4'b0001; + 10'b011_1010110: q = 4'b0001; + 10'b011_1010111: q = 4'b0001; + 10'b011_1011000: q = 4'b0001; + 10'b011_1011001: q = 4'b0001; + 10'b011_1011010: q = 4'b0001; + 10'b011_1011011: q = 4'b0001; + 10'b011_1011100: q = 4'b0001; + 10'b011_1011101: q = 4'b0001; + 10'b011_1011110: q = 4'b0010; + 10'b011_1011111: q = 4'b0010; + 10'b011_1100000: q = 4'b0010; + 10'b011_1100001: q = 4'b0010; + 10'b011_1100010: q = 4'b0010; + 10'b011_1100011: q = 4'b0010; + 10'b011_1100100: q = 4'b0010; + 10'b011_1100101: q = 4'b0010; + 10'b011_1100110: q = 4'b0010; + 10'b011_1100111: q = 4'b0010; + 10'b011_1101000: q = 4'b0010; + 10'b011_1101001: q = 4'b0010; + 10'b011_1101010: q = 4'b0010; + 10'b011_1101011: q = 4'b0010; + 10'b011_1101100: q = 4'b0010; + 10'b011_1101101: q = 4'b0010; + 10'b011_1101110: q = 4'b0010; + 10'b011_1101111: q = 4'b0010; + 10'b011_1110000: q = 4'b0010; + 10'b011_1110001: q = 4'b0010; + 10'b011_1110010: q = 4'b0010; + 10'b011_1110011: q = 4'b0010; + 10'b011_1110100: q = 4'b0000; + 10'b011_1110101: q = 4'b0000; + 10'b011_1110110: q = 4'b0000; + 
10'b011_1110111: q = 4'b0000; + 10'b011_1111000: q = 4'b0000; + 10'b011_1111001: q = 4'b0000; + 10'b011_1111010: q = 4'b0000; + 10'b011_1111011: q = 4'b0000; + 10'b011_1111100: q = 4'b0000; + 10'b011_1111101: q = 4'b0000; + 10'b011_1111110: q = 4'b0000; + 10'b011_1111111: q = 4'b0000; + 10'b100_0000000: q = 4'b0000; + 10'b100_0000001: q = 4'b0000; + 10'b100_0000010: q = 4'b0000; + 10'b100_0000011: q = 4'b0000; + 10'b100_0000100: q = 4'b0000; + 10'b100_0000101: q = 4'b0000; + 10'b100_0000110: q = 4'b0000; + 10'b100_0000111: q = 4'b0000; + 10'b100_0001000: q = 4'b0000; + 10'b100_0001001: q = 4'b0000; + 10'b100_0001010: q = 4'b0000; + 10'b100_0001011: q = 4'b0000; + 10'b100_0001100: q = 4'b0100; + 10'b100_0001101: q = 4'b0100; + 10'b100_0001110: q = 4'b0100; + 10'b100_0001111: q = 4'b0100; + 10'b100_0010000: q = 4'b0100; + 10'b100_0010001: q = 4'b0100; + 10'b100_0010010: q = 4'b0100; + 10'b100_0010011: q = 4'b0100; + 10'b100_0010100: q = 4'b0100; + 10'b100_0010101: q = 4'b0100; + 10'b100_0010110: q = 4'b0100; + 10'b100_0010111: q = 4'b0100; + 10'b100_0011000: q = 4'b0100; + 10'b100_0011001: q = 4'b0100; + 10'b100_0011010: q = 4'b0100; + 10'b100_0011011: q = 4'b0100; + 10'b100_0011100: q = 4'b0100; + 10'b100_0011101: q = 4'b0100; + 10'b100_0011110: q = 4'b0100; + 10'b100_0011111: q = 4'b0100; + 10'b100_0100000: q = 4'b0100; + 10'b100_0100001: q = 4'b0100; + 10'b100_0100010: q = 4'b0100; + 10'b100_0100011: q = 4'b0100; + 10'b100_0100100: q = 4'b1000; + 10'b100_0100101: q = 4'b1000; + 10'b100_0100110: q = 4'b1000; + 10'b100_0100111: q = 4'b1000; + 10'b100_0101000: q = 4'b1000; + 10'b100_0101001: q = 4'b1000; + 10'b100_0101010: q = 4'b1000; + 10'b100_0101011: q = 4'b1000; + 10'b100_0101100: q = 4'b1000; + 10'b100_0101101: q = 4'b1000; + 10'b100_0101110: q = 4'b1000; + 10'b100_0101111: q = 4'b1000; + 10'b100_0110000: q = 4'b1000; + 10'b100_0110001: q = 4'b1000; + 10'b100_0110010: q = 4'b1000; + 10'b100_0110011: q = 4'b1000; + 10'b100_0110100: q = 4'b1000; + 
10'b100_0110101: q = 4'b1000; + 10'b100_0110110: q = 4'b1000; + 10'b100_0110111: q = 4'b1000; + 10'b100_0111000: q = 4'b1000; + 10'b100_0111001: q = 4'b1000; + 10'b100_0111010: q = 4'b1000; + 10'b100_0111011: q = 4'b1000; + 10'b100_0111100: q = 4'b1000; + 10'b100_0111101: q = 4'b1000; + 10'b100_0111110: q = 4'b1000; + 10'b100_0111111: q = 4'b1000; + 10'b100_1000000: q = 4'b0001; + 10'b100_1000001: q = 4'b0001; + 10'b100_1000010: q = 4'b0001; + 10'b100_1000011: q = 4'b0001; + 10'b100_1000100: q = 4'b0001; + 10'b100_1000101: q = 4'b0001; + 10'b100_1000110: q = 4'b0001; + 10'b100_1000111: q = 4'b0001; + 10'b100_1001000: q = 4'b0001; + 10'b100_1001001: q = 4'b0001; + 10'b100_1001010: q = 4'b0001; + 10'b100_1001011: q = 4'b0001; + 10'b100_1001100: q = 4'b0001; + 10'b100_1001101: q = 4'b0001; + 10'b100_1001110: q = 4'b0001; + 10'b100_1001111: q = 4'b0001; + 10'b100_1010000: q = 4'b0001; + 10'b100_1010001: q = 4'b0001; + 10'b100_1010010: q = 4'b0001; + 10'b100_1010011: q = 4'b0001; + 10'b100_1010100: q = 4'b0001; + 10'b100_1010101: q = 4'b0001; + 10'b100_1010110: q = 4'b0001; + 10'b100_1010111: q = 4'b0001; + 10'b100_1011000: q = 4'b0001; + 10'b100_1011001: q = 4'b0001; + 10'b100_1011010: q = 4'b0001; + 10'b100_1011011: q = 4'b0001; + 10'b100_1011100: q = 4'b0010; + 10'b100_1011101: q = 4'b0010; + 10'b100_1011110: q = 4'b0010; + 10'b100_1011111: q = 4'b0010; + 10'b100_1100000: q = 4'b0010; + 10'b100_1100001: q = 4'b0010; + 10'b100_1100010: q = 4'b0010; + 10'b100_1100011: q = 4'b0010; + 10'b100_1100100: q = 4'b0010; + 10'b100_1100101: q = 4'b0010; + 10'b100_1100110: q = 4'b0010; + 10'b100_1100111: q = 4'b0010; + 10'b100_1101000: q = 4'b0010; + 10'b100_1101001: q = 4'b0010; + 10'b100_1101010: q = 4'b0010; + 10'b100_1101011: q = 4'b0010; + 10'b100_1101100: q = 4'b0010; + 10'b100_1101101: q = 4'b0010; + 10'b100_1101110: q = 4'b0010; + 10'b100_1101111: q = 4'b0010; + 10'b100_1110000: q = 4'b0010; + 10'b100_1110001: q = 4'b0010; + 10'b100_1110010: q = 4'b0010; + 
10'b100_1110011: q = 4'b0010; + 10'b100_1110100: q = 4'b0000; + 10'b100_1110101: q = 4'b0000; + 10'b100_1110110: q = 4'b0000; + 10'b100_1110111: q = 4'b0000; + 10'b100_1111000: q = 4'b0000; + 10'b100_1111001: q = 4'b0000; + 10'b100_1111010: q = 4'b0000; + 10'b100_1111011: q = 4'b0000; + 10'b100_1111100: q = 4'b0000; + 10'b100_1111101: q = 4'b0000; + 10'b100_1111110: q = 4'b0000; + 10'b100_1111111: q = 4'b0000; + 10'b101_0000000: q = 4'b0000; + 10'b101_0000001: q = 4'b0000; + 10'b101_0000010: q = 4'b0000; + 10'b101_0000011: q = 4'b0000; + 10'b101_0000100: q = 4'b0000; + 10'b101_0000101: q = 4'b0000; + 10'b101_0000110: q = 4'b0000; + 10'b101_0000111: q = 4'b0000; + 10'b101_0001000: q = 4'b0000; + 10'b101_0001001: q = 4'b0000; + 10'b101_0001010: q = 4'b0000; + 10'b101_0001011: q = 4'b0000; + 10'b101_0001100: q = 4'b0100; + 10'b101_0001101: q = 4'b0100; + 10'b101_0001110: q = 4'b0100; + 10'b101_0001111: q = 4'b0100; + 10'b101_0010000: q = 4'b0100; + 10'b101_0010001: q = 4'b0100; + 10'b101_0010010: q = 4'b0100; + 10'b101_0010011: q = 4'b0100; + 10'b101_0010100: q = 4'b0100; + 10'b101_0010101: q = 4'b0100; + 10'b101_0010110: q = 4'b0100; + 10'b101_0010111: q = 4'b0100; + 10'b101_0011000: q = 4'b0100; + 10'b101_0011001: q = 4'b0100; + 10'b101_0011010: q = 4'b0100; + 10'b101_0011011: q = 4'b0100; + 10'b101_0011100: q = 4'b0100; + 10'b101_0011101: q = 4'b0100; + 10'b101_0011110: q = 4'b0100; + 10'b101_0011111: q = 4'b0100; + 10'b101_0100000: q = 4'b0100; + 10'b101_0100001: q = 4'b0100; + 10'b101_0100010: q = 4'b0100; + 10'b101_0100011: q = 4'b0100; + 10'b101_0100100: q = 4'b0100; + 10'b101_0100101: q = 4'b0100; + 10'b101_0100110: q = 4'b0100; + 10'b101_0100111: q = 4'b0100; + 10'b101_0101000: q = 4'b1000; + 10'b101_0101001: q = 4'b1000; + 10'b101_0101010: q = 4'b1000; + 10'b101_0101011: q = 4'b1000; + 10'b101_0101100: q = 4'b1000; + 10'b101_0101101: q = 4'b1000; + 10'b101_0101110: q = 4'b1000; + 10'b101_0101111: q = 4'b1000; + 10'b101_0110000: q = 4'b1000; + 
10'b101_0110001: q = 4'b1000; + 10'b101_0110010: q = 4'b1000; + 10'b101_0110011: q = 4'b1000; + 10'b101_0110100: q = 4'b1000; + 10'b101_0110101: q = 4'b1000; + 10'b101_0110110: q = 4'b1000; + 10'b101_0110111: q = 4'b1000; + 10'b101_0111000: q = 4'b1000; + 10'b101_0111001: q = 4'b1000; + 10'b101_0111010: q = 4'b1000; + 10'b101_0111011: q = 4'b1000; + 10'b101_0111100: q = 4'b1000; + 10'b101_0111101: q = 4'b1000; + 10'b101_0111110: q = 4'b1000; + 10'b101_0111111: q = 4'b1000; + 10'b101_1000000: q = 4'b0001; + 10'b101_1000001: q = 4'b0001; + 10'b101_1000010: q = 4'b0001; + 10'b101_1000011: q = 4'b0001; + 10'b101_1000100: q = 4'b0001; + 10'b101_1000101: q = 4'b0001; + 10'b101_1000110: q = 4'b0001; + 10'b101_1000111: q = 4'b0001; + 10'b101_1001000: q = 4'b0001; + 10'b101_1001001: q = 4'b0001; + 10'b101_1001010: q = 4'b0001; + 10'b101_1001011: q = 4'b0001; + 10'b101_1001100: q = 4'b0001; + 10'b101_1001101: q = 4'b0001; + 10'b101_1001110: q = 4'b0001; + 10'b101_1001111: q = 4'b0001; + 10'b101_1010000: q = 4'b0001; + 10'b101_1010001: q = 4'b0001; + 10'b101_1010010: q = 4'b0001; + 10'b101_1010011: q = 4'b0001; + 10'b101_1010100: q = 4'b0001; + 10'b101_1010101: q = 4'b0001; + 10'b101_1010110: q = 4'b0001; + 10'b101_1010111: q = 4'b0001; + 10'b101_1011000: q = 4'b0010; + 10'b101_1011001: q = 4'b0010; + 10'b101_1011010: q = 4'b0010; + 10'b101_1011011: q = 4'b0010; + 10'b101_1011100: q = 4'b0010; + 10'b101_1011101: q = 4'b0010; + 10'b101_1011110: q = 4'b0010; + 10'b101_1011111: q = 4'b0010; + 10'b101_1100000: q = 4'b0010; + 10'b101_1100001: q = 4'b0010; + 10'b101_1100010: q = 4'b0010; + 10'b101_1100011: q = 4'b0010; + 10'b101_1100100: q = 4'b0010; + 10'b101_1100101: q = 4'b0010; + 10'b101_1100110: q = 4'b0010; + 10'b101_1100111: q = 4'b0010; + 10'b101_1101000: q = 4'b0010; + 10'b101_1101001: q = 4'b0010; + 10'b101_1101010: q = 4'b0010; + 10'b101_1101011: q = 4'b0010; + 10'b101_1101100: q = 4'b0010; + 10'b101_1101101: q = 4'b0010; + 10'b101_1101110: q = 4'b0010; + 
10'b101_1101111: q = 4'b0010; + 10'b101_1110000: q = 4'b0000; + 10'b101_1110001: q = 4'b0000; + 10'b101_1110010: q = 4'b0000; + 10'b101_1110011: q = 4'b0000; + 10'b101_1110100: q = 4'b0000; + 10'b101_1110101: q = 4'b0000; + 10'b101_1110110: q = 4'b0000; + 10'b101_1110111: q = 4'b0000; + 10'b101_1111000: q = 4'b0000; + 10'b101_1111001: q = 4'b0000; + 10'b101_1111010: q = 4'b0000; + 10'b101_1111011: q = 4'b0000; + 10'b101_1111100: q = 4'b0000; + 10'b101_1111101: q = 4'b0000; + 10'b101_1111110: q = 4'b0000; + 10'b101_1111111: q = 4'b0000; + 10'b110_0000000: q = 4'b0000; + 10'b110_0000001: q = 4'b0000; + 10'b110_0000010: q = 4'b0000; + 10'b110_0000011: q = 4'b0000; + 10'b110_0000100: q = 4'b0000; + 10'b110_0000101: q = 4'b0000; + 10'b110_0000110: q = 4'b0000; + 10'b110_0000111: q = 4'b0000; + 10'b110_0001000: q = 4'b0000; + 10'b110_0001001: q = 4'b0000; + 10'b110_0001010: q = 4'b0000; + 10'b110_0001011: q = 4'b0000; + 10'b110_0001100: q = 4'b0000; + 10'b110_0001101: q = 4'b0000; + 10'b110_0001110: q = 4'b0000; + 10'b110_0001111: q = 4'b0000; + 10'b110_0010000: q = 4'b0100; + 10'b110_0010001: q = 4'b0100; + 10'b110_0010010: q = 4'b0100; + 10'b110_0010011: q = 4'b0100; + 10'b110_0010100: q = 4'b0100; + 10'b110_0010101: q = 4'b0100; + 10'b110_0010110: q = 4'b0100; + 10'b110_0010111: q = 4'b0100; + 10'b110_0011000: q = 4'b0100; + 10'b110_0011001: q = 4'b0100; + 10'b110_0011010: q = 4'b0100; + 10'b110_0011011: q = 4'b0100; + 10'b110_0011100: q = 4'b0100; + 10'b110_0011101: q = 4'b0100; + 10'b110_0011110: q = 4'b0100; + 10'b110_0011111: q = 4'b0100; + 10'b110_0100000: q = 4'b0100; + 10'b110_0100001: q = 4'b0100; + 10'b110_0100010: q = 4'b0100; + 10'b110_0100011: q = 4'b0100; + 10'b110_0100100: q = 4'b0100; + 10'b110_0100101: q = 4'b0100; + 10'b110_0100110: q = 4'b0100; + 10'b110_0100111: q = 4'b0100; + 10'b110_0101000: q = 4'b1000; + 10'b110_0101001: q = 4'b1000; + 10'b110_0101010: q = 4'b1000; + 10'b110_0101011: q = 4'b1000; + 10'b110_0101100: q = 4'b1000; + 
10'b110_0101101: q = 4'b1000; + 10'b110_0101110: q = 4'b1000; + 10'b110_0101111: q = 4'b1000; + 10'b110_0110000: q = 4'b1000; + 10'b110_0110001: q = 4'b1000; + 10'b110_0110010: q = 4'b1000; + 10'b110_0110011: q = 4'b1000; + 10'b110_0110100: q = 4'b1000; + 10'b110_0110101: q = 4'b1000; + 10'b110_0110110: q = 4'b1000; + 10'b110_0110111: q = 4'b1000; + 10'b110_0111000: q = 4'b1000; + 10'b110_0111001: q = 4'b1000; + 10'b110_0111010: q = 4'b1000; + 10'b110_0111011: q = 4'b1000; + 10'b110_0111100: q = 4'b1000; + 10'b110_0111101: q = 4'b1000; + 10'b110_0111110: q = 4'b1000; + 10'b110_0111111: q = 4'b1000; + 10'b110_1000000: q = 4'b0001; + 10'b110_1000001: q = 4'b0001; + 10'b110_1000010: q = 4'b0001; + 10'b110_1000011: q = 4'b0001; + 10'b110_1000100: q = 4'b0001; + 10'b110_1000101: q = 4'b0001; + 10'b110_1000110: q = 4'b0001; + 10'b110_1000111: q = 4'b0001; + 10'b110_1001000: q = 4'b0001; + 10'b110_1001001: q = 4'b0001; + 10'b110_1001010: q = 4'b0001; + 10'b110_1001011: q = 4'b0001; + 10'b110_1001100: q = 4'b0001; + 10'b110_1001101: q = 4'b0001; + 10'b110_1001110: q = 4'b0001; + 10'b110_1001111: q = 4'b0001; + 10'b110_1010000: q = 4'b0001; + 10'b110_1010001: q = 4'b0001; + 10'b110_1010010: q = 4'b0001; + 10'b110_1010011: q = 4'b0001; + 10'b110_1010100: q = 4'b0010; + 10'b110_1010101: q = 4'b0010; + 10'b110_1010110: q = 4'b0010; + 10'b110_1010111: q = 4'b0010; + 10'b110_1011000: q = 4'b0010; + 10'b110_1011001: q = 4'b0010; + 10'b110_1011010: q = 4'b0010; + 10'b110_1011011: q = 4'b0010; + 10'b110_1011100: q = 4'b0010; + 10'b110_1011101: q = 4'b0010; + 10'b110_1011110: q = 4'b0010; + 10'b110_1011111: q = 4'b0010; + 10'b110_1100000: q = 4'b0010; + 10'b110_1100001: q = 4'b0010; + 10'b110_1100010: q = 4'b0010; + 10'b110_1100011: q = 4'b0010; + 10'b110_1100100: q = 4'b0010; + 10'b110_1100101: q = 4'b0010; + 10'b110_1100110: q = 4'b0010; + 10'b110_1100111: q = 4'b0010; + 10'b110_1101000: q = 4'b0010; + 10'b110_1101001: q = 4'b0010; + 10'b110_1101010: q = 4'b0010; + 
10'b110_1101011: q = 4'b0010; + 10'b110_1101100: q = 4'b0010; + 10'b110_1101101: q = 4'b0010; + 10'b110_1101110: q = 4'b0010; + 10'b110_1101111: q = 4'b0010; + 10'b110_1110000: q = 4'b0000; + 10'b110_1110001: q = 4'b0000; + 10'b110_1110010: q = 4'b0000; + 10'b110_1110011: q = 4'b0000; + 10'b110_1110100: q = 4'b0000; + 10'b110_1110101: q = 4'b0000; + 10'b110_1110110: q = 4'b0000; + 10'b110_1110111: q = 4'b0000; + 10'b110_1111000: q = 4'b0000; + 10'b110_1111001: q = 4'b0000; + 10'b110_1111010: q = 4'b0000; + 10'b110_1111011: q = 4'b0000; + 10'b110_1111100: q = 4'b0000; + 10'b110_1111101: q = 4'b0000; + 10'b110_1111110: q = 4'b0000; + 10'b110_1111111: q = 4'b0000; + 10'b111_0000000: q = 4'b0000; + 10'b111_0000001: q = 4'b0000; + 10'b111_0000010: q = 4'b0000; + 10'b111_0000011: q = 4'b0000; + 10'b111_0000100: q = 4'b0000; + 10'b111_0000101: q = 4'b0000; + 10'b111_0000110: q = 4'b0000; + 10'b111_0000111: q = 4'b0000; + 10'b111_0001000: q = 4'b0000; + 10'b111_0001001: q = 4'b0000; + 10'b111_0001010: q = 4'b0000; + 10'b111_0001011: q = 4'b0000; + 10'b111_0001100: q = 4'b0000; + 10'b111_0001101: q = 4'b0000; + 10'b111_0001110: q = 4'b0000; + 10'b111_0001111: q = 4'b0000; + 10'b111_0010000: q = 4'b0100; + 10'b111_0010001: q = 4'b0100; + 10'b111_0010010: q = 4'b0100; + 10'b111_0010011: q = 4'b0100; + 10'b111_0010100: q = 4'b0100; + 10'b111_0010101: q = 4'b0100; + 10'b111_0010110: q = 4'b0100; + 10'b111_0010111: q = 4'b0100; + 10'b111_0011000: q = 4'b0100; + 10'b111_0011001: q = 4'b0100; + 10'b111_0011010: q = 4'b0100; + 10'b111_0011011: q = 4'b0100; + 10'b111_0011100: q = 4'b0100; + 10'b111_0011101: q = 4'b0100; + 10'b111_0011110: q = 4'b0100; + 10'b111_0011111: q = 4'b0100; + 10'b111_0100000: q = 4'b0100; + 10'b111_0100001: q = 4'b0100; + 10'b111_0100010: q = 4'b0100; + 10'b111_0100011: q = 4'b0100; + 10'b111_0100100: q = 4'b0100; + 10'b111_0100101: q = 4'b0100; + 10'b111_0100110: q = 4'b0100; + 10'b111_0100111: q = 4'b0100; + 10'b111_0101000: q = 4'b0100; + 
10'b111_0101001: q = 4'b0100; + 10'b111_0101010: q = 4'b0100; + 10'b111_0101011: q = 4'b0100; + 10'b111_0101100: q = 4'b1000; + 10'b111_0101101: q = 4'b1000; + 10'b111_0101110: q = 4'b1000; + 10'b111_0101111: q = 4'b1000; + 10'b111_0110000: q = 4'b1000; + 10'b111_0110001: q = 4'b1000; + 10'b111_0110010: q = 4'b1000; + 10'b111_0110011: q = 4'b1000; + 10'b111_0110100: q = 4'b1000; + 10'b111_0110101: q = 4'b1000; + 10'b111_0110110: q = 4'b1000; + 10'b111_0110111: q = 4'b1000; + 10'b111_0111000: q = 4'b1000; + 10'b111_0111001: q = 4'b1000; + 10'b111_0111010: q = 4'b1000; + 10'b111_0111011: q = 4'b1000; + 10'b111_0111100: q = 4'b1000; + 10'b111_0111101: q = 4'b1000; + 10'b111_0111110: q = 4'b1000; + 10'b111_0111111: q = 4'b1000; + 10'b111_1000000: q = 4'b0001; + 10'b111_1000001: q = 4'b0001; + 10'b111_1000010: q = 4'b0001; + 10'b111_1000011: q = 4'b0001; + 10'b111_1000100: q = 4'b0001; + 10'b111_1000101: q = 4'b0001; + 10'b111_1000110: q = 4'b0001; + 10'b111_1000111: q = 4'b0001; + 10'b111_1001000: q = 4'b0001; + 10'b111_1001001: q = 4'b0001; + 10'b111_1001010: q = 4'b0001; + 10'b111_1001011: q = 4'b0001; + 10'b111_1001100: q = 4'b0001; + 10'b111_1001101: q = 4'b0001; + 10'b111_1001110: q = 4'b0001; + 10'b111_1001111: q = 4'b0001; + 10'b111_1010000: q = 4'b0001; + 10'b111_1010001: q = 4'b0001; + 10'b111_1010010: q = 4'b0010; + 10'b111_1010011: q = 4'b0010; + 10'b111_1010100: q = 4'b0010; + 10'b111_1010101: q = 4'b0010; + 10'b111_1010110: q = 4'b0010; + 10'b111_1010111: q = 4'b0010; + 10'b111_1011000: q = 4'b0010; + 10'b111_1011001: q = 4'b0010; + 10'b111_1011010: q = 4'b0010; + 10'b111_1011011: q = 4'b0010; + 10'b111_1011100: q = 4'b0010; + 10'b111_1011101: q = 4'b0010; + 10'b111_1011110: q = 4'b0010; + 10'b111_1011111: q = 4'b0010; + 10'b111_1100000: q = 4'b0010; + 10'b111_1100001: q = 4'b0010; + 10'b111_1100010: q = 4'b0010; + 10'b111_1100011: q = 4'b0010; + 10'b111_1100100: q = 4'b0010; + 10'b111_1100101: q = 4'b0010; + 10'b111_1100110: q = 4'b0010; + 
10'b111_1100111: q = 4'b0010; + 10'b111_1101000: q = 4'b0010; + 10'b111_1101001: q = 4'b0010; + 10'b111_1101010: q = 4'b0010; + 10'b111_1101011: q = 4'b0010; + 10'b111_1101100: q = 4'b0010; + 10'b111_1101101: q = 4'b0010; + 10'b111_1101110: q = 4'b0010; + 10'b111_1101111: q = 4'b0010; + 10'b111_1110000: q = 4'b0000; + 10'b111_1110001: q = 4'b0000; + 10'b111_1110010: q = 4'b0000; + 10'b111_1110011: q = 4'b0000; + 10'b111_1110100: q = 4'b0000; + 10'b111_1110101: q = 4'b0000; + 10'b111_1110110: q = 4'b0000; + 10'b111_1110111: q = 4'b0000; + 10'b111_1111000: q = 4'b0000; + 10'b111_1111001: q = 4'b0000; + 10'b111_1111010: q = 4'b0000; + 10'b111_1111011: q = 4'b0000; + 10'b111_1111100: q = 4'b0000; + 10'b111_1111101: q = 4'b0000; + 10'b111_1111110: q = 4'b0000; + 10'b111_1111111: q = 4'b0000; + endcase From 4ff866b39e835045230cf5e5b4b189aea13b9ac6 Mon Sep 17 00:00:00 2001 From: James Stine Date: Thu, 23 Jun 2022 11:59:05 -0500 Subject: [PATCH 11/26] Update --- pipelined/srt/qslc_sqrt_r4a2 | Bin 16152 -> 16152 bytes pipelined/srt/qslc_sqrt_r4a2.c | 2 +- pipelined/srt/qslc_sqrt_r4a2.sv | 2048 +++++++++++++++---------------- 3 files changed, 1025 insertions(+), 1025 deletions(-) diff --git a/pipelined/srt/qslc_sqrt_r4a2 b/pipelined/srt/qslc_sqrt_r4a2 index 047de1ba3bf6e4421c0a8c69adbcce1eb2e9b293..5cff70cdf9d63dd415b92ba2ce9092b7da87695f 100755 GIT binary patch delta 40 wcmbPHH=}Mt1+$2Xoo>~Yl(XU7lN# Date: Thu, 23 Jun 2022 13:08:15 -0700 Subject: [PATCH 12/26] Fixed wally-periph, regression is now working --- .../references/WALLY-periph.reference_output | 34 +++++++++---------- .../rv64i_m/privilege/src/WALLY-periph.S | 7 ++-- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output index 7b23883c6..fd88590e3 100644 --- 
a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/references/WALLY-periph.reference_output @@ -254,12 +254,12 @@ FFFFEE00 FFFFEE00 00000000 00000000 -02BEEF10 +02BEEF10 # Something here is failing 0000000B 80000000 00000003 000000FF -FFFFFFFF +00000000 000000FF 00000000 00000000 @@ -270,20 +270,20 @@ FFFFFFFF FFFFFF00 00000000 00000000 -02BEEF11 +02BEEF11 # this might be wrong 0000000B 80000000 -00000003 -000000CC -CCCCCCCC -00000000 -00000000 -00000033 -00000000 -000000FF -000000CC -FFFFFF33 -FFFFFF33 +00000003 +00000033 # input +00000000 # output +00000000 # rise ip +00000000 # serviced rise ip +000000CC # fall ip +00000000 +000000FF # high ip +00000033 # why is this 0x33? +FFFFFFCC # low ip +FFFFFFCC # serviced low ip 00000000 00000000 03BEEF12 @@ -454,9 +454,9 @@ FFFFFF33 00080000 00080000 00000000 +00000000 # is it this one that's failing? 00000000 -00000000 -00080000 +00080000 # failing 00080000 FFFFFFFF FFF7FFFF @@ -478,7 +478,7 @@ FFFFFFFF FFFFFFFE 00000000 00000000 -04BEEF1E +04BEEF1E # this might also be wrong 00000009 80000000 0000000A diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S index c44d7a681..705875146 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv64i_m/privilege/src/WALLY-periph.S @@ -271,7 +271,7 @@ main_code: ##### sw t1, 0x04(t0) # raise all output_en sw t1, 0x08(t0) - # raise all input_en + # raise all rise_en sw t1, 0x18(t0) # ========== Execute Test ========== # set MEIE @@ -616,6 +616,9 @@ Intr02BEEF11: sw t1, 0x08(t0) # set initial output state sw x0, 0x0C(t0) + # clear XOR + li t1, 0x00000000 + sw t1, 0x40(t0) # clear all pending interrupts li t1, 0xFFFFFFFF sw t1, 0x1C(t0) @@ -843,7 
+846,7 @@ Intr03BEEF1A: sw t1, 0x04(t0) # raise all output_en sw t1, 0x08(t0) - # raise all input_en + # raise all rise_en sw t1, 0x18(t0) # ========== Execute Test ========== # set MEIE and SEIE From d86a65daf067afa70ab8fe4e6e4fc5e6c6636e01 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 23 Jun 2022 13:22:00 -0700 Subject: [PATCH 13/26] Updating new GPIO tests --- .../references/WALLY-gpio-01.reference_output | 12 +++++--- .../rv32i_m/privilege/src/WALLY-gpio-01.S | 28 +++++++++++++++++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output index 278e0aa70..e6fd4d7ff 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output @@ -1,7 +1,11 @@ +00000000 # test reset to zero 00000000 -00000000 -A5A5A5A5 +A5A5A5A5 # test output pins 5A5AFFFF -00000000 +00000000 # test input enables 5A5A0000 -A55A0000 +A55A0000 # test XOR +# A55A0000 # test interrupt pins +# 5AA5FFFF +# 00000000 +# 00000000 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index e4792a78c..38bc533b1 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -88,12 +88,34 @@ test_cases: .4byte input_en, 0xFFFF0000, write32_test # enable a few input pins .4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above. -# =========== Test output enables(?) 
=========== - -.4byte output_en, 0xFFFFFFFF, write32_test # undo changes made to output enable # =========== Test XOR functionality =========== .4byte out_xor, 0xFF00FF00, write32_test # invert certain pin values .4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working +# =========== End of functioning tests =========== +# # =========== Test Interrupt Pending bits =========== + +# .4byte low_ip, 0xFFFFFFFF, write32_test # clear pending low interrupts +# .4byte high_ip, 0xFFFFFFFF, write32_test # clear pending high interrupts +# .4byte rise_ip, 0xFFFFFFFF, write32_test # clear pending rise interrupts +# .4byte fall_ip, 0xFFFFFFFF, write32_test # clear pending fall interrupts +# .4byte high_ip, 0xA55A0000, read32_test # check pending high interrupts +# .4byte low_ip, 0x5AA5FFFF, read32_test # check pending low interrupts +# .4byte rise_ip, 0x00000000, read32_test # check pending rise interrupts +# .4byte fall_ip, 0x00000000, read32_test # check pending fall interrupts +# .4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts +# .4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output +# .4byte high_ip, 0xA5FA00000, read32_test # high interrupt pending *** (is this correct?) 
+# .4byte low_ip, 0x5BF50000, read32_test # low interrupt pending should be opposite high for enabled pins +# .4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising) +# .4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling) + +# # =========== Test Interrupt Enable without interrupts =========== + +# .4byte high_ie, 0x00010000, write32_test # enable high interrupt on bit 16, no pending interrupt +# .4byte high_ip, 0xA5FA0000, read32_test # read to show no interrupt has happened +# .4byte low_ie, 0x00020000, write32_test # enable low interrupt on bit 17, no pending interrupt +# .4byte low_ip, 5BF50000, read32_test # read to show no interrupt has happened + .4byte 0x0, 0x0, terminate_test # terminate tests From db459c338035e61f674a079aa6e7823e039245a2 Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 23 Jun 2022 21:06:11 +0000 Subject: [PATCH 14/26] GPIO tests --- .../references/WALLY-gpio-01.reference_output | 20 +++++-- .../rv32i_m/privilege/src/WALLY-TEST-LIB-32.h | 36 +++++++++++ .../rv32i_m/privilege/src/WALLY-gpio-01.S | 60 ++++++++++++------- 3 files changed, 89 insertions(+), 27 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output index e6fd4d7ff..73f898ca0 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output @@ -5,7 +5,19 @@ A5A5A5A5 # test output pins 00000000 # test input enables 5A5A0000 A55A0000 # test XOR -# A55A0000 # test interrupt pins -# 5AA5FFFF -# 00000000 -# 00000000 +A55A0000 # Test interrupt pending bits: high_ip +5AA5FFFF # low_ip +00000000 # rise_ip +00000000 # fall_ip +A4AA0000 # input_val +A5FA0000 # high_ip +5BF50000 # low_ip +00A00000 # 
rise_ip +01500000 # fall_ip +00000000 # MEIP +00000000 # Test interrupts can be enabled without being triggered: MIP = 0 +00000000 # MIP = 0 +00000000 # MIP = 0 +00000000 # MIP = 0 +00000800 # Test interrupts can be enabled and triggered: MEIP set +00000000 # MEIP = 0 diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h index a72ae385a..0caad5d0b 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-TEST-LIB-32.h @@ -827,6 +827,28 @@ trap_handler_end_\MODE\(): // place to jump to so we can skip the trap handler a addi a6, a6, 4 .endm +// Place this macro in peripheral tests to setup all the PLIC registers to generate external interrupts +.macro SETUP_PLIC + # Setup PLIC with a series of register writes + + .equ PLIC_INTPRI_GPIO, 0x0C00000C # GPIO is interrupt 3 + .equ PLIC_INTPRI_UART, 0x0C000028 # UART is interrupt 10 + .equ PLIC_INTPENDING0, 0x0C001000 # intPending0 register + .equ PLIC_INTEN00, 0x0C002000 # interrupt enables for context 0 (machine mode) sources 31:1 + .equ PLIC_INTEN10, 0x0C002080 # interrupt enables for context 1 (supervisor mode) sources 31:1 + .equ PLIC_THRESH0, 0x0C200000 # Priority threshold for context 0 (machine mode) + .equ PLIC_CLAIM0, 0x0C200004 # Claim/Complete register for context 0 + .equ PLIC_THRESH1, 0x0C201000 # Priority threshold for context 1 (supervisor mode) + .equ PLIC_CLAIM1, 0x0C201004 # Claim/Complete register for context 1 + + .4byte PLIC_THRESH0, 0, write32_test # Set PLIC machine mode interrupt threshold to 0 to accept all interrupts + .4byte PLIC_THRESH1, 7, write32_test # Set PLIC supervisor mode interrupt threshold to 7 to accept no interrupts + .4byte PLIC_INTPRI_GPIO, 7, write32_test # Set GPIO to high priority + .4byte PLIC_INTPRI_UART, 7, write32_test # 
Set UART to high priority + .4byte PLIC_INTEN00, 0xFFFFFFFF, write32_test # Enable all interrupt sources for machine mode + .4byte PLIC_INTEN10, 0x00000000, write32_test # Disable all interrupt sources for supervisor mode +.endm + .macro END_TESTS // invokes one final ecall to return to machine mode then terminates this program, so the output is // 0x8: termination called from U mode @@ -937,6 +959,20 @@ read08_test: addi a6, a6, 4 j test_loop // go to next test case +readmip_test: // read the MIP into the signature + csrr t2, mip + sw t2, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop // go to next test case + +readsip_test: // read the MIP into the signature + csrr t2, sip + sw t2, 0(t1) + addi t1, t1, 4 + addi a6, a6, 4 + j test_loop // go to next test case + goto_s_mode: // return to address in t3, li a0, 3 // Trap handler behavior (go to supervisor mode) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index 38bc533b1..b8a751c55 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -72,6 +72,7 @@ test_cases: .4byte input_val, 0x00000000, read32_test # input_val reset to zero .4byte input_en, 0x00000000, read32_test # input_en reset to zero +# *** add more # =========== Test output and input pins =========== @@ -86,36 +87,49 @@ test_cases: .4byte input_en, 0x00000000, write32_test # disable all input pins .4byte input_val, 0x00000000, read32_test # read 0 since input pins are disabled .4byte input_en, 0xFFFF0000, write32_test # enable a few input pins -.4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above. +.4byte input_val, 0x5A5A0000, read32_test # read part of pattern set above. 
# =========== Test XOR functionality =========== .4byte out_xor, 0xFF00FF00, write32_test # invert certain pin values -.4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working +.4byte input_val, 0xA55A0000, read32_test # read inverted pins and verify input enable is working -# =========== End of functioning tests =========== -# # =========== Test Interrupt Pending bits =========== +# =========== Test Interrupt Pending bits =========== -# .4byte low_ip, 0xFFFFFFFF, write32_test # clear pending low interrupts -# .4byte high_ip, 0xFFFFFFFF, write32_test # clear pending high interrupts -# .4byte rise_ip, 0xFFFFFFFF, write32_test # clear pending rise interrupts -# .4byte fall_ip, 0xFFFFFFFF, write32_test # clear pending fall interrupts -# .4byte high_ip, 0xA55A0000, read32_test # check pending high interrupts -# .4byte low_ip, 0x5AA5FFFF, read32_test # check pending low interrupts -# .4byte rise_ip, 0x00000000, read32_test # check pending rise interrupts -# .4byte fall_ip, 0x00000000, read32_test # check pending fall interrupts -# .4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts -# .4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output -# .4byte high_ip, 0xA5FA00000, read32_test # high interrupt pending *** (is this correct?) 
-# .4byte low_ip, 0x5BF50000, read32_test # low interrupt pending should be opposite high for enabled pins -# .4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising) -# .4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling) +SETUP_PLIC -# # =========== Test Interrupt Enable without interrupts =========== +.4byte low_ip, 0xFFFFFFFF, write32_test # clear pending low interrupts +.4byte high_ip, 0xFFFFFFFF, write32_test # clear pending high interrupts +.4byte rise_ip, 0xFFFFFFFF, write32_test # clear pending rise interrupts +.4byte fall_ip, 0xFFFFFFFF, write32_test # clear pending fall interrupts +.4byte high_ip, 0xA55A0000, read32_test # check pending high interrupts +.4byte low_ip, 0x5AA5FFFF, read32_test # check pending low interrupts +.4byte rise_ip, 0x00000000, read32_test # check pending rise interrupts +.4byte fall_ip, 0x00000000, read32_test # check pending fall interrupts +.4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts +.4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output +.4byte high_ip, 0xA5FA00000, read32_test # high interrupt pending *** (is this correct?) 
+.4byte low_ip, 0x5BF50000, read32_test # low interrupt pending should be opposite high for enabled pins +.4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising) +.4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling) +.4byte 0x0, 0x00000000, readmip_test # Check no external interrupt has been generated -# .4byte high_ie, 0x00010000, write32_test # enable high interrupt on bit 16, no pending interrupt -# .4byte high_ip, 0xA5FA0000, read32_test # read to show no interrupt has happened -# .4byte low_ie, 0x00020000, write32_test # enable low interrupt on bit 17, no pending interrupt -# .4byte low_ip, 5BF50000, read32_test # read to show no interrupt has happened +# =========== Test interrupts can be enabled without being triggered =========== + +.4byte high_ie, 0x00010000, write32_test # enable high interrupt on bit 16, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending +.4byte low_ie, 0x00020000, write32_test # enable low interrupt on bit 17, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending +.4byte rise_ie, 0x00010000, write32_test # enable rise interrupt on bit 16, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending +.4byte fall_ie, 0x00010000, write32_test # enable fall interrupt on bit 16, no pending interrupt +.4byte 0x0, 0x00000000, readmip_test # No external interrupt should be pending + +# =========== Test interrupts can be enabled and triggered + +.4byte high_ie, 0x00020000, write32_test # enable high interrupt on bit 17, which is pending +.4byte 0x0, 0x00000800, readmip_test # MEIP should be raised +.4byte low_ie, 0x00000000, write32_test # disable high interrupt on bit 17, which is pending +.4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte 0x0, 0x0, terminate_test # terminate tests From cb8ae723265a83216e766e91bd1dbf59b66ba424 Mon Sep 17 00:00:00 2001 
From: slmnemo Date: Thu, 23 Jun 2022 14:12:28 -0700 Subject: [PATCH 15/26] Fixed error in GPIO signature --- .../privilege/references/WALLY-gpio-01.reference_output | 2 +- .../riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output index 73f898ca0..3cbf56ae5 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/references/WALLY-gpio-01.reference_output @@ -11,7 +11,7 @@ A55A0000 # Test interrupt pending bits: high_ip 00000000 # fall_ip A4AA0000 # input_val A5FA0000 # high_ip -5BF50000 # low_ip +5BF5FFFF # low_ip 00A00000 # rise_ip 01500000 # fall_ip 00000000 # MEIP diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S index b8a751c55..be40c0e26 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/privilege/src/WALLY-gpio-01.S @@ -109,7 +109,7 @@ SETUP_PLIC .4byte output_val, 0x5BAA000F, write32_test # change output pattern to check rise/fall interrupts .4byte input_val, 0xA4AA0000, read32_test # check new output matches expected output .4byte high_ip, 0xA5FA00000, read32_test # high interrupt pending *** (is this correct?) 
-.4byte low_ip, 0x5BF50000, read32_test # low interrupt pending should be opposite high for enabled pins +.4byte low_ip, 0x5BF5FFFF, read32_test # low interrupt pending should be opposite high for enabled pins .4byte rise_ip, 0x00A00000, read32_test # check for changed bits (rising) .4byte fall_ip, 0x01500000, read32_test # check for changed bits (falling) .4byte 0x0, 0x00000000, readmip_test # Check no external interrupt has been generated @@ -129,7 +129,7 @@ SETUP_PLIC .4byte high_ie, 0x00020000, write32_test # enable high interrupt on bit 17, which is pending .4byte 0x0, 0x00000800, readmip_test # MEIP should be raised -.4byte low_ie, 0x00000000, write32_test # disable high interrupt on bit 17, which is pending +.4byte high_ie, 0x00000000, write32_test # disable high interrupt on bit 17, which is pending .4byte 0x0, 0x00000000, readmip_test # MEIP should be released .4byte 0x0, 0x0, terminate_test # terminate tests From d969edeb999f9e603f8f08a7eaceb68cbdb95a0b Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 23 Jun 2022 21:20:55 +0000 Subject: [PATCH 16/26] Reset mtimecmp in clint --- pipelined/src/uncore/clint.sv | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pipelined/src/uncore/clint.sv b/pipelined/src/uncore/clint.sv index 47acfddc2..3f6210ff0 100644 --- a/pipelined/src/uncore/clint.sv +++ b/pipelined/src/uncore/clint.sv @@ -60,7 +60,7 @@ module clint ( flopr #(16) entrydflop(HCLK, ~HRESETn, entry, entryd); assign HRESPCLINT = 0; // OK - assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during accesses + assign HREADYCLINT = 1'b1; // *** needs to depend on DONE during asynchronous MTIME accesses // word aligned reads if (`XLEN==64) assign #2 entry = {HADDR[15:3], 3'b000}; @@ -87,8 +87,7 @@ module clint ( always_ff @(posedge HCLK or negedge HRESETn) if (~HRESETn) begin MSIP <= 0; - MTIMECMP <= 0; - // MTIMECMP is not reset + MTIMECMP <= 0xFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value 
to prevent spurious timer interrupts end else if (memwrite) begin if (entryd == 16'h0000) MSIP <= HWDATA[0]; if (entryd == 16'h4000) begin @@ -104,7 +103,6 @@ module clint ( always_ff @(posedge HCLK or negedge HRESETn) if (~HRESETn) begin MTIME <= 0; - // MTIMECMP is not reset end else if (memwrite & entryd == 16'hBFF8) begin // MTIME Counter. Eventually change this to run off separate clock. Synchronization then needed for(j=0;j<`XLEN/8;j++) From 44216b39670a8cb5dbfba16e31c3db287f796f1c Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 23 Jun 2022 21:27:46 +0000 Subject: [PATCH 17/26] Fixed typo in clint --- pipelined/src/uncore/clint.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/src/uncore/clint.sv b/pipelined/src/uncore/clint.sv index 3f6210ff0..4781360e5 100644 --- a/pipelined/src/uncore/clint.sv +++ b/pipelined/src/uncore/clint.sv @@ -87,7 +87,7 @@ module clint ( always_ff @(posedge HCLK or negedge HRESETn) if (~HRESETn) begin MSIP <= 0; - MTIMECMP <= 0xFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value to prevent spurious timer interrupts + MTIMECMP <= 64'hFFFFFFFFFFFFFFFF; // Spec says MTIMECMP is not reset, but we reset to maximum value to prevent spurious timer interrupts end else if (memwrite) begin if (entryd == 16'h0000) MSIP <= HWDATA[0]; if (entryd == 16'h4000) begin From 3a471ac7d6eca81edac2570231691b7657c157f4 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 23 Jun 2022 14:37:18 -0700 Subject: [PATCH 18/26] Added wally32periph to regression --- pipelined/regression/regression-wally | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/regression/regression-wally b/pipelined/regression/regression-wally index 664f99648..07058241d 100755 --- a/pipelined/regression/regression-wally +++ b/pipelined/regression/regression-wally @@ -71,7 +71,7 @@ for test in tests64gc: grepstr="All tests ran without failures") configs.append(tc) -tests32gc = ["arch32i", 
"arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv"] #, "imperas32mmu""wally32i", +tests32gc = ["arch32i", "arch32priv", "arch32c", "arch32m", "arch32f", "imperas32i", "imperas32f", "imperas32m", "wally32a", "imperas32c", "wally32priv", "wally32periph"] #, "imperas32mmu""wally32i", for test in tests32gc: tc = TestCase( name=test, From a5fc6757a1e116d7d04adc8f19e05442cb92ecb4 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 21:38:04 +0000 Subject: [PATCH 19/26] generate qsel4 in verilog --- pipelined/regression/wave-fpu.do | 3 ++ pipelined/srt/srt-radix4.sv | 52 ++++++++++++++++++++++++++++- pipelined/testbench/testbench-fp.sv | 6 +++- 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 906eb2560..60835ef67 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -22,3 +22,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/otfc4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/* +add wave -group {Testbench} -noupdate /testbenchfp/* +add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index 6894a0f9c..52bd4c200 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -164,7 +164,57 @@ module qsel4 ( // Wmsbs = | | logic [3:0] QSel4[1023:0]; - initial $readmemh("../srt/qsel4.dat", QSel4); + + initial begin + integer d, w, i, w2; + for(d=0; d<8; d++) + for(w=0; w<128; w++)begin + i = d*128+w; + w2 = w-128*(w>=64); // convert to two's complement + case(d) + 0: if($signed(w2)>=$signed(12)) QSel4[i] = 4'b1000; + else if(w2>=4) 
QSel4[i] = 4'b0100; + else if(w2>=-4) QSel4[i] = 4'b0000; + else if(w2>=-13) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 1: if(w2>=14) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-15) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 2: if(w2>=15) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-16) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 3: if(w2>=16) QSel4[i] = 4'b1000; + else if(w2>=4) QSel4[i] = 4'b0100; + else if(w2>=-6) QSel4[i] = 4'b0000; + else if(w2>=-18) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 4: if(w2>=18) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 5: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=6) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-20) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 6: if(w2>=20) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-22) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + 7: if(w2>=24) QSel4[i] = 4'b1000; + else if(w2>=8) QSel4[i] = 4'b0100; + else if(w2>=-8) QSel4[i] = 4'b0000; + else if(w2>=-24) QSel4[i] = 4'b0010; + else QSel4[i] = 4'b0001; + endcase + end + end assign q = QSel4[{Dmsbs,Wmsbs}]; endmodule diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 70787b3cb..7a5514901 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -899,7 +899,7 @@ module readvectors ( // apply test vectors on rising edge of clk // Format of vectors Inputs(1/2/3)_AnsFlg - always @(TestNum) begin + always @(VectorNum) begin #1; AnsFlg = TestVector[4:0]; DivStart = 1'b0; @@ -971,6 +971,7 @@ module readvectors ( X = TestVector[8+3*(`Q_LEN)-1:8+2*(`Q_LEN)]; Y = TestVector[8+2*(`Q_LEN)-1:8+(`Q_LEN)]; Ans = 
TestVector[8+(`Q_LEN-1):8]; + if (~clk) #5; DivStart = 1'b1; #10 // one clk cycle DivStart = 1'b0; end @@ -978,6 +979,7 @@ module readvectors ( X = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+3*(`D_LEN)-1:8+2*(`D_LEN)]}; Y = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+2*(`D_LEN)-1:8+(`D_LEN)]}; Ans = {{`FLEN-`D_LEN{1'b1}}, TestVector[8+(`D_LEN-1):8]}; + if (~clk) #5; DivStart = 1'b1; #10 DivStart = 1'b0; end @@ -985,6 +987,7 @@ module readvectors ( X = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+3*(`S_LEN)-1:8+2*(`S_LEN)]}; Y = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+2*(`S_LEN)-1:8+1*(`S_LEN)]}; Ans = {{`FLEN-`S_LEN{1'b1}}, TestVector[8+(`S_LEN-1):8]}; + if (~clk) #5; DivStart = 1'b1; #10 DivStart = 1'b0; end @@ -992,6 +995,7 @@ module readvectors ( X = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+3*(`H_LEN)-1:8+2*(`H_LEN)]}; Y = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+2*(`H_LEN)-1:8+(`H_LEN)]}; Ans = {{`FLEN-`H_LEN{1'b1}}, TestVector[8+(`H_LEN-1):8]}; + if (~clk) #5; DivStart = 1'b1; #10 DivStart = 1'b0; end From bca8fe16948a3d38c76a4701d475201de49f2743 Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 23 Jun 2022 14:39:53 -0700 Subject: [PATCH 20/26] Removed big64.txt reference, fixing a warning --- pipelined/src/generic/flop/bram1p1rw.sv | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pipelined/src/generic/flop/bram1p1rw.sv b/pipelined/src/generic/flop/bram1p1rw.sv index d0d3c40a8..51fe54214 100644 --- a/pipelined/src/generic/flop/bram1p1rw.sv +++ b/pipelined/src/generic/flop/bram1p1rw.sv @@ -54,10 +54,6 @@ module bram1p1rw logic [DATA_WIDTH-1:0] RAM [(2**ADDR_WIDTH)-1:0]; integer i; - initial begin - $readmemh("big64.txt", RAM); - end - always @ (posedge clk) begin dout <= RAM[addr]; if(we) begin From de71773d6962c97588996361513562c178c5729a Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 22:36:19 +0000 Subject: [PATCH 21/26] added radix-4 0/d handling --- pipelined/config/rv64fp/wally-config.vh | 2 +- pipelined/src/fpu/postprocess.sv | 10 ++-- pipelined/src/fpu/round.sv | 1 
+ pipelined/srt/srt-radix4.sv | 68 ++++++++++++++----------- pipelined/testbench/testbench-fp.sv | 6 +-- 5 files changed, 48 insertions(+), 39 deletions(-) diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index bcc791338..68b3b84c3 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -32,7 +32,7 @@ `define DESIGN_COMPILER 0 // RV32 or RV64: XLEN = 32 or 64 -`define XLEN 64 +`define XLEN 32 // IEEE 754 compliance `define IEEE754 0 diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 4b2870da4..9138f9dfd 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -59,7 +59,7 @@ module postprocess( input logic [`CVTLEN-1:0] CvtLzcInM, // input to the Leading Zero Counter (priority encoder) input logic IntZeroM, // is the input zero input logic [1:0] PostProcSelM, // select result to be written to fp register - input logic [`DIVLEN-1:0] Quot, + input logic [`DIVLEN+2:0] Quot, output logic [`FLEN-1:0] PostProcResM, // FMA final result output logic [4:0] PostProcFlgM, output logic [`XLEN-1:0] FCvtIntResM // the int conversion result @@ -84,6 +84,7 @@ module postprocess( logic PreResultDenorm; // is the result denormalized - calculated before LZA corection logic [$clog2(3*`NF+7)-1:0] FmaShiftAmt; // normalization shift count logic [$clog2(`NORMSHIFTSZ)-1:0] ShiftAmt; // normalization shift count + logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt; logic [`NORMSHIFTSZ-1:0] ShiftIn; // is the sum zero logic [`NORMSHIFTSZ-1:0] Shifted; // the shifted result logic Plus1; // add one to the final result? 
@@ -137,6 +138,7 @@ module postprocess( .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp, .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn); + divshiftcalc divshiftcalc(.Quot, .DivShiftAmt); always_comb case(PostProcSelM) @@ -149,8 +151,8 @@ module postprocess( ShiftIn = {CvtShiftIn, {`NORMSHIFTSZ-`CVTLEN-`NF-1{1'b0}}}; end 2'b01: begin //div ***prob can take out - ShiftAmt = {$clog2(`NORMSHIFTSZ){1'b0}};//{DivShiftAmt}; - ShiftIn = {Quot, {`NORMSHIFTSZ-`DIVLEN{1'b0}}}; + ShiftAmt = DivShiftAmt; + ShiftIn = {Quot[`DIVLEN+1:0], {`NORMSHIFTSZ-`DIVLEN-2{1'b0}}}; end default: begin ShiftAmt = {$clog2(`NORMSHIFTSZ){1'bx}}; @@ -175,7 +177,7 @@ module postprocess( round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .DivCalcExpM, .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf, - .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); + .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); /////////////////////////////////////////////////////////////////////////////// // Sign calculation diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 8e3b9fe4a..1fd471e9d 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -11,6 +11,7 @@ module round( input logic [`FMTBITS-1:0] OutFmt, // precision 1 = double 0 = single input logic [2:0] FrmM, // rounding mode input logic FmaOp, + input logic DivOp, input logic [1:0] PostProcSelM, input logic CvtResDenormUfM, input logic ToInt, diff --git a/pipelined/srt/srt-radix4.sv b/pipelined/srt/srt-radix4.sv index 52bd4c200..8fd8d5419 100644 --- a/pipelined/srt/srt-radix4.sv +++ b/pipelined/srt/srt-radix4.sv @@ -34,14 +34,15 @@ module srtradix4 ( input logic clk, input logic DivStart, input logic [`NE-1:0] XExpE, YExpE, - 
input logic [`NF-1:0] XFrac, YFrac, + input logic [`NF:0] XManE, YManE, input logic [`XLEN-1:0] SrcA, SrcB, + input logic XZeroE, input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide output logic DivDone, - output logic [`DIVLEN-1:0] Quot, + output logic [`DIVLEN+2:0] Quot, output logic [`XLEN-1:0] Rem, // *** later handle integers output logic [`NE:0] DivCalcExpE ); @@ -49,14 +50,15 @@ module srtradix4 ( // logic qp, qz, qm; // quotient is +1, 0, or -1 logic [3:0] q; logic [`NE:0] DivCalcExp; - logic [`DIVLEN-1:0] X, Dpreproc; + logic [`DIVLEN:0] X; + logic [`DIVLEN-1:0] Dpreproc; logic [`DIVLEN+3:0] WS, WSA, WSN; logic [`DIVLEN+3:0] WC, WCA, WCN; logic [`DIVLEN+3:0] D, DBar, D2, DBar2, Dsel; logic [$clog2(`XLEN+1)-1:0] intExp; logic intSign; - srtpreproc preproc(SrcA, SrcB, XFrac, YFrac, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign); + srtpreproc preproc(SrcA, SrcB, XManE, YManE, W64, Signed, Int, Sqrt, X, Dpreproc, intExp, intSign); // Top Muxes and Registers // When start is asserted, the inputs are loaded into the divider. @@ -68,7 +70,7 @@ module srtradix4 ( // - otherwise load WSA into the flipflop // *** what does N and A stand for? 
// *** change shift amount for radix4 - mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {4'b0001, X}, DivStart, WSN); + mux2 #(`DIVLEN+4) wsmux({WSA[`DIVLEN+1:0], 2'b0}, {3'b000, X}, DivStart, WSN); flop #(`DIVLEN+4) wsflop(clk, WSN, WS); mux2 #(`DIVLEN+4) wcmux({WCA[`DIVLEN+1:0], 2'b0}, {`DIVLEN+4{1'b0}}, DivStart, WCN); flop #(`DIVLEN+4) wcflop(clk, WCN, WC); @@ -110,9 +112,9 @@ module srtradix4 ( csa #(`DIVLEN+4) csa(WS, WC, Dsel, |q[3:2], WSA, WCA); //*** change for radix 4 - otfc4 otfc4(clk, DivStart, q, Quot); + otfc4 otfc4(.clk, .DivStart, .q, .Quot); - expcalc expcalc(.XExpE, .YExpE, .DivCalcExp); + expcalc expcalc(.XExpE, .YExpE, .XZeroE, .DivCalcExp); divcounter divcounter(clk, DivStart, DivDone); @@ -224,39 +226,42 @@ endmodule /////////////////// module srtpreproc ( input logic [`XLEN-1:0] SrcA, SrcB, - input logic [`NF-1:0] XFrac, YFrac, + input logic [`NF:0] XManE, YManE, input logic W64, // 32-bit ints on XLEN=64 input logic Signed, // Interpret integers as signed 2's complement input logic Int, // Choose integer inputs input logic Sqrt, // perform square root, not divide - output logic [`DIVLEN-1:0] X, D, + output logic [`DIVLEN:0] X, + output logic [`DIVLEN-1:0] Dpreproc, output logic [$clog2(`XLEN+1)-1:0] intExp, // Quotient integer exponent output logic intSign // Quotient integer sign ); - logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; - logic [`XLEN-1:0] PosA, PosB; - logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; + // logic [$clog2(`XLEN+1)-1:0] zeroCntA, zeroCntB; + // logic [`XLEN-1:0] PosA, PosB; + // logic [`DIVLEN-1:0] ExtraA, ExtraB, PreprocA, PreprocB, PreprocX, PreprocY; + logic [`DIVLEN:0] PreprocA, PreprocX; + logic [`DIVLEN-1:0] PreprocB, PreprocY; - assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; - assign PosB = (Signed & SrcB[`XLEN - 1]) ? -SrcB : SrcB; + // assign PosA = (Signed & SrcA[`XLEN - 1]) ? -SrcA : SrcA; + // assign PosB = (Signed & SrcB[`XLEN - 1]) ? 
-SrcB : SrcB; - lzc #(`XLEN) lzcA (PosA, zeroCntA); - lzc #(`XLEN) lzcB (PosB, zeroCntB); + // lzc #(`XLEN) lzcA (PosA, zeroCntA); + // lzc #(`XLEN) lzcB (PosB, zeroCntB); - assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; - assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; + // assign ExtraA = {PosA, {`DIVLEN-`XLEN{1'b0}}}; + // assign ExtraB = {PosB, {`DIVLEN-`XLEN{1'b0}}}; - assign PreprocA = ExtraA << zeroCntA; - assign PreprocB = ExtraB << (zeroCntB + 1); - assign PreprocX = {XFrac, {`DIVLEN-`NF{1'b0}}}; - assign PreprocY = {YFrac, {`DIVLEN-`NF{1'b0}}}; + // assign PreprocA = ExtraA << zeroCntA; + // assign PreprocB = ExtraB << (zeroCntB + 1); + assign PreprocX = {XManE, {`DIVLEN-`NF{1'b0}}}; + assign PreprocY = {YManE[`NF-1:0], {`DIVLEN-`NF{1'b0}}}; assign X = Int ? PreprocA : PreprocX; - assign D = Int ? PreprocB : PreprocY; - assign intExp = zeroCntB - zeroCntA + 1; - assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); + assign Dpreproc = Int ? PreprocB : PreprocY; + // assign intExp = zeroCntB - zeroCntA + 1; + // assign intSign = Signed & (SrcA[`XLEN - 1] ^ SrcB[`XLEN - 1]); endmodule /////////////////////////////////// @@ -266,7 +271,7 @@ module otfc4 ( input logic clk, input logic DivStart, input logic [3:0] q, - output logic [`DIVLEN-1:0] Quot + output logic [`DIVLEN+2:0] Quot ); // The on-the-fly converter transfers the quotient @@ -278,7 +283,7 @@ module otfc4 ( // // QM is Q-1. It allows us to write negative bits // without using a costly CPA. - logic [`DIVLEN+2:0] Q, QM, QNext, QMNext, QMux, QMMux; + logic [`DIVLEN+2:0] QM, QNext, QMNext, QMux, QMMux; // QR and QMR are the shifted versions of Q and QM. // They are treated as [N-1:r] size signals, and // discard the r most significant bits of Q and QM. 
@@ -286,7 +291,7 @@ module otfc4 ( // if starting a new divison set Q to 0 and QM to -1 mux2 #(`DIVLEN+3) Qmux(QNext, {`DIVLEN+3{1'b0}}, DivStart, QMux); mux2 #(`DIVLEN+3) QMmux(QMNext, {`DIVLEN+3{1'b1}}, DivStart, QMMux); - flop #(`DIVLEN+3) Qreg(clk, QMux, Q); + flop #(`DIVLEN+3) Qreg(clk, QMux, Quot); flop #(`DIVLEN+3) QMreg(clk, QMMux, QM); // shift Q (quotent) and QM (quotent-1) @@ -298,7 +303,7 @@ module otfc4 ( // *** how does the 0 concatination numbers work? always_comb begin - QR = Q[`DIVLEN:0]; + QR = Quot[`DIVLEN:0]; QMR = QM[`DIVLEN:0]; // Shift Q and QM if (q[3]) begin // +2 QNext = {QR, 2'b10}; @@ -318,7 +323,7 @@ module otfc4 ( end end // Quot is in the range [.5, 2) so normalize the result if nesissary - assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1]; + // assign Quot = Q[`DIVLEN+2] ? Q[`DIVLEN+1:2] : Q[`DIVLEN:1]; endmodule @@ -352,9 +357,10 @@ endmodule ////////////// module expcalc( input logic [`NE-1:0] XExpE, YExpE, + input logic XZeroE, output logic [`NE:0] DivCalcExp ); - assign DivCalcExp = XExpE - YExpE + (`NE)'(`BIAS); + assign DivCalcExp = (XExpE - YExpE + (`NE)'(`BIAS))&{`NE+1{~XZeroE}}; endmodule diff --git a/pipelined/testbench/testbench-fp.sv b/pipelined/testbench/testbench-fp.sv index 7a5514901..e8afb299b 100644 --- a/pipelined/testbench/testbench-fp.sv +++ b/pipelined/testbench/testbench-fp.sv @@ -53,6 +53,7 @@ module testbenchfp; logic CvtResSgnE; logic [`NE:0] CvtCalcExpE; // the calculated expoent logic [`LOGCVTLEN-1:0] CvtShiftAmtE; // how much to shift by + logic [`DIVLEN+2:0] Quot; logic CvtResDenormUfE; logic DivStart, DivDone; @@ -69,7 +70,6 @@ module testbenchfp; logic ZSgnEffE; logic PSgnE; logic DivSgn; - logic [`DIVLEN-1:0] Quot; logic [`NE:0] DivCalcExp; @@ -659,8 +659,8 @@ module testbenchfp; fcmp fcmp (.FmtE(ModFmt), .FOpCtrlE(OpCtrlVal), .XSgnE(XSgn), .YSgnE(YSgn), .XExpE(XExp), .YExpE(YExp), .XManE(XMan), .YManE(YMan), .XZeroE(XZero), .YZeroE(YZero), .CmpIntResE(CmpRes), .XNaNE(XNaN), .YNaNE(YNaN), 
.XSNaNE(XSNaN), .YSNaNE(YSNaN), .FSrcXE(X), .FSrcYE(Y), .CmpNVE(CmpFlg[4]), .CmpFpResE(FpCmpRes)); - srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), - .XFrac(XMan[`NF-1:0]), .YFrac(YMan[`NF-1:0]), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), + srtradix4 srtradix4(.clk, .DivStart, .XExpE(XExp), .YExpE(YExp), .DivCalcExpE(DivCalcExp), .XZeroE(XZero), + .XManE(XMan), .YManE(YMan), .SrcA('0), .SrcB('0), .W64(1'b0), .Signed(1'b0), .Int(1'b0), .Sqrt(OpCtrlVal[0]), .DivDone, .Quot, .Rem()); assign CmpFlg[3:0] = 0; From 749d405da8998af09a21c18ea288749f075ce650 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 22:37:44 +0000 Subject: [PATCH 22/26] lint warning fix --- pipelined/src/fpu/fpu.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelined/src/fpu/fpu.sv b/pipelined/src/fpu/fpu.sv index da46d73e5..ff83079a8 100755 --- a/pipelined/src/fpu/fpu.sv +++ b/pipelined/src/fpu/fpu.sv @@ -123,7 +123,7 @@ module fpu ( logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) //divide signals - logic [`DIVLEN-1:0] Quot; + logic [`DIVLEN+2:0] Quot; logic [`NE:0] DivCalcExpM; // result and flag signals From b16e55906ac6ad71bc53002a05ab1cd76d3f4a36 Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 22:59:43 +0000 Subject: [PATCH 23/26] div debug - accounted for 1 bit normalization in exponent calculation --- pipelined/regression/wave-fpu.do | 1 + pipelined/src/fpu/postprocess.sv | 5 +++-- pipelined/src/fpu/round.sv | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pipelined/regression/wave-fpu.do b/pipelined/regression/wave-fpu.do index 60835ef67..a58400cca 100644 --- a/pipelined/regression/wave-fpu.do +++ b/pipelined/regression/wave-fpu.do @@ -23,5 +23,6 @@ add wave -group {Divide} -noupdate /testbenchfp/srtradix4/qsel4/* add wave -group {Divide} -noupdate 
/testbenchfp/srtradix4/otfc4/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/preproc/* add wave -group {Divide} -noupdate /testbenchfp/srtradix4/divcounter/* +add wave -group {Divide} -noupdate /testbenchfp/srtradix4/expcalc/* add wave -group {Testbench} -noupdate /testbenchfp/* add wave -group {Testbench} -noupdate /testbenchfp/readvectors/* diff --git a/pipelined/src/fpu/postprocess.sv b/pipelined/src/fpu/postprocess.sv index 9138f9dfd..d970fdbce 100644 --- a/pipelined/src/fpu/postprocess.sv +++ b/pipelined/src/fpu/postprocess.sv @@ -94,6 +94,7 @@ module postprocess( logic IntToFp; // is the opperation an int->fp conversion? logic ToInt; // is the opperation an fp->int conversion? logic [`NE+1:0] RoundExp; + logic [`NE:0] CorrDivExp; logic [1:0] NegResMSBS; logic CvtOp; logic FmaOp; @@ -138,7 +139,7 @@ module postprocess( .XZeroM, .IntToFp, .OutFmt, .CvtResUf, .CvtShiftIn); fmashiftcalc fmashiftcalc(.SumM, .ZExpM, .ProdExpM, .FmaNormCntM, .FmtM, .KillProdM, .ConvNormSumExp, .ZDenormM, .SumZero, .PreResultDenorm, .FmaShiftAmt, .FmaShiftIn); - divshiftcalc divshiftcalc(.Quot, .DivShiftAmt); + divshiftcalc divshiftcalc(.Quot, .DivCalcExpM, .CorrDivExp, .DivShiftAmt); always_comb case(PostProcSelM) @@ -175,7 +176,7 @@ module postprocess( // round to infinity // round to nearest max magnitude - round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .DivCalcExpM, + round round(.OutFmt, .FrmM, .Sticky, .AddendStickyM, .ZZeroM, .Plus1, .PostProcSelM, .CvtCalcExpM, .CorrDivExp, .InvZM, .RoundSgn, .SumExp, .FmaOp, .CvtOp, .CvtResDenormUfM, .CorrShifted, .ToInt, .CvtResUf, .DivOp, .UfPlus1, .FullResExp, .ResFrac, .ResExp, .Round, .RoundAdd, .UfLSBRes, .RoundExp); diff --git a/pipelined/src/fpu/round.sv b/pipelined/src/fpu/round.sv index 1fd471e9d..73395caed 100644 --- a/pipelined/src/fpu/round.sv +++ b/pipelined/src/fpu/round.sv @@ -24,7 +24,7 @@ module round( input logic [`NE+1:0] SumExp, // exponent of the normalized 
sum input logic RoundSgn, // the result's sign input logic [`NE:0] CvtCalcExpM, // the calculated expoent - input logic [`NE:0] DivCalcExpM, // the calculated expoent + input logic [`NE:0] CorrDivExp, // the calculated exponent output logic UfPlus1, // do you add or subtract on from the result output logic [`NE+1:0] FullResExp, // ResExp with bits to determine sign and overflow output logic [`NF-1:0] ResFrac, // Result fraction @@ -305,7 +305,7 @@ module round( case(PostProcSelM) 2'b10: RoundExp = SumExp; // fma 2'b00: RoundExp = {CvtCalcExpM[`NE], CvtCalcExpM}&{`NE+2{~CvtResDenormUfM|CvtResUf}}; // cvt - 2'b01: RoundExp = {DivCalcExpM[`NE], DivCalcExpM[`NE:0]}; // divide + 2'b01: RoundExp = {CorrDivExp[`NE], CorrDivExp[`NE:0]}; // divide default: RoundExp = 0; endcase From ec2c446c7ef508ff514d3fb15cccf10b8e6f411d Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Thu, 23 Jun 2022 23:01:30 +0000 Subject: [PATCH 24/26] forgot a file --- pipelined/src/fpu/divshiftcalc.sv | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 pipelined/src/fpu/divshiftcalc.sv diff --git a/pipelined/src/fpu/divshiftcalc.sv b/pipelined/src/fpu/divshiftcalc.sv new file mode 100644 index 000000000..57022e5ae --- /dev/null +++ b/pipelined/src/fpu/divshiftcalc.sv @@ -0,0 +1,15 @@ +`include "wally-config.vh" + +module divshiftcalc( + input logic [`DIVLEN+2:0] Quot, + input logic [`NE:0] DivCalcExpM, + output logic [$clog2(`NORMSHIFTSZ)-1:0] DivShiftAmt, + output logic [`NE:0] CorrDivExp +); + + assign DivShiftAmt = {{$clog2(`NORMSHIFTSZ)-1{1'b0}}, ~Quot[`DIVLEN+2]}; + // the quotient is in the range [.5,2) + // if the quotient < 1 and not denormal then subtract 1 to account for the normalization shift + assign CorrDivExp = DivCalcExpM - {(`NE)'(0), ~Quot[`DIVLEN+2]}; + +endmodule From 534b752d16a67a7b069874bd199e6e8f23cb850c Mon Sep 17 00:00:00 2001 From: David Harris Date: Thu, 23 Jun 2022 23:16:43 +0000 Subject: [PATCH 25/26] Default value of Drive in Makefile --- 
synthDC/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/synthDC/Makefile b/synthDC/Makefile index 193153cac..3de666659 100755 --- a/synthDC/Makefile +++ b/synthDC/Makefile @@ -15,6 +15,7 @@ export MAXCORES ?= 4 # MAXOPT turns on flattening, boundary optimization, and retiming # The output netlist is hard to interpret, but significantly better PPA export MAXOPT ?= 0 +export DRIVE ?= FLOP time := $(shell date +%F-%H-%M) hash := $(shell git rev-parse --short HEAD) From 528869ef14932bca4e440a558c2877cdb42fa5fc Mon Sep 17 00:00:00 2001 From: slmnemo Date: Thu, 23 Jun 2022 16:50:27 -0700 Subject: [PATCH 26/26] Removed references to initialization files --- pipelined/config/buildroot/wally-config.vh | 2 -- pipelined/config/rv32e/wally-config.vh | 2 -- pipelined/config/rv32gc/wally-config.vh | 2 -- pipelined/config/rv32i/wally-config.vh | 2 -- pipelined/config/rv32ic/wally-config.vh | 2 -- pipelined/config/rv64BP/wally-config.vh | 2 -- pipelined/config/rv64fp/wally-config.vh | 2 -- pipelined/config/rv64fpquad/wally-config.vh | 2 -- pipelined/config/rv64gc/wally-config.vh | 2 -- pipelined/config/rv64i/wally-config.vh | 2 -- pipelined/config/rv64ic/wally-config.vh | 2 -- 11 files changed, 22 deletions(-) diff --git a/pipelined/config/buildroot/wally-config.vh b/pipelined/config/buildroot/wally-config.vh index dc6c9bb00..f11b71c0a 100644 --- a/pipelined/config/buildroot/wally-config.vh +++ b/pipelined/config/buildroot/wally-config.vh @@ -124,8 +124,6 @@ `define PLIC_NUM_SRC 53 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/buildroot/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/buildroot/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32e/wally-config.vh b/pipelined/config/rv32e/wally-config.vh index 15b2e08e7..7d083f3b5 100644 --- a/pipelined/config/rv32e/wally-config.vh +++ b/pipelined/config/rv32e/wally-config.vh @@ -130,8 
+130,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt" `define BPRED_ENABLED 0 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32gc/wally-config.vh b/pipelined/config/rv32gc/wally-config.vh index 3522fd1e6..70124d551 100644 --- a/pipelined/config/rv32gc/wally-config.vh +++ b/pipelined/config/rv32gc/wally-config.vh @@ -128,8 +128,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32i/wally-config.vh b/pipelined/config/rv32i/wally-config.vh index 80d167a3d..d44072d6a 100644 --- a/pipelined/config/rv32i/wally-config.vh +++ b/pipelined/config/rv32i/wally-config.vh @@ -130,8 +130,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32i/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32i/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh index 13b2eb747..e42fd3100 100644 --- a/pipelined/config/rv32ic/wally-config.vh +++ b/pipelined/config/rv32ic/wally-config.vh @@ -128,8 +128,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv32ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv32ic/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64BP/wally-config.vh b/pipelined/config/rv64BP/wally-config.vh index 82f8446bb..3bc745eb1 100644 
--- a/pipelined/config/rv64BP/wally-config.vh +++ b/pipelined/config/rv64BP/wally-config.vh @@ -130,8 +130,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv64BP/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64BP/BTBPredictor.txt" `define BPRED_ENABLED 1 //`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE `define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE diff --git a/pipelined/config/rv64fp/wally-config.vh b/pipelined/config/rv64fp/wally-config.vh index 68b3b84c3..cc8d1b2b8 100644 --- a/pipelined/config/rv64fp/wally-config.vh +++ b/pipelined/config/rv64fp/wally-config.vh @@ -132,8 +132,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/shared/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64fpquad/wally-config.vh b/pipelined/config/rv64fpquad/wally-config.vh index 08e8006ce..0dee000e2 100644 --- a/pipelined/config/rv64fpquad/wally-config.vh +++ b/pipelined/config/rv64fpquad/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/shared/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index 042364aca..9afa1a679 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/shared/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/shared/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or 
BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64i/wally-config.vh b/pipelined/config/rv64i/wally-config.vh index 402c3b364..67ca51a7a 100644 --- a/pipelined/config/rv64i/wally-config.vh +++ b/pipelined/config/rv64i/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv64i/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64i/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 diff --git a/pipelined/config/rv64ic/wally-config.vh b/pipelined/config/rv64ic/wally-config.vh index 491759359..fca1f2609 100644 --- a/pipelined/config/rv64ic/wally-config.vh +++ b/pipelined/config/rv64ic/wally-config.vh @@ -131,8 +131,6 @@ `define PLIC_GPIO_ID 3 `define PLIC_UART_ID 10 -`define TWO_BIT_PRELOAD "../config/rv64ic/twoBitPredictor.txt" -`define BTB_PRELOAD "../config/rv64ic/BTBPredictor.txt" `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0