From f0059b7b3a180e47a5dcaae7fd5622609c389d69 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Dec 2021 11:38:26 -0800 Subject: [PATCH 01/38] IEU cleanup: --- wally-pipelined/src/ieu/datapath.sv | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 83c1f918e..ef35f2f5b 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -78,11 +78,11 @@ module datapath ( logic [`XLEN-1:0] SrcAE, SrcBE; logic [`XLEN-1:0] SrcAE2, SrcBE2; - logic [`XLEN-1:0] ALUResultE, AltResultE, ALUPreResultE; + logic [`XLEN-1:0] ALUResultE, AltResultE, IEUResultE; logic [`XLEN-1:0] WriteDataE; logic [`XLEN-1:0] AddressE; // Memory stage signals - logic [`XLEN-1:0] ALUResultM; + logic [`XLEN-1:0] IEUResultM; logic [`XLEN-1:0] ResultM; // Writeback stage signals logic [`XLEN-1:0] SCResultW; @@ -108,22 +108,22 @@ module datapath ( mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, ForwardedSrcAE); mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, ForwardedSrcBE); mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE); + comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, FlagsE); mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ExtImmE, ALUSrcBE, SrcBE); - alu #(`XLEN) alu(SrcAE, SrcBE, ALUControlE, Funct3E, ALUPreResultE, AddressE); - comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, FlagsE); + alu #(`XLEN) alu(SrcAE, SrcBE, ALUControlE, Funct3E, ALUResultE, AddressE); mux2 #(`XLEN) altresultmux(ExtImmE, PCLinkE, JumpE, AltResultE); - mux2 #(`XLEN) aluresultmux(ALUPreResultE, AltResultE, ALUResultSrcE, ALUResultE); + mux2 #(`XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE); + assign MemAdrE = AddressE; // *** clean up this naming + assign PCTargetE = AddressE; // *** clean up this naming // Memory stage pipeline register flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); - flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM); - assign MemAdrE = AddressE; // *** clean up this naming - assign PCTargetE = AddressE; // *** clean up this naming + flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM); flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, MemAdrE, MemAdrM); flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM); - mux2 #(`XLEN) resultmuxM(ALUResultM, FIntResM, FWriteIntM, ResultM); + mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, ResultM); // Writeback stage pipeline register and logic flopenrc #(`XLEN) ResultWReg(clk, reset, FlushW, ~StallW, ResultM, ResultW); From 3a9071e509706dabb0c13744773db3eb73efc486 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Dec 2021 12:10:45 -0800 Subject: [PATCH 02/38] Renamed MemAdrE to IEUAdrE and moved the MemAdrM flop from IEU to LSU to reduce wires crossing hierarchies --- wally-pipelined/src/cache/dcache.sv | 4 ++-- wally-pipelined/src/ieu/datapath.sv | 10 +++------- wally-pipelined/src/ieu/ieu.sv | 8 ++++---- wally-pipelined/src/ifu/bpred.sv | 6 +++--- wally-pipelined/src/ifu/ifu.sv | 8 ++++---- wally-pipelined/src/lsu/lsu.sv | 12 +++++++----- wally-pipelined/src/lsu/lsuArb.sv | 4 ++-- wally-pipelined/src/wally/wallypipelinedhart.sv | 11 +++++------ wally-pipelined/testbench/testbench-linux.sv | 2 +- 9 files changed, 31 insertions(+), 34 deletions(-) diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index 4c7180a37..b546a82b6 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -36,7 +36,7 @@ module dcache input logic [6:0] Funct7M, input logic [1:0] AtomicM, input logic FlushDCacheM, - input logic [11:0] MemAdrE, // virtual address, but we only use the lower 12 bits. + input logic [11:0] IEUAdrE, // virtual address, but we only use the lower 12 bits. input logic [`PA_BITS-1:0] MemPAdrM, // physical address input logic [11:0] VAdr, // when hptw writes dtlb we use this address to index SRAM. @@ -147,7 +147,7 @@ module dcache // Read Path CPU (IEU) side mux4 #(INDEXLEN) - AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + AdrSelMux(.d0(IEUAdrE[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), .d1(VAdr[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), .d2(MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), .d3(FlushAdr), diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index ef35f2f5b..d0a046e1b 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -43,14 +43,14 @@ module datapath ( input logic [`XLEN-1:0] PCE, input logic [`XLEN-1:0] PCLinkE, output logic [2:0] FlagsE, - output logic [`XLEN-1:0] PCTargetE, + output logic [`XLEN-1:0] IEUAdrE, output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B // Memory stage signals input logic StallM, FlushM, input logic FWriteIntM, input logic [`XLEN-1:0] FIntResM, output logic [`XLEN-1:0] SrcAM, - output logic [`XLEN-1:0] WriteDataM, MemAdrM, MemAdrE, + output logic [`XLEN-1:0] WriteDataM, // Writeback stage signals input logic StallW, FlushW, input logic FWriteIntW, @@ -80,7 +80,6 @@ module datapath ( logic [`XLEN-1:0] ALUResultE, AltResultE, IEUResultE; logic [`XLEN-1:0] WriteDataE; - logic [`XLEN-1:0] AddressE; // Memory stage signals logic [`XLEN-1:0] IEUResultM; logic [`XLEN-1:0] ResultM; @@ -111,16 +110,13 @@ module datapath ( comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, FlagsE); mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ExtImmE, ALUSrcBE, SrcBE); - alu #(`XLEN) alu(SrcAE, SrcBE, ALUControlE, Funct3E, ALUResultE, AddressE); + alu #(`XLEN) alu(SrcAE, SrcBE, ALUControlE, Funct3E, ALUResultE, IEUAdrE); mux2 #(`XLEN) altresultmux(ExtImmE, PCLinkE, JumpE, AltResultE); mux2 #(`XLEN) ieuresultmux(ALUResultE, AltResultE, ALUResultSrcE, IEUResultE); - assign MemAdrE = AddressE; // *** clean up this naming - assign PCTargetE = AddressE; // *** clean up this naming // Memory stage pipeline register flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM); - flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, MemAdrE, MemAdrM); flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM); mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, ResultM); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 4619b337c..101ca4dab 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -37,7 +37,7 @@ module ieu ( input logic FWriteIntE, input logic IllegalFPUInstrE, input logic [`XLEN-1:0] FWriteDataE, - output logic [`XLEN-1:0] PCTargetE, + output logic [`XLEN-1:0] IEUAdrE, output logic MulDivE, W64E, output logic [2:0] Funct3E, output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B @@ -49,7 +49,7 @@ module ieu ( output logic [1:0] MemRWM, // read/write control goes to LSU output logic [1:0] AtomicE, // atomic control goes to LSU output logic [1:0] AtomicM, // atomic control goes to LSU - output logic [`XLEN-1:0] MemAdrM, MemAdrE, WriteDataM, // Address and write data to LSU + output logic [`XLEN-1:0] WriteDataM, // Address and write data to LSU output logic [2:0] Funct3M, // size and signedness to LSU output logic [`XLEN-1:0] SrcAM, // to privilege and fpu @@ -127,11 +127,11 @@ module ieu ( .ALUControlE, .Funct3E, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, .JumpE, .IllegalFPUInstrE, .FWriteDataE, .PCE, .PCLinkE, .FlagsE, - .PCTargetE, + .IEUAdrE, .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B // Memory stage signals .StallM, .FlushM, .FWriteIntM, .FIntResM, - .SrcAM, .WriteDataM, .MemAdrM, .MemAdrE, + .SrcAM, .WriteDataM, // Writeback stage signals .StallW, .FlushW, .FWriteIntW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, diff --git a/wally-pipelined/src/ifu/bpred.sv b/wally-pipelined/src/ifu/bpred.sv index 3121e725c..56308b1d7 100644 --- a/wally-pipelined/src/ifu/bpred.sv +++ b/wally-pipelined/src/ifu/bpred.sv @@ -45,7 +45,7 @@ module bpred // *** the specifics of how this is encode is subject to change. input logic PCSrcE, // AKA Branch Taken // Signals required to check the branch prediction accuracy. - input logic [`XLEN-1:0] PCTargetE, // The branch destination if the branch is taken. + input logic [`XLEN-1:0] IEUAdrE, // The branch destination if the branch is taken. input logic [`XLEN-1:0] PCD, // The address the branch predictor took. input logic [`XLEN-1:0] PCLinkE, // The address following the branch instruction. (AKA Fall through address) input logic [4:0] InstrClassE, @@ -165,7 +165,7 @@ module bpred // update .UpdateEN((|InstrClassE | (PredictionInstrClassWrongE)) & ~StallE), .UpdatePC(PCE), - .UpdateTarget(PCTargetE), + .UpdateTarget(IEUAdrE), .UpdateInvalid(PredictionInstrClassWrongE), .UpdateInstrClass(InstrClassE)); @@ -218,7 +218,7 @@ module bpred // Check the prediction makes execution. // first check if the target or fallthrough address matches what was predicted. - assign TargetWrongE = PCTargetE != PCD; + assign TargetWrongE = IEUAdrE != PCD; assign FallThroughWrongE = PCLinkE != PCD; // If the target is taken check the target rather than fallthrough. The instruction needs to be a branch if PCSrcE is selected // Remember the bpred can incorrectly predict a non cfi instruction as a branch taken. If the real instruction is non cfi diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 093226098..8bc8a185f 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -40,7 +40,7 @@ module ifu ( // Execute output logic [`XLEN-1:0] PCLinkE, input logic PCSrcE, - input logic [`XLEN-1:0] PCTargetE, + input logic [`XLEN-1:0] IEUAdrE, output logic [`XLEN-1:0] PCE, output logic BPPredWrongE, // Mem @@ -223,7 +223,7 @@ module ifu ( .SelBPPredF(SelBPPredF), .PCE(PCE), .PCSrcE(PCSrcE), - .PCTargetE(PCTargetE), + .IEUAdrE(IEUAdrE), .PCD(PCD), .PCLinkE(PCLinkE), .InstrClassE(InstrClassE), @@ -242,8 +242,8 @@ module ifu ( assign BPPredClassNonCFIWrongE = 1'b0; end endgenerate - // The true correct target is PCTargetE if PCSrcE is 1 else it is the fall through PCLinkE. - assign PCCorrectE = PCSrcE ? PCTargetE : PCLinkE; + // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. + assign PCCorrectE = PCSrcE ? IEUAdrE : PCLinkE; // pcadder // add 2 or 4 to the PC, based on whether the instruction is 16 bits or 32 diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 791e4d4a7..6fcf4013d 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -49,9 +49,9 @@ module lsu output logic DCacheAccess, // address and write data - input logic [`XLEN-1:0] MemAdrM, - input logic [`XLEN-1:0] MemAdrE, - input logic [`XLEN-1:0] WriteDataM, + input logic [`XLEN-1:0] IEUAdrE, + output logic [`XLEN-1:0] MemAdrM, + input logic [`XLEN-1:0] WriteDataM, output logic [`XLEN-1:0] ReadDataM, // cpu privilege @@ -129,6 +129,8 @@ module lsu assign AnyCPUReqM = (|MemRWM) | (|AtomicM); + flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, MemAdrM); + // *** add generate to conditionally create hptw, lsuArb, and mmu // based on `MEM_VIRTMEM hptw hptw(.clk(clk), @@ -169,7 +171,7 @@ module lsu .Funct3M(Funct3M), .AtomicM(AtomicM), .MemAdrM(MemAdrM), - .MemAdrE(MemAdrE[11:0]), + .IEUAdrE(IEUAdrE[11:0]), .CommittedM(CommittedM), .PendingInterruptM(PendingInterruptM), .StallW(StallW), @@ -251,7 +253,7 @@ module lsu .Funct7M(Funct7M), .FlushDCacheM, .AtomicM(AtomicMtoDCache), - .MemAdrE(MemAdrEtoDCache), + .IEUAdrE(MemAdrEtoDCache), .MemPAdrM(MemPAdrM), .VAdr(MemAdrM[11:0]), .WriteDataM(WriteDataM), diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index c0647c2b6..be73b0850 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -40,7 +40,7 @@ module lsuArb input logic [2:0] Funct3M, input logic [1:0] AtomicM, input logic [`XLEN-1:0] MemAdrM, - input logic [11:0] MemAdrE, + input logic [11:0] IEUAdrE, input logic StallW, input logic PendingInterruptM, // to CPU @@ -85,7 +85,7 @@ module lsuArb assign AtomicMtoDCache = SelPTW ? 2'b00 : AtomicM; assign MemAdrMExt = {2'b00, MemAdrM}; assign MemPAdrMtoDCache = SelPTW ? TranslationPAdrM : MemAdrMExt[`PA_BITS-1:0]; - assign MemAdrEtoDCache = SelPTW ? TranslationPAdrE[11:0] : MemAdrE[11:0]; + assign MemAdrEtoDCache = SelPTW ? TranslationPAdrE[11:0] : IEUAdrE[11:0]; assign StallWtoDCache = SelPTW ? 1'b0 : StallW; // always block interrupts when using the hardware page table walker. assign CommittedM = SelPTW ? 1'b1 : CommittedMfromDCache; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 4e8dbc47e..13c2e77a5 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -68,7 +68,6 @@ module wallypipelinedhart ( (* mark_debug = "true" *) logic [31:0] InstrM; logic [`XLEN-1:0] PCF, PCD, PCE, PCLinkE; (* mark_debug = "true" *) logic [`XLEN-1:0] PCM; - logic [`XLEN-1:0] PCTargetE; logic [`XLEN-1:0] CSRReadValW, MulDivResultW; logic [`XLEN-1:0] PrivilegedNextPCM; (* mark_debug = "true" *) logic [1:0] MemRWM; @@ -122,7 +121,7 @@ module wallypipelinedhart ( // cpu lsu interface logic [2:0] Funct3M; - logic [`XLEN-1:0] MemAdrE; + logic [`XLEN-1:0] IEUAdrE; (* mark_debug = "true" *) logic [`XLEN-1:0] WriteDataM; (* mark_debug = "true" *) logic [`XLEN-1:0] MemAdrM; (* mark_debug = "true" *) logic [`XLEN-1:0] ReadDataM; @@ -170,7 +169,7 @@ module wallypipelinedhart ( .InstrReadF, .ICacheStallF, // Execute - .PCLinkE, .PCSrcE, .PCTargetE, .PCE, + .PCLinkE, .PCSrcE, .IEUAdrE, .PCE, .BPPredWrongE, // Mem @@ -209,7 +208,7 @@ module wallypipelinedhart ( // Execute Stage interface .PCE, .PCLinkE, .FWriteIntE, .IllegalFPUInstrE, - .FWriteDataE, .PCTargetE, .MulDivE, .W64E, + .FWriteDataE, .IEUAdrE, .MulDivE, .W64E, .Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B //.SrcAE, .SrcBE, .FWriteIntM, @@ -219,7 +218,7 @@ module wallypipelinedhart ( .MemRWM, // read/write control goes to LSU .AtomicE, // atomic control goes to LSU .AtomicM, // atomic control goes to LSU - .MemAdrM, .MemAdrE, .WriteDataM, // Address and write data to LSU + .WriteDataM, // Write data to LSU .Funct3M, // size and signedness to LSU .SrcAM, // to privilege and fpu .RdM, .FIntResM, .InvalidateICacheM, .FlushDCacheM, @@ -248,7 +247,7 @@ module wallypipelinedhart ( .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, //.DataMisalignedM(DataMisalignedM), - .MemAdrE, .MemAdrM, .WriteDataM, + .IEUAdrE, .MemAdrM, .WriteDataM, .ReadDataM, .FlushDCacheM, // connected to ahb (all stay the same) .DCtoAHBPAdrM, .DCtoAHBReadM, .DCtoAHBWriteM, .DCfromAHBAck, diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index b09b19aab..a64ca4340 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -100,7 +100,7 @@ module testbench(); flopenrc #(`XLEN) PCWReg(clk, reset, `FLUSHW, ~`STALLW, dut.hart.ifu.PCM, PCW); flopenr #(32) InstrWReg(clk, reset, ~`STALLW, `FLUSHW ? nop : dut.hart.ifu.InstrM, InstrW); flopenrc #(1) controlregW(clk, reset, `FLUSHW, ~`STALLW, dut.hart.ieu.c.InstrValidM, InstrValidW); - flopenrc #(`XLEN) MemAdrWReg(clk, reset, `FLUSHW, ~`STALLW, dut.hart.ieu.dp.MemAdrM, MemAdrW); + flopenrc #(`XLEN) MemAdrWReg(clk, reset, `FLUSHW, ~`STALLW, dut.hart.MemAdrM, MemAdrW); flopenrc #(`XLEN) WriteDataWReg(clk, reset, `FLUSHW, ~`STALLW, dut.hart.WriteDataM, WriteDataW); flopenr #(1) TrapWReg(clk, reset, ~`STALLW, dut.hart.hzu.TrapM, TrapW); From 7a8162497b9ffa29a6e21b2c0918c265e4e8d2b8 Mon Sep 17 00:00:00 2001 From: David Harris Date: Wed, 15 Dec 2021 12:38:35 -0800 Subject: [PATCH 03/38] Added irscv-arch-test and rsicv-isa-sim --- addins/riscv-arch-test | 2 +- addins/riscv-isa-sim | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/addins/riscv-arch-test b/addins/riscv-arch-test index 84d043817..be67c99bd 160000 --- a/addins/riscv-arch-test +++ b/addins/riscv-arch-test @@ -1 +1 @@ -Subproject commit 84d043817f75f752c9873326475e11f16e3a6f7c +Subproject commit be67c99bd461742aa1c100bcc0732657faae2230 diff --git a/addins/riscv-isa-sim b/addins/riscv-isa-sim index d22b28019..0f30988e4 160000 --- a/addins/riscv-isa-sim +++ b/addins/riscv-isa-sim @@ -1 +1 @@ -Subproject commit d22b280198e74b871e04fc0ddb622fb825fdae49 +Subproject commit 0f30988e4d0e8daac893834b91979f7700bab481 From da1df17fbbf646d10d3a1ec64b8c8e7c6c602f08 Mon Sep 17 00:00:00 2001 From: David Harris Date: Fri, 17 Dec 2021 17:45:26 -0800 Subject: [PATCH 04/38] Do File cleanups --- examples/asm/ch5/Makefile | 10 +++ examples/asm/ch5/ch5 | Bin 0 -> 1080 bytes examples/asm/ch5/ch5.S | 16 +++++ examples/asm/ch5/ch5.debug | 38 +++++++++++ examples/asm/ch5/example | Bin 0 -> 6072 bytes examples/asm/example/Makefile | 2 + examples/asm/example/example | Bin 0 -> 6072 bytes examples/asm/example/example.S | 67 ++++++++++++++++++++ examples/asm/test | Bin 0 -> 6296 bytes examples/asm/test.S | 22 +++++++ wally-pipelined/regression/fpga-wave.do | 20 ++---- wally-pipelined/regression/linux-wave.do | 8 +-- wally-pipelined/regression/wave-all.do | 28 ++------ wally-pipelined/regression/wave-coremark.do | 8 +-- wally-pipelined/regression/wave.do | 22 ++----- 15 files changed, 183 insertions(+), 58 deletions(-) create mode 100644 examples/asm/ch5/Makefile create mode 100755 examples/asm/ch5/ch5 create mode 100644 examples/asm/ch5/ch5.S create mode 100644 examples/asm/ch5/ch5.debug create mode 100755 examples/asm/ch5/example create mode 100644 examples/asm/example/Makefile create mode 100755 examples/asm/example/example create mode 100644 examples/asm/example/example.S create mode 100755 examples/asm/test create mode 100644 examples/asm/test.S diff --git a/examples/asm/ch5/Makefile b/examples/asm/ch5/Makefile new file mode 100644 index 000000000..6f1cd89d4 --- /dev/null +++ b/examples/asm/ch5/Makefile @@ -0,0 +1,10 @@ +ch5.debug: ch5 + riscv64-unknown-elf-objdump -D ch5 > ch5.debug + +ch5: ch5.S Makefile + riscv64-unknown-elf-gcc -nodefaultlibs -nostartfiles -o ch5 ch5.S +# -ffreestanding +# -nostdlib + +clean: + rm -f ch5 ch5.debug \ No newline at end of file diff --git a/examples/asm/ch5/ch5 b/examples/asm/ch5/ch5 new file mode 100755 index 0000000000000000000000000000000000000000..3684ea3833c315acc061fcbd46daf2616e3a457e GIT binary patch literal 1080 zcmb_aO-sW-5S{c3y+sfyc&XTf7gK3Pya-8Yl|m^>_26mAml|x7kgTPiRP^AX2Y-%7 z|C0CvbT*mAHX6LRhxcaR%)Xudxa;;03yK1A6yO8IY?>f|PKnn-8kCWA*vG666(}cF zLi--lE9Ds_B!ij?k2M`O>ngM!RIs0F5TI-{5h5?PC!lO$SdJ!pyIwQgVPgTNfC~YR z02?9o%EKadC(kioQv^E9oG7|2!TaZ&l!Pk@Dlnt_koh&zpELS8>MHuos`4Jv72Ao9 zKI)=AAU)gX0qHN9`d*IDS&r`H=+`;^(BR{+VcX~J6W{J@0XTv0!YBwrVC-@fSmua@ zf#-8Ks-rUO7^enncaM4l#&F&q4q125Nn$IGSEh!K$ vzIwWM)>D+eQT;J3DB)c6xI}uDco`Y@|1nfD%{G?ZU;OLe*5A|mzxw|IBGX>; literal 0 HcmV?d00001 diff --git a/examples/asm/ch5/ch5.S b/examples/asm/ch5/ch5.S new file mode 100644 index 000000000..cfba56458 --- /dev/null +++ b/examples/asm/ch5/ch5.S @@ -0,0 +1,16 @@ +# ch5.s +# David_Harris@hmc.edu 14 December 2021 + +.section .text.init + +.globl _start +_start: + lw x1, 4(x0) + sw x1, 8(x0) + add x2, x1, x1 + beq x1, x2, done +loop: + jal x0, loop +done: + +.end diff --git a/examples/asm/ch5/ch5.debug b/examples/asm/ch5/ch5.debug new file mode 100644 index 000000000..28027a62e --- /dev/null +++ b/examples/asm/ch5/ch5.debug @@ -0,0 +1,38 @@ + +ch5: file format elf64-littleriscv + + +Disassembly of section .text: + +0000000000010078 <_start>: + 10078: 00402083 lw ra,4(zero) # 4 <_start-0x10074> + 1007c: 00102423 sw ra,8(zero) # 8 <_start-0x10070> + 10080: 00108133 add sp,ra,ra + 10084: 00208463 beq ra,sp,1008c + +0000000000010088 : + 10088: 0000006f j 10088 + +Disassembly of section .riscv.attributes: + +0000000000000000 <.riscv.attributes>: + 0: 3241 addiw tp,tp,-16 + 2: 0000 unimp + 4: 7200 ld s0,32(a2) + 6: 7369 lui t1,0xffffa + 8: 01007663 bgeu zero,a6,14 <_start-0x10064> + c: 0028 addi a0,sp,8 + e: 0000 unimp + 10: 7205 lui tp,0xfffe1 + 12: 3676 fld fa2,376(sp) + 14: 6934 ld a3,80(a0) + 16: 7032 0x7032 + 18: 5f30 lw a2,120(a4) + 1a: 326d addiw tp,tp,-5 + 1c: 3070 fld fa2,224(s0) + 1e: 615f 7032 5f30 0x5f307032615f + 24: 3266 fld ft4,120(sp) + 26: 3070 fld fa2,224(s0) + 28: 645f 7032 5f30 0x5f307032645f + 2e: 30703263 0x30703263 + ... diff --git a/examples/asm/ch5/example b/examples/asm/ch5/example new file mode 100755 index 0000000000000000000000000000000000000000..a9b74047949c0c1022323eb3f86a6a5e588b2095 GIT binary patch literal 6072 zcmeHLZ%iD=6@UBZ4qxpiU`TOlHwMlHm(mL)wBRN|$Dadir6h{$RF0Y~%ie;|x#P}z zi({f%xj&ot`aHd53G8*2Q)61IXJCynjC z*`0x7xl=#%LludUX5P$ie(%kjH?y;|`-kJmbmJ+jN%1Y7fVG{DqHtDrvuA==_MJAugU*ws*_o!rgNo)_DZ&V9yp4z^5 zVv$rSKr$NrpfJJRDm)bZ!f`n2nFn`%F*;RfvsFHL&a8d6d>*E^&I7FBJypQ9&O?dq z=%jF-yY;|aV?o(;!+?F3*pB|$gSO8#rEvH;%!#(~PG{uCE#BM6#Uil?;T%OyG7-H+ zc*O!h5AtG>LfaV6jWrfTJ!RAO*~)==h^!ahk1jkOI64(el_kjdTtV5@9n&7K$Cu0# zn+qL|=0_c0xb4@jz!+JPq~NfDIQ>C zq$hG5oOx-Ou&2R^dBTPg^Mr^J^MrsB^8}9)^8|+y^8}#8jIp_S^Zw*W0xrIu9I%!Hq_ob-f8z<)|>jr`}vnI?`?W<{6*d`V?5#&Lon*SQ>P3Jiq7{$kUxo6IBNbd zG7Li15ZpWxtr(i;Qv$N$b;FyLqCswEu;SkzebO-GOUEm=*Tml+$S15f=blUOsY|Dw z=u6*e7uIj2$*P{~@py6VT9Si;7cLby9NW}+r${b}rT+Dmxk6-p zP9&nOTr1~sj0x%JLcDHp^KYj{+YYuXO;_!p7KwVeR`j4P+Lm1NS6di&u+nS;vVY5_EAd%)OpLg32Lqht#&8$kjA50e zlk{Aurf0@o+BSOuA`hC+arL)NIZY+&|GGTqRb;e4Doa zN4363Le)}$6f!BgN#P}hf(5ghT?+UvXYcn+hJd$yb}wI!3s7zGm-GuDf;%g212&)K zcI@!RM)qc1>JcChCbQyemPi>j0UQ}Evr4RraO0k}qW}RuW4`pW1UBd36MZgA97yDe z9qT>FZSa@;XR~|Kz*okH+`xz(cCW&wkzT=)!#JFzabQdMs(}koZ;A8Uh-aH`+OWn! zN5;1vfj-N@Kbr&p5OEHc*0CMGhe-N6=4|8sGY5Vp2R@qv$1k_6_P@-5=UPV|7b4p_ zqM5Ar^NocQSM1wTid*-2t$_YX*5_q+4&0vuk0G9q`?-CSK_G4$=@>TGiVMhQ@k-jOp8!+KwLH+D#pHz@V^gI%&; z^6Eh?EXjQbKnVsyepOe!uA1x&!;U0tn%pm`y}H&9-I^RwC2wCK&=0|`y{e+)TSt;I zU`MH65X4{t-?{Q&(Ua0MlDeV<-)3 zDCq0Oz;8iwhHof$wlw~ z7|Ts!5YmE57=gtVXCPutJ9jPyq$?bjOjiSHAgt;TkbS*ih7_sGT~v+u8!-lOh5G}# z+=WutOu2{EaJ%W?Qumn44z8tOfh)sZmtoEoHU)YDF5Gbe+(_Uu?kFx<*EL^PpRSTO z-T$68q6u<>uyc;xTTW8bFXMl?%!H5CvQ&yKqR(jTe#8Dp(%+JcI@b30vFE-FeRi%_ zQzE-hFazxTV6qN7?$6L??-4iEXS`hc&DfFYKf718Q+@Vcw(7I?c8fkc@86(5Gd3_H zl~#YAMx0U8njV(4X3*xfL^;KZ^^wX|lOvbumqnizTL?TZ~)BC$wz(BK^;9otRbs EKb_svjsO4v literal 0 HcmV?d00001 diff --git a/examples/asm/example/Makefile b/examples/asm/example/Makefile new file mode 100644 index 000000000..081f342c7 --- /dev/null +++ b/examples/asm/example/Makefile @@ -0,0 +1,2 @@ +example: example.S + riscv64-unknown-elf-gcc -o example example.S diff --git a/examples/asm/example/example b/examples/asm/example/example new file mode 100755 index 0000000000000000000000000000000000000000..a9b74047949c0c1022323eb3f86a6a5e588b2095 GIT binary patch literal 6072 zcmeHLZ%iD=6@UBZ4qxpiU`TOlHwMlHm(mL)wBRN|$Dadir6h{$RF0Y~%ie;|x#P}z zi({f%xj&ot`aHd53G8*2Q)61IXJCynjC z*`0x7xl=#%LludUX5P$ie(%kjH?y;|`-kJmbmJ+jN%1Y7fVG{DqHtDrvuA==_MJAugU*ws*_o!rgNo)_DZ&V9yp4z^5 zVv$rSKr$NrpfJJRDm)bZ!f`n2nFn`%F*;RfvsFHL&a8d6d>*E^&I7FBJypQ9&O?dq z=%jF-yY;|aV?o(;!+?F3*pB|$gSO8#rEvH;%!#(~PG{uCE#BM6#Uil?;T%OyG7-H+ zc*O!h5AtG>LfaV6jWrfTJ!RAO*~)==h^!ahk1jkOI64(el_kjdTtV5@9n&7K$Cu0# zn+qL|=0_c0xb4@jz!+JPq~NfDIQ>C zq$hG5oOx-Ou&2R^dBTPg^Mr^J^MrsB^8}9)^8|+y^8}#8jIp_S^Zw*W0xrIu9I%!Hq_ob-f8z<)|>jr`}vnI?`?W<{6*d`V?5#&Lon*SQ>P3Jiq7{$kUxo6IBNbd zG7Li15ZpWxtr(i;Qv$N$b;FyLqCswEu;SkzebO-GOUEm=*Tml+$S15f=blUOsY|Dw z=u6*e7uIj2$*P{~@py6VT9Si;7cLby9NW}+r${b}rT+Dmxk6-p zP9&nOTr1~sj0x%JLcDHp^KYj{+YYuXO;_!p7KwVeR`j4P+Lm1NS6di&u+nS;vVY5_EAd%)OpLg32Lqht#&8$kjA50e zlk{Aurf0@o+BSOuA`hC+arL)NIZY+&|GGTqRb;e4Doa zN4363Le)}$6f!BgN#P}hf(5ghT?+UvXYcn+hJd$yb}wI!3s7zGm-GuDf;%g212&)K zcI@!RM)qc1>JcChCbQyemPi>j0UQ}Evr4RraO0k}qW}RuW4`pW1UBd36MZgA97yDe z9qT>FZSa@;XR~|Kz*okH+`xz(cCW&wkzT=)!#JFzabQdMs(}koZ;A8Uh-aH`+OWn! zN5;1vfj-N@Kbr&p5OEHc*0CMGhe-N6=4|8sGY5Vp2R@qv$1k_6_P@-5=UPV|7b4p_ zqM5Ar^NocQSM1wTid*-2t$_YX*5_q+4&0vuk0G9q`?-CSK_G4$=@>TGiVMhQ@k-jOp8!+KwLH+D#pHz@V^gI%&; z^6Eh?EXjQbKnVsyepOe!uA1x&!;U0tn%pm`y}H&9-I^RwC2wCK&=0|`y{e+)TSt;I zU`MH65X4{t-?{Q&(Ua0MlDeV<-)3 zDCq0Oz;8iwhHof$wlw~ z7|Ts!5YmE57=gtVXCPutJ9jPyq$?bjOjiSHAgt;TkbS*ih7_sGT~v+u8!-lOh5G}# z+=WutOu2{EaJ%W?Qumn44z8tOfh)sZmtoEoHU)YDF5Gbe+(_Uu?kFx<*EL^PpRSTO z-T$68q6u<>uyc;xTTW8bFXMl?%!H5CvQ&yKqR(jTe#8Dp(%+JcI@b30vFE-FeRi%_ zQzE-hFazxTV6qN7?$6L??-4iEXS`hc&DfFYKf718Q+@Vcw(7I?c8fkc@86(5Gd3_H zl~#YAMx0U8njV(4X3*xfL^;KZ^^wX|lOvbumqnizTL?TZ~)BC$wz(BK^;9otRbs EKb_svjsO4v literal 0 HcmV?d00001 diff --git a/examples/asm/example/example.S b/examples/asm/example/example.S new file mode 100644 index 000000000..f487950a5 --- /dev/null +++ b/examples/asm/example/example.S @@ -0,0 +1,67 @@ +// example.s +// David_Harris@hmc.edu 5 December 2021 + +.section .text.init +//.globl rvtest_entry_point +//rvtest_entry_point: + +.globl main +main: + li a0, 42 + +self_loop: + j self_loop + +.end + +/* +#include "model_test.h" +#include "arch_test.h" +RVTEST_ISA("RV32I") + +.section .text.init +.globl rvtest_entry_point +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN + +#ifdef TEST_CASE_1 + +RVTEST_CASE(0,"//check ISA:=regex(.*32.*);check ISA:=regex(.*I.*);def TEST_CASE_1=True;",add) + +RVTEST_SIGBASE( x3,signature_x3_1) + +inst_0: +// rs2 == rd != rs1, rs1==x4, rs2==x24, rd==x24, rs1_val > 0 and rs2_val > 0, rs2_val == 1, rs1_val == (2**(xlen-1)-1), rs1_val != rs2_val, rs1_val == 2147483647 +// opcode: add ; op1:x4; op2:x24; dest:x24; op1val:0x7fffffff; op2val:0x1 +TEST_RR_OP(add, x24, x4, x24, 0x80000000, 0x7fffffff, 0x1, x3, 0, x18) + + +80000000 : + +.section .text.init +.globl rvtest_entry_point +rvtest_entry_point: +RVMODEL_BOOT +RVTEST_CODE_BEGIN +80000000: feedc0b7 lui ra,0xfeedc +8 + +80003220 : +#endif + + +RVTEST_CODE_END +RVMODEL_HALT +80003220: 00408093 addi ra,ra,4 +80003224: 00100093 li ra,1 + +80003228 : +80003228: 00001f17 auipc t5,0x1 +8000322c: dc1f2c23 sw ra,-552(t5) # 80004000 + +80003230 : +80003230: 0000006f j 80003230 +80003234: 0000 unimp + ... +*/ \ No newline at end of file diff --git a/examples/asm/test b/examples/asm/test new file mode 100755 index 0000000000000000000000000000000000000000..fef7868c62fa8bccead16fa64818e3fd0ba85236 GIT binary patch literal 6296 zcmeHLZ%iD=6@R<8cL%lWfRDzcCINHQVC0^-m=@AF^f)-cj)NMUSg}=iIrbKO&K-yM z7ROX_a(_5uh~vn*h$A1WAdp({Mp%k$+fG2=Qv=AhtGl3E@)2x2W`mzhF&nDa6mQi z_*@5H*H{EwO3aoniv_)fNyyRbq}7DEgtnLD>3k+nn04MqR5HCd*84Bl$`+%sQvO*a zl-L9i4|DHYV%)stSon+VlVR5^K+Xnq%3{2<<)KS@>CvwYgze@q}rrymuBt>kaRO=eGM#jYks2G3>33%PpSSIpOlSym51{(~{lq z+?M?>hw0|#Da+o+9MYO>un4`*aD72*OxPqu5|ah5-%n!K5q4(gjaH{;#KU`y?nvl~ zy7E5h&^RA|VID3{esDd0*tGD(z!N9Z(I z&0{cX8UrilQ6oysqXJ6IqXv|iM|qT(M>&+3M*$^fjLqEK1M$HaTze%xXymjff9;jM zWAVXK(EB(RnGS9KVlZ@Z!^5xE3{D;zzx$K1{fAoqVR$b*xAX3?DZ^s}g(DN2zihs} z=jOhdA5FDQNLkr@`O80lIkxlO-e^fbZ0x#MT}LjA)hHcy#u#-FKlgs-^sYF;a7y_e_CizGh%QlB9*KWAg% z2Cji~We*$1!gJB8zQSLQ4>cZblAJRp<0VUe71Rt_tFPol&zSAThQ}ZL4Bnc#0@gDd z?AGeP6!kYqaE*_fO5vo*SUPn1*NGF8oQteZbCGrX{3H9%M^87}IoF>~TF-u5v78qP z9eoy=S${MJaotLFM&O3Y9w_fMO+7Az9Jp=)E^@=TcJD+&O|s7yKpsiw=y|Y}o)!0L zy=BMlx^M3RD962!oQ^-F@8{}o>T-t4WX@N)td9y{-L|W5R<8GdcJpsWSo2jL(5mmN z5m=4DY6Mmz@P843AJF?SbB5I$>*~H&RMy!1?3N{>l5YNGVc_8>hs$TvSpZ)p6L z`}!A%=kjJ4PFX*{B{(ecvvf1?2WtNy{Vq93@#_?K7_fke9Fin`PVpCc9sf{=VGDi- z6F)CtCaH$v0>xjXb`DcKAMqUAcd6^VgyN~^QU$g1XX@u!YNtx$bI?F>3d45FS82cO z$CX?9lb(-sXkuQ(?Q(M~%5_tAcLQGXtx4(+A*ZR$@kb5s;1x6dcK zRr!clwMn4J9p0cSD`LC1(+iTK2G#ENc5DzuPeAPO1=`#`(W3^Gpy=*C0#d-=<&#y} zW3#7cXm&)mqPTlRxl>hopj~nMWzo~^_xC`c?Vv2F_!bn!Bv{0*)nz)8^t9CS6o6+1Yg4(dtqQSkb^x)m8+)C;1=t-67jK!}V&bAEAv!JeQ<$0{pIXFzLhXsLb5 zCAymHMUl9IN{iilmIFc6t*8hsbCGbm6nO=f1TekjG=|c+b_Kkh82b`5r}>6*cYWyL=?p~7 zxqJ6wK-z*qQFqlZ`-8Fyez&(1^pGNTxr?$Ee=WuUwqTE6b+@5Z6H<9i8!kRSt|>G^u1_|%sw&>LZdE2P+y60bM169Su=9@H zr_NEkzLftZxV@I8O;{uP3}^SQ0{Xv{*=0(Eo1C^_g{o}Z)o)&E6*KeiM9*?o-Bnf`hPP|4 Date: Sat, 18 Dec 2021 05:36:32 -0800 Subject: [PATCH 05/38] Simplified FWriteInt interfaces by merging into RegWrite --- wally-pipelined/src/fpu/fpu.sv | 16 ++-- wally-pipelined/src/ieu/controller.sv | 37 ++------ wally-pipelined/src/ieu/datapath.sv | 20 ++-- wally-pipelined/src/ieu/forward.sv | 10 +- wally-pipelined/src/ieu/ieu.sv | 13 ++- wally-pipelined/src/lsu/subwordread.sv | 94 +++++++++---------- .../src/wally/wallypipelinedhart.sv | 8 +- 7 files changed, 91 insertions(+), 107 deletions(-) diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index d38dcd517..0ba61c6c6 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -36,7 +36,7 @@ module fpu ( input logic [4:0] RdM, RdW, // which FP register to write to (from IEU) output logic FRegWriteM, // FP register write enable output logic FStallD, // Stall the decode stage - output logic FWriteIntE, FWriteIntM, FWriteIntW, // integer register write enable + output logic FWriteIntE, // integer register write enables output logic [`XLEN-1:0] FWriteDataE, // Data to be written to memory output logic [`XLEN-1:0] FIntResM, // data to be written to integer register output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) @@ -260,9 +260,9 @@ module fpu ( flopenrc #(64) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM); flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM); flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); - flopenrc #(8) EMCtrlReg (clk, reset, FlushM, ~StallM, - {FRegWriteE, FResultSelE, FrmE, FmtE, FWriteIntE}, - {FRegWriteM, FResultSelM, FrmM, FmtM, FWriteIntM}); + flopenrc #(7) EMCtrlReg (clk, reset, FlushM, ~StallM, + {FRegWriteE, FResultSelE, FrmE, FmtE}, + {FRegWriteM, FResultSelM, FrmM, FmtM}); // BEGIN MEMORY STAGE @@ -273,9 +273,9 @@ module fpu ( flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(5) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResultSelM, FmtM, FWriteIntM}, - {FRegWriteW, FResultSelW, FmtW, FWriteIntW}); + flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FRegWriteM, FResultSelM, FmtM}, + {FRegWriteW, FResultSelW, FmtW}); // BEGIN WRITEBACK STAGE @@ -290,8 +290,6 @@ module fpu ( end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low assign FStallD = 0; assign FWriteIntE = 0; - assign FWriteIntM = 0; - assign FWriteIntW = 0; assign FWriteDataE = 0; assign FIntResM = 0; assign FDivBusyE = 0; diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 02e380a47..907aa6501 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -37,6 +37,7 @@ module controller( // Execute stage control signals input logic StallE, FlushE, input logic [2:0] FlagsE, + input logic FWriteIntE, output logic PCSrcE, // for datapath and Hazard Unit output logic [2:0] ALUControlE, output logic ALUSrcAE, ALUSrcBE, @@ -56,6 +57,7 @@ module controller( output logic RegWriteM, // for Hazard Unit output logic InvalidateICacheM, FlushDCacheM, output logic InstrValidM, + output logic FWriteIntM, // Writeback stage control signals input logic StallW, FlushW, output logic RegWriteW, // for datapath and Hazard Unit @@ -98,6 +100,7 @@ module controller( logic zeroE, ltE, ltuE; logic unused; logic BranchFlagE; + logic IEURegWriteE; // Extract fields assign OpD = InstrD[6:0]; @@ -167,25 +170,13 @@ module controller( assign CSRZeroSrcD = InstrD[14] ? (InstrD[19:15] == 0) : (Rs1D == 0); // Is a CSR instruction using zero as the source? assign CSRWriteD = CSRReadD & !(CSRZeroSrcD && InstrD[13]); // Don't write if setting or clearing zeros - // ALU Decoding *** should move to ALU for better modularity + // ALU Decoding assign sltD = (Funct3D == 3'b010); assign sltuD = (Funct3D == 3'b011); assign subD = (Funct3D == 3'b000 & Funct7D[5] & OpD[5]); assign sraD = (Funct3D == 3'b101 & Funct7D[5]); - assign SubArithD = ALUOpD & (subD | sraD | sltD | sltuD); // TRUE for R-type subtracts and sra, slt, sltu -// assign SubArithD = aluc3D; // ***cleanup - - // *** replace all of this assign ALUControlD = {W64D, SubArithD, ALUOpD}; -/* always_comb - case(ALUOpD) - 2'b00: ALUControlD = 5'b00000; // addition - 2'b01: ALUControlD = 5'b00000; // add for branch offset -// 2'b01: ALUControlD = 5'b01000; // subtraction -// 2'b11: ALUControlD = 5'b01110; // pass B through for lui ***no longer used - default: ALUControlD = {W64D, aluc3D, Funct3D}; // R-type instructions - endcase*/ // Fences // Ordinary fence is presently a nop @@ -208,32 +199,24 @@ module controller( // Execute stage pipeline control register and logic flopenrc #(27) controlregE(clk, reset, FlushE, ~StallE, {RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, ALUResultSrcD, CSRReadD, CSRWriteD, PrivilegedD, Funct3D, W64D, MulDivD, AtomicD, InvalidateICacheD, FlushDCacheD, InstrValidD}, - {RegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MulDivE, AtomicE, InvalidateICacheE, FlushDCacheE, InstrValidE}); + {IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MulDivE, AtomicE, InvalidateICacheE, FlushDCacheE, InstrValidE}); // Branch Logic assign {zeroE, ltE, ltuE} = FlagsE; mux4 #(1) branchflagmux(zeroE, 1'b0, ltE, ltuE, Funct3E[2:1], BranchFlagE); assign BranchTakenE = BranchFlagE ^ Funct3E[0]; -/* always_comb - case(Funct3E) - 3'b000: BranchTakenE = zeroE; // beq - 3'b001: BranchTakenE = ~zeroE; // bne - 3'b100: BranchTakenE = ltE; // blt - 3'b101: BranchTakenE = ~ltE; // bge - 3'b110: BranchTakenE = ltuE; // bltu - 3'b111: BranchTakenE = ~ltuE; // bgeu - default: BranchTakenE = 1'b0; // undefined mode - endcase*/ assign PCSrcE = JumpE | BranchE & BranchTakenE; assign MemReadE = MemRWE[1]; assign SCE = (ResultSrcE == 3'b100); + + assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers // Memory stage pipeline control register - flopenrc #(17) controlregM(clk, reset, FlushM, ~StallM, - {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, AtomicE, InvalidateICacheE, FlushDCacheE, InstrValidE}, - {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, AtomicM, InvalidateICacheM, FlushDCacheM, InstrValidM}); + flopenrc #(18) controlregM(clk, reset, FlushM, ~StallM, + {RegWriteE, ResultSrcE, MemRWE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, FWriteIntE, AtomicE, InvalidateICacheE, FlushDCacheE, InstrValidE}, + {RegWriteM, ResultSrcM, MemRWM, CSRReadM, CSRWriteM, PrivilegedM, Funct3M, FWriteIntM, AtomicM, InvalidateICacheM, FlushDCacheM, InstrValidM}); // Writeback stage pipeline control register flopenrc #(4) controlregW(clk, reset, FlushW, ~StallW, diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index d0a046e1b..9ad32acbb 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -53,7 +53,6 @@ module datapath ( output logic [`XLEN-1:0] WriteDataM, // Writeback stage signals input logic StallW, FlushW, - input logic FWriteIntW, input logic RegWriteW, input logic SquashSCW, input logic [2:0] ResultSrcW, @@ -92,8 +91,7 @@ module datapath ( assign Rs1D = InstrD[19:15]; assign Rs2D = InstrD[24:20]; assign RdD = InstrD[11:7]; - // *** can FWriteIntW be merged with RegWriteW - regfile regf(clk, reset, {RegWriteW | FWriteIntW}, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D); + regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D); extend ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ExtImmD); // Execute stage pipeline register and logic @@ -106,7 +104,6 @@ module datapath ( mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, ForwardedSrcAE); mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, ForwardedSrcBE); - mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE); comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, FlagsE); mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ExtImmE, ALUSrcBE, SrcBE); @@ -119,12 +116,23 @@ module datapath ( flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM); flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM); - mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, ResultM); // Writeback stage pipeline register and logic flopenrc #(`XLEN) ResultWReg(clk, reset, FlushW, ~StallW, ResultM, ResultW); flopenrc #(5) RdWReg(clk, reset, FlushW, ~StallW, RdM, RdW); flopen #(`XLEN) ReadDataWReg(.clk, .en(~StallW), .d(ReadDataM), .q(ReadDataW)); + mux5 #(`XLEN) resultmuxW(ResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, WriteDataW); + + // floating point interactions: fcvt, fp stores + generate + if (`F_SUPPORTED) begin:fpmux + mux2 #(`XLEN) resultmuxM(IEUResultM, FIntResM, FWriteIntM, ResultM); + mux2 #(`XLEN) writedatamux(ForwardedSrcBE, FWriteDataE, ~IllegalFPUInstrE, WriteDataE); + end else begin + assign ResultM = IEUResultM; + assign WriteDataE = ForwardedSrcBE; + end + endgenerate // handle Store Conditional result if atomic extension supported generate @@ -133,6 +141,4 @@ module datapath ( else assign SCResultW = 0; endgenerate - - mux5 #(`XLEN) resultmuxW(ResultW, ReadDataW, CSRReadValW, MulDivResultW, SCResultW, ResultSrcW, WriteDataW); endmodule diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index cab6c3ed2..40f3cd98d 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -30,7 +30,7 @@ module forward( input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, input logic MemReadE, MulDivE, CSRReadE, input logic RegWriteM, RegWriteW, - input logic FWriteIntE, FWriteIntM, FWriteIntW, + input logic FWriteIntE, input logic SCE, // Forwarding controls output logic [1:0] ForwardAE, ForwardBE, @@ -41,12 +41,12 @@ module forward( ForwardAE = 2'b00; ForwardBE = 2'b00; if (Rs1E != 5'b0) - if ((Rs1E == RdM) & (RegWriteM|FWriteIntM)) ForwardAE = 2'b10; - else if ((Rs1E == RdW) & (RegWriteW|FWriteIntW)) ForwardAE = 2'b01; + if ((Rs1E == RdM) & RegWriteM) ForwardAE = 2'b10; + else if ((Rs1E == RdW) & RegWriteW) ForwardAE = 2'b01; if (Rs2E != 5'b0) - if ((Rs2E == RdM) & (RegWriteM|FWriteIntM)) ForwardBE = 2'b10; - else if ((Rs2E == RdW) & (RegWriteW|FWriteIntW)) ForwardBE = 2'b01; + if ((Rs2E == RdM) & RegWriteM) ForwardBE = 2'b10; + else if ((Rs2E == RdW) & RegWriteW) ForwardBE = 2'b01; end // Stall on dependent operations that finish in Mem Stage and can't bypass in time diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 101ca4dab..aabd87988 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -40,9 +40,7 @@ module ieu ( output logic [`XLEN-1:0] IEUAdrE, output logic MulDivE, W64E, output logic [2:0] Funct3E, - output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B -// output logic [`XLEN-1:0] SrcAE, SrcBE, - input logic FWriteIntM, + output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B // Memory stage interface input logic SquashSCW, // from LSU @@ -59,7 +57,6 @@ module ieu ( // Writeback stage input logic [`XLEN-1:0] CSRReadValW, ReadDataM, MulDivResultW, - input logic FWriteIntW, output logic [4:0] RdW, output logic [`XLEN-1:0] ReadDataW, // input logic [`XLEN-1:0] PCLinkW, @@ -82,6 +79,7 @@ module ieu ( logic ALUResultSrcE; logic SCE; logic [4:0] RdE; + logic FWriteIntM; // forwarding signals logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E; @@ -96,7 +94,7 @@ module ieu ( .StallD, .FlushD, .InstrD, .ImmSrcD, .IllegalIEUInstrFaultD, .IllegalBaseInstrFaultD, // Execute stage control signals - .StallE, .FlushE, .FlagsE, + .StallE, .FlushE, .FlagsE, .FWriteIntE, .PCSrcE, // for datapath and Hazard Unit .ALUControlE, .ALUSrcAE, .ALUSrcBE, .ALUResultSrcE, @@ -109,6 +107,7 @@ module ieu ( .SCE, .AtomicE, .AtomicM, .Funct3M, .RegWriteM, // for Hazard Unit .InvalidateICacheM, .FlushDCacheM, .InstrValidM, + .FWriteIntM, // Writeback stage control signals .StallW, .FlushW, .RegWriteW, // for datapath and Hazard Unit @@ -133,7 +132,7 @@ module ieu ( .StallM, .FlushM, .FWriteIntM, .FIntResM, .SrcAM, .WriteDataM, // Writeback stage signals - .StallW, .FlushW, .FWriteIntW, .RegWriteW, + .StallW, .FlushW, .RegWriteW, .SquashSCW, .ResultSrcW, .ReadDataW, // input logic [`XLEN-1:0] PCLinkW, .CSRReadValW, .ReadDataM, .MulDivResultW, @@ -146,7 +145,7 @@ module ieu ( .Rs1D, .Rs2D, .Rs1E, .Rs2E, .RdE, .RdM, .RdW, .MemReadE, .MulDivE, .CSRReadE, .RegWriteM, .RegWriteW, - .FWriteIntE, .FWriteIntM, .FWriteIntW, + .FWriteIntE, .SCE, // Forwarding controls .ForwardAE, .ForwardBE, diff --git a/wally-pipelined/src/lsu/subwordread.sv b/wally-pipelined/src/lsu/subwordread.sv index 1e9757259..1f04c4cb8 100644 --- a/wally-pipelined/src/lsu/subwordread.sv +++ b/wally-pipelined/src/lsu/subwordread.sv @@ -42,82 +42,82 @@ module subwordread ( logic [`XLEN-1:0] offset4, offset5, offset6, offset7; always_comb - case(Funct3M[1:0]) - 3: offset0 = ReadDataWordMuxM; //ld - 2: offset0 = Funct3M[2] ? {{32'b0}, ReadDataWordMuxM[31:0]} : {{32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; //lw(u) - 1: offset0 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[15:0]} : {{48{ReadDataWordMuxM[15]}}, ReadDataWordMuxM[15:0]}; //lh(u) - 0: offset0 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[7:0]} : {{56{ReadDataWordMuxM[7]}}, ReadDataWordMuxM[7:0]}; //lb(u) - endcase + case(Funct3M[1:0]) + 3: offset0 = ReadDataWordMuxM; //ld + 2: offset0 = Funct3M[2] ? {{32'b0}, ReadDataWordMuxM[31:0]} : {{32{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:0]}; //lw(u) + 1: offset0 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[15:0]} : {{48{ReadDataWordMuxM[15]}}, ReadDataWordMuxM[15:0]}; //lh(u) + 0: offset0 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[7:0]} : {{56{ReadDataWordMuxM[7]}}, ReadDataWordMuxM[7:0]}; //lb(u) + endcase assign offset1 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[15:8]} : {{56{ReadDataWordMuxM[15]}}, ReadDataWordMuxM[15:8]}; //lb(u) always_comb - case(Funct3M[0]) - 1: offset2 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[31:16]} : {{48{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:16]};//lh(u) - 0: offset2 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[23:16]} : {{56{ReadDataWordMuxM[23]}}, ReadDataWordMuxM[23:16]};//lb(u) - endcase + case(Funct3M[0]) + 1: offset2 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[31:16]} : {{48{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:16]};//lh(u) + 0: offset2 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[23:16]} : {{56{ReadDataWordMuxM[23]}}, ReadDataWordMuxM[23:16]};//lb(u) + endcase assign offset3 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[31:24]} : {{56{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:24]};//lb(u) always_comb - case(Funct3M[1:0]) - 3: offset4 = Funct3M[2] ? {{32'b0}, ReadDataWordMuxM[63:32]} : {{32{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:32]};//ld(u) // unaligned will cause fault. - 2: offset4 = Funct3M[2] ? {{32'b0}, ReadDataWordMuxM[63:32]} : {{32{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:32]};//lw(u) - 1: offset4 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[47:32]} : {{48{ReadDataWordMuxM[47]}}, ReadDataWordMuxM[47:32]};//lh(u) - 0: offset4 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[39:32]} : {{56{ReadDataWordMuxM[39]}}, ReadDataWordMuxM[39:32]};//lb(u) - endcase + case(Funct3M[1:0]) + 3: offset4 = Funct3M[2] ? {{32'b0}, ReadDataWordMuxM[63:32]} : {{32{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:32]};//ld(u) // unaligned will cause fault. + 2: offset4 = Funct3M[2] ? {{32'b0}, ReadDataWordMuxM[63:32]} : {{32{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:32]};//lw(u) + 1: offset4 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[47:32]} : {{48{ReadDataWordMuxM[47]}}, ReadDataWordMuxM[47:32]};//lh(u) + 0: offset4 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[39:32]} : {{56{ReadDataWordMuxM[39]}}, ReadDataWordMuxM[39:32]};//lb(u) + endcase assign offset5 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[47:40]} : {{56{ReadDataWordMuxM[47]}}, ReadDataWordMuxM[47:40]};//lb(u) always_comb - case(Funct3M[0]) - 1: offset6 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[63:48]} : {{48{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:48]};//lh(u) - 0: offset6 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[55:48]} : {{56{ReadDataWordMuxM[55]}}, ReadDataWordMuxM[55:48]};//lb(u) - endcase - + case(Funct3M[0]) + 1: offset6 = Funct3M[2] ? {{48'b0}, ReadDataWordMuxM[63:48]} : {{48{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:48]};//lh(u) + 0: offset6 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[55:48]} : {{56{ReadDataWordMuxM[55]}}, ReadDataWordMuxM[55:48]};//lb(u) + endcase + assign offset7 = Funct3M[2] ? {{56'b0}, ReadDataWordMuxM[63:56]} : {{56{ReadDataWordMuxM[63]}}, ReadDataWordMuxM[63:56]};//lb(u) // address mux always_comb - case(MemPAdrM[2:0]) - 0: ReadDataM = offset0; - 1: ReadDataM = offset1; - 2: ReadDataM = offset2; - 3: ReadDataM = offset3; - 4: ReadDataM = offset4; - 5: ReadDataM = offset5; - 6: ReadDataM = offset6; - 7: ReadDataM = offset7; - endcase + case(MemPAdrM[2:0]) + 0: ReadDataM = offset0; + 1: ReadDataM = offset1; + 2: ReadDataM = offset2; + 3: ReadDataM = offset3; + 4: ReadDataM = offset4; + 5: ReadDataM = offset5; + 6: ReadDataM = offset6; + 7: ReadDataM = offset7; + endcase end else begin // 32-bit // byte mux always_comb - case(Funct3M[1:0]) - 3: offset0 = ReadDataWordMuxM; //ld illegal - 2: offset0 = ReadDataWordMuxM[31:0]; //lw - 1: offset0 = Funct3M[2] ? {{16'b0}, ReadDataWordMuxM[15:0]} : {{16{ReadDataWordMuxM[15]}}, ReadDataWordMuxM[15:0]}; //lh(u) - 0: offset0 = Funct3M[2] ? {{24'b0}, ReadDataWordMuxM[7:0]} : {{24{ReadDataWordMuxM[7]}}, ReadDataWordMuxM[7:0]}; //lb(u) - endcase + case(Funct3M[1:0]) + 3: offset0 = ReadDataWordMuxM; //ld illegal + 2: offset0 = ReadDataWordMuxM[31:0]; //lw + 1: offset0 = Funct3M[2] ? {{16'b0}, ReadDataWordMuxM[15:0]} : {{16{ReadDataWordMuxM[15]}}, ReadDataWordMuxM[15:0]}; //lh(u) + 0: offset0 = Funct3M[2] ? {{24'b0}, ReadDataWordMuxM[7:0]} : {{24{ReadDataWordMuxM[7]}}, ReadDataWordMuxM[7:0]}; //lb(u) + endcase assign offset1 = Funct3M[2] ? {{24'b0}, ReadDataWordMuxM[15:8]} : {{24{ReadDataWordMuxM[15]}}, ReadDataWordMuxM[15:8]}; //lb(u) always_comb - case(Funct3M[0]) - 1: offset2 = Funct3M[2] ? {{16'b0}, ReadDataWordMuxM[31:16]} : {{16{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:16]};//lh(u) - 0: offset2 = Funct3M[2] ? {{24'b0}, ReadDataWordMuxM[23:16]} : {{24{ReadDataWordMuxM[23]}}, ReadDataWordMuxM[23:16]};//lb(u) - endcase + case(Funct3M[0]) + 1: offset2 = Funct3M[2] ? {{16'b0}, ReadDataWordMuxM[31:16]} : {{16{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:16]};//lh(u) + 0: offset2 = Funct3M[2] ? {{24'b0}, ReadDataWordMuxM[23:16]} : {{24{ReadDataWordMuxM[23]}}, ReadDataWordMuxM[23:16]};//lb(u) + endcase assign offset3 = Funct3M[2] ? {{24'b0}, ReadDataWordMuxM[31:24]} : {{24{ReadDataWordMuxM[31]}}, ReadDataWordMuxM[31:24]};//lb(u) // address mux always_comb - case(MemPAdrM[1:0]) - 0: ReadDataM = offset0; - 1: ReadDataM = offset1; - 2: ReadDataM = offset2; - 3: ReadDataM = offset3; - endcase + case(MemPAdrM[1:0]) + 0: ReadDataM = offset0; + 1: ReadDataM = offset1; + 2: ReadDataM = offset2; + 3: ReadDataM = offset3; + endcase end endgenerate endmodule diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 13c2e77a5..7be5b8c6e 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -89,7 +89,7 @@ module wallypipelinedhart ( logic [2:0] FRM_REGW; logic [4:0] RdM, RdW; logic FStallD; - logic FWriteIntE, FWriteIntM, FWriteIntW; + logic FWriteIntE; logic [`XLEN-1:0] FWriteDataE; logic [`XLEN-1:0] FIntResM; logic FDivBusyE; @@ -210,8 +210,6 @@ module wallypipelinedhart ( .PCE, .PCLinkE, .FWriteIntE, .IllegalFPUInstrE, .FWriteDataE, .IEUAdrE, .MulDivE, .W64E, .Funct3E, .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B - //.SrcAE, .SrcBE, - .FWriteIntM, // Memory stage interface .SquashSCW, // from LSU @@ -225,7 +223,7 @@ module wallypipelinedhart ( // Writeback stage .CSRReadValW, .ReadDataM, .MulDivResultW, - .FWriteIntW, .RdW, .ReadDataW, + .RdW, .ReadDataW, .InstrValidM, // hazards @@ -370,7 +368,7 @@ module wallypipelinedhart ( .RdM, .RdW, // which FP register to write to (from IEU) .FRegWriteM, // FP register write enable .FStallD, // Stall the decode stage - .FWriteIntE, .FWriteIntM, .FWriteIntW, // integer register write enable + .FWriteIntE, // integer register write enable .FWriteDataE, // Data to be written to memory .FIntResM, // data to be written to integer register .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) From 2a5a7eff8233949d0ed40938a63cd9af548a0a67 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 05:40:38 -0800 Subject: [PATCH 06/38] Forwarding logic factoring --- wally-pipelined/src/ieu/forward.sv | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index 40f3cd98d..d041f9437 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -36,6 +36,8 @@ module forward( output logic [1:0] ForwardAE, ForwardBE, output logic FPUStallD, LoadStallD, MulDivStallD, CSRRdStallD ); + + logic MatchDE; always_comb begin ForwardAE = 2'b00; @@ -50,9 +52,10 @@ module forward( end // Stall on dependent operations that finish in Mem Stage and can't bypass in time - assign FPUStallD = FWriteIntE & ((Rs1D == RdE) | (Rs2D == RdE)); - assign LoadStallD = (MemReadE|SCE) & ((Rs1D == RdE) | (Rs2D == RdE)); - assign MulDivStallD = MulDivE & ((Rs1D == RdE) | (Rs2D == RdE)); - assign CSRRdStallD = CSRReadE & ((Rs1D == RdE) | (Rs2D == RdE)); + assign MatchDE = (Rs1D == RdE) | (Rs2D == RdE); // Decode-stage instruction source depends on result from execute stage instruction + assign FPUStallD = FWriteIntE & MatchDE; + assign LoadStallD = (MemReadE|SCE) & MatchDE; + assign MulDivStallD = MulDivE & MatchDE; + assign CSRRdStallD = CSRReadE & MatchDE; endmodule From b453454b245ff4e6d36c86955bf5e0b628eff46d Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 09:27:25 -0800 Subject: [PATCH 07/38] Moved W64 truncation after result mux --- wally-pipelined/src/ieu/alu.sv | 34 +++++++++++++---------------- wally-pipelined/src/ieu/datapath.sv | 6 ++--- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index e2dd7eef6..1d49bb401 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -32,7 +32,7 @@ module alu #(parameter WIDTH=32) ( output logic [WIDTH-1:0] Result, output logic [WIDTH-1:0] Sum); - logic [WIDTH-1:0] CondInvB, SumTrunc, Shift, SLT, SLTU, bor; + logic [WIDTH-1:0] CondInvB, Shift, SLT, SLTU, FullResult; logic Right; logic Carry, Neg; logic LT, LTU; @@ -50,17 +50,7 @@ module alu #(parameter WIDTH=32) ( assign CondInvB = SubArith ? ~B : B; assign {Carry, Sum} = A + CondInvB + {{(WIDTH-1){1'b0}}, SubArith}; - // support W-type RV64I ADDW/SUBW/ADDIW that sign-extend 32-bit result to 64 bits - generate - if (WIDTH==64) - assign SumTrunc = W64 ? {{32{Sum[31]}}, Sum[31:0]} : Sum; - else - assign SumTrunc = Sum; - endgenerate - // Shifts - // assign arith = alucontrol[3]; // sra - // assign w64 = alucontrol[4]; assign Right = (Funct3[2:0] == 3'b101); // sra or srl shifter sh(A, B[5:0], Right, SubArith, W64, Shift); @@ -80,14 +70,20 @@ module alu #(parameter WIDTH=32) ( assign ALUFunct = Funct3 & {3{ALUOp}}; // Force ALUFunct to 0 to Add when ALUOp = 0 always_comb case (ALUFunct) - 3'b000: Result = SumTrunc; // add or sub - 3'b001: Result = Shift; // sll - 3'b010: Result = SLT; // slt - 3'b011: Result = SLTU; // sltu - 3'b100: Result = A ^ B; // xor - 3'b101: Result = Shift; // sra or srl - 3'b110: Result = A | B; // or - 3'b111: Result = A & B; // and + 3'b000: FullResult = Sum; // add or sub + 3'b001: FullResult = Shift; // sll + 3'b010: FullResult = SLT; // slt + 3'b011: FullResult = SLTU; // sltu + 3'b100: FullResult = A ^ B; // xor + 3'b101: FullResult = Shift; // sra or srl + 3'b110: FullResult = A | B; // or + 3'b111: FullResult = A & B; // and endcase + + // support W-type RV64I ADDW/SUBW/ADDIW/Shifts that sign-extend 32-bit result to 64 bits + generate + if (WIDTH==64) assign Result = W64 ? {{32{FullResult[31]}}, FullResult[31:0]} : FullResult; + else assign Result = FullResult; + endgenerate endmodule diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index 9ad32acbb..c36077d21 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -136,9 +136,7 @@ module datapath ( // handle Store Conditional result if atomic extension supported generate - if (`A_SUPPORTED) - assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW}; - else - assign SCResultW = 0; + if (`A_SUPPORTED) assign SCResultW = {{(`XLEN-1){1'b0}}, SquashSCW}; + else assign SCResultW = 0; endgenerate endmodule From 7868c0da5516aba0bfa6f11b9e0fa618b4703164 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 09:43:09 -0800 Subject: [PATCH 08/38] Cleaning shifter --- wally-pipelined/src/ieu/shifter.sv | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/wally-pipelined/src/ieu/shifter.sv b/wally-pipelined/src/ieu/shifter.sv index 0e53de9de..e09e783c2 100644 --- a/wally-pipelined/src/ieu/shifter.sv +++ b/wally-pipelined/src/ieu/shifter.sv @@ -38,7 +38,7 @@ module shifter ( // extension. generate - if (`XLEN==32) begin + if (`XLEN==32) begin:shifter // funnel shifter (see CMOS VLSI Design 4e Section 11.8.1, note Table 11.11 shift types wrong) logic [62:0] z, zshift; logic [4:0] offset; @@ -56,7 +56,7 @@ module shifter ( // funnel operation assign zshift = z >> offset; assign y = zshift[31:0]; - end else begin // RV64 + end else begin:shifter // RV64 // funnel shifter followed by masking // research idea: investigate shifter designs for mixed 32/64-bit shifts logic [126:0] z, zshift; @@ -83,12 +83,7 @@ module shifter ( // funnel operation assign zshift = z >> offset; - assign ylower = zshift[31:0]; - - // mask upper 32 bits for W-type 32-bit shifts - // harris: is there a clever way to get zshift[31] earlier for arithmetic right shifts to speed up critical path? - assign yupper = w64 ? {32{zshift[31]}} : zshift[63:32]; - assign y = {yupper, ylower}; + assign y = zshift[63:0]; end endgenerate endmodule From a7d7f852a621733b34e5161edb818de22b3ecde0 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 10:01:12 -0800 Subject: [PATCH 09/38] Factored out common parts of shifter --- wally-pipelined/src/ieu/shifter.sv | 50 ++++++++++++------------------ 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/wally-pipelined/src/ieu/shifter.sv b/wally-pipelined/src/ieu/shifter.sv index e09e783c2..de7377a70 100644 --- a/wally-pipelined/src/ieu/shifter.sv +++ b/wally-pipelined/src/ieu/shifter.sv @@ -31,40 +31,28 @@ module shifter ( input logic right, arith, w64, output logic [`XLEN-1:0] y); + localparam BITS = $clog2(`XLEN); + + logic [2*`XLEN-2:0] z, zshift; + logic [BITS-1:0] amttrunc, offset; + // The best shifter architecture differs based on `XLEN. // for RV32, only 32-bit shifts are needed. These are // most efficiently implemented with a funnel shifter. // For RV64, 32 and 64-bit shifts are needed, with sign // extension. + // funnel shifter input (see CMOS VLSI Design 4e Section 11.8.1, note Table 11.11 shift types wrong) generate - if (`XLEN==32) begin:shifter - // funnel shifter (see CMOS VLSI Design 4e Section 11.8.1, note Table 11.11 shift types wrong) - logic [62:0] z, zshift; - logic [4:0] offset; - - // funnel input - always_comb - if (right) + if (`XLEN==32) begin:shifter // RV32 + always_comb // funnel mux + if (right) if (arith) z = {{31{a[31]}}, a}; else z = {31'b0, a}; else z = {a, 31'b0}; - - // shift amount - assign offset = right ? amt[4:0] : ~amt[4:0]; - - // funnel operation - assign zshift = z >> offset; - assign y = zshift[31:0]; + assign amttrunc = amt[4:0]; // shift amount end else begin:shifter // RV64 - // funnel shifter followed by masking - // research idea: investigate shifter designs for mixed 32/64-bit shifts - logic [126:0] z, zshift; - logic [31:0] ylower, yupper; - logic [5:0] offset, amt6; - - // funnel input - always_comb + always_comb // funnel mux if (w64) begin // 32-bit shifts if (right) if (arith) z = {64'b0, {31{a[31]}}, a[31:0]}; @@ -76,16 +64,16 @@ module shifter ( else z = {63'b0, a}; else z = {a, 63'b0}; end - - // shift amount - assign amt6 = w64 ? {1'b0, amt[4:0]} : amt[5:0]; // 32 or 64-bit shift - assign offset = right ? amt6 : ~amt6; - - // funnel operation - assign zshift = z >> offset; - assign y = zshift[63:0]; + assign amttrunc = w64 ? {1'b0, amt[4:0]} : amt[5:0]; // 32 or 64-bit shift end endgenerate + + // opposite offset for right shfits + assign offset = right ? amttrunc : ~amttrunc; + + // funnel operation + assign zshift = z >> offset; + assign y = zshift[`XLEN-1:0]; endmodule From 852c521328f4d08ea9db25d02b2a75227c8fbc82 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 10:08:52 -0800 Subject: [PATCH 10/38] Shared ALU mux input for shifts --- wally-pipelined/src/ieu/alu.sv | 5 ++--- wally-pipelined/src/ieu/shifter.sv | 18 +++++++----------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index 1d49bb401..a2aa1a446 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -69,13 +69,12 @@ module alu #(parameter WIDTH=32) ( // Select appropriate ALU Result assign ALUFunct = Funct3 & {3{ALUOp}}; // Force ALUFunct to 0 to Add when ALUOp = 0 always_comb - case (ALUFunct) + casez (ALUFunct) 3'b000: FullResult = Sum; // add or sub - 3'b001: FullResult = Shift; // sll + 3'b?01: FullResult = Shift; // sll, sra, or srl 3'b010: FullResult = SLT; // slt 3'b011: FullResult = SLTU; // sltu 3'b100: FullResult = A ^ B; // xor - 3'b101: FullResult = Shift; // sra or srl 3'b110: FullResult = A | B; // or 3'b111: FullResult = A & B; // and endcase diff --git a/wally-pipelined/src/ieu/shifter.sv b/wally-pipelined/src/ieu/shifter.sv index de7377a70..fc170e75a 100644 --- a/wally-pipelined/src/ieu/shifter.sv +++ b/wally-pipelined/src/ieu/shifter.sv @@ -27,20 +27,16 @@ module shifter ( input logic [`XLEN-1:0] a, - input logic [5:0] amt, + input logic [`LOG_XLEN-1:0] amt, input logic right, arith, w64, output logic [`XLEN-1:0] y); - localparam BITS = $clog2(`XLEN); - logic [2*`XLEN-2:0] z, zshift; - logic [BITS-1:0] amttrunc, offset; + logic [`LOG_XLEN-1:0] amttrunc, offset; - // The best shifter architecture differs based on `XLEN. - // for RV32, only 32-bit shifts are needed. These are - // most efficiently implemented with a funnel shifter. - // For RV64, 32 and 64-bit shifts are needed, with sign - // extension. + // Handle left and right shifts with a funnel shifter. + // For RV32, only 32-bit shifts are needed. + // For RV64, 32 and 64-bit shifts are needed, with sign extension. // funnel shifter input (see CMOS VLSI Design 4e Section 11.8.1, note Table 11.11 shift types wrong) generate @@ -50,7 +46,7 @@ module shifter ( if (arith) z = {{31{a[31]}}, a}; else z = {31'b0, a}; else z = {a, 31'b0}; - assign amttrunc = amt[4:0]; // shift amount + assign amttrunc = amt; // shift amount end else begin:shifter // RV64 always_comb // funnel mux if (w64) begin // 32-bit shifts @@ -64,7 +60,7 @@ module shifter ( else z = {63'b0, a}; else z = {a, 63'b0}; end - assign amttrunc = w64 ? {1'b0, amt[4:0]} : amt[5:0]; // 32 or 64-bit shift + assign amttrunc = w64 ? {1'b0, amt[4:0]} : amt; // 32 or 64-bit shift end endgenerate From d97d34ee32fc7aa485a825230e0b619a56c5f3e7 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 10:21:17 -0800 Subject: [PATCH 11/38] Simplified Shifter Right input --- wally-pipelined/src/ieu/alu.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index a2aa1a446..0558cbe16 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -51,8 +51,8 @@ module alu #(parameter WIDTH=32) ( assign {Carry, Sum} = A + CondInvB + {{(WIDTH-1){1'b0}}, SubArith}; // Shifts - assign Right = (Funct3[2:0] == 3'b101); // sra or srl - shifter sh(A, B[5:0], Right, SubArith, W64, Shift); + assign Right = Funct3[2]; // sra or srl + shifter sh(A, B[`LOG_XLEN-1:0], Right, SubArith, W64, Shift); // condition code flags based on add/subtract output // Overflow occurs when the numbers being added have the same sign From 7fb4213751e060184b63b81135cf767380e3a0e5 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 10:25:40 -0800 Subject: [PATCH 12/38] Simplified shifter right input --- wally-pipelined/src/ieu/alu.sv | 6 ++--- wally-pipelined/src/ieu/shifter.sv | 42 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index 0558cbe16..827aeb8a1 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -33,7 +33,6 @@ module alu #(parameter WIDTH=32) ( output logic [WIDTH-1:0] Sum); logic [WIDTH-1:0] CondInvB, Shift, SLT, SLTU, FullResult; - logic Right; logic Carry, Neg; logic LT, LTU; logic Overflow; @@ -51,9 +50,8 @@ module alu #(parameter WIDTH=32) ( assign {Carry, Sum} = A + CondInvB + {{(WIDTH-1){1'b0}}, SubArith}; // Shifts - assign Right = Funct3[2]; // sra or srl - shifter sh(A, B[`LOG_XLEN-1:0], Right, SubArith, W64, Shift); - + shifter sh(.A, .Amt(B[`LOG_XLEN-1:0]), .Right(Funct3[2]), .Arith(SubArith), .W64, .Y(Shift)); + // condition code flags based on add/subtract output // Overflow occurs when the numbers being added have the same sign // and the result has the opposite sign diff --git a/wally-pipelined/src/ieu/shifter.sv b/wally-pipelined/src/ieu/shifter.sv index fc170e75a..232f72414 100644 --- a/wally-pipelined/src/ieu/shifter.sv +++ b/wally-pipelined/src/ieu/shifter.sv @@ -26,10 +26,10 @@ `include "wally-config.vh" module shifter ( - input logic [`XLEN-1:0] a, - input logic [`LOG_XLEN-1:0] amt, - input logic right, arith, w64, - output logic [`XLEN-1:0] y); + input logic [`XLEN-1:0] A, + input logic [`LOG_XLEN-1:0] Amt, + input logic Right, Arith, W64, + output logic [`XLEN-1:0] Y); logic [2*`XLEN-2:0] z, zshift; logic [`LOG_XLEN-1:0] amttrunc, offset; @@ -42,34 +42,34 @@ module shifter ( generate if (`XLEN==32) begin:shifter // RV32 always_comb // funnel mux - if (right) - if (arith) z = {{31{a[31]}}, a}; - else z = {31'b0, a}; - else z = {a, 31'b0}; - assign amttrunc = amt; // shift amount + if (Right) + if (Arith) z = {{31{A[31]}}, A}; + else z = {31'b0, A}; + else z = {A, 31'b0}; + assign amttrunc = Amt; // shift amount end else begin:shifter // RV64 always_comb // funnel mux - if (w64) begin // 32-bit shifts - if (right) - if (arith) z = {64'b0, {31{a[31]}}, a[31:0]}; - else z = {95'b0, a[31:0]}; - else z = {32'b0, a[31:0], 63'b0}; + if (W64) begin // 32-bit shifts + if (Right) + if (Arith) z = {64'b0, {31{A[31]}}, A[31:0]}; + else z = {95'b0, A[31:0]}; + else z = {32'b0, A[31:0], 63'b0}; end else begin - if (right) - if (arith) z = {{63{a[63]}}, a}; - else z = {63'b0, a}; - else z = {a, 63'b0}; + if (Right) + if (Arith) z = {{63{A[63]}}, A}; + else z = {63'b0, A}; + else z = {A, 63'b0}; end - assign amttrunc = w64 ? {1'b0, amt[4:0]} : amt; // 32 or 64-bit shift + assign amttrunc = W64 ? {1'b0, Amt[4:0]} : Amt; // 32 or 64-bit shift end endgenerate // opposite offset for right shfits - assign offset = right ? amttrunc : ~amttrunc; + assign offset = Right ? amttrunc : ~amttrunc; // funnel operation assign zshift = z >> offset; - assign y = zshift[`XLEN-1:0]; + assign Y = zshift[`XLEN-1:0]; endmodule From 8a597390e0b105a17e7011180a8ab47f4483a243 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 21:26:00 -0800 Subject: [PATCH 13/38] Renamed RD1D to R1D, etc. --- wally-pipelined/src/ieu/alu.sv | 8 ++++---- wally-pipelined/src/ieu/datapath.sv | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/wally-pipelined/src/ieu/alu.sv b/wally-pipelined/src/ieu/alu.sv index 827aeb8a1..a93ebd294 100644 --- a/wally-pipelined/src/ieu/alu.sv +++ b/wally-pipelined/src/ieu/alu.sv @@ -52,10 +52,10 @@ module alu #(parameter WIDTH=32) ( // Shifts shifter sh(.A, .Amt(B[`LOG_XLEN-1:0]), .Right(Funct3[2]), .Arith(SubArith), .W64, .Y(Shift)); - // condition code flags based on add/subtract output - // Overflow occurs when the numbers being added have the same sign - // and the result has the opposite sign - assign Overflow = (A[WIDTH-1] ~^ CondInvB[WIDTH-1]) & (A[WIDTH-1] ^ Sum[WIDTH-1]); + // condition code flags based on subtract output + // Overflow occurs when the numbers being subtracted have the opposite sign + // and the result has the opposite sign of A + assign Overflow = (A[WIDTH-1] ^ B[WIDTH-1]) & (A[WIDTH-1] ^ Sum[WIDTH-1]); assign Neg = Sum[WIDTH-1]; assign LT = Neg ^ Overflow; assign LTU = ~Carry; diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index c36077d21..9111a61fc 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -66,11 +66,11 @@ module datapath ( // Fetch stage signals // Decode stage signals - logic [`XLEN-1:0] RD1D, RD2D; + logic [`XLEN-1:0] R1D, R2D; logic [`XLEN-1:0] ExtImmD; logic [4:0] RdD; // Execute stage signals - logic [`XLEN-1:0] RD1E, RD2E; + logic [`XLEN-1:0] R1E, R2E; logic [`XLEN-1:0] ExtImmE; // logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, SrcAE2, SrcBE2; // *** MAde forwardedsrcae an output to get rid of a mux in the critical path. @@ -91,19 +91,19 @@ module datapath ( assign Rs1D = InstrD[19:15]; assign Rs2D = InstrD[24:20]; assign RdD = InstrD[11:7]; - regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, WriteDataW, RD1D, RD2D); + regfile regf(clk, reset, RegWriteW, Rs1D, Rs2D, RdW, WriteDataW, R1D, R2D); extend ext(.InstrD(InstrD[31:7]), .ImmSrcD, .ExtImmD); // Execute stage pipeline register and logic - flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, RD1D, RD1E); - flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, RD2D, RD2E); + flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, R1D, R1E); + flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, R2D, R2E); flopenrc #(`XLEN) ExtImmEReg(clk, reset, FlushE, ~StallE, ExtImmD, ExtImmE); flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E); flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - mux3 #(`XLEN) faemux(RD1E, WriteDataW, ResultM, ForwardAE, ForwardedSrcAE); - mux3 #(`XLEN) fbemux(RD2E, WriteDataW, ResultM, ForwardBE, ForwardedSrcBE); + mux3 #(`XLEN) faemux(R1E, WriteDataW, ResultM, ForwardAE, ForwardedSrcAE); + mux3 #(`XLEN) fbemux(R2E, WriteDataW, ResultM, ForwardBE, ForwardedSrcBE); comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, FlagsE); mux2 #(`XLEN) srcamux(ForwardedSrcAE, PCE, ALUSrcAE, SrcAE); mux2 #(`XLEN) srcbmux(ForwardedSrcBE, ExtImmE, ALUSrcBE, SrcBE); From e5d2d7a3fdcec33b98bb95e9998ef38b6edb3daa Mon Sep 17 00:00:00 2001 From: David Harris Date: Sat, 18 Dec 2021 22:08:23 -0800 Subject: [PATCH 14/38] Controller fix --- wally-pipelined/src/ieu/controller.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 907aa6501..94f3d65c1 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -173,7 +173,7 @@ module controller( // ALU Decoding assign sltD = (Funct3D == 3'b010); assign sltuD = (Funct3D == 3'b011); - assign subD = (Funct3D == 3'b000 & Funct7D[5] & OpD[5]); + assign subD = (Funct3D == 3'b000 & Funct7D[5] & OpD[5]); // OpD[5] needed; ***explain why assign sraD = (Funct3D == 3'b101 & Funct7D[5]); assign SubArithD = ALUOpD & (subD | sraD | sltD | sltuD); // TRUE for R-type subtracts and sra, slt, sltu assign ALUControlD = {W64D, SubArithD, ALUOpD}; From c04c56dae148cd41ec7772bafa0578a07c2205c3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 19 Dec 2021 11:49:15 -0800 Subject: [PATCH 15/38] Renamed zero to eq in flag generation --- wally-pipelined/src/ieu/comparator.sv | 6 +++--- wally-pipelined/src/ieu/controller.sv | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/wally-pipelined/src/ieu/comparator.sv b/wally-pipelined/src/ieu/comparator.sv index 141172744..0c161d6d5 100644 --- a/wally-pipelined/src/ieu/comparator.sv +++ b/wally-pipelined/src/ieu/comparator.sv @@ -30,7 +30,7 @@ module comparator #(parameter WIDTH=32) ( output logic [2:0] flags); logic [WIDTH-1:0] bbar, diff; - logic carry, zero, neg, overflow, lt, ltu; + logic carry, eq, neg, overflow, lt, ltu; // NOTE: This can be replaced by some faster logic optimized // to just compute flags and not the difference. @@ -40,13 +40,13 @@ module comparator #(parameter WIDTH=32) ( assign {carry, diff} = a + bbar + 1; // condition code flags based on add/subtract output - assign zero = (diff == 0); + assign eq = (diff == 0); assign neg = diff[WIDTH-1]; // overflow occurs when the numbers being subtracted have the opposite sign // and the result has the opposite sign fron the first assign overflow = (a[WIDTH-1] ^ b[WIDTH-1]) & (a[WIDTH-1] ^ diff[WIDTH-1]); assign lt = neg ^ overflow; assign ltu = ~carry; - assign flags = {zero, lt, ltu}; + assign flags = {eq, lt, ltu}; endmodule diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 94f3d65c1..b081d40f1 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -97,7 +97,7 @@ module controller( logic SubArithD; logic subD, sraD, sltD, sltuD; logic BranchTakenE; - logic zeroE, ltE, ltuE; + logic eqE, ltE, ltuE; logic unused; logic BranchFlagE; logic IEURegWriteE; @@ -202,8 +202,8 @@ module controller( {IEURegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, ALUResultSrcE, CSRReadE, CSRWriteE, PrivilegedE, Funct3E, W64E, MulDivE, AtomicE, InvalidateICacheE, FlushDCacheE, InstrValidE}); // Branch Logic - assign {zeroE, ltE, ltuE} = FlagsE; - mux4 #(1) branchflagmux(zeroE, 1'b0, ltE, ltuE, Funct3E[2:1], BranchFlagE); + assign {eqE, ltE, ltuE} = FlagsE; + mux4 #(1) branchflagmux(eqE, 1'b0, ltE, ltuE, Funct3E[2:1], BranchFlagE); assign BranchTakenE = BranchFlagE ^ Funct3E[0]; assign PCSrcE = JumpE | BranchE & BranchTakenE; From fdf493bd47e081dc50d97549a70185ec6d98ada8 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 13:53:02 -0600 Subject: [PATCH 16/38] minro change. comments about needed changes in dcache. --- wally-pipelined/regression/wave.do | 302 ++++++++++++------------- wally-pipelined/src/cache/dcache.sv | 1 + wally-pipelined/src/cache/dcachefsm.sv | 5 +- 3 files changed, 150 insertions(+), 158 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index e8c76de0e..6a87625b8 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -5,10 +5,9 @@ add wave -noupdate /testbench/reset add wave -noupdate /testbench/test add wave -noupdate /testbench/memfilename add wave -noupdate /testbench/dut/hart/SATP_REGW -add wave -noupdate -expand -group {Execution Stage} /testbench/FunctionName/FunctionName/FunctionName -add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/PCE -add wave -noupdate -expand -group {Execution Stage} /testbench/InstrEName -add wave -noupdate -expand -group {Execution Stage} /testbench/dut/hart/ifu/InstrE +add wave -noupdate -group {Execution Stage} /testbench/dut/hart/ifu/PCE +add wave -noupdate -group {Execution Stage} /testbench/InstrEName +add wave -noupdate -group {Execution Stage} /testbench/dut/hart/ifu/InstrE add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/priv/trap/InstrValidM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/PCM add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName @@ -16,41 +15,41 @@ add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/lsu/MemAdrM add wave -noupdate /testbench/dut/hart/ieu/dp/ResultM add wave -noupdate /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave -noupdate -expand -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM -add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/hart/priv/trap/PendingIntsM -add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/hart/priv/trap/CommittedM -add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/hart/priv/trap/InstrValidM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/TrapM -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/StoreStallD -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/hzu/LSUStall -add wave -noupdate -expand -group HDU -group hazards /testbench/dut/hart/MulDivStallD -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushM -add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushW -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM -add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/PendingIntsM +add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/CommittedM +add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/InstrValidM +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/StoreStallD +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/ICacheStallF +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/LSUStall +add wave -noupdate -group HDU -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF +add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD +add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE +add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushM +add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushW +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM +add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW add wave -noupdate -group Bpred -color Orange /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHR add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]} @@ -173,91 +172,83 @@ add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivResultW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivBusyE -add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/fsm1/CURRENT_STATE -add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/N -add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/D -add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/Q -add wave -noupdate -group divider /testbench/dut/hart/mdu/genblk1/div/rem0 -add wave -noupdate -group icache -color Gold /testbench/dut/hart/ifu/icache/controller/CurrState -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/BasePAdrF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/WayHit -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/BlockReplacementBits -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/EncVicWay -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/VictimWay -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/SetValid} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[0]/CacheTagMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/ValidBits} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteWordEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[1]/CacheTagMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/ValidBits} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/SetValid} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[2]/CacheTagMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/ValidBits} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/SetValid} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[3]/CacheTagMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/DirtyBits} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/ValidBits} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -expand -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/controller/NextState -add wave -noupdate -group icache /testbench/dut/hart/ifu/ITLBMissF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ITLBWriteF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ReadLineF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/PCNextIndexF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ReadLineF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/BasePAdrF -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/SavePC -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/UnalignedSelect -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrPAdrF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrInF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/FetchCount -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/ICacheMemWriteData +add wave -noupdate -expand -group icache -color Gold /testbench/dut/hart/ifu/icache/controller/CurrState +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/BasePAdrF +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/WayHit +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/BlockReplacementBits +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/EncVicWay +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/VictimWay +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/SetValid} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[0]/CacheTagMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/ValidBits} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteWordEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[1]/CacheTagMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/ValidBits} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/SetValid} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[2]/CacheTagMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/ValidBits} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/SetValid} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[3]/CacheTagMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/DirtyBits} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/ValidBits} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/controller/NextState +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/ITLBWriteF +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/ReadLineF +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/ReadLineF +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/BasePAdrF +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrPAdrF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrInF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/FetchCount +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/ICacheMemWriteData add wave -noupdate -group AHB -color Gold /testbench/dut/hart/ebu/BusState add wave -noupdate -group AHB /testbench/dut/hart/ebu/NextBusState add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/AtomicMaskedM @@ -279,7 +270,7 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED -add wave -noupdate -group AHB /testbench/dut/hart/ebu/StallW +add wave -noupdate -expand -group lsu /testbench/dut/hart/hzu/LSUStall add wave -noupdate -expand -group lsu -expand -group {LSU ARB} /testbench/dut/hart/lsu/arbiter/SelPTW add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/hart/lsu/dcache/dcachefsm/CurrState add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/WalkerPageFaultM @@ -375,17 +366,17 @@ add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testb add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/hart/lsu/dcache/VictimWay add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/hart/lsu/dcache/VictimDirtyWay add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/hart/lsu/dcache/VictimDirty -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/MemRWM -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/MemAdrE -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/MemPAdrM -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/Funct3M -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/Funct7M -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/AtomicM -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/FlushDCacheM -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/CacheableM -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/WriteDataM -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/ReadDataM -add wave -noupdate -expand -group lsu -expand -group dcache -group {CPU side} /testbench/dut/hart/lsu/dcache/DCacheStall +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/MemRWM +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/MemAdrE +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/MemPAdrM +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/Funct3M +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/Funct7M +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/AtomicM +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/FlushDCacheM +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/CacheableM +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/WriteDataM +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/ReadDataM +add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/DCacheStall add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/FlushAdrFlag add wave -noupdate -expand -group lsu -expand -group dcache -group status /testbench/dut/hart/lsu/dcache/WayHit add wave -noupdate -expand -group lsu -expand -group dcache -group status -color {Medium Orchid} /testbench/dut/hart/lsu/dcache/CacheHit @@ -421,23 +412,23 @@ add wave -noupdate -expand -group lsu -group pma /testbench/dut/hart/lsu/dmmu/PM add wave -noupdate -expand -group lsu -group pmp /testbench/dut/hart/lsu/dmmu/PMPInstrAccessFaultF add wave -noupdate -expand -group lsu -group pmp /testbench/dut/hart/lsu/dmmu/PMPLoadAccessFaultM add wave -noupdate -expand -group lsu -group pmp /testbench/dut/hart/lsu/dmmu/PMPStoreAccessFaultM -add wave -noupdate -expand -group lsu -group ptwalker -color Gold /testbench/dut/hart/lsu/hptw/genblk1/WalkerState -add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/hart/lsu/hptw/PCF -add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/hart/lsu/hptw/genblk1/TranslationVAdr -add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/hart/lsu/hptw/TranslationPAdr -add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/hart/lsu/hptw/HPTWReadPTE -add wave -noupdate -expand -group lsu -group ptwalker /testbench/dut/hart/lsu/hptw/PTE -add wave -noupdate -expand -group lsu -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/ITLBMissF -add wave -noupdate -expand -group lsu -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/DTLBMissM -add wave -noupdate -expand -group lsu -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/ITLBWriteF -add wave -noupdate -expand -group lsu -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/DTLBWriteM -add wave -noupdate -expand -group lsu -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerInstrPageFaultF -add wave -noupdate -expand -group lsu -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerLoadPageFaultM -add wave -noupdate -expand -group lsu -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerStorePageFaultM +add wave -noupdate -expand -group lsu -expand -group ptwalker -color Gold /testbench/dut/hart/lsu/hptw/genblk1/WalkerState +add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/hart/lsu/hptw/PCF +add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/hart/lsu/hptw/genblk1/TranslationVAdr +add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/hart/lsu/hptw/TranslationPAdr +add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/hart/lsu/hptw/HPTWReadPTE +add wave -noupdate -expand -group lsu -expand -group ptwalker /testbench/dut/hart/lsu/hptw/PTE +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/ITLBMissF +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/DTLBMissM +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/ITLBWriteF +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/DTLBWriteM +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerInstrPageFaultF +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerLoadPageFaultM +add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerStorePageFaultM add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW -add wave -noupdate -group itlb /testbench/dut/hart/ifu/immu/TLBWrite -add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF -add wave -noupdate -group itlb /testbench/dut/hart/ifu/immu/PhysicalAddress +add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/immu/TLBWrite +add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/immu/PhysicalAddress add wave -noupdate /testbench/dut/hart/lsu/dcache/VAdr add wave -noupdate /testbench/dut/hart/lsu/dcache/MemPAdrM add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK @@ -518,7 +509,6 @@ add wave -noupdate /testbench/dut/hart/TrapM add wave -noupdate /testbench/dut/hart/ifu/icache/CompressedF add wave -noupdate /testbench/dut/hart/ifu/icache/PCPF add wave -noupdate /testbench/dut/hart/ifu/PCPFmmu -add wave -noupdate /testbench/dut/hart/ifu/PCPF add wave -noupdate /testbench/dut/hart/ifu/PCF add wave -noupdate /testbench/dut/hart/ifu/immu/Translate add wave -noupdate /testbench/dut/hart/ifu/icache/FinalInstrRawF @@ -528,8 +518,8 @@ add wave -noupdate /testbench/dut/hart/ifu/icache/PCTagF add wave -noupdate /testbench/dut/hart/ifu/icache/PCPSpillF add wave -noupdate /testbench/dut/hart/ifu/icache/ICacheReadEn TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 6} {122378 ns} 0} -quietly wave cursor active 1 +WaveRestoreCursors {{Cursor 6} {24475 ns} 1} {{Cursor 2} {22501 ns} 1} {{Cursor 3} {44117 ns} 0} +quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 297 configure wave -justifyvalue left @@ -544,4 +534,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {122227 ns} {122479 ns} +WaveRestoreZoom {43912 ns} {44304 ns} diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index b546a82b6..258337f8b 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -150,6 +150,7 @@ module dcache AdrSelMux(.d0(IEUAdrE[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), .d1(VAdr[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), .d2(MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + //.d2(VAdr[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), .d3(FlushAdr), .s(SelAdrM), .y(RAdr)); diff --git a/wally-pipelined/src/cache/dcachefsm.sv b/wally-pipelined/src/cache/dcachefsm.sv index 63e712f1a..f69757583 100644 --- a/wally-pipelined/src/cache/dcachefsm.sv +++ b/wally-pipelined/src/cache/dcachefsm.sv @@ -108,7 +108,7 @@ module dcachefsm STATE_PTW_READ_MISS_EVICT_DIRTY, STATE_PTW_READ_MISS_READ_WORD, STATE_PTW_READ_MISS_READ_WORD_DELAY, - STATE_PTW_ACCESS_AFTER_WALK, + STATE_PTW_ACCESS_AFTER_WALK, // dead state remove STATE_UNCACHED_WRITE, STATE_UNCACHED_WRITE_DONE, @@ -623,7 +623,8 @@ module dcachefsm CommittedM = 1'b1; NextState = STATE_READY; - + /// *** BUG BUG BUG missing AMO states. + // read hit valid cached if(MemRWM[1] & CacheableM & CacheHit & ~DTLBMissM) begin DCacheStall = 1'b0; From 620f4a58d454b8cae3ae94a7605080480bb72bb0 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 13:55:57 -0600 Subject: [PATCH 17/38] Adds FSM to LSU which will handle the interactions between the hptw and dcache. This will dramatically simplify the dcache by removing all walker states. --- wally-pipelined/src/lsu/lsu.sv | 101 +++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index b01ee6fad..48b1a56ef 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -129,6 +129,107 @@ module lsu assign AnyCPUReqM = (|MemRWM) | (|AtomicM); + + typedef enum {STATE_T0_READY, + STATE_T0_REPLAY, + STATE_T0_FAULT_REPLAY, + STATE_T3_DTLB_MISS, + STATE_T4_ITLB_MISS, + STATE_T5_ITLB_MISS, + STATE_T7_DITLB_MISS} statetype; + + statetype CurrState, NextState; + logic InterlockStall; + logic SelReplayCPURequest; + logic SelPTW2; + logic WalkerInstrPageFaultRaw; + + + always_ff @(posedge clk) + if (reset) CurrState <= #1 STATE_T0_READY; + else CurrState <= #1 NextState; + + always_comb begin + case(CurrState) + STATE_T0_READY: begin + if(~ITLBMissF & DTLBMissM & AnyCPUReqM) begin + NextState = STATE_T3_DTLB_MISS; + end + else if(ITLBMissF & ~DTLBMissM & ~AnyCPUReqM) begin + NextState = STATE_T4_ITLB_MISS; + end + else if(ITLBMissF & ~DTLBMissM & AnyCPUReqM) begin + NextState = STATE_T5_ITLB_MISS; + end + else if(ITLBMissF & DTLBMissM & AnyCPUReqM) begin + NextState = STATE_T7_DITLB_MISS; + end else begin + NextState = STATE_T0_READY; + end + end + STATE_T0_REPLAY: begin + if(DCacheStall) begin + NextState = STATE_T0_REPLAY; + end else begin + NextState = STATE_T0_READY; + end + end + STATE_T3_DTLB_MISS: begin + if(WalkerLoadPageFaultM | WalkerStorePageFaultM) begin + NextState = STATE_T0_READY; + end else if(DTLBWriteM) begin + NextState = STATE_T0_REPLAY; + end else begin + NextState = STATE_T3_DTLB_MISS; + end + end + STATE_T4_ITLB_MISS: begin + if(WalkerInstrPageFaultRaw | ITLBWriteF) begin + NextState = STATE_T0_READY; + end else begin + NextState = STATE_T4_ITLB_MISS; + end + end + STATE_T5_ITLB_MISS: begin + if(ITLBWriteF) begin + NextState = STATE_T0_REPLAY; + end else if(WalkerInstrPageFaultRaw) begin + NextState = STATE_T0_FAULT_REPLAY; + end else begin + NextState = STATE_T5_ITLB_MISS; + end + end + STATE_T0_FAULT_REPLAY: begin + if(DCacheStall) begin + NextState = STATE_T0_FAULT_REPLAY; + end else begin + NextState = STATE_T0_READY; + end + end + STATE_T7_DITLB_MISS: begin + if(WalkerStorePageFaultM | WalkerLoadPageFaultM) begin + NextState = STATE_T0_READY; + end else if(DTLBWriteM) begin + NextState = STATE_T5_ITLB_MISS; + end else begin + NextState = STATE_T7_DITLB_MISS; + end + end + default: begin + NextState = STATE_T0_READY; + end + endcase + end // always_comb + + // signal to CPU it needs to wait on HPTW. + assign InterlockStall = (NextState != STATE_T0_READY) | (NextState != STATE_T0_FAULT_REPLAY) | (NextState != STATE_T0_READY); + // When replaying CPU memory request after PTW select the IEUAdrM for correct address. + assign SelReplayCPURequest = NextState == STATE_T0_READY; + assign SelPTW2 = (CurrState == STATE_T3_DTLB_MISS) | (CurrState == STATE_T4_ITLB_MISS) | + (CurrState == STATE_T5_ITLB_MISS) | (CurrState == STATE_T7_DITLB_MISS); + + + flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, MemAdrM); // *** add generate to conditionally create hptw, lsuArb, and mmu From 0257c086410dec7a3fb40585cfb5907162f54664 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 14:00:30 -0600 Subject: [PATCH 18/38] Renamed MemAdrM to IEUAdrM. This will free the name MemAdrm for use in the DCache. --- fpga/constraints/debug2.xdc | 2 +- wally-pipelined/regression/linux-wave.do | 2 +- wally-pipelined/regression/wave.do | 2 +- wally-pipelined/src/lsu/lsu.sv | 12 ++++++------ wally-pipelined/src/lsu/lsuArb.sv | 8 ++++---- wally-pipelined/src/mmu/hptw.sv | 4 ++-- wally-pipelined/src/privileged/privileged.sv | 4 ++-- wally-pipelined/src/privileged/trap.sv | 10 +++++----- wally-pipelined/src/wally/wallypipelinedhart.sv | 6 +++--- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/fpga/constraints/debug2.xdc b/fpga/constraints/debug2.xdc index f026114bc..67f6ba5ed 100644 --- a/fpga/constraints/debug2.xdc +++ b/fpga/constraints/debug2.xdc @@ -49,7 +49,7 @@ connect_debug_port u_ila_0/probe7 [get_nets [list {wallypipelinedsoc/hart/PCM[0] create_debug_port u_ila_0 probe set_property port_width 64 [get_debug_ports u_ila_0/probe8] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe8] -connect_debug_port u_ila_0/probe8 [get_nets [list {wallypipelinedsoc/hart/MemAdrM[0]} {wallypipelinedsoc/hart/MemAdrM[1]} {wallypipelinedsoc/hart/MemAdrM[2]} {wallypipelinedsoc/hart/MemAdrM[3]} {wallypipelinedsoc/hart/MemAdrM[4]} {wallypipelinedsoc/hart/MemAdrM[5]} {wallypipelinedsoc/hart/MemAdrM[6]} {wallypipelinedsoc/hart/MemAdrM[7]} {wallypipelinedsoc/hart/MemAdrM[8]} {wallypipelinedsoc/hart/MemAdrM[9]} {wallypipelinedsoc/hart/MemAdrM[10]} {wallypipelinedsoc/hart/MemAdrM[11]} {wallypipelinedsoc/hart/MemAdrM[12]} {wallypipelinedsoc/hart/MemAdrM[13]} {wallypipelinedsoc/hart/MemAdrM[14]} {wallypipelinedsoc/hart/MemAdrM[15]} {wallypipelinedsoc/hart/MemAdrM[16]} {wallypipelinedsoc/hart/MemAdrM[17]} {wallypipelinedsoc/hart/MemAdrM[18]} {wallypipelinedsoc/hart/MemAdrM[19]} {wallypipelinedsoc/hart/MemAdrM[20]} {wallypipelinedsoc/hart/MemAdrM[21]} {wallypipelinedsoc/hart/MemAdrM[22]} {wallypipelinedsoc/hart/MemAdrM[23]} {wallypipelinedsoc/hart/MemAdrM[24]} {wallypipelinedsoc/hart/MemAdrM[25]} {wallypipelinedsoc/hart/MemAdrM[26]} {wallypipelinedsoc/hart/MemAdrM[27]} {wallypipelinedsoc/hart/MemAdrM[28]} {wallypipelinedsoc/hart/MemAdrM[29]} {wallypipelinedsoc/hart/MemAdrM[30]} {wallypipelinedsoc/hart/MemAdrM[31]} {wallypipelinedsoc/hart/MemAdrM[32]} {wallypipelinedsoc/hart/MemAdrM[33]} {wallypipelinedsoc/hart/MemAdrM[34]} {wallypipelinedsoc/hart/MemAdrM[35]} {wallypipelinedsoc/hart/MemAdrM[36]} {wallypipelinedsoc/hart/MemAdrM[37]} {wallypipelinedsoc/hart/MemAdrM[38]} {wallypipelinedsoc/hart/MemAdrM[39]} {wallypipelinedsoc/hart/MemAdrM[40]} {wallypipelinedsoc/hart/MemAdrM[41]} {wallypipelinedsoc/hart/MemAdrM[42]} {wallypipelinedsoc/hart/MemAdrM[43]} {wallypipelinedsoc/hart/MemAdrM[44]} {wallypipelinedsoc/hart/MemAdrM[45]} {wallypipelinedsoc/hart/MemAdrM[46]} {wallypipelinedsoc/hart/MemAdrM[47]} {wallypipelinedsoc/hart/MemAdrM[48]} {wallypipelinedsoc/hart/MemAdrM[49]} {wallypipelinedsoc/hart/MemAdrM[50]} {wallypipelinedsoc/hart/MemAdrM[51]} {wallypipelinedsoc/hart/MemAdrM[52]} {wallypipelinedsoc/hart/MemAdrM[53]} {wallypipelinedsoc/hart/MemAdrM[54]} {wallypipelinedsoc/hart/MemAdrM[55]} {wallypipelinedsoc/hart/MemAdrM[56]} {wallypipelinedsoc/hart/MemAdrM[57]} {wallypipelinedsoc/hart/MemAdrM[58]} {wallypipelinedsoc/hart/MemAdrM[59]} {wallypipelinedsoc/hart/MemAdrM[60]} {wallypipelinedsoc/hart/MemAdrM[61]} {wallypipelinedsoc/hart/MemAdrM[62]} {wallypipelinedsoc/hart/MemAdrM[63]} ]] +connect_debug_port u_ila_0/probe8 [get_nets [list {wallypipelinedsoc/hart/IEUAdrM[0]} {wallypipelinedsoc/hart/IEUAdrM[1]} {wallypipelinedsoc/hart/IEUAdrM[2]} {wallypipelinedsoc/hart/IEUAdrM[3]} {wallypipelinedsoc/hart/IEUAdrM[4]} {wallypipelinedsoc/hart/IEUAdrM[5]} {wallypipelinedsoc/hart/IEUAdrM[6]} {wallypipelinedsoc/hart/IEUAdrM[7]} {wallypipelinedsoc/hart/IEUAdrM[8]} {wallypipelinedsoc/hart/IEUAdrM[9]} {wallypipelinedsoc/hart/IEUAdrM[10]} {wallypipelinedsoc/hart/IEUAdrM[11]} {wallypipelinedsoc/hart/IEUAdrM[12]} {wallypipelinedsoc/hart/IEUAdrM[13]} {wallypipelinedsoc/hart/IEUAdrM[14]} {wallypipelinedsoc/hart/IEUAdrM[15]} {wallypipelinedsoc/hart/IEUAdrM[16]} {wallypipelinedsoc/hart/IEUAdrM[17]} {wallypipelinedsoc/hart/IEUAdrM[18]} {wallypipelinedsoc/hart/IEUAdrM[19]} {wallypipelinedsoc/hart/IEUAdrM[20]} {wallypipelinedsoc/hart/IEUAdrM[21]} {wallypipelinedsoc/hart/IEUAdrM[22]} {wallypipelinedsoc/hart/IEUAdrM[23]} {wallypipelinedsoc/hart/IEUAdrM[24]} {wallypipelinedsoc/hart/IEUAdrM[25]} {wallypipelinedsoc/hart/IEUAdrM[26]} {wallypipelinedsoc/hart/IEUAdrM[27]} {wallypipelinedsoc/hart/IEUAdrM[28]} {wallypipelinedsoc/hart/IEUAdrM[29]} {wallypipelinedsoc/hart/IEUAdrM[30]} {wallypipelinedsoc/hart/IEUAdrM[31]} {wallypipelinedsoc/hart/IEUAdrM[32]} {wallypipelinedsoc/hart/IEUAdrM[33]} {wallypipelinedsoc/hart/IEUAdrM[34]} {wallypipelinedsoc/hart/IEUAdrM[35]} {wallypipelinedsoc/hart/IEUAdrM[36]} {wallypipelinedsoc/hart/IEUAdrM[37]} {wallypipelinedsoc/hart/IEUAdrM[38]} {wallypipelinedsoc/hart/IEUAdrM[39]} {wallypipelinedsoc/hart/IEUAdrM[40]} {wallypipelinedsoc/hart/IEUAdrM[41]} {wallypipelinedsoc/hart/IEUAdrM[42]} {wallypipelinedsoc/hart/IEUAdrM[43]} {wallypipelinedsoc/hart/IEUAdrM[44]} {wallypipelinedsoc/hart/IEUAdrM[45]} {wallypipelinedsoc/hart/IEUAdrM[46]} {wallypipelinedsoc/hart/IEUAdrM[47]} {wallypipelinedsoc/hart/IEUAdrM[48]} {wallypipelinedsoc/hart/IEUAdrM[49]} {wallypipelinedsoc/hart/IEUAdrM[50]} {wallypipelinedsoc/hart/IEUAdrM[51]} {wallypipelinedsoc/hart/IEUAdrM[52]} {wallypipelinedsoc/hart/IEUAdrM[53]} {wallypipelinedsoc/hart/IEUAdrM[54]} {wallypipelinedsoc/hart/IEUAdrM[55]} {wallypipelinedsoc/hart/IEUAdrM[56]} {wallypipelinedsoc/hart/IEUAdrM[57]} {wallypipelinedsoc/hart/IEUAdrM[58]} {wallypipelinedsoc/hart/IEUAdrM[59]} {wallypipelinedsoc/hart/IEUAdrM[60]} {wallypipelinedsoc/hart/IEUAdrM[61]} {wallypipelinedsoc/hart/IEUAdrM[62]} {wallypipelinedsoc/hart/IEUAdrM[63]} ]] create_debug_port u_ila_0 probe set_property port_width 32 [get_debug_ports u_ila_0/probe9] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe9] diff --git a/wally-pipelined/regression/linux-wave.do b/wally-pipelined/regression/linux-wave.do index ff0b82bef..4bf8bf7e6 100644 --- a/wally-pipelined/regression/linux-wave.do +++ b/wally-pipelined/regression/linux-wave.do @@ -75,7 +75,7 @@ add wave -noupdate -expand -group {Memory Stage} /testbench/ExpectedPCM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName add wave -noupdate -expand -group {Memory Stage} /testbench/textM -add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/lsu/MemAdrM +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/lsu/IEUAdrM add wave -noupdate -expand -group {WriteBack stage} /testbench/checkInstrW add wave -noupdate -expand -group {WriteBack stage} /testbench/InstrValidW add wave -noupdate -expand -group {WriteBack stage} /testbench/PCW diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 6a87625b8..71a5b7154 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -12,7 +12,7 @@ add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/priv/trap/I add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/PCM add wave -noupdate -expand -group {Memory Stage} /testbench/InstrMName add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/InstrM -add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/lsu/MemAdrM +add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/lsu/IEUAdrM add wave -noupdate /testbench/dut/hart/ieu/dp/ResultM add wave -noupdate /testbench/dut/hart/ieu/dp/ResultW add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 48b1a56ef..27fb4ab7a 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -50,7 +50,7 @@ module lsu // address and write data input logic [`XLEN-1:0] IEUAdrE, - output logic [`XLEN-1:0] MemAdrM, + output logic [`XLEN-1:0] IEUAdrM, input logic [`XLEN-1:0] WriteDataM, output logic [`XLEN-1:0] ReadDataM, @@ -230,7 +230,7 @@ module lsu - flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, MemAdrM); + flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); // *** add generate to conditionally create hptw, lsuArb, and mmu // based on `MEM_VIRTMEM @@ -238,7 +238,7 @@ module lsu .reset(reset), .SATP_REGW(SATP_REGW), .PCF(PCF), - .MemAdrM(MemAdrM), + .IEUAdrM(IEUAdrM), .ITLBMissF(ITLBMissF & ~PendingInterruptM), .DTLBMissM(DTLBMissM & ~PendingInterruptM), .MemRWM(MemRWM), @@ -272,7 +272,7 @@ module lsu .MemRWM(MemRWM), .Funct3M(Funct3M), .AtomicM(AtomicM), - .MemAdrM(MemAdrM), + .IEUAdrM(IEUAdrM), .IEUAdrE(IEUAdrE[11:0]), .CommittedM(CommittedM), .PendingInterruptM(PendingInterruptM), @@ -295,7 +295,7 @@ module lsu dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .DisableTranslation(DisableTranslation), .PAdr(MemPAdrMtoDCache), - .VAdr(MemAdrM), + .VAdr(IEUAdrM), .Size(Funct3MtoDCache[1:0]), .PTE(PTE), .PageTypeWriteVal(PageType), @@ -356,7 +356,7 @@ module lsu .AtomicM(AtomicMtoDCache), .IEUAdrE(MemAdrEtoDCache), .MemPAdrM(MemPAdrM), - .VAdr(MemAdrM[11:0]), + .VAdr(IEUAdrM[11:0]), .WriteDataM(WriteDataM), .ReadDataM(ReadDataM), .DCacheStall(DCacheStall), diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 3f99ba802..498f0682f 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -38,7 +38,7 @@ module lsuArb input logic [1:0] MemRWM, input logic [2:0] Funct3M, input logic [1:0] AtomicM, - input logic [`XLEN-1:0] MemAdrM, + input logic [`XLEN-1:0] IEUAdrM, input logic [11:0] IEUAdrE, input logic StallW, input logic PendingInterruptM, @@ -67,7 +67,7 @@ module lsuArb logic [2:0] PTWSize; logic [`PA_BITS-1:0] TranslationPAdrM; - logic [`XLEN+1:0] MemAdrMExt; + logic [`XLEN+1:0] IEUAdrMExt; // multiplex the outputs to LSU assign DisableTranslation = SelPTW; // change names between SelPTW would be confusing in DTLB. @@ -82,8 +82,8 @@ module lsuArb flop #(`PA_BITS) TranslationPAdrMReg(clk, TranslationPAdrE, TranslationPAdrM); // delay TranslationPAdrM by a cycle assign AtomicMtoDCache = SelPTW ? 2'b00 : AtomicM; - assign MemAdrMExt = {2'b00, MemAdrM}; - assign MemPAdrMtoDCache = SelPTW ? TranslationPAdrM : MemAdrMExt[`PA_BITS-1:0]; + assign IEUAdrMExt = {2'b00, IEUAdrM}; + assign MemPAdrMtoDCache = SelPTW ? TranslationPAdrM : IEUAdrMExt[`PA_BITS-1:0]; assign MemAdrEtoDCache = SelPTW ? TranslationPAdrE[11:0] : IEUAdrE[11:0]; assign StallWtoDCache = SelPTW ? 1'b0 : StallW; // always block interrupts when using the hardware page table walker. diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index b386b4742..103eef218 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -34,7 +34,7 @@ module hptw ( input logic clk, reset, input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table - input logic [`XLEN-1:0] PCF, MemAdrM, // addresses to translate + input logic [`XLEN-1:0] PCF, IEUAdrM, // addresses to translate input logic ITLBMissF, DTLBMissM, // TLB Miss input logic [1:0] MemRWM, // 10 = read, 01 = write input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU @@ -82,7 +82,7 @@ module hptw assign TLBMiss = (DTLBMissM | ITLBMissF); // Determine which address to translate - assign TranslationVAdr = DTLBWalk ? MemAdrM : PCF; + assign TranslationVAdr = DTLBWalk ? IEUAdrM : PCF; assign CurrentPPN = PTE[`PPN_BITS+9:10]; // State flops diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index c5bc8a45c..fb6da375e 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -56,7 +56,7 @@ module privileged ( input logic StoreMisalignedFaultM, input logic TimerIntM, ExtIntM, SwIntM, input logic [63:0] MTIME_CLINT, MTIMECMP_CLINT, - input logic [`XLEN-1:0] InstrMisalignedAdrM, MemAdrM, + input logic [`XLEN-1:0] InstrMisalignedAdrM, IEUAdrM, input logic [4:0] SetFflagsM, // Trap signals from pmp/pma in mmu @@ -231,7 +231,7 @@ module privileged ( .MIP_REGW, .MIE_REGW, .SIP_REGW, .SIE_REGW, .STATUS_MIE, .STATUS_SIE, .PCM, - .InstrMisalignedAdrM, .MemAdrM, + .InstrMisalignedAdrM, .IEUAdrM, .InstrM, .InstrValidM, .CommittedM, .TrapM, .MTrapM, .STrapM, .UTrapM, .RetM, diff --git a/wally-pipelined/src/privileged/trap.sv b/wally-pipelined/src/privileged/trap.sv index 3d6ce24e5..02f3f6206 100644 --- a/wally-pipelined/src/privileged/trap.sv +++ b/wally-pipelined/src/privileged/trap.sv @@ -39,7 +39,7 @@ module trap ( (* mark_debug = "true" *) input logic [11:0] MIP_REGW, MIE_REGW, SIP_REGW, SIE_REGW, input logic STATUS_MIE, STATUS_SIE, input logic [`XLEN-1:0] PCM, - input logic [`XLEN-1:0] InstrMisalignedAdrM, MemAdrM, + input logic [`XLEN-1:0] InstrMisalignedAdrM, IEUAdrM, input logic [31:0] InstrM, input logic InstrValidM, CommittedM, output logic TrapM, MTrapM, STrapM, UTrapM, RetM, @@ -157,12 +157,12 @@ module trap ( always_comb if (InstrMisalignedFaultM) NextFaultMtvalM = InstrMisalignedAdrM; - else if (LoadMisalignedFaultM) NextFaultMtvalM = MemAdrM; - else if (StoreMisalignedFaultM) NextFaultMtvalM = MemAdrM; + else if (LoadMisalignedFaultM) NextFaultMtvalM = IEUAdrM; + else if (StoreMisalignedFaultM) NextFaultMtvalM = IEUAdrM; else if (BreakpointFaultM) NextFaultMtvalM = PCM; else if (InstrPageFaultM) NextFaultMtvalM = PCM; - else if (LoadPageFaultM) NextFaultMtvalM = MemAdrM; - else if (StorePageFaultM) NextFaultMtvalM = MemAdrM; + else if (LoadPageFaultM) NextFaultMtvalM = IEUAdrM; + else if (StorePageFaultM) NextFaultMtvalM = IEUAdrM; else if (IllegalInstrFaultM) NextFaultMtvalM = {{(`XLEN-32){1'b0}}, InstrM}; else NextFaultMtvalM = 0; endmodule diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 7be5b8c6e..0756b44cd 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -123,7 +123,7 @@ module wallypipelinedhart ( logic [2:0] Funct3M; logic [`XLEN-1:0] IEUAdrE; (* mark_debug = "true" *) logic [`XLEN-1:0] WriteDataM; - (* mark_debug = "true" *) logic [`XLEN-1:0] MemAdrM; + (* mark_debug = "true" *) logic [`XLEN-1:0] IEUAdrM; (* mark_debug = "true" *) logic [`XLEN-1:0] ReadDataM; logic [`XLEN-1:0] ReadDataW; logic CommittedM; @@ -245,7 +245,7 @@ module wallypipelinedhart ( .CommittedM, .DCacheMiss, .DCacheAccess, .SquashSCW, //.DataMisalignedM(DataMisalignedM), - .IEUAdrE, .MemAdrM, .WriteDataM, + .IEUAdrE, .IEUAdrM, .WriteDataM, .ReadDataM, .FlushDCacheM, // connected to ahb (all stay the same) .DCtoAHBPAdrM, .DCtoAHBReadM, .DCtoAHBWriteM, .DCfromAHBAck, @@ -343,7 +343,7 @@ module wallypipelinedhart ( .LoadMisalignedFaultM, .StoreMisalignedFaultM, .TimerIntM, .ExtIntM, .SwIntM, .MTIME_CLINT, .MTIMECMP_CLINT, - .InstrMisalignedAdrM, .MemAdrM, + .InstrMisalignedAdrM, .IEUAdrM, .SetFflagsM, // Trap signals from pmp/pma in mmu // *** do these need to be split up into one for dmem and one for ifu? From 9adcf86a40f969ebd411aebc121b9f0746610f7b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 14:57:42 -0600 Subject: [PATCH 19/38] Modified the icache memory to read using the virtual (non physical) address in the PCNextF stage. This allows recovering from an ITLBMiss to be 1 cycle after and simplifies the hptw slightly. --- wally-pipelined/regression/wave.do | 124 ++++++++++++---------------- wally-pipelined/src/cache/icache.sv | 5 +- wally-pipelined/src/ifu/ifu.sv | 1 + wally-pipelined/src/lsu/lsu.sv | 15 ++-- wally-pipelined/src/mmu/hptw.sv | 4 +- 5 files changed, 67 insertions(+), 82 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 71a5b7154..e7970db86 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -15,31 +15,31 @@ add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/lsu/IEUAdrM add wave -noupdate /testbench/dut/hart/ieu/dp/ResultM add wave -noupdate /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave -noupdate -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/PendingIntsM add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/CommittedM add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/InstrValidM -add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/TrapM -add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/StoreStallD -add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -group HDU -group hazards /testbench/dut/hart/hzu/LSUStall -add wave -noupdate -group HDU -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/StoreStallD +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/LSUStall +add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE @@ -97,21 +97,21 @@ add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/if add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredClassNonCFIWrongE add wave -noupdate -group Bpred -expand -group {bp wrong} /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE add wave -noupdate -group Bpred /testbench/dut/hart/ifu/bpred/bpred/BPPredWrongE -add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrFName -add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/icache/FinalInstrRawF -add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD -add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE -add wave -noupdate -expand -group {instruction pipeline} /testbench/dut/hart/ifu/InstrM -add wave -noupdate -expand -group {instruction pipeline} /testbench/InstrW -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredPCF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext0F -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext1F -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/SelBPPredF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredWrongE -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PrivilegedChangePCM +add wave -noupdate -group {instruction pipeline} /testbench/InstrFName +add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/icache/FinalInstrRawF +add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD +add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE +add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrM +add wave -noupdate -group {instruction pipeline} /testbench/InstrW +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredPCF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext0F +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext1F +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/SelBPPredF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredWrongE +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PrivilegedChangePCM add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ifu/InstrD add wave -noupdate -group {Decode Stage} /testbench/InstrDName add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD @@ -126,7 +126,6 @@ add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rd1 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rd2 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/we3 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/wd3 -add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/IntResultW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ReadDataW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/CSRReadValW add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart/ieu/dp/ResultSrcW @@ -134,8 +133,6 @@ add wave -noupdate -group RegFile -group {write regfile mux} /testbench/dut/hart add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/A add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/B add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/ALUControl -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/result -add wave -noupdate -group alu /testbench/dut/hart/ieu/dp/alu/FlagsE add wave -noupdate -group alu -divider internals add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs1D add wave -noupdate -group Forward /testbench/dut/hart/ieu/fw/Rs2D @@ -154,13 +151,12 @@ add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/Write add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/ALUResultE add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcAE add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcBE -add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCNextF -add wave -noupdate -expand -group PCS /testbench/dut/hart/PCF -add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCD -add wave -noupdate -expand -group PCS /testbench/dut/hart/PCE -add wave -noupdate -expand -group PCS /testbench/dut/hart/PCM -add wave -noupdate -expand -group PCS /testbench/PCW -add wave -noupdate -group muldiv /testbench/dut/hart/mdu/InstrD +add wave -noupdate -group PCS /testbench/dut/hart/ifu/PCNextF +add wave -noupdate -group PCS /testbench/dut/hart/PCF +add wave -noupdate -group PCS /testbench/dut/hart/ifu/PCD +add wave -noupdate -group PCS /testbench/dut/hart/PCE +add wave -noupdate -group PCS /testbench/dut/hart/PCM +add wave -noupdate -group PCS /testbench/PCW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/ForwardedSrcAE add wave -noupdate -group muldiv /testbench/dut/hart/mdu/ForwardedSrcBE add wave -noupdate -group muldiv /testbench/dut/hart/mdu/Funct3E @@ -241,6 +237,9 @@ add wave -noupdate -expand -group icache -expand -group {fsm out and control} /t add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/SelAdr +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/RAdr +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/FinalInstrRawF add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrPAdrF add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrInF add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag @@ -367,7 +366,6 @@ add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testb add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/hart/lsu/dcache/VictimDirtyWay add wave -noupdate -expand -group lsu -expand -group dcache -group Victim /testbench/dut/hart/lsu/dcache/VictimDirty add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/MemRWM -add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/MemAdrE add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/MemPAdrM add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/Funct3M add wave -noupdate -expand -group lsu -expand -group dcache -expand -group {CPU side} /testbench/dut/hart/lsu/dcache/Funct7M @@ -426,9 +424,9 @@ add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group typ add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerLoadPageFaultM add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerStorePageFaultM add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW -add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/immu/TLBWrite -add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/ITLBMissF -add wave -noupdate -expand -group itlb /testbench/dut/hart/ifu/immu/PhysicalAddress +add wave -noupdate -group itlb /testbench/dut/hart/ifu/immu/TLBWrite +add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate -group itlb /testbench/dut/hart/ifu/immu/PhysicalAddress add wave -noupdate /testbench/dut/hart/lsu/dcache/VAdr add wave -noupdate /testbench/dut/hart/lsu/dcache/MemPAdrM add wave -noupdate -group plic /testbench/dut/uncore/plic/plic/HCLK @@ -499,26 +497,12 @@ add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HSELUART add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HADDR add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWRITE add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWDATA -add wave -noupdate -color Gold /testbench/dut/hart/lsu/dcache/subwordread/offset0 -add wave -noupdate /testbench/dut/hart/lsu/dcache/subwordread/offset1 -add wave -noupdate /testbench/dut/hart/lsu/dcache/subwordread/offset2 -add wave -noupdate /testbench/dut/hart/lsu/dcache/subwordread/offset3 -add wave -noupdate /testbench/dut/hart/ExceptionM -add wave -noupdate /testbench/dut/hart/PendingInterruptM -add wave -noupdate /testbench/dut/hart/TrapM -add wave -noupdate /testbench/dut/hart/ifu/icache/CompressedF +add wave -noupdate /testbench/dut/hart/lsu/CurrState +add wave -noupdate /testbench/dut/hart/lsu/InterlockStall +add wave -noupdate /testbench/dut/hart/ifu/icache/PCNextF add wave -noupdate /testbench/dut/hart/ifu/icache/PCPF -add wave -noupdate /testbench/dut/hart/ifu/PCPFmmu -add wave -noupdate /testbench/dut/hart/ifu/PCF -add wave -noupdate /testbench/dut/hart/ifu/immu/Translate -add wave -noupdate /testbench/dut/hart/ifu/icache/FinalInstrRawF -add wave -noupdate /testbench/dut/hart/ifu/icache/StallF -add wave -noupdate /testbench/dut/hart/ifu/icache/ICacheMemReadData -add wave -noupdate /testbench/dut/hart/ifu/icache/PCTagF -add wave -noupdate /testbench/dut/hart/ifu/icache/PCPSpillF -add wave -noupdate /testbench/dut/hart/ifu/icache/ICacheReadEn TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 6} {24475 ns} 1} {{Cursor 2} {22501 ns} 1} {{Cursor 3} {44117 ns} 0} +WaveRestoreCursors {{Cursor 6} {24475 ns} 1} {{Cursor 2} {22501 ns} 1} {{Cursor 3} {23208 ns} 0} quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 297 @@ -534,4 +518,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {43912 ns} {44304 ns} +WaveRestoreZoom {23041 ns} {23377 ns} diff --git a/wally-pipelined/src/cache/icache.sv b/wally-pipelined/src/cache/icache.sv index 372aeaecf..e3e9d6db2 100644 --- a/wally-pipelined/src/cache/icache.sv +++ b/wally-pipelined/src/cache/icache.sv @@ -32,6 +32,7 @@ module icache input logic StallF, input logic [`PA_BITS-1:0] PCNextF, input logic [`PA_BITS-1:0] PCPF, + input logic [`XLEN-1:0] PCF, input logic ExceptionM, PendingInterruptM, @@ -125,7 +126,7 @@ module icache mux3 #(INDEXLEN) AdrSelMux(.d0(PCNextF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), - .d1(PCPF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .d1(PCF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), .d2(PCPSpillF[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), .s(SelAdr), .y(RAdr)); @@ -219,7 +220,7 @@ module icache // Detect if the instruction is compressed assign CompressedF = FinalInstrRawF[1:0] != 2'b11; - assign spill = PCPF[4:1] == 4'b1111 ? 1'b1 : 1'b0; + assign spill = PCF[4:1] == 4'b1111 ? 1'b1 : 1'b0; // to compute the fetch address we need to add the bit shifted diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 8bc8a185f..7c8c15017 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -168,6 +168,7 @@ module ifu ( .PCNextF(PCNextFPhys), .PCPF(PCPFmmu), + .PCF, .WalkerInstrPageFaultF, .InvalidateICacheM); diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 27fb4ab7a..09c1f28c3 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -141,7 +141,6 @@ module lsu statetype CurrState, NextState; logic InterlockStall; logic SelReplayCPURequest; - logic SelPTW2; logic WalkerInstrPageFaultRaw; @@ -222,10 +221,13 @@ module lsu end // always_comb // signal to CPU it needs to wait on HPTW. - assign InterlockStall = (NextState != STATE_T0_READY) | (NextState != STATE_T0_FAULT_REPLAY) | (NextState != STATE_T0_READY); + assign InterlockStall = (CurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | + (CurrState == STATE_T3_DTLB_MISS) | (CurrState == STATE_T4_ITLB_MISS) | + (CurrState == STATE_T5_ITLB_MISS) | (CurrState == STATE_T7_DITLB_MISS); + // When replaying CPU memory request after PTW select the IEUAdrM for correct address. assign SelReplayCPURequest = NextState == STATE_T0_READY; - assign SelPTW2 = (CurrState == STATE_T3_DTLB_MISS) | (CurrState == STATE_T4_ITLB_MISS) | + assign SelPTW = (CurrState == STATE_T3_DTLB_MISS) | (CurrState == STATE_T4_ITLB_MISS) | (CurrState == STATE_T5_ITLB_MISS) | (CurrState == STATE_T7_DITLB_MISS); @@ -250,7 +252,6 @@ module lsu .DCacheStall(DCacheStall), .TranslationPAdr, .HPTWRead(HPTWRead), - .SelPTW(SelPTW), .HPTWStall, .AnyCPUReqM, .MemAfterIWalkDone, @@ -258,14 +259,14 @@ module lsu .WalkerLoadPageFaultM(WalkerLoadPageFaultM), .WalkerStorePageFaultM(WalkerStorePageFaultM)); - assign LSUStall = DCacheStall | HPTWStall; + assign LSUStall = DCacheStall | InterlockStall; assign WalkerPageFaultM = WalkerStorePageFaultM | WalkerLoadPageFaultM; // arbiter between IEU and hptw lsuArb arbiter(.clk(clk), // HPTW connection - .SelPTW(SelPTW), + .SelPTW, .HPTWRead(HPTWRead), .TranslationPAdrE(TranslationPAdr), // CPU connection @@ -371,7 +372,7 @@ module lsu .ITLBWriteF(ITLBWriteF), .ITLBMissF, .MemAfterIWalkDone, - .SelPTW(SelPTW), + .SelPTW, .WalkerPageFaultM(WalkerPageFaultM), .WalkerInstrPageFaultF(WalkerInstrPageFaultF), diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index 103eef218..cb5207886 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -44,7 +44,6 @@ module hptw output logic [`XLEN-1:0] PTE, // page table entry to TLBs output logic [1:0] PageType, // page type to TLBs output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry - output logic SelPTW, // LSU Arbiter should select signals from the PTW rather than from the IEU output logic HPTWStall, output logic [`PA_BITS-1:0] TranslationPAdr, output logic HPTWRead, // HPTW requesting to read memory @@ -101,7 +100,6 @@ module hptw // Enable and select signals based on states assign StartWalk = (WalkerState == IDLE) & TLBMiss; assign HPTWRead = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD); - assign SelPTW = (WalkerState != IDLE) & (WalkerState != FAULT) & (WalkerState != LEAF) & (WalkerState != LEAF_DELAY); assign HPTWStall = (WalkerState != IDLE) & (WalkerState != FAULT); assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk; assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk; @@ -213,7 +211,7 @@ module hptw end endcase end else begin // No Virtual memory supported; tie HPTW outputs to 0 - assign HPTWRead = 0; assign SelPTW = 0; + assign HPTWRead = 0; assign WalkerInstrPageFaultF = 0; assign WalkerLoadPageFaultM = 0; assign WalkerStorePageFaultM = 0; assign TranslationPAdr = 0; end From 202203904ce1ca804e1813c8dfe333f0b584dd9d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 15:10:33 -0600 Subject: [PATCH 20/38] Corrected the LSU's fsm for stalling CPU. Removed state from hptw fsm. --- wally-pipelined/src/lsu/lsu.sv | 4 ++-- wally-pipelined/src/mmu/hptw.sv | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 09c1f28c3..65772a710 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -222,8 +222,8 @@ module lsu // signal to CPU it needs to wait on HPTW. assign InterlockStall = (CurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | - (CurrState == STATE_T3_DTLB_MISS) | (CurrState == STATE_T4_ITLB_MISS) | - (CurrState == STATE_T5_ITLB_MISS) | (CurrState == STATE_T7_DITLB_MISS); + (CurrState == STATE_T3_DTLB_MISS & ~WalkerPageFaultM) | (CurrState == STATE_T4_ITLB_MISS & ~WalkerInstrPageFaultF) | + (CurrState == STATE_T5_ITLB_MISS & ~WalkerInstrPageFaultF) | (CurrState == STATE_T7_DITLB_MISS & ~WalkerPageFaultM); // When replaying CPU memory request after PTW select the IEUAdrM for correct address. assign SelReplayCPURequest = NextState == STATE_T0_READY; diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index cb5207886..d2a5fa1ac 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -199,7 +199,7 @@ module hptw // LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF; // else NextWalkerState = FAULT; LEAF: if (DTLBWalk) NextWalkerState = IDLE; // updates TLB - else NextWalkerState = LEAF_DELAY; + else NextWalkerState = IDLE; LEAF_DELAY: NextWalkerState = IDLE; // give time to allow address translation FAULT: if (ITLBMissF & AnyCPUReqM & ~MemAfterIWalkDone) NextWalkerState = FAULT; else NextWalkerState = IDLE; From ece9e9df8482d541cbb06e09cee28ecc168d0baa Mon Sep 17 00:00:00 2001 From: Katherine Parry Date: Sun, 19 Dec 2021 13:51:46 -0800 Subject: [PATCH 21/38] fixed some small errors in FMA --- wally-pipelined/src/fpu/fma.sv | 171 +++++++++++---------------------- 1 file changed, 56 insertions(+), 115 deletions(-) diff --git a/wally-pipelined/src/fpu/fma.sv b/wally-pipelined/src/fpu/fma.sv index 6ad3f9864..a90848f5d 100644 --- a/wally-pipelined/src/fpu/fma.sv +++ b/wally-pipelined/src/fpu/fma.sv @@ -28,6 +28,7 @@ // `define NE 11//(`Q_SUPPORTED ? 15 : `D_SUPPORTED ? 11 : 8) // `define NF 52//(`Q_SUPPORTED ? 112 : `D_SUPPORTED ? 52 : 23) // `define XLEN 64 +`define NANPAYLOAD 1 module fma( input logic clk, input logic reset, @@ -117,9 +118,8 @@ module fma1( logic [3*`NF+6:0] AlignedAddendInv; // aligned addend possibly inverted logic [2*`NF+1:0] ProdManKilled; // the product's mantissa possibly killed logic [3*`NF+4:0] NegProdManKilled; // a negated ProdManKilled - logic [8:0] PNormCnt, NNormCnt; // the positive and nagitive LOA results logic [3*`NF+6:0] PreSum, NegPreSum; // positive and negitve versions of the sum - + logic [`NE-1:0] XExpVal, YExpVal; // exponent value after taking into accound denormals /////////////////////////////////////////////////////////////////////////////// // Calculate the product // - When multipliying two fp numbers, add the exponents @@ -130,7 +130,7 @@ module fma1( // calculate the product's exponent - expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, .XDenormE, .YDenormE, + expadd expadd(.FmtE, .XExpE, .YExpE, .XZeroE, .YZeroE, .XDenormE, .YDenormE, .XExpVal, .YExpVal, .Denorm, .ProdExpE); // multiplication of the mantissa's @@ -140,7 +140,7 @@ module fma1( // Alignment shifter /////////////////////////////////////////////////////////////////////////////// - align align(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm, + align align(.ZExpE, .ZManE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, .ProdExpE, .Denorm, .XExpVal, .YExpVal, .AlignedAddendE, .AddendStickyE, .KillProdE); // calculate the signs and take the opperation into account @@ -150,9 +150,9 @@ module fma1( // // Addition/LZA // /////////////////////////////////////////////////////////////////////////////// - add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE); + add add(.AlignedAddendE, .ProdManE, .PSgnE, .ZSgnEffE, .KillProdE, .AlignedAddendInv, .ProdManKilled, .NegSumE, .PreSum, .NegPreSum, .InvZE, .XZeroE, .YZeroE); - loa loa(.A(AlignedAddendInv+{162'b0,InvZE}), .P(ProdManKilled), .NegSumE, .NormCntE); + loa loa(.A(AlignedAddendInv+{162'b0,InvZE}), .P(ProdManKilled), .NormCntE); // Choose the positive sum and accompanying LZA result. assign SumE = NegSumE ? NegPreSum[3*`NF+5:0] : PreSum[3*`NF+5:0]; @@ -167,11 +167,11 @@ module expadd( input logic [`NE-1:0] XExpE, YExpE, // input exponents input logic XDenormE, YDenormE, // are the inputs denormalized input logic XZeroE, YZeroE, // are the inputs zero + output logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals output logic [`NE-1:0] Denorm, // value of denormalized exponent output logic [`NE+1:0] ProdExpE // product's exponent B^(1023)NE+2 ); - logic [`NE-1:0] XExpVal, YExpVal; // Exponent value after taking into account denormals // denormalized numbers have diffrent values depending on which precison it is. // double - 1 @@ -233,6 +233,7 @@ module align( input logic [`NF:0] ZManE, // fractions in U(0.NF) format] input logic ZDenormE, // is the input denormal input logic XZeroE, YZeroE, ZZeroE, // is the input zero + input logic [`NE-1:0] XExpVal, YExpVal, // Exponent value after taking into account denormals input logic [`NE+1:0] ProdExpE, // the product's exponent input logic [`NE-1:0] Denorm, // the biased value of a denormalized number output logic [3*`NF+5:0] AlignedAddendE, // Z aligned for addition in U(NF+5.2NF+1) @@ -254,7 +255,8 @@ module align( // - positive means the product is larger, so shift Z right // - Denormal numbers have a diffrent exponent value depending on the precision assign ZExpVal = ZDenormE ? Denorm : ZExpE; - assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3); + // assign AlignCnt = ProdExpE - {2'b0, ZExpVal} + (`NF+3); + assign AlignCnt = XZeroE|YZeroE ? -1 : {2'b0, XExpVal} + {2'b0, YExpVal} - 1020+`NF - {2'b0, ZExpVal}; // Defualt Addition without shifting // | 54'b0 | 106'b(product) | 2'b0 | @@ -312,14 +314,14 @@ module add( input logic PSgnE, ZSgnEffE,// the product and modified Z signs input logic KillProdE, // should the product be set to 0 input logic XZeroE, YZeroE, // is the input zero - output logic [3*`NF+6:0] AlignedAddendInv, // aligned addend possibly inverted - output logic [2*`NF+1:0] ProdManKilled, // the product's mantissa possibly killed - output logic [3*`NF+4:0] NegProdManKilled, // a negated ProdManKilled + output logic [3*`NF+6:0] AlignedAddendInv, // aligned addend possibly inverted + output logic [2*`NF+1:0] ProdManKilled, // the product's mantissa possibly killed output logic NegSumE, // was the sum negitive output logic InvZE, // do you invert Z - output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum + output logic [3*`NF+6:0] PreSum, NegPreSum// possibly negitive sum ); + logic [3*`NF+4:0] NegProdManKilled; // a negated ProdManKilled /////////////////////////////////////////////////////////////////////////////// // Addition /////////////////////////////////////////////////////////////////////////////// @@ -334,17 +336,17 @@ module add( // Kill the product if the product is too small to effect the addition (determined in fma1.sv) assign ProdManKilled = ProdManE&{2*`NF+2{~KillProdE}}; // Negate ProdMan for LZA and the negitive sum calculation - assign NegProdManKilled = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, ~ProdManKilled&{2*`NF+2{~(XZeroE|YZeroE)}}}; + assign NegProdManKilled = {{`NF+3{~(XZeroE|YZeroE|KillProdE)}}, ~ProdManKilled&{2*`NF+2{~(XZeroE|YZeroE|KillProdE)}}}; - // Is the sum negitive - assign NegSumE = (AlignedAddendE > {54'b0, ProdManKilled, 2'b0})&InvZE; //***use this to avoid addition and final muxing??? // Do the addition // - calculate a positive and negitive sum in parallel assign PreSum = AlignedAddendInv + {55'b0, ProdManKilled, 2'b0} + {{3*`NF+6{1'b0}}, InvZE}; - assign NegPreSum = AlignedAddendE + {NegProdManKilled, 2'b0} + {{(3*`NF+3){1'b0}},~(XZeroE|YZeroE),2'b0}; + assign NegPreSum = AlignedAddendE + {NegProdManKilled, 2'b0} + {{(3*`NF+3){1'b0}},~(XZeroE|YZeroE|KillProdE),2'b0}; + // Is the sum negitive + assign NegSumE = PreSum[3*`NF+6]; endmodule @@ -352,28 +354,32 @@ endmodule module loa( //https://ieeexplore.ieee.org/abstract/document/930098 input logic [3*`NF+6:0] A, // addend input logic [2*`NF+1:0] P, // product - input logic NegSumE, // is the sum negitive output logic [8:0] NormCntE // normalization shift count for the positive result ); - logic [3*`NF+6:0] T; - logic [3*`NF+5:0] G; - logic [3*`NF+5:0] Z; + logic [3*`NF+6:0] G; + logic [3*`NF+6:0] Z; assign T[3*`NF+6:2*`NF+4] = A[3*`NF+6:2*`NF+4]; - assign G[3*`NF+5:2*`NF+4] = 0; - assign Z[3*`NF+5:2*`NF+4] = ~A[3*`NF+5:2*`NF+4]; + assign G[3*`NF+6:2*`NF+4] = 0; + assign Z[3*`NF+6:2*`NF+4] = ~A[3*`NF+6:2*`NF+4]; assign T[2*`NF+3:2] = A[2*`NF+3:2]^P; assign G[2*`NF+3:2] = A[2*`NF+3:2]&P; assign Z[2*`NF+3:2] = ~A[2*`NF+3:2]&~P; assign T[1:0] = A[1:0]; assign G[1:0] = 0; assign Z[1:0] = ~A[1:0]; - + // Apply function to determine Leading pattern + // - note: the paper linked above uses the numbering system where 0 is the most significant bit + //f[n] = ~T[n]&T[n-1] note: n is the MSB + //f[i] = (T[i+1]&(G[i]&~Z[i-1] | Z[i]&~G[i-1])) | (~T[i+1]&(Z[i]&~Z[i-1] | G[i]&~G[i-1])) logic [3*`NF+6:0] f; - assign f = NegSumE ? T^{~G[3*`NF+5:0],1'b1} : T^{~Z[3*`NF+5:0], 1'b1}; + assign f[3*`NF+6] = ~T[3*`NF+6]&T[3*`NF+5]; + assign f[3*`NF+5:0] = (T[3*`NF+6:1]&(G[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | Z[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})) | (~T[3*`NF+6:1]&(Z[3*`NF+5:0]&{~Z[3*`NF+4:0], 1'b0} | G[3*`NF+5:0]&{~G[3*`NF+4:0], 1'b1})); + + lzc lzc(.f, .NormCntE); @@ -426,7 +432,7 @@ module fma2( logic [`NF-1:0] ResultFrac; // Result fraction logic [`NE-1:0] ResultExp; // Result exponent - logic ResultSgn; // Result sign + logic ResultSgn, ResultSgnTmp; // Result sign logic [`NE+1:0] SumExp; // exponent of the normalized sum logic [`NE+1:0] FullResultExp; // ResultExp with bits to determine sign and overflow logic [`NF+2:0] NormSum; // normalized sum @@ -464,7 +470,7 @@ module fma2( // round to infinity // round to nearest max magnitude - fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgn, .SumExp, + fmaround fmaround(.FmtM, .FrmM, .Sticky, .UfSticky, .NormSum, .AddendStickyM, .NormSumSticky, .ZZeroM, .InvZM, .ResultSgnTmp, .SumExp, .CalcPlus1, .Plus1, .UfPlus1, .Minus1, .FullResultExp, .ResultFrac, .ResultExp, .Round, .Guard, .UfLSBNormSum); @@ -476,7 +482,7 @@ module fma2( /////////////////////////////////////////////////////////////////////////////// - resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .ResultSgn); + resultsign resultsign(.FrmM, .PSgnM, .ZSgnEffM, .Underflow, .InvZM, .NegSumM, .SumZero, .ResultSgnTmp, .ResultSgn); @@ -512,11 +518,12 @@ module resultsign( input logic InvZM, input logic NegSumM, input logic SumZero, + output logic ResultSgnTmp, output logic ResultSgn ); logic ZeroSgn; - logic ResultSgnTmp; + // logic ResultSgnTmp; // Determine the sign if the sum is zero // if cancelation then 0 unless round to -infinity @@ -554,15 +561,24 @@ module resultselect( ); logic [`FLEN-1:0] XNaNResult, YNaNResult, ZNaNResult, InvalidResult, OverflowResult, KillProdResult, UnderflowResult; // possible results - assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]}; - assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]}; - assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]}; + generate if(`NANPAYLOAD) begin + assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, XManM[`NF-2:0]} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, XManM[50:29]}; + assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, YManM[`NF-2:0]} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, YManM[50:29]}; + assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, ZManM[`NF-2:0]} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, ZManM[50:29]}; + end else begin + assign XNaNResult = FmtM ? {XSgnM, XExpM, 1'b1, 51'b0} : {{32{1'b1}}, XSgnM, XExpM[7:0], 1'b1, 22'b0}; + assign YNaNResult = FmtM ? {YSgnM, YExpM, 1'b1, 51'b0} : {{32{1'b1}}, YSgnM, YExpM[7:0], 1'b1, 22'b0}; + assign ZNaNResult = FmtM ? {ZSgnEffM, ZExpM, 1'b1, 51'b0} : {{32{1'b1}}, ZSgnEffM, ZExpM[7:0], 1'b1, 22'b0}; + end + endgenerate + + assign OverflowResult = FmtM ? ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {ResultSgn, {`NE-1{1'b1}}, 1'b0, {`NF{1'b1}}} : {ResultSgn, {`NE{1'b1}}, {`NF{1'b0}}} : ((FrmM[1:0]==2'b01) | (FrmM[1:0]==2'b10&~ResultSgn) | (FrmM[1:0]==2'b11&ResultSgn)) ? {{32{1'b1}}, ResultSgn, 8'hfe, {23{1'b1}}} : {{32{1'b1}}, ResultSgn, 8'hff, 23'b0}; assign InvalidResult = FmtM ? {ResultSgn, {`NE{1'b1}}, 1'b1, {`NF-1{1'b0}}} : {{32{1'b1}}, ResultSgn, 8'hff, 1'b1, 22'b0}; - assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - {62'b0, (Minus1&AddendStickyM) + (Plus1&AddendStickyM)}} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; + assign KillProdResult = FmtM ? {ResultSgn, {ZExpM, ZManM[`NF-1:0]} - {62'b0, (Minus1&AddendStickyM)} + {62'b0, (Plus1&AddendStickyM)}} : {{32{1'b1}}, ResultSgn, {ZExpM[`NE-1],ZExpM[6:0], ZManM[51:29]} - {30'b0, (Minus1&AddendStickyM)} + {30'b0, (Plus1&AddendStickyM)}}; assign UnderflowResult = FmtM ? {ResultSgn, {`FLEN-1{1'b0}}} + {63'b0,(CalcPlus1&(AddendStickyM|FrmM[1]))} : {{32{1'b1}}, {ResultSgn, 31'b0} + {31'b0, (CalcPlus1&(AddendStickyM|FrmM[1]))}}; assign FMAResM = XNaNM ? XNaNResult : YNaNM ? YNaNResult : @@ -579,81 +595,6 @@ module resultselect( endmodule - -// module normalize( -// input logic [3*`NF+5:0] SumM, // the positive sum -// input logic [`NE-1:0] ZExpM, // exponent of Z -// input logic [`NE+1:0] ProdExpM, // X exponent + Y exponent - bias -// input logic [8:0] NormCntM, // normalization shift count -// input logic FmtM, // precision 1 = double 0 = single -// input logic KillProdM, // is the product set to zero -// input logic AddendStickyM, // the sticky bit caclulated from the aligned addend -// input logic NegSumM, // was the sum negitive -// output logic [`NF+2:0] NormSum, // normalized sum -// output logic SumZero, // is the sum zero -// output logic NormSumSticky, UfSticky, // sticky bits -// output logic [`NE+1:0] SumExp, // exponent of the normalized sum -// output logic ResultDenorm // is the result denormalized -// ); -// logic [`NE+1:0] FracLen; // length of the fraction -// logic [`NE+1:0] SumExpTmp; // exponent of the normalized sum not taking into account denormal or zero results -// logic [8:0] DenormShift; // right shift if the result is denormalized //***change this later -// logic [3*`NF+5:0] CorrSumShifted; // the shifted sum after LZA correction -// logic [3*`NF+7:0] SumShifted; // the shifted sum before LZA correction -// logic [`NE+1:0] SumExpTmpTmp; // the exponent of the normalized sum with the `FLEN bias -// logic PreResultDenorm; // is the result denormalized - calculated before LZA corection -// logic PreResultDenorm2; // is the result denormalized - calculated before LZA corection -// logic LZAPlus1; // add one to the sum's exponent due to LZA correction - -// /////////////////////////////////////////////////////////////////////////////// -// // Normalization -// /////////////////////////////////////////////////////////////////////////////// - -// // Determine if the sum is zero -// assign SumZero = ~(|SumM); - -// // determine the length of the fraction based on precision -// assign FracLen = FmtM ? `NF+1 : 13'd24; - -// // calculate the sum's exponent -// assign SumExpTmpTmp = KillProdM ? {2'b0, ZExpM} : ProdExpM + -({4'b0, NormCntM} + 1 - (`NF+4)); // ****try moving this into previous stage -// assign SumExpTmp = FmtM ? SumExpTmpTmp : (SumExpTmpTmp-1023+127)&{`NE+2{|SumExpTmpTmp}}; // ***move this ^ the subtraction by a constant isn't simplified - -// logic SumDLTEZ, SumDGEFL, SumSLTEZ, SumSGEFL; -// assign SumDLTEZ = SumExpTmpTmp[`NE+1] | ~|SumExpTmpTmp; -// assign SumDGEFL = ($signed(SumExpTmpTmp)>=$signed(-(13'd`NF+13'd1))); -// assign SumSLTEZ = $signed(SumExpTmpTmp) <= $signed(13'd1023-13'd127); -// assign SumSGEFL = ($signed(SumExpTmpTmp)>=$signed(-13'd24+13'd1023-13'd127)) | ~|SumExpTmpTmp; -// assign PreResultDenorm2 = (FmtM ? SumDLTEZ : SumSLTEZ) & (FmtM ? SumDGEFL : SumSGEFL) & ~SumZero; //***make sure math good -// // always_comb begin -// // assert (PreResultDenorm == PreResultDenorm2) else $fatal ("PreResultDenorms not equal"); -// // end - - - -// // Determine if the result is denormal -// // assign PreResultDenorm = $signed(SumExpTmp)<=0 & ($signed(SumExpTmp)>=$signed(-FracLen)) & ~SumZero; - -// // Determine the shift needed for denormal results -// // - if not denorm add 1 to shift out the leading 1 -// assign DenormShift = PreResultDenorm2 ? SumExpTmp[8:0] : 1; //*** change this when changing the size of DenormShift also change to an and opperation -// // Normalize the sum -// assign SumShifted = {2'b0, SumM} << NormCntM+DenormShift; //*** fix mux's with constants in them //***NormCnt can be simplified -// // LZA correction -// assign LZAPlus1 = SumShifted[3*`NF+7]; -// assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; -// assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3]; -// // Calculate the sticky bit -// assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM); -// assign UfSticky = AddendStickyM | NormSumSticky; - -// // Determine sum's exponent -// assign SumExp = (SumExpTmp+{12'b0, LZAPlus1}+{12'b0, ~|SumExpTmp&SumShifted[3*`NF+6]}) & {`NE+2{~(SumZero|ResultDenorm)}}; -// // recalculate if the result is denormalized -// assign ResultDenorm = PreResultDenorm2&~SumShifted[3*`NF+6]&~SumShifted[3*`NF+7]; - -// endmodule - module normalize( input logic [3*`NF+5:0] SumM, // the positive sum input logic [`NE-1:0] ZExpM, // exponent of Z @@ -733,7 +674,7 @@ module normalize( assign LZAPlus1 = SumShifted[3*`NF+7]; assign LZAPlus2 = SumShifted[3*`NF+8]; // the only possible mantissa for a plus two is all zeroes - a one has to propigate all the way through a sum. so we can leave the bottom statement alone - assign CorrSumShifted = LZAPlus1 ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; + assign CorrSumShifted = LZAPlus1&~KillProdM ? SumShifted[3*`NF+6:1] : SumShifted[3*`NF+5:0]; assign NormSum = CorrSumShifted[3*`NF+5:2*`NF+3]; // Calculate the sticky bit assign NormSumSticky = (|CorrSumShifted[2*`NF+2:0]) | (|CorrSumShifted[136:2*`NF+3]&~FmtM); @@ -757,7 +698,7 @@ module fmaround( input logic ZZeroM, // is Z zero input logic InvZM, // invert Z input logic [`NE+1:0] SumExp, // exponent of the normalized sum - input logic ResultSgn, // the result's sign + input logic ResultSgnTmp, // the result's sign output logic CalcPlus1, Plus1, UfPlus1, Minus1, // do you add or subtract on from the result output logic [`NE+1:0] FullResultExp, // ResultExp with bits to determine sign and overflow output logic [`NF-1:0] ResultFrac, // Result fraction @@ -824,8 +765,8 @@ module fmaround( case (FrmM) 3'b000: CalcPlus1 = Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&LSBNormSum&~SubBySmallNum));//round to nearest even 3'b001: CalcPlus1 = 0;//round to zero - 3'b010: CalcPlus1 = ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round down - 3'b011: CalcPlus1 = ~ResultSgn & ~(SubBySmallNum & ~Guard & ~Round);//round up + 3'b010: CalcPlus1 = ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round down + 3'b011: CalcPlus1 = ~ResultSgnTmp & ~(SubBySmallNum & ~Guard & ~Round);//round up 3'b100: CalcPlus1 = (Guard & (Round | ((Sticky)&~(~Round&SubBySmallNum)) | (~Round&~(Sticky)&~SubBySmallNum)));//round to nearest max magnitude default: CalcPlus1 = 1'bx; endcase @@ -833,8 +774,8 @@ module fmaround( case (FrmM) 3'b000: UfCalcPlus1 = UfGuard & (UfRound | (UfSticky&UfRound|~UfSubBySmallNum) | (~Sticky&UfLSBNormSum&~UfSubBySmallNum));//round to nearest even 3'b001: UfCalcPlus1 = 0;//round to zero - 3'b010: UfCalcPlus1 = ResultSgn & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down - 3'b011: UfCalcPlus1 = ~ResultSgn & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up + 3'b010: UfCalcPlus1 = ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round down + 3'b011: UfCalcPlus1 = ~ResultSgnTmp & ~(UfSubBySmallNum & ~UfGuard & ~UfRound);//round up 3'b100: UfCalcPlus1 = (UfGuard & (UfRound | (UfSticky&~(~UfRound&UfSubBySmallNum)) | (~Sticky&~UfSubBySmallNum)));//round to nearest max magnitude default: UfCalcPlus1 = 1'bx; endcase @@ -842,8 +783,8 @@ module fmaround( case (FrmM) 3'b000: CalcMinus1 = 0;//round to nearest even 3'b001: CalcMinus1 = SubBySmallNum & ~Guard & ~Round;//round to zero - 3'b010: CalcMinus1 = ~ResultSgn & ~Guard & ~Round & SubBySmallNum;//round down - 3'b011: CalcMinus1 = ResultSgn & ~Guard & ~Round & SubBySmallNum;//round up + 3'b010: CalcMinus1 = ~ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round down + 3'b011: CalcMinus1 = ResultSgnTmp & ~Guard & ~Round & SubBySmallNum;//round up 3'b100: CalcMinus1 = 0;//round to nearest max magnitude default: CalcMinus1 = 1'bx; endcase From 691c1c0dd01821d280e8ee318942b8794507067d Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 19 Dec 2021 13:53:45 -0800 Subject: [PATCH 22/38] ALUControl cleanup --- wally-pipelined/src/ieu/controller.sv | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index b081d40f1..040fa0181 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -170,10 +170,10 @@ module controller( assign CSRZeroSrcD = InstrD[14] ? (InstrD[19:15] == 0) : (Rs1D == 0); // Is a CSR instruction using zero as the source? assign CSRWriteD = CSRReadD & !(CSRZeroSrcD && InstrD[13]); // Don't write if setting or clearing zeros - // ALU Decoding + // ALU Decoding is lazy, only using func7[5] to distinguish add/sub and srl/sra assign sltD = (Funct3D == 3'b010); assign sltuD = (Funct3D == 3'b011); - assign subD = (Funct3D == 3'b000 & Funct7D[5] & OpD[5]); // OpD[5] needed; ***explain why + assign subD = (Funct3D == 3'b000 & Funct7D[5] & OpD[5]); // OpD[5] needed to distinguish sub from addi assign sraD = (Funct3D == 3'b101 & Funct7D[5]); assign SubArithD = ALUOpD & (subD | sraD | sltD | sltuD); // TRUE for R-type subtracts and sra, slt, sltu assign ALUControlD = {W64D, SubArithD, ALUOpD}; @@ -205,12 +205,11 @@ module controller( assign {eqE, ltE, ltuE} = FlagsE; mux4 #(1) branchflagmux(eqE, 1'b0, ltE, ltuE, Funct3E[2:1], BranchFlagE); assign BranchTakenE = BranchFlagE ^ Funct3E[0]; - assign PCSrcE = JumpE | BranchE & BranchTakenE; + // Other execute stage controller signals assign MemReadE = MemRWE[1]; assign SCE = (ResultSrcE == 3'b100); - assign RegWriteE = IEURegWriteE | FWriteIntE; // IRF register writes could come from IEU or FPU controllers // Memory stage pipeline control register From 04d0b85f9648557aeaf49747a213ed8c0bc67493 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 16:12:31 -0600 Subject: [PATCH 23/38] Fixed bug most of the bugs related to the dcache changes, but the mmu tests don't pass. --- wally-pipelined/regression/wave.do | 196 +++++------ wally-pipelined/src/cache/dcache.sv | 22 +- wally-pipelined/src/cache/dcachefsm.sv | 437 +------------------------ wally-pipelined/src/lsu/lsu.sv | 41 ++- wally-pipelined/src/lsu/lsuArb.sv | 8 +- 5 files changed, 139 insertions(+), 565 deletions(-) diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index e7970db86..3e19f65b3 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -103,15 +103,15 @@ add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrM add wave -noupdate -group {instruction pipeline} /testbench/InstrW -add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF -add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCF -add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F -add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredPCF -add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext0F -add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext1F -add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/SelBPPredF -add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredWrongE -add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PrivilegedChangePCM +add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF +add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCF +add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F +add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredPCF +add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext0F +add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext1F +add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/SelBPPredF +add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredWrongE +add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PrivilegedChangePCM add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ifu/InstrD add wave -noupdate -group {Decode Stage} /testbench/InstrDName add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD @@ -168,86 +168,86 @@ add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivResultW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivBusyE -add wave -noupdate -expand -group icache -color Gold /testbench/dut/hart/ifu/icache/controller/CurrState -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/BasePAdrF -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/WayHit -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/BlockReplacementBits -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/EncVicWay -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/VictimWay -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/SetValid} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[0]/CacheTagMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/ValidBits} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteWordEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[1]/CacheTagMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/ValidBits} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/SetValid} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[2]/CacheTagMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/ValidBits} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/SetValid} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[3]/CacheTagMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/DirtyBits} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/ValidBits} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/controller/NextState -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/ITLBMissF -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/ITLBWriteF -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/ReadLineF -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/ReadLineF -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/BasePAdrF -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn -add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/SelAdr -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/RAdr -add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/FinalInstrRawF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrPAdrF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrInF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/FetchCount -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable -add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/ICacheMemWriteData +add wave -noupdate -group icache -color Gold /testbench/dut/hart/ifu/icache/controller/CurrState +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/BasePAdrF +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/WayHit +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/BlockReplacementBits +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/EncVicWay +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/VictimWay +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/SetValid} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[0]/CacheTagMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/ValidBits} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteWordEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[1]/CacheTagMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/ValidBits} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/SetValid} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[2]/CacheTagMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/ValidBits} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/SetValid} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[3]/CacheTagMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/DirtyBits} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/ValidBits} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/controller/NextState +add wave -noupdate -group icache /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ITLBWriteF +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ReadLineF +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ReadLineF +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/BasePAdrF +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn +add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/SelAdr +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/RAdr +add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/FinalInstrRawF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrPAdrF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrInF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/FetchCount +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable +add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/ICacheMemWriteData add wave -noupdate -group AHB -color Gold /testbench/dut/hart/ebu/BusState add wave -noupdate -group AHB /testbench/dut/hart/ebu/NextBusState add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/AtomicMaskedM @@ -272,7 +272,6 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED add wave -noupdate -expand -group lsu /testbench/dut/hart/hzu/LSUStall add wave -noupdate -expand -group lsu -expand -group {LSU ARB} /testbench/dut/hart/lsu/arbiter/SelPTW add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/hart/lsu/dcache/dcachefsm/CurrState -add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/WalkerPageFaultM add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/WriteDataM add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMBlockWriteEnableM add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMWordWriteEnableM @@ -341,10 +340,10 @@ add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM w add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/hart/lsu/dcache/ClearValid add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/hart/lsu/dcache/SetDirty add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM writes} -expand -group valid/dirty /testbench/dut/hart/lsu/dcache/ClearDirty -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/hart/lsu/dcache/MemWay[0]/WayHit} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/hart/lsu/dcache/MemWay[0]/Valid} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/hart/lsu/dcache/MemWay[0]/Dirty} -add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -group way0 {/testbench/dut/hart/lsu/dcache/MemWay[0]/ReadTag} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/hart/lsu/dcache/MemWay[0]/WayHit} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/hart/lsu/dcache/MemWay[0]/Valid} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/hart/lsu/dcache/MemWay[0]/Dirty} +add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way0 {/testbench/dut/hart/lsu/dcache/MemWay[0]/ReadTag} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/hart/lsu/dcache/MemWay[1]/WayHit} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/hart/lsu/dcache/MemWay[1]/Valid} add wave -noupdate -expand -group lsu -expand -group dcache -group {Cache SRAM read} -expand -group way1 {/testbench/dut/hart/lsu/dcache/MemWay[1]/Dirty} @@ -501,8 +500,13 @@ add wave -noupdate /testbench/dut/hart/lsu/CurrState add wave -noupdate /testbench/dut/hart/lsu/InterlockStall add wave -noupdate /testbench/dut/hart/ifu/icache/PCNextF add wave -noupdate /testbench/dut/hart/ifu/icache/PCPF +add wave -noupdate /testbench/dut/hart/lsu/WalkerInstrPageFaultF +add wave -noupdate /testbench/dut/hart/lsu/WalkerPageFaultM +add wave -noupdate /testbench/dut/hart/lsu/dcache/RAdr +add wave -noupdate /testbench/dut/hart/lsu/dcache/SelAdrM +add wave -noupdate /testbench/dut/hart/lsu/SelReplayCPURequest TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 6} {24475 ns} 1} {{Cursor 2} {22501 ns} 1} {{Cursor 3} {23208 ns} 0} +WaveRestoreCursors {{Cursor 6} {24475 ns} 1} {{Cursor 2} {22501 ns} 1} {{Cursor 3} {3615 ns} 0} quietly wave cursor active 3 configure wave -namecolwidth 250 configure wave -valuecolwidth 297 @@ -518,4 +522,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {23041 ns} {23377 ns} +WaveRestoreZoom {3453 ns} {3729 ns} diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index 258337f8b..ab870b647 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -36,7 +36,7 @@ module dcache input logic [6:0] Funct7M, input logic [1:0] AtomicM, input logic FlushDCacheM, - input logic [11:0] IEUAdrE, // virtual address, but we only use the lower 12 bits. + input logic [11:0] MemAdrE, // virtual address, but we only use the lower 12 bits. input logic [`PA_BITS-1:0] MemPAdrM, // physical address input logic [11:0] VAdr, // when hptw writes dtlb we use this address to index SRAM. @@ -50,15 +50,9 @@ module dcache // inputs from TLB and PMA/P input logic ExceptionM, input logic PendingInterruptM, - input logic DTLBMissM, - input logic ITLBMissF, input logic CacheableM, - input logic DTLBWriteM, - input logic ITLBWriteF, - input logic WalkerInstrPageFaultF, // from ptw - input logic SelPTW, - input logic WalkerPageFaultM, + input logic IgnoreRequest, output logic MemAfterIWalkDone, // ahb side (* mark_debug = "true" *)output logic [`PA_BITS-1:0] AHBPAdr, // to ahb @@ -147,8 +141,8 @@ module dcache // Read Path CPU (IEU) side mux4 #(INDEXLEN) - AdrSelMux(.d0(IEUAdrE[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), - .d1(VAdr[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + AdrSelMux(.d0(MemAdrE[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), + .d1(VAdr[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), // *** REMOVE .d2(MemPAdrM[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), //.d2(VAdr[INDEXLEN+OFFSETLEN-1:OFFSETLEN]), .d3(FlushAdr), @@ -354,14 +348,8 @@ module dcache .ExceptionM, .PendingInterruptM, .StallWtoDCache, - .DTLBMissM, - .ITLBMissF, .CacheableM, - .DTLBWriteM, - .ITLBWriteF, - .WalkerInstrPageFaultF, - .SelPTW, - .WalkerPageFaultM, + .IgnoreRequest, .AHBAck, // from ahb .CacheHit, .FetchCountFlag, diff --git a/wally-pipelined/src/cache/dcachefsm.sv b/wally-pipelined/src/cache/dcachefsm.sv index f69757583..b13ed2646 100644 --- a/wally-pipelined/src/cache/dcachefsm.sv +++ b/wally-pipelined/src/cache/dcachefsm.sv @@ -36,16 +36,9 @@ module dcachefsm input logic ExceptionM, input logic PendingInterruptM, input logic StallWtoDCache, - // mmu inputs - input logic DTLBMissM, - input logic ITLBMissF, input logic CacheableM, - input logic DTLBWriteM, - input logic ITLBWriteF, - input logic WalkerInstrPageFaultF, // hptw inputs - input logic SelPTW, - input logic WalkerPageFaultM, + input logic IgnoreRequest, // Bus inputs input logic AHBAck, // from ahb // dcache internals @@ -101,36 +94,11 @@ module dcachefsm STATE_MISS_READ_WORD_DELAY, STATE_MISS_WRITE_WORD, - STATE_PTW_READY, - STATE_PTW_READ_MISS_FETCH_WDV, - STATE_PTW_READ_MISS_FETCH_DONE, - STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK, - STATE_PTW_READ_MISS_EVICT_DIRTY, - STATE_PTW_READ_MISS_READ_WORD, - STATE_PTW_READ_MISS_READ_WORD_DELAY, - STATE_PTW_ACCESS_AFTER_WALK, // dead state remove - STATE_UNCACHED_WRITE, STATE_UNCACHED_WRITE_DONE, STATE_UNCACHED_READ, STATE_UNCACHED_READ_DONE, - STATE_PTW_FAULT_READY, - STATE_PTW_FAULT_CPU_BUSY, - STATE_PTW_FAULT_MISS_FETCH_WDV, - STATE_PTW_FAULT_MISS_FETCH_DONE, - STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK, - STATE_PTW_FAULT_MISS_READ_WORD, - STATE_PTW_FAULT_MISS_READ_WORD_DELAY, - STATE_PTW_FAULT_MISS_WRITE_WORD, - STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY, - STATE_PTW_FAULT_MISS_EVICT_DIRTY, - - STATE_PTW_FAULT_UNCACHED_WRITE, - STATE_PTW_FAULT_UNCACHED_WRITE_DONE, - STATE_PTW_FAULT_UNCACHED_READ, - STATE_PTW_FAULT_UNCACHED_READ_DONE, - STATE_CPU_BUSY, STATE_CPU_BUSY_FINISH_AMO, @@ -191,18 +159,16 @@ module dcachefsm LRUWriteEn = 1'b0; CommittedM = 1'b0; - // TLB Miss - if(((AnyCPUReqM & DTLBMissM) | ITLBMissF) & ~(ExceptionM | PendingInterruptM)) begin + if(IgnoreRequest) begin // the LSU arbiter has not yet selected the PTW. // The CPU needs to be stalled until that happens. // If we set DCacheStall for 1 cycle before going to // PTW ready the CPU will stall. // The page table walker asserts it's control 1 cycle // after the TLBs miss. - CommittedM = 1'b1; - DCacheStall = 1'b1; - NextState = STATE_PTW_READY; + // CommittedM = 1'b1; ??? *** Not Sure yet. + NextState = STATE_READY; end // Flush dcache to next level of memory @@ -215,7 +181,7 @@ module dcachefsm end // amo hit - else if(AtomicM[1] & (&MemRWM) & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + else if(AtomicM[1] & (&MemRWM) & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit) begin SelAdrM = 2'b10; DCacheStall = 1'b0; @@ -231,7 +197,7 @@ module dcachefsm end end // read hit valid cached - else if(MemRWM[1] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + else if(MemRWM[1] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit) begin DCacheStall = 1'b0; LRUWriteEn = 1'b1; @@ -244,7 +210,7 @@ module dcachefsm end end // write hit valid cached - else if (MemRWM[0] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit & ~DTLBMissM) begin + else if (MemRWM[0] & CacheableM & ~(ExceptionM | PendingInterruptM) & CacheHit) begin SelAdrM = 2'b10; DCacheStall = 1'b0; SRAMWordWriteEnableM = 1'b1; @@ -260,27 +226,27 @@ module dcachefsm end end // read or write miss valid cached - else if((|MemRWM) & CacheableM & ~(ExceptionM | PendingInterruptM) & ~CacheHit & ~DTLBMissM) begin + else if((|MemRWM) & CacheableM & ~(ExceptionM | PendingInterruptM) & ~CacheHit) begin NextState = STATE_MISS_FETCH_WDV; CntReset = 1'b1; DCacheStall = 1'b1; end // uncached write - else if(MemRWM[0] & ~CacheableM & ~(ExceptionM | PendingInterruptM) & ~DTLBMissM) begin + else if(MemRWM[0] & ~CacheableM & ~(ExceptionM | PendingInterruptM)) begin NextState = STATE_UNCACHED_WRITE; CntReset = 1'b1; DCacheStall = 1'b1; AHBWrite = 1'b1; end // uncached read - else if(MemRWM[1] & ~CacheableM & ~(ExceptionM | PendingInterruptM) & ~DTLBMissM) begin + else if(MemRWM[1] & ~CacheableM & ~(ExceptionM | PendingInterruptM)) begin NextState = STATE_UNCACHED_READ; CntReset = 1'b1; DCacheStall = 1'b1; AHBRead = 1'b1; end // fault - else if(AnyCPUReqM & (ExceptionM | PendingInterruptM) & ~DTLBMissM) begin + else if(AnyCPUReqM & (ExceptionM | PendingInterruptM)) begin NextState = STATE_READY; end else NextState = STATE_READY; @@ -394,144 +360,7 @@ module dcachefsm end end - STATE_PTW_READY: begin - // now all output connect to PTW instead of CPU. - CommittedM = 1'b1; - SelAdrM = 2'b00; - DCacheStall = 1'b0; - LRUWriteEn = 1'b0; - CntReset = 1'b0; - // In this branch we remove stall and go back to ready. There is no request for memory from the - // datapath or the walker had a fault. - // types 3b, 4a, 4b, and 7c. - if ((DTLBMissM & WalkerPageFaultM) | // 3b or 7c (can have either itlb miss or not) - (ITLBMissF & (WalkerInstrPageFaultF | ITLBWriteF) & ~AnyCPUReqM & ~DTLBMissM) | // 4a and 4b - (DTLBMissM & ITLBMissF & WalkerPageFaultM)) begin // 7c *** BUG redundant with first condiction. - NextState = STATE_READY; - DCacheStall = 1'b0; - end - // in this branch we go back to ready, but there is a memory operation from - // the datapath so we MUST stall and replay the operation. - // types 3a and 5a - else if ((DTLBMissM & DTLBWriteM) | // 3a - (ITLBMissF & ITLBWriteF & AnyCPUReqM)) begin // 5a - NextState = STATE_READY; - DCacheStall = 1'b1; - SelAdrM = 2'b01; - end - - // like 5a we want to stall and go to the ready state, but we also have to save - // the WalkerInstrPageFaultF so it is held until the end of the memory operation - // from the datapath. - // types 5b - else if (ITLBMissF & WalkerInstrPageFaultF & AnyCPUReqM) begin // 5b - NextState = STATE_PTW_FAULT_READY; - DCacheStall = 1'b1; - SelAdrM = 2'b01; - end - - // in this branch we stay in ptw_ready because we are doing an itlb walk - // after a dtlb walk. - // types 7a and 7b. - else if (DTLBMissM & DTLBWriteM & ITLBMissF)begin - NextState = STATE_PTW_READY; - DCacheStall = 1'b0; - - // read hit valid cached - end else if(MemRWM[1] & CacheableM & ~ExceptionM & CacheHit) begin - NextState = STATE_PTW_READY; - DCacheStall = 1'b0; - LRUWriteEn = 1'b1; - end - - // read miss valid cached - else if(SelPTW & MemRWM[1] & CacheableM & ~ExceptionM & ~CacheHit) begin - NextState = STATE_PTW_READ_MISS_FETCH_WDV; - CntReset = 1'b1; - DCacheStall = 1'b1; - end - - else begin - NextState = STATE_PTW_READY; - DCacheStall = 1'b0; - end - end - - STATE_PTW_READ_MISS_FETCH_WDV: begin - DCacheStall = 1'b1; - PreCntEn = 1'b1; - AHBRead = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - - if(FetchCountFlag & AHBAck) begin - NextState = STATE_PTW_READ_MISS_FETCH_DONE; - end else begin - NextState = STATE_PTW_READ_MISS_FETCH_WDV; - end - end - - STATE_PTW_READ_MISS_FETCH_DONE: begin - DCacheStall = 1'b1; - SelAdrM = 2'b10; - CntReset = 1'b1; - CommittedM = 1'b1; - CntReset = 1'b1; - if(VictimDirty) begin - NextState = STATE_PTW_READ_MISS_EVICT_DIRTY; - end else begin - NextState = STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK; - end - end - - STATE_PTW_READ_MISS_EVICT_DIRTY: begin - DCacheStall = 1'b1; - PreCntEn = 1'b1; - AHBWrite = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - SelEvict = 1'b1; - if(FetchCountFlag & AHBAck) begin - NextState = STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK; - end else begin - NextState = STATE_PTW_READ_MISS_EVICT_DIRTY; - end - end - - - STATE_PTW_READ_MISS_WRITE_CACHE_BLOCK: begin - SRAMBlockWriteEnableM = 1'b1; - DCacheStall = 1'b1; - NextState = STATE_PTW_READ_MISS_READ_WORD; - SelAdrM = 2'b10; - SetValid = 1'b1; - ClearDirty = 1'b1; - CommittedM = 1'b1; - //LRUWriteEn = 1'b1; - end - - STATE_PTW_READ_MISS_READ_WORD: begin - SelAdrM = 2'b10; - DCacheStall = 1'b1; - CommittedM = 1'b1; - NextState = STATE_PTW_READ_MISS_READ_WORD_DELAY; - end - - STATE_PTW_READ_MISS_READ_WORD_DELAY: begin - SelAdrM = 2'b10; - NextState = STATE_PTW_READY; - CommittedM = 1'b1; - end - - STATE_PTW_ACCESS_AFTER_WALK: begin - DCacheStall = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - LRUWriteEn = 1'b1; - NextState = STATE_READY; - end - STATE_CPU_BUSY: begin CommittedM = 1'b1; SelAdrM = 2'b00; @@ -608,250 +437,6 @@ module dcachefsm end end - - // itlb => instruction page fault states with memory request. - STATE_PTW_FAULT_READY: begin - DCacheStall = 1'b0; - LRUWriteEn = 1'b0; - SelAdrM = 2'b00; - MemAfterIWalkDone = 1'b0; - SetDirty = 1'b0; - LRUWriteEn = 1'b0; - CntReset = 1'b0; - AHBWrite = 1'b0; - AHBRead = 1'b0; - CommittedM = 1'b1; - NextState = STATE_READY; - - /// *** BUG BUG BUG missing AMO states. - - // read hit valid cached - if(MemRWM[1] & CacheableM & CacheHit & ~DTLBMissM) begin - DCacheStall = 1'b0; - LRUWriteEn = 1'b1; - - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - end - end - - // write hit valid cached - else if (MemRWM[0] & CacheableM & CacheHit & ~DTLBMissM) begin - SelAdrM = 2'b10; - DCacheStall = 1'b0; - SRAMWordWriteEnableM = 1'b1; - SetDirty = 1'b1; - LRUWriteEn = 1'b1; - - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - end - end - // read or write miss valid cached - else if((|MemRWM) & CacheableM & ~CacheHit & ~DTLBMissM) begin - NextState = STATE_PTW_FAULT_MISS_FETCH_WDV; - CntReset = 1'b1; - DCacheStall = 1'b1; - end - // uncached write - else if(MemRWM[0] & ~CacheableM & ~DTLBMissM) begin - NextState = STATE_PTW_FAULT_UNCACHED_WRITE; - CntReset = 1'b1; - DCacheStall = 1'b1; - AHBWrite = 1'b1; - end - // uncached read - else if(MemRWM[1] & ~CacheableM & ~DTLBMissM) begin - NextState = STATE_PTW_FAULT_UNCACHED_READ; - CntReset = 1'b1; - DCacheStall = 1'b1; - AHBRead = 1'b1; - MemAfterIWalkDone = 1'b0; - end - // fault - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - end - end - - STATE_PTW_FAULT_CPU_BUSY: begin - CommittedM = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - MemAfterIWalkDone = 1'b0; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - SelAdrM = 2'b00; - end - end - - STATE_PTW_FAULT_MISS_FETCH_WDV: begin - DCacheStall = 1'b1; - PreCntEn = 1'b1; - AHBRead = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - - if(FetchCountFlag & AHBAck) begin - NextState = STATE_PTW_FAULT_MISS_FETCH_DONE; - end else begin - NextState = STATE_PTW_FAULT_MISS_FETCH_WDV; - end - end - - STATE_PTW_FAULT_MISS_FETCH_DONE: begin - DCacheStall = 1'b1; - SelAdrM = 2'b10; - CntReset = 1'b1; - CommittedM = 1'b1; - if(VictimDirty) begin - NextState = STATE_PTW_FAULT_MISS_EVICT_DIRTY; - end else begin - NextState = STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK; - end - end - - STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK: begin - SRAMBlockWriteEnableM = 1'b1; - DCacheStall = 1'b1; - NextState = STATE_PTW_FAULT_MISS_READ_WORD; - SelAdrM = 2'b10; - SetValid = 1'b1; - ClearDirty = 1'b1; - CommittedM = 1'b1; - //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write - end - - STATE_PTW_FAULT_MISS_READ_WORD: begin - SelAdrM = 2'b10; - DCacheStall = 1'b1; - CommittedM = 1'b1; - if(MemRWM[1]) begin - NextState = STATE_PTW_FAULT_MISS_READ_WORD_DELAY; - // delay state is required as the read signal MemRWM[1] is still high when we - // return to the ready state because the cache is stalling the cpu. - end else begin - NextState = STATE_PTW_FAULT_MISS_WRITE_WORD; - end - end - - STATE_PTW_FAULT_MISS_READ_WORD_DELAY: begin - CommittedM = 1'b1; - LRUWriteEn = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - SelAdrM = 2'b10; - MemAfterIWalkDone = 1'b0; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - SelAdrM = 2'b00; - end - end - - STATE_PTW_FAULT_MISS_WRITE_WORD: begin - SRAMWordWriteEnableM = 1'b1; - SetDirty = 1'b1; - SelAdrM = 2'b10; - DCacheStall = 1'b1; - CommittedM = 1'b1; - LRUWriteEn = 1'b1; - NextState = STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY; - end - - STATE_PTW_FAULT_MISS_WRITE_WORD_DELAY: begin - CommittedM = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - MemAfterIWalkDone = 1'b0; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - SelAdrM = 2'b00; - end - end - - STATE_PTW_FAULT_MISS_EVICT_DIRTY: begin - DCacheStall = 1'b1; - PreCntEn = 1'b1; - AHBWrite = 1'b1; - SelAdrM = 2'b10; - CommittedM = 1'b1; - SelEvict = 1'b1; - if(FetchCountFlag & AHBAck) begin - NextState = STATE_PTW_FAULT_MISS_WRITE_CACHE_BLOCK; - end else begin - NextState = STATE_PTW_FAULT_MISS_EVICT_DIRTY; - end - end - - - STATE_PTW_FAULT_UNCACHED_WRITE : begin - DCacheStall = 1'b1; - AHBWrite = 1'b1; - CommittedM = 1'b1; - if(AHBAck) begin - NextState = STATE_PTW_FAULT_UNCACHED_WRITE_DONE; - end else begin - NextState = STATE_PTW_FAULT_UNCACHED_WRITE; - end - end - - STATE_PTW_FAULT_UNCACHED_READ : begin - DCacheStall = 1'b1; - AHBRead = 1'b1; - CommittedM = 1'b1; - if(AHBAck) begin - NextState = STATE_PTW_FAULT_UNCACHED_READ_DONE; - end else begin - NextState = STATE_PTW_FAULT_UNCACHED_READ; - end - end - - STATE_PTW_FAULT_UNCACHED_WRITE_DONE: begin - CommittedM = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - MemAfterIWalkDone = 1'b0; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - SelAdrM = 2'b00; - end - end - - STATE_PTW_FAULT_UNCACHED_READ_DONE: begin - CommittedM = 1'b1; - SelUncached = 1'b1; - if(StallWtoDCache) begin - NextState = STATE_PTW_FAULT_CPU_BUSY; - SelAdrM = 2'b10; - end - else begin - MemAfterIWalkDone = 1'b1; - NextState = STATE_READY; - end - end - STATE_FLUSH: begin DCacheStall = 1'b1; CommittedM = 1'b1; diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 65772a710..38374cb32 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -107,8 +107,8 @@ module lsu logic [1:0] MemRWMtoLRSC; logic [2:0] Funct3MtoDCache; logic [1:0] AtomicMtoDCache; - logic [`PA_BITS-1:0] MemPAdrMtoDCache; - logic [11:0] MemAdrEtoDCache; + logic [`PA_BITS-1:0] MemPAdrNoTranslate; + logic [11:0] MemAdrE, MemAdrE_RENAME; logic StallWtoDCache; logic MemReadM; logic DataMisalignedMfromDCache; @@ -127,9 +127,6 @@ module lsu logic AnyCPUReqM; logic MemAfterIWalkDone; - assign AnyCPUReqM = (|MemRWM) | (|AtomicM); - - typedef enum {STATE_T0_READY, STATE_T0_REPLAY, STATE_T0_FAULT_REPLAY, @@ -142,8 +139,11 @@ module lsu logic InterlockStall; logic SelReplayCPURequest; logic WalkerInstrPageFaultRaw; + logic IgnoreRequest; - + + assign AnyCPUReqM = (|MemRWM) | (|AtomicM); + always_ff @(posedge clk) if (reset) CurrState <= #1 STATE_T0_READY; else CurrState <= #1 NextState; @@ -226,9 +226,10 @@ module lsu (CurrState == STATE_T5_ITLB_MISS & ~WalkerInstrPageFaultF) | (CurrState == STATE_T7_DITLB_MISS & ~WalkerPageFaultM); // When replaying CPU memory request after PTW select the IEUAdrM for correct address. - assign SelReplayCPURequest = NextState == STATE_T0_READY; + assign SelReplayCPURequest = NextState == STATE_T0_REPLAY; assign SelPTW = (CurrState == STATE_T3_DTLB_MISS) | (CurrState == STATE_T4_ITLB_MISS) | (CurrState == STATE_T5_ITLB_MISS) | (CurrState == STATE_T7_DITLB_MISS); + assign IgnoreRequest = CurrState == STATE_T0_READY & (ITLBMissF | DTLBMissM); @@ -284,8 +285,8 @@ module lsu .MemRWMtoLRSC(MemRWMtoLRSC), .Funct3MtoDCache(Funct3MtoDCache), .AtomicMtoDCache(AtomicMtoDCache), - .MemPAdrMtoDCache(MemPAdrMtoDCache), - .MemAdrEtoDCache(MemAdrEtoDCache), + .MemPAdrNoTranslate(MemPAdrNoTranslate), + .MemAdrE(MemAdrE), .StallWtoDCache(StallWtoDCache), .DataMisalignedMfromDCache(DataMisalignedMfromDCache), .CommittedMfromDCache(CommittedMfromDCache), @@ -295,7 +296,7 @@ module lsu mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, .PrivilegeModeW, .DisableTranslation(DisableTranslation), - .PAdr(MemPAdrMtoDCache), + .PAdr(MemPAdrNoTranslate), .VAdr(IEUAdrM), .Size(Funct3MtoDCache[1:0]), .PTE(PTE), @@ -334,9 +335,9 @@ module lsu always_comb case(Funct3MtoDCache[1:0]) 2'b00: DataMisalignedMfromDCache = 0; // lb, sb, lbu - 2'b01: DataMisalignedMfromDCache = MemPAdrMtoDCache[0]; // lh, sh, lhu - 2'b10: DataMisalignedMfromDCache = MemPAdrMtoDCache[1] | MemPAdrMtoDCache[0]; // lw, sw, flw, fsw, lwu - 2'b11: DataMisalignedMfromDCache = |MemPAdrMtoDCache[2:0]; // ld, sd, fld, fsd + 2'b01: DataMisalignedMfromDCache = MemPAdrNoTranslate[0]; // lh, sh, lhu + 2'b10: DataMisalignedMfromDCache = MemPAdrNoTranslate[1] | MemPAdrNoTranslate[0]; // lw, sw, flw, fsw, lwu + 2'b11: DataMisalignedMfromDCache = |MemPAdrNoTranslate[2:0]; // ld, sd, fld, fsd endcase // Determine if address is valid @@ -347,6 +348,8 @@ module lsu // 1. ram // controlled by `MEM_DTIM // 2. cache `MEM_DCACHE // 3. wire pass-through + assign MemAdrE_RENAME = SelReplayCPURequest ? IEUAdrM[11:0] : MemAdrE[11:0]; + dcache dcache(.clk(clk), .reset(reset), .StallWtoDCache(StallWtoDCache), @@ -355,9 +358,9 @@ module lsu .Funct7M(Funct7M), .FlushDCacheM, .AtomicM(AtomicMtoDCache), - .IEUAdrE(MemAdrEtoDCache), + .MemAdrE(MemAdrE_RENAME), .MemPAdrM(MemPAdrM), - .VAdr(IEUAdrM[11:0]), + .VAdr(IEUAdrM[11:0]), // this will be removed once the dcache hptw interlock is removed. .WriteDataM(WriteDataM), .ReadDataM(ReadDataM), .DCacheStall(DCacheStall), @@ -365,16 +368,10 @@ module lsu .DCacheMiss, .DCacheAccess, .ExceptionM(ExceptionM), + .IgnoreRequest, .PendingInterruptM(PendingInterruptMtoDCache), - .DTLBMissM(DTLBMissM), .CacheableM(CacheableMtoDCache), - .DTLBWriteM(DTLBWriteM), - .ITLBWriteF(ITLBWriteF), - .ITLBMissF, .MemAfterIWalkDone, - .SelPTW, - .WalkerPageFaultM(WalkerPageFaultM), - .WalkerInstrPageFaultF(WalkerInstrPageFaultF), // AHB connection .AHBPAdr(DCtoAHBPAdrM), diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv index 498f0682f..91c5f75f1 100644 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ b/wally-pipelined/src/lsu/lsuArb.sv @@ -52,8 +52,8 @@ module lsuArb output logic [1:0] MemRWMtoLRSC, output logic [2:0] Funct3MtoDCache, output logic [1:0] AtomicMtoDCache, - output logic [`PA_BITS-1:0] MemPAdrMtoDCache, - output logic [11:0] MemAdrEtoDCache, + output logic [`PA_BITS-1:0] MemPAdrNoTranslate, // THis name is very bad. need a better name. This is the raw address from either the ieu or the hptw. + output logic [11:0] MemAdrE, output logic StallWtoDCache, output logic PendingInterruptMtoDCache, @@ -83,8 +83,8 @@ module lsuArb assign AtomicMtoDCache = SelPTW ? 2'b00 : AtomicM; assign IEUAdrMExt = {2'b00, IEUAdrM}; - assign MemPAdrMtoDCache = SelPTW ? TranslationPAdrM : IEUAdrMExt[`PA_BITS-1:0]; - assign MemAdrEtoDCache = SelPTW ? TranslationPAdrE[11:0] : IEUAdrE[11:0]; + assign MemPAdrNoTranslate = SelPTW ? TranslationPAdrM : IEUAdrMExt[`PA_BITS-1:0]; + assign MemAdrE = SelPTW ? TranslationPAdrE[11:0] : IEUAdrE[11:0]; assign StallWtoDCache = SelPTW ? 1'b0 : StallW; // always block interrupts when using the hardware page table walker. assign CommittedM = SelPTW ? 1'b1 : CommittedMfromDCache; From 13f0e9bafa48defc32c7bff19cdc6d2b5a29d310 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 17:01:13 -0600 Subject: [PATCH 24/38] Fixed bug where icache did not replay PCF on itlb miss. --- wally-pipelined/src/cache/icachefsm.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/wally-pipelined/src/cache/icachefsm.sv b/wally-pipelined/src/cache/icachefsm.sv index 96bea67c9..dd4e2e4ad 100644 --- a/wally-pipelined/src/cache/icachefsm.sv +++ b/wally-pipelined/src/cache/icachefsm.sv @@ -144,6 +144,7 @@ module icachefsm -----/\----- EXCLUDED -----/\----- */ if(ITLBMissF) begin NextState = STATE_READY; + SelAdr = 2'b01; ICacheStallF = 1'b0; end else if (hit & ~spill) begin From 54fd8678b0e48e4db1708c11a0910570e9581c8e Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 17:53:13 -0600 Subject: [PATCH 25/38] Created hack to get around imperas64mmu unknown (value = x) bug. --- wally-pipelined/src/lsu/lsu.sv | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 38374cb32..669b5ee51 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -221,9 +221,12 @@ module lsu end // always_comb // signal to CPU it needs to wait on HPTW. - assign InterlockStall = (CurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | + assign InterlockStall_BUG = (CurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | (CurrState == STATE_T3_DTLB_MISS & ~WalkerPageFaultM) | (CurrState == STATE_T4_ITLB_MISS & ~WalkerInstrPageFaultF) | (CurrState == STATE_T5_ITLB_MISS & ~WalkerInstrPageFaultF) | (CurrState == STATE_T7_DITLB_MISS & ~WalkerPageFaultM); + + assign InterlockStall = InterlockStall_BUG === 1'bx ? 1'b0 : InterlockStall_BUG; + // When replaying CPU memory request after PTW select the IEUAdrM for correct address. assign SelReplayCPURequest = NextState == STATE_T0_REPLAY; From 814bcec7b7589d1163dad193d869ede68ce0750b Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 17:57:12 -0600 Subject: [PATCH 26/38] Implemented what I think is the last required change for the lsu state machine. --- wally-pipelined/src/lsu/lsu.sv | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 669b5ee51..cff0a417e 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -222,8 +222,8 @@ module lsu // signal to CPU it needs to wait on HPTW. assign InterlockStall_BUG = (CurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | - (CurrState == STATE_T3_DTLB_MISS & ~WalkerPageFaultM) | (CurrState == STATE_T4_ITLB_MISS & ~WalkerInstrPageFaultF) | - (CurrState == STATE_T5_ITLB_MISS & ~WalkerInstrPageFaultF) | (CurrState == STATE_T7_DITLB_MISS & ~WalkerPageFaultM); + (CurrState == STATE_T3_DTLB_MISS & ~WalkerPageFaultM) | (CurrState == STATE_T4_ITLB_MISS & ~WalkerInstrPageFaultRaw) | + (CurrState == STATE_T5_ITLB_MISS & ~WalkerInstrPageFaultRaw) | (CurrState == STATE_T7_DITLB_MISS & ~WalkerPageFaultM); assign InterlockStall = InterlockStall_BUG === 1'bx ? 1'b0 : InterlockStall_BUG; @@ -234,6 +234,7 @@ module lsu (CurrState == STATE_T5_ITLB_MISS) | (CurrState == STATE_T7_DITLB_MISS); assign IgnoreRequest = CurrState == STATE_T0_READY & (ITLBMissF | DTLBMissM); + assign WalkerInstrPageFaultF = WalkerInstrPageFaultRaw | CurrState == STATE_T0_FAULT_REPLAY; flopenrc #(`XLEN) AddressMReg(clk, reset, FlushM, ~StallM, IEUAdrE, IEUAdrM); @@ -259,7 +260,7 @@ module lsu .HPTWStall, .AnyCPUReqM, .MemAfterIWalkDone, - .WalkerInstrPageFaultF(WalkerInstrPageFaultF), + .WalkerInstrPageFaultF(WalkerInstrPageFaultRaw), .WalkerLoadPageFaultM(WalkerLoadPageFaultM), .WalkerStorePageFaultM(WalkerStorePageFaultM)); From cef4b6399d26da62c03fd5ce73f6d7e9bfc20e5e Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 18:16:08 -0600 Subject: [PATCH 27/38] Switched to using an always block for lsu stall logic. This avoids the problematic x propagation. --- wally-pipelined/src/lsu/lsu.sv | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index cff0a417e..2fbd9f67f 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -221,11 +221,26 @@ module lsu end // always_comb // signal to CPU it needs to wait on HPTW. - assign InterlockStall_BUG = (CurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | +/* -----\/----- EXCLUDED -----\/----- + // this code has a problem with imperas64mmu as it reads in an invalid uninitalized instruction. InterlockStall becomes x and it propagates + // everywhere. The case statement below implements the same logic but any x on the inputs will resolve to 0. + assign InterlockStall = (CurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | (CurrState == STATE_T3_DTLB_MISS & ~WalkerPageFaultM) | (CurrState == STATE_T4_ITLB_MISS & ~WalkerInstrPageFaultRaw) | (CurrState == STATE_T5_ITLB_MISS & ~WalkerInstrPageFaultRaw) | (CurrState == STATE_T7_DITLB_MISS & ~WalkerPageFaultM); - assign InterlockStall = InterlockStall_BUG === 1'bx ? 1'b0 : InterlockStall_BUG; + -----/\----- EXCLUDED -----/\----- */ + + always_comb begin + InterlockStall = 1'b0; + case(CurrState) + STATE_T0_READY: if(DTLBMissM | ITLBMissF) InterlockStall = 1'b1; + STATE_T3_DTLB_MISS: if (~WalkerPageFaultM) InterlockStall = 1'b1; + STATE_T4_ITLB_MISS: if (~WalkerInstrPageFaultRaw) InterlockStall = 1'b1; + STATE_T5_ITLB_MISS: if (~WalkerInstrPageFaultRaw) InterlockStall = 1'b1; + STATE_T7_DITLB_MISS: if (~WalkerPageFaultM) InterlockStall = 1'b1; + default: InterlockStall = 1'b0; + endcase + end // When replaying CPU memory request after PTW select the IEUAdrM for correct address. From 7b2f5440a557b4effe16202a7a464e997554b290 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 18:24:40 -0600 Subject: [PATCH 28/38] Changes to buildroot to support MemAdrM to IEUAdrM name changes. --- wally-pipelined/testbench/testbench-linux.sv | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index a64ca4340..0a329d954 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -93,14 +93,14 @@ module testbench(); logic [`XLEN-1:0] PCW; logic [31:0] InstrW; logic InstrValidW; - logic [`XLEN-1:0] MemAdrW, WriteDataW; + logic [`XLEN-1:0] IEUAdrW, WriteDataW; logic TrapW; `define FLUSHW dut.hart.FlushW `define STALLW dut.hart.StallW flopenrc #(`XLEN) PCWReg(clk, reset, `FLUSHW, ~`STALLW, dut.hart.ifu.PCM, PCW); flopenr #(32) InstrWReg(clk, reset, ~`STALLW, `FLUSHW ? nop : dut.hart.ifu.InstrM, InstrW); flopenrc #(1) controlregW(clk, reset, `FLUSHW, ~`STALLW, dut.hart.ieu.c.InstrValidM, InstrValidW); - flopenrc #(`XLEN) MemAdrWReg(clk, reset, `FLUSHW, ~`STALLW, dut.hart.MemAdrM, MemAdrW); + flopenrc #(`XLEN) IEUAdrWReg(clk, reset, `FLUSHW, ~`STALLW, dut.hart.IEUAdrM, IEUAdrW); flopenrc #(`XLEN) WriteDataWReg(clk, reset, `FLUSHW, ~`STALLW, dut.hart.WriteDataM, WriteDataW); flopenr #(1) TrapWReg(clk, reset, ~`STALLW, dut.hart.hzu.TrapM, TrapW); @@ -134,7 +134,7 @@ module testbench(); string RegWrite``STAGE; \ integer ExpectedRegAdr``STAGE; \ logic [`XLEN-1:0] ExpectedRegValue``STAGE; \ - logic [`XLEN-1:0] ExpectedMemAdr``STAGE, ExpectedMemReadData``STAGE, ExpectedMemWriteData``STAGE; \ + logic [`XLEN-1:0] ExpectedIEUAdr``STAGE, ExpectedMemReadData``STAGE, ExpectedMemWriteData``STAGE; \ string ExpectedCSRArray``STAGE[10:0]; \ logic [`XLEN-1:0] ExpectedCSRArrayValue``STAGE[10:0]; `DECLARE_TRACE_SCANNER_SIGNALS(E) @@ -155,7 +155,7 @@ module testbench(); integer ExpectedRegAdrW; logic [`XLEN-1:0] ExpectedRegValueW; string MemOpW; - logic [`XLEN-1:0] ExpectedMemAdrW, ExpectedMemReadDataW, ExpectedMemWriteDataW; + logic [`XLEN-1:0] ExpectedIEUAdrW, ExpectedMemReadDataW, ExpectedMemWriteDataW; integer NumCSRW; string ExpectedCSRArrayW[10:0]; logic [`XLEN-1:0] ExpectedCSRArrayValueW[10:0]; @@ -411,7 +411,7 @@ module testbench(); // parse memory address, read data, and/or write data \ end else if(ExpectedTokens``STAGE[MarkerIndex``STAGE].substr(0, 2) == "Mem") begin \ MemOp``STAGE = ExpectedTokens``STAGE[MarkerIndex``STAGE]; \ - matchCount``STAGE = $sscanf(ExpectedTokens``STAGE[MarkerIndex``STAGE+1], "%x", ExpectedMemAdr``STAGE); \ + matchCount``STAGE = $sscanf(ExpectedTokens``STAGE[MarkerIndex``STAGE+1], "%x", ExpectedIEUAdr``STAGE); \ matchCount``STAGE = $sscanf(ExpectedTokens``STAGE[MarkerIndex``STAGE+2], "%x", ExpectedMemWriteData``STAGE); \ matchCount``STAGE = $sscanf(ExpectedTokens``STAGE[MarkerIndex``STAGE+3], "%x", ExpectedMemReadData``STAGE); \ MarkerIndex``STAGE += 4; \ @@ -509,7 +509,7 @@ module testbench(); RegWriteW <= ""; ExpectedRegAdrW <= '0; ExpectedRegValueW <= '0; - ExpectedMemAdrW <= '0; + ExpectedIEUAdrW <= '0; MemOpW <= ""; ExpectedMemWriteDataW <= '0; ExpectedMemReadDataW <= '0; @@ -522,7 +522,7 @@ module testbench(); RegWriteW <= ""; ExpectedRegAdrW <= '0; ExpectedRegValueW <= '0; - ExpectedMemAdrW <= '0; + ExpectedIEUAdrW <= '0; MemOpW <= ""; ExpectedMemWriteDataW <= '0; ExpectedMemReadDataW <= '0; @@ -534,7 +534,7 @@ module testbench(); RegWriteW <= RegWriteM; ExpectedRegAdrW <= ExpectedRegAdrM; ExpectedRegValueW <= ExpectedRegValueM; - ExpectedMemAdrW <= ExpectedMemAdrM; + ExpectedIEUAdrW <= ExpectedIEUAdrM; MemOpW <= MemOpM; ExpectedMemWriteDataW <= ExpectedMemWriteDataM; ExpectedMemReadDataW <= ExpectedMemReadDataM; @@ -551,7 +551,7 @@ module testbench(); //$display("%tns, %d instrs: Releasing force of MTIME_CLINT.", $time, InstrCountW); release dut.uncore.clint.clint.MTIME; end - //if (ExpectedMemAdrM == 'h10000005) begin + //if (ExpectedIEUAdrM == 'h10000005) begin //$display("%tns, %d instrs: releasing force of ReadDataM.", $time, InstrCountW); //release dut.hart.ieu.dp.ReadDataM; //end @@ -588,8 +588,8 @@ module testbench(); `checkEQ(name, dut.hart.ieu.dp.regf.rf[ExpectedRegAdrW], ExpectedRegValueW) end if (MemOpW.substr(0,2) == "Mem") begin - if(`DEBUG_TRACE >= 4) $display("\tMemAdrW: %016x ? expected: %016x", MemAdrW, ExpectedMemAdrW); - `checkEQ("MemAdrW",MemAdrW,ExpectedMemAdrW) + if(`DEBUG_TRACE >= 4) $display("\tIEUAdrW: %016x ? expected: %016x", IEUAdrW, ExpectedIEUAdrW); + `checkEQ("IEUAdrW",IEUAdrW,ExpectedIEUAdrW) if(MemOpW == "MemR" || MemOpW == "MemRW") begin if(`DEBUG_TRACE >= 4) $display("\tReadDataW: %016x ? expected: %016x", dut.hart.ieu.dp.ReadDataW, ExpectedMemReadDataW); `checkEQ("ReadDataW",dut.hart.ieu.dp.ReadDataW,ExpectedMemReadDataW) From 1196e5c1913570fd0b8a3ebe26bc3681d505086d Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 19 Dec 2021 16:53:41 -0800 Subject: [PATCH 29/38] Moved generate statements for optional units into wallypipelinedhart --- wally-pipelined/src/fpu/fpu.sv | 474 +++++++++--------- wally-pipelined/src/muldiv/muldiv.sv | 93 ++-- wally-pipelined/src/privileged/csr.sv | 108 ++-- wally-pipelined/src/privileged/privileged.sv | 2 - .../src/wally/wallypipelinedhart.sv | 156 +++--- .../sdc/tb => testbench/sdc}/ram2sdLoad.py | 0 .../sdc/tb => testbench/sdc}/ramdisk2.hex | 0 .../{src/sdc/tb => testbench/sdc}/run_tb.do | 0 .../sdc/tb => testbench/sdc}/sd_top_tb.sv | 0 .../{src/sdc/tb => testbench/sdc}/wave.do | 0 wally-pipelined/testbench/testbench-linux.sv | 54 +- wally-pipelined/testbench/testbench.sv | 4 +- 12 files changed, 424 insertions(+), 467 deletions(-) rename wally-pipelined/{src/sdc/tb => testbench/sdc}/ram2sdLoad.py (100%) rename wally-pipelined/{src/sdc/tb => testbench/sdc}/ramdisk2.hex (100%) rename wally-pipelined/{src/sdc/tb => testbench/sdc}/run_tb.do (100%) rename wally-pipelined/{src/sdc/tb => testbench/sdc}/sd_top_tb.sv (100%) rename wally-pipelined/{src/sdc/tb => testbench/sdc}/wave.do (100%) diff --git a/wally-pipelined/src/fpu/fpu.sv b/wally-pipelined/src/fpu/fpu.sv index 0ba61c6c6..7b9680ed3 100755 --- a/wally-pipelined/src/fpu/fpu.sv +++ b/wally-pipelined/src/fpu/fpu.sv @@ -44,258 +44,244 @@ module fpu ( output logic [4:0] SetFflagsM // FPU flags (to privileged unit) ); - //*** make everything FLEN at some point - //*** add the 128 bit support to the if statement when needed - //*** make new tests for fp using testfloat that include flag checking and all rounding modes - //*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec - //*** only fma/mul and fp <-> int convert flags have been tested. test the others. + //*** make everything FLEN at some point + //*** add the 128 bit support to the if statement when needed + //*** make new tests for fp using testfloat that include flag checking and all rounding modes + //*** what is the format for 16-bit - finding conflicting info online can't find anything specified in spec + //*** only fma/mul and fp <-> int convert flags have been tested. test the others. - // FPU specifics: - // - uses NaN-blocking format - // - if there are any unsused bits the most significant bits are filled with 1s - // single stored in a double: | 32 1s | single precision value | - // - sets the underflow after rounding + // FPU specifics: + // - uses NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + // single stored in a double: | 32 1s | single precision value | + // - sets the underflow after rounding - generate if (`F_SUPPORTED | `D_SUPPORTED) begin : fpu + // control signals + logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable + logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode + logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double + logic FDivStartD, FDivStartE; // Start division or squareroot + logic FWriteIntD; // Write to integer register + logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals + logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register + logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register + logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component + logic [2:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage + logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister + logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input - // control signals - logic FRegWriteD, FRegWriteE, FRegWriteW; // FP register write enable - logic [2:0] FrmD, FrmE, FrmM; // FP rounding mode - logic FmtD, FmtE, FmtM, FmtW; // FP precision 0-single 1-double - logic FDivStartD, FDivStartE; // Start division or squareroot - logic FWriteIntD; // Write to integer register - logic [1:0] FForwardXE, FForwardYE, FForwardZE; // forwarding mux control signals - logic [1:0] FResultSelD, FResultSelE; // Select the result written to FP register - logic [1:0] FResultSelM, FResultSelW; // Select the result written to FP register - logic [2:0] FOpCtrlD, FOpCtrlE; // Select which opperation to do in each component - logic [2:0] FResSelD, FResSelE; // Select one of the results that finish in the memory stage - logic [1:0] FIntResSelD, FIntResSelE; // Select the result written to the integer resister - logic [4:0] Adr1E, Adr2E, Adr3E; // adresses of each input - - // regfile signals - logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [63:0] FSrcXE; // Input 1 to the various units (after forwarding) - logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) - logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) - - // unpacking signals - logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage - logic XSgnM, YSgnM; // input's sign - memory stage - logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage - logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage - logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage - logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage - logic [10:0] BiasE; // bias based on precision (single=7f double=3ff) - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XNaNQ, YNaNQ; // is the input a NaN - divide - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XDenormE, YDenormE, ZDenormE; // is the input denormalized - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage - logic XZeroQ, YZeroQ; // is the input zero - divide - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XInfQ, YInfQ; // is the input infinity - divide - logic XExpMaxE; // is the exponent all ones (max value) - logic XNormE; // is normal - logic FmtQ; - logic FOpCtrlQ; - - // result and flag signals - logic [63:0] FDivResM, FDivResW; // divide/squareroot result - logic [4:0] FDivFlgM; // divide/squareroot flags - logic [63:0] FMAResM, FMAResW; // FMA/multiply result - logic [4:0] FMAFlgM; // FMA/multiply result - logic [63:0] ReadResW; // read result (load instruction) - logic [63:0] CvtFpResE; // add/FP -> FP convert result - logic [4:0] CvtFpFlgE; // add/FP -> FP convert flags - logic [63:0] CvtResE; // FP <-> int convert result - logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this - logic [63:0] ClassResE; // classify result - logic [63:0] CmpResE; // compare result - logic CmpNVE; // compare invalid flag (Not Valid) - logic [63:0] SgnResE; // sign injection result - logic SgnNVE; // sign injection invalid flag (Not Valid) - logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage - logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage - logic [`XLEN-1:0] FIntResE; - logic [63:0] FPUResultW; // final FP result being written to the FP register - // other signals - logic FDivSqrtDoneE; // is divide done - logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit - logic load_preload; // enable for FF on fpdivsqrt - logic [63:0] AlignedSrcAE; // align SrcA to the floating point format + // regfile signals + logic [63:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [63:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [63:0] FSrcXE; // Input 1 to the various units (after forwarding) + logic [63:0] FPreSrcYE, FSrcYE; // Input 2 to the various units (after forwarding) + logic [63:0] FPreSrcZE, FSrcZE; // Input 3 to the various units (after forwarding) - // DECODE STAGE - - // calculate FP control signals - fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, - .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, - .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - - // FP register file - fregfile fregfile (.clk, .reset, .we4(FRegWriteW), - .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), - .a4(RdW), .wd4(FPUResultW), - .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); + // unpacking signals + logic XSgnE, YSgnE, ZSgnE; // input's sign - execute stage + logic XSgnM, YSgnM; // input's sign - memory stage + logic [10:0] XExpE, YExpE, ZExpE; // input's exponent - execute stage + logic [10:0] XExpM, YExpM, ZExpM; // input's exponent - memory stage + logic [52:0] XManE, YManE, ZManE; // input's fraction - execute stage + logic [52:0] XManM, YManM, ZManM; // input's fraction - memory stage + logic [10:0] BiasE; // bias based on precision (single=7f double=3ff) + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XNaNQ, YNaNQ; // is the input a NaN - divide + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage + logic XDenormE, YDenormE, ZDenormE; // is the input denormalized + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM, ZZeroM; // is the input zero - memory stage + logic XZeroQ, YZeroQ; // is the input zero - divide + logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XInfQ, YInfQ; // is the input infinity - divide + logic XExpMaxE; // is the exponent all ones (max value) + logic XNormE; // is normal + logic FmtQ; + logic FOpCtrlQ; - // D/E pipeline registers - flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); - flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); - flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); - flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, - {Adr1E, Adr2E, Adr3E}); - flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, - {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, - {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); + // result and flag signals + logic [63:0] FDivResM, FDivResW; // divide/squareroot result + logic [4:0] FDivFlgM; // divide/squareroot flags + logic [63:0] FMAResM, FMAResW; // FMA/multiply result + logic [4:0] FMAFlgM; // FMA/multiply result + logic [63:0] ReadResW; // read result (load instruction) + logic [63:0] CvtFpResE; // add/FP -> FP convert result + logic [4:0] CvtFpFlgE; // add/FP -> FP convert flags + logic [63:0] CvtResE; // FP <-> int convert result + logic [4:0] CvtFlgE; // FP <-> int convert flags //*** trim this + logic [63:0] ClassResE; // classify result + logic [63:0] CmpResE; // compare result + logic CmpNVE; // compare invalid flag (Not Valid) + logic [63:0] SgnResE; // sign injection result + logic SgnNVE; // sign injection invalid flag (Not Valid) + logic [63:0] FResE, FResM, FResW; // selected result that is ready in the memory stage + logic [4:0] FFlgE, FFlgM; // selected flag that is ready in the memory stage + logic [`XLEN-1:0] FIntResE; + logic [63:0] FPUResultW; // final FP result being written to the FP register + // other signals + logic FDivSqrtDoneE; // is divide done + logic [63:0] DivInput1E, DivInput2E; // inputs to divide/squareroot unit + logic load_preload; // enable for FF on fpdivsqrt + logic [63:0] AlignedSrcAE; // align SrcA to the floating point format - // EXECUTION STAGE - // Hazard unit for FPU - // - determines if any forwarding or stalls are needed - fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, - .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); - - // forwarding muxs - mux3 #(64) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); - mux3 #(64) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); - mux3 #(64) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); - mux3 #(64) fyaddmux (FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, - {2'b0, {10{1'b1}}, 52'b0}, - {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01)}, - FSrcYE); // Force Z to be 0 for multiply instructions - // Force Z to be 0 for multiply instructions - mux3 #(64) fzmulmux (FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); - - // unpacking unit - // - splits FP inputs into their various parts - // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) - unpacking unpacking (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, - .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); - - // FMA - // - two stage FMA - // - execute stage - multiplication and addend shifting - // - memory stage - addition and rounding - // - handles FMA and multiply instructions - fma fma (.clk, .reset, .FlushM, .StallM, - .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, - .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, - .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, - .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, - .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, - .FOpCtrlE, - .FmtE, .FmtM, .FrmM, - .FMAFlgM, .FMAResM); - - // fpdivsqrt using Goldschmidt's iteration - flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}), - .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), - .clear(FDivSqrtDoneE), .en(load_preload), - .reset(reset), .clk(clk)); - fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), - .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), - .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, - .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + // DECODE STAGE - // convert from signle to double and vice versa - cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); - - // compare unit - // - computation is done in one stage - // - writes to FP file durring min/max instructions - // - other comparisons write a 1 or 0 to the integer register - fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), - .FSrcXE, .FSrcYE, .FOpCtrlE, - .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, - .Invalid(CmpNVE), .CmpResE); - - // sign injection unit - fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE, - .SgnNVE, .SgnResE); - - // classify - fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, - .XSNaNE, .ClassResE); + // calculate FP control signals + fctrl fctrl (.Funct7D(InstrD[31:25]), .OpD(InstrD[6:0]), .Rs2D(InstrD[24:20]), .Funct3D(InstrD[14:12]), .FRM_REGW, + .IllegalFPUInstrD, .FRegWriteD, .FDivStartD, .FResultSelD, .FOpCtrlD, .FResSelD, + .FIntResSelD, .FmtD, .FrmD, .FWriteIntD); - // Convert - fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE, - .CvtResE, .CvtFlgE); - - // data to be stored in memory - to IEU - // - FP uses NaN-blocking format - // - if there are any unsused bits the most significant bits are filled with 1s - assign FWriteDataE = FSrcYE[`XLEN-1:0]; - - // Align SrcA to MSB when single precicion - mux2 #(64) SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); - - // select a result that may be written to the FP register - mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE); - mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE); - - // select the result that may be written to the integer register - to IEU - mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], - CvtResE[`XLEN-1:0], FIntResSelE, FIntResE); - - // E/M pipe registers + // FP register file + fregfile fregfile (.clk, .reset, .we4(FRegWriteW), + .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), + .a4(RdW), .wd4(FPUResultW), + .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); - // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); - flopenrc #(65) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); - flopenrc #(65) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); - flopenrc #(64) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); - flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM, - {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, - {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); - flopenrc #(64) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM); - flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM); - flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); - flopenrc #(7) EMCtrlReg (clk, reset, FlushM, ~StallM, - {FRegWriteE, FResultSelE, FrmE, FmtE}, - {FRegWriteM, FResultSelM, FrmM, FmtM}); - - // BEGIN MEMORY STAGE - - // FPU flag selection - to privileged - mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM); - - // M/W pipe registers - flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); - flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); - flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); - flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW, - {FRegWriteM, FResultSelM, FmtM}, - {FRegWriteW, FResultSelW, FmtW}); - - // BEGIN WRITEBACK STAGE - - // put ReadData into NaN-blocking format - // - if there are any unsused bits the most significant bits are filled with 1s - // - for load instruction - mux2 #(64) ReadResMux ({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); - - // select the result to be written to the FP register - mux4 #(64) FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); + // D/E pipeline registers + flopenrc #(64) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); + flopenrc #(64) DEReg2(clk, reset, FlushE, ~StallE, FRD2D, FRD2E); + flopenrc #(64) DEReg3(clk, reset, FlushE, ~StallE, FRD3D, FRD3E); + flopenrc #(15) DEAdrReg(clk, reset, FlushE, ~StallE, {InstrD[19:15], InstrD[24:20], InstrD[31:27]}, + {Adr1E, Adr2E, Adr3E}); + flopenrc #(17) DECtrlReg3(clk, reset, FlushE, ~StallE, + {FRegWriteD, FResultSelD, FResSelD, FIntResSelD, FrmD, FmtD, FOpCtrlD, FWriteIntD, FDivStartD}, + {FRegWriteE, FResultSelE, FResSelE, FIntResSelE, FrmE, FmtE, FOpCtrlE, FWriteIntE, FDivStartE}); - end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low - assign FStallD = 0; - assign FWriteIntE = 0; - assign FWriteDataE = 0; - assign FIntResM = 0; - assign FDivBusyE = 0; - assign IllegalFPUInstrD = 1; - assign SetFflagsM = 0; - end - endgenerate - + // EXECUTION STAGE + // Hazard unit for FPU + // - determines if any forwarding or stalls are needed + fhazard fhazard(.Adr1E, .Adr2E, .Adr3E, .FRegWriteM, .FRegWriteW, .RdM, .RdW, .FResultSelM, + .FStallD, .FForwardXE, .FForwardYE, .FForwardZE); + + // forwarding muxs + mux3 #(64) fxemux (FRD1E, FPUResultW, FResM, FForwardXE, FSrcXE); + mux3 #(64) fyemux (FRD2E, FPUResultW, FResM, FForwardYE, FPreSrcYE); + mux3 #(64) fzemux (FRD3E, FPUResultW, FResM, FForwardZE, FPreSrcZE); + mux3 #(64) fyaddmux (FPreSrcYE, {{32{1'b1}}, 2'b0, {7{1'b1}}, 23'b0}, + {2'b0, {10{1'b1}}, 52'b0}, + {FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01), ~FmtE&FOpCtrlE[2]&FOpCtrlE[1]&(FResultSelE==2'b01)}, + FSrcYE); // Force Z to be 0 for multiply instructions + // Force Z to be 0 for multiply instructions + mux3 #(64) fzmulmux (FPreSrcZE, 64'b0, FPreSrcYE, {FOpCtrlE[2]&FOpCtrlE[1], FOpCtrlE[2]&~FOpCtrlE[1]}, FSrcZE); + + // unpacking unit + // - splits FP inputs into their various parts + // - does some classifications (SNaN, NaN, Denorm, Norm, Zero, Infifnity) + unpacking unpacking (.X(FSrcXE), .Y(FSrcYE), .Z(FSrcZE), .FOpCtrlE, .FmtE, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .XNaNE, .YNaNE, .ZNaNE, .XSNaNE, .YSNaNE, .ZSNaNE, .XDenormE, .YDenormE, .ZDenormE, + .XZeroE, .YZeroE, .ZZeroE, .BiasE, .XInfE, .YInfE, .ZInfE, .XExpMaxE, .XNormE); + + // FMA + // - two stage FMA + // - execute stage - multiplication and addend shifting + // - memory stage - addition and rounding + // - handles FMA and multiply instructions + fma fma (.clk, .reset, .FlushM, .StallM, + .XSgnE, .YSgnE, .ZSgnE, .XExpE, .YExpE, .ZExpE, .XManE, .YManE, .ZManE, + .XDenormE, .YDenormE, .ZDenormE, .XZeroE, .YZeroE, .ZZeroE, + .XSgnM, .YSgnM, .XExpM, .YExpM, .ZExpM, .XManM, .YManM, .ZManM, + .XNaNM, .YNaNM, .ZNaNM, .XZeroM, .YZeroM, .ZZeroM, + .XInfM, .YInfM, .ZInfM, .XSNaNM, .YSNaNM, .ZSNaNM, + .FOpCtrlE, + .FmtE, .FmtM, .FrmM, + .FMAFlgM, .FMAResM); + + // fpdivsqrt using Goldschmidt's iteration + flopenrc #(64) reg_input1 (.d({XSgnE, XExpE, XManE[51:0]}), .q(DivInput1E), + .clear(FDivSqrtDoneE), .en(load_preload), + .reset(reset), .clk(clk)); + flopenrc #(64) reg_input2 (.d({YSgnE, YExpE, YManE[51:0]}), .q(DivInput2E), + .clear(FDivSqrtDoneE), .en(load_preload), + .reset(reset), .clk(clk)); + flopenrc #(8) reg_input3 (.d({XNaNE, YNaNE, XInfE, YInfE, XZeroE, YZeroE, FmtE, FOpCtrlE[0]}), + .q({XNaNQ, YNaNQ, XInfQ, YInfQ, XZeroQ, YZeroQ, FmtQ, FOpCtrlQ}), + .clear(FDivSqrtDoneE), .en(load_preload), + .reset(reset), .clk(clk)); + fpdiv_pipe fdivsqrt (.op1(DivInput1E), .op2(DivInput2E), .rm(FrmE[1:0]), .op_type(FOpCtrlQ), + .reset, .clk(clk), .start(FDivStartE), .P(~FmtQ), .OvEn(1'b1), .UnEn(1'b1), + .XNaNQ, .YNaNQ, .XInfQ, .YInfQ, .XZeroQ, .YZeroQ, .load_preload, + .FDivBusyE, .done(FDivSqrtDoneE), .AS_Result(FDivResM), .Flags(FDivFlgM)); + + // convert from signle to double and vice versa + cvtfp cvtfp (.XExpE, .XManE, .XSgnE, .XZeroE, .XDenormE, .XInfE, .XNaNE, .XSNaNE, .FrmE, .FmtE, .CvtFpResE, .CvtFpFlgE); + + // compare unit + // - computation is done in one stage + // - writes to FP file durring min/max instructions + // - other comparisons write a 1 or 0 to the integer register + fcmp fcmp (.op1({XSgnE,XExpE,XManE[`NF-1:0]}), .op2({YSgnE,YExpE,YManE[`NF-1:0]}), + .FSrcXE, .FSrcYE, .FOpCtrlE, + .FmtE, .XNaNE, .YNaNE, .XZeroE, .YZeroE, + .Invalid(CmpNVE), .CmpResE); + + // sign injection unit + fsgn fsgn (.SgnOpCodeE(FOpCtrlE[1:0]), .XSgnE, .YSgnE, .FSrcXE, .FmtE, .XExpMaxE, + .SgnNVE, .SgnResE); + + // classify + fclassify fclassify (.XSgnE, .XDenormE, .XZeroE, .XNaNE, .XInfE, .XNormE, + .XSNaNE, .ClassResE); + + // Convert + fcvt fcvt (.XSgnE, .XExpE, .XManE, .XZeroE, .XNaNE, .XInfE, .XDenormE, .BiasE, .ForwardedSrcAE, .FOpCtrlE, .FmtE, .FrmE, + .CvtResE, .CvtFlgE); + + // data to be stored in memory - to IEU + // - FP uses NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + assign FWriteDataE = FSrcYE[`XLEN-1:0]; + + // Align SrcA to MSB when single precicion + mux2 #(64) SrcAMux({{32{1'b1}}, ForwardedSrcAE[31:0]}, {{64-`XLEN{1'b1}}, ForwardedSrcAE}, FmtE, AlignedSrcAE); + + // select a result that may be written to the FP register + mux5 #(64) FResMux(AlignedSrcAE, SgnResE, CmpResE, CvtResE, CvtFpResE, FResSelE, FResE); + mux5 #(5) FFlgMux(5'b0, {4'b0, SgnNVE}, {4'b0, CmpNVE}, CvtFlgE, CvtFpFlgE, FResSelE, FFlgE); + + // select the result that may be written to the integer register - to IEU + mux4 #(`XLEN) IntResMux(CmpResE[`XLEN-1:0], FSrcXE[`XLEN-1:0], ClassResE[`XLEN-1:0], + CvtResE[`XLEN-1:0], FIntResSelE, FIntResE); + + // E/M pipe registers + + // flopenrc #(64) EMFpReg1(clk, reset, FlushM, ~StallM, FSrcXE, FSrcXM); + flopenrc #(65) EMFpReg2 (clk, reset, FlushM, ~StallM, {XSgnE,XExpE,XManE}, {XSgnM,XExpM,XManM}); + flopenrc #(65) EMFpReg3 (clk, reset, FlushM, ~StallM, {YSgnE,YExpE,YManE}, {YSgnM,YExpM,YManM}); + flopenrc #(64) EMFpReg4 (clk, reset, FlushM, ~StallM, {ZExpE,ZManE}, {ZExpM,ZManM}); + flopenrc #(12) EMFpReg5 (clk, reset, FlushM, ~StallM, + {XZeroE, YZeroE, ZZeroE, XInfE, YInfE, ZInfE, XNaNE, YNaNE, ZNaNE, XSNaNE, YSNaNE, ZSNaNE}, + {XZeroM, YZeroM, ZZeroM, XInfM, YInfM, ZInfM, XNaNM, YNaNM, ZNaNM, XSNaNM, YSNaNM, ZSNaNM}); + flopenrc #(64) EMRegCmpRes (clk, reset, FlushM, ~StallM, FResE, FResM); + flopenrc #(5) EMRegCmpFlg (clk, reset, FlushM, ~StallM, FFlgE, FFlgM); + flopenrc #(`XLEN) EMRegSgnRes (clk, reset, FlushM, ~StallM, FIntResE, FIntResM); + flopenrc #(7) EMCtrlReg (clk, reset, FlushM, ~StallM, + {FRegWriteE, FResultSelE, FrmE, FmtE}, + {FRegWriteM, FResultSelM, FrmM, FmtM}); + + // BEGIN MEMORY STAGE + + // FPU flag selection - to privileged + mux4 #(5) FPUFlgMux (5'b0, FMAFlgM, FDivFlgM, FFlgM, FResultSelM, SetFflagsM); + + // M/W pipe registers + flopenrc #(64) MWRegFma(clk, reset, FlushW, ~StallW, FMAResM, FMAResW); + flopenrc #(64) MWRegDiv(clk, reset, FlushW, ~StallW, FDivResM, FDivResW); + flopenrc #(64) MWRegClass(clk, reset, FlushW, ~StallW, FResM, FResW); + flopenrc #(4) MWCtrlReg(clk, reset, FlushW, ~StallW, + {FRegWriteM, FResultSelM, FmtM}, + {FRegWriteW, FResultSelW, FmtW}); + + // BEGIN WRITEBACK STAGE + + // put ReadData into NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + // - for load instruction + mux2 #(64) ReadResMux ({{32{1'b1}}, ReadDataW[31:0]}, {{64-`XLEN{1'b1}}, ReadDataW}, FmtW, ReadResW); + + // select the result to be written to the FP register + mux4 #(64) FPUResultMux (ReadResW, FMAResW, FDivResW, FResW, FResultSelW, FPUResultW); endmodule // fpu diff --git a/wally-pipelined/src/muldiv/muldiv.sv b/wally-pipelined/src/muldiv/muldiv.sv index b3bf4e83e..a1e76616c 100644 --- a/wally-pipelined/src/muldiv/muldiv.sv +++ b/wally-pipelined/src/muldiv/muldiv.sv @@ -40,65 +40,50 @@ module muldiv ( input logic StallM, StallW, FlushM, FlushW ); - generate - if (`M_SUPPORTED) begin - logic [`XLEN-1:0] MulDivResultM; - logic [`XLEN-1:0] PrelimResultM; - logic [`XLEN-1:0] QuotM, RemM; - logic [`XLEN*2-1:0] ProdM; + logic [`XLEN-1:0] MulDivResultM; + logic [`XLEN-1:0] PrelimResultM; + logic [`XLEN-1:0] QuotM, RemM; + logic [`XLEN*2-1:0] ProdM; - logic DivE; - logic DivSignedE; - logic W64M; - - // Multiplier - mul mul( - .clk, .reset, - .StallM, .FlushM, - // .SrcAE, .SrcBE, - .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B - .Funct3E, - .ProdM - ); + logic DivE; + logic DivSignedE; + logic W64M; - // Divide - // Start a divide when a new division instruction is received and the divider isn't already busy or finishing - assign DivE = MulDivE & Funct3E[2]; - assign DivSignedE = ~Funct3E[0]; - intdivrestoring div(.clk, .reset, .StallM, - .DivSignedE, .W64E, .DivE, .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); - - // Result multiplexer - always_comb - case (Funct3M) - 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; - 3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; - 3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; - 3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; - 3'b100: PrelimResultM = QuotM; - 3'b101: PrelimResultM = QuotM; - 3'b110: PrelimResultM = RemM; - 3'b111: PrelimResultM = RemM; - endcase - - // Handle sign extension for W-type instructions - flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M); - if (`XLEN == 64) begin // RV64 has W-type instructions - assign MulDivResultM = W64M ? {{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM; - end else begin // RV32 has no W-type instructions - assign MulDivResultM = PrelimResultM; - end + // Multiplier + mul mul(.clk, .reset, .StallM, .FlushM, .ForwardedSrcAE, .ForwardedSrcBE, .Funct3E, .ProdM); - // Writeback stage pipeline register + // Divide + // Start a divide when a new division instruction is received and the divider isn't already busy or finishing + assign DivE = MulDivE & Funct3E[2]; + assign DivSignedE = ~Funct3E[0]; + intdivrestoring div(.clk, .reset, .StallM, .DivSignedE, .W64E, .DivE, + .ForwardedSrcAE, .ForwardedSrcBE, .DivBusyE, .QuotM, .RemM); + + // Result multiplexer + always_comb + case (Funct3M) + 3'b000: PrelimResultM = ProdM[`XLEN-1:0]; + 3'b001: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b010: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b011: PrelimResultM = ProdM[`XLEN*2-1:`XLEN]; + 3'b100: PrelimResultM = QuotM; + 3'b101: PrelimResultM = QuotM; + 3'b110: PrelimResultM = RemM; + 3'b111: PrelimResultM = RemM; + endcase - flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); - - end else begin // no M instructions supported - assign MulDivResultW = 0; - assign DivBusyE = 0; - end - endgenerate + // Handle sign extension for W-type instructions + flopenrc #(1) W64MReg(clk, reset, FlushM, ~StallM, W64E, W64M); + generate + if (`XLEN == 64) begin:resmux // RV64 has W-type instructions + assign MulDivResultM = W64M ? {{32{PrelimResultM[31]}}, PrelimResultM[31:0]} : PrelimResultM; + end else begin:resmux // RV32 has no W-type instructions + assign MulDivResultM = PrelimResultM; + end + endgenerate + // Writeback stage pipeline register + flopenrc #(`XLEN) MulDivResultWReg(clk, reset, FlushW, ~StallW, MulDivResultM, MulDivResultW); endmodule // muldiv diff --git a/wally-pipelined/src/privileged/csr.sv b/wally-pipelined/src/privileged/csr.sv index 440045d30..63d0a5831 100644 --- a/wally-pipelined/src/privileged/csr.sv +++ b/wally-pipelined/src/privileged/csr.sv @@ -86,76 +86,48 @@ module csr #(parameter logic IllegalCSRCAccessM, IllegalCSRMAccessM, IllegalCSRSAccessM, IllegalCSRUAccessM, IllegalCSRNAccessM, InsufficientCSRPrivilegeM; logic IllegalCSRMWriteReadonlyM; - generate - if (`ZICSR_SUPPORTED) begin - // modify CSRs - always_comb begin - // Choose either rs1 or uimm[4:0] as source - CSRSrcM = InstrM[14] ? {{(`XLEN-5){1'b0}}, InstrM[19:15]} : SrcAM; - // Compute AND/OR modification - CSRRWM = CSRSrcM; - CSRRSM = CSRReadValM | CSRSrcM; - CSRRCM = CSRReadValM & ~CSRSrcM; - case (InstrM[13:12]) - 2'b01: CSRWriteValM = CSRRWM; - 2'b10: CSRWriteValM = CSRRSM; - 2'b11: CSRWriteValM = CSRRCM; - default: CSRWriteValM = CSRReadValM; - endcase - end + // modify CSRs + always_comb begin + // Choose either rs1 or uimm[4:0] as source + CSRSrcM = InstrM[14] ? {{(`XLEN-5){1'b0}}, InstrM[19:15]} : SrcAM; + // Compute AND/OR modification + CSRRWM = CSRSrcM; + CSRRSM = CSRReadValM | CSRSrcM; + CSRRCM = CSRReadValM & ~CSRSrcM; + case (InstrM[13:12]) + 2'b01: CSRWriteValM = CSRRWM; + 2'b10: CSRWriteValM = CSRRSM; + 2'b11: CSRWriteValM = CSRRCM; + default: CSRWriteValM = CSRReadValM; + endcase + end - // write CSRs - assign CSRAdrM = InstrM[31:20]; - assign UnalignedNextEPCM = TrapM ? PCM : CSRWriteValM; - assign NextEPCM = `C_SUPPORTED ? {UnalignedNextEPCM[`XLEN-1:1], 1'b0} : {UnalignedNextEPCM[`XLEN-1:2], 2'b00}; // 3.1.15 alignment - assign NextCauseM = TrapM ? CauseM : CSRWriteValM; - assign NextMtvalM = TrapM ? NextFaultMtvalM : CSRWriteValM; - assign CSRMWriteM = CSRWriteM && (PrivilegeModeW == `M_MODE); - assign CSRSWriteM = CSRWriteM && (|PrivilegeModeW); - assign CSRUWriteM = CSRWriteM; + // write CSRs + assign CSRAdrM = InstrM[31:20]; + assign UnalignedNextEPCM = TrapM ? PCM : CSRWriteValM; + assign NextEPCM = `C_SUPPORTED ? {UnalignedNextEPCM[`XLEN-1:1], 1'b0} : {UnalignedNextEPCM[`XLEN-1:2], 2'b00}; // 3.1.15 alignment + assign NextCauseM = TrapM ? CauseM : CSRWriteValM; + assign NextMtvalM = TrapM ? NextFaultMtvalM : CSRWriteValM; + assign CSRMWriteM = CSRWriteM && (PrivilegeModeW == `M_MODE); + assign CSRSWriteM = CSRWriteM && (|PrivilegeModeW); + assign CSRUWriteM = CSRWriteM; - csri csri(.*); - csrsr csrsr(.*); - csrc counters(.*); - csrm csrm(.*); // Machine Mode CSRs - csrs csrs(.*); - csrn csrn(.CSRNWriteM(CSRUWriteM), .*); // User Mode Exception Registers - csru csru(.*); // Floating Point Flags are part of User MOde + csri csri(.*); + csrsr csrsr(.*); + csrc counters(.*); + csrm csrm(.*); // Machine Mode CSRs + csrs csrs(.*); + csrn csrn(.CSRNWriteM(CSRUWriteM), .*); // User Mode Exception Registers + csru csru(.*); // Floating Point Flags are part of User MOde - // merge CSR Reads - assign CSRReadValM = CSRUReadValM | CSRSReadValM | CSRMReadValM | CSRCReadValM | CSRNReadValM; - // *** add W stall 2/22/21 dh to try fixing memory stalls -// floprc #(`XLEN) CSRValWReg(clk, reset, FlushW, CSRReadValM, CSRReadValW); - flopenrc #(`XLEN) CSRValWReg(clk, reset, FlushW, ~StallW, CSRReadValM, CSRReadValW); + // merge CSR Reads + assign CSRReadValM = CSRUReadValM | CSRSReadValM | CSRMReadValM | CSRCReadValM | CSRNReadValM; + flopenrc #(`XLEN) CSRValWReg(clk, reset, FlushW, ~StallW, CSRReadValM, CSRReadValW); - // merge illegal accesses: illegal if none of the CSR addresses is legal or privilege is insufficient - assign InsufficientCSRPrivilegeM = (CSRAdrM[9:8] == 2'b11 && PrivilegeModeW != `M_MODE) || - (CSRAdrM[9:8] == 2'b01 && PrivilegeModeW == `U_MODE); - assign IllegalCSRAccessM = ((IllegalCSRCAccessM && IllegalCSRMAccessM && - IllegalCSRSAccessM && IllegalCSRUAccessM && IllegalCSRNAccessM || - InsufficientCSRPrivilegeM) && CSRReadM) || IllegalCSRMWriteReadonlyM; - end else begin // CSRs not implemented - assign STATUS_MPP = 2'b11; - assign STATUS_SPP = 2'b0; - assign STATUS_TSR = 0; - assign MEPC_REGW = 0; - assign SEPC_REGW = 0; - assign UEPC_REGW = 0; - assign UTVEC_REGW = 0; - assign STVEC_REGW = 0; - assign MTVEC_REGW = 0; - assign MEDELEG_REGW = 0; - assign MIDELEG_REGW = 0; - assign SEDELEG_REGW = 0; - assign SIDELEG_REGW = 0; - assign SATP_REGW = 0; - assign MIP_REGW = 0; - assign MIE_REGW = 0; - assign STATUS_MIE = 0; - assign STATUS_SIE = 0; - assign FRM_REGW = 0; - assign CSRReadValM = 0; - assign IllegalCSRAccessM = CSRReadM; - end - endgenerate + // merge illegal accesses: illegal if none of the CSR addresses is legal or privilege is insufficient + assign InsufficientCSRPrivilegeM = (CSRAdrM[9:8] == 2'b11 && PrivilegeModeW != `M_MODE) || + (CSRAdrM[9:8] == 2'b01 && PrivilegeModeW == `U_MODE); + assign IllegalCSRAccessM = ((IllegalCSRCAccessM && IllegalCSRMAccessM && + IllegalCSRSAccessM && IllegalCSRUAccessM && IllegalCSRNAccessM || + InsufficientCSRPrivilegeM) && CSRReadM) || IllegalCSRMWriteReadonlyM; endmodule diff --git a/wally-pipelined/src/privileged/privileged.sv b/wally-pipelined/src/privileged/privileged.sv index c5bc8a45c..c84a9d27b 100644 --- a/wally-pipelined/src/privileged/privileged.sv +++ b/wally-pipelined/src/privileged/privileged.sv @@ -239,8 +239,6 @@ module privileged ( .ExceptionM, .PendingInterruptM, .PrivilegedNextPCM, .CauseM, .NextFaultMtvalM); - - endmodule diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 7be5b8c6e..304ac1a99 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -198,7 +198,6 @@ module wallypipelinedhart ( ); // instruction fetch unit: PC, branch prediction, instruction cache - ieu ieu( .clk, .reset, @@ -276,7 +275,7 @@ module wallypipelinedhart ( .LSUStall); // change to LSUStall - + // *** Ross: please make EBU conditional when only supporting internal memories ahblite ebu(// IFU connections .clk, .reset, @@ -295,22 +294,7 @@ module wallypipelinedhart ( .HWRITED); - muldiv mdu( - .clk, .reset, - // Execute Stage interface - // .SrcAE, .SrcBE, - .ForwardedSrcAE, .ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B - .Funct3E, .Funct3M, - .MulDivE, .W64E, - // Writeback stage - .MulDivResultW, - // Divide Done - .DivBusyE, - // hazards - .StallM, .StallW, .FlushM, .FlushW - ); // multiply and divide unit - - hazard hzu( + hazard hzu( .BPPredWrongE, .CSRWritePendingDEM, .RetM, .TrapM, .LoadStallD, .StoreStallD, .MulDivStallD, .CSRRdStallD, .LSUStall, .ICacheStallF, @@ -323,57 +307,89 @@ module wallypipelinedhart ( .FlushF, .FlushD, .FlushE, .FlushM, .FlushW ); // global stall and flush control - // Priveleged block operates in M and W stages, handling CSRs and exceptions - privileged priv( - .clk, .reset, - .FlushD, .FlushE, .FlushM, .FlushW, - .StallD, .StallE, .StallM, .StallW, - .CSRReadM, .CSRWriteM, .SrcAM, .PCM, - .InstrM, .CSRReadValW, .PrivilegedNextPCM, - .RetM, .TrapM, - .ITLBFlushF, .DTLBFlushM, - .InstrValidM, .CommittedM, - .FRegWriteM, .LoadStallD, - .BPPredDirWrongM, .BTBPredPCWrongM, - .RASPredPCWrongM, .BPPredClassNonCFIWrongM, - .InstrClassM, .DCacheMiss, .DCacheAccess, .PrivilegedM, - .ITLBInstrPageFaultF, .DTLBLoadPageFaultM, .DTLBStorePageFaultM, - .WalkerInstrPageFaultF, .WalkerLoadPageFaultM, .WalkerStorePageFaultM, - .InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .IllegalFPUInstrD, - .LoadMisalignedFaultM, .StoreMisalignedFaultM, - .TimerIntM, .ExtIntM, .SwIntM, - .MTIME_CLINT, .MTIMECMP_CLINT, - .InstrMisalignedAdrM, .MemAdrM, - .SetFflagsM, - // Trap signals from pmp/pma in mmu - // *** do these need to be split up into one for dmem and one for ifu? - // instead, could we only care about the instr and F pins that come from ifu and only care about the load/store and m pins that come from dmem? - .InstrAccessFaultF, .LoadAccessFaultM, .StoreAccessFaultM, - .ExceptionM, .PendingInterruptM, .IllegalFPUInstrE, - .PrivilegeModeW, .SATP_REGW, - .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, - .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, - .FRM_REGW,.BreakpointFaultM, .EcallFaultM - ); - + generate + if (`ZICSR_SUPPORTED) begin:priv + privileged priv( + .clk, .reset, + .FlushD, .FlushE, .FlushM, .FlushW, + .StallD, .StallE, .StallM, .StallW, + .CSRReadM, .CSRWriteM, .SrcAM, .PCM, + .InstrM, .CSRReadValW, .PrivilegedNextPCM, + .RetM, .TrapM, + .ITLBFlushF, .DTLBFlushM, + .InstrValidM, .CommittedM, + .FRegWriteM, .LoadStallD, + .BPPredDirWrongM, .BTBPredPCWrongM, + .RASPredPCWrongM, .BPPredClassNonCFIWrongM, + .InstrClassM, .DCacheMiss, .DCacheAccess, .PrivilegedM, + .ITLBInstrPageFaultF, .DTLBLoadPageFaultM, .DTLBStorePageFaultM, + .WalkerInstrPageFaultF, .WalkerLoadPageFaultM, .WalkerStorePageFaultM, + .InstrMisalignedFaultM, .IllegalIEUInstrFaultD, .IllegalFPUInstrD, + .LoadMisalignedFaultM, .StoreMisalignedFaultM, + .TimerIntM, .ExtIntM, .SwIntM, + .MTIME_CLINT, .MTIMECMP_CLINT, + .InstrMisalignedAdrM, .MemAdrM, + .SetFflagsM, + // Trap signals from pmp/pma in mmu + // *** do these need to be split up into one for dmem and one for ifu? + // instead, could we only care about the instr and F pins that come from ifu and only care about the load/store and m pins that come from dmem? + .InstrAccessFaultF, .LoadAccessFaultM, .StoreAccessFaultM, + .ExceptionM, .PendingInterruptM, .IllegalFPUInstrE, + .PrivilegeModeW, .SATP_REGW, + .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, + .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW, + .FRM_REGW,.BreakpointFaultM, .EcallFaultM + ); + end else begin + assign CSRReadValW = 0; + assign PrivilegedNextPCM = 0; + assign RetM = 0; + assign TrapM = 0; + assign ITLBFlushF = 0; + assign DTLBFlushM = 0; + end + if (`M_SUPPORTED) begin:mdu + muldiv mdu( + .clk, .reset, + .ForwardedSrcAE, .ForwardedSrcBE, + .Funct3E, .Funct3M, .MulDivE, .W64E, + .MulDivResultW, .DivBusyE, + .StallM, .StallW, .FlushM, .FlushW + ); + end else begin // no M instructions supported + assign MulDivResultW = 0; + assign DivBusyE = 0; + end - fpu fpu( - .clk, .reset, - .FRM_REGW, // Rounding mode from CSR - .InstrD, // instruction from IFU - .ReadDataW,// Read data from memory - .ForwardedSrcAE, // Integer input being processed (from IEU) - .StallE, .StallM, .StallW, // stall signals from HZU - .FlushE, .FlushM, .FlushW, // flush signals from HZU - .RdM, .RdW, // which FP register to write to (from IEU) - .FRegWriteM, // FP register write enable - .FStallD, // Stall the decode stage - .FWriteIntE, // integer register write enable - .FWriteDataE, // Data to be written to memory - .FIntResM, // data to be written to integer register - .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) - .IllegalFPUInstrD, // Is the instruction an illegal fpu instruction - .SetFflagsM // FPU flags (to privileged unit) - ); // floating point unit - + if (`F_SUPPORTED) begin:fpu + fpu fpu( + .clk, .reset, + .FRM_REGW, // Rounding mode from CSR + .InstrD, // instruction from IFU + .ReadDataW,// Read data from memory + .ForwardedSrcAE, // Integer input being processed (from IEU) + .StallE, .StallM, .StallW, // stall signals from HZU + .FlushE, .FlushM, .FlushW, // flush signals from HZU + .RdM, .RdW, // which FP register to write to (from IEU) + .FRegWriteM, // FP register write enable + .FStallD, // Stall the decode stage + .FWriteIntE, // integer register write enable + .FWriteDataE, // Data to be written to memory + .FIntResM, // data to be written to integer register + .FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) + .IllegalFPUInstrD, // Is the instruction an illegal fpu instruction + .SetFflagsM // FPU flags (to privileged unit) + ); // floating point unit + end else begin // no F_SUPPORTED or D_SUPPORTED; tie outputs low + assign FStallD = 0; + assign FWriteIntE = 0; + assign FWriteDataE = 0; + assign FIntResM = 0; + assign FDivBusyE = 0; + assign IllegalFPUInstrD = 1; + assign SetFflagsM = 0; + end + + endgenerate + // Priveleged block operates in M and W stages, handling CSRs and exceptions endmodule diff --git a/wally-pipelined/src/sdc/tb/ram2sdLoad.py b/wally-pipelined/testbench/sdc/ram2sdLoad.py similarity index 100% rename from wally-pipelined/src/sdc/tb/ram2sdLoad.py rename to wally-pipelined/testbench/sdc/ram2sdLoad.py diff --git a/wally-pipelined/src/sdc/tb/ramdisk2.hex b/wally-pipelined/testbench/sdc/ramdisk2.hex similarity index 100% rename from wally-pipelined/src/sdc/tb/ramdisk2.hex rename to wally-pipelined/testbench/sdc/ramdisk2.hex diff --git a/wally-pipelined/src/sdc/tb/run_tb.do b/wally-pipelined/testbench/sdc/run_tb.do similarity index 100% rename from wally-pipelined/src/sdc/tb/run_tb.do rename to wally-pipelined/testbench/sdc/run_tb.do diff --git a/wally-pipelined/src/sdc/tb/sd_top_tb.sv b/wally-pipelined/testbench/sdc/sd_top_tb.sv similarity index 100% rename from wally-pipelined/src/sdc/tb/sd_top_tb.sv rename to wally-pipelined/testbench/sdc/sd_top_tb.sv diff --git a/wally-pipelined/src/sdc/tb/wave.do b/wally-pipelined/testbench/sdc/wave.do similarity index 100% rename from wally-pipelined/src/sdc/tb/wave.do rename to wally-pipelined/testbench/sdc/wave.do diff --git a/wally-pipelined/testbench/testbench-linux.sv b/wally-pipelined/testbench/testbench-linux.sv index a64ca4340..41e9480bd 100644 --- a/wally-pipelined/testbench/testbench-linux.sv +++ b/wally-pipelined/testbench/testbench-linux.sv @@ -174,7 +174,7 @@ module testbench(); // Useful Aliases `define RF dut.hart.ieu.dp.regf.rf `define PC dut.hart.ifu.pcreg.q - `define CSR_BASE dut.hart.priv.csr.genblk1 + `define CSR_BASE dut.hart.priv.priv.csr `define HPMCOUNTER `CSR_BASE.counters.genblk1.HPMCOUNTER_REGW `define PMP_BASE `CSR_BASE.csrm.genblk4 `define PMPCFG genblk2.PMPCFGreg.q @@ -210,8 +210,8 @@ module testbench(); `define STATUS_MIE `CSR_BASE.csrsr.STATUS_MIE `define STATUS_SIE `CSR_BASE.csrsr.STATUS_SIE `define STATUS_UIE `CSR_BASE.csrsr.STATUS_UIE - `define PRIV dut.hart.priv.privmodereg.q - `define INSTRET dut.hart.priv.csr.genblk1.counters.genblk1.genblk2.INSTRETreg.q + `define PRIV dut.hart.priv.priv.privmodereg.q + `define INSTRET dut.hart.priv.priv.csr.counters.genblk1.genblk2.INSTRETreg.q // Common Macros `define checkCSR(CSR) \ begin \ @@ -308,9 +308,9 @@ module testbench(); integer ramFile; integer readResult; initial begin - force dut.hart.priv.SwIntM = 0; - force dut.hart.priv.TimerIntM = 0; - force dut.hart.priv.ExtIntM = 0; + force dut.hart.priv.priv.SwIntM = 0; + force dut.hart.priv.priv.TimerIntM = 0; + force dut.hart.priv.priv.ExtIntM = 0; $readmemh({`LINUX_TEST_VECTORS,"bootmem.txt"}, dut.uncore.bootrom.bootrom.RAM, 'h1000 >> 3); $readmemb(`TWO_BIT_PRELOAD, dut.hart.ifu.bpred.bpred.Predictor.DirPredictor.PHT.mem); $readmemb(`BTB_PRELOAD, dut.hart.ifu.bpred.bpred.TargetPredictor.memory.mem); @@ -365,7 +365,7 @@ module testbench(); // on the next falling edge the expected state is compared to the wally state. // step 0: read the expected state - assign checkInstrM = dut.hart.ieu.InstrValidM & ~dut.hart.priv.trap.InstrPageFaultM & ~dut.hart.priv.trap.InterruptM & ~dut.hart.StallM; + assign checkInstrM = dut.hart.ieu.InstrValidM & ~dut.hart.priv.priv.trap.InstrPageFaultM & ~dut.hart.priv.priv.trap.InterruptM & ~dut.hart.StallM; `define SCAN_NEW_INSTR_FROM_TRACE(STAGE) \ // always check PC, instruction bits \ if (checkInstrM) begin \ @@ -479,7 +479,7 @@ module testbench(); end else begin // update MIP immediately $display("%tns: Updating MIP to %x",$time,NextMIPexpected); MIPexpected = NextMIPexpected; - force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected; + force dut.hart.priv.priv.csr.csri.MIP_REGW = MIPexpected; end // $display("%tn: ExpectedCSRArrayM = %p",$time,ExpectedCSRArrayM); // $display("%tn: ExpectedCSRArrayValueM = %p",$time,ExpectedCSRArrayValueM); @@ -491,11 +491,11 @@ module testbench(); // $display("%tn: ExpectedCSRArrayValueM[NumCSRM] %x",$time,ExpectedCSRArrayValueM[NumCSRM]); end if(RequestDelayedMIP & checkInstrM) begin - $display("%tns: Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.csr.genblk1.csrm.MEPC_REGW); + $display("%tns: Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.priv.csr.csrm.MEPC_REGW); $display("%tns: Updating MIP to %x",$time,NextMIPexpected); MIPexpected = NextMIPexpected; - force dut.hart.priv.csr.genblk1.csri.MIP_REGW = MIPexpected; - $display("%tns: Finished Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.csr.genblk1.csrm.MEPC_REGW); + force dut.hart.priv.priv.csr.csri.MIP_REGW = MIPexpected; + $display("%tns: Finished Executing Delayed MIP. Current MEPC value is %x",$time,dut.hart.priv.priv.csr.csrm.MEPC_REGW); RequestDelayedMIP = 0; end end @@ -576,7 +576,7 @@ module testbench(); `checkEQ("PCW",PCW,ExpectedPCW) //`checkEQ("InstrW",InstrW,ExpectedInstrW) <-- not viable because of // compressed to uncompressed conversion - `checkEQ("Instr Count",dut.hart.priv.csr.genblk1.counters.genblk1.INSTRET_REGW,InstrCountW) + `checkEQ("Instr Count",dut.hart.priv.priv.csr.counters.genblk1.INSTRET_REGW,InstrCountW) #2; // delay 2 ns. if(`DEBUG_TRACE >= 5) begin $display("%tns, %d instrs: Reg Write Address %02d ? expected value: %02d", $time, InstrCountW, dut.hart.ieu.dp.regf.a3, ExpectedRegAdrW); @@ -601,19 +601,19 @@ module testbench(); // check csr for(NumCSRPostWIndex = 0; NumCSRPostWIndex < NumCSRW; NumCSRPostWIndex++) begin case(ExpectedCSRArrayW[NumCSRPostWIndex]) - "mhartid": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MHARTID_REGW) - "mstatus": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MSTATUS_REGW) - "mtvec": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MTVEC_REGW) - "mip": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MIP_REGW) - "mie": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MIE_REGW) - "mideleg": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MIDELEG_REGW) - "medeleg": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MEDELEG_REGW) - "mepc": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MEPC_REGW) - "mtval": `checkCSR(dut.hart.priv.csr.genblk1.csrm.MTVAL_REGW) - "sepc": `checkCSR(dut.hart.priv.csr.genblk1.csrs.SEPC_REGW) - "scause": `checkCSR(dut.hart.priv.csr.genblk1.csrs.genblk1.SCAUSE_REGW) - "stvec": `checkCSR(dut.hart.priv.csr.genblk1.csrs.STVEC_REGW) - "stval": `checkCSR(dut.hart.priv.csr.genblk1.csrs.genblk1.STVAL_REGW) + "mhartid": `checkCSR(dut.hart.priv.priv.csr.csrm.MHARTID_REGW) + "mstatus": `checkCSR(dut.hart.priv.priv.csr.csrm.MSTATUS_REGW) + "mtvec": `checkCSR(dut.hart.priv.priv.csr.csrm.MTVEC_REGW) + "mip": `checkCSR(dut.hart.priv.priv.csr.csrm.MIP_REGW) + "mie": `checkCSR(dut.hart.priv.priv.csr.csrm.MIE_REGW) + "mideleg": `checkCSR(dut.hart.priv.priv.csr.csrm.MIDELEG_REGW) + "medeleg": `checkCSR(dut.hart.priv.priv.csr.csrm.MEDELEG_REGW) + "mepc": `checkCSR(dut.hart.priv.priv.csr.csrm.MEPC_REGW) + "mtval": `checkCSR(dut.hart.priv.priv.csr.csrm.MTVAL_REGW) + "sepc": `checkCSR(dut.hart.priv.priv.csr.csrs.SEPC_REGW) + "scause": `checkCSR(dut.hart.priv.priv.csr.csrs.genblk1.SCAUSE_REGW) + "stvec": `checkCSR(dut.hart.priv.priv.csr.csrs.STVEC_REGW) + "stval": `checkCSR(dut.hart.priv.priv.csr.csrs.genblk1.STVAL_REGW) endcase end if (fault == 1) begin @@ -667,7 +667,7 @@ module testbench(); begin int i; // Grab the SATP register from privileged unit - SATP = dut.hart.priv.csr.SATP_REGW; + SATP = dut.hart.priv.priv.csr.SATP_REGW; // Split the virtual address into page number segments and offset VPN[2] = adrIn[38:30]; VPN[1] = adrIn[29:21]; @@ -677,7 +677,7 @@ module testbench(); SvMode = SATP[63]; // Only perform translation if translation is on and the processor is not // in machine mode - if (SvMode && (dut.hart.priv.PrivilegeModeW != `M_MODE)) begin + if (SvMode && (dut.hart.priv.priv.PrivilegeModeW != `M_MODE)) begin BaseAdr = SATP[43:0] << 12; for (i = 2; i >= 0; i--) begin PAdr = BaseAdr + (VPN[i] << 3); diff --git a/wally-pipelined/testbench/testbench.sv b/wally-pipelined/testbench/testbench.sv index 7301f454a..ef0c06923 100644 --- a/wally-pipelined/testbench/testbench.sv +++ b/wally-pipelined/testbench/testbench.sv @@ -287,7 +287,7 @@ logic [3:0] dummy; // Termination condition // terminate on a specific ECALL for Imperas tests, or on a jump to self infinite loop for RISC-V Arch tests - assign DCacheFlushStart = dut.hart.priv.EcallFaultM && + assign DCacheFlushStart = dut.hart.priv.priv.EcallFaultM && (dut.hart.ieu.dp.regf.rf[3] == 1 || (dut.hart.ieu.dp.regf.we3 && dut.hart.ieu.dp.regf.a3 == 3 && @@ -318,7 +318,7 @@ module riscvassertions; initial begin assert (`PMP_ENTRIES == 0 || `PMP_ENTRIES==16 || `PMP_ENTRIES==64) else $error("Illegal number of PMP entries: PMP_ENTRIES must be 0, 16, or 64"); assert (`DIV_BITSPERCYCLE == 1 || `DIV_BITSPERCYCLE==2 || `DIV_BITSPERCYCLE==4) else $error("Illegal number of divider bits/cycle: DIV_BITSPERCYCLE must be 1, 2, or 4"); - assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double without supporting float"); + assert (`F_SUPPORTED || ~`D_SUPPORTED) else $error("Can't support double (D) without supporting float (F)"); assert (`XLEN == 64 || ~`D_SUPPORTED) else $error("Wally does not yet support D extensions on RV32"); assert (`DCACHE_WAYSIZEINBYTES <= 4096 || `MEM_DCACHE == 0 || `MEM_VIRTMEM == 0) else $error("DCACHE_WAYSIZEINBYTES cannot exceed 4 KiB when caches and vitual memory is enabled (to prevent aliasing)"); assert (`DCACHE_BLOCKLENINBITS >= 128 || `MEM_DCACHE == 0) else $error("DCACHE_BLOCKLENINBITS must be at least 128 when caches are enabled"); From db7687858199f4996477b910083dbf0ffcfa11ab Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 20:11:32 -0600 Subject: [PATCH 30/38] Moved convert2bin.py to the tests directory. This file converts the qemu ram.txt output into a binary for copy to flash card. mv qemu patches to tests directory. --- .../linux-testgen/linux-testvectors/convert2bin.py | 0 .../linux-testgen/qemu-build-instructions.md | 0 {wally-pipelined => tests}/linux-testgen/wallyVirtIO.patch | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {wally-pipelined => tests}/linux-testgen/linux-testvectors/convert2bin.py (100%) rename {wally-pipelined => tests}/linux-testgen/qemu-build-instructions.md (100%) rename {wally-pipelined => tests}/linux-testgen/wallyVirtIO.patch (100%) diff --git a/wally-pipelined/linux-testgen/linux-testvectors/convert2bin.py b/tests/linux-testgen/linux-testvectors/convert2bin.py similarity index 100% rename from wally-pipelined/linux-testgen/linux-testvectors/convert2bin.py rename to tests/linux-testgen/linux-testvectors/convert2bin.py diff --git a/wally-pipelined/linux-testgen/qemu-build-instructions.md b/tests/linux-testgen/qemu-build-instructions.md similarity index 100% rename from wally-pipelined/linux-testgen/qemu-build-instructions.md rename to tests/linux-testgen/qemu-build-instructions.md diff --git a/wally-pipelined/linux-testgen/wallyVirtIO.patch b/tests/linux-testgen/wallyVirtIO.patch similarity index 100% rename from wally-pipelined/linux-testgen/wallyVirtIO.patch rename to tests/linux-testgen/wallyVirtIO.patch From 019e300a147688cc2eb65cbcb4fdc4a52620076a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 20:31:55 -0600 Subject: [PATCH 31/38] Added file showing how to compile riscv toolchain for different extension combinations. --- benchmarks/riscv-coremark/multilib.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 benchmarks/riscv-coremark/multilib.txt diff --git a/benchmarks/riscv-coremark/multilib.txt b/benchmarks/riscv-coremark/multilib.txt new file mode 100644 index 000000000..6070e42a1 --- /dev/null +++ b/benchmarks/riscv-coremark/multilib.txt @@ -0,0 +1,3 @@ +If you need to compile for different abi, below is an example of how to configure the gnu riscv toolchain. + +./configure --prefix=/import/eager1/ross/riscv-multilib-bin --enable-multilib --multilib-generate="rv32i-ilp32--;rv32im-ilp32--;rv32iac-ilp32--;rv32imac-ilp32--;rv32imafc-ilp32f--;rv64ic-lp64--;rv64imac-lp64--;rv64imafdc-lp64d--" From 30770db4ac35d77d3e3d03345a706daa1417d384 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 21:34:40 -0600 Subject: [PATCH 32/38] Removed lsuArb and placed remaining logic in lsu.sv. Removed after itlb walk signal as the dcache no longer has any need for this. Formated lsu.sv --- wally-pipelined/src/cache/dcache.sv | 2 - wally-pipelined/src/cache/dcachefsm.sv | 3 - wally-pipelined/src/lsu/lsu.sv | 334 ++++++++++++------------- wally-pipelined/src/lsu/lsuArb.sv | 105 -------- wally-pipelined/src/mmu/hptw.sv | 9 +- 5 files changed, 167 insertions(+), 286 deletions(-) delete mode 100644 wally-pipelined/src/lsu/lsuArb.sv diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index ab870b647..949e2eb94 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -53,7 +53,6 @@ module dcache input logic CacheableM, // from ptw input logic IgnoreRequest, - output logic MemAfterIWalkDone, // ahb side (* mark_debug = "true" *)output logic [`PA_BITS-1:0] AHBPAdr, // to ahb (* mark_debug = "true" *)output logic AHBRead, @@ -358,7 +357,6 @@ module dcache .CommittedM, .DCacheMiss, .DCacheAccess, - .MemAfterIWalkDone, .AHBRead, .AHBWrite, .SelAdrM, diff --git a/wally-pipelined/src/cache/dcachefsm.sv b/wally-pipelined/src/cache/dcachefsm.sv index b13ed2646..607b747fa 100644 --- a/wally-pipelined/src/cache/dcachefsm.sv +++ b/wally-pipelined/src/cache/dcachefsm.sv @@ -53,8 +53,6 @@ module dcachefsm // counter outputs output logic DCacheMiss, output logic DCacheAccess, - // hptw outputs - output logic MemAfterIWalkDone, // Bus outputs output logic AHBRead, output logic AHBWrite, @@ -137,7 +135,6 @@ module dcachefsm SelUncached = 1'b0; SelEvict = 1'b0; LRUWriteEn = 1'b0; - MemAfterIWalkDone = 1'b0; SelFlush = 1'b0; FlushAdrCntEn = 1'b0; FlushWayCntEn = 1'b0; diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 2fbd9f67f..ff35f5b0a 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -30,116 +30,114 @@ // *** Ross Thompson amo misalignment check? module lsu ( - input logic clk, reset, - input logic StallM, FlushM, StallW, FlushW, - output logic LSUStall, + input logic clk, reset, + input logic StallM, FlushM, StallW, FlushW, + output logic LSUStall, // Memory Stage // connected to cpu (controls) - input logic [1:0] MemRWM, - input logic [2:0] Funct3M, - input logic [6:0] Funct7M, - input logic [1:0] AtomicM, - input logic ExceptionM, - input logic PendingInterruptM, - input logic FlushDCacheM, - output logic CommittedM, - output logic SquashSCW, - output logic DCacheMiss, - output logic DCacheAccess, + input logic [1:0] MemRWM, + input logic [2:0] Funct3M, + input logic [6:0] Funct7M, + input logic [1:0] AtomicM, + input logic ExceptionM, + input logic PendingInterruptM, + input logic FlushDCacheM, + output logic CommittedM, + output logic SquashSCW, + output logic DCacheMiss, + output logic DCacheAccess, // address and write data - input logic [`XLEN-1:0] IEUAdrE, + input logic [`XLEN-1:0] IEUAdrE, output logic [`XLEN-1:0] IEUAdrM, - input logic [`XLEN-1:0] WriteDataM, + input logic [`XLEN-1:0] WriteDataM, output logic [`XLEN-1:0] ReadDataM, // cpu privilege - input logic [1:0] PrivilegeModeW, - input logic DTLBFlushM, + input logic [1:0] PrivilegeModeW, + input logic DTLBFlushM, // faults - output logic DTLBLoadPageFaultM, DTLBStorePageFaultM, - output logic LoadMisalignedFaultM, LoadAccessFaultM, + output logic DTLBLoadPageFaultM, DTLBStorePageFaultM, + output logic LoadMisalignedFaultM, LoadAccessFaultM, // cpu hazard unit (trap) - output logic StoreMisalignedFaultM, StoreAccessFaultM, + output logic StoreMisalignedFaultM, StoreAccessFaultM, // connect to ahb output logic [`PA_BITS-1:0] DCtoAHBPAdrM, - output logic DCtoAHBReadM, - output logic DCtoAHBWriteM, - input logic DCfromAHBAck, - input logic [`XLEN-1:0] DCfromAHBReadData, + output logic DCtoAHBReadM, + output logic DCtoAHBWriteM, + input logic DCfromAHBAck, + input logic [`XLEN-1:0] DCfromAHBReadData, output logic [`XLEN-1:0] DCtoAHBWriteData, - output logic [2:0] DCtoAHBSizeM, + output logic [2:0] DCtoAHBSizeM, // mmu management // page table walker - input logic [`XLEN-1:0] SATP_REGW, // from csr - input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, - input logic [1:0] STATUS_MPP, + input logic [`XLEN-1:0] SATP_REGW, // from csr + input logic STATUS_MXR, STATUS_SUM, STATUS_MPRV, + input logic [1:0] STATUS_MPP, - input logic [`XLEN-1:0] PCF, - input logic ITLBMissF, + input logic [`XLEN-1:0] PCF, + input logic ITLBMissF, output logic [`XLEN-1:0] PTE, - output logic [1:0] PageType, - output logic ITLBWriteF, - output logic WalkerInstrPageFaultF, - output logic WalkerLoadPageFaultM, - output logic WalkerStorePageFaultM, + output logic [1:0] PageType, + output logic ITLBWriteF, + output logic WalkerInstrPageFaultF, + output logic WalkerLoadPageFaultM, + output logic WalkerStorePageFaultM, - input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], - input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // *** this one especially has a large note attached to it in pmpchecker. + input var logic [7:0] PMPCFG_ARRAY_REGW[`PMP_ENTRIES-1:0], + input var logic [`XLEN-1:0] PMPADDR_ARRAY_REGW[`PMP_ENTRIES-1:0] // *** this one especially has a large note attached to it in pmpchecker. ); - logic DTLBPageFaultM; - logic DataMisalignedM; + logic DTLBPageFaultM; - logic [`PA_BITS-1:0] MemPAdrM; // from mmu to dcache - - logic DTLBMissM; - logic DTLBWriteM; - logic HPTWStall; - logic [`PA_BITS-1:0] TranslationPAdr; - logic HPTWRead; - logic [1:0] MemRWMtoDCache; - logic [1:0] MemRWMtoLRSC; - logic [2:0] Funct3MtoDCache; - logic [1:0] AtomicMtoDCache; - logic [`PA_BITS-1:0] MemPAdrNoTranslate; - logic [11:0] MemAdrE, MemAdrE_RENAME; - logic StallWtoDCache; - logic MemReadM; - logic DataMisalignedMfromDCache; - logic DisableTranslation; // used to stop intermediate PTE physical addresses being saved to TLB. - logic DCacheStall; + logic [`PA_BITS-1:0] MemPAdrM; // from mmu to dcache + + logic DTLBMissM; + logic DTLBWriteM; + logic HPTWStall; + logic [`PA_BITS-1:0] TranslationPAdr; + logic HPTWRead; + logic [1:0] MemRWMtoDCache; + logic [1:0] MemRWMtoLRSC; + logic [2:0] Funct3MtoDCache; + logic [1:0] AtomicMtoDCache; + logic [`PA_BITS-1:0] MemPAdrNoTranslate; + logic [11:0] MemAdrE, MemAdrE_RENAME; + logic StallWtoDCache; + logic MemReadM; + logic DataMisalignedM; + logic DCacheStall; - logic CacheableM; - logic CacheableMtoDCache; - logic SelPTW; + logic CacheableM; + logic CacheableMtoDCache; + logic SelPTW; - logic CommittedMfromDCache; - logic PendingInterruptMtoDCache; -// logic FlushWtoDCache; - logic WalkerPageFaultM; + logic CommittedMfromDCache; + logic PendingInterruptMtoDCache; + // logic FlushWtoDCache; + logic WalkerPageFaultM; - logic AnyCPUReqM; - logic MemAfterIWalkDone; + logic AnyCPUReqM; + logic MemAfterIWalkDone; - typedef enum {STATE_T0_READY, - STATE_T0_REPLAY, - STATE_T0_FAULT_REPLAY, - STATE_T3_DTLB_MISS, - STATE_T4_ITLB_MISS, - STATE_T5_ITLB_MISS, - STATE_T7_DITLB_MISS} statetype; + typedef enum {STATE_T0_READY, + STATE_T0_REPLAY, + STATE_T0_FAULT_REPLAY, + STATE_T3_DTLB_MISS, + STATE_T4_ITLB_MISS, + STATE_T5_ITLB_MISS, + STATE_T7_DITLB_MISS} statetype; statetype CurrState, NextState; - logic InterlockStall; - logic SelReplayCPURequest; - logic WalkerInstrPageFaultRaw; - logic IgnoreRequest; + logic InterlockStall; + logic SelReplayCPURequest; + logic WalkerInstrPageFaultRaw; + logic IgnoreRequest; assign AnyCPUReqM = (|MemRWM) | (|AtomicM); @@ -221,14 +219,14 @@ module lsu end // always_comb // signal to CPU it needs to wait on HPTW. -/* -----\/----- EXCLUDED -----\/----- - // this code has a problem with imperas64mmu as it reads in an invalid uninitalized instruction. InterlockStall becomes x and it propagates - // everywhere. The case statement below implements the same logic but any x on the inputs will resolve to 0. - assign InterlockStall = (CurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | - (CurrState == STATE_T3_DTLB_MISS & ~WalkerPageFaultM) | (CurrState == STATE_T4_ITLB_MISS & ~WalkerInstrPageFaultRaw) | - (CurrState == STATE_T5_ITLB_MISS & ~WalkerInstrPageFaultRaw) | (CurrState == STATE_T7_DITLB_MISS & ~WalkerPageFaultM); + /* -----\/----- EXCLUDED -----\/----- + // this code has a problem with imperas64mmu as it reads in an invalid uninitalized instruction. InterlockStall becomes x and it propagates + // everywhere. The case statement below implements the same logic but any x on the inputs will resolve to 0. + assign InterlockStall = (CurrState == STATE_T0_READY & (DTLBMissM | ITLBMissF)) | + (CurrState == STATE_T3_DTLB_MISS & ~WalkerPageFaultM) | (CurrState == STATE_T4_ITLB_MISS & ~WalkerInstrPageFaultRaw) | + (CurrState == STATE_T5_ITLB_MISS & ~WalkerInstrPageFaultRaw) | (CurrState == STATE_T7_DITLB_MISS & ~WalkerPageFaultM); - -----/\----- EXCLUDED -----/\----- */ + -----/\----- EXCLUDED -----/\----- */ always_comb begin InterlockStall = 1'b0; @@ -257,64 +255,61 @@ module lsu // *** add generate to conditionally create hptw, lsuArb, and mmu // based on `MEM_VIRTMEM hptw hptw(.clk(clk), - .reset(reset), - .SATP_REGW(SATP_REGW), - .PCF(PCF), - .IEUAdrM(IEUAdrM), - .ITLBMissF(ITLBMissF & ~PendingInterruptM), - .DTLBMissM(DTLBMissM & ~PendingInterruptM), - .MemRWM(MemRWM), - .PTE(PTE), - .PageType, - .ITLBWriteF(ITLBWriteF), - .DTLBWriteM(DTLBWriteM), - .HPTWReadPTE(ReadDataM), - .DCacheStall(DCacheStall), - .TranslationPAdr, - .HPTWRead(HPTWRead), - .HPTWStall, - .AnyCPUReqM, - .MemAfterIWalkDone, - .WalkerInstrPageFaultF(WalkerInstrPageFaultRaw), - .WalkerLoadPageFaultM(WalkerLoadPageFaultM), - .WalkerStorePageFaultM(WalkerStorePageFaultM)); + .reset(reset), + .SATP_REGW(SATP_REGW), + .PCF(PCF), + .IEUAdrM(IEUAdrM), + .ITLBMissF(ITLBMissF & ~PendingInterruptM), + .DTLBMissM(DTLBMissM & ~PendingInterruptM), + .MemRWM(MemRWM), + .PTE(PTE), + .PageType, + .ITLBWriteF(ITLBWriteF), + .DTLBWriteM(DTLBWriteM), + .HPTWReadPTE(ReadDataM), + .DCacheStall(DCacheStall), + .TranslationPAdr, + .HPTWRead(HPTWRead), + .AnyCPUReqM, + .WalkerInstrPageFaultF(WalkerInstrPageFaultRaw), + .WalkerLoadPageFaultM(WalkerLoadPageFaultM), + .WalkerStorePageFaultM(WalkerStorePageFaultM)); assign LSUStall = DCacheStall | InterlockStall; assign WalkerPageFaultM = WalkerStorePageFaultM | WalkerLoadPageFaultM; // arbiter between IEU and hptw - lsuArb arbiter(.clk(clk), - // HPTW connection - .SelPTW, - .HPTWRead(HPTWRead), - .TranslationPAdrE(TranslationPAdr), - // CPU connection - .MemRWM(MemRWM), - .Funct3M(Funct3M), - .AtomicM(AtomicM), - .IEUAdrM(IEUAdrM), - .IEUAdrE(IEUAdrE[11:0]), - .CommittedM(CommittedM), - .PendingInterruptM(PendingInterruptM), - .StallW(StallW), - .DataMisalignedM(DataMisalignedM), - // DCACHE - .DisableTranslation(DisableTranslation), - .MemRWMtoLRSC(MemRWMtoLRSC), - .Funct3MtoDCache(Funct3MtoDCache), - .AtomicMtoDCache(AtomicMtoDCache), - .MemPAdrNoTranslate(MemPAdrNoTranslate), - .MemAdrE(MemAdrE), - .StallWtoDCache(StallWtoDCache), - .DataMisalignedMfromDCache(DataMisalignedMfromDCache), - .CommittedMfromDCache(CommittedMfromDCache), - .PendingInterruptMtoDCache(PendingInterruptMtoDCache), - .DCacheStall(DCacheStall)); + logic [2:0] PTWSize; + logic [`PA_BITS-1:0] TranslationPAdrM; + logic [`XLEN+1:0] IEUAdrMExt; + + // multiplex the outputs to LSU + assign MemRWMtoLRSC = SelPTW ? {HPTWRead, 1'b0} : MemRWM; + + generate + assign PTWSize = (`XLEN==32 ? 3'b010 : 3'b011); // 32 or 64-bit access from htpw + endgenerate + mux2 #(3) sizemux(Funct3M, PTWSize, SelPTW, Funct3MtoDCache); + + // this is for the d cache SRAM. + flop #(`PA_BITS) TranslationPAdrMReg(clk, TranslationPAdr, TranslationPAdrM); // delay TranslationPAdrM by a cycle + + assign AtomicMtoDCache = SelPTW ? 2'b00 : AtomicM; + assign IEUAdrMExt = {2'b00, IEUAdrM}; + assign MemPAdrNoTranslate = SelPTW ? TranslationPAdrM : IEUAdrMExt[`PA_BITS-1:0]; + assign MemAdrE = SelPTW ? TranslationPAdr[11:0] : IEUAdrE[11:0]; + assign StallWtoDCache = SelPTW ? 1'b0 : StallW; + // always block interrupts when using the hardware page table walker. + assign CommittedM = SelPTW ? 1'b1 : CommittedMfromDCache; + + + assign PendingInterruptMtoDCache = SelPTW ? 1'b0 : PendingInterruptM; + mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, - .PrivilegeModeW, .DisableTranslation(DisableTranslation), + .PrivilegeModeW, .DisableTranslation(SelPTW), .PAdr(MemPAdrNoTranslate), .VAdr(IEUAdrM), .Size(Funct3MtoDCache[1:0]), @@ -353,15 +348,15 @@ module lsu // Determine if an Unaligned access is taking place always_comb case(Funct3MtoDCache[1:0]) - 2'b00: DataMisalignedMfromDCache = 0; // lb, sb, lbu - 2'b01: DataMisalignedMfromDCache = MemPAdrNoTranslate[0]; // lh, sh, lhu - 2'b10: DataMisalignedMfromDCache = MemPAdrNoTranslate[1] | MemPAdrNoTranslate[0]; // lw, sw, flw, fsw, lwu - 2'b11: DataMisalignedMfromDCache = |MemPAdrNoTranslate[2:0]; // ld, sd, fld, fsd + 2'b00: DataMisalignedM = 0; // lb, sb, lbu + 2'b01: DataMisalignedM = MemPAdrNoTranslate[0]; // lh, sh, lhu + 2'b10: DataMisalignedM = MemPAdrNoTranslate[1] | MemPAdrNoTranslate[0]; // lw, sw, flw, fsw, lwu + 2'b11: DataMisalignedM = |MemPAdrNoTranslate[2:0]; // ld, sd, fld, fsd endcase // Determine if address is valid - assign LoadMisalignedFaultM = DataMisalignedMfromDCache & MemRWMtoLRSC[1]; - assign StoreMisalignedFaultM = DataMisalignedMfromDCache & MemRWMtoLRSC[0]; + assign LoadMisalignedFaultM = DataMisalignedM & MemRWMtoLRSC[1]; + assign StoreMisalignedFaultM = DataMisalignedM & MemRWMtoLRSC[0]; // conditional // 1. ram // controlled by `MEM_DTIM @@ -370,37 +365,36 @@ module lsu assign MemAdrE_RENAME = SelReplayCPURequest ? IEUAdrM[11:0] : MemAdrE[11:0]; dcache dcache(.clk(clk), - .reset(reset), - .StallWtoDCache(StallWtoDCache), - .MemRWM(MemRWMtoDCache), - .Funct3M(Funct3MtoDCache), - .Funct7M(Funct7M), - .FlushDCacheM, - .AtomicM(AtomicMtoDCache), - .MemAdrE(MemAdrE_RENAME), - .MemPAdrM(MemPAdrM), - .VAdr(IEUAdrM[11:0]), // this will be removed once the dcache hptw interlock is removed. - .WriteDataM(WriteDataM), - .ReadDataM(ReadDataM), - .DCacheStall(DCacheStall), - .CommittedM(CommittedMfromDCache), - .DCacheMiss, - .DCacheAccess, - .ExceptionM(ExceptionM), - .IgnoreRequest, - .PendingInterruptM(PendingInterruptMtoDCache), - .CacheableM(CacheableMtoDCache), - .MemAfterIWalkDone, + .reset(reset), + .StallWtoDCache(StallWtoDCache), + .MemRWM(MemRWMtoDCache), + .Funct3M(Funct3MtoDCache), + .Funct7M(Funct7M), + .FlushDCacheM, + .AtomicM(AtomicMtoDCache), + .MemAdrE(MemAdrE_RENAME), + .MemPAdrM(MemPAdrM), + .VAdr(IEUAdrM[11:0]), // this will be removed once the dcache hptw interlock is removed. + .WriteDataM(WriteDataM), + .ReadDataM(ReadDataM), + .DCacheStall(DCacheStall), + .CommittedM(CommittedMfromDCache), + .DCacheMiss, + .DCacheAccess, + .ExceptionM(ExceptionM), + .IgnoreRequest, + .PendingInterruptM(PendingInterruptMtoDCache), + .CacheableM(CacheableMtoDCache), - // AHB connection - .AHBPAdr(DCtoAHBPAdrM), - .AHBRead(DCtoAHBReadM), - .AHBWrite(DCtoAHBWriteM), - .AHBAck(DCfromAHBAck), - .HWDATA(DCtoAHBWriteData), - .HRDATA(DCfromAHBReadData), - .DCtoAHBSizeM - ); + // AHB connection + .AHBPAdr(DCtoAHBPAdrM), + .AHBRead(DCtoAHBReadM), + .AHBWrite(DCtoAHBWriteM), + .AHBAck(DCfromAHBAck), + .HWDATA(DCtoAHBWriteData), + .HRDATA(DCfromAHBReadData), + .DCtoAHBSizeM + ); endmodule diff --git a/wally-pipelined/src/lsu/lsuArb.sv b/wally-pipelined/src/lsu/lsuArb.sv deleted file mode 100644 index 91c5f75f1..000000000 --- a/wally-pipelined/src/lsu/lsuArb.sv +++ /dev/null @@ -1,105 +0,0 @@ -/////////////////////////////////////////// -// lsuArb.sv -// -// Written: Ross THompson and Kip Macsai-Goren -// Modified: kmacsaigoren@hmc.edu June 23, 2021 -// -// Purpose: LSU arbiter between the CPU's demand request for data memory and -// the page table walker -// -// A component of the Wally configurable RISC-V project. -// -// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University -// -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software -// is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT -// OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -/////////////////////////////////////////// - -`include "wally-config.vh" - -module lsuArb - (input logic clk, - - // from page table walker - input logic SelPTW, - input logic HPTWRead, - input logic [`PA_BITS-1:0] TranslationPAdrE, - - // from CPU - input logic [1:0] MemRWM, - input logic [2:0] Funct3M, - input logic [1:0] AtomicM, - input logic [`XLEN-1:0] IEUAdrM, - input logic [11:0] IEUAdrE, - input logic StallW, - input logic PendingInterruptM, - // to CPU - output logic DataMisalignedM, - output logic CommittedM, - //output logic LSUStall, - - // to D Cache - output logic DisableTranslation, - output logic [1:0] MemRWMtoLRSC, - output logic [2:0] Funct3MtoDCache, - output logic [1:0] AtomicMtoDCache, - output logic [`PA_BITS-1:0] MemPAdrNoTranslate, // THis name is very bad. need a better name. This is the raw address from either the ieu or the hptw. - output logic [11:0] MemAdrE, - output logic StallWtoDCache, - output logic PendingInterruptMtoDCache, - - - // from D Cache - input logic CommittedMfromDCache, - input logic DataMisalignedMfromDCache, - input logic DCacheStall - - ); - - logic [2:0] PTWSize; - logic [`PA_BITS-1:0] TranslationPAdrM; - logic [`XLEN+1:0] IEUAdrMExt; - - // multiplex the outputs to LSU - assign DisableTranslation = SelPTW; // change names between SelPTW would be confusing in DTLB. - assign MemRWMtoLRSC = SelPTW ? {HPTWRead, 1'b0} : MemRWM; - - generate - assign PTWSize = (`XLEN==32 ? 3'b010 : 3'b011); // 32 or 64-bit access from htpw - endgenerate - mux2 #(3) sizemux(Funct3M, PTWSize, SelPTW, Funct3MtoDCache); - - // this is for the d cache SRAM. - flop #(`PA_BITS) TranslationPAdrMReg(clk, TranslationPAdrE, TranslationPAdrM); // delay TranslationPAdrM by a cycle - - assign AtomicMtoDCache = SelPTW ? 2'b00 : AtomicM; - assign IEUAdrMExt = {2'b00, IEUAdrM}; - assign MemPAdrNoTranslate = SelPTW ? TranslationPAdrM : IEUAdrMExt[`PA_BITS-1:0]; - assign MemAdrE = SelPTW ? TranslationPAdrE[11:0] : IEUAdrE[11:0]; - assign StallWtoDCache = SelPTW ? 1'b0 : StallW; - // always block interrupts when using the hardware page table walker. - assign CommittedM = SelPTW ? 1'b1 : CommittedMfromDCache; - - // demux the inputs from LSU to walker or cpu's data port. - - // works without the demux 7/18/21 dh. Suggest deleting these and removing fromDCache suffix - assign DataMisalignedM = /*SelPTW ? 1'b0 : */DataMisalignedMfromDCache; - // *** need to rename DcacheStall and Datastall. - // not clear at all. I think it should be LSUStall from the LSU, - // which is demuxed to HPTWStall and CPUDataStall? (not sure on this last one). - //assign HPTWStall = SelPTW ? DCacheStall : 1'b1; - - assign PendingInterruptMtoDCache = SelPTW ? 1'b0 : PendingInterruptM; - - //assign LSUStall = SelPTW ? 1'b1 : DCacheStall; // *** this is probably going to change. - -endmodule diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index d2a5fa1ac..b436162f5 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -39,7 +39,6 @@ module hptw input logic [1:0] MemRWM, // 10 = read, 01 = write input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU input logic DCacheStall, // stall from LSU - input logic MemAfterIWalkDone, input logic AnyCPUReqM, output logic [`XLEN-1:0] PTE, // page table entry to TLBs output logic [1:0] PageType, // page type to TLBs @@ -54,7 +53,7 @@ module hptw L1_ADR, L1_RD, L2_ADR, L2_RD, L3_ADR, L3_RD, - LEAF, LEAF_DELAY, IDLE, FAULT} statetype; // *** placed outside generate statement to remove synthesis errors + LEAF, IDLE, FAULT} statetype; // *** placed outside generate statement to remove synthesis errors generate if (`MEM_VIRTMEM) begin @@ -198,10 +197,8 @@ module hptw else NextWalkerState = LEAF; // LEVEL0: if (ValidLeafPTE) NextWalkerState = LEAF; // else NextWalkerState = FAULT; - LEAF: if (DTLBWalk) NextWalkerState = IDLE; // updates TLB - else NextWalkerState = IDLE; - LEAF_DELAY: NextWalkerState = IDLE; // give time to allow address translation - FAULT: if (ITLBMissF & AnyCPUReqM & ~MemAfterIWalkDone) NextWalkerState = FAULT; + LEAF: NextWalkerState = IDLE; // updates TLB + FAULT: if (ITLBMissF & AnyCPUReqM) NextWalkerState = FAULT; else NextWalkerState = IDLE; default: begin // synthesis translate_off From 035ce999389e5f7ba2b4454ece2f25432f72ecae Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 21:36:54 -0600 Subject: [PATCH 33/38] Removed HPTWStall. Not needed as InterlockStall from the LSU provides the equivalent. --- wally-pipelined/src/mmu/hptw.sv | 2 -- 1 file changed, 2 deletions(-) diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index b436162f5..02de969fd 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -43,7 +43,6 @@ module hptw output logic [`XLEN-1:0] PTE, // page table entry to TLBs output logic [1:0] PageType, // page type to TLBs output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry - output logic HPTWStall, output logic [`PA_BITS-1:0] TranslationPAdr, output logic HPTWRead, // HPTW requesting to read memory output logic WalkerInstrPageFaultF, WalkerLoadPageFaultM,WalkerStorePageFaultM // faults @@ -99,7 +98,6 @@ module hptw // Enable and select signals based on states assign StartWalk = (WalkerState == IDLE) & TLBMiss; assign HPTWRead = (WalkerState == L3_RD) | (WalkerState == L2_RD) | (WalkerState == L1_RD) | (WalkerState == L0_RD); - assign HPTWStall = (WalkerState != IDLE) & (WalkerState != FAULT); assign DTLBWriteM = (WalkerState == LEAF) & DTLBWalk; assign ITLBWriteF = (WalkerState == LEAF) & ~DTLBWalk; From 2f5de7eb8229981beb30e2acbd7dc523907f5337 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 22:00:28 -0600 Subject: [PATCH 34/38] Hardware reductions in the lsu. --- wally-pipelined/src/lsu/lsu.sv | 37 ++++++++++----------------------- wally-pipelined/src/mmu/hptw.sv | 34 +++++++++++++++++------------- 2 files changed, 30 insertions(+), 41 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index ff35f5b0a..096b1b85f 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -101,6 +101,7 @@ module lsu logic DTLBWriteM; logic HPTWStall; logic [`PA_BITS-1:0] TranslationPAdr; + //logic [`PA_BITS-1:0] TranslationPAdrM; logic HPTWRead; logic [1:0] MemRWMtoDCache; logic [1:0] MemRWMtoLRSC; @@ -116,6 +117,8 @@ module lsu logic CacheableM; logic CacheableMtoDCache; logic SelPTW; + logic [2:0] HPTWSize; + logic CommittedMfromDCache; logic PendingInterruptMtoDCache; @@ -254,50 +257,32 @@ module lsu // *** add generate to conditionally create hptw, lsuArb, and mmu // based on `MEM_VIRTMEM - hptw hptw(.clk(clk), - .reset(reset), - .SATP_REGW(SATP_REGW), - .PCF(PCF), - .IEUAdrM(IEUAdrM), + hptw hptw(.clk, .reset, .SATP_REGW, .PCF, .IEUAdrM, .ITLBMissF(ITLBMissF & ~PendingInterruptM), .DTLBMissM(DTLBMissM & ~PendingInterruptM), - .MemRWM(MemRWM), - .PTE(PTE), - .PageType, - .ITLBWriteF(ITLBWriteF), - .DTLBWriteM(DTLBWriteM), + .MemRWM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM, .HPTWReadPTE(ReadDataM), - .DCacheStall(DCacheStall), - .TranslationPAdr, - .HPTWRead(HPTWRead), - .AnyCPUReqM, + .DCacheStall, .TranslationPAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM, .WalkerInstrPageFaultF(WalkerInstrPageFaultRaw), - .WalkerLoadPageFaultM(WalkerLoadPageFaultM), - .WalkerStorePageFaultM(WalkerStorePageFaultM)); + .WalkerLoadPageFaultM, .WalkerStorePageFaultM); assign LSUStall = DCacheStall | InterlockStall; assign WalkerPageFaultM = WalkerStorePageFaultM | WalkerLoadPageFaultM; // arbiter between IEU and hptw - logic [2:0] PTWSize; - logic [`PA_BITS-1:0] TranslationPAdrM; - logic [`XLEN+1:0] IEUAdrMExt; // multiplex the outputs to LSU assign MemRWMtoLRSC = SelPTW ? {HPTWRead, 1'b0} : MemRWM; - generate - assign PTWSize = (`XLEN==32 ? 3'b010 : 3'b011); // 32 or 64-bit access from htpw - endgenerate - mux2 #(3) sizemux(Funct3M, PTWSize, SelPTW, Funct3MtoDCache); + mux2 #(3) sizemux(Funct3M, HPTWSize, SelPTW, Funct3MtoDCache); // this is for the d cache SRAM. - flop #(`PA_BITS) TranslationPAdrMReg(clk, TranslationPAdr, TranslationPAdrM); // delay TranslationPAdrM by a cycle + // turns out because we cannot pipeline hptw requests we don't need this register + //flop #(`PA_BITS) TranslationPAdrMReg(clk, TranslationPAdr, TranslationPAdrM); // delay TranslationPAdrM by a cycle assign AtomicMtoDCache = SelPTW ? 2'b00 : AtomicM; - assign IEUAdrMExt = {2'b00, IEUAdrM}; - assign MemPAdrNoTranslate = SelPTW ? TranslationPAdrM : IEUAdrMExt[`PA_BITS-1:0]; + assign MemPAdrNoTranslate = SelPTW ? TranslationPAdr : {2'b00, IEUAdrM}[`PA_BITS-1:0]; assign MemAdrE = SelPTW ? TranslationPAdr[11:0] : IEUAdrE[11:0]; assign StallWtoDCache = SelPTW ? 1'b0 : StallW; // always block interrupts when using the hardware page table walker. diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index 02de969fd..fa8043d83 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -32,20 +32,21 @@ module hptw ( - input logic clk, reset, - input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table - input logic [`XLEN-1:0] PCF, IEUAdrM, // addresses to translate - input logic ITLBMissF, DTLBMissM, // TLB Miss - input logic [1:0] MemRWM, // 10 = read, 01 = write - input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU - input logic DCacheStall, // stall from LSU - input logic AnyCPUReqM, + input logic clk, reset, + input logic [`XLEN-1:0] SATP_REGW, // includes SATP.MODE to determine number of levels in page table + input logic [`XLEN-1:0] PCF, IEUAdrM, // addresses to translate + input logic ITLBMissF, DTLBMissM, // TLB Miss + input logic [1:0] MemRWM, // 10 = read, 01 = write + input logic [`XLEN-1:0] HPTWReadPTE, // page table entry from LSU + input logic DCacheStall, // stall from LSU + input logic AnyCPUReqM, output logic [`XLEN-1:0] PTE, // page table entry to TLBs - output logic [1:0] PageType, // page type to TLBs - output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry + output logic [1:0] PageType, // page type to TLBs + output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry output logic [`PA_BITS-1:0] TranslationPAdr, - output logic HPTWRead, // HPTW requesting to read memory - output logic WalkerInstrPageFaultF, WalkerLoadPageFaultM,WalkerStorePageFaultM // faults + output logic HPTWRead, // HPTW requesting to read memory + output logic [2:0] HPTWSize, // 32 or 64 bit access. + output logic WalkerInstrPageFaultF, WalkerLoadPageFaultM,WalkerStorePageFaultM // faults ); typedef enum {L0_ADR, L0_RD, @@ -123,7 +124,8 @@ module hptw logic [`PPN_BITS-1:0] PPN; assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; - assign TranslationPAdr = {PPN, VPN, 2'b00}; + assign TranslationPAdr = {PPN, VPN, 2'b00}; + assign HPTWSize = 3'b010; end else begin // RV64 logic [8:0] VPN; logic [`PPN_BITS-1:0] PPN; @@ -136,7 +138,8 @@ module hptw endcase assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN; - assign TranslationPAdr = {PPN, VPN, 3'b000}; + assign TranslationPAdr = {PPN, VPN, 3'b000}; + assign HPTWSize = 3'b011; end // Initial state and misalignment for RV32/64 @@ -208,7 +211,8 @@ module hptw end else begin // No Virtual memory supported; tie HPTW outputs to 0 assign HPTWRead = 0; assign WalkerInstrPageFaultF = 0; assign WalkerLoadPageFaultM = 0; assign WalkerStorePageFaultM = 0; - assign TranslationPAdr = 0; + assign TranslationPAdr = 0; + assign HPTWSize = 3'b000; end endgenerate endmodule From 9c2fc305073d4116a26a7f6c52d22ac31a7e5b24 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 22:21:03 -0600 Subject: [PATCH 35/38] Signal renames. --- wally-pipelined/src/cache/dcache.sv | 4 ++-- wally-pipelined/src/cache/dcachefsm.sv | 22 +++++++++++----------- wally-pipelined/src/lsu/lrsc.sv | 4 ++-- wally-pipelined/src/lsu/lsu.sv | 21 ++++++++++----------- wally-pipelined/src/mmu/hptw.sv | 12 ++++++------ 5 files changed, 31 insertions(+), 32 deletions(-) diff --git a/wally-pipelined/src/cache/dcache.sv b/wally-pipelined/src/cache/dcache.sv index 949e2eb94..1936705e5 100644 --- a/wally-pipelined/src/cache/dcache.sv +++ b/wally-pipelined/src/cache/dcache.sv @@ -28,7 +28,7 @@ module dcache (input logic clk, input logic reset, - input logic StallWtoDCache, + input logic CPUBusy, // cpu side input logic [1:0] MemRWM, @@ -346,7 +346,7 @@ module dcache .AtomicM, .ExceptionM, .PendingInterruptM, - .StallWtoDCache, + .CPUBusy, .CacheableM, .IgnoreRequest, .AHBAck, // from ahb diff --git a/wally-pipelined/src/cache/dcachefsm.sv b/wally-pipelined/src/cache/dcachefsm.sv index 607b747fa..eb191b9a8 100644 --- a/wally-pipelined/src/cache/dcachefsm.sv +++ b/wally-pipelined/src/cache/dcachefsm.sv @@ -35,7 +35,7 @@ module dcachefsm // hazard inputs input logic ExceptionM, input logic PendingInterruptM, - input logic StallWtoDCache, + input logic CPUBusy, input logic CacheableM, // hptw inputs input logic IgnoreRequest, @@ -182,7 +182,7 @@ module dcachefsm SelAdrM = 2'b10; DCacheStall = 1'b0; - if(StallWtoDCache) begin + if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; SelAdrM = 2'b10; end @@ -198,7 +198,7 @@ module dcachefsm DCacheStall = 1'b0; LRUWriteEn = 1'b1; - if(StallWtoDCache) begin + if(CPUBusy) begin NextState = STATE_CPU_BUSY; SelAdrM = 2'b10; end @@ -214,7 +214,7 @@ module dcachefsm SetDirty = 1'b1; LRUWriteEn = 1'b1; - if(StallWtoDCache) begin + if(CPUBusy) begin NextState = STATE_CPU_BUSY; SelAdrM = 2'b10; end @@ -307,7 +307,7 @@ module dcachefsm LRUWriteEn = 1'b0; if(&MemRWM & AtomicM[1]) begin // amo write SelAdrM = 2'b10; - if(StallWtoDCache) begin + if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; end else begin @@ -318,7 +318,7 @@ module dcachefsm end end else begin LRUWriteEn = 1'b1; - if(StallWtoDCache) begin + if(CPUBusy) begin NextState = STATE_CPU_BUSY; SelAdrM = 2'b10; end @@ -334,7 +334,7 @@ module dcachefsm SelAdrM = 2'b10; CommittedM = 1'b1; LRUWriteEn = 1'b1; - if(StallWtoDCache) begin + if(CPUBusy) begin NextState = STATE_CPU_BUSY; SelAdrM = 2'b10; end @@ -361,7 +361,7 @@ module dcachefsm STATE_CPU_BUSY: begin CommittedM = 1'b1; SelAdrM = 2'b00; - if(StallWtoDCache) begin + if(CPUBusy) begin NextState = STATE_CPU_BUSY; SelAdrM = 2'b10; end @@ -376,7 +376,7 @@ module dcachefsm SRAMWordWriteEnableM = 1'b0; SetDirty = 1'b0; LRUWriteEn = 1'b0; - if(StallWtoDCache) begin + if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; end else begin @@ -412,7 +412,7 @@ module dcachefsm STATE_UNCACHED_WRITE_DONE: begin CommittedM = 1'b1; SelAdrM = 2'b00; - if(StallWtoDCache) begin + if(CPUBusy) begin NextState = STATE_CPU_BUSY; SelAdrM = 2'b10; end @@ -425,7 +425,7 @@ module dcachefsm CommittedM = 1'b1; SelUncached = 1'b1; SelAdrM = 2'b00; - if(StallWtoDCache) begin + if(CPUBusy) begin NextState = STATE_CPU_BUSY; SelAdrM = 2'b10; end diff --git a/wally-pipelined/src/lsu/lrsc.sv b/wally-pipelined/src/lsu/lrsc.sv index 64bdbdd4d..a98b7dd5c 100644 --- a/wally-pipelined/src/lsu/lrsc.sv +++ b/wally-pipelined/src/lsu/lrsc.sv @@ -29,7 +29,7 @@ module lrsc ( input logic clk, reset, - input logic FlushW, StallWtoDCache, + input logic FlushW, CPUBusy, input logic MemReadM, input logic [1:0] MemRWMtoLRSC, output logic [1:0] MemRWMtoDCache, @@ -57,7 +57,7 @@ module lrsc end flopenrc #(`PA_BITS-2) resadrreg(clk, reset, FlushW, lrM, MemPAdrM[`PA_BITS-1:2], ReservationPAdrW); // could drop clear on this one but not valid flopenrc #(1) resvldreg(clk, reset, FlushW, lrM, ReservationValidM, ReservationValidW); - flopenrc #(1) squashreg(clk, reset, FlushW, ~StallWtoDCache, SquashSCM, SquashSCW); + flopenrc #(1) squashreg(clk, reset, FlushW, ~CPUBusy, SquashSCM, SquashSCW); end else begin // Atomic operations not supported assign SquashSCW = 0; assign MemRWMtoDCache = MemRWMtoLRSC; diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 096b1b85f..7b0f564e2 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -27,7 +27,6 @@ `include "wally-config.vh" -// *** Ross Thompson amo misalignment check? module lsu ( input logic clk, reset, @@ -100,8 +99,8 @@ module lsu logic DTLBMissM; logic DTLBWriteM; logic HPTWStall; - logic [`PA_BITS-1:0] TranslationPAdr; - //logic [`PA_BITS-1:0] TranslationPAdrM; + logic [`PA_BITS-1:0] HPTWAdr; + //logic [`PA_BITS-1:0] HPTWAdrM; logic HPTWRead; logic [1:0] MemRWMtoDCache; logic [1:0] MemRWMtoLRSC; @@ -109,7 +108,7 @@ module lsu logic [1:0] AtomicMtoDCache; logic [`PA_BITS-1:0] MemPAdrNoTranslate; logic [11:0] MemAdrE, MemAdrE_RENAME; - logic StallWtoDCache; + logic CPUBusy; logic MemReadM; logic DataMisalignedM; logic DCacheStall; @@ -262,7 +261,7 @@ module lsu .DTLBMissM(DTLBMissM & ~PendingInterruptM), .MemRWM, .PTE, .PageType, .ITLBWriteF, .DTLBWriteM, .HPTWReadPTE(ReadDataM), - .DCacheStall, .TranslationPAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM, + .DCacheStall, .HPTWAdr, .HPTWRead, .HPTWSize, .AnyCPUReqM, .WalkerInstrPageFaultF(WalkerInstrPageFaultRaw), .WalkerLoadPageFaultM, .WalkerStorePageFaultM); @@ -279,12 +278,12 @@ module lsu // this is for the d cache SRAM. // turns out because we cannot pipeline hptw requests we don't need this register - //flop #(`PA_BITS) TranslationPAdrMReg(clk, TranslationPAdr, TranslationPAdrM); // delay TranslationPAdrM by a cycle + //flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle assign AtomicMtoDCache = SelPTW ? 2'b00 : AtomicM; - assign MemPAdrNoTranslate = SelPTW ? TranslationPAdr : {2'b00, IEUAdrM}[`PA_BITS-1:0]; - assign MemAdrE = SelPTW ? TranslationPAdr[11:0] : IEUAdrE[11:0]; - assign StallWtoDCache = SelPTW ? 1'b0 : StallW; + assign MemPAdrNoTranslate = SelPTW ? HPTWAdr : {2'b00, IEUAdrM}[`PA_BITS-1:0]; + assign MemAdrE = SelPTW ? HPTWAdr[11:0] : IEUAdrE[11:0]; + assign CPUBusy = SelPTW ? 1'b0 : StallW; // always block interrupts when using the hardware page table walker. assign CommittedM = SelPTW ? 1'b1 : CommittedMfromDCache; @@ -318,7 +317,7 @@ module lsu // Move generate from lrsc to outside this module. assign MemReadM = MemRWMtoLRSC[1] & ~(ExceptionM | PendingInterruptMtoDCache) & ~DTLBMissM; // & ~NonBusTrapM & ~DTLBMissM & CurrState != STATE_STALLED; - lrsc lrsc(.clk, .reset, .FlushW, .StallWtoDCache, .MemReadM, .MemRWMtoLRSC, .AtomicMtoDCache, .MemPAdrM, + lrsc lrsc(.clk, .reset, .FlushW, .CPUBusy, .MemReadM, .MemRWMtoLRSC, .AtomicMtoDCache, .MemPAdrM, .SquashSCW, .MemRWMtoDCache); // *** BUG, this is most likely wrong @@ -351,7 +350,7 @@ module lsu dcache dcache(.clk(clk), .reset(reset), - .StallWtoDCache(StallWtoDCache), + .CPUBusy(CPUBusy), .MemRWM(MemRWMtoDCache), .Funct3M(Funct3MtoDCache), .Funct7M(Funct7M), diff --git a/wally-pipelined/src/mmu/hptw.sv b/wally-pipelined/src/mmu/hptw.sv index fa8043d83..462ee7d38 100644 --- a/wally-pipelined/src/mmu/hptw.sv +++ b/wally-pipelined/src/mmu/hptw.sv @@ -5,7 +5,7 @@ // Modified: david_harris@hmc.edu 18 July 2021 cleanup and simplification // kmacsaigoren@hmc.edu 1 June 2021 // implemented SV48 on top of SV39. This included, adding a level of the FSM for the extra page number segment -// adding support for terapage encoding, and for setting the TranslationPAdr using the new level, +// adding support for terapage encoding, and for setting the HPTWAdr using the new level, // adding the internal SvMode signal // // Purpose: Page Table Walker @@ -43,7 +43,7 @@ module hptw output logic [`XLEN-1:0] PTE, // page table entry to TLBs output logic [1:0] PageType, // page type to TLBs output logic ITLBWriteF, DTLBWriteM, // write TLB with new entry - output logic [`PA_BITS-1:0] TranslationPAdr, + output logic [`PA_BITS-1:0] HPTWAdr, output logic HPTWRead, // HPTW requesting to read memory output logic [2:0] HPTWSize, // 32 or 64 bit access. output logic WalkerInstrPageFaultF, WalkerLoadPageFaultM,WalkerStorePageFaultM // faults @@ -118,13 +118,13 @@ module hptw default: NextPageType = PageType; endcase - // TranslationPAdr muxing + // HPTWAdr muxing if (`XLEN==32) begin // RV32 logic [9:0] VPN; logic [`PPN_BITS-1:0] PPN; assign VPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? TranslationVAdr[31:22] : TranslationVAdr[21:12]; // select VPN field based on HPTW state assign PPN = ((WalkerState == L1_ADR) | (WalkerState == L1_RD)) ? BasePageTablePPN : CurrentPPN; - assign TranslationPAdr = {PPN, VPN, 2'b00}; + assign HPTWAdr = {PPN, VPN, 2'b00}; assign HPTWSize = 3'b010; end else begin // RV64 logic [8:0] VPN; @@ -138,7 +138,7 @@ module hptw endcase assign PPN = ((WalkerState == L3_ADR) | (WalkerState == L3_RD) | (SvMode != `SV48 & ((WalkerState == L2_ADR) | (WalkerState == L2_RD)))) ? BasePageTablePPN : CurrentPPN; - assign TranslationPAdr = {PPN, VPN, 3'b000}; + assign HPTWAdr = {PPN, VPN, 3'b000}; assign HPTWSize = 3'b011; end @@ -211,7 +211,7 @@ module hptw end else begin // No Virtual memory supported; tie HPTW outputs to 0 assign HPTWRead = 0; assign WalkerInstrPageFaultF = 0; assign WalkerLoadPageFaultM = 0; assign WalkerStorePageFaultM = 0; - assign TranslationPAdr = 0; + assign HPTWAdr = 0; assign HPTWSize = 3'b000; end endgenerate From 82dd41a0fdfdf9841fa90aec4de0f4936c9b45ab Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 22:24:07 -0600 Subject: [PATCH 36/38] Rename of SelPTW to SelHPTW. --- wally-pipelined/regression/linux-wave.do | 2 +- wally-pipelined/regression/wave.do | 279 ++++++++++++----------- wally-pipelined/src/lsu/lsu.sv | 24 +- 3 files changed, 153 insertions(+), 152 deletions(-) diff --git a/wally-pipelined/regression/linux-wave.do b/wally-pipelined/regression/linux-wave.do index 4bf8bf7e6..60723d6d4 100644 --- a/wally-pipelined/regression/linux-wave.do +++ b/wally-pipelined/regression/linux-wave.do @@ -239,7 +239,7 @@ add wave -noupdate -expand -group AMO_ALU /testbench/dut/hart/lsu/dcache/genblk3 add wave -noupdate -expand -group AMO_ALU /testbench/dut/hart/lsu/dcache/genblk3/amoalu/srca add wave -noupdate -expand -group AMO_ALU /testbench/dut/hart/lsu/dcache/genblk3/amoalu/srcb add wave -noupdate -expand -group AMO_ALU /testbench/dut/hart/lsu/dcache/genblk3/amoalu/width -add wave -noupdate -expand -group lsu -group {LSU ARB} /testbench/dut/hart/lsu/arbiter/SelPTW +add wave -noupdate -expand -group lsu -group {LSU ARB} /testbench/dut/hart/lsu/arbiter/SelHPTW add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/hart/lsu/dcache/dcachefsm/CurrState add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/WalkerPageFaultM add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/WriteDataM diff --git a/wally-pipelined/regression/wave.do b/wally-pipelined/regression/wave.do index 3e19f65b3..133197081 100644 --- a/wally-pipelined/regression/wave.do +++ b/wally-pipelined/regression/wave.do @@ -4,7 +4,7 @@ add wave -noupdate /testbench/clk add wave -noupdate /testbench/reset add wave -noupdate /testbench/test add wave -noupdate /testbench/memfilename -add wave -noupdate /testbench/dut/hart/SATP_REGW +add wave -noupdate /testbench/dut/hart/lsu/SATP_REGW add wave -noupdate -group {Execution Stage} /testbench/dut/hart/ifu/PCE add wave -noupdate -group {Execution Stage} /testbench/InstrEName add wave -noupdate -group {Execution Stage} /testbench/dut/hart/ifu/InstrE @@ -15,41 +15,41 @@ add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/InstrM add wave -noupdate -expand -group {Memory Stage} /testbench/dut/hart/lsu/IEUAdrM add wave -noupdate /testbench/dut/hart/ieu/dp/ResultM add wave -noupdate /testbench/dut/hart/ieu/dp/ResultW -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/EcallFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/StorePageFaultM -add wave -noupdate -group HDU -group traps /testbench/dut/hart/priv/trap/InterruptM -add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/PendingIntsM -add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/CommittedM -add wave -noupdate -group HDU -group interrupts /testbench/dut/hart/priv/trap/InstrValidM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/StoreStallD -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/hzu/LSUStall -add wave -noupdate -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD -add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF -add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD -add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE -add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushM -add wave -noupdate -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushW -add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF -add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD -add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE -add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM -add wave -noupdate -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrMisalignedFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrAccessFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/IllegalInstrFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/BreakpointFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadMisalignedFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreMisalignedFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadAccessFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StoreAccessFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/EcallFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InstrPageFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/LoadPageFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/StorePageFaultM +add wave -noupdate -expand -group HDU -expand -group traps /testbench/dut/hart/priv/trap/InterruptM +add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/hart/priv/trap/PendingIntsM +add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/hart/priv/trap/CommittedM +add wave -noupdate -expand -group HDU -group interrupts /testbench/dut/hart/priv/trap/InstrValidM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/BPPredWrongE +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/CSRWritePendingDEM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/RetM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/TrapM +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LoadStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/StoreStallD +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/ICacheStallF +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/hzu/LSUStall +add wave -noupdate -expand -group HDU -expand -group hazards /testbench/dut/hart/MulDivStallD +add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/hzu/FlushF +add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushD +add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushE +add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushM +add wave -noupdate -expand -group HDU -expand -group Flush -color Yellow /testbench/dut/hart/FlushW +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallF +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallD +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallE +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallM +add wave -noupdate -expand -group HDU -expand -group Stall -color Orange /testbench/dut/hart/StallW add wave -noupdate -group Bpred -color Orange /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/GHR add wave -noupdate -group Bpred -expand -group {branch update selection inputs} /testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/BPPredF add wave -noupdate -group Bpred -expand -group {branch update selection inputs} {/testbench/dut/hart/ifu/bpred/bpred/Predictor/DirPredictor/InstrClassE[0]} @@ -103,22 +103,22 @@ add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrD add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrE add wave -noupdate -group {instruction pipeline} /testbench/dut/hart/ifu/InstrM add wave -noupdate -group {instruction pipeline} /testbench/InstrW -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredPCF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext0F -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext1F -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/SelBPPredF -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredWrongE -add wave -noupdate -group {PCNext Generation} /testbench/dut/hart/ifu/PrivilegedChangePCM +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNextF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCPlus2or4F +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredPCF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext0F +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PCNext1F +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/SelBPPredF +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/BPPredWrongE +add wave -noupdate -expand -group {PCNext Generation} /testbench/dut/hart/ifu/PrivilegedChangePCM add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ifu/InstrD add wave -noupdate -group {Decode Stage} /testbench/InstrDName add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/c/RegWriteD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/RdD add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs1D add wave -noupdate -group {Decode Stage} /testbench/dut/hart/ieu/dp/Rs2D -add wave -noupdate -group RegFile -expand /testbench/dut/hart/ieu/dp/regf/rf +add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/rf add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a1 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a2 add wave -noupdate -group RegFile /testbench/dut/hart/ieu/dp/regf/a3 @@ -151,12 +151,12 @@ add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/Write add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/ALUResultE add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcAE add wave -noupdate -group {alu execution stage} /testbench/dut/hart/ieu/dp/SrcBE -add wave -noupdate -group PCS /testbench/dut/hart/ifu/PCNextF -add wave -noupdate -group PCS /testbench/dut/hart/PCF -add wave -noupdate -group PCS /testbench/dut/hart/ifu/PCD -add wave -noupdate -group PCS /testbench/dut/hart/PCE -add wave -noupdate -group PCS /testbench/dut/hart/PCM -add wave -noupdate -group PCS /testbench/PCW +add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCNextF +add wave -noupdate -expand -group PCS /testbench/dut/hart/PCF +add wave -noupdate -expand -group PCS /testbench/dut/hart/ifu/PCD +add wave -noupdate -expand -group PCS /testbench/dut/hart/PCE +add wave -noupdate -expand -group PCS /testbench/dut/hart/PCM +add wave -noupdate -expand -group PCS /testbench/PCW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/ForwardedSrcAE add wave -noupdate -group muldiv /testbench/dut/hart/mdu/ForwardedSrcBE add wave -noupdate -group muldiv /testbench/dut/hart/mdu/Funct3E @@ -168,86 +168,86 @@ add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushM add wave -noupdate -group muldiv /testbench/dut/hart/mdu/FlushW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/MulDivResultW add wave -noupdate -group muldiv /testbench/dut/hart/mdu/DivBusyE -add wave -noupdate -group icache -color Gold /testbench/dut/hart/ifu/icache/controller/CurrState -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/BasePAdrF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/WayHit -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/BlockReplacementBits -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/EncVicWay -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/VictimWay -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/SetValid} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[0]/CacheTagMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/ValidBits} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteWordEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[1]/CacheTagMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/ValidBits} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/SetValid} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[2]/CacheTagMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/ValidBits} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/SetValid} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[3]/CacheTagMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/DirtyBits} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/ValidBits} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/StoredData} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/WriteEnable} -add wave -noupdate -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/StoredData} -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/controller/NextState -add wave -noupdate -group icache /testbench/dut/hart/ifu/ITLBMissF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ITLBWriteF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ReadLineF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/ReadLineF -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/BasePAdrF -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn -add wave -noupdate -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/SelAdr -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/RAdr -add wave -noupdate -group icache /testbench/dut/hart/ifu/icache/FinalInstrRawF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrPAdrF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrInF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/FetchCount -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable -add wave -noupdate -group icache -expand -group memory /testbench/dut/hart/ifu/icache/ICacheMemWriteData +add wave -noupdate -expand -group icache -color Gold /testbench/dut/hart/ifu/icache/controller/CurrState +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/BasePAdrF +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/WayHit +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/BlockReplacementBits +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/genblk1/cachereplacementpolicy/EncVicWay +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/VictimWay +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/SetValid} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[0]/CacheTagMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 {/testbench/dut/hart/ifu/icache/MemWay[0]/ValidBits} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -expand -group Way0Word0 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word1 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word2 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way0 -group Way0Word3 {/testbench/dut/hart/ifu/icache/MemWay[0]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/WriteWordEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[1]/CacheTagMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 {/testbench/dut/hart/ifu/icache/MemWay[1]/ValidBits} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -expand -group Way1Word0 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word1 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word2 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way1 -group Way1Word3 {/testbench/dut/hart/ifu/icache/MemWay[1]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/SetValid} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[2]/CacheTagMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 {/testbench/dut/hart/ifu/icache/MemWay[2]/ValidBits} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -expand -group Way2Word0 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word1 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word2 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -group way2 -group Way2Word3 {/testbench/dut/hart/ifu/icache/MemWay[2]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/SetValid} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -label TAG {/testbench/dut/hart/ifu/icache/MemWay[3]/CacheTagMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/DirtyBits} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 {/testbench/dut/hart/ifu/icache/MemWay[3]/ValidBits} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -expand -group Way3Word0 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[0]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word1 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[1]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word2 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[2]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/WriteEnable} +add wave -noupdate -expand -group icache -group {Cache SRAM writes} -expand -group way3 -group Way3Word3 {/testbench/dut/hart/ifu/icache/MemWay[3]/word[3]/CacheDataMem/StoredData} +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/controller/NextState +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/ITLBMissF +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/ITLBWriteF +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/ReadLineF +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/ReadLineF +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/BasePAdrF +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/hit +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spill +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/ICacheStallF +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/spillSave +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntReset +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/PreCntEn +add wave -noupdate -expand -group icache -expand -group {fsm out and control} /testbench/dut/hart/ifu/icache/controller/CntEn +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/SelAdr +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/RAdr +add wave -noupdate -expand -group icache /testbench/dut/hart/ifu/icache/FinalInstrRawF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrPAdrF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/InstrInF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/FetchCountFlag +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/FetchCount +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrReadF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/InstrAckF +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/controller/ICacheMemWriteEnable +add wave -noupdate -expand -group icache -expand -group memory /testbench/dut/hart/ifu/icache/ICacheMemWriteData add wave -noupdate -group AHB -color Gold /testbench/dut/hart/ebu/BusState add wave -noupdate -group AHB /testbench/dut/hart/ebu/NextBusState add wave -noupdate -group AHB -expand -group {input requests} /testbench/dut/hart/ebu/AtomicMaskedM @@ -269,8 +269,10 @@ add wave -noupdate -group AHB /testbench/dut/hart/ebu/HMASTLOCK add wave -noupdate -group AHB /testbench/dut/hart/ebu/HADDRD add wave -noupdate -group AHB /testbench/dut/hart/ebu/HSIZED add wave -noupdate -group AHB /testbench/dut/hart/ebu/HWRITED +add wave -noupdate -expand -group lsu -color Gold /testbench/dut/hart/lsu/CurrState +add wave -noupdate -expand -group lsu /testbench/dut/hart/lsu/InterlockStall add wave -noupdate -expand -group lsu /testbench/dut/hart/hzu/LSUStall -add wave -noupdate -expand -group lsu -expand -group {LSU ARB} /testbench/dut/hart/lsu/arbiter/SelPTW +add wave -noupdate -expand -group lsu -expand -group {LSU ARB} /testbench/dut/hart/lsu/arbiter/SelHPTW add wave -noupdate -expand -group lsu -expand -group dcache -color Gold /testbench/dut/hart/lsu/dcache/dcachefsm/CurrState add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/WriteDataM add wave -noupdate -expand -group lsu -expand -group dcache /testbench/dut/hart/lsu/dcache/SRAMBlockWriteEnableM @@ -422,7 +424,8 @@ add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group typ add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerInstrPageFaultF add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerLoadPageFaultM add wave -noupdate -expand -group lsu -expand -group ptwalker -expand -group types /testbench/dut/hart/lsu/hptw/WalkerStorePageFaultM -add wave -noupdate -group csr /testbench/dut/hart/priv/csr/MIP_REGW +add wave -noupdate -expand -group csr /testbench/dut/hart/priv/csr/MIP_REGW +add wave -noupdate -expand -group csr /testbench/dut/hart/priv/csr/genblk1/csrm/MEPC_REGW add wave -noupdate -group itlb /testbench/dut/hart/ifu/immu/TLBWrite add wave -noupdate -group itlb /testbench/dut/hart/ifu/ITLBMissF add wave -noupdate -group itlb /testbench/dut/hart/ifu/immu/PhysicalAddress @@ -496,8 +499,6 @@ add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HSELUART add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HADDR add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWRITE add wave -noupdate -group UART /testbench/dut/uncore/uart/uart/HWDATA -add wave -noupdate /testbench/dut/hart/lsu/CurrState -add wave -noupdate /testbench/dut/hart/lsu/InterlockStall add wave -noupdate /testbench/dut/hart/ifu/icache/PCNextF add wave -noupdate /testbench/dut/hart/ifu/icache/PCPF add wave -noupdate /testbench/dut/hart/lsu/WalkerInstrPageFaultF @@ -506,8 +507,8 @@ add wave -noupdate /testbench/dut/hart/lsu/dcache/RAdr add wave -noupdate /testbench/dut/hart/lsu/dcache/SelAdrM add wave -noupdate /testbench/dut/hart/lsu/SelReplayCPURequest TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 6} {24475 ns} 1} {{Cursor 2} {22501 ns} 1} {{Cursor 3} {3615 ns} 0} -quietly wave cursor active 3 +WaveRestoreCursors {{Cursor 6} {26646 ns} 1} {{Cursor 2} {70866 ns} 0} {{Cursor 3} {24171 ns} 1} +quietly wave cursor active 2 configure wave -namecolwidth 250 configure wave -valuecolwidth 297 configure wave -justifyvalue left @@ -522,4 +523,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {3453 ns} {3729 ns} +WaveRestoreZoom {70741 ns} {70973 ns} diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 7b0f564e2..47dda8aae 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -115,7 +115,7 @@ module lsu logic CacheableM; logic CacheableMtoDCache; - logic SelPTW; + logic SelHPTW; logic [2:0] HPTWSize; @@ -245,7 +245,7 @@ module lsu // When replaying CPU memory request after PTW select the IEUAdrM for correct address. assign SelReplayCPURequest = NextState == STATE_T0_REPLAY; - assign SelPTW = (CurrState == STATE_T3_DTLB_MISS) | (CurrState == STATE_T4_ITLB_MISS) | + assign SelHPTW = (CurrState == STATE_T3_DTLB_MISS) | (CurrState == STATE_T4_ITLB_MISS) | (CurrState == STATE_T5_ITLB_MISS) | (CurrState == STATE_T7_DITLB_MISS); assign IgnoreRequest = CurrState == STATE_T0_READY & (ITLBMissF | DTLBMissM); @@ -272,28 +272,28 @@ module lsu // arbiter between IEU and hptw // multiplex the outputs to LSU - assign MemRWMtoLRSC = SelPTW ? {HPTWRead, 1'b0} : MemRWM; + assign MemRWMtoLRSC = SelHPTW ? {HPTWRead, 1'b0} : MemRWM; - mux2 #(3) sizemux(Funct3M, HPTWSize, SelPTW, Funct3MtoDCache); + mux2 #(3) sizemux(Funct3M, HPTWSize, SelHPTW, Funct3MtoDCache); // this is for the d cache SRAM. // turns out because we cannot pipeline hptw requests we don't need this register //flop #(`PA_BITS) HPTWAdrMReg(clk, HPTWAdr, HPTWAdrM); // delay HPTWAdrM by a cycle - assign AtomicMtoDCache = SelPTW ? 2'b00 : AtomicM; - assign MemPAdrNoTranslate = SelPTW ? HPTWAdr : {2'b00, IEUAdrM}[`PA_BITS-1:0]; - assign MemAdrE = SelPTW ? HPTWAdr[11:0] : IEUAdrE[11:0]; - assign CPUBusy = SelPTW ? 1'b0 : StallW; + assign AtomicMtoDCache = SelHPTW ? 2'b00 : AtomicM; + assign MemPAdrNoTranslate = SelHPTW ? HPTWAdr : {2'b00, IEUAdrM}[`PA_BITS-1:0]; + assign MemAdrE = SelHPTW ? HPTWAdr[11:0] : IEUAdrE[11:0]; + assign CPUBusy = SelHPTW ? 1'b0 : StallW; // always block interrupts when using the hardware page table walker. - assign CommittedM = SelPTW ? 1'b1 : CommittedMfromDCache; + assign CommittedM = SelHPTW ? 1'b1 : CommittedMfromDCache; - assign PendingInterruptMtoDCache = SelPTW ? 1'b0 : PendingInterruptM; + assign PendingInterruptMtoDCache = SelHPTW ? 1'b0 : PendingInterruptM; mmu #(.TLB_ENTRIES(`DTLB_ENTRIES), .IMMU(0)) dmmu(.clk, .reset, .SATP_REGW, .STATUS_MXR, .STATUS_SUM, .STATUS_MPRV, .STATUS_MPP, - .PrivilegeModeW, .DisableTranslation(SelPTW), + .PrivilegeModeW, .DisableTranslation(SelHPTW), .PAdr(MemPAdrNoTranslate), .VAdr(IEUAdrM), .Size(Funct3MtoDCache[1:0]), @@ -321,7 +321,7 @@ module lsu .SquashSCW, .MemRWMtoDCache); // *** BUG, this is most likely wrong - assign CacheableMtoDCache = SelPTW ? 1'b1 : CacheableM; + assign CacheableMtoDCache = SelHPTW ? 1'b1 : CacheableM; // Specify which type of page fault is occurring From 533c2f355680ec277146884c87826b1738b789bc Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 22:41:34 -0600 Subject: [PATCH 37/38] Remove verbosity from lsu state machine. --- wally-pipelined/src/lsu/lsu.sv | 99 ++++++++++------------------------ 1 file changed, 27 insertions(+), 72 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index 47dda8aae..f10154dc3 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -150,76 +150,30 @@ module lsu always_comb begin case(CurrState) - STATE_T0_READY: begin - if(~ITLBMissF & DTLBMissM & AnyCPUReqM) begin - NextState = STATE_T3_DTLB_MISS; - end - else if(ITLBMissF & ~DTLBMissM & ~AnyCPUReqM) begin - NextState = STATE_T4_ITLB_MISS; - end - else if(ITLBMissF & ~DTLBMissM & AnyCPUReqM) begin - NextState = STATE_T5_ITLB_MISS; - end - else if(ITLBMissF & DTLBMissM & AnyCPUReqM) begin - NextState = STATE_T7_DITLB_MISS; - end else begin - NextState = STATE_T0_READY; - end - end - STATE_T0_REPLAY: begin - if(DCacheStall) begin - NextState = STATE_T0_REPLAY; - end else begin - NextState = STATE_T0_READY; - end - end - STATE_T3_DTLB_MISS: begin - if(WalkerLoadPageFaultM | WalkerStorePageFaultM) begin - NextState = STATE_T0_READY; - end else if(DTLBWriteM) begin - NextState = STATE_T0_REPLAY; - end else begin - NextState = STATE_T3_DTLB_MISS; - end - end - STATE_T4_ITLB_MISS: begin - if(WalkerInstrPageFaultRaw | ITLBWriteF) begin - NextState = STATE_T0_READY; - end else begin - NextState = STATE_T4_ITLB_MISS; - end - end - STATE_T5_ITLB_MISS: begin - if(ITLBWriteF) begin - NextState = STATE_T0_REPLAY; - end else if(WalkerInstrPageFaultRaw) begin - NextState = STATE_T0_FAULT_REPLAY; - end else begin - NextState = STATE_T5_ITLB_MISS; - end - end - STATE_T0_FAULT_REPLAY: begin - if(DCacheStall) begin - NextState = STATE_T0_FAULT_REPLAY; - end else begin - NextState = STATE_T0_READY; - end - end - STATE_T7_DITLB_MISS: begin - if(WalkerStorePageFaultM | WalkerLoadPageFaultM) begin - NextState = STATE_T0_READY; - end else if(DTLBWriteM) begin - NextState = STATE_T5_ITLB_MISS; - end else begin - NextState = STATE_T7_DITLB_MISS; - end - end - default: begin - NextState = STATE_T0_READY; - end + STATE_T0_READY: if(~ITLBMissF & DTLBMissM & AnyCPUReqM) NextState = STATE_T3_DTLB_MISS; + else if(ITLBMissF & ~DTLBMissM & ~AnyCPUReqM) NextState = STATE_T4_ITLB_MISS; + else if(ITLBMissF & ~DTLBMissM & AnyCPUReqM) NextState = STATE_T5_ITLB_MISS; + else if(ITLBMissF & DTLBMissM & AnyCPUReqM) NextState = STATE_T7_DITLB_MISS; + else NextState = STATE_T0_READY; + STATE_T0_REPLAY: if(DCacheStall) NextState = STATE_T0_REPLAY; + else NextState = STATE_T0_READY; + STATE_T3_DTLB_MISS: if(WalkerLoadPageFaultM | WalkerStorePageFaultM) NextState = STATE_T0_READY; + else if(DTLBWriteM) NextState = STATE_T0_REPLAY; + else NextState = STATE_T3_DTLB_MISS; + STATE_T4_ITLB_MISS: if(WalkerInstrPageFaultRaw | ITLBWriteF) NextState = STATE_T0_READY; + else NextState = STATE_T4_ITLB_MISS; + STATE_T5_ITLB_MISS: if(ITLBWriteF) NextState = STATE_T0_REPLAY; + else if(WalkerInstrPageFaultRaw) NextState = STATE_T0_FAULT_REPLAY; + else NextState = STATE_T5_ITLB_MISS; + STATE_T0_FAULT_REPLAY: if(DCacheStall) NextState = STATE_T0_FAULT_REPLAY; + else NextState = STATE_T0_READY; + STATE_T7_DITLB_MISS: if(WalkerStorePageFaultM | WalkerLoadPageFaultM) NextState = STATE_T0_READY; + else if(DTLBWriteM) NextState = STATE_T5_ITLB_MISS; + else NextState = STATE_T7_DITLB_MISS; + default: NextState = STATE_T0_READY; endcase end // always_comb - + // signal to CPU it needs to wait on HPTW. /* -----\/----- EXCLUDED -----\/----- // this code has a problem with imperas64mmu as it reads in an invalid uninitalized instruction. InterlockStall becomes x and it propagates @@ -330,12 +284,13 @@ module lsu assign DTLBStorePageFaultM = DTLBPageFaultM & MemRWMtoLRSC[0]; // Determine if an Unaligned access is taking place + // hptw guarantees alignment, only check inputs from IEU. always_comb - case(Funct3MtoDCache[1:0]) + case(Funct3M[1:0]) 2'b00: DataMisalignedM = 0; // lb, sb, lbu - 2'b01: DataMisalignedM = MemPAdrNoTranslate[0]; // lh, sh, lhu - 2'b10: DataMisalignedM = MemPAdrNoTranslate[1] | MemPAdrNoTranslate[0]; // lw, sw, flw, fsw, lwu - 2'b11: DataMisalignedM = |MemPAdrNoTranslate[2:0]; // ld, sd, fld, fsd + 2'b01: DataMisalignedM = IEUAdrM[0]; // lh, sh, lhu + 2'b10: DataMisalignedM = IEUAdrM[1] | IEUAdrM[0]; // lw, sw, flw, fsw, lwu + 2'b11: DataMisalignedM = |IEUAdrM[2:0]; // ld, sd, fld, fsd endcase // Determine if address is valid From b261b18aa89310fe33dc9ccf23ec87daa3287097 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 19 Dec 2021 22:47:48 -0600 Subject: [PATCH 38/38] More signal name cleanup in LSU. --- wally-pipelined/src/lsu/lsu.sv | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/wally-pipelined/src/lsu/lsu.sv b/wally-pipelined/src/lsu/lsu.sv index f10154dc3..77e07741e 100644 --- a/wally-pipelined/src/lsu/lsu.sv +++ b/wally-pipelined/src/lsu/lsu.sv @@ -92,15 +92,12 @@ module lsu ); logic DTLBPageFaultM; - logic [`PA_BITS-1:0] MemPAdrM; // from mmu to dcache - logic DTLBMissM; logic DTLBWriteM; logic HPTWStall; logic [`PA_BITS-1:0] HPTWAdr; - //logic [`PA_BITS-1:0] HPTWAdrM; logic HPTWRead; logic [1:0] MemRWMtoDCache; logic [1:0] MemRWMtoLRSC; @@ -121,7 +118,6 @@ module lsu logic CommittedMfromDCache; logic PendingInterruptMtoDCache; - // logic FlushWtoDCache; logic WalkerPageFaultM; logic AnyCPUReqM; @@ -251,21 +247,19 @@ module lsu .PAdr(MemPAdrNoTranslate), .VAdr(IEUAdrM), .Size(Funct3MtoDCache[1:0]), - .PTE(PTE), + .PTE, .PageTypeWriteVal(PageType), .TLBWrite(DTLBWriteM), .TLBFlush(DTLBFlushM), .PhysicalAddress(MemPAdrM), .TLBMiss(DTLBMissM), .Cacheable(CacheableM), - .Idempotent(), - .AtomicAllowed(), + .Idempotent(), .AtomicAllowed(), .TLBPageFault(DTLBPageFaultM), .InstrAccessFaultF(), .LoadAccessFaultM, .StoreAccessFaultM, .AtomicAccessM(1'b0), .ExecuteAccessF(1'b0), .WriteAccessM(MemRWMtoLRSC[0]), .ReadAccessM(MemRWMtoLRSC[1]), .PMPCFG_ARRAY_REGW, .PMPADDR_ARRAY_REGW - //.AtomicAccessM(AtomicMaskedM[1]), ); // *** the pma/pmp instruction access faults don't really matter here. is it possible to parameterize which outputs exist? @@ -303,25 +297,17 @@ module lsu // 3. wire pass-through assign MemAdrE_RENAME = SelReplayCPURequest ? IEUAdrM[11:0] : MemAdrE[11:0]; - dcache dcache(.clk(clk), - .reset(reset), - .CPUBusy(CPUBusy), + dcache dcache(.clk, .reset, .CPUBusy, .MemRWM(MemRWMtoDCache), .Funct3M(Funct3MtoDCache), - .Funct7M(Funct7M), - .FlushDCacheM, + .Funct7M, .FlushDCacheM, .AtomicM(AtomicMtoDCache), .MemAdrE(MemAdrE_RENAME), - .MemPAdrM(MemPAdrM), + .MemPAdrM, .VAdr(IEUAdrM[11:0]), // this will be removed once the dcache hptw interlock is removed. - .WriteDataM(WriteDataM), - .ReadDataM(ReadDataM), - .DCacheStall(DCacheStall), + .WriteDataM, .ReadDataM, .DCacheStall, .CommittedM(CommittedMfromDCache), - .DCacheMiss, - .DCacheAccess, - .ExceptionM(ExceptionM), - .IgnoreRequest, + .DCacheMiss, .DCacheAccess, .ExceptionM, .IgnoreRequest, .PendingInterruptM(PendingInterruptMtoDCache), .CacheableM(CacheableMtoDCache),