From 33110ed6363bc3a692e9a0255139b0fff334ce16 Mon Sep 17 00:00:00 2001 From: David Harris Date: Sun, 7 Feb 2021 23:21:55 -0500 Subject: [PATCH] Data memory bus integration --- wally-pipelined/config/rv64ic/wally-config.vh | 2 +- wally-pipelined/regression/wally-pipelined.do | 9 +- wally-pipelined/src/dmem/dmem.sv | 17 ++-- wally-pipelined/src/ebu/ahblite.sv | 93 +++++++++++++++---- wally-pipelined/src/ebu/subwordread.sv | 20 ++-- wally-pipelined/src/hazard/hazard.sv | 38 +++++--- wally-pipelined/src/ieu/controller.sv | 12 +-- wally-pipelined/src/ieu/datapath.sv | 30 +++--- wally-pipelined/src/ieu/forward.sv | 2 +- wally-pipelined/src/ieu/ieu.sv | 3 +- wally-pipelined/src/ifu/ifu.sv | 33 ++++--- wally-pipelined/src/uncore/dtim.sv | 52 ++++++++--- wally-pipelined/src/uncore/subwordwrite.sv | 33 ++++--- wally-pipelined/src/uncore/uartPC16550D.sv | 1 + wally-pipelined/src/uncore/uncore.sv | 6 +- .../src/wally/wallypipelinedhart.sv | 39 +++++--- .../src/wally/wallypipelinedsoc.sv | 3 + .../testbench/testbench-imperas.sv | 11 ++- 18 files changed, 267 insertions(+), 137 deletions(-) diff --git a/wally-pipelined/config/rv64ic/wally-config.vh b/wally-pipelined/config/rv64ic/wally-config.vh index eb340ddc2..8ab42cdd3 100644 --- a/wally-pipelined/config/rv64ic/wally-config.vh +++ b/wally-pipelined/config/rv64ic/wally-config.vh @@ -61,7 +61,7 @@ // Bus Interface width `define AHBW 64 -// Peripheral Addresses +// Peripheral Physical Addresses // Peripheral memory space extends from BASE to BASE+RANGE // Range should be a thermometer code with 0's in the upper bits and 1s in the lower bits diff --git a/wally-pipelined/regression/wally-pipelined.do b/wally-pipelined/regression/wally-pipelined.do index b7f41535d..a88569f49 100644 --- a/wally-pipelined/regression/wally-pipelined.do +++ b/wally-pipelined/regression/wally-pipelined.do @@ -45,11 +45,14 @@ view wave add wave /testbench/clk add wave /testbench/reset add wave -divider -add wave /testbench/dut/hart/ebu/IReadF +#add wave 
/testbench/dut/hart/ebu/IReadF add wave /testbench/dut/hart/DataStall add wave /testbench/dut/hart/InstrStall add wave /testbench/dut/hart/StallF add wave /testbench/dut/hart/StallD +add wave /testbench/dut/hart/StallE +add wave /testbench/dut/hart/StallM +add wave /testbench/dut/hart/StallW add wave /testbench/dut/hart/FlushD add wave /testbench/dut/hart/FlushE add wave /testbench/dut/hart/FlushM @@ -101,6 +104,6 @@ configure wave -childrowmargin 2 set DefaultRadix hexadecimal -- Run the Simulation -#run 1000 -run -all +run 2000 +#run -all #quit diff --git a/wally-pipelined/src/dmem/dmem.sv b/wally-pipelined/src/dmem/dmem.sv index 4c602659f..aab74fadc 100644 --- a/wally-pipelined/src/dmem/dmem.sv +++ b/wally-pipelined/src/dmem/dmem.sv @@ -30,19 +30,19 @@ module dmem ( input logic clk, reset, input logic FlushW, - output logic DataStall, + //output logic DataStall, // Memory Stage input logic [1:0] MemRWM, input logic [`XLEN-1:0] MemAdrM, input logic [2:0] Funct3M, - input logic [`XLEN-1:0] ReadDataM, + //input logic [`XLEN-1:0] ReadDataW, input logic [`XLEN-1:0] WriteDataM, output logic [`XLEN-1:0] MemPAdrM, - output logic [1:0] MemRWAlignedM, + output logic MemReadM, MemWriteM, output logic DataMisalignedM, // Writeback Stage input logic MemAckW, - output logic [`XLEN-1:0] ReadDataW, + input logic [`XLEN-1:0] ReadDataW, // faults input logic DataAccessFaultM, output logic LoadMisalignedFaultM, LoadAccessFaultM, @@ -52,9 +52,6 @@ module dmem ( // Initially no MMU assign MemPAdrM = MemAdrM; - // Pipeline register *** AHB data will eventually come back in W anyway - floprc #(`XLEN) ReadDataWReg(clk, reset, FlushW, ReadDataM, ReadDataW); - // Determine if an Unaligned access is taking place always_comb case(Funct3M[1:0]) @@ -66,7 +63,9 @@ module dmem ( // Squash unaligned data accesses // *** this is also the place to squash if the cache is hit - assign MemRWAlignedM = MemRWM & {2{~DataMisalignedM}}; + assign MemReadM = MemRWM[1] & ~DataMisalignedM; + assign 
MemWriteM = MemRWM[0] & ~DataMisalignedM; +// assign MemRWAlignedM = MemRWM & {2{~DataMisalignedM}}; // Determine if address is valid assign LoadMisalignedFaultM = DataMisalignedM & MemRWM[1]; @@ -75,7 +74,7 @@ module dmem ( assign StoreAccessFaultM = DataAccessFaultM & MemRWM[0]; // Data stall - assign DataStall = 0; + //assign DataStall = 0; endmodule diff --git a/wally-pipelined/src/ebu/ahblite.sv b/wally-pipelined/src/ebu/ahblite.sv index 8ce17545f..9d4e62be7 100644 --- a/wally-pipelined/src/ebu/ahblite.sv +++ b/wally-pipelined/src/ebu/ahblite.sv @@ -36,16 +36,16 @@ module ahblite ( input logic UnsignedLoadM, // Signals from Instruction Cache input logic [`XLEN-1:0] InstrPAdrF, // *** rename these to match block diagram - input logic IReadF, - output logic [`XLEN-1:0] IRData, + input logic InstrReadF, + output logic [31:0] InstrRData, // output logic IReady, // Signals from Data Cache input logic [`XLEN-1:0] MemPAdrM, - input logic DReadM, DWriteM, + input logic MemReadM, MemWriteM, input logic [`XLEN-1:0] WriteDataM, - input logic [1:0] DSizeM, + input logic [1:0] MemSizeM, // Return from bus - output logic [`XLEN-1:0] DRData, + output logic [`XLEN-1:0] ReadDataW, // output logic DReady, // AHB-Lite external signals input logic [`AHBW-1:0] HRDATA, @@ -59,49 +59,108 @@ module ahblite ( output logic [3:0] HPROT, output logic [1:0] HTRANS, output logic HMASTLOCK, + // Delayed signals for subword write + output logic [2:0] HADDRD, + output logic [3:0] HSIZED, + output logic HWRITED, // Acknowledge - output logic InstrAckD, MemAckW + output logic InstrAckD, MemAckW, // Stalls -// output logic InstrStall, DataStall + output logic InstrStall, DataStall ); logic GrantData; logic [2:0] ISize; logic [`AHBW-1:0] HRDATAMasked; logic IReady, DReady; +// logic [3:0] HSIZED; // size delayed by one cycle for reads +// logic [2:0] HADDRD; // address delayed for subword reads assign HCLK = clk; assign HRESETn = ~reset; // Arbitrate requests by giving data priority over 
instructions - assign GrantData = DReadM | DWriteM; + assign GrantData = MemReadM | MemWriteM; // *** initially support HABW = XLEN + // track bus state + typedef enum {IDLE, MEMREAD, MEMWRITE, INSTRREAD} statetype; + statetype AdrState, DataState, NextAdrState; // what is happening in the first and second phases of the bus + always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) begin + AdrState <= IDLE; DataState <= IDLE; + HWDATA <= 0; // unnecessary but avoids x at startup + HSIZED <= 0; + HADDRD <= 0; + HWRITED <= 0; + end else begin + if (HREADY || (DataState == IDLE)) begin // only advance bus state if bus is idle or previous transaction returns ready + DataState <= AdrState; + AdrState <= NextAdrState; + if (HWRITE) HWDATA <= WriteDataM; + HSIZED <= {UnsignedLoadM, HSIZE}; + HADDRD <= HADDR[2:0]; + HWRITED <= HWRITE; + end + end + always_comb + if (MemReadM) NextAdrState = MEMREAD; + else if (MemWriteM) NextAdrState = MEMWRITE; + else if (InstrReadF) NextAdrState = INSTRREAD; + else NextAdrState = IDLE; + + // Generate acknowledges based on bus state and ready + assign MemAckW = (AdrState == MEMREAD || AdrState == MEMWRITE) && HREADY; + assign InstrAckD = (AdrState == INSTRREAD) && HREADY; + // Choose ISize based on XLen generate - if (`AHBW == 32) assign ISize = 3'b010; // 32-bit transfers - else assign ISize = 3'b011; // 64-bit transfers + //if (`AHBW == 32) assign ISize = 3'b010; // 32-bit transfers + //else assign ISize = 3'b011; // 64-bit transfers + assign ISize = 3'b010; // 32 bit instructions for now; later improve for filling cache with full width endgenerate // drive bus outputs assign HADDR = GrantData ? MemPAdrM[31:0] : InstrPAdrF[31:0]; - assign HWDATA = WriteDataM; + //assign HWDATA = WriteDataW; //flop #(`XLEN) wdreg(HCLK, DWDataM, HWDATA); // delay HWDATA by 1 cycle per spec; *** assumes AHBW = XLEN - assign HWRITE = DWriteM; - assign HSIZE = GrantData ? {1'b0, DSizeM} : ISize; + assign HWRITE = MemWriteM; + assign HSIZE = GrantData ? 
{1'b0, MemSizeM} : ISize; assign HBURST = 3'b000; // Single burst only supported; consider generalizing for cache fillsfHPROT assign HPROT = 4'b0011; // not used; see Section 3.7 - assign HTRANS = IReadF | DReadM | DWriteM ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise + assign HTRANS = InstrReadF | MemReadM | MemWriteM ? 2'b10 : 2'b00; // NONSEQ if reading or writing, IDLE otherwise assign HMASTLOCK = 0; // no locking supported // Route signals to Instruction and Data Caches // *** assumes AHBW = XLEN - assign IRData = HRDATAMasked; - assign IReady = HREADY & IReadF & ~GrantData; // maybe unused?*** - assign DRData = HRDATAMasked; + assign InstrRData = HRDATAMasked[31:0]; + assign IReady = HREADY & InstrReadF & ~GrantData; // maybe unused?*** + assign ReadDataW = HRDATAMasked; assign DReady = HREADY & GrantData; // ***unused? + + // State machines for stalls (probably can merge with FSM above***) + // Idle, DataBusy, InstrBusy. Stall while in busystate add suffixes + logic MemState, NextMemState, InstrState, NextInstrState; + flopr #(1) msreg(HCLK, ~HRESETn, NextMemState, MemState); + flopr #(1) isreg(HCLK, ~HRESETn, NextInstrState, InstrState); +/* always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) MemState <= 0; + else MemState <= NextMemState; */ + assign NextMemState = (MemState == 0 && InstrState == 0 && (MemReadM || MemWriteM)) || (MemState == 1 && ~MemAckW); + assign DataStall = NextMemState; +/* always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) InstrState <= 0; + else InstrState <= NextInstrState;*/ + + assign NextInstrState = (InstrState == 0 && MemState == 0 && (~MemReadM && ~MemWriteM && InstrReadF)) || + (InstrState == 1 && ~InstrAckD); + assign InstrStall = NextInstrState | MemState | NextMemState; // *** check this, explain better + // temporarily turn off stalls and check it works + //assign DataStall = 0; + //assign InstrStall = 0; + // stalls // Stall MEM stage if data is being accessed and bus isn't yet ready 
//assign DataStall = GrantData & ~HREADY; diff --git a/wally-pipelined/src/ebu/subwordread.sv b/wally-pipelined/src/ebu/subwordread.sv index 74b1e0aa7..352a33c05 100644 --- a/wally-pipelined/src/ebu/subwordread.sv +++ b/wally-pipelined/src/ebu/subwordread.sv @@ -28,9 +28,9 @@ module subwordread ( // from AHB Interface input logic [`XLEN-1:0] HRDATA, - input logic [31:0] HADDR, - input logic UnsignedLoadM, - input logic [2:0] HSIZE, + input logic [2:0] HADDRD, + //input logic UnsignedLoadM, + input logic [3:0] HSIZED, // to ifu/dmems output logic [`XLEN-1:0] HRDATAMasked ); @@ -42,7 +42,7 @@ module subwordread ( if (`XLEN == 64) begin // ByteMe mux always_comb - case(HADDR[2:0]) + case(HADDRD[2:0]) 3'b000: ByteM = HRDATA[7:0]; 3'b001: ByteM = HRDATA[15:8]; 3'b010: ByteM = HRDATA[23:16]; @@ -55,7 +55,7 @@ module subwordread ( // halfword mux always_comb - case(HADDR[2:1]) + case(HADDRD[2:1]) 2'b00: HalfwordM = HRDATA[15:0]; 2'b01: HalfwordM = HRDATA[31:16]; 2'b10: HalfwordM = HRDATA[47:32]; @@ -65,14 +65,14 @@ module subwordread ( logic [31:0] WordM; always_comb - case(HADDR[2]) + case(HADDRD[2]) 1'b0: WordM = HRDATA[31:0]; 1'b1: WordM = HRDATA[63:32]; endcase // sign extension always_comb - case({UnsignedLoadM, HSIZE[1:0]}) + case({HSIZED[3], HSIZED[1:0]}) // HSIZED[3] indicates unsigned load 3'b000: HRDATAMasked = {{56{ByteM[7]}}, ByteM}; // lb 3'b001: HRDATAMasked = {{48{HalfwordM[15]}}, HalfwordM[15:0]}; // lh 3'b010: HRDATAMasked = {{32{WordM[31]}}, WordM[31:0]}; // lw @@ -85,7 +85,7 @@ module subwordread ( end else begin // 32-bit // byte mux always_comb - case(HADDR[1:0]) + case(HADDRD[1:0]) 2'b00: ByteM = HRDATA[7:0]; 2'b01: ByteM = HRDATA[15:8]; 2'b10: ByteM = HRDATA[23:16]; @@ -94,14 +94,14 @@ module subwordread ( // halfword mux always_comb - case(HADDR[1]) + case(HADDRD[1]) 1'b0: HalfwordM = HRDATA[15:0]; 1'b1: HalfwordM = HRDATA[31:16]; endcase // sign extension always_comb - case({UnsignedLoadM, HSIZE[1:0]}) + case({HSIZED[3], HSIZED[1:0]}) 3'b000: 
HRDATAMasked = {{24{ByteM[7]}}, ByteM}; // lb 3'b001: HRDATAMasked = {{16{HalfwordM[15]}}, HalfwordM[15:0]}; // lh 3'b010: HRDATAMasked = HRDATA; // lw diff --git a/wally-pipelined/src/hazard/hazard.sv b/wally-pipelined/src/hazard/hazard.sv index 2fe0541a5..7c4afd6fe 100644 --- a/wally-pipelined/src/hazard/hazard.sv +++ b/wally-pipelined/src/hazard/hazard.sv @@ -34,12 +34,14 @@ module hazard( input logic LoadStallD, input logic InstrStall, DataStall, // Stall outputs - output logic StallF, StallD, FlushD, FlushE, FlushM, FlushW + output logic StallF, StallD, StallE, StallM, StallW, + output logic FlushD, FlushE, FlushM, FlushW ); logic BranchFlushDE; - logic StallDCause, StallFCause, StallWCause; - + logic StallFCause, StallDCause, StallECause, StallMCause, StallWCause; + logic FirstUnstalledD, FirstUnstalledE, FirstUnstalledM, FirstUnstalledW; + // stalls and flushes // loads: stall for one cycle if the subsequent instruction depends on the load // branches and jumps: flush the next two instructions if the branch is taken in EXE @@ -54,14 +56,28 @@ module hazard( assign BranchFlushDE = PCSrcE | RetM | TrapM; - assign StallDCause = LoadStallD; - assign StallFCause = InstrStall | CSRWritePendingDEM; - assign StallWCause = DataStall; // *** not yet used + assign StallFCause = InstrStall | CSRWritePendingDEM; // stall at fetch if unable to get the instruction, + // or if a CSR will be written and may change system behavior + assign StallDCause = LoadStallD; // stall in decode if instruction is a load dependent on previous + assign StallECause = 0; + assign StallMCause = 0; // sDataStall; // not yet used*** + assign StallWCause = DataStall; - assign StallD = StallDCause; + // Each stage stalls if the next stage is stalled or there is a cause to stall this stage. 
assign StallF = StallD | StallFCause; - assign FlushD = BranchFlushDE | StallFCause; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM; - assign FlushE = StallD | BranchFlushDE; //LoadStallD | PCSrcE | RetM | TrapM; - assign FlushM = RetM | TrapM; - assign FlushW = TrapM; + assign StallD = StallE | StallDCause; + assign StallE = StallM | StallECause; + assign StallM = StallW | StallMCause; + assign StallW = StallWCause; + + assign FirstUnstalledD = (~StallD & StallF); + assign FirstUnstalledE = (~StallE & StallD); + assign FirstUnstalledM = (~StallM & StallE); + assign FirstUnstalledW = (~StallW & StallM);; + + // Each stage flushes if the previous stage is the last one stalled (for cause) or the system has reason to flush + assign FlushD = FirstUnstalledD || BranchFlushDE; // PCSrcE |InstrStall | CSRWritePendingDEM | RetM | TrapM; + assign FlushE = FirstUnstalledE || BranchFlushDE; //LoadStallD | PCSrcE | RetM | TrapM; + assign FlushM = FirstUnstalledM || RetM || TrapM; + assign FlushW = FirstUnstalledW | TrapM; endmodule diff --git a/wally-pipelined/src/ieu/controller.sv b/wally-pipelined/src/ieu/controller.sv index 5a62f0147..1b480dec3 100644 --- a/wally-pipelined/src/ieu/controller.sv +++ b/wally-pipelined/src/ieu/controller.sv @@ -37,7 +37,7 @@ module controller( input logic IllegalIEUInstrFaultD, output logic IllegalBaseInstrFaultD, // Execute stage control signals - input logic FlushE, + input logic StallE, FlushE, input logic [2:0] FlagsE, output logic PCSrcE, // for datapath and Hazard Unit output logic [4:0] ALUControlE, @@ -45,14 +45,14 @@ module controller( output logic TargetSrcE, output logic MemReadE, // for Hazard Unit // Memory stage control signals - input logic FlushM, + input logic StallM, FlushM, input logic DataMisalignedM, output logic [1:0] MemRWM, output logic CSRWriteM, PrivilegedM, output logic [2:0] Funct3M, output logic RegWriteM, // for Hazard Unit // Writeback stage control signals - input logic FlushW, + input logic StallW, 
FlushW, output logic RegWriteW, // for datapath and Hazard Unit output logic [1:0] ResultSrcW, output logic InstrValidW, @@ -132,7 +132,7 @@ module controller( endcase // Execute stage pipeline control register and logic - floprc #(21) controlregE(clk, reset, FlushE, + flopenrc #(21) controlregE(clk, reset, FlushE, ~StallE, {RegWriteD, ResultSrcD, MemRWD, JumpD, BranchD, ALUControlD, ALUSrcAD, ALUSrcBD, TargetSrcD, CSRWriteD, PrivilegedD, Funct3D, 1'b1}, {RegWriteE, ResultSrcE, MemRWE, JumpE, BranchE, ALUControlE, ALUSrcAE, ALUSrcBE, TargetSrcE, CSRWriteE, PrivilegedE, Funct3E, InstrValidE}); @@ -155,12 +155,12 @@ module controller( assign MemReadE = MemRWE[1]; // Memory stage pipeline control register - floprc #(11) controlregM(clk, reset, FlushM, + flopenrc #(11) controlregM(clk, reset, FlushM, ~StallM, {RegWriteE, ResultSrcE, MemRWE, CSRWriteE, PrivilegedE, Funct3E, InstrValidE}, {RegWriteM, ResultSrcM, MemRWM, CSRWriteM, PrivilegedM, Funct3M, InstrValidM}); // Writeback stage pipeline control register - floprc #(4) controlregW(clk, reset, FlushW, + flopenrc #(4) controlregW(clk, reset, FlushW, ~StallW, {RegWriteM, ResultSrcM, InstrValidM}, {RegWriteW, ResultSrcW, InstrValidW}); diff --git a/wally-pipelined/src/ieu/datapath.sv b/wally-pipelined/src/ieu/datapath.sv index bb02bad53..0147c487d 100644 --- a/wally-pipelined/src/ieu/datapath.sv +++ b/wally-pipelined/src/ieu/datapath.sv @@ -32,7 +32,7 @@ module datapath ( input logic [2:0] ImmSrcD, input logic [31:0] InstrD, // Execute stage signals - input logic FlushE, + input logic StallE, FlushE, input logic [1:0] ForwardAE, ForwardBE, input logic PCSrcE, input logic [4:0] ALUControlE, @@ -42,7 +42,7 @@ module datapath ( output logic [2:0] FlagsE, output logic [`XLEN-1:0] PCTargetE, // Memory stage signals - input logic FlushM, + input logic StallM, FlushM, input logic [2:0] Funct3M, input logic [`XLEN-1:0] CSRReadValW, input logic [`XLEN-1:0] ReadDataW, @@ -50,7 +50,7 @@ module datapath ( output logic [`XLEN-1:0] 
SrcAM, output logic [`XLEN-1:0] WriteDataM, MemAdrM, // Writeback stage signals - input logic FlushW, + input logic StallW, FlushW, input logic RegWriteW, input logic [1:0] ResultSrcW, input logic [`XLEN-1:0] PCLinkW, @@ -85,12 +85,12 @@ module datapath ( extend ext(.InstrD(InstrD[31:7]), .*); // Execute stage pipeline register and logic - floprc #(`XLEN) RD1EReg(clk, reset, FlushE, RD1D, RD1E); - floprc #(`XLEN) RD2EReg(clk, reset, FlushE, RD2D, RD2E); - floprc #(`XLEN) ExtImmEReg(clk, reset, FlushE, ExtImmD, ExtImmE); - floprc #(5) Rs1EReg(clk, reset, FlushE, Rs1D, Rs1E); - floprc #(5) Rs2EReg(clk, reset, FlushE, Rs2D, Rs2E); - floprc #(5) RdEReg(clk, reset, FlushE, RdD, RdE); + flopenrc #(`XLEN) RD1EReg(clk, reset, FlushE, ~StallE, RD1D, RD1E); + flopenrc #(`XLEN) RD2EReg(clk, reset, FlushE, ~StallE, RD2D, RD2E); + flopenrc #(`XLEN) ExtImmEReg(clk, reset, FlushE, ~StallE, ExtImmD, ExtImmE); + flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E); + flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); + flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); mux3 #(`XLEN) faemux(RD1E, ResultW, ALUResultM, ForwardAE, PreSrcAE); mux3 #(`XLEN) fbemux(RD2E, ResultW, ALUResultM, ForwardBE, WriteDataE); @@ -101,15 +101,15 @@ module datapath ( assign PCTargetE = ExtImmE + TargetBaseE; // Memory stage pipeline register - floprc #(`XLEN) SrcAMReg(clk, reset, FlushM, SrcAE, SrcAM); - floprc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ALUResultE, ALUResultM); + flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); + flopenrc #(`XLEN) ALUResultMReg(clk, reset, FlushM, ~StallM, ALUResultE, ALUResultM); assign MemAdrM = ALUResultM; - floprc #(`XLEN) WriteDataMReg(clk, reset, FlushM, WriteDataE, WriteDataM); - floprc #(5) RdMEg(clk, reset, FlushM, RdE, RdM); + flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, WriteDataE, WriteDataM); + flopenrc #(5) RdMEg(clk, reset, FlushM, ~StallM, RdE, RdM); // Writeback stage pipeline 
register and logic - floprc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ALUResultM, ALUResultW); - floprc #(5) RdWEg(clk, reset, FlushW, RdM, RdW); + flopenrc #(`XLEN) ALUResultWReg(clk, reset, FlushW, ~StallW, ALUResultM, ALUResultW); + flopenrc #(5) RdWEg(clk, reset, FlushW, ~StallW, RdM, RdW); mux4 #(`XLEN) resultmux(ALUResultW, ReadDataW, PCLinkW, CSRReadValW, ResultSrcW, ResultW); endmodule diff --git a/wally-pipelined/src/ieu/forward.sv b/wally-pipelined/src/ieu/forward.sv index 6dcd5154f..166ebe23a 100644 --- a/wally-pipelined/src/ieu/forward.sv +++ b/wally-pipelined/src/ieu/forward.sv @@ -30,7 +30,7 @@ module forward( input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, input logic MemReadE, input logic RegWriteM, RegWriteW, - // Forwaring controls + // Forwarding controls output logic [1:0] ForwardAE, ForwardBE, output logic LoadStallD ); diff --git a/wally-pipelined/src/ieu/ieu.sv b/wally-pipelined/src/ieu/ieu.sv index 7ed4bdffa..2b1e7d415 100644 --- a/wally-pipelined/src/ieu/ieu.sv +++ b/wally-pipelined/src/ieu/ieu.sv @@ -47,7 +47,8 @@ module ieu ( input logic [`XLEN-1:0] PCLinkW, output logic InstrValidW, // hazards - input logic StallD, FlushD, FlushE, FlushM, FlushW, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, input logic RetM, TrapM, output logic LoadStallD, output logic PCSrcE, diff --git a/wally-pipelined/src/ifu/ifu.sv b/wally-pipelined/src/ifu/ifu.sv index 3a12b330a..86f96b95c 100644 --- a/wally-pipelined/src/ifu/ifu.sv +++ b/wally-pipelined/src/ifu/ifu.sv @@ -28,13 +28,15 @@ module ifu ( input logic clk, reset, - input logic StallF, StallD, FlushD, FlushE, FlushM, FlushW, + input logic StallF, StallD, StallE, StallM, StallW, + input logic FlushD, FlushE, FlushM, FlushW, // Fetch input logic [31:0] InstrF, output logic [`XLEN-1:0] PCF, output logic [`XLEN-1:0] InstrPAdrF, + output logic InstrReadF, // Decode - output logic InstrStall, + //output logic InstrStall, // Execute input logic 
PCSrcE, input logic [`XLEN-1:0] PCTargetE, @@ -59,12 +61,12 @@ module ifu ( logic IllegalCompInstrD; logic [`XLEN-1:0] PCPlusUpperF, PCPlus2or4F, PCD, PCW, PCLinkD, PCLinkE, PCLinkM; logic CompressedF; - logic [31:0] InstrRawD, InstrE; + logic [31:0] InstrRawD, InstrE, InstrW; logic [31:0] nop = 32'h00000013; // instruction for NOP // *** put memory interface on here, InstrF becomes output - assign InstrStall = 0; // *** assign InstrPAdrF = PCF; // *** no MMU + assign InstrReadF = ~StallD; assign PrivilegedChangePCM = RetM | TrapM; @@ -107,25 +109,26 @@ module ifu ( // pipeline misaligned faults to M stage assign BranchMisalignedFaultE = misaligned & PCSrcE; // E-stage (Branch/Jump) misaligned - flopr #(1) InstrMisalginedReg(clk, reset, BranchMisalignedFaultE, BranchMisalignedFaultM); - flopr #(`XLEN) InstrMisalignedAdrReg(clk, reset, PCNextF, InstrMisalignedAdrM); + flopenr #(1) InstrMisalginedReg(clk, reset, ~StallM, BranchMisalignedFaultE, BranchMisalignedFaultM); + flopenr #(`XLEN) InstrMisalignedAdrReg(clk, reset, ~StallM, PCNextF, InstrMisalignedAdrM); assign TrapMisalignedFaultM = misaligned & PrivilegedChangePCM; assign InstrMisalignedFaultM = BranchMisalignedFaultM; // | TrapMisalignedFaultM; *** put this back in without causing a cyclic path - flopr #(32) InstrEReg(clk, reset, FlushE ? nop : InstrD, InstrE); - flopr #(32) InstrMReg(clk, reset, FlushM ? nop : InstrE, InstrM); - flopr #(`XLEN) PCEReg(clk, reset, PCD, PCE); - flopr #(`XLEN) PCMReg(clk, reset, PCE, PCM); - flopr #(`XLEN) PCWReg(clk, reset, PCM, PCW); // *** probably not needed; delete later + flopenr #(32) InstrEReg(clk, reset, ~StallE, FlushE ? nop : InstrD, InstrE); + flopenr #(32) InstrMReg(clk, reset, ~StallM, FlushM ? nop : InstrE, InstrM); + flopenr #(32) InstrWReg(clk, reset, ~StallW, FlushW ? 
nop : InstrM, InstrW); // just for testbench, delete later + flopenr #(`XLEN) PCEReg(clk, reset, ~StallE, PCD, PCE); + flopenr #(`XLEN) PCMReg(clk, reset, ~StallM, PCE, PCM); + flopenr #(`XLEN) PCWReg(clk, reset, ~StallW, PCM, PCW); // *** probably not needed; delete later // seems like there should be a lower-cost way of doing this PC+2 or PC+4 for JAL. // either have ALU compute PC+2/4 and feed into ALUResult input of ResultMux or // have dedicated adder in Mem stage based on PCM + 2 or 4 // *** redo this - flopr #(`XLEN) PCPDReg(clk, reset, PCPlus2or4F, PCLinkD); - flopr #(`XLEN) PCPEReg(clk, reset, PCLinkD, PCLinkE); - flopr #(`XLEN) PCPMReg(clk, reset, PCLinkE, PCLinkM); - flopr #(`XLEN) PCPWReg(clk, reset, PCLinkM, PCLinkW); + flopenr #(`XLEN) PCPDReg(clk, reset, ~StallD, PCPlus2or4F, PCLinkD); + flopenr #(`XLEN) PCPEReg(clk, reset, ~StallE, PCLinkD, PCLinkE); + flopenr #(`XLEN) PCPMReg(clk, reset, ~StallM, PCLinkE, PCLinkM); + flopenr #(`XLEN) PCPWReg(clk, reset, ~StallW, PCLinkM, PCLinkW); endmodule diff --git a/wally-pipelined/src/uncore/dtim.sv b/wally-pipelined/src/uncore/dtim.sv index 6a49fcf59..611183605 100644 --- a/wally-pipelined/src/uncore/dtim.sv +++ b/wally-pipelined/src/uncore/dtim.sv @@ -36,13 +36,15 @@ module dtim ( ); logic [`XLEN-1:0] RAM[0:65535]; + logic [18:0] HWADDR; + // logic [`XLEN-1:0] write; logic [15:0] entry; logic memread, memwrite; logic [3:0] busycount; // busy FSM to extend READY signal - always_ff @(posedge HCLK, negedge HRESETn) +/* always_ff @(posedge HCLK, negedge HRESETn) if (~HRESETn) begin HREADYTim <= 1; end else begin @@ -52,25 +54,34 @@ module dtim ( end else if (~HREADYTim) begin if (busycount == 0) begin // TIM latency, for testing purposes HREADYTim <= 1; - end else + end else begin busycount <= busycount + 1; + end end + end*/ + always_ff @(posedge HCLK, negedge HRESETn) + if (~HRESETn) begin + HREADYTim <= 0; + end else begin + HREADYTim <= HSELTim; // always respond one cycle later end - + assign memread = 
MemRWtim[1]; assign memwrite = MemRWtim[0]; +// always_ff @(posedge HCLK) +// memwrite <= MemRWtim[0]; // delay memwrite to write phase assign HRESPTim = 0; // OK // assign HREADYTim = 1; // Respond immediately; *** extend this // word aligned reads - generate +/* generate if (`XLEN==64) assign #2 entry = HADDR[18:3]; else assign #2 entry = HADDR[17:2]; - endgenerate - assign HREADTim = RAM[entry]; + endgenerate */ +// assign HREADTim = RAM[entry]; // assign HREADTim = HREADYTim ? RAM[entry] : ~RAM[entry]; // *** temproary mess up read value before ready // write each byte based on the byte mask @@ -105,17 +116,34 @@ module dtim ( if (memwrite) RAM[HADDR[17:2]] <= write; end endgenerate */ + + // Model memory read and write + // If write occurs at end of phase (rising edge of clock), + // then read of same address on next cycle won't work. Would need to bypass. + // Faking for now with negedge clock write. Will need to adjust this to + // match capabilities of FPGA or actual chip RAM. + // Also, writes occurring later than reads throw off single ported RAM that + // might be asked to write on one instruction and read on the next and would need + // to stall because both accesses happen on same cycle with AHB delay + generate - if (`XLEN == 64) + if (`XLEN == 64) begin + always_ff @(negedge HCLK) + if (memwrite) RAM[HWADDR[17:3]] <= HWDATA; always_ff @(posedge HCLK) begin - if (memwrite) RAM[HADDR[17:3]] <= HWDATA; -// HREADTim <= RAM[HADDR[17:3]]; + //if (memwrite) RAM[HADDR[17:3]] <= HWDATA; + HWADDR <= HADDR; + HREADTim <= RAM[HADDR[17:3]]; end - else + end else begin + always_ff @(negedge HCLK) + if (memwrite) RAM[HWADDR[17:2]] <= HWDATA; always_ff @(posedge HCLK) begin - if (memwrite) RAM[HADDR[17:2]] <= HWDATA; -// HREADTim <= RAM[HADDR[17:2]]; + //if (memwrite) RAM[HADDR[17:2]] <= HWDATA; + HWADDR <= HADDR; + HREADTim <= RAM[HADDR[17:2]]; end + end endgenerate endmodule diff --git a/wally-pipelined/src/uncore/subwordwrite.sv 
b/wally-pipelined/src/uncore/subwordwrite.sv index 68c2b0e47..b1e8d683f 100644 --- a/wally-pipelined/src/uncore/subwordwrite.sv +++ b/wally-pipelined/src/uncore/subwordwrite.sv @@ -27,37 +27,35 @@ module subwordwrite ( input logic [`XLEN-1:0] HRDATA, - input logic [31:0] HADDR, - input logic [2:0] HSIZE, + input logic [2:0] HADDRD, + input logic [3:0] HSIZED, input logic [`XLEN-1:0] HWDATAIN, output logic [`XLEN-1:0] HWDATA ); - logic [7:0] ByteM; // *** declare locally to generate as either 4 or 8 bits - logic [15:0] HalfwordM; logic [`XLEN-1:0] WriteDataSubwordDuplicated; - logic [7:0] ByteMaskM; generate if (`XLEN == 64) begin + logic [7:0] ByteMaskM; // Compute write mask always_comb - case(HSIZE[1:0]) - 2'b00: begin ByteMaskM = 8'b00000000; ByteMaskM[HADDR[2:0]] = 1; end // sb - 2'b01: case (HADDR[2:1]) + case(HSIZED[1:0]) + 2'b00: begin ByteMaskM = 8'b00000000; ByteMaskM[HADDRD[2:0]] = 1; end // sb + 2'b01: case (HADDRD[2:1]) 2'b00: ByteMaskM = 8'b00000011; 2'b01: ByteMaskM = 8'b00001100; 2'b10: ByteMaskM = 8'b00110000; 2'b11: ByteMaskM = 8'b11000000; endcase - 2'b10: if (HADDR[2]) ByteMaskM = 8'b11110000; + 2'b10: if (HADDRD[2]) ByteMaskM = 8'b11110000; else ByteMaskM = 8'b00001111; 2'b11: ByteMaskM = 8'b11111111; endcase // Handle subword writes always_comb - case(HSIZE[1:0]) + case(HSIZED[1:0]) 2'b00: WriteDataSubwordDuplicated = {8{HWDATAIN[7:0]}}; // sb 2'b01: WriteDataSubwordDuplicated = {4{HWDATAIN[15:0]}}; // sh 2'b10: WriteDataSubwordDuplicated = {2{HWDATAIN[31:0]}}; // sw @@ -77,19 +75,20 @@ module subwordwrite ( end end else begin // 32-bit + logic [3:0] ByteMaskM; // Compute write mask always_comb - case(HSIZE[1:0]) - 2'b00: begin ByteMaskM = 8'b0000; ByteMaskM[{1'b0, HADDR[1:0]}] = 1; end // sb - 2'b01: if (HADDR[1]) ByteMaskM = 8'b1100; - else ByteMaskM = 8'b0011; - 2'b10: ByteMaskM = 8'b1111; - default: ByteMaskM = 8'b111; // shouldn't happen + case(HSIZED[1:0]) + 2'b00: begin ByteMaskM = 4'b0000; ByteMaskM[HADDRD[1:0]] = 1; end // sb + 2'b01: 
if (HADDRD[1]) ByteMaskM = 4'b1100; + else ByteMaskM = 4'b0011; + 2'b10: ByteMaskM = 4'b1111; + default: ByteMaskM = 4'b111; // shouldn't happen endcase // Handle subword writes always_comb - case(HSIZE[1:0]) + case(HSIZED[1:0]) 2'b00: WriteDataSubwordDuplicated = {4{HWDATAIN[7:0]}}; // sb 2'b01: WriteDataSubwordDuplicated = {2{HWDATAIN[15:0]}}; // sh 2'b10: WriteDataSubwordDuplicated = HWDATAIN; // sw diff --git a/wally-pipelined/src/uncore/uartPC16550D.sv b/wally-pipelined/src/uncore/uartPC16550D.sv index ec3e25227..051bcef47 100644 --- a/wally-pipelined/src/uncore/uartPC16550D.sv +++ b/wally-pipelined/src/uncore/uartPC16550D.sv @@ -6,6 +6,7 @@ // // Purpose: Universial Asynchronous Receiver/ Transmitter with FIFOs // Emulates interface of Texas Instruments PC16550D +// https://media.digikey.com/pdf/Data%20Sheets/Texas%20Instruments%20PDFs/PC16550D.pdf // Compatible with UART in Imperas Virtio model *** // // Compatible with most of PC16550D with the following known exceptions: diff --git a/wally-pipelined/src/uncore/uncore.sv b/wally-pipelined/src/uncore/uncore.sv index ada1eb93c..2d12d2b50 100644 --- a/wally-pipelined/src/uncore/uncore.sv +++ b/wally-pipelined/src/uncore/uncore.sv @@ -43,6 +43,10 @@ module uncore ( input logic HREADYEXT, HRESPEXT, output logic [`AHBW-1:0] HRDATA, output logic HREADY, HRESP, + // delayed signals + input logic [2:0] HADDRD, + input logic [3:0] HSIZED, + input logic HWRITED, // bus interface output logic DataAccessFaultM, // peripheral pins @@ -71,7 +75,7 @@ module uncore ( assign HSELUART = PreHSELUART && (HSIZE == 3'b000); // only byte writes to UART are supported // Enable read or write based on decoded address - assign MemRW = {~HWRITE, HWRITE}; + assign MemRW = {~HWRITE, HWRITED}; assign MemRWtim = MemRW & {2{HSELTim}}; assign MemRWclint = MemRW & {2{HSELCLINT}}; assign MemRWgpio = MemRW & {2{HSELGPIO}}; diff --git a/wally-pipelined/src/wally/wallypipelinedhart.sv b/wally-pipelined/src/wally/wallypipelinedhart.sv index 
408045e23..c491bfc23 100644 --- a/wally-pipelined/src/wally/wallypipelinedhart.sv +++ b/wally-pipelined/src/wally/wallypipelinedhart.sv @@ -45,11 +45,16 @@ module wallypipelinedhart ( output logic [2:0] HBURST, output logic [3:0] HPROT, output logic [1:0] HTRANS, - output logic HMASTLOCK + output logic HMASTLOCK, + // Delayed signals for subword write + output logic [2:0] HADDRD, + output logic [3:0] HSIZED, + output logic HWRITED ); - logic [1:0] ForwardAE, ForwardBE; - logic StallF, StallD, FlushD, FlushE, FlushM, FlushW; +// logic [1:0] ForwardAE, ForwardBE; + logic StallF, StallD, StallE, StallM, StallW; + logic FlushD, FlushE, FlushM, FlushW; logic RetM, TrapM; // new signals that must connect through DP @@ -79,26 +84,34 @@ module wallypipelinedhart ( logic FloatRegWriteW; // bus interface to dmem - logic [1:0] MemRWAlignedM; - logic [2:0] Funct3M; + logic MemReadM, MemWriteM; + logic [2:0] Funct3M; logic [`XLEN-1:0] MemAdrM, MemPAdrM, WriteDataM; - logic [`XLEN-1:0] ReadDataM, ReadDataW; + logic [`XLEN-1:0] ReadDataW; logic [`XLEN-1:0] InstrPAdrF; + logic InstrReadF; logic DataStall, InstrStall; logic InstrAckD, MemAckW; ifu ifu(.*); // instruction fetch unit: PC, branch prediction, instruction cache ieu ieu(.*); // inteber execution unit: integer register file, datapath and controller - dmem dmem(/*.Funct3M(InstrM[14:12]),*/ .*); // data cache unit + dmem dmem(.*); // data cache unit - ahblite ebu( // *** make IRData InstrF - .IReadF(1'b1), .IRData(), //.IReady(), - .DReadM(MemRWAlignedM[1]), .DWriteM(MemRWAlignedM[0]), - .DSizeM(Funct3M[1:0]), .DRData(ReadDataM), //.DReady(), - .UnsignedLoadM(Funct3M[2]), +/* + ahblite ebu( + //.InstrReadF(1'b0), + .InstrRData(InstrF), // hook up InstrF later + .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), + .*); +*/ +// changing from this to the line above breaks the program. auipc at 104 fails; seems to be flushed. 
+// Would need to insert instruction as InstrD, not InstrF + ahblite ebu( + .InstrReadF(1'b0), + .InstrRData(), // hook up InstrF later + .MemSizeM(Funct3M[1:0]), .UnsignedLoadM(Funct3M[2]), .*); - //assign InstrF = ReadDataM[31:0]; /* mdu mdu(.*); // multiply and divide unit diff --git a/wally-pipelined/src/wally/wallypipelinedsoc.sv b/wally-pipelined/src/wally/wallypipelinedsoc.sv index 9b0ed2456..bdb621726 100644 --- a/wally-pipelined/src/wally/wallypipelinedsoc.sv +++ b/wally-pipelined/src/wally/wallypipelinedsoc.sv @@ -64,6 +64,9 @@ module wallypipelinedsoc ( logic InstrAccessFaultF, DataAccessFaultM; logic TimerIntM, SwIntM; // from CLINT logic ExtIntM = 0; // not yet connected + logic [2:0] HADDRD; + logic [3:0] HSIZED; + logic HWRITED; // instantiate processor and memories wallypipelinedhart hart(.*); diff --git a/wally-pipelined/testbench/testbench-imperas.sv b/wally-pipelined/testbench/testbench-imperas.sv index 67a896445..4f5a24ffc 100644 --- a/wally-pipelined/testbench/testbench-imperas.sv +++ b/wally-pipelined/testbench/testbench-imperas.sv @@ -35,7 +35,7 @@ module testbench(); logic [`XLEN-1:0] signature[0:10000]; logic [`XLEN-1:0] testadr; string InstrFName, InstrDName, InstrEName, InstrMName, InstrWName; - logic [31:0] InstrW; + //logic [31:0] InstrW; logic [`XLEN-1:0] meminit; string tests64ic[] = '{ @@ -75,7 +75,7 @@ string tests64iNOc[] = { "rv64i/I-MISALIGN_JMP-01","2000" }; string tests64i[] = '{ - "rv64i/I-LW-01", "4110", + "rv64i/I-ENDIANESS-01", "2010", "rv64i/I-ADD-01", "3000", "rv64i/I-ADDI-01", "3000", "rv64i/I-ADDIW-01", "3000", @@ -262,7 +262,7 @@ string tests32i[] = { // Track names of instructions instrTrackerTB it(clk, reset, dut.hart.ieu.dp.FlushE, dut.hart.ifu.InstrD, dut.hart.ifu.InstrE, - dut.hart.ifu.InstrM, InstrW, + dut.hart.ifu.InstrM, dut.hart.ifu.InstrW, InstrDName, InstrEName, InstrMName, InstrWName); // initialize tests @@ -368,11 +368,12 @@ module instrTrackerTB( input logic clk, reset, FlushE, input logic [31:0] InstrD, 
input logic [31:0] InstrE, InstrM, - output logic [31:0] InstrW, + input logic [31:0] InstrW, +// output logic [31:0] InstrW, output string InstrDName, InstrEName, InstrMName, InstrWName); // stage Instr to Writeback for visualization - flopr #(32) InstrWReg(clk, reset, InstrM, InstrW); + // flopr #(32) InstrWReg(clk, reset, InstrM, InstrW); instrNameDecTB ddec(InstrD, InstrDName); instrNameDecTB edec(InstrE, InstrEName);