From b621eb78fbd5032e09bb7a4d6e7e929230dbe09d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 28 Jan 2022 11:43:49 -0600 Subject: [PATCH 01/37] Updated debug2 ila signal names. --- fpga/constraints/debug2.xdc | 2 +- fpga/generator/xlnx_ahblite_axi_bridge.tcl | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fpga/constraints/debug2.xdc b/fpga/constraints/debug2.xdc index 48046d1df..63fa17a40 100644 --- a/fpga/constraints/debug2.xdc +++ b/fpga/constraints/debug2.xdc @@ -282,7 +282,7 @@ connect_debug_port u_ila_0/probe65 [get_nets [list wallypipelinedsoc/core/priv.p create_debug_port u_ila_0 probe set_property port_width 1 [get_debug_ports u_ila_0/probe66] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe66] -connect_debug_port u_ila_0/probe66 [get_nets [list wallypipelinedsoc/core/priv.priv/trap/StorePageFaultM ]] +connect_debug_port u_ila_0/probe66 [get_nets [list wallypipelinedsoc/core/priv.priv/trap/StoreAmoPageFaultM ]] create_debug_port u_ila_0 probe set_property port_width 1 [get_debug_ports u_ila_0/probe67] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe67] diff --git a/fpga/generator/xlnx_ahblite_axi_bridge.tcl b/fpga/generator/xlnx_ahblite_axi_bridge.tcl index ab3839820..317a62c09 100644 --- a/fpga/generator/xlnx_ahblite_axi_bridge.tcl +++ b/fpga/generator/xlnx_ahblite_axi_bridge.tcl @@ -1,9 +1,15 @@ #set partNumber $::env(XILINX_PART) #set boardNmae $::env(XILINX_BOARD) + +# vcu118 board set partNumber xcvu9p-flga2104-2L-e set boardName xilinx.com:vcu118:part0:2.4 +# kcu105 board +#set partNumber xcku040-ffva1156-2-e +#set boardName xilinx.com:kcu105:part0:1.7 + set ipName xlnx_ahblite_axi_bridge create_project $ipName . -force -part $partNumber From 99bb2819445fb75806b84f6e729c7d67be8d2eee Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 1 Feb 2022 10:43:24 -0600 Subject: [PATCH 02/37] Updated fpga's bootloader to reflect the changes to the gpio address change. 
--- pipelined/src/uncore/ram.sv | 84 ++++++++++++++-------------- tests/testsBP/fpga-test-sdc/Makefile | 2 +- tests/testsBP/fpga-test-sdc/bios.s | 2 +- 3 files changed, 44 insertions(+), 44 deletions(-) diff --git a/pipelined/src/uncore/ram.sv b/pipelined/src/uncore/ram.sv index c5268f897..07c080255 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -59,48 +59,48 @@ module ram #(parameter BASE=0, RANGE = 65535) ( // *** need to address this preload for fpga. It should work as a preload file // but for some reason vivado is not synthesizing the preload. //$readmemh(PRELOAD, RAM); - RAM[0] = 64'h94e1819300002197; - RAM[1] = 64'h4281420141014081; - RAM[2] = 64'h4481440143814301; - RAM[3] = 64'h4681460145814501; - RAM[4] = 64'h4881480147814701; - RAM[5] = 64'h4a814a0149814901; - RAM[6] = 64'h4c814c014b814b01; - RAM[7] = 64'h4e814e014d814d01; - RAM[8] = 64'h0110011b4f814f01; - RAM[9] = 64'h059b45011161016e; - RAM[10] = 64'h0004063705fe0010; - RAM[11] = 64'h05a000ef8006061b; - RAM[12] = 64'h0ff003930000100f; - RAM[13] = 64'h4e952e3110012e37; - RAM[14] = 64'hc602829b0053f2b7; - RAM[15] = 64'h2023fe02dfe312fd; - RAM[16] = 64'h829b0053f2b7007e; - RAM[17] = 64'hfe02dfe312fdc602; - RAM[18] = 64'h4de31efd000e2023; - RAM[19] = 64'h059bf1402573fdd0; - RAM[20] = 64'h0000061705e20870; - RAM[21] = 64'h0010029b01260613; - RAM[22] = 64'h11010002806702fe; - RAM[23] = 64'h84b2842ae426e822; - RAM[24] = 64'h892ee04aec064505; - RAM[25] = 64'h06e000ef07e000ef; - RAM[26] = 64'h979334fd02905563; - RAM[27] = 64'h07930177d4930204; - RAM[28] = 64'h4089093394be2004; - RAM[29] = 64'h04138522008905b3; - RAM[30] = 64'h19e3014000ef2004; - RAM[31] = 64'h64a2644260e2fe94; - RAM[32] = 64'h6749808261056902; - RAM[33] = 64'hdfed8b8510472783; - RAM[34] = 64'h2423479110a73823; - RAM[35] = 64'h10472783674910f7; - RAM[36] = 64'h20058693ffed8b89; - RAM[37] = 64'h05a1118737836749; - RAM[38] = 64'hfed59be3fef5bc23; - RAM[39] = 64'h1047278367498082; - RAM[40] = 64'h67c98082dfed8b85; - 
RAM[41] = 64'h0000808210a7a023; + RAM[0] = 64'h94e1819300002197; + RAM[1] = 64'h4281420141014081; + RAM[2] = 64'h4481440143814301; + RAM[3] = 64'h4681460145814501; + RAM[4] = 64'h4881480147814701; + RAM[5] = 64'h4a814a0149814901; + RAM[6] = 64'h4c814c014b814b01; + RAM[7] = 64'h4e814e014d814d01; + RAM[8] = 64'h0110011b4f814f01; + RAM[9] = 64'h059b45011161016e; + RAM[10] = 64'h0004063705fe0010; + RAM[11] = 64'h05a000ef8006061b; + RAM[12] = 64'h0ff003930000100f; + RAM[13] = 64'h4e952e3110060e37; + RAM[14] = 64'hc602829b0053f2b7; + RAM[15] = 64'h2023fe02dfe312fd; + RAM[16] = 64'h829b0053f2b7007e; + RAM[17] = 64'hfe02dfe312fdc602; + RAM[18] = 64'h4de31efd000e2023; + RAM[19] = 64'h059bf1402573fdd0; + RAM[20] = 64'h0000061705e20870; + RAM[21] = 64'h0010029b01260613; + RAM[22] = 64'h11010002806702fe; + RAM[23] = 64'h84b2842ae426e822; + RAM[24] = 64'h892ee04aec064505; + RAM[25] = 64'h06e000ef07e000ef; + RAM[26] = 64'h979334fd02905563; + RAM[27] = 64'h07930177d4930204; + RAM[28] = 64'h4089093394be2004; + RAM[29] = 64'h04138522008905b3; + RAM[30] = 64'h19e3014000ef2004; + RAM[31] = 64'h64a2644260e2fe94; + RAM[32] = 64'h6749808261056902; + RAM[33] = 64'hdfed8b8510472783; + RAM[34] = 64'h2423479110a73823; + RAM[35] = 64'h10472783674910f7; + RAM[36] = 64'h20058693ffed8b89; + RAM[37] = 64'h05a1118737836749; + RAM[38] = 64'hfed59be3fef5bc23; + RAM[39] = 64'h1047278367498082; + RAM[40] = 64'h67c98082dfed8b85; + RAM[41] = 64'h0000808210a7a023; end // initial begin end // if (FPGA) diff --git a/tests/testsBP/fpga-test-sdc/Makefile b/tests/testsBP/fpga-test-sdc/Makefile index 0c6d3666c..348a67cd2 100644 --- a/tests/testsBP/fpga-test-sdc/Makefile +++ b/tests/testsBP/fpga-test-sdc/Makefile @@ -106,7 +106,7 @@ $(TARGET).memfile: $(TARGET) @echo 'Making object dump file.' 
@riscv64-unknown-elf-objdump -D $< > $<.objdump @echo 'Making memory file' - exe2memfile0.pl $< + riscv64-unknown-elf-elf2hex --bit-width 64 --input $^ --output $@ extractFunctionRadix.sh $<.objdump mkdir -p ../../imperas-riscv-tests/work/rv64BP/ cp -f $(TARGETDIR)/* ../../imperas-riscv-tests/work/rv64BP/ diff --git a/tests/testsBP/fpga-test-sdc/bios.s b/tests/testsBP/fpga-test-sdc/bios.s index eab7ae1f4..8057a277d 100644 --- a/tests/testsBP/fpga-test-sdc/bios.s +++ b/tests/testsBP/fpga-test-sdc/bios.s @@ -61,7 +61,7 @@ _start: # write to gpio li t2, 0xFF - la t3, 0x1001200C + la t3, 0x1006000C li t4, 5 loop: From 83fdedcec6d471f410b6cd18eb7a5e6b63f76345 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 4 Feb 2022 13:31:32 -0600 Subject: [PATCH 03/37] Working first cut of the cache changes moving the replay to a save/restore. The current implementation is too expensive costing (tag+linelen)*numway flip flops and muxes. --- pipelined/src/cache/cache.sv | 9 ++-- pipelined/src/cache/cachefsm.sv | 83 +++++++++++++++++++-------------- pipelined/src/cache/cacheway.sv | 75 ++++++++++++++++------------- 3 files changed, 96 insertions(+), 71 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index e554fa999..1bb8a88d1 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -105,8 +105,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( logic LRUWriteEn; logic [NUMWAYS-1:0] VDWriteEnableWay; logic SelFlush; - logic ResetOrFlushAdr, ResetOrFlushWay; - + logic ResetOrFlushAdr, ResetOrFlushWay; + logic save, restore; + ///////////////////////////////////////////////////////////////////////////////////////////// // Read Path ///////////////////////////////////////////////////////////////////////////////////////////// @@ -125,7 +126,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .WriteWordEnable(SRAMWordEnable), .TagWriteEnable(SRAMLineWayWriteEnable), 
.WriteData(SRAMWriteData), - .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .Victim(VictimWay), .Flush(FlushWay), .SelFlush, + .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .Victim(VictimWay), .Flush(FlushWay), + .save, .restore, .SelFlush, .SelectedReadDataLine(ReadDataLineWay), .WayHit, .VictimDirty(VictimDirtyWay), .VictimTag(VictimTagWay), .InvalidateAll(InvalidateCacheM)); if(NUMWAYS > 1) begin:vict @@ -213,5 +215,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .SRAMLineWriteEnable, .SelEvict, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, + .save, .restore, .VDWriteEnable, .LRUWriteEn); endmodule diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 115107bcd..d550427bc 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -32,49 +32,51 @@ module cachefsm (input logic clk, - input logic reset, + input logic reset, // inputs from IEU input logic [1:0] RW, input logic [1:0] Atomic, - input logic FlushCache, + input logic FlushCache, // hazard inputs - input logic CPUBusy, + input logic CPUBusy, // interlock fsm - input logic IgnoreRequest, + input logic IgnoreRequest, // Bus inputs - input logic CacheBusAck, + input logic CacheBusAck, // dcache internals - input logic CacheHit, - input logic VictimDirty, - input logic FlushAdrFlag, - input logic FlushWayFlag, + input logic CacheHit, + input logic VictimDirty, + input logic FlushAdrFlag, + input logic FlushWayFlag, // hazard outputs - output logic CacheStall, + output logic CacheStall, // counter outputs - output logic CacheMiss, - output logic CacheAccess, + output logic CacheMiss, + output logic CacheAccess, // Bus outputs - output logic CacheCommitted, - output logic CacheWriteLine, - output logic CacheFetchLine, + output logic CacheCommitted, + output logic CacheWriteLine, + output logic CacheFetchLine, // dcache internals 
output logic [1:0] SelAdr, - output logic SetValid, - output logic ClearValid, - output logic SetDirty, - output logic ClearDirty, - output logic SRAMWordWriteEnable, - output logic SRAMLineWriteEnable, - output logic SelEvict, - output logic LRUWriteEn, - output logic SelFlush, - output logic FlushAdrCntEn, - output logic FlushWayCntEn, - output logic FlushAdrCntRst, - output logic FlushWayCntRst, - output logic VDWriteEnable + output logic SetValid, + output logic ClearValid, + output logic SetDirty, + output logic ClearDirty, + output logic SRAMWordWriteEnable, + output logic SRAMLineWriteEnable, + output logic SelEvict, + output logic LRUWriteEn, + output logic SelFlush, + output logic FlushAdrCntEn, + output logic FlushWayCntEn, + output logic FlushAdrCntRst, + output logic FlushWayCntRst, + output logic save, + output logic restore, + output logic VDWriteEnable ); @@ -141,7 +143,8 @@ module cachefsm NextState = STATE_READY; CacheFetchLine = 1'b0; CacheWriteLine = 1'b0; - + save = 1'b0; + restore = 1'b0; case (CurrState) STATE_READY: begin @@ -178,7 +181,8 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; + save = 1'b1; end else begin SRAMWordWriteEnable = 1'b1; @@ -194,7 +198,8 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY; - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; + save = 1'b1; end else begin NextState = STATE_READY; @@ -210,7 +215,8 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY; - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; + save = 1'b1; end else begin NextState = STATE_READY; @@ -278,6 +284,7 @@ module cachefsm PreSelAdr = 2'b01; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; + save = 1'b1; end else begin SRAMWordWriteEnable = 1'b1; @@ -289,7 +296,8 @@ module cachefsm LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; + save = 1'b1; end else begin NextState = STATE_READY; @@ -304,7 
+312,8 @@ module cachefsm LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; + save = 1'b1; end else begin NextState = STATE_READY; @@ -325,9 +334,10 @@ module cachefsm STATE_CPU_BUSY: begin PreSelAdr = 2'b00; + restore = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; end else begin NextState = STATE_READY; @@ -339,6 +349,7 @@ module cachefsm SRAMWordWriteEnable = 1'b0; SetDirty = 1'b0; LRUWriteEn = 1'b0; + restore = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; end diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 0c623b8d4..d6ecfb4df 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -32,51 +32,52 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) ( - input logic clk, - input logic reset, + input logic clk, + input logic reset, - input logic [$clog2(NUMLINES)-1:0] RAdr, - input logic [`PA_BITS-1:0] PAdr, - input logic WriteEnable, - input logic VDWriteEnable, - input logic [LINELEN/`XLEN-1:0] WriteWordEnable, - input logic TagWriteEnable, - input logic [LINELEN-1:0] WriteData, - input logic SetValid, - input logic ClearValid, - input logic SetDirty, - input logic ClearDirty, - input logic SelEvict, - input logic Victim, - input logic InvalidateAll, - input logic SelFlush, - input logic Flush, + input logic [$clog2(NUMLINES)-1:0] RAdr, + input logic [`PA_BITS-1:0] PAdr, + input logic WriteEnable, + input logic VDWriteEnable, + input logic [LINELEN/`XLEN-1:0] WriteWordEnable, + input logic TagWriteEnable, + input logic [LINELEN-1:0] WriteData, + input logic SetValid, + input logic ClearValid, + input logic SetDirty, + input logic ClearDirty, + input logic SelEvict, + input logic Victim, + input logic InvalidateAll, + input logic SelFlush, + input logic Flush, + input 
logic save, restore, - output logic [LINELEN-1:0] SelectedReadDataLine, - output logic WayHit, - output logic VictimDirty, - output logic [TAGLEN-1:0] VictimTag); + output logic [LINELEN-1:0] SelectedReadDataLine, + output logic WayHit, + output logic VictimDirty, + output logic [TAGLEN-1:0] VictimTag); logic [NUMLINES-1:0] ValidBits; logic [NUMLINES-1:0] DirtyBits; - logic [LINELEN-1:0] ReadDataLine; - logic [TAGLEN-1:0] ReadTag; - logic Valid; - logic Dirty; + logic [LINELEN-1:0] ReadDataLine, ReadDataLineRaw, ReadDataLineSaved; + logic [TAGLEN-1:0] ReadTag, ReadTagRaw, ReadTagSaved; + logic Valid, ValidRaw, ValidSaved; + logic Dirty, DirtyRaw, DirtySaved; logic SelData; - logic SelTag; + logic SelTag; logic [$clog2(NUMLINES)-1:0] RAdrD; logic SetValidD, ClearValidD; logic SetDirtyD, ClearDirtyD; logic WriteEnableD, VDWriteEnableD; - + ///////////////////////////////////////////////////////////////////////////////////////////// // Tag Array ///////////////////////////////////////////////////////////////////////////////////////////// sram1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk(clk), - .Adr(RAdr), .ReadData(ReadTag), + .Adr(RAdr), .ReadData(ReadTagRaw), .WriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(TagWriteEnable)); // AND portion of distributed tag multiplexer @@ -92,7 +93,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, genvar words; for(words = 0; words < LINELEN/`XLEN; words++) begin: word sram1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk(clk), .Adr(RAdr), - .ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ), + .ReadData(ReadDataLineRaw[(words+1)*`XLEN-1:words*`XLEN] ), .WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]), .WriteEnable(WriteEnable & WriteWordEnable[words])); end @@ -115,7 +116,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); flop #(4) ValidCtrlDelayReg(clk, {SetValid, 
ClearValid, WriteEnable, VDWriteEnable}, {SetValidD, ClearValidD, WriteEnableD, VDWriteEnableD}); - assign Valid = ValidBits[RAdrD]; + assign ValidRaw = ValidBits[RAdrD]; ///////////////////////////////////////////////////////////////////////////////////////////// // Dirty Bits @@ -129,8 +130,18 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, else if (ClearDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= #1 1'b0; end flop #(2) DirtyCtlDelayReg(clk, {SetDirty, ClearDirty}, {SetDirtyD, ClearDirtyD}); - assign Dirty = DirtyBits[RAdrD]; + assign DirtyRaw = DirtyBits[RAdrD]; + flopenr #(1) cachedirtysavereg(clk, reset, save, DirtyRaw, DirtySaved); + mux2 #(1) saverestoredirtymux(DirtyRaw, DirtySaved, restore, Dirty); end else assign Dirty = 1'b0; + + // save restore option of handling cpu busy + flopen #(TAGLEN+LINELEN) cachereadsavereg(clk, save, {ReadTagRaw, ReadDataLineRaw}, {ReadTagSaved, ReadDataLineSaved}); + flopenr #(1) cachevalidsavereg(clk, reset, save, ValidRaw, ValidSaved); + mux2 #(1+TAGLEN+LINELEN) saverestoremux({ValidRaw, ReadTagRaw, ReadDataLineRaw}, {ValidSaved, ReadTagSaved, ReadDataLineSaved}, + restore, {Valid, ReadTag, ReadDataLine}); + + endmodule From 498c2b589afc2aaf4d82ba87059e2a6beb45cc59 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 4 Feb 2022 14:18:10 -0600 Subject: [PATCH 04/37] Optimization of cache save/restore. 
--- pipelined/src/cache/cache.sv | 20 ++++++++++++++++---- pipelined/src/cache/cacheway.sv | 26 ++++++++------------------ 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 1bb8a88d1..30a5c6bec 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -107,6 +107,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( logic SelFlush; logic ResetOrFlushAdr, ResetOrFlushWay; logic save, restore; + logic [NUMWAYS-1:0] WayHitSaved, WayHitRaw; + logic [LINELEN-1:0] ReadDataLineRaw, ReadDataLineSaved; ///////////////////////////////////////////////////////////////////////////////////////////// // Read Path @@ -127,8 +129,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .TagWriteEnable(SRAMLineWayWriteEnable), .WriteData(SRAMWriteData), .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .Victim(VictimWay), .Flush(FlushWay), - .save, .restore, .SelFlush, - .SelectedReadDataLine(ReadDataLineWay), .WayHit, .VictimDirty(VictimDirtyWay), .VictimTag(VictimTagWay), + .SelFlush, + .SelectedReadDataLine(ReadDataLineWay), .WayHit(WayHitRaw), .VictimDirty(VictimDirtyWay), .VictimTag(VictimTagWay), .InvalidateAll(InvalidateCacheM)); if(NUMWAYS > 1) begin:vict cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy( @@ -139,10 +141,20 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( // ReadDataLineWay is a 2d array of cache line len by number of ways. // Need to OR together each way in a bitwise manner. // Final part of the AO Mux. First is the AND in the cacheway. 
- or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLine)); - or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); + or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineRaw)); + or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); + // Because of the sram clocked read when the ieu is stalled the read data maybe lost. + // There are two ways to resolve. 1. We can replay the read of the sram or we can save + // the data. Replay is eaiser but creates a longer critical path. + // save/restore only wayhit and readdata. + flopenr #(NUMWAYS) wayhitsavereg(clk, save, reset, WayHitRaw, WayHitSaved); + flopen #(LINELEN) cachereadsavereg(clk, save, ReadDataLineRaw, ReadDataLineSaved); + mux2 #(NUMWAYS+LINELEN) saverestoremux({WayHitRaw, ReadDataLineRaw}, {WayHitSaved, ReadDataLineSaved}, + restore, {WayHit, ReadDataLine}); + + // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can // easily build a variable input mux. // *** move this to LSU and IFU, also remove mux from busdp into LSU. 
diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index d6ecfb4df..3bca5eb0b 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -51,7 +51,6 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic InvalidateAll, input logic SelFlush, input logic Flush, - input logic save, restore, output logic [LINELEN-1:0] SelectedReadDataLine, output logic WayHit, @@ -60,10 +59,10 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic [NUMLINES-1:0] ValidBits; logic [NUMLINES-1:0] DirtyBits; - logic [LINELEN-1:0] ReadDataLine, ReadDataLineRaw, ReadDataLineSaved; - logic [TAGLEN-1:0] ReadTag, ReadTagRaw, ReadTagSaved; - logic Valid, ValidRaw, ValidSaved; - logic Dirty, DirtyRaw, DirtySaved; + logic [LINELEN-1:0] ReadDataLine; + logic [TAGLEN-1:0] ReadTag; + logic Valid; + logic Dirty; logic SelData; logic SelTag; @@ -77,7 +76,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, ///////////////////////////////////////////////////////////////////////////////////////////// sram1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk(clk), - .Adr(RAdr), .ReadData(ReadTagRaw), + .Adr(RAdr), .ReadData(ReadTag), .WriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(TagWriteEnable)); // AND portion of distributed tag multiplexer @@ -93,7 +92,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, genvar words; for(words = 0; words < LINELEN/`XLEN; words++) begin: word sram1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk(clk), .Adr(RAdr), - .ReadData(ReadDataLineRaw[(words+1)*`XLEN-1:words*`XLEN] ), + .ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ), .WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]), .WriteEnable(WriteEnable & WriteWordEnable[words])); end @@ -116,7 +115,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, flop 
#($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); flop #(4) ValidCtrlDelayReg(clk, {SetValid, ClearValid, WriteEnable, VDWriteEnable}, {SetValidD, ClearValidD, WriteEnableD, VDWriteEnableD}); - assign ValidRaw = ValidBits[RAdrD]; + assign Valid = ValidBits[RAdrD]; ///////////////////////////////////////////////////////////////////////////////////////////// // Dirty Bits @@ -130,18 +129,9 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, else if (ClearDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= #1 1'b0; end flop #(2) DirtyCtlDelayReg(clk, {SetDirty, ClearDirty}, {SetDirtyD, ClearDirtyD}); - assign DirtyRaw = DirtyBits[RAdrD]; - flopenr #(1) cachedirtysavereg(clk, reset, save, DirtyRaw, DirtySaved); - mux2 #(1) saverestoredirtymux(DirtyRaw, DirtySaved, restore, Dirty); + assign Dirty = DirtyBits[RAdrD]; end else assign Dirty = 1'b0; - // save restore option of handling cpu busy - flopen #(TAGLEN+LINELEN) cachereadsavereg(clk, save, {ReadTagRaw, ReadDataLineRaw}, {ReadTagSaved, ReadDataLineSaved}); - flopenr #(1) cachevalidsavereg(clk, reset, save, ValidRaw, ValidSaved); - mux2 #(1+TAGLEN+LINELEN) saverestoremux({ValidRaw, ReadTagRaw, ReadDataLineRaw}, {ValidSaved, ReadTagSaved, ReadDataLineSaved}, - restore, {Valid, ReadTag, ReadDataLine}); - - endmodule From ceb2cc30b9704be1972519025ba5692283d14d3f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 4 Feb 2022 14:35:12 -0600 Subject: [PATCH 05/37] Second optimization of save/restore. --- pipelined/src/cache/cache.sv | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 30a5c6bec..6ffd2b42b 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -141,7 +141,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( // ReadDataLineWay is a 2d array of cache line len by number of ways. 
// Need to OR together each way in a bitwise manner. // Final part of the AO Mux. First is the AND in the cacheway. - or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLineRaw)); + or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLine)); or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); @@ -150,22 +150,21 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( // the data. Replay is eaiser but creates a longer critical path. // save/restore only wayhit and readdata. flopenr #(NUMWAYS) wayhitsavereg(clk, save, reset, WayHitRaw, WayHitSaved); - flopen #(LINELEN) cachereadsavereg(clk, save, ReadDataLineRaw, ReadDataLineSaved); - mux2 #(NUMWAYS+LINELEN) saverestoremux({WayHitRaw, ReadDataLineRaw}, {WayHitSaved, ReadDataLineSaved}, - restore, {WayHit, ReadDataLine}); + mux2 #(NUMWAYS) saverestoremux(WayHitRaw, WayHitSaved, restore, WayHit); // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can // easily build a variable input mux. // *** move this to LSU and IFU, also remove mux from busdp into LSU. 
// *** give this a module name to match block diagram + logic [`XLEN-1:0] ReadDataWordRaw, ReadDataWordSaved; genvar index; - if(DCACHE == 1) begin: readdata + if(DCACHE == 1) begin: readdata for (index = 0; index < WORDSPERLINE; index++) begin:readdatalinesetsmux assign ReadDataLineSets[index] = ReadDataLine[((index+1)*`XLEN)-1: (index*`XLEN)]; end // variable input mux - assign ReadDataWord = ReadDataLineSets[PAdr[LOGWPL + LOGXLENBYTES - 1 : LOGXLENBYTES]]; + assign ReadDataWordRaw = ReadDataLineSets[PAdr[LOGWPL + LOGXLENBYTES - 1 : LOGXLENBYTES]]; end else begin: readdata logic [31:0] ReadLineSetsF [LINELEN/16-1:0]; logic [31:0] FinalInstrRawF; @@ -173,9 +172,12 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( assign ReadLineSetsF[index] = ReadDataLine[((index+1)*16)+16-1 : (index*16)]; assign ReadLineSetsF[LINELEN/16-1] = {16'b0, ReadDataLine[LINELEN-1:LINELEN-16]}; assign FinalInstrRawF = ReadLineSetsF[PAdr[$clog2(LINELEN / 32) + 1 : 1]]; - if (`XLEN == 64) assign ReadDataWord = {32'b0, FinalInstrRawF}; - else assign ReadDataWord = FinalInstrRawF; + if (`XLEN == 64) assign ReadDataWordRaw = {32'b0, FinalInstrRawF}; + else assign ReadDataWordRaw = FinalInstrRawF; end + flopen #(`XLEN) cachereaddatasavereg(clk, save, ReadDataWordRaw, ReadDataWordSaved); + mux2 #(`XLEN) readdatasaverestoremux(ReadDataWordRaw, ReadDataWordSaved, + restore, ReadDataWord); ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write Enables From f6f0539e10000aee5ca9e91c2d2b25160882a141 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 4 Feb 2022 16:18:01 -0600 Subject: [PATCH 06/37] Got separate module for the sub cache line read. 
--- pipelined/src/cache/cache.sv | 26 ++++------ pipelined/src/cache/cachefsm.sv | 14 ++--- pipelined/src/cache/subcachelineread.sv | 68 +++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 22 deletions(-) create mode 100644 pipelined/src/cache/subcachelineread.sv diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 6ffd2b42b..2abfbced3 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -157,27 +157,23 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( // easily build a variable input mux. // *** move this to LSU and IFU, also remove mux from busdp into LSU. // *** give this a module name to match block diagram - logic [`XLEN-1:0] ReadDataWordRaw, ReadDataWordSaved; genvar index; if(DCACHE == 1) begin: readdata + subcachelineread #(LINELEN, `XLEN, `XLEN) subcachelineread( + .clk, .reset, .PAdr, .save, .restore, + .ReadDataLine, .ReadDataWord); + // *** only here temporary for (index = 0; index < WORDSPERLINE; index++) begin:readdatalinesetsmux assign ReadDataLineSets[index] = ReadDataLine[((index+1)*`XLEN)-1: (index*`XLEN)]; end - // variable input mux - assign ReadDataWordRaw = ReadDataLineSets[PAdr[LOGWPL + LOGXLENBYTES - 1 : LOGXLENBYTES]]; - end else begin: readdata - logic [31:0] ReadLineSetsF [LINELEN/16-1:0]; - logic [31:0] FinalInstrRawF; - for(index = 0; index < LINELEN / 16 - 1; index++) - assign ReadLineSetsF[index] = ReadDataLine[((index+1)*16)+16-1 : (index*16)]; - assign ReadLineSetsF[LINELEN/16-1] = {16'b0, ReadDataLine[LINELEN-1:LINELEN-16]}; - assign FinalInstrRawF = ReadLineSetsF[PAdr[$clog2(LINELEN / 32) + 1 : 1]]; - if (`XLEN == 64) assign ReadDataWordRaw = {32'b0, FinalInstrRawF}; - else assign ReadDataWordRaw = FinalInstrRawF; + end else begin: readdata + logic [31:0] FinalInstrRawF; + subcachelineread #(LINELEN, 32, 16) subcachelineread( + .clk, .reset, .PAdr, .save, .restore, + .ReadDataLine, .ReadDataWord(FinalInstrRawF)); + if (`XLEN == 64) assign 
ReadDataWord = {32'b0, FinalInstrRawF}; + else assign ReadDataWord = FinalInstrRawF; end - flopen #(`XLEN) cachereaddatasavereg(clk, save, ReadDataWordRaw, ReadDataWordSaved); - mux2 #(`XLEN) readdatasaverestoremux(ReadDataWordRaw, ReadDataWordSaved, - restore, ReadDataWord); ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write Enables diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index d550427bc..3b60efe1f 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -181,7 +181,7 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; - //PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; `REPLAY save = 1'b1; end else begin @@ -198,7 +198,7 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY; - //PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; `REPLAY save = 1'b1; end else begin @@ -215,7 +215,7 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY; - //PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; `REPLAY save = 1'b1; end else begin @@ -276,7 +276,7 @@ module cachefsm end STATE_MISS_READ_WORD_DELAY: begin - //PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; `REPLAY SRAMWordWriteEnable = 1'b0; SetDirty = 1'b0; LRUWriteEn = 1'b0; @@ -296,7 +296,7 @@ module cachefsm LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - //PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; `REPLAY save = 1'b1; end else begin @@ -312,7 +312,7 @@ module cachefsm LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - //PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; `REPLAY save = 1'b1; end else begin @@ -337,7 +337,7 @@ module cachefsm restore = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - //PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; `REPLAY end else begin NextState = STATE_READY; diff --git a/pipelined/src/cache/subcachelineread.sv b/pipelined/src/cache/subcachelineread.sv new file mode 100644 index 000000000..e42f5e711 --- /dev/null 
+++ b/pipelined/src/cache/subcachelineread.sv @@ -0,0 +1,68 @@ +/////////////////////////////////////////// +// subcachelineread +// +// Written: Ross Thompson ross1728@gmail.com February 04, 2022 +// Muxes the cache line down to the word size. Also includes possible save/restore registers/muxes. +// +// Purpose: Selects the addressed word from a cache line +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. 
+//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL)( + input logic clk, + input logic reset, + input logic [`PA_BITS-1:0] PAdr, + input logic save, restore, + input logic [LINELEN-1:0] ReadDataLine, + output logic [WORDLEN-1:0] ReadDataWord); + + localparam WORDSPERLINE = LINELEN/MUXINTERVAL; + localparam PADLEN = WORDLEN-MUXINTERVAL; + // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can + // easily build a variable input mux. + // *** move this to LSU and IFU, also remove mux from busdp into LSU. + // *** give this a module name to match block diagram + logic [LINELEN+(WORDLEN-MUXINTERVAL)-1:0] ReadDataLinePad; + logic [WORDLEN-1:0] ReadDataLineSets [(LINELEN/MUXINTERVAL)-1:0]; + logic [WORDLEN-1:0] ReadDataWordRaw, ReadDataWordSaved; + + if (PADLEN > 0) begin + logic [PADLEN-1:0] Pad; + assign Pad = '0; + assign ReadDataLinePad = {Pad, ReadDataLine}; + end else assign ReadDataLinePad = ReadDataLine; + + + genvar index; + for (index = 0; index < WORDSPERLINE; index++) begin:readdatalinesetsmux + assign ReadDataLineSets[index] = ReadDataLinePad[(index*MUXINTERVAL)+WORDLEN-1: (index*MUXINTERVAL)]; + end + // variable input mux + assign ReadDataWordRaw = ReadDataLineSets[PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]]; + flopen #(WORDLEN) cachereaddatasavereg(clk, save, ReadDataWordRaw, ReadDataWordSaved); + mux2 #(WORDLEN) readdatasaverestoremux(ReadDataWordRaw, ReadDataWordSaved, + restore, ReadDataWord); + +endmodule From c8463685378a60708c9756bfed1940a0f3991094 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 4 Feb 2022 20:42:53 -0600 Subject: [PATCH 07/37] Moved the sub cache line read logic to lsu/ifu. 
--- pipelined/src/cache/cache.sv | 58 +++++++++++++++--------------------- pipelined/src/ifu/ifu.sv | 21 ++++++++----- pipelined/src/lsu/busdp.sv | 10 +++---- pipelined/src/lsu/lsu.sv | 13 ++++++-- 4 files changed, 53 insertions(+), 49 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 2abfbced3..e08b65e87 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -31,32 +31,33 @@ `include "wally-config.vh" module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( - input logic clk, - input logic reset, + input logic clk, + input logic reset, // cpu side - input logic CPUBusy, - input logic [1:0] RW, - input logic [1:0] Atomic, - input logic FlushCache, - input logic InvalidateCacheM, - input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. - input logic [`PA_BITS-1:0] PAdr, // physical address - input logic [`XLEN-1:0] FinalWriteData, - output logic [`XLEN-1:0] ReadDataWord, - output logic CacheCommitted, - output logic CacheStall, + input logic CPUBusy, + input logic [1:0] RW, + input logic [1:0] Atomic, + input logic FlushCache, + input logic InvalidateCacheM, + input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. 
+ input logic [`PA_BITS-1:0] PAdr, // physical address + input logic [`XLEN-1:0] FinalWriteData, + output logic CacheCommitted, + output logic CacheStall, // to performance counters to cpu - output logic CacheMiss, - output logic CacheAccess, + output logic CacheMiss, + output logic CacheAccess, + output logic save, restore, // lsu control - input logic IgnoreRequest, + input logic IgnoreRequest, // Bus fsm interface - output logic CacheFetchLine, - output logic CacheWriteLine, - input logic CacheBusAck, - output logic [`PA_BITS-1:0] CacheBusAdr, - input logic [LINELEN-1:0] CacheMemWriteData, - output logic [`XLEN-1:0] ReadDataLineSets [(LINELEN/`XLEN)-1:0]); + output logic CacheFetchLine, + output logic CacheWriteLine, + input logic CacheBusAck, + output logic [`PA_BITS-1:0] CacheBusAdr, + input logic [LINELEN-1:0] CacheMemWriteData, + output logic [LINELEN-1:0] ReadDataLine, + output logic [`XLEN-1:0] ReadDataLineSets [(LINELEN/`XLEN)-1:0]); // Cache parameters localparam LINEBYTELEN = LINELEN/8; @@ -77,7 +78,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [NUMWAYS-1:0] WayHit; logic CacheHit; - logic [LINELEN-1:0] ReadDataLine; logic [WORDSPERLINE-1:0] SRAMWordEnable; logic SRAMWordWriteEnable; logic SRAMLineWriteEnable; @@ -106,7 +106,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( logic [NUMWAYS-1:0] VDWriteEnableWay; logic SelFlush; logic ResetOrFlushAdr, ResetOrFlushWay; - logic save, restore; logic [NUMWAYS-1:0] WayHitSaved, WayHitRaw; logic [LINELEN-1:0] ReadDataLineRaw, ReadDataLineSaved; @@ -159,21 +158,12 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( // *** give this a module name to match block diagram genvar index; if(DCACHE == 1) begin: readdata - subcachelineread #(LINELEN, `XLEN, `XLEN) subcachelineread( - .clk, .reset, .PAdr, .save, .restore, - .ReadDataLine, .ReadDataWord); // *** only here temporary for (index = 0; 
index < WORDSPERLINE; index++) begin:readdatalinesetsmux assign ReadDataLineSets[index] = ReadDataLine[((index+1)*`XLEN)-1: (index*`XLEN)]; end end else begin: readdata - logic [31:0] FinalInstrRawF; - subcachelineread #(LINELEN, 32, 16) subcachelineread( - .clk, .reset, .PAdr, .save, .restore, - .ReadDataLine, .ReadDataWord(FinalInstrRawF)); - if (`XLEN == 64) assign ReadDataWord = {32'b0, FinalInstrRawF}; - else assign ReadDataWord = FinalInstrRawF; - end + end ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write Enables diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index ea1c504a7..afbf46761 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -92,7 +92,7 @@ module ifu ( logic [`XLEN-3:0] PCPlusUpperF; logic CompressedF; logic [31:0] InstrRawD, InstrRawF; - logic [`XLEN-1:0] FinalInstrRawF; + logic [31:0] FinalInstrRawF; logic [31:0] InstrE; logic [`XLEN-1:0] PCD; @@ -180,12 +180,14 @@ module ifu ( end else begin : bus localparam integer WORDSPERLINE = (`IMEM == `MEM_CACHE) ? `ICACHE_LINELENINBITS/`XLEN : 1; localparam integer LINELEN = (`IMEM == `MEM_CACHE) ? 
`ICACHE_LINELENINBITS : `XLEN; + logic [LINELEN-1:0] ReadDataLine; logic [LINELEN-1:0] ICacheMemWriteData; logic [`PA_BITS-1:0] ICacheBusAdr; logic ICacheBusAck; - - - busdp #(WORDSPERLINE, LINELEN) + logic save,restore; + logic [31:0] temp; + + busdp #(WORDSPERLINE, LINELEN, 32) busdp(.clk, .reset, .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), .LSUBusRead(IFUBusRead), .LSUBusHWDATA(), .LSUBusSize(), @@ -193,10 +195,14 @@ module ifu ( .ReadDataLineSetsM(), .DCacheFetchLine(ICacheFetchLine), .DCacheWriteLine(1'b0), .DCacheBusAck(ICacheBusAck), .DCacheMemWriteData(ICacheMemWriteData), .LSUPAdrM(PCPF), - .FinalAMOWriteDataM(), .ReadDataWordM(FinalInstrRawF), .ReadDataWordMuxM(AllInstrRawF), + .FinalAMOWriteDataM(), .ReadDataWordM(FinalInstrRawF), .ReadDataWordMuxM(AllInstrRawF[31:0]), .IgnoreRequest(ITLBMissF), .LSURWM(2'b10), .CPUBusy, .CacheableM(CacheableF), .BusStall, .BusCommittedM()); + subcachelineread #(LINELEN, 32, 16) subcachelineread( + .clk, .reset, .PAdr(PCPF), .save, .restore, + .ReadDataLine, .ReadDataWord(FinalInstrRawF)); + if(`IMEM == `MEM_CACHE) begin : icache logic [1:0] IFURWF; assign IFURWF = CacheableF ? 
2'b10 : 2'b00; @@ -207,8 +213,9 @@ module ifu ( icache(.clk, .reset, .CPUBusy, .IgnoreRequest(ITLBMissF), .CacheMemWriteData(ICacheMemWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), - .ReadDataWord(FinalInstrRawF), .CacheFetchLine(ICacheFetchLine), - .CacheWriteLine(), .ReadDataLineSets(), + .CacheFetchLine(ICacheFetchLine), + .CacheWriteLine(), .ReadDataLineSets(), .ReadDataLine(ReadDataLine), + .save, .restore, .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), .FinalWriteData('0), .RW(IFURWF), diff --git a/pipelined/src/lsu/busdp.sv b/pipelined/src/lsu/busdp.sv index ac54e303f..9fc5887c2 100644 --- a/pipelined/src/lsu/busdp.sv +++ b/pipelined/src/lsu/busdp.sv @@ -34,7 +34,7 @@ `include "wally-config.vh" -module busdp #(parameter WORDSPERLINE, parameter LINELEN) +module busdp #(parameter WORDSPERLINE, parameter LINELEN, WORDLEN) ( input logic clk, reset, // bus interface @@ -58,8 +58,8 @@ module busdp #(parameter WORDSPERLINE, parameter LINELEN) // lsu interface input logic [`PA_BITS-1:0] LSUPAdrM, input logic [`XLEN-1:0] FinalAMOWriteDataM, - input logic [`XLEN-1:0] ReadDataWordM, - output logic [`XLEN-1:0] ReadDataWordMuxM, + input logic [WORDLEN-1:0] ReadDataWordM, + output logic [WORDLEN-1:0] ReadDataWordMuxM, input logic IgnoreRequest, input logic [1:0] LSURWM, input logic CPUBusy, @@ -90,8 +90,8 @@ module busdp #(parameter WORDSPERLINE, parameter LINELEN) .d0(PreLSUBusHWDATA), .d1(FinalAMOWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); mux2 #(3) lsubussizemux( .d0(`XLEN == 32 ? 
3'b010 : 3'b011), .d1(LSUFunct3M), .s(SelUncachedAdr), .y(LSUBusSize)); - mux2 #(`XLEN) UnCachedDataMux( - .d0(ReadDataWordM), .d1(DCacheMemWriteData[`XLEN-1:0]), .s(SelUncachedAdr), .y(ReadDataWordMuxM)); + mux2 #(WORDLEN) UnCachedDataMux( + .d0(ReadDataWordM), .d1(DCacheMemWriteData[WORDLEN-1:0]), .s(SelUncachedAdr), .y(ReadDataWordMuxM)); busfsm #(WordCountThreshold, LOGWPL, (`DMEM == `MEM_CACHE)) // *** cleanup busfsm(.clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine, diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index fdaf56fa9..18c0ccfb2 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -184,19 +184,25 @@ module lsu ( localparam integer WORDSPERLINE = (`DMEM == `MEM_CACHE) ? `DCACHE_LINELENINBITS/`XLEN : 1; localparam integer LINELEN = (`DMEM == `MEM_CACHE) ? `DCACHE_LINELENINBITS : `XLEN; logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0]; + logic [LINELEN-1:0] ReadDataLineM; logic [LINELEN-1:0] DCacheMemWriteData; logic [`PA_BITS-1:0] DCacheBusAdr; logic DCacheWriteLine; logic DCacheFetchLine; logic DCacheBusAck; + logic save,restore; - busdp #(WORDSPERLINE, LINELEN) busdp( + busdp #(WORDSPERLINE, LINELEN, `XLEN) busdp( .clk, .reset, .LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusHWDATA, .LSUBusSize, .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .ReadDataLineSetsM, .DCacheFetchLine, .DCacheWriteLine, .DCacheBusAck, .DCacheMemWriteData, .LSUPAdrM, .FinalAMOWriteDataM, .ReadDataWordM, .ReadDataWordMuxM, .IgnoreRequest, .LSURWM, .CPUBusy, .CacheableM, .BusStall, .BusCommittedM); + + subcachelineread #(LINELEN, `XLEN, `XLEN) subcachelineread( + .clk, .reset, .PAdr(LSUPAdrM), .save, .restore, + .ReadDataLine(ReadDataLineM), .ReadDataWord(ReadDataWordM)); if(`DMEM == `MEM_CACHE) begin : dcache cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), @@ -204,10 +210,11 @@ module lsu ( .clk, .reset, .CPUBusy, .RW(CacheableM ? 
LSURWM : 2'b00), .FlushCache(FlushDCacheM), .Atomic(CacheableM ? LSUAtomicM : 2'b00), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .FinalWriteData(FinalWriteDataM), .ReadDataWord(ReadDataWordM), + .save, .restore, + .FinalWriteData(FinalWriteDataM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequest, .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), - .ReadDataLineSets(ReadDataLineSetsM), .CacheMemWriteData(DCacheMemWriteData), + .ReadDataLineSets(ReadDataLineSetsM), .ReadDataLine(ReadDataLineM), .CacheMemWriteData(DCacheMemWriteData), .CacheFetchLine(DCacheFetchLine), .CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCacheM(1'b0)); From 34cf77797a165c777bd6307a2774b581fb446699 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 4 Feb 2022 22:30:04 -0600 Subject: [PATCH 08/37] Merged together the two sub cache line read muxes. One mux was used for loads and the other for eviction. --- pipelined/src/ifu/ifu.sv | 6 ++++-- pipelined/src/lsu/busdp.sv | 17 +++++++---------- pipelined/src/lsu/lsu.sv | 26 ++++++++++++++++++++------ 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index afbf46761..d44e61f3c 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -180,6 +180,7 @@ module ifu ( end else begin : bus localparam integer WORDSPERLINE = (`IMEM == `MEM_CACHE) ? `ICACHE_LINELENINBITS/`XLEN : 1; localparam integer LINELEN = (`IMEM == `MEM_CACHE) ? `ICACHE_LINELENINBITS : `XLEN; + localparam integer LOGWPL = (`DMEM == `MEM_CACHE) ? 
$clog2(WORDSPERLINE) : 1; logic [LINELEN-1:0] ReadDataLine; logic [LINELEN-1:0] ICacheMemWriteData; logic [`PA_BITS-1:0] ICacheBusAdr; @@ -187,11 +188,12 @@ module ifu ( logic save,restore; logic [31:0] temp; - busdp #(WORDSPERLINE, LINELEN, 32) + busdp #(WORDSPERLINE, LINELEN, 32, LOGWPL) busdp(.clk, .reset, .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), - .LSUBusRead(IFUBusRead), .LSUBusHWDATA(), .LSUBusSize(), + .LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr), + .WordCount(), .SelUncachedAdr(), .ReadDataLineSetsM(), .DCacheFetchLine(ICacheFetchLine), .DCacheWriteLine(1'b0), .DCacheBusAck(ICacheBusAck), .DCacheMemWriteData(ICacheMemWriteData), .LSUPAdrM(PCPF), diff --git a/pipelined/src/lsu/busdp.sv b/pipelined/src/lsu/busdp.sv index 9fc5887c2..dea671103 100644 --- a/pipelined/src/lsu/busdp.sv +++ b/pipelined/src/lsu/busdp.sv @@ -34,7 +34,7 @@ `include "wally-config.vh" -module busdp #(parameter WORDSPERLINE, parameter LINELEN, WORDLEN) +module busdp #(parameter WORDSPERLINE, LINELEN, WORDLEN, LOGWPL) ( input logic clk, reset, // bus interface @@ -42,14 +42,15 @@ module busdp #(parameter WORDSPERLINE, parameter LINELEN, WORDLEN) input logic LSUBusAck, output logic LSUBusWrite, output logic LSUBusRead, - output logic [`XLEN-1:0] LSUBusHWDATA, +// output logic [`XLEN-1:0] LSUBusHWDATA, output logic [2:0] LSUBusSize, input logic [2:0] LSUFunct3M, output logic [`PA_BITS-1:0] LSUBusAdr, - + output logic [LOGWPL-1:0] WordCount, + output logic SelUncachedAdr, // cache interface. input logic [`PA_BITS-1:0] DCacheBusAdr, - input var logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0], + input var logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0], input logic DCacheFetchLine, input logic DCacheWriteLine, output logic DCacheBusAck, @@ -69,12 +70,10 @@ module busdp #(parameter WORDSPERLINE, parameter LINELEN, WORDLEN) localparam integer WordCountThreshold = (`DMEM == `MEM_CACHE) ? 
WORDSPERLINE - 1 : 0; - localparam integer LOGWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1; - logic SelUncachedAdr; logic [`XLEN-1:0] PreLSUBusHWDATA; logic [`PA_BITS-1:0] LocalLSUBusAdr; - logic [LOGWPL-1:0] WordCount; + genvar index; for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer @@ -85,9 +84,7 @@ module busdp #(parameter WORDSPERLINE, parameter LINELEN, WORDLEN) mux2 #(`PA_BITS) localadrmux(DCacheBusAdr, LSUPAdrM, SelUncachedAdr, LocalLSUBusAdr); assign LSUBusAdr = ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) + LocalLSUBusAdr; - assign PreLSUBusHWDATA = ReadDataLineSetsM[WordCount]; // only in lsu, not ifu - mux2 #(`XLEN) lsubushwdatamux( - .d0(PreLSUBusHWDATA), .d1(FinalAMOWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); + //assign PreLSUBusHWDATA = ReadDataWordM;// ReadDataLineSetsM[WordCount]; // only in lsu, not ifu mux2 #(3) lsubussizemux( .d0(`XLEN == 32 ? 3'b010 : 3'b011), .d1(LSUFunct3M), .s(SelUncachedAdr), .y(LSUBusSize)); mux2 #(WORDLEN) UnCachedDataMux( diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 18c0ccfb2..dcaf683f1 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -173,6 +173,8 @@ module lsu ( logic [`XLEN-1:0] FinalAMOWriteDataM, FinalWriteDataM; logic [`XLEN-1:0] ReadDataWordM; logic [`XLEN-1:0] ReadDataWordMuxM; + logic SelUncachedAdr; + if (`DMEM == `MEM_TIM) begin : dtim dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM, @@ -183,25 +185,37 @@ module lsu ( end else begin : bus localparam integer WORDSPERLINE = (`DMEM == `MEM_CACHE) ? `DCACHE_LINELENINBITS/`XLEN : 1; localparam integer LINELEN = (`DMEM == `MEM_CACHE) ? `DCACHE_LINELENINBITS : `XLEN; - logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0]; + localparam integer LOGWPL = (`DMEM == `MEM_CACHE) ? 
$clog2(WORDSPERLINE) : 1; logic [LINELEN-1:0] ReadDataLineM; logic [LINELEN-1:0] DCacheMemWriteData; logic [`PA_BITS-1:0] DCacheBusAdr; logic DCacheWriteLine; logic DCacheFetchLine; logic DCacheBusAck; - logic save,restore; - - busdp #(WORDSPERLINE, LINELEN, `XLEN) busdp( + logic save, restore; + logic [`PA_BITS-1:0] WordOffsetAddr; + logic SelBus; + logic [LOGWPL-1:0] WordCount; + logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0]; + logic [`PA_BITS-1-`XLEN/8-LOGWPL:0] Pad; + + busdp #(WORDSPERLINE, LINELEN, `XLEN, LOGWPL) busdp( .clk, .reset, - .LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusHWDATA, .LSUBusSize, + .LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize, + .WordCount, .SelUncachedAdr, .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .ReadDataLineSetsM, .DCacheFetchLine, .DCacheWriteLine, .DCacheBusAck, .DCacheMemWriteData, .LSUPAdrM, .FinalAMOWriteDataM, .ReadDataWordM, .ReadDataWordMuxM, .IgnoreRequest, .LSURWM, .CPUBusy, .CacheableM, .BusStall, .BusCommittedM); + assign Pad = '0; + assign WordOffsetAddr = LSUBusWrite ? ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) : LSUPAdrM; + mux2 #(`XLEN) lsubushwdatamux( + .d0(ReadDataWordM), .d1(FinalAMOWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); + + subcachelineread #(LINELEN, `XLEN, `XLEN) subcachelineread( - .clk, .reset, .PAdr(LSUPAdrM), .save, .restore, + .clk, .reset, .PAdr(WordOffsetAddr), .save, .restore, .ReadDataLine(ReadDataLineM), .ReadDataWord(ReadDataWordM)); if(`DMEM == `MEM_CACHE) begin : dcache From 53551ab53328025abef7db33a4743e6d211198ca Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 4 Feb 2022 22:39:13 -0600 Subject: [PATCH 09/37] Moved the hwdata mux back into the busdp. 
--- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/busdp.sv | 9 +++++++-- pipelined/src/lsu/lsu.sv | 8 +++----- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index d44e61f3c..f53669522 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -193,7 +193,7 @@ module ifu ( .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), .LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr), - .WordCount(), .SelUncachedAdr(), + .WordCount(), .SelUncachedAdr(), .LSUBusHWDATA(), .ReadDataLineSetsM(), .DCacheFetchLine(ICacheFetchLine), .DCacheWriteLine(1'b0), .DCacheBusAck(ICacheBusAck), .DCacheMemWriteData(ICacheMemWriteData), .LSUPAdrM(PCPF), diff --git a/pipelined/src/lsu/busdp.sv b/pipelined/src/lsu/busdp.sv index dea671103..d987c1a1a 100644 --- a/pipelined/src/lsu/busdp.sv +++ b/pipelined/src/lsu/busdp.sv @@ -34,7 +34,7 @@ `include "wally-config.vh" -module busdp #(parameter WORDSPERLINE, LINELEN, WORDLEN, LOGWPL) +module busdp #(parameter WORDSPERLINE, LINELEN, WORDLEN, LOGWPL, LSU=0) ( input logic clk, reset, // bus interface @@ -42,7 +42,7 @@ module busdp #(parameter WORDSPERLINE, LINELEN, WORDLEN, LOGWPL) input logic LSUBusAck, output logic LSUBusWrite, output logic LSUBusRead, -// output logic [`XLEN-1:0] LSUBusHWDATA, + output logic [`XLEN-1:0] LSUBusHWDATA, output logic [2:0] LSUBusSize, input logic [2:0] LSUFunct3M, output logic [`PA_BITS-1:0] LSUBusAdr, @@ -85,6 +85,11 @@ module busdp #(parameter WORDSPERLINE, LINELEN, WORDLEN, LOGWPL) mux2 #(`PA_BITS) localadrmux(DCacheBusAdr, LSUPAdrM, SelUncachedAdr, LocalLSUBusAdr); assign LSUBusAdr = ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) + LocalLSUBusAdr; //assign PreLSUBusHWDATA = ReadDataWordM;// ReadDataLineSetsM[WordCount]; // only in lsu, not ifu + // this mux is only used in the LSU's bus. 
+ if(LSU == 1) mux2 #(`XLEN) lsubushwdatamux( .d0(ReadDataWordM), .d1(FinalAMOWriteDataM), + .s(SelUncachedAdr), .y(LSUBusHWDATA)); + else assign LSUBusHWDATA = '0; + mux2 #(3) lsubussizemux( .d0(`XLEN == 32 ? 3'b010 : 3'b011), .d1(LSUFunct3M), .s(SelUncachedAdr), .y(LSUBusSize)); mux2 #(WORDLEN) UnCachedDataMux( diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index dcaf683f1..b625a4194 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -199,10 +199,10 @@ module lsu ( logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0]; logic [`PA_BITS-1-`XLEN/8-LOGWPL:0] Pad; - busdp #(WORDSPERLINE, LINELEN, `XLEN, LOGWPL) busdp( + busdp #(WORDSPERLINE, LINELEN, `XLEN, LOGWPL, 1) busdp( .clk, .reset, - .LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize, - .WordCount, .SelUncachedAdr, + .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize, + .WordCount, .SelUncachedAdr, .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .ReadDataLineSetsM, .DCacheFetchLine, .DCacheWriteLine, .DCacheBusAck, .DCacheMemWriteData, .LSUPAdrM, .FinalAMOWriteDataM, .ReadDataWordM, .ReadDataWordMuxM, .IgnoreRequest, .LSURWM, .CPUBusy, .CacheableM, @@ -210,8 +210,6 @@ module lsu ( assign Pad = '0; assign WordOffsetAddr = LSUBusWrite ? ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) : LSUPAdrM; - mux2 #(`XLEN) lsubushwdatamux( - .d0(ReadDataWordM), .d1(FinalAMOWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); subcachelineread #(LINELEN, `XLEN, `XLEN) subcachelineread( From dce4f8a0e5acdd742574f7b54fa3ec0a32ec2484 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 4 Feb 2022 22:40:51 -0600 Subject: [PATCH 10/37] Cleanup. 
--- pipelined/src/ifu/ifu.sv | 2 +- pipelined/src/lsu/busdp.sv | 3 +-- pipelined/src/lsu/lsu.sv | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index f53669522..31038bff5 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -193,7 +193,7 @@ module ifu ( .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), .LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr), - .WordCount(), .SelUncachedAdr(), .LSUBusHWDATA(), + .WordCount(), .LSUBusHWDATA(), .ReadDataLineSetsM(), .DCacheFetchLine(ICacheFetchLine), .DCacheWriteLine(1'b0), .DCacheBusAck(ICacheBusAck), .DCacheMemWriteData(ICacheMemWriteData), .LSUPAdrM(PCPF), diff --git a/pipelined/src/lsu/busdp.sv b/pipelined/src/lsu/busdp.sv index d987c1a1a..fc43d0c1a 100644 --- a/pipelined/src/lsu/busdp.sv +++ b/pipelined/src/lsu/busdp.sv @@ -47,7 +47,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, WORDLEN, LOGWPL, LSU=0) input logic [2:0] LSUFunct3M, output logic [`PA_BITS-1:0] LSUBusAdr, output logic [LOGWPL-1:0] WordCount, - output logic SelUncachedAdr, // cache interface. 
input logic [`PA_BITS-1:0] DCacheBusAdr, input var logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0], @@ -73,7 +72,7 @@ module busdp #(parameter WORDSPERLINE, LINELEN, WORDLEN, LOGWPL, LSU=0) logic [`XLEN-1:0] PreLSUBusHWDATA; logic [`PA_BITS-1:0] LocalLSUBusAdr; - + logic SelUncachedAdr; genvar index; for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index b625a4194..80e5ebfd8 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -202,7 +202,7 @@ module lsu ( busdp #(WORDSPERLINE, LINELEN, `XLEN, LOGWPL, 1) busdp( .clk, .reset, .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize, - .WordCount, .SelUncachedAdr, + .WordCount, .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .ReadDataLineSetsM, .DCacheFetchLine, .DCacheWriteLine, .DCacheBusAck, .DCacheMemWriteData, .LSUPAdrM, .FinalAMOWriteDataM, .ReadDataWordM, .ReadDataWordMuxM, .IgnoreRequest, .LSURWM, .CPUBusy, .CacheableM, @@ -211,7 +211,6 @@ module lsu ( assign Pad = '0; assign WordOffsetAddr = LSUBusWrite ? ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) : LSUPAdrM; - subcachelineread #(LINELEN, `XLEN, `XLEN) subcachelineread( .clk, .reset, .PAdr(WordOffsetAddr), .save, .restore, .ReadDataLine(ReadDataLineM), .ReadDataWord(ReadDataWordM)); From 1766c0f5baac4181544ed3bf2030f6ca742ea97d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 4 Feb 2022 22:52:51 -0600 Subject: [PATCH 11/37] Removed unused ports from caches and buses. 
--- pipelined/src/cache/cache.sv | 17 +---------------- pipelined/src/ifu/ifu.sv | 4 ++-- pipelined/src/lsu/busdp.sv | 16 ++++++---------- pipelined/src/lsu/lsu.sv | 8 ++------ 4 files changed, 11 insertions(+), 34 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index e08b65e87..b2e6d2391 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -56,8 +56,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( input logic CacheBusAck, output logic [`PA_BITS-1:0] CacheBusAdr, input logic [LINELEN-1:0] CacheMemWriteData, - output logic [LINELEN-1:0] ReadDataLine, - output logic [`XLEN-1:0] ReadDataLineSets [(LINELEN/`XLEN)-1:0]); + output logic [LINELEN-1:0] ReadDataLine); // Cache parameters localparam LINEBYTELEN = LINELEN/8; @@ -151,20 +150,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( flopenr #(NUMWAYS) wayhitsavereg(clk, save, reset, WayHitRaw, WayHitSaved); mux2 #(NUMWAYS) saverestoremux(WayHitRaw, WayHitSaved, restore, WayHit); - - // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can - // easily build a variable input mux. - // *** move this to LSU and IFU, also remove mux from busdp into LSU. 
- // *** give this a module name to match block diagram - genvar index; - if(DCACHE == 1) begin: readdata - // *** only here temporary - for (index = 0; index < WORDSPERLINE; index++) begin:readdatalinesetsmux - assign ReadDataLineSets[index] = ReadDataLine[((index+1)*`XLEN)-1: (index*`XLEN)]; - end - end else begin: readdata - end - ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write Enables ///////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 31038bff5..2505b6575 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -194,7 +194,7 @@ module ifu ( .LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr), .WordCount(), .LSUBusHWDATA(), - .ReadDataLineSetsM(), .DCacheFetchLine(ICacheFetchLine), + .DCacheFetchLine(ICacheFetchLine), .DCacheWriteLine(1'b0), .DCacheBusAck(ICacheBusAck), .DCacheMemWriteData(ICacheMemWriteData), .LSUPAdrM(PCPF), .FinalAMOWriteDataM(), .ReadDataWordM(FinalInstrRawF), .ReadDataWordMuxM(AllInstrRawF[31:0]), @@ -216,7 +216,7 @@ module ifu ( .CacheMemWriteData(ICacheMemWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), .CacheFetchLine(ICacheFetchLine), - .CacheWriteLine(), .ReadDataLineSets(), .ReadDataLine(ReadDataLine), + .CacheWriteLine(), .ReadDataLine(ReadDataLine), .save, .restore, .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), .FinalWriteData('0), diff --git a/pipelined/src/lsu/busdp.sv b/pipelined/src/lsu/busdp.sv index fc43d0c1a..f4b2bb234 100644 --- a/pipelined/src/lsu/busdp.sv +++ b/pipelined/src/lsu/busdp.sv @@ -49,7 +49,6 @@ module busdp #(parameter WORDSPERLINE, LINELEN, WORDLEN, LOGWPL, LSU=0) output logic [LOGWPL-1:0] WordCount, // cache interface. 
input logic [`PA_BITS-1:0] DCacheBusAdr, - input var logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0], input logic DCacheFetchLine, input logic DCacheWriteLine, output logic DCacheBusAck, @@ -83,18 +82,15 @@ module busdp #(parameter WORDSPERLINE, LINELEN, WORDLEN, LOGWPL, LSU=0) mux2 #(`PA_BITS) localadrmux(DCacheBusAdr, LSUPAdrM, SelUncachedAdr, LocalLSUBusAdr); assign LSUBusAdr = ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) + LocalLSUBusAdr; - //assign PreLSUBusHWDATA = ReadDataWordM;// ReadDataLineSetsM[WordCount]; // only in lsu, not ifu - // this mux is only used in the LSU's bus. if(LSU == 1) mux2 #(`XLEN) lsubushwdatamux( .d0(ReadDataWordM), .d1(FinalAMOWriteDataM), - .s(SelUncachedAdr), .y(LSUBusHWDATA)); + .s(SelUncachedAdr), .y(LSUBusHWDATA)); else assign LSUBusHWDATA = '0; - - mux2 #(3) lsubussizemux( - .d0(`XLEN == 32 ? 3'b010 : 3'b011), .d1(LSUFunct3M), .s(SelUncachedAdr), .y(LSUBusSize)); - mux2 #(WORDLEN) UnCachedDataMux( - .d0(ReadDataWordM), .d1(DCacheMemWriteData[WORDLEN-1:0]), .s(SelUncachedAdr), .y(ReadDataWordMuxM)); + mux2 #(3) lsubussizemux(.d0(`XLEN == 32 ? 3'b010 : 3'b011), .d1(LSUFunct3M), + .s(SelUncachedAdr), .y(LSUBusSize)); + mux2 #(WORDLEN) UnCachedDataMux(.d0(ReadDataWordM), .d1(DCacheMemWriteData[WORDLEN-1:0]), + .s(SelUncachedAdr), .y(ReadDataWordMuxM)); - busfsm #(WordCountThreshold, LOGWPL, (`DMEM == `MEM_CACHE)) // *** cleanup + busfsm #(WordCountThreshold, LOGWPL, (`DMEM == `MEM_CACHE)) // *** cleanup Icache? must fix. 
busfsm(.clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine, .LSUBusAck, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusRead, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount); diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index 80e5ebfd8..c078ec897 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -175,7 +175,6 @@ module lsu ( logic [`XLEN-1:0] ReadDataWordMuxM; logic SelUncachedAdr; - if (`DMEM == `MEM_TIM) begin : dtim dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM, .ReadDataWordM, .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM, @@ -196,19 +195,16 @@ module lsu ( logic [`PA_BITS-1:0] WordOffsetAddr; logic SelBus; logic [LOGWPL-1:0] WordCount; - logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0]; - logic [`PA_BITS-1-`XLEN/8-LOGWPL:0] Pad; busdp #(WORDSPERLINE, LINELEN, `XLEN, LOGWPL, 1) busdp( .clk, .reset, .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize, .WordCount, - .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .ReadDataLineSetsM, .DCacheFetchLine, + .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .DCacheFetchLine, .DCacheWriteLine, .DCacheBusAck, .DCacheMemWriteData, .LSUPAdrM, .FinalAMOWriteDataM, .ReadDataWordM, .ReadDataWordMuxM, .IgnoreRequest, .LSURWM, .CPUBusy, .CacheableM, .BusStall, .BusCommittedM); - assign Pad = '0; assign WordOffsetAddr = LSUBusWrite ? 
({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) : LSUPAdrM; subcachelineread #(LINELEN, `XLEN, `XLEN) subcachelineread( @@ -225,7 +221,7 @@ module lsu ( .FinalWriteData(FinalWriteDataM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequest, .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), - .ReadDataLineSets(ReadDataLineSetsM), .ReadDataLine(ReadDataLineM), .CacheMemWriteData(DCacheMemWriteData), + .ReadDataLine(ReadDataLineM), .CacheMemWriteData(DCacheMemWriteData), .CacheFetchLine(DCacheFetchLine), .CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCacheM(1'b0)); From 308cc34d6fb414323eb2a17a19ccf3c19e330e15 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 4 Feb 2022 23:19:00 -0600 Subject: [PATCH 12/37] Added config to allow using the save/restore or replay implementation to handle sram clocked read delay. --- pipelined/config/buildroot/wally-config.vh | 2 ++ pipelined/config/fpga/wally-config.vh | 2 ++ pipelined/config/rv32etim/wally-config.vh | 2 ++ pipelined/config/rv32gc/wally-config.vh | 2 ++ pipelined/config/rv32ic/wally-config.vh | 2 ++ pipelined/config/rv32tim/wally-config.vh | 2 ++ pipelined/config/rv64BP/wally-config.vh | 2 ++ pipelined/config/rv64gc/wally-config.vh | 1 + pipelined/config/rv64ic/wally-config.vh | 1 + pipelined/src/cache/cache.sv | 6 ++++-- pipelined/src/cache/cachefsm.sv | 25 +++++++++++----------- pipelined/src/cache/subcachelineread.sv | 9 ++++---- 12 files changed, 37 insertions(+), 19 deletions(-) diff --git a/pipelined/config/buildroot/wally-config.vh b/pipelined/config/buildroot/wally-config.vh index f879ce2a6..1bdea74a7 100644 --- a/pipelined/config/buildroot/wally-config.vh +++ b/pipelined/config/buildroot/wally-config.vh @@ -124,3 +124,5 @@ `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 + +`define REPLAY 0 diff --git a/pipelined/config/fpga/wally-config.vh 
b/pipelined/config/fpga/wally-config.vh index 58efd0463..223aa3d80 100644 --- a/pipelined/config/fpga/wally-config.vh +++ b/pipelined/config/fpga/wally-config.vh @@ -130,3 +130,5 @@ `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 1 + +`define REPLAY 0 diff --git a/pipelined/config/rv32etim/wally-config.vh b/pipelined/config/rv32etim/wally-config.vh index bbeccbe85..4f77ae8bb 100644 --- a/pipelined/config/rv32etim/wally-config.vh +++ b/pipelined/config/rv32etim/wally-config.vh @@ -127,3 +127,5 @@ `define BPRED_ENABLED 0 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 + +`define REPLAY 0 diff --git a/pipelined/config/rv32gc/wally-config.vh b/pipelined/config/rv32gc/wally-config.vh index a3859740c..af6ef40cf 100644 --- a/pipelined/config/rv32gc/wally-config.vh +++ b/pipelined/config/rv32gc/wally-config.vh @@ -126,3 +126,5 @@ `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 + +`define REPLAY 0 diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh index d0b8adfb0..4d7b0418a 100644 --- a/pipelined/config/rv32ic/wally-config.vh +++ b/pipelined/config/rv32ic/wally-config.vh @@ -126,3 +126,5 @@ `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 + +`define REPLAY 0 diff --git a/pipelined/config/rv32tim/wally-config.vh b/pipelined/config/rv32tim/wally-config.vh index 0fb1dafa2..713a6a6b8 100644 --- a/pipelined/config/rv32tim/wally-config.vh +++ b/pipelined/config/rv32tim/wally-config.vh @@ -126,3 +126,5 @@ `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 + +`define REPLAY 0 diff --git a/pipelined/config/rv64BP/wally-config.vh b/pipelined/config/rv64BP/wally-config.vh index 78230552c..f8ee8903b 100644 --- 
a/pipelined/config/rv64BP/wally-config.vh +++ b/pipelined/config/rv64BP/wally-config.vh @@ -129,3 +129,5 @@ //`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE `define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE `define TESTSBP 1 + +`define REPLAY 0 diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index f5bc8a29f..ea17620c3 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -130,3 +130,4 @@ `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 +`define REPLAY 0 diff --git a/pipelined/config/rv64ic/wally-config.vh b/pipelined/config/rv64ic/wally-config.vh index 00b9a87f5..ec497db2a 100644 --- a/pipelined/config/rv64ic/wally-config.vh +++ b/pipelined/config/rv64ic/wally-config.vh @@ -130,3 +130,4 @@ `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 +`define REPLAY 0 diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index b2e6d2391..4c8f88aa4 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -147,8 +147,10 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( // There are two ways to resolve. 1. We can replay the read of the sram or we can save // the data. Replay is eaiser but creates a longer critical path. // save/restore only wayhit and readdata. 
- flopenr #(NUMWAYS) wayhitsavereg(clk, save, reset, WayHitRaw, WayHitSaved); - mux2 #(NUMWAYS) saverestoremux(WayHitRaw, WayHitSaved, restore, WayHit); + if(!`REPLAY) begin + flopenr #(NUMWAYS) wayhitsavereg(clk, save, reset, WayHitRaw, WayHitSaved); + mux2 #(NUMWAYS) saverestoremux(WayHitRaw, WayHitSaved, restore, WayHit); + end else assign WayHit = WayHitRaw; ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path: Write Enables diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 3b60efe1f..75bcb2663 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -181,8 +181,8 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; - //PreSelAdr = 2'b01; `REPLAY - save = 1'b1; + if (`REPLAY) PreSelAdr = 2'b01; + else save = 1'b1; end else begin SRAMWordWriteEnable = 1'b1; @@ -198,8 +198,8 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY; - //PreSelAdr = 2'b01; `REPLAY - save = 1'b1; + if(`REPLAY) PreSelAdr = 2'b01; + else save = 1'b1; end else begin NextState = STATE_READY; @@ -215,8 +215,8 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY; - //PreSelAdr = 2'b01; `REPLAY - save = 1'b1; + if(`REPLAY) PreSelAdr = 2'b01; + else save = 1'b1; end else begin NextState = STATE_READY; @@ -276,7 +276,6 @@ module cachefsm end STATE_MISS_READ_WORD_DELAY: begin - //PreSelAdr = 2'b01; `REPLAY SRAMWordWriteEnable = 1'b0; SetDirty = 1'b0; LRUWriteEn = 1'b0; @@ -284,7 +283,7 @@ module cachefsm PreSelAdr = 2'b01; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; - save = 1'b1; + if(~`REPLAY) save = 1'b1; end else begin SRAMWordWriteEnable = 1'b1; @@ -296,8 +295,8 @@ module cachefsm LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - //PreSelAdr = 2'b01; `REPLAY - save = 1'b1; + if(`REPLAY) PreSelAdr = 2'b01; + else save = 1'b1; end else begin NextState = STATE_READY; @@ -312,8 +311,8 @@ module cachefsm 
LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - //PreSelAdr = 2'b01; `REPLAY - save = 1'b1; + if(`REPLAY) PreSelAdr = 2'b01; + else save = 1'b1; end else begin NextState = STATE_READY; @@ -337,7 +336,7 @@ module cachefsm restore = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - //PreSelAdr = 2'b01; `REPLAY + if(`REPLAY) PreSelAdr = 2'b01; end else begin NextState = STATE_READY; diff --git a/pipelined/src/cache/subcachelineread.sv b/pipelined/src/cache/subcachelineread.sv index e42f5e711..111ec506f 100644 --- a/pipelined/src/cache/subcachelineread.sv +++ b/pipelined/src/cache/subcachelineread.sv @@ -61,8 +61,9 @@ module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL)( end // variable input mux assign ReadDataWordRaw = ReadDataLineSets[PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]]; - flopen #(WORDLEN) cachereaddatasavereg(clk, save, ReadDataWordRaw, ReadDataWordSaved); - mux2 #(WORDLEN) readdatasaverestoremux(ReadDataWordRaw, ReadDataWordSaved, - restore, ReadDataWord); - + if(!`REPLAY) begin + flopen #(WORDLEN) cachereaddatasavereg(clk, save, ReadDataWordRaw, ReadDataWordSaved); + mux2 #(WORDLEN) readdatasaverestoremux(ReadDataWordRaw, ReadDataWordSaved, + restore, ReadDataWord); + end else assign ReadDataWord = ReadDataWordRaw; endmodule From 347e9228f8c913689b6d1e9a7766f1b85f549c11 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 6 Feb 2022 21:39:38 -0600 Subject: [PATCH 13/37] started cachefsm cleanup. --- pipelined/src/cache/cachefsm.sv | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 75bcb2663..b378760d7 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -124,7 +124,6 @@ module cachefsm // next state logic and some state ouputs. 
// *** Ross simplify: factor out next state and output logic always_comb begin - CacheStall = 1'b0; PreSelAdr = 2'b00; SetValid = 1'b0; ClearValid = 1'b0; @@ -148,7 +147,6 @@ module cachefsm case (CurrState) STATE_READY: begin - CacheStall = 1'b0; PreSelAdr = 2'b00; SRAMWordWriteEnable = 1'b0; SetDirty = 1'b0; @@ -171,13 +169,11 @@ module cachefsm NextState = STATE_FLUSH; FlushAdrCntRst = 1'b1; FlushWayCntRst = 1'b1; - CacheStall = 1'b1; end // amo hit else if(Atomic[1] & (&RW) & CacheHit) begin PreSelAdr = 2'b01; - CacheStall = 1'b0; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; @@ -193,7 +189,6 @@ module cachefsm end // read hit valid cached else if(RW[1] & CacheHit) begin - CacheStall = 1'b0; LRUWriteEn = 1'b1; if(CPUBusy) begin @@ -208,7 +203,6 @@ module cachefsm // write hit valid cached else if (RW[0] & CacheHit) begin PreSelAdr = 2'b01; - CacheStall = 1'b0; SRAMWordWriteEnable = 1'b1; SetDirty = 1'b1; LRUWriteEn = 1'b1; @@ -225,14 +219,12 @@ module cachefsm // read or write miss valid cached else if((|RW) & ~CacheHit) begin NextState = STATE_MISS_FETCH_WDV; - CacheStall = 1'b1; CacheFetchLine = 1'b1; end else NextState = STATE_READY; end STATE_MISS_FETCH_WDV: begin - CacheStall = 1'b1; PreSelAdr = 2'b01; if (CacheBusAck) begin @@ -243,7 +235,6 @@ module cachefsm end STATE_MISS_FETCH_DONE: begin - CacheStall = 1'b1; PreSelAdr = 2'b01; if(VictimDirty) begin NextState = STATE_MISS_EVICT_DIRTY; @@ -255,7 +246,6 @@ module cachefsm STATE_MISS_WRITE_CACHE_LINE: begin SRAMLineWriteEnable = 1'b1; - CacheStall = 1'b1; NextState = STATE_MISS_READ_WORD; PreSelAdr = 2'b01; SetValid = 1'b1; @@ -265,7 +255,6 @@ module cachefsm STATE_MISS_READ_WORD: begin PreSelAdr = 2'b01; - CacheStall = 1'b1; if (RW[0] & ~Atomic[1]) begin // handles stores and amo write. 
NextState = STATE_MISS_WRITE_WORD; end else begin @@ -320,7 +309,6 @@ module cachefsm end STATE_MISS_EVICT_DIRTY: begin - CacheStall = 1'b1; PreSelAdr = 2'b01; SelEvict = 1'b1; if(CacheBusAck) begin @@ -363,13 +351,11 @@ module cachefsm STATE_FLUSH: begin // intialize flush counters SelFlush = 1'b1; - CacheStall = 1'b1; PreSelAdr = 2'b10; NextState = STATE_FLUSH_CHECK; end STATE_FLUSH_CHECK: begin - CacheStall = 1'b1; PreSelAdr = 2'b10; SelFlush = 1'b1; if(VictimDirty) begin @@ -378,7 +364,6 @@ module cachefsm CacheWriteLine = 1'b1; end else if (FlushAdrFlag & FlushWayFlag) begin NextState = STATE_READY; - CacheStall = 1'b0; PreSelAdr = 2'b00; FlushWayCntEn = 1'b0; end else if(FlushWayFlag) begin @@ -393,7 +378,6 @@ module cachefsm end STATE_FLUSH_INCR: begin - CacheStall = 1'b1; PreSelAdr = 2'b10; SelFlush = 1'b1; FlushWayCntRst = 1'b1; @@ -401,7 +385,6 @@ module cachefsm end STATE_FLUSH_WRITE_BACK: begin - CacheStall = 1'b1; PreSelAdr = 2'b10; SelFlush = 1'b1; if(CacheBusAck) begin @@ -412,7 +395,6 @@ module cachefsm end STATE_FLUSH_CLEAR_DIRTY: begin - CacheStall = 1'b1; ClearDirty = 1'b1; VDWriteEnable = 1'b1; SelFlush = 1'b1; @@ -420,7 +402,6 @@ module cachefsm FlushWayCntEn = 1'b0; if(FlushAdrFlag & FlushWayFlag) begin NextState = STATE_READY; - CacheStall = 1'b0; PreSelAdr = 2'b00; end else if (FlushWayFlag) begin NextState = STATE_FLUSH_INCR; @@ -440,6 +421,19 @@ module cachefsm end assign CacheCommitted = CurrState != STATE_READY; + assign CacheStall = (CurrState == STATE_READY & (FlushCache | (|RW & ~CacheHit)) & ~IgnoreRequest) | + + (CurrState == STATE_MISS_FETCH_WDV) | + (CurrState == STATE_MISS_FETCH_DONE) | + (CurrState == STATE_MISS_WRITE_CACHE_LINE) | + (CurrState == STATE_MISS_READ_WORD) | + (CurrState == STATE_MISS_EVICT_DIRTY) | + (CurrState == STATE_FLUSH) | + (CurrState == STATE_FLUSH_CHECK & ~(FlushAdrFlag & FlushWayFlag)) | + (CurrState == STATE_FLUSH_INCR) | + (CurrState == STATE_FLUSH_WRITE_BACK) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY & 
~(FlushAdrFlag & FlushWayFlag)); + endmodule // cachefsm From 8bcaadda6b5e44e154179efb95090d13164eb993 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Sun, 6 Feb 2022 21:50:44 -0600 Subject: [PATCH 14/37] More cachefsm cleanup. --- pipelined/src/cache/cachefsm.sv | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index b378760d7..047505504 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -125,10 +125,6 @@ module cachefsm // *** Ross simplify: factor out next state and output logic always_comb begin PreSelAdr = 2'b00; - SetValid = 1'b0; - ClearValid = 1'b0; - SetDirty = 1'b0; - ClearDirty = 1'b0; SRAMWordWriteEnable = 1'b0; SRAMLineWriteEnable = 1'b0; SelEvict = 1'b0; @@ -149,7 +145,6 @@ module cachefsm PreSelAdr = 2'b00; SRAMWordWriteEnable = 1'b0; - SetDirty = 1'b0; LRUWriteEn = 1'b0; // TLB Miss @@ -182,7 +177,6 @@ module cachefsm end else begin SRAMWordWriteEnable = 1'b1; - SetDirty = 1'b1; LRUWriteEn = 1'b1; NextState = STATE_READY; end @@ -204,7 +198,6 @@ module cachefsm else if (RW[0] & CacheHit) begin PreSelAdr = 2'b01; SRAMWordWriteEnable = 1'b1; - SetDirty = 1'b1; LRUWriteEn = 1'b1; if(CPUBusy) begin @@ -248,8 +241,6 @@ module cachefsm SRAMLineWriteEnable = 1'b1; NextState = STATE_MISS_READ_WORD; PreSelAdr = 2'b01; - SetValid = 1'b1; - ClearDirty = 1'b1; //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. 
Wait for subsequent read/write end @@ -266,7 +257,6 @@ module cachefsm STATE_MISS_READ_WORD_DELAY: begin SRAMWordWriteEnable = 1'b0; - SetDirty = 1'b0; LRUWriteEn = 1'b0; if(&RW & Atomic[1]) begin // amo write PreSelAdr = 2'b01; @@ -276,7 +266,6 @@ module cachefsm end else begin SRAMWordWriteEnable = 1'b1; - SetDirty = 1'b1; LRUWriteEn = 1'b1; NextState = STATE_READY; end @@ -295,7 +284,6 @@ module cachefsm STATE_MISS_WRITE_WORD: begin SRAMWordWriteEnable = 1'b1; - SetDirty = 1'b1; PreSelAdr = 2'b01; LRUWriteEn = 1'b1; if(CPUBusy) begin @@ -334,7 +322,6 @@ module cachefsm STATE_CPU_BUSY_FINISH_AMO: begin PreSelAdr = 2'b01; SRAMWordWriteEnable = 1'b0; - SetDirty = 1'b0; LRUWriteEn = 1'b0; restore = 1'b1; if(CPUBusy) begin @@ -342,7 +329,6 @@ module cachefsm end else begin SRAMWordWriteEnable = 1'b1; - SetDirty = 1'b1; LRUWriteEn = 1'b1; NextState = STATE_READY; end @@ -395,7 +381,6 @@ module cachefsm end STATE_FLUSH_CLEAR_DIRTY: begin - ClearDirty = 1'b1; VDWriteEnable = 1'b1; SelFlush = 1'b1; PreSelAdr = 2'b10; @@ -421,8 +406,8 @@ module cachefsm end assign CacheCommitted = CurrState != STATE_READY; + // *** stall missing check on amo miss? 
assign CacheStall = (CurrState == STATE_READY & (FlushCache | (|RW & ~CacheHit)) & ~IgnoreRequest) | - (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_FETCH_DONE) | (CurrState == STATE_MISS_WRITE_CACHE_LINE) | @@ -433,6 +418,17 @@ module cachefsm (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushAdrFlag & FlushWayFlag)); + assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; + assign ClearValid = '0; + // *** setdirty can probably be simplified by not caring about cpubusy + assign SetDirty = (CurrState == STATE_READY & Atomic[1] & (&RW) & CacheHit & ~CPUBusy & ~IgnoreRequest) | + (CurrState == STATE_READY & RW[0] & CacheHit & ~IgnoreRequest) | + (CurrState == STATE_MISS_READ_WORD_DELAY & &RW & Atomic[1] & ~CPUBusy) | + (CurrState == STATE_MISS_WRITE_WORD) | + (CurrState == STATE_CPU_BUSY_FINISH_AMO & ~CPUBusy); + assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY); + endmodule // cachefsm From 6f4a321d311c68ac794b46d0e27a151b251dc5db Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 10:33:50 -0600 Subject: [PATCH 15/37] More cachefsm cleanup.
--- pipelined/src/cache/cachefsm.sv | 54 ++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 047505504..265255bf5 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -83,6 +83,10 @@ module cachefsm logic AnyCPUReqM; logic [1:0] PreSelAdr; logic resetDelay; + logic DoAMO, DoRead, DoWrite, DoFlush; + logic DoAMOHit, DoReadHit, DoWriteHit; + logic DoAMOMiss, DoReadMiss, DoWriteMiss; + typedef enum {STATE_READY, @@ -105,11 +109,22 @@ module cachefsm (* mark_debug = "true" *) statetype CurrState, NextState; - assign AnyCPUReqM = |RW | (|Atomic); + assign DoFlush = FlushCache & ~IgnoreRequest; + assign DoAMO = Atomic[1] & (&RW) & ~IgnoreRequest; + assign DoAMOHit = DoAMO & CacheHit; + assign DoAMOMiss = DoAMOHit & ~CacheHit; + assign DoRead = RW[1] & ~IgnoreRequest; + assign DoReadHit = DoRead & CacheHit; + assign DoReadMiss = DoRead & ~CacheHit; + assign DoWrite = RW[0] & ~IgnoreRequest; + assign DoWriteHit = DoWrite & CacheHit; + assign DoWriteMiss = DoWrite & ~CacheHit; + + //assign AnyCPUReqM = |RW | (|Atomic); **** remove // outputs for the performance counters. - assign CacheAccess = AnyCPUReqM & CurrState == STATE_READY; - assign CacheMiss = CacheAccess & ~CacheHit; + assign CacheAccess = (DoAMO | DoRead | DoWrite) & CurrState == STATE_READY; + assign CacheMiss = CacheAccess & ~CacheHit; // special case on reset. When the fsm first exists reset the // PCNextF will no longer be pointing to the correct address. 
@@ -125,9 +140,6 @@ module cachefsm // *** Ross simplify: factor out next state and output logic always_comb begin PreSelAdr = 2'b00; - SRAMWordWriteEnable = 1'b0; - SRAMLineWriteEnable = 1'b0; - SelEvict = 1'b0; LRUWriteEn = 1'b0; SelFlush = 1'b0; FlushAdrCntEn = 1'b0; @@ -144,7 +156,6 @@ module cachefsm STATE_READY: begin PreSelAdr = 2'b00; - SRAMWordWriteEnable = 1'b0; LRUWriteEn = 1'b0; // TLB Miss @@ -169,6 +180,7 @@ module cachefsm // amo hit else if(Atomic[1] & (&RW) & CacheHit) begin PreSelAdr = 2'b01; + LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; @@ -176,8 +188,6 @@ module cachefsm else save = 1'b1; end else begin - SRAMWordWriteEnable = 1'b1; - LRUWriteEn = 1'b1; NextState = STATE_READY; end end @@ -197,7 +207,6 @@ module cachefsm // write hit valid cached else if (RW[0] & CacheHit) begin PreSelAdr = 2'b01; - SRAMWordWriteEnable = 1'b1; LRUWriteEn = 1'b1; if(CPUBusy) begin @@ -238,7 +247,6 @@ module cachefsm end STATE_MISS_WRITE_CACHE_LINE: begin - SRAMLineWriteEnable = 1'b1; NextState = STATE_MISS_READ_WORD; PreSelAdr = 2'b01; //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. 
Wait for subsequent read/write @@ -256,7 +264,6 @@ module cachefsm end STATE_MISS_READ_WORD_DELAY: begin - SRAMWordWriteEnable = 1'b0; LRUWriteEn = 1'b0; if(&RW & Atomic[1]) begin // amo write PreSelAdr = 2'b01; @@ -265,7 +272,6 @@ module cachefsm if(~`REPLAY) save = 1'b1; end else begin - SRAMWordWriteEnable = 1'b1; LRUWriteEn = 1'b1; NextState = STATE_READY; end @@ -283,7 +289,6 @@ module cachefsm end STATE_MISS_WRITE_WORD: begin - SRAMWordWriteEnable = 1'b1; PreSelAdr = 2'b01; LRUWriteEn = 1'b1; if(CPUBusy) begin @@ -298,7 +303,6 @@ module cachefsm STATE_MISS_EVICT_DIRTY: begin PreSelAdr = 2'b01; - SelEvict = 1'b1; if(CacheBusAck) begin NextState = STATE_MISS_WRITE_CACHE_LINE; end else begin @@ -321,14 +325,12 @@ module cachefsm STATE_CPU_BUSY_FINISH_AMO: begin PreSelAdr = 2'b01; - SRAMWordWriteEnable = 1'b0; LRUWriteEn = 1'b0; restore = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; end else begin - SRAMWordWriteEnable = 1'b1; LRUWriteEn = 1'b1; NextState = STATE_READY; end @@ -407,7 +409,7 @@ module cachefsm assign CacheCommitted = CurrState != STATE_READY; // *** stall missing check on amo miss? 
- assign CacheStall = (CurrState == STATE_READY & (FlushCache | (|RW & ~CacheHit)) & ~IgnoreRequest) | + assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAMOMiss | DoReadMiss | DoWriteMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_FETCH_DONE) | (CurrState == STATE_MISS_WRITE_CACHE_LINE) | @@ -421,15 +423,17 @@ module cachefsm assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; assign ClearValid = '0; // *** setdirty can probably be simplified by not caring about cpubusy - assign SetDirty = (CurrState == STATE_READY & Atomic[1] & (&RW) & CacheHit & ~CPUBusy & ~IgnoreRequest) | - (CurrState == STATE_READY & RW[0] & CacheHit & ~IgnoreRequest) | - (CurrState == STATE_MISS_READ_WORD_DELAY & &RW & Atomic[1] & ~CPUBusy) | - (CurrState == STATE_MISS_WRITE_WORD) | - (CurrState == STATE_CPU_BUSY_FINISH_AMO & ~CPUBusy); + assign SetDirty = (CurrState == STATE_READY & DoAMO) | + (CurrState == STATE_READY & DoWrite) | + (CurrState == STATE_MISS_READ_WORD_DELAY & DoAMO) | + (CurrState == STATE_MISS_WRITE_WORD); assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE) | (CurrState == STATE_FLUSH_CLEAR_DIRTY); + assign SRAMWordWriteEnable = (CurrState == STATE_READY & (DoAMOHit | DoWriteHit)) | + (CurrState == STATE_MISS_READ_WORD_DELAY & DoAMO) | + (CurrState == STATE_MISS_WRITE_WORD); + assign SRAMLineWriteEnable = (CurrState == STATE_MISS_WRITE_CACHE_LINE); + assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY); - - endmodule // cachefsm From b89ce18473ce02ad2ad9c190ca78217d932f1602 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 10:43:58 -0600 Subject: [PATCH 16/37] Cache cleanup. 
--- pipelined/src/cache/cachefsm.sv | 36 ++++++++++++++------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 265255bf5..6093b34d7 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -140,9 +140,7 @@ module cachefsm // *** Ross simplify: factor out next state and output logic always_comb begin PreSelAdr = 2'b00; - LRUWriteEn = 1'b0; - SelFlush = 1'b0; - FlushAdrCntEn = 1'b0; + //SelFlush = 1'b0; FlushWayCntEn = 1'b0; FlushAdrCntRst = 1'b0; FlushWayCntRst = 1'b0; @@ -156,7 +154,6 @@ module cachefsm STATE_READY: begin PreSelAdr = 2'b00; - LRUWriteEn = 1'b0; // TLB Miss if(IgnoreRequest) begin @@ -180,7 +177,6 @@ module cachefsm // amo hit else if(Atomic[1] & (&RW) & CacheHit) begin PreSelAdr = 2'b01; - LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; @@ -193,7 +189,6 @@ module cachefsm end // read hit valid cached else if(RW[1] & CacheHit) begin - LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; @@ -207,7 +202,6 @@ module cachefsm // write hit valid cached else if (RW[0] & CacheHit) begin PreSelAdr = 2'b01; - LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; @@ -264,7 +258,6 @@ module cachefsm end STATE_MISS_READ_WORD_DELAY: begin - LRUWriteEn = 1'b0; if(&RW & Atomic[1]) begin // amo write PreSelAdr = 2'b01; if(CPUBusy) begin @@ -272,11 +265,9 @@ module cachefsm if(~`REPLAY) save = 1'b1; end else begin - LRUWriteEn = 1'b1; NextState = STATE_READY; end end else begin - LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; if(`REPLAY) PreSelAdr = 2'b01; @@ -290,7 +281,6 @@ module cachefsm STATE_MISS_WRITE_WORD: begin PreSelAdr = 2'b01; - LRUWriteEn = 1'b1; if(CPUBusy) begin NextState = STATE_CPU_BUSY; if(`REPLAY) PreSelAdr = 2'b01; @@ -325,27 +315,25 @@ module cachefsm STATE_CPU_BUSY_FINISH_AMO: begin PreSelAdr = 2'b01; - LRUWriteEn = 1'b0; restore = 1'b1; if(CPUBusy) 
begin NextState = STATE_CPU_BUSY_FINISH_AMO; end else begin - LRUWriteEn = 1'b1; NextState = STATE_READY; end end STATE_FLUSH: begin // intialize flush counters - SelFlush = 1'b1; + //SelFlush = 1'b1; PreSelAdr = 2'b10; NextState = STATE_FLUSH_CHECK; end STATE_FLUSH_CHECK: begin PreSelAdr = 2'b10; - SelFlush = 1'b1; + //SelFlush = 1'b1; if(VictimDirty) begin NextState = STATE_FLUSH_WRITE_BACK; FlushWayCntEn = 1'b0; @@ -356,8 +344,6 @@ module cachefsm FlushWayCntEn = 1'b0; end else if(FlushWayFlag) begin NextState = STATE_FLUSH_INCR; - FlushAdrCntEn = 1'b1; - FlushWayCntEn = 1'b1; end else begin FlushWayCntEn = 1'b1; @@ -367,14 +353,14 @@ module cachefsm STATE_FLUSH_INCR: begin PreSelAdr = 2'b10; - SelFlush = 1'b1; + //SelFlush = 1'b1; FlushWayCntRst = 1'b1; NextState = STATE_FLUSH_CHECK; end STATE_FLUSH_WRITE_BACK: begin PreSelAdr = 2'b10; - SelFlush = 1'b1; + //SelFlush = 1'b1; if(CacheBusAck) begin NextState = STATE_FLUSH_CLEAR_DIRTY; end else begin @@ -384,7 +370,7 @@ module cachefsm STATE_FLUSH_CLEAR_DIRTY: begin VDWriteEnable = 1'b1; - SelFlush = 1'b1; + //SelFlush = 1'b1; PreSelAdr = 2'b10; FlushWayCntEn = 1'b0; if(FlushAdrFlag & FlushWayFlag) begin @@ -392,7 +378,6 @@ module cachefsm PreSelAdr = 2'b00; end else if (FlushWayFlag) begin NextState = STATE_FLUSH_INCR; - FlushAdrCntEn = 1'b1; FlushWayCntEn = 1'b1; end else begin @@ -434,6 +419,15 @@ module cachefsm (CurrState == STATE_MISS_WRITE_WORD); assign SRAMLineWriteEnable = (CurrState == STATE_MISS_WRITE_CACHE_LINE); assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY); + assign LRUWriteEn = (CurrState == STATE_READY & (DoAMOHit | DoReadHit | DoWriteHit)) | + (CurrState == STATE_MISS_READ_WORD_DELAY) | + (CurrState == STATE_MISS_WRITE_WORD); + assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | + (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY); + assign FlushAdrCntEn = (CurrState == STATE_FLUSH_CHECK & 
VictimDirty & FlushWayFlag & ~FlushAdrFlag) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY & FlushWayFlag & ~FlushAdrFlag); + endmodule // cachefsm From f1781c6bc8be156d891dd49265f639d805ebd717 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 10:54:22 -0600 Subject: [PATCH 17/37] More cachefsm cleanup. --- pipelined/src/cache/cachefsm.sv | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 6093b34d7..7b82b0b47 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -140,11 +140,7 @@ module cachefsm // *** Ross simplify: factor out next state and output logic always_comb begin PreSelAdr = 2'b00; - //SelFlush = 1'b0; - FlushWayCntEn = 1'b0; - FlushAdrCntRst = 1'b0; - FlushWayCntRst = 1'b0; - VDWriteEnable = 1'b0; + //VDWriteEnable = 1'b0; NextState = STATE_READY; CacheFetchLine = 1'b0; CacheWriteLine = 1'b0; @@ -170,8 +166,6 @@ module cachefsm // Flush dcache to next level of memory else if(FlushCache) begin NextState = STATE_FLUSH; - FlushAdrCntRst = 1'b1; - FlushWayCntRst = 1'b1; end // amo hit @@ -326,41 +320,32 @@ module cachefsm STATE_FLUSH: begin // intialize flush counters - //SelFlush = 1'b1; PreSelAdr = 2'b10; NextState = STATE_FLUSH_CHECK; end STATE_FLUSH_CHECK: begin PreSelAdr = 2'b10; - //SelFlush = 1'b1; if(VictimDirty) begin NextState = STATE_FLUSH_WRITE_BACK; - FlushWayCntEn = 1'b0; CacheWriteLine = 1'b1; end else if (FlushAdrFlag & FlushWayFlag) begin NextState = STATE_READY; PreSelAdr = 2'b00; - FlushWayCntEn = 1'b0; end else if(FlushWayFlag) begin NextState = STATE_FLUSH_INCR; - FlushWayCntEn = 1'b1; end else begin - FlushWayCntEn = 1'b1; NextState = STATE_FLUSH_CHECK; end end STATE_FLUSH_INCR: begin PreSelAdr = 2'b10; - //SelFlush = 1'b1; - FlushWayCntRst = 1'b1; NextState = STATE_FLUSH_CHECK; end STATE_FLUSH_WRITE_BACK: begin PreSelAdr = 2'b10; - //SelFlush = 1'b1; if(CacheBusAck) begin 
NextState = STATE_FLUSH_CLEAR_DIRTY; end else begin @@ -369,20 +354,16 @@ module cachefsm end STATE_FLUSH_CLEAR_DIRTY: begin - VDWriteEnable = 1'b1; - //SelFlush = 1'b1; + //VDWriteEnable = 1'b1; PreSelAdr = 2'b10; - FlushWayCntEn = 1'b0; if(FlushAdrFlag & FlushWayFlag) begin NextState = STATE_READY; PreSelAdr = 2'b00; end else if (FlushWayFlag) begin NextState = STATE_FLUSH_INCR; - FlushWayCntEn = 1'b1; end else begin NextState = STATE_FLUSH_CHECK; - FlushWayCntEn = 1'b1; end end @@ -427,6 +408,11 @@ module cachefsm (CurrState == STATE_FLUSH_CLEAR_DIRTY); assign FlushAdrCntEn = (CurrState == STATE_FLUSH_CHECK & VictimDirty & FlushWayFlag & ~FlushAdrFlag) | (CurrState == STATE_FLUSH_CLEAR_DIRTY & FlushWayFlag & ~FlushAdrFlag); + assign FlushWayCntEn = (CurrState == STATE_FLUSH_CHECK & VictimDirty & ~(FlushAdrFlag & FlushWayFlag)) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushAdrFlag & FlushWayFlag)); + assign FlushAdrCntRst = (CurrState == STATE_READY & DoFlush); + assign FlushWayCntRst = (CurrState == STATE_READY & DoFlush) | (CurrState == STATE_FLUSH_INCR); + assign VDWriteEnable = (CurrState == STATE_FLUSH_CLEAR_DIRTY); endmodule // cachefsm From be67c4d559f05854979a23aaf236032500075643 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 11:12:28 -0600 Subject: [PATCH 18/37] More cachefsm cleanup. 
--- pipelined/src/cache/cachefsm.sv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 7b82b0b47..5dfeed620 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -406,9 +406,9 @@ module cachefsm assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | (CurrState == STATE_FLUSH_CLEAR_DIRTY); - assign FlushAdrCntEn = (CurrState == STATE_FLUSH_CHECK & VictimDirty & FlushWayFlag & ~FlushAdrFlag) | + assign FlushAdrCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & FlushWayFlag & ~FlushAdrFlag) | (CurrState == STATE_FLUSH_CLEAR_DIRTY & FlushWayFlag & ~FlushAdrFlag); - assign FlushWayCntEn = (CurrState == STATE_FLUSH_CHECK & VictimDirty & ~(FlushAdrFlag & FlushWayFlag)) | + assign FlushWayCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & ~(FlushAdrFlag & FlushWayFlag)) | (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushAdrFlag & FlushWayFlag)); assign FlushAdrCntRst = (CurrState == STATE_READY & DoFlush); assign FlushWayCntRst = (CurrState == STATE_READY & DoFlush) | (CurrState == STATE_FLUSH_INCR); From 7f732eb57153f69b9a29c73df3014c8a6956484d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 11:16:20 -0600 Subject: [PATCH 19/37] More cachefsm cleanup. 
--- pipelined/src/cache/cachefsm.sv | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 5dfeed620..345db3a8a 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -140,10 +140,7 @@ module cachefsm // *** Ross simplify: factor out next state and output logic always_comb begin PreSelAdr = 2'b00; - //VDWriteEnable = 1'b0; NextState = STATE_READY; - CacheFetchLine = 1'b0; - CacheWriteLine = 1'b0; save = 1'b0; restore = 1'b0; case (CurrState) @@ -209,7 +206,6 @@ module cachefsm // read or write miss valid cached else if((|RW) & ~CacheHit) begin NextState = STATE_MISS_FETCH_WDV; - CacheFetchLine = 1'b1; end else NextState = STATE_READY; end @@ -228,7 +224,6 @@ module cachefsm PreSelAdr = 2'b01; if(VictimDirty) begin NextState = STATE_MISS_EVICT_DIRTY; - CacheWriteLine = 1'b1; end else begin NextState = STATE_MISS_WRITE_CACHE_LINE; end @@ -328,7 +323,6 @@ module cachefsm PreSelAdr = 2'b10; if(VictimDirty) begin NextState = STATE_FLUSH_WRITE_BACK; - CacheWriteLine = 1'b1; end else if (FlushAdrFlag & FlushWayFlag) begin NextState = STATE_READY; PreSelAdr = 2'b00; @@ -354,7 +348,6 @@ module cachefsm end STATE_FLUSH_CLEAR_DIRTY: begin - //VDWriteEnable = 1'b1; PreSelAdr = 2'b10; if(FlushAdrFlag & FlushWayFlag) begin NextState = STATE_READY; @@ -413,7 +406,11 @@ module cachefsm assign FlushAdrCntRst = (CurrState == STATE_READY & DoFlush); assign FlushWayCntRst = (CurrState == STATE_READY & DoFlush) | (CurrState == STATE_FLUSH_INCR); assign VDWriteEnable = (CurrState == STATE_FLUSH_CLEAR_DIRTY); + assign CacheFetchLine = (CurrState == STATE_READY & (DoAMOMiss | DoWriteMiss | DoReadMiss)); + assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_DONE & VictimDirty) | + (CurrState == STATE_FLUSH_CHECK & VictimDirty); + endmodule // cachefsm From a6a7779ec06b66b54c2ddf9c7fca4fc024ebf79d Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 
12:30:27 -0600 Subject: [PATCH 20/37] More cachefsm cleanup. --- pipelined/src/cache/cachefsm.sv | 93 ++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 345db3a8a..274159762 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -139,14 +139,12 @@ module cachefsm // next state logic and some state ouputs. // *** Ross simplify: factor out next state and output logic always_comb begin - PreSelAdr = 2'b00; + //PreSelAdr = 2'b00; NextState = STATE_READY; - save = 1'b0; - restore = 1'b0; case (CurrState) STATE_READY: begin - PreSelAdr = 2'b00; + //PreSelAdr = 2'b00; // TLB Miss if(IgnoreRequest) begin @@ -156,7 +154,7 @@ module cachefsm // PTW ready the CPU will stall. // The page table walker asserts it's control 1 cycle // after the TLBs miss. - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; NextState = STATE_READY; end @@ -167,12 +165,12 @@ module cachefsm // amo hit else if(Atomic[1] & (&RW) & CacheHit) begin - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; - if (`REPLAY) PreSelAdr = 2'b01; - else save = 1'b1; + //if (`REPLAY) PreSelAdr = 2'b01; + //else save = 1'b1; end else begin NextState = STATE_READY; @@ -183,8 +181,8 @@ module cachefsm if(CPUBusy) begin NextState = STATE_CPU_BUSY; - if(`REPLAY) PreSelAdr = 2'b01; - else save = 1'b1; + //if(`REPLAY) PreSelAdr = 2'b01; + //else save = 1'b1; end else begin NextState = STATE_READY; @@ -192,12 +190,12 @@ module cachefsm end // write hit valid cached else if (RW[0] & CacheHit) begin - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - if(`REPLAY) PreSelAdr = 2'b01; - else save = 1'b1; + //if(`REPLAY) PreSelAdr = 2'b01; + //else save = 1'b1; end else begin NextState = STATE_READY; @@ -211,7 +209,7 @@ module cachefsm end STATE_MISS_FETCH_WDV: begin - PreSelAdr = 2'b01; + //PreSelAdr = 
2'b01; if (CacheBusAck) begin NextState = STATE_MISS_FETCH_DONE; @@ -221,7 +219,7 @@ module cachefsm end STATE_MISS_FETCH_DONE: begin - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; if(VictimDirty) begin NextState = STATE_MISS_EVICT_DIRTY; end else begin @@ -231,12 +229,12 @@ module cachefsm STATE_MISS_WRITE_CACHE_LINE: begin NextState = STATE_MISS_READ_WORD; - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write end STATE_MISS_READ_WORD: begin - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; if (RW[0] & ~Atomic[1]) begin // handles stores and amo write. NextState = STATE_MISS_WRITE_WORD; end else begin @@ -248,10 +246,10 @@ module cachefsm STATE_MISS_READ_WORD_DELAY: begin if(&RW & Atomic[1]) begin // amo write - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; - if(~`REPLAY) save = 1'b1; + //if(~`REPLAY) save = 1'b1; end else begin NextState = STATE_READY; @@ -259,8 +257,8 @@ module cachefsm end else begin if(CPUBusy) begin NextState = STATE_CPU_BUSY; - if(`REPLAY) PreSelAdr = 2'b01; - else save = 1'b1; + //if(`REPLAY) PreSelAdr = 2'b01; + //else save = 1'b1; end else begin NextState = STATE_READY; @@ -269,11 +267,11 @@ module cachefsm end STATE_MISS_WRITE_WORD: begin - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - if(`REPLAY) PreSelAdr = 2'b01; - else save = 1'b1; + //if(`REPLAY) PreSelAdr = 2'b01; + //else save = 1'b1; end else begin NextState = STATE_READY; @@ -281,7 +279,7 @@ module cachefsm end STATE_MISS_EVICT_DIRTY: begin - PreSelAdr = 2'b01; + //PreSelAdr = 2'b01; if(CacheBusAck) begin NextState = STATE_MISS_WRITE_CACHE_LINE; end else begin @@ -291,11 +289,10 @@ module cachefsm STATE_CPU_BUSY: begin - PreSelAdr = 2'b00; - restore = 1'b1; + //PreSelAdr = 2'b00; if(CPUBusy) begin NextState = STATE_CPU_BUSY; - if(`REPLAY) PreSelAdr = 2'b01; + //if(`REPLAY) PreSelAdr = 2'b01; end else begin 
NextState = STATE_READY; @@ -303,8 +300,7 @@ module cachefsm end STATE_CPU_BUSY_FINISH_AMO: begin - PreSelAdr = 2'b01; - restore = 1'b1; + //PreSelAdr = 2'b01; if(CPUBusy) begin NextState = STATE_CPU_BUSY_FINISH_AMO; end @@ -315,17 +311,17 @@ module cachefsm STATE_FLUSH: begin // intialize flush counters - PreSelAdr = 2'b10; + //PreSelAdr = 2'b10; NextState = STATE_FLUSH_CHECK; end STATE_FLUSH_CHECK: begin - PreSelAdr = 2'b10; + //PreSelAdr = 2'b10; if(VictimDirty) begin NextState = STATE_FLUSH_WRITE_BACK; end else if (FlushAdrFlag & FlushWayFlag) begin NextState = STATE_READY; - PreSelAdr = 2'b00; + //PreSelAdr = 2'b00; end else if(FlushWayFlag) begin NextState = STATE_FLUSH_INCR; end else begin @@ -334,12 +330,12 @@ module cachefsm end STATE_FLUSH_INCR: begin - PreSelAdr = 2'b10; + //PreSelAdr = 2'b10; NextState = STATE_FLUSH_CHECK; end STATE_FLUSH_WRITE_BACK: begin - PreSelAdr = 2'b10; + //PreSelAdr = 2'b10; if(CacheBusAck) begin NextState = STATE_FLUSH_CLEAR_DIRTY; end else begin @@ -348,10 +344,10 @@ module cachefsm end STATE_FLUSH_CLEAR_DIRTY: begin - PreSelAdr = 2'b10; + //PreSelAdr = 2'b10; if(FlushAdrFlag & FlushWayFlag) begin NextState = STATE_READY; - PreSelAdr = 2'b00; + //PreSelAdr = 2'b00; end else if (FlushWayFlag) begin NextState = STATE_FLUSH_INCR; @@ -409,7 +405,30 @@ module cachefsm assign CacheFetchLine = (CurrState == STATE_READY & (DoAMOMiss | DoWriteMiss | DoReadMiss)); assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_DONE & VictimDirty) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); + assign restore = ((CurrState == STATE_CPU_BUSY) | (CurrState == STATE_CPU_BUSY_FINISH_AMO)) & ~`REPLAY; + assign save = ((CurrState == STATE_READY & (DoAMOHit | DoReadHit | DoWriteHit) & CPUBusy) | + (CurrState == STATE_MISS_READ_WORD_DELAY & (DoAMO | DoRead) & CPUBusy) | + (CurrState == STATE_MISS_WRITE_WORD & DoWrite & CPUBusy)) & ~`REPLAY; + assign PreSelAdr = ((CurrState == STATE_READY & IgnoreRequest) | + (CurrState == STATE_READY & DoAMOHit) | + 
(CurrState == STATE_READY & DoReadHit & (CPUBusy & `REPLAY)) | + (CurrState == STATE_READY & DoWriteHit) | + (CurrState == STATE_MISS_FETCH_WDV) | + (CurrState == STATE_MISS_FETCH_DONE) | + (CurrState == STATE_MISS_WRITE_CACHE_LINE) | + (CurrState == STATE_MISS_READ_WORD) | + (CurrState == STATE_MISS_READ_WORD_DELAY & (DoAMO | (CPUBusy & `REPLAY))) | + (CurrState == STATE_MISS_WRITE_WORD) | + (CurrState == STATE_MISS_EVICT_DIRTY) | + (CurrState == STATE_CPU_BUSY & (CPUBusy & `REPLAY)) | + (CurrState == STATE_CPU_BUSY_FINISH_AMO)) ? 2'b01 : + ((CurrState == STATE_FLUSH) | + (CurrState == STATE_FLUSH_CHECK & ~(VictimDirty & FlushAdrFlag & FlushWayFlag)) | + (CurrState == STATE_FLUSH_INCR) | + (CurrState == STATE_FLUSH_WRITE_BACK) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushAdrFlag & FlushWayFlag))) ? 2'b10 : + 2'b00; endmodule // cachefsm From e72d54ea98178a4a47736eacc960e851c42a715c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 13:19:37 -0600 Subject: [PATCH 21/37] More cachefsm cleanup. --- pipelined/src/cache/cachefsm.sv | 262 +++++--------------------------- 1 file changed, 39 insertions(+), 223 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 274159762..18e962190 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -136,229 +136,45 @@ module cachefsm if (reset) CurrState <= #1 STATE_READY; else CurrState <= #1 NextState; - // next state logic and some state ouputs. - // *** Ross simplify: factor out next state and output logic always_comb begin - //PreSelAdr = 2'b00; NextState = STATE_READY; case (CurrState) - STATE_READY: begin - - //PreSelAdr = 2'b00; - - // TLB Miss - if(IgnoreRequest) begin - // the LSU arbiter has not yet selected the PTW. - // The CPU needs to be stalled until that happens. - // If we set CacheStall for 1 cycle before going to - // PTW ready the CPU will stall. 
- // The page table walker asserts it's control 1 cycle - // after the TLBs miss. - //PreSelAdr = 2'b01; - NextState = STATE_READY; - end - - // Flush dcache to next level of memory - else if(FlushCache) begin - NextState = STATE_FLUSH; - end - - // amo hit - else if(Atomic[1] & (&RW) & CacheHit) begin - //PreSelAdr = 2'b01; - - if(CPUBusy) begin - NextState = STATE_CPU_BUSY_FINISH_AMO; - //if (`REPLAY) PreSelAdr = 2'b01; - //else save = 1'b1; - end - else begin - NextState = STATE_READY; - end - end - // read hit valid cached - else if(RW[1] & CacheHit) begin - - if(CPUBusy) begin - NextState = STATE_CPU_BUSY; - //if(`REPLAY) PreSelAdr = 2'b01; - //else save = 1'b1; - end - else begin - NextState = STATE_READY; - end - end - // write hit valid cached - else if (RW[0] & CacheHit) begin - //PreSelAdr = 2'b01; - - if(CPUBusy) begin - NextState = STATE_CPU_BUSY; - //if(`REPLAY) PreSelAdr = 2'b01; - //else save = 1'b1; - end - else begin - NextState = STATE_READY; - end - end - // read or write miss valid cached - else if((|RW) & ~CacheHit) begin - NextState = STATE_MISS_FETCH_WDV; - end - else NextState = STATE_READY; - end - - STATE_MISS_FETCH_WDV: begin - //PreSelAdr = 2'b01; - - if (CacheBusAck) begin - NextState = STATE_MISS_FETCH_DONE; - end else begin - NextState = STATE_MISS_FETCH_WDV; - end - end - - STATE_MISS_FETCH_DONE: begin - //PreSelAdr = 2'b01; - if(VictimDirty) begin - NextState = STATE_MISS_EVICT_DIRTY; - end else begin - NextState = STATE_MISS_WRITE_CACHE_LINE; - end - end - - STATE_MISS_WRITE_CACHE_LINE: begin - NextState = STATE_MISS_READ_WORD; - //PreSelAdr = 2'b01; - //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write - end - - STATE_MISS_READ_WORD: begin - //PreSelAdr = 2'b01; - if (RW[0] & ~Atomic[1]) begin // handles stores and amo write. 
- NextState = STATE_MISS_WRITE_WORD; - end else begin - NextState = STATE_MISS_READ_WORD_DELAY; - // delay state is required as the read signal RW[1] is still high when we - // return to the ready state because the cache is stalling the cpu. - end - end - - STATE_MISS_READ_WORD_DELAY: begin - if(&RW & Atomic[1]) begin // amo write - //PreSelAdr = 2'b01; - if(CPUBusy) begin - NextState = STATE_CPU_BUSY_FINISH_AMO; - //if(~`REPLAY) save = 1'b1; - end - else begin - NextState = STATE_READY; - end - end else begin - if(CPUBusy) begin - NextState = STATE_CPU_BUSY; - //if(`REPLAY) PreSelAdr = 2'b01; - //else save = 1'b1; - end - else begin - NextState = STATE_READY; - end - end - end - - STATE_MISS_WRITE_WORD: begin - //PreSelAdr = 2'b01; - if(CPUBusy) begin - NextState = STATE_CPU_BUSY; - //if(`REPLAY) PreSelAdr = 2'b01; - //else save = 1'b1; - end - else begin - NextState = STATE_READY; - end - end - - STATE_MISS_EVICT_DIRTY: begin - //PreSelAdr = 2'b01; - if(CacheBusAck) begin - NextState = STATE_MISS_WRITE_CACHE_LINE; - end else begin - NextState = STATE_MISS_EVICT_DIRTY; - end - end - - - STATE_CPU_BUSY: begin - //PreSelAdr = 2'b00; - if(CPUBusy) begin - NextState = STATE_CPU_BUSY; - //if(`REPLAY) PreSelAdr = 2'b01; - end - else begin - NextState = STATE_READY; - end - end - - STATE_CPU_BUSY_FINISH_AMO: begin - //PreSelAdr = 2'b01; - if(CPUBusy) begin - NextState = STATE_CPU_BUSY_FINISH_AMO; - end - else begin - NextState = STATE_READY; - end - end - - STATE_FLUSH: begin - // intialize flush counters - //PreSelAdr = 2'b10; - NextState = STATE_FLUSH_CHECK; - end - - STATE_FLUSH_CHECK: begin - //PreSelAdr = 2'b10; - if(VictimDirty) begin - NextState = STATE_FLUSH_WRITE_BACK; - end else if (FlushAdrFlag & FlushWayFlag) begin - NextState = STATE_READY; - //PreSelAdr = 2'b00; - end else if(FlushWayFlag) begin - NextState = STATE_FLUSH_INCR; - end else begin - NextState = STATE_FLUSH_CHECK; - end - end - - STATE_FLUSH_INCR: begin - //PreSelAdr = 2'b10; - NextState = 
STATE_FLUSH_CHECK; - end - - STATE_FLUSH_WRITE_BACK: begin - //PreSelAdr = 2'b10; - if(CacheBusAck) begin - NextState = STATE_FLUSH_CLEAR_DIRTY; - end else begin - NextState = STATE_FLUSH_WRITE_BACK; - end - end - - STATE_FLUSH_CLEAR_DIRTY: begin - //PreSelAdr = 2'b10; - if(FlushAdrFlag & FlushWayFlag) begin - NextState = STATE_READY; - //PreSelAdr = 2'b00; - end else if (FlushWayFlag) begin - NextState = STATE_FLUSH_INCR; - - end else begin - NextState = STATE_FLUSH_CHECK; - end - end - - default: begin - NextState = STATE_READY; - end + STATE_READY: if(DoFlush) NextState = STATE_FLUSH; + else if(DoAMOHit & CPUBusy) NextState = STATE_CPU_BUSY_FINISH_AMO; + else if(DoReadHit & CPUBusy) NextState = STATE_CPU_BUSY; + else if (DoWriteHit & CPUBusy) NextState = STATE_CPU_BUSY; + else if(DoReadMiss | DoWriteMiss | DoAMOMiss) NextState = STATE_MISS_FETCH_WDV; + else NextState = STATE_READY; + STATE_MISS_FETCH_WDV: if (CacheBusAck) NextState = STATE_MISS_FETCH_DONE; + else NextState = STATE_MISS_FETCH_WDV; + STATE_MISS_FETCH_DONE: if(VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; + else NextState = STATE_MISS_WRITE_CACHE_LINE; + STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_MISS_READ_WORD; + STATE_MISS_READ_WORD: if (DoWrite & ~DoAMO) NextState = STATE_MISS_WRITE_WORD; + else NextState = STATE_MISS_READ_WORD_DELAY; + STATE_MISS_READ_WORD_DELAY: if(DoAMO & CPUBusy) NextState = STATE_CPU_BUSY_FINISH_AMO; + else if(CPUBusy) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + STATE_MISS_WRITE_WORD: if(CPUBusy) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; + else NextState = STATE_MISS_EVICT_DIRTY; + STATE_CPU_BUSY: if(CPUBusy) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + STATE_CPU_BUSY_FINISH_AMO: if(CPUBusy) NextState = STATE_CPU_BUSY_FINISH_AMO; + else NextState = STATE_READY; + STATE_FLUSH: NextState = STATE_FLUSH_CHECK; + STATE_FLUSH_CHECK: 
if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK; + else if (FlushAdrFlag & FlushWayFlag) NextState = STATE_READY; + else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; + else NextState = STATE_FLUSH_CHECK; + STATE_FLUSH_INCR: NextState = STATE_FLUSH_CHECK; + STATE_FLUSH_WRITE_BACK: if(CacheBusAck) NextState = STATE_FLUSH_CLEAR_DIRTY; + else NextState = STATE_FLUSH_WRITE_BACK; + STATE_FLUSH_CLEAR_DIRTY: if(FlushAdrFlag & FlushWayFlag) NextState = STATE_READY; + else if (FlushWayFlag) NextState = STATE_FLUSH_INCR; + else NextState = STATE_FLUSH_CHECK; + default: NextState = STATE_READY; endcase end @@ -377,7 +193,6 @@ module cachefsm (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushAdrFlag & FlushWayFlag)); assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; assign ClearValid = '0; - // *** setdirty can probably be simplified by not caring about cpubusy assign SetDirty = (CurrState == STATE_READY & DoAMO) | (CurrState == STATE_READY & DoWrite) | (CurrState == STATE_MISS_READ_WORD_DELAY & DoAMO) | @@ -388,6 +203,7 @@ module cachefsm (CurrState == STATE_MISS_READ_WORD_DELAY & DoAMO) | (CurrState == STATE_MISS_WRITE_WORD); assign SRAMLineWriteEnable = (CurrState == STATE_MISS_WRITE_CACHE_LINE); + assign VDWriteEnable = (CurrState == STATE_FLUSH_CLEAR_DIRTY); assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY); assign LRUWriteEn = (CurrState == STATE_READY & (DoAMOHit | DoReadHit | DoWriteHit)) | (CurrState == STATE_MISS_READ_WORD_DELAY) | @@ -401,7 +217,6 @@ module cachefsm (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushAdrFlag & FlushWayFlag)); assign FlushAdrCntRst = (CurrState == STATE_READY & DoFlush); assign FlushWayCntRst = (CurrState == STATE_READY & DoFlush) | (CurrState == STATE_FLUSH_INCR); - assign VDWriteEnable = (CurrState == STATE_FLUSH_CLEAR_DIRTY); assign CacheFetchLine = (CurrState == STATE_READY & (DoAMOMiss | DoWriteMiss | DoReadMiss)); assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_DONE & VictimDirty) | (CurrState == 
STATE_FLUSH_CHECK & VictimDirty); @@ -409,7 +224,8 @@ module cachefsm assign save = ((CurrState == STATE_READY & (DoAMOHit | DoReadHit | DoWriteHit) & CPUBusy) | (CurrState == STATE_MISS_READ_WORD_DELAY & (DoAMO | DoRead) & CPUBusy) | (CurrState == STATE_MISS_WRITE_WORD & DoWrite & CPUBusy)) & ~`REPLAY; - + + // **** can this be simplified? assign PreSelAdr = ((CurrState == STATE_READY & IgnoreRequest) | (CurrState == STATE_READY & DoAMOHit) | (CurrState == STATE_READY & DoReadHit & (CPUBusy & `REPLAY)) | From fcd43ea00438b73a75d2dd84d74c9447ab59092f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 13:29:19 -0600 Subject: [PATCH 22/37] more cleanup. --- pipelined/src/cache/cachefsm.sv | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 18e962190..16c9f76fc 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -80,14 +80,13 @@ module cachefsm ); - logic AnyCPUReqM; logic [1:0] PreSelAdr; logic resetDelay; logic DoAMO, DoRead, DoWrite, DoFlush; logic DoAMOHit, DoReadHit, DoWriteHit; logic DoAMOMiss, DoReadMiss, DoWriteMiss; - - + logic FlushFlag; + typedef enum {STATE_READY, STATE_MISS_FETCH_WDV, @@ -119,8 +118,8 @@ module cachefsm assign DoWrite = RW[0] & ~IgnoreRequest; assign DoWriteHit = DoWrite & CacheHit; assign DoWriteMiss = DoWrite & ~CacheHit; - - //assign AnyCPUReqM = |RW | (|Atomic); **** remove + + assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. 
assign CacheAccess = (DoAMO | DoRead | DoWrite) & CurrState == STATE_READY; @@ -165,7 +164,7 @@ module cachefsm else NextState = STATE_READY; STATE_FLUSH: NextState = STATE_FLUSH_CHECK; STATE_FLUSH_CHECK: if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK; - else if (FlushAdrFlag & FlushWayFlag) NextState = STATE_READY; + else if (FlushFlag) NextState = STATE_READY; else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; else NextState = STATE_FLUSH_CHECK; STATE_FLUSH_INCR: NextState = STATE_FLUSH_CHECK; @@ -179,7 +178,6 @@ module cachefsm end assign CacheCommitted = CurrState != STATE_READY; - // *** stall missing check on amo miss? assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAMOMiss | DoReadMiss | DoWriteMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_FETCH_DONE) | @@ -187,10 +185,10 @@ module cachefsm (CurrState == STATE_MISS_READ_WORD) | (CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_FLUSH) | - (CurrState == STATE_FLUSH_CHECK & ~(FlushAdrFlag & FlushWayFlag)) | + (CurrState == STATE_FLUSH_CHECK & ~(FlushFlag)) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | - (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushAdrFlag & FlushWayFlag)); + (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushFlag)); assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; assign ClearValid = '0; assign SetDirty = (CurrState == STATE_READY & DoAMO) | @@ -213,8 +211,8 @@ module cachefsm (CurrState == STATE_FLUSH_CLEAR_DIRTY); assign FlushAdrCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & FlushWayFlag & ~FlushAdrFlag) | (CurrState == STATE_FLUSH_CLEAR_DIRTY & FlushWayFlag & ~FlushAdrFlag); - assign FlushWayCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & ~(FlushAdrFlag & FlushWayFlag)) | - (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushAdrFlag & FlushWayFlag)); + assign FlushWayCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & ~(FlushFlag)) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY & 
~(FlushFlag)); assign FlushAdrCntRst = (CurrState == STATE_READY & DoFlush); assign FlushWayCntRst = (CurrState == STATE_READY & DoFlush) | (CurrState == STATE_FLUSH_INCR); assign CacheFetchLine = (CurrState == STATE_READY & (DoAMOMiss | DoWriteMiss | DoReadMiss)); @@ -240,12 +238,11 @@ module cachefsm (CurrState == STATE_CPU_BUSY & (CPUBusy & `REPLAY)) | (CurrState == STATE_CPU_BUSY_FINISH_AMO)) ? 2'b01 : ((CurrState == STATE_FLUSH) | - (CurrState == STATE_FLUSH_CHECK & ~(VictimDirty & FlushAdrFlag & FlushWayFlag)) | + (CurrState == STATE_FLUSH_CHECK & ~(VictimDirty & FlushFlag)) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | - (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushAdrFlag & FlushWayFlag))) ? 2'b10 : + (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushFlag))) ? 2'b10 : 2'b00; endmodule // cachefsm - From 23a60d9875a479a4c9b0493a6a1dd54c9f7b5d31 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 17:23:09 -0600 Subject: [PATCH 23/37] Progress towards simplifying the cache's write enables. 
--- pipelined/src/cache/cache.sv | 13 ++++++++++++- pipelined/src/cache/cachefsm.sv | 4 ++-- pipelined/src/cache/cacheway.sv | 4 ++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 4c8f88aa4..672920e72 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -107,6 +107,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( logic ResetOrFlushAdr, ResetOrFlushWay; logic [NUMWAYS-1:0] WayHitSaved, WayHitRaw; logic [LINELEN-1:0] ReadDataLineRaw, ReadDataLineSaved; + logic [NUMWAYS-1:0] SelectedWay; + logic [NUMWAYS-1:0] SetValidWay, ClearValidWay, SetDirtyWay, ClearDirtyWay; ///////////////////////////////////////////////////////////////////////////////////////////// // Read Path @@ -126,7 +128,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .WriteWordEnable(SRAMWordEnable), .TagWriteEnable(SRAMLineWayWriteEnable), .WriteData(SRAMWriteData), - .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .Victim(VictimWay), .Flush(FlushWay), + //.SetValid(SetValidWay), .ClearValid(ClearValidWay), .SetDirty(SetDirtyWay), .ClearDirty(ClearDirtyWay), + .SetValid(SetValidWay), .ClearValid(ClearValidWay), .SetDirty, .ClearDirty, + .SelEvict, .Victim(VictimWay), .Flush(FlushWay), .SelFlush, .SelectedReadDataLine(ReadDataLineWay), .WayHit(WayHitRaw), .VictimDirty(VictimDirtyWay), .VictimTag(VictimTagWay), .InvalidateAll(InvalidateCacheM)); @@ -190,6 +194,13 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( assign VDWriteEnableWay = FlushWay & {NUMWAYS{VDWriteEnable}}; assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; + assign SelectedWay = SelFlush ? FlushWay : VictimWay; + assign SetValidWay = SetValid ? SelectedWay : '0; + assign ClearValidWay = ClearValid ? SelectedWay : '0; + assign SetDirtyWay = SetDirty ? SelectedWay : '0; + assign ClearDirtyWay = ClearDirty ? 
SelectedWay : '0; + + ///////////////////////////////////////////////////////////////////////////////////////////// // Cache FSM ///////////////////////////////////////////////////////////////////////////////////////////// diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 16c9f76fc..cbde7e018 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -97,8 +97,8 @@ module cachefsm STATE_MISS_READ_WORD_DELAY, STATE_MISS_WRITE_WORD, - STATE_CPU_BUSY, // *** Ross will change - STATE_CPU_BUSY_FINISH_AMO, // *** Ross will change + STATE_CPU_BUSY, + STATE_CPU_BUSY_FINISH_AMO, STATE_FLUSH, STATE_FLUSH_CHECK, diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 3bca5eb0b..3d61e09dc 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -108,8 +108,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, always_ff @(posedge clk) begin // Valid bit array, if (reset | InvalidateAll) ValidBits <= #1 '0; - else if (SetValidD & (WriteEnableD | VDWriteEnableD)) ValidBits[RAdrD] <= #1 1'b1; - else if (ClearValidD & (WriteEnableD | VDWriteEnableD)) ValidBits[RAdrD] <= #1 1'b0; + else if (SetValidD) ValidBits[RAdrD] <= #1 1'b1; + else if (ClearValidD) ValidBits[RAdrD] <= #1 1'b0; end // *** consider revisiting whether these delays are the best option? flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); From 494802b2e1c23462748fbdef06c1ab28b0d22971 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 17:41:56 -0600 Subject: [PATCH 24/37] more partial cleanup of fsm and write enables. 
--- pipelined/src/cache/cache.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 672920e72..cd2a2dfb2 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -129,7 +129,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .TagWriteEnable(SRAMLineWayWriteEnable), .WriteData(SRAMWriteData), //.SetValid(SetValidWay), .ClearValid(ClearValidWay), .SetDirty(SetDirtyWay), .ClearDirty(ClearDirtyWay), - .SetValid(SetValidWay), .ClearValid(ClearValidWay), .SetDirty, .ClearDirty, + .SetValid(SetValidWay), .ClearValid(ClearValidWay), .SetDirty(SetDirtyWay), .ClearDirty(ClearDirtyWay), .SelEvict, .Victim(VictimWay), .Flush(FlushWay), .SelFlush, .SelectedReadDataLine(ReadDataLineWay), .WayHit(WayHitRaw), .VictimDirty(VictimDirtyWay), .VictimTag(VictimTagWay), @@ -194,7 +194,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( assign VDWriteEnableWay = FlushWay & {NUMWAYS{VDWriteEnable}}; assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; - assign SelectedWay = SelFlush ? FlushWay : VictimWay; + assign SelectedWay = SelFlush ? FlushWay : (SRAMLineWriteEnable ? VictimWay : WayHit); + //assign SelectedWay = SelFlush ? FlushWay : VictimWay; assign SetValidWay = SetValid ? SelectedWay : '0; assign ClearValidWay = ClearValid ? SelectedWay : '0; assign SetDirtyWay = SetDirty ? SelectedWay : '0; From ca459a59156078a1db5a0a212c6f0c414f7d342f Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 21:59:18 -0600 Subject: [PATCH 25/37] Removed VDWriteEnable. 
--- pipelined/src/cache/cache.sv | 6 +----- pipelined/src/cache/cachefsm.sv | 6 +----- pipelined/src/cache/cacheway.sv | 11 +++++------ 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index cd2a2dfb2..474a91562 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -99,10 +99,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( logic [NUMWAYS-1:0] NextFlushWay; logic FlushWayCntEn; logic FlushWayCntRst; - logic VDWriteEnable; logic SelEvict; logic LRUWriteEn; - logic [NUMWAYS-1:0] VDWriteEnableWay; logic SelFlush; logic ResetOrFlushAdr, ResetOrFlushWay; logic [NUMWAYS-1:0] WayHitSaved, WayHitRaw; @@ -124,7 +122,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) CacheWays[NUMWAYS-1:0]( .clk, .reset, .RAdr, .PAdr, .WriteEnable(SRAMWayWriteEnable), - .VDWriteEnable(VDWriteEnableWay), .WriteWordEnable(SRAMWordEnable), .TagWriteEnable(SRAMLineWayWriteEnable), .WriteData(SRAMWriteData), @@ -191,7 +188,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .en(FlushWayCntEn), .val({{NUMWAYS-1{1'b0}}, 1'b1}), .d(NextFlushWay), .q(FlushWay)); assign FlushWayFlag = FlushWay[NUMWAYS-1]; - assign VDWriteEnableWay = FlushWay & {NUMWAYS{VDWriteEnable}}; assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; assign SelectedWay = SelFlush ? FlushWay : (SRAMLineWriteEnable ? 
VictimWay : WayHit); @@ -215,5 +211,5 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .save, .restore, - .VDWriteEnable, .LRUWriteEn); + .LRUWriteEn); endmodule diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index cbde7e018..63c452862 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -75,10 +75,7 @@ module cachefsm output logic FlushAdrCntRst, output logic FlushWayCntRst, output logic save, - output logic restore, - output logic VDWriteEnable - - ); + output logic restore); logic [1:0] PreSelAdr; logic resetDelay; @@ -201,7 +198,6 @@ module cachefsm (CurrState == STATE_MISS_READ_WORD_DELAY & DoAMO) | (CurrState == STATE_MISS_WRITE_WORD); assign SRAMLineWriteEnable = (CurrState == STATE_MISS_WRITE_CACHE_LINE); - assign VDWriteEnable = (CurrState == STATE_FLUSH_CLEAR_DIRTY); assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY); assign LRUWriteEn = (CurrState == STATE_READY & (DoAMOHit | DoReadHit | DoWriteHit)) | (CurrState == STATE_MISS_READ_WORD_DELAY) | diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 3d61e09dc..7cb2084ad 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -38,7 +38,6 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic [$clog2(NUMLINES)-1:0] RAdr, input logic [`PA_BITS-1:0] PAdr, input logic WriteEnable, - input logic VDWriteEnable, input logic [LINELEN/`XLEN-1:0] WriteWordEnable, input logic TagWriteEnable, input logic [LINELEN-1:0] WriteData, @@ -69,7 +68,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic [$clog2(NUMLINES)-1:0] RAdrD; logic SetValidD, ClearValidD; logic SetDirtyD, ClearDirtyD; - logic WriteEnableD, VDWriteEnableD; + logic WriteEnableD; 
///////////////////////////////////////////////////////////////////////////////////////////// // Tag Array @@ -113,8 +112,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, end // *** consider revisiting whether these delays are the best option? flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); - flop #(4) ValidCtrlDelayReg(clk, {SetValid, ClearValid, WriteEnable, VDWriteEnable}, - {SetValidD, ClearValidD, WriteEnableD, VDWriteEnableD}); + flop #(3) ValidCtrlDelayReg(clk, {SetValid, ClearValid, WriteEnable}, + {SetValidD, ClearValidD, WriteEnableD}); assign Valid = ValidBits[RAdrD]; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -125,8 +124,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, if (DIRTY_BITS) begin:dirty always_ff @(posedge clk) begin if (reset) DirtyBits <= #1 {NUMLINES{1'b0}}; - else if (SetDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= #1 1'b1; - else if (ClearDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= #1 1'b0; + else if (SetDirtyD) DirtyBits[RAdrD] <= #1 1'b1; + else if (ClearDirtyD) DirtyBits[RAdrD] <= #1 1'b0; end flop #(2) DirtyCtlDelayReg(clk, {SetDirty, ClearDirty}, {SetDirtyD, ClearDirtyD}); assign Dirty = DirtyBits[RAdrD]; From 190d619940659f563bd53edb34fab37adaee04c5 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Mon, 7 Feb 2022 22:09:56 -0600 Subject: [PATCH 26/37] cachefsm cleanup. 
--- pipelined/src/cache/cache.sv | 2 -- 1 file changed, 2 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 474a91562..5089672c3 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -125,7 +125,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .WriteWordEnable(SRAMWordEnable), .TagWriteEnable(SRAMLineWayWriteEnable), .WriteData(SRAMWriteData), - //.SetValid(SetValidWay), .ClearValid(ClearValidWay), .SetDirty(SetDirtyWay), .ClearDirty(ClearDirtyWay), .SetValid(SetValidWay), .ClearValid(ClearValidWay), .SetDirty(SetDirtyWay), .ClearDirty(ClearDirtyWay), .SelEvict, .Victim(VictimWay), .Flush(FlushWay), .SelFlush, @@ -191,7 +190,6 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; assign SelectedWay = SelFlush ? FlushWay : (SRAMLineWriteEnable ? VictimWay : WayHit); - //assign SelectedWay = SelFlush ? FlushWay : VictimWay; assign SetValidWay = SetValid ? SelectedWay : '0; assign ClearValidWay = ClearValid ? SelectedWay : '0; assign SetDirtyWay = SetDirty ? SelectedWay : '0; From 492c1473f30859af20c7ca601bcc4071b0a46070 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 8 Feb 2022 09:47:01 -0600 Subject: [PATCH 27/37] Preparing to make a major change to the cache's write enables. 
--- pipelined/src/cache/cache.sv | 7 +++++-- pipelined/src/cache/cacheway.sv | 12 ++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index 5089672c3..f70467f4a 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -121,8 +121,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) CacheWays[NUMWAYS-1:0]( .clk, .reset, .RAdr, .PAdr, - .WriteEnable(SRAMWayWriteEnable), - .WriteWordEnable(SRAMWordEnable), + .SRAMWayWriteEnable, + .SRAMWordEnable, .TagWriteEnable(SRAMLineWayWriteEnable), .WriteData(SRAMWriteData), .SetValid(SetValidWay), .ClearValid(ClearValidWay), .SetDirty(SetDirtyWay), .ClearDirty(ClearDirtyWay), @@ -157,9 +157,12 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( ///////////////////////////////////////////////////////////////////////////////////////////// // *** Ross considering restructuring + // move decoder and wordwritenable into cacheway. onehotdecoder #(LOGWPL) adrdec( .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); assign SRAMWordEnable = SRAMLineWriteEnable ? '1 : MemPAdrDecoded; // OR + + assign SRAMLineWayWriteEnable = SRAMLineWriteEnable ? VictimWay : '0; // AND assign SRAMWordWayWriteEnable = SRAMWordWriteEnable ? 
WayHit : '0; // AND mux2 #(NUMWAYS) WriteEnableMux(.d0(SRAMWordWayWriteEnable), .d1(VictimWay), diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 7cb2084ad..09fcbfa29 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -37,8 +37,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic [$clog2(NUMLINES)-1:0] RAdr, input logic [`PA_BITS-1:0] PAdr, - input logic WriteEnable, - input logic [LINELEN/`XLEN-1:0] WriteWordEnable, + input logic SRAMWayWriteEnable, + input logic [LINELEN/`XLEN-1:0] SRAMWordEnable, input logic TagWriteEnable, input logic [LINELEN-1:0] WriteData, input logic SetValid, @@ -68,7 +68,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic [$clog2(NUMLINES)-1:0] RAdrD; logic SetValidD, ClearValidD; logic SetDirtyD, ClearDirtyD; - logic WriteEnableD; + logic SRAMWayWriteEnableD; ///////////////////////////////////////////////////////////////////////////////////////////// // Tag Array @@ -93,7 +93,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, sram1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk(clk), .Adr(RAdr), .ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ), .WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]), - .WriteEnable(WriteEnable & WriteWordEnable[words])); + .WriteEnable(SRAMWayWriteEnable & SRAMWordEnable[words])); end // AND portion of distributed read multiplexers @@ -112,8 +112,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, end // *** consider revisiting whether these delays are the best option? 
flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); - flop #(3) ValidCtrlDelayReg(clk, {SetValid, ClearValid, WriteEnable}, - {SetValidD, ClearValidD, WriteEnableD}); + flop #(3) ValidCtrlDelayReg(clk, {SetValid, ClearValid, SRAMWayWriteEnable}, + {SetValidD, ClearValidD, SRAMWayWriteEnableD}); assign Valid = ValidBits[RAdrD]; ///////////////////////////////////////////////////////////////////////////////////////////// From 9ad3f263655ffcefc4f804569ea731450b8bafbe Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 8 Feb 2022 16:41:11 +0000 Subject: [PATCH 28/37] Restored E tests to makefrag --- pipelined/config/rv32e/wally-config.vh | 4 +- pipelined/testbench/tests.vh | 2 +- .../riscv-test-suite/rv32i_m/I/Makefrag | 37 +++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/pipelined/config/rv32e/wally-config.vh b/pipelined/config/rv32e/wally-config.vh index 2443b7ae9..45d0948a9 100644 --- a/pipelined/config/rv32e/wally-config.vh +++ b/pipelined/config/rv32e/wally-config.vh @@ -49,8 +49,8 @@ `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 // *** replace with MEM_BUS -`define DMEM `MEM_BUS -`define IMEM `MEM_BUS +`define DMEM `MEM_CACHE +`define IMEM `MEM_CACHE `define VIRTMEM_SUPPORTED 0 `define VECTORED_INTERRUPTS_SUPPORTED 0 diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 4a78455ea..9cce5847f 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1498,12 +1498,12 @@ string imperas32f[] = '{ string wally32e[] = '{ `WALLYTEST, + "rv32i_m/I/E-beq-01", "03b010", "rv32i_m/I/E-add-01", "005010", "rv32i_m/I/E-addi-01", "004010", "rv32i_m/I/E-and-01", "005010", "rv32i_m/I/E-andi-01", "004010", "rv32i_m/I/E-auipc-01", "002010", - "rv32i_m/I/E-beq-01", "03b010", "rv32i_m/I/E-bge-01", "034010", "rv32i_m/I/E-bgeu-01", "047010", "rv32i_m/I/E-blt-01", "038010", diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/Makefrag 
b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/Makefrag index eda625075..792ea2e24 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/Makefrag @@ -28,6 +28,43 @@ # Description: Makefrag for RV32I architectural tests rv32i_sc_tests = \ + E-add-01 \ + E-addi-01 \ + E-and-01 \ + E-andi-01 \ + E-auipc-01 \ + E-beq-01 \ + E-bge-01 \ + E-bgeu-01 \ + E-blt-01 \ + E-bltu-01 \ + E-bne-01 \ + E-jal-01 \ + E-jalr-01 \ + E-lb-align-01 \ + E-lbu-align-01 \ + E-lh-align-01 \ + E-lhu-align-01 \ + E-lui-01 \ + E-lw-align-01 \ + E-or-01 \ + E-ori-01 \ + E-sb-align-01 \ + E-sh-align-01 \ + E-sll-01 \ + E-slli-01 \ + E-slt-01 \ + E-slti-01 \ + E-sltiu-01 \ + E-sltu-01 \ + E-sra-01 \ + E-srai-01 \ + E-srl-01 \ + E-srli-01 \ + E-sub-01 \ + E-sw-align-01 \ + E-xor-01 \ + E-xori-01 \ WALLY-ADD \ WALLY-SLT \ WALLY-SLTU \ From c07584bb701f9ecad2e6ffc827604e6e1fd3c6e3 Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 8 Feb 2022 17:59:50 +0000 Subject: [PATCH 29/37] rv32e config update --- pipelined/config/rv32e/wally-config.vh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelined/config/rv32e/wally-config.vh b/pipelined/config/rv32e/wally-config.vh index 45d0948a9..2443b7ae9 100644 --- a/pipelined/config/rv32e/wally-config.vh +++ b/pipelined/config/rv32e/wally-config.vh @@ -49,8 +49,8 @@ `define UARCH_SUPERSCALR 0 `define UARCH_SINGLECYCLE 0 // *** replace with MEM_BUS -`define DMEM `MEM_CACHE -`define IMEM `MEM_CACHE +`define DMEM `MEM_BUS +`define IMEM `MEM_BUS `define VIRTMEM_SUPPORTED 0 `define VECTORED_INTERRUPTS_SUPPORTED 0 From d5d9bb9d4dc25403c6e513e31b08e561480cddd4 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 8 Feb 2022 14:13:55 -0600 Subject: [PATCH 30/37] Temporary commit which gets the no branch predictor implementation working. 
--- pipelined/src/ifu/ifu.sv | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 7be1b56f8..196746361 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -243,19 +243,29 @@ module ifu ( assign PrivilegedChangePCM = RetM | TrapM; + logic SelBPPredF; + logic [`XLEN-1:0] BPPredPCF, PCNext0F; + logic BPPredWrongM; + // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. mux2 #(`XLEN) pccorrectemux(.d0(PCLinkE), .d1(IEUAdrE), .s(PCSrcE), .y(PCCorrectE)); mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(InvalidateICacheM), .y(PCNext2F)); mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), .y(UnalignedPCNextF)); + mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), .d1(BPPredPCF), .s(SelBPPredF), .y(PCNext0F)); + mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); + // Mux only required on instruction class miss prediction. + mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), + .s(BPPredWrongM), .y(PCBPWrongInvalidate)); + + + + assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); // branch and jump predictor if (`BPRED_ENABLED) begin : bpred - logic SelBPPredF; - logic [`XLEN-1:0] BPPredPCF, PCNext0F; - logic BPPredWrongM; bpred bpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, @@ -264,17 +274,12 @@ module ifu ( .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .BPPredWrongM, .BPPredDirWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPPredClassNonCFIWrongM); - mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), .d1(BPPredPCF), .s(SelBPPredF), .y(PCNext0F)); - mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); - // Mux only required on instruction class miss prediction. 
- mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), - .s(BPPredWrongM), .y(PCBPWrongInvalidate)); - end else begin : bpred assign BPPredWrongE = PCSrcE; assign {BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM} = '0; - assign PCNext1F = PCPlus2or4F; - assign PCBPWrongInvalidate = PCE; + assign SelBPPredF = 1'b0; + //assign PCNext1F = PCPlus2or4F; + //assign PCBPWrongInvalidate = PCE; end // pcadder From 3e16730226bec44d098c070c2ea1ef40b9d0c68b Mon Sep 17 00:00:00 2001 From: David Harris Date: Tue, 8 Feb 2022 20:15:23 +0000 Subject: [PATCH 31/37] RAM simplification --- pipelined/src/cache/sram1rw.sv | 1 + pipelined/src/generic/flop/simpleram.sv | 20 ++++++++------------ 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pipelined/src/cache/sram1rw.sv b/pipelined/src/cache/sram1rw.sv index b17aa20d3..921c0af47 100644 --- a/pipelined/src/cache/sram1rw.sv +++ b/pipelined/src/cache/sram1rw.sv @@ -46,6 +46,7 @@ module sram1rw #(parameter DEPTH=128, WIDTH=256) ( logic WriteEnableD; //*** model as single port + // *** merge with simpleram always_ff @(posedge clk) begin AddrD <= Adr; WriteDataD <= WriteData; /// ****** this is not right. there should not need to be a delay. Implement alternative cache stall to avoid this. 
Eliminates a bunch of delay flops elsewhere diff --git a/pipelined/src/generic/flop/simpleram.sv b/pipelined/src/generic/flop/simpleram.sv index 43b873567..3ad367bd5 100644 --- a/pipelined/src/generic/flop/simpleram.sv +++ b/pipelined/src/generic/flop/simpleram.sv @@ -40,18 +40,14 @@ module simpleram #(parameter BASE=0, RANGE = 65535) ( logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)]; - /* verilator lint_off WIDTH */ - if (`XLEN == 64) begin:ramrw - always_ff @(posedge clk) begin - rd <= RAM[a[31:3]]; - if (we) RAM[a[31:3]] <= #1 wd; - end - end else begin - always_ff @(posedge clk) begin:ramrw - rd <= RAM[a[31:2]]; - if (we) RAM[a[31:2]] <= #1 wd; - end + // discard bottom 2 or 3 bits of address offset within word or doubleword + localparam adrlsb = (`XLEN==64) ? 3 : 2; + logic [31:adrlsb] adrmsbs; + assign adrmsbs = a[31:adrlsb]; + + always_ff @(posedge clk) begin + rd <= RAM[adrmsbs]; + if (we) RAM[adrmsbs] <= #1 wd; end - /* verilator lint_on WIDTH */ endmodule From 39149c618f78c044e9f5fec28af1b8c4f5675e0c Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 8 Feb 2022 14:17:44 -0600 Subject: [PATCH 32/37] Moved some muxes back into the bp. --- pipelined/src/ifu/ifu.sv | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 196746361..27b56f219 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -243,20 +243,15 @@ module ifu ( assign PrivilegedChangePCM = RetM | TrapM; - logic SelBPPredF; logic [`XLEN-1:0] BPPredPCF, PCNext0F; - logic BPPredWrongM; + // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. 
mux2 #(`XLEN) pccorrectemux(.d0(PCLinkE), .d1(IEUAdrE), .s(PCSrcE), .y(PCCorrectE)); mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(InvalidateICacheM), .y(PCNext2F)); mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), .y(UnalignedPCNextF)); - mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), .d1(BPPredPCF), .s(SelBPPredF), .y(PCNext0F)); mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); - // Mux only required on instruction class miss prediction. - mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), - .s(BPPredWrongM), .y(PCBPWrongInvalidate)); @@ -266,18 +261,24 @@ module ifu ( // branch and jump predictor if (`BPRED_ENABLED) begin : bpred - + logic BPPredWrongM; + logic SelBPPredF; bpred bpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushF, .FlushD, .FlushE, .FlushM, .InstrD, .PCNextF, .BPPredPCF, .SelBPPredF, .PCE, .PCSrcE, .IEUAdrE, .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .BPPredWrongM, .BPPredDirWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPPredClassNonCFIWrongM); + + mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), .d1(BPPredPCF), .s(SelBPPredF), .y(PCNext0F)); + // Mux only required on instruction class miss prediction. + mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), + .s(BPPredWrongM), .y(PCBPWrongInvalidate)); end else begin : bpred assign BPPredWrongE = PCSrcE; assign {BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM} = '0; - assign SelBPPredF = 1'b0; + assign PCNext0F = PCPlus2or4F; //assign PCNext1F = PCPlus2or4F; //assign PCBPWrongInvalidate = PCE; end From cecbb3362dbc9ac51e745171a98c53a6ccc1d3cf Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 8 Feb 2022 14:21:55 -0600 Subject: [PATCH 33/37] rv32e works for now. Still need to optimize. 
--- pipelined/src/ifu/ifu.sv | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index 27b56f219..d9a296687 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -251,10 +251,8 @@ module ifu ( mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(InvalidateICacheM), .y(PCNext2F)); mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), .y(UnalignedPCNextF)); - mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); - - - + // *** moved outside the bp. clean up. Should not be able to remove without bp. + mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); From e0a605e95dc27b2c37f398d13eae04547bca5f6e Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 8 Feb 2022 14:54:53 -0600 Subject: [PATCH 34/37] Cleanup IFU. 
--- pipelined/src/ifu/ifu.sv | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index d9a296687..f7b96cd60 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -113,6 +113,8 @@ module ifu ( logic ICacheStallF, IFUCacheBusStallF; logic CPUBusy; (* mark_debug = "true" *) logic [31:0] PostSpillInstrRawF; + // branch predictor signal + logic [`XLEN-1:0] PCNext1F, PCNext2F, PCNext0F; assign PCFExt = {2'b00, PCFSpill}; @@ -232,9 +234,6 @@ module ifu ( end end - // branch predictor signal - logic [`XLEN-1:0] PCNext1F, PCNext2F; - assign IFUCacheBusStallF = ICacheStallF | BusStall; assign IFUStallF = IFUCacheBusStallF | SelNextSpillF; assign CPUBusy = StallF & ~SelNextSpillF; @@ -243,17 +242,10 @@ module ifu ( assign PrivilegedChangePCM = RetM | TrapM; - logic [`XLEN-1:0] BPPredPCF, PCNext0F; - - - // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. - mux2 #(`XLEN) pccorrectemux(.d0(PCLinkE), .d1(IEUAdrE), .s(PCSrcE), .y(PCCorrectE)); + mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(InvalidateICacheM), .y(PCNext2F)); mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), .y(UnalignedPCNextF)); - // *** moved outside the bp. clean up. Should not be able to remove without bp. - mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); - assign PCNextF = {UnalignedPCNextF[`XLEN-1:1], 1'b0}; // hart-SPEC p. 
21 about 16-bit alignment flopenl #(`XLEN) pcreg(clk, reset, ~StallF, PCNextF, `RESET_VECTOR, PCF); @@ -261,6 +253,7 @@ module ifu ( if (`BPRED_ENABLED) begin : bpred logic BPPredWrongM; logic SelBPPredF; + logic [`XLEN-1:0] BPPredPCF; bpred bpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushF, .FlushD, .FlushE, .FlushM, @@ -272,13 +265,15 @@ module ifu ( // Mux only required on instruction class miss prediction. mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), .s(BPPredWrongM), .y(PCBPWrongInvalidate)); + // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. + mux2 #(`XLEN) pccorrectemux(.d0(PCLinkE), .d1(IEUAdrE), .s(PCSrcE), .y(PCCorrectE)); end else begin : bpred assign BPPredWrongE = PCSrcE; assign {BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM} = '0; assign PCNext0F = PCPlus2or4F; - //assign PCNext1F = PCPlus2or4F; - //assign PCBPWrongInvalidate = PCE; + assign PCCorrectE = IEUAdrE; + assign PCBPWrongInvalidate = PCE; end // pcadder From 6a82ee057921e7a9afa945183b91dd91316b5829 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 8 Feb 2022 15:23:44 -0600 Subject: [PATCH 35/37] Fixed debug2.xdc to match wally changes. 
--- fpga/constraints/debug2.xdc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fpga/constraints/debug2.xdc b/fpga/constraints/debug2.xdc index 63fa17a40..19d0eae5f 100644 --- a/fpga/constraints/debug2.xdc +++ b/fpga/constraints/debug2.xdc @@ -446,7 +446,7 @@ connect_debug_port u_ila_0/probe98 [get_nets [list wallypipelinedsoc/core/hzu/Fl create_debug_port u_ila_0 probe set_property port_width 4 [get_debug_ports u_ila_0/probe99] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe99] -connect_debug_port u_ila_0/probe99 [get_nets [list {wallypipelinedsoc/core/ifu/icache.icache/cachefsm/CurrState[0]} {wallypipelinedsoc/core/ifu/icache.icache/cachefsm/CurrState[1]} {wallypipelinedsoc/core/ifu/icache.icache/cachefsm/CurrState[2]} {wallypipelinedsoc/core/ifu/icache.icache/cachefsm/CurrState[3]}]] +connect_debug_port u_ila_0/probe99 [get_nets [list {wallypipelinedsoc/core/ifu/bus.icache.icache/cachefsm/CurrState[0]} {wallypipelinedsoc/core/ifu/bus.icache.icache/cachefsm/CurrState[1]} {wallypipelinedsoc/core/ifu/bus.icache.icache/cachefsm/CurrState[2]} {wallypipelinedsoc/core/ifu/bus.icache.icache/cachefsm/CurrState[3]}]] create_debug_port u_ila_0 probe @@ -555,7 +555,7 @@ connect_debug_port u_ila_0/probe119 [get_nets [list wallypipelinedsoc/core/lsu/D create_debug_port u_ila_0 probe set_property port_width 11 [get_debug_ports u_ila_0/probe120] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe120] -connect_debug_port u_ila_0/probe120 [get_nets [list {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[0]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[1]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[2]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[3]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[4]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[5]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[6]} 
{wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[7]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[8]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[9]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[10]}]] +connect_debug_port u_ila_0/probe120 [get_nets [list {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[0]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[1]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[2]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[3]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[4]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[5]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[6]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[7]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[8]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[9]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[10]}]] create_debug_port u_ila_0 probe @@ -574,23 +574,23 @@ connect_debug_port u_ila_0/probe122 [get_nets [list {wallypipelinedsoc/core/ifu/ create_debug_port u_ila_0 probe set_property port_width 3 [get_debug_ports u_ila_0/probe123] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe123] -connect_debug_port u_ila_0/probe123 [get_nets [list {wallypipelinedsoc/core/ifu/bus.busfsm/BusCurrState[0]} {wallypipelinedsoc/core/ifu/bus.busfsm/BusCurrState[1]} {wallypipelinedsoc/core/ifu/bus.busfsm/BusCurrState[2]} ]] +connect_debug_port u_ila_0/probe123 [get_nets [list {wallypipelinedsoc/core/ifu/bus.busdp/busfsm/BusCurrState[0]} {wallypipelinedsoc/core/ifu/bus.busdp/busfsm/BusCurrState[1]} {wallypipelinedsoc/core/ifu/bus.busdp/busfsm/BusCurrState[2]} ]] create_debug_port u_ila_0 probe set_property port_width 1 
[get_debug_ports u_ila_0/probe124] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe124] -connect_debug_port u_ila_0/probe124 [get_nets [list wallypipelinedsoc/core/ifu/SpillSupport.CurrState[0] ]] +connect_debug_port u_ila_0/probe124 [get_nets [list wallypipelinedsoc/core/ifu/SpillSupport.spillsupport/CurrState[0] ]] create_debug_port u_ila_0 probe set_property port_width 3 [get_debug_ports u_ila_0/probe125] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe125] -connect_debug_port u_ila_0/probe125 [get_nets [list {wallypipelinedsoc/core/lsu/bus.busfsm/BusCurrState[0]} {wallypipelinedsoc/core/lsu/bus.busfsm/BusCurrState[1]} {wallypipelinedsoc/core/lsu/bus.busfsm/BusCurrState[2]} ]] +connect_debug_port u_ila_0/probe125 [get_nets [list {wallypipelinedsoc/core/lsu/bus.busdp/busfsm/BusCurrState[0]} {wallypipelinedsoc/core/lsu/bus.busdp/busfsm/BusCurrState[1]} {wallypipelinedsoc/core/lsu/bus.busdp/busfsm/BusCurrState[2]} ]] create_debug_port u_ila_0 probe set_property port_width 3 [get_debug_ports u_ila_0/probe126] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe126] -connect_debug_port u_ila_0/probe126 [get_nets [list {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.interlockfsm/InterlockCurrState[0]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.interlockfsm/InterlockCurrState[1]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.interlockfsm/InterlockCurrState[2]} ]] +connect_debug_port u_ila_0/probe126 [get_nets [list {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/interlockfsm/InterlockCurrState[0]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/interlockfsm/InterlockCurrState[1]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/interlockfsm/InterlockCurrState[2]} ]] create_debug_port u_ila_0 probe From 498388c63699996f069d015aee7bfb05c372bd25 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 8 Feb 2022 17:52:09 -0600 Subject: [PATCH 36/37] Cache cleanup write enables. 
--- pipelined/src/cache/cache.sv | 46 ++++++++++++++------------------- pipelined/src/cache/cachefsm.sv | 19 +++++++++----- pipelined/src/cache/cacheway.sv | 30 +++++++++++++++------ 3 files changed, 53 insertions(+), 42 deletions(-) diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index f70467f4a..1ddb53f23 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -62,7 +62,7 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( localparam LINEBYTELEN = LINELEN/8; localparam OFFSETLEN = $clog2(LINEBYTELEN); localparam SETLEN = $clog2(NUMLINES); - localparam SETTOP = SETLEN+OFFSETLEN; + localparam SETTOP = SETLEN+OFFSETLEN; localparam TAGLEN = `PA_BITS - SETTOP; localparam WORDSPERLINE = LINELEN/`XLEN; localparam LOGWPL = $clog2(WORDSPERLINE); @@ -77,12 +77,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [NUMWAYS-1:0] WayHit; logic CacheHit; - logic [WORDSPERLINE-1:0] SRAMWordEnable; - logic SRAMWordWriteEnable; - logic SRAMLineWriteEnable; + logic FSMWordWriteEn; + logic FSMLineWriteEn; logic [NUMWAYS-1:0] SRAMLineWayWriteEnable; - logic [NUMWAYS-1:0] SRAMWayWriteEnable; - logic [NUMWAYS-1:0] SRAMWordWayWriteEnable; logic [NUMWAYS-1:0] VictimWay; logic [NUMWAYS-1:0] VictimDirtyWay; logic VictimDirty; @@ -106,7 +103,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( logic [NUMWAYS-1:0] WayHitSaved, WayHitRaw; logic [LINELEN-1:0] ReadDataLineRaw, ReadDataLineSaved; logic [NUMWAYS-1:0] SelectedWay; - logic [NUMWAYS-1:0] SetValidWay, ClearValidWay, SetDirtyWay, ClearDirtyWay; + logic [NUMWAYS-1:0] SetValidWay, ClearValidWay, SetDirtyWay, ClearDirtyWay; + logic [NUMWAYS-1:0] WriteWordWayEn, WriteLineWayEn; ///////////////////////////////////////////////////////////////////////////////////////////// // Read Path @@ -121,9 +119,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( // Array of 
cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) CacheWays[NUMWAYS-1:0]( .clk, .reset, .RAdr, .PAdr, - .SRAMWayWriteEnable, - .SRAMWordEnable, - .TagWriteEnable(SRAMLineWayWriteEnable), + .WriteWordEn(WriteWordWayEn), + .WriteLineEn(WriteLineWayEn), .WriteData(SRAMWriteData), .SetValid(SetValidWay), .ClearValid(ClearValidWay), .SetDirty(SetDirtyWay), .ClearDirty(ClearDirtyWay), .SelEvict, .Victim(VictimWay), .Flush(FlushWay), @@ -153,22 +150,11 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( end else assign WayHit = WayHitRaw; ///////////////////////////////////////////////////////////////////////////////////////////// - // Write Path: Write Enables + // Write Path: Write data and address. Muxes between writes from bus and writes from CPU. ///////////////////////////////////////////////////////////////////////////////////////////// - - // *** Ross considering restructuring - // move decoder and wordwritenable into cacheway. - onehotdecoder #(LOGWPL) adrdec( - .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); - assign SRAMWordEnable = SRAMLineWriteEnable ? '1 : MemPAdrDecoded; // OR - - assign SRAMLineWayWriteEnable = SRAMLineWriteEnable ? VictimWay : '0; // AND - assign SRAMWordWayWriteEnable = SRAMWordWriteEnable ? 
WayHit : '0; // AND - mux2 #(NUMWAYS) WriteEnableMux(.d0(SRAMWordWayWriteEnable), .d1(VictimWay), - .s(SRAMLineWriteEnable), .y(SRAMWayWriteEnable)); mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1(CacheMemWriteData), .s(SRAMLineWriteEnable), .y(SRAMWriteData)); + .d1(CacheMemWriteData), .s(FSMLineWriteEn), .y(SRAMWriteData)); mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), .d2({VictimTag, FlushAdr, {{OFFSETLEN}{1'b0}}}), @@ -192,11 +178,17 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( assign FlushWayFlag = FlushWay[NUMWAYS-1]; assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; - assign SelectedWay = SelFlush ? FlushWay : (SRAMLineWriteEnable ? VictimWay : WayHit); + ///////////////////////////////////////////////////////////////////////////////////////////// + // Write Path: Write Enables + ///////////////////////////////////////////////////////////////////////////////////////////// + + assign SelectedWay = SelFlush ? FlushWay : (FSMLineWriteEn ? VictimWay : WayHit); assign SetValidWay = SetValid ? SelectedWay : '0; assign ClearValidWay = ClearValid ? SelectedWay : '0; assign SetDirtyWay = SetDirty ? SelectedWay : '0; - assign ClearDirtyWay = ClearDirty ? SelectedWay : '0; + assign ClearDirtyWay = ClearDirty ? SelectedWay : '0; + assign WriteWordWayEn = FSMWordWriteEn ? SelectedWay : '0; + assign WriteLineWayEn = FSMLineWriteEn ? 
SelectedWay : '0; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -207,8 +199,8 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .RW, .Atomic, .CPUBusy, .IgnoreRequest, .CacheHit, .VictimDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, .SetValid, - .ClearValid, .SetDirty, .ClearDirty, .SRAMWordWriteEnable, - .SRAMLineWriteEnable, .SelEvict, .SelFlush, + .ClearValid, .SetDirty, .ClearDirty, .FSMWordWriteEn, + .FSMLineWriteEn, .SelEvict, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, .save, .restore, diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 63c452862..3947309bf 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -65,8 +65,8 @@ module cachefsm output logic ClearValid, output logic SetDirty, output logic ClearDirty, - output logic SRAMWordWriteEnable, - output logic SRAMLineWriteEnable, + output logic FSMWordWriteEn, + output logic FSMLineWriteEn, output logic SelEvict, output logic LRUWriteEn, output logic SelFlush, @@ -174,6 +174,7 @@ module cachefsm endcase end + // com back to CPU assign CacheCommitted = CurrState != STATE_READY; assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAMOMiss | DoReadMiss | DoWriteMiss)) | (CurrState == STATE_MISS_FETCH_WDV) | @@ -186,6 +187,7 @@ module cachefsm (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushFlag)); + // write enables internal to cache assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; assign ClearValid = '0; assign SetDirty = (CurrState == STATE_READY & DoAMO) | @@ -194,14 +196,15 @@ module cachefsm (CurrState == STATE_MISS_WRITE_WORD); assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE) | (CurrState == STATE_FLUSH_CLEAR_DIRTY); - assign SRAMWordWriteEnable = (CurrState == 
STATE_READY & (DoAMOHit | DoWriteHit)) | - (CurrState == STATE_MISS_READ_WORD_DELAY & DoAMO) | - (CurrState == STATE_MISS_WRITE_WORD); - assign SRAMLineWriteEnable = (CurrState == STATE_MISS_WRITE_CACHE_LINE); - assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY); + assign FSMWordWriteEn = (CurrState == STATE_READY & (DoAMOHit | DoWriteHit)) | + (CurrState == STATE_MISS_READ_WORD_DELAY & DoAMO) | + (CurrState == STATE_MISS_WRITE_WORD); + assign FSMLineWriteEn = (CurrState == STATE_MISS_WRITE_CACHE_LINE); assign LRUWriteEn = (CurrState == STATE_READY & (DoAMOHit | DoReadHit | DoWriteHit)) | (CurrState == STATE_MISS_READ_WORD_DELAY) | (CurrState == STATE_MISS_WRITE_WORD); + // Flush and eviction controls + assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY); assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | (CurrState == STATE_FLUSH_CLEAR_DIRTY); @@ -211,9 +214,11 @@ module cachefsm (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushFlag)); assign FlushAdrCntRst = (CurrState == STATE_READY & DoFlush); assign FlushWayCntRst = (CurrState == STATE_READY & DoFlush) | (CurrState == STATE_FLUSH_INCR); + // Bus interface controls assign CacheFetchLine = (CurrState == STATE_READY & (DoAMOMiss | DoWriteMiss | DoReadMiss)); assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_DONE & VictimDirty) | (CurrState == STATE_FLUSH_CHECK & VictimDirty); + // handle cpu stall. 
assign restore = ((CurrState == STATE_CPU_BUSY) | (CurrState == STATE_CPU_BUSY_FINISH_AMO)) & ~`REPLAY; assign save = ((CurrState == STATE_READY & (DoAMOHit | DoReadHit | DoWriteHit) & CPUBusy) | (CurrState == STATE_MISS_READ_WORD_DELAY & (DoAMO | DoRead) & CPUBusy) | diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 09fcbfa29..cb53a8160 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -37,9 +37,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, input logic [$clog2(NUMLINES)-1:0] RAdr, input logic [`PA_BITS-1:0] PAdr, - input logic SRAMWayWriteEnable, - input logic [LINELEN/`XLEN-1:0] SRAMWordEnable, - input logic TagWriteEnable, + input logic WriteWordEn, + input logic WriteLineEn, input logic [LINELEN-1:0] WriteData, input logic SetValid, input logic ClearValid, @@ -56,6 +55,10 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, output logic VictimDirty, output logic [TAGLEN-1:0] VictimTag); + localparam WORDSPERLINE = LINELEN/`XLEN; + localparam LOGWPL = $clog2(WORDSPERLINE); + localparam LOGXLENBYTES = $clog2(`XLEN/8); + logic [NUMLINES-1:0] ValidBits; logic [NUMLINES-1:0] DirtyBits; logic [LINELEN-1:0] ReadDataLine; @@ -68,7 +71,18 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic [$clog2(NUMLINES)-1:0] RAdrD; logic SetValidD, ClearValidD; logic SetDirtyD, ClearDirtyD; - logic SRAMWayWriteEnableD; + + logic [2**LOGWPL-1:0] MemPAdrDecoded; + logic [LINELEN/`XLEN-1:0] SelectedWriteWordEn; + + + ///////////////////////////////////////////////////////////////////////////////////////////// + // Write Enable demux + ///////////////////////////////////////////////////////////////////////////////////////////// + onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); + // If writing the whole line set all write enables to 1, else only set the 
correct word. + assign SelectedWriteWordEn = WriteLineEn ? '1 : WriteWordEn ? MemPAdrDecoded : '0; // OR-AND ///////////////////////////////////////////////////////////////////////////////////////////// // Tag Array @@ -76,7 +90,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, sram1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk(clk), .Adr(RAdr), .ReadData(ReadTag), - .WriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(TagWriteEnable)); + .WriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(WriteLineEn)); // AND portion of distributed tag multiplexer assign SelTag = SelFlush ? Flush : Victim; @@ -93,7 +107,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, sram1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk(clk), .Adr(RAdr), .ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ), .WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]), - .WriteEnable(SRAMWayWriteEnable & SRAMWordEnable[words])); + .WriteEnable(SelectedWriteWordEn[words])); end // AND portion of distributed read multiplexers @@ -112,8 +126,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, end // *** consider revisiting whether these delays are the best option? flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); - flop #(3) ValidCtrlDelayReg(clk, {SetValid, ClearValid, SRAMWayWriteEnable}, - {SetValidD, ClearValidD, SRAMWayWriteEnableD}); + flop #(2) ValidCtrlDelayReg(clk, {SetValid, ClearValid}, + {SetValidD, ClearValidD}); assign Valid = ValidBits[RAdrD]; ///////////////////////////////////////////////////////////////////////////////////////////// From 01126535db8e19bb62278c1dc9e280ce472d306a Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Tue, 8 Feb 2022 18:17:31 -0600 Subject: [PATCH 37/37] Annotated the final changes required to move sram address off the critical path.
--- pipelined/src/cache/cachefsm.sv | 16 ++++++++-------- pipelined/src/cache/cacheway.sv | 11 +++++------ pipelined/src/cache/sram1rw.sv | 19 +++++++++++-------- pipelined/src/lsu/interlockfsm.sv | 3 +-- 4 files changed, 25 insertions(+), 24 deletions(-) diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 3947309bf..90a4974e0 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -105,14 +105,14 @@ module cachefsm (* mark_debug = "true" *) statetype CurrState, NextState; - assign DoFlush = FlushCache & ~IgnoreRequest; - assign DoAMO = Atomic[1] & (&RW) & ~IgnoreRequest; + assign DoFlush = FlushCache & ~IgnoreRequest; // *** have to fix ignorerequest timing path + assign DoAMO = Atomic[1] & (&RW) & ~IgnoreRequest; // *** assign DoAMOHit = DoAMO & CacheHit; - assign DoAMOMiss = DoAMOHit & ~CacheHit; - assign DoRead = RW[1] & ~IgnoreRequest; + assign DoAMOMiss = DoAMO & ~CacheHit; + assign DoRead = RW[1] & ~IgnoreRequest; // *** assign DoReadHit = DoRead & CacheHit; assign DoReadMiss = DoRead & ~CacheHit; - assign DoWrite = RW[0] & ~IgnoreRequest; + assign DoWrite = RW[0] & ~IgnoreRequest; // *** assign DoWriteHit = DoWrite & CacheHit; assign DoWriteMiss = DoWrite & ~CacheHit; @@ -225,15 +225,15 @@ module cachefsm (CurrState == STATE_MISS_WRITE_WORD & DoWrite & CPUBusy)) & ~`REPLAY; // **** can this be simplified? - assign PreSelAdr = ((CurrState == STATE_READY & IgnoreRequest) | - (CurrState == STATE_READY & DoAMOHit) | + assign PreSelAdr = ((CurrState == STATE_READY & IgnoreRequest) | // *** ignorerequest comes from TrapM. Have to fix. why is ignorerequest here anyway? 
+ (CurrState == STATE_READY & DoAMOHit) | // also depends on ignorerequest (CurrState == STATE_READY & DoReadHit & (CPUBusy & `REPLAY)) | (CurrState == STATE_READY & DoWriteHit) | (CurrState == STATE_MISS_FETCH_WDV) | (CurrState == STATE_MISS_FETCH_DONE) | (CurrState == STATE_MISS_WRITE_CACHE_LINE) | (CurrState == STATE_MISS_READ_WORD) | - (CurrState == STATE_MISS_READ_WORD_DELAY & (DoAMO | (CPUBusy & `REPLAY))) | + (CurrState == STATE_MISS_READ_WORD_DELAY & (DoAMO | (CPUBusy & `REPLAY))) | // *** (CurrState == STATE_MISS_WRITE_WORD) | (CurrState == STATE_MISS_EVICT_DIRTY) | (CurrState == STATE_CPU_BUSY & (CPUBusy & `REPLAY)) | diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index cb53a8160..d9f468a92 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -121,13 +121,12 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, always_ff @(posedge clk) begin // Valid bit array, if (reset | InvalidateAll) ValidBits <= #1 '0; - else if (SetValidD) ValidBits[RAdrD] <= #1 1'b1; - else if (ClearValidD) ValidBits[RAdrD] <= #1 1'b0; + else if (SetValid) ValidBits[RAdr] <= #1 1'b1; + else if (ClearValid) ValidBits[RAdr] <= #1 1'b0; end // *** consider revisiting whether these delays are the best option? 
flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); - flop #(2) ValidCtrlDelayReg(clk, {SetValid, ClearValid}, - {SetValidD, ClearValidD}); + //flop #(2) ValidCtrlDelayReg(clk, {SetValid, ClearValid}, {SetValidD, ClearValidD}); assign Valid = ValidBits[RAdrD]; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -138,8 +137,8 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, if (DIRTY_BITS) begin:dirty always_ff @(posedge clk) begin if (reset) DirtyBits <= #1 {NUMLINES{1'b0}}; - else if (SetDirtyD) DirtyBits[RAdrD] <= #1 1'b1; - else if (ClearDirtyD) DirtyBits[RAdrD] <= #1 1'b0; + else if (SetDirty) DirtyBits[RAdr] <= #1 1'b1; + else if (ClearDirty) DirtyBits[RAdr] <= #1 1'b0; end flop #(2) DirtyCtlDelayReg(clk, {SetDirty, ClearDirty}, {SetDirtyD, ClearDirtyD}); assign Dirty = DirtyBits[RAdrD]; diff --git a/pipelined/src/cache/sram1rw.sv b/pipelined/src/cache/sram1rw.sv index 921c0af47..41dcbee04 100644 --- a/pipelined/src/cache/sram1rw.sv +++ b/pipelined/src/cache/sram1rw.sv @@ -41,22 +41,25 @@ module sram1rw #(parameter DEPTH=128, WIDTH=256) ( output logic [WIDTH-1:0] ReadData); logic [WIDTH-1:0] StoredData[DEPTH-1:0]; - logic [$clog2(DEPTH)-1:0] AddrD; + logic [$clog2(DEPTH)-1:0] AdrD; logic [WIDTH-1:0] WriteDataD; logic WriteEnableD; //*** model as single port // *** merge with simpleram always_ff @(posedge clk) begin - AddrD <= Adr; - WriteDataD <= WriteData; /// ****** this is not right. there should not need to be a delay. Implement alternative cache stall to avoid this. Eliminates a bunch of delay flops elsewhere - WriteEnableD <= WriteEnable; - if (WriteEnableD) begin - StoredData[AddrD] <= #1 WriteDataD; - end + AdrD <= Adr; + //WriteDataD <= WriteData; /// ****** this is not right. there should not need to be a delay. Implement alternative cache stall to avoid this. 
Eliminates a bunch of delay flops elsewhere + //WriteEnableD <= WriteEnable; + //if (WriteEnableD) begin + //StoredData[AddrD] <= #1 WriteDataD; + //end + if (WriteEnable) begin + StoredData[Adr] <= #1 WriteData; + end end - assign ReadData = StoredData[AddrD]; + assign ReadData = StoredData[AdrD]; /* always_ff @(posedge clk) begin ReadData <= RAM[Adr]; diff --git a/pipelined/src/lsu/interlockfsm.sv b/pipelined/src/lsu/interlockfsm.sv index bed28563d..5d53bd776 100644 --- a/pipelined/src/lsu/interlockfsm.sv +++ b/pipelined/src/lsu/interlockfsm.sv @@ -112,7 +112,6 @@ module interlockfsm assign SelHPTW = (InterlockCurrState == STATE_T3_DTLB_MISS) | (InterlockCurrState == STATE_T4_ITLB_MISS) | (InterlockCurrState == STATE_T5_ITLB_MISS) | (InterlockCurrState == STATE_T7_DITLB_MISS); assign IgnoreRequest = (InterlockCurrState == STATE_T0_READY & (ITLBMissF | DTLBMissM | TrapM)) | - ((InterlockCurrState == STATE_T0_REPLAY) - & (TrapM)); + ((InterlockCurrState == STATE_T0_REPLAY) & (TrapM)); endmodule