diff --git a/fpga/constraints/debug2.xdc b/fpga/constraints/debug2.xdc index 48046d1df..19d0eae5f 100644 --- a/fpga/constraints/debug2.xdc +++ b/fpga/constraints/debug2.xdc @@ -282,7 +282,7 @@ connect_debug_port u_ila_0/probe65 [get_nets [list wallypipelinedsoc/core/priv.p create_debug_port u_ila_0 probe set_property port_width 1 [get_debug_ports u_ila_0/probe66] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe66] -connect_debug_port u_ila_0/probe66 [get_nets [list wallypipelinedsoc/core/priv.priv/trap/StorePageFaultM ]] +connect_debug_port u_ila_0/probe66 [get_nets [list wallypipelinedsoc/core/priv.priv/trap/StoreAmoPageFaultM ]] create_debug_port u_ila_0 probe set_property port_width 1 [get_debug_ports u_ila_0/probe67] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe67] @@ -446,7 +446,7 @@ connect_debug_port u_ila_0/probe98 [get_nets [list wallypipelinedsoc/core/hzu/Fl create_debug_port u_ila_0 probe set_property port_width 4 [get_debug_ports u_ila_0/probe99] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe99] -connect_debug_port u_ila_0/probe99 [get_nets [list {wallypipelinedsoc/core/ifu/icache.icache/cachefsm/CurrState[0]} {wallypipelinedsoc/core/ifu/icache.icache/cachefsm/CurrState[1]} {wallypipelinedsoc/core/ifu/icache.icache/cachefsm/CurrState[2]} {wallypipelinedsoc/core/ifu/icache.icache/cachefsm/CurrState[3]}]] +connect_debug_port u_ila_0/probe99 [get_nets [list {wallypipelinedsoc/core/ifu/bus.icache.icache/cachefsm/CurrState[0]} {wallypipelinedsoc/core/ifu/bus.icache.icache/cachefsm/CurrState[1]} {wallypipelinedsoc/core/ifu/bus.icache.icache/cachefsm/CurrState[2]} {wallypipelinedsoc/core/ifu/bus.icache.icache/cachefsm/CurrState[3]}]] create_debug_port u_ila_0 probe @@ -555,7 +555,7 @@ connect_debug_port u_ila_0/probe119 [get_nets [list wallypipelinedsoc/core/lsu/D create_debug_port u_ila_0 probe set_property port_width 11 [get_debug_ports u_ila_0/probe120] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe120] -connect_debug_port u_ila_0/probe120 [get_nets [list {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[0]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[1]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[2]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[3]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[4]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[5]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[6]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[7]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[8]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[9]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.hptw/WalkerState[10]}]] +connect_debug_port u_ila_0/probe120 [get_nets [list {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[0]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[1]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[2]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[3]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[4]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[5]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[6]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[7]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[8]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[9]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/hptw/WalkerState[10]}]] create_debug_port u_ila_0 probe @@ -574,23 +574,23 @@ connect_debug_port u_ila_0/probe122 [get_nets [list {wallypipelinedsoc/core/ifu/ create_debug_port u_ila_0 probe set_property port_width 3 [get_debug_ports u_ila_0/probe123] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe123] -connect_debug_port u_ila_0/probe123 [get_nets [list {wallypipelinedsoc/core/ifu/bus.busfsm/BusCurrState[0]} {wallypipelinedsoc/core/ifu/bus.busfsm/BusCurrState[1]} {wallypipelinedsoc/core/ifu/bus.busfsm/BusCurrState[2]} ]] +connect_debug_port u_ila_0/probe123 [get_nets [list {wallypipelinedsoc/core/ifu/bus.busdp/busfsm/BusCurrState[0]} {wallypipelinedsoc/core/ifu/bus.busdp/busfsm/BusCurrState[1]} {wallypipelinedsoc/core/ifu/bus.busdp/busfsm/BusCurrState[2]} ]] create_debug_port u_ila_0 probe set_property port_width 1 [get_debug_ports u_ila_0/probe124] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe124] -connect_debug_port u_ila_0/probe124 [get_nets [list wallypipelinedsoc/core/ifu/SpillSupport.CurrState[0] ]] +connect_debug_port u_ila_0/probe124 [get_nets [list wallypipelinedsoc/core/ifu/SpillSupport.spillsupport/CurrState[0] ]] create_debug_port u_ila_0 probe set_property port_width 3 [get_debug_ports u_ila_0/probe125] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe125] -connect_debug_port u_ila_0/probe125 [get_nets [list {wallypipelinedsoc/core/lsu/bus.busfsm/BusCurrState[0]} {wallypipelinedsoc/core/lsu/bus.busfsm/BusCurrState[1]} {wallypipelinedsoc/core/lsu/bus.busfsm/BusCurrState[2]} ]] +connect_debug_port u_ila_0/probe125 [get_nets [list {wallypipelinedsoc/core/lsu/bus.busdp/busfsm/BusCurrState[0]} {wallypipelinedsoc/core/lsu/bus.busdp/busfsm/BusCurrState[1]} {wallypipelinedsoc/core/lsu/bus.busdp/busfsm/BusCurrState[2]} ]] create_debug_port u_ila_0 probe set_property port_width 3 [get_debug_ports u_ila_0/probe126] set_property PROBE_TYPE DATA_AND_TRIGGER [get_debug_ports u_ila_0/probe126] -connect_debug_port u_ila_0/probe126 [get_nets [list {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.interlockfsm/InterlockCurrState[0]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.interlockfsm/InterlockCurrState[1]} {wallypipelinedsoc/core/lsu/MEM_VIRTMEM.interlockfsm/InterlockCurrState[2]} ]] +connect_debug_port u_ila_0/probe126 [get_nets [list {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/interlockfsm/InterlockCurrState[0]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/interlockfsm/InterlockCurrState[1]} {wallypipelinedsoc/core/lsu/VIRTMEM_SUPPORTED.lsuvirtmem/interlockfsm/InterlockCurrState[2]} ]] create_debug_port u_ila_0 probe diff --git a/fpga/generator/xlnx_ahblite_axi_bridge.tcl b/fpga/generator/xlnx_ahblite_axi_bridge.tcl index ab3839820..317a62c09 100644 --- a/fpga/generator/xlnx_ahblite_axi_bridge.tcl +++ b/fpga/generator/xlnx_ahblite_axi_bridge.tcl @@ -1,9 +1,15 @@ #set partNumber $::env(XILINX_PART) #set boardNmae $::env(XILINX_BOARD) + +# vcu118 board set partNumber xcvu9p-flga2104-2L-e set boardName xilinx.com:vcu118:part0:2.4 +# kcu105 board +#set partNumber xcku040-ffva1156-2-e +#set boardName xilinx.com:kcu105:part0:1.7 + set ipName xlnx_ahblite_axi_bridge create_project $ipName . -force -part $partNumber diff --git a/pipelined/config/buildroot/wally-config.vh b/pipelined/config/buildroot/wally-config.vh index f879ce2a6..1bdea74a7 100644 --- a/pipelined/config/buildroot/wally-config.vh +++ b/pipelined/config/buildroot/wally-config.vh @@ -124,3 +124,5 @@ `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 + +`define REPLAY 0 diff --git a/pipelined/config/fpga/wally-config.vh b/pipelined/config/fpga/wally-config.vh index 58efd0463..223aa3d80 100644 --- a/pipelined/config/fpga/wally-config.vh +++ b/pipelined/config/fpga/wally-config.vh @@ -130,3 +130,5 @@ `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 1 + +`define REPLAY 0 diff --git a/pipelined/config/rv32e/wally-config.vh b/pipelined/config/rv32e/wally-config.vh index 2443b7ae9..9102cf637 100644 --- a/pipelined/config/rv32e/wally-config.vh +++ b/pipelined/config/rv32e/wally-config.vh @@ -128,3 +128,5 @@ `define BPRED_ENABLED 0 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 + +`define REPLAY 0 diff --git a/pipelined/config/rv32gc/wally-config.vh b/pipelined/config/rv32gc/wally-config.vh index a3859740c..af6ef40cf 100644 --- a/pipelined/config/rv32gc/wally-config.vh +++ b/pipelined/config/rv32gc/wally-config.vh @@ -126,3 +126,5 @@ `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 + +`define REPLAY 0 diff --git a/pipelined/config/rv32ic/wally-config.vh b/pipelined/config/rv32ic/wally-config.vh index d0b8adfb0..4d7b0418a 100644 --- a/pipelined/config/rv32ic/wally-config.vh +++ b/pipelined/config/rv32ic/wally-config.vh @@ -126,3 +126,5 @@ `define BPRED_ENABLED 1 `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 + +`define REPLAY 0 diff --git a/pipelined/config/rv64BP/wally-config.vh b/pipelined/config/rv64BP/wally-config.vh index 78230552c..f8ee8903b 100644 --- a/pipelined/config/rv64BP/wally-config.vh +++ b/pipelined/config/rv64BP/wally-config.vh @@ -129,3 +129,5 @@ //`define BPTYPE "BPGSHARE" // BPGLOBAL or BPTWOBIT or BPGSHARE `define BPTYPE "BPGSHARE" // BPTWOBIT or "BPGLOBAL" or BPLOCALPAg or BPGSHARE `define TESTSBP 1 + +`define REPLAY 0 diff --git a/pipelined/config/rv64gc/wally-config.vh b/pipelined/config/rv64gc/wally-config.vh index f5bc8a29f..ea17620c3 100644 --- a/pipelined/config/rv64gc/wally-config.vh +++ b/pipelined/config/rv64gc/wally-config.vh @@ -130,3 +130,4 @@ `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 +`define REPLAY 0 diff --git a/pipelined/config/rv64ic/wally-config.vh b/pipelined/config/rv64ic/wally-config.vh index 00b9a87f5..ec497db2a 100644 --- a/pipelined/config/rv64ic/wally-config.vh +++ b/pipelined/config/rv64ic/wally-config.vh @@ -130,3 +130,4 @@ `define BPTYPE "BPGSHARE" // BPLOCALPAg or BPGLOBAL or BPTWOBIT or BPGSHARE `define TESTSBP 0 +`define REPLAY 0 diff --git a/pipelined/src/cache/cache.sv b/pipelined/src/cache/cache.sv index e554fa999..1ddb53f23 100644 --- a/pipelined/src/cache/cache.sv +++ b/pipelined/src/cache/cache.sv @@ -31,38 +31,38 @@ `include "wally-config.vh" module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( - input logic clk, - input logic reset, + input logic clk, + input logic reset, // cpu side - input logic CPUBusy, - input logic [1:0] RW, - input logic [1:0] Atomic, - input logic FlushCache, - input logic InvalidateCacheM, - input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. - input logic [`PA_BITS-1:0] PAdr, // physical address - input logic [`XLEN-1:0] FinalWriteData, - output logic [`XLEN-1:0] ReadDataWord, - output logic CacheCommitted, - output logic CacheStall, + input logic CPUBusy, + input logic [1:0] RW, + input logic [1:0] Atomic, + input logic FlushCache, + input logic InvalidateCacheM, + input logic [11:0] NextAdr, // virtual address, but we only use the lower 12 bits. + input logic [`PA_BITS-1:0] PAdr, // physical address + input logic [`XLEN-1:0] FinalWriteData, + output logic CacheCommitted, + output logic CacheStall, // to performance counters to cpu - output logic CacheMiss, - output logic CacheAccess, + output logic CacheMiss, + output logic CacheAccess, + output logic save, restore, // lsu control - input logic IgnoreRequest, + input logic IgnoreRequest, // Bus fsm interface - output logic CacheFetchLine, - output logic CacheWriteLine, - input logic CacheBusAck, - output logic [`PA_BITS-1:0] CacheBusAdr, - input logic [LINELEN-1:0] CacheMemWriteData, - output logic [`XLEN-1:0] ReadDataLineSets [(LINELEN/`XLEN)-1:0]); + output logic CacheFetchLine, + output logic CacheWriteLine, + input logic CacheBusAck, + output logic [`PA_BITS-1:0] CacheBusAdr, + input logic [LINELEN-1:0] CacheMemWriteData, + output logic [LINELEN-1:0] ReadDataLine); // Cache parameters localparam LINEBYTELEN = LINELEN/8; localparam OFFSETLEN = $clog2(LINEBYTELEN); localparam SETLEN = $clog2(NUMLINES); - localparam SETTOP = SETLEN+OFFSETLEN; + localparam SETTOP = SETLEN+OFFSETLEN; localparam TAGLEN = `PA_BITS - SETTOP; localparam WORDSPERLINE = LINELEN/`XLEN; localparam LOGWPL = $clog2(WORDSPERLINE); @@ -77,13 +77,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( logic [LINELEN-1:0] ReadDataLineWay [NUMWAYS-1:0]; logic [NUMWAYS-1:0] WayHit; logic CacheHit; - logic [LINELEN-1:0] ReadDataLine; - logic [WORDSPERLINE-1:0] SRAMWordEnable; - logic SRAMWordWriteEnable; - logic SRAMLineWriteEnable; + logic FSMWordWriteEn; + logic FSMLineWriteEn; logic [NUMWAYS-1:0] SRAMLineWayWriteEnable; - logic [NUMWAYS-1:0] SRAMWayWriteEnable; - logic [NUMWAYS-1:0] SRAMWordWayWriteEnable; logic [NUMWAYS-1:0] VictimWay; logic [NUMWAYS-1:0] VictimDirtyWay; logic VictimDirty; @@ -100,13 +96,16 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( logic [NUMWAYS-1:0] NextFlushWay; logic FlushWayCntEn; logic FlushWayCntRst; - logic VDWriteEnable; logic SelEvict; logic LRUWriteEn; - logic [NUMWAYS-1:0] VDWriteEnableWay; logic SelFlush; - logic ResetOrFlushAdr, ResetOrFlushWay; - + logic ResetOrFlushAdr, ResetOrFlushWay; + logic [NUMWAYS-1:0] WayHitSaved, WayHitRaw; + logic [LINELEN-1:0] ReadDataLineRaw, ReadDataLineSaved; + logic [NUMWAYS-1:0] SelectedWay; + logic [NUMWAYS-1:0] SetValidWay, ClearValidWay, SetDirtyWay, ClearDirtyWay; + logic [NUMWAYS-1:0] WriteWordWayEn, WriteLineWayEn; + ///////////////////////////////////////////////////////////////////////////////////////////// // Read Path ///////////////////////////////////////////////////////////////////////////////////////////// @@ -120,13 +119,13 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( // Array of cache ways, along with victim, hit, dirty, and read merging logic cacheway #(NUMLINES, LINELEN, TAGLEN, OFFSETLEN, SETLEN) CacheWays[NUMWAYS-1:0]( .clk, .reset, .RAdr, .PAdr, - .WriteEnable(SRAMWayWriteEnable), - .VDWriteEnable(VDWriteEnableWay), - .WriteWordEnable(SRAMWordEnable), - .TagWriteEnable(SRAMLineWayWriteEnable), + .WriteWordEn(WriteWordWayEn), + .WriteLineEn(WriteLineWayEn), .WriteData(SRAMWriteData), - .SetValid, .ClearValid, .SetDirty, .ClearDirty, .SelEvict, .Victim(VictimWay), .Flush(FlushWay), .SelFlush, - .SelectedReadDataLine(ReadDataLineWay), .WayHit, .VictimDirty(VictimDirtyWay), .VictimTag(VictimTagWay), + .SetValid(SetValidWay), .ClearValid(ClearValidWay), .SetDirty(SetDirtyWay), .ClearDirty(ClearDirtyWay), + .SelEvict, .Victim(VictimWay), .Flush(FlushWay), + .SelFlush, + .SelectedReadDataLine(ReadDataLineWay), .WayHit(WayHitRaw), .VictimDirty(VictimDirtyWay), .VictimTag(VictimTagWay), .InvalidateAll(InvalidateCacheM)); if(NUMWAYS > 1) begin:vict cachereplacementpolicy #(NUMWAYS, SETLEN, OFFSETLEN, NUMLINES) cachereplacementpolicy( @@ -138,45 +137,24 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( // Need to OR together each way in a bitwise manner. // Final part of the AO Mux. First is the AND in the cacheway. or_rows #(NUMWAYS, LINELEN) ReadDataAOMux(.a(ReadDataLineWay), .y(ReadDataLine)); - or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); + or_rows #(NUMWAYS, TAGLEN) VictimTagAOMux(.a(VictimTagWay), .y(VictimTag)); - // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can - // easily build a variable input mux. - // *** move this to LSU and IFU, also remove mux from busdp into LSU. - // *** give this a module name to match block diagram - genvar index; - if(DCACHE == 1) begin: readdata - for (index = 0; index < WORDSPERLINE; index++) begin:readdatalinesetsmux - assign ReadDataLineSets[index] = ReadDataLine[((index+1)*`XLEN)-1: (index*`XLEN)]; - end - // variable input mux - assign ReadDataWord = ReadDataLineSets[PAdr[LOGWPL + LOGXLENBYTES - 1 : LOGXLENBYTES]]; - end else begin: readdata - logic [31:0] ReadLineSetsF [LINELEN/16-1:0]; - logic [31:0] FinalInstrRawF; - for(index = 0; index < LINELEN / 16 - 1; index++) - assign ReadLineSetsF[index] = ReadDataLine[((index+1)*16)+16-1 : (index*16)]; - assign ReadLineSetsF[LINELEN/16-1] = {16'b0, ReadDataLine[LINELEN-1:LINELEN-16]}; - assign FinalInstrRawF = ReadLineSetsF[PAdr[$clog2(LINELEN / 32) + 1 : 1]]; - if (`XLEN == 64) assign ReadDataWord = {32'b0, FinalInstrRawF}; - else assign ReadDataWord = FinalInstrRawF; - end - + // Because of the sram clocked read when the ieu is stalled the read data maybe lost. + // There are two ways to resolve. 1. We can replay the read of the sram or we can save + // the data. Replay is eaiser but creates a longer critical path. + // save/restore only wayhit and readdata. + if(!`REPLAY) begin + flopenr #(NUMWAYS) wayhitsavereg(clk, save, reset, WayHitRaw, WayHitSaved); + mux2 #(NUMWAYS) saverestoremux(WayHitRaw, WayHitSaved, restore, WayHit); + end else assign WayHit = WayHitRaw; + ///////////////////////////////////////////////////////////////////////////////////////////// - // Write Path: Write Enables + // Write Path: Write data and address. Muxes between writes from bus and writes from CPU. ///////////////////////////////////////////////////////////////////////////////////////////// - - // *** Ross considering restructuring - onehotdecoder #(LOGWPL) adrdec( - .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); - assign SRAMWordEnable = SRAMLineWriteEnable ? '1 : MemPAdrDecoded; // OR - assign SRAMLineWayWriteEnable = SRAMLineWriteEnable ? VictimWay : '0; // AND - assign SRAMWordWayWriteEnable = SRAMWordWriteEnable ? WayHit : '0; // AND - mux2 #(NUMWAYS) WriteEnableMux(.d0(SRAMWordWayWriteEnable), .d1(VictimWay), - .s(SRAMLineWriteEnable), .y(SRAMWayWriteEnable)); + mux2 #(LINELEN) WriteDataMux(.d0({WORDSPERLINE{FinalWriteData}}), - .d1(CacheMemWriteData), .s(SRAMLineWriteEnable), .y(SRAMWriteData)); + .d1(CacheMemWriteData), .s(FSMLineWriteEn), .y(SRAMWriteData)); mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), .d1({VictimTag, PAdr[SETTOP-1:OFFSETLEN], {{OFFSETLEN}{1'b0}}}), .d2({VictimTag, FlushAdr, {{OFFSETLEN}{1'b0}}}), @@ -198,9 +176,21 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .en(FlushWayCntEn), .val({{NUMWAYS-1{1'b0}}, 1'b1}), .d(NextFlushWay), .q(FlushWay)); assign FlushWayFlag = FlushWay[NUMWAYS-1]; - assign VDWriteEnableWay = FlushWay & {NUMWAYS{VDWriteEnable}}; assign NextFlushWay = {FlushWay[NUMWAYS-2:0], FlushWay[NUMWAYS-1]}; + ///////////////////////////////////////////////////////////////////////////////////////////// + // Write Path: Write Enables + ///////////////////////////////////////////////////////////////////////////////////////////// + + assign SelectedWay = SelFlush ? FlushWay : (FSMLineWriteEn ? VictimWay : WayHit); + assign SetValidWay = SetValid ? SelectedWay : '0; + assign ClearValidWay = ClearValid ? SelectedWay : '0; + assign SetDirtyWay = SetDirty ? SelectedWay : '0; + assign ClearDirtyWay = ClearDirty ? SelectedWay : '0; + assign WriteWordWayEn = FSMWordWriteEn ? SelectedWay : '0; + assign WriteLineWayEn = FSMLineWriteEn ? SelectedWay : '0; + + ///////////////////////////////////////////////////////////////////////////////////////////// // Cache FSM ///////////////////////////////////////////////////////////////////////////////////////////// @@ -209,9 +199,10 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, DCACHE = 1) ( .RW, .Atomic, .CPUBusy, .IgnoreRequest, .CacheHit, .VictimDirty, .CacheStall, .CacheCommitted, .CacheMiss, .CacheAccess, .SelAdr, .SetValid, - .ClearValid, .SetDirty, .ClearDirty, .SRAMWordWriteEnable, - .SRAMLineWriteEnable, .SelEvict, .SelFlush, + .ClearValid, .SetDirty, .ClearDirty, .FSMWordWriteEn, + .FSMLineWriteEn, .SelEvict, .SelFlush, .FlushAdrCntEn, .FlushWayCntEn, .FlushAdrCntRst, .FlushWayCntRst, .FlushAdrFlag, .FlushWayFlag, .FlushCache, - .VDWriteEnable, .LRUWriteEn); + .save, .restore, + .LRUWriteEn); endmodule diff --git a/pipelined/src/cache/cachefsm.sv b/pipelined/src/cache/cachefsm.sv index 115107bcd..90a4974e0 100644 --- a/pipelined/src/cache/cachefsm.sv +++ b/pipelined/src/cache/cachefsm.sv @@ -32,56 +32,58 @@ module cachefsm (input logic clk, - input logic reset, + input logic reset, // inputs from IEU input logic [1:0] RW, input logic [1:0] Atomic, - input logic FlushCache, + input logic FlushCache, // hazard inputs - input logic CPUBusy, + input logic CPUBusy, // interlock fsm - input logic IgnoreRequest, + input logic IgnoreRequest, // Bus inputs - input logic CacheBusAck, + input logic CacheBusAck, // dcache internals - input logic CacheHit, - input logic VictimDirty, - input logic FlushAdrFlag, - input logic FlushWayFlag, + input logic CacheHit, + input logic VictimDirty, + input logic FlushAdrFlag, + input logic FlushWayFlag, // hazard outputs - output logic CacheStall, + output logic CacheStall, // counter outputs - output logic CacheMiss, - output logic CacheAccess, + output logic CacheMiss, + output logic CacheAccess, // Bus outputs - output logic CacheCommitted, - output logic CacheWriteLine, - output logic CacheFetchLine, + output logic CacheCommitted, + output logic CacheWriteLine, + output logic CacheFetchLine, // dcache internals output logic [1:0] SelAdr, - output logic SetValid, - output logic ClearValid, - output logic SetDirty, - output logic ClearDirty, - output logic SRAMWordWriteEnable, - output logic SRAMLineWriteEnable, - output logic SelEvict, - output logic LRUWriteEn, - output logic SelFlush, - output logic FlushAdrCntEn, - output logic FlushWayCntEn, - output logic FlushAdrCntRst, - output logic FlushWayCntRst, - output logic VDWriteEnable - - ); + output logic SetValid, + output logic ClearValid, + output logic SetDirty, + output logic ClearDirty, + output logic FSMWordWriteEn, + output logic FSMLineWriteEn, + output logic SelEvict, + output logic LRUWriteEn, + output logic SelFlush, + output logic FlushAdrCntEn, + output logic FlushWayCntEn, + output logic FlushAdrCntRst, + output logic FlushWayCntRst, + output logic save, + output logic restore); - logic AnyCPUReqM; logic [1:0] PreSelAdr; logic resetDelay; - + logic DoAMO, DoRead, DoWrite, DoFlush; + logic DoAMOHit, DoReadHit, DoWriteHit; + logic DoAMOMiss, DoReadMiss, DoWriteMiss; + logic FlushFlag; + typedef enum {STATE_READY, STATE_MISS_FETCH_WDV, @@ -92,8 +94,8 @@ module cachefsm STATE_MISS_READ_WORD_DELAY, STATE_MISS_WRITE_WORD, - STATE_CPU_BUSY, // *** Ross will change - STATE_CPU_BUSY_FINISH_AMO, // *** Ross will change + STATE_CPU_BUSY, + STATE_CPU_BUSY_FINISH_AMO, STATE_FLUSH, STATE_FLUSH_CHECK, @@ -103,11 +105,22 @@ module cachefsm (* mark_debug = "true" *) statetype CurrState, NextState; - assign AnyCPUReqM = |RW | (|Atomic); + assign DoFlush = FlushCache & ~IgnoreRequest; // *** have to fix ignorerequest timing path + assign DoAMO = Atomic[1] & (&RW) & ~IgnoreRequest; // *** + assign DoAMOHit = DoAMO & CacheHit; + assign DoAMOMiss = DoAMO & ~CacheHit; + assign DoRead = RW[1] & ~IgnoreRequest; // *** + assign DoReadHit = DoRead & CacheHit; + assign DoReadMiss = DoRead & ~CacheHit; + assign DoWrite = RW[0] & ~IgnoreRequest; // *** + assign DoWriteHit = DoWrite & CacheHit; + assign DoWriteMiss = DoWrite & ~CacheHit; + + assign FlushFlag = FlushAdrFlag & FlushWayFlag; // outputs for the performance counters. - assign CacheAccess = AnyCPUReqM & CurrState == STATE_READY; - assign CacheMiss = CacheAccess & ~CacheHit; + assign CacheAccess = (DoAMO | DoRead | DoWrite) & CurrState == STATE_READY; + assign CacheMiss = CacheAccess & ~CacheHit; // special case on reset. When the fsm first exists reset the // PCNextF will no longer be pointing to the correct address. @@ -119,317 +132,118 @@ module cachefsm if (reset) CurrState <= #1 STATE_READY; else CurrState <= #1 NextState; - // next state logic and some state ouputs. - // *** Ross simplify: factor out next state and output logic always_comb begin - CacheStall = 1'b0; - PreSelAdr = 2'b00; - SetValid = 1'b0; - ClearValid = 1'b0; - SetDirty = 1'b0; - ClearDirty = 1'b0; - SRAMWordWriteEnable = 1'b0; - SRAMLineWriteEnable = 1'b0; - SelEvict = 1'b0; - LRUWriteEn = 1'b0; - SelFlush = 1'b0; - FlushAdrCntEn = 1'b0; - FlushWayCntEn = 1'b0; - FlushAdrCntRst = 1'b0; - FlushWayCntRst = 1'b0; - VDWriteEnable = 1'b0; NextState = STATE_READY; - CacheFetchLine = 1'b0; - CacheWriteLine = 1'b0; - case (CurrState) - STATE_READY: begin - - CacheStall = 1'b0; - PreSelAdr = 2'b00; - SRAMWordWriteEnable = 1'b0; - SetDirty = 1'b0; - LRUWriteEn = 1'b0; - - // TLB Miss - if(IgnoreRequest) begin - // the LSU arbiter has not yet selected the PTW. - // The CPU needs to be stalled until that happens. - // If we set CacheStall for 1 cycle before going to - // PTW ready the CPU will stall. - // The page table walker asserts it's control 1 cycle - // after the TLBs miss. - PreSelAdr = 2'b01; - NextState = STATE_READY; - end - - // Flush dcache to next level of memory - else if(FlushCache) begin - NextState = STATE_FLUSH; - FlushAdrCntRst = 1'b1; - FlushWayCntRst = 1'b1; - CacheStall = 1'b1; - end - - // amo hit - else if(Atomic[1] & (&RW) & CacheHit) begin - PreSelAdr = 2'b01; - CacheStall = 1'b0; - - if(CPUBusy) begin - NextState = STATE_CPU_BUSY_FINISH_AMO; - PreSelAdr = 2'b01; - end - else begin - SRAMWordWriteEnable = 1'b1; - SetDirty = 1'b1; - LRUWriteEn = 1'b1; - NextState = STATE_READY; - end - end - // read hit valid cached - else if(RW[1] & CacheHit) begin - CacheStall = 1'b0; - LRUWriteEn = 1'b1; - - if(CPUBusy) begin - NextState = STATE_CPU_BUSY; - PreSelAdr = 2'b01; - end - else begin - NextState = STATE_READY; - end - end - // write hit valid cached - else if (RW[0] & CacheHit) begin - PreSelAdr = 2'b01; - CacheStall = 1'b0; - SRAMWordWriteEnable = 1'b1; - SetDirty = 1'b1; - LRUWriteEn = 1'b1; - - if(CPUBusy) begin - NextState = STATE_CPU_BUSY; - PreSelAdr = 2'b01; - end - else begin - NextState = STATE_READY; - end - end - // read or write miss valid cached - else if((|RW) & ~CacheHit) begin - NextState = STATE_MISS_FETCH_WDV; - CacheStall = 1'b1; - CacheFetchLine = 1'b1; - end - else NextState = STATE_READY; - end - - STATE_MISS_FETCH_WDV: begin - CacheStall = 1'b1; - PreSelAdr = 2'b01; - - if (CacheBusAck) begin - NextState = STATE_MISS_FETCH_DONE; - end else begin - NextState = STATE_MISS_FETCH_WDV; - end - end - - STATE_MISS_FETCH_DONE: begin - CacheStall = 1'b1; - PreSelAdr = 2'b01; - if(VictimDirty) begin - NextState = STATE_MISS_EVICT_DIRTY; - CacheWriteLine = 1'b1; - end else begin - NextState = STATE_MISS_WRITE_CACHE_LINE; - end - end - - STATE_MISS_WRITE_CACHE_LINE: begin - SRAMLineWriteEnable = 1'b1; - CacheStall = 1'b1; - NextState = STATE_MISS_READ_WORD; - PreSelAdr = 2'b01; - SetValid = 1'b1; - ClearDirty = 1'b1; - //LRUWriteEn = 1'b1; // DO not update LRU on SRAM fetch update. Wait for subsequent read/write - end - - STATE_MISS_READ_WORD: begin - PreSelAdr = 2'b01; - CacheStall = 1'b1; - if (RW[0] & ~Atomic[1]) begin // handles stores and amo write. - NextState = STATE_MISS_WRITE_WORD; - end else begin - NextState = STATE_MISS_READ_WORD_DELAY; - // delay state is required as the read signal RW[1] is still high when we - // return to the ready state because the cache is stalling the cpu. - end - end - - STATE_MISS_READ_WORD_DELAY: begin - //PreSelAdr = 2'b01; - SRAMWordWriteEnable = 1'b0; - SetDirty = 1'b0; - LRUWriteEn = 1'b0; - if(&RW & Atomic[1]) begin // amo write - PreSelAdr = 2'b01; - if(CPUBusy) begin - NextState = STATE_CPU_BUSY_FINISH_AMO; - end - else begin - SRAMWordWriteEnable = 1'b1; - SetDirty = 1'b1; - LRUWriteEn = 1'b1; - NextState = STATE_READY; - end - end else begin - LRUWriteEn = 1'b1; - if(CPUBusy) begin - NextState = STATE_CPU_BUSY; - PreSelAdr = 2'b01; - end - else begin - NextState = STATE_READY; - end - end - end - - STATE_MISS_WRITE_WORD: begin - SRAMWordWriteEnable = 1'b1; - SetDirty = 1'b1; - PreSelAdr = 2'b01; - LRUWriteEn = 1'b1; - if(CPUBusy) begin - NextState = STATE_CPU_BUSY; - PreSelAdr = 2'b01; - end - else begin - NextState = STATE_READY; - end - end - - STATE_MISS_EVICT_DIRTY: begin - CacheStall = 1'b1; - PreSelAdr = 2'b01; - SelEvict = 1'b1; - if(CacheBusAck) begin - NextState = STATE_MISS_WRITE_CACHE_LINE; - end else begin - NextState = STATE_MISS_EVICT_DIRTY; - end - end - - - STATE_CPU_BUSY: begin - PreSelAdr = 2'b00; - if(CPUBusy) begin - NextState = STATE_CPU_BUSY; - PreSelAdr = 2'b01; - end - else begin - NextState = STATE_READY; - end - end - - STATE_CPU_BUSY_FINISH_AMO: begin - PreSelAdr = 2'b01; - SRAMWordWriteEnable = 1'b0; - SetDirty = 1'b0; - LRUWriteEn = 1'b0; - if(CPUBusy) begin - NextState = STATE_CPU_BUSY_FINISH_AMO; - end - else begin - SRAMWordWriteEnable = 1'b1; - SetDirty = 1'b1; - LRUWriteEn = 1'b1; - NextState = STATE_READY; - end - end - - STATE_FLUSH: begin - // intialize flush counters - SelFlush = 1'b1; - CacheStall = 1'b1; - PreSelAdr = 2'b10; - NextState = STATE_FLUSH_CHECK; - end - - STATE_FLUSH_CHECK: begin - CacheStall = 1'b1; - PreSelAdr = 2'b10; - SelFlush = 1'b1; - if(VictimDirty) begin - NextState = STATE_FLUSH_WRITE_BACK; - FlushWayCntEn = 1'b0; - CacheWriteLine = 1'b1; - end else if (FlushAdrFlag & FlushWayFlag) begin - NextState = STATE_READY; - CacheStall = 1'b0; - PreSelAdr = 2'b00; - FlushWayCntEn = 1'b0; - end else if(FlushWayFlag) begin - NextState = STATE_FLUSH_INCR; - FlushAdrCntEn = 1'b1; - - FlushWayCntEn = 1'b1; - end else begin - FlushWayCntEn = 1'b1; - NextState = STATE_FLUSH_CHECK; - end - end - - STATE_FLUSH_INCR: begin - CacheStall = 1'b1; - PreSelAdr = 2'b10; - SelFlush = 1'b1; - FlushWayCntRst = 1'b1; - NextState = STATE_FLUSH_CHECK; - end - - STATE_FLUSH_WRITE_BACK: begin - CacheStall = 1'b1; - PreSelAdr = 2'b10; - SelFlush = 1'b1; - if(CacheBusAck) begin - NextState = STATE_FLUSH_CLEAR_DIRTY; - end else begin - NextState = STATE_FLUSH_WRITE_BACK; - end - end - - STATE_FLUSH_CLEAR_DIRTY: begin - CacheStall = 1'b1; - ClearDirty = 1'b1; - VDWriteEnable = 1'b1; - SelFlush = 1'b1; - PreSelAdr = 2'b10; - FlushWayCntEn = 1'b0; - if(FlushAdrFlag & FlushWayFlag) begin - NextState = STATE_READY; - CacheStall = 1'b0; - PreSelAdr = 2'b00; - end else if (FlushWayFlag) begin - NextState = STATE_FLUSH_INCR; - FlushAdrCntEn = 1'b1; - - FlushWayCntEn = 1'b1; - end else begin - NextState = STATE_FLUSH_CHECK; - FlushWayCntEn = 1'b1; - end - end - - default: begin - NextState = STATE_READY; - end + STATE_READY: if(DoFlush) NextState = STATE_FLUSH; + else if(DoAMOHit & CPUBusy) NextState = STATE_CPU_BUSY_FINISH_AMO; + else if(DoReadHit & CPUBusy) NextState = STATE_CPU_BUSY; + else if (DoWriteHit & CPUBusy) NextState = STATE_CPU_BUSY; + else if(DoReadMiss | DoWriteMiss | DoAMOMiss) NextState = STATE_MISS_FETCH_WDV; + else NextState = STATE_READY; + STATE_MISS_FETCH_WDV: if (CacheBusAck) NextState = STATE_MISS_FETCH_DONE; + else NextState = STATE_MISS_FETCH_WDV; + STATE_MISS_FETCH_DONE: if(VictimDirty) NextState = STATE_MISS_EVICT_DIRTY; + else NextState = STATE_MISS_WRITE_CACHE_LINE; + STATE_MISS_WRITE_CACHE_LINE: NextState = STATE_MISS_READ_WORD; + STATE_MISS_READ_WORD: if (DoWrite & ~DoAMO) NextState = STATE_MISS_WRITE_WORD; + else NextState = STATE_MISS_READ_WORD_DELAY; + STATE_MISS_READ_WORD_DELAY: if(DoAMO & CPUBusy) NextState = STATE_CPU_BUSY_FINISH_AMO; + else if(CPUBusy) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + STATE_MISS_WRITE_WORD: if(CPUBusy) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + STATE_MISS_EVICT_DIRTY: if(CacheBusAck) NextState = STATE_MISS_WRITE_CACHE_LINE; + else NextState = STATE_MISS_EVICT_DIRTY; + STATE_CPU_BUSY: if(CPUBusy) NextState = STATE_CPU_BUSY; + else NextState = STATE_READY; + STATE_CPU_BUSY_FINISH_AMO: if(CPUBusy) NextState = STATE_CPU_BUSY_FINISH_AMO; + else NextState = STATE_READY; + STATE_FLUSH: NextState = STATE_FLUSH_CHECK; + STATE_FLUSH_CHECK: if(VictimDirty) NextState = STATE_FLUSH_WRITE_BACK; + else if (FlushFlag) NextState = STATE_READY; + else if(FlushWayFlag) NextState = STATE_FLUSH_INCR; + else NextState = STATE_FLUSH_CHECK; + STATE_FLUSH_INCR: NextState = STATE_FLUSH_CHECK; + STATE_FLUSH_WRITE_BACK: if(CacheBusAck) NextState = STATE_FLUSH_CLEAR_DIRTY; + else NextState = STATE_FLUSH_WRITE_BACK; + STATE_FLUSH_CLEAR_DIRTY: if(FlushAdrFlag & FlushWayFlag) NextState = STATE_READY; + else if (FlushWayFlag) NextState = STATE_FLUSH_INCR; + else NextState = STATE_FLUSH_CHECK; + default: NextState = STATE_READY; endcase end + // com back to CPU assign CacheCommitted = CurrState != STATE_READY; + assign CacheStall = (CurrState == STATE_READY & (DoFlush | DoAMOMiss | DoReadMiss | DoWriteMiss)) | + (CurrState == STATE_MISS_FETCH_WDV) | + (CurrState == STATE_MISS_FETCH_DONE) | + (CurrState == STATE_MISS_WRITE_CACHE_LINE) | + (CurrState == STATE_MISS_READ_WORD) | + (CurrState == STATE_MISS_EVICT_DIRTY) | + (CurrState == STATE_FLUSH) | + (CurrState == STATE_FLUSH_CHECK & ~(FlushFlag)) | + (CurrState == STATE_FLUSH_INCR) | + (CurrState == STATE_FLUSH_WRITE_BACK) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushFlag)); + // write enables internal to cache + assign SetValid = CurrState == STATE_MISS_WRITE_CACHE_LINE; + assign ClearValid = '0; + assign SetDirty = (CurrState == STATE_READY & DoAMO) | + (CurrState == STATE_READY & DoWrite) | + (CurrState == STATE_MISS_READ_WORD_DELAY & DoAMO) | + (CurrState == STATE_MISS_WRITE_WORD); + assign ClearDirty = (CurrState == STATE_MISS_WRITE_CACHE_LINE) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY); + assign FSMWordWriteEn = (CurrState == STATE_READY & (DoAMOHit | DoWriteHit)) | + (CurrState == STATE_MISS_READ_WORD_DELAY & DoAMO) | + (CurrState == STATE_MISS_WRITE_WORD); + assign FSMLineWriteEn = (CurrState == STATE_MISS_WRITE_CACHE_LINE); + assign LRUWriteEn = (CurrState == STATE_READY & (DoAMOHit | DoReadHit | DoWriteHit)) | + (CurrState == STATE_MISS_READ_WORD_DELAY) | + (CurrState == STATE_MISS_WRITE_WORD); + // Flush and eviction controls + assign SelEvict = (CurrState == STATE_MISS_EVICT_DIRTY); + assign SelFlush = (CurrState == STATE_FLUSH) | (CurrState == STATE_FLUSH_CHECK) | + (CurrState == STATE_FLUSH_INCR) | (CurrState == STATE_FLUSH_WRITE_BACK) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY); + assign FlushAdrCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & FlushWayFlag & ~FlushAdrFlag) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY & FlushWayFlag & ~FlushAdrFlag); + assign FlushWayCntEn = (CurrState == STATE_FLUSH_CHECK & ~VictimDirty & ~(FlushFlag)) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushFlag)); + assign FlushAdrCntRst = (CurrState == STATE_READY & DoFlush); + assign FlushWayCntRst = (CurrState == STATE_READY & DoFlush) | (CurrState == STATE_FLUSH_INCR); + // Bus interface controls + assign CacheFetchLine = (CurrState == STATE_READY & (DoAMOMiss | DoWriteMiss | DoReadMiss)); + assign CacheWriteLine = (CurrState == STATE_MISS_FETCH_DONE & VictimDirty) | + (CurrState == STATE_FLUSH_CHECK & VictimDirty); + // handle cpu stall. + assign restore = ((CurrState == STATE_CPU_BUSY) | (CurrState == STATE_CPU_BUSY_FINISH_AMO)) & ~`REPLAY; + assign save = ((CurrState == STATE_READY & (DoAMOHit | DoReadHit | DoWriteHit) & CPUBusy) | + (CurrState == STATE_MISS_READ_WORD_DELAY & (DoAMO | DoRead) & CPUBusy) | + (CurrState == STATE_MISS_WRITE_WORD & DoWrite & CPUBusy)) & ~`REPLAY; + // **** can this be simplified? + assign PreSelAdr = ((CurrState == STATE_READY & IgnoreRequest) | // *** ignorerequest comes from TrapM. Have to fix. why is ignorerequest here anyway? + (CurrState == STATE_READY & DoAMOHit) | // also depends on ignorerequest + (CurrState == STATE_READY & DoReadHit & (CPUBusy & `REPLAY)) | + (CurrState == STATE_READY & DoWriteHit) | + (CurrState == STATE_MISS_FETCH_WDV) | + (CurrState == STATE_MISS_FETCH_DONE) | + (CurrState == STATE_MISS_WRITE_CACHE_LINE) | + (CurrState == STATE_MISS_READ_WORD) | + (CurrState == STATE_MISS_READ_WORD_DELAY & (DoAMO | (CPUBusy & `REPLAY))) | // *** + (CurrState == STATE_MISS_WRITE_WORD) | + (CurrState == STATE_MISS_EVICT_DIRTY) | + (CurrState == STATE_CPU_BUSY & (CPUBusy & `REPLAY)) | + (CurrState == STATE_CPU_BUSY_FINISH_AMO)) ? 2'b01 : + ((CurrState == STATE_FLUSH) | + (CurrState == STATE_FLUSH_CHECK & ~(VictimDirty & FlushFlag)) | + (CurrState == STATE_FLUSH_INCR) | + (CurrState == STATE_FLUSH_WRITE_BACK) | + (CurrState == STATE_FLUSH_CLEAR_DIRTY & ~(FlushFlag))) ? 2'b10 : + 2'b00; + + endmodule // cachefsm - diff --git a/pipelined/src/cache/cacheway.sv b/pipelined/src/cache/cacheway.sv index 0c623b8d4..d9f468a92 100644 --- a/pipelined/src/cache/cacheway.sv +++ b/pipelined/src/cache/cacheway.sv @@ -32,30 +32,32 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, parameter OFFSETLEN = 5, parameter INDEXLEN = 9, parameter DIRTY_BITS = 1) ( - input logic clk, - input logic reset, + input logic clk, + input logic reset, - input logic [$clog2(NUMLINES)-1:0] RAdr, - input logic [`PA_BITS-1:0] PAdr, - input logic WriteEnable, - input logic VDWriteEnable, - input logic [LINELEN/`XLEN-1:0] WriteWordEnable, - input logic TagWriteEnable, - input logic [LINELEN-1:0] WriteData, - input logic SetValid, - input logic ClearValid, - input logic SetDirty, - input logic ClearDirty, - input logic SelEvict, - input logic Victim, - input logic InvalidateAll, - input logic SelFlush, - input logic Flush, + input logic [$clog2(NUMLINES)-1:0] RAdr, + input logic [`PA_BITS-1:0] PAdr, + input logic WriteWordEn, + input logic WriteLineEn, + input logic [LINELEN-1:0] WriteData, + input logic SetValid, + input logic ClearValid, + input logic SetDirty, + input logic ClearDirty, + input logic SelEvict, + input logic Victim, + input logic InvalidateAll, + input logic SelFlush, + input logic Flush, - output logic [LINELEN-1:0] SelectedReadDataLine, - output logic WayHit, - output logic VictimDirty, - output logic [TAGLEN-1:0] VictimTag); + output logic [LINELEN-1:0] SelectedReadDataLine, + output logic WayHit, + output logic VictimDirty, + output logic [TAGLEN-1:0] VictimTag); + + localparam WORDSPERLINE = LINELEN/`XLEN; + localparam LOGWPL = $clog2(WORDSPERLINE); + localparam LOGXLENBYTES = $clog2(`XLEN/8); logic [NUMLINES-1:0] ValidBits; logic [NUMLINES-1:0] DirtyBits; @@ -64,20 +66,31 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, logic Valid; logic Dirty; logic SelData; - logic SelTag; + logic SelTag; logic [$clog2(NUMLINES)-1:0] RAdrD; logic SetValidD, ClearValidD; logic SetDirtyD, ClearDirtyD; - logic WriteEnableD, VDWriteEnableD; + + logic [2**LOGWPL-1:0] MemPAdrDecoded; + logic [LINELEN/`XLEN-1:0] SelectedWriteWordEn; + + ///////////////////////////////////////////////////////////////////////////////////////////// + // Write Enable demux + ///////////////////////////////////////////////////////////////////////////////////////////// + onehotdecoder #(LOGWPL) adrdec( + .bin(PAdr[LOGWPL+LOGXLENBYTES-1:LOGXLENBYTES]), .decoded(MemPAdrDecoded)); + // If writing the whole line set all write enables to 1, else only set the correct word. + assign SelectedWriteWordEn = WriteLineEn ? '1 : WriteWordEn ? MemPAdrDecoded : '0; // OR-AND + ///////////////////////////////////////////////////////////////////////////////////////////// // Tag Array ///////////////////////////////////////////////////////////////////////////////////////////// sram1rw #(.DEPTH(NUMLINES), .WIDTH(TAGLEN)) CacheTagMem(.clk(clk), .Adr(RAdr), .ReadData(ReadTag), - .WriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(TagWriteEnable)); + .WriteData(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .WriteEnable(WriteLineEn)); // AND portion of distributed tag multiplexer assign SelTag = SelFlush ? Flush : Victim; @@ -94,7 +107,7 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, sram1rw #(.DEPTH(NUMLINES), .WIDTH(`XLEN)) CacheDataMem(.clk(clk), .Adr(RAdr), .ReadData(ReadDataLine[(words+1)*`XLEN-1:words*`XLEN] ), .WriteData(WriteData[(words+1)*`XLEN-1:words*`XLEN]), - .WriteEnable(WriteEnable & WriteWordEnable[words])); + .WriteEnable(SelectedWriteWordEn[words])); end // AND portion of distributed read multiplexers @@ -108,13 +121,12 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, always_ff @(posedge clk) begin // Valid bit array, if (reset | InvalidateAll) ValidBits <= #1 '0; - else if (SetValidD & (WriteEnableD | VDWriteEnableD)) ValidBits[RAdrD] <= #1 1'b1; - else if (ClearValidD & (WriteEnableD | VDWriteEnableD)) ValidBits[RAdrD] <= #1 1'b0; + else if (SetValid) ValidBits[RAdr] <= #1 1'b1; + else if (ClearValid) ValidBits[RAdr] <= #1 1'b0; end // *** consider revisiting whether these delays are the best option? flop #($clog2(NUMLINES)) RAdrDelayReg(clk, RAdr, RAdrD); - flop #(4) ValidCtrlDelayReg(clk, {SetValid, ClearValid, WriteEnable, VDWriteEnable}, - {SetValidD, ClearValidD, WriteEnableD, VDWriteEnableD}); + //flop #(2) ValidCtrlDelayReg(clk, {SetValid, ClearValid}, {SetValidD, ClearValidD}); assign Valid = ValidBits[RAdrD]; ///////////////////////////////////////////////////////////////////////////////////////////// @@ -125,12 +137,13 @@ module cacheway #(parameter NUMLINES=512, parameter LINELEN = 256, TAGLEN = 26, if (DIRTY_BITS) begin:dirty always_ff @(posedge clk) begin if (reset) DirtyBits <= #1 {NUMLINES{1'b0}}; - else if (SetDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= #1 1'b1; - else if (ClearDirtyD & (WriteEnableD | VDWriteEnableD)) DirtyBits[RAdrD] <= #1 1'b0; + else if (SetDirty) DirtyBits[RAdr] <= #1 1'b1; + else if (ClearDirty) DirtyBits[RAdr] <= #1 1'b0; end flop #(2) DirtyCtlDelayReg(clk, {SetDirty, ClearDirty}, {SetDirtyD, ClearDirtyD}); assign Dirty = DirtyBits[RAdrD]; end else assign Dirty = 1'b0; + endmodule diff --git a/pipelined/src/cache/sram1rw.sv b/pipelined/src/cache/sram1rw.sv index b17aa20d3..41dcbee04 100644 --- a/pipelined/src/cache/sram1rw.sv +++ b/pipelined/src/cache/sram1rw.sv @@ -41,21 +41,25 @@ module sram1rw #(parameter DEPTH=128, WIDTH=256) ( output logic [WIDTH-1:0] ReadData); logic [WIDTH-1:0] StoredData[DEPTH-1:0]; - logic [$clog2(DEPTH)-1:0] AddrD; + logic [$clog2(DEPTH)-1:0] AdrD; logic [WIDTH-1:0] WriteDataD; logic WriteEnableD; //*** model as single port + // *** merge with simpleram always_ff @(posedge clk) begin - AddrD <= Adr; - WriteDataD <= WriteData; /// ****** this is not right. there should not need to be a delay. Implement alternative cache stall to avoid this. Eliminates a bunch of delay flops elsewhere - WriteEnableD <= WriteEnable; - if (WriteEnableD) begin - StoredData[AddrD] <= #1 WriteDataD; - end + AdrD <= Adr; + //WriteDataD <= WriteData; /// ****** this is not right. there should not need to be a delay. Implement alternative cache stall to avoid this. Eliminates a bunch of delay flops elsewhere + //WriteEnableD <= WriteEnable; + //if (WriteEnableD) begin + //StoredData[AddrD] <= #1 WriteDataD; + //end + if (WriteEnable) begin + StoredData[Adr] <= #1 WriteData; + end end - assign ReadData = StoredData[AddrD]; + assign ReadData = StoredData[AdrD]; /* always_ff @(posedge clk) begin ReadData <= RAM[Adr]; diff --git a/pipelined/src/cache/subcachelineread.sv b/pipelined/src/cache/subcachelineread.sv new file mode 100644 index 000000000..111ec506f --- /dev/null +++ b/pipelined/src/cache/subcachelineread.sv @@ -0,0 +1,69 @@ +/////////////////////////////////////////// +// subcachelineread +// +// Written: Ross Thompson ross1728@gmail.com February 04, 2022 +// Muxes the cache line downto the word size. Also include possilbe save/restore registers/muxes. +// +// Purpose: Controller for the dcache fsm +// +// A component of the Wally configurable RISC-V project. +// +// Copyright (C) 2021 Harvey Mudd College & Oklahoma State University +// +// MIT LICENSE +// Permission is hereby granted, free of charge, to any person obtaining a copy of this +// software and associated documentation files (the "Software"), to deal in the Software +// without restriction, including without limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +// to whom the Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all copies or +// substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +// BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +// OR OTHER DEALINGS IN THE SOFTWARE. +//////////////////////////////////////////////////////////////////////////////////////////////// + +`include "wally-config.vh" + +module subcachelineread #(parameter LINELEN, WORDLEN, MUXINTERVAL)( + input logic clk, + input logic reset, + input logic [`PA_BITS-1:0] PAdr, + input logic save, restore, + input logic [LINELEN-1:0] ReadDataLine, + output logic [WORDLEN-1:0] ReadDataWord); + + localparam WORDSPERLINE = LINELEN/MUXINTERVAL; + localparam PADLEN = WORDLEN-MUXINTERVAL; + // Convert the Read data bus ReadDataSelectWay into sets of XLEN so we can + // easily build a variable input mux. + // *** move this to LSU and IFU, also remove mux from busdp into LSU. + // *** give this a module name to match block diagram + logic [LINELEN+(WORDLEN-MUXINTERVAL)-1:0] ReadDataLinePad; + logic [WORDLEN-1:0] ReadDataLineSets [(LINELEN/MUXINTERVAL)-1:0]; + logic [WORDLEN-1:0] ReadDataWordRaw, ReadDataWordSaved; + + if (PADLEN > 0) begin + logic [PADLEN-1:0] Pad; + assign Pad = '0; + assign ReadDataLinePad = {Pad, ReadDataLine}; + end else assign ReadDataLinePad = ReadDataLine; + + + genvar index; + for (index = 0; index < WORDSPERLINE; index++) begin:readdatalinesetsmux + assign ReadDataLineSets[index] = ReadDataLinePad[(index*MUXINTERVAL)+WORDLEN-1: (index*MUXINTERVAL)]; + end + // variable input mux + assign ReadDataWordRaw = ReadDataLineSets[PAdr[$clog2(LINELEN/8) - 1 : $clog2(MUXINTERVAL/8)]]; + if(!`REPLAY) begin + flopen #(WORDLEN) cachereaddatasavereg(clk, save, ReadDataWordRaw, ReadDataWordSaved); + mux2 #(WORDLEN) readdatasaverestoremux(ReadDataWordRaw, ReadDataWordSaved, + restore, ReadDataWord); + end else assign ReadDataWord = ReadDataWordRaw; +endmodule diff --git a/pipelined/src/generic/flop/simpleram.sv b/pipelined/src/generic/flop/simpleram.sv index 43b873567..3ad367bd5 100644 --- a/pipelined/src/generic/flop/simpleram.sv +++ b/pipelined/src/generic/flop/simpleram.sv @@ -40,18 +40,14 @@ module simpleram #(parameter BASE=0, RANGE = 65535) ( logic [`XLEN-1:0] RAM[BASE>>(1+`XLEN/32):(RANGE+BASE)>>1+(`XLEN/32)]; - /* verilator lint_off WIDTH */ - if (`XLEN == 64) begin:ramrw - always_ff @(posedge clk) begin - rd <= RAM[a[31:3]]; - if (we) RAM[a[31:3]] <= #1 wd; - end - end else begin - always_ff @(posedge clk) begin:ramrw - rd <= RAM[a[31:2]]; - if (we) RAM[a[31:2]] <= #1 wd; - end + // discard bottom 2 or 3 bits of address offset within word or doubleword + localparam adrlsb = (`XLEN==64) ? 3 : 2; + logic [31:adrlsb] adrmsbs; + assign adrmsbs = a[31:adrlsb]; + + always_ff @(posedge clk) begin + rd <= RAM[adrmsbs]; + if (we) RAM[adrmsbs] <= #1 wd; end - /* verilator lint_on WIDTH */ endmodule diff --git a/pipelined/src/ifu/ifu.sv b/pipelined/src/ifu/ifu.sv index aa46cd24d..f7b96cd60 100644 --- a/pipelined/src/ifu/ifu.sv +++ b/pipelined/src/ifu/ifu.sv @@ -92,7 +92,7 @@ module ifu ( logic [`XLEN-3:0] PCPlusUpperF; logic CompressedF; logic [31:0] InstrRawD, InstrRawF; - logic [`XLEN-1:0] FinalInstrRawF; + logic [31:0] FinalInstrRawF; logic [31:0] InstrE; logic [`XLEN-1:0] PCD; @@ -113,6 +113,8 @@ module ifu ( logic ICacheStallF, IFUCacheBusStallF; logic CPUBusy; (* mark_debug = "true" *) logic [31:0] PostSpillInstrRawF; + // branch predictor signal + logic [`XLEN-1:0] PCNext1F, PCNext2F, PCNext0F; assign PCFExt = {2'b00, PCFSpill}; @@ -180,23 +182,27 @@ module ifu ( end else begin : bus localparam integer WORDSPERLINE = (`IMEM == `MEM_CACHE) ? `ICACHE_LINELENINBITS/`XLEN : 1; localparam integer LINELEN = (`IMEM == `MEM_CACHE) ? `ICACHE_LINELENINBITS : `XLEN; + localparam integer LOGWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1; + logic [LINELEN-1:0] ReadDataLine; logic [LINELEN-1:0] ICacheMemWriteData; logic [`PA_BITS-1:0] ICacheBusAdr; logic ICacheBusAck; - - - busdp #(WORDSPERLINE, LINELEN) + logic save,restore; + logic [31:0] temp; + + busdp #(WORDSPERLINE, LINELEN, 32, LOGWPL) busdp(.clk, .reset, .LSUBusHRDATA(IFUBusHRDATA), .LSUBusAck(IFUBusAck), .LSUBusWrite(), - .LSUBusRead(IFUBusRead), .LSUBusHWDATA(), .LSUBusSize(), + .LSUBusRead(IFUBusRead), .LSUBusSize(), .LSUFunct3M(3'b010), .LSUBusAdr(IFUBusAdr), .DCacheBusAdr(ICacheBusAdr), - .ReadDataLineSetsM(), .DCacheFetchLine(ICacheFetchLine), + .WordCount(), .LSUBusHWDATA(), + .DCacheFetchLine(ICacheFetchLine), .DCacheWriteLine(1'b0), .DCacheBusAck(ICacheBusAck), .DCacheMemWriteData(ICacheMemWriteData), .LSUPAdrM(PCPF), - .FinalAMOWriteDataM(), .ReadDataWordM(FinalInstrRawF), .ReadDataWordMuxM(AllInstrRawF), + .FinalAMOWriteDataM(), .ReadDataWordM(FinalInstrRawF), .ReadDataWordMuxM(AllInstrRawF[31:0]), .IgnoreRequest(ITLBMissF), .LSURWM(2'b10), .CPUBusy, .CacheableM(CacheableF), .BusStall, .BusCommittedM()); - + if(`IMEM == `MEM_CACHE) begin : icache logic [1:0] IFURWF; assign IFURWF = CacheableF ? 2'b10 : 2'b00; @@ -207,8 +213,9 @@ module ifu ( icache(.clk, .reset, .CPUBusy, .IgnoreRequest(ITLBMissF), .CacheMemWriteData(ICacheMemWriteData), .CacheBusAck(ICacheBusAck), .CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF), - .ReadDataWord(FinalInstrRawF), .CacheFetchLine(ICacheFetchLine), - .CacheWriteLine(), .ReadDataLineSets(), + .CacheFetchLine(ICacheFetchLine), + .CacheWriteLine(), .ReadDataLine(ReadDataLine), + .save, .restore, .CacheMiss(ICacheMiss), .CacheAccess(ICacheAccess), .FinalWriteData('0), .RW(IFURWF), @@ -217,15 +224,16 @@ module ifu ( .PAdr(PCPF), .CacheCommitted(), .InvalidateCacheM(InvalidateICacheM)); + subcachelineread #(LINELEN, 32, 16) subcachelineread( + .clk, .reset, .PAdr(PCPF), .save, .restore, + .ReadDataLine, .ReadDataWord(FinalInstrRawF)); + end else begin : passthrough assign {ICacheFetchLine, ICacheBusAdr, ICacheStallF, FinalInstrRawF} = '0; assign ICacheAccess = CacheableF; assign ICacheMiss = CacheableF; end end - // branch predictor signal - logic [`XLEN-1:0] PCNext1F, PCNext2F; - assign IFUCacheBusStallF = ICacheStallF | BusStall; assign IFUStallF = IFUCacheBusStallF | SelNextSpillF; assign CPUBusy = StallF & ~SelNextSpillF; @@ -234,8 +242,7 @@ module ifu ( assign PrivilegedChangePCM = RetM | TrapM; - // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. - mux2 #(`XLEN) pccorrectemux(.d0(PCLinkE), .d1(IEUAdrE), .s(PCSrcE), .y(PCCorrectE)); + mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); mux2 #(`XLEN) pcmux2(.d0(PCNext1F), .d1(PCBPWrongInvalidate), .s(InvalidateICacheM), .y(PCNext2F)); mux2 #(`XLEN) pcmux3(.d0(PCNext2F), .d1(PrivilegedNextPCM), .s(PrivilegedChangePCM), .y(UnalignedPCNextF)); @@ -244,27 +251,28 @@ module ifu ( // branch and jump predictor if (`BPRED_ENABLED) begin : bpred - logic SelBPPredF; - logic [`XLEN-1:0] BPPredPCF, PCNext0F; logic BPPredWrongM; - + logic SelBPPredF; + logic [`XLEN-1:0] BPPredPCF; bpred bpred(.clk, .reset, .StallF, .StallD, .StallE, .StallM, .FlushF, .FlushD, .FlushE, .FlushM, .InstrD, .PCNextF, .BPPredPCF, .SelBPPredF, .PCE, .PCSrcE, .IEUAdrE, .PCD, .PCLinkE, .InstrClassM, .BPPredWrongE, .BPPredWrongM, .BPPredDirWrongM, .BTBPredPCWrongM, .RASPredPCWrongM, .BPPredClassNonCFIWrongM); - + mux2 #(`XLEN) pcmux0(.d0(PCPlus2or4F), .d1(BPPredPCF), .s(SelBPPredF), .y(PCNext0F)); - mux2 #(`XLEN) pcmux1(.d0(PCNext0F), .d1(PCCorrectE), .s(BPPredWrongE), .y(PCNext1F)); // Mux only required on instruction class miss prediction. mux2 #(`XLEN) pcmuxBPWrongInvalidateFlush(.d0(PCE), .d1(PCF), .s(BPPredWrongM), .y(PCBPWrongInvalidate)); - + // The true correct target is IEUAdrE if PCSrcE is 1 else it is the fall through PCLinkE. + mux2 #(`XLEN) pccorrectemux(.d0(PCLinkE), .d1(IEUAdrE), .s(PCSrcE), .y(PCCorrectE)); + end else begin : bpred assign BPPredWrongE = PCSrcE; assign {BPPredDirWrongM, BTBPredPCWrongM, RASPredPCWrongM, BPPredClassNonCFIWrongM} = '0; - assign PCNext1F = PCPlus2or4F; + assign PCNext0F = PCPlus2or4F; + assign PCCorrectE = IEUAdrE; assign PCBPWrongInvalidate = PCE; end diff --git a/pipelined/src/lsu/busdp.sv b/pipelined/src/lsu/busdp.sv index ac54e303f..f4b2bb234 100644 --- a/pipelined/src/lsu/busdp.sv +++ b/pipelined/src/lsu/busdp.sv @@ -34,7 +34,7 @@ `include "wally-config.vh" -module busdp #(parameter WORDSPERLINE, parameter LINELEN) +module busdp #(parameter WORDSPERLINE, LINELEN, WORDLEN, LOGWPL, LSU=0) ( input logic clk, reset, // bus interface @@ -46,10 +46,9 @@ module busdp #(parameter WORDSPERLINE, parameter LINELEN) output logic [2:0] LSUBusSize, input logic [2:0] LSUFunct3M, output logic [`PA_BITS-1:0] LSUBusAdr, - + output logic [LOGWPL-1:0] WordCount, // cache interface. input logic [`PA_BITS-1:0] DCacheBusAdr, - input var logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0], input logic DCacheFetchLine, input logic DCacheWriteLine, output logic DCacheBusAck, @@ -58,8 +57,8 @@ module busdp #(parameter WORDSPERLINE, parameter LINELEN) // lsu interface input logic [`PA_BITS-1:0] LSUPAdrM, input logic [`XLEN-1:0] FinalAMOWriteDataM, - input logic [`XLEN-1:0] ReadDataWordM, - output logic [`XLEN-1:0] ReadDataWordMuxM, + input logic [WORDLEN-1:0] ReadDataWordM, + output logic [WORDLEN-1:0] ReadDataWordMuxM, input logic IgnoreRequest, input logic [1:0] LSURWM, input logic CPUBusy, @@ -69,12 +68,10 @@ module busdp #(parameter WORDSPERLINE, parameter LINELEN) localparam integer WordCountThreshold = (`DMEM == `MEM_CACHE) ? WORDSPERLINE - 1 : 0; - localparam integer LOGWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1; - logic SelUncachedAdr; logic [`XLEN-1:0] PreLSUBusHWDATA; logic [`PA_BITS-1:0] LocalLSUBusAdr; - logic [LOGWPL-1:0] WordCount; + logic SelUncachedAdr; genvar index; for (index = 0; index < WORDSPERLINE; index++) begin:fetchbuffer @@ -85,15 +82,15 @@ module busdp #(parameter WORDSPERLINE, parameter LINELEN) mux2 #(`PA_BITS) localadrmux(DCacheBusAdr, LSUPAdrM, SelUncachedAdr, LocalLSUBusAdr); assign LSUBusAdr = ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) + LocalLSUBusAdr; - assign PreLSUBusHWDATA = ReadDataLineSetsM[WordCount]; // only in lsu, not ifu - mux2 #(`XLEN) lsubushwdatamux( - .d0(PreLSUBusHWDATA), .d1(FinalAMOWriteDataM), .s(SelUncachedAdr), .y(LSUBusHWDATA)); - mux2 #(3) lsubussizemux( - .d0(`XLEN == 32 ? 3'b010 : 3'b011), .d1(LSUFunct3M), .s(SelUncachedAdr), .y(LSUBusSize)); - mux2 #(`XLEN) UnCachedDataMux( - .d0(ReadDataWordM), .d1(DCacheMemWriteData[`XLEN-1:0]), .s(SelUncachedAdr), .y(ReadDataWordMuxM)); + if(LSU == 1) mux2 #(`XLEN) lsubushwdatamux( .d0(ReadDataWordM), .d1(FinalAMOWriteDataM), + .s(SelUncachedAdr), .y(LSUBusHWDATA)); + else assign LSUBusHWDATA = '0; + mux2 #(3) lsubussizemux(.d0(`XLEN == 32 ? 3'b010 : 3'b011), .d1(LSUFunct3M), + .s(SelUncachedAdr), .y(LSUBusSize)); + mux2 #(WORDLEN) UnCachedDataMux(.d0(ReadDataWordM), .d1(DCacheMemWriteData[WORDLEN-1:0]), + .s(SelUncachedAdr), .y(ReadDataWordMuxM)); - busfsm #(WordCountThreshold, LOGWPL, (`DMEM == `MEM_CACHE)) // *** cleanup + busfsm #(WordCountThreshold, LOGWPL, (`DMEM == `MEM_CACHE)) // *** cleanup Icache? must fix. busfsm(.clk, .reset, .IgnoreRequest, .LSURWM, .DCacheFetchLine, .DCacheWriteLine, .LSUBusAck, .CPUBusy, .CacheableM, .BusStall, .LSUBusWrite, .LSUBusRead, .DCacheBusAck, .BusCommittedM, .SelUncachedAdr, .WordCount); diff --git a/pipelined/src/lsu/interlockfsm.sv b/pipelined/src/lsu/interlockfsm.sv index bed28563d..5d53bd776 100644 --- a/pipelined/src/lsu/interlockfsm.sv +++ b/pipelined/src/lsu/interlockfsm.sv @@ -112,7 +112,6 @@ module interlockfsm assign SelHPTW = (InterlockCurrState == STATE_T3_DTLB_MISS) | (InterlockCurrState == STATE_T4_ITLB_MISS) | (InterlockCurrState == STATE_T5_ITLB_MISS) | (InterlockCurrState == STATE_T7_DITLB_MISS); assign IgnoreRequest = (InterlockCurrState == STATE_T0_READY & (ITLBMissF | DTLBMissM | TrapM)) | - ((InterlockCurrState == STATE_T0_REPLAY) - & (TrapM)); + ((InterlockCurrState == STATE_T0_REPLAY) & (TrapM)); endmodule diff --git a/pipelined/src/lsu/lsu.sv b/pipelined/src/lsu/lsu.sv index fdaf56fa9..fe8e4b766 100644 --- a/pipelined/src/lsu/lsu.sv +++ b/pipelined/src/lsu/lsu.sv @@ -173,7 +173,8 @@ module lsu ( logic [`XLEN-1:0] FinalAMOWriteDataM, FinalWriteDataM; logic [`XLEN-1:0] ReadDataWordM; logic [`XLEN-1:0] ReadDataWordMuxM; - + logic SelUncachedAdr; + if (`DMEM == `MEM_TIM) begin : dtim dtim dtim(.clk, .reset, .CPUBusy, .LSURWM, .IEUAdrM, .IEUAdrE, .TrapM, .FinalWriteDataM, .ReadDataWordM, .BusStall, .LSUBusWrite,.LSUBusRead, .BusCommittedM, @@ -183,20 +184,29 @@ module lsu ( end else begin : bus localparam integer WORDSPERLINE = (`DMEM == `MEM_CACHE) ? `DCACHE_LINELENINBITS/`XLEN : 1; localparam integer LINELEN = (`DMEM == `MEM_CACHE) ? `DCACHE_LINELENINBITS : `XLEN; - logic [`XLEN-1:0] ReadDataLineSetsM [WORDSPERLINE-1:0]; + localparam integer LOGWPL = (`DMEM == `MEM_CACHE) ? $clog2(WORDSPERLINE) : 1; + logic [LINELEN-1:0] ReadDataLineM; logic [LINELEN-1:0] DCacheMemWriteData; logic [`PA_BITS-1:0] DCacheBusAdr; logic DCacheWriteLine; logic DCacheFetchLine; logic DCacheBusAck; - - busdp #(WORDSPERLINE, LINELEN) busdp( + logic save, restore; + logic [`PA_BITS-1:0] WordOffsetAddr; + logic SelBus; + logic [LOGWPL-1:0] WordCount; + + busdp #(WORDSPERLINE, LINELEN, `XLEN, LOGWPL, 1) busdp( .clk, .reset, - .LSUBusHRDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusHWDATA, .LSUBusSize, - .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .ReadDataLineSetsM, .DCacheFetchLine, + .LSUBusHRDATA, .LSUBusHWDATA, .LSUBusAck, .LSUBusWrite, .LSUBusRead, .LSUBusSize, + .WordCount, + .LSUFunct3M, .LSUBusAdr, .DCacheBusAdr, .DCacheFetchLine, .DCacheWriteLine, .DCacheBusAck, .DCacheMemWriteData, .LSUPAdrM, .FinalAMOWriteDataM, .ReadDataWordM, .ReadDataWordMuxM, .IgnoreRequest, .LSURWM, .CPUBusy, .CacheableM, .BusStall, .BusCommittedM); + + assign WordOffsetAddr = LSUBusWrite ? ({{`PA_BITS-LOGWPL{1'b0}}, WordCount} << $clog2(`XLEN/8)) : LSUPAdrM; + if(`DMEM == `MEM_CACHE) begin : dcache cache #(.LINELEN(`DCACHE_LINELENINBITS), .NUMLINES(`DCACHE_WAYSIZEINBYTES*8/LINELEN), @@ -204,13 +214,18 @@ module lsu ( .clk, .reset, .CPUBusy, .RW(CacheableM ? LSURWM : 2'b00), .FlushCache(FlushDCacheM), .Atomic(CacheableM ? LSUAtomicM : 2'b00), .NextAdr(LSUAdrE), .PAdr(LSUPAdrM), - .FinalWriteData(FinalWriteDataM), .ReadDataWord(ReadDataWordM), + .save, .restore, + .FinalWriteData(FinalWriteDataM), .CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess), .IgnoreRequest, .CacheCommitted(DCacheCommittedM), .CacheBusAdr(DCacheBusAdr), - .ReadDataLineSets(ReadDataLineSetsM), .CacheMemWriteData(DCacheMemWriteData), + .ReadDataLine(ReadDataLineM), .CacheMemWriteData(DCacheMemWriteData), .CacheFetchLine(DCacheFetchLine), .CacheWriteLine(DCacheWriteLine), .CacheBusAck(DCacheBusAck), .InvalidateCacheM(1'b0)); + subcachelineread #(LINELEN, `XLEN, `XLEN) subcachelineread( + .clk, .reset, .PAdr(WordOffsetAddr), .save, .restore, + .ReadDataLine(ReadDataLineM), .ReadDataWord(ReadDataWordM)); + end else begin : passthrough assign {ReadDataWordM, DCacheStallM, DCacheCommittedM, DCacheFetchLine, DCacheWriteLine} = '0; assign DCacheMiss = CacheableM; assign DCacheAccess = CacheableM; diff --git a/pipelined/src/uncore/ram.sv b/pipelined/src/uncore/ram.sv index c5268f897..07c080255 100644 --- a/pipelined/src/uncore/ram.sv +++ b/pipelined/src/uncore/ram.sv @@ -59,48 +59,48 @@ module ram #(parameter BASE=0, RANGE = 65535) ( // *** need to address this preload for fpga. It should work as a preload file // but for some reason vivado is not synthesizing the preload. //$readmemh(PRELOAD, RAM); - RAM[0] = 64'h94e1819300002197; - RAM[1] = 64'h4281420141014081; - RAM[2] = 64'h4481440143814301; - RAM[3] = 64'h4681460145814501; - RAM[4] = 64'h4881480147814701; - RAM[5] = 64'h4a814a0149814901; - RAM[6] = 64'h4c814c014b814b01; - RAM[7] = 64'h4e814e014d814d01; - RAM[8] = 64'h0110011b4f814f01; - RAM[9] = 64'h059b45011161016e; - RAM[10] = 64'h0004063705fe0010; - RAM[11] = 64'h05a000ef8006061b; - RAM[12] = 64'h0ff003930000100f; - RAM[13] = 64'h4e952e3110012e37; - RAM[14] = 64'hc602829b0053f2b7; - RAM[15] = 64'h2023fe02dfe312fd; - RAM[16] = 64'h829b0053f2b7007e; - RAM[17] = 64'hfe02dfe312fdc602; - RAM[18] = 64'h4de31efd000e2023; - RAM[19] = 64'h059bf1402573fdd0; - RAM[20] = 64'h0000061705e20870; - RAM[21] = 64'h0010029b01260613; - RAM[22] = 64'h11010002806702fe; - RAM[23] = 64'h84b2842ae426e822; - RAM[24] = 64'h892ee04aec064505; - RAM[25] = 64'h06e000ef07e000ef; - RAM[26] = 64'h979334fd02905563; - RAM[27] = 64'h07930177d4930204; - RAM[28] = 64'h4089093394be2004; - RAM[29] = 64'h04138522008905b3; - RAM[30] = 64'h19e3014000ef2004; - RAM[31] = 64'h64a2644260e2fe94; - RAM[32] = 64'h6749808261056902; - RAM[33] = 64'hdfed8b8510472783; - RAM[34] = 64'h2423479110a73823; - RAM[35] = 64'h10472783674910f7; - RAM[36] = 64'h20058693ffed8b89; - RAM[37] = 64'h05a1118737836749; - RAM[38] = 64'hfed59be3fef5bc23; - RAM[39] = 64'h1047278367498082; - RAM[40] = 64'h67c98082dfed8b85; - RAM[41] = 64'h0000808210a7a023; + RAM[0] = 64'h94e1819300002197; + RAM[1] = 64'h4281420141014081; + RAM[2] = 64'h4481440143814301; + RAM[3] = 64'h4681460145814501; + RAM[4] = 64'h4881480147814701; + RAM[5] = 64'h4a814a0149814901; + RAM[6] = 64'h4c814c014b814b01; + RAM[7] = 64'h4e814e014d814d01; + RAM[8] = 64'h0110011b4f814f01; + RAM[9] = 64'h059b45011161016e; + RAM[10] = 64'h0004063705fe0010; + RAM[11] = 64'h05a000ef8006061b; + RAM[12] = 64'h0ff003930000100f; + RAM[13] = 64'h4e952e3110060e37; + RAM[14] = 64'hc602829b0053f2b7; + RAM[15] = 64'h2023fe02dfe312fd; + RAM[16] = 64'h829b0053f2b7007e; + RAM[17] = 64'hfe02dfe312fdc602; + RAM[18] = 64'h4de31efd000e2023; + RAM[19] = 64'h059bf1402573fdd0; + RAM[20] = 64'h0000061705e20870; + RAM[21] = 64'h0010029b01260613; + RAM[22] = 64'h11010002806702fe; + RAM[23] = 64'h84b2842ae426e822; + RAM[24] = 64'h892ee04aec064505; + RAM[25] = 64'h06e000ef07e000ef; + RAM[26] = 64'h979334fd02905563; + RAM[27] = 64'h07930177d4930204; + RAM[28] = 64'h4089093394be2004; + RAM[29] = 64'h04138522008905b3; + RAM[30] = 64'h19e3014000ef2004; + RAM[31] = 64'h64a2644260e2fe94; + RAM[32] = 64'h6749808261056902; + RAM[33] = 64'hdfed8b8510472783; + RAM[34] = 64'h2423479110a73823; + RAM[35] = 64'h10472783674910f7; + RAM[36] = 64'h20058693ffed8b89; + RAM[37] = 64'h05a1118737836749; + RAM[38] = 64'hfed59be3fef5bc23; + RAM[39] = 64'h1047278367498082; + RAM[40] = 64'h67c98082dfed8b85; + RAM[41] = 64'h0000808210a7a023; end // initial begin end // if (FPGA) diff --git a/pipelined/testbench/tests.vh b/pipelined/testbench/tests.vh index 4a78455ea..9cce5847f 100644 --- a/pipelined/testbench/tests.vh +++ b/pipelined/testbench/tests.vh @@ -1498,12 +1498,12 @@ string imperas32f[] = '{ string wally32e[] = '{ `WALLYTEST, + "rv32i_m/I/E-beq-01", "03b010", "rv32i_m/I/E-add-01", "005010", "rv32i_m/I/E-addi-01", "004010", "rv32i_m/I/E-and-01", "005010", "rv32i_m/I/E-andi-01", "004010", "rv32i_m/I/E-auipc-01", "002010", - "rv32i_m/I/E-beq-01", "03b010", "rv32i_m/I/E-bge-01", "034010", "rv32i_m/I/E-bgeu-01", "047010", "rv32i_m/I/E-blt-01", "038010", diff --git a/tests/testsBP/fpga-test-sdc/Makefile b/tests/testsBP/fpga-test-sdc/Makefile index 0c6d3666c..348a67cd2 100644 --- a/tests/testsBP/fpga-test-sdc/Makefile +++ b/tests/testsBP/fpga-test-sdc/Makefile @@ -106,7 +106,7 @@ $(TARGET).memfile: $(TARGET) @echo 'Making object dump file.' @riscv64-unknown-elf-objdump -D $< > $<.objdump @echo 'Making memory file' - exe2memfile0.pl $< + riscv64-unknown-elf-elf2hex --bit-width 64 --input $^ --output $@ extractFunctionRadix.sh $<.objdump mkdir -p ../../imperas-riscv-tests/work/rv64BP/ cp -f $(TARGETDIR)/* ../../imperas-riscv-tests/work/rv64BP/ diff --git a/tests/testsBP/fpga-test-sdc/bios.s b/tests/testsBP/fpga-test-sdc/bios.s index eab7ae1f4..8057a277d 100644 --- a/tests/testsBP/fpga-test-sdc/bios.s +++ b/tests/testsBP/fpga-test-sdc/bios.s @@ -61,7 +61,7 @@ _start: # write to gpio li t2, 0xFF - la t3, 0x1001200C + la t3, 0x1006000C li t4, 5 loop: diff --git a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/Makefrag b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/Makefrag index eda625075..792ea2e24 100644 --- a/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/Makefrag +++ b/tests/wally-riscv-arch-test/riscv-test-suite/rv32i_m/I/Makefrag @@ -28,6 +28,43 @@ # Description: Makefrag for RV32I architectural tests rv32i_sc_tests = \ + E-add-01 \ + E-addi-01 \ + E-and-01 \ + E-andi-01 \ + E-auipc-01 \ + E-beq-01 \ + E-bge-01 \ + E-bgeu-01 \ + E-blt-01 \ + E-bltu-01 \ + E-bne-01 \ + E-jal-01 \ + E-jalr-01 \ + E-lb-align-01 \ + E-lbu-align-01 \ + E-lh-align-01 \ + E-lhu-align-01 \ + E-lui-01 \ + E-lw-align-01 \ + E-or-01 \ + E-ori-01 \ + E-sb-align-01 \ + E-sh-align-01 \ + E-sll-01 \ + E-slli-01 \ + E-slt-01 \ + E-slti-01 \ + E-sltiu-01 \ + E-sltu-01 \ + E-sra-01 \ + E-srai-01 \ + E-srl-01 \ + E-srli-01 \ + E-sub-01 \ + E-sw-align-01 \ + E-xor-01 \ + E-xori-01 \ WALLY-ADD \ WALLY-SLT \ WALLY-SLTU \