Modified the LSU/IFU and caches to improve critical path. Arty A7 went from 15 to 17Mhz. I believe we can push all the way to 20+Mhz with relatively little effort. Along the way I'm fixing up the scripts build the linux images for the flash card.

This commit is contained in:
Ross Thompson 2023-07-21 13:06:27 -05:00
parent 2752e5de4c
commit d04d2afed2
5 changed files with 22 additions and 12 deletions

View file

@ -106,7 +106,9 @@ set_output_delay -clock [get_clocks clk_out3_xlnx_mmcm] -max -add_delay 6.000 [g
set_output_delay -clock [get_clocks clk_out3_xlnx_mmcm] 0.000 [get_ports SDCCLK]
set_multicycle_path -from [get_pins xlnx_ddr3_c0/u_xlnx_ddr3_mig/u_memc_ui_top_axi/mem_intfc0/ddr_phy_top0/u_ddr_calib_top/init_calib_complete_reg/C] -to [get_pins xlnx_proc_sys_reset_0/U0/EXT_LPF/lpf_int_reg/D] 10
#set_multicycle_path -from [get_pins xlnx_ddr3_c0/u_xlnx_ddr3_mig/u_memc_ui_top_axi/mem_intfc0/ddr_phy_top0/u_ddr_calib_top/init_calib_complete_reg/C] -to [get_pins xlnx_proc_sys_reset_0/U0/EXT_LPF/lpf_int_reg/D] 10
set_max_delay -datapath_only -from [get_pins xlnx_ddr3_c0/u_xlnx_ddr3_mig/u_memc_ui_top_axi/mem_intfc0/ddr_phy_top0/u_ddr_calib_top/init_calib_complete_reg/C] -to [get_pins xlnx_proc_sys_reset_0/U0/EXT_LPF/lpf_int_reg/D] 20.000
# *********************************
#set_property DCI_CASCADE {64} [get_iobanks 65]

View file

@ -21,8 +21,8 @@
cpus {
#address-cells = <0x01>;
#size-cells = <0x00>;
clock-frequency = <0xE4E1C0>;
timebase-frequency = <0xE4E1C0>;
clock-frequency = <0x1036640>;
timebase-frequency = <0x1036640>;
cpu@0 {
phandle = <0x01>;
@ -51,7 +51,7 @@
uart@10000000 {
interrupts = <0x0a>;
interrupt-parent = <0x03>;
clock-frequency = <0xE4E1C0>;
clock-frequency = <0x1036640>;
reg = <0x00 0x10000000 0x00 0x100>;
compatible = "ns16550a";
};

3
src/cache/cache.sv vendored
View file

@ -33,6 +33,7 @@ module cache import cvw::*; #(parameter cvw_t P,
input logic reset,
input logic Stall, // Stall the cache, preventing new accesses. In-flight access finished but does not return to READY
input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations)
input logic IgnoreRequestTLB, //
// cpu side
input logic [1:0] CacheRW, // [1] Read, [0] Write
input logic [1:0] CacheAtomic, // Atomic operation
@ -210,7 +211,7 @@ module cache import cvw::*; #(parameter cvw_t P,
/////////////////////////////////////////////////////////////////////////////////////////////
// Cache FSM
/////////////////////////////////////////////////////////////////////////////////////////////
cachefsm #(READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck,
.FlushStage, .CacheRW, .CacheAtomic, .Stall,
.CacheHit, .LineDirty, .CacheStall, .CacheCommitted,

View file

@ -229,13 +229,14 @@ module ifu import cvw::*; #(parameter cvw_t P) (
logic [P.PA_BITS-1:0] ICacheBusAdr;
logic ICacheBusAck;
logic [1:0] CacheBusRW, BusRW, CacheRWF;
logic [1:0] CacheBusRWTemp;
assign BusRW = ~ITLBMissF & ~CacheableF & ~SelIROM ? IFURWF : '0;
assign CacheRWF = ~ITLBMissF & CacheableF & ~SelIROM ? IFURWF : '0;
cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.ICACHE_LINELENINBITS),
.NUMLINES(P.ICACHE_WAYSIZEINBYTES*8/P.ICACHE_LINELENINBITS),
.NUMWAYS(P.ICACHE_NUMWAYS), .LOGBWPL(LOGBWPL), .WORDLEN(32), .MUXINTERVAL(16), .READ_ONLY_CACHE(1))
icache(.clk, .reset, .FlushStage(FlushD), .Stall(GatedStallD),
icache(.clk, .reset, .FlushStage(FlushD), .IgnoreRequestTLB(1'b0), .Stall(GatedStallD),
.FetchBuffer, .CacheBusAck(ICacheBusAck),
.CacheBusAdr(ICacheBusAdr), .CacheStall(ICacheStallF),
.CacheBusRW,
@ -249,6 +250,7 @@ module ifu import cvw::*; #(parameter cvw_t P) (
.NextSet(PCSpillNextF[11:0]),
.PAdr(PCPF),
.CacheCommitted(CacheCommittedF), .InvalidateCache(InvalidateICacheM));
ahbcacheinterface #(P.AHBW, P.LLEN, P.PA_BITS, WORDSPERLINE, LOGBWPL, LINELEN, LLENPOVERAHBW, 1)
ahbcacheinterface(.HCLK(clk), .HRESETn(~reset),
.HRDATA,

View file

@ -255,26 +255,31 @@ module lsu import cvw::*; #(parameter cvw_t P) (
logic [1:0] CacheRWM; // Cache read (10), write (01), AMO (11)
logic [1:0] CacheAtomicM; // Cache AMO
logic FlushDCache; // Suppress d cache flush if there is an ITLB miss.
logic CacheStall;
logic [1:0] CacheBusRWTemp;
assign BusRW = ~CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0;
assign CacheableOrFlushCacheM = CacheableM | FlushDCacheM;
assign CacheRWM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSURWM : '0;
assign CacheAtomicM = CacheableM & ~IgnoreRequestTLB & ~SelDTIM ? LSUAtomicM : '0;
assign FlushDCache = FlushDCacheM & ~(IgnoreRequestTLB | SelHPTW);
assign CacheRWM = CacheableM & ~SelDTIM ? LSURWM : '0;
assign CacheAtomicM = CacheableM & ~SelDTIM ? LSUAtomicM : '0;
assign FlushDCache = FlushDCacheM & ~(SelHPTW);
// *** need RT to add support for CMOpM and LSUPrefetchM (DH 7/2/23)
// *** prefetch can just act as a read operation
cache #(.P(P), .PA_BITS(P.PA_BITS), .XLEN(P.XLEN), .LINELEN(P.DCACHE_LINELENINBITS), .NUMLINES(P.DCACHE_WAYSIZEINBYTES*8/LINELEN),
.NUMWAYS(P.DCACHE_NUMWAYS), .LOGBWPL(LLENLOGBWPL), .WORDLEN(P.LLEN), .MUXINTERVAL(P.LLEN), .READ_ONLY_CACHE(0)) dcache(
.clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW), .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM),
.clk, .reset, .Stall(GatedStallW), .SelBusBeat, .FlushStage(FlushW | IgnoreRequestTLB), .IgnoreRequestTLB, .CacheRW(CacheRWM), .CacheAtomic(CacheAtomicM),
.FlushCache(FlushDCache), .NextSet(IEUAdrE[11:0]), .PAdr(PAdrM),
.ByteMask(ByteMaskM), .BeatCount(BeatCount[AHBWLOGBWPL-1:AHBWLOGBWPL-LLENLOGBWPL]),
.CacheWriteData(LSUWriteDataM), .SelHPTW,
.CacheStall(DCacheStallM), .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.CacheStall, .CacheMiss(DCacheMiss), .CacheAccess(DCacheAccess),
.CacheCommitted(DCacheCommittedM),
.CacheBusAdr(DCacheBusAdr), .ReadDataWord(DCacheReadDataWordM),
.FetchBuffer, .CacheBusRW,
.FetchBuffer, .CacheBusRW(CacheBusRWTemp),
.CacheBusAck(DCacheBusAck), .InvalidateCache(1'b0));
assign DCacheStallM = CacheStall & ~IgnoreRequestTLB;
assign CacheBusRW = IgnoreRequestTLB ? 2'b0 : CacheBusRWTemp;
ahbcacheinterface #(.AHBW(P.AHBW), .LLEN(P.LLEN), .PA_BITS(P.PA_BITS), .BEATSPERLINE(BEATSPERLINE), .AHBWLOGBWPL(AHBWLOGBWPL), .LINELEN(LINELEN), .LLENPOVERAHBW(LLENPOVERAHBW), .READ_ONLY_CACHE(0)) ahbcacheinterface(
.HCLK(clk), .HRESETn(~reset), .Flush(FlushW),