diff --git a/src/ebu/ahbcacheinterface.sv b/src/ebu/ahbcacheinterface.sv index f033b40cc..5316e215f 100644 --- a/src/ebu/ahbcacheinterface.sv +++ b/src/ebu/ahbcacheinterface.sv @@ -114,11 +114,12 @@ module ahbcacheinterface import cvw::*; #( .s(~(CacheableOrFlushCacheM)), .y(PreHWDATA)); flopen #(P.AHBW) wdreg(HCLK, HREADY, PreHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec - // *** bummer need a second byte mask for bus as it is AHBW rather than LLEN. - // probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0. - swbytemask #(P.AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(P.AHBW/8)-1:0]), .ByteMask(BusByteMaskM), .ByteMaskExtended()); - - flopen #(P.AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[P.AHBW/8-1:0], HWSTRB); + if (READ_ONLY_CACHE) begin + assign HWSTRB = '0; + end else begin // compute byte mask for AHB transaction based on size and address. AHBW may be different than LLEN + swbytemask #(P.AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(P.AHBW/8)-1:0]), .ByteMask(BusByteMaskM), .ByteMaskExtended()); + flopen #(P.AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[P.AHBW/8-1:0], HWSTRB); + end buscachefsm #(BeatCountThreshold, AHBWLOGBWPL, READ_ONLY_CACHE, P.BURST_EN) AHBBuscachefsm( .HCLK, .HRESETn, .Flush, .BusRW, .BusAtomic, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat, diff --git a/src/fpu/fdivsqrt/fdivsqrtiter.sv b/src/fpu/fdivsqrt/fdivsqrtiter.sv index 4bfcebcd1..0f092706a 100644 --- a/src/fpu/fdivsqrt/fdivsqrtiter.sv +++ b/src/fpu/fdivsqrt/fdivsqrtiter.sv @@ -44,7 +44,7 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) ( logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.DIVb logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.DIVb logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.DIVb - logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.DIVb // *** probably Q not U. See Table 16.26 notes + logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.DIVb logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.DIVb logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.DIVb logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.DIVb diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index 802ac92dc..ffc62b5cc 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -222,7 +222,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) ( logic RemOpE; /* verilator lint_off WIDTH */ - assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain + assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. n = (Cycles * k - 1) assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift /* verilator lint_on WIDTH */ assign RemOpE = Funct3E[1]; diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv index 856273a5e..47b1d4b26 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -52,7 +52,7 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) ( // Digit Selection logic assign j0 = ~C[P.DIVb+1]; // first step of R digit selection: C = 00...0 - assign j1 = C[P.DIVb] & ~C[P.DIVb-1]; // second step of R digit selection: C = 1100...0; *** could simplify to ~C[P.DIVb-1] because j=0 case takes priority + assign j1 = ~C[P.DIVb-1]; // second step of R digit selection: C = 1100...0; simplified from C[P.DIVb] & ~C[P.DIVb-1] because j=0 case takes priority assign Smsbs = U[P.DIVb:P.DIVb-4]; // U1.4 most significant bits of square root assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; // U0.3 most significant fractional bits of divisor after leading 1 assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual diff --git a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv index fd1092497..bf75532b3 100644 --- a/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv +++ b/src/fpu/fdivsqrt/fdivsqrtuslc4cmp.sv @@ -95,7 +95,7 @@ module fdivsqrtuslc4cmp ( // Choose A for current operation always_comb if (SqrtE) begin - if (Smsbs[4]) A = 3'b111; // for S = 1.0000 *** can we optimize away this case? + if (Smsbs[4]) A = 3'b111; // for S = 1.0000 else A = Smsbs[2:0]; end else A = Dmsbs; @@ -108,7 +108,7 @@ module fdivsqrtuslc4cmp ( /* Nannarelli12 design to exploit symmetry is slower because of negation and mux for special case of A = 000 assign mk0 = -mk1; - assign mkm1 = (A == 3'b000) ? -13 : -mk2; // asymmetry in table *** can we hide from critical path + assign mkm1 = (A == 3'b000) ? -13 : -mk2; // asymmetry in table */ // Compare residual W to selection constants to choose digit @@ -117,5 +117,5 @@ module fdivsqrtuslc4cmp ( else if ($signed(Wmsbs) >= $signed(mk1)) udigit = 4'b0100; // choose 1 else if ($signed(Wmsbs) >= $signed(mk0)) udigit = 4'b0000; // choose 0 else if ($signed(Wmsbs) >= $signed(mkm1)) udigit = 4'b0010; // choose -1 - else udigit = 4'b0001; // choose -2 + else udigit = 4'b0001; // choose -2 endmodule diff --git a/src/generic/mem/ram1p1rwbe.sv b/src/generic/mem/ram1p1rwbe.sv index 010e55a30..2c15716d9 100644 --- a/src/generic/mem/ram1p1rwbe.sv +++ b/src/generic/mem/ram1p1rwbe.sv @@ -44,8 +44,6 @@ module ram1p1rwbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44, PRE output logic [WIDTH-1:0] dout ); - bit [WIDTH-1:0] RAM[DEPTH-1:0]; - /////////////////////////////////////////////////////////////////////////////// // TRUE SRAM macro /////////////////////////////////////////////////////////////////////////////// @@ -83,6 +81,7 @@ module ram1p1rwbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44, PRE // READ first SRAM model /////////////////////////////////////////////////////////////////////////////// end else begin: ram + bit [WIDTH-1:0] RAM[DEPTH-1:0]; integer i; if (PRELOAD_ENABLED) begin diff --git a/src/generic/mem/ram1p1rwe.sv b/src/generic/mem/ram1p1rwe.sv index a030d2aab..240af6db1 100644 --- a/src/generic/mem/ram1p1rwe.sv +++ b/src/generic/mem/ram1p1rwe.sv @@ -41,11 +41,9 @@ module ram1p1rwe import cvw::* ; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44) ( output logic [WIDTH-1:0] dout ); - bit [WIDTH-1:0] RAM[DEPTH-1:0]; - - // *************************************************************************** + ////////////////////////////////////////////////////////////////////////////// // TRUE SRAM macro - // *************************************************************************** + ////////////////////////////////////////////////////////////////////////////// if ((USE_SRAM == 1) & (WIDTH == 128) & (DEPTH == 64)) begin // Cache data subarray // 64 x 128-bit SRAM ram1p1rwbe_64x128 sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), @@ -64,12 +62,15 @@ module ram1p1rwe import cvw::* ; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44) ( .A(addr), .D(din), .BWEB('0), .Q(dout)); - // *************************************************************************** + ////////////////////////////////////////////////////////////////////////////// // READ first SRAM model - // *************************************************************************** + ////////////////////////////////////////////////////////////////////////////// end else begin: ram // *** Vivado is not implementing this as block ram for some reason. // The version with byte write enables it correctly infers block ram. + + bit [WIDTH-1:0] RAM[DEPTH-1:0]; + integer i; // Combinational read: register address and read after clock edge diff --git a/src/generic/mem/ram2p1r1wbe.sv b/src/generic/mem/ram2p1r1wbe.sv index ba6919958..5a677ffaa 100644 --- a/src/generic/mem/ram2p1r1wbe.sv +++ b/src/generic/mem/ram2p1r1wbe.sv @@ -44,7 +44,6 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68) output logic [WIDTH-1:0] rd1 ); - bit [WIDTH-1:0] mem[DEPTH-1:0]; localparam SRAMWIDTH = 32; localparam SRAMNUMSETS = SRAMWIDTH/WIDTH; @@ -105,24 +104,26 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68) .QA(SRAMReadData), .QB()); - end else begin + end else begin:ram /////////////////////////////////////////////////////////////////////////////// // READ first SRAM model /////////////////////////////////////////////////////////////////////////////// + + bit [WIDTH-1:0] RAM[DEPTH-1:0]; integer i; /* initial begin // initialize memory for simulation only; not needed because done in the testbench now integer j; for (j=0; j < DEPTH; j++) - mem[j] = '0; + RAM[j] = '0; end */ // Read logic [$clog2(DEPTH)-1:0] ra1d; flopen #($clog2(DEPTH)) adrreg(clk, ce1, ra1, ra1d); - assign rd1 = mem[ra1d]; + assign rd1 = RAM[ra1d]; // Write divided into part for bytes and part for extra msbs // coverage off @@ -131,13 +132,13 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68) always @(posedge clk) if (ce2 & we2) for(i = 0; i < WIDTH/8; i++) - if(bwe2[i]) mem[wa2][i*8 +: 8] <= wd2[i*8 +: 8]; + if(bwe2[i]) RAM[wa2][i*8 +: 8] <= wd2[i*8 +: 8]; // coverage on if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8 always @(posedge clk) if (ce2 & we2 & bwe2[WIDTH/8]) - mem[wa2][WIDTH-1:WIDTH-WIDTH%8] <= wd2[WIDTH-1:WIDTH-WIDTH%8]; + RAM[wa2][WIDTH-1:WIDTH-WIDTH%8] <= wd2[WIDTH-1:WIDTH-WIDTH%8]; end endmodule diff --git a/testbench/common/DCacheFlushFSM.sv b/testbench/common/DCacheFlushFSM.sv index ed9d56342..affb10b10 100644 --- a/testbench/common/DCacheFlushFSM.sv +++ b/testbench/common/DCacheFlushFSM.sv @@ -64,13 +64,13 @@ module DCacheFlushFSM import cvw::*; #(parameter cvw_t P) .loglinebytelen(loglinebytelen), .sramlen(sramlen)) copyShadow(.clk, .start, - .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][P.PA_BITS-1-tagstart:0]), + .tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.ram.RAM[index][P.PA_BITS-1-tagstart:0]), .valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]), .dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]), // these dirty bit selections would be needed if dirty is moved inside the tag array. //.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].dirty.DirtyMem.RAM[index]), //.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][P.PA_BITS+tagstart]), - .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].wordram.CacheDataMem.RAM[index]), + .data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].wordram.CacheDataMem.ram.RAM[index]), .index(index), .cacheWord(cacheWord), .CacheData(CacheData[way][index][cacheWord]), diff --git a/testbench/testbench.sv b/testbench/testbench.sv index 17def063c..1ab9522b8 100644 --- a/testbench/testbench.sv +++ b/testbench/testbench.sv @@ -499,10 +499,10 @@ module testbench; readResult = $fread(dut.uncoregen.uncore.bootrom.bootrom.memory.ROM, memFile); $fclose(memFile); memFile = $fopen(memfilename, "rb"); - readResult = $fread(dut.uncoregen.uncore.ram.ram.memory.RAM, memFile); + readResult = $fread(dut.uncoregen.uncore.ram.ram.memory.ram.RAM, memFile); $fclose(memFile); end else - $readmemh(memfilename, dut.uncoregen.uncore.ram.ram.memory.RAM); + $readmemh(memfilename, dut.uncoregen.uncore.ram.ram.memory.ram.RAM); if (TEST == "embench") $display("Read memfile %s", memfilename); end if (CopyRAM) begin @@ -511,7 +511,7 @@ module testbench; EndIndex = (end_signature_addr >> LogXLEN) + 8; BaseIndex = P.UNCORE_RAM_BASE >> LogXLEN; for(ShadowIndex = StartIndex; ShadowIndex <= EndIndex; ShadowIndex++) begin - testbench.DCacheFlushFSM.ShadowRAM[ShadowIndex] = dut.uncoregen.uncore.ram.ram.memory.RAM[ShadowIndex - BaseIndex]; + testbench.DCacheFlushFSM.ShadowRAM[ShadowIndex] = dut.uncoregen.uncore.ram.ram.memory.ram.RAM[ShadowIndex - BaseIndex]; end end end @@ -519,7 +519,7 @@ module testbench; if (P.DTIM_SUPPORTED) begin always @(posedge clk) begin if (LoadMem) begin - $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.RAM); + $readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.ram.RAM); $display("Read memfile %s", memfilename); end if (CopyRAM) begin @@ -528,7 +528,7 @@ module testbench; EndIndex = (end_signature_addr >> LogXLEN) + 8; BaseIndex = P.UNCORE_RAM_BASE >> LogXLEN; for(ShadowIndex = StartIndex; ShadowIndex <= EndIndex; ShadowIndex++) begin - testbench.DCacheFlushFSM.ShadowRAM[ShadowIndex] = dut.core.lsu.dtim.dtim.ram.RAM[ShadowIndex - BaseIndex]; + testbench.DCacheFlushFSM.ShadowRAM[ShadowIndex] = dut.core.lsu.dtim.dtim.ram.ram.RAM[ShadowIndex - BaseIndex]; end end end @@ -539,7 +539,7 @@ module testbench; always @(posedge clk) if (ResetMem) // program memory is sometimes reset (e.g. for CoreMark, which needs zeroed memory) for (adrindex=0; adrindex<(P.UNCORE_RAM_RANGE>>1+(P.XLEN/32)); adrindex = adrindex+1) - dut.uncoregen.uncore.ram.ram.memory.RAM[adrindex] = '0; + dut.uncoregen.uncore.ram.ram.memory.ram.RAM[adrindex] = '0; //////////////////////////////////////////////////////////////////////////////// // Actual hardware