Code cleanup: RAM, fdivsqrt

This commit is contained in:
David Harris 2024-06-14 03:35:05 -07:00
parent 6789f32154
commit b1c9450b4a
10 changed files with 35 additions and 33 deletions

View file

@ -114,11 +114,12 @@ module ahbcacheinterface import cvw::*; #(
.s(~(CacheableOrFlushCacheM)), .y(PreHWDATA));
flopen #(P.AHBW) wdreg(HCLK, HREADY, PreHWDATA, HWDATA); // delay HWDATA by 1 cycle per spec
// *** bummer need a second byte mask for bus as it is AHBW rather than LLEN.
// probably can merge by muxing PAdrM's LLEN/8-1 index bit based on HTRANS being != 0.
swbytemask #(P.AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(P.AHBW/8)-1:0]), .ByteMask(BusByteMaskM), .ByteMaskExtended());
flopen #(P.AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[P.AHBW/8-1:0], HWSTRB);
if (READ_ONLY_CACHE) begin
assign HWSTRB = '0;
end else begin // compute byte mask for AHB transaction based on size and address. AHBW may be different than LLEN
swbytemask #(P.AHBW) busswbytemask(.Size(HSIZE), .Adr(HADDR[$clog2(P.AHBW/8)-1:0]), .ByteMask(BusByteMaskM), .ByteMaskExtended());
flopen #(P.AHBW/8) HWSTRBReg(HCLK, HREADY, BusByteMaskM[P.AHBW/8-1:0], HWSTRB);
end
buscachefsm #(BeatCountThreshold, AHBWLOGBWPL, READ_ONLY_CACHE, P.BURST_EN) AHBBuscachefsm(
.HCLK, .HRESETn, .Flush, .BusRW, .BusAtomic, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat,

View file

@ -44,7 +44,7 @@ module fdivsqrtiter import cvw::*; #(parameter cvw_t P) (
logic [P.DIVb+3:0] WCNext[P.DIVCOPIES-1:0]; // Q4.DIVb
logic [P.DIVb+3:0] WS[P.DIVCOPIES:0]; // Q4.DIVb
logic [P.DIVb+3:0] WC[P.DIVCOPIES:0]; // Q4.DIVb
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.DIVb // *** probably Q not U. See Table 16.26 notes
logic [P.DIVb:0] U[P.DIVCOPIES:0]; // U1.DIVb
logic [P.DIVb:0] UM[P.DIVCOPIES:0]; // U1.DIVb
logic [P.DIVb:0] UNext[P.DIVCOPIES-1:0]; // U1.DIVb
logic [P.DIVb:0] UMNext[P.DIVCOPIES-1:0]; // U1.DIVb

View file

@ -222,7 +222,7 @@ module fdivsqrtpreproc import cvw::*; #(parameter cvw_t P) (
logic RemOpE;
/* verilator lint_off WIDTH */
assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. rn = Cycles * r * k - r ***explain
assign IntDivNormShiftE = P.DIVb - (CyclesE * P.RK - P.LOGR); // b - rn, used for integer normalization right shift. n = (Cycles * k - 1)
assign IntRemNormShiftE = mE + (P.DIVb-(P.XLEN-1)); // m + b - (N-1) for remainder normalization shift
/* verilator lint_on WIDTH */
assign RemOpE = Funct3E[1];

View file

@ -52,7 +52,7 @@ module fdivsqrtstage4 import cvw::*; #(parameter cvw_t P) (
// Digit Selection logic
assign j0 = ~C[P.DIVb+1]; // first step of R digit selection: C = 00...0
assign j1 = C[P.DIVb] & ~C[P.DIVb-1]; // second step of R digit selection: C = 1100...0; *** could simplify to ~C[P.DIVb-1] because j=0 case takes priority
assign j1 = ~C[P.DIVb-1]; // second step of R digit selection: C = 1100...0; simplified from C[P.DIVb] & ~C[P.DIVb-1] because j=0 case takes priority
assign Smsbs = U[P.DIVb:P.DIVb-4]; // U1.4 most significant bits of square root
assign Dmsbs = D[P.DIVb-1:P.DIVb-3]; // U0.3 most significant fractional bits of divisor after leading 1
assign WCmsbs = WC[P.DIVb+3:P.DIVb-4]; // Q4.4 most significant bits of residual

View file

@ -95,7 +95,7 @@ module fdivsqrtuslc4cmp (
// Choose A for current operation
always_comb
if (SqrtE) begin
if (Smsbs[4]) A = 3'b111; // for S = 1.0000 *** can we optimize away this case?
if (Smsbs[4]) A = 3'b111; // for S = 1.0000
else A = Smsbs[2:0];
end else A = Dmsbs;
@ -108,7 +108,7 @@ module fdivsqrtuslc4cmp (
/* Nannarelli12 design to exploit symmetry is slower because of negation and mux for special case of A = 000
assign mk0 = -mk1;
assign mkm1 = (A == 3'b000) ? -13 : -mk2; // asymmetry in table *** can we hide from critical path
assign mkm1 = (A == 3'b000) ? -13 : -mk2; // asymmetry in table
*/
// Compare residual W to selection constants to choose digit
@ -117,5 +117,5 @@ module fdivsqrtuslc4cmp (
else if ($signed(Wmsbs) >= $signed(mk1)) udigit = 4'b0100; // choose 1
else if ($signed(Wmsbs) >= $signed(mk0)) udigit = 4'b0000; // choose 0
else if ($signed(Wmsbs) >= $signed(mkm1)) udigit = 4'b0010; // choose -1
else udigit = 4'b0001; // choose -2
else udigit = 4'b0001; // choose -2
endmodule

View file

@ -44,8 +44,6 @@ module ram1p1rwbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44, PRE
output logic [WIDTH-1:0] dout
);
bit [WIDTH-1:0] RAM[DEPTH-1:0];
///////////////////////////////////////////////////////////////////////////////
// TRUE SRAM macro
///////////////////////////////////////////////////////////////////////////////
@ -83,6 +81,7 @@ module ram1p1rwbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44, PRE
// READ first SRAM model
///////////////////////////////////////////////////////////////////////////////
end else begin: ram
bit [WIDTH-1:0] RAM[DEPTH-1:0];
integer i;
if (PRELOAD_ENABLED) begin

View file

@ -41,11 +41,9 @@ module ram1p1rwe import cvw::* ; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44) (
output logic [WIDTH-1:0] dout
);
bit [WIDTH-1:0] RAM[DEPTH-1:0];
// ***************************************************************************
//////////////////////////////////////////////////////////////////////////////
// TRUE SRAM macro
// ***************************************************************************
//////////////////////////////////////////////////////////////////////////////
if ((USE_SRAM == 1) & (WIDTH == 128) & (DEPTH == 64)) begin // Cache data subarray
// 64 x 128-bit SRAM
ram1p1rwbe_64x128 sram1A (.CLK(clk), .CEB(~ce), .WEB(~we),
@ -64,12 +62,15 @@ module ram1p1rwe import cvw::* ; #(parameter USE_SRAM=0, DEPTH=64, WIDTH=44) (
.A(addr), .D(din),
.BWEB('0), .Q(dout));
// ***************************************************************************
//////////////////////////////////////////////////////////////////////////////
// READ first SRAM model
// ***************************************************************************
//////////////////////////////////////////////////////////////////////////////
end else begin: ram
// *** Vivado is not implementing this as block ram for some reason.
// The version with byte write enables it correctly infers block ram.
bit [WIDTH-1:0] RAM[DEPTH-1:0];
integer i;
// Combinational read: register address and read after clock edge

View file

@ -44,7 +44,6 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68)
output logic [WIDTH-1:0] rd1
);
bit [WIDTH-1:0] mem[DEPTH-1:0];
localparam SRAMWIDTH = 32;
localparam SRAMNUMSETS = SRAMWIDTH/WIDTH;
@ -105,24 +104,26 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68)
.QA(SRAMReadData),
.QB());
end else begin
end else begin:ram
///////////////////////////////////////////////////////////////////////////////
// READ first SRAM model
///////////////////////////////////////////////////////////////////////////////
bit [WIDTH-1:0] RAM[DEPTH-1:0];
integer i;
/*
initial begin // initialize memory for simulation only; not needed because done in the testbench now
integer j;
for (j=0; j < DEPTH; j++)
mem[j] = '0;
RAM[j] = '0;
end
*/
// Read
logic [$clog2(DEPTH)-1:0] ra1d;
flopen #($clog2(DEPTH)) adrreg(clk, ce1, ra1, ra1d);
assign rd1 = mem[ra1d];
assign rd1 = RAM[ra1d];
// Write divided into part for bytes and part for extra msbs
// coverage off
@ -131,13 +132,13 @@ module ram2p1r1wbe import cvw::*; #(parameter USE_SRAM=0, DEPTH=1024, WIDTH=68)
always @(posedge clk)
if (ce2 & we2)
for(i = 0; i < WIDTH/8; i++)
if(bwe2[i]) mem[wa2][i*8 +: 8] <= wd2[i*8 +: 8];
if(bwe2[i]) RAM[wa2][i*8 +: 8] <= wd2[i*8 +: 8];
// coverage on
if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8
always @(posedge clk)
if (ce2 & we2 & bwe2[WIDTH/8])
mem[wa2][WIDTH-1:WIDTH-WIDTH%8] <= wd2[WIDTH-1:WIDTH-WIDTH%8];
RAM[wa2][WIDTH-1:WIDTH-WIDTH%8] <= wd2[WIDTH-1:WIDTH-WIDTH%8];
end
endmodule

View file

@ -64,13 +64,13 @@ module DCacheFlushFSM import cvw::*; #(parameter cvw_t P)
.loglinebytelen(loglinebytelen), .sramlen(sramlen))
copyShadow(.clk,
.start,
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][P.PA_BITS-1-tagstart:0]),
.tag(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.ram.RAM[index][P.PA_BITS-1-tagstart:0]),
.valid(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].ValidBits[index]),
.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].DirtyBits[index]),
// these dirty bit selections would be needed if dirty is moved inside the tag array.
//.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].dirty.DirtyMem.RAM[index]),
//.dirty(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].CacheTagMem.RAM[index][P.PA_BITS+tagstart]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].wordram.CacheDataMem.RAM[index]),
.data(testbench.dut.core.lsu.bus.dcache.dcache.CacheWays[way].word[cacheWord].wordram.CacheDataMem.ram.RAM[index]),
.index(index),
.cacheWord(cacheWord),
.CacheData(CacheData[way][index][cacheWord]),

View file

@ -499,10 +499,10 @@ module testbench;
readResult = $fread(dut.uncoregen.uncore.bootrom.bootrom.memory.ROM, memFile);
$fclose(memFile);
memFile = $fopen(memfilename, "rb");
readResult = $fread(dut.uncoregen.uncore.ram.ram.memory.RAM, memFile);
readResult = $fread(dut.uncoregen.uncore.ram.ram.memory.ram.RAM, memFile);
$fclose(memFile);
end else
$readmemh(memfilename, dut.uncoregen.uncore.ram.ram.memory.RAM);
$readmemh(memfilename, dut.uncoregen.uncore.ram.ram.memory.ram.RAM);
if (TEST == "embench") $display("Read memfile %s", memfilename);
end
if (CopyRAM) begin
@ -511,7 +511,7 @@ module testbench;
EndIndex = (end_signature_addr >> LogXLEN) + 8;
BaseIndex = P.UNCORE_RAM_BASE >> LogXLEN;
for(ShadowIndex = StartIndex; ShadowIndex <= EndIndex; ShadowIndex++) begin
testbench.DCacheFlushFSM.ShadowRAM[ShadowIndex] = dut.uncoregen.uncore.ram.ram.memory.RAM[ShadowIndex - BaseIndex];
testbench.DCacheFlushFSM.ShadowRAM[ShadowIndex] = dut.uncoregen.uncore.ram.ram.memory.ram.RAM[ShadowIndex - BaseIndex];
end
end
end
@ -519,7 +519,7 @@ module testbench;
if (P.DTIM_SUPPORTED) begin
always @(posedge clk) begin
if (LoadMem) begin
$readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.RAM);
$readmemh(memfilename, dut.core.lsu.dtim.dtim.ram.ram.RAM);
$display("Read memfile %s", memfilename);
end
if (CopyRAM) begin
@ -528,7 +528,7 @@ module testbench;
EndIndex = (end_signature_addr >> LogXLEN) + 8;
BaseIndex = P.UNCORE_RAM_BASE >> LogXLEN;
for(ShadowIndex = StartIndex; ShadowIndex <= EndIndex; ShadowIndex++) begin
testbench.DCacheFlushFSM.ShadowRAM[ShadowIndex] = dut.core.lsu.dtim.dtim.ram.RAM[ShadowIndex - BaseIndex];
testbench.DCacheFlushFSM.ShadowRAM[ShadowIndex] = dut.core.lsu.dtim.dtim.ram.ram.RAM[ShadowIndex - BaseIndex];
end
end
end
@ -539,7 +539,7 @@ module testbench;
always @(posedge clk)
if (ResetMem) // program memory is sometimes reset (e.g. for CoreMark, which needs zeroed memory)
for (adrindex=0; adrindex<(P.UNCORE_RAM_RANGE>>1+(P.XLEN/32)); adrindex = adrindex+1)
dut.uncoregen.uncore.ram.ram.memory.RAM[adrindex] = '0;
dut.uncoregen.uncore.ram.ram.memory.ram.RAM[adrindex] = '0;
////////////////////////////////////////////////////////////////////////////////
// Actual hardware