[rtl] Icache RAM primitive changes

- Bring in a version of ram primitive with configurable width similar to
  the OT RAM primitive.
- Change the RAM banking structure to be a single bank of LineSize (64
  bits) to match the upcoming ECC granularity.

Signed-off-by: Tom Roberts <tomroberts@lowrisc.org>
This commit is contained in:
Tom Roberts 2020-03-13 11:37:31 +00:00 committed by Tom Roberts
parent c800ae957b
commit fe00eb46e9
4 changed files with 143 additions and 40 deletions

View file

@ -81,8 +81,7 @@ This causes a minor performance inefficiency, but should not happen often in pra
RAM Arrangement
---------------
The data RAMs are arranged as a series of 32 bit banks, the number depending on the cache line width and number of ways.
Each group of banks forming a single way can be combined into wider RAM instances if required since they are always accessed together.
The data RAMs are arranged as ``NumWays`` banks of ``LineSize`` width.
Indicative RAM sizes for common configurations are given in the table below:

View file

@ -71,10 +71,9 @@ module ibex_icache #(
localparam int unsigned LINE_BEATS = LINE_SIZE_BYTES / BUS_BYTES;
localparam int unsigned LINE_BEATS_W = $clog2(LINE_BEATS);
localparam int unsigned NUM_LINES = CacheSizeBytes / NumWays / LINE_SIZE_BYTES;
localparam int unsigned NUM_BANKS = LINE_BEATS * NumWays;
localparam int unsigned INDEX_W = $clog2(NUM_LINES);
localparam int unsigned INDEX_HI = INDEX_W + LINE_W - 1;
localparam int unsigned TAG_W = ADDR_W - INDEX_W - LINE_W + 1; // 1 valid bit
localparam int unsigned TAG_SIZE = ADDR_W - INDEX_W - LINE_W + 1; // 1 valid bit
localparam int unsigned OUTPUT_BEATS = (BUS_BYTES / 2); // number of halfwords
// Prefetch signals
@ -90,7 +89,6 @@ module ibex_icache #(
logic [INDEX_W-1:0] fill_index_ic0;
logic [31:INDEX_HI+1] fill_tag_ic0;
logic [LineSize-1:0] fill_wdata_ic0;
logic [NUM_BANKS-1:0] fill_banks_ic0;
logic lookup_grant_ic0;
logic lookup_actual_ic0;
logic fill_grant_ic0;
@ -98,14 +96,14 @@ module ibex_icache #(
logic [INDEX_W-1:0] tag_index_ic0;
logic [NumWays-1:0] tag_banks_ic0;
logic tag_write_ic0;
logic [TAG_W-1:0] tag_wdata_ic0;
logic [TAG_SIZE-1:0] tag_wdata_ic0;
logic data_req_ic0;
logic [INDEX_W-1:0] data_index_ic0;
logic [NUM_BANKS-1:0] data_banks_ic0;
logic [NumWays-1:0] data_banks_ic0;
logic data_write_ic0;
logic [LineSize-1:0] data_wdata_ic0;
// Cache pipeline IC1 signals
logic [TAG_W-1:0] tag_rdata_ic1 [NumWays];
logic [TAG_SIZE-1:0] tag_rdata_ic1 [NumWays];
logic [LineSize-1:0] data_rdata_ic1 [NumWays];
logic [LineSize-1:0] hit_data_ic1;
logic lookup_valid_ic1;
@ -226,9 +224,6 @@ module ibex_icache #(
assign fill_index_ic0 = fill_ram_req_addr[INDEX_HI:LINE_W];
assign fill_tag_ic0 = fill_ram_req_addr[ADDR_W-1:INDEX_HI+1];
assign fill_wdata_ic0 = fill_ram_req_data;
for (genvar way = 0; way < NumWays; way++) begin : gen_way_banks
assign fill_banks_ic0[way*LINE_BEATS+:LINE_BEATS] = {LINE_BEATS{fill_ram_req_way[way]}};
end
// Arbitrated signals - lookups have highest priority
assign lookup_grant_ic0 = lookup_req_ic0;
@ -248,7 +243,7 @@ module ibex_icache #(
// Dataram
assign data_req_ic0 = lookup_req_ic0 | fill_req_ic0;
assign data_index_ic0 = tag_index_ic0;
assign data_banks_ic0 = fill_grant_ic0 ? fill_banks_ic0 : {NUM_BANKS{1'b1}};
assign data_banks_ic0 = tag_banks_ic0;
assign data_write_ic0 = tag_write_ic0;
assign data_wdata_ic0 = fill_wdata_ic0;
@ -258,36 +253,35 @@ module ibex_icache #(
for (genvar way = 0; way < NumWays; way++) begin : gen_rams
// Tag RAM instantiation
logic [ADDR_W-TAG_W-1:0] unused_tag_ic1;
ram_1p #(
.Depth (NUM_LINES)
prim_generic_ram_1p #(
.Width (TAG_SIZE),
.Depth (NUM_LINES)
) tag_bank (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (tag_req_ic0 & tag_banks_ic0[way]),
.we_i (tag_write_ic0),
.be_i (4'hF),
.addr_i ({{32-INDEX_W-2{1'b0}},tag_index_ic0,2'b00}),
.wdata_i ({{32-TAG_W{1'b0}},tag_wdata_ic0}),
.rvalid_o (),
.rdata_o ({unused_tag_ic1,tag_rdata_ic1[way]})
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (tag_req_ic0 & tag_banks_ic0[way]),
.write_i (tag_write_ic0),
.wmask_i ({TAG_SIZE{1'b1}}),
.addr_i (tag_index_ic0),
.wdata_i (tag_wdata_ic0),
.rvalid_o (),
.rdata_o (tag_rdata_ic1[way])
);
// Data RAM instantiation
for (genvar sub = 0; sub < LINE_BEATS; sub++) begin : gen_sub_banks
ram_1p #(
.Depth (NUM_LINES)
) data_bank (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (data_req_ic0 & data_banks_ic0[way*LINE_BEATS+sub]),
.we_i (data_write_ic0),
.be_i (4'hF),
.addr_i ({{32-INDEX_W-2{1'b0}},data_index_ic0,2'b00}),
.wdata_i (data_wdata_ic0[sub*32+:32]),
.rvalid_o (),
.rdata_o (data_rdata_ic1[way][sub*32+:32])
);
end
prim_generic_ram_1p #(
.Width (LineSize),
.Depth (NUM_LINES)
) data_bank (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (data_req_ic0 & data_banks_ic0[way]),
.write_i (data_write_ic0),
.wmask_i ({LineSize{1'b1}}),
.addr_i (data_index_ic0),
.wdata_i (data_wdata_ic0),
.rvalid_o (),
.rdata_o (data_rdata_ic1[way])
);
end
always_ff @(posedge clk_i or negedge rst_ni) begin
@ -313,7 +307,7 @@ module ibex_icache #(
for (genvar way = 0; way < NumWays; way++) begin : gen_tag_match
assign tag_match_ic1[way] = (tag_rdata_ic1[way] ==
{1'b1,lookup_addr_ic1[ADDR_W-1:INDEX_HI+1]});
assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][TAG_W-1];
assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][TAG_SIZE-1];
end
assign tag_hit_ic1 = |tag_match_ic1;

View file

@ -0,0 +1,109 @@
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Synchronous single-port SRAM model
`include "prim_assert.sv"
// Behavioural model of a synchronous single-port RAM with a configurable data width.
//
// Parameters:
//   Width           - width of one memory word in bits.
//   Depth           - number of words in the memory.
//   DataBitsPerMask - number of data bits controlled by each internal write-mask bit.
//                     Width must be a multiple of this value.
//
// Behaviour (per rising edge of clk_i, only when req_i is set):
//   - write_i = 1: every DataBitsPerMask-wide group of wdata_i whose wmask_i bits are all set
//                  is written to mem[addr_i]; rdata_o is left unchanged.
//   - write_i = 0: mem[addr_i] is registered onto rdata_o.
//   rvalid_o is high in the cycle following an accepted read request and is cleared by reset.
module prim_generic_ram_1p #(
  parameter  int Width           = 32,  // bit
  parameter  int Depth           = 128,
  parameter  int DataBitsPerMask = 1,   // Number of data bits per bit of write mask
  localparam int Aw              = $clog2(Depth) // derived parameter
) (
  input  logic             clk_i,
  input  logic             rst_ni,

  input  logic             req_i,
  input  logic             write_i,
  input  logic [Aw-1:0]    addr_i,
  input  logic [Width-1:0] wdata_i,
  input  logic [Width-1:0] wmask_i,
  output logic             rvalid_o,
  output logic [Width-1:0] rdata_o
);

  // Width of internal write mask. Note wmask_i input into the module is always assumed
  // to be the full bit mask
  localparam int MaskWidth = Width / DataBitsPerMask;

  // Width must be fully divisible by DataBitsPerMask. The division above truncates, so any
  // remainder bits at the top of the word would have no mask bit and could never be written.
  `ASSERT_INIT(DataBitsPerMaskCheck_A, (Width % DataBitsPerMask) == 0)

  logic [Width-1:0]     mem [Depth];
  logic [MaskWidth-1:0] wmask;

  // Collapse the full bit mask into one bit per DataBitsPerMask-wide group. A group is only
  // written when all of its bits are enabled in wmask_i.
  always_comb begin
    for (int i = 0; i < MaskWidth; i = i + 1) begin : create_wmask
      wmask[i] = &wmask_i[i*DataBitsPerMask +: DataBitsPerMask];
    end
  end

  // using always instead of always_ff to avoid 'ICPD  - illegal combination of drivers' error
  // thrown when using $readmemh system task to backdoor load an image
  always @(posedge clk_i) begin
    if (req_i) begin
      if (write_i) begin
        for (int i = 0; i < MaskWidth; i = i + 1) begin
          if (wmask[i]) begin
            mem[addr_i][i*DataBitsPerMask +: DataBitsPerMask] <=
              wdata_i[i*DataBitsPerMask +: DataBitsPerMask];
          end
        end
      end else begin
        // Read data is only updated on a read request; it holds its value otherwise.
        rdata_o <= mem[addr_i];
      end
    end
  end

  // Read-valid flag: set one cycle after a read request is accepted.
  always_ff @(posedge clk_i, negedge rst_ni) begin
    if (!rst_ni) begin
      rvalid_o <= '0;
    end else begin
      rvalid_o <= req_i & ~write_i;
    end
  end

  `ifdef VERILATOR
    // Task for loading 'mem' with SystemVerilog system task $readmemh()
    export "DPI-C" task simutil_verilator_memload;

    task simutil_verilator_memload;
      input string file;
      $readmemh(file, mem);
    endtask

    // TODO: Allow 'val' to have other widths than 32 bit
    // Note that the DPI export and function definition must both be in the same generate
    // context to get the correct name.
    if (Width == 32) begin : gen_32bit
      // Function for setting a specific 32 bit element in |mem|
      // Returns 1 (true) for success, 0 (false) for errors.
      export "DPI-C" function simutil_verilator_set_mem;

      function int simutil_verilator_set_mem(input int index,
                                             input logic[31:0] val);
        // 'index' is a signed int coming from C, so reject negative values as well as
        // values past the end of the array.
        if (index < 0 || index >= Depth) begin
          return 0;
        end

        mem[index] = val;
        return 1;
      endfunction
    end else begin : gen_other
      // Function doesn't work for Width != 32 so just return 0
      export "DPI-C" function simutil_verilator_set_mem;

      function int simutil_verilator_set_mem(input int index,
                                             input logic[31:0] val);
        return 0;
      endfunction
    end
  `endif

  `ifdef SRAM_INIT_FILE
    // Optional power-on initialisation of the memory from a hex file given on the command line.
    localparam MEM_FILE = `PRIM_STRINGIFY(`SRAM_INIT_FILE);
    initial begin
      $display("Initializing SRAM from %s", MEM_FILE);
      $readmemh(MEM_FILE, mem);
    end
  `endif
endmodule

View file

@ -10,6 +10,7 @@ filesets:
- lowrisc:prim:assert
files:
- ./rtl/prim_clock_gating.sv
- ./rtl/prim_generic_ram_1p.sv
- ./rtl/ram_1p.sv
- ./rtl/ram_2p.sv
- ./rtl/bus.sv