diff --git a/doc/icache.rst b/doc/icache.rst
index 6b9d65ec..11e8357e 100644
--- a/doc/icache.rst
+++ b/doc/icache.rst
@@ -81,8 +81,7 @@ This causes a minor performance inefficiency, but should not happen often in pra
 RAM Arrangement
 ---------------
 
-The data RAMs are arranged as a series of 32 bit banks, the number depending on the cache line width and number of ways.
-Each group of banks forming a single way can be combined into wider RAM instances if required since they are always accessed together.
+The data RAMs are arranged as ``NumWays`` banks, each ``LineSize`` bits wide.
 
 Indicative RAM sizes for common configurations are given in the table below:
 
diff --git a/rtl/ibex_icache.sv b/rtl/ibex_icache.sv
index 58016140..3c88ea2b 100644
--- a/rtl/ibex_icache.sv
+++ b/rtl/ibex_icache.sv
@@ -71,10 +71,9 @@ module ibex_icache #(
   localparam int unsigned LINE_BEATS = LINE_SIZE_BYTES / BUS_BYTES;
   localparam int unsigned LINE_BEATS_W = $clog2(LINE_BEATS);
   localparam int unsigned NUM_LINES = CacheSizeBytes / NumWays / LINE_SIZE_BYTES;
-  localparam int unsigned NUM_BANKS = LINE_BEATS * NumWays;
   localparam int unsigned INDEX_W = $clog2(NUM_LINES);
   localparam int unsigned INDEX_HI = INDEX_W + LINE_W - 1;
-  localparam int unsigned TAG_W = ADDR_W - INDEX_W - LINE_W + 1; // 1 valid bit
+  localparam int unsigned TAG_SIZE = ADDR_W - INDEX_W - LINE_W + 1; // 1 valid bit
   localparam int unsigned OUTPUT_BEATS = (BUS_BYTES / 2); // number of halfwords
 
   // Prefetch signals
@@ -90,7 +89,6 @@ module ibex_icache #(
   logic [INDEX_W-1:0] fill_index_ic0;
   logic [31:INDEX_HI+1] fill_tag_ic0;
   logic [LineSize-1:0] fill_wdata_ic0;
-  logic [NUM_BANKS-1:0] fill_banks_ic0;
   logic lookup_grant_ic0;
   logic lookup_actual_ic0;
   logic fill_grant_ic0;
@@ -98,14 +96,14 @@ module ibex_icache #(
   logic [INDEX_W-1:0] tag_index_ic0;
   logic [NumWays-1:0] tag_banks_ic0;
   logic tag_write_ic0;
-  logic [TAG_W-1:0] tag_wdata_ic0;
+  logic [TAG_SIZE-1:0] tag_wdata_ic0;
   logic data_req_ic0;
   logic [INDEX_W-1:0] data_index_ic0;
-  logic [NUM_BANKS-1:0] data_banks_ic0;
+  logic [NumWays-1:0] data_banks_ic0;
   logic data_write_ic0;
   logic [LineSize-1:0] data_wdata_ic0;
   // Cache pipelipe IC1 signals
-  logic [TAG_W-1:0] tag_rdata_ic1 [NumWays];
+  logic [TAG_SIZE-1:0] tag_rdata_ic1 [NumWays];
   logic [LineSize-1:0] data_rdata_ic1 [NumWays];
   logic [LineSize-1:0] hit_data_ic1;
   logic lookup_valid_ic1;
@@ -226,9 +224,6 @@ module ibex_icache #(
   assign fill_index_ic0 = fill_ram_req_addr[INDEX_HI:LINE_W];
   assign fill_tag_ic0 = fill_ram_req_addr[ADDR_W-1:INDEX_HI+1];
   assign fill_wdata_ic0 = fill_ram_req_data;
-  for (genvar way = 0; way < NumWays; way++) begin : gen_way_banks
-    assign fill_banks_ic0[way*LINE_BEATS+:LINE_BEATS] = {LINE_BEATS{fill_ram_req_way[way]}};
-  end
 
   // Arbitrated signals - lookups have highest priority
   assign lookup_grant_ic0 = lookup_req_ic0;
@@ -248,7 +243,7 @@ module ibex_icache #(
   // Dataram
   assign data_req_ic0 = lookup_req_ic0 | fill_req_ic0;
   assign data_index_ic0 = tag_index_ic0;
-  assign data_banks_ic0 = fill_grant_ic0 ? fill_banks_ic0 : {NUM_BANKS{1'b1}};
+  assign data_banks_ic0 = tag_banks_ic0; // one data bank per way, enabled like its tag bank
   assign data_write_ic0 = tag_write_ic0;
   assign data_wdata_ic0 = fill_wdata_ic0;
 
@@ -258,36 +253,35 @@ module ibex_icache #(
 
   for (genvar way = 0; way < NumWays; way++) begin : gen_rams
     // Tag RAM instantiation
-    logic [ADDR_W-TAG_W-1:0] unused_tag_ic1;
-    ram_1p #(
-      .Depth (NUM_LINES)
+    prim_generic_ram_1p #(
+      .Width (TAG_SIZE),
+      .Depth (NUM_LINES)
     ) tag_bank (
-      .clk_i (clk_i),
-      .rst_ni (rst_ni),
-      .req_i (tag_req_ic0 & tag_banks_ic0[way]),
-      .we_i (tag_write_ic0),
-      .be_i (4'hF),
-      .addr_i ({{32-INDEX_W-2{1'b0}},tag_index_ic0,2'b00}),
-      .wdata_i ({{32-TAG_W{1'b0}},tag_wdata_ic0}),
-      .rvalid_o (),
-      .rdata_o ({unused_tag_ic1,tag_rdata_ic1[way]})
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .req_i (tag_req_ic0 & tag_banks_ic0[way]),
+      .write_i (tag_write_ic0),
+      .wmask_i ({TAG_SIZE{1'b1}}),
+      .addr_i (tag_index_ic0),
+      .wdata_i (tag_wdata_ic0),
+      .rvalid_o (),
+      .rdata_o (tag_rdata_ic1[way])
     );
     // Data RAM instantiation
-    for (genvar sub = 0; sub < LINE_BEATS; sub++) begin : gen_sub_banks
-      ram_1p #(
-        .Depth (NUM_LINES)
-      ) data_bank (
-        .clk_i (clk_i),
-        .rst_ni (rst_ni),
-        .req_i (data_req_ic0 & data_banks_ic0[way*LINE_BEATS+sub]),
-        .we_i (data_write_ic0),
-        .be_i (4'hF),
-        .addr_i ({{32-INDEX_W-2{1'b0}},data_index_ic0,2'b00}),
-        .wdata_i (data_wdata_ic0[sub*32+:32]),
-        .rvalid_o (),
-        .rdata_o (data_rdata_ic1[way][sub*32+:32])
-      );
-    end
+    prim_generic_ram_1p #(
+      .Width (LineSize),
+      .Depth (NUM_LINES)
+    ) data_bank (
+      .clk_i (clk_i),
+      .rst_ni (rst_ni),
+      .req_i (data_req_ic0 & data_banks_ic0[way]),
+      .write_i (data_write_ic0),
+      .wmask_i ({LineSize{1'b1}}),
+      .addr_i (data_index_ic0),
+      .wdata_i (data_wdata_ic0),
+      .rvalid_o (),
+      .rdata_o (data_rdata_ic1[way])
+    );
   end
 
   always_ff @(posedge clk_i or negedge rst_ni) begin
@@ -313,7 +307,7 @@ module ibex_icache #(
   for (genvar way = 0; way < NumWays; way++) begin : gen_tag_match
     assign tag_match_ic1[way] = (tag_rdata_ic1[way] ==
                                  {1'b1,lookup_addr_ic1[ADDR_W-1:INDEX_HI+1]});
-    assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][TAG_W-1];
+    assign tag_invalid_ic1[way] = ~tag_rdata_ic1[way][TAG_SIZE-1];
   end
 
   assign tag_hit_ic1 = |tag_match_ic1;
diff --git a/shared/rtl/prim_generic_ram_1p.sv b/shared/rtl/prim_generic_ram_1p.sv
new file mode 100644
index 00000000..2a33a516
--- /dev/null
+++ b/shared/rtl/prim_generic_ram_1p.sv
@@ -0,0 +1,118 @@
+// Copyright lowRISC contributors.
+// Licensed under the Apache License, Version 2.0, see LICENSE for details.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Synchronous single-port SRAM model
+//
+// A request (req_i) performs either a masked write (write_i = 1) or a read (write_i = 0) of
+// the word at addr_i on the rising edge of clk_i. Read data is registered into rdata_o and
+// flagged by rvalid_o in the following cycle; rdata_o holds its value on writes and when idle.
+
+`include "prim_assert.sv"
+
+module prim_generic_ram_1p #(
+  parameter  int Width           = 32, // bit
+  parameter  int Depth           = 128, // number of Width-bit words
+  parameter  int DataBitsPerMask = 1, // Number of data bits per bit of write mask
+  localparam int Aw              = $clog2(Depth)  // derived parameter
+) (
+  input  logic             clk_i,
+  input  logic             rst_ni,   // only resets rvalid_o, not the memory contents
+
+  input  logic             req_i,    // access request
+  input  logic             write_i,  // 1: write, 0: read
+  input  logic [Aw-1:0]    addr_i,   // word address
+  input  logic [Width-1:0] wdata_i,
+  input  logic [Width-1:0] wmask_i,  // per-bit write enable
+  output logic             rvalid_o, // rdata_o holds the result of a read requested last cycle
+  output logic [Width-1:0] rdata_o
+);
+
+  // Width of internal write mask. Note wmask_i input into the module is always assumed
+  // to be the full bit mask
+  localparam int MaskWidth = Width / DataBitsPerMask;
+
+  logic [Width-1:0]     mem [Depth];
+  logic [MaskWidth-1:0] wmask;
+
+  // Reduce wmask_i to one bit per DataBitsPerMask-wide group: a group is only written when
+  // all of its wmask_i bits are set.
+  always_comb begin
+    for (int i=0; i < MaskWidth; i = i + 1) begin : create_wmask
+      wmask[i] = &wmask_i[i*DataBitsPerMask +: DataBitsPerMask];
+    end
+  end
+
+  // using always instead of always_ff to avoid 'ICPD - illegal combination of drivers' error
+  // thrown when using $readmemh system task to backdoor load an image
+  always @(posedge clk_i) begin
+    if (req_i) begin
+      if (write_i) begin
+        for (int i=0; i < MaskWidth; i = i + 1) begin
+          if (wmask[i]) begin
+            mem[addr_i][i*DataBitsPerMask +: DataBitsPerMask] <=
+              wdata_i[i*DataBitsPerMask +: DataBitsPerMask];
+          end
+        end
+      end else begin
+        rdata_o <= mem[addr_i];
+      end
+    end
+  end
+
+  // Read-valid flag: set in the cycle after an accepted read request, cleared by reset
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if (!rst_ni) begin
+      rvalid_o <= '0;
+    end else begin
+      rvalid_o <= req_i & ~write_i;
+    end
+  end
+
+  `ifdef VERILATOR
+    // Task for loading 'mem' with SystemVerilog system task $readmemh()
+    export "DPI-C" task simutil_verilator_memload;
+
+    task simutil_verilator_memload;
+      input string file;
+      $readmemh(file, mem);
+    endtask
+
+    // TODO: Allow 'val' to have other widths than 32 bit
+    // Note that the DPI export and function definition must both be in the same generate
+    // context to get the correct name.
+    if (Width == 32) begin : gen_32bit
+      // Function for setting a specific 32 bit element in |mem|
+      // Returns 1 (true) for success, 0 (false) for errors.
+      export "DPI-C" function simutil_verilator_set_mem;
+
+      function int simutil_verilator_set_mem(input int index,
+                                             input logic[31:0] val);
+        // 'index' is a signed int: reject negative values as well as indices past the end,
+        // otherwise an out-of-range write would be reported as a success.
+        if (index < 0 || index >= Depth) begin
+          return 0;
+        end
+
+        mem[index] = val;
+        return 1;
+      endfunction
+    end else begin : gen_other
+      // Function doesn't work for Width != 32 so just return 0
+      export "DPI-C" function simutil_verilator_set_mem;
+
+      function int simutil_verilator_set_mem(input int index,
+                                             input logic[31:0] val);
+        return 0;
+      endfunction
+    end
+  `endif
+
+  `ifdef SRAM_INIT_FILE
+    localparam MEM_FILE = `PRIM_STRINGIFY(`SRAM_INIT_FILE);
+    initial begin
+      $display("Initializing SRAM from %s", MEM_FILE);
+      $readmemh(MEM_FILE, mem);
+    end
+  `endif
+endmodule
diff --git a/shared/sim_shared.core b/shared/sim_shared.core
index 02ff4d4d..1c6a6516 100644
--- a/shared/sim_shared.core
+++ b/shared/sim_shared.core
@@ -10,6 +10,7 @@ filesets:
       - lowrisc:prim:assert
     files:
       - ./rtl/prim_clock_gating.sv
+      - ./rtl/prim_generic_ram_1p.sv
       - ./rtl/ram_1p.sv
       - ./rtl/ram_2p.sv
       - ./rtl/bus.sv