diff --git a/include/std_cache_pkg.sv b/include/std_cache_pkg.sv index 363d692c0..f6280b62a 100644 --- a/include/std_cache_pkg.sv +++ b/include/std_cache_pkg.sv @@ -56,10 +56,9 @@ package std_cache_pkg; // cache line byte enable typedef struct packed { - logic [(ariane_pkg::DCACHE_TAG_WIDTH+7)/8-1:0] tag; // byte enable into tag array - logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array - logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dirty; // byte enable into state array - logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] valid; // byte enable into state array + logic [(ariane_pkg::DCACHE_TAG_WIDTH+7)/8-1:0] tag; // byte enable into tag array + logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array + logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] vldrty; // enable into state array: one bit per dirty/valid bit pair (DCACHE_SET_ASSOC pairs), replaces the separate dirty and valid enables } cl_be_t; // convert one hot to bin for -> needed for cache replacement diff --git a/src/cache_subsystem/cache_ctrl.sv b/src/cache_subsystem/cache_ctrl.sv index 594b6ed1c..75ccd38e6 100644 --- a/src/cache_subsystem/cache_ctrl.sv +++ b/src/cache_subsystem/cache_ctrl.sv @@ -283,9 +283,8 @@ module cache_ctrl #( addr_o = mem_req_q.index; we_o = 1'b1; - be_o.dirty = hit_way_q; - be_o.valid = hit_way_q; - + be_o.vldrty = hit_way_q; + // set the correct byte enable be_o.data[cl_offset>>3 +: 8] = mem_req_q.be; data_o.data[cl_offset +: 64] = mem_req_q.wdata; diff --git a/src/cache_subsystem/miss_handler.sv b/src/cache_subsystem/miss_handler.sv index d583b965e..f4c3b1ff4 100644 --- a/src/cache_subsystem/miss_handler.sv +++ b/src/cache_subsystem/miss_handler.sv @@ -233,8 +233,7 @@ module miss_handler #( req_o = evict_way_q; we_o = 1'b1; be_o = '1; - be_o.valid = evict_way_q; - be_o.dirty = evict_way_q; + be_o.vldrty = evict_way_q; data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH]; data_o.data = data_miss_fsm; data_o.valid = 1'b1; @@ -277,8 +276,7 @@ module miss_handler #( req_o = 
1'b1; we_o = 1'b1; // invalidate - be_o.valid = evict_way_q; - be_o.dirty = evict_way_q; + be_o.vldrty = evict_way_q; // go back to handling the miss or flushing, depending on where we came from state_d = (state_q == WB_CACHELINE_MISS) ? MISS : FLUSH_REQ_STATUS; end @@ -305,12 +303,12 @@ module miss_handler #( // not dirty ~> increment and continue end else begin // increment and re-request - cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET); - state_d = FLUSH_REQ_STATUS; - addr_o = cnt_q; - req_o = 1'b1; - be_o.valid = '1; - we_o = 1'b1; + cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET); + state_d = FLUSH_REQ_STATUS; + addr_o = cnt_q; + req_o = 1'b1; + be_o.vldrty = '1; // NOTE(review): the removed line enabled only be_o.valid here; with the merged enable the dirty bits are written as well, confirm this is intended + we_o = 1'b1; // finished with flushing operation, go back to idle if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1) begin flush_ack_o = 1'b1; @@ -326,9 +324,8 @@ module miss_handler #( req_o = 1'b1; we_o = 1'b1; // only write the dirty array - be_o.dirty = '1; - be_o.valid = '1; - cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET); + be_o.vldrty = '1; // enables every dirty/valid pair, i.e. both the valid and the dirty bits are written, not the dirty bits alone + cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET); // finished initialization if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1) state_d = IDLE; diff --git a/src/cache_subsystem/std_nbdcache.sv b/src/cache_subsystem/std_nbdcache.sv index ce1d0730d..6d89c7d68 100644 --- a/src/cache_subsystem/std_nbdcache.sv +++ b/src/cache_subsystem/std_nbdcache.sv @@ -182,7 +182,7 @@ module std_nbdcache #( .we_i ( we_ram ), .addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ), .wdata_i ( wdata_ram.tag ), - .be_i ( be_ram.tag ), + .be_i ( be_ram.tag ), .rdata_o ( rdata_ram[i].tag ), .* ); @@ -192,26 +192,30 @@ module std_nbdcache #( // ---------------- // Valid/Dirty Regs // ---------------- - logic [DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata; + + // align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals. 
+ // note: if you have an SRAM that supports flat bit enables for your target technology, + // you can use it here to save the extra 4x overhead introduced by this workaround. + logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata; for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin - assign dirty_wdata[i] = wdata_ram.dirty; - assign dirty_wdata[DCACHE_SET_ASSOC + i] = wdata_ram.valid; - assign rdata_ram[i].valid = dirty_rdata[DCACHE_SET_ASSOC + i]; - assign rdata_ram[i].dirty = dirty_rdata[i]; + assign dirty_wdata[8*i] = wdata_ram.dirty; + assign dirty_wdata[8*i+1] = wdata_ram.valid; // NOTE(review): only bits 8*i and 8*i+1 of each byte are assigned in this hunk; the remaining bits of dirty_wdata look undriven, confirm that is intended + assign rdata_ram[i].dirty = dirty_rdata[8*i]; + assign rdata_ram[i].valid = dirty_rdata[8*i+1]; end - vdregs #( - .DATA_WIDTH ( DCACHE_DIRTY_WIDTH ), - .DATA_DEPTH ( DCACHE_NUM_WORDS ) - ) i_vdregs ( + sram #( + .DATA_WIDTH ( 4*DCACHE_DIRTY_WIDTH ), + .NUM_WORDS ( DCACHE_NUM_WORDS ) + ) valid_dirty_sram ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .req_i ( |req_ram ), .we_i ( we_ram ), .addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ), .wdata_i ( dirty_wdata ), - .biten_i ( {be_ram.valid, be_ram.dirty} ), + .be_i ( be_ram.vldrty ), // one enable bit per byte-aligned dirty/valid pair; presumably sram treats be_i as a per-byte write enable (TODO confirm against the sram module) .rdata_o ( dirty_rdata ) ); diff --git a/src/cache_subsystem/vdregs.sv b/src/cache_subsystem/vdregs.sv deleted file mode 100644 index 60ba7fc3b..000000000 --- a/src/cache_subsystem/vdregs.sv +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2018 ETH Zurich and University of Bologna. -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations under the License. -// -// Author: Florian Zaruba , ETH Zurich -// Michael Schaffner , ETH Zurich -// Date: 15.08.2018 -// Description: valid/dirty regfile for caches -// - -module vdregs #( - parameter DATA_WIDTH = 64, - parameter DATA_DEPTH = 1024 -)( - input logic clk_i, - input logic rst_ni, - input logic req_i, - input logic we_i, - input logic [$clog2(DATA_DEPTH)-1:0] addr_i, - input logic [DATA_WIDTH-1:0] wdata_i, - input logic [DATA_WIDTH-1:0] biten_i, // bit enable - output logic [DATA_WIDTH-1:0] rdata_o -); - localparam ADDR_WIDTH = $clog2(DATA_DEPTH); - logic [DATA_WIDTH-1:0] regs_q [DATA_DEPTH-1:0]; - - always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin - regs_q <= '{default:0}; - end else if (req_i) begin - if (we_i) begin - for (int i = 0; i < DATA_WIDTH; i++) - if (biten_i[i]) regs_q[addr_i][i] <= wdata_i[i]; - end - rdata_o <= regs_q[addr_i]; - end - end - -endmodule : vdregs