mirror of
https://github.com/openhwgroup/cva5.git
synced 2025-04-23 21:47:15 -04:00
Add snoopy dcache
This commit is contained in:
parent
cc96545edf
commit
8b17eadd8c
5 changed files with 716 additions and 14 deletions
119
core/common_components/ram/tdp_ram.sv
Normal file
119
core/common_components/ram/tdp_ram.sv
Normal file
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
* Copyright © 2024 Chris Keilbart, Lesley Shannon
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module tdp_ram
|
||||
|
||||
#(
|
||||
parameter ADDR_WIDTH = 10,
|
||||
parameter NUM_COL = 4, //Number of independently writeable components
|
||||
parameter COL_WIDTH = 16, //Width the "byte" enable controls
|
||||
parameter PIPELINE_DEPTH = 1, //Depth of the output pipeline, is latency in clock cycles
|
||||
parameter CASCADE_DEPTH = 4 //Maximum depth of the memory block cascade
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
//Port A
|
||||
input logic a_en,
|
||||
input logic[NUM_COL-1:0] a_wbe,
|
||||
input logic[COL_WIDTH*NUM_COL-1:0] a_wdata,
|
||||
input logic[ADDR_WIDTH-1:0] a_addr,
|
||||
output logic[COL_WIDTH*NUM_COL-1:0] a_rdata,
|
||||
|
||||
//Port B
|
||||
input logic b_en,
|
||||
input logic[NUM_COL-1:0] b_wbe,
|
||||
input logic[COL_WIDTH*NUM_COL-1:0] b_wdata,
|
||||
input logic[ADDR_WIDTH-1:0] b_addr,
|
||||
output logic[COL_WIDTH*NUM_COL-1:0] b_rdata
|
||||
);
|
||||
|
||||
localparam DATA_WIDTH = COL_WIDTH*NUM_COL;
|
||||
|
||||
(* cascade_height = CASCADE_DEPTH, ramstyle = "no_rw_check" *) //Higher depths use less resources but are slower
|
||||
logic[DATA_WIDTH-1:0] mem[(1<<ADDR_WIDTH)-1:0];
|
||||
|
||||
initial mem = '{default: '0};
|
||||
|
||||
//A read/write
|
||||
logic[DATA_WIDTH-1:0] a_ram_output;
|
||||
always_ff @(posedge clk) begin
|
||||
if (a_en) begin
|
||||
for (int i = 0; i < NUM_COL; i++) begin
|
||||
if (a_wbe[i])
|
||||
mem[a_addr][i*COL_WIDTH +: COL_WIDTH] <= a_wdata[i*COL_WIDTH +: COL_WIDTH];
|
||||
end
|
||||
if (~|a_wbe)
|
||||
a_ram_output <= mem[a_addr];
|
||||
end
|
||||
end
|
||||
|
||||
//A pipeline
|
||||
generate if (PIPELINE_DEPTH > 0) begin : gen_a_pipeline
|
||||
logic[DATA_WIDTH-1:0] a_data_pipeline[PIPELINE_DEPTH-1:0];
|
||||
logic[PIPELINE_DEPTH-1:0] a_en_pipeline;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
for (int i = 0; i < PIPELINE_DEPTH; i++) begin
|
||||
a_en_pipeline[i] <= i == 0 ? a_en : a_en_pipeline[i-1];
|
||||
if (a_en_pipeline[i])
|
||||
a_data_pipeline[i] <= i == 0 ? a_ram_output : a_data_pipeline[i-1];
|
||||
end
|
||||
end
|
||||
assign a_rdata = a_data_pipeline[PIPELINE_DEPTH-1];
|
||||
end
|
||||
else begin : gen_a_transparent_output
|
||||
assign a_rdata = a_ram_output;
|
||||
end endgenerate
|
||||
|
||||
|
||||
//B read/write
|
||||
logic[DATA_WIDTH-1:0] b_ram_output;
|
||||
always_ff @(posedge clk) begin
|
||||
if (b_en) begin
|
||||
for (int i = 0; i < NUM_COL; i++) begin
|
||||
if (b_wbe[i])
|
||||
mem[b_addr][i*COL_WIDTH +: COL_WIDTH] <= b_wdata[i*COL_WIDTH +: COL_WIDTH];
|
||||
end
|
||||
if (~|b_wbe)
|
||||
b_ram_output <= mem[b_addr];
|
||||
end
|
||||
end
|
||||
|
||||
//B pipeline
|
||||
generate if (PIPELINE_DEPTH > 0) begin : gen_b_pipeline
|
||||
logic[DATA_WIDTH-1:0] b_data_pipeline[PIPELINE_DEPTH-1:0];
|
||||
logic[PIPELINE_DEPTH-1:0] b_en_pipeline;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
for (int i = 0; i < PIPELINE_DEPTH; i++) begin
|
||||
b_en_pipeline[i] <= i == 0 ? b_en : b_en_pipeline[i-1];
|
||||
if (b_en_pipeline[i])
|
||||
b_data_pipeline[i] <= i == 0 ? b_ram_output : b_data_pipeline[i-1];
|
||||
end
|
||||
end
|
||||
assign b_rdata = b_data_pipeline[PIPELINE_DEPTH-1];
|
||||
end
|
||||
else begin : gen_b_transparent_output
|
||||
assign b_rdata = b_ram_output;
|
||||
end endgenerate
|
||||
|
||||
endmodule
|
566
core/execution_units/load_store_unit/dcache_inv.sv
Normal file
566
core/execution_units/load_store_unit/dcache_inv.sv
Normal file
|
@ -0,0 +1,566 @@
|
|||
/*
|
||||
* Copyright © 2024 Chris Keilbart
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Initial code developed under the supervision of Dr. Lesley Shannon,
|
||||
* Reconfigurable Computing Lab, Simon Fraser University.
|
||||
*
|
||||
* Author(s):
|
||||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module dcache_inv
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
||||
import cva5_types::*;
|
||||
|
||||
# (
|
||||
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
|
||||
)
|
||||
(
|
||||
input logic clk,
|
||||
input logic rst,
|
||||
mem_interface.rw_master mem,
|
||||
output logic write_outstanding,
|
||||
input logic amo,
|
||||
input amo_t amo_type,
|
||||
amo_interface.subunit amo_unit,
|
||||
input logic cbo,
|
||||
input logic uncacheable,
|
||||
memory_sub_unit_interface.responder ls,
|
||||
input logic load_peek, //If the next request may be a load
|
||||
input logic[31:0] load_addr_peek //The address in that case
|
||||
);
|
||||
|
||||
localparam derived_cache_config_t SCONFIG = get_derived_cache_params(CONFIG, CONFIG.DCACHE, CONFIG.DCACHE_ADDR);
|
||||
localparam DB_ADDR_LEN = SCONFIG.LINE_ADDR_W + SCONFIG.SUB_LINE_ADDR_W;
|
||||
|
||||
cache_functions_interface # (.TAG_W(SCONFIG.TAG_W), .LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils ();
|
||||
|
||||
typedef logic[SCONFIG.TAG_W-1:0] tag_t;
|
||||
typedef logic[SCONFIG.LINE_ADDR_W-1:0] line_t;
|
||||
typedef logic[SCONFIG.SUB_LINE_ADDR_W-1:0] block_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
tag_t tag;
|
||||
} tb_entry_t;
|
||||
|
||||
typedef enum {
|
||||
WRITE,
|
||||
CBO,
|
||||
READ,
|
||||
AMO_LR,
|
||||
AMO_SC,
|
||||
AMO_RMW
|
||||
} req_type_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic[31:0] addr;
|
||||
logic[31:0] wdata;
|
||||
logic[3:0] be;
|
||||
amo_t amo_type;
|
||||
logic uncacheable;
|
||||
} req_t;
|
||||
req_t stage0;
|
||||
req_t stage1;
|
||||
logic stage1_valid;
|
||||
logic stage1_done;
|
||||
logic stage0_advance_r;
|
||||
|
||||
logic resetting;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst) begin
|
||||
stage0_advance_r <= 0;
|
||||
stage1_valid <= 0;
|
||||
stage1_type <= WRITE;
|
||||
end
|
||||
else begin
|
||||
stage0_advance_r <= ls.new_request;
|
||||
if (ls.new_request) begin
|
||||
stage1_valid <= 1;
|
||||
stage1_type <= stage0_type;
|
||||
end
|
||||
else if (stage1_done)
|
||||
stage1_valid <= 0;
|
||||
end
|
||||
if (ls.new_request)
|
||||
stage1 <= stage0;
|
||||
end
|
||||
|
||||
req_type_t stage0_type;
|
||||
req_type_t stage1_type;
|
||||
always_comb begin
|
||||
if (cbo)
|
||||
stage0_type = CBO;
|
||||
else if (ls.we)
|
||||
stage0_type = WRITE;
|
||||
else if (amo & amo_type == AMO_LR_FN5)
|
||||
stage0_type = AMO_LR;
|
||||
else if (amo & amo_type == AMO_SC_FN5)
|
||||
stage0_type = AMO_SC;
|
||||
else if (amo)
|
||||
stage0_type = AMO_RMW;
|
||||
else
|
||||
stage0_type = READ;
|
||||
end
|
||||
|
||||
assign stage0 = '{
|
||||
addr : ls.addr,
|
||||
wdata : ls.data_in,
|
||||
be : ls.be,
|
||||
amo_type : amo_type,
|
||||
uncacheable : uncacheable
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Snooping
|
||||
//Invalidate a line in the tagbank upon a hit
|
||||
line_t snoop_line;
|
||||
tag_t snoop_tag;
|
||||
logic snoop_valid;
|
||||
tb_entry_t[CONFIG.DCACHE.WAYS-1:0] snoop_rdata;
|
||||
line_t snoop_line_r;
|
||||
tag_t snoop_tag_r;
|
||||
logic[CONFIG.DCACHE.WAYS-1:0] snoop_hit;
|
||||
logic snoop_write;
|
||||
|
||||
//Technically snoop addresses do not need to lie within our addressable space, so their tag should be wider
|
||||
//But this is a niche scenario and there is no harm in aliasing requests into our address space (beyond performance)
|
||||
|
||||
assign {snoop_tag, snoop_line} = mem.inv_addr[2+SCONFIG.SUB_LINE_ADDR_W+:SCONFIG.TAG_W+SCONFIG.LINE_ADDR_W];
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
snoop_valid <= 0;
|
||||
else
|
||||
snoop_valid <= mem.inv;
|
||||
snoop_line_r <= snoop_line;
|
||||
snoop_tag_r <= snoop_tag;
|
||||
end
|
||||
|
||||
//Hit detection
|
||||
assign snoop_write = snoop_valid & |snoop_hit;
|
||||
always_comb begin
|
||||
for (int i = 0; i < CONFIG.DCACHE.WAYS; i++)
|
||||
snoop_hit[i] = {snoop_rdata[i].valid, snoop_rdata[i].tag} == {1'b1, snoop_tag_r};
|
||||
end
|
||||
|
||||
//Random replacement policy (cycler)
|
||||
logic[CONFIG.DCACHE.WAYS-1:0] replacement_way;
|
||||
cycler #(.C_WIDTH(CONFIG.DCACHE.WAYS)) replacement_policy (
|
||||
.en(ls.new_request),
|
||||
.one_hot(replacement_way),
|
||||
.*);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Tagbank
|
||||
//Snoops are always accepted and cannot be delayed
|
||||
//Port A therefore handles all requests and snoop writes
|
||||
//Port B handles snoop reads + resets
|
||||
logic a_en;
|
||||
logic[CONFIG.DCACHE.WAYS-1:0] a_wbe;
|
||||
tb_entry_t a_wdata;
|
||||
line_t a_addr;
|
||||
tb_entry_t[CONFIG.DCACHE.WAYS-1:0] a_rdata;
|
||||
logic stage1_tb_write;
|
||||
logic stage1_tb_wval;
|
||||
logic stage1_tb_write_r;
|
||||
logic stage1_tb_wval_r;
|
||||
logic[CONFIG.DCACHE.WAYS-1:0] hit_ohot_r;
|
||||
|
||||
assign a_en = snoop_write | stage1_tb_write_r | ls.new_request;
|
||||
//Port A per-way write enables for the tagbank.
// - Snoop invalidation: when snoop_write is set, enable the way(s) flagged in snoop_hit.
// - Stage 1 tagbank update: when stage1_tb_write_r is set, enable the previously hit way for a CBO
//   (hit_ohot_r) or the replacement way otherwise.
//Each term is gated by its own write strobe so that a_wbe is all zeros on a plain lookup;
//tdp_ram only performs a read when no write enable bit is set, so a stray enable would turn
//the lookup into a write of a_wdata.
assign a_wbe = ({CONFIG.DCACHE.WAYS{snoop_write}} & snoop_hit) | ({CONFIG.DCACHE.WAYS{stage1_tb_write_r}} & (stage1_type == CBO ? hit_ohot_r : replacement_way));
|
||||
|
||||
always_comb begin
|
||||
if (snoop_write)
|
||||
a_addr = snoop_line_r;
|
||||
else if (stage1_tb_write_r)
|
||||
a_addr = stage1.addr[2+SCONFIG.SUB_LINE_ADDR_W+:SCONFIG.LINE_ADDR_W];
|
||||
else
|
||||
a_addr = stage0.addr[2+SCONFIG.SUB_LINE_ADDR_W+:SCONFIG.LINE_ADDR_W];
|
||||
end
|
||||
|
||||
assign a_wdata = '{
|
||||
valid : stage1_tb_write_r & stage1_tb_wval_r,
|
||||
tag : stage1.addr[2+SCONFIG.SUB_LINE_ADDR_W+SCONFIG.LINE_ADDR_W+:SCONFIG.TAG_W]
|
||||
};
|
||||
|
||||
//Reset routine
|
||||
logic b_en;
|
||||
logic[CONFIG.DCACHE.WAYS-1:0] b_wbe;
|
||||
tb_entry_t b_wdata;
|
||||
line_t b_addr;
|
||||
logic rst_invalid;
|
||||
line_t rst_line;
|
||||
assign resetting = ~rst_invalid;
|
||||
|
||||
assign b_en = mem.inv | resetting;
|
||||
assign b_wbe = {CONFIG.DCACHE.WAYS{resetting}};
|
||||
assign b_wdata = '{default: '0};
|
||||
assign b_addr = resetting ? rst_line : snoop_line;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst) begin
|
||||
rst_invalid <= 0;
|
||||
rst_line <= '0;
|
||||
end
|
||||
else if (resetting)
|
||||
{rst_invalid, rst_line} <= rst_line + 1;
|
||||
end
|
||||
|
||||
tdp_ram #(
|
||||
.ADDR_WIDTH(SCONFIG.LINE_ADDR_W),
|
||||
.NUM_COL(CONFIG.DCACHE.WAYS),
|
||||
.COL_WIDTH($bits(tb_entry_t)),
|
||||
.PIPELINE_DEPTH(0)
|
||||
) tagbank (
|
||||
.a_en(a_en),
|
||||
.a_wbe(a_wbe),
|
||||
.a_wdata({CONFIG.DCACHE.WAYS{a_wdata}}),
|
||||
.a_addr(a_addr),
|
||||
.a_rdata(a_rdata),
|
||||
.b_en(b_en),
|
||||
.b_wbe(b_wbe),
|
||||
.b_wdata({CONFIG.DCACHE.WAYS{b_wdata}}),
|
||||
.b_addr(b_addr),
|
||||
.b_rdata(snoop_rdata),
|
||||
.*);
|
||||
|
||||
//Hit detection
|
||||
logic hit;
|
||||
logic hit_r;
|
||||
logic[CONFIG.DCACHE.WAYS-1:0] hit_ohot;
|
||||
|
||||
always_comb begin
|
||||
hit_ohot = '0;
|
||||
for (int i = 0; i < CONFIG.DCACHE.WAYS; i++)
|
||||
hit_ohot[i] = a_rdata[i].valid & (a_rdata[i].tag == stage1.addr[2+SCONFIG.SUB_LINE_ADDR_W+SCONFIG.LINE_ADDR_W+:SCONFIG.TAG_W]);
|
||||
end
|
||||
assign hit = |hit_ohot;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (stage0_advance_r) begin
|
||||
hit_r <= hit;
|
||||
hit_ohot_r <= hit_ohot;
|
||||
end
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Atomic read/modify/write state machine
|
||||
//Separate from other logic because atomic requests will need to be retried on a snoop invalidation
|
||||
typedef enum {
|
||||
RMW_IDLE,
|
||||
RMW_READ,
|
||||
RMW_WRITE,
|
||||
RMW_FILLING
|
||||
} rmw_state_t;
|
||||
rmw_state_t current_state;
|
||||
rmw_state_t next_state;
|
||||
|
||||
logic rmw_mem_request;
|
||||
logic rmw_mem_rnw;
|
||||
logic rmw_stage1_tb_write;
|
||||
logic rmw_db_wen;
|
||||
logic[31:0] rmw_db_wdata;
|
||||
logic rmw_ls_data_valid;
|
||||
logic rmw_stage1_done;
|
||||
logic rmw_retry;
|
||||
logic force_miss;
|
||||
logic return_done;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst)
|
||||
current_state <= RMW_IDLE;
|
||||
else
|
||||
current_state <= next_state;
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
unique case (current_state)
|
||||
RMW_READ : begin
|
||||
rmw_mem_request = 1;
|
||||
rmw_mem_rnw = 1;
|
||||
rmw_stage1_tb_write = mem.ack & ~stage1.uncacheable;
|
||||
rmw_db_wen = 0;
|
||||
rmw_db_wdata = 'x;
|
||||
rmw_ls_data_valid = 0;
|
||||
rmw_stage1_done = 0;
|
||||
next_state = mem.ack ? RMW_FILLING : RMW_READ;
|
||||
end
|
||||
RMW_WRITE : begin
|
||||
rmw_mem_request = ~rmw_retry;
|
||||
rmw_mem_rnw = 0;
|
||||
rmw_stage1_tb_write = 0;
|
||||
rmw_db_wen = ~stage1.uncacheable & mem.ack;
|
||||
rmw_db_wdata = amo_unit.rd;
|
||||
rmw_ls_data_valid = mem.ack;
|
||||
rmw_stage1_done = mem.ack;
|
||||
if (mem.ack)
|
||||
next_state = RMW_IDLE;
|
||||
else if (rmw_retry)
|
||||
next_state = RMW_READ;
|
||||
else
|
||||
next_state = RMW_WRITE;
|
||||
end
|
||||
RMW_FILLING : begin
|
||||
rmw_mem_request = 0;
|
||||
rmw_mem_rnw = 'x;
|
||||
rmw_stage1_tb_write = 0;
|
||||
rmw_db_wen = mem.rvalid & ~stage1.uncacheable;
|
||||
rmw_db_wdata = mem.rdata;
|
||||
rmw_ls_data_valid = 0;
|
||||
rmw_stage1_done = 0;
|
||||
if (return_done)
|
||||
next_state = rmw_retry ? RMW_READ : RMW_WRITE;
|
||||
else
|
||||
next_state = RMW_FILLING;
|
||||
end
|
||||
RMW_IDLE : begin
|
||||
rmw_mem_request = 0;
|
||||
rmw_mem_rnw = 'x;
|
||||
rmw_stage1_tb_write = 0;
|
||||
rmw_db_wen = 0;
|
||||
rmw_db_wdata = 'x;
|
||||
rmw_ls_data_valid = 0;
|
||||
rmw_stage1_done = 0;
|
||||
if (stage1_valid & stage1_type == AMO_RMW)
|
||||
next_state = hit & ~force_miss & ~stage1.uncacheable ? RMW_WRITE : RMW_READ;
|
||||
else
|
||||
next_state = RMW_IDLE;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Supporting logic
|
||||
//Various pieces of additional stateful logic supporting stage one requests
|
||||
|
||||
//Tagbank write logic; always on ack_r because it is guaranteed that there won't be a conflicting snoop tb write
|
||||
logic ack_r;
|
||||
always_ff @(posedge clk) begin
|
||||
ack_r <= mem.ack;
|
||||
stage1_tb_write_r <= stage1_tb_write;
|
||||
stage1_tb_wval_r <= stage1_tb_wval;
|
||||
end
|
||||
|
||||
//Track if a request has been sent in stage 1 to prevent duplicates
|
||||
logic request_sent;
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst | stage1_done)
|
||||
request_sent <= 0;
|
||||
else if (mem.ack)
|
||||
request_sent <= 1;
|
||||
end
|
||||
|
||||
//Atomics that collide with a snoop on stage0 must be treated as a miss
|
||||
always_ff @(posedge clk) begin
|
||||
if (ls.new_request)
|
||||
force_miss <= amo & mem.inv & mem.inv_addr[31:2+SCONFIG.SUB_LINE_ADDR_W] == stage0.addr[31:2+SCONFIG.SUB_LINE_ADDR_W];
|
||||
end
|
||||
|
||||
//RMW requests must be retried if invalidated after the read but before the write
|
||||
logic inv_matches_stage1;
|
||||
assign inv_matches_stage1 = mem.inv & stage1.addr[31:2+SCONFIG.SUB_LINE_ADDR_W] == mem.inv_addr[31:2+SCONFIG.SUB_LINE_ADDR_W];
|
||||
always_ff @(posedge clk) begin
|
||||
case (current_state)
|
||||
RMW_IDLE, RMW_FILLING, RMW_WRITE : rmw_retry <= stage1_valid & (rmw_retry | inv_matches_stage1);
|
||||
default: rmw_retry <= 0;
|
||||
endcase
|
||||
end
|
||||
|
||||
//Fill burst word counting
|
||||
logic correct_word;
|
||||
block_t word_counter;
|
||||
assign return_done = mem.rvalid & (stage1.uncacheable | word_counter == SCONFIG.SUB_LINE_ADDR_W'(CONFIG.DCACHE.LINE_W-1));
|
||||
assign correct_word = mem.rvalid & (stage1.uncacheable | word_counter == stage1.addr[2+:SCONFIG.SUB_LINE_ADDR_W]);
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst | stage1_done)
|
||||
word_counter <= '0;
|
||||
else
|
||||
word_counter <= word_counter + block_t'(mem.rvalid);
|
||||
end
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Stage 1 request handling
|
||||
//Heavily dependent on request type
|
||||
logic db_wen;
|
||||
logic[CONFIG.DCACHE.WAYS-1:0] db_way;
|
||||
logic[31:0] db_wdata;
|
||||
logic lr_valid;
|
||||
|
||||
always_comb begin
|
||||
unique case (stage1_type)
|
||||
WRITE : begin
|
||||
mem.request = stage1_valid;
|
||||
mem.wdata = stage1.wdata;
|
||||
mem.rnw = 0;
|
||||
stage1_tb_write = 0;
|
||||
stage1_tb_wval = 'x;
|
||||
db_wen = stage0_advance_r & hit & ~stage1.uncacheable;
|
||||
db_wdata = stage1.wdata;
|
||||
db_way = hit_ohot;
|
||||
ls.data_valid = 0;
|
||||
ls.data_out = 'x;
|
||||
stage1_done = mem.ack;
|
||||
end
|
||||
CBO : begin
|
||||
mem.request = stage1_valid & ~request_sent;
|
||||
mem.wdata = 'x;
|
||||
mem.rnw = 0;
|
||||
stage1_tb_write = ~stage1.uncacheable & mem.ack & (stage0_advance_r ? hit : hit_r);
|
||||
stage1_tb_wval = 0;
|
||||
db_wen = 0;
|
||||
db_wdata = 'x;
|
||||
db_way = 'x;
|
||||
ls.data_valid = 0;
|
||||
ls.data_out = 'x;
|
||||
stage1_done = request_sent & ~stage1_tb_write_r;
|
||||
end
|
||||
AMO_LR, READ : begin
|
||||
mem.request = stage1_valid & ~stage0_advance_r & (stage1.uncacheable | ~hit_r) & ~request_sent;
|
||||
mem.wdata = 'x;
|
||||
mem.rnw = 1;
|
||||
stage1_tb_write = ~stage1.uncacheable & mem.ack;
|
||||
stage1_tb_wval = 1;
|
||||
db_wen = mem.rvalid & ~stage1.uncacheable;
|
||||
db_wdata = mem.rdata;
|
||||
db_way = replacement_way;
|
||||
ls.data_valid = stage0_advance_r ? hit & ~stage1.uncacheable : correct_word;
|
||||
ls.data_out = stage0_advance_r ? db_hit_entry : mem.rdata;
|
||||
stage1_done = stage0_advance_r ? hit & ~stage1.uncacheable : return_done;
|
||||
end
|
||||
AMO_SC : begin
|
||||
mem.request = stage1_valid & lr_valid;
|
||||
mem.wdata = stage1.wdata;
|
||||
mem.rnw = 0;
|
||||
stage1_tb_write = 0;
|
||||
stage1_tb_wval = 'x;
|
||||
db_wen = mem.ack;
|
||||
db_wdata = stage1.wdata;
|
||||
db_way = stage0_advance_r ? hit_ohot : hit_ohot_r;
|
||||
ls.data_valid = stage1_valid & (mem.ack | ~lr_valid);
|
||||
ls.data_out = {31'b0, ~lr_valid};
|
||||
stage1_done = stage1_valid & (mem.ack | ~lr_valid);
|
||||
end
|
||||
AMO_RMW : begin
|
||||
mem.request = rmw_mem_request;
|
||||
mem.wdata = amo_unit.rd;
|
||||
mem.rnw = rmw_mem_rnw;
|
||||
stage1_tb_write = rmw_stage1_tb_write;
|
||||
stage1_tb_wval = 1;
|
||||
db_wen = rmw_db_wen;
|
||||
db_wdata = rmw_db_wdata;
|
||||
db_way = hit_r ? hit_ohot_r : replacement_way; //Will not write on first cycle so can use registered
|
||||
ls.data_valid = rmw_ls_data_valid;
|
||||
ls.data_out = amo_unit.rs1;
|
||||
stage1_done = rmw_stage1_done;
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign mem.addr = stage1.addr[31:2];
|
||||
assign mem.wbe = stage1.be;
|
||||
assign mem.rlen = stage1.uncacheable ? '0 : 5'(CONFIG.DCACHE.LINE_W-1);
|
||||
|
||||
assign ls.ready = ~resetting & ~snoop_write & (~stage1_valid | stage1_done) & ~(db_wen & load_peek & load_addr_peek[31:2] == stage1.addr[31:2]);
|
||||
assign write_outstanding = (stage1_valid & ~(stage1_type inside {READ, AMO_LR})) | mem.write_outstanding;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Atomics
|
||||
logic local_reservation_valid;
|
||||
//local_reservation_valid tracks the reservation with respect to invalidations; amo_unit.reservation_valid is for other ports
|
||||
assign lr_valid = amo_unit.reservation_valid & local_reservation_valid;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (rst | inv_matches_stage1)
|
||||
local_reservation_valid <= 0;
|
||||
else if (amo_unit.set_reservation)
|
||||
local_reservation_valid <= 1;
|
||||
end
|
||||
|
||||
assign amo_unit.reservation = stage1.addr;
|
||||
//On a miss, set on ack_r
|
||||
//On a hit, set as long as ~force_miss & ~inv_matches_stage1
|
||||
assign amo_unit.set_reservation = stage1_valid & stage1_type == AMO_LR & (stage0_advance_r & hit & ~stage1.uncacheable & ~force_miss & ~inv_matches_stage1 | ack_r);
|
||||
assign amo_unit.clear_reservation = stage1_done & stage1_type != AMO_LR;
|
||||
|
||||
//RMW
|
||||
assign amo_unit.rs2 = stage1.wdata;
|
||||
assign amo_unit.rmw_valid = stage1_valid & stage1_type == AMO_RMW;
|
||||
assign amo_unit.op = stage1.amo_type;
|
||||
always_ff @(posedge clk) begin
|
||||
if (stage0_advance_r)
|
||||
amo_unit.rs1 <= db_hit_entry;
|
||||
else if (correct_word)
|
||||
amo_unit.rs1 <= mem.rdata;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Databank
|
||||
logic[CONFIG.DCACHE.WAYS-1:0][31:0] db_entries;
|
||||
logic[31:0] db_hit_entry;
|
||||
logic[CONFIG.DCACHE.WAYS-1:0][3:0] db_wbe_full;
|
||||
logic[DB_ADDR_LEN-1:0] db_addr;
|
||||
|
||||
always_comb begin
|
||||
for (int i = 0; i < CONFIG.DCACHE.WAYS; i++)
|
||||
db_wbe_full[i] = {4{db_way[i]}} & stage1.be;
|
||||
end
|
||||
|
||||
assign db_addr[SCONFIG.SUB_LINE_ADDR_W+:SCONFIG.LINE_ADDR_W] = stage1.addr[2+SCONFIG.SUB_LINE_ADDR_W+:SCONFIG.LINE_ADDR_W];
|
||||
assign db_addr[SCONFIG.SUB_LINE_ADDR_W-1:0] = mem.rvalid ? word_counter : stage1.addr[2+:SCONFIG.SUB_LINE_ADDR_W];
|
||||
|
||||
sdp_ram #(
|
||||
.ADDR_WIDTH(DB_ADDR_LEN),
|
||||
.NUM_COL(4*CONFIG.DCACHE.WAYS),
|
||||
.COL_WIDTH(8),
|
||||
.PIPELINE_DEPTH(0)
|
||||
) databank (
|
||||
.a_en(db_wen),
|
||||
.a_wbe(db_wbe_full),
|
||||
.a_wdata({CONFIG.DCACHE.WAYS{db_wdata}}),
|
||||
.a_addr(db_addr),
|
||||
.b_en(ls.new_request),
|
||||
.b_addr(addr_utils.getDataLineAddr(stage0.addr)),
|
||||
.b_rdata(db_entries),
|
||||
.*);
|
||||
|
||||
one_hot_mux #(
|
||||
.OPTIONS(CONFIG.DCACHE.WAYS),
|
||||
.DATA_TYPE(logic[31:0])
|
||||
) db_mux (
|
||||
.clk(clk),
|
||||
.rst(1'b1), //Disable the assertion
|
||||
.one_hot(hit_ohot),
|
||||
.choices(db_entries),
|
||||
.sel(db_hit_entry)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Assertions
|
||||
dcache_request_when_not_ready_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) ls.new_request |-> ls.ready)
|
||||
else $error("dcache received request when not ready");
|
||||
|
||||
dache_suprious_l1_ack_assertion:
|
||||
assert property (@(posedge clk) disable iff (rst) mem.ack |-> mem.request)
|
||||
else $error("dcache received ack without a request");
|
||||
|
||||
endmodule
|
|
@ -20,7 +20,7 @@
|
|||
* Chris Keilbart <ckeilbar@sfu.ca>
|
||||
*/
|
||||
|
||||
module dcache
|
||||
module dcache_noinv
|
||||
|
||||
import cva5_config::*;
|
||||
import riscv_types::*;
|
|
@ -674,18 +674,33 @@ module load_store_unit
|
|||
assign uncacheable_load = CONFIG.DCACHE.USE_NON_CACHEABLE & uncacheable_utils.address_range_check(shared_inputs.addr);
|
||||
assign uncacheable_store = CONFIG.DCACHE.USE_NON_CACHEABLE & uncacheable_utils.address_range_check(shared_inputs.addr);
|
||||
|
||||
dcache #(.CONFIG(CONFIG)) data_cache (
|
||||
.mem(mem),
|
||||
.write_outstanding(unit_write_outstanding[DCACHE_ID]),
|
||||
.amo(shared_inputs.amo),
|
||||
.amo_type(shared_inputs.amo_type),
|
||||
.amo_unit(amo_if[DCACHE_ID]),
|
||||
.uncacheable(uncacheable_load | uncacheable_store),
|
||||
.cbo(shared_inputs.cache_op),
|
||||
.ls(sub_unit[DCACHE_ID]),
|
||||
.load_peek(lsq.load_valid),
|
||||
.load_addr_peek(lsq.load_data_out.addr),
|
||||
.*);
|
||||
if (CONFIG.DCACHE.USE_EXTERNAL_INVALIDATIONS) begin : gen_full_dcache
|
||||
dcache_inv #(.CONFIG(CONFIG)) data_cache (
|
||||
.mem(mem),
|
||||
.write_outstanding(unit_write_outstanding[DCACHE_ID]),
|
||||
.amo(shared_inputs.amo),
|
||||
.amo_type(shared_inputs.amo_type),
|
||||
.amo_unit(amo_if[DCACHE_ID]),
|
||||
.uncacheable(uncacheable_load | uncacheable_store),
|
||||
.cbo(shared_inputs.cache_op),
|
||||
.ls(sub_unit[DCACHE_ID]),
|
||||
.load_peek(lsq.load_valid),
|
||||
.load_addr_peek(lsq.load_data_out.addr),
|
||||
.*);
|
||||
end else begin : gen_small_dcache
|
||||
dcache_noinv #(.CONFIG(CONFIG)) data_cache (
|
||||
.mem(mem),
|
||||
.write_outstanding(unit_write_outstanding[DCACHE_ID]),
|
||||
.amo(shared_inputs.amo),
|
||||
.amo_type(shared_inputs.amo_type),
|
||||
.amo_unit(amo_if[DCACHE_ID]),
|
||||
.uncacheable(uncacheable_load | uncacheable_store),
|
||||
.cbo(shared_inputs.cache_op),
|
||||
.ls(sub_unit[DCACHE_ID]),
|
||||
.load_peek(lsq.load_valid),
|
||||
.load_addr_peek(lsq.load_data_out.addr),
|
||||
.*);
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ core/common_components/ram/lutram_1w_1r.sv
|
|||
core/common_components/ram/lutram_1w_mr.sv
|
||||
core/common_components/ram/sdp_ram.sv
|
||||
core/common_components/ram/sdp_ram_padded.sv
|
||||
core/common_components/ram/tdp_ram.sv
|
||||
core/common_components/ram/dual_port_bram.sv
|
||||
core/common_components/set_clr_reg_with_rst.sv
|
||||
core/common_components/one_hot_to_integer.sv
|
||||
|
@ -50,7 +51,8 @@ core/memory_sub_units/wishbone_master.sv
|
|||
|
||||
core/execution_units/load_store_unit/amo_alu.sv
|
||||
core/execution_units/load_store_unit/amo_unit.sv
|
||||
core/execution_units/load_store_unit/dcache.sv
|
||||
core/execution_units/load_store_unit/dcache_inv.sv
|
||||
core/execution_units/load_store_unit/dcache_noinv.sv
|
||||
core/execution_units/load_store_unit/addr_hash.sv
|
||||
core/execution_units/load_store_unit/store_queue.sv
|
||||
core/execution_units/load_store_unit/load_store_queue.sv
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue