Added dcache uncacheable region support

Signed-off-by: Eric Matthews <ematthew@sfu.ca>
This commit is contained in:
Eric Matthews 2022-05-11 13:20:24 -04:00
parent 00b0c9e3dd
commit 331842f1b3
3 changed files with 91 additions and 55 deletions

View file

@ -50,6 +50,13 @@ package cva5_config;
csr_non_standard_config_t NON_STANDARD_OPTIONS;
} csr_config_t;
//Memory range [L, H]
//Address range is inclusive and must be aligned to its size
//Declared ahead of the cache options so that cache_config_t can hold a range (NON_CACHEABLE)
typedef struct packed {
bit [31:0] L;//Lower bound of the range (inclusive)
bit [31:0] H;//Upper bound of the range (inclusive)
} memory_config_t;
////////////////////////////////////////////////////
//Cache Options
//Size in bytes: (LINES * WAYS * LINE_W * 4)
@ -59,6 +66,8 @@ package cva5_config;
int unsigned LINE_W;// In words
int unsigned WAYS;
bit USE_EXTERNAL_INVALIDATIONS;
bit USE_NON_CACHEABLE;
memory_config_t NON_CACHEABLE;
} cache_config_t;
typedef struct packed {
@ -67,13 +76,6 @@ package cva5_config;
int unsigned TAG_W;
} derived_cache_config_t;
//Memory range [L, H]
//Address range is inclusive and must be aligned to its size
typedef struct packed {
bit [31:0] L;
bit [31:0] H;
} memory_config_t;
////////////////////////////////////////////////////
//Branch Predictor Options
typedef struct packed {
@ -185,7 +187,12 @@ package cva5_config;
LINES : 512,
LINE_W : 4,
WAYS : 2,
USE_EXTERNAL_INVALIDATIONS : 0
USE_EXTERNAL_INVALIDATIONS : 0,
USE_NON_CACHEABLE : 0,
NON_CACHEABLE : '{
L: 32'h00000000,
H: 32'h00000000
}
},
ITLB : '{
WAYS : 2,
@ -200,7 +207,12 @@ package cva5_config;
LINES : 512,
LINE_W : 4,
WAYS : 2,
USE_EXTERNAL_INVALIDATIONS : 0
USE_EXTERNAL_INVALIDATIONS : 0,
USE_NON_CACHEABLE : 0,
NON_CACHEABLE : '{
L: 32'h00000000,
H: 32'h00000000
}
},
DTLB : '{
WAYS : 2,

View file

@ -40,12 +40,23 @@ module dcache
input logic sc_success,
input logic clear_reservation,
input amo_details_t amo,
input logic uncacheable,
memory_sub_unit_interface.responder ls
);
localparam DCACHE_SIZE_IN_WORDS = CONFIG.DCACHE.LINES*CONFIG.DCACHE.LINE_W*CONFIG.DCACHE.WAYS;
localparam derived_cache_config_t SCONFIG = get_derived_cache_params(CONFIG, CONFIG.DCACHE, CONFIG.DCACHE_ADDR);
//Request attributes registered when ls.new_request is asserted (2nd cycle control signals)
typedef struct packed{
logic [29:0] addr;//Word address: loaded from ls.addr[31:2], byte-offset bits are not stored
logic [3:0] be;//Byte enables, loaded from ls.be
logic load;//Read request, loaded from ls.re
logic store;//Write request, loaded from ls.we
logic [31:0] data;//Write data, loaded from ls.data_in
amo_details_t amo;//Atomic operation details, loaded from the amo input
logic uncacheable;//Value of the uncacheable input sampled with the request
} stage2_t;
logic [$clog2(DCACHE_SIZE_IN_WORDS)-1:0] data_bank_addr_a;
logic [$clog2(DCACHE_SIZE_IN_WORDS)-1:0] data_bank_addr_b;
@ -68,13 +79,7 @@ module dcache
logic line_complete;
logic reservation;
logic [31:0] stage2_addr;
logic stage2_load;
logic stage2_store;
logic [3:0] stage2_be;
logic [31:0] stage2_data;
amo_details_t stage2_amo;
stage2_t stage2;
logic [31:0] dbank_data_out;
logic [31:0] hit_data;
@ -95,6 +100,7 @@ module dcache
logic hit_allowed;
logic read_hit_allowed;
logic read_hit_data_valid;
logic read_hit;
logic address_range_valid;
@ -103,6 +109,9 @@ module dcache
logic store_complete;
amo_alu_inputs_t amo_alu_inputs;
////////////////////////////////////////////////////
//Implementation
@ -110,12 +119,13 @@ module dcache
//2nd Cycle Control Signals
always_ff @ (posedge clk) begin
if (ls.new_request) begin
stage2_addr <= ls.addr;
stage2_be <= ls.be;
stage2_load <= ls.re;
stage2_store <= ls.we;
stage2_data <= ls.data_in;
stage2_amo <= amo;
stage2.addr <= ls.addr[31:2];
stage2.be <= ls.be;
stage2.load <= ls.re;
stage2.store <= ls.we;
stage2.data <= ls.data_in;
stage2.amo <= amo;
stage2.uncacheable <= uncacheable;
end
end
@ -124,42 +134,44 @@ module dcache
//LR and AMO ops are forced misses (if there is a tag hit they will reuse the same way)
//Requests flagged uncacheable are likewise never treated as read hits and never trigger a tag update
//Signal is valid for a single cycle, RAM enables are used to hold outputs in case of pipeline stalls
always_ff @ (posedge clk) begin
read_hit_allowed <= ls.new_request & ls.re & dcache_on & ~(amo.is_lr | amo.is_amo);
read_hit_allowed <= ls.new_request & ls.re & dcache_on & ~(amo.is_lr | amo.is_amo) & ~uncacheable;
read_hit_data_valid <= read_hit_allowed;//read_hit_allowed delayed by one cycle
second_cycle <= ls.new_request;//High in the cycle following a new request
tag_update <= second_cycle & dcache_on & stage2_load & ~tag_hit;//Cache enabled, read miss
tag_update <= second_cycle & dcache_on & stage2.load & ~tag_hit & ~stage2.uncacheable;//Cache enabled, cacheable read miss
end
assign read_hit = tag_hit & read_hit_allowed;
//LR reservation, cleared on exceptions
always_ff @ (posedge clk) begin
if (rst)
reservation <= 0;
else if (second_cycle)
reservation <= stage2_amo.is_lr;
reservation <= stage2.amo.is_lr;
else if (sc_complete | clear_reservation)
reservation <= 0;
end
////////////////////////////////////////////////////
//L1 Arbiter Interface
assign l1_request.addr = {stage2_addr[31:2], 2'b0} ;//Memory interface aligns request to burst size (done there to support AMO line-read word-write)
assign l1_request.data = stage2_data;
assign l1_request.rnw = ~stage2_store;
assign l1_request.be = stage2_be;
assign l1_request.size = stage2_load ? 5'(CONFIG.DCACHE.LINE_W-1) : 0;//LR and AMO ops are included in load
assign l1_request.is_amo = (stage2_amo.is_amo | stage2_amo.is_lr | stage2_amo.is_sc);
assign l1_request.amo = stage2_amo.op;
assign l1_request.addr = {stage2.addr, 2'b0} ;//Memory interface aligns request to burst size (done there to support AMO line-read word-write)
assign l1_request.data = stage2.data;
assign l1_request.rnw = ~stage2.store;
assign l1_request.be = stage2.be;
assign l1_request.size = (stage2.load & ~stage2.uncacheable) ? 5'(CONFIG.DCACHE.LINE_W-1) : 0;//LR and AMO ops are included in load
assign l1_request.is_amo = (stage2.amo.is_amo | stage2.amo.is_lr | stage2.amo.is_sc);
assign l1_request.amo = stage2.amo.op;
//Counts the words returned on l1_response for the request in flight
//Advances by one on every l1_response.data_valid
always_ff @ (posedge clk) begin
if (rst)
if (rst | line_complete)//Also restart the count once the response has completed
word_count <= 0;
else if (l1_response.data_valid)
word_count <= word_count + 1;
end
assign is_target_word = (stage2_addr[SCONFIG.SUB_LINE_ADDR_W+1:2] == word_count);
assign is_target_word = (stage2.addr[SCONFIG.SUB_LINE_ADDR_W-1:0] == word_count) | stage2.uncacheable;
assign new_arb_request = second_cycle & (~(tag_hit & read_hit_allowed) | ~dcache_on);
assign new_arb_request = second_cycle & (~read_hit);
always_ff @ (posedge clk) begin
if (rst)
arb_request_r <= 0;
@ -194,7 +206,7 @@ module dcache
//If atomic load (LR or AMO op) and there's a tag hit reuse same line
logic stage2_amo_with_load;
assign stage2_amo_with_load = stage2_amo.is_amo | stage2_amo.is_lr;
assign stage2_amo_with_load = stage2.amo.is_amo | stage2.amo.is_lr;
always_ff @ (posedge clk) begin
if (second_cycle) begin
tag_update_way<= (stage2_amo_with_load & tag_hit) ? tag_hit_way : replacement_way;
@ -209,8 +221,8 @@ module dcache
.clk (clk),
.rst (rst),
.stage1_addr (ls.addr),
.stage2_addr (stage2_addr),
.inv_addr ({l1_response.inv_addr, 2'b00}),
.stage2_addr ({stage2.addr, 2'b0}),
.inv_addr ({l1_response.inv_addr, 2'b0}),
.update_way (tag_update_way),
.update (tag_update),
.stage1_adv (ls.new_request),
@ -224,12 +236,12 @@ module dcache
////////////////////////////////////////////////////
//AMO logic
always_ff @ (posedge clk) begin
amo_rs2 <= stage2_data;
amo_rs2 <= stage2.data;
end
assign amo_alu_inputs.rs1_load = l1_response.data;
assign amo_alu_inputs.rs2 = amo_rs2;
assign amo_alu_inputs.op = stage2_amo.op;
assign amo_alu_inputs.op = stage2.amo.op;
generate if (CONFIG.INCLUDE_AMO)
amo_alu amo_unit (
@ -239,35 +251,40 @@ module dcache
endgenerate
always_comb begin
if (stage2_amo.is_amo & is_target_word)
if (stage2.amo.is_amo & is_target_word)
new_line_data = amo_result;
else if (stage2_amo.is_sc)
new_line_data = stage2_data;
else if (stage2.amo.is_sc)
new_line_data = stage2.data;
else
new_line_data = l1_response.data;
end
assign sc_write_index = stage2_addr[SCONFIG.SUB_LINE_ADDR_W+1:2];
assign sc_write_index = stage2.addr[SCONFIG.SUB_LINE_ADDR_W-1:0];
////////////////////////////////////////////////////
//Data Bank(s)
//Tag bank selection done with upper address bits
//On miss, word index in line provided by: update_word_index
assign write_hit_be = stage2_be & {4{tag_hit}};
assign update_word_index = stage2_amo.is_sc ? sc_write_index : word_count;
assign write_hit_be = stage2.be & {4{tag_hit}};
assign update_word_index = stage2.amo.is_sc ? sc_write_index : word_count;
assign data_bank_addr_a = {tag_hit_way_int, stage2_addr[SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W+2-1:2]};
assign data_bank_addr_b = {tag_update_way_int, stage2_addr[SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W+2-1:SCONFIG.SUB_LINE_ADDR_W+2], update_word_index};
//Data bank address selection
//Port A (hit path): {hit way, line index, word in line} taken from the stage2 word address
//Port B (fill/SC path): {update way, line index, update_word_index}
//With a single way the way index is left out of both addresses
//NOTE(review): presumably because the way index has no bits when WAYS == 1 - confirm
generate if (CONFIG.DCACHE.WAYS == 1) begin : bank_sel_gen
assign data_bank_addr_a = stage2.addr[SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:0];
assign data_bank_addr_b = {stage2.addr[SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:SCONFIG.SUB_LINE_ADDR_W], update_word_index};
end else begin
assign data_bank_addr_a = {tag_hit_way_int, stage2.addr[SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:0]};
assign data_bank_addr_b = {tag_update_way_int, stage2.addr[SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:SCONFIG.SUB_LINE_ADDR_W], update_word_index};
end endgenerate
ddata_bank #(.LINES(DCACHE_SIZE_IN_WORDS)) data_bank (
.clk(clk),
.addr_a(data_bank_addr_a),
.addr_b(data_bank_addr_b),
.en_a(second_cycle),
.en_b(l1_response.data_valid | (sc_complete & sc_success)),
.en_b((l1_response.data_valid & ~stage2.uncacheable) | (sc_complete & sc_success)),
.be_a(write_hit_be),
.data_in_a(stage2_data),
.data_in_a(stage2.data),
.data_in_b(new_line_data),
.data_out_a(dbank_data_out)
);
@ -285,8 +302,8 @@ module dcache
////////////////////////////////////////////////////
//Pipeline Advancement
assign line_complete = (l1_response.data_valid && (word_count == $clog2(CONFIG.DCACHE.LINE_W)'(CONFIG.DCACHE.LINE_W-1))); //covers load, LR, AMO
assign store_complete = l1_request.ack & stage2_store & ~stage2_amo.is_sc;
assign line_complete = l1_response.data_valid & ((word_count == $clog2(CONFIG.DCACHE.LINE_W)'(CONFIG.DCACHE.LINE_W-1)) | stage2.uncacheable); //covers load, LR, AMO
assign store_complete = l1_request.ack & stage2.store & ~stage2.amo.is_sc;
//read miss complete includes store conditional complete
always_ff @ (posedge clk) begin
@ -300,10 +317,10 @@ module dcache
if (rst)
ls.data_valid <= 0;
else
ls.data_valid <= ((l1_response.data_valid & is_target_word) | (read_hit_allowed & tag_hit) | sc_complete);
ls.data_valid <= (l1_response.data_valid & is_target_word) | read_hit | sc_complete;
end
assign ls.ready = (read_hit_allowed & tag_hit) | store_complete | read_miss_complete | idle;
assign ls.ready = read_hit | store_complete | read_miss_complete | idle;
always_ff @ (posedge clk) begin
if (rst)

View file

@ -78,7 +78,7 @@ module load_store_unit
localparam DCACHE_ID = int'(CONFIG.INCLUDE_DLOCAL_MEM) + int'(CONFIG.INCLUDE_PERIPHERAL_BUS);
//Should be equal to pipeline depth of longest load/store subunit
localparam ATTRIBUTES_DEPTH = CONFIG.INCLUDE_DCACHE ? 2 : 1;
//NOTE(review): depth is now fixed at 2 regardless of CONFIG.INCLUDE_DCACHE; the previous
//config-dependent expression is left commented out on this line - confirm this is intentional
localparam ATTRIBUTES_DEPTH = 2;//CONFIG.INCLUDE_DCACHE ? 2 : 1;
//Subunit signals
addr_utils_interface #(CONFIG.DLOCAL_MEM_ADDR.L, CONFIG.DLOCAL_MEM_ADDR.H) dlocal_mem_addr_utils ();
@ -86,6 +86,9 @@ module load_store_unit
addr_utils_interface #(CONFIG.DCACHE_ADDR.L, CONFIG.DCACHE_ADDR.H) dcache_addr_utils ();
memory_sub_unit_interface sub_unit[NUM_SUB_UNITS-1:0]();
addr_utils_interface #(CONFIG.DCACHE.NON_CACHEABLE.L, CONFIG.DCACHE.NON_CACHEABLE.H) uncacheable_utils ();
data_access_shared_inputs_t shared_inputs;
logic [31:0] unit_data_array [NUM_SUB_UNITS-1:0];
logic [NUM_SUB_UNITS-1:0] unit_ready;
@ -353,7 +356,10 @@ endgenerate
endgenerate
generate if (CONFIG.INCLUDE_DCACHE) begin : gen_ls_dcache
//Set when the current access falls inside the configured non-cacheable region
logic uncacheable;
//The data cache sub-unit is selected when the address lies in CONFIG.DCACHE_ADDR
assign sub_unit_address_match[DCACHE_ID] = dcache_addr_utils.address_range_check(shared_inputs.addr);
//Qualify the range match with the enable flag: when USE_NON_CACHEABLE is 0 the
//NON_CACHEABLE range (default [0, 0]) must not cause any access to bypass the cache
assign uncacheable = CONFIG.DCACHE.USE_NON_CACHEABLE & uncacheable_utils.address_range_check(shared_inputs.addr);
dcache # (.CONFIG(CONFIG))
data_cache (
.clk (clk),
@ -365,6 +371,7 @@ endgenerate
.sc_success (sc_success),
.clear_reservation (clear_reservation),
.amo (ls_inputs.amo),
.uncacheable (uncacheable),
.ls (sub_unit[DCACHE_ID])
);
end