Add dcache cbo instruction support

Signed-off-by: Eric Matthews <ematthew@sfu.ca>
This commit is contained in:
Eric Matthews 2023-05-02 14:58:26 -04:00
parent f11b9582e8
commit 8769842249
11 changed files with 70 additions and 47 deletions

View file

@ -68,6 +68,7 @@ module dcache
logic [31:0] addr;
logic [3:0] be;
logic [31:0] data;
logic cache_op;
logic uncacheable;
} store_stage2_t;
store_stage2_t stage2_store;
@ -167,8 +168,8 @@ module dcache
end
//Next-state logic for the one-hot store path state (STORE_IDLE / STORE_L1_REQUEST)
//Both the earlier and the current form of the equations appear in this listing; within
//an always_comb the later blocking assignment to a signal is the one that takes effect
always_comb begin
//Earlier form: every store request moves the store path to STORE_L1_REQUEST
store_state_next[STORE_IDLE] = (store_state[STORE_IDLE] & ~store_request) | (store_l1_arb_ack & ~store_request);
store_state_next[STORE_L1_REQUEST] = (store_state[STORE_L1_REQUEST] & ~store_l1_arb_ack) | store_request;
//Current form: a store request flagged as a cache op issues no L1 request, so the store
//path must be idle afterwards. This has to hold both when the request arrives in
//STORE_IDLE and when it arrives in the cycle the previous request is acknowledged
//(store_ready is high on store_l1_arb_ack); otherwise neither state bit would be set
//and store_ready could never assert again.
store_state_next[STORE_IDLE] = (store_state[STORE_IDLE] | store_l1_arb_ack) & (~store_request | ls_store.cache_op);
//Only non-cache-op store requests start (or keep) an outstanding L1 request
store_state_next[STORE_L1_REQUEST] = (store_state[STORE_L1_REQUEST] & ~store_l1_arb_ack) | (store_request & ~ls_store.cache_op);
end
//A new store is accepted when the store path is idle (or its L1 request is acknowledged
//this cycle) and the load path is idle (or the current load hits)
assign store_ready = (store_state[STORE_IDLE] | store_l1_arb_ack) & (load_state[LOAD_IDLE] | load_hit);
@ -180,6 +181,7 @@ module dcache
stage2_store.uncacheable <= uncacheable_store;
stage2_store.be <= ls_store.be;
stage2_store.data <= ls_store.data_in;
stage2_store.cache_op <= ls_store.cache_op;
end
end
@ -189,7 +191,7 @@ module dcache
//Single-bit FIFO; each pushed entry holds the value of load_request at push time
fifo_interface #(.DATA_TYPE(logic)) request_order();
assign request_order.data_in = load_request;
//Earlier form: every load or store request pushes an entry
assign request_order.push = load_request | store_request;
//Current form: store requests flagged as cache ops do not push an entry
assign request_order.push = load_request | (store_request & ~ls_store.cache_op);
assign request_order.potential_push = request_order.push;
//An entry is removed on an L1 request ack or on a load hit
assign request_order.pop = l1_request.ack | load_hit;
@ -241,6 +243,7 @@ module dcache
.store_addr (ls_store.addr),
.store_addr_r (stage2_store.addr),
.store_req (store_request),
.cache_op_req (ls_store.cache_op),
.load_tag_hit (load_hit),
.load_tag_hit_way (load_tag_hit_way),
.store_tag_hit (store_hit),

View file

@ -48,6 +48,7 @@ module dcache_tag_banks
input logic[31:0] store_addr,
input logic[31:0] store_addr_r,
input logic store_req,
input logic cache_op_req,
output logic load_tag_hit,
output logic store_tag_hit,
@ -76,7 +77,7 @@ module dcache_tag_banks
////////////////////////////////////////////////////
//Implementation
//One-cycle delayed copy of the load request strobe
always_ff @ (posedge clk) load_req_r <= load_req;
//Earlier form: every store request is registered
always_ff @ (posedge clk) store_req_r <= store_req;
//Current form: store requests that are cache ops are masked out of store_req_r
always_ff @ (posedge clk) store_req_r <= store_req & ~cache_op_req;
//External invalidations only take effect when enabled in the configuration
assign external_inv = extern_inv & CONFIG.DCACHE.USE_EXTERNAL_INVALIDATIONS;
@ -93,7 +94,7 @@ module dcache_tag_banks
dual_port_bram #(.WIDTH($bits(dtag_entry_t)), .LINES(CONFIG.DCACHE.LINES)) dtag_bank (
.clk (clk),
.en_a (store_req | (miss_req & miss_way[i]) | external_inv),
.wen_a ((miss_req & miss_way[i]) | external_inv),
.wen_a ((miss_req & miss_way[i]) | external_inv | (store_req & cache_op_req)),
.addr_a (porta_addr),
.data_in_a (new_tagline),
.data_out_a (tag_line_a[i]),

View file

@ -101,7 +101,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
};
////////////////////////////////////////////////////
//Store Queue
//Earlier form: only stores are pushed to the store queue
assign sq.push = lsq.push & lsq.data_in.store;
//Current form: cache ops are pushed to the store queue as well as stores
assign sq.push = lsq.push & (lsq.data_in.store | lsq.data_in.cache_op);
assign sq.pop = lsq.store_pop;
//The full LSQ input is forwarded to the store queue
assign sq.data_in = lsq.data_in;
@ -132,6 +132,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
addr : lq.data_out.addr,
load : 1,
store : 0,
cache_op : 0,
be : 'x,
fn3 : lq.data_out.fn3,
data_in : 'x,
@ -142,6 +143,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
addr : sq.data_out.addr,
load : 0,
store : 1,
cache_op : sq.data_out.cache_op,
be : sq.data_out.be,
fn3 : 'x,
data_in : sq.data_out.data,

View file

@ -153,42 +153,44 @@ module load_store_unit
//Decode
assign instruction = decode_stage.instruction;
//Earlier form: the unit handles loads, stores and FENCE
assign unit_needed = decode_stage.instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW, FENCE};
//Current form: the CBO instructions are also handled when CONFIG.INCLUDE_CBO is set
assign unit_needed = decode_stage.instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW, FENCE} | (CONFIG.INCLUDE_CBO & decode_stage.instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH});
//Register-file usage of the decoded instruction
always_comb begin
uses_rs = '0;
//Earlier form: only loads and stores read RS1
uses_rs[RS1] = decode_stage.instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW};
//Current form: CBO instructions read RS1 too (when included)
uses_rs[RS1] = decode_stage.instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW} | (CONFIG.INCLUDE_CBO & decode_stage.instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH});
//RS2 is only reported as used by stores, and not at all when store forwarding is enabled
uses_rs[RS2] = CONFIG.INCLUDE_FORWARDING_TO_STORES ? 0 : decode_stage.instruction inside {SB, SH, SW};
//Only loads write a destination register
uses_rd = decode_stage.instruction inside {LB, LH, LW, LBU, LHU};
end
//AMO details for the decode-stage instruction, and a second instance (amo_r)
amo_details_t amo;
amo_details_t amo_r;
//Earlier form: individual decode signals and their per-field registers (*_r)
logic is_load;
logic is_store;
logic is_load_r;
logic is_store_r;
logic is_fence_r;
logic [2:0] fn3_r;
logic [11:0] ls_offset_r;
////////////////////////////////////////////////////
//LS specific decode support
//Current form: the load/store attributes are bundled into one packed struct
typedef struct packed{
logic is_load;
logic is_store;
logic is_fence;
logic is_cbo;
//12-bit address offset; sign-extended where it is added to rf[RS1]
logic [11:0] offset;
} ls_attr_t;
//decode_attr is computed from the decode-stage instruction; issue_attr is its registered copy
ls_attr_t decode_attr;
ls_attr_t issue_attr;
//AMO decode; every field is forced to zero when CONFIG.INCLUDE_AMO is not set
assign amo.is_amo = CONFIG.INCLUDE_AMO & (instruction.upper_opcode == AMO_T);
assign amo.op = CONFIG.INCLUDE_AMO ? decode_stage.instruction[31:27] : '0;
assign amo.is_lr = CONFIG.INCLUDE_AMO & (amo.op == AMO_LR_FN5);
assign amo.is_sc = CONFIG.INCLUDE_AMO & (amo.op == AMO_SC_FN5);
//The two candidate 12-bit offsets: instruction[31:20], or {instruction[31:25], instruction[11:7]}
logic [11:0] load_offset;
logic [11:0] store_offset;
assign load_offset = decode_stage.instruction[31:20];
assign store_offset = {decode_stage.instruction[31:25], decode_stage.instruction[11:7]};
//Earlier form: load/store classification from the upper opcode and AMO details
assign is_load = (instruction.upper_opcode inside {LOAD_T, AMO_T}) & !(amo.is_amo & amo.is_sc); //LR and AMO_ops perform a read operation as well
assign is_store = (instruction.upper_opcode == STORE_T) | (amo.is_amo & amo.is_sc);//Used for LS unit and for ID tracking
assign decode_is_store = is_store;
//Current form: attributes are derived by matching the full instruction patterns
//Offset selection: instruction[5] set picks the store-format offset; otherwise, when CBO
//is included and instruction[2] is set (as in the CBO_* encodings), the offset is zero;
//otherwise the load-format offset is used
assign decode_attr = '{
is_load : decode_stage.instruction inside {LB, LH, LW, LBU, LHU},
is_store : decode_stage.instruction inside {SB, SH, SW},
is_fence : decode_stage.instruction inside {FENCE},
is_cbo : CONFIG.INCLUDE_CBO & decode_stage.instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH},
offset : decode_stage.instruction[5] ? store_offset : ((CONFIG.INCLUDE_CBO & decode_stage.instruction[2]) ? '0 :load_offset)
};
//CBO instructions are reported as stores on decode_is_store
assign decode_is_store = decode_attr.is_store | decode_attr.is_cbo;
//Registers the decode-stage attributes; updated only while issue_stage_ready is high
always_ff @(posedge clk) begin
//Earlier form: each attribute has its own register
if (issue_stage_ready) begin
ls_offset_r <= decode_stage.instruction[5] ? {decode_stage.instruction[31:25], decode_stage.instruction[11:7]} : decode_stage.instruction[31:20];
is_load_r <= is_load;
is_store_r <= is_store;
is_fence_r <= (instruction.upper_opcode == FENCE_T);
amo_r <= amo;
fn3_r <= amo.is_amo ? LS_W_fn3 : instruction.fn3;
end
//Current form: the whole attribute struct is captured at once
if (issue_stage_ready)
issue_attr <= decode_attr;
end
typedef struct packed{
@ -215,14 +217,14 @@ module load_store_unit
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_ls_exceptions
logic new_exception;
//Misalignment check on the computed virtual address, selected by the access-size fn3
//unaligned_addr feeds both the misaligned-address exception and the LSQ push condition
always_comb begin
//case(fn3_r) is the earlier selector; case(issue_stage.fn3) is the current one
case(fn3_r)
case(issue_stage.fn3)
//Halfword accesses must have bit 0 clear
LS_H_fn3, L_HU_fn3 : unaligned_addr = virtual_address[0];
//Word accesses must have bits [1:0] clear. The CBO_* encodings carry funct3 = 010,
//which is expected to select this arm as well (assumes LS_W_fn3 == 3'b010 -- confirm),
//but a CBO address may point anywhere inside the cache block, so CBO instructions
//are exempt from the alignment check.
LS_W_fn3 : unaligned_addr = (|virtual_address[1:0]) & ~issue_attr.is_cbo;
default : unaligned_addr = 0;
endcase
end
assign new_exception = unaligned_addr & issue.new_request & ~is_fence_r;
assign new_exception = unaligned_addr & issue.new_request & ~issue_attr.is_fence;
always_ff @(posedge clk) begin
if (rst)
exception.valid <= 0;
@ -232,7 +234,7 @@ module load_store_unit
always_ff @(posedge clk) begin
if (new_exception & ~exception.valid) begin
exception.code <= is_store_r ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED;
exception.code <= issue_attr.is_store ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED;
exception.tval <= virtual_address;
exception.id <= issue.id;
end
@ -256,12 +258,12 @@ module load_store_unit
////////////////////////////////////////////////////
//TLB interface
//Earlier form: address offset taken from the dedicated ls_offset_r register
assign virtual_address = rf[RS1] + 32'(signed'(ls_offset_r));
//Current form: base register plus the sign-extended 12-bit offset from issue_attr
assign virtual_address = rf[RS1] + 32'(signed'(issue_attr.offset));
assign tlb.virtual_address = virtual_address;
//A translation is only requested when the TLB is on and a new request is issued
assign tlb.new_request = tlb_on & issue.new_request;
assign tlb.execute = 0;
//Earlier form of the read-not-write indication
assign tlb.rnw = is_load_r & ~is_store_r;
//Current form: read-not-write is set for loads that are not also stores
assign tlb.rnw = issue_attr.is_load & ~issue_attr.is_store;
////////////////////////////////////////////////////
//Byte enable generation
@ -271,7 +273,7 @@ module load_store_unit
// SB: specific byte
always_comb begin
be = 0;
case(fn3_r[1:0])
case(issue_stage.fn3[1:0])
LS_B_fn3[1:0] : be[virtual_address[1:0]] = 1;
LS_H_fn3[1:0] : begin
be[virtual_address[1:0]] = 1;
@ -285,17 +287,18 @@ module load_store_unit
//Load Store Queue
//Request presented to the load-store queue
//fn3/load/store appear twice: the *_r form is the earlier one, the issue_stage/issue_attr
//form is the current one; cache_op is new and marks CBO instructions
assign lsq.data_in = '{
//Physical address when the TLB is on, otherwise the virtual address is used directly
addr : tlb_on ? tlb.physical_address : virtual_address,
fn3 : fn3_r,
fn3 : issue_stage.fn3,
be : be,
data : rf[RS2],
load : is_load_r,
store : is_store_r,
load : issue_attr.is_load,
store : issue_attr.is_store,
cache_op : issue_attr.is_cbo,
id : issue.id,
id_needed : rd_attributes.id
};
assign lsq.potential_push = issue.possible_issue;
//Earlier form of the push condition (fence flag from is_fence_r)
assign lsq.push = issue.new_request & ~unaligned_addr & (~tlb_on | tlb.done) & ~is_fence_r;
//Current form: push a new, aligned, non-fence request once translation (if any) is done
assign lsq.push = issue.new_request & ~unaligned_addr & (~tlb_on | tlb.done) & ~issue_attr.is_fence;
load_store_queue # (.CONFIG(CONFIG)) lsq_block (
.clk (clk),
@ -341,7 +344,7 @@ module load_store_unit
if (rst)
fence_hold <= 0;
else
fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & is_fence_r);
fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & issue_attr.is_fence);
end
////////////////////////////////////////////////////
@ -463,7 +466,7 @@ module load_store_unit
.sc_complete (sc_complete),
.sc_success (sc_success),
.clear_reservation (clear_reservation),
.amo (amo_r),
.amo (),
.uncacheable_load (uncacheable_load),
.uncacheable_store (uncacheable_store),
.is_load (sel_load),

View file

@ -125,6 +125,7 @@ module store_queue
.new_ram_data('{
addr : sq.data_in.addr,
be : sq.data_in.be,
cache_op : sq.data_in.cache_op,
data : '0
}),
.ram_data_out(output_entry)
@ -253,6 +254,7 @@ module store_queue
//Store queue output: address, byte enables and the cache_op flag come from the registered
//output entry, while the data comes from sq_data_out
assign sq.data_out = '{
addr : output_entry_r.addr,
be : output_entry_r.be,
cache_op : output_entry_r.cache_op,
data : sq_data_out
};

View file

@ -165,6 +165,7 @@ package cva5_config;
bit INCLUDE_IFENCE; //local mem operations only
bit INCLUDE_AMO;
bit INCLUDE_CBO; //Data cache invalidation operations
//Units
units_t INCLUDE_UNIT;
@ -246,7 +247,8 @@ package cva5_config;
INCLUDE_IFENCE : 1,
INCLUDE_AMO : 0,
INCLUDE_CBO : 0,
//CSR constants
CSRS : '{
MACHINE_IMPLEMENTATION_ID : 0,

View file

@ -107,6 +107,7 @@ package cva5_types;
logic [31:0] addr;
logic load;
logic store;
logic cache_op;
logic [3:0] be;
logic [2:0] fn3;
logic [31:0] data;
@ -117,6 +118,7 @@ package cva5_types;
typedef struct packed {
logic [31:0] addr;
logic [3:0] be;
logic cache_op;
logic [31:0] data;
} sq_entry_t;
@ -141,6 +143,7 @@ package cva5_types;
logic [31:0] addr;
logic load;
logic store;
logic cache_op;
logic [3:0] be;
logic [2:0] fn3;
logic [31:0] data_in;

View file

@ -101,6 +101,11 @@ package opcodes;
localparam [31:0] SFENCE_VMA = 32'b0001001??????????000000001110011;
localparam [31:0] WFI = 32'b00010000010100000000000001110011;
//Cache
//Cache block operations share one layout: bits [31:20] select the operation
//(0 = inval, 1 = clean, 2 = flush), bits [19:15] (rs1) are wildcards,
//bits [14:12] are 010, bits [11:7] are zero and bits [6:0] are 0001111
localparam [31:0] CBO_INVAL = 32'b000000000000?????010000000001111;
localparam [31:0] CBO_CLEAN = 32'b000000000001?????010000000001111;
localparam [31:0] CBO_FLUSH = 32'b000000000010?????010000000001111;
//Matches any instruction whose low seven bits are 1111011
localparam [31:0] CUSTOM = 32'b?????????????????????????1111011;

View file

@ -49,6 +49,8 @@ package nexys_config;
},
INCLUDE_IFENCE : 0,
INCLUDE_AMO : 0,
INCLUDE_CBO : 0,
//CSR constants
CSRS : '{
MACHINE_IMPLEMENTATION_ID : 0,

View file

@ -309,7 +309,7 @@ module cva5_sim
//Misc Issue stats
stats[ISSUE_OPERAND_STALL_FOR_BRANCH_STAT] = stats[ISSUE_OPERANDS_NOT_READY_STAT] & `ISSUE_P.unit_needed_issue_stage[BR_ID];
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[LS_ID] & `LS_P.is_store_r & `LS_P.rs2_inuse;
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[LS_ID] & `LS_P.issue_attr.is_store & `LS_P.rs2_inuse;
stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[DIV_ID] & `DIV_P.div_op_reuse;
//Issue Stall Source

View file

@ -545,7 +545,7 @@ module cva5_sim
//Misc Issue stats
stats[ISSUE_OPERAND_STALL_FOR_BRANCH_STAT] = stats[ISSUE_OPERANDS_NOT_READY_STAT] & `ISSUE_P.unit_needed_issue_stage[BR_ID];
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[LS_ID] & `LS_P.is_store_r & `LS_P.rs2_inuse;
stats[ISSUE_STORE_WITH_FORWARDED_DATA_STAT] = `ISSUE_P.issue_to[LS_ID] & `LS_P.issue_attr.is_store & `LS_P.rs2_inuse;
stats[ISSUE_DIVIDER_RESULT_REUSE_STAT] = `ISSUE_P.issue_to[DIV_ID] & `DIV_P.div_op_reuse;
//Issue Stall Source