mirror of
https://github.com/openhwgroup/cva5.git
synced 2025-04-20 03:57:18 -04:00
Store queue data forwarding restructure
Signed-off-by: Eric Matthews <ematthew@sfu.ca>
This commit is contained in:
parent
4bada38942
commit
f15fe83a9c
8 changed files with 172 additions and 116 deletions
13
core/cva5.sv
13
core/cva5.sv
|
@ -157,12 +157,14 @@ module cva5
|
|||
logic decode_uses_rd;
|
||||
rs_addr_t decode_rd_addr;
|
||||
exception_sources_t decode_exception_unit;
|
||||
logic decode_is_store;
|
||||
phys_addr_t decode_phys_rd_addr;
|
||||
phys_addr_t decode_phys_rs_addr [REGFILE_READ_PORTS];
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] decode_rs_wb_group [REGFILE_READ_PORTS];
|
||||
|
||||
//ID freeing
|
||||
retire_packet_t retire;
|
||||
retire_packet_t store_retire;
|
||||
id_t retire_ids [RETIRE_PORTS];
|
||||
id_t retire_ids_next [RETIRE_PORTS];
|
||||
logic retire_port_valid [RETIRE_PORTS];
|
||||
|
@ -195,6 +197,7 @@ module cva5
|
|||
logic issue_stage_ready;
|
||||
phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS];
|
||||
rs_addr_t issue_rs_addr [REGFILE_READ_PORTS];
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rd_wb_group;
|
||||
logic illegal_instruction;
|
||||
logic instruction_issued;
|
||||
logic instruction_issued_with_rd;
|
||||
|
@ -241,11 +244,13 @@ module cva5
|
|||
.decode_rd_addr (decode_rd_addr),
|
||||
.decode_phys_rd_addr (decode_phys_rd_addr),
|
||||
.decode_exception_unit (decode_exception_unit),
|
||||
.decode_is_store (decode_is_store),
|
||||
.issue (issue),
|
||||
.instruction_issued (instruction_issued),
|
||||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.wb_packet (wb_packet),
|
||||
.retire (retire),
|
||||
.store_retire (store_retire),
|
||||
.retire_ids (retire_ids),
|
||||
.retire_ids_next (retire_ids_next),
|
||||
.retire_port_valid(retire_port_valid),
|
||||
|
@ -371,9 +376,10 @@ module cva5
|
|||
.instruction_issued (instruction_issued),
|
||||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.issue (issue),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.issue_stage_ready (issue_stage_ready),
|
||||
.issue_phys_rs_addr (issue_phys_rs_addr),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.issue_rd_wb_group (issue_rd_wb_group),
|
||||
.rf (rf_issue),
|
||||
.constant_alu (constant_alu),
|
||||
.unit_issue (unit_issue),
|
||||
|
@ -447,8 +453,10 @@ module cva5
|
|||
.unit_needed (unit_needed[UNIT_IDS.LS]),
|
||||
.uses_rs (unit_uses_rs[UNIT_IDS.LS]),
|
||||
.uses_rd (unit_uses_rd[UNIT_IDS.LS]),
|
||||
.decode_is_store (decode_is_store),
|
||||
.instruction_issued_with_rd (instruction_issued_with_rd),
|
||||
.issue_rs_addr (issue_rs_addr),
|
||||
.issue_rd_wb_group (issue_rd_wb_group),
|
||||
.rs2_inuse (rf_issue.inuse[RS2]),
|
||||
.rf (rf_issue.data),
|
||||
.issue (unit_issue[UNIT_IDS.LS]),
|
||||
|
@ -465,8 +473,7 @@ module cva5
|
|||
.dwishbone (dwishbone),
|
||||
.data_bram (data_bram),
|
||||
.wb_packet (wb_packet),
|
||||
.retire_ids (retire_ids),
|
||||
.retire_port_valid(retire_port_valid),
|
||||
.store_retire (store_retire),
|
||||
.exception (exception[LS_EXCEPTION]),
|
||||
.load_store_status(load_store_status),
|
||||
.wb (unit_wb2[LS_UNIT_WB2_ID])
|
||||
|
|
|
@ -62,6 +62,7 @@ module decode_and_issue
|
|||
output issue_packet_t issue,
|
||||
output rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
|
||||
output phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS],
|
||||
output logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rd_wb_group,
|
||||
output logic issue_stage_ready,
|
||||
|
||||
//Register File
|
||||
|
@ -153,7 +154,6 @@ module decode_and_issue
|
|||
assign decode_phys_rd_addr = renamer.phys_rd_addr;
|
||||
assign decode_phys_rs_addr = renamer.phys_rs_addr;
|
||||
assign decode_rs_wb_group = renamer.rs_wb_group;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Issue
|
||||
always_ff @(posedge clk) begin
|
||||
|
@ -168,6 +168,7 @@ module decode_and_issue
|
|||
issue_rs_wb_group <= renamer.rs_wb_group;
|
||||
issue.rd_addr <= decode_instruction.rd_addr;
|
||||
issue.phys_rd_addr <= renamer.phys_rd_addr;
|
||||
issue_rd_wb_group <= renamer_wb_group;
|
||||
issue.is_multicycle <= ~unit_needed[UNIT_IDS.ALU];
|
||||
issue.id <= decode.id;
|
||||
issue.exception_unit <= decode_exception_unit;
|
||||
|
|
|
@ -53,6 +53,7 @@ module instruction_metadata_and_id_management
|
|||
input logic decode_uses_rd,
|
||||
input rs_addr_t decode_rd_addr,
|
||||
input exception_sources_t decode_exception_unit,
|
||||
input logic decode_is_store,
|
||||
//renamer
|
||||
input phys_addr_t decode_phys_rd_addr,
|
||||
|
||||
|
@ -66,6 +67,7 @@ module instruction_metadata_and_id_management
|
|||
|
||||
//Retirer
|
||||
output retire_packet_t retire,
|
||||
output retire_packet_t store_retire,
|
||||
output id_t retire_ids [RETIRE_PORTS],
|
||||
output id_t retire_ids_next [RETIRE_PORTS],
|
||||
output logic retire_port_valid [RETIRE_PORTS],
|
||||
|
@ -82,6 +84,7 @@ module instruction_metadata_and_id_management
|
|||
(* ramstyle = "MLAB, no_rw_check" *) logic [0:0] valid_fetch_addr_table [MAX_IDS];
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [0:0] uses_rd_table [MAX_IDS];
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [0:0] is_store_table [MAX_IDS];
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [$bits(fetch_metadata_t)-1:0] fetch_metadata_table [MAX_IDS];
|
||||
|
||||
|
@ -98,6 +101,8 @@ module instruction_metadata_and_id_management
|
|||
logic [LOG2_MAX_IDS:0] inflight_count;
|
||||
|
||||
retire_packet_t retire_next;
|
||||
retire_packet_t store_retire_next;
|
||||
|
||||
logic retire_port_valid_next [RETIRE_PORTS];
|
||||
|
||||
genvar i;
|
||||
|
@ -137,6 +142,15 @@ module instruction_metadata_and_id_management
|
|||
uses_rd_table[decode_id] <= decode_uses_rd & |decode_rd_addr;
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Is store table
|
||||
//Number of read ports = RETIRE_PORTS
|
||||
always_ff @ (posedge clk) begin
|
||||
if (decode_advance)
|
||||
is_store_table[decode_id] <= decode_is_store;
|
||||
end
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Exception unit table
|
||||
always_ff @ (posedge clk) begin
|
||||
|
@ -184,7 +198,6 @@ module instruction_metadata_and_id_management
|
|||
if (~gc.retire_hold)
|
||||
retire_ids[i] <= retire_ids_next[i];
|
||||
end
|
||||
|
||||
end endgenerate
|
||||
|
||||
//Represented as a negative value so that the MSB indicates that the decode stage is valid
|
||||
|
@ -262,12 +275,15 @@ module instruction_metadata_and_id_management
|
|||
logic contiguous_retire;
|
||||
logic id_is_post_issue [RETIRE_PORTS];
|
||||
logic id_ready_to_retire [RETIRE_PORTS];
|
||||
logic [LOG2_RETIRE_PORTS-1:0] phys_id_sel;
|
||||
logic [LOG2_RETIRE_PORTS-1:0] retire_with_rd_sel;
|
||||
logic [LOG2_RETIRE_PORTS-1:0] retire_with_store_sel;
|
||||
logic [RETIRE_PORTS-1:0] retire_id_uses_rd;
|
||||
logic [RETIRE_PORTS-1:0] retire_id_is_store;
|
||||
logic [RETIRE_PORTS-1:0] retire_id_waiting_for_writeback;
|
||||
|
||||
generate for (i = 0; i < RETIRE_PORTS; i++) begin : gen_retire_writeback
|
||||
assign retire_id_uses_rd[i] = uses_rd_table[retire_ids_next[i]];
|
||||
assign retire_id_is_store[i] = is_store_table[retire_ids_next[i]];
|
||||
assign retire_id_waiting_for_writeback[i] = id_waiting_for_writeback[i];
|
||||
end endgenerate
|
||||
|
||||
|
@ -277,29 +293,34 @@ module instruction_metadata_and_id_management
|
|||
//If an exception is pending, only retire a single instruction per cycle. As such, the pending
|
||||
//exception will have to become the oldest instruction retire_ids[0] before it can retire.
|
||||
logic retire_with_rd_found;
|
||||
logic retire_with_store_found;
|
||||
always_comb begin
|
||||
contiguous_retire = ~gc.retire_hold;
|
||||
retire_with_rd_found = 0;
|
||||
retire_with_store_found = 0;
|
||||
for (int i = 0; i < RETIRE_PORTS; i++) begin
|
||||
id_is_post_issue[i] = post_issue_count > ID_COUNTER_W'(i);
|
||||
|
||||
id_ready_to_retire[i] = (id_is_post_issue[i] & contiguous_retire & ~id_waiting_for_writeback[i]);
|
||||
retire_port_valid_next[i] = id_ready_to_retire[i] & ~(retire_id_uses_rd[i] & retire_with_rd_found);
|
||||
retire_port_valid_next[i] = id_ready_to_retire[i] & ~((retire_id_uses_rd[i] & retire_with_rd_found) | (retire_id_is_store[i] & retire_with_store_found));
|
||||
|
||||
retire_with_rd_found |= retire_port_valid_next[i] & retire_id_uses_rd[i];
|
||||
retire_with_store_found |= retire_port_valid_next[i] & retire_id_is_store[i];
|
||||
|
||||
contiguous_retire &= retire_port_valid_next[i] & ~gc.exception_pending;
|
||||
end
|
||||
end
|
||||
|
||||
//retire_next packet
|
||||
priority_encoder #(.WIDTH(RETIRE_PORTS))
|
||||
phys_id_sel_encoder (
|
||||
retire_with_rd_sel_encoder (
|
||||
.priority_vector (retire_id_uses_rd),
|
||||
.encoded_result (phys_id_sel)
|
||||
.encoded_result (retire_with_rd_sel)
|
||||
);
|
||||
assign retire_next.phys_id = retire_ids_next[phys_id_sel];
|
||||
assign retire_next.valid = retire_with_rd_found;
|
||||
|
||||
assign retire_next.phys_id = retire_ids_next[retire_with_rd_sel];
|
||||
assign retire_next.valid = retire_with_rd_found;
|
||||
|
||||
always_comb begin
|
||||
retire_next.count = 0;
|
||||
for (int i = 0; i < RETIRE_PORTS; i++) begin
|
||||
|
@ -315,6 +336,19 @@ module instruction_metadata_and_id_management
|
|||
retire_port_valid[i] <= retire_port_valid_next[i] & ~gc.writeback_supress;
|
||||
end
|
||||
|
||||
priority_encoder #(.WIDTH(RETIRE_PORTS))
|
||||
retire_with_store_sel_encoder (
|
||||
.priority_vector (retire_id_is_store),
|
||||
.encoded_result (retire_with_store_sel)
|
||||
);
|
||||
|
||||
assign store_retire_next.phys_id = retire_ids_next[retire_with_store_sel];
|
||||
assign store_retire_next.valid = retire_with_store_found;
|
||||
assign store_retire_next.count = 1;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
store_retire <= store_retire_next;
|
||||
end
|
||||
////////////////////////////////////////////////////
|
||||
//Outputs
|
||||
assign pc_id_available = ~inflight_count[LOG2_MAX_IDS];
|
||||
|
|
|
@ -35,26 +35,30 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
|
|||
input gc_outputs_t gc,
|
||||
|
||||
load_store_queue_interface.queue lsq,
|
||||
input logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] store_forward_wb_group,
|
||||
//Writeback snooping
|
||||
input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
|
||||
|
||||
//Retire release
|
||||
input id_t retire_ids [RETIRE_PORTS],
|
||||
input logic retire_port_valid [RETIRE_PORTS]
|
||||
input retire_packet_t store_retire
|
||||
);
|
||||
localparam LOG2_SQ_DEPTH = $clog2(CONFIG.SQ_DEPTH);
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] addr;
|
||||
logic [2:0] fn3;
|
||||
id_t id;
|
||||
phys_addr_t phys_addr;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] potential_store_conflicts;
|
||||
logic store_collision;
|
||||
logic [LOG2_SQ_DEPTH-1:0] sq_index;
|
||||
} lq_entry_t;
|
||||
|
||||
|
||||
logic [LOG2_SQ_DEPTH-1:0] sq_index;
|
||||
logic [LOG2_SQ_DEPTH-1:0] sq_oldest;
|
||||
addr_hash_t addr_hash;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] potential_store_conflicts;
|
||||
logic potential_store_conflict;
|
||||
sq_entry_t sq_entry;
|
||||
logic store_conflict;
|
||||
|
||||
lq_entry_t lq_data_in;
|
||||
lq_entry_t lq_data_out;
|
||||
|
@ -95,7 +99,8 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
|
|||
fn3 : lsq.data_in.fn3,
|
||||
id : lsq.data_in.id,
|
||||
phys_addr : lsq.data_in.phys_addr,
|
||||
potential_store_conflicts : potential_store_conflicts
|
||||
store_collision : potential_store_conflict,
|
||||
sq_index : sq_index
|
||||
};
|
||||
assign lq.data_in = lq_data_in;
|
||||
assign lq_data_out = lq.data_out;
|
||||
|
@ -111,20 +116,23 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
|
|||
.lq_push (lq.push),
|
||||
.lq_pop (lq.pop),
|
||||
.sq (sq),
|
||||
.store_forward_wb_group (store_forward_wb_group),
|
||||
.addr_hash (addr_hash),
|
||||
.potential_store_conflicts (potential_store_conflicts),
|
||||
.prev_store_conflicts (lq_data_out.potential_store_conflicts),
|
||||
.store_conflict (store_conflict),
|
||||
.potential_store_conflict (potential_store_conflict),
|
||||
.sq_index (sq_index),
|
||||
.sq_oldest (sq_oldest),
|
||||
.wb_packet (wb_packet),
|
||||
.retire_ids (retire_ids),
|
||||
.retire_port_valid (retire_port_valid)
|
||||
.store_retire (store_retire)
|
||||
);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Output
|
||||
//Priority is for loads over stores.
|
||||
//A store will be selected only if either no loads are ready, OR if the store queue is full and a store is ready
|
||||
assign lsq.load_valid = lq.valid & ~store_conflict;
|
||||
//A store will be selected only if no loads are ready
|
||||
logic load_blocked;
|
||||
assign load_blocked = (lq_data_out.store_collision & (lq_data_out.sq_index != sq_oldest));
|
||||
|
||||
assign lsq.load_valid = lq.valid & ~load_blocked;
|
||||
assign lsq.store_valid = sq.valid;
|
||||
|
||||
assign lsq.load_data_out = '{
|
||||
|
|
|
@ -40,12 +40,14 @@ module load_store_unit
|
|||
output logic unit_needed,
|
||||
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
|
||||
output logic uses_rd,
|
||||
|
||||
output logic decode_is_store,
|
||||
|
||||
input issue_packet_t issue_stage,
|
||||
input logic issue_stage_ready,
|
||||
input logic instruction_issued_with_rd,
|
||||
input logic rs2_inuse,
|
||||
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
|
||||
input logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rd_wb_group,
|
||||
input logic [31:0] rf [REGFILE_READ_PORTS],
|
||||
|
||||
unit_issue_interface.unit issue,
|
||||
|
@ -70,8 +72,7 @@ module load_store_unit
|
|||
input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
|
||||
|
||||
//Retire release
|
||||
input id_t retire_ids [RETIRE_PORTS],
|
||||
input logic retire_port_valid [RETIRE_PORTS],
|
||||
input retire_packet_t store_retire,
|
||||
|
||||
exception_interface.unit exception,
|
||||
output load_store_status_t load_store_status,
|
||||
|
@ -178,7 +179,7 @@ module load_store_unit
|
|||
|
||||
assign is_load = (instruction.upper_opcode inside {LOAD_T, AMO_T}) & !(amo.is_amo & amo.is_sc); //LR and AMO_ops perform a read operation as well
|
||||
assign is_store = (instruction.upper_opcode == STORE_T) | (amo.is_amo & amo.is_sc);//Used for LS unit and for ID tracking
|
||||
|
||||
assign decode_is_store = is_store;
|
||||
|
||||
always_ff @(posedge clk) begin
|
||||
if (issue_stage_ready) begin
|
||||
|
@ -192,12 +193,20 @@ module load_store_unit
|
|||
end
|
||||
|
||||
(* ramstyle = "MLAB, no_rw_check" *) id_t rd_to_id_table [32];
|
||||
(* ramstyle = "MLAB, no_rw_check" *) logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] rd_to_wb_group_table [32];
|
||||
|
||||
id_t store_forward_id;
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] store_forward_wb_group;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (instruction_issued_with_rd)
|
||||
if (instruction_issued_with_rd) begin
|
||||
rd_to_id_table[issue_stage.rd_addr] <= issue_stage.id;
|
||||
rd_to_wb_group_table[issue_stage.rd_addr] <= issue_rd_wb_group;
|
||||
end
|
||||
end
|
||||
|
||||
assign store_forward_id = rd_to_id_table[issue_rs_addr[RS2]];
|
||||
assign store_forward_wb_group = rs2_inuse ? rd_to_wb_group_table[issue_rs_addr[RS2]] : '0;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Alignment Exception
|
||||
|
@ -293,9 +302,9 @@ module load_store_unit
|
|||
.rst (rst),
|
||||
.gc (gc),
|
||||
.lsq (lsq),
|
||||
.store_forward_wb_group (store_forward_wb_group),
|
||||
.wb_packet (wb_packet),
|
||||
.retire_ids (retire_ids),
|
||||
.retire_port_valid (retire_port_valid)
|
||||
.store_retire (store_retire)
|
||||
);
|
||||
assign shared_inputs = sel_load ? lsq.load_data_out : lsq.store_data_out;
|
||||
assign lsq.load_pop = sub_unit_load_issue;
|
||||
|
|
|
@ -36,25 +36,31 @@ module store_queue
|
|||
input logic lq_push,
|
||||
input logic lq_pop,
|
||||
store_queue_interface.queue sq,
|
||||
input logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] store_forward_wb_group,
|
||||
|
||||
//Address hash (shared by loads and stores)
|
||||
input addr_hash_t addr_hash,
|
||||
//hash check on adding a load to the queue
|
||||
output logic [CONFIG.SQ_DEPTH-1:0] potential_store_conflicts,
|
||||
//Load issue collision check
|
||||
input logic [CONFIG.SQ_DEPTH-1:0] prev_store_conflicts,
|
||||
output logic store_conflict,
|
||||
output logic [LOG2_SQ_DEPTH-1:0] sq_index,
|
||||
output logic [LOG2_SQ_DEPTH-1:0] sq_oldest,
|
||||
output logic potential_store_conflict,
|
||||
|
||||
//Writeback snooping
|
||||
input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
|
||||
|
||||
//Retire
|
||||
input id_t retire_ids [RETIRE_PORTS],
|
||||
input logic retire_port_valid [RETIRE_PORTS]
|
||||
input retire_packet_t store_retire
|
||||
);
|
||||
|
||||
localparam LOG2_SQ_DEPTH = $clog2(CONFIG.SQ_DEPTH);
|
||||
typedef logic [LOG2_MAX_IDS:0] load_check_count_t;
|
||||
localparam NUM_OF_FORWARDING_PORTS = CONFIG.NUM_WB_GROUPS - 1;
|
||||
typedef struct packed {
|
||||
id_t id_needed;
|
||||
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] wb_group;
|
||||
logic [LOG2_SQ_DEPTH-1:0] sq_index;
|
||||
} retire_table_t;
|
||||
retire_table_t retire_table_in;
|
||||
retire_table_t retire_table_out;
|
||||
|
||||
wb_packet_t wb_snoop [CONFIG.NUM_WB_GROUPS];
|
||||
wb_packet_t wb_snoop_r [CONFIG.NUM_WB_GROUPS];
|
||||
|
@ -63,26 +69,19 @@ module store_queue
|
|||
logic [CONFIG.SQ_DEPTH-1:0] valid;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] valid_next;
|
||||
addr_hash_t [CONFIG.SQ_DEPTH-1:0] hashes;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] data_needed;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] released;
|
||||
id_t [CONFIG.SQ_DEPTH-1:0] ids;
|
||||
id_t [CONFIG.SQ_DEPTH-1:0] id_needed;
|
||||
load_check_count_t [CONFIG.SQ_DEPTH-1:0] load_check_count;
|
||||
logic [31:0] store_data [CONFIG.SQ_DEPTH];
|
||||
|
||||
//LUTRAM-based memory blocks
|
||||
sq_entry_t sq_entry_in;
|
||||
sq_entry_t output_entry;
|
||||
|
||||
load_check_count_t [CONFIG.SQ_DEPTH-1:0] load_check_count_next;
|
||||
|
||||
logic [LOG2_SQ_DEPTH-1:0] sq_index;
|
||||
logic [LOG2_SQ_DEPTH-1:0] sq_index_next;
|
||||
logic [LOG2_SQ_DEPTH-1:0] sq_oldest;
|
||||
logic [LOG2_SQ_DEPTH:0] released_count;
|
||||
|
||||
logic [CONFIG.SQ_DEPTH-1:0] new_request_one_hot;
|
||||
logic [CONFIG.SQ_DEPTH-1:0] issued_one_hot;
|
||||
|
||||
logic [31:0] data_pre_alignment;
|
||||
logic [31:0] sq_data_out;
|
||||
////////////////////////////////////////////////////
|
||||
//Implementation
|
||||
assign sq_index_next = sq_index +LOG2_SQ_DEPTH'(sq.push);
|
||||
|
@ -97,7 +96,7 @@ module store_queue
|
|||
if (rst)
|
||||
sq_oldest <= 0;
|
||||
else
|
||||
sq_oldest <= sq_oldest +LOG2_SQ_DEPTH'(sq.pop);
|
||||
sq_oldest <= sq_oldest + LOG2_SQ_DEPTH'(sq.pop);
|
||||
end
|
||||
|
||||
assign new_request_one_hot = CONFIG.SQ_DEPTH'(sq.push) << sq_index;
|
||||
|
@ -117,7 +116,7 @@ module store_queue
|
|||
if (rst)
|
||||
sq.full <= 0;
|
||||
else
|
||||
sq.full <= valid_next[sq_index_next] | (|load_check_count_next[sq_index_next]);
|
||||
sq.full <= valid_next[sq_index_next];
|
||||
end
|
||||
|
||||
//SQ attributes and issue data
|
||||
|
@ -125,8 +124,8 @@ module store_queue
|
|||
addr : sq.data_in.addr,
|
||||
be : sq.data_in.be,
|
||||
fn3 : sq.data_in.fn3,
|
||||
forwarded_store : sq.data_in.forwarded_store,
|
||||
data : sq.data_in.data
|
||||
forwarded_store : '0,
|
||||
data : '0
|
||||
};
|
||||
lutram_1w_1r #(.WIDTH($bits(sq_entry_t)), .DEPTH(CONFIG.SQ_DEPTH))
|
||||
store_attr (
|
||||
|
@ -138,63 +137,38 @@ module store_queue
|
|||
.ram_data_out(output_entry)
|
||||
);
|
||||
|
||||
//Keep count of the number of pending loads that might need a store result
|
||||
//Mask out any store completing on this cycle
|
||||
//Compare store addr-hashes against new load addr-hash
|
||||
//Optionally mask out any store completing on this cycle (~issued_one_hot)
|
||||
//Without masking out an issuing store, the store queue may be flushed more often
|
||||
//Omitted as negligible impact on embench at sq depth 4
|
||||
always_comb begin
|
||||
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
|
||||
potential_store_conflicts[i] = (valid[i] & ~issued_one_hot[i]) & (addr_hash == hashes[i]);
|
||||
load_check_count_next[i] = load_check_count[i]
|
||||
+ (LOG2_MAX_IDS+1)'({potential_store_conflicts[i] & lq_push})
|
||||
- (LOG2_MAX_IDS+1)'({prev_store_conflicts[i] & lq_pop});
|
||||
end
|
||||
potential_store_conflict = 0;
|
||||
for (int i = 0; i < CONFIG.SQ_DEPTH; i++)
|
||||
potential_store_conflict |= {valid[i], addr_hash} == {1'b1, hashes[i]};
|
||||
end
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
load_check_count <= '0;
|
||||
else
|
||||
load_check_count <= load_check_count_next;
|
||||
end
|
||||
|
||||
//If a potential blocking store has not been issued yet, the load is blocked until the store(s) complete
|
||||
assign store_conflict = |(prev_store_conflicts & valid);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Register-based storage
|
||||
//IDs of stores
|
||||
//ID needed for forwarded data
|
||||
//Address hashes
|
||||
always_ff @ (posedge clk) begin
|
||||
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
|
||||
if (new_request_one_hot[i]) begin
|
||||
id_needed[i] <= sq.data_in.id_needed;
|
||||
ids[i] <= sq.data_in.id;
|
||||
if (new_request_one_hot[i])
|
||||
hashes[i] <= addr_hash;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Release Handling
|
||||
logic [CONFIG.SQ_DEPTH-1:0] newly_released;
|
||||
always_comb begin
|
||||
newly_released = '0;
|
||||
for (int i = 0; i < CONFIG.SQ_DEPTH; i++)
|
||||
for (int j = 0; j < RETIRE_PORTS; j++)
|
||||
newly_released[i] |= {1'b1, ids[i]} == {retire_port_valid[j], retire_ids[j]};
|
||||
end
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
released <= 0;
|
||||
released_count <= 0;
|
||||
else
|
||||
released <= (released | (newly_released & valid)) & ~issued_one_hot;
|
||||
released_count <= released_count + (LOG2_SQ_DEPTH + 1)'(store_retire.valid) - (LOG2_SQ_DEPTH + 1)'(sq.pop);
|
||||
end
|
||||
|
||||
assign sq.no_released_stores_pending = ~|released;
|
||||
assign sq.no_released_stores_pending = ~|released_count;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Forwarding and Store Data
|
||||
//Need to support forwarding from any multi-cycle writeback port
|
||||
//Forwarding is only needed from multi-cycle writeback ports
|
||||
//Currently this is the LS port [1] and the MUL/DIV/CSR port [2]
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
|
@ -202,39 +176,62 @@ module store_queue
|
|||
wb_snoop_r <= wb_snoop;
|
||||
end
|
||||
|
||||
logic [CONFIG.SQ_DEPTH-1:0] write_forward [2];
|
||||
always_comb begin
|
||||
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
|
||||
write_forward[0][i] = CONFIG.INCLUDE_FORWARDING_TO_STORES & {1'b1, wb_snoop_r[1].valid, wb_snoop_r[1].id} == {data_needed[i], 1'b1, id_needed[i]};
|
||||
write_forward[1][i] = CONFIG.INCLUDE_FORWARDING_TO_STORES & {1'b1, wb_snoop_r[2].valid, wb_snoop_r[2].id} == {data_needed[i], 1'b1, id_needed[i]};
|
||||
end
|
||||
end
|
||||
assign retire_table_in = '{id_needed : sq.data_in.id_needed, wb_group : store_forward_wb_group, sq_index : sq_index};
|
||||
lutram_1w_1r #(.WIDTH($bits(retire_table_t)), .DEPTH(MAX_IDS))
|
||||
store_retire_table_lutram (
|
||||
.clk(clk),
|
||||
.waddr(sq.data_in.id),
|
||||
.raddr(store_retire.phys_id),
|
||||
.ram_write(sq.push),
|
||||
.new_ram_data(retire_table_in),
|
||||
.ram_data_out(retire_table_out)
|
||||
);
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (rst)
|
||||
data_needed <= 0;
|
||||
else
|
||||
data_needed <= (data_needed | (new_request_one_hot & {CONFIG.SQ_DEPTH{sq.data_in.forwarded_store}})) & ~(write_forward[0] | write_forward[1]);
|
||||
end
|
||||
logic [31:0] wb_data [NUM_OF_FORWARDING_PORTS+1];
|
||||
|
||||
//Data issued with the store can be stored by store-id
|
||||
lutram_1w_1r #(.WIDTH(32), .DEPTH(MAX_IDS))
|
||||
non_forwarded_port (
|
||||
.clk(clk),
|
||||
.waddr(sq.data_in.id),
|
||||
.raddr(store_retire.phys_id),
|
||||
.ram_write(sq.push),
|
||||
.new_ram_data(sq.data_in.data),
|
||||
.ram_data_out(wb_data[0])
|
||||
);
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
|
||||
if (write_forward[0][i] | write_forward[1][i] | new_request_one_hot[i])
|
||||
store_data[i] <= write_forward[0][i] ? wb_snoop_r[1].data : (write_forward[1][i] ? wb_snoop_r[2].data : sq.data_in.data);
|
||||
end
|
||||
//Data from wb ports is stored by ID and then accessed by store-id to store-id-needed translation
|
||||
generate
|
||||
for (genvar i = 0; i < NUM_OF_FORWARDING_PORTS; i++) begin : lutrams
|
||||
lutram_1w_1r #(.WIDTH(32), .DEPTH(MAX_IDS))
|
||||
writeback_port (
|
||||
.clk(clk),
|
||||
.waddr(wb_snoop[i+1].id),
|
||||
.raddr(retire_table_out.id_needed),
|
||||
.ram_write(wb_snoop[i+1].valid),
|
||||
.new_ram_data(wb_snoop[i+1].data),
|
||||
.ram_data_out(wb_data[i+1])
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
//Final storage table for the store queue
|
||||
//SQ-index addressed
|
||||
lutram_1w_1r #(.WIDTH(32), .DEPTH(CONFIG.SQ_DEPTH))
|
||||
sq_data_lutram (
|
||||
.clk(clk),
|
||||
.waddr(retire_table_out.sq_index),
|
||||
.raddr(sq_oldest),
|
||||
.ram_write(store_retire.valid),
|
||||
.new_ram_data(wb_data[retire_table_out.wb_group]),
|
||||
.ram_data_out(data_pre_alignment)
|
||||
);
|
||||
////////////////////////////////////////////////////
|
||||
//Store Transaction Outputs
|
||||
logic [31:0] data_pre_alignment;
|
||||
logic [31:0] sq_data_out;
|
||||
|
||||
always_comb begin
|
||||
//Input: ABCD
|
||||
//Assuming aligned requests,
|
||||
//Possible byte selections: (A/C/D, B/D, C/D, D)
|
||||
data_pre_alignment = store_data[sq_oldest];
|
||||
|
||||
sq_data_out[7:0] = data_pre_alignment[7:0];
|
||||
sq_data_out[15:8] = (output_entry.addr[1:0] == 2'b01) ? data_pre_alignment[7:0] : data_pre_alignment[15:8];
|
||||
sq_data_out[23:16] = (output_entry.addr[1:0] == 2'b10) ? data_pre_alignment[7:0] : data_pre_alignment[23:16];
|
||||
|
@ -245,12 +242,12 @@ module store_queue
|
|||
endcase
|
||||
end
|
||||
|
||||
assign sq.valid = released[sq_oldest];
|
||||
assign sq.valid = |released_count;
|
||||
assign sq.data_out = '{
|
||||
addr : output_entry.addr,
|
||||
be : output_entry.be,
|
||||
fn3 : output_entry.fn3,
|
||||
forwarded_store : output_entry.forwarded_store,
|
||||
forwarded_store : 0,
|
||||
data : sq_data_out
|
||||
};
|
||||
|
||||
|
|
|
@ -416,7 +416,7 @@ module cva5_sim
|
|||
end
|
||||
|
||||
//LS Stats
|
||||
stats[LSU_LOAD_BLOCKED_BY_STORE_STAT] = `LSQ_P.lq.valid & `LSQ_P.store_conflict;
|
||||
stats[LSU_LOAD_BLOCKED_BY_STORE_STAT] = `LSQ_P.lq.valid & `LSQ_P.load_blocked;
|
||||
stats[LSU_SUB_UNIT_STALL_STAT] = (`LS_P.lsq.load_valid | `LS_P.lsq.store_valid) & ~`LS_P.sub_unit_ready;
|
||||
stats[LSU_DC_HIT_STAT] = dcache_hit;
|
||||
stats[LSU_DC_MISS_STAT] = dcache_miss;
|
||||
|
|
|
@ -551,7 +551,7 @@ module cva5_sim
|
|||
end
|
||||
|
||||
//LS Stats
|
||||
stats[LSU_LOAD_BLOCKED_BY_STORE_STAT] = `LSQ_P.lq.valid & `LSQ_P.store_conflict;
|
||||
stats[LSU_LOAD_BLOCKED_BY_STORE_STAT] = `LSQ_P.lq.valid & `LSQ_P.load_blocked;
|
||||
stats[LSU_SUB_UNIT_STALL_STAT] = (`LS_P.lsq.load_valid | `LS_P.lsq.store_valid) & ~`LS_P.sub_unit_ready;
|
||||
stats[LSU_DC_HIT_STAT] = dcache_hit;
|
||||
stats[LSU_DC_MISS_STAT] = dcache_miss;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue