Store queue data forwarding restructure

Signed-off-by: Eric Matthews <ematthew@sfu.ca>
This commit is contained in:
Eric Matthews 2023-02-01 23:07:51 -05:00
parent 4bada38942
commit f15fe83a9c
8 changed files with 172 additions and 116 deletions

View file

@ -157,12 +157,14 @@ module cva5
logic decode_uses_rd;
rs_addr_t decode_rd_addr;
exception_sources_t decode_exception_unit;
logic decode_is_store;
phys_addr_t decode_phys_rd_addr;
phys_addr_t decode_phys_rs_addr [REGFILE_READ_PORTS];
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] decode_rs_wb_group [REGFILE_READ_PORTS];
//ID freeing
retire_packet_t retire;
retire_packet_t store_retire;
id_t retire_ids [RETIRE_PORTS];
id_t retire_ids_next [RETIRE_PORTS];
logic retire_port_valid [RETIRE_PORTS];
@ -195,6 +197,7 @@ module cva5
logic issue_stage_ready;
phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS];
rs_addr_t issue_rs_addr [REGFILE_READ_PORTS];
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rd_wb_group;
logic illegal_instruction;
logic instruction_issued;
logic instruction_issued_with_rd;
@ -241,11 +244,13 @@ module cva5
.decode_rd_addr (decode_rd_addr),
.decode_phys_rd_addr (decode_phys_rd_addr),
.decode_exception_unit (decode_exception_unit),
.decode_is_store (decode_is_store),
.issue (issue),
.instruction_issued (instruction_issued),
.instruction_issued_with_rd (instruction_issued_with_rd),
.wb_packet (wb_packet),
.retire (retire),
.store_retire (store_retire),
.retire_ids (retire_ids),
.retire_ids_next (retire_ids_next),
.retire_port_valid(retire_port_valid),
@ -371,9 +376,10 @@ module cva5
.instruction_issued (instruction_issued),
.instruction_issued_with_rd (instruction_issued_with_rd),
.issue (issue),
.issue_rs_addr (issue_rs_addr),
.issue_stage_ready (issue_stage_ready),
.issue_phys_rs_addr (issue_phys_rs_addr),
.issue_rs_addr (issue_rs_addr),
.issue_rd_wb_group (issue_rd_wb_group),
.rf (rf_issue),
.constant_alu (constant_alu),
.unit_issue (unit_issue),
@ -447,8 +453,10 @@ module cva5
.unit_needed (unit_needed[UNIT_IDS.LS]),
.uses_rs (unit_uses_rs[UNIT_IDS.LS]),
.uses_rd (unit_uses_rd[UNIT_IDS.LS]),
.decode_is_store (decode_is_store),
.instruction_issued_with_rd (instruction_issued_with_rd),
.issue_rs_addr (issue_rs_addr),
.issue_rd_wb_group (issue_rd_wb_group),
.rs2_inuse (rf_issue.inuse[RS2]),
.rf (rf_issue.data),
.issue (unit_issue[UNIT_IDS.LS]),
@ -465,8 +473,7 @@ module cva5
.dwishbone (dwishbone),
.data_bram (data_bram),
.wb_packet (wb_packet),
.retire_ids (retire_ids),
.retire_port_valid(retire_port_valid),
.store_retire (store_retire),
.exception (exception[LS_EXCEPTION]),
.load_store_status(load_store_status),
.wb (unit_wb2[LS_UNIT_WB2_ID])

View file

@ -62,6 +62,7 @@ module decode_and_issue
output issue_packet_t issue,
output rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
output phys_addr_t issue_phys_rs_addr [REGFILE_READ_PORTS],
output logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rd_wb_group,
output logic issue_stage_ready,
//Register File
@ -153,7 +154,6 @@ module decode_and_issue
assign decode_phys_rd_addr = renamer.phys_rd_addr;
assign decode_phys_rs_addr = renamer.phys_rs_addr;
assign decode_rs_wb_group = renamer.rs_wb_group;
////////////////////////////////////////////////////
//Issue
always_ff @(posedge clk) begin
@ -168,6 +168,7 @@ module decode_and_issue
issue_rs_wb_group <= renamer.rs_wb_group;
issue.rd_addr <= decode_instruction.rd_addr;
issue.phys_rd_addr <= renamer.phys_rd_addr;
issue_rd_wb_group <= renamer_wb_group;
issue.is_multicycle <= ~unit_needed[UNIT_IDS.ALU];
issue.id <= decode.id;
issue.exception_unit <= decode_exception_unit;

View file

@ -53,6 +53,7 @@ module instruction_metadata_and_id_management
input logic decode_uses_rd,
input rs_addr_t decode_rd_addr,
input exception_sources_t decode_exception_unit,
input logic decode_is_store,
//renamer
input phys_addr_t decode_phys_rd_addr,
@ -66,6 +67,7 @@ module instruction_metadata_and_id_management
//Retirer
output retire_packet_t retire,
output retire_packet_t store_retire,
output id_t retire_ids [RETIRE_PORTS],
output id_t retire_ids_next [RETIRE_PORTS],
output logic retire_port_valid [RETIRE_PORTS],
@ -82,6 +84,7 @@ module instruction_metadata_and_id_management
(* ramstyle = "MLAB, no_rw_check" *) logic [0:0] valid_fetch_addr_table [MAX_IDS];
(* ramstyle = "MLAB, no_rw_check" *) logic [0:0] uses_rd_table [MAX_IDS];
(* ramstyle = "MLAB, no_rw_check" *) logic [0:0] is_store_table [MAX_IDS];
(* ramstyle = "MLAB, no_rw_check" *) logic [$bits(fetch_metadata_t)-1:0] fetch_metadata_table [MAX_IDS];
@ -98,6 +101,8 @@ module instruction_metadata_and_id_management
logic [LOG2_MAX_IDS:0] inflight_count;
retire_packet_t retire_next;
retire_packet_t store_retire_next;
logic retire_port_valid_next [RETIRE_PORTS];
genvar i;
@ -137,6 +142,15 @@ module instruction_metadata_and_id_management
uses_rd_table[decode_id] <= decode_uses_rd & |decode_rd_addr;
end
////////////////////////////////////////////////////
//Is store table
//Number of read ports = RETIRE_PORTS
always_ff @ (posedge clk) begin
if (decode_advance)
is_store_table[decode_id] <= decode_is_store;
end
////////////////////////////////////////////////////
//Exception unit table
always_ff @ (posedge clk) begin
@ -184,7 +198,6 @@ module instruction_metadata_and_id_management
if (~gc.retire_hold)
retire_ids[i] <= retire_ids_next[i];
end
end endgenerate
//Represented as a negative value so that the MSB indicates that the decode stage is valid
@ -262,12 +275,15 @@ module instruction_metadata_and_id_management
logic contiguous_retire;
logic id_is_post_issue [RETIRE_PORTS];
logic id_ready_to_retire [RETIRE_PORTS];
logic [LOG2_RETIRE_PORTS-1:0] phys_id_sel;
logic [LOG2_RETIRE_PORTS-1:0] retire_with_rd_sel;
logic [LOG2_RETIRE_PORTS-1:0] retire_with_store_sel;
logic [RETIRE_PORTS-1:0] retire_id_uses_rd;
logic [RETIRE_PORTS-1:0] retire_id_is_store;
logic [RETIRE_PORTS-1:0] retire_id_waiting_for_writeback;
generate for (i = 0; i < RETIRE_PORTS; i++) begin : gen_retire_writeback
assign retire_id_uses_rd[i] = uses_rd_table[retire_ids_next[i]];
assign retire_id_is_store[i] = is_store_table[retire_ids_next[i]];
assign retire_id_waiting_for_writeback[i] = id_waiting_for_writeback[i];
end endgenerate
@ -277,29 +293,34 @@ module instruction_metadata_and_id_management
//If an exception is pending, only retire a single instruction per cycle. As such, the pending
//exception will have to become the oldest instruction retire_ids[0] before it can retire.
logic retire_with_rd_found;
logic retire_with_store_found;
always_comb begin
contiguous_retire = ~gc.retire_hold;
retire_with_rd_found = 0;
retire_with_store_found = 0;
for (int i = 0; i < RETIRE_PORTS; i++) begin
id_is_post_issue[i] = post_issue_count > ID_COUNTER_W'(i);
id_ready_to_retire[i] = (id_is_post_issue[i] & contiguous_retire & ~id_waiting_for_writeback[i]);
retire_port_valid_next[i] = id_ready_to_retire[i] & ~(retire_id_uses_rd[i] & retire_with_rd_found);
retire_port_valid_next[i] = id_ready_to_retire[i] & ~((retire_id_uses_rd[i] & retire_with_rd_found) | (retire_id_is_store[i] & retire_with_store_found));
retire_with_rd_found |= retire_port_valid_next[i] & retire_id_uses_rd[i];
retire_with_store_found |= retire_port_valid_next[i] & retire_id_is_store[i];
contiguous_retire &= retire_port_valid_next[i] & ~gc.exception_pending;
end
end
//retire_next packet
priority_encoder #(.WIDTH(RETIRE_PORTS))
phys_id_sel_encoder (
retire_with_rd_sel_encoder (
.priority_vector (retire_id_uses_rd),
.encoded_result (phys_id_sel)
.encoded_result (retire_with_rd_sel)
);
assign retire_next.phys_id = retire_ids_next[phys_id_sel];
assign retire_next.valid = retire_with_rd_found;
assign retire_next.phys_id = retire_ids_next[retire_with_rd_sel];
assign retire_next.valid = retire_with_rd_found;
always_comb begin
retire_next.count = 0;
for (int i = 0; i < RETIRE_PORTS; i++) begin
@ -315,6 +336,19 @@ module instruction_metadata_and_id_management
retire_port_valid[i] <= retire_port_valid_next[i] & ~gc.writeback_supress;
end
priority_encoder #(.WIDTH(RETIRE_PORTS))
retire_with_store_sel_encoder (
.priority_vector (retire_id_is_store),
.encoded_result (retire_with_store_sel)
);
assign store_retire_next.phys_id = retire_ids_next[retire_with_store_sel];
assign store_retire_next.valid = retire_with_store_found;
assign store_retire_next.count = 1;
always_ff @ (posedge clk) begin
store_retire <= store_retire_next;
end
////////////////////////////////////////////////////
//Outputs
assign pc_id_available = ~inflight_count[LOG2_MAX_IDS];

View file

@ -35,26 +35,30 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
input gc_outputs_t gc,
load_store_queue_interface.queue lsq,
input logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] store_forward_wb_group,
//Writeback snooping
input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
//Retire release
input id_t retire_ids [RETIRE_PORTS],
input logic retire_port_valid [RETIRE_PORTS]
input retire_packet_t store_retire
);
localparam LOG2_SQ_DEPTH = $clog2(CONFIG.SQ_DEPTH);
typedef struct packed {
logic [31:0] addr;
logic [2:0] fn3;
id_t id;
phys_addr_t phys_addr;
logic [CONFIG.SQ_DEPTH-1:0] potential_store_conflicts;
logic store_collision;
logic [LOG2_SQ_DEPTH-1:0] sq_index;
} lq_entry_t;
logic [LOG2_SQ_DEPTH-1:0] sq_index;
logic [LOG2_SQ_DEPTH-1:0] sq_oldest;
addr_hash_t addr_hash;
logic [CONFIG.SQ_DEPTH-1:0] potential_store_conflicts;
logic potential_store_conflict;
sq_entry_t sq_entry;
logic store_conflict;
lq_entry_t lq_data_in;
lq_entry_t lq_data_out;
@ -95,7 +99,8 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
fn3 : lsq.data_in.fn3,
id : lsq.data_in.id,
phys_addr : lsq.data_in.phys_addr,
potential_store_conflicts : potential_store_conflicts
store_collision : potential_store_conflict,
sq_index : sq_index
};
assign lq.data_in = lq_data_in;
assign lq_data_out = lq.data_out;
@ -111,20 +116,23 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
.lq_push (lq.push),
.lq_pop (lq.pop),
.sq (sq),
.store_forward_wb_group (store_forward_wb_group),
.addr_hash (addr_hash),
.potential_store_conflicts (potential_store_conflicts),
.prev_store_conflicts (lq_data_out.potential_store_conflicts),
.store_conflict (store_conflict),
.potential_store_conflict (potential_store_conflict),
.sq_index (sq_index),
.sq_oldest (sq_oldest),
.wb_packet (wb_packet),
.retire_ids (retire_ids),
.retire_port_valid (retire_port_valid)
.store_retire (store_retire)
);
////////////////////////////////////////////////////
//Output
//Priority is for loads over stores.
//A store will be selected only if either no loads are ready, OR if the store queue is full and a store is ready
assign lsq.load_valid = lq.valid & ~store_conflict;
//A store will be selected only if no loads are ready
logic load_blocked;
assign load_blocked = (lq_data_out.store_collision & (lq_data_out.sq_index != sq_oldest));
assign lsq.load_valid = lq.valid & ~load_blocked;
assign lsq.store_valid = sq.valid;
assign lsq.load_data_out = '{

View file

@ -40,12 +40,14 @@ module load_store_unit
output logic unit_needed,
output logic [REGFILE_READ_PORTS-1:0] uses_rs,
output logic uses_rd,
output logic decode_is_store,
input issue_packet_t issue_stage,
input logic issue_stage_ready,
input logic instruction_issued_with_rd,
input logic rs2_inuse,
input rs_addr_t issue_rs_addr [REGFILE_READ_PORTS],
input logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] issue_rd_wb_group,
input logic [31:0] rf [REGFILE_READ_PORTS],
unit_issue_interface.unit issue,
@ -70,8 +72,7 @@ module load_store_unit
input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
//Retire release
input id_t retire_ids [RETIRE_PORTS],
input logic retire_port_valid [RETIRE_PORTS],
input retire_packet_t store_retire,
exception_interface.unit exception,
output load_store_status_t load_store_status,
@ -178,7 +179,7 @@ module load_store_unit
assign is_load = (instruction.upper_opcode inside {LOAD_T, AMO_T}) & !(amo.is_amo & amo.is_sc); //LR and AMO_ops perform a read operation as well
assign is_store = (instruction.upper_opcode == STORE_T) | (amo.is_amo & amo.is_sc);//Used for LS unit and for ID tracking
assign decode_is_store = is_store;
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
@ -192,12 +193,20 @@ module load_store_unit
end
(* ramstyle = "MLAB, no_rw_check" *) id_t rd_to_id_table [32];
(* ramstyle = "MLAB, no_rw_check" *) logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] rd_to_wb_group_table [32];
id_t store_forward_id;
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] store_forward_wb_group;
always_ff @ (posedge clk) begin
if (instruction_issued_with_rd)
if (instruction_issued_with_rd) begin
rd_to_id_table[issue_stage.rd_addr] <= issue_stage.id;
rd_to_wb_group_table[issue_stage.rd_addr] <= issue_rd_wb_group;
end
end
assign store_forward_id = rd_to_id_table[issue_rs_addr[RS2]];
assign store_forward_wb_group = rs2_inuse ? rd_to_wb_group_table[issue_rs_addr[RS2]] : '0;
////////////////////////////////////////////////////
//Alignment Exception
@ -293,9 +302,9 @@ module load_store_unit
.rst (rst),
.gc (gc),
.lsq (lsq),
.store_forward_wb_group (store_forward_wb_group),
.wb_packet (wb_packet),
.retire_ids (retire_ids),
.retire_port_valid (retire_port_valid)
.store_retire (store_retire)
);
assign shared_inputs = sel_load ? lsq.load_data_out : lsq.store_data_out;
assign lsq.load_pop = sub_unit_load_issue;

View file

@ -36,25 +36,31 @@ module store_queue
input logic lq_push,
input logic lq_pop,
store_queue_interface.queue sq,
input logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] store_forward_wb_group,
//Address hash (shared by loads and stores)
input addr_hash_t addr_hash,
//hash check on adding a load to the queue
output logic [CONFIG.SQ_DEPTH-1:0] potential_store_conflicts,
//Load issue collision check
input logic [CONFIG.SQ_DEPTH-1:0] prev_store_conflicts,
output logic store_conflict,
output logic [LOG2_SQ_DEPTH-1:0] sq_index,
output logic [LOG2_SQ_DEPTH-1:0] sq_oldest,
output logic potential_store_conflict,
//Writeback snooping
input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
//Retire
input id_t retire_ids [RETIRE_PORTS],
input logic retire_port_valid [RETIRE_PORTS]
input retire_packet_t store_retire
);
localparam LOG2_SQ_DEPTH = $clog2(CONFIG.SQ_DEPTH);
typedef logic [LOG2_MAX_IDS:0] load_check_count_t;
localparam NUM_OF_FORWARDING_PORTS = CONFIG.NUM_WB_GROUPS - 1;
typedef struct packed {
id_t id_needed;
logic [$clog2(CONFIG.NUM_WB_GROUPS)-1:0] wb_group;
logic [LOG2_SQ_DEPTH-1:0] sq_index;
} retire_table_t;
retire_table_t retire_table_in;
retire_table_t retire_table_out;
wb_packet_t wb_snoop [CONFIG.NUM_WB_GROUPS];
wb_packet_t wb_snoop_r [CONFIG.NUM_WB_GROUPS];
@ -63,26 +69,19 @@ module store_queue
logic [CONFIG.SQ_DEPTH-1:0] valid;
logic [CONFIG.SQ_DEPTH-1:0] valid_next;
addr_hash_t [CONFIG.SQ_DEPTH-1:0] hashes;
logic [CONFIG.SQ_DEPTH-1:0] data_needed;
logic [CONFIG.SQ_DEPTH-1:0] released;
id_t [CONFIG.SQ_DEPTH-1:0] ids;
id_t [CONFIG.SQ_DEPTH-1:0] id_needed;
load_check_count_t [CONFIG.SQ_DEPTH-1:0] load_check_count;
logic [31:0] store_data [CONFIG.SQ_DEPTH];
//LUTRAM-based memory blocks
sq_entry_t sq_entry_in;
sq_entry_t output_entry;
load_check_count_t [CONFIG.SQ_DEPTH-1:0] load_check_count_next;
logic [LOG2_SQ_DEPTH-1:0] sq_index;
logic [LOG2_SQ_DEPTH-1:0] sq_index_next;
logic [LOG2_SQ_DEPTH-1:0] sq_oldest;
logic [LOG2_SQ_DEPTH:0] released_count;
logic [CONFIG.SQ_DEPTH-1:0] new_request_one_hot;
logic [CONFIG.SQ_DEPTH-1:0] issued_one_hot;
logic [31:0] data_pre_alignment;
logic [31:0] sq_data_out;
////////////////////////////////////////////////////
//Implementation
assign sq_index_next = sq_index +LOG2_SQ_DEPTH'(sq.push);
@ -97,7 +96,7 @@ module store_queue
if (rst)
sq_oldest <= 0;
else
sq_oldest <= sq_oldest +LOG2_SQ_DEPTH'(sq.pop);
sq_oldest <= sq_oldest + LOG2_SQ_DEPTH'(sq.pop);
end
assign new_request_one_hot = CONFIG.SQ_DEPTH'(sq.push) << sq_index;
@ -117,7 +116,7 @@ module store_queue
if (rst)
sq.full <= 0;
else
sq.full <= valid_next[sq_index_next] | (|load_check_count_next[sq_index_next]);
sq.full <= valid_next[sq_index_next];
end
//SQ attributes and issue data
@ -125,8 +124,8 @@ module store_queue
addr : sq.data_in.addr,
be : sq.data_in.be,
fn3 : sq.data_in.fn3,
forwarded_store : sq.data_in.forwarded_store,
data : sq.data_in.data
forwarded_store : '0,
data : '0
};
lutram_1w_1r #(.WIDTH($bits(sq_entry_t)), .DEPTH(CONFIG.SQ_DEPTH))
store_attr (
@ -138,63 +137,38 @@ module store_queue
.ram_data_out(output_entry)
);
//Keep count of the number of pending loads that might need a store result
//Mask out any store completing on this cycle
//Compare store addr-hashes against new load addr-hash
//Optionally mask out any store completing on this cycle (~issued_one_hot)
//Without masking out an issuing store, the store queue may be flushed more often
//Omitted as negligible impact on embench at sq depth 4
always_comb begin
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
potential_store_conflicts[i] = (valid[i] & ~issued_one_hot[i]) & (addr_hash == hashes[i]);
load_check_count_next[i] = load_check_count[i]
+ (LOG2_MAX_IDS+1)'({potential_store_conflicts[i] & lq_push})
- (LOG2_MAX_IDS+1)'({prev_store_conflicts[i] & lq_pop});
end
potential_store_conflict = 0;
for (int i = 0; i < CONFIG.SQ_DEPTH; i++)
potential_store_conflict |= {valid[i], addr_hash} == {1'b1, hashes[i]};
end
always_ff @ (posedge clk) begin
if (rst)
load_check_count <= '0;
else
load_check_count <= load_check_count_next;
end
//If a potential blocking store has not been issued yet, the load is blocked until the store(s) complete
assign store_conflict = |(prev_store_conflicts & valid);
////////////////////////////////////////////////////
//Register-based storage
//IDs of stores
//ID needed for forwarded data
//Address hashes
always_ff @ (posedge clk) begin
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
if (new_request_one_hot[i]) begin
id_needed[i] <= sq.data_in.id_needed;
ids[i] <= sq.data_in.id;
if (new_request_one_hot[i])
hashes[i] <= addr_hash;
end
end
end
////////////////////////////////////////////////////
//Release Handling
logic [CONFIG.SQ_DEPTH-1:0] newly_released;
always_comb begin
newly_released = '0;
for (int i = 0; i < CONFIG.SQ_DEPTH; i++)
for (int j = 0; j < RETIRE_PORTS; j++)
newly_released[i] |= {1'b1, ids[i]} == {retire_port_valid[j], retire_ids[j]};
end
always_ff @ (posedge clk) begin
if (rst)
released <= 0;
released_count <= 0;
else
released <= (released | (newly_released & valid)) & ~issued_one_hot;
released_count <= released_count + (LOG2_SQ_DEPTH + 1)'(store_retire.valid) - (LOG2_SQ_DEPTH + 1)'(sq.pop);
end
assign sq.no_released_stores_pending = ~|released;
assign sq.no_released_stores_pending = ~|released_count;
////////////////////////////////////////////////////
//Forwarding and Store Data
//Need to support forwarding from any multi-cycle writeback port
//Forwarding is only needed from multi-cycle writeback ports
//Currently this is the LS port [1] and the MUL/DIV/CSR port [2]
always_ff @ (posedge clk) begin
@ -202,39 +176,62 @@ module store_queue
wb_snoop_r <= wb_snoop;
end
logic [CONFIG.SQ_DEPTH-1:0] write_forward [2];
always_comb begin
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
write_forward[0][i] = CONFIG.INCLUDE_FORWARDING_TO_STORES & {1'b1, wb_snoop_r[1].valid, wb_snoop_r[1].id} == {data_needed[i], 1'b1, id_needed[i]};
write_forward[1][i] = CONFIG.INCLUDE_FORWARDING_TO_STORES & {1'b1, wb_snoop_r[2].valid, wb_snoop_r[2].id} == {data_needed[i], 1'b1, id_needed[i]};
end
end
assign retire_table_in = '{id_needed : sq.data_in.id_needed, wb_group : store_forward_wb_group, sq_index : sq_index};
lutram_1w_1r #(.WIDTH($bits(retire_table_t)), .DEPTH(MAX_IDS))
store_retire_table_lutram (
.clk(clk),
.waddr(sq.data_in.id),
.raddr(store_retire.phys_id),
.ram_write(sq.push),
.new_ram_data(retire_table_in),
.ram_data_out(retire_table_out)
);
always_ff @ (posedge clk) begin
if (rst)
data_needed <= 0;
else
data_needed <= (data_needed | (new_request_one_hot & {CONFIG.SQ_DEPTH{sq.data_in.forwarded_store}})) & ~(write_forward[0] | write_forward[1]);
end
logic [31:0] wb_data [NUM_OF_FORWARDING_PORTS+1];
//Data issued with the store can be stored by store-id
lutram_1w_1r #(.WIDTH(32), .DEPTH(MAX_IDS))
non_forwarded_port (
.clk(clk),
.waddr(sq.data_in.id),
.raddr(store_retire.phys_id),
.ram_write(sq.push),
.new_ram_data(sq.data_in.data),
.ram_data_out(wb_data[0])
);
always_ff @ (posedge clk) begin
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
if (write_forward[0][i] | write_forward[1][i] | new_request_one_hot[i])
store_data[i] <= write_forward[0][i] ? wb_snoop_r[1].data : (write_forward[1][i] ? wb_snoop_r[2].data : sq.data_in.data);
end
//Data from wb ports is stored by writeback ID, then read using the id_needed that the retire table returns for the retiring store's ID
generate
for (genvar i = 0; i < NUM_OF_FORWARDING_PORTS; i++) begin : lutrams
lutram_1w_1r #(.WIDTH(32), .DEPTH(MAX_IDS))
writeback_port (
.clk(clk),
.waddr(wb_snoop[i+1].id),
.raddr(retire_table_out.id_needed),
.ram_write(wb_snoop[i+1].valid),
.new_ram_data(wb_snoop[i+1].data),
.ram_data_out(wb_data[i+1])
);
end
endgenerate
//Final storage table for the store queue
//SQ-index addressed
lutram_1w_1r #(.WIDTH(32), .DEPTH(CONFIG.SQ_DEPTH))
sq_data_lutram (
.clk(clk),
.waddr(retire_table_out.sq_index),
.raddr(sq_oldest),
.ram_write(store_retire.valid),
.new_ram_data(wb_data[retire_table_out.wb_group]),
.ram_data_out(data_pre_alignment)
);
////////////////////////////////////////////////////
//Store Transaction Outputs
logic [31:0] data_pre_alignment;
logic [31:0] sq_data_out;
always_comb begin
//Input: ABCD
//Assuming aligned requests,
//Possible byte selections: (A/C/D, B/D, C/D, D)
data_pre_alignment = store_data[sq_oldest];
sq_data_out[7:0] = data_pre_alignment[7:0];
sq_data_out[15:8] = (output_entry.addr[1:0] == 2'b01) ? data_pre_alignment[7:0] : data_pre_alignment[15:8];
sq_data_out[23:16] = (output_entry.addr[1:0] == 2'b10) ? data_pre_alignment[7:0] : data_pre_alignment[23:16];
@ -245,12 +242,12 @@ module store_queue
endcase
end
assign sq.valid = released[sq_oldest];
assign sq.valid = |released_count;
assign sq.data_out = '{
addr : output_entry.addr,
be : output_entry.be,
fn3 : output_entry.fn3,
forwarded_store : output_entry.forwarded_store,
forwarded_store : 0,
data : sq_data_out
};

View file

@ -416,7 +416,7 @@ module cva5_sim
end
//LS Stats
stats[LSU_LOAD_BLOCKED_BY_STORE_STAT] = `LSQ_P.lq.valid & `LSQ_P.store_conflict;
stats[LSU_LOAD_BLOCKED_BY_STORE_STAT] = `LSQ_P.lq.valid & `LSQ_P.load_blocked;
stats[LSU_SUB_UNIT_STALL_STAT] = (`LS_P.lsq.load_valid | `LS_P.lsq.store_valid) & ~`LS_P.sub_unit_ready;
stats[LSU_DC_HIT_STAT] = dcache_hit;
stats[LSU_DC_MISS_STAT] = dcache_miss;

View file

@ -551,7 +551,7 @@ module cva5_sim
end
//LS Stats
stats[LSU_LOAD_BLOCKED_BY_STORE_STAT] = `LSQ_P.lq.valid & `LSQ_P.store_conflict;
stats[LSU_LOAD_BLOCKED_BY_STORE_STAT] = `LSQ_P.lq.valid & `LSQ_P.load_blocked;
stats[LSU_SUB_UNIT_STALL_STAT] = (`LS_P.lsq.load_valid | `LS_P.lsq.store_valid) & ~`LS_P.sub_unit_ready;
stats[LSU_DC_HIT_STAT] = dcache_hit;
stats[LSU_DC_MISS_STAT] = dcache_miss;