mirror of
https://github.com/openhwgroup/cva5.git
synced 2025-04-20 03:57:18 -04:00
additional tracing for branch predictor and increased flexibility for store forwarding
This commit is contained in:
parent
bcf600831e
commit
5bd0f9eda1
11 changed files with 84 additions and 71 deletions
|
@ -34,6 +34,7 @@ module branch_unit(
|
|||
output branch_flush,
|
||||
|
||||
//Trace signals
|
||||
output logic tr_branch_correct,
|
||||
output logic tr_branch_misspredict,
|
||||
output logic tr_return_misspredict
|
||||
);
|
||||
|
@ -180,7 +181,8 @@ module branch_unit(
|
|||
////////////////////////////////////////////////////
|
||||
//Trace Interface
|
||||
generate if (ENABLE_TRACE_INTERFACE) begin
|
||||
assign tr_branch_misspredict = ~is_return & miss_predict;
|
||||
assign tr_branch_correct = ~jump_ex & branch_issued_r & ~miss_predict;
|
||||
assign tr_branch_misspredict = ~jump_ex & miss_predict;
|
||||
assign tr_return_misspredict = is_return & miss_predict;
|
||||
end
|
||||
endgenerate
|
||||
|
|
|
@ -189,7 +189,7 @@ module decode(
|
|||
assign issue_valid = fb_valid & ti.id_available & ~gc_issue_hold & ~gc_fetch_flush;
|
||||
|
||||
assign operands_ready = ~rf_decode.rs1_conflict & ~rf_decode.rs2_conflict;
|
||||
assign load_store_operands_ready = ~rf_decode.rs1_conflict & (~rf_decode.rs2_conflict | (rf_decode.rs2_conflict & load_store_forward_possible));
|
||||
assign load_store_operands_ready = ~rf_decode.rs1_conflict & (~rf_decode.rs2_conflict | (rf_decode.rs2_conflict & (opcode_trim == STORE_T)));
|
||||
|
||||
//All units share the same operand ready logic except load-store which has an internal forwarding path
|
||||
always_comb begin
|
||||
|
@ -267,31 +267,9 @@ module decode(
|
|||
assign ls_inputs.load = is_load;
|
||||
assign ls_inputs.store = is_store;
|
||||
assign ls_inputs.load_store_forward = rf_decode.rs2_conflict;
|
||||
assign ls_inputs.store_forward_id = rf_decode.rs2_id;
|
||||
assign ls_inputs.instruction_id = ti.issue_id;
|
||||
|
||||
//Last load RD tracking for Load-Store data forwarding
|
||||
logic [4:0] last_load_rd;
|
||||
logic basic_load;
|
||||
|
||||
assign basic_load = (opcode_trim == LOAD_T);
|
||||
always_ff @ (posedge clk) begin
|
||||
if (issue[LS_UNIT_WB_ID] & basic_load)
|
||||
last_load_rd <= future_rd_addr;
|
||||
end
|
||||
|
||||
initial begin
|
||||
foreach(register_in_use_by_load_op[i])
|
||||
register_in_use_by_load_op[i] = 0;
|
||||
end
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
if (instruction_issued_with_rd & ~rd_zero)
|
||||
register_in_use_by_load_op[future_rd_addr] <= new_request[LS_UNIT_WB_ID] & basic_load;
|
||||
end
|
||||
|
||||
assign store_data_in_use_by_load_op = register_in_use_by_load_op[rs2_addr];
|
||||
assign load_store_forward_possible = (opcode_trim == STORE_T) && store_data_in_use_by_load_op && (last_load_rd == rs2_addr);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//Branch unit inputs
|
||||
assign branch_inputs.rs1 = rf_decode.rs1_data;
|
||||
|
|
|
@ -112,10 +112,11 @@ interface register_file_decode_interface;
|
|||
logic uses_rs2;
|
||||
logic rs1_conflict;
|
||||
logic rs2_conflict;
|
||||
instruction_id_t rs2_id;
|
||||
logic instruction_issued;
|
||||
|
||||
modport decode (output future_rd_addr, rs1_addr, rs2_addr, instruction_issued, id, uses_rs1, uses_rs2, input rs1_conflict, rs2_conflict, rs1_data, rs2_data);
|
||||
modport unit (input future_rd_addr, rs1_addr, rs2_addr, instruction_issued, id, uses_rs1, uses_rs2, output rs1_conflict, rs2_conflict, rs1_data, rs2_data);
|
||||
modport decode (output future_rd_addr, rs1_addr, rs2_addr, instruction_issued, id, uses_rs1, uses_rs2, input rs1_conflict, rs2_conflict, rs1_data, rs2_data, rs2_id);
|
||||
modport unit (input future_rd_addr, rs1_addr, rs2_addr, instruction_issued, id, uses_rs1, uses_rs2, output rs1_conflict, rs2_conflict, rs1_data, rs2_data, rs2_id);
|
||||
endinterface
|
||||
|
||||
|
||||
|
|
|
@ -46,6 +46,12 @@ module load_store_unit (
|
|||
|
||||
local_memory_interface.master data_bram,
|
||||
|
||||
output instruction_id_t store_id,
|
||||
output instruction_id_t store_done_id,
|
||||
output logic store_complete,
|
||||
input logic [31:0] wb_buffer_data,
|
||||
input logic wb_buffer_data_valid,
|
||||
|
||||
input logic[31:0] csr_rd,
|
||||
input instruction_id_t csr_id,
|
||||
input logic csr_done,
|
||||
|
@ -72,10 +78,9 @@ module load_store_unit (
|
|||
ls_sub_unit_interface #(.BASE_ADDR(MEMORY_ADDR_L), .UPPER_BOUND(MEMORY_ADDR_H), .BIT_CHECK(BUS_BIT_CHECK)) cache();
|
||||
|
||||
logic units_ready;
|
||||
logic store_bypass_stall;
|
||||
logic store_ready;
|
||||
logic issue_request;
|
||||
logic load_complete;
|
||||
logic store_complete;
|
||||
|
||||
logic [31:0] virtual_address;
|
||||
logic [3:0] be;
|
||||
|
@ -139,12 +144,17 @@ module load_store_unit (
|
|||
//Primary Control Signals
|
||||
assign units_ready = &unit_ready;
|
||||
assign load_complete = |unit_data_valid;
|
||||
assign store_complete = stage2_attr.is_store & load_attributes.valid;
|
||||
|
||||
always_ff @ (posedge clk) begin
|
||||
store_done_id <= stage1.instruction_id;
|
||||
store_complete <= stage1.store & issue_request;
|
||||
end
|
||||
assign store_id = stage1.store_forward_id;
|
||||
|
||||
//When switching units, ensure no outstanding loads so that there can be no timing collisions with results
|
||||
assign unit_stall = (current_unit != last_unit) && ~load_attributes.empty;
|
||||
assign store_bypass_stall = stage1.store & stage1.load_store_forward & ~load_attributes.empty;
|
||||
assign issue_request = input_fifo.valid & units_ready & ~unit_stall & ~unaligned_addr & ~store_bypass_stall;
|
||||
assign store_ready = stage1.store & ((stage1.load_store_forward & wb_buffer_data_valid) | ~stage1.load_store_forward);
|
||||
assign issue_request = input_fifo.valid & units_ready & ~unit_stall & ~unaligned_addr & (~stage1.store | store_ready);
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
//TLB interface
|
||||
|
@ -205,7 +215,7 @@ module load_store_unit (
|
|||
assign shared_inputs.be = be;
|
||||
assign shared_inputs.fn3 = stage1.fn3;
|
||||
|
||||
assign stage1_raw_data = stage1.load_store_forward ? previous_load : stage1.rs2;
|
||||
assign stage1_raw_data = stage1.load_store_forward ? wb_buffer_data : stage1.rs2;
|
||||
|
||||
//Input: ABCD
|
||||
//Assuming aligned requests,
|
||||
|
@ -232,8 +242,8 @@ module load_store_unit (
|
|||
|
||||
assign load_attributes.data_in = load_attributes_in;
|
||||
|
||||
assign load_attributes.push = issue_request;
|
||||
assign load_attributes.pop = load_complete | store_complete;
|
||||
assign load_attributes.push = issue_request & stage1.load;
|
||||
assign load_attributes.pop = load_complete;
|
||||
|
||||
assign stage2_attr = load_attributes.data_out;
|
||||
|
||||
|
@ -325,7 +335,7 @@ module load_store_unit (
|
|||
always_ff @ (posedge clk) begin
|
||||
exception_complete <= (input_fifo.valid & ls_exception_valid & stage1.load);
|
||||
end
|
||||
assign ls_done = load_complete | exception_complete | store_complete;
|
||||
assign ls_done = load_complete | exception_complete;
|
||||
|
||||
assign wb.done = csr_done | ls_done;
|
||||
assign wb.id = csr_done ? csr_id : stage2_attr.instruction_id;
|
||||
|
|
|
@ -81,6 +81,7 @@ module register_file(
|
|||
|
||||
assign rf_wb.rs1_id = in_use_by[rf_decode.rs1_addr];
|
||||
assign rf_wb.rs2_id = in_use_by[rf_decode.rs2_addr];
|
||||
assign rf_decode.rs2_id = rf_wb.rs2_id;
|
||||
|
||||
assign valid_write = rf_wb.rd_nzero & rf_wb.retired;
|
||||
|
||||
|
|
|
@ -114,6 +114,13 @@ module taiga (
|
|||
logic instruction_complete;
|
||||
logic gc_flush_required;
|
||||
|
||||
//LS
|
||||
instruction_id_t store_id;
|
||||
instruction_id_t store_done_id;
|
||||
logic store_complete;
|
||||
logic [31:0] wb_buffer_data;
|
||||
logic wb_buffer_data_valid;
|
||||
|
||||
//Trace Interface Signals
|
||||
logic tr_operand_stall;
|
||||
logic tr_unit_stall;
|
||||
|
@ -128,6 +135,7 @@ module taiga (
|
|||
logic [31:0] tr_instruction_pc_dec;
|
||||
logic [31:0] tr_instruction_data_dec;
|
||||
|
||||
logic tr_branch_correct;
|
||||
logic tr_branch_misspredict;
|
||||
logic tr_return_misspredict;
|
||||
logic tr_wb_mux_contention;
|
||||
|
@ -221,6 +229,7 @@ module taiga (
|
|||
tr.events.branch_operand_stall <= tr_branch_operand_stall;
|
||||
tr.events.alu_operand_stall <= tr_alu_operand_stall;
|
||||
tr.events.ls_operand_stall <= tr_ls_operand_stall;
|
||||
tr.events.branch_correct <= tr_branch_correct;
|
||||
tr.events.branch_misspredict <= tr_branch_misspredict;
|
||||
tr.events.return_misspredict <= tr_return_misspredict;
|
||||
tr.events.wb_mux_contention <= tr_wb_mux_contention;
|
||||
|
|
|
@ -375,6 +375,7 @@ package taiga_types;
|
|||
logic load;
|
||||
logic store;
|
||||
logic load_store_forward;
|
||||
instruction_id_t store_forward_id;
|
||||
instruction_id_t instruction_id;
|
||||
//exception support
|
||||
logic [31:0] pc;
|
||||
|
@ -455,6 +456,7 @@ package taiga_types;
|
|||
logic ls_operand_stall;
|
||||
|
||||
//Branch Unit
|
||||
logic branch_correct;
|
||||
logic branch_misspredict;
|
||||
logic return_misspredict;
|
||||
|
||||
|
|
|
@ -36,6 +36,13 @@ module write_back(
|
|||
output logic instruction_queue_empty,
|
||||
output instruction_id_t oldest_id,
|
||||
|
||||
input instruction_id_t store_id,
|
||||
input instruction_id_t store_done_id,
|
||||
input logic store_complete,
|
||||
output logic [31:0] wb_buffer_data,
|
||||
output logic wb_buffer_data_valid,
|
||||
|
||||
|
||||
//Trace signals
|
||||
output logic tr_wb_mux_contention
|
||||
);
|
||||
|
@ -85,6 +92,8 @@ module write_back(
|
|||
id_done_new[i] |= 1;
|
||||
end
|
||||
end
|
||||
if (store_complete && store_done_id == i[$clog2(MAX_INFLIGHT_COUNT)-1:0])
|
||||
id_done_new[i] |= 1;
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -98,6 +107,9 @@ module write_back(
|
|||
end
|
||||
endgenerate
|
||||
|
||||
assign wb_buffer_data = rds_by_id[store_id];
|
||||
assign wb_buffer_data_valid = id_done_r[store_id];
|
||||
|
||||
//ID tracking
|
||||
id_tracking id_fifos (.*, .issued(ti.issued), .retired(retired), .id_available(ti.id_available),
|
||||
.oldest_id(oldest_id), .next_id(issue_id), .empty(instruction_queue_empty));
|
||||
|
|
|
@ -16,15 +16,15 @@
|
|||
</db_ref>
|
||||
</db_ref_list>
|
||||
<zoom_setting>
|
||||
<ZoomStartTime time="3978080000fs"></ZoomStartTime>
|
||||
<ZoomEndTime time="3994700001fs"></ZoomEndTime>
|
||||
<Cursor1Time time="3991800000fs"></Cursor1Time>
|
||||
<ZoomStartTime time="0fs"></ZoomStartTime>
|
||||
<ZoomEndTime time="511000001fs"></ZoomEndTime>
|
||||
<Cursor1Time time="333000000fs"></Cursor1Time>
|
||||
</zoom_setting>
|
||||
<column_width_setting>
|
||||
<NameColumnWidth column_width="259"></NameColumnWidth>
|
||||
<ValueColumnWidth column_width="248"></ValueColumnWidth>
|
||||
<ValueColumnWidth column_width="244"></ValueColumnWidth>
|
||||
</column_width_setting>
|
||||
<WVObjectSize size="164" />
|
||||
<WVObjectSize size="160" />
|
||||
<wvobject type="logic" fp_name="/taiga_full_simulation/clk">
|
||||
<obj_property name="ElementShortName">clk</obj_property>
|
||||
<obj_property name="ObjectShortName">clk</obj_property>
|
||||
|
@ -207,33 +207,25 @@
|
|||
<obj_property name="label">Register File</obj_property>
|
||||
<obj_property name="DisplayName">label</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="logic" fp_name="/taiga_full_simulation/uut/register_file_block/register_inuse_tracking/clr">
|
||||
<obj_property name="ElementShortName">clr</obj_property>
|
||||
<obj_property name="ObjectShortName">clr</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/sim_register">
|
||||
<obj_property name="ElementShortName">sim_register</obj_property>
|
||||
<obj_property name="ObjectShortName">sim_register</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="logic" fp_name="/taiga_full_simulation/uut/register_file_block/inuse_mem/issued">
|
||||
<obj_property name="ElementShortName">issued</obj_property>
|
||||
<obj_property name="ObjectShortName">issued</obj_property>
|
||||
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/register_inuse_tracking/reg_inuse_A">
|
||||
<obj_property name="ElementShortName">reg_inuse_A[31:0]</obj_property>
|
||||
<obj_property name="ObjectShortName">reg_inuse_A[31:0]</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/inuse_mem/rs1_addr">
|
||||
<obj_property name="ElementShortName">rs1_addr[4:0]</obj_property>
|
||||
<obj_property name="ObjectShortName">rs1_addr[4:0]</obj_property>
|
||||
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/register_inuse_tracking/reg_inuse_B">
|
||||
<obj_property name="ElementShortName">reg_inuse_B[31:0]</obj_property>
|
||||
<obj_property name="ObjectShortName">reg_inuse_B[31:0]</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/inuse_mem/rs2_addr">
|
||||
<obj_property name="ElementShortName">rs2_addr[4:0]</obj_property>
|
||||
<obj_property name="ObjectShortName">rs2_addr[4:0]</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="logic" fp_name="/taiga_full_simulation/uut/register_file_block/inuse_mem/rs1_inuse">
|
||||
<obj_property name="ElementShortName">rs1_inuse</obj_property>
|
||||
<obj_property name="ObjectShortName">rs1_inuse</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="logic" fp_name="/taiga_full_simulation/uut/register_file_block/inuse_mem/rs2_inuse">
|
||||
<obj_property name="ElementShortName">rs2_inuse</obj_property>
|
||||
<obj_property name="ObjectShortName">rs2_inuse</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="logic" fp_name="/taiga_full_simulation/uut/register_file_block/in_use_match">
|
||||
<obj_property name="ElementShortName">in_use_match</obj_property>
|
||||
<obj_property name="ObjectShortName">in_use_match</obj_property>
|
||||
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/register_inuse_tracking/sim_inuse">
|
||||
<obj_property name="ElementShortName">sim_inuse[31:0]</obj_property>
|
||||
<obj_property name="ObjectShortName">sim_inuse[31:0]</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/in_use_by">
|
||||
<obj_property name="ElementShortName">in_use_by[31:0][1:0]</obj_property>
|
||||
|
@ -411,10 +403,6 @@
|
|||
<obj_property name="label">L2</obj_property>
|
||||
<obj_property name="DisplayName">label</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="array" fp_name="/taiga_full_simulation/l2_arb/\request[0] /request">
|
||||
<obj_property name="ElementShortName">request</obj_property>
|
||||
<obj_property name="ObjectShortName">request</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="logic" fp_name="/taiga_full_simulation/l2_arb/\request[0] /request_push">
|
||||
<obj_property name="ElementShortName">request_push</obj_property>
|
||||
<obj_property name="ObjectShortName">request_push</obj_property>
|
||||
|
@ -495,10 +483,6 @@
|
|||
<obj_property name="ElementShortName">rd_data_ack</obj_property>
|
||||
<obj_property name="ObjectShortName">rd_data_ack</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="array" fp_name="/taiga_full_simulation/l2_arb/mem/request">
|
||||
<obj_property name="ElementShortName">request</obj_property>
|
||||
<obj_property name="ObjectShortName">request</obj_property>
|
||||
</wvobject>
|
||||
<wvobject type="logic" fp_name="/taiga_full_simulation/l2_arb/mem/request_pop">
|
||||
<obj_property name="ElementShortName">request_pop</obj_property>
|
||||
<obj_property name="ObjectShortName">request_pop</obj_property>
|
||||
|
|
|
@ -46,6 +46,7 @@ static const char * const eventNames[] = {
|
|||
"branch_operand_stall",
|
||||
"alu_operand_stall",
|
||||
"ls_operand_stall",
|
||||
"branch_correct",
|
||||
"branch_misspredict",
|
||||
"return_misspredict",
|
||||
"wb_mux_contention",
|
||||
|
|
|
@ -57,6 +57,8 @@ embench_hw = $(addsuffix .hw_init, $(EMBENCH_BENCHMARKS))
|
|||
embench_sim = $(addsuffix .sim_init, $(EMBENCH_BENCHMARKS))
|
||||
###############################################################
|
||||
|
||||
COREMARK_DIR=/home/ematthew/Research/RISCV/software/coremark
|
||||
|
||||
#Binary to Verilog HW init file
|
||||
###############################################################
|
||||
ELF_TO_HW_INIT ?= python3 $(TAIGA_DIR)/tools/taiga_binary_converter.py $(RISCV_PREFIX) 0x80000000 131072
|
||||
|
@ -86,6 +88,17 @@ build_embench :
|
|||
make
|
||||
|
||||
|
||||
.PHONY: build_coremark
|
||||
build_coremark:
|
||||
$(MAKE) -C $(COREMARK_DIR) compile PORT_DIR=taiga-sim ITERATIONS=5000
|
||||
cd $(MAKEFILE_DIR);
|
||||
$(ELF_TO_HW_INIT) $(COREMARK_DIR)/coremark.bin coremark.hw_init coremark.sim_init
|
||||
|
||||
.PHONY: run_coremark_verilator
|
||||
run_coremark_verilator :
|
||||
./verilator_local_mem_test/Vtaiga_local_mem "/dev/null" "/dev/null" $(TAIGA_DIR)/tools/coremark.hw_init $(VERILATOR_TRACE_FILE) >> $@
|
||||
|
||||
|
||||
#Benchmarks already built
|
||||
.PHONY : $(EMBENCH_BENCHMARKS)
|
||||
|
||||
|
@ -126,7 +139,7 @@ run_compliance_tests_verilator:
|
|||
|
||||
.PHONY: run_dhrystone_verilator
|
||||
run_dhrystone_verilator :
|
||||
./verilator_local_mem_test/Vtaiga_local_mem "/dev/null" "/dev/null" /home/ematthew/Research/RISCV/software/taiga-benchmarks/dhrystone.riscv.hw_init $(VERILATOR_TRACE_FILE) >> $@
|
||||
./verilator_local_mem_test/Vtaiga_local_mem "/dev/null" "/dev/null" /home/ematthew/Research/RISCV/software/taiga-benchmarks/dhrystone.riscv.hw_init $(VERILATOR_TRACE_FILE) > $@
|
||||
|
||||
clean:
|
||||
rm -rf $(CRUFT)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue