additional tracing for branch predictor and increased flexibility for store forwarding

This commit is contained in:
Eric Matthews 2019-09-24 13:39:32 -07:00
parent bcf600831e
commit 5bd0f9eda1
11 changed files with 84 additions and 71 deletions

View file

@ -34,6 +34,7 @@ module branch_unit(
output branch_flush,
//Trace signals
output logic tr_branch_correct,
output logic tr_branch_misspredict,
output logic tr_return_misspredict
);
@ -180,7 +181,8 @@ module branch_unit(
////////////////////////////////////////////////////
//Trace Interface
//Trace event outputs; only driven when ENABLE_TRACE_INTERFACE is set.
//NOTE(review): tr_branch_misspredict has two assigns below (one gated by
//~is_return, one by ~jump_ex). This looks like the pre- and post-change
//versions of the same line shown together -- confirm only one driver exists.
generate if (ENABLE_TRACE_INTERFACE) begin
assign tr_branch_misspredict = ~is_return & miss_predict;
//Correct prediction: branch_issued_r set, not a jump (jump_ex low) and no miss_predict
assign tr_branch_correct = ~jump_ex & branch_issued_r & ~miss_predict;
//Mispredict on anything that is not a jump (jump_ex low)
assign tr_branch_misspredict = ~jump_ex & miss_predict;
//Mispredict on an instruction flagged as a return
assign tr_return_misspredict = is_return & miss_predict;
end
endgenerate

View file

@ -189,7 +189,7 @@ module decode(
assign issue_valid = fb_valid & ti.id_available & ~gc_issue_hold & ~gc_fetch_flush;
assign operands_ready = ~rf_decode.rs1_conflict & ~rf_decode.rs2_conflict;
assign load_store_operands_ready = ~rf_decode.rs1_conflict & (~rf_decode.rs2_conflict | (rf_decode.rs2_conflict & load_store_forward_possible));
assign load_store_operands_ready = ~rf_decode.rs1_conflict & (~rf_decode.rs2_conflict | (rf_decode.rs2_conflict & (opcode_trim == STORE_T)));
//All units share the same operand ready logic except load-store which has an internal forwarding path
always_comb begin
@ -267,31 +267,9 @@ module decode(
assign ls_inputs.load = is_load;
assign ls_inputs.store = is_store;
assign ls_inputs.load_store_forward = rf_decode.rs2_conflict;
assign ls_inputs.store_forward_id = rf_decode.rs2_id;
assign ls_inputs.instruction_id = ti.issue_id;
//Last store RD tracking for Load-Store data forwarding
logic [4:0] last_load_rd;
logic basic_load;
assign basic_load = (opcode_trim == LOAD_T);
always_ff @ (posedge clk) begin
if (issue[LS_UNIT_WB_ID] & basic_load)
last_load_rd <= future_rd_addr;
end
initial begin
foreach(register_in_use_by_load_op[i])
register_in_use_by_load_op[i] = 0;
end
always_ff @ (posedge clk) begin
if (instruction_issued_with_rd & ~rd_zero)
register_in_use_by_load_op[future_rd_addr] <= new_request[LS_UNIT_WB_ID] & basic_load;
end
assign store_data_in_use_by_load_op = register_in_use_by_load_op[rs2_addr];
assign load_store_forward_possible = (opcode_trim == STORE_T) && store_data_in_use_by_load_op && (last_load_rd == rs2_addr);
////////////////////////////////////////////////////
//Branch unit inputs
assign branch_inputs.rs1 = rf_decode.rs1_data;

View file

@ -112,10 +112,11 @@ interface register_file_decode_interface;
logic uses_rs2;
logic rs1_conflict;
logic rs2_conflict;
instruction_id_t rs2_id;
logic instruction_issued;
modport decode (output future_rd_addr, rs1_addr, rs2_addr, instruction_issued, id, uses_rs1, uses_rs2, input rs1_conflict, rs2_conflict, rs1_data, rs2_data);
modport unit (input future_rd_addr, rs1_addr, rs2_addr, instruction_issued, id, uses_rs1, uses_rs2, output rs1_conflict, rs2_conflict, rs1_data, rs2_data);
modport decode (output future_rd_addr, rs1_addr, rs2_addr, instruction_issued, id, uses_rs1, uses_rs2, input rs1_conflict, rs2_conflict, rs1_data, rs2_data, rs2_id);
modport unit (input future_rd_addr, rs1_addr, rs2_addr, instruction_issued, id, uses_rs1, uses_rs2, output rs1_conflict, rs2_conflict, rs1_data, rs2_data, rs2_id);
endinterface

View file

@ -46,6 +46,12 @@ module load_store_unit (
local_memory_interface.master data_bram,
output instruction_id_t store_id,
output instruction_id_t store_done_id,
output logic store_complete,
input logic [31:0] wb_buffer_data,
input logic wb_buffer_data_valid,
input logic[31:0] csr_rd,
input instruction_id_t csr_id,
input logic csr_done,
@ -72,10 +78,9 @@ module load_store_unit (
ls_sub_unit_interface #(.BASE_ADDR(MEMORY_ADDR_L), .UPPER_BOUND(MEMORY_ADDR_H), .BIT_CHECK(BUS_BIT_CHECK)) cache();
logic units_ready;
logic store_bypass_stall;
logic store_ready;
logic issue_request;
logic load_complete;
logic store_complete;
logic [31:0] virtual_address;
logic [3:0] be;
@ -139,12 +144,17 @@ module load_store_unit (
//Primary Control Signals
assign units_ready = &unit_ready;
assign load_complete = |unit_data_valid;
assign store_complete = stage2_attr.is_store & load_attributes.valid;
//Registered store-completion report: both outputs update on the rising clock edge,
//so they reflect the stage1 values from the previous cycle.
always_ff @ (posedge clk) begin
//ID of the instruction currently in stage1 (captured every cycle, store or not)
store_done_id <= stage1.instruction_id;
//High for the cycle after a store was issued (stage1.store & issue_request)
store_complete <= stage1.store & issue_request;
end
assign store_id = stage1.store_forward_id;
//When switching units, ensure no outstanding loads so that there can be no timing collisions with results
assign unit_stall = (current_unit != last_unit) && ~load_attributes.empty;
assign store_bypass_stall = stage1.store & stage1.load_store_forward & ~load_attributes.empty;
assign issue_request = input_fifo.valid & units_ready & ~unit_stall & ~unaligned_addr & ~store_bypass_stall;
assign store_ready = stage1.store & ((stage1.load_store_forward & wb_buffer_data_valid) | ~stage1.load_store_forward);
assign issue_request = input_fifo.valid & units_ready & ~unit_stall & ~unaligned_addr & (~stage1.store | store_ready);
////////////////////////////////////////////////////
//TLB interface
@ -205,7 +215,7 @@ module load_store_unit (
assign shared_inputs.be = be;
assign shared_inputs.fn3 = stage1.fn3;
assign stage1_raw_data = stage1.load_store_forward ? previous_load : stage1.rs2;
assign stage1_raw_data = stage1.load_store_forward ? wb_buffer_data : stage1.rs2;
//Input: ABCD
//Assuming aligned requests,
@ -232,8 +242,8 @@ module load_store_unit (
assign load_attributes.data_in = load_attributes_in;
assign load_attributes.push = issue_request;
assign load_attributes.pop = load_complete | store_complete;
assign load_attributes.push = issue_request & stage1.load;
assign load_attributes.pop = load_complete;
assign stage2_attr = load_attributes.data_out;
@ -325,7 +335,7 @@ module load_store_unit (
always_ff @ (posedge clk) begin
exception_complete <= (input_fifo.valid & ls_exception_valid & stage1.load);
end
assign ls_done = load_complete | exception_complete | store_complete;
assign ls_done = load_complete | exception_complete;
assign wb.done = csr_done | ls_done;
assign wb.id = csr_done ? csr_id : stage2_attr.instruction_id;

View file

@ -81,6 +81,7 @@ module register_file(
assign rf_wb.rs1_id = in_use_by[rf_decode.rs1_addr];
assign rf_wb.rs2_id = in_use_by[rf_decode.rs2_addr];
assign rf_decode.rs2_id = rf_wb.rs2_id;
assign valid_write = rf_wb.rd_nzero & rf_wb.retired;

View file

@ -114,6 +114,13 @@ module taiga (
logic instruction_complete;
logic gc_flush_required;
//LS
instruction_id_t store_id;
instruction_id_t store_done_id;
logic store_complete;
logic [31:0] wb_buffer_data;
logic wb_buffer_data_valid;
//Trace Interface Signals
logic tr_operand_stall;
logic tr_unit_stall;
@ -128,6 +135,7 @@ module taiga (
logic [31:0] tr_instruction_pc_dec;
logic [31:0] tr_instruction_data_dec;
logic tr_branch_correct;
logic tr_branch_misspredict;
logic tr_return_misspredict;
logic tr_wb_mux_contention;
@ -221,6 +229,7 @@ module taiga (
tr.events.branch_operand_stall <= tr_branch_operand_stall;
tr.events.alu_operand_stall <= tr_alu_operand_stall;
tr.events.ls_operand_stall <= tr_ls_operand_stall;
tr.events.branch_correct <= tr_branch_correct;
tr.events.branch_misspredict <= tr_branch_misspredict;
tr.events.return_misspredict <= tr_return_misspredict;
tr.events.wb_mux_contention <= tr_wb_mux_contention;

View file

@ -375,6 +375,7 @@ package taiga_types;
logic load;
logic store;
logic load_store_forward;
instruction_id_t store_forward_id;
instruction_id_t instruction_id;
//exception support
logic [31:0] pc;
@ -455,6 +456,7 @@ package taiga_types;
logic ls_operand_stall;
//Branch Unit
logic branch_correct;
logic branch_misspredict;
logic return_misspredict;

View file

@ -36,6 +36,13 @@ module write_back(
output logic instruction_queue_empty,
output instruction_id_t oldest_id,
input instruction_id_t store_id,
input instruction_id_t store_done_id,
input logic store_complete,
output logic [31:0] wb_buffer_data,
output logic wb_buffer_data_valid,
//Trace signals
output logic tr_wb_mux_contention
);
@ -85,6 +92,8 @@ module write_back(
id_done_new[i] |= 1;
end
end
if (store_complete && store_done_id == i[$clog2(MAX_INFLIGHT_COUNT)-1:0])
id_done_new[i] |= 1;
end
end
@ -98,6 +107,9 @@ module write_back(
end
endgenerate
assign wb_buffer_data = rds_by_id[store_id];
assign wb_buffer_data_valid = id_done_r[store_id];
//ID tracking
id_tracking id_fifos (.*, .issued(ti.issued), .retired(retired), .id_available(ti.id_available),
.oldest_id(oldest_id), .next_id(issue_id), .empty(instruction_queue_empty));

View file

@ -16,15 +16,15 @@
</db_ref>
</db_ref_list>
<zoom_setting>
<ZoomStartTime time="3978080000fs"></ZoomStartTime>
<ZoomEndTime time="3994700001fs"></ZoomEndTime>
<Cursor1Time time="3991800000fs"></Cursor1Time>
<ZoomStartTime time="0fs"></ZoomStartTime>
<ZoomEndTime time="511000001fs"></ZoomEndTime>
<Cursor1Time time="333000000fs"></Cursor1Time>
</zoom_setting>
<column_width_setting>
<NameColumnWidth column_width="259"></NameColumnWidth>
<ValueColumnWidth column_width="248"></ValueColumnWidth>
<ValueColumnWidth column_width="244"></ValueColumnWidth>
</column_width_setting>
<WVObjectSize size="164" />
<WVObjectSize size="160" />
<wvobject type="logic" fp_name="/taiga_full_simulation/clk">
<obj_property name="ElementShortName">clk</obj_property>
<obj_property name="ObjectShortName">clk</obj_property>
@ -207,33 +207,25 @@
<obj_property name="label">Register File</obj_property>
<obj_property name="DisplayName">label</obj_property>
</wvobject>
<wvobject type="logic" fp_name="/taiga_full_simulation/uut/register_file_block/register_inuse_tracking/clr">
<obj_property name="ElementShortName">clr</obj_property>
<obj_property name="ObjectShortName">clr</obj_property>
</wvobject>
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/sim_register">
<obj_property name="ElementShortName">sim_register</obj_property>
<obj_property name="ObjectShortName">sim_register</obj_property>
</wvobject>
<wvobject type="logic" fp_name="/taiga_full_simulation/uut/register_file_block/inuse_mem/issued">
<obj_property name="ElementShortName">issued</obj_property>
<obj_property name="ObjectShortName">issued</obj_property>
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/register_inuse_tracking/reg_inuse_A">
<obj_property name="ElementShortName">reg_inuse_A[31:0]</obj_property>
<obj_property name="ObjectShortName">reg_inuse_A[31:0]</obj_property>
</wvobject>
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/inuse_mem/rs1_addr">
<obj_property name="ElementShortName">rs1_addr[4:0]</obj_property>
<obj_property name="ObjectShortName">rs1_addr[4:0]</obj_property>
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/register_inuse_tracking/reg_inuse_B">
<obj_property name="ElementShortName">reg_inuse_B[31:0]</obj_property>
<obj_property name="ObjectShortName">reg_inuse_B[31:0]</obj_property>
</wvobject>
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/inuse_mem/rs2_addr">
<obj_property name="ElementShortName">rs2_addr[4:0]</obj_property>
<obj_property name="ObjectShortName">rs2_addr[4:0]</obj_property>
</wvobject>
<wvobject type="logic" fp_name="/taiga_full_simulation/uut/register_file_block/inuse_mem/rs1_inuse">
<obj_property name="ElementShortName">rs1_inuse</obj_property>
<obj_property name="ObjectShortName">rs1_inuse</obj_property>
</wvobject>
<wvobject type="logic" fp_name="/taiga_full_simulation/uut/register_file_block/inuse_mem/rs2_inuse">
<obj_property name="ElementShortName">rs2_inuse</obj_property>
<obj_property name="ObjectShortName">rs2_inuse</obj_property>
</wvobject>
<wvobject type="logic" fp_name="/taiga_full_simulation/uut/register_file_block/in_use_match">
<obj_property name="ElementShortName">in_use_match</obj_property>
<obj_property name="ObjectShortName">in_use_match</obj_property>
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/register_inuse_tracking/sim_inuse">
<obj_property name="ElementShortName">sim_inuse[31:0]</obj_property>
<obj_property name="ObjectShortName">sim_inuse[31:0]</obj_property>
</wvobject>
<wvobject type="array" fp_name="/taiga_full_simulation/uut/register_file_block/in_use_by">
<obj_property name="ElementShortName">in_use_by[31:0][1:0]</obj_property>
@ -411,10 +403,6 @@
<obj_property name="label">L2</obj_property>
<obj_property name="DisplayName">label</obj_property>
</wvobject>
<wvobject type="array" fp_name="/taiga_full_simulation/l2_arb/\request[0] /request">
<obj_property name="ElementShortName">request</obj_property>
<obj_property name="ObjectShortName">request</obj_property>
</wvobject>
<wvobject type="logic" fp_name="/taiga_full_simulation/l2_arb/\request[0] /request_push">
<obj_property name="ElementShortName">request_push</obj_property>
<obj_property name="ObjectShortName">request_push</obj_property>
@ -495,10 +483,6 @@
<obj_property name="ElementShortName">rd_data_ack</obj_property>
<obj_property name="ObjectShortName">rd_data_ack</obj_property>
</wvobject>
<wvobject type="array" fp_name="/taiga_full_simulation/l2_arb/mem/request">
<obj_property name="ElementShortName">request</obj_property>
<obj_property name="ObjectShortName">request</obj_property>
</wvobject>
<wvobject type="logic" fp_name="/taiga_full_simulation/l2_arb/mem/request_pop">
<obj_property name="ElementShortName">request_pop</obj_property>
<obj_property name="ObjectShortName">request_pop</obj_property>

View file

@ -46,6 +46,7 @@ static const char * const eventNames[] = {
"branch_operand_stall",
"alu_operand_stall",
"ls_operand_stall",
"branch_correct",
"branch_misspredict",
"return_misspredict",
"wb_mux_contention",

View file

@ -57,6 +57,8 @@ embench_hw = $(addsuffix .hw_init, $(EMBENCH_BENCHMARKS))
embench_sim = $(addsuffix .sim_init, $(EMBENCH_BENCHMARKS))
###############################################################
#Location of the CoreMark source tree. Assigned with ?= (like ELF_TO_HW_INIT)
#so the default can be overridden from the environment or the command line.
COREMARK_DIR ?= /home/ematthew/Research/RISCV/software/coremark
#Binary to Verilog HW init file
###############################################################
ELF_TO_HW_INIT ?= python3 $(TAIGA_DIR)/tools/taiga_binary_converter.py $(RISCV_PREFIX) 0x80000000 131072
@ -86,6 +88,17 @@ build_embench :
make
#Builds CoreMark through its own Makefile (PORT_DIR=taiga-sim, ITERATIONS=5000),
#then runs the ELF_TO_HW_INIT converter on coremark.bin with coremark.hw_init
#and coremark.sim_init as its remaining arguments.
.PHONY: build_coremark
build_coremark:
	$(MAKE) -C $(COREMARK_DIR) compile PORT_DIR=taiga-sim ITERATIONS=5000
	#Each recipe line runs in its own shell, so the cd must be chained to the
	#command that relies on it; on a separate line it has no effect.
	cd "$(MAKEFILE_DIR)" && \
	$(ELF_TO_HW_INIT) $(COREMARK_DIR)/coremark.bin coremark.hw_init coremark.sim_init
#Runs the Verilator local-memory model on $(TAIGA_DIR)/tools/coremark.hw_init.
#stdout goes to a file named after the target; it is truncated on each run
#(">" rather than ">>") so output from earlier runs does not accumulate.
.PHONY: run_coremark_verilator
run_coremark_verilator :
	./verilator_local_mem_test/Vtaiga_local_mem "/dev/null" "/dev/null" $(TAIGA_DIR)/tools/coremark.hw_init $(VERILATOR_TRACE_FILE) > $@
#Benchmarks already built
.PHONY : $(EMBENCH_BENCHMARKS)
@ -126,7 +139,7 @@ run_compliance_tests_verilator:
#Runs the Verilator local-memory model on dhrystone.riscv.hw_init and redirects
#stdout to a file named after the target.
#NOTE(review): the simulator command appears twice below, differing only in
#">>" (append) versus ">" (truncate). This looks like the old and new versions
#of one line shown together -- confirm only one remains in the real Makefile.
.PHONY: run_dhrystone_verilator
run_dhrystone_verilator :
./verilator_local_mem_test/Vtaiga_local_mem "/dev/null" "/dev/null" /home/ematthew/Research/RISCV/software/taiga-benchmarks/dhrystone.riscv.hw_init $(VERILATOR_TRACE_FILE) >> $@
./verilator_local_mem_test/Vtaiga_local_mem "/dev/null" "/dev/null" /home/ematthew/Research/RISCV/software/taiga-benchmarks/dhrystone.riscv.hw_init $(VERILATOR_TRACE_FILE) > $@
clean:
rm -rf $(CRUFT)