diff --git a/doc/instruction_fetch.rst b/doc/instruction_fetch.rst index e160c23d..c723c0d8 100644 --- a/doc/instruction_fetch.rst +++ b/doc/instruction_fetch.rst @@ -26,6 +26,16 @@ The interfaces of the icache module are the same as the prefetch buffer with two Firstly, a signal to enable the cache which is driven from a custom CSR. Secondly a signal to the flush the cache which is set every time a ``fence.i`` instruction is executed. +Branch Prediction +----------------- + +Ibex can be configured to use static branch prediction by setting the ``BranchPrediction`` parameter to 1. +This improves performance by predicting that any branch with a negative offset is taken and that any branch with a positive offset is not. +When successful, the prediction removes a stall cycle from a taken branch. +However, there is a mis-predict penalty if a branch is wrongly predicted to be taken. +This penalty is at least one cycle, or at least two cycles if the instruction following the branch is uncompressed and not aligned. +This feature is *EXPERIMENTAL* and its effects are not yet fully documented. 
+ Instruction-Side Memory Interface --------------------------------- diff --git a/doc/integration.rst b/doc/integration.rst index fc6be2f4..da066f84 100644 --- a/doc/integration.rst +++ b/doc/integration.rst @@ -23,6 +23,7 @@ Instantiation Template .RegFile ( ibex_pkg::RegFileFF ), .ICache ( 0 ), .ICacheECC ( 0 ), + .BranchPrediction ( 0 ), .SecureIbex ( 0 ), .DbgTriggerEn ( 0 ), .DmHaltAddr ( 32'h1A110800 ), @@ -119,6 +120,8 @@ Parameters | ``ICacheECC`` | bit | 0 | *EXPERIMENTAL* Enable SECDED ECC protection in ICache (if | | | | | ICache == 1) | +------------------------------+---------------------+------------+-----------------------------------------------------------------------+ +| ``BranchPrediction`` | bit | 0 | *EXPERIMENTAL* Enable Static branch prediction | ++------------------------------+---------------------+------------+-----------------------------------------------------------------------+ | ``SecureIbex`` | bit | 0 | *EXPERIMENTAL* Enable various additional features targeting | | | | | secure code execution. | +------------------------------+---------------------+------------+-----------------------------------------------------------------------+ diff --git a/ibex_configs.yaml b/ibex_configs.yaml index 226ec78a..68411251 100644 --- a/ibex_configs.yaml +++ b/ibex_configs.yaml @@ -77,7 +77,7 @@ experimental-maxperf-pmp-bmfull: PMPGranularity : 0 PMPNumRegions : 16 -# experimental-maxperf with branch predictor switched on, this exists to allow +# experimental-maxperf with branch predictor switched on. This exists to allow # easy use of Ibex with the branch predictor in particular for CI runs. The # branch predictor will be enabled in all the 'maxperf' configs after further # development. diff --git a/rtl/ibex_branch_predict.sv b/rtl/ibex_branch_predict.sv index 010d50aa..988099fa 100644 --- a/rtl/ibex_branch_predict.sv +++ b/rtl/ibex_branch_predict.sv @@ -5,14 +5,14 @@ /** * Branch Predictor * - * This implements static branch prediction. 
It takes an instruction and it's PC and determines if + * This implements static branch prediction. It takes an instruction and its PC and determines if * it's a branch or a jump and calculates its target. For jumps it will always predict taken. For * branches it will predict taken if the PC offset is negative. * * This handles both compressed and uncompressed instructions. Compressed instructions must be in * the lower 16-bits of instr. * - * The predictor is entirely combinitorial but takes clk/rst_n signals for use by assertions. + * The predictor is entirely combinational but takes clk/rst_n signals for use by assertions. */ `include "prim_assert.sv" @@ -88,7 +88,7 @@ module ibex_branch_predict ( endcase end - `ASSERT_IF(BranchInsTypeOnehot, $onehot0({instr_j, instr_b, instr_cj, instr_cb}), fetch_valid_i); + `ASSERT_IF(BranchInsTypeOneHot, $onehot0({instr_j, instr_b, instr_cj, instr_cb}), fetch_valid_i); // Determine branch prediction, taken if offset is negative assign instr_b_taken = (instr_b & imm_b_type[31]) | (instr_cb & imm_cb_type[31]); diff --git a/rtl/ibex_controller.sv b/rtl/ibex_controller.sv index 3a6e27b8..afa7fff8 100644 --- a/rtl/ibex_controller.sv +++ b/rtl/ibex_controller.sv @@ -29,8 +29,7 @@ module ibex_controller #( // instr from IF-ID pipeline stage input logic instr_valid_i, // instr is valid - input logic [31:0] instr_i, // instr data (uncompressed if compressed - // otherwise raw data) for mtval + input logic [31:0] instr_i, // uncompressed instr data for mtval input logic [15:0] instr_compressed_i, // instr compressed data for mtval input logic instr_is_compressed_i, // instr is compressed input logic instr_bp_taken_i, // instr was predicted taken branch @@ -62,9 +61,11 @@ module ibex_controller #( output logic wb_exception_o, // Instruction in WB taking an exception // jump/branch signals - input logic branch_set_i, // branch taken set signal - input logic branch_set_spec_i, // speculative branch signal - input logic branch_not_set_i, 
// branch was not taken + input logic branch_set_i, // branch set signal (branch definitely + // taken) + input logic branch_set_spec_i, // speculative branch signal (branch + // may be taken) + input logic branch_not_set_i, // branch is definitely not taken input logic jump_set_i, // jump taken set signal // interrupt signals @@ -372,9 +373,9 @@ module ibex_controller #( // below always set pc_mux and exc_pc_mux but only set pc_set if certain conditions are met. // This avoid having to factor those conditions into the pc_mux and exc_pc_mux select signals // helping timing. - pc_mux_o = PC_BOOT; - pc_set_o = 1'b0; - pc_set_spec_o = 1'b0; + pc_mux_o = PC_BOOT; + pc_set_o = 1'b0; + pc_set_spec_o = 1'b0; nt_branch_mispredict_o = 1'b0; exc_pc_mux_o = EXC_PC_IRQ; @@ -384,9 +385,9 @@ module ibex_controller #( ctrl_busy_o = 1'b1; - halt_if = 1'b0; - retain_id = 1'b0; - flush_id = 1'b0; + halt_if = 1'b0; + retain_id = 1'b0; + flush_id = 1'b0; debug_csr_save_o = 1'b0; debug_cause_o = DBG_CAUSE_EBREAK; @@ -501,7 +502,7 @@ module ibex_controller #( end if (!special_req_branch) begin - if ((branch_set_i || jump_set_i)) begin + if (branch_set_i || jump_set_i) begin // Only set the PC if the branch predictor hasn't already done the branch for us pc_set_o = BranchPredictor ? ~instr_bp_taken_i : 1'b1; diff --git a/rtl/ibex_id_stage.sv b/rtl/ibex_id_stage.sv index dd05811e..5ca2e22b 100644 --- a/rtl/ibex_id_stage.sv +++ b/rtl/ibex_id_stage.sv @@ -695,10 +695,9 @@ module ibex_id_stage #( end - // Unless the first branch/jump was predicted holding branch_set/jump_set high for more than one - // cycle may not cause a functional issue but could generate needless prefetch buffer flushes and - // instruction fetches. ID/EX is designed such that this shouldn't ever happen for non-predicted - // branches. + // Holding branch_set/jump_set high for more than one cycle should not cause a functional issue. + // However it could generate needless prefetch buffer flushes and instruction fetches. 
The ID/EX + // design ensures that this never happens for non-predicted branches. `ASSERT(NeverDoubleBranch, branch_set & ~instr_bp_taken_i |=> ~branch_set) `ASSERT(NeverDoubleJump, jump_set & ~instr_bp_taken_i |=> ~jump_set) diff --git a/rtl/ibex_if_stage.sv b/rtl/ibex_if_stage.sv index 9e1983c8..39798fb2 100644 --- a/rtl/ibex_if_stage.sv +++ b/rtl/ibex_if_stage.sv @@ -125,7 +125,6 @@ module ibex_if_stage #( logic if_id_pipe_reg_we; // IF-ID pipeline reg write enable // Dummy instruction signals - //logic fetch_valid_out; logic stall_dummy_instr; logic [31:0] instr_out; logic instr_is_compressed_out; @@ -171,11 +170,9 @@ module ibex_if_stage #( PC_EXC: fetch_addr_n = exc_pc; // set PC to exception handler PC_ERET: fetch_addr_n = csr_mepc_i; // restore PC when returning from EXC PC_DRET: fetch_addr_n = csr_depc_i; - // Without branch predictor will never get pc_mux_internal == PC_BP, still handle no branch + // Without branch predictor will never get pc_mux_internal == PC_BP. We still handle no branch // predictor case here to ensure redundant mux logic isn't synthesised. - PC_BP: begin - fetch_addr_n = BranchPredictor ? predict_branch_pc : { boot_addr_i[31:8], 8'h80 }; - end + PC_BP: fetch_addr_n = BranchPredictor ? predict_branch_pc : { boot_addr_i[31:8], 8'h80 }; default: fetch_addr_n = { boot_addr_i[31:8], 8'h80 }; endcase end @@ -255,12 +252,6 @@ module ibex_if_stage #( assign unused_icinv = icache_inval_i; end - // For predicted branches only set branch_req when the ID/EX stage is ready to accept the branch - // instruction. Otherwise the branch instruction ends up getting flush out of the IF stage by the - // branch_req and is lost. Whilst it is possible to begin fetching the predicted branch without - // flushing the branch instruction from IF this adds design complexity and for situations where - // ID/EX stage stalls are common more timely fetching of branches is likely to have limited - // performance impact. 
assign branch_req = pc_set_i | predict_branch_taken; assign branch_spec = pc_set_spec_i | predict_branch_taken; @@ -305,7 +296,6 @@ module ibex_if_stage #( ); // Mux between actual instructions and dummy instructions - //assign fetch_valid_out = insert_dummy_instr | if_instr_valid; assign instr_out = insert_dummy_instr ? dummy_instr_data : instr_decompressed; assign instr_is_compressed_out = insert_dummy_instr ? 1'b0 : instr_is_compressed; assign illegal_c_instr_out = insert_dummy_instr ? 1'b0 : illegal_c_insn; @@ -335,7 +325,6 @@ module ibex_if_stage #( assign unused_dummy_mask = dummy_instr_mask_i; assign unused_dummy_seed_en = dummy_instr_seed_en_i; assign unused_dummy_seed = dummy_instr_seed_i; - //assign fetch_valid_out = fetch_valid; assign instr_out = instr_decompressed; assign instr_is_compressed_out = instr_is_compressed; assign illegal_c_instr_out = illegal_c_insn; @@ -426,16 +415,17 @@ module ibex_if_stage #( end // When branch prediction is enabled a skid buffer between the IF and ID/EX stage is introduced. - // If an instruction in IF is predicted to be a branch and ID/EX is not ready the instruction is - // moved to the skid buffer which becomes the output of the IF stage until the ID/EX stage - // accepts the instruction. The skid buffer is required as otherwise the ID/EX ready signal is - // coupled to the instr_req_o output which produces a feedthrough path from data_gnt_i -> - // instr_req_o (which needs to be avoided as for some interconnects this will result in - // a combinational loop). + // If an instruction in IF is predicted to be a taken branch and ID/EX is not ready the + // instruction in IF is moved to the skid buffer which becomes the output of the IF stage until + // the ID/EX stage accepts the instruction. 
The skid buffer is required as otherwise the ID/EX + // ready signal is coupled to the instr_req_o output which produces a feedthrough path from + // data_gnt_i -> instr_req_o (which needs to be avoided as for some interconnects this will + // result in a combinational loop). assign instr_skid_en = predicted_branch & ~id_in_ready_i & ~instr_skid_valid_q; - assign instr_skid_valid_d = (instr_skid_valid_q & ~id_in_ready_i & ~stall_dummy_instr) | instr_skid_en; + assign instr_skid_valid_d = (instr_skid_valid_q & ~id_in_ready_i & ~stall_dummy_instr) | + instr_skid_en; always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin diff --git a/rtl/ibex_prefetch_buffer.sv b/rtl/ibex_prefetch_buffer.sv index fc570cde..f206b2ad 100644 --- a/rtl/ibex_prefetch_buffer.sv +++ b/rtl/ibex_prefetch_buffer.sv @@ -11,7 +11,7 @@ */ module ibex_prefetch_buffer #( parameter bit BranchPredictor = 1'b0 -)( +) ( input logic clk_i, input logic rst_ni, @@ -252,7 +252,8 @@ module ibex_prefetch_buffer #( // If a branch is received at any point while a request is outstanding, it must be tracked // to ensure we discard the data once received assign branch_discard_n[i] = (valid_req & gnt_or_pmp_err & discard_req_d) | - (branch_or_mispredict & rdata_outstanding_q[i]) | branch_discard_q[i]; + (branch_or_mispredict & rdata_outstanding_q[i]) | + branch_discard_q[i]; // Record whether this request received a PMP error assign rdata_pmp_err_n[i] = (valid_req & ~rdata_outstanding_q[i] & instr_pmp_err_i) | rdata_pmp_err_q[i]; @@ -266,7 +267,8 @@ module ibex_prefetch_buffer #( rdata_outstanding_q[i]; assign branch_discard_n[i] = (valid_req & gnt_or_pmp_err & discard_req_d & rdata_outstanding_q[i-1]) | - (branch_or_mispredict & rdata_outstanding_q[i]) | branch_discard_q[i]; + (branch_or_mispredict & rdata_outstanding_q[i]) | + branch_discard_q[i]; assign rdata_pmp_err_n[i] = (valid_req & ~rdata_outstanding_q[i] & instr_pmp_err_i & rdata_outstanding_q[i-1]) | rdata_pmp_err_q[i];