diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index 8522d7bd9..41a72ea3c 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -34,20 +34,26 @@ package ariane_pkg; typedef struct packed { logic [63:0] cause; // cause of exception logic [63:0] tval; // additional information of causing exception (e.g.: instruction causing it), - // address of ld/st fault + // address of LD/ST fault logic valid; } exception; // branch-predict + // this is the struct we get back from ex stage and we will use it to update + // all the necessary data structures typedef struct packed { logic [63:0] pc; // pc of predict or mis-predict logic [63:0] target_address; // target address at which to jump, or not logic is_mispredict; // set if this was a mis-predict logic is_taken; // branch is taken + logic is_lower_16; // set for a compressed branch in the lower 16 bits of the word + // and for a misaligned uncompressed branch (as assigned in branch_engine) logic valid; // prediction with all its values is valid } branchpredict; // branchpredict scoreboard entry + // this is the struct which we will inject into the pipeline to guide the various + // units towards the correct branch decision and resolve typedef struct packed { logic [63:0] predict_address_i; // target address at which to jump, or not logic predict_taken_i; // set if this was a mis-predict @@ -114,7 +120,7 @@ package ariane_pkg; logic [14:12] funct3; logic [11:7] rd; logic [6:0] opcode; - } rtype; + } rtype_t; typedef struct packed { logic [31:20] imm; @@ -122,7 +128,7 @@ package ariane_pkg; logic [14:12] funct3; logic [11:7] rd; logic [6:0] opcode; - } itype; + } itype_t; typedef struct packed { logic [31:25] imm1; @@ -131,27 +137,21 @@ package ariane_pkg; logic [14:12] funct3; logic [11:7] imm0; logic [6:0] opcode; - } stype; + } stype_t; typedef struct packed { logic [31:12] funct3; logic [11:7] rd; logic [6:0] opcode; - } utype; + } utype_t; - // for some reason verilator complains about this union - // since I am not using it for simulation anyway and 
linting only - // it is not too bad to deactivate it, but a future me (or you) - // should look into that more thoroughly - `ifndef verilator typedef union packed { - logic [31:0] instr; - rtype rtype; - itype itype; - stype stype; - utype utype; + logic [31:0] instr; + rtype_t rtype; + itype_t itype; + stype_t stype; + utype_t utype; } instruction; - `endif // -------------------- // Opcodes @@ -247,10 +247,9 @@ package ariane_pkg; logic [7:0] address; } csr_addr_t; - // `ifndef VERILATOR typedef union packed { csr_reg_t address; csr_addr_t csr_decode; } csr_t; - // `endif + endpackage diff --git a/src/branch_engine.sv b/src/branch_engine.sv index 0c6c350c2..46b3b90f1 100644 --- a/src/branch_engine.sv +++ b/src/branch_engine.sv @@ -31,8 +31,8 @@ module branch_engine ( input logic valid_i, input branchpredict_sbe branch_predict_i, // this is the address we predicted - output branchpredict resolved_branch_o, // this is the actual address we are targeting - output exception branch_ex_o // branch exception out + output branchpredict resolved_branch_o, // this is the actual address we are targeting + output exception branch_ex_o // branch exception out ); logic [63:0] target_address; logic [63:0] next_pc; @@ -69,7 +69,7 @@ module branch_engine ( // calculate next PC, depending on whether the instruction is compressed or not this may be different next_pc = pc_i + ((is_compressed_instr_i) ? 
64'h2 : 64'h4); // calculate target address simple 64 bit addition - target_address = $signed(operand_c_i) + $signed(imm_i); + target_address = $unsigned($signed(operand_c_i) + $signed(imm_i)); // save PC - we need this to get the target row in the branch target buffer // we play this trick with the branch instruction which wraps a byte boundary: // |---------- Place the prediction on this PC @@ -79,7 +79,11 @@ module branch_engine ( // |____________________________________________________ // This will relief the prefetcher to re-fetch partially fetched unaligned branch instructions e.g.: // we don't have a back arch between prefetcher and decoder/instruction FIFO. - resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : (pc_i[63:2] + 64'h4); + resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4); + // save if the branch instruction was in the lower 16 bit of the instruction word + // the first case is a compressed instruction which is in slot 0 + // the other case is a misaligned uncompressed instruction which we only predict in the next cycle (see notes above) + resolved_branch_o.is_lower_16 = (is_compressed_instr_i && pc_i[1] == 1'b0) || (!is_compressed_instr_i && pc_i[1] == 1'b1); // write target address which goes to pc gen resolved_branch_o.target_address = (comparison_result) ? 
target_address : next_pc; resolved_branch_o.is_taken = comparison_result; diff --git a/src/btb.sv b/src/btb.sv index 78ef5e603..ce04f5ba0 100644 --- a/src/btb.sv +++ b/src/btb.sv @@ -43,6 +43,7 @@ module btb #( logic valid; logic [63:0] target_address; logic [BITS_SATURATION_COUNTER-1:0] saturation_counter; + logic is_lower_16; } btb_n [NR_ENTRIES-1:0], btb_q [NR_ENTRIES-1:0]; logic [$clog2(NR_ENTRIES)-1:0] index, update_pc; @@ -55,36 +56,38 @@ module btb #( assign index = vpc_i[$clog2(NR_ENTRIES) + OFFSET - 1:OFFSET]; // we combinatorially predict the branch and the target address - assign is_branch_o = btb_q[$unsigned(index)].valid; - assign predict_taken_o = btb_q[$unsigned(index)].saturation_counter[BITS_SATURATION_COUNTER-1]; - assign branch_target_address_o = btb_q[$unsigned(index)].target_address; + assign is_branch_o = btb_q[index].valid; + assign predict_taken_o = btb_q[index].saturation_counter[BITS_SATURATION_COUNTER-1]; + assign branch_target_address_o = btb_q[index].target_address; // update on a mis-predict always_comb begin : update_branchpredict btb_n = btb_q; - saturation_counter = btb_q[$unsigned(update_pc)].saturation_counter; + saturation_counter = btb_q[update_pc].saturation_counter; if (branchpredict_i.valid) begin - btb_n[$unsigned(update_pc)].valid = 1'b1; + btb_n[update_pc].valid = 1'b1; // update saturation counter // first check if counter is already saturated in the positive regime e.g.: branch taken if (saturation_counter == {BITS_SATURATION_COUNTER{1'b1}}) begin // we can safely decrease it if (~branchpredict_i.is_taken) - btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter - 1; + btb_n[update_pc].saturation_counter = saturation_counter - 1; // then check if it saturated in the negative regime e.g.: branch not taken end else if (saturation_counter == {BITS_SATURATION_COUNTER{1'b0}}) begin // we can safely increase it if (branchpredict_i.is_taken) - btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter + 1; 
+ btb_n[update_pc].saturation_counter = saturation_counter + 1; end else begin // otherwise we are not in any boundaries and can decrease or increase it if (branchpredict_i.is_taken) - btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter + 1; + btb_n[update_pc].saturation_counter = saturation_counter + 1; else - btb_n[$unsigned(update_pc)].saturation_counter = saturation_counter - 1; + btb_n[update_pc].saturation_counter = saturation_counter - 1; end // the target address is simply updated - btb_n[$unsigned(update_pc)].target_address = branchpredict_i.target_address; + btb_n[update_pc].target_address = branchpredict_i.target_address; + // as is the information whether this was a compressed branch + btb_n[update_pc].is_lower_16 = branchpredict_i.is_lower_16; end end @@ -93,7 +96,7 @@ module btb #( if(~rst_ni) begin // Bias the branches to be taken upon first arrival for (int i = 0; i < NR_ENTRIES; i++) - btb_q[i] <= '{1'b0, 64'b0, 2'b10}; + btb_q[i] <= '{1'b0, 64'b0, 2'b10, 1'b0}; end else begin // evict all entries if (flush_i) begin diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 91cea8a78..ba39d1713 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -40,7 +40,7 @@ module fetch_fifo input logic in_valid_i, output logic in_ready_o, // output port - output branchpredict_sbe [1:0] branch_predict_o, + output branchpredict_sbe branch_predict_o, output logic [63:0] out_addr_o, output logic [31:0] out_rdata_o, output logic out_valid_o, @@ -65,7 +65,7 @@ module fetch_fifo fetch_entry mem_n[DEPTH-1:0], mem_q[DEPTH-1:0]; logic [$clog2(DEPTH)-1:0] read_pointer_n, read_pointer_q; logic [$clog2(DEPTH)-1:0] write_pointer_n, write_pointer_q; - int unsigned status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool + logic [$clog2(DEPTH)-1:0] status_cnt_n, status_cnt_q; // FIFO fill counter; NOTE(review): $clog2(DEPTH) bits cannot hold the value DEPTH when DEPTH is a power of two, TODO confirm the required range // status signals logic full, empty, one_left; @@ -78,9 +78,11 @@ module fetch_fifo // we always need 
two empty places // as it could happen that we get two compressed instructions/cycle + /* verilator lint_off WIDTH */ assign full = (status_cnt_q > DEPTH - 2); assign one_left = (status_cnt_q == DEPTH - 1); // two spaces are left assign empty = (status_cnt_q == 0); + /* verilator lint_on WIDTH */ // the output is valid if we are either empty or just got a valid assign out_valid_o = !empty || in_valid_q; // we need space for at least two instructions: the full flag is conditioned on that @@ -114,8 +116,8 @@ module fetch_fifo // -------------- always_comb begin : output_port // counter - automatic int status_cnt = status_cnt_q; - automatic int write_pointer = write_pointer_q; + automatic logic [$clog2(DEPTH)-1:0] status_cnt = status_cnt_q; + automatic logic [$clog2(DEPTH)-1:0] write_pointer = write_pointer_q; write_pointer_n = write_pointer_q; read_pointer_n = read_pointer_q; @@ -135,7 +137,7 @@ module fetch_fifo // it is compressed mem_n[write_pointer_q].branch_predict = branch_predict_q; mem_n[write_pointer_q].address = in_addr_q; - mem_n[write_pointer_q].instruction = in_rdata_q[15:0]; + mem_n[write_pointer_q].instruction = {16'b0, in_rdata_q[15:0]}; status_cnt++; write_pointer++; @@ -146,7 +148,7 @@ module fetch_fifo if (in_rdata_q[17:16] != 2'b11) begin mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; - mem_n[write_pointer_q + 1].instruction = in_rdata_q[31:16]; + mem_n[write_pointer_q + 1].instruction = {16'b0, in_rdata_q[31:16]}; status_cnt++; write_pointer++; @@ -191,7 +193,7 @@ module fetch_fifo if (in_rdata_q[17:16] != 2'b11) begin mem_n[write_pointer_q + 1].branch_predict = branch_predict_q; mem_n[write_pointer_q + 1].address = {in_addr_q[63:2], 2'b10}; - mem_n[write_pointer_q + 1].instruction = in_rdata_q[31:16]; + mem_n[write_pointer_q + 1].instruction = {16'b0, in_rdata_q[31:16]}; status_cnt++; write_pointer++; // unaligned access served