Complete branch prediction path through IF, ID, EX

This commit is contained in:
Florian Zaruba 2017-05-10 15:08:39 +02:00
parent 6b9ba5b314
commit 61afdb9e30
6 changed files with 191 additions and 117 deletions

View file

@ -112,6 +112,9 @@ module ariane
logic illegal_c_insn_id_if;
logic [63:0] pc_id_if_id;
exception exception_if_id;
logic branch_valid_if_id;
logic [63:0] predict_address_if_id;
logic predict_taken_if_id;
// --------------
// ID <-> EX
// --------------
@ -199,6 +202,8 @@ module ariane
// EX <-> CSR
// --------------
// * -> CTRL
logic flush_csr_ctrl;
// TODO: Preliminary signal assignments
logic flush_tlb;
assign flush_tlb = 1'b0;
@ -214,7 +219,7 @@ module ariane
.branchpredict_i ( branchpredict ),
.pc_if_o ( pc_pcgen_if ),
.set_pc_o ( set_pc_pcgen_if ),
.is_branch_o ( is_branch_o ),
.is_branch_o ( is_branch_pcgen_if ),
.boot_addr_i ( boot_addr_i ),
.epc_i ( epc_commit_pcgen ),
.trap_vector_base_i ( trap_vector_base_commit_pcgen ),
@ -231,6 +236,10 @@ module ariane
.id_ready_i ( ready_id_if ),
.halt_if_i ( halt_if ),
.set_pc_i ( set_pc_pcgen_if ),
.is_branch_i ( is_branch_pcgen_if ),
.branch_valid_o ( branch_valid_if_id ),
.predict_address_o ( predict_address_if_id ),
.predict_taken_o ( predict_taken_if_id ),
.fetch_addr_i ( pc_pcgen_if ),
.instr_req_o ( fetch_req_if_ex ),
.instr_addr_o ( fetch_vaddr_if_ex ),
@ -275,6 +284,9 @@ module ariane
.alu_ready_i ( alu_ready_ex_id ),
.alu_valid_o ( alu_valid_id_ex ),
// Branches and Jumps
.branch_valid_i ( branch_valid_if_id ),
.predict_address_i ( predict_address_if_id ),
.predict_taken_i ( predict_taken_if_id ),
.branch_valid_o ( branch_valid_id_ex ),
.predict_address_o ( predict_address_id_ex ),
.predict_taken_o ( predict_taken_id_ex ),
@ -387,7 +399,7 @@ module ariane
.ASID_WIDTH ( ASID_WIDTH )
)
csr_regfile_i (
.flush_o ( flus_csr_ctrl ),
.flush_o ( flush_csr_ctrl ),
.ex_i ( ex_commit ),
.csr_op_i ( csr_op_commit_csr ),
.csr_addr_i ( csr_addr_ex_csr ),
@ -417,7 +429,7 @@ module ariane
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_commit_i ( flush_commit_i ),
.flush_csr_i ( flus_csr_ctrl ),
// flush request coming from the CSR register file (driven by csr_regfile_i.flush_o)
.flush_csr_i ( flush_csr_ctrl ),
.branchpredict_i ( branchpredict )
);

View file

@ -26,6 +26,7 @@ module branch_engine (
input logic comparison_result_i, // result of comparison
input logic [63:0] predict_address_i, // this is the address we predicted
input logic predict_taken_i,
output branchpredict branchpredict_o, // this is the actual address we are targeting
output exception branch_ex_o // branch exception out
);
@ -46,8 +47,10 @@ module branch_engine (
branchpredict_o.target_address = target_address;
branchpredict_o.is_taken = comparison_result_i;
// we mis-predicted, i.e. the predicted address differs from the actual target address
// or the predicted taken/not-taken decision differs from the comparison result
if (target_address != predict_address_i && target_address[1:0] == 2'b0) begin
branchpredict_o.is_mispredict = 1'b0;
if (target_address[1:0] == 2'b0) begin
if (target_address != predict_address_i || predict_taken_i != comparison_result_i) begin
branchpredict_o.is_mispredict = 1'b1;
end
end
end
end

View file

@ -43,8 +43,8 @@ module ex_stage #(
// Branches and Jumps
input logic branch_valid_i,
input logic [63:0] predict_address_i,
output branchpredict branchpredict_o,
input logic predict_taken_i,
output branchpredict branchpredict_o,
// LSU
output logic lsu_ready_o, // FU is ready
input logic lsu_valid_i, // Input is valid
@ -123,8 +123,6 @@ module ex_stage #(
.operand_b_i ( imm_i ),
.valid_i ( branch_valid_i ),
.comparison_result_i ( comparison_result_alu_branch ),
.predict_address_i ( predict_address_i ),
.branchpredict_o ( branchpredict_o ),
.branch_ex_o ( alu_exception_o ),
.*
);

View file

@ -45,7 +45,11 @@ module id_stage #(
input logic alu_ready_i,
output logic alu_valid_o,
// Branch predict In
input logic branch_valid_i,
input logic [63:0] predict_address_i,
input logic predict_taken_i,
// Branch predict Out
output logic branch_valid_o,
output logic [63:0] predict_address_o,
output logic predict_taken_o,
@ -108,6 +112,10 @@ module id_stage #(
// This limitation is in place to ease the backtracking of mis-predicted branches as they
// can simply be in the front-end of the processor.
logic unresolved_branch_n, unresolved_branch_q;
// branch predict registers
logic branch_valid_n, branch_valid_q;
logic [63:0] predict_address_n, predict_address_q;
logic predict_taken_n, predict_taken_q;
always_comb begin : unresolved_branch
unresolved_branch_n = unresolved_branch_q;
@ -119,9 +127,23 @@ module id_stage #(
if (instruction_valid_i && is_control_flow_instr) begin
unresolved_branch_n = 1'b1;
end
branch_valid_n = branch_valid_q;
predict_address_n = predict_address_q;
predict_taken_n = predict_taken_q;
// save branch prediction information until the ex stage resolves the prediction
if (~unresolved_branch_q) begin
branch_valid_n = branch_valid_i;
predict_address_n = predict_address_i;
predict_taken_n = predict_taken_i;
end
end
// we are ready if we are not full and don't have any unresolved branches
assign ready_o = ~full & ~unresolved_branch_q;
assign ready_o = ~full & ~unresolved_branch_q;
// output branch prediction bits
assign branch_valid_o = branch_valid_q;
assign predict_address_o = predict_address_q;
assign predict_taken_o = predict_taken_q;
decoder decoder_i (
.clk_i ( clk_i ),
@ -178,10 +200,16 @@ module id_stage #(
);
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
if (~rst_ni) begin
unresolved_branch_q <= 1'b0;
branch_valid_q <= 1'b0;
predict_address_q <= 64'b0;
predict_taken_q <= 1'b0;
end else begin
unresolved_branch_q <= unresolved_branch_n;
branch_valid_q <= branch_valid_n;
predict_address_q <= predict_address_n;
predict_taken_q <= predict_taken_n;
end
end

View file

@ -36,8 +36,14 @@ module if_stage (
output logic if_busy_o, // is the IF stage busy fetching instructions?
input logic id_ready_i,
input logic halt_if_i, // pipeline stall
input logic set_pc_i, // set new PC
// ctrl flow instruction in
input logic [63:0] fetch_addr_i,
input logic set_pc_i, // set new PC
input logic is_branch_i, // the new PC was a branch e.g.: branch or jump
// branchpredict out
output logic branch_valid_o,
output logic [63:0] predict_address_o,
output logic predict_taken_o,
// instruction cache interface
output logic instr_req_o,
output logic [63:0] instr_addr_o,
@ -64,6 +70,11 @@ module if_stage (
logic [31:0] fetch_rdata;
logic [63:0] fetch_addr;
// branch predict registers
logic branch_valid_n, branch_valid_q;
logic [63:0] predict_address_n, predict_address_q;
logic predict_taken_n, predict_taken_q;
// offset FSM
enum logic[0:0] {WAIT, IDLE} offset_fsm_cs, offset_fsm_ns;
logic [31:0] instr_decompressed;
@ -71,26 +82,20 @@ module if_stage (
logic instr_compressed_int;
logic clear_instr_valid_i;
assign pc_if_o = fetch_addr;
// id stage acknowledged
assign clear_instr_valid_i = id_ready_i;
// compressed instruction decoding, or more precisely compressed instruction
// expander
//
// since it does not matter where we decompress instructions, we do it here
// to ease timing closure
compressed_decoder compressed_decoder_i
(
compressed_decoder compressed_decoder_i (
.instr_i ( fetch_rdata ),
.instr_o ( instr_decompressed ),
.is_compressed_o ( instr_compressed_int ),
.illegal_instr_o ( illegal_c_insn )
);
);
// prefetch buffer, caches a fixed number of instructions
prefetch_buffer prefetch_buffer_i
(
// Pre-fetch buffer, caches a fixed number of instructions
prefetch_buffer prefetch_buffer_i (
.clk ( clk_i ),
.rst_n ( rst_ni ),
@ -113,110 +118,138 @@ module if_stage (
// Prefetch Buffer Status
.busy_o ( prefetch_busy )
);
);
// offset FSM state register:
// asynchronously reset (active-low rst_ni) to IDLE, otherwise it takes the
// next state (offset_fsm_ns) on every rising clock edge
always_ff @(posedge clk_i, negedge rst_ni)
begin
if (rst_ni == 1'b0) begin
offset_fsm_cs <= IDLE;
end else begin
offset_fsm_cs <= offset_fsm_ns;
end
end
// offset FSM state transition logic
always_comb begin
offset_fsm_ns = offset_fsm_cs;
// offset FSM state transition logic
always_comb
begin
offset_fsm_ns = offset_fsm_cs;
fetch_ready = 1'b0;
branch_req = 1'b0;
valid = 1'b0;
fetch_ready = 1'b0;
branch_req = 1'b0;
valid = 1'b0;
unique case (offset_fsm_cs)
// no valid instruction data for ID stage
// assume aligned
IDLE: begin
if (req_i) begin
branch_req = 1'b1;
offset_fsm_ns = WAIT;
end
end
// serving aligned 32 bit or 16 bit instruction, we don't know yet
WAIT: begin
if (fetch_valid) begin
valid = 1'b1; // an instruction is ready for ID stage
if (req_i && if_valid) begin
fetch_ready = 1'b1;
offset_fsm_ns = WAIT;
end
end
end
default: begin
offset_fsm_ns = IDLE;
end
endcase
// take care of control flow changes
if (set_pc_i) begin
valid = 1'b0;
// switch to new PC from ID stage
branch_req = 1'b1;
unique case (offset_fsm_cs)
// no valid instruction data for ID stage
// assume aligned
IDLE: begin
if (req_i) begin
branch_req = 1'b1;
offset_fsm_ns = WAIT;
end
end
// IF-ID pipeline registers, frozen when the ID stage is stalled
always_ff @(posedge clk_i, negedge rst_ni)
begin : IF_ID_PIPE_REGISTERS
if (rst_ni == 1'b0)
begin
instr_valid_id_o <= 1'b0;
instr_rdata_id_o <= '0;
illegal_c_insn_id_o <= 1'b0;
is_compressed_id_o <= 1'b0;
pc_id_o <= '0;
ex_o <= '{default: 0};
end
else
begin
if (if_valid)
begin
instr_valid_id_o <= 1'b1;
instr_rdata_id_o <= instr_decompressed;
illegal_c_insn_id_o <= illegal_c_insn;
is_compressed_id_o <= instr_compressed_int;
pc_id_o <= pc_if_o;
ex_o.cause <= 64'b0; // TODO: Output exception
ex_o.tval <= 64'b0; // TODO: Output exception
ex_o.valid <= 1'b0; // TODO: Output exception
end else if (clear_instr_valid_i) begin
instr_valid_id_o <= 1'b0;
end
// serving aligned 32 bit or 16 bit instruction, we don't know yet
WAIT: begin
if (fetch_valid) begin
valid = 1'b1; // an instruction is ready for ID stage
if (req_i && if_valid) begin
fetch_ready = 1'b1;
offset_fsm_ns = WAIT;
end
end
end
default: begin
offset_fsm_ns = IDLE;
end
endcase
assign if_ready = valid & id_ready_i;
assign if_valid = (~halt_if_i) & if_ready;
assign if_busy_o = prefetch_busy;
//-------------
// Assertions
//-------------
`ifndef SYNTHESIS
`ifndef VERILATOR
// there should never be a grant when there was no request
assert property (
@(posedge clk_i) (instr_gnt_i) |-> (instr_req_o) )
else $warning("There was a grant without a request");
`endif
`endif
// take care of control flow changes
if (set_pc_i) begin
valid = 1'b0;
// switch to new PC from ID stage
branch_req = 1'b1;
offset_fsm_ns = WAIT;
end
end
// -------------
// Branch Logic
// -------------
// Next-state logic for the branch prediction registers that are passed on to ID
// (branch_valid_q, predict_address_q, predict_taken_q).
// We need to pass those registers on to ID in the case we've set
// a new branch target (or jump) and we got a valid instruction
always_comb begin
// default: hold the currently stored prediction information
branch_valid_n = branch_valid_q;
predict_address_n = predict_address_q;
predict_taken_n = predict_taken_q;
// we got a branch redirect from PCGEN
if (is_branch_i) begin
// mark the prediction as valid and remember the fetch address as the predicted address
branch_valid_n = 1'b1;
predict_address_n = fetch_addr_i;
// whether we took the branch or not can be seen from the set PC
// nevertheless we also need to keep branches not taken
predict_taken_n = set_pc_i;
end
// we have a valid instruction and ID accepted it, so we consider all the
// branch information to be sampled correctly
// NOTE: this clear is evaluated last, so it overrides the set above when
// is_branch_i is asserted in the same cycle
if (if_valid && clear_instr_valid_i) begin
branch_valid_n = 1'b0;
end
end
// --------------------------------------------------------------
// IF-ID pipeline registers, frozen when the ID stage is stalled
// --------------------------------------------------------------
always_ff @(posedge clk_i, negedge rst_ni) begin : IF_ID_PIPE_REGISTERS
    if (!rst_ni) begin
        // asynchronous, active-low reset: everything goes back to its idle value
        // IF -> ID pipeline outputs
        instr_valid_id_o    <= 1'b0;
        instr_rdata_id_o    <= '0;
        illegal_c_insn_id_o <= 1'b0;
        is_compressed_id_o  <= 1'b0;
        pc_id_o             <= '0;
        ex_o                <= '{default: 0};
        // branch prediction registers
        branch_valid_q      <= 1'b0;
        predict_address_q   <= '0;
        predict_taken_q     <= 1'b0;
        // offset FSM state
        offset_fsm_cs       <= IDLE;
    end else begin
        // these registers are updated unconditionally every cycle
        offset_fsm_cs       <= offset_fsm_ns;
        branch_valid_q      <= branch_valid_n;
        predict_address_q   <= predict_address_n;
        predict_taken_q     <= predict_taken_n;

        // the IF -> ID registers only change on a hand-over (if_valid) or when
        // ID acknowledges the current instruction; otherwise they keep their value
        if (if_valid) begin
            // hand the (decompressed) instruction and its PC over to ID
            pc_id_o             <= pc_if_o;
            instr_rdata_id_o    <= instr_decompressed;
            is_compressed_id_o  <= instr_compressed_int;
            illegal_c_insn_id_o <= illegal_c_insn;
            instr_valid_id_o    <= 1'b1;
            ex_o.cause          <= '0;   // TODO: Output exception
            ex_o.tval           <= '0;   // TODO: Output exception
            ex_o.valid          <= 1'b0; // TODO: Output exception
        end else if (clear_instr_valid_i) begin
            // nothing new to hand over, ID acknowledged: drop the valid flag
            instr_valid_id_o    <= 1'b0;
        end
    end
end
// Assignments
// the PC reported by this stage is the current fetch address
assign pc_if_o = fetch_addr;
// id stage acknowledged
assign clear_instr_valid_i = id_ready_i;
// ready: the offset FSM has a valid instruction and ID can take it
assign if_ready = valid & id_ready_i;
// valid: ready and the stage is not halted (halt_if_i)
assign if_valid = (~halt_if_i) & if_ready;
// the busy status is taken directly from the prefetch buffer
assign if_busy_o = prefetch_busy;
// branch prediction outputs are driven straight from the registers
assign branch_valid_o = branch_valid_q;
assign predict_address_o = predict_address_q;
assign predict_taken_o = predict_taken_q;
//-------------
// Assertions
//-------------
// The check below is only compiled when neither SYNTHESIS nor VERILATOR is defined.
`ifndef SYNTHESIS
`ifndef VERILATOR
// there should never be a grant when there was no request
// (checked on every rising clock edge; a violation only raises a $warning)
assert property (
@(posedge clk_i) (instr_gnt_i) |-> (instr_req_o) )
else $warning("There was a grant without a request");
`endif
`endif
endmodule

View file

@ -25,7 +25,7 @@ module pcgen (
input logic flush_i,
input logic [63:0] pc_if_i,
input branchpredict branchpredict_i, // from controller signaling a branchpredict -> update BTB
input branchpredict branchpredict_i, // from controller signaling a branchpredict -> update BTB
// to IF
output logic [63:0] pc_if_o, // new PC
output logic set_pc_o, // request the PC to be set to pc_if_o