Fix jalr stall and make jump more efficient

Jumps now take only one cycle instead of two. This is the best we can
achieve; we cannot do any better without a delay slot.
This commit is contained in:
Andreas Traber 2015-08-27 13:57:13 +02:00
parent 4baf8eaad9
commit 8c4a99b5ec
5 changed files with 63 additions and 45 deletions

View file

@ -1094,12 +1094,22 @@ module controller
DECODE:
begin
if (jump_in_id != `BRANCH_NONE) begin
// handle conditional branches
if (jump_in_id == `BRANCH_COND) begin
// handle branch if decision is available in next cycle
if (~stall_id_o)
ctrl_fsm_ns = BRANCH;
end
// handle unconditional jumps
// we can jump directly since we know the address already
if (jump_in_id == `BRANCH_JALR || jump_in_id == `BRANCH_JAL) begin
pc_mux_sel_o = `PC_JUMP;
if (~stall_id_o)
ctrl_fsm_ns = BRANCH_DELAY;
end
// handle illegal instructions
if (illegal_insn_int) begin
illegal_insn_o = 1'b1;
@ -1130,7 +1140,7 @@ module controller
ctrl_fsm_ns = DECODE;
end else begin
// branch taken or jump
pc_mux_sel_o = `PC_JUMP;
pc_mux_sel_o = `PC_BRANCH;
if (~stall_id_o)
ctrl_fsm_ns = BRANCH_DELAY;
end
@ -1203,14 +1213,12 @@ module controller
load_stall = 1'b1;
end
// TODO: check JALR/JR
// Stall because of jr path
// - Load results cannot directly be forwarded to PC
// - Multiplication results cannot be forwarded to PC
if ((instr_rdata_i[6:0] == `OPCODE_JALR) &&
(((regfile_we_wb_i == 1'b1) && (reg_d_wb_is_reg_b_id == 1'b1)) ||
((regfile_we_ex_i == 1'b1) && (reg_d_ex_is_reg_b_id == 1'b1)) ||
((regfile_alu_we_fw_i == 1'b1) && (reg_d_alu_is_reg_b_id == 1'b1))) )
// - always stall if a result is to be forwarded to the PC
if ((jump_in_id == `BRANCH_JALR) &&
(((regfile_we_wb_i == 1'b1) && (reg_d_wb_is_reg_a_id == 1'b1)) ||
((regfile_we_ex_i == 1'b1) && (reg_d_ex_is_reg_a_id == 1'b1)) ||
((regfile_alu_we_fw_i == 1'b1) && (reg_d_alu_is_reg_a_id == 1'b1))) )
begin
jr_stall = 1'b1;
deassert_we = 1'b1;
@ -1249,7 +1257,7 @@ module controller
// we unstall the if_stage if the debug unit wants to set a new
// pc, so that the new value gets written into current_pc_if and is
// used by the instr_core_interface
stall_if_o = instr_ack_stall | load_stall | jr_stall | lsu_stall | misalign_stall | dbg_halt | dbg_stall_i | (~pc_valid_i) | (jump_in_id_o != `BRANCH_NONE);
stall_if_o = instr_ack_stall | load_stall | jr_stall | lsu_stall | misalign_stall | dbg_halt | dbg_stall_i | (~pc_valid_i) | (jump_in_id_o == `BRANCH_COND);
stall_id_o = instr_ack_stall | load_stall | jr_stall | lsu_stall | misalign_stall | dbg_halt | dbg_stall_i;
stall_ex_o = instr_ack_stall | lsu_stall | dbg_stall_i;
stall_wb_o = lsu_stall | dbg_stall_i;

View file

@ -51,6 +51,7 @@ module id_stage
output logic [1:0] jump_in_id_o,
output logic [1:0] jump_in_ex_o,
input logic branch_decision_i,
output logic [31:0] jump_target_o,
// IF and ID stage signals
output logic compressed_instr_o,
@ -359,14 +360,16 @@ module id_stage
always_comb
begin
unique case (instr[6:0])
`OPCODE_JAL: jump_target = current_pc_id_i + imm_uj_type;
`OPCODE_JALR: jump_target = operand_a_fw_id + imm_i_type;
`OPCODE_BRANCH: jump_target = current_pc_id_i + imm_sb_type;
unique case (jump_in_id_o)
`BRANCH_JAL: jump_target = current_pc_id_i + imm_uj_type;
`BRANCH_JALR: jump_target = regfile_data_ra_id + imm_i_type; // cannot forward rA as path too long
`BRANCH_COND: jump_target = current_pc_id_i + imm_sb_type;
default: jump_target = '0;
endcase // unique case (jump_in_id_o)
end
assign jump_target_o = jump_target;
////////////////////////////////////////////////////////
// ___ _ _ //

View file

@ -67,8 +67,9 @@ module if_stage
// jump and branch target and decision
input logic [1:0] jump_in_id_i,
input logic [1:0] jump_in_ex_i, // jump in EX -> get PC from jump target (could also be branch)
input logic [31:0] jump_target_i, // jump target address
input logic [1:0] jump_in_ex_i, // jump in EX -> get PC from jump target (could also be branch)
input logic [31:0] jump_target_id_i, // jump target address
input logic [31:0] jump_target_ex_i, // jump target address
input logic branch_decision_i,
// from debug unit
@ -152,7 +153,8 @@ module if_stage
begin
unique case (pc_mux_sel_i)
`PC_BOOT: fetch_addr_n = {boot_addr_i[31:5], `EXC_OFF_RST};
`PC_JUMP: fetch_addr_n = {jump_target_i[31:2], 2'b0};
`PC_JUMP: fetch_addr_n = {jump_target_id_i[31:2], 2'b0};
`PC_BRANCH: fetch_addr_n = {jump_target_ex_i[31:2], 2'b0};
`PC_INCR: fetch_addr_n = fetch_addr + 32'd4; // incremented PC
`PC_EXCEPTION: fetch_addr_n = exc_pc; // set PC to exception handler
`PC_ERET: fetch_addr_n = exception_pc_reg_i; // PC is restored when returning from IRQ/exception
@ -167,6 +169,18 @@ module if_stage
endcase
end
always_comb
begin
unaligned_jump = 1'b0;
case (pc_mux_sel_i)
`PC_JUMP: unaligned_jump = jump_target_id_i[1];
`PC_BRANCH: unaligned_jump = jump_target_ex_i[1];
`PC_ERET: unaligned_jump = exception_pc_reg_i[1];
`PC_HWLOOP: unaligned_jump = pc_from_hwloop_i[1];
endcase
end
// cache fetch interface
instr_core_interface instr_core_if_i
@ -341,9 +355,11 @@ module if_stage
// Phew, lucky, we got a 16-bit instruction
valid_o = 1'b1;
// next instruction will be aligned
fetch_req = 1'b1;
offset_fsm_ns = WAIT_ALIGNED;
if (req_i && ~stall_if_i) begin
// next instruction will be aligned
fetch_req = 1'b1;
offset_fsm_ns = WAIT_ALIGNED;
end
end else begin
// a 32 bit unaligned instruction, let's fetch the upper half
@ -363,9 +379,8 @@ module if_stage
// take care of jumps and branches
if(~stall_id_i) begin
if (jump_in_ex_i != `BRANCH_NONE) begin
if ((jump_in_ex_i == `BRANCH_COND && branch_decision_i) ||
jump_in_ex_i == `BRANCH_JAL || jump_in_ex_i == `BRANCH_JALR) begin
if (jump_in_ex_i == `BRANCH_COND) begin
if (branch_decision_i) begin
// branch taken
fetch_req = 1'b1;
@ -373,32 +388,20 @@ module if_stage
offset_fsm_ns = WAIT_JUMPED_UNALIGNED;
else
offset_fsm_ns = WAIT_JUMPED_ALIGNED;
end else begin
// branch not taken
// we don't need to do anything?
end
end else if (jump_in_id_i != `BRANCH_NONE) begin
// new branch in ID, just wait
//fetch_req = 1'b0;
end else if (jump_in_id_i == `BRANCH_JAL || jump_in_id_i == `BRANCH_JALR) begin
fetch_req = 1'b1;
if (unaligned_jump)
offset_fsm_ns = WAIT_JUMPED_UNALIGNED;
else
offset_fsm_ns = WAIT_JUMPED_ALIGNED;
end
end
end
always_comb
begin
unaligned_jump = 1'b0;
case (pc_mux_sel_i)
`PC_JUMP: unaligned_jump = jump_target_i[1];
`PC_ERET: unaligned_jump = exception_pc_reg_i[1];
`PC_HWLOOP: unaligned_jump = pc_from_hwloop_i[1];
endcase
end
// store instr_core_if data in local cache
always_ff @(posedge clk, negedge rst_n)
begin

View file

@ -355,6 +355,7 @@ endfunction // prettyPrintInstruction
`define PC_BOOT 3'b000
`define PC_INCR 3'b001
`define PC_JUMP 3'b010
`define PC_BRANCH 3'b011
`define PC_EXCEPTION 3'b100
`define PC_ERET 3'b101
`define PC_HWLOOP 3'b110

View file

@ -93,7 +93,7 @@ module riscv_core
// Forwarding
// Jump and branch target and decision (EX->IF)
logic [31:0] jump_target;
logic [31:0] jump_target_id, jump_target_ex;
logic [1:0] jump_in_id;
logic [1:0] jump_in_ex;
logic branch_decision;
@ -282,7 +282,8 @@ module riscv_core
.jump_in_id_i ( jump_in_id ),
.jump_in_ex_i ( jump_in_ex ),
.branch_decision_i ( branch_decision ),
.jump_target_i ( jump_target ),
.jump_target_id_i ( jump_target_id ),
.jump_target_ex_i ( jump_target_ex ),
// pipeline stalls
.stall_if_i ( stall_if ),
@ -310,6 +311,8 @@ module riscv_core
.jump_in_ex_o ( jump_in_ex ),
.branch_decision_i ( branch_decision ),
.jump_target_o ( jump_target_id ),
.core_busy_o ( core_busy_o ),
// Interface to instruction memory
@ -492,7 +495,7 @@ module riscv_core
.hwloop_cnt_data_o ( hwlp_cnt_data_ex ),
// To IF: Jump and branch target and decision
.jump_target_o ( jump_target ),
.jump_target_o ( jump_target_ex ),
.branch_decision_o ( branch_decision ),
// To ID stage: Forwarding signals