[Prefetch buffer] - various bug fixes

- Fix incorrect address output to mepc on interrupt (fixes #320)
- Fix instruction address changing before grant (fixes #296)
- Suppress requests and reg writes on fetch error (fixes #340)
- Remove excess address flops in fetch_fifo
- Remove restriction on outstanding requests
This commit is contained in:
Tom Roberts 2019-09-18 15:37:42 +01:00 committed by Tom Roberts
parent b03ae4e2a7
commit f2fccaadbc
7 changed files with 151 additions and 148 deletions

View file

@ -49,25 +49,3 @@ Protocol
The protocol used to communicate with the instruction cache or the instruction memory is very similar to the protocol used by the LSU on the data interface of Ibex.
See the description of the LSU in :ref:`LSU Protocol<lsu-protocol>` for details about this protocol.
.. caution::
The IF protocol differs from the LSU protocol in that the address can change while the request is valid (``instr_req_o`` is high).
This allows the core to immediately update the instruction fetch address when a branch occurs.
As depicted in :numref:`if_timing_difference`, care has to be taken when working with the address.
The data returned must match the address during the grant cycle.
.. wavedrom::
:name: if_timing_difference
:caption: Memory transaction with wait states
{"signal":
[
{"name": "clk", "wave": "p......"},
{"name": "instr_req_o", "wave": "01..0.."},
{"name": "instr_addr_o", "wave": "x=.=xxx", "data": ["Addr1", "Addr2"]},
{"name": "instr_gnt_i", "wave": "0..10.."},
{"name": "instr_rvalid_i", "wave": "0....10"},
{"name": "instr_rdata_i", "wave": "xxxxx=x", "data": ["RData2"]}
],
"config": { "hscale": 2 }
}

View file

@ -56,15 +56,15 @@ lint_off -msg UNUSED -file "*/rtl/ibex_pmp.sv" -lines 16
// Signal is not used: csr_pmp_addr
// Signal not connected when PMP is not configured
lint_off -msg UNUSED -file "*/rtl/ibex_core.sv" -lines 185
lint_off -msg UNUSED -file "*/rtl/ibex_core.sv" -lines 186
// Signal is not used: csr_pmp_cfg
// Signal not connected when PMP is not configured
lint_off -msg UNUSED -file "*/rtl/ibex_core.sv" -lines 186
lint_off -msg UNUSED -file "*/rtl/ibex_core.sv" -lines 187
// Signal is not used: priv_mode
// Signal not connected when PMP is not configured
lint_off -msg UNUSED -file "*/rtl/ibex_core.sv" -lines 198
lint_off -msg UNUSED -file "*/rtl/ibex_core.sv" -lines 199
// Signal unoptimizable: Feedback to clock or circular logic:
// ibex_core.id_stage_i.controller_i.ctrl_fsm_cs

View file

@ -122,7 +122,7 @@ module ibex_controller (
// glitches
always_ff @(negedge clk_i) begin
// print warning in case of decoding errors
if ((ctrl_fsm_cs == DECODE) && instr_valid_i && illegal_insn) begin
if ((ctrl_fsm_cs == DECODE) && instr_valid_i && !instr_fetch_err_i && illegal_insn) begin
$display("%t: Illegal instruction (hart %0x) at PC 0x%h: 0x%h", $time, ibex_core.hart_id_i,
ibex_id_stage.pc_id_i, ibex_id_stage.instr_rdata_i);
end
@ -296,21 +296,21 @@ module ibex_controller (
if (instr_valid_i) begin
// get ready for special instructions, exceptions, pipeline flushes
if (special_req) begin
ctrl_fsm_ns = FLUSH;
halt_if = 1'b1;
halt_id = 1'b1;
// set PC in IF stage to branch or jump target
if (branch_set_i || jump_set_i) begin
end else if (branch_set_i || jump_set_i) begin
pc_mux_o = PC_JUMP;
pc_set_o = 1'b1;
perf_tbranch_o = branch_set_i;
perf_jump_o = jump_set_i;
// get ready for special instructions, exceptions, pipeline flushes
end else if (special_req) begin
ctrl_fsm_ns = FLUSH;
halt_if = 1'b1;
halt_id = 1'b1;
end
// stall IF stage to not starve debug and interrupt requests, these just
// need to wait until after the current (multicycle) instruction
if ((enter_debug_mode || handle_irq) && stall) begin

View file

@ -147,6 +147,7 @@ module ibex_core #(
// CSR control
logic csr_access;
logic valid_csr_id;
csr_op_e csr_op;
csr_num_e csr_addr;
logic [31:0] csr_rdata;
@ -554,6 +555,9 @@ module ibex_core #(
assign perf_load = data_req_o & data_gnt_i & (~data_we_o);
assign perf_store = data_req_o & data_gnt_i & data_we_o;
// CSR access is qualified by instruction fetch error
assign valid_csr_id = instr_new_id & ~instr_fetch_err;
ibex_cs_registers #(
.MHPMCounterNum ( MHPMCounterNum ),
.MHPMCounterWidth ( MHPMCounterWidth ),
@ -618,7 +622,7 @@ module ibex_core #(
.csr_mtval_i ( csr_mtval ),
.illegal_csr_insn_o ( illegal_csr_insn_id ),
.instr_new_id_i ( instr_new_id ),
.instr_new_id_i ( valid_csr_id ),
// performance counter related signals
.instr_ret_i ( instr_ret ),

View file

@ -9,7 +9,9 @@
* input port: send address and data to the FIFO
* clear_i clears the FIFO for the following cycle, including any new request
*/
module ibex_fetch_fifo (
module ibex_fetch_fifo #(
parameter int unsigned NUM_REQS = 2
) (
input logic clk_i,
input logic rst_ni,
@ -31,10 +33,9 @@ module ibex_fetch_fifo (
output logic out_err_o
);
localparam int unsigned DEPTH = 3; // must be 3 or greater
localparam int unsigned DEPTH = NUM_REQS+1;
// index 0 is used for output
logic [DEPTH-1:0] [31:2] addr_d, addr_q;
logic [DEPTH-1:0] [31:0] rdata_d, rdata_q;
logic [DEPTH-1:0] err_d, err_q;
logic [DEPTH-1:0] valid_d, valid_q;
@ -47,9 +48,11 @@ module ibex_fetch_fifo (
logic err, err_unaligned;
logic valid, valid_unaligned;
logic entry0_unaligned_d, entry0_unaligned_q;
logic aligned_is_compressed, unaligned_is_compressed;
logic addr_incr_two;
logic [31:1] instr_addr_d, instr_addr_q;
logic instr_addr_en;
logic unused_addr_in;
/////////////////
@ -70,21 +73,6 @@ module ibex_fetch_fifo (
// The FIFO also has a direct bypass path, so a complete instruction might be made up of data
// from the FIFO and new incoming data.
//
// Additionally, branches can cause a fetch from an unaligned address. The full data word will be
// fetched, but the FIFO must output the unaligned instruction as the first valid data.
// Alignment is tracked with a flag, this records whether entry[0] of the FIFO has become unaligned.
// The flag is set once any compressed instruction enters the FIFO and is only cleared once a
// a compressed instruction realigns the FIFO, or the FIFO is cleared.
// New incoming unaligned request (must be a branch) or already unaligned
assign entry0_unaligned_d = ((((in_valid_i & in_addr_i[1]) | entry0_unaligned_q) &
// cleared by a compressed unaligned instruction
~(out_ready_i & unaligned_is_compressed)) |
// Also set when a new aligned compressed instruction is driven
(valid & out_ready_i & ~out_addr_o[1] & aligned_is_compressed)) &
// reset by a FIFO clear
~clear_i;
// Construct the output data for an unaligned instruction
assign rdata_unaligned = valid_q[1] ? {rdata_q[1][15:0], rdata[31:16]} :
@ -129,8 +117,29 @@ module ibex_fetch_fifo (
end
end
assign out_addr_o[31:2] = valid_q[0] ? addr_q[0] : in_addr_i[31:2];
assign out_addr_o[1] = valid_q[0] ? entry0_unaligned_q : in_addr_i[1];
/////////////////////////
// Instruction address //
/////////////////////////
// Update the address on branches and every time an instruction is driven
assign instr_addr_en = clear_i | (out_ready_i & out_valid_o);
// Increment the address by two every time a compressed instruction is popped
assign addr_incr_two = instr_addr_q[1] ? unaligned_is_compressed :
aligned_is_compressed;
assign instr_addr_d = clear_i ? in_addr_i[31:1] :
(instr_addr_q[31:1] +
// Increment address by 4 or 2
{29'd0,~addr_incr_two,addr_incr_two});
always_ff @(posedge clk_i) begin
if (instr_addr_en) begin
instr_addr_q <= instr_addr_d;
end
end
assign out_addr_o[31:1] = instr_addr_q[31:1];
assign out_addr_o[0] = 1'b0;
// The LSB of the address is unused, since all addresses are halfword aligned
@ -140,10 +149,10 @@ module ibex_fetch_fifo (
// input port //
////////////////
// we accept data as long as our FIFO is not full
// we don't care about clear here as the data will be received one cycle
// later anyway
assign in_ready_o = ~valid_q[DEPTH-2];
// Accept data as long as our FIFO has space to accept the maximum number of outstanding
// requests. Note that the prefetch buffer does not count how many requests are actually
// outstanding, so space must be reserved for the maximum number.
assign in_ready_o = ~valid_q[DEPTH-NUM_REQS];
/////////////////////
// FIFO management //
@ -157,7 +166,7 @@ module ibex_fetch_fifo (
if (i == 0) begin : g_ent0
assign lowest_free_entry[i] = ~valid_q[i];
end else begin : g_ent_others
assign lowest_free_entry[i] = ~valid_q[i] & (&valid_q[i-1:0]);
assign lowest_free_entry[i] = ~valid_q[i] & valid_q[i-1];
end
// An entry is set when an incoming request chooses the lowest available entry
@ -174,17 +183,15 @@ module ibex_fetch_fifo (
(in_valid_i & lowest_free_entry[i] & ~pop_fifo);
// take the next entry or the incoming data
assign addr_d [i] = valid_q[i+1] ? addr_q [i+1] : in_addr_i[31:2];
assign rdata_d[i] = valid_q[i+1] ? rdata_q[i+1] : in_rdata_i;
assign err_d [i] = valid_q[i+1] ? err_q [i+1] : in_err_i;
end
// The top entry is similar but with simpler muxing
assign lowest_free_entry[DEPTH-1] = ~valid_q[DEPTH-1] & (&valid_q[DEPTH-2:0]);
assign lowest_free_entry[DEPTH-1] = ~valid_q[DEPTH-1] & valid_q[DEPTH-2];
assign valid_pushed [DEPTH-1] = valid_q[DEPTH-1] | (in_valid_i & lowest_free_entry[DEPTH-1]);
assign valid_popped [DEPTH-1] = pop_fifo ? 1'b0 : valid_pushed[DEPTH-1];
assign valid_d [DEPTH-1] = valid_popped[DEPTH-1] & ~clear_i;
assign entry_en[DEPTH-1] = in_valid_i & lowest_free_entry[DEPTH-1];
assign addr_d [DEPTH-1] = in_addr_i[31:2];
assign rdata_d [DEPTH-1] = in_rdata_i;
assign err_d [DEPTH-1] = in_err_i;
@ -194,18 +201,15 @@ module ibex_fetch_fifo (
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
valid_q <= '0;
entry0_unaligned_q <= '0;
valid_q <= '0;
end else begin
valid_q <= valid_d;
entry0_unaligned_q <= entry0_unaligned_d;
valid_q <= valid_d;
end
end
for (genvar i = 0; i < DEPTH; i++) begin : g_fifo_regs
always_ff @(posedge clk_i) begin
if (entry_en[i]) begin
addr_q[i] <= addr_d[i];
rdata_q[i] <= rdata_d[i];
err_q[i] <= err_d[i];
end

View file

@ -469,7 +469,9 @@ module ibex_id_stage #(
// being executed. This is the case if the current instr is either:
// - a new instr (not yet done)
// - a multicycle instr that is not yet done
assign instr_executing = instr_new_i | (instr_multicycle & ~instr_multicycle_done_q);
// An instruction error will suppress any requests or register writes
assign instr_executing = (instr_new_i | (instr_multicycle & ~instr_multicycle_done_q)) &
~instr_fetch_err_i;
assign data_req_id = instr_executing ? data_req_dec : 1'b0;
assign mult_en_id = instr_executing ? mult_en_dec : 1'b0;
assign div_en_id = instr_executing ? div_en_dec : 1'b0;
@ -536,7 +538,7 @@ module ibex_id_stage #(
IDLE: begin
// only detect multicycle when instruction is new, do not re-detect after
// execution (when waiting for next instruction from IF stage)
if (instr_new_i) begin
if (instr_new_i & ~instr_fetch_err_i) begin
unique case (1'b1)
data_req_dec: begin
// LSU operation

View file

@ -39,20 +39,21 @@ module ibex_prefetch_buffer (
output logic busy_o
);
// Changes to the address flops would be required for > 2 outstanding requests
localparam int unsigned NUM_REQS = 2;
localparam int unsigned NUM_REQS = 2;
logic valid_req;
logic valid_new_req, valid_req;
logic valid_req_d, valid_req_q;
logic hold_addr_d, hold_addr_q;
logic discard_req_d, discard_req_q;
logic gnt_or_pmp_err, rvalid_or_pmp_err;
logic [NUM_REQS-1:0] rdata_outstanding_n, rdata_outstanding_s, rdata_outstanding_q;
logic [NUM_REQS-1:0] branch_abort_n, branch_abort_s, branch_abort_q;
logic [NUM_REQS-1:0] branch_discard_n, branch_discard_s, branch_discard_q;
logic [NUM_REQS-1:0] rdata_pmp_err_n, rdata_pmp_err_s, rdata_pmp_err_q;
logic [31:0] instr_addr_q, fetch_addr;
logic [31:0] stored_addr_d, stored_addr_q;
logic stored_addr_en;
logic [31:0] fetch_addr_d, fetch_addr_q;
logic fetch_addr_en;
logic [31:0] instr_addr, instr_addr_w_aligned;
logic addr_valid;
logic pmp_err_q;
logic instr_or_pmp_err;
logic fifo_valid;
@ -71,19 +72,21 @@ module ibex_prefetch_buffer (
// Instruction fetch errors are valid on the data phase of a request
// PMP errors are generated in the address phase, and registered into a fake data phase
assign instr_or_pmp_err = instr_err_i | pmp_err_q;
assign instr_or_pmp_err = instr_err_i | rdata_pmp_err_q[0];
// A branch will invalidate any previously fetched instructions
assign fifo_clear = branch_i;
ibex_fetch_fifo fifo_i (
ibex_fetch_fifo #(
.NUM_REQS (NUM_REQS)
) fifo_i (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.clear_i ( fifo_clear ),
.in_valid_i ( fifo_valid ),
.in_addr_i ( instr_addr_q ),
.in_addr_i ( addr_i ),
.in_rdata_i ( instr_rdata_i ),
.in_err_i ( instr_or_pmp_err ),
.in_ready_o ( fifo_ready ),
@ -101,55 +104,72 @@ module ibex_prefetch_buffer (
//////////////
// Make a new request any time there is space in the FIFO, and space in the request queue
assign valid_req = valid_req_q | (req_i & (fifo_ready | branch_i) & (~&rdata_outstanding_q));
assign valid_new_req = req_i & (fifo_ready | branch_i) & ~rdata_outstanding_q[NUM_REQS-1];
assign valid_req = valid_req_q | valid_new_req;
// If a request address triggers a PMP error, the external bus request is suppressed. We might
// therefore never receive a grant for such a request. The grant is faked in this case to make
// sure the request proceeds and the error is pushed to the FIFO.
// We always use the registered version of the signal since it will be held stable throughout
// the request, and the penalty of waiting for an extra cycle to consume the error is irrelevant.
// A branch could update the address (and therefore data_pmp_err_i) on the cycle a request is
// issued, in which case we must ignore the registered version.
assign gnt_or_pmp_err = instr_gnt_i | (pmp_err_q & ~branch_i);
assign gnt_or_pmp_err = instr_gnt_i | instr_pmp_err_i;
// As with the grant, the rvalid must be faked for a PMP error, since the request was suppressed.
// Since the pmp_err_q flop is only updated when the address updates, it will always point to the
// PMP error status of the oldest outstanding request
assign rvalid_or_pmp_err = rdata_outstanding_q[0] & (instr_rvalid_i | pmp_err_q);
assign rvalid_or_pmp_err = rdata_outstanding_q[0] & (instr_rvalid_i | rdata_pmp_err_q[0]);
// Hold the request stable for requests that didn't get granted
assign valid_req_d = valid_req & ~instr_gnt_i;
assign valid_req_d = valid_req & ~gnt_or_pmp_err;
// Hold the address stable for requests that couldn't be issued, or didn't get granted.
// This is different to valid_req_q since there are cases where we must use addr+4 for
// an ungranted request rather than addr_q (where addr_q has not been updated).
assign hold_addr_d = (branch_i | hold_addr_q) & ~(valid_req & instr_gnt_i);
// Record whether an outstanding bus request is cancelled by a branch
assign discard_req_d = valid_req_q & (branch_i | discard_req_q);
////////////////
// Fetch addr //
////////////////
// The address flop is used to hold the address steady for ungranted requests and also to
// push the address to the FIFO for completed requests. For this reason, the address is only
// updated once a request is the oldest outstanding to ensure that newer requests do not
// overwrite the addresses of older ones. Branches are an exception to this, since all older
// addresses will be discarded due to the branch.
// Two addresses are tracked in the prefetch buffer:
// 1. stored_addr_q - This is the address issued on the bus. It stays stable until
// the request is granted.
// 2. fetch_addr_q - This is our next address to fetch from. It is updated on branches to
// capture the new address, and then for each new request issued.
// A third address is tracked in the fetch FIFO itself:
// 3. instr_addr_q - This is the address at the head of the FIFO, efectively our oldest fetched
// address. This address is updated on branches, and does its own increment
// each time the FIFO is popped.
// Update the addr_q flop on any branch, or
assign addr_valid = branch_i |
// A new request which will be the oldest, or
(valid_req & instr_gnt_i & ~rdata_outstanding_q[0]) |
// each time a valid request becomes the oldest
(rvalid_or_pmp_err & ~branch_abort_q[0] &
((valid_req & instr_gnt_i) | rdata_outstanding_q[1]));
// 1. stored_addr_q
// Fetch the next word-aligned instruction address
assign fetch_addr = {instr_addr_q[31:2], 2'b00} + 32'd4;
// Only update stored_addr_q for new ungranted requests
assign stored_addr_en = valid_new_req & ~valid_req_q & ~gnt_or_pmp_err;
// Store whatever address was issued on the bus
assign stored_addr_d = instr_addr;
// CPU resets with a branch, so no need to reset these addresses
always_ff @(posedge clk_i) begin
if (stored_addr_en) begin
stored_addr_q <= stored_addr_d;
end
end
// 2. fetch_addr_q
// Update on a branch or as soon as a request is issued
assign fetch_addr_en = branch_i | (valid_new_req & ~valid_req_q);
assign fetch_addr_d = (branch_i ? addr_i :
{fetch_addr_q[31:2], 2'b00}) +
// Current address + 4
{{29{1'b0}},(valid_new_req & ~valid_req_q),2'b00};
always_ff @(posedge clk_i) begin
if (fetch_addr_en) begin
fetch_addr_q <= fetch_addr_d;
end
end
// Address mux
assign instr_addr = branch_i ? addr_i :
hold_addr_q ? instr_addr_q :
fetch_addr;
assign instr_addr = valid_req_q ? stored_addr_q :
branch_i ? addr_i :
fetch_addr_q;
assign instr_addr_w_aligned = {instr_addr[31:2], 2'b00};
@ -161,31 +181,43 @@ module ibex_prefetch_buffer (
// Request 0 (always the oldest outstanding request)
if (i == 0) begin : g_req0
// A request becomes outstanding once granted, and is cleared once the rvalid is received.
// Outstanding requests shift down the queue towards entry 0. Entry 0 considers the PMP
// error cases while newer entries do not (pmp_err_q is only valid for entry 0)
// Outstanding requests shift down the queue towards entry 0.
assign rdata_outstanding_n[i] = (valid_req & gnt_or_pmp_err) |
rdata_outstanding_q[i];
// If a branch is received at any point while a request is outstanding, it must be tracked
// to ensure we discard the data once received
assign branch_abort_n[i] = (branch_i & rdata_outstanding_q[i]) | branch_abort_q[i];
assign branch_discard_n[i] = (valid_req & gnt_or_pmp_err & discard_req_d) |
(branch_i & rdata_outstanding_q[i]) | branch_discard_q[i];
// Record whether this request received a PMP error
assign rdata_pmp_err_n[i] = (valid_req & ~rdata_outstanding_q[i] & instr_pmp_err_i) |
rdata_pmp_err_q[i];
end else begin : g_reqtop
// Entries > 0 consider the FIFO fill state to calculate their next state (by checking
// whether the previous entry is valid)
assign rdata_outstanding_n[i] = (valid_req & instr_gnt_i &
(&rdata_outstanding_q[i-1:0])) |
assign rdata_outstanding_n[i] = (valid_req & gnt_or_pmp_err &
rdata_outstanding_q[i-1]) |
rdata_outstanding_q[i];
assign branch_abort_n[i] = (branch_i & rdata_outstanding_q[i]) | branch_abort_q[i];
assign branch_discard_n[i] = (valid_req & gnt_or_pmp_err & discard_req_d &
rdata_outstanding_q[i-1]) |
(branch_i & rdata_outstanding_q[i]) | branch_discard_q[i];
assign rdata_pmp_err_n[i] = (valid_req & ~rdata_outstanding_q[i] & instr_pmp_err_i &
rdata_outstanding_q[i-1]) |
rdata_pmp_err_q[i];
end
end
// Shift the entries down on each instr_rvalid_i
assign rdata_outstanding_s = rvalid_or_pmp_err ? {1'b0,rdata_outstanding_n[NUM_REQS-1:1]} :
rdata_outstanding_n;
assign branch_abort_s = rvalid_or_pmp_err ? {1'b0,branch_abort_n[NUM_REQS-1:1]} :
branch_abort_n;
assign branch_discard_s = rvalid_or_pmp_err ? {1'b0,branch_discard_n[NUM_REQS-1:1]} :
branch_discard_n;
assign rdata_pmp_err_s = rvalid_or_pmp_err ? {1'b0,rdata_pmp_err_n[NUM_REQS-1:1]} :
rdata_pmp_err_n;
// Push a new entry to the FIFO once complete (and not aborted by a branch)
assign fifo_valid = rvalid_or_pmp_err & ~branch_abort_q[0];
// Push a new entry to the FIFO once complete (and not cancelled by a branch)
assign fifo_valid = rvalid_or_pmp_err & ~branch_discard_q[0];
///////////////
// Registers //
@ -194,22 +226,16 @@ module ibex_prefetch_buffer (
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
valid_req_q <= 1'b0;
hold_addr_q <= 1'b0;
discard_req_q <= 1'b0;
rdata_outstanding_q <= 'b0;
branch_abort_q <= 'b0;
branch_discard_q <= 'b0;
rdata_pmp_err_q <= 'b0;
end else begin
valid_req_q <= valid_req_d;
hold_addr_q <= hold_addr_d;
discard_req_q <= discard_req_d;
rdata_outstanding_q <= rdata_outstanding_s;
branch_abort_q <= branch_abort_s;
end
end
// CPU resets with a branch, so no need to reset these
always_ff @(posedge clk_i) begin
if (addr_valid) begin
instr_addr_q <= instr_addr;
pmp_err_q <= instr_pmp_err_i;
branch_discard_q <= branch_discard_s;
rdata_pmp_err_q <= rdata_pmp_err_s;
end
end
@ -217,18 +243,7 @@ module ibex_prefetch_buffer (
// Outputs //
/////////////
assign instr_req_o = valid_req;
assign instr_addr_o = instr_addr_w_aligned;
////////////////
// Assertions //
////////////////
`ifndef VERILATOR
// Code changes required to support > 2 outstanding requests
assert property (
@(posedge clk_i) disable iff (!rst_ni)
(NUM_REQS <= 2) );
`endif
assign instr_req_o = valid_req;
assign instr_addr_o = instr_addr_w_aligned;
endmodule