cva6/core/frontend/frontend.sv
AngelaGonzalezMarino c389382c89
Altera opt 2 (#2602)
The second optimization for Altera FPGA is to move the BHT to LUTRAM. As before, the optimization previously done for Xilinx does not work here because it relies on asynchronous RAM primitives, and Altera does not support asynchronous RAM. Therefore, this optimization consists of using synchronous RAM for the BHT.

The main changes to the existing code are:

New RAM module to infer synchronous RAM in altera with 2 independent read ports and one write port (SyncThreePortRam.sv)

Changes in the frontend.sv file: modify input to vpc_i port of BHT, by advancing the address to read, in order to compensate for the delay of synchronous RAM.

Changes in the bht.sv file: This case is more complex because of the logic operations that need to be performed inside the BHT. First, the entry at the pc pointed to by bht_update_i is read from the memory, modified according to the saturation counter and valid bit, and finally written back to the memory. The prediction output is based on vpc_i. With asynchronous memory, the new data written via update_i is available one clock cycle after writing it. So, if vpc_i tries to read the address that was previously written by update_i, everything is fine. However, with synchronous memory there are three clock cycles of latency (one for reading the pc content (read port 1), another one for writing it, and another one for reading on the other port (read port 0)). For this reason, the design needs to be adapted to these new latency constraints:

First, there is the need for a delay on the address write of the synchronous RAM, to wait for the previous pc read and store the right modified data.

Once this is solved, similarly to the FIFO case, an auxiliary buffer is needed that stores the data written in the FIFO, so that it is available 2 clock cycles after update_i was valid. This is because, once the correct data is available, the RAM takes 2 clock cycles until the data can appear at the output (one clock cycle for writing and one for reading).

Finally, there is a multiplexer in the output that permits to deliver the correct prediction providing the data from the update logic (1 cycle of delay), the auxiliary register (2 cycles of delay), or the RAM (3 or more cycles of delay), depending on the delay since the update_i was valid (i.e. written to the memory).
2024-11-21 23:36:18 +01:00

586 lines
23 KiB
Systemverilog

// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Description: Ariane Instruction Fetch Frontend
//
// This module interfaces with the instruction cache, handles control
// change request from the back-end and does branch prediction.
module frontend
import ariane_pkg::*;
#(
// Core configuration; this module reads VLEN, GPLEN, FETCH_WIDTH, INSTR_PER_FETCH,
// FETCH_ALIGN_BITS, NrIssuePorts, the predictor sizes and several feature enables from it
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
// Type of the resolved-branch input (resolved_branch_i)
parameter type bp_resolve_t = logic,
// Type of one entry handed to the decode stage (fetch_entry_o)
parameter type fetch_entry_t = logic,
// Type of the request sent to the instruction cache (icache_dreq_o)
parameter type icache_dreq_t = logic,
// Type of the response received from the instruction cache (icache_dreq_i)
parameter type icache_drsp_t = logic
) (
// Subsystem Clock - SUBSYSTEM
input logic clk_i,
// Asynchronous reset active low - SUBSYSTEM
input logic rst_ni,
// Next PC when reset - SUBSYSTEM
input logic [CVA6Cfg.VLEN-1:0] boot_addr_i,
// Flush branch prediction - zero
input logic flush_bp_i,
// Flush requested by FENCE, mis-predict and exception - CONTROLLER
input logic flush_i,
// Halt requested by WFI and Accelerate port - CONTROLLER
input logic halt_i,
// Set COMMIT PC as next PC requested by FENCE, CSR side-effect and Accelerate port - CONTROLLER
input logic set_pc_commit_i,
// COMMIT PC - COMMIT
input logic [CVA6Cfg.VLEN-1:0] pc_commit_i,
// Exception event - COMMIT
input logic ex_valid_i,
// Mispredict event and next PC - EXECUTE
input bp_resolve_t resolved_branch_i,
// Return from exception event - CSR
input logic eret_i,
// Next PC when returning from exception - CSR
input logic [CVA6Cfg.VLEN-1:0] epc_i,
// Next PC when jumping into exception - CSR
input logic [CVA6Cfg.VLEN-1:0] trap_vector_base_i,
// Debug event - CSR
input logic set_debug_pc_i,
// Debug mode state - CSR
input logic debug_mode_i,
// Handshake between CACHE and FRONTEND (fetch) - CACHES
output icache_dreq_t icache_dreq_o,
// Handshake between CACHE and FRONTEND (fetch) - CACHES
input icache_drsp_t icache_dreq_i,
// Handshake's data between fetch and decode - ID_STAGE
output fetch_entry_t [CVA6Cfg.NrIssuePorts-1:0] fetch_entry_o,
// Handshake's valid between fetch and decode - ID_STAGE
output logic [CVA6Cfg.NrIssuePorts-1:0] fetch_entry_valid_o,
// Handshake's ready between fetch and decode - ID_STAGE
input logic [CVA6Cfg.NrIssuePorts-1:0] fetch_entry_ready_i
);
// Update request for the branch history table; driven from resolved_branch_i
// and passed to the bht instance through its bht_update_i port.
localparam type bht_update_t = struct packed {
logic valid;
logic [CVA6Cfg.VLEN-1:0] pc; // update at PC
logic taken; // resolved direction (assigned from resolved_branch_i.is_taken)
};
// Prediction delivered by the branch target buffer for one instruction slot.
localparam type btb_prediction_t = struct packed {
logic valid;
logic [CVA6Cfg.VLEN-1:0] target_address;
};
// Update request for the branch target buffer; driven from resolved_branch_i.
localparam type btb_update_t = struct packed {
logic valid;
logic [CVA6Cfg.VLEN-1:0] pc; // update at PC
logic [CVA6Cfg.VLEN-1:0] target_address;
};
// Prediction delivered by the return address stack.
localparam type ras_t = struct packed {
logic valid;
logic [CVA6Cfg.VLEN-1:0] ra; // used as the predicted target of a return
};
// Instruction Cache Registers, from I$
// (registered copies of the icache_dreq_i response, captured in the always_ff below)
logic [ CVA6Cfg.FETCH_WIDTH-1:0] icache_data_q;
logic icache_valid_q;
ariane_pkg::frontend_exception_t icache_ex_valid_q;
logic [ CVA6Cfg.VLEN-1:0] icache_vaddr_q;
logic [ CVA6Cfg.GPLEN-1:0] icache_gpaddr_q;
logic [ 31:0] icache_tinst_q;
logic icache_gva_q;
// status outputs of the instruction queue (ready_o / consumed_o)
logic instr_queue_ready;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] instr_queue_consumed;
// upper-most branch-prediction from last cycle
btb_prediction_t btb_q;
bht_prediction_t bht_q;
// instruction fetch is ready
logic if_ready;
logic [CVA6Cfg.VLEN-1:0] npc_d, npc_q; // next PC
// indicates whether we come out of reset (then we need to load boot_addr_i)
logic npc_rst_load_q;
// replay request and address, both driven by the instruction queue
logic replay;
logic [ CVA6Cfg.VLEN-1:0] replay_addr;
// shift amount
// (counted in 16-bit units: the fetch data is shifted right by {shamt, 4'b0} bits)
logic [$clog2(CVA6Cfg.INSTR_PER_FETCH)-1:0] shamt;
// address will always be 16 bit aligned, make this explicit here
if (CVA6Cfg.RVC) begin : gen_shamt
assign shamt = icache_dreq_i.vaddr[$clog2(CVA6Cfg.INSTR_PER_FETCH):1];
end else begin
// without RVC the fetch data is never shifted
assign shamt = 1'b0;
end
// -----------------------
// Ctrl Flow Speculation
// -----------------------
// RVI ctrl flow prediction
// (per-slot outputs of the instr_scan instances)
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0][CVA6Cfg.VLEN-1:0] rvi_imm;
// RVC branching
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0][CVA6Cfg.VLEN-1:0] rvc_imm;
// re-aligned instruction and address (coming from cache - combinationally)
logic [CVA6Cfg.INSTR_PER_FETCH-1:0][ 31:0] instr;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0][CVA6Cfg.VLEN-1:0] addr;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] instruction_valid;
// BHT, BTB and RAS prediction
bht_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] bht_prediction;
btb_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] btb_prediction;
// predictions re-indexed so that entry i belongs to re-aligned instruction i
bht_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] bht_prediction_shifted;
btb_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] btb_prediction_shifted;
ras_t ras_predict;
// lookup addresses presented to the BTB and the BHT
logic [ CVA6Cfg.VLEN-1:0] vpc_btb;
logic [ CVA6Cfg.VLEN-1:0] vpc_bht;
// branch-predict update
logic is_mispredict;
logic ras_push, ras_pop;
// address pushed onto the RAS when a call is consumed
logic [ CVA6Cfg.VLEN-1:0] ras_update;
// Instruction FIFO
logic [ CVA6Cfg.VLEN-1:0] predict_address;
cf_t [CVA6Cfg.INSTR_PER_FETCH-1:0] cf_type;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] taken_rvi_cf;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] taken_rvc_cf;
// driven by instr_realign; when set, slot 0 uses the prediction saved in btb_q/bht_q
logic serving_unaligned;
// Re-align the registered fetch data into per-slot instructions and addresses.
instr_realign #(
  .CVA6Cfg(CVA6Cfg)
) i_instr_realign (
  // clock, reset and flush (flushed together with the second cache stage)
  .clk_i              (clk_i),
  .rst_ni             (rst_ni),
  .flush_i            (icache_dreq_o.kill_s2),
  // registered fetch response
  .valid_i            (icache_valid_q),
  .address_i          (icache_vaddr_q),
  .data_i             (icache_data_q),
  // re-aligned result
  .serving_unaligned_o(serving_unaligned),
  .valid_o            (instruction_valid),
  .addr_o             (addr),
  .instr_o            (instr)
);
// --------------------
// Branch Prediction
// --------------------
// Select the BHT/BTB prediction that belongs to each re-aligned instruction.
// The predictor outputs are indexed by the position inside the fetch block, so
// the low address bits of every re-aligned instruction are used as the index.
// In case we are serving an unaligned instruction in instr[0] we need to take
// the prediction we saved from the previous fetch (bht_q / btb_q).
if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted
  assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q :
      bht_prediction[addr[0][$clog2(CVA6Cfg.INSTR_PER_FETCH):1]];
  assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q :
      btb_prediction[addr[0][$clog2(CVA6Cfg.INSTR_PER_FETCH):1]];
  // for all other predictions we can use the generated address to index
  // into the branch prediction data structures
  for (genvar i = 1; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_prediction_address
    assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(CVA6Cfg.INSTR_PER_FETCH):1]];
    assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(CVA6Cfg.INSTR_PER_FETCH):1]];
  end
end else begin
  // Without RVC only slot 0 is driven here, indexed by address bit 1.
  assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][1]];
  assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][1]];
end
// For the return address stack no such selection is needed, as we already
// have the address of the call/return.
logic                               bp_valid;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] is_branch;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] is_call;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] is_jump;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] is_return;
logic [CVA6Cfg.INSTR_PER_FETCH-1:0] is_jalr;
// Classify every fetch slot by the kind of control flow it holds. A slot is
// only classified when the re-aligner marks it as valid.
for (genvar slot = 0; slot < CVA6Cfg.INSTR_PER_FETCH; slot++) begin
  // conditional branch -> handled by the BHT
  assign is_branch[slot] = (rvi_branch[slot] | rvc_branch[slot]) & instruction_valid[slot];
  // function call -> pushes onto the RAS
  assign is_call[slot] = (rvi_call[slot] | rvc_call[slot]) & instruction_valid[slot];
  // function return -> pops from the RAS
  assign is_return[slot] = (rvi_return[slot] | rvc_return[slot]) & instruction_valid[slot];
  // unconditional jump with known target -> immediately resolved
  assign is_jump[slot] = (rvi_jump[slot] | rvc_jump[slot]) & instruction_valid[slot];
  // unconditional jump with unknown target -> BTB (returns are excluded)
  assign is_jalr[slot] = (rvi_jalr[slot] | rvc_jalr[slot] | rvc_jr[slot])
                       & ~is_return[slot] & instruction_valid[slot];
end
// taken/not taken
// Derives, for the current fetch block, the per-slot control-flow type
// (cf_type), the per-slot taken flags (taken_rvi_cf / taken_rvc_cf), the single
// predicted target (predict_address) and the RAS push/pop/update controls.
// predict_address, ras_push, ras_pop and ras_update are shared by all slots:
// each iteration may overwrite what an earlier iteration assigned.
always_comb begin
// defaults: no control flow, nothing taken, RAS untouched
taken_rvi_cf = '0;
taken_rvc_cf = '0;
predict_address = '0;
for (int i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF;
ras_push = 1'b0;
ras_pop = 1'b0;
ras_update = '0;
// lower most prediction gets precedence
// (the loop runs from the highest slot down, so the lowest slot is evaluated
// last and its assignments to the shared signals win)
for (int i = CVA6Cfg.INSTR_PER_FETCH - 1; i >= 0; i--) begin
unique case ({
is_branch[i], is_return[i], is_jump[i], is_jalr[i]
})
4'b0000: ; // regular instruction e.g.: no branch
// unconditional jump to register, we need the BTB to resolve this
4'b0001: begin
ras_pop = 1'b0;
ras_push = 1'b0;
// only predicted when a BTB is configured and it holds a valid entry
if (CVA6Cfg.BTBEntries && btb_prediction_shifted[i].valid) begin
predict_address = btb_prediction_shifted[i].target_address;
cf_type[i] = ariane_pkg::JumpR;
end
end
// its an unconditional jump to an immediate
4'b0010: begin
ras_pop = 1'b0;
ras_push = 1'b0;
taken_rvi_cf[i] = rvi_jump[i];
taken_rvc_cf[i] = rvc_jump[i];
cf_type[i] = ariane_pkg::Jump;
end
// return
4'b0100: begin
// make sure to only alter the RAS if we actually consumed the instruction
ras_pop = ras_predict.valid & instr_queue_consumed[i];
ras_push = 1'b0;
predict_address = ras_predict.ra;
cf_type[i] = ariane_pkg::Return;
end
// branch prediction
4'b1000: begin
ras_pop = 1'b0;
ras_push = 1'b0;
// if we have a valid dynamic prediction use it
if (bht_prediction_shifted[i].valid) begin
taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken;
taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken;
// otherwise default to static prediction
end else begin
// set if immediate is negative - static prediction
taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][CVA6Cfg.VLEN-1];
taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][CVA6Cfg.VLEN-1];
end
// a branch predicted not-taken keeps cf_type[i] at NoCF
if (taken_rvi_cf[i] || taken_rvc_cf[i]) begin
cf_type[i] = ariane_pkg::Branch;
end
end
// more than one class bit set: nothing is assigned for this slot
default: ;
// default: $error("Decoded more than one control flow");
endcase
// if this instruction, in addition, is a call, save the resulting address
// but only if we actually consumed the address
if (is_call[i]) begin
ras_push = instr_queue_consumed[i];
// address of the instruction following the call (2 bytes for RVC, 4 otherwise)
ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
end
// calculate the jump target address
// (overrides predict_address for taken branches and immediate jumps)
if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin
predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]);
end
end
end
// OR-reduce the per-slot control-flow types into one "prediction valid" flag.
// A slot contributes when it holds any control flow; a return contributes
// only if the RAS actually supplies a valid address.
always_comb begin
  bp_valid = 1'b0;
  for (int slot = 0; slot < CVA6Cfg.INSTR_PER_FETCH; slot++) begin
    bp_valid = bp_valid
             | ((cf_type[slot] != NoCF) & ((cf_type[slot] != Return) | ras_predict.valid));
  end
end
// A resolved branch redirects the frontend only when it is flagged as mispredicted.
assign is_mispredict = resolved_branch_i.is_mispredict & resolved_branch_i.valid;
// Cache interface: request new data whenever the instruction queue is ready
assign icache_dreq_o.req = instr_queue_ready;
assign if_ready = instr_queue_ready & icache_dreq_i.ready;
// The first cache pipeline stage is flushed when:
// 1. the fetch-fifo was full and an instruction has to be replayed
// 2. the whole processor front-end is flushed
// 3. a branch was mispredicted
assign icache_dreq_o.kill_s1 = replay | flush_i | is_mispredict;
// A valid branch prediction only kills the last cache request (second stage).
// Killing the first stage always kills the second stage too (inclusive flush).
assign icache_dreq_o.kill_s2 = bp_valid | icache_dreq_o.kill_s1;
// Update Control Flow Predictions
bht_update_t bht_update;
btb_update_t btb_update;
// Speculation flag: asserted on a branch/return/jalr in the fetch block,
// held until a branch is resolved, and always cleared by a flush.
logic speculative_d, speculative_q;
assign speculative_d = !flush_i
    && ((speculative_q && !resolved_branch_i.valid) || (|is_branch) || (|is_return) || (|is_jalr));
assign icache_dreq_o.spec = speculative_d;
// BHT: every resolved conditional branch updates the table
assign bht_update.pc    = resolved_branch_i.pc;
assign bht_update.taken = resolved_branch_i.is_taken;
assign bht_update.valid = (resolved_branch_i.cf_type == ariane_pkg::Branch)
                        & resolved_branch_i.valid;
// BTB: only update mispredicted register jumps, e.g. no returns from the RAS
assign btb_update.pc             = resolved_branch_i.pc;
assign btb_update.target_address = resolved_branch_i.target_address;
assign btb_update.valid = (resolved_branch_i.cf_type == ariane_pkg::JumpR)
                        & resolved_branch_i.is_mispredict
                        & resolved_branch_i.valid;
// -------------------
// Next PC
// -------------------
// next PC (NPC) can come from the sources below. They are evaluated in this
// order and a later source overrides an earlier one, so the list is in order
// of increasing precedence:
// 0. Branch Predict taken
// 1. Default assignment (next fetch block)
// 2. Replay instruction fetch
// 3. Control flow change request (misprediction)
// 4. Return from environment call
// 5. Exception/Interrupt
// 6. Pipeline Flush because of CSR side effects
// 7. Debug
// Mis-predict handling is a little bit different
// select PC a.k.a PC Gen
always_comb begin : npc_select
// address presented to the instruction cache in this cycle
automatic logic [CVA6Cfg.VLEN-1:0] fetch_address;
// check whether we come out of reset
// this is a workaround. some tools have issues
// having boot_addr_i in the asynchronous
// reset assignment to npc_q, even though
// boot_addr_i will be assigned a constant
// on the top-level.
if (npc_rst_load_q) begin
npc_d = boot_addr_i;
fetch_address = boot_addr_i;
end else begin
fetch_address = npc_q;
// keep stable by default
npc_d = npc_q;
end
// 0. Branch Prediction
if (bp_valid) begin
fetch_address = predict_address;
npc_d = predict_address;
end
// 1. Default assignment
// advance to the start of the next fetch block after fetch_address
// (which already reflects a valid branch prediction, if any)
if (if_ready) begin
npc_d = {
fetch_address[CVA6Cfg.VLEN-1:CVA6Cfg.FETCH_ALIGN_BITS] + 1, {CVA6Cfg.FETCH_ALIGN_BITS{1'b0}}
};
end
// 2. Replay instruction fetch
if (replay) begin
npc_d = replay_addr;
end
// 3. Control flow change request
if (is_mispredict) begin
npc_d = resolved_branch_i.target_address;
end
// 4. Return from environment call
if (eret_i) begin
npc_d = epc_i;
end
// 5. Exception/Interrupt
if (ex_valid_i) begin
npc_d = trap_vector_base_i;
end
// 6. Pipeline Flush because of CSR side effects
// On a pipeline flush start fetching from the next address
// of the instruction in the commit stage
// we either came here from a flush request of a CSR instruction or AMO,
// so as CSR or AMO instructions do not exist in a compressed form
// we can unconditionally do PC + 4 here
// or if the commit stage is halted, just take the current pc of the
// instruction in the commit stage
// TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
if (set_pc_commit_i) begin
npc_d = pc_commit_i + (halt_i ? '0 : {{CVA6Cfg.VLEN - 3{1'b0}}, 3'b100});
end
// 7. Debug
// enter debug on a hard-coded base-address
if (CVA6Cfg.DebugEn && set_debug_pc_i)
npc_d = CVA6Cfg.DmBaseAddress[CVA6Cfg.VLEN-1:0] + CVA6Cfg.HaltAddress[CVA6Cfg.VLEN-1:0];
// the redirects 2..7 only change npc_d; the cache still sees fetch_address
icache_dreq_o.vaddr = fetch_address;
end
logic [CVA6Cfg.FETCH_WIDTH-1:0] icache_data;
// re-align the cache line
// ({shamt, 4'b0} shifts right by shamt * 16 bits)
assign icache_data = icache_dreq_i.data >> {shamt, 4'b0};
// Frontend state: next PC, speculation flag and the registered fetch response.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
// request loading boot_addr_i in the first cycle after reset
npc_rst_load_q <= 1'b1;
npc_q <= '0;
speculative_q <= '0;
icache_data_q <= '0;
icache_valid_q <= 1'b0;
icache_vaddr_q <= 'b0;
icache_gpaddr_q <= 'b0;
icache_tinst_q <= 'b0;
icache_gva_q <= 1'b0;
icache_ex_valid_q <= ariane_pkg::FE_NONE;
btb_q <= '0;
bht_q <= '0;
end else begin
npc_rst_load_q <= 1'b0;
npc_q <= npc_d;
speculative_q <= speculative_d;
icache_valid_q <= icache_dreq_i.valid;
// data, address, exception info and saved predictions are only
// captured on a valid cache response; otherwise they hold their value
if (icache_dreq_i.valid) begin
icache_data_q <= icache_data;
icache_vaddr_q <= icache_dreq_i.vaddr;
if (CVA6Cfg.RVH) begin
icache_gpaddr_q <= icache_dreq_i.ex.tval2[CVA6Cfg.GPLEN-1:0];
icache_tinst_q <= icache_dreq_i.ex.tinst;
icache_gva_q <= icache_dreq_i.ex.gva;
end else begin
icache_gpaddr_q <= 'b0;
icache_tinst_q <= 'b0;
icache_gva_q <= 1'b0;
end
// Map the only three exceptions which can occur in the frontend to a two bit enum
// NOTE(review): only ex.cause is inspected here, no exception-valid flag;
// presumably the cache drives a non-matching cause when there is no
// exception - confirm against the cache implementation.
if (CVA6Cfg.MmuPresent && icache_dreq_i.ex.cause == riscv::INSTR_GUEST_PAGE_FAULT) begin
icache_ex_valid_q <= ariane_pkg::FE_INSTR_GUEST_PAGE_FAULT;
end else if (CVA6Cfg.MmuPresent && icache_dreq_i.ex.cause == riscv::INSTR_PAGE_FAULT) begin
icache_ex_valid_q <= ariane_pkg::FE_INSTR_PAGE_FAULT;
end else if (icache_dreq_i.ex.cause == riscv::INSTR_ACCESS_FAULT) begin
icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT;
end else begin
icache_ex_valid_q <= ariane_pkg::FE_NONE;
end
// save the uppermost prediction
// (used for slot 0 when the re-aligner reports serving_unaligned)
btb_q <= btb_prediction[CVA6Cfg.INSTR_PER_FETCH-1];
bht_q <= bht_prediction[CVA6Cfg.INSTR_PER_FETCH-1];
end
end
end
// Return address stack. With a configured depth of zero no stack is
// instantiated and the prediction is tied to zero (never valid).
if (CVA6Cfg.RASDepth != 0) begin : ras_gen
  ras #(
    .CVA6Cfg(CVA6Cfg),
    .ras_t  (ras_t),
    .DEPTH  (CVA6Cfg.RASDepth)
  ) i_ras (
    .clk_i     (clk_i),
    .rst_ni    (rst_ni),
    .flush_bp_i(flush_bp_i),
    .push_i    (ras_push),
    .pop_i     (ras_pop),
    .data_i    (ras_update),
    .data_o    (ras_predict)
  );
end else begin
  assign ras_predict = '0;
end
// Lookup address selection for the branch predictor tables.
// For FPGA, the BTB is implemented in read-synchronous BRAM, while for ASIC it
// is implemented in D flip-flops and can be read in the same cycle. The FPGA
// variant is therefore addressed with the incoming cache response address
// instead of the registered one.
assign vpc_btb = CVA6Cfg.FpgaEn ? icache_dreq_i.vaddr : icache_vaddr_q;
// The BHT uses the incoming address only for the Altera FPGA configuration,
// and only while the cache response is valid; otherwise it is addressed with
// the registered fetch address.
assign vpc_bht = (icache_dreq_i.valid && CVA6Cfg.FpgaAlteraEn && CVA6Cfg.FpgaEn)
               ? icache_dreq_i.vaddr
               : icache_vaddr_q;
// Branch target buffer; tied off when configured without entries.
if (CVA6Cfg.BTBEntries != 0) begin : btb_gen
  btb #(
    .CVA6Cfg         (CVA6Cfg),
    .btb_update_t    (btb_update_t),
    .btb_prediction_t(btb_prediction_t),
    .NR_ENTRIES      (CVA6Cfg.BTBEntries)
  ) i_btb (
    .clk_i           (clk_i),
    .rst_ni          (rst_ni),
    .flush_bp_i      (flush_bp_i),
    .debug_mode_i    (debug_mode_i),
    .vpc_i           (vpc_btb),
    .btb_update_i    (btb_update),
    .btb_prediction_o(btb_prediction)
  );
end else begin
  assign btb_prediction = '0;
end
// Branch history table; tied off when configured without entries.
if (CVA6Cfg.BHTEntries != 0) begin : bht_gen
  bht #(
    .CVA6Cfg     (CVA6Cfg),
    .bht_update_t(bht_update_t),
    .NR_ENTRIES  (CVA6Cfg.BHTEntries)
  ) i_bht (
    .clk_i           (clk_i),
    .rst_ni          (rst_ni),
    .flush_bp_i      (flush_bp_i),
    .debug_mode_i    (debug_mode_i),
    .vpc_i           (vpc_bht),
    .bht_update_i    (bht_update),
    .bht_prediction_o(bht_prediction)
  );
end else begin
  assign bht_prediction = '0;
end
// One scanner per fetch slot: up to CVA6Cfg.INSTR_PER_FETCH instructions have
// to be inspected for branches and jumps.
for (genvar slot = 0; slot < CVA6Cfg.INSTR_PER_FETCH; slot++) begin : gen_instr_scan
  instr_scan #(
    .CVA6Cfg(CVA6Cfg)
  ) i_instr_scan (
    .instr_i     (instr[slot]),
    // uncompressed (RVI) control flow
    .rvi_branch_o(rvi_branch[slot]),
    .rvi_jump_o  (rvi_jump[slot]),
    .rvi_jalr_o  (rvi_jalr[slot]),
    .rvi_call_o  (rvi_call[slot]),
    .rvi_return_o(rvi_return[slot]),
    .rvi_imm_o   (rvi_imm[slot]),
    // compressed (RVC) control flow
    .rvc_branch_o(rvc_branch[slot]),
    .rvc_jump_o  (rvc_jump[slot]),
    .rvc_jr_o    (rvc_jr[slot]),
    .rvc_jalr_o  (rvc_jalr[slot]),
    .rvc_call_o  (rvc_call[slot]),
    .rvc_return_o(rvc_return[slot]),
    .rvc_imm_o   (rvc_imm[slot])
  );
end
// Instruction queue between the frontend and the decode stage.
instr_queue #(
  .CVA6Cfg      (CVA6Cfg),
  .fetch_entry_t(fetch_entry_t)
) i_instr_queue (
  .clk_i              (clk_i),
  .rst_ni             (rst_ni),
  .flush_i            (flush_i),
  // re-aligned instructions (from the re-aligner)
  .valid_i            (instruction_valid),
  .instr_i            (instr),
  .addr_i             (addr),
  // exception information (registered I$ response)
  .exception_i        (icache_ex_valid_q),
  .exception_addr_i   (icache_vaddr_q),
  .exception_gpaddr_i (icache_gpaddr_q),
  .exception_tinst_i  (icache_tinst_q),
  .exception_gva_i    (icache_gva_q),
  // branch prediction attached to the instructions
  .predict_address_i  (predict_address),
  .cf_type_i          (cf_type),
  // queue status and replay request back to the PC generation
  .consumed_o         (instr_queue_consumed),
  .ready_o            (instr_queue_ready),
  .replay_o           (replay),
  .replay_addr_o      (replay_addr),
  // handshake with the back-end
  .fetch_entry_o      (fetch_entry_o),
  .fetch_entry_valid_o(fetch_entry_valid_o),
  .fetch_entry_ready_i(fetch_entry_ready_i)
);
// Simulation-only parameter check (excluded from synthesis and from Verilator):
// the frontend only supports fetch widths of 32 or 64 bits.
// pragma translate_off
`ifndef VERILATOR
initial begin
  assert (CVA6Cfg.FETCH_WIDTH == 32 || CVA6Cfg.FETCH_WIDTH == 64)
  else $fatal(1, "[frontend] fetch width %0d not supported (must be 32 or 64)", CVA6Cfg.FETCH_WIDTH);
end
`endif
// pragma translate_on
endmodule