cva6/core/frontend/btb.sv
2024-10-02 23:31:40 +02:00

193 lines
7.8 KiB
Systemverilog

// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 2.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Migrated: Luis Vitorio Cargnini, IEEE
// Date: 09.06.2018
//
// Additional contributions by:
// Sebastien Jacq, Thales - sjthales on github.com
// Date: 2022-12-01
//
// Description: This module is an adaptation of the BTB (Branch Target Buffer)
// module for both FPGA and ASIC targets.
// On FPGA the predicted target address is stored in BRAM, whereas in
// the original module it is stored in D flip-flops.
// On FPGA, flushing is not supported because the frontend module's
// flush signal is not connected.
//
// Branch target buffer (BTB).
//
// Stores, per PC-indexed entry, a valid bit and a target address. On every
// cycle the row selected by `vpc_i` is presented on `btb_prediction_o`, one
// entry per instruction slot of the fetch word. Entries are written from
// `btb_update_i` whenever `btb_update_i.valid` is set and the core is not in
// debug mode.
//
// Two storage implementations are selected by `CVA6Cfg.FpgaEn`:
//   - gen_fpga_btb: one dual-port `SyncDpRam` per fetch slot. `flush_bp_i` is
//     not used in this branch.
//   - gen_asic_btb: a flip-flop array with combinational read; `flush_bp_i`
//     clears all valid bits.
//
// Parameters:
//   CVA6Cfg          - core configuration (uses VLEN, RVC, INSTR_PER_FETCH, FpgaEn)
//   btb_update_t     - update request type; fields used here: valid, pc, target_address
//   btb_prediction_t - prediction entry type; fields used here: valid, target_address
//   NR_ENTRIES       - total number of BTB entries (rows * INSTR_PER_FETCH)
module btb #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter type btb_update_t = logic,
    parameter type btb_prediction_t = logic,
    parameter int NR_ENTRIES = 8
) (
    // Subsystem Clock - SUBSYSTEM
    input logic clk_i,
    // Asynchronous reset active low - SUBSYSTEM
    input logic rst_ni,
    // Branch prediction flush request - zero
    input logic flush_bp_i,
    // Debug mode state - CSR
    input logic debug_mode_i,
    // Virtual PC - CACHE
    input logic [CVA6Cfg.VLEN-1:0] vpc_i,
    // Update BTB with resolved address - EXECUTE
    input btb_update_t btb_update_i,
    // BTB Prediction - FRONTEND
    output btb_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] btb_prediction_o
);
  // the lowest PC bit(s) are always zero, we don't need them for indexing:
  // one bit with compressed instructions (RVC), two bits otherwise
  localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
  // re-shape the branch target buffer: one row per fetch word
  localparam NR_ROWS = NR_ENTRIES / CVA6Cfg.INSTR_PER_FETCH;
  // number of PC bits that select the instruction slot inside a row
  localparam ROW_ADDR_BITS = $clog2(CVA6Cfg.INSTR_PER_FETCH);
  // width of the slot index; kept at 1 bit without RVC so the vector is never zero-width
  localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(CVA6Cfg.INSTR_PER_FETCH) : 1;
  // number of low-order PC bits covered by offset + slot + row index
  localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
  // number of bits per word in the BRAM
  localparam BRAM_WORD_BITS = $bits(btb_prediction_t);

  // we are not interested in all bits of the address
  unread i_unread (.d_i(|vpc_i));

  // row selected for prediction (index) and for update (update_pc)
  logic [$clog2(NR_ROWS)-1:0] index, update_pc;
  // instruction slot inside the updated row
  logic [ROW_INDEX_BITS-1:0] update_row_index;

  assign index     = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
  assign update_pc = btb_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];

  if (CVA6Cfg.RVC) begin : gen_update_row_index
    assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
  end else begin : gen_no_update_row_index
    // without RVC the slot index is tied to zero
    assign update_row_index = '0;
  end

  if (CVA6Cfg.FpgaEn) begin : gen_fpga_btb  // FPGA TARGETS
    // Flattened per-slot buses: slot i occupies bits [i*W +: W] of each vector.
    // *_prediction signals drive RAM port B (read), *_update signals drive
    // RAM port A (write).
    logic [                CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
    logic [                CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
    logic [CVA6Cfg.INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
    logic [ CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
    logic [ CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;

    logic [                CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_csel_update;
    logic [                CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_we_update;
    logic [CVA6Cfg.INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
    logic [ CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;

    // output matching prediction: every slot's RAM is always selected for a
    // read-only access at the row addressed by the fetch PC
    for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_btb_output
      assign btb_ram_csel_prediction[i] = 1'b1;
      assign btb_ram_we_prediction[i] = 1'b0;
      assign btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = '0;
      assign btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
      assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS];
    end

    // -------------------------
    // Update Branch Prediction
    // -------------------------
    // update on a mis-predict
    always_comb begin : update_branch_predict
      // default: no RAM is written
      btb_ram_csel_update = '0;
      btb_ram_we_update = '0;
      btb_ram_addr_update = '0;
      btb_ram_wdata_update = '0;

      if (btb_update_i.valid && !debug_mode_i) begin
        for (int i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin
          // only the RAM of the slot the updating instruction belongs to is written
          if (update_row_index == i) begin
            btb_ram_csel_update[i] = 1'b1;
            btb_ram_we_update[i] = 1'b1;
            btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
            // NOTE(review): assumes btb_prediction_t packs `valid` as the MSB
            // directly followed by `target_address` -- confirm against the type
            btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
              1'b1, btb_update_i.target_address
            };
          end
        end
      end
    end

    // one dual-port RAM per instruction slot: port A = update, port B = prediction
    for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_btb_ram
      SyncDpRam #(
          .ADDR_WIDTH($clog2(NR_ROWS)),
          .DATA_DEPTH(NR_ROWS),
          .DATA_WIDTH(BRAM_WORD_BITS),
          .OUT_REGS  (0),
          .SIM_INIT  (1)
      ) i_btb_ram (
          .Clk_CI    (clk_i),
          .Rst_RBI   (rst_ni),
          //----------------------------
          .CSelA_SI  (btb_ram_csel_update[i]),
          .WrEnA_SI  (btb_ram_we_update[i]),
          .AddrA_DI  (btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .WrDataA_DI(btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdDataA_DO(),
          //-----------------------------
          .CSelB_SI  (btb_ram_csel_prediction[i]),
          .WrEnB_SI  (btb_ram_we_prediction[i]),
          .AddrB_DI  (btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .WrDataB_DI(btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdDataB_DO(btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
      );
    end

  end else begin : gen_asic_btb  // ASIC TARGET

    // storage for all branch target entries: [row][instruction slot]
    // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
    btb_prediction_t
        btb_d[NR_ROWS-1:0][CVA6Cfg.INSTR_PER_FETCH-1:0],
        btb_q[NR_ROWS-1:0][CVA6Cfg.INSTR_PER_FETCH-1:0];

    // output matching prediction: combinational read of the selected row
    for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_btb_output
      assign btb_prediction_o[i] = btb_q[index][i];  // workaround
    end

    // -------------------------
    // Update Branch Prediction
    // -------------------------
    // update on a mis-predict
    always_comb begin : update_branch_predict
      // default: hold the current contents
      btb_d = btb_q;

      if (btb_update_i.valid && !debug_mode_i) begin
        btb_d[update_pc][update_row_index].valid = 1'b1;
        // the target address is simply updated
        btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
      end
    end

    // sequential process
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        // reset: zero every entry, so all entries start out invalid
        for (int i = 0; i < NR_ROWS; i++) btb_q[i] <= '{default: 0};
      end else begin
        // evict all entries; a flush takes precedence over an update
        // arriving in the same cycle (the update is dropped)
        if (flush_bp_i) begin
          for (int i = 0; i < NR_ROWS; i++) begin
            for (int j = 0; j < CVA6Cfg.INSTR_PER_FETCH; j++) begin
              btb_q[i][j].valid <= 1'b0;
            end
          end
        end else begin
          btb_q <= btb_d;
        end
      end
    end
  end
endmodule