mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-23 13:47:13 -04:00
193 lines
7.8 KiB
Systemverilog
193 lines
7.8 KiB
Systemverilog
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 2.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-2.0. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Migrated: Luis Vitorio Cargnini, IEEE
// Date: 09.06.2018
//
// Additional contributions by:
// Sebastien Jacq, Thales - sjthales on github.com
// Date: 2022-12-01
//
// Description: This module is an adaptation of the BTB (Branch Target Buffer)
//              module for both FPGA and ASIC targets.
//              On FPGA the prediction target address is stored in BRAM, whereas
//              in the original module the target address is stored in D flip-flops.
//              On FPGA, flushing is not supported because the frontend module's
//              flushing signal is not connected.
//
// branch target buffer
// btb: table of predicted branch targets, looked up with the fetch PC.
//
// Lookup:  `vpc_i` selects one row; the row holds one `btb_prediction_t` per
//          instruction slot of a fetch, and all slots of the row are driven
//          onto `btb_prediction_o`.
// Update:  when `btb_update_i.valid` is set and `debug_mode_i` is low, the
//          entry addressed by `btb_update_i.pc` is marked valid and receives
//          `btb_update_i.target_address`.
//
// Two storage back-ends are selected at elaboration time by `CVA6Cfg.FpgaEn`:
//   * FPGA (`gen_fpga_btb`): one `SyncDpRam` per instruction slot; port A is
//     used for updates, port B for predictions. `flush_bp_i` is not used here.
//   * ASIC (`gen_asic_btb`): a flip-flop array with asynchronous active-low
//     reset; `flush_bp_i` clears every valid bit.
module btb #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter type btb_update_t = logic,
    parameter type btb_prediction_t = logic,
    parameter int NR_ENTRIES = 8
) (
    // Subsystem Clock - SUBSYSTEM
    input logic clk_i,
    // Asynchronous reset active low - SUBSYSTEM
    input logic rst_ni,
    // Branch prediction flush request - zero
    input logic flush_bp_i,
    // Debug mode state - CSR
    input logic debug_mode_i,
    // Virtual PC - CACHE
    input logic [CVA6Cfg.VLEN-1:0] vpc_i,
    // Update BTB with resolved address - EXECUTE
    input btb_update_t btb_update_i,
    // BTB Prediction - FRONTEND
    output btb_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] btb_prediction_o
);
  // number of low PC bits skipped for indexing: 1 when compressed
  // instructions (RVC) are enabled, 2 otherwise
  localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
  // re-shape the branch target buffer: NR_ENTRIES entries are arranged as
  // NR_ROWS rows of INSTR_PER_FETCH entries each
  localparam NR_ROWS = NR_ENTRIES / CVA6Cfg.INSTR_PER_FETCH;
  // number of PC bits that select the instruction slot inside a row
  localparam ROW_ADDR_BITS = $clog2(CVA6Cfg.INSTR_PER_FETCH);
  // width of `update_row_index`; kept at 1 bit when RVC is disabled so the
  // signal never becomes zero-width
  localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(CVA6Cfg.INSTR_PER_FETCH) : 1;
  // number of low PC bits (including the skipped OFFSET bits) covered by the
  // prediction index
  localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
  // prevent aliasing to degrade performance
  // NOTE(review): not referenced anywhere else in this module
  localparam ANTIALIAS_BITS = 8;
  // number of bits per word in the bram
  localparam BRAM_WORD_BITS = $bits(btb_prediction_t);

  // we are not interested in all bits of the address
  unread i_unread (.d_i(|vpc_i));

  // row selected by the fetch PC (`index`) and by the update PC (`update_pc`)
  logic [$clog2(NR_ROWS)-1:0] index, update_pc;
  // instruction slot inside the row that an update writes to
  logic [ROW_INDEX_BITS-1:0] update_row_index;

  assign index     = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
  assign update_pc = btb_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];

  if (CVA6Cfg.RVC) begin : gen_update_row_index
    assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
  end else begin : gen_update_row_index_zero
    // without RVC every update goes to slot 0
    assign update_row_index = '0;
  end

  if (CVA6Cfg.FpgaEn) begin : gen_fpga_btb  //FPGA TARGETS
    // Per-slot RAM signals are flattened into single vectors; slot i occupies
    // the slice [i*W +: W] where W is the per-slot width.

    // port B: prediction (read) side
    logic [                   CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
    logic [                   CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
    logic [CVA6Cfg.INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
    logic [    CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
    logic [    CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;

    // port A: update (write) side
    logic [                   CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_csel_update;
    logic [                   CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_we_update;
    logic [CVA6Cfg.INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
    logic [    CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;

    // output matching prediction
    // every slot RAM is always selected, never written through this port, and
    // addressed with the same row index
    for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_btb_output
      assign btb_ram_csel_prediction[i] = 1'b1;
      assign btb_ram_we_prediction[i] = 1'b0;
      assign btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = '0;
      assign btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
      assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS];
    end

    // -------------------------
    // Update Branch Prediction
    // -------------------------
    // write the resolved target whenever a valid update arrives outside of
    // debug mode
    always_comb begin : update_branch_predict
      // default: no RAM is selected or written
      btb_ram_csel_update = '0;
      btb_ram_we_update = '0;
      btb_ram_addr_update = '0;
      btb_ram_wdata_update = '0;

      if (btb_update_i.valid && !debug_mode_i) begin
        for (int i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin
          // only the RAM of the slot named by the update PC is written
          if (update_row_index == i) begin
            btb_ram_csel_update[i] = 1'b1;
            btb_ram_we_update[i] = 1'b1;
            btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
            // NOTE(review): assumes btb_prediction_t packs the valid bit
            // directly above target_address - confirm against the typedef
            btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
              1'b1, btb_update_i.target_address
            };
          end
        end
      end
    end

    // one dual-port RAM per instruction slot
    for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_btb_ram
      SyncDpRam #(
          .ADDR_WIDTH($clog2(NR_ROWS)),
          .DATA_DEPTH(NR_ROWS),
          .DATA_WIDTH(BRAM_WORD_BITS),
          .OUT_REGS  (0),
          .SIM_INIT  (1)
      ) i_btb_ram (
          .Clk_CI    (clk_i),
          .Rst_RBI   (rst_ni),
          //----------------------------
          // port A: update; read data is left unconnected
          .CSelA_SI  (btb_ram_csel_update[i]),
          .WrEnA_SI  (btb_ram_we_update[i]),
          .AddrA_DI  (btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .WrDataA_DI(btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdDataA_DO(),
          //-----------------------------
          // port B: prediction
          .CSelB_SI  (btb_ram_csel_prediction[i]),
          .WrEnB_SI  (btb_ram_we_prediction[i]),
          .AddrB_DI  (btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
          .WrDataB_DI(btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
          .RdDataB_DO(btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
      );
    end

  end else begin : gen_asic_btb  // ASIC TARGET

    // typedef for all branch target entries
    // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
    btb_prediction_t
        btb_d[NR_ROWS-1:0][CVA6Cfg.INSTR_PER_FETCH-1:0],
        btb_q[NR_ROWS-1:0][CVA6Cfg.INSTR_PER_FETCH-1:0];

    // output matching prediction
    for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_btb_output
      assign btb_prediction_o[i] = btb_q[index][i];  // workaround
    end

    // -------------------------
    // Update Branch Prediction
    // -------------------------
    // compute the next table state: unchanged, except for the entry hit by a
    // valid update outside of debug mode
    always_comb begin : update_branch_predict
      btb_d = btb_q;

      if (btb_update_i.valid && !debug_mode_i) begin
        btb_d[update_pc][update_row_index].valid = 1'b1;
        // the target address is simply updated
        btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
      end
    end

    // sequential process
    always_ff @(posedge clk_i or negedge rst_ni) begin
      if (!rst_ni) begin
        // reset clears every field of every entry
        for (int i = 0; i < NR_ROWS; i++) btb_q[i] <= '{default: 0};
      end else begin
        // evict all entries; a flush takes precedence over an update that
        // arrives in the same cycle (btb_d is not stored)
        if (flush_bp_i) begin
          for (int i = 0; i < NR_ROWS; i++) begin
            for (int j = 0; j < CVA6Cfg.INSTR_PER_FETCH; j++) begin
              btb_q[i][j].valid <= 1'b0;
            end
          end
        end else begin
          btb_q <= btb_d;
        end
      end
    end
  end
endmodule
|