mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-20 04:07:36 -04:00
BTB optimization for FPGA targets (#1016)
This commit is contained in:
parent
dc0ecfde0a
commit
6deffb27d7
11 changed files with 146 additions and 34 deletions
|
@ -89,6 +89,9 @@ ${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sq
|
|||
${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
|
||||
${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
|
||||
|
||||
//FPGA memories
|
||||
${CVA6_REPO_DIR}/common/local/techlib/fpga/rtl/SyncDpRam.sv
|
||||
|
||||
// Top-level source files (not necessarily instantiated at the top of the cva6).
|
||||
${CVA6_REPO_DIR}/core/ariane.sv
|
||||
${CVA6_REPO_DIR}/core/cva6.sv
|
||||
|
|
|
@ -12,7 +12,18 @@
|
|||
// Date: 08.02.2018
|
||||
// Migrated: Luis Vitorio Cargnini, IEEE
|
||||
// Date: 09.06.2018
|
||||
|
||||
//
|
||||
// Additional contributions by:
|
||||
// Sebastien Jacq, Thales - sjthales on github.com
|
||||
// Date: 2022-12-01
|
||||
//
|
||||
// Description: This module is an adaptation of the BTB (Branch Target Buffer)
|
||||
// module for both FPGA and ASIC targets.
|
||||
// Prediction target address is stored in BRAM on FPGA while for
|
||||
// original module, target address is stored in D flip-flop.
|
||||
// For FPGA flushing is not supported because the frontend module
|
||||
// flushing signal is not connected.
|
||||
//
|
||||
// branch target buffer
|
||||
module btb #(
|
||||
parameter int NR_ENTRIES = 8
|
||||
|
@ -32,18 +43,17 @@ module btb #(
|
|||
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
|
||||
// number of bits needed to index the row
|
||||
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
|
||||
localparam ROW_INDEX_BITS = ariane_pkg::RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1; //1
|
||||
localparam ROW_INDEX_BITS = ariane_pkg::RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
|
||||
// number of bits we should use for prediction
|
||||
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
|
||||
// prevent aliasing to degrade performance
|
||||
localparam ANTIALIAS_BITS = 8;
|
||||
// number of bits per word in the BRAM
|
||||
localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t);
|
||||
// we are not interested in all bits of the address
|
||||
unread i_unread (.d_i(|vpc_i));
|
||||
|
||||
// typedef for all branch target entries
|
||||
// we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
|
||||
ariane_pkg::btb_prediction_t btb_d [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
|
||||
btb_q [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
|
||||
|
||||
logic [$clog2(NR_ROWS)-1:0] index, update_pc;
|
||||
logic [ROW_INDEX_BITS-1:0] update_row_index;
|
||||
|
||||
|
@ -55,42 +65,118 @@ module btb #(
|
|||
assign update_row_index = '0;
|
||||
end
|
||||
|
||||
// output matching prediction
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
|
||||
assign btb_prediction_o[i] = btb_q[index][i]; // workaround
|
||||
end
|
||||
if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;
|
||||
|
||||
// -------------------------
|
||||
// Update Branch Prediction
|
||||
// -------------------------
|
||||
// update on a mis-predict
|
||||
always_comb begin : update_branch_predict
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;
|
||||
|
||||
// output matching prediction
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
|
||||
assign btb_ram_csel_prediction[i] = 1'b1;
|
||||
assign btb_ram_we_prediction[i] = 1'b0;
|
||||
assign btb_ram_wdata_prediction = '0;
|
||||
assign btb_ram_addr_prediction[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = index;
|
||||
assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS];
|
||||
end
|
||||
|
||||
// -------------------------
|
||||
// Update Branch Prediction
|
||||
// -------------------------
|
||||
// update on a mis-predict
|
||||
always_comb begin : update_branch_predict
|
||||
btb_ram_csel_update = '0;
|
||||
btb_ram_we_update = '0;
|
||||
btb_ram_addr_update = '0;
|
||||
btb_ram_wdata_update = '0;
|
||||
|
||||
if (btb_update_i.valid && !debug_mode_i) begin
|
||||
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
|
||||
if (update_row_index == i) begin
|
||||
btb_ram_csel_update[i] = 1'b1;
|
||||
btb_ram_we_update[i] = 1'b1;
|
||||
btb_ram_addr_update[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = update_pc;
|
||||
btb_ram_wdata_update[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] = {1'b1 , btb_update_i.target_address};
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram
|
||||
SyncDpRam #(
|
||||
.ADDR_WIDTH($clog2(NR_ROWS)),
|
||||
.DATA_DEPTH(NR_ROWS),
|
||||
.DATA_WIDTH(BRAM_WORD_BITS),
|
||||
.OUT_REGS (0),
|
||||
.SIM_INIT (1)
|
||||
) i_btb_ram (
|
||||
.Clk_CI ( clk_i ),
|
||||
.Rst_RBI ( rst_ni ),
|
||||
//----------------------------
|
||||
.CSelA_SI ( btb_ram_csel_update[i] ),
|
||||
.WrEnA_SI ( btb_ram_we_update[i] ),
|
||||
.AddrA_DI ( btb_ram_addr_update[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
|
||||
.WrDataA_DI ( btb_ram_wdata_update[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
|
||||
.RdDataA_DO ( ),
|
||||
//-----------------------------
|
||||
.CSelB_SI ( btb_ram_csel_prediction[i] ),
|
||||
.WrEnB_SI ( btb_ram_we_prediction[i] ),
|
||||
.AddrB_DI ( btb_ram_addr_prediction[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
|
||||
.WrDataB_DI ( btb_ram_wdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
|
||||
.RdDataB_DO ( btb_ram_rdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] )
|
||||
);
|
||||
end
|
||||
|
||||
end else begin : gen_asic_btb // ASIC TARGET
|
||||
|
||||
// typedef for all branch target entries
|
||||
// we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
|
||||
ariane_pkg::btb_prediction_t btb_d [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
|
||||
btb_q [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
|
||||
|
||||
// output matching prediction
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
|
||||
assign btb_prediction_o[i] = btb_q[index][i]; // workaround
|
||||
end
|
||||
|
||||
// -------------------------
|
||||
// Update Branch Prediction
|
||||
// -------------------------
|
||||
// update on a mis-predict
|
||||
always_comb begin : update_branch_predict
|
||||
btb_d = btb_q;
|
||||
|
||||
if (btb_update_i.valid && !debug_mode_i) begin
|
||||
btb_d[update_pc][update_row_index].valid = 1'b1;
|
||||
// the target address is simply updated
|
||||
btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
|
||||
btb_d[update_pc][update_row_index].valid = 1'b1;
|
||||
// the target address is simply updated
|
||||
btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// sequential process
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
// sequential process
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (!rst_ni) begin
|
||||
// Bias the branches to be taken upon first arrival
|
||||
for (int i = 0; i < NR_ROWS; i++)
|
||||
btb_q[i] <= '{default: 0};
|
||||
// Bias the branches to be taken upon first arrival
|
||||
for (int i = 0; i < NR_ROWS; i++)
|
||||
btb_q[i] <= '{default: 0};
|
||||
end else begin
|
||||
// evict all entries
|
||||
if (flush_i) begin
|
||||
for (int i = 0; i < NR_ROWS; i++) begin
|
||||
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
|
||||
btb_q[i][j].valid <= 1'b0;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
btb_q <= btb_d;
|
||||
// evict all entries
|
||||
if (flush_i) begin
|
||||
for (int i = 0; i < NR_ROWS; i++) begin
|
||||
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
|
||||
btb_q[i][j].valid <= 1'b0;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
btb_q <= btb_d;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
endmodule
|
||||
|
|
|
@ -95,6 +95,7 @@ module frontend import ariane_pkg::*; #(
|
|||
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
|
||||
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
|
||||
ras_t ras_predict;
|
||||
logic [riscv::VLEN-1:0] vpc_btb;
|
||||
|
||||
// branch-predict update
|
||||
logic is_mispredict;
|
||||
|
@ -396,6 +397,11 @@ module frontend import ariane_pkg::*; #(
|
|||
.data_i ( ras_update ),
|
||||
.data_o ( ras_predict )
|
||||
);
|
||||
|
||||
//For FPGA, BTB is implemented in read synchronous BRAM
|
||||
//while for ASIC, BTB is implemented in D flip-flop
|
||||
//and can be read at the same cycle.
|
||||
assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q;
|
||||
|
||||
btb #(
|
||||
.NR_ENTRIES ( ArianeCfg.BTBEntries )
|
||||
|
@ -404,7 +410,7 @@ module frontend import ariane_pkg::*; #(
|
|||
.rst_ni,
|
||||
.flush_i ( flush_bp_i ),
|
||||
.debug_mode_i,
|
||||
.vpc_i ( icache_vaddr_q ),
|
||||
.vpc_i ( vpc_btb ),
|
||||
.btb_update_i ( btb_update ),
|
||||
.btb_prediction_o ( btb_prediction )
|
||||
);
|
||||
|
|
|
@ -156,6 +156,9 @@ package ariane_pkg;
|
|||
// allocate more space for the commit buffer to be on the safe side, this needs to be a power of two
|
||||
localparam int unsigned DEPTH_COMMIT = 8;
|
||||
`endif
|
||||
|
||||
localparam bit FPGA_EN = cva6_config_pkg::CVA6ConfigFPGAEn; // Is FPGA optimization of CV32A6
|
||||
|
||||
localparam bit RVC = cva6_config_pkg::CVA6ConfigCExtEn; // Is C extension configuration
|
||||
|
||||
`ifdef PITON_ARIANE
|
||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
|||
localparam CVA6ConfigNrCommitPorts = 2;
|
||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||
|
||||
localparam CVA6ConfigFPGAEn = 0;
|
||||
|
||||
endpackage
|
||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
|||
localparam CVA6ConfigNrCommitPorts = 1;
|
||||
localparam CVA6ConfigNrScoreboardEntries = 4;
|
||||
|
||||
localparam CVA6ConfigFPGAEn = 1;
|
||||
|
||||
endpackage
|
||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
|||
localparam CVA6ConfigNrCommitPorts = 2;
|
||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||
|
||||
localparam CVA6ConfigFPGAEn = 0;
|
||||
|
||||
endpackage
|
||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
|||
localparam CVA6ConfigNrCommitPorts = 2;
|
||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||
|
||||
localparam CVA6ConfigFPGAEn = 0;
|
||||
|
||||
endpackage
|
||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
|||
localparam CVA6ConfigNrCommitPorts = 2;
|
||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||
|
||||
localparam CVA6ConfigFPGAEn = 0;
|
||||
|
||||
endpackage
|
||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
|||
localparam CVA6ConfigNrCommitPorts = 2;
|
||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||
|
||||
localparam CVA6ConfigFPGAEn = 0;
|
||||
|
||||
endpackage
|
||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
|||
localparam CVA6ConfigNrCommitPorts = 2;
|
||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||
|
||||
localparam CVA6ConfigFPGAEn = 0;
|
||||
|
||||
endpackage
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue