BTB optimization for FPGA targets (#1016)

This commit is contained in:
sébastien jacq 2022-12-15 13:24:45 +01:00 committed by GitHub
parent dc0ecfde0a
commit 6deffb27d7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 146 additions and 34 deletions

View file

@ -89,6 +89,9 @@ ${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sq
${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
//FPGA memories
${CVA6_REPO_DIR}/common/local/techlib/fpga/rtl/SyncDpRam.sv
// Top-level source files (not necessarily instantiated at the top of the cva6).
${CVA6_REPO_DIR}/core/ariane.sv
${CVA6_REPO_DIR}/core/cva6.sv

View file

@ -12,7 +12,18 @@
// Date: 08.02.2018
// Migrated: Luis Vitorio Cargnini, IEEE
// Date: 09.06.2018
//
// Additional contributions by:
// Sebastien Jacq, Thales - sjthales on github.com
// Date: 2022-12-01
//
// Description: This module is an adaptation of the BTB (Branch Target Buffer)
// module for both FPGA and ASIC targets.
// On FPGA, the prediction target address is stored in BRAM, whereas in the
// original module the target address is stored in D flip-flops.
// On FPGA, flushing is not supported because the frontend module's
// flushing signal is not connected.
//
// branch target buffer
module btb #(
parameter int NR_ENTRIES = 8
@ -32,18 +43,17 @@ module btb #(
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
// number of bits needed to index the row
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
localparam ROW_INDEX_BITS = ariane_pkg::RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1; //1
localparam ROW_INDEX_BITS = ariane_pkg::RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
// number of bits we should use for prediction
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
// prevent aliasing to degrade performance
localparam ANTIALIAS_BITS = 8;
// number of bits per word in the BRAM
localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t);
// we are not interested in all bits of the address
unread i_unread (.d_i(|vpc_i));
// typedef for all branch target entries
// we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
ariane_pkg::btb_prediction_t btb_d [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
btb_q [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
logic [$clog2(NR_ROWS)-1:0] index, update_pc;
logic [ROW_INDEX_BITS-1:0] update_row_index;
@ -55,42 +65,118 @@ module btb #(
assign update_row_index = '0;
end
// output matching prediction
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
assign btb_prediction_o[i] = btb_q[index][i]; // workaround
end
if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;
// -------------------------
// Update Branch Prediction
// -------------------------
// update on a mis-predict
always_comb begin : update_branch_predict
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;
// output matching prediction
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
assign btb_ram_csel_prediction[i] = 1'b1;
assign btb_ram_we_prediction[i] = 1'b0;
assign btb_ram_wdata_prediction = '0;
assign btb_ram_addr_prediction[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = index;
assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS];
end
// -------------------------
// Update Branch Prediction
// -------------------------
// update on a mis-predict
always_comb begin : update_branch_predict
btb_ram_csel_update = '0;
btb_ram_we_update = '0;
btb_ram_addr_update = '0;
btb_ram_wdata_update = '0;
if (btb_update_i.valid && !debug_mode_i) begin
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
if (update_row_index == i) begin
btb_ram_csel_update[i] = 1'b1;
btb_ram_we_update[i] = 1'b1;
btb_ram_addr_update[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = update_pc;
btb_ram_wdata_update[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] = {1'b1 , btb_update_i.target_address};
end
end
end
end
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram
SyncDpRam #(
.ADDR_WIDTH($clog2(NR_ROWS)),
.DATA_DEPTH(NR_ROWS),
.DATA_WIDTH(BRAM_WORD_BITS),
.OUT_REGS (0),
.SIM_INIT (1)
) i_btb_ram (
.Clk_CI ( clk_i ),
.Rst_RBI ( rst_ni ),
//----------------------------
.CSelA_SI ( btb_ram_csel_update[i] ),
.WrEnA_SI ( btb_ram_we_update[i] ),
.AddrA_DI ( btb_ram_addr_update[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
.WrDataA_DI ( btb_ram_wdata_update[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
.RdDataA_DO ( ),
//-----------------------------
.CSelB_SI ( btb_ram_csel_prediction[i] ),
.WrEnB_SI ( btb_ram_we_prediction[i] ),
.AddrB_DI ( btb_ram_addr_prediction[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
.WrDataB_DI ( btb_ram_wdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
.RdDataB_DO ( btb_ram_rdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] )
);
end
end else begin : gen_asic_btb // ASIC TARGET
// typedef for all branch target entries
// we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
ariane_pkg::btb_prediction_t btb_d [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
btb_q [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
// output matching prediction
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
assign btb_prediction_o[i] = btb_q[index][i]; // workaround
end
// -------------------------
// Update Branch Prediction
// -------------------------
// update on a mis-predict
always_comb begin : update_branch_predict
btb_d = btb_q;
if (btb_update_i.valid && !debug_mode_i) begin
btb_d[update_pc][update_row_index].valid = 1'b1;
// the target address is simply updated
btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
btb_d[update_pc][update_row_index].valid = 1'b1;
// the target address is simply updated
btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
end
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
// Bias the branches to be taken upon first arrival
for (int i = 0; i < NR_ROWS; i++)
btb_q[i] <= '{default: 0};
// Bias the branches to be taken upon first arrival
for (int i = 0; i < NR_ROWS; i++)
btb_q[i] <= '{default: 0};
end else begin
// evict all entries
if (flush_i) begin
for (int i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
btb_q[i][j].valid <= 1'b0;
end
end
end else begin
btb_q <= btb_d;
// evict all entries
if (flush_i) begin
for (int i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
btb_q[i][j].valid <= 1'b0;
end
end
end else begin
btb_q <= btb_d;
end
end
end
end
endmodule

View file

@ -95,6 +95,7 @@ module frontend import ariane_pkg::*; #(
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
ras_t ras_predict;
logic [riscv::VLEN-1:0] vpc_btb;
// branch-predict update
logic is_mispredict;
@ -396,6 +397,11 @@ module frontend import ariane_pkg::*; #(
.data_i ( ras_update ),
.data_o ( ras_predict )
);
// For FPGA, the BTB is implemented in synchronous-read BRAM,
// while for ASIC, the BTB is implemented in D flip-flops
// and can be read in the same cycle.
assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q;
btb #(
.NR_ENTRIES ( ArianeCfg.BTBEntries )
@ -404,7 +410,7 @@ module frontend import ariane_pkg::*; #(
.rst_ni,
.flush_i ( flush_bp_i ),
.debug_mode_i,
.vpc_i ( icache_vaddr_q ),
.vpc_i ( vpc_btb ),
.btb_update_i ( btb_update ),
.btb_prediction_o ( btb_prediction )
);

View file

@ -156,6 +156,9 @@ package ariane_pkg;
// allocate more space for the commit buffer to be on the safe side; this needs to be a power of two
localparam int unsigned DEPTH_COMMIT = 8;
`endif
localparam bit FPGA_EN = cva6_config_pkg::CVA6ConfigFPGAEn; // Is FPGA optimization of CV32A6
localparam bit RVC = cva6_config_pkg::CVA6ConfigCExtEn; // Is C extension configuration
`ifdef PITON_ARIANE

View file

@ -35,4 +35,6 @@ package cva6_config_pkg;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
endpackage

View file

@ -35,4 +35,6 @@ package cva6_config_pkg;
localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 4;
localparam CVA6ConfigFPGAEn = 1;
endpackage

View file

@ -35,4 +35,6 @@ package cva6_config_pkg;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
endpackage

View file

@ -35,4 +35,6 @@ package cva6_config_pkg;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
endpackage

View file

@ -35,4 +35,6 @@ package cva6_config_pkg;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
endpackage

View file

@ -35,4 +35,6 @@ package cva6_config_pkg;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
endpackage

View file

@ -35,4 +35,6 @@ package cva6_config_pkg;
localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0;
endpackage