mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-06-28 09:16:22 -04:00
BTB optimization for FPGA targets (#1016)
This commit is contained in:
parent
dc0ecfde0a
commit
6deffb27d7
11 changed files with 146 additions and 34 deletions
|
@ -89,6 +89,9 @@ ${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/norm_div_sq
|
||||||
${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
|
${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
|
||||||
${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
|
${CVA6_REPO_DIR}/vendor/pulp-platform/fpnew/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
|
||||||
|
|
||||||
|
//FPGA memories
|
||||||
|
${CVA6_REPO_DIR}/common/local/techlib/fpga/rtl/SyncDpRam.sv
|
||||||
|
|
||||||
// Top-level source files (not necessarily instantiated at the top of the cva6).
|
// Top-level source files (not necessarily instantiated at the top of the cva6).
|
||||||
${CVA6_REPO_DIR}/core/ariane.sv
|
${CVA6_REPO_DIR}/core/ariane.sv
|
||||||
${CVA6_REPO_DIR}/core/cva6.sv
|
${CVA6_REPO_DIR}/core/cva6.sv
|
||||||
|
|
|
@ -12,7 +12,18 @@
|
||||||
// Date: 08.02.2018
|
// Date: 08.02.2018
|
||||||
// Migrated: Luis Vitorio Cargnini, IEEE
|
// Migrated: Luis Vitorio Cargnini, IEEE
|
||||||
// Date: 09.06.2018
|
// Date: 09.06.2018
|
||||||
|
//
|
||||||
|
// Additional contributions by:
|
||||||
|
// Sebastien Jacq, Thales - sjthales on github.com
|
||||||
|
// Date: 2022-12-01
|
||||||
|
//
|
||||||
|
// Description: This module is an adaptation of the BTB (Branch Target Buffer)
|
||||||
|
// module for both FPGA and ASIC targets.
|
||||||
|
// The prediction target address is stored in BRAM on FPGA, while in the
|
||||||
|
// original module the target address is stored in D flip-flops.
|
||||||
|
// For FPGA flushing is not supported because the frontend module
|
||||||
|
// flushing signal is not connected.
|
||||||
|
//
|
||||||
// branch target buffer
|
// branch target buffer
|
||||||
module btb #(
|
module btb #(
|
||||||
parameter int NR_ENTRIES = 8
|
parameter int NR_ENTRIES = 8
|
||||||
|
@ -32,18 +43,17 @@ module btb #(
|
||||||
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
|
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
|
||||||
// number of bits needed to index the row
|
// number of bits needed to index the row
|
||||||
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
|
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
|
||||||
localparam ROW_INDEX_BITS = ariane_pkg::RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1; //1
|
localparam ROW_INDEX_BITS = ariane_pkg::RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
|
||||||
// number of bits we should use for prediction
|
// number of bits we should use for prediction
|
||||||
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
|
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
|
||||||
// prevent aliasing from degrading performance
|
// prevent aliasing from degrading performance
|
||||||
localparam ANTIALIAS_BITS = 8;
|
localparam ANTIALIAS_BITS = 8;
|
||||||
|
// number of bits per word in the BRAM
|
||||||
|
localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t);
|
||||||
// we are not interested in all bits of the address
|
// we are not interested in all bits of the address
|
||||||
unread i_unread (.d_i(|vpc_i));
|
unread i_unread (.d_i(|vpc_i));
|
||||||
|
|
||||||
// typedef for all branch target entries
|
|
||||||
// we may want to try to put a tag field that fills the rest of the PC in order to mitigate aliasing effects
|
|
||||||
ariane_pkg::btb_prediction_t btb_d [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
|
|
||||||
btb_q [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
|
|
||||||
logic [$clog2(NR_ROWS)-1:0] index, update_pc;
|
logic [$clog2(NR_ROWS)-1:0] index, update_pc;
|
||||||
logic [ROW_INDEX_BITS-1:0] update_row_index;
|
logic [ROW_INDEX_BITS-1:0] update_row_index;
|
||||||
|
|
||||||
|
@ -55,42 +65,118 @@ module btb #(
|
||||||
assign update_row_index = '0;
|
assign update_row_index = '0;
|
||||||
end
|
end
|
||||||
|
|
||||||
// output matching prediction
|
if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS
|
||||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
|
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
|
||||||
assign btb_prediction_o[i] = btb_q[index][i]; // workaround
|
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
|
||||||
end
|
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
|
||||||
|
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
|
||||||
|
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;
|
||||||
|
|
||||||
// -------------------------
|
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update;
|
||||||
// Update Branch Prediction
|
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update;
|
||||||
// -------------------------
|
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
|
||||||
// update on a mis-predict
|
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;
|
||||||
always_comb begin : update_branch_predict
|
|
||||||
|
// output matching prediction
|
||||||
|
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
|
||||||
|
assign btb_ram_csel_prediction[i] = 1'b1;
|
||||||
|
assign btb_ram_we_prediction[i] = 1'b0;
|
||||||
|
assign btb_ram_wdata_prediction = '0;
|
||||||
|
assign btb_ram_addr_prediction[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = index;
|
||||||
|
assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS];
|
||||||
|
end
|
||||||
|
|
||||||
|
// -------------------------
|
||||||
|
// Update Branch Prediction
|
||||||
|
// -------------------------
|
||||||
|
// update on a mis-predict
|
||||||
|
always_comb begin : update_branch_predict
|
||||||
|
btb_ram_csel_update = '0;
|
||||||
|
btb_ram_we_update = '0;
|
||||||
|
btb_ram_addr_update = '0;
|
||||||
|
btb_ram_wdata_update = '0;
|
||||||
|
|
||||||
|
if (btb_update_i.valid && !debug_mode_i) begin
|
||||||
|
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
|
||||||
|
if (update_row_index == i) begin
|
||||||
|
btb_ram_csel_update[i] = 1'b1;
|
||||||
|
btb_ram_we_update[i] = 1'b1;
|
||||||
|
btb_ram_addr_update[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = update_pc;
|
||||||
|
btb_ram_wdata_update[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] = {1'b1 , btb_update_i.target_address};
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram
|
||||||
|
SyncDpRam #(
|
||||||
|
.ADDR_WIDTH($clog2(NR_ROWS)),
|
||||||
|
.DATA_DEPTH(NR_ROWS),
|
||||||
|
.DATA_WIDTH(BRAM_WORD_BITS),
|
||||||
|
.OUT_REGS (0),
|
||||||
|
.SIM_INIT (1)
|
||||||
|
) i_btb_ram (
|
||||||
|
.Clk_CI ( clk_i ),
|
||||||
|
.Rst_RBI ( rst_ni ),
|
||||||
|
//----------------------------
|
||||||
|
.CSelA_SI ( btb_ram_csel_update[i] ),
|
||||||
|
.WrEnA_SI ( btb_ram_we_update[i] ),
|
||||||
|
.AddrA_DI ( btb_ram_addr_update[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
|
||||||
|
.WrDataA_DI ( btb_ram_wdata_update[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
|
||||||
|
.RdDataA_DO ( ),
|
||||||
|
//-----------------------------
|
||||||
|
.CSelB_SI ( btb_ram_csel_prediction[i] ),
|
||||||
|
.WrEnB_SI ( btb_ram_we_prediction[i] ),
|
||||||
|
.AddrB_DI ( btb_ram_addr_prediction[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
|
||||||
|
.WrDataB_DI ( btb_ram_wdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
|
||||||
|
.RdDataB_DO ( btb_ram_rdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] )
|
||||||
|
);
|
||||||
|
end
|
||||||
|
|
||||||
|
end else begin : gen_asic_btb // ASIC TARGET
|
||||||
|
|
||||||
|
// typedef for all branch target entries
|
||||||
|
// we may want to try to put a tag field that fills the rest of the PC in order to mitigate aliasing effects
|
||||||
|
ariane_pkg::btb_prediction_t btb_d [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
|
||||||
|
btb_q [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
|
||||||
|
|
||||||
|
// output matching prediction
|
||||||
|
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
|
||||||
|
assign btb_prediction_o[i] = btb_q[index][i]; // workaround
|
||||||
|
end
|
||||||
|
|
||||||
|
// -------------------------
|
||||||
|
// Update Branch Prediction
|
||||||
|
// -------------------------
|
||||||
|
// update on a mis-predict
|
||||||
|
always_comb begin : update_branch_predict
|
||||||
btb_d = btb_q;
|
btb_d = btb_q;
|
||||||
|
|
||||||
if (btb_update_i.valid && !debug_mode_i) begin
|
if (btb_update_i.valid && !debug_mode_i) begin
|
||||||
btb_d[update_pc][update_row_index].valid = 1'b1;
|
btb_d[update_pc][update_row_index].valid = 1'b1;
|
||||||
// the target address is simply updated
|
// the target address is simply updated
|
||||||
btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
|
btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
// sequential process
|
// sequential process
|
||||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||||
if (!rst_ni) begin
|
if (!rst_ni) begin
|
||||||
// Bias the branches to be taken upon first arrival
|
// Bias the branches to be taken upon first arrival
|
||||||
for (int i = 0; i < NR_ROWS; i++)
|
for (int i = 0; i < NR_ROWS; i++)
|
||||||
btb_q[i] <= '{default: 0};
|
btb_q[i] <= '{default: 0};
|
||||||
end else begin
|
end else begin
|
||||||
// evict all entries
|
// evict all entries
|
||||||
if (flush_i) begin
|
if (flush_i) begin
|
||||||
for (int i = 0; i < NR_ROWS; i++) begin
|
for (int i = 0; i < NR_ROWS; i++) begin
|
||||||
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
|
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
|
||||||
btb_q[i][j].valid <= 1'b0;
|
btb_q[i][j].valid <= 1'b0;
|
||||||
end
|
end
|
||||||
end
|
|
||||||
end else begin
|
|
||||||
btb_q <= btb_d;
|
|
||||||
end
|
end
|
||||||
|
end else begin
|
||||||
|
btb_q <= btb_d;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
endmodule
|
endmodule
|
||||||
|
|
|
@ -95,6 +95,7 @@ module frontend import ariane_pkg::*; #(
|
||||||
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
|
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
|
||||||
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
|
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
|
||||||
ras_t ras_predict;
|
ras_t ras_predict;
|
||||||
|
logic [riscv::VLEN-1:0] vpc_btb;
|
||||||
|
|
||||||
// branch-predict update
|
// branch-predict update
|
||||||
logic is_mispredict;
|
logic is_mispredict;
|
||||||
|
@ -397,6 +398,11 @@ module frontend import ariane_pkg::*; #(
|
||||||
.data_o ( ras_predict )
|
.data_o ( ras_predict )
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// For FPGA, the BTB is implemented in synchronous-read BRAM,
|
||||||
|
// while for ASIC, the BTB is implemented in D flip-flops
|
||||||
|
// and can be read in the same cycle.
|
||||||
|
assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q;
|
||||||
|
|
||||||
btb #(
|
btb #(
|
||||||
.NR_ENTRIES ( ArianeCfg.BTBEntries )
|
.NR_ENTRIES ( ArianeCfg.BTBEntries )
|
||||||
) i_btb (
|
) i_btb (
|
||||||
|
@ -404,7 +410,7 @@ module frontend import ariane_pkg::*; #(
|
||||||
.rst_ni,
|
.rst_ni,
|
||||||
.flush_i ( flush_bp_i ),
|
.flush_i ( flush_bp_i ),
|
||||||
.debug_mode_i,
|
.debug_mode_i,
|
||||||
.vpc_i ( icache_vaddr_q ),
|
.vpc_i ( vpc_btb ),
|
||||||
.btb_update_i ( btb_update ),
|
.btb_update_i ( btb_update ),
|
||||||
.btb_prediction_o ( btb_prediction )
|
.btb_prediction_o ( btb_prediction )
|
||||||
);
|
);
|
||||||
|
|
|
@ -156,6 +156,9 @@ package ariane_pkg;
|
||||||
// allocate more space for the commit buffer to be on the safe side, this needs to be a power of two
|
// allocate more space for the commit buffer to be on the safe side, this needs to be a power of two
|
||||||
localparam int unsigned DEPTH_COMMIT = 8;
|
localparam int unsigned DEPTH_COMMIT = 8;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
|
localparam bit FPGA_EN = cva6_config_pkg::CVA6ConfigFPGAEn; // Is FPGA optimization of CV32A6
|
||||||
|
|
||||||
localparam bit RVC = cva6_config_pkg::CVA6ConfigCExtEn; // Is C extension configuration
|
localparam bit RVC = cva6_config_pkg::CVA6ConfigCExtEn; // Is C extension configuration
|
||||||
|
|
||||||
`ifdef PITON_ARIANE
|
`ifdef PITON_ARIANE
|
||||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
||||||
localparam CVA6ConfigNrCommitPorts = 2;
|
localparam CVA6ConfigNrCommitPorts = 2;
|
||||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||||
|
|
||||||
|
localparam CVA6ConfigFPGAEn = 0;
|
||||||
|
|
||||||
endpackage
|
endpackage
|
||||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
||||||
localparam CVA6ConfigNrCommitPorts = 1;
|
localparam CVA6ConfigNrCommitPorts = 1;
|
||||||
localparam CVA6ConfigNrScoreboardEntries = 4;
|
localparam CVA6ConfigNrScoreboardEntries = 4;
|
||||||
|
|
||||||
|
localparam CVA6ConfigFPGAEn = 1;
|
||||||
|
|
||||||
endpackage
|
endpackage
|
||||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
||||||
localparam CVA6ConfigNrCommitPorts = 2;
|
localparam CVA6ConfigNrCommitPorts = 2;
|
||||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||||
|
|
||||||
|
localparam CVA6ConfigFPGAEn = 0;
|
||||||
|
|
||||||
endpackage
|
endpackage
|
||||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
||||||
localparam CVA6ConfigNrCommitPorts = 2;
|
localparam CVA6ConfigNrCommitPorts = 2;
|
||||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||||
|
|
||||||
|
localparam CVA6ConfigFPGAEn = 0;
|
||||||
|
|
||||||
endpackage
|
endpackage
|
||||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
||||||
localparam CVA6ConfigNrCommitPorts = 2;
|
localparam CVA6ConfigNrCommitPorts = 2;
|
||||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||||
|
|
||||||
|
localparam CVA6ConfigFPGAEn = 0;
|
||||||
|
|
||||||
endpackage
|
endpackage
|
||||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
||||||
localparam CVA6ConfigNrCommitPorts = 2;
|
localparam CVA6ConfigNrCommitPorts = 2;
|
||||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||||
|
|
||||||
|
localparam CVA6ConfigFPGAEn = 0;
|
||||||
|
|
||||||
endpackage
|
endpackage
|
||||||
|
|
|
@ -35,4 +35,6 @@ package cva6_config_pkg;
|
||||||
localparam CVA6ConfigNrCommitPorts = 2;
|
localparam CVA6ConfigNrCommitPorts = 2;
|
||||||
localparam CVA6ConfigNrScoreboardEntries = 8;
|
localparam CVA6ConfigNrScoreboardEntries = 8;
|
||||||
|
|
||||||
|
localparam CVA6ConfigFPGAEn = 0;
|
||||||
|
|
||||||
endpackage
|
endpackage
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue