Update BHT and BTB separately

Dhrystone > 1.7 DMIPS
This commit is contained in:
Florian Zaruba 2018-02-14 17:31:02 +01:00
parent d1b2ffc114
commit c57ce142e7
4 changed files with 182 additions and 180 deletions

View file

@ -169,7 +169,7 @@ $(tests): build
verilate:
$(verilator) $(ariane_pkg) $(filter-out src/regfile.sv, $(wildcard src/*.sv)) $(wildcard src/axi_slice/*.sv) \
src/util/cluster_clock_gating.sv src/util/behav_sram.sv src/axi_mem_if/axi2mem.sv tb/agents/axi_if/axi_if.sv \
--unroll-count 256 -Wno-fatal -LDFLAGS "-lfesvr" -CFLAGS "-std=c++11" -Wall --cc --trace \
--unroll-count 1024 -Wno-fatal -LDFLAGS "-lfesvr" -CFLAGS "-std=c++11" -Wall --cc --trace \
$(list_incdir) --top-module ariane_wrapped --exe tb/ariane_tb.cpp tb/simmem.cpp
cd obj_dir && make -j8 -f Variane_wrapped.mk

View file

@ -16,8 +16,8 @@ import ariane_pkg::*;
module frontend #(
parameter int unsigned BTB_ENTRIES = 8,
parameter int unsigned BHT_ENTRIES = 32,
parameter int unsigned RAS_DEPTH = 2
parameter int unsigned BHT_ENTRIES = 1024,
parameter int unsigned RAS_DEPTH = 4
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
@ -115,14 +115,11 @@ module frontend #(
bp_vaddr = '0; // predicted address
bp_valid = 1'b0; // prediction is valid
bp_sbe.is_lower_16 = 1'b0;
bp_sbe.cf_type = RAS;
// only predict on speculative fetches and if the response is valid
if (icache_valid_q && icache_speculative_q) begin
// is it a return and the RAS contains a valid prediction? **speculative**
if (rvi_return && ras_predict.valid) begin
bp_vaddr = ras_predict.ra;
ras_pop = 1'b1;
bp_valid = 1'b1;
end
if (icache_valid_q) begin
if (rvi_call) begin
ras_push = 1'b1;
@ -131,6 +128,7 @@ module frontend #(
// Branch Prediction - **speculative**
if (rvi_branch) begin
bp_sbe.cf_type = BHT;
// dynamic prediction valid?
if (bht_prediction.valid) begin
if (bht_prediction.taken || bht_prediction.strongly_taken)
@ -153,13 +151,27 @@ module frontend #(
if (rvi_jalr && btb_prediction.valid) begin
bp_vaddr = btb_prediction.target_address;
bp_valid = 1'b1;
bp_sbe.cf_type = BTB;
end
// is it a return and the RAS contains a valid prediction? **speculative**
if (rvi_return && ras_predict.valid) begin
bp_vaddr = ras_predict.ra;
ras_pop = 1'b1;
bp_valid = 1'b1;
bp_sbe.cf_type = RAS;
end
if (take_rvi_cf) begin
bp_valid = 1'b1;
bp_vaddr = icache_vaddr_q + rvi_imm;
end
end
// assemble scoreboard entry
bp_sbe.valid = bp_valid;
bp_sbe.predict_address = bp_vaddr;
bp_sbe.predict_taken = bp_valid;
end
always_comb begin : id_if
@ -177,13 +189,6 @@ module frontend #(
icache_kill_s2 = 1'b1;
end
// assemble scoreboard entry
bp_sbe.valid = bp_valid;
bp_sbe.predict_address = bp_vaddr;
bp_sbe.predict_taken = bp_valid;
bp_sbe.is_lower_16 = 1'b0;
bp_sbe.cf_type = (rvi_jalr) ? BTB : BHT;
fifo_valid = icache_valid_q;
end
@ -191,7 +196,7 @@ module frontend #(
// Update Control Flow Predictions
// ----------------------------------------
// BHT
assign bht_update.valid = resolved_branch_i.valid;
assign bht_update.valid = resolved_branch_i.valid & (resolved_branch_i.cf_type == BHT);
assign bht_update.pc = resolved_branch_i.pc;
assign bht_update.mispredict = resolved_branch_i.is_mispredict;
assign bht_update.taken = resolved_branch_i.is_taken;
@ -410,24 +415,23 @@ module instr_scan (
// check that rs1 is either x1 or x5 and that rd is not x1 or x5, TODO: check the fact about bit 7
assign rvi_return_o = rvi_jalr_o & ~instr_i[7] & ~instr_i[19] & ~instr_i[18] & ~instr_i[16] & instr_i[15];
assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & instr_i[7]; // TODO: check that this captures calls
assign rvc_branch_o = (instr_i[15:13] == OPCODE_C_BEQZ) | (instr_i[15:13] == OPCODE_C_BNEZ);
// differentiates between JAL and BRANCH opcode, JALR comes from BTB
assign rvi_imm_o = (instr_i[3]) ? uj_imm(instr_i) : sb_imm(instr_i);
assign rvi_branch_o = (instr_i[6:0] == OPCODE_BRANCH) ? 1'b1 : 1'b0;
assign rvi_jalr_o = (instr_i[6:0] == OPCODE_JALR) ? 1'b1 : 1'b0;
assign rvi_jump_o = (instr_i[6:0] == OPCODE_JAL) ? 1'b1 : 1'b0;
// opcode JAL
assign rvc_jump_o = (instr_i[15:13] == OPCODE_C_J);
assign rvc_jr_o = (instr_i[15:12] == 4'b1000) & (instr_i[6:2] == 5'b00000);
assign rvc_branch_o = (instr_i[15:13] == OPCODE_C_BEQZ) | (instr_i[15:13] == OPCODE_C_BNEZ);
// check that rs1 is x1 or x5
assign rvc_return_o = rvc_jr_o & ~instr_i[11] & ~instr_i[10] & ~instr_i[8] & ~instr_i[7];
assign rvc_jalr_o = (instr_i[15:12] == 4'b1001) & (instr_i[6:2] == 5'b00000);
assign rvc_call_o = rvc_jalr_o; // TODO: check that this captures calls
// differentiates between JAL and BRANCH opcode, JALR comes from BTB
assign rvi_imm_o = (instr_i[3]) ? uj_imm(instr_i) : sb_imm(instr_i);
// differentiates between the compressed branch (C.BEQZ/C.BNEZ) and compressed jump (C.J) immediate
assign rvc_imm_o = (instr_i[14]) ? {{56{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0}
: {{53{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0};
assign rvi_branch_o = (instr_i[6:0] == OPCODE_BRANCH) ? 1'b1 : 1'b0;
assign rvi_jalr_o = (instr_i[6:0] == OPCODE_JALR) ? 1'b1 : 1'b0;
assign rvi_jump_o = (instr_i[6:0] == OPCODE_JAL) ? 1'b1 : 1'b0;
endmodule
// ------------------------------
@ -548,7 +552,7 @@ endmodule
// branch history table - 2 bit saturation counter
module bht #(
parameter int unsigned NR_ENTRIES = 64
parameter int unsigned NR_ENTRIES = 1024
)(
input logic clk_i,
input logic rst_ni,

View file

@ -33,17 +33,9 @@ module lsu #(
output logic lsu_ready_o, // FU is ready e.g. not busy
input logic lsu_valid_i, // Input is valid
input logic [TRANS_ID_BITS-1:0] trans_id_i, // transaction id, needed for WB
output logic [TRANS_ID_BITS-1:0] ld_trans_id_o, // ID of scoreboard entry at which to write back
output logic [63:0] ld_result_o,
output logic ld_valid_o, // transaction id for which the output is the requested one
output exception_t ld_exception_o, // to WB, signal exception status LD/ST exception
output logic [TRANS_ID_BITS-1:0] st_trans_id_o, // ID of scoreboard entry at which to write back
output logic [63:0] st_result_o,
output logic st_valid_o, // transaction id for which the output is the requested one
output exception_t st_exception_o, // to WB, signal exception status LD/ST exception
output logic [TRANS_ID_BITS-1:0] lsu_trans_id_o, // ID of scoreboard entry at which to write back
output logic [63:0] lsu_result_o,
output logic lsu_valid_o, // transaction id for which the output is the requested one
input logic commit_i, // commit the pending store
output logic commit_ready_o, // commit queue is ready to accept another commit request
@ -80,7 +72,10 @@ module lsu #(
output logic flush_dcache_ack_o,
// Data cache refill port
AXI_BUS.Master data_if,
AXI_BUS.Master bypass_if
AXI_BUS.Master bypass_if,
output exception_t lsu_exception_o // to WB, signal exception status LD/ST exception
);
// data is misaligned
logic data_misaligned;
@ -298,35 +293,25 @@ module lsu #(
// ---------------------
// Result Sequentialize
// ---------------------
// lsu_arbiter i_lsu_arbiter (
// .clk_i ( clk_i ),
// .rst_ni ( rst_ni ),
// .flush_i ( flush_i ),
// .ld_valid_i ( ld_valid ),
// .ld_trans_id_i ( ld_trans_id ),
// .ld_result_i ( ld_result ),
// .ld_ex_i ( ld_ex ),
lsu_arbiter i_lsu_arbiter (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
.ld_valid_i ( ld_valid ),
.ld_trans_id_i ( ld_trans_id ),
.ld_result_i ( ld_result ),
.ld_ex_i ( ld_ex ),
// .st_valid_i ( st_valid ),
// .st_trans_id_i ( st_trans_id ),
// .st_result_i ( st_result ),
// .st_ex_i ( st_ex ),
.st_valid_i ( st_valid ),
.st_trans_id_i ( st_trans_id ),
.st_result_i ( st_result ),
.st_ex_i ( st_ex ),
// .valid_o ( lsu_valid_o ),
// .trans_id_o ( lsu_trans_id_o ),
// .result_o ( lsu_result_o ),
// .ex_o ( lsu_exception_o )
// );
assign ld_valid_o = ld_valid;
assign ld_trans_id_o = ld_trans_id;
assign ld_result_o = ld_result;
assign ld_exception_o = ld_ex;
assign st_valid_o = st_valid;
assign st_trans_id_o = st_trans_id;
assign st_result_o = st_result;
assign st_exception_o = st_ex;
.valid_o ( lsu_valid_o ),
.trans_id_o ( lsu_trans_id_o ),
.result_o ( lsu_result_o ),
.ex_o ( lsu_exception_o )
);
// determine whether this is a load or store
always_comb begin : which_op
@ -614,118 +599,3 @@ module lsu_bypass (
end
endmodule
// Author: Florian Zaruba, ETH Zurich
// Date: 22.05.2017
// Description: Arbitrates the LSU result port
module lsu_arbiter (
    input  logic                     clk_i,          // Clock
    input  logic                     rst_ni,         // Asynchronous reset active low
    input  logic                     flush_i,        // Resets both pointers and the fill count (stored entries are not cleared)
    // Load Port
    input  logic                     ld_valid_i,     // load result is valid and gets enqueued this cycle
    input  logic [TRANS_ID_BITS-1:0] ld_trans_id_i,
    input  logic [63:0]              ld_result_i,
    input  exception_t               ld_ex_i,
    // Store Port
    input  logic                     st_valid_i,     // store result is valid and gets enqueued this cycle
    input  logic [TRANS_ID_BITS-1:0] st_trans_id_i,
    input  logic [63:0]              st_result_i,
    input  exception_t               st_ex_i,
    // Output Port
    output logic                     valid_o,        // high whenever the queue holds at least one entry
    output logic [TRANS_ID_BITS-1:0] trans_id_o,
    output logic [63:0]              result_o,
    output exception_t               ex_o
);
    // This is a dual input FIFO which takes results from the load and store
    // paths of the LSU and sequentializes them through the FIFO construct. If there is a valid
    // output it unconditionally posts the result on its output ports and expects it to be
    // consumed in the same cycle (there is no back-pressure input).
    // 4 entries is enough to unconditionally post loads and stores since we can only have two outstanding loads
    localparam int WIDTH = 4;

    // queue pointers - wrap around naturally at WIDTH
    logic [$clog2(WIDTH)-1:0] read_pointer_n,  read_pointer_q;
    logic [$clog2(WIDTH)-1:0] write_pointer_n, write_pointer_q;
    // Fill level. It needs one bit more than the pointers so that the value WIDTH (queue
    // completely full) can be represented. With only $clog2(WIDTH) bits a full queue wrapped
    // around to zero, which de-asserted valid_o and silently dropped all queued results.
    logic [$clog2(WIDTH):0]   status_cnt_n,    status_cnt_q;

    struct packed {
        logic [TRANS_ID_BITS-1:0] trans_id;
        logic [63:0]              result;
        exception_t               ex;
    } mem_n[WIDTH-1:0], mem_q[WIDTH-1:0];

    // output the oldest element of the queue
    assign trans_id_o = mem_q[read_pointer_q].trans_id;
    assign result_o   = mem_q[read_pointer_q].result;
    assign ex_o       = mem_q[read_pointer_q].ex;

    // if we are not empty we have a valid output
    assign valid_o = (status_cnt_q != '0);

    // -------------------
    // Read-Write Process
    // -------------------
    always_comb begin : read_write_fifo
        // running values so that both write ports can update them within the same cycle
        automatic logic [$clog2(WIDTH):0]   status_cnt;
        automatic logic [$clog2(WIDTH)-1:0] write_pointer;

        status_cnt    = status_cnt_q;
        write_pointer = write_pointer_q;

        // default assignments
        mem_n          = mem_q;
        read_pointer_n = read_pointer_q;

        // ------------
        // Write Port
        // ------------
        // write port 1 - load unit
        // on a simultaneous load and store the load takes the lower slot, so it leaves the queue first
        if (ld_valid_i) begin
            mem_n[write_pointer] = {ld_trans_id_i, ld_result_i, ld_ex_i};
            write_pointer++;
            status_cnt++;
        end

        // write port 2 - store unit
        if (st_valid_i) begin
            mem_n[write_pointer] = {st_trans_id_i, st_result_i, st_ex_i};
            write_pointer++;
            status_cnt++;
        end

        // ------------
        // Read Port
        // ------------
        // if the last element in the queue was valid we can push it out and make space for a new element
        if (valid_o) begin
            read_pointer_n = read_pointer_q + 1;
            status_cnt--;
        end

        // update status count
        status_cnt_n = status_cnt;
        // update write pointer
        write_pointer_n = write_pointer;

        // ------------
        // Flush
        // ------------
        // flush wins over any push/pop of the same cycle
        if (flush_i) begin
            status_cnt_n    = '0;
            write_pointer_n = '0;
            read_pointer_n  = '0;
        end
    end

    // sequential process
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            mem_q           <= '{default: 0};
            read_pointer_q  <= '0;
            write_pointer_q <= '0;
            status_cnt_q    <= '0;
        end else begin
            mem_q           <= mem_n;
            read_pointer_q  <= read_pointer_n;
            write_pointer_q <= write_pointer_n;
            status_cnt_q    <= status_cnt_n;
        end
    end
endmodule

128
src/lsu_arbiter.sv Normal file
View file

@ -0,0 +1,128 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 22.05.2017
// Description: Arbitrates the LSU result port
import ariane_pkg::*;
module lsu_arbiter (
    input  logic                     clk_i,          // Clock
    input  logic                     rst_ni,         // Asynchronous reset active low
    input  logic                     flush_i,        // Resets both pointers and the fill count (stored entries are not cleared)
    // Load Port
    input  logic                     ld_valid_i,     // load result is valid and gets enqueued this cycle
    input  logic [TRANS_ID_BITS-1:0] ld_trans_id_i,
    input  logic [63:0]              ld_result_i,
    input  exception_t               ld_ex_i,
    // Store Port
    input  logic                     st_valid_i,     // store result is valid and gets enqueued this cycle
    input  logic [TRANS_ID_BITS-1:0] st_trans_id_i,
    input  logic [63:0]              st_result_i,
    input  exception_t               st_ex_i,
    // Output Port
    output logic                     valid_o,        // high whenever the queue holds at least one entry
    output logic [TRANS_ID_BITS-1:0] trans_id_o,
    output logic [63:0]              result_o,
    output exception_t               ex_o
);
    // This is a dual input FIFO which takes results from the load and store
    // paths of the LSU and sequentializes them through the FIFO construct. If there is a valid
    // output it unconditionally posts the result on its output ports and expects it to be
    // consumed in the same cycle (there is no back-pressure input).
    // 4 entries is enough to unconditionally post loads and stores since we can only have two outstanding loads
    localparam int WIDTH = 4;

    // queue pointers - wrap around naturally at WIDTH
    logic [$clog2(WIDTH)-1:0] read_pointer_n,  read_pointer_q;
    logic [$clog2(WIDTH)-1:0] write_pointer_n, write_pointer_q;
    // Fill level. It needs one bit more than the pointers so that the value WIDTH (queue
    // completely full) can be represented. With only $clog2(WIDTH) bits a full queue wrapped
    // around to zero, which de-asserted valid_o and silently dropped all queued results.
    logic [$clog2(WIDTH):0]   status_cnt_n,    status_cnt_q;

    struct packed {
        logic [TRANS_ID_BITS-1:0] trans_id;
        logic [63:0]              result;
        exception_t               ex;
    } mem_n[WIDTH-1:0], mem_q[WIDTH-1:0];

    // output the oldest element of the queue
    assign trans_id_o = mem_q[read_pointer_q].trans_id;
    assign result_o   = mem_q[read_pointer_q].result;
    assign ex_o       = mem_q[read_pointer_q].ex;

    // if we are not empty we have a valid output
    assign valid_o = (status_cnt_q != '0);

    // -------------------
    // Read-Write Process
    // -------------------
    always_comb begin : read_write_fifo
        // running values so that both write ports can update them within the same cycle
        automatic logic [$clog2(WIDTH):0]   status_cnt;
        automatic logic [$clog2(WIDTH)-1:0] write_pointer;

        status_cnt    = status_cnt_q;
        write_pointer = write_pointer_q;

        // default assignments
        mem_n          = mem_q;
        read_pointer_n = read_pointer_q;

        // ------------
        // Write Port
        // ------------
        // write port 1 - load unit
        // on a simultaneous load and store the load takes the lower slot, so it leaves the queue first
        if (ld_valid_i) begin
            mem_n[write_pointer] = {ld_trans_id_i, ld_result_i, ld_ex_i};
            write_pointer++;
            status_cnt++;
        end

        // write port 2 - store unit
        if (st_valid_i) begin
            mem_n[write_pointer] = {st_trans_id_i, st_result_i, st_ex_i};
            write_pointer++;
            status_cnt++;
        end

        // ------------
        // Read Port
        // ------------
        // if the last element in the queue was valid we can push it out and make space for a new element
        if (valid_o) begin
            read_pointer_n = read_pointer_q + 1;
            status_cnt--;
        end

        // update status count
        status_cnt_n = status_cnt;
        // update write pointer
        write_pointer_n = write_pointer;

        // ------------
        // Flush
        // ------------
        // flush wins over any push/pop of the same cycle
        if (flush_i) begin
            status_cnt_n    = '0;
            write_pointer_n = '0;
            read_pointer_n  = '0;
        end
    end

    // sequential process
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            mem_q           <= '{default: 0};
            read_pointer_q  <= '0;
            write_pointer_q <= '0;
            status_cnt_q    <= '0;
        end else begin
            mem_q           <= mem_n;
            read_pointer_q  <= read_pointer_n;
            write_pointer_q <= write_pointer_n;
            status_cnt_q    <= status_cnt_n;
        end
    end
endmodule