diff --git a/Makefile b/Makefile index 7f82c03f8..924145903 100755 --- a/Makefile +++ b/Makefile @@ -169,7 +169,7 @@ $(tests): build verilate: $(verilator) $(ariane_pkg) $(filter-out src/regfile.sv, $(wildcard src/*.sv)) $(wildcard src/axi_slice/*.sv) \ src/util/cluster_clock_gating.sv src/util/behav_sram.sv src/axi_mem_if/axi2mem.sv tb/agents/axi_if/axi_if.sv \ - --unroll-count 256 -Wno-fatal -LDFLAGS "-lfesvr" -CFLAGS "-std=c++11" -Wall --cc --trace \ + --unroll-count 1024 -Wno-fatal -LDFLAGS "-lfesvr" -CFLAGS "-std=c++11" -Wall --cc --trace \ $(list_incdir) --top-module ariane_wrapped --exe tb/ariane_tb.cpp tb/simmem.cpp cd obj_dir && make -j8 -f Variane_wrapped.mk diff --git a/src/frontend.sv b/src/frontend.sv index a85292f5d..e8149eb9c 100644 --- a/src/frontend.sv +++ b/src/frontend.sv @@ -16,8 +16,8 @@ import ariane_pkg::*; module frontend #( parameter int unsigned BTB_ENTRIES = 8, - parameter int unsigned BHT_ENTRIES = 32, - parameter int unsigned RAS_DEPTH = 2 + parameter int unsigned BHT_ENTRIES = 1024, + parameter int unsigned RAS_DEPTH = 4 )( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -115,14 +115,11 @@ module frontend #( bp_vaddr = '0; // predicted address bp_valid = 1'b0; // prediction is valid + bp_sbe.is_lower_16 = 1'b0; + bp_sbe.cf_type = RAS; + // only predict on speculative fetches and if the response is valid - if (icache_valid_q && icache_speculative_q) begin - // is it a return and the RAS contains a valid prediction? **speculative** - if (rvi_return && ras_predict.valid) begin - bp_vaddr = ras_predict.ra; - ras_pop = 1'b1; - bp_valid = 1'b1; - end + if (icache_valid_q) begin if (rvi_call) begin ras_push = 1'b1; @@ -131,6 +128,7 @@ module frontend #( // Branch Prediction - **speculative** if (rvi_branch) begin + bp_sbe.cf_type = BHT; // dynamic prediction valid? if (bht_prediction.valid) begin if (bht_prediction.taken || bht_prediction.strongly_taken) @@ -153,13 +151,27 @@ module frontend #( if (rvi_jalr && btb_prediction.valid) begin bp_vaddr = btb_prediction.target_address; bp_valid = 1'b1; + bp_sbe.cf_type = BTB; + end + + // is it a return and the RAS contains a valid prediction? **speculative** + if (rvi_return && ras_predict.valid) begin + bp_vaddr = ras_predict.ra; + ras_pop = 1'b1; + bp_valid = 1'b1; + bp_sbe.cf_type = RAS; end if (take_rvi_cf) begin bp_valid = 1'b1; bp_vaddr = icache_vaddr_q + rvi_imm; end + end + // assemble scoreboard entry + bp_sbe.valid = bp_valid; + bp_sbe.predict_address = bp_vaddr; + bp_sbe.predict_taken = bp_valid; end always_comb begin : id_if @@ -177,13 +189,6 @@ module frontend #( icache_kill_s2 = 1'b1; end - // assemble scoreboard entry - bp_sbe.valid = bp_valid; - bp_sbe.predict_address = bp_vaddr; - bp_sbe.predict_taken = bp_valid; - bp_sbe.is_lower_16 = 1'b0; - bp_sbe.cf_type = (rvi_jalr) ? BTB : BHT; - fifo_valid = icache_valid_q; end @@ -191,7 +196,7 @@ module frontend #( // Update Control Flow Predictions // ---------------------------------------- // BHT - assign bht_update.valid = resolved_branch_i.valid; + assign bht_update.valid = resolved_branch_i.valid & (resolved_branch_i.cf_type == BHT); assign bht_update.pc = resolved_branch_i.pc; assign bht_update.mispredict = resolved_branch_i.is_mispredict; assign bht_update.taken = resolved_branch_i.is_taken; @@ -410,24 +415,23 @@ module instr_scan ( // check that rs1 is either x1 or x5 and that rs1 is not x1 or x5, TODO: check the fact about bit 7 assign rvi_return_o = rvi_jalr_o & ~instr_i[7] & ~instr_i[19] & ~instr_i[18] & ~instr_i[16] & instr_i[15]; assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & instr_i[7]; // TODO: check that this captures calls - assign rvc_branch_o = (instr_i[15:13] == OPCODE_C_BEQZ) | (instr_i[15:13] == OPCODE_C_BNEZ); + // differentiates between JAL and BRANCH opcode, JALR comes from BHT + assign rvi_imm_o = (instr_i[3]) ? uj_imm(instr_i) : sb_imm(instr_i); + assign rvi_branch_o = (instr_i[6:0] == OPCODE_BRANCH) ? 1'b1 : 1'b0; + assign rvi_jalr_o = (instr_i[6:0] == OPCODE_JALR) ? 1'b1 : 1'b0; + assign rvi_jump_o = (instr_i[6:0] == OPCODE_JAL) ? 1'b1 : 1'b0; // opcode JAL assign rvc_jump_o = (instr_i[15:13] == OPCODE_C_J); assign rvc_jr_o = (instr_i[15:12] == 4'b1000) & (instr_i[6:2] == 5'b00000); + assign rvc_branch_o = (instr_i[15:13] == OPCODE_C_BEQZ) | (instr_i[15:13] == OPCODE_C_BNEZ); // check that rs1 is x1 or x5 assign rvc_return_o = rvc_jr_o & ~instr_i[11] & ~instr_i[10] & ~instr_i[8] & ~instr_i[7]; assign rvc_jalr_o = (instr_i[15:12] == 4'b1001) & (instr_i[6:2] == 5'b00000); assign rvc_call_o = rvc_jalr_o; // TODO: check that this captures calls - // differentiates between JAL and BRANCH opcode, JALR comes from BHT - assign rvi_imm_o = (instr_i[3]) ? uj_imm(instr_i) : sb_imm(instr_i); // // differentiates between JAL and BRANCH opcode, JALR comes from BHT assign rvc_imm_o = (instr_i[14]) ? {{56{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0} : {{53{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0}; - - assign rvi_branch_o = (instr_i[6:0] == OPCODE_BRANCH) ? 1'b1 : 1'b0; - assign rvi_jalr_o = (instr_i[6:0] == OPCODE_JALR) ? 1'b1 : 1'b0; - assign rvi_jump_o = (instr_i[6:0] == OPCODE_JAL) ? 1'b1 : 1'b0; endmodule // ------------------------------ @@ -548,7 +552,7 @@ endmodule // branch history table - 2 bit saturation counter module bht #( - parameter int unsigned NR_ENTRIES = 64 + parameter int unsigned NR_ENTRIES = 1024 )( input logic clk_i, input logic rst_ni, diff --git a/src/lsu.sv b/src/lsu.sv index 2cb9dafb0..c3bbd8ceb 100644 --- a/src/lsu.sv +++ b/src/lsu.sv @@ -33,17 +33,9 @@ module lsu #( output logic lsu_ready_o, // FU is ready e.g. not busy input logic lsu_valid_i, // Input is valid input logic [TRANS_ID_BITS-1:0] trans_id_i, // transaction id, needed for WB - - output logic [TRANS_ID_BITS-1:0] ld_trans_id_o, // ID of scoreboard entry at which to write back - output logic [63:0] ld_result_o, - output logic ld_valid_o, // transaction id for which the output is the requested one - output exception_t ld_exception_o, // to WB, signal exception status LD/ST exception - - output logic [TRANS_ID_BITS-1:0] st_trans_id_o, // ID of scoreboard entry at which to write back - output logic [63:0] st_result_o, - output logic st_valid_o, // transaction id for which the output is the requested one - output exception_t st_exception_o, // to WB, signal exception status LD/ST exception - + output logic [TRANS_ID_BITS-1:0] lsu_trans_id_o, // ID of scoreboard entry at which to write back + output logic [63:0] lsu_result_o, + output logic lsu_valid_o, // transaction id for which the output is the requested one input logic commit_i, // commit the pending store output logic commit_ready_o, // commit queue is ready to accept another commit request @@ -80,7 +72,10 @@ module lsu #( output logic flush_dcache_ack_o, // Data cache refill port AXI_BUS.Master data_if, - AXI_BUS.Master bypass_if + AXI_BUS.Master bypass_if, + + output exception_t lsu_exception_o // to WB, signal exception status LD/ST exception + ); // data is misaligned logic data_misaligned; @@ -298,35 +293,25 @@ module lsu #( // --------------------- // Result Sequentialize // --------------------- - // lsu_arbiter i_lsu_arbiter ( - // .clk_i ( clk_i ), - // .rst_ni ( rst_ni ), - // .flush_i ( flush_i ), - // .ld_valid_i ( ld_valid ), - // .ld_trans_id_i ( ld_trans_id ), - // .ld_result_i ( ld_result ), - // .ld_ex_i ( ld_ex ), + lsu_arbiter i_lsu_arbiter ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_i ( flush_i ), + .ld_valid_i ( ld_valid ), + .ld_trans_id_i ( ld_trans_id ), + .ld_result_i ( ld_result ), + .ld_ex_i ( ld_ex ), - // .st_valid_i ( st_valid ), - // .st_trans_id_i ( st_trans_id ), - // .st_result_i ( st_result ), - // .st_ex_i ( st_ex ), + .st_valid_i ( st_valid ), + .st_trans_id_i ( st_trans_id ), + .st_result_i ( st_result ), + .st_ex_i ( st_ex ), - // .valid_o ( lsu_valid_o ), - // .trans_id_o ( lsu_trans_id_o ), - // .result_o ( lsu_result_o ), - // .ex_o ( lsu_exception_o ) - // ); - - assign ld_valid_o = ld_valid; - assign ld_trans_id_o = ld_trans_id; - assign ld_result_o = ld_result; - assign ld_exception_o = ld_ex; - - assign st_valid_o = st_valid; - assign st_trans_id_o = st_trans_id; - assign st_result_o = st_result; - assign st_exception_o = st_ex; + .valid_o ( lsu_valid_o ), + .trans_id_o ( lsu_trans_id_o ), + .result_o ( lsu_result_o ), + .ex_o ( lsu_exception_o ) + ); // determine whether this is a load or store always_comb begin : which_op @@ -614,118 +599,3 @@ module lsu_bypass ( end endmodule -// Author: Florian Zaruba, ETH Zurich -// Date: 22.05.2017 -// Description: Arbitrates the LSU result port -module lsu_arbiter ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic flush_i, - // Load Port - input logic ld_valid_i, - input logic [TRANS_ID_BITS-1:0] ld_trans_id_i, - input logic [63:0] ld_result_i, - input exception_t ld_ex_i, - // Store Port - input logic st_valid_i, - input logic [TRANS_ID_BITS-1:0] st_trans_id_i, - input logic [63:0] st_result_i, - input exception_t st_ex_i, - // Output Port - output logic valid_o, - output logic [TRANS_ID_BITS-1:0] trans_id_o, - output logic [63:0] result_o, - output exception_t ex_o -); - // this is a dual input FIFO which takes results from the load and store - // paths of the LSU and sequentializes through the FIFO construct. If there is a valid output - // it unconditionally posts the result on its output ports and expects it to be consumed. - - // 4 entries is enough to unconditionally post loads and stores since we can only have two outstanding loads - localparam int WIDTH = 4; - - // queue pointer - logic [$clog2(WIDTH)-1:0] read_pointer_n, read_pointer_q; - logic [$clog2(WIDTH)-1:0] write_pointer_n, write_pointer_q; - logic [$clog2(WIDTH)-1:0] status_cnt_n, status_cnt_q; - - struct packed { - logic [TRANS_ID_BITS-1:0] trans_id; - logic [63:0] result; - exception_t ex; - } mem_n[WIDTH-1:0], mem_q[WIDTH-1:0]; - - // output last element of queue - assign trans_id_o = mem_q[read_pointer_q].trans_id; - assign result_o = mem_q[read_pointer_q].result; - assign ex_o = mem_q[read_pointer_q].ex; - - // if we are not empty we have a valid output - assign valid_o = (status_cnt_q != '0); - // ------------------- - // Read-Write Process - // ------------------- - always_comb begin : read_write_fifo - automatic logic [$clog2(WIDTH)-1:0] status_cnt; - automatic logic [$clog2(WIDTH)-1:0] write_pointer; - - status_cnt = status_cnt_q; - write_pointer = write_pointer_q; - - // default assignments - mem_n = mem_q; - read_pointer_n = read_pointer_q; - // ------------ - // Write Port - // ------------ - // write port 1 - load unit - if (ld_valid_i) begin - mem_n[write_pointer] = {ld_trans_id_i, ld_result_i, ld_ex_i}; - write_pointer++; - status_cnt++; - end - // write port 2 - store unit - if (st_valid_i) begin - mem_n[write_pointer] = {st_trans_id_i, st_result_i, st_ex_i}; - write_pointer++; - status_cnt++; - end - // ------------ - // Read Port - // ------------ - // if the last element in the queue was valid we can push it out and make space for a new element - if (valid_o) begin - read_pointer_n = read_pointer_q + 1; - status_cnt--; - end - - // update status count - status_cnt_n = status_cnt; - // update write pointer - write_pointer_n = write_pointer; - - // ------------ - // Flush - // ------------ - if (flush_i) begin - status_cnt_n = '0; - write_pointer_n = '0; - read_pointer_n = '0; - end - end - // sequential process - always_ff @(posedge clk_i or negedge rst_ni) begin - if (~rst_ni) begin - mem_q <= '{default: 0}; - read_pointer_q <= '0; - write_pointer_q <= '0; - status_cnt_q <= '0; - end else begin - mem_q <= mem_n; - read_pointer_q <= read_pointer_n; - write_pointer_q <= write_pointer_n; - status_cnt_q <= status_cnt_n; - end - end - -endmodule diff --git a/src/lsu_arbiter.sv b/src/lsu_arbiter.sv new file mode 100644 index 000000000..1958e1d7c --- /dev/null +++ b/src/lsu_arbiter.sv @@ -0,0 +1,128 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 22.05.2017 +// Description: Arbitrates the LSU result port + +import ariane_pkg::*; + +module lsu_arbiter ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + // Load Port + input logic ld_valid_i, + input logic [TRANS_ID_BITS-1:0] ld_trans_id_i, + input logic [63:0] ld_result_i, + input exception_t ld_ex_i, + // Store Port + input logic st_valid_i, + input logic [TRANS_ID_BITS-1:0] st_trans_id_i, + input logic [63:0] st_result_i, + input exception_t st_ex_i, + // Output Port + output logic valid_o, + output logic [TRANS_ID_BITS-1:0] trans_id_o, + output logic [63:0] result_o, + output exception_t ex_o +); + // this is a dual input FIFO which takes results from the load and store + // paths of the LSU and sequentializes through the FIFO construct. If there is a valid output + // it unconditionally posts the result on its output ports and expects it to be consumed. + + // 4 entries is enough to unconditionally post loads and stores since we can only have two outstanding loads + localparam int WIDTH = 4; + + // queue pointer + logic [$clog2(WIDTH)-1:0] read_pointer_n, read_pointer_q; + logic [$clog2(WIDTH)-1:0] write_pointer_n, write_pointer_q; + logic [$clog2(WIDTH)-1:0] status_cnt_n, status_cnt_q; + + struct packed { + logic [TRANS_ID_BITS-1:0] trans_id; + logic [63:0] result; + exception_t ex; + } mem_n[WIDTH-1:0], mem_q[WIDTH-1:0]; + + // output last element of queue + assign trans_id_o = mem_q[read_pointer_q].trans_id; + assign result_o = mem_q[read_pointer_q].result; + assign ex_o = mem_q[read_pointer_q].ex; + + // if we are not empty we have a valid output + assign valid_o = (status_cnt_q != '0); + // ------------------- + // Read-Write Process + // ------------------- + always_comb begin : read_write_fifo + automatic logic [$clog2(WIDTH)-1:0] status_cnt; + automatic logic [$clog2(WIDTH)-1:0] write_pointer; + + status_cnt = status_cnt_q; + write_pointer = write_pointer_q; + + // default assignments + mem_n = mem_q; + read_pointer_n = read_pointer_q; + // ------------ + // Write Port + // ------------ + // write port 1 - load unit + if (ld_valid_i) begin + mem_n[write_pointer] = {ld_trans_id_i, ld_result_i, ld_ex_i}; + write_pointer++; + status_cnt++; + end + // write port 2 - store unit + if (st_valid_i) begin + mem_n[write_pointer] = {st_trans_id_i, st_result_i, st_ex_i}; + write_pointer++; + status_cnt++; + end + // ------------ + // Read Port + // ------------ + // if the last element in the queue was valid we can push it out and make space for a new element + if (valid_o) begin + read_pointer_n = read_pointer_q + 1; + status_cnt--; + end + + // update status count + status_cnt_n = status_cnt; + // update write pointer + write_pointer_n = write_pointer; + + // ------------ + // Flush + // ------------ + if (flush_i) begin + status_cnt_n = '0; + write_pointer_n = '0; + read_pointer_n = '0; + end + end + // sequential process + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + mem_q <= '{default: 0}; + read_pointer_q <= '0; + write_pointer_q <= '0; + status_cnt_q <= '0; + end else begin + mem_q <= mem_n; + read_pointer_q <= read_pointer_n; + write_pointer_q <= write_pointer_n; + status_cnt_q <= status_cnt_n; + end + end + +endmodule