mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-20 04:07:36 -04:00
frontend: Clean-up instruction frontend
The instruction frontend has become an increasingly messy part and needed cleaning up. The current solution contains 2 x 32 bit instruction data fifos and 1 x 64 bit address fifo. Hence, it should be significantly more area efficient than the previous one. The interface to `id_stage` is a ready/valid handshake. The credit based system has been replaced in favour of a replay mechanism as it was very brittle and overly pessimistic. Branch-prediction has been cleaned up: The front-end was also partially predicting on jumps; this could have potentially led to performance bugs if the branch detection wasn't correct in the frontend.
This commit is contained in:
parent
90b76d3e4f
commit
830540b757
14 changed files with 1297 additions and 896 deletions
3
Makefile
3
Makefile
|
@ -146,6 +146,7 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \
|
|||
src/axi/src/axi_delayer.sv \
|
||||
src/axi/src/axi_to_axi_lite.sv \
|
||||
src/fpga-support/rtl/SyncSpRamBeNx64.sv \
|
||||
src/common_cells/src/unread.sv \
|
||||
src/common_cells/src/sync.sv \
|
||||
src/common_cells/src/cdc_2phase.sv \
|
||||
src/common_cells/src/spill_register.sv \
|
||||
|
@ -157,6 +158,7 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \
|
|||
src/common_cells/src/deprecated/fifo_v2.sv \
|
||||
src/common_cells/src/fifo_v3.sv \
|
||||
src/common_cells/src/lzc.sv \
|
||||
src/common_cells/src/popcount.sv \
|
||||
src/common_cells/src/rr_arb_tree.sv \
|
||||
src/common_cells/src/deprecated/rrarbiter.sv \
|
||||
src/common_cells/src/stream_delay.sv \
|
||||
|
@ -361,7 +363,6 @@ verilate_command := $(verilator)
|
|||
-Wno-UNOPTFLAT \
|
||||
-Wno-style \
|
||||
$(if $(PROFILE),--stats --stats-vars --profile-cfuncs,) \
|
||||
-Wno-lint \
|
||||
$(if $(DEBUG),--trace --trace-structs,) \
|
||||
-LDFLAGS "-L$(RISCV)/lib -Wl,-rpath,$(RISCV)/lib -lfesvr$(if $(PROFILE), -g -pg,)" \
|
||||
-CFLAGS "$(CFLAGS)$(if $(PROFILE), -g -pg,)" -Wall --cc --vpi \
|
||||
|
|
|
@ -34,6 +34,9 @@ package ariane_pkg;
|
|||
localparam NrMaxRules = 16;
|
||||
|
||||
typedef struct packed {
|
||||
int RASDepth;
|
||||
int BTBEntries;
|
||||
int BHTEntries;
|
||||
// PMAs
|
||||
int NrNonIdempotentRules; // Number of non idempotent rules
|
||||
logic [NrMaxRules-1:0][63:0] NonIdempotentAddrBase; // base which needs to match
|
||||
|
@ -52,6 +55,9 @@ package ariane_pkg;
|
|||
} ariane_cfg_t;
|
||||
|
||||
localparam ariane_cfg_t ArianeDefaultConfig = '{
|
||||
RASDepth: 2,
|
||||
BTBEntries: 32,
|
||||
BHTEntries: 128,
|
||||
// idempotent region
|
||||
NrNonIdempotentRules: 2,
|
||||
NonIdempotentAddrBase: {64'b0, 64'b0},
|
||||
|
@ -75,6 +81,9 @@ package ariane_pkg;
|
|||
function automatic void check_cfg (ariane_cfg_t Cfg);
|
||||
// pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
assert(Cfg.RASDepth > 0);
|
||||
assert(2**$clog2(Cfg.BTBEntries) == Cfg.BTBEntries);
|
||||
assert(2**$clog2(Cfg.BHTEntries) == Cfg.BHTEntries);
|
||||
assert(Cfg.NrNonIdempotentRules <= NrMaxRules);
|
||||
assert(Cfg.NrExecuteRegionRules <= NrMaxRules);
|
||||
assert(Cfg.NrCachedRegionRules <= NrMaxRules);
|
||||
|
@ -131,9 +140,6 @@ package ariane_pkg;
|
|||
localparam TRANS_ID_BITS = $clog2(NR_SB_ENTRIES); // depending on the number of scoreboard entries we need that many bits
|
||||
// to uniquely identify the entry in the scoreboard
|
||||
localparam ASID_WIDTH = 1;
|
||||
localparam BTB_ENTRIES = 64;
|
||||
localparam BHT_ENTRIES = 128;
|
||||
localparam RAS_DEPTH = 2;
|
||||
localparam BITS_SATURATION_COUNTER = 2;
|
||||
localparam NR_COMMIT_PORTS = 2;
|
||||
|
||||
|
@ -142,8 +148,8 @@ package ariane_pkg;
|
|||
localparam ISSUE_WIDTH = 1;
|
||||
// amount of pipeline registers inserted for load/store return path
|
||||
// this can be tuned to trade-off IPC vs. cycle time
|
||||
localparam NR_LOAD_PIPE_REGS = 1;
|
||||
localparam NR_STORE_PIPE_REGS = 0;
|
||||
localparam int unsigned NR_LOAD_PIPE_REGS = 1;
|
||||
localparam int unsigned NR_STORE_PIPE_REGS = 0;
|
||||
|
||||
// depth of store-buffers, this needs to be a power of two
|
||||
localparam int unsigned DEPTH_SPEC = 4;
|
||||
|
@ -281,7 +287,7 @@ package ariane_pkg;
|
|||
// ---------------
|
||||
|
||||
// leave as is (fails with >8 entries and wider fetch width)
|
||||
localparam int unsigned FETCH_FIFO_DEPTH = 8;
|
||||
localparam int unsigned FETCH_FIFO_DEPTH = 4;
|
||||
localparam int unsigned FETCH_WIDTH = 32;
|
||||
// maximum instructions we can fetch on one request (we support compressed instructions)
|
||||
localparam int unsigned INSTR_PER_FETCH = FETCH_WIDTH / 16;
|
||||
|
@ -295,18 +301,24 @@ package ariane_pkg;
|
|||
logic valid;
|
||||
} exception_t;
|
||||
|
||||
typedef enum logic [1:0] { BHT, BTB, RAS } cf_t;
|
||||
typedef enum logic [2:0] {
|
||||
NoCF, // No control flow prediction
|
||||
Branch, // Branch
|
||||
Jump, // Jump to address from immediate
|
||||
JumpR, // Jump to address from registers
|
||||
Return // Return Address Prediction
|
||||
} cf_t;
|
||||
|
||||
// branch-predict
|
||||
// this is the struct we get back from ex stage and we will use it to update
|
||||
// all the necessary data structures
|
||||
// bp_resolve_t
|
||||
typedef struct packed {
|
||||
logic valid; // prediction with all its values is valid
|
||||
logic [63:0] pc; // pc of predict or mis-predict
|
||||
logic [63:0] pc; // PC of predict or mis-predict
|
||||
logic [63:0] target_address; // target address at which to jump, or not
|
||||
logic is_mispredict; // set if this was a mis-predict
|
||||
logic is_taken; // branch is taken
|
||||
// in the lower 16 bit of the word
|
||||
cf_t cf_type; // Type of control flow change
|
||||
} bp_resolve_t;
|
||||
|
||||
|
@ -314,11 +326,8 @@ package ariane_pkg;
|
|||
// this is the struct which we will inject into the pipeline to guide the various
|
||||
// units towards the correct branch decision and resolve
|
||||
typedef struct packed {
|
||||
logic valid; // this is a valid hint
|
||||
cf_t cf; // type of control flow prediction
|
||||
logic [63:0] predict_address; // target address at which to jump, or not
|
||||
logic predict_taken; // branch is taken
|
||||
// in the lower 16 bit of the word
|
||||
cf_t cf_type; // Type of control flow change
|
||||
} branchpredict_sbe_t;
|
||||
|
||||
typedef struct packed {
|
||||
|
@ -340,14 +349,12 @@ package ariane_pkg;
|
|||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [63:0] pc; // update at PC
|
||||
logic mispredict;
|
||||
logic taken;
|
||||
} bht_update_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic taken;
|
||||
logic strongly_taken;
|
||||
} bht_prediction_t;
|
||||
|
||||
typedef enum logic[3:0] {
|
||||
|
@ -444,7 +451,7 @@ package ariane_pkg;
|
|||
// comparisons
|
||||
LTS, LTU, GES, GEU, EQ, NE,
|
||||
// jumps
|
||||
JALR,
|
||||
JALR, BRANCH,
|
||||
// set lower than operations
|
||||
SLTS, SLTU,
|
||||
// CSR functions
|
||||
|
@ -482,6 +489,13 @@ package ariane_pkg;
|
|||
logic [TRANS_ID_BITS-1:0] trans_id;
|
||||
} fu_data_t;
|
||||
|
||||
function automatic logic is_branch (input fu_op op);
|
||||
unique case (op) inside
|
||||
EQ, NE, LTS, GES, LTU, GEU: return 1'b1;
|
||||
default : return 1'b0; // all other ops
|
||||
endcase
|
||||
endfunction;
|
||||
|
||||
// -------------------------------
|
||||
// Extract Src/Dst FP Reg from Op
|
||||
// -------------------------------
|
||||
|
@ -570,14 +584,6 @@ package ariane_pkg;
|
|||
// ---------------
|
||||
// IF/ID Stage
|
||||
// ---------------
|
||||
typedef struct packed {
|
||||
logic [63:0] address; // the address of the instructions from below
|
||||
logic [FETCH_WIDTH-1:0] instruction; // instruction word
|
||||
branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path
|
||||
logic [INSTR_PER_FETCH-1:0] bp_taken; // at which instruction is this branch taken?
|
||||
logic page_fault; // an instruction page fault happened
|
||||
} frontend_fetch_t;
|
||||
|
||||
// store the decompressed instruction
|
||||
typedef struct packed {
|
||||
logic [63:0] address; // the address of the instructions from below
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright 2017-2019 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
|
@ -59,9 +59,9 @@ module ariane #(
|
|||
// --------------
|
||||
// IF <-> ID
|
||||
// --------------
|
||||
frontend_fetch_t fetch_entry_if_id;
|
||||
fetch_entry_t fetch_entry_if_id;
|
||||
logic fetch_valid_if_id;
|
||||
logic decode_ack_id_if;
|
||||
logic fetch_ready_id_if;
|
||||
|
||||
// --------------
|
||||
// ID <-> ISSUE
|
||||
|
@ -220,7 +220,7 @@ module ariane #(
|
|||
// Frontend
|
||||
// --------------
|
||||
frontend #(
|
||||
.DmBaseAddress ( ArianeCfg.DmBaseAddress )
|
||||
.ArianeCfg ( ArianeCfg )
|
||||
) i_frontend (
|
||||
.flush_i ( flush_ctrl_if ), // not entirely correct
|
||||
.flush_bp_i ( 1'b0 ),
|
||||
|
@ -238,7 +238,7 @@ module ariane #(
|
|||
.ex_valid_i ( ex_commit.valid ),
|
||||
.fetch_entry_o ( fetch_entry_if_id ),
|
||||
.fetch_entry_valid_o ( fetch_valid_if_id ),
|
||||
.fetch_ack_i ( decode_ack_id_if ),
|
||||
.fetch_entry_ready_i ( fetch_ready_id_if ),
|
||||
.*
|
||||
);
|
||||
|
||||
|
@ -246,11 +246,14 @@ module ariane #(
|
|||
// ID
|
||||
// ---------
|
||||
id_stage id_stage_i (
|
||||
.debug_req_i,
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i ( flush_ctrl_if ),
|
||||
.debug_req_i,
|
||||
|
||||
.fetch_entry_i ( fetch_entry_if_id ),
|
||||
.fetch_entry_valid_i ( fetch_valid_if_id ),
|
||||
.decoded_instr_ack_o ( decode_ack_id_if ),
|
||||
.fetch_entry_ready_o ( fetch_ready_id_if ),
|
||||
|
||||
.issue_entry_o ( issue_entry_id_issue ),
|
||||
.issue_entry_valid_o ( issue_entry_valid_id_issue ),
|
||||
|
@ -260,13 +263,12 @@ module ariane #(
|
|||
.priv_lvl_i ( priv_lvl ),
|
||||
.fs_i ( fs ),
|
||||
.frm_i ( frm_csr_id_issue_ex ),
|
||||
.irq_i ( irq_i ),
|
||||
.irq_ctrl_i ( irq_ctrl_csr_id ),
|
||||
.debug_mode_i ( debug_mode ),
|
||||
.tvm_i ( tvm_csr_id ),
|
||||
.tw_i ( tw_csr_id ),
|
||||
.tsr_i ( tsr_csr_id ),
|
||||
.irq_i ( irq_i ),
|
||||
.irq_ctrl_i ( irq_ctrl_csr_id ),
|
||||
.*
|
||||
.tsr_i ( tsr_csr_id )
|
||||
);
|
||||
|
||||
// ---------
|
||||
|
@ -334,6 +336,7 @@ module ariane #(
|
|||
) ex_stage_i (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.debug_mode_i ( debug_mode ),
|
||||
.flush_i ( flush_ctrl_ex ),
|
||||
.fu_data_i ( fu_data_id_ex ),
|
||||
.pc_i ( pc_id_ex ),
|
||||
|
@ -708,9 +711,9 @@ module ariane #(
|
|||
assign tracer_if.flush_unissued = flush_unissued_instr_ctrl_id;
|
||||
assign tracer_if.flush = flush_ctrl_ex;
|
||||
// fetch
|
||||
assign tracer_if.instruction = id_stage_i.instr_realigner_i.fetch_entry_o.instruction;
|
||||
assign tracer_if.fetch_valid = id_stage_i.instr_realigner_i.fetch_entry_valid_o;
|
||||
assign tracer_if.fetch_ack = id_stage_i.instr_realigner_i.fetch_ack_i;
|
||||
assign tracer_if.instruction = id_stage_i.fetch_entry_i.instruction;
|
||||
assign tracer_if.fetch_valid = id_stage_i.fetch_entry_valid_i;
|
||||
assign tracer_if.fetch_ack = id_stage_i.fetch_entry_ready_o;
|
||||
// Issue
|
||||
assign tracer_if.issue_ack = issue_stage_i.i_scoreboard.issue_ack_i;
|
||||
assign tracer_if.issue_sbe = issue_stage_i.i_scoreboard.issue_instr_o;
|
||||
|
|
|
@ -12,10 +12,11 @@
|
|||
// Date: 09.05.2017
|
||||
// Description: Branch target calculation and comparison
|
||||
|
||||
import ariane_pkg::*;
|
||||
|
||||
module branch_unit (
|
||||
input fu_data_t fu_data_i,
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic debug_mode_i,
|
||||
input ariane_pkg::fu_data_t fu_data_i,
|
||||
input logic [63:0] pc_i, // PC of instruction
|
||||
input logic is_compressed_instr_i,
|
||||
input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict
|
||||
|
@ -23,83 +24,62 @@ module branch_unit (
|
|||
input logic branch_comp_res_i, // branch comparison result from ALU
|
||||
output logic [63:0] branch_result_o,
|
||||
|
||||
input branchpredict_sbe_t branch_predict_i, // this is the address we predicted
|
||||
output bp_resolve_t resolved_branch_o, // this is the actual address we are targeting
|
||||
input ariane_pkg::branchpredict_sbe_t branch_predict_i, // this is the address we predicted
|
||||
output ariane_pkg::bp_resolve_t resolved_branch_o, // this is the actual address we are targeting
|
||||
output logic resolve_branch_o, // to ID to clear that we resolved the branch and we can
|
||||
// accept new entries to the scoreboard
|
||||
output exception_t branch_exception_o // branch exception out
|
||||
output ariane_pkg::exception_t branch_exception_o // branch exception out
|
||||
);
|
||||
logic [63:0] target_address;
|
||||
logic [63:0] next_pc;
|
||||
|
||||
// here we handle the various possibilities of mis-predicts
|
||||
// here we handle the various possibilities of mis-predicts
|
||||
always_comb begin : mispredict_handler
|
||||
// set the jump base, for JALR we need to look at the register, for all other control flow instructions we can take the current PC
|
||||
automatic logic [63:0] jump_base;
|
||||
jump_base = (fu_data_i.operator == JALR) ? fu_data_i.operand_a : pc_i;
|
||||
// TODO(zarubaf): The ALU can be used to calculate the branch target
|
||||
jump_base = (fu_data_i.operator == ariane_pkg::JALR) ? fu_data_i.operand_a : pc_i;
|
||||
|
||||
target_address = 64'b0;
|
||||
resolve_branch_o = 1'b0;
|
||||
resolved_branch_o.target_address = 64'b0;
|
||||
resolved_branch_o.is_taken = 1'b0;
|
||||
resolved_branch_o.valid = branch_valid_i;
|
||||
resolved_branch_o.is_mispredict = 1'b0;
|
||||
resolved_branch_o.cf_type = branch_predict_i.cf_type;
|
||||
resolved_branch_o.cf_type = branch_predict_i.cf;
|
||||
// calculate next PC, depending on whether the instruction is compressed or not this may be different
|
||||
// TODO(zarubaf): We already calculate this a couple of times, maybe re-use?
|
||||
next_pc = pc_i + ((is_compressed_instr_i) ? 64'h2 : 64'h4);
|
||||
// calculate target address simple 64 bit addition
|
||||
target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm));
|
||||
// on a JALR we are supposed to reset the LSB to 0 (according to the specification)
|
||||
if (fu_data_i.operator == JALR)
|
||||
target_address[0] = 1'b0;
|
||||
// if we need to put the branch target address in a destination register, output it here to WB
|
||||
if (fu_data_i.operator == ariane_pkg::JALR) target_address[0] = 1'b0;
|
||||
// we need to put the branch target address into rd, this is the result of this unit
|
||||
branch_result_o = next_pc;
|
||||
|
||||
// save PC - we need this to get the target row in the branch target buffer
|
||||
// we play this trick with the branch instruction which wraps a word boundary:
|
||||
// /---------- Place the prediction on this PC
|
||||
// \/
|
||||
// ____________________________________________________
|
||||
// |branch [15:0] | branch[31:16] | compressed 1[15:0] |
|
||||
// |____________________________________________________
|
||||
// This will relief the pre-fetcher to re-fetch partially fetched unaligned branch instructions e.g.:
|
||||
// we don't have a back arch between the pre-fetcher and decoder/instruction FIFO.
|
||||
resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4);
|
||||
|
||||
resolved_branch_o.pc = pc_i;
|
||||
// There are only two sources of mispredicts:
|
||||
// 1. Branches
|
||||
// 2. Jumps to register addresses
|
||||
if (branch_valid_i) begin
|
||||
// write target address which goes to pc gen
|
||||
// write target address which goes to PC Gen
|
||||
resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;
|
||||
resolved_branch_o.is_taken = branch_comp_res_i;
|
||||
// we've detected a branch in ID with the following parameters
|
||||
// we mis-predicted e.g.: the predicted address is unequal to the actual address
|
||||
if (target_address[0] == 1'b0) begin
|
||||
// we've got a valid branch prediction
|
||||
if (branch_predict_i.valid) begin
|
||||
// if the outcome doesn't match we've got a mis-predict
|
||||
if (branch_predict_i.predict_taken != branch_comp_res_i) begin
|
||||
resolved_branch_o.is_mispredict = 1'b1;
|
||||
end
|
||||
// check if the address of the predict taken branch is correct
|
||||
if (branch_predict_i.predict_taken && target_address != branch_predict_i.predict_address) begin
|
||||
resolved_branch_o.is_mispredict = 1'b1;
|
||||
end
|
||||
// branch-prediction didn't do anything (e.g.: it fetched PC + 2/4), so if this branch is taken
|
||||
// we also have a mis-predict
|
||||
end else begin
|
||||
if (branch_comp_res_i) begin
|
||||
resolved_branch_o.is_mispredict = 1'b1;
|
||||
end
|
||||
end
|
||||
resolved_branch_o.is_taken = branch_comp_res_i;
|
||||
// check the outcome of the branch speculation
|
||||
if (ariane_pkg::is_branch(fu_data_i.operator) && branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch)) begin
|
||||
// we mis-predicted the outcome
|
||||
// if the outcome doesn't match we've got a mis-predict
|
||||
resolved_branch_o.is_mispredict = 1'b1;
|
||||
resolved_branch_o.cf_type = ariane_pkg::Branch;
|
||||
end
|
||||
if (fu_data_i.operator == ariane_pkg::JALR
|
||||
// check if the address of the jump register is correct and that we actually predicted
|
||||
&& (branch_predict_i.cf == ariane_pkg::NoCF || target_address != branch_predict_i.predict_address)) begin
|
||||
resolved_branch_o.is_mispredict = 1'b1;
|
||||
// update BTB only if this wasn't a return
|
||||
if (branch_predict_i.cf != ariane_pkg::Return) resolved_branch_o.cf_type = ariane_pkg::JumpR;
|
||||
end
|
||||
// to resolve the branch in ID
|
||||
resolve_branch_o = 1'b1;
|
||||
// the other case would be that this instruction was no branch but branch prediction thought that it was one
|
||||
// this is essentially also a mis-predict
|
||||
end else if (fu_valid_i && branch_predict_i.valid && branch_predict_i.predict_taken) begin
|
||||
// re-set the branch to the next PC
|
||||
resolved_branch_o.is_mispredict = 1'b1;
|
||||
resolved_branch_o.target_address = next_pc;
|
||||
resolved_branch_o.valid = 1'b1;
|
||||
resolve_branch_o = 1'b1;
|
||||
end
|
||||
end
|
||||
// use ALU exception signal for storing instruction fetch exceptions if
|
||||
|
@ -109,7 +89,6 @@ module branch_unit (
|
|||
branch_exception_o.valid = 1'b0;
|
||||
branch_exception_o.tval = pc_i;
|
||||
// only throw exception if this is indeed a branch
|
||||
if (branch_valid_i && target_address[0] != 1'b0)
|
||||
branch_exception_o.valid = 1'b1;
|
||||
if (branch_valid_i && target_address[0] != 1'b0) branch_exception_o.valid = 1'b1;
|
||||
end
|
||||
endmodule
|
||||
|
|
|
@ -21,6 +21,7 @@ module ex_stage #(
|
|||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
input logic flush_i,
|
||||
input logic debug_mode_i,
|
||||
|
||||
input fu_data_t fu_data_i,
|
||||
input logic [63:0] pc_i, // PC of current instruction
|
||||
|
@ -143,6 +144,9 @@ module ex_stage #(
|
|||
// we don't silence the branch unit as this is already critical and we do
|
||||
// not want to add another layer of logic
|
||||
branch_unit branch_unit_i (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.debug_mode_i,
|
||||
.fu_data_i,
|
||||
.pc_i,
|
||||
.is_compressed_instr_i,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//Copyright (C) 2018 to present,
|
||||
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 2.0 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
|
@ -6,7 +6,8 @@
|
|||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.//
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 08.02.2018
|
||||
// Migrated: Luis Vitorio Cargnini, IEEE
|
||||
|
@ -20,65 +21,81 @@ module bht #(
|
|||
input logic rst_ni,
|
||||
input logic flush_i,
|
||||
input logic debug_mode_i,
|
||||
|
||||
input logic [63:0] vpc_i,
|
||||
input ariane_pkg::bht_update_t bht_update_i,
|
||||
output ariane_pkg::bht_prediction_t bht_prediction_o
|
||||
// we potentially need INSTR_PER_FETCH predictions/cycle
|
||||
output ariane_pkg::bht_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_prediction_o
|
||||
);
|
||||
localparam OFFSET = 2; // we are using compressed instructions so do not use the lower 2 bits for prediction
|
||||
localparam ANTIALIAS_BITS = 8;
|
||||
// the last bit is always zero, we don't need it for indexing
|
||||
localparam OFFSET = 1;
|
||||
// re-shape the branch history table
|
||||
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
|
||||
// number of bits needed to index the row
|
||||
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
|
||||
// number of bits we should use for prediction
|
||||
localparam PREDICTION_BITS = $clog2(NR_ENTRIES) + OFFSET;
|
||||
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
|
||||
// we are not interested in all bits of the address
|
||||
unread i_unread (.d_i(|vpc_i));
|
||||
|
||||
struct packed {
|
||||
logic valid;
|
||||
logic [1:0] saturation_counter;
|
||||
} bht_d[NR_ENTRIES-1:0], bht_q[NR_ENTRIES-1:0];
|
||||
} bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0], bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
|
||||
|
||||
logic [$clog2(NR_ENTRIES)-1:0] index, update_pc;
|
||||
logic [1:0] saturation_counter;
|
||||
logic [$clog2(NR_ROWS)-1:0] index, update_pc;
|
||||
logic [ROW_ADDR_BITS-1:0] update_row_index;
|
||||
logic [1:0] saturation_counter;
|
||||
|
||||
assign index = vpc_i[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET];
|
||||
assign update_pc = bht_update_i.pc[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET];
|
||||
assign update_row_index = bht_update_i.pc[ROW_ADDR_BITS + OFFSET - 1:OFFSET];
|
||||
|
||||
assign index = vpc_i[PREDICTION_BITS - 1:OFFSET];
|
||||
assign update_pc = bht_update_i.pc[PREDICTION_BITS - 1:OFFSET];
|
||||
// prediction assignment
|
||||
assign bht_prediction_o.valid = bht_q[index].valid;
|
||||
assign bht_prediction_o.taken = bht_q[index].saturation_counter == 2'b10;
|
||||
assign bht_prediction_o.strongly_taken = (bht_q[index].saturation_counter == 2'b11);
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output
|
||||
assign bht_prediction_o[i].valid = bht_q[index][i].valid;
|
||||
assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1;
|
||||
end
|
||||
|
||||
always_comb begin : update_bht
|
||||
bht_d = bht_q;
|
||||
saturation_counter = bht_q[update_pc].saturation_counter;
|
||||
saturation_counter = bht_q[update_pc][update_row_index].saturation_counter;
|
||||
|
||||
if (bht_update_i.valid && !debug_mode_i) begin
|
||||
bht_d[update_pc].valid = 1'b1;
|
||||
bht_d[update_pc][update_row_index].valid = 1'b1;
|
||||
|
||||
if (saturation_counter == 2'b11) begin
|
||||
// we can safely decrease it
|
||||
if (~bht_update_i.taken)
|
||||
bht_d[update_pc].saturation_counter = saturation_counter - 1;
|
||||
if (!bht_update_i.taken)
|
||||
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
|
||||
// then check if it saturated in the negative regime e.g.: branch not taken
|
||||
end else if (saturation_counter == 2'b00) begin
|
||||
// we can safely increase it
|
||||
if (bht_update_i.taken)
|
||||
bht_d[update_pc].saturation_counter = saturation_counter + 1;
|
||||
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
|
||||
end else begin // otherwise we are not in any boundaries and can decrease or increase it
|
||||
if (bht_update_i.taken)
|
||||
bht_d[update_pc].saturation_counter = saturation_counter + 1;
|
||||
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
|
||||
else
|
||||
bht_d[update_pc].saturation_counter = saturation_counter - 1;
|
||||
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (~rst_ni) begin
|
||||
for (int unsigned i = 0; i < NR_ENTRIES; i++)
|
||||
bht_q[i] <= '0;
|
||||
if (!rst_ni) begin
|
||||
for (int unsigned i = 0; i < NR_ENTRIES; i++) begin
|
||||
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
|
||||
bht_q[i][j] <= '0;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
// evict all entries
|
||||
if (flush_i) begin
|
||||
for (int i = 0; i < NR_ENTRIES; i++) begin
|
||||
bht_q[i].valid <= 1'b0;
|
||||
bht_q[i].saturation_counter <= 2'b10;
|
||||
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
|
||||
bht_q[i][j].valid <= 1'b0;
|
||||
bht_q[i][j].saturation_counter <= 2'b10;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
bht_q <= bht_d;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//Copyright (C) 2018 to present,
|
||||
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 2.0 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
|
@ -13,10 +13,6 @@
|
|||
// Migrated: Luis Vitorio Cargnini, IEEE
|
||||
// Date: 09.06.2018
|
||||
|
||||
// ------------------------------
|
||||
// Branch Prediction
|
||||
// ------------------------------
|
||||
|
||||
// branch target buffer
|
||||
module btb #(
|
||||
parameter int NR_ENTRIES = 8
|
||||
|
@ -28,23 +24,36 @@ module btb #(
|
|||
|
||||
input logic [63:0] vpc_i, // virtual PC from IF stage
|
||||
input ariane_pkg::btb_update_t btb_update_i, // update btb with this information
|
||||
output ariane_pkg::btb_prediction_t btb_prediction_o // prediction from btb
|
||||
output ariane_pkg::btb_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] btb_prediction_o // prediction from btb
|
||||
);
|
||||
// number of bits which are not used for indexing
|
||||
localparam OFFSET = 1; // we are using compressed instructions so do use the lower 2 bits for prediction
|
||||
localparam ANTIALIAS_BITS = 8;
|
||||
// the last bit is always zero, we don't need it for indexing
|
||||
localparam OFFSET = 1;
|
||||
// re-shape the branch history table
|
||||
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
|
||||
// number of bits needed to index the row
|
||||
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
|
||||
// number of bits we should use for prediction
|
||||
localparam PREDICTION_BITS = $clog2(NR_ENTRIES) + OFFSET;
|
||||
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
|
||||
// prevent aliasing to degrade performance
|
||||
localparam ANTIALIAS_BITS = 8;
|
||||
// we are not interested in all bits of the address
|
||||
unread i_unread (.d_i(|vpc_i));
|
||||
|
||||
// typedef for all branch target entries
|
||||
// we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
|
||||
ariane_pkg::btb_prediction_t btb_d [NR_ENTRIES-1:0], btb_q [NR_ENTRIES-1:0];
|
||||
logic [$clog2(NR_ENTRIES)-1:0] index, update_pc;
|
||||
ariane_pkg::btb_prediction_t btb_d [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
|
||||
btb_q [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
|
||||
logic [$clog2(NR_ROWS)-1:0] index, update_pc;
|
||||
logic [ROW_ADDR_BITS-1:0] update_row_index;
|
||||
|
||||
assign index = vpc_i[PREDICTION_BITS - 1:OFFSET];
|
||||
assign update_pc = btb_update_i.pc[PREDICTION_BITS - 1:OFFSET];
|
||||
assign index = vpc_i[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET];
|
||||
assign update_pc = btb_update_i.pc[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET];
|
||||
assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS + OFFSET - 1:OFFSET];
|
||||
|
||||
// output matching prediction
|
||||
assign btb_prediction_o = btb_q[index];
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
|
||||
assign btb_prediction_o[i] = btb_q[index][i]; // workaround
|
||||
end
|
||||
|
||||
// -------------------------
|
||||
// Update Branch Prediction
|
||||
|
@ -54,23 +63,25 @@ module btb #(
|
|||
btb_d = btb_q;
|
||||
|
||||
if (btb_update_i.valid && !debug_mode_i) begin
|
||||
btb_d[update_pc].valid = 1'b1;
|
||||
btb_d[update_pc][update_row_index].valid = 1'b1;
|
||||
// the target address is simply updated
|
||||
btb_d[update_pc].target_address = btb_update_i.target_address;
|
||||
btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
|
||||
end
|
||||
end
|
||||
|
||||
// sequential process
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (~rst_ni) begin
|
||||
if (!rst_ni) begin
|
||||
// Bias the branches to be taken upon first arrival
|
||||
for (int i = 0; i < NR_ENTRIES; i++)
|
||||
for (int i = 0; i < NR_ROWS; i++)
|
||||
btb_q[i] <= '{default: 0};
|
||||
end else begin
|
||||
// evict all entries
|
||||
if (flush_i) begin
|
||||
for (int i = 0; i < NR_ENTRIES; i++) begin
|
||||
btb_q[i].valid <= 1'b0;
|
||||
for (int i = 0; i < NR_ROWS; i++) begin
|
||||
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
|
||||
btb_q[i][j].valid <= 1'b0;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
btb_q <= btb_d;
|
||||
|
|
|
@ -11,61 +11,66 @@
|
|||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 08.02.2018
|
||||
// Description: Ariane Instruction Fetch Frontend
|
||||
|
||||
|
||||
//
|
||||
// This module interfaces with the instruction cache, handles control
|
||||
// change request from the back-end and does branch prediction.
|
||||
import ariane_pkg::*;
|
||||
|
||||
module frontend #(
|
||||
parameter logic [63:0] DmBaseAddress = 64'h0 // debug module base address
|
||||
parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig
|
||||
) (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
input logic flush_i, // flush request for PCGEN
|
||||
input logic flush_bp_i, // flush branch prediction
|
||||
input logic debug_mode_i,
|
||||
// global input
|
||||
input logic [63:0] boot_addr_i,
|
||||
// Set a new PC
|
||||
// mispredict
|
||||
input bp_resolve_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB
|
||||
// from commit, when flushing the whole pipeline
|
||||
input logic set_pc_commit_i, // Take the PC from commit stage
|
||||
input logic [63:0] pc_commit_i, // PC of instruction in commit stage
|
||||
// CSR input
|
||||
input logic [63:0] epc_i, // exception PC which we need to return to
|
||||
input logic eret_i, // return from exception
|
||||
input logic [63:0] trap_vector_base_i, // base of trap vector
|
||||
input logic ex_valid_i, // exception is valid - from commit
|
||||
input logic set_debug_pc_i, // jump to debug address
|
||||
// Instruction Fetch
|
||||
input icache_dreq_o_t icache_dreq_i,
|
||||
output icache_dreq_i_t icache_dreq_o,
|
||||
// instruction output port -> to processor back-end
|
||||
output frontend_fetch_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
|
||||
output logic fetch_entry_valid_o, // instruction in IF is valid
|
||||
input logic fetch_ack_i // ID acknowledged this instruction
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
input logic flush_i, // flush request for PCGEN
|
||||
input logic flush_bp_i, // flush branch prediction
|
||||
input logic debug_mode_i,
|
||||
// global input
|
||||
input logic [63:0] boot_addr_i,
|
||||
// Set a new PC
|
||||
// mispredict
|
||||
input bp_resolve_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB
|
||||
// from commit, when flushing the whole pipeline
|
||||
input logic set_pc_commit_i, // Take the PC from commit stage
|
||||
input logic [63:0] pc_commit_i, // PC of instruction in commit stage
|
||||
// CSR input
|
||||
input logic [63:0] epc_i, // exception PC which we need to return to
|
||||
input logic eret_i, // return from exception
|
||||
input logic [63:0] trap_vector_base_i, // base of trap vector
|
||||
input logic ex_valid_i, // exception is valid - from commit
|
||||
input logic set_debug_pc_i, // jump to debug address
|
||||
// Instruction Fetch
|
||||
output icache_dreq_i_t icache_dreq_o,
|
||||
input icache_dreq_o_t icache_dreq_i,
|
||||
// instruction output port -> to processor back-end
|
||||
output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
|
||||
output logic fetch_entry_valid_o, // instruction in IF is valid
|
||||
input logic fetch_entry_ready_i // ID acknowledged this instruction
|
||||
);
|
||||
// Registers
|
||||
logic [31:0] icache_data_q;
|
||||
logic icache_valid_q;
|
||||
logic icache_ex_valid_q;
|
||||
logic instruction_valid;
|
||||
logic [INSTR_PER_FETCH-1:0] instr_is_compressed;
|
||||
|
||||
logic [63:0] icache_vaddr_q;
|
||||
// BHT, BTB and RAS prediction
|
||||
bht_prediction_t bht_prediction;
|
||||
btb_prediction_t btb_prediction;
|
||||
ras_t ras_predict;
|
||||
bht_update_t bht_update;
|
||||
btb_update_t btb_update;
|
||||
logic ras_push, ras_pop;
|
||||
logic [63:0] ras_update;
|
||||
|
||||
// Instruction Cache Registers, from I$
|
||||
logic [FETCH_WIDTH-1:0] icache_data_q;
|
||||
logic icache_valid_q;
|
||||
logic icache_ex_valid_q;
|
||||
logic [63:0] icache_vaddr_q;
|
||||
logic instr_queue_ready;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed;
|
||||
// upper-most branch-prediction from last cycle
|
||||
btb_prediction_t btb_q;
|
||||
bht_prediction_t bht_q;
|
||||
// instruction fetch is ready
|
||||
logic if_ready;
|
||||
logic [63:0] npc_d, npc_q; // next PC
|
||||
logic npc_rst_load_q; //indicates whether we come out of reset (then we need to load boot_addr_i)
|
||||
|
||||
// indicates whether we come out of reset (then we need to load boot_addr_i)
|
||||
logic npc_rst_load_q;
|
||||
|
||||
logic replay;
|
||||
logic [63:0] replay_addr;
|
||||
|
||||
// shift amount
|
||||
logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt;
|
||||
// address will always be 16 bit aligned, make this explicit here
|
||||
assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1];
|
||||
|
||||
// -----------------------
|
||||
// Ctrl Flow Speculation
|
||||
// -----------------------
|
||||
|
@ -74,209 +79,185 @@ module frontend #(
|
|||
rvi_jalr, rvi_jump;
|
||||
logic [INSTR_PER_FETCH-1:0][63:0] rvi_imm;
|
||||
// RVC branching
|
||||
logic [INSTR_PER_FETCH-1:0] is_rvc;
|
||||
logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return,
|
||||
rvc_jalr, rvc_call;
|
||||
logic [INSTR_PER_FETCH-1:0][63:0] rvc_imm;
|
||||
// re-aligned instruction and address (coming from cache - combinationally)
|
||||
logic [INSTR_PER_FETCH-1:0][31:0] instr;
|
||||
logic [INSTR_PER_FETCH-1:0][63:0] addr;
|
||||
logic [INSTR_PER_FETCH-1:0] instruction_valid;
|
||||
// BHT, BTB and RAS prediction
|
||||
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction;
|
||||
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction;
|
||||
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
|
||||
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
|
||||
ras_t ras_predict;
|
||||
|
||||
logic [63:0] bp_vaddr;
|
||||
logic bp_valid; // we have a valid branch-prediction
|
||||
logic is_mispredict;
|
||||
// branch-prediction which we inject into the pipeline
|
||||
branchpredict_sbe_t bp_sbe;
|
||||
// fetch fifo credit system
|
||||
logic fifo_valid, fifo_ready, fifo_empty, fifo_pop;
|
||||
logic s2_eff_kill, issue_req, s2_in_flight_d, s2_in_flight_q;
|
||||
logic [$clog2(FETCH_FIFO_DEPTH):0] fifo_credits_d;
|
||||
logic [$clog2(FETCH_FIFO_DEPTH):0] fifo_credits_q;
|
||||
// branch-predict update
|
||||
logic is_mispredict;
|
||||
logic ras_push, ras_pop;
|
||||
logic [63:0] ras_update;
|
||||
|
||||
// save the unaligned part of the instruction to this ff
|
||||
logic [15:0] unaligned_instr_d, unaligned_instr_q;
|
||||
// the last instruction was unaligned
|
||||
logic unaligned_d, unaligned_q;
|
||||
// register to save the unaligned address
|
||||
logic [63:0] unaligned_address_d, unaligned_address_q;
|
||||
// Instruction FIFO
|
||||
logic [63:0] predict_address;
|
||||
cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvi_cf;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf;
|
||||
|
||||
for (genvar i = 0; i < INSTR_PER_FETCH; i ++) begin
|
||||
// LSB != 2'b11
|
||||
assign instr_is_compressed[i] = ~&icache_data_q[i * 16 +: 2];
|
||||
logic serving_unaligned;
|
||||
// Re-align instructions
|
||||
instr_realign i_instr_realign (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.flush_i ( icache_dreq_o.kill_s2 ),
|
||||
.valid_i ( icache_valid_q ),
|
||||
.serving_unaligned_o ( serving_unaligned ),
|
||||
.address_i ( icache_vaddr_q ),
|
||||
.data_i ( icache_data_q ),
|
||||
.valid_o ( instruction_valid ),
|
||||
.addr_o ( addr ),
|
||||
.instr_o ( instr )
|
||||
);
|
||||
// --------------------
|
||||
// Branch Prediction
|
||||
// --------------------
|
||||
// select the right branch prediction result
|
||||
// in case we are serving an unaligned instruction in instr[0] we need to take
|
||||
// the prediction we saved from the previous fetch
|
||||
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[0];
|
||||
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[0];
|
||||
// for all other predictions we can use the generated address to index
|
||||
// into the branch prediction data structures
|
||||
for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address
|
||||
assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
|
||||
assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
|
||||
end
|
||||
// for the return address stack it doesn't matter as we have the
|
||||
// address of the call/return already
|
||||
logic bp_valid;
|
||||
|
||||
logic [INSTR_PER_FETCH-1:0] is_branch;
|
||||
logic [INSTR_PER_FETCH-1:0] is_call;
|
||||
logic [INSTR_PER_FETCH-1:0] is_jump;
|
||||
logic [INSTR_PER_FETCH-1:0] is_return;
|
||||
logic [INSTR_PER_FETCH-1:0] is_jalr;
|
||||
|
||||
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
|
||||
// branch history table -> BHT
|
||||
assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]);
|
||||
// function calls -> RAS
|
||||
assign is_call[i] = instruction_valid[i] & (rvi_call[i] | rvc_call[i]);
|
||||
// function return -> RAS
|
||||
assign is_return[i] = instruction_valid[i] & (rvi_return[i] | rvc_return[i]);
|
||||
// unconditional jumps with known target -> immediately resolved
|
||||
assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]);
|
||||
// unconditional jumps with unknown target -> BTB
|
||||
assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & ~is_call[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
|
||||
end
|
||||
|
||||
// Soft-realignment to do branch-prediction
|
||||
always_comb begin : re_align
|
||||
unaligned_d = unaligned_q;
|
||||
unaligned_address_d = unaligned_address_q;
|
||||
unaligned_instr_d = unaligned_instr_q;
|
||||
instruction_valid = icache_valid_q;
|
||||
// taken/not taken
|
||||
always_comb begin
|
||||
taken_rvi_cf = '0;
|
||||
taken_rvc_cf = '0;
|
||||
predict_address = '0;
|
||||
|
||||
// 32-bit can contain 2 instructions
|
||||
instr[0] = icache_data_q;
|
||||
addr[0] = icache_vaddr_q;
|
||||
for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF;
|
||||
|
||||
instr[1] = '0;
|
||||
addr[1] = {icache_vaddr_q[63:2], 2'b10};
|
||||
ras_push = 1'b0;
|
||||
ras_pop = 1'b0;
|
||||
ras_update = '0;
|
||||
|
||||
if (icache_valid_q) begin
|
||||
// last instruction was unaligned
|
||||
if (unaligned_q) begin
|
||||
instr[0] = {icache_data_q[15:0], unaligned_instr_q};
|
||||
addr[0] = unaligned_address_q;
|
||||
|
||||
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
|
||||
unaligned_instr_d = icache_data_q[31:16]; // save the upper bits for next cycle
|
||||
|
||||
// check if this instruction is still unaligned, i.e. it is not compressed
|
||||
// if its compressed re-set unaligned flag
|
||||
// for 32 bit we can simply check the next instruction and whether it is compressed or not
|
||||
// if it is compressed the next fetch will contain an aligned instruction
|
||||
if (instr_is_compressed[1]) begin
|
||||
unaligned_d = 1'b0;
|
||||
instr[1] = {16'b0, icache_data_q[31:16]};
|
||||
end
|
||||
end else if (instr_is_compressed[0]) begin // instruction zero is RVC
|
||||
// is instruction 1 also compressed
|
||||
// yes? -> no problem, no -> we've got an unaligned instruction
|
||||
if (instr_is_compressed[1]) begin
|
||||
instr[1] = {16'b0, icache_data_q[31:16]};
|
||||
end else begin
|
||||
unaligned_instr_d = icache_data_q[31:16];
|
||||
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
|
||||
unaligned_d = 1'b1;
|
||||
end
|
||||
end // else -> normal fetch
|
||||
end
|
||||
|
||||
// we started to fetch on an unaligned boundary with a whole instruction -> wait until we've
|
||||
// received the next instruction
|
||||
if (icache_valid_q && icache_vaddr_q[1] && !instr_is_compressed[1]) begin
|
||||
instruction_valid = 1'b0;
|
||||
unaligned_d = 1'b1;
|
||||
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
|
||||
unaligned_instr_d = icache_data_q[31:16];
|
||||
end
|
||||
|
||||
// if we killed the consecutive fetch we are starting on a clean slate
|
||||
if (icache_dreq_o.kill_s2) begin
|
||||
unaligned_d = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
logic [INSTR_PER_FETCH:0] taken;
|
||||
// control front-end + branch-prediction
|
||||
always_comb begin : frontend_ctrl
|
||||
automatic logic take_rvi_cf; // take the control flow change (non-compressed)
|
||||
automatic logic take_rvc_cf; // take the control flow change (compressed)
|
||||
|
||||
take_rvi_cf = 1'b0;
|
||||
take_rvc_cf = 1'b0;
|
||||
ras_pop = 1'b0;
|
||||
ras_push = 1'b0;
|
||||
ras_update = '0;
|
||||
taken = '0;
|
||||
take_rvi_cf = 1'b0;
|
||||
|
||||
bp_vaddr = '0; // predicted address
|
||||
bp_valid = 1'b0; // prediction is valid
|
||||
|
||||
bp_sbe.cf_type = RAS;
|
||||
|
||||
// only predict if the response is valid
|
||||
if (instruction_valid) begin
|
||||
// look at instruction 0, 1, 2, ...
|
||||
for (int unsigned i = 0; i < INSTR_PER_FETCH; i++) begin
|
||||
// only speculate if the previous instruction was not taken
|
||||
if (!taken[i]) begin
|
||||
// function call
|
||||
ras_push = rvi_call[i] | rvc_call[i];
|
||||
ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
|
||||
|
||||
// Branch Prediction - **speculative**
|
||||
if (rvi_branch[i] || rvc_branch[i]) begin
|
||||
bp_sbe.cf_type = BHT;
|
||||
// dynamic prediction valid?
|
||||
if (bht_prediction.valid) begin
|
||||
take_rvi_cf = rvi_branch[i] & (bht_prediction.taken | bht_prediction.strongly_taken);
|
||||
take_rvc_cf = rvc_branch[i] & (bht_prediction.taken | bht_prediction.strongly_taken);
|
||||
// default to static prediction
|
||||
end else begin
|
||||
// set if immediate is negative - static prediction
|
||||
take_rvi_cf = rvi_branch[i] & rvi_imm[i][63];
|
||||
take_rvc_cf = rvc_branch[i] & rvc_imm[i][63];
|
||||
end
|
||||
end
|
||||
|
||||
// unconditional jumps
|
||||
if (rvi_jump[i] || rvc_jump[i]) begin
|
||||
take_rvi_cf = rvi_jump[i];
|
||||
take_rvc_cf = rvc_jump[i];
|
||||
end
|
||||
|
||||
// to take this jump we need a valid prediction target **speculative**
|
||||
if ((rvi_jalr[i] || rvc_jalr[i]) && ~(rvi_call[i] || rvc_call[i])) begin
|
||||
bp_sbe.cf_type = BTB;
|
||||
if (btb_prediction.valid) begin
|
||||
bp_vaddr = btb_prediction.target_address;
|
||||
taken[i+1] = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// is it a return and the RAS contains a valid prediction? **speculative**
|
||||
if ((rvi_return[i] || rvc_return[i]) && ras_predict.valid) begin
|
||||
bp_vaddr = ras_predict.ra;
|
||||
ras_pop = 1'b1;
|
||||
taken[i+1] = 1'b1;
|
||||
bp_sbe.cf_type = RAS;
|
||||
end
|
||||
|
||||
if (take_rvi_cf) begin
|
||||
taken[i+1] = 1'b1;
|
||||
bp_vaddr = addr[i] + rvi_imm[i];
|
||||
end
|
||||
|
||||
if (take_rvc_cf) begin
|
||||
taken[i+1] = 1'b1;
|
||||
bp_vaddr = addr[i] + rvc_imm[i];
|
||||
end
|
||||
|
||||
// we are not interested in the lower instruction
|
||||
if (icache_vaddr_q[1]) begin
|
||||
taken[1] = 1'b0;
|
||||
// TODO(zarubaf): that seems to be overly pessimistic
|
||||
ras_pop = 1'b0;
|
||||
ras_push = 1'b0;
|
||||
end
|
||||
end
|
||||
// lower most prediction gets precedence
|
||||
for (int i = INSTR_PER_FETCH - 1; i >= 0 ; i--) begin
|
||||
unique case ({is_branch[i], is_return[i], is_jump[i], is_jalr[i]})
|
||||
4'b0000:; // regular instruction e.g.: no branch
|
||||
// unconditional jump to register, we need the BTB to resolve this
|
||||
4'b0001: begin
|
||||
ras_pop = 1'b0;
|
||||
ras_push = 1'b0;
|
||||
if (btb_prediction_shifted[i].valid) begin
|
||||
predict_address = btb_prediction_shifted[i].target_address;
|
||||
cf_type[i] = ariane_pkg::JumpR;
|
||||
end
|
||||
end
|
||||
|
||||
bp_valid = |taken;
|
||||
// assemble scoreboard entry
|
||||
bp_sbe.valid = bp_valid;
|
||||
bp_sbe.predict_address = bp_vaddr;
|
||||
bp_sbe.predict_taken = bp_valid;
|
||||
end
|
||||
// its an unconditional jump to an immediate
|
||||
4'b0010: begin
|
||||
ras_pop = 1'b0;
|
||||
ras_push = 1'b0;
|
||||
taken_rvi_cf[i] = rvi_jump[i];
|
||||
taken_rvc_cf[i] = rvc_jump[i];
|
||||
cf_type[i] = ariane_pkg::Jump;
|
||||
end
|
||||
// return
|
||||
4'b0100: begin
|
||||
// make sure to only alter the RAS if we actually consumed the instruction
|
||||
ras_pop = ras_predict.valid & instr_queue_consumed[i];
|
||||
ras_push = 1'b0;
|
||||
predict_address = ras_predict.ra;
|
||||
cf_type[i] = ariane_pkg::Return;
|
||||
end
|
||||
// branch prediction
|
||||
4'b1000: begin
|
||||
ras_pop = 1'b0;
|
||||
ras_push = 1'b0;
|
||||
// if we have a valid dynamic prediction use it
|
||||
if (bht_prediction_shifted[i].valid) begin
|
||||
taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken;
|
||||
taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken;
|
||||
// otherwise default to static prediction
|
||||
end else begin
|
||||
// set if immediate is negative - static prediction
|
||||
taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][63];
|
||||
taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][63];
|
||||
end
|
||||
if (taken_rvi_cf[i] || taken_rvc_cf[i]) cf_type[i] = ariane_pkg::Branch;
|
||||
end
|
||||
default:;
|
||||
// default: $error("Decoded more than one control flow");
|
||||
endcase
|
||||
// if this instruction, in addition, is a call, save the resulting address
|
||||
// but only if we actually consumed the address
|
||||
if (is_call[i]) begin
|
||||
ras_push = instr_queue_consumed[i];
|
||||
ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
|
||||
end
|
||||
// calculate the jump target address
|
||||
if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin
|
||||
predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]);
|
||||
end
|
||||
end
|
||||
end
|
||||
// or reduce struct
|
||||
always_comb begin
|
||||
bp_valid = 1'b0;
|
||||
for (int i = 0; i < INSTR_PER_FETCH; i++) bp_valid |= (cf_type[i] != NoCF);
|
||||
end
|
||||
|
||||
assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
|
||||
// we mis-predicted so kill the icache request and the fetch queue
|
||||
assign icache_dreq_o.kill_s1 = is_mispredict | flush_i;
|
||||
// if we have a valid branch-prediction we need to kill the last cache request
|
||||
assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
|
||||
assign fifo_valid = icache_valid_q;
|
||||
|
||||
// ----------------------------------------
|
||||
// Cache interface
|
||||
assign icache_dreq_o.req = instr_queue_ready;
|
||||
assign if_ready = icache_dreq_i.ready & instr_queue_ready;
|
||||
// We need to flush the cache pipeline if:
|
||||
// 1. We mispredicted
|
||||
// 2. Want to flush the whole processor front-end
|
||||
// 3. Need to replay an instruction because the fetch-fifo was full
|
||||
assign icache_dreq_o.kill_s1 = is_mispredict | flush_i | replay;
|
||||
// if we have a valid branch-prediction we need to only kill the last cache request
|
||||
// also if we killed the first stage we also need to kill the second stage (inclusive flush)
|
||||
assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
|
||||
|
||||
// Update Control Flow Predictions
|
||||
// ----------------------------------------
|
||||
// BHT
|
||||
assign bht_update.valid = resolved_branch_i.valid & (resolved_branch_i.cf_type == BHT);
|
||||
bht_update_t bht_update;
|
||||
btb_update_t btb_update;
|
||||
|
||||
assign bht_update.valid = resolved_branch_i.valid
|
||||
& (resolved_branch_i.cf_type == ariane_pkg::Branch);
|
||||
assign bht_update.pc = resolved_branch_i.pc;
|
||||
assign bht_update.mispredict = resolved_branch_i.is_mispredict;
|
||||
assign bht_update.taken = resolved_branch_i.is_taken;
|
||||
// BTB
|
||||
assign btb_update.valid = resolved_branch_i.valid & (resolved_branch_i.cf_type == BTB);
|
||||
// only update mispredicted branches e.g. no returns from the RAS
|
||||
assign btb_update.valid = resolved_branch_i.valid
|
||||
& resolved_branch_i.is_mispredict
|
||||
& (resolved_branch_i.cf_type == ariane_pkg::JumpR);
|
||||
assign btb_update.pc = resolved_branch_i.pc;
|
||||
assign btb_update.target_address = resolved_branch_i.target_address;
|
||||
|
||||
|
@ -284,7 +265,7 @@ module frontend #(
|
|||
// Next PC
|
||||
// -------------------
|
||||
// next PC (NPC) can come from (in order of precedence):
|
||||
// 0. Default assignment
|
||||
// 0. Default assignment/replay instruction
|
||||
// 1. Branch Predict taken
|
||||
// 2. Control flow change request (misprediction)
|
||||
// 3. Return from environment call
|
||||
|
@ -293,211 +274,160 @@ module frontend #(
|
|||
// Mis-predict handling is a little bit different
|
||||
// select PC a.k.a PC Gen
|
||||
always_comb begin : npc_select
|
||||
automatic logic [63:0] fetch_address;
|
||||
|
||||
// check whether we come out of reset
|
||||
// this is a workaround. some tools have issues
|
||||
// having boot_addr_i in the asynchronous
|
||||
// reset assignment to npc_q, even though
|
||||
// boot_addr_i will be assigned a constant
|
||||
// on the top-level.
|
||||
if (npc_rst_load_q) begin
|
||||
npc_d = boot_addr_i;
|
||||
fetch_address = boot_addr_i;
|
||||
end else begin
|
||||
fetch_address = npc_q;
|
||||
// keep stable by default
|
||||
npc_d = npc_q;
|
||||
end
|
||||
|
||||
// -------------------------------
|
||||
// 1. Branch Prediction
|
||||
// -------------------------------
|
||||
if (bp_valid) begin
|
||||
fetch_address = bp_vaddr;
|
||||
npc_d = bp_vaddr;
|
||||
end
|
||||
// -------------------------------
|
||||
// 0. Default assignment
|
||||
// -------------------------------
|
||||
if (if_ready) begin
|
||||
npc_d = {fetch_address[63:2], 2'b0} + 'h4;
|
||||
end
|
||||
// -------------------------------
|
||||
// 2. Control flow change request
|
||||
// -------------------------------
|
||||
if (is_mispredict) begin
|
||||
npc_d = resolved_branch_i.target_address;
|
||||
end
|
||||
// -------------------------------
|
||||
// 3. Return from environment call
|
||||
// -------------------------------
|
||||
if (eret_i) begin
|
||||
npc_d = epc_i;
|
||||
end
|
||||
// -------------------------------
|
||||
// 4. Exception/Interrupt
|
||||
// -------------------------------
|
||||
if (ex_valid_i) begin
|
||||
npc_d = trap_vector_base_i;
|
||||
end
|
||||
// -----------------------------------------------
|
||||
// 5. Pipeline Flush because of CSR side effects
|
||||
// -----------------------------------------------
|
||||
// On a pipeline flush start fetching from the next address
|
||||
// of the instruction in the commit stage
|
||||
if (set_pc_commit_i) begin
|
||||
// we came here from a flush request of a CSR instruction or AMO,
|
||||
// as CSR or AMO instructions do not exist in a compressed form
|
||||
// we can unconditionally do PC + 4 here
|
||||
// TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
|
||||
npc_d = pc_commit_i + 64'h4;
|
||||
end
|
||||
// -------------------------------
|
||||
// 6. Debug
|
||||
// -------------------------------
|
||||
// enter debug on a hard-coded base-address
|
||||
if (set_debug_pc_i) begin
|
||||
npc_d = DmBaseAddress + dm::HaltAddress;
|
||||
end
|
||||
|
||||
icache_dreq_o.vaddr = fetch_address;
|
||||
automatic logic [63:0] fetch_address;
|
||||
// check whether we come out of reset
|
||||
// this is a workaround. some tools have issues
|
||||
// having boot_addr_i in the asynchronous
|
||||
// reset assignment to npc_q, even though
|
||||
// boot_addr_i will be assigned a constant
|
||||
// on the top-level.
|
||||
if (npc_rst_load_q) begin
|
||||
npc_d = boot_addr_i;
|
||||
fetch_address = boot_addr_i;
|
||||
end else begin
|
||||
fetch_address = npc_q;
|
||||
// keep stable by default
|
||||
npc_d = npc_q;
|
||||
end
|
||||
// 0. Branch Prediction
|
||||
if (bp_valid) begin
|
||||
fetch_address = predict_address;
|
||||
npc_d = predict_address;
|
||||
end
|
||||
// 1. Default assignment
|
||||
if (if_ready) npc_d = {fetch_address[63:2], 2'b0} + 'h4;
|
||||
// 2. Replay instruction fetch
|
||||
if (replay) npc_d = replay_addr;
|
||||
// 3. Control flow change request
|
||||
if (is_mispredict) npc_d = resolved_branch_i.target_address;
|
||||
// 4. Return from environment call
|
||||
if (eret_i) npc_d = epc_i;
|
||||
// 5. Exception/Interrupt
|
||||
if (ex_valid_i) npc_d = trap_vector_base_i;
|
||||
// 6. Pipeline Flush because of CSR side effects
|
||||
// On a pipeline flush start fetching from the next address
|
||||
// of the instruction in the commit stage
|
||||
// we came here from a flush request of a CSR instruction or AMO,
|
||||
// as CSR or AMO instructions do not exist in a compressed form
|
||||
// we can unconditionally do PC + 4 here
|
||||
// TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
|
||||
if (set_pc_commit_i) npc_d = pc_commit_i + 64'h4;
|
||||
// 7. Debug
|
||||
// enter debug on a hard-coded base-address
|
||||
if (set_debug_pc_i) npc_d = ArianeCfg.DmBaseAddress + dm::HaltAddress;
|
||||
icache_dreq_o.vaddr = fetch_address;
|
||||
end
|
||||
|
||||
// -------------------
|
||||
// Credit-based fetch FIFO flow ctrl
|
||||
// -------------------
|
||||
assign fifo_credits_d = (flush_i) ? FETCH_FIFO_DEPTH :
|
||||
fifo_credits_q + fifo_pop + s2_eff_kill - issue_req;
|
||||
|
||||
// check whether there is a request in flight that is being killed now
|
||||
// if this is the case, we need to increment the credit by 1
|
||||
assign s2_eff_kill = s2_in_flight_q & icache_dreq_o.kill_s2;
|
||||
assign s2_in_flight_d = (flush_i) ? 1'b0 :
|
||||
(issue_req) ? 1'b1 :
|
||||
(icache_dreq_i.valid) ? 1'b0 :
|
||||
s2_in_flight_q;
|
||||
|
||||
// only enable counter if current request is not being killed
|
||||
assign issue_req = if_ready & (~icache_dreq_o.kill_s1);
|
||||
assign fifo_pop = fetch_ack_i & fetch_entry_valid_o;
|
||||
assign fifo_ready = (|fifo_credits_q);
|
||||
assign if_ready = icache_dreq_i.ready & fifo_ready;
|
||||
assign icache_dreq_o.req = fifo_ready;
|
||||
assign fetch_entry_valid_o = ~fifo_empty;
|
||||
|
||||
|
||||
//pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
fetch_fifo_credits0 : assert property (
|
||||
@(posedge clk_i) disable iff (~rst_ni) (fifo_credits_q <= FETCH_FIFO_DEPTH))
|
||||
else $fatal(1,"[frontend] fetch fifo credits must be <= FETCH_FIFO_DEPTH!");
|
||||
initial begin
|
||||
assert (FETCH_FIFO_DEPTH <= 8) else $fatal(1,"[frontend] fetch fifo deeper than 8 not supported");
|
||||
assert (FETCH_WIDTH == 32) else $fatal(1,"[frontend] fetch width != not supported");
|
||||
end
|
||||
`endif
|
||||
//pragma translate_on
|
||||
logic [FETCH_WIDTH-1:0] icache_data;
|
||||
// re-align the cache line
|
||||
assign icache_data = icache_dreq_i.data >> {shamt, 4'b0};
|
||||
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (~rst_ni) begin
|
||||
npc_q <= '0;
|
||||
npc_rst_load_q <= 1'b1;
|
||||
icache_data_q <= '0;
|
||||
icache_valid_q <= 1'b0;
|
||||
icache_vaddr_q <= 'b0;
|
||||
icache_ex_valid_q <= 1'b0;
|
||||
unaligned_q <= 1'b0;
|
||||
unaligned_address_q <= '0;
|
||||
unaligned_instr_q <= '0;
|
||||
fifo_credits_q <= FETCH_FIFO_DEPTH;
|
||||
s2_in_flight_q <= 1'b0;
|
||||
end else begin
|
||||
npc_rst_load_q <= 1'b0;
|
||||
npc_q <= npc_d;
|
||||
icache_data_q <= icache_dreq_i.data;
|
||||
icache_valid_q <= icache_dreq_i.valid;
|
||||
icache_vaddr_q <= icache_dreq_i.vaddr;
|
||||
icache_ex_valid_q <= icache_dreq_i.ex.valid;
|
||||
unaligned_q <= unaligned_d;
|
||||
unaligned_address_q <= unaligned_address_d;
|
||||
unaligned_instr_q <= unaligned_instr_d;
|
||||
fifo_credits_q <= fifo_credits_d;
|
||||
s2_in_flight_q <= s2_in_flight_d;
|
||||
if (!rst_ni) begin
|
||||
npc_rst_load_q <= 1'b1;
|
||||
npc_q <= '0;
|
||||
icache_data_q <= '0;
|
||||
icache_valid_q <= 1'b0;
|
||||
icache_vaddr_q <= 'b0;
|
||||
icache_ex_valid_q <= 1'b0;
|
||||
btb_q <= '0;
|
||||
bht_q <= '0;
|
||||
end else begin
|
||||
npc_rst_load_q <= 1'b0;
|
||||
npc_q <= npc_d;
|
||||
icache_valid_q <= icache_dreq_i.valid;
|
||||
if (icache_dreq_i.valid) begin
|
||||
icache_data_q <= icache_data;
|
||||
icache_vaddr_q <= icache_dreq_i.vaddr;
|
||||
icache_ex_valid_q <= icache_dreq_i.ex;
|
||||
// save the uppermost prediction
|
||||
btb_q <= btb_prediction[INSTR_PER_FETCH-1];
|
||||
bht_q <= bht_prediction[INSTR_PER_FETCH-1];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
ras #(
|
||||
.DEPTH ( RAS_DEPTH )
|
||||
.DEPTH ( ArianeCfg.RASDepth )
|
||||
) i_ras (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i( flush_bp_i ),
|
||||
.push_i ( ras_push ),
|
||||
.pop_i ( ras_pop ),
|
||||
.data_i ( ras_update ),
|
||||
.data_o ( ras_predict )
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i( flush_bp_i ),
|
||||
.push_i ( ras_push ),
|
||||
.pop_i ( ras_pop ),
|
||||
.data_i ( ras_update ),
|
||||
.data_o ( ras_predict )
|
||||
);
|
||||
|
||||
btb #(
|
||||
.NR_ENTRIES ( BTB_ENTRIES )
|
||||
.NR_ENTRIES ( ArianeCfg.BTBEntries )
|
||||
) i_btb (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i ( flush_bp_i ),
|
||||
.debug_mode_i,
|
||||
.vpc_i ( icache_vaddr_q ),
|
||||
.btb_update_i ( btb_update ),
|
||||
.btb_prediction_o ( btb_prediction )
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i ( flush_bp_i ),
|
||||
.debug_mode_i,
|
||||
.vpc_i ( icache_vaddr_q ),
|
||||
.btb_update_i ( btb_update ),
|
||||
.btb_prediction_o ( btb_prediction )
|
||||
);
|
||||
|
||||
bht #(
|
||||
.NR_ENTRIES ( BHT_ENTRIES )
|
||||
.NR_ENTRIES ( ArianeCfg.BHTEntries )
|
||||
) i_bht (
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i ( flush_bp_i ),
|
||||
.debug_mode_i,
|
||||
.vpc_i ( icache_vaddr_q ),
|
||||
.bht_update_i ( bht_update ),
|
||||
.bht_prediction_o ( bht_prediction )
|
||||
.clk_i,
|
||||
.rst_ni,
|
||||
.flush_i ( flush_bp_i ),
|
||||
.debug_mode_i,
|
||||
.vpc_i ( icache_vaddr_q ),
|
||||
.bht_update_i ( bht_update ),
|
||||
.bht_prediction_o ( bht_prediction )
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
|
||||
instr_scan i_instr_scan (
|
||||
.instr_i ( instr[i] ),
|
||||
.is_rvc_o ( is_rvc[i] ),
|
||||
.rvi_return_o ( rvi_return[i] ),
|
||||
.rvi_call_o ( rvi_call[i] ),
|
||||
.rvi_branch_o ( rvi_branch[i] ),
|
||||
.rvi_jalr_o ( rvi_jalr[i] ),
|
||||
.rvi_jump_o ( rvi_jump[i] ),
|
||||
.rvi_imm_o ( rvi_imm[i] ),
|
||||
.rvc_branch_o ( rvc_branch[i] ),
|
||||
.rvc_jump_o ( rvc_jump[i] ),
|
||||
.rvc_jr_o ( rvc_jr[i] ),
|
||||
.rvc_return_o ( rvc_return[i] ),
|
||||
.rvc_jalr_o ( rvc_jalr[i] ),
|
||||
.rvc_call_o ( rvc_call[i] ),
|
||||
.rvc_imm_o ( rvc_imm[i] )
|
||||
);
|
||||
// we need to inspect up to INSTR_PER_FETCH instructions for branches
|
||||
// and jumps
|
||||
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan
|
||||
instr_scan i_instr_scan (
|
||||
.instr_i ( instr[i] ),
|
||||
.rvi_return_o ( rvi_return[i] ),
|
||||
.rvi_call_o ( rvi_call[i] ),
|
||||
.rvi_branch_o ( rvi_branch[i] ),
|
||||
.rvi_jalr_o ( rvi_jalr[i] ),
|
||||
.rvi_jump_o ( rvi_jump[i] ),
|
||||
.rvi_imm_o ( rvi_imm[i] ),
|
||||
.rvc_branch_o ( rvc_branch[i] ),
|
||||
.rvc_jump_o ( rvc_jump[i] ),
|
||||
.rvc_jr_o ( rvc_jr[i] ),
|
||||
.rvc_return_o ( rvc_return[i] ),
|
||||
.rvc_jalr_o ( rvc_jalr[i] ),
|
||||
.rvc_call_o ( rvc_call[i] ),
|
||||
.rvc_imm_o ( rvc_imm[i] )
|
||||
);
|
||||
end
|
||||
|
||||
fifo_v3 #(
|
||||
.DEPTH ( 8 ),
|
||||
.dtype ( frontend_fetch_t )
|
||||
) i_fetch_fifo (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.flush_i ( flush_i ),
|
||||
.testmode_i ( 1'b0 ),
|
||||
.full_o ( ),
|
||||
.empty_o ( fifo_empty ),
|
||||
.usage_o ( ),
|
||||
.data_i ( {icache_vaddr_q, icache_data_q, bp_sbe, taken[INSTR_PER_FETCH:1], icache_ex_valid_q} ),
|
||||
.push_i ( fifo_valid ),
|
||||
.data_o ( fetch_entry_o ),
|
||||
.pop_i ( fifo_pop )
|
||||
instr_queue i_instr_queue (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.flush_i ( flush_i ),
|
||||
.instr_i ( instr ), // from re-aligner
|
||||
.addr_i ( addr ), // from re-aligner
|
||||
.exception_i ( icache_ex_valid_q ), // from I$
|
||||
.predict_address_i ( predict_address ),
|
||||
.cf_type_i ( cf_type ),
|
||||
.valid_i ( instruction_valid ), // from re-aligner
|
||||
.consumed_o ( instr_queue_consumed ),
|
||||
.ready_o ( instr_queue_ready ),
|
||||
.replay_o ( replay ),
|
||||
.replay_addr_o ( replay_addr ),
|
||||
.fetch_entry_o ( fetch_entry_o ), // to back-end
|
||||
.fetch_entry_valid_o ( fetch_entry_valid_o ), // to back-end
|
||||
.fetch_entry_ready_i ( fetch_entry_ready_i ) // to back-end
|
||||
);
|
||||
|
||||
// pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
initial begin
|
||||
assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64) else $fatal("[frontend] fetch width != not supported");
|
||||
end
|
||||
`endif
|
||||
// pragma translate_on
|
||||
endmodule
|
||||
|
|
353
src/frontend/instr_queue.sv
Normal file
353
src/frontend/instr_queue.sv
Normal file
|
@ -0,0 +1,353 @@
|
|||
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 26.10.2018
|
||||
|
||||
// Description: Instruction Queue, separates instruction front-end from processor
|
||||
// back-end.
|
||||
//
|
||||
// This is an optimized instruction queue which supports the handling of
|
||||
// compressed instructions (16 bit instructions). Internally it is organized as
|
||||
// FETCH_ENTRY x 32 bit queues which are filled in a consecutive manner. Two pointers
|
||||
// point into (`idx_is_q` and `idx_ds_q`) the fill port and the read port. The read port
|
||||
// is designed so that it will easily allow for multiple issue implementation.
|
||||
// The input supports arbitrary power of two instruction fetch widths.
|
||||
//
|
||||
// The queue supports handling of branch prediction and will take care of
|
||||
// only saving a valid instruction stream.
|
||||
//
|
||||
// Furthermore it contains a replay interface in case the instruction queue
|
||||
// is already full. As instructions are in general easily replayed this should
|
||||
// increase the efficiency as I$ misses are potentially hidden. This stands in
|
||||
// contrast to pessimistic actions (early stalling) or credit based approaches.
|
||||
// Credit based systems might be difficult to implement with the current system
|
||||
// as we do not exactly know how much space we are going to need in the fifos
|
||||
// as each instruction can take either one or two slots.
|
||||
//
|
||||
// So the consumed/valid interface degenerates to an `information` interface. If the
|
||||
// upstream circuit keeps pushing, the queue will discard the information
|
||||
// and start replaying from the point where it could last manage to accept instructions.
|
||||
//
|
||||
// The instruction front-end will stop issuing instructions as soon as the
|
||||
// fifo is full. This will gate the logic if the processor is e.g.: halted
|
||||
//
|
||||
// TODO(zarubaf): The instruction queues can be reduced to 16 bit. Potentially
|
||||
// the replay mechanism gets more complicated as it can be that a 32 bit instruction
|
||||
// can not be pushed at once.
|
||||
|
||||
module instr_queue (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic flush_i,
|
||||
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i,
|
||||
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][63:0] addr_i,
|
||||
input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i,
|
||||
output logic ready_o,
|
||||
output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o,
|
||||
// we've encountered an exception, at this point the only possible exceptions are page-table faults
|
||||
input logic exception_i,
|
||||
// branch predict
|
||||
input logic [63:0] predict_address_i,
|
||||
input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i,
|
||||
// replay instruction because one of the FIFO was already full
|
||||
output logic replay_o,
|
||||
output logic [63:0] replay_addr_o, // address at which to replay this instruction
|
||||
// to processor backend
|
||||
output ariane_pkg::fetch_entry_t fetch_entry_o,
|
||||
output logic fetch_entry_valid_o,
|
||||
input logic fetch_entry_ready_i
|
||||
);
|
||||
|
||||
typedef struct packed {
|
||||
logic [31:0] instr; // instruction word
|
||||
ariane_pkg::cf_t cf; // branch was taken
|
||||
logic ex; // exception happened
|
||||
} instr_data_t;
|
||||
|
||||
logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] branch_index;
|
||||
// instruction queues
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0]
|
||||
[$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] instr_queue_usage;
|
||||
instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty;
|
||||
logic instr_overflow;
|
||||
// address queue
|
||||
logic [$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage;
|
||||
logic [63:0] address_out;
|
||||
logic pop_address;
|
||||
logic push_address;
|
||||
logic full_address;
|
||||
logic empty_address;
|
||||
logic address_overflow;
|
||||
// input stream counter
|
||||
logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] idx_is_d, idx_is_q;
|
||||
// Registers
|
||||
// output FIFO select, one-hot
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q;
|
||||
logic [63:0] pc_d, pc_q; // current PC
|
||||
logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush
|
||||
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] branch_mask;
|
||||
logic branch_empty;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken;
|
||||
// shift amount, e.g.: instructions we want to retire
|
||||
logic [$clog2(ariane_pkg::INSTR_PER_FETCH):0] popcount;
|
||||
logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] consumed_extended;
|
||||
// FIFO mask
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] fifo_pos_extended;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] fifo_pos;
|
||||
logic [ariane_pkg::INSTR_PER_FETCH*2-1:0][31:0] instr;
|
||||
ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH*2-1:0] cf;
|
||||
// replay interface
|
||||
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_overflow_fifo;
|
||||
|
||||
assign ready_o = ~(|instr_queue_full) & ~full_address;
|
||||
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_unpack_taken
|
||||
assign taken[i] = cf_type_i[i] != ariane_pkg::NoCF;
|
||||
end
|
||||
// calculate a branch mask, e.g.: get the first taken branch
|
||||
lzc #(
|
||||
.WIDTH ( ariane_pkg::INSTR_PER_FETCH ),
|
||||
.MODE ( 0 ) // count trailing zeros
|
||||
) i_lzc_branch_index (
|
||||
.in_i ( taken ), // we want to count trailing zeros
|
||||
.cnt_o ( branch_index ), // first branch on branch_index
|
||||
.empty_o ( branch_empty )
|
||||
);
|
||||
// the first index is for sure valid
|
||||
// for example (64 bit fetch):
|
||||
// taken mask: 0 1 1 0
|
||||
// trailing zero count = 1
|
||||
// 0 0 0 1, 1 1 1 << 1 = 0 0 1 1, 1 1 0
|
||||
// take the upper 4 bits: 0 0 1 1
|
||||
assign branch_mask_extended = {{{ariane_pkg::INSTR_PER_FETCH-1}{1'b0}}, {{ariane_pkg::INSTR_PER_FETCH}{1'b1}}} << branch_index;
|
||||
assign branch_mask = branch_mask_extended[ariane_pkg::INSTR_PER_FETCH * 2 - 2:ariane_pkg::INSTR_PER_FETCH - 1];
|
||||
|
||||
// mask with taken branches to get the actual amount of instructions we want to push
|
||||
assign valid = valid_i & branch_mask;
|
||||
// rotate right again
|
||||
assign consumed_extended = {push_instr_fifo, push_instr_fifo} >> idx_is_q;
|
||||
assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0];
|
||||
// count the numbers of valid instructions we've pushed from this package
|
||||
popcount #(
|
||||
.INPUT_WIDTH ( ariane_pkg::INSTR_PER_FETCH )
|
||||
) i_popcount (
|
||||
.data_i ( push_instr_fifo ),
|
||||
.popcount_o ( popcount )
|
||||
);
|
||||
assign shamt = popcount[$bits(shamt)-1:0];
|
||||
|
||||
// save the shift amount for next cycle
|
||||
assign idx_is_d = idx_is_q + shamt;
|
||||
|
||||
// ----------------------
|
||||
// Input interface
|
||||
// ----------------------
|
||||
// rotate left by the current position
|
||||
assign fifo_pos_extended = { valid, valid } << idx_is_q;
|
||||
// we just care about the upper bits
|
||||
assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH];
|
||||
// the fifo_position signal can directly be used to guide the push signal of each FIFO
|
||||
// make sure it is not full
|
||||
assign push_instr = fifo_pos & ~instr_queue_full;
|
||||
|
||||
// duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input
|
||||
assign instr[i] = instr_i[i];
|
||||
assign instr[i + ariane_pkg::INSTR_PER_FETCH] = instr_i[i];
|
||||
assign cf[i] = cf_type_i[i];
|
||||
assign cf[i + ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i];
|
||||
end
|
||||
|
||||
// shift the inputs
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_fifo_input_select
|
||||
/* verilator lint_off WIDTH */
|
||||
assign instr_data_in[i].instr = instr[i + idx_is_q];
|
||||
assign instr_data_in[i].cf = cf[i + idx_is_q];
|
||||
assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet
|
||||
/* verilator lint_on WIDTH */
|
||||
end
|
||||
|
||||
// ----------------------
|
||||
// Replay Logic
|
||||
// ----------------------
|
||||
// We need to replay an instruction fetch iff:
|
||||
// 1. One of the instruction data FIFOs was full and we needed it
|
||||
// (e.g.: we pushed and it was full)
|
||||
// 2. The address/branch predict FIFO was full
|
||||
// if one of the FIFOs was full we need to replay the faulting instruction
|
||||
assign instr_overflow_fifo = instr_queue_full & fifo_pos;
|
||||
assign instr_overflow = |instr_overflow_fifo; // at least one instruction overflowed
|
||||
assign address_overflow = full_address & push_address;
|
||||
assign replay_o = instr_overflow | address_overflow;
|
||||
|
||||
// select the address, in the case of an address fifo overflow just
|
||||
// use the base of this package
|
||||
// if we successfully pushed some instructions we can output the next instruction
|
||||
// which we didn't manage to push
|
||||
assign replay_addr_o = (address_overflow) ? addr_i[0] : addr_i[shamt];
|
||||
|
||||
// ----------------------
|
||||
// Downstream interface
|
||||
// ----------------------
|
||||
// as long as at least one of the instruction queues is not empty we have a valid instruction
|
||||
assign fetch_entry_valid_o = ~(&instr_queue_empty);
|
||||
|
||||
// Output mux: assembles the fetch entry presented to the back-end from the
// instruction FIFO selected by the one-hot read pointer `idx_ds_q`, generates
// the pop strobe for that FIFO and advances the read pointer.
always_comb begin
  idx_ds_d = idx_ds_q;

  pop_instr = '0;
  // assemble fetch entry - defaults
  fetch_entry_o.instruction = '0;
  fetch_entry_o.address = pc_q;
  fetch_entry_o.ex.valid = 1'b0;
  // This is the only exception which can occur up to this point.
  fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT;
  fetch_entry_o.ex.tval = '0;
  fetch_entry_o.branch_predict.predict_address = address_out;
  // Default the control flow type as well: it is otherwise only written
  // inside the select loop below, which leaves it incompletely assigned in
  // this combinational process (latch inference) and `pop_address` reads it.
  fetch_entry_o.branch_predict.cf = ariane_pkg::NoCF;
  // output mux select
  for (int unsigned i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
    if (idx_ds_q[i]) begin
      fetch_entry_o.instruction = instr_data_out[i].instr;
      fetch_entry_o.ex.valid = instr_data_out[i].ex;
      // on an exception the faulting address is the PC of this entry
      fetch_entry_o.ex.tval = pc_q;
      fetch_entry_o.branch_predict.cf = instr_data_out[i].cf;
      // only pop the selected FIFO on a successful valid/ready handshake
      pop_instr[i] = fetch_entry_valid_o & fetch_entry_ready_i;
    end
  end
  // rotate the pointer left
  if (fetch_entry_ready_i) begin
    idx_ds_d = {idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1]};
  end
end
|
||||
|
||||
// TODO(zarubaf): This needs to change for dual-issue
|
||||
// if the handshaking is successful and we had a prediction pop one address entry
|
||||
assign pop_address = ((fetch_entry_o.branch_predict.cf != ariane_pkg::NoCF) & |pop_instr);
|
||||
|
||||
// ----------------------
|
||||
// Calculate (Next) PC
|
||||
// ----------------------
|
||||
// Next-PC selection for the entry at the head of the queue.
// Priority (highest first):
//   1. first valid fetch after a flush -> take the base address of the packet
//   2. popping a predicted control flow change -> take the predicted address
//   3. back-end is ready -> step over the current instruction (2 or 4 bytes)
//   4. otherwise hold
always_comb begin
  // hold by default; remember a flush until the address has been re-set
  pc_d = pc_q;
  reset_address_d = flush_i ? 1'b1 : reset_address_q;

  if (valid_i[0] && reset_address_q) begin
    // we previously flushed so we need to reset the address,
    // this is the base of the first instruction
    pc_d = addr_i[0];
    reset_address_d = 1'b0;
  end else if (pop_address) begin
    pc_d = address_out;
  end else if (fetch_entry_ready_i) begin
    // TODO(zarubaf): This needs to change for a dual issue implementation
    // advance the PC, two LSBs equal to 2'b11 mark a 32 bit instruction
    pc_d = pc_q + ((fetch_entry_o.instruction[1:0] == 2'b11) ? 'd4 : 'd2);
  end
end
|
||||
|
||||
// FIFOs
|
||||
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_instr_fifo
|
||||
// Make sure we don't save any instructions if we couldn't save the address
|
||||
assign push_instr_fifo[i] = push_instr[i] & ~address_overflow;
|
||||
fifo_v3 #(
|
||||
.DEPTH ( ariane_pkg::FETCH_FIFO_DEPTH ),
|
||||
.dtype ( instr_data_t )
|
||||
) i_fifo_instr_data (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.flush_i ( flush_i ),
|
||||
.testmode_i ( 1'b0 ),
|
||||
.full_o ( instr_queue_full[i] ),
|
||||
.empty_o ( instr_queue_empty[i] ),
|
||||
.usage_o ( instr_queue_usage[i] ),
|
||||
.data_i ( instr_data_in[i] ),
|
||||
.push_i ( push_instr_fifo[i] ),
|
||||
.data_o ( instr_data_out[i] ),
|
||||
.pop_i ( pop_instr[i] )
|
||||
);
|
||||
end
|
||||
// OR-reduce and check whether we are pushing a taken branch (it might be that the corresponding
|
||||
// fifo is full).
|
||||
// An address entry has to be saved whenever at least one of the instructions
// pushed this cycle carries a control flow change.
always_comb begin
  push_address = 1'b0;
  for (int slot = 0; slot < ariane_pkg::INSTR_PER_FETCH; slot++) begin
    // accumulate: this slot is pushed and its control flow type is not NoCF
    push_address = push_address
                 | (push_instr[slot] & (instr_data_in[slot].cf != ariane_pkg::NoCF));
  end
end
|
||||
|
||||
fifo_v3 #(
|
||||
.DEPTH ( ariane_pkg::FETCH_FIFO_DEPTH ), // TODO(zarubaf): Fork out to separate param
|
||||
.DATA_WIDTH ( 64 )
|
||||
) i_fifo_address (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.flush_i ( flush_i ),
|
||||
.testmode_i ( 1'b0 ),
|
||||
.full_o ( full_address ),
|
||||
.empty_o ( empty_address ),
|
||||
.usage_o ( address_queue_usage ),
|
||||
.data_i ( predict_address_i ),
|
||||
.push_i ( push_address & ~full_address ),
|
||||
.data_o ( address_out ),
|
||||
.pop_i ( pop_address )
|
||||
);
|
||||
|
||||
unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage}));
|
||||
unread i_unread_branch_mask (.d_i(|branch_mask_extended));
|
||||
unread i_unread_lzc (.d_i(|{branch_empty}));
|
||||
unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals
|
||||
unread i_unread_instr_fifo (.d_i(|instr_queue_usage));
|
||||
|
||||
// State registers: read pointer (one-hot), write index (binary), current PC
// and the "address needs to be re-set" flag.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    idx_ds_q        <= 'b1;
    idx_is_q        <= '0;
    pc_q            <= '0;
    reset_address_q <= 1'b1;
  end else if (flush_i) begin
    // the PC keeps following pc_d, everything else returns to its reset state
    pc_q            <= pc_d;
    // one-hot encoded
    idx_ds_q        <= 'b1;
    // binary encoded
    idx_is_q        <= '0;
    reset_address_q <= 1'b1;
  end else begin
    pc_q            <= pc_d;
    reset_address_q <= reset_address_d;
    idx_ds_q        <= idx_ds_d;
    idx_is_q        <= idx_is_d;
  end
end
|
||||
|
||||
// pragma translate_off
|
||||
`ifndef VERILATOR
|
||||
replay_address_fifo: assert property (
|
||||
@(posedge clk_i) disable iff (!rst_ni) replay_o |-> !i_fifo_address.push_i
|
||||
) else $fatal(1,"[instr_queue] Pushing address although replay asserted");
|
||||
|
||||
output_select_onehot: assert property (
|
||||
@(posedge clk_i) $onehot0(idx_ds_q)
|
||||
) else begin $error("Output select should be one-hot encoded"); $stop(); end
|
||||
`endif
|
||||
// pragma translate_on
|
||||
endmodule
|
|
@ -1,4 +1,4 @@
|
|||
//Copyright (C) 2018 to present,
|
||||
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 2.0 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
|
@ -17,7 +17,6 @@
|
|||
// ------------------------------
|
||||
module instr_scan (
|
||||
input logic [31:0] instr_i, // expect aligned instruction, compressed or not
|
||||
output logic is_rvc_o,
|
||||
output logic rvi_return_o,
|
||||
output logic rvi_call_o,
|
||||
output logic rvi_branch_o,
|
||||
|
@ -32,35 +31,39 @@ module instr_scan (
|
|||
output logic rvc_call_o,
|
||||
output logic [63:0] rvc_imm_o
|
||||
);
|
||||
assign is_rvc_o = (instr_i[1:0] != 2'b11);
|
||||
// check that rs1 is either x1 or x5 and that rs1 is not x1 or x5, TODO: check the fact about bit 7
|
||||
assign rvi_return_o = rvi_jalr_o & ~instr_i[7] & ~instr_i[19] & ~instr_i[18] & ~instr_i[16] & instr_i[15];
|
||||
assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & instr_i[7]; // TODO: check that this captures calls
|
||||
logic is_rvc;
|
||||
assign is_rvc = (instr_i[1:0] != 2'b11);
|
||||
// check that rd (bits 11:7) is either x1 or x5 and that rs1 (bits 19:15) differs from rd
|
||||
assign rvi_return_o = rvi_jalr_o & ((instr_i[11:7] == 5'd1) | instr_i[11:7] == 5'd5)
|
||||
& (instr_i[19:15] != instr_i[11:7]);
|
||||
// Opcode is JAL[R] and destination register is either x1 or x5
|
||||
assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & ((instr_i[11:7] == 5'd1) | instr_i[11:7] == 5'd5);
|
||||
// differentiates between JAL and BRANCH opcode, JALR comes from BHT
|
||||
assign rvi_imm_o = (instr_i[3]) ? ariane_pkg::uj_imm(instr_i) : ariane_pkg::sb_imm(instr_i);
|
||||
assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch) ? 1'b1 : 1'b0;
|
||||
assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr) ? 1'b1 : 1'b0;
|
||||
assign rvi_jump_o = (instr_i[6:0] == riscv::OpcodeJal) ? 1'b1 : 1'b0;
|
||||
assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch);
|
||||
assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr);
|
||||
assign rvi_jump_o = (instr_i[6:0] == riscv::OpcodeJal);
|
||||
|
||||
// opcode JAL
|
||||
assign rvc_jump_o = (instr_i[15:13] == riscv::OpcodeC1J) & is_rvc_o & (instr_i[1:0] == riscv::OpcodeC1);
|
||||
assign rvc_jump_o = (instr_i[15:13] == riscv::OpcodeC1J) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1);
|
||||
// always links to register 0
|
||||
assign rvc_jr_o = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd)
|
||||
& ~instr_i[12]
|
||||
logic is_jal_r;
|
||||
assign is_jal_r = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd) &
|
||||
& (instr_i[6:2] == 5'b00000)
|
||||
& (instr_i[1:0] == riscv::OpcodeC2)
|
||||
& is_rvc_o;
|
||||
assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeC1Beqz) | (instr_i[15:13] == riscv::OpcodeC1Bnez))
|
||||
& (instr_i[1:0] == riscv::OpcodeC1)
|
||||
& is_rvc_o;
|
||||
// check that rs1 is x1 or x5
|
||||
assign rvc_return_o = ~instr_i[11] & ~instr_i[10] & ~instr_i[8] & instr_i[7] & rvc_jr_o ;
|
||||
& is_rvc;
|
||||
assign rvc_jr_o = is_jal_r & ~instr_i[12];
|
||||
// always links to register 1 e.g.: it is a jump
|
||||
assign rvc_jalr_o = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd)
|
||||
& instr_i[12]
|
||||
& (instr_i[6:2] == 5'b00000) & is_rvc_o;
|
||||
assign rvc_jalr_o = is_jal_r & instr_i[12];
|
||||
assign rvc_call_o = rvc_jalr_o;
|
||||
|
||||
// // differentiates between JAL and BRANCH opcode, JALR comes from BHT
|
||||
assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeC1Beqz) | (instr_i[15:13] == riscv::OpcodeC1Bnez))
|
||||
& (instr_i[1:0] == riscv::OpcodeC1)
|
||||
& is_rvc;
|
||||
// check that rs1 is x1 or x5
|
||||
assign rvc_return_o = ((instr_i[11:7] == 5'd1) | (instr_i[11:7] == 5'd5)) & rvc_jr_o ;
|
||||
|
||||
// differentiates between JAL and BRANCH opcode, JALR comes from BHT
|
||||
assign rvc_imm_o = (instr_i[14]) ? {{56{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0}
|
||||
: {{53{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0};
|
||||
endmodule
|
||||
|
|
115
src/id_stage.sv
115
src/id_stage.sv
|
@ -10,95 +10,81 @@
|
|||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 15.04.2017
|
||||
// Description: Description: Instruction decode, contains the logic for decode,
|
||||
// Description: Instruction decode, contains the logic for decode,
|
||||
// issue and read operands.
|
||||
|
||||
import ariane_pkg::*;
|
||||
|
||||
module id_stage (
|
||||
input logic clk_i, // Clock
|
||||
input logic rst_ni, // Asynchronous reset active low
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
|
||||
input logic flush_i,
|
||||
input logic debug_req_i,
|
||||
input logic flush_i,
|
||||
input logic debug_req_i,
|
||||
// from IF
|
||||
input frontend_fetch_t fetch_entry_i,
|
||||
input logic fetch_entry_valid_i,
|
||||
output logic decoded_instr_ack_o, // acknowledge the instruction (fetch entry)
|
||||
|
||||
input ariane_pkg::fetch_entry_t fetch_entry_i,
|
||||
input logic fetch_entry_valid_i,
|
||||
output logic fetch_entry_ready_o, // acknowledge the instruction (fetch entry)
|
||||
// to ID
|
||||
output scoreboard_entry_t issue_entry_o, // a decoded instruction
|
||||
output logic issue_entry_valid_o, // issue entry is valid
|
||||
output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions
|
||||
input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions
|
||||
output ariane_pkg::scoreboard_entry_t issue_entry_o, // a decoded instruction
|
||||
output logic issue_entry_valid_o, // issue entry is valid
|
||||
output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions
|
||||
input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions
|
||||
// from CSR file
|
||||
input riscv::priv_lvl_t priv_lvl_i, // current privilege level
|
||||
input riscv::xs_t fs_i, // floating point extension status
|
||||
input logic [2:0] frm_i, // floating-point dynamic rounding mode
|
||||
input logic [1:0] irq_i,
|
||||
input irq_ctrl_t irq_ctrl_i,
|
||||
input logic debug_mode_i, // we are in debug mode
|
||||
input logic tvm_i,
|
||||
input logic tw_i,
|
||||
input logic tsr_i
|
||||
input riscv::priv_lvl_t priv_lvl_i, // current privilege level
|
||||
input riscv::xs_t fs_i, // floating point extension status
|
||||
input logic [2:0] frm_i, // floating-point dynamic rounding mode
|
||||
input logic [1:0] irq_i,
|
||||
input ariane_pkg::irq_ctrl_t irq_ctrl_i,
|
||||
input logic debug_mode_i, // we are in debug mode
|
||||
input logic tvm_i,
|
||||
input logic tw_i,
|
||||
input logic tsr_i
|
||||
);
|
||||
// register stage
|
||||
// ID/ISSUE register stage
|
||||
struct packed {
|
||||
logic valid;
|
||||
scoreboard_entry_t sbe;
|
||||
logic is_ctrl_flow;
|
||||
logic valid;
|
||||
ariane_pkg::scoreboard_entry_t sbe;
|
||||
logic is_ctrl_flow;
|
||||
} issue_n, issue_q;
|
||||
|
||||
logic is_control_flow_instr;
|
||||
scoreboard_entry_t decoded_instruction;
|
||||
logic is_control_flow_instr;
|
||||
ariane_pkg::scoreboard_entry_t decoded_instruction;
|
||||
|
||||
fetch_entry_t fetch_entry;
|
||||
logic is_illegal;
|
||||
logic [31:0] instruction;
|
||||
logic is_compressed;
|
||||
logic fetch_ack_i;
|
||||
logic fetch_entry_valid;
|
||||
|
||||
// ---------------------------------------------------------
|
||||
// 1. Re-align instructions
|
||||
// ---------------------------------------------------------
|
||||
instr_realigner instr_realigner_i (
|
||||
.fetch_entry_i ( fetch_entry_i ),
|
||||
.fetch_entry_valid_i ( fetch_entry_valid_i ),
|
||||
.fetch_ack_o ( decoded_instr_ack_o ),
|
||||
|
||||
.fetch_entry_o ( fetch_entry ),
|
||||
.fetch_entry_valid_o ( fetch_entry_valid ),
|
||||
.fetch_ack_i ( fetch_ack_i ),
|
||||
.*
|
||||
);
|
||||
// ---------------------------------------------------------
|
||||
// 2. Check if they are compressed and expand in case they are
|
||||
// 1. Check if they are compressed and expand in case they are
|
||||
// ---------------------------------------------------------
|
||||
compressed_decoder compressed_decoder_i (
|
||||
.instr_i ( fetch_entry.instruction ),
|
||||
.instr_i ( fetch_entry_i.instruction ),
|
||||
.instr_o ( instruction ),
|
||||
.illegal_instr_o ( is_illegal ),
|
||||
.is_compressed_o ( is_compressed )
|
||||
|
||||
);
|
||||
// ---------------------------------------------------------
|
||||
// 3. Decode and emit instruction to issue stage
|
||||
// 2. Decode and emit instruction to issue stage
|
||||
// ---------------------------------------------------------
|
||||
decoder decoder_i (
|
||||
.debug_req_i,
|
||||
.pc_i ( fetch_entry.address ),
|
||||
.is_compressed_i ( is_compressed ),
|
||||
.compressed_instr_i ( fetch_entry.instruction[15:0] ),
|
||||
.instruction_i ( instruction ),
|
||||
.branch_predict_i ( fetch_entry.branch_predict ),
|
||||
.is_illegal_i ( is_illegal ),
|
||||
.ex_i ( fetch_entry.ex ),
|
||||
.instruction_o ( decoded_instruction ),
|
||||
.is_control_flow_instr_o ( is_control_flow_instr ),
|
||||
.irq_ctrl_i,
|
||||
.irq_i,
|
||||
.pc_i ( fetch_entry_i.address ),
|
||||
.is_compressed_i ( is_compressed ),
|
||||
.is_illegal_i ( is_illegal ),
|
||||
.instruction_i ( instruction ),
|
||||
.compressed_instr_i ( fetch_entry_i.instruction[15:0] ),
|
||||
.branch_predict_i ( fetch_entry_i.branch_predict ),
|
||||
.ex_i ( fetch_entry_i.ex ),
|
||||
.priv_lvl_i ( priv_lvl_i ),
|
||||
.debug_mode_i ( debug_mode_i ),
|
||||
.fs_i,
|
||||
.frm_i,
|
||||
.*
|
||||
.tvm_i,
|
||||
.tw_i,
|
||||
.tsr_i,
|
||||
.instruction_o ( decoded_instruction ),
|
||||
.is_control_flow_instr_o ( is_control_flow_instr )
|
||||
);
|
||||
|
||||
// ------------------
|
||||
|
@ -110,7 +96,7 @@ module id_stage (
|
|||
|
||||
always_comb begin
|
||||
issue_n = issue_q;
|
||||
fetch_ack_i = 1'b0;
|
||||
fetch_entry_ready_o = 1'b0;
|
||||
|
||||
// Clear the valid flag if issue has acknowledged the instruction
|
||||
if (issue_instr_ack_i)
|
||||
|
@ -119,9 +105,9 @@ module id_stage (
|
|||
// if we have a space in the register and the fetch is valid, go get it
|
||||
// or the issue stage is currently acknowledging an instruction, which means that we will have space
|
||||
// for a new instruction
|
||||
if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid) begin
|
||||
fetch_ack_i = 1'b1;
|
||||
issue_n = {1'b1, decoded_instruction, is_control_flow_instr};
|
||||
if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin
|
||||
fetch_entry_ready_o = 1'b1;
|
||||
issue_n = '{1'b1, decoded_instruction, is_control_flow_instr};
|
||||
end
|
||||
|
||||
// invalidate the pipeline register on a flush
|
||||
|
@ -138,5 +124,4 @@ module id_stage (
|
|||
issue_q <= issue_n;
|
||||
end
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
358
src/instr_realign.sv
Normal file
358
src/instr_realign.sv
Normal file
|
@ -0,0 +1,358 @@
|
|||
// Copyright 2018 - 2019 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
|
||||
// Description: Instruction Re-aligner
|
||||
//
|
||||
// This module takes 32-bit aligned cache blocks and extracts the instructions.
|
||||
// As we are supporting the compressed instruction set extension, a 32 bit instruction word
|
||||
// can contain up to 2 compressed instructions.
|
||||
// Furthermore those instructions can be arbitrarily interleaved which makes it possible to fetch
|
||||
// only the lower part of a 32 bit instruction.
|
||||
// Furthermore we need to handle the case if we want to start fetching from an unaligned
|
||||
// instruction e.g. a branch.
|
||||
|
||||
import ariane_pkg::*;
|
||||
|
||||
module instr_realign (
|
||||
input logic clk_i,
|
||||
input logic rst_ni,
|
||||
input logic flush_i,
|
||||
input logic valid_i,
|
||||
output logic serving_unaligned_o, // we have an unaligned instruction in [0]
|
||||
input logic [63:0] address_i,
|
||||
input logic [FETCH_WIDTH-1:0] data_i,
|
||||
output logic [INSTR_PER_FETCH-1:0] valid_o,
|
||||
output logic [INSTR_PER_FETCH-1:0][63:0] addr_o,
|
||||
output logic [INSTR_PER_FETCH-1:0][31:0] instr_o
|
||||
);
|
||||
// as a maximum we support a fetch width of 64-bit, hence there can be 4 compressed instructions
|
||||
logic [3:0] instr_is_compressed;
|
||||
|
||||
for (genvar i = 0; i < INSTR_PER_FETCH; i ++) begin
|
||||
// LSB != 2'b11
|
||||
assign instr_is_compressed[i] = ~&data_i[i * 16 +: 2];
|
||||
end
|
||||
|
||||
// save the unaligned part of the instruction to this ff
|
||||
logic [15:0] unaligned_instr_d, unaligned_instr_q;
|
||||
// the last instruction was unaligned
|
||||
logic unaligned_d, unaligned_q;
|
||||
// register to save the unaligned address
|
||||
logic [63:0] unaligned_address_d, unaligned_address_q;
|
||||
// we have an unaligned instruction
|
||||
assign serving_unaligned_o = unaligned_q;
|
||||
|
||||
// Instruction re-alignment
|
||||
if (FETCH_WIDTH == 32) begin : realign_bp_32
|
||||
always_comb begin : re_align
|
||||
unaligned_d = unaligned_q;
|
||||
unaligned_address_d = {address_i[63:2], 2'b10};
|
||||
unaligned_instr_d = data_i[31:16];
|
||||
|
||||
valid_o[0] = valid_i;
|
||||
instr_o[0] = (unaligned_q) ? {data_i[15:0], unaligned_instr_q} : data_i[31:0];
|
||||
addr_o[0] = (unaligned_q) ? unaligned_address_q : address_i;
|
||||
|
||||
valid_o[1] = 1'b0;
|
||||
instr_o[1] = '0;
|
||||
addr_o[1] = {address_i[63:2], 2'b10};
|
||||
|
||||
// this instruction is compressed or the last instruction was unaligned
|
||||
if (instr_is_compressed[0] || unaligned_q) begin
|
||||
// check if this instruction is still unaligned e.g.: it is not compressed
|
||||
// if its compressed re-set unaligned flag
|
||||
// for 32 bit we can simply check the next instruction and whether it is compressed or not
|
||||
// if it is compressed the next fetch will contain an aligned instruction
|
||||
// is instruction 1 also compressed
|
||||
// yes? -> no problem, no -> we've got an unaligned instruction
|
||||
if (instr_is_compressed[1]) begin
|
||||
unaligned_d = 1'b0;
|
||||
valid_o[1] = valid_i;
|
||||
instr_o[1] = {16'b0, data_i[31:16]};
|
||||
end else begin
|
||||
// save the upper bits for next cycle
|
||||
unaligned_d = 1'b1;
|
||||
unaligned_instr_d = data_i[31:16];
|
||||
unaligned_address_d = {address_i[63:2], 2'b10};
|
||||
end
|
||||
end // else -> normal fetch
|
||||
|
||||
// we started to fetch on an unaligned boundary with a whole instruction -> wait until we've
|
||||
// received the next instruction
|
||||
if (valid_i && address_i[1]) begin
|
||||
// the instruction is not compressed so we can't do anything in this cycle
|
||||
if (!instr_is_compressed[0]) begin
|
||||
valid_o = '0;
|
||||
unaligned_d = 1'b1;
|
||||
unaligned_address_d = {address_i[63:2], 2'b10};
|
||||
unaligned_instr_d = data_i[15:0];
|
||||
// the instruction isn't compressed but only the lower is ready
|
||||
end else begin
|
||||
valid_o = 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
// TODO(zarubaf): Fix 64 bit FETCH_WIDTH, maybe generalize to arbitrary fetch width
|
||||
end else if (FETCH_WIDTH == 64) begin : realign_bp_64
|
||||
initial begin
|
||||
$error("Not propperly implemented");
|
||||
end
|
||||
// Combinational re-alignment for a 64-bit fetch word, treated as four 16-bit
// slots (slot 0 = data_i[15:0] ... slot 3 = data_i[63:48]).
//
// Reads:  valid_i, address_i, data_i, instr_is_compressed[3:0] and the
//         registered unaligned_* state.
// Writes: instr_o[], addr_o[], valid_o[] and the next-state unaligned_*_d.
//
// Fix relative to the previous revision: in the "| * | C | C | I |" case the
// compressed instruction of slot 3 was written to addr_o[2] instead of
// instr_o[2].
always_comb begin : re_align
    // hold the state by default
    unaligned_d         = unaligned_q;
    unaligned_address_d = unaligned_address_q;
    unaligned_instr_d   = unaligned_instr_q;

    // by default only port 0 carries an instruction
    valid_o    = '0;
    valid_o[0] = valid_i;

    instr_o[0] = data_i[31:0];
    addr_o[0]  = address_i;

    instr_o[1] = '0;
    addr_o[1]  = {address_i[63:3], 3'b010};

    instr_o[2] = {16'b0, data_i[47:32]};
    addr_o[2]  = {address_i[63:3], 3'b100};

    instr_o[3] = {16'b0, data_i[63:48]};
    addr_o[3]  = {address_i[63:3], 3'b110};

    // last instruction was unaligned
    if (unaligned_q) begin
        // stitch the saved half together with slot 0 of this fetch
        instr_o[0] = {data_i[15:0], unaligned_instr_q};
        addr_o[0]  = unaligned_address_q;
        // for 64 bit there exist the following options:
        //     64      32      0
        //     | 3 | 2 | 1 | 0 | <- instruction slot
        // |   I   |   I   | U | -> again unaligned
        // | * | C |   I   | U | -> aligned
        // | * |   I   | C | U | -> aligned
        // |   I   | C | C | U | -> again unaligned
        // | * | C | C | C | U | -> aligned
        // Legend: C = compressed, I = 32 bit instruction, U = unaligned upper half
        //         * = don't care
        if (instr_is_compressed[1]) begin
            instr_o[1] = {16'b0, data_i[31:16]};
            valid_o[1] = valid_i;

            if (instr_is_compressed[2]) begin
                // NOTE(review): valid_o[2] is not asserted on this path although
                // slot 2 is compressed -- confirm whether that is intended.
                if (instr_is_compressed[3]) begin
                    unaligned_d = 1'b0;
                    valid_o[3]  = valid_i;
                end else begin
                    // continues to be unaligned
                    // NOTE(review): unaligned_instr_d / unaligned_address_d keep
                    // their previous values here -- confirm.
                end
            end else begin
                unaligned_d = 1'b0;
                instr_o[2]  = data_i[63:32];
                valid_o[2]  = valid_i;
            end
        // instruction 1 is not compressed
        end else begin
            instr_o[1] = data_i[47:16];
            valid_o[1] = valid_i;
            addr_o[2]  = {address_i[63:3], 3'b110};
            // NOTE(review): this tests slot 2 while the emitted data is slot 3;
            // the equivalent RVC path below tests instr_is_compressed[3] -- confirm.
            if (instr_is_compressed[2]) begin
                unaligned_d = 1'b0;
                instr_o[2]  = {16'b0, data_i[63:48]};
                valid_o[2]  = valid_i;
            end else begin
                // continues to be unaligned
            end
        end
    end else if (instr_is_compressed[0]) begin // instruction zero is RVC
        //     64      32      0
        //     | 3 | 2 | 1 | 0 | <- instruction slot
        // |   I   |   I   | C | -> again unaligned
        // | * | C |   I   | C | -> aligned
        // | * |   I   | C | C | -> aligned
        // |   I   | C | C | C | -> again unaligned
        // | * | C | C | C | C | -> aligned
        if (instr_is_compressed[1]) begin
            instr_o[1] = {16'b0, data_i[31:16]};
            valid_o[1] = valid_i;

            if (instr_is_compressed[2]) begin
                valid_o[2] = valid_i;
                if (instr_is_compressed[3]) begin
                    valid_o[3] = valid_i;
                end else begin
                    // this instruction is unaligned
                    unaligned_d         = 1'b1;
                    unaligned_instr_d   = data_i[63:48];
                    unaligned_address_d = addr_o[3];
                end
            end else begin
                instr_o[2] = data_i[63:32];
                valid_o[2] = valid_i;
            end
        // instruction 1 is not compressed -> check slot 3
        end else begin
            instr_o[1] = data_i[47:16];
            valid_o[1] = valid_i;
            addr_o[2]  = {address_i[63:3], 3'b110};
            if (instr_is_compressed[3]) begin
                instr_o[2] = data_i[63:48];
                valid_o[2] = valid_i;
            end else begin
                unaligned_d         = 1'b1;
                unaligned_instr_d   = data_i[63:48];
                unaligned_address_d = addr_o[2];
            end
        end

    // Full instruction in slot zero
    //     64      32      0
    //     | 3 | 2 | 1 | 0 | <- instruction slot
    // |   I   | C |   I   |
    // | * | C | C |   I   |
    // | * |   I   |   I   |
    end else begin
        addr_o[1] = {address_i[63:3], 3'b100};

        if (instr_is_compressed[2]) begin
            instr_o[1] = {16'b0, data_i[47:32]};
            valid_o[1] = valid_i;
            addr_o[2]  = {address_i[63:3], 3'b110};
            if (instr_is_compressed[3]) begin
                // | * | C | C | I |
                valid_o[2] = valid_i;
                // the compressed instruction of slot 3 goes to the instruction
                // port; addr_o[2] already points at slot 3 (set above)
                instr_o[2] = {16'b0, data_i[63:48]};
            end else begin
                // this instruction is unaligned
                unaligned_d         = 1'b1;
                unaligned_instr_d   = data_i[63:48];
                unaligned_address_d = addr_o[2];
            end
        end else begin
            // two regular instructions back-to-back
            instr_o[1] = data_i[63:32];
            valid_o[1] = valid_i;
        end
    end

    // --------------------------
    // Unaligned fetch
    // --------------------------
    // Address was not 64 bit aligned. These cases are evaluated last and
    // therefore override whatever was assigned above.
    case (address_i[2:1])
        // this means the previous instruction was either compressed or unaligned
        // in any case we don't care
        2'b01: begin
            //     64      32      0
            //     | 3 | 2 | 1 | 0 | <- instruction slot
            // |   I   |   I   | x  -> again unaligned
            // | * | C |   I   | x  -> aligned
            // | * |   I   | C | x  -> aligned
            // |   I   | C | C | x  -> again unaligned
            // | * | C | C | C | x  -> aligned
            addr_o[0] = {address_i[63:3], 3'b010};

            if (instr_is_compressed[1]) begin
                instr_o[0] = {16'b0, data_i[31:16]};
                valid_o[0] = valid_i;

                if (instr_is_compressed[2]) begin
                    valid_o[1] = valid_i;
                    instr_o[1] = {16'b0, data_i[47:32]};
                    addr_o[1]  = {address_i[63:3], 3'b100};
                    if (instr_is_compressed[3]) begin
                        instr_o[2] = {16'b0, data_i[63:48]};
                        addr_o[2]  = {address_i[63:3], 3'b110};
                        valid_o[2] = valid_i;
                    end else begin
                        // this instruction is unaligned
                        unaligned_d         = 1'b1;
                        unaligned_instr_d   = data_i[63:48];
                        unaligned_address_d = addr_o[3];
                    end
                end else begin
                    instr_o[1] = data_i[63:32];
                    addr_o[1]  = {address_i[63:3], 3'b100};
                    valid_o[1] = valid_i;
                end
            // instruction 1 is not compressed -> check slot 3
            end else begin
                instr_o[0] = data_i[47:16];
                valid_o[0] = valid_i;
                addr_o[1]  = {address_i[63:3], 3'b110};
                if (instr_is_compressed[3]) begin
                    instr_o[1] = data_i[63:48];
                    valid_o[1] = valid_i;
                end else begin
                    unaligned_d         = 1'b1;
                    unaligned_instr_d   = data_i[63:48];
                    unaligned_address_d = addr_o[1];
                end
            end
        end
        2'b10: begin
            valid_o = '0;
            //     64      32      0
            //     | 3 | 2 | 1 | 0 | <- instruction slot
            // |   I   | C |   *   | <- unaligned
            //     | C | C |   *   | <- aligned
            //     |   I   |   *   | <- aligned
            if (instr_is_compressed[2]) begin
                valid_o[0] = valid_i;
                instr_o[0] = data_i[47:32];
                // second instruction is also compressed
                if (instr_is_compressed[3]) begin
                    valid_o[1] = valid_i;
                    instr_o[1] = data_i[63:48];
                // regular instruction -> unaligned
                end else begin
                    unaligned_d         = 1'b1;
                    unaligned_address_d = {address_i[63:3], 3'b110};
                    unaligned_instr_d   = data_i[63:48];
                end
            // instruction is a regular instruction
            end else begin
                valid_o[0] = valid_i;
                instr_o[0] = data_i[63:32];
                addr_o[0]  = address_i;
            end
        end
        // we started to fetch on an unaligned boundary with a whole instruction -> wait until we've
        // received the next instruction
        2'b11: begin
            valid_o = '0;
            if (!instr_is_compressed[3]) begin
                unaligned_d         = 1'b1;
                unaligned_address_d = {address_i[63:3], 3'b110};
                unaligned_instr_d   = data_i[63:48];
            end else begin
                valid_o[3] = valid_i;
            end
        end
    endcase
end
|
||||
end
|
||||
|
||||
// State registers for the unaligned-instruction bookkeeping.
// Asynchronous active-low reset clears everything. Otherwise the flag is
// cleared by flush_i (which has priority) and all three registers only take
// their next-state value in cycles where valid_i is set.
always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
        unaligned_instr_q   <= '0;
        unaligned_address_q <= '0;
        unaligned_q         <= 1'b0;
    end else begin
        // unaligned flag: flush wins over a regular update
        if (flush_i) begin
            unaligned_q <= 1'b0;
        end else if (valid_i) begin
            unaligned_q <= unaligned_d;
        end

        // saved half-word and its address are not affected by flush_i
        if (valid_i) begin
            unaligned_instr_q   <= unaligned_instr_d;
            unaligned_address_q <= unaligned_address_d;
        end
    end
end
|
||||
endmodule
|
|
@ -1,252 +0,0 @@
|
|||
// Copyright 2018 ETH Zurich and University of Bologna.
|
||||
// Copyright and related rights are licensed under the Solderpad Hardware
|
||||
// License, Version 0.51 (the "License"); you may not use this file except in
|
||||
// compliance with the License. You may obtain a copy of the License at
|
||||
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
|
||||
// or agreed to in writing, software, hardware and materials distributed under
|
||||
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
//
|
||||
// Author: Florian Zaruba, ETH Zurich
|
||||
// Date: 14.05.2017
|
||||
// Description: Emits and re-aligns compressed and unaligned instructions
|
||||
|
||||
import ariane_pkg::*;
|
||||
|
||||
// Re-aligns the 32-bit entries delivered by the fetch stage into single
// instructions. Per input entry it emits either
//   - the full 32-bit word,
//   - the lower and then (one handshake later) the upper compressed instruction,
//   - or a 32-bit instruction stitched together from the upper half of the
//     previous entry and the lower half of the current one.
// The handshake towards the consumer is fetch_entry_valid_o / fetch_ack_i, the
// handshake towards the producer is fetch_entry_valid_i / fetch_ack_o.
module instr_realigner (
    input  logic            clk_i,               // Clock
    input  logic            rst_ni,              // Asynchronous reset active low
    // control signals
    input  logic            flush_i,             // clears the unaligned/compressed state

    input  frontend_fetch_t fetch_entry_i,
    input  logic            fetch_entry_valid_i,
    output logic            fetch_ack_o,

    output fetch_entry_t    fetch_entry_o,
    output logic            fetch_entry_valid_o,
    input  logic            fetch_ack_i
);
    // ----------
    // Registers
    // ----------
    // the last instruction was unaligned
    logic        unaligned_n,         unaligned_q;
    // save the unaligned part of the instruction to this ff
    logic [15:0] unaligned_instr_n,   unaligned_instr_q;
    // the previous instruction was compressed
    logic        compressed_n,        compressed_q;
    // register to save the unaligned address
    logic [63:0] unaligned_address_n, unaligned_address_q;
    // get the next instruction, needed on an unaligned access
    logic        jump_unaligned_half_word;

    // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction
    // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction
    logic kill_upper_16_bit;
    assign kill_upper_16_bit = fetch_entry_i.branch_predict.valid &
                               fetch_entry_i.branch_predict.predict_taken &
                               fetch_entry_i.bp_taken[0];

    // -------------------
    // Re-alignment logic
    // -------------------
    always_comb begin : realign_instr

        unaligned_n         = unaligned_q;
        unaligned_instr_n   = unaligned_instr_q;
        compressed_n        = compressed_q;
        unaligned_address_n = unaligned_address_q;

        // directly output this instruction. adaptations are made throughout the always_comb block
        fetch_entry_o.address        = fetch_entry_i.address;
        fetch_entry_o.instruction    = fetch_entry_i.instruction;
        fetch_entry_o.branch_predict = fetch_entry_i.branch_predict;
        fetch_entry_o.ex.valid       = fetch_entry_i.page_fault;
        fetch_entry_o.ex.tval        = (fetch_entry_i.page_fault) ? fetch_entry_i.address : '0;
        fetch_entry_o.ex.cause       = (fetch_entry_i.page_fault) ? riscv::INSTR_PAGE_FAULT : '0;

        fetch_entry_valid_o = fetch_entry_valid_i;
        fetch_ack_o         = fetch_ack_i;
        // we just jumped to a half word and encountered an unaligned 32-bit instruction
        jump_unaligned_half_word = 1'b0;
        // ---------------------------------
        // Input port & Instruction Aligner
        // ---------------------------------
        // check if the entry of the fetch FIFO is valid and if we are currently not serving the second part
        // of a compressed instruction
        if (fetch_entry_valid_i && !compressed_q) begin
            // ------------------------
            // Access on Word Boundary
            // ------------------------
            if (fetch_entry_i.address[1] == 1'b0) begin
                // do we actually want the first instruction or was the address a half word access?
                if (!unaligned_q) begin
                    // we got a valid instruction so we can satisfy the unaligned instruction
                    unaligned_n = 1'b0;
                    // check if the instruction is compressed
                    if (fetch_entry_i.instruction[1:0] != 2'b11) begin
                        // it is compressed (zero-extended with 16 bits, matching the
                        // other 16-bit extractions in this module)
                        fetch_entry_o.instruction = {16'b0, fetch_entry_i.instruction[15:0]};
                        // we need to kill the lower prediction
                        if (fetch_entry_i.branch_predict.valid && !fetch_entry_i.bp_taken[0])
                            fetch_entry_o.branch_predict.valid = 1'b0;

                        // should we even look at the upper instruction bits?
                        if (!kill_upper_16_bit) begin
                            // Yes, so...
                            // 1. Is the second instruction also compressed, like:
                            // _____________________________________________
                            // | compressed 2 [31:16] | compressed 1[15:0] |
                            // |____________________________________________
                            if (fetch_entry_i.instruction[17:16] != 2'b11) begin
                                // yes, this was a compressed instruction
                                compressed_n = 1'b1;
                                // do not advance the queue pointer
                                fetch_ack_o = 1'b0;
                            // 2. or is it an unaligned 32 bit instruction like
                            // ____________________________________________________
                            // |instr [15:0] | instr [31:16] | compressed 1[15:0] |
                            // |____________________________________________________
                            end else begin
                                // save the upper half of the fetch word; it holds the
                                // lower 16 bit of the unaligned instruction
                                unaligned_instr_n = fetch_entry_i.instruction[31:16];
                                // save the address
                                unaligned_address_n = {fetch_entry_i.address[63:2], 2'b10};
                                // and that it was unaligned
                                unaligned_n = 1'b1;
                                // this does not consume space in the FIFO
                            end
                        end
                    end
                end
                // this is a full 32 bit instruction like
                // _______________________
                // | instruction [31:0]  |
                // |______________________

                // we have an outstanding unaligned instruction
                else if (unaligned_q) begin

                    fetch_entry_o.address     = unaligned_address_q;
                    fetch_entry_o.instruction = {fetch_entry_i.instruction[15:0], unaligned_instr_q};

                    // again should we look at the upper bits?
                    if (!kill_upper_16_bit) begin
                        // what's up with the other upper 16 bit of this instruction
                        // is the second instruction also compressed, like:
                        // _____________________________________________
                        // | compressed 2 [31:16] | unaligned[31:16]    |
                        // |____________________________________________
                        // check if the lower compressed instruction was no branch otherwise we will need to squash this instruction
                        // but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction
                        if (fetch_entry_i.instruction[17:16] != 2'b11) begin
                            // this was a compressed instruction
                            compressed_n = 1'b1;
                            // do not advance the queue pointer
                            fetch_ack_o = 1'b0;
                            // unaligned access served
                            unaligned_n = 1'b0;
                            // we need to kill the lower prediction
                            if (fetch_entry_i.branch_predict.valid && !fetch_entry_i.bp_taken[0])
                                fetch_entry_o.branch_predict.valid = 1'b0;
                        // or is it an unaligned 32 bit instruction like
                        // ____________________________________________________
                        // |instr [15:0] | instr [31:16] | compressed 1[15:0] |
                        // |____________________________________________________
                        end else begin
                            // kill_upper_16_bit is known to be low in this branch, so no
                            // further check is needed
                            // save the upper half of the fetch word; it holds the
                            // lower 16 bit of the next unaligned instruction
                            unaligned_instr_n = fetch_entry_i.instruction[31:16];
                            // save the address
                            unaligned_address_n = {fetch_entry_i.address[63:2], 2'b10};
                            // and that it was unaligned
                            unaligned_n = 1'b1;
                        end
                    end
                    // we've got a predicted taken branch we need to clear the unaligned flag if it was decoded as a lower 16 instruction
                    else if (fetch_entry_i.branch_predict.valid) begin
                        // the next fetch will start from a 4 byte boundary again
                        unaligned_n = 1'b0;
                    end
                end
            end
            // ----------------------------
            // Access on half-Word Boundary
            // ----------------------------
            else if (fetch_entry_i.address[1] == 1'b1) begin // address was a half word access
                // reset the unaligned flag as this is a completely new fetch (because consecutive fetches only happen on a word basis)
                unaligned_n = 1'b0;
                // this is a compressed instruction
                if (fetch_entry_i.instruction[17:16] != 2'b11) begin
                    // it is compressed
                    fetch_entry_o.instruction = {16'b0, fetch_entry_i.instruction[31:16]};

                // this is the first part of a 32 bit unaligned instruction
                end else begin
                    // save the upper half of the fetch word; it holds the
                    // lower 16 bit of the unaligned instruction
                    unaligned_instr_n = fetch_entry_i.instruction[31:16];
                    // and that it was unaligned
                    unaligned_n = 1'b1;
                    // save the address
                    unaligned_address_n = {fetch_entry_i.address[63:2], 2'b10};
                    // we need to wait for the second instruction
                    fetch_entry_valid_o = 1'b0;
                    // so get it by acknowledging this instruction
                    fetch_ack_o = 1'b1;
                    // we got to an unaligned instruction -> get the next entry to fulfil the need
                    jump_unaligned_half_word = 1'b1;
                end
                // there can never be a whole 32 bit instruction on a half word access
            end
        end
        // ----------------------------
        // Next compressed instruction
        // ----------------------------
        // we are serving the second part of an instruction which was also compressed
        if (compressed_q) begin
            fetch_ack_o  = fetch_ack_i;
            compressed_n = 1'b0;
            fetch_entry_o.instruction = {16'b0, fetch_entry_i.instruction[31:16]};
            fetch_entry_o.address     = {fetch_entry_i.address[63:2], 2'b10};
            fetch_entry_valid_o = 1'b1;
        end

        // if we didn't get an acknowledge keep the registers stable
        if (!fetch_ack_i && !jump_unaligned_half_word) begin
            unaligned_n         = unaligned_q;
            unaligned_instr_n   = unaligned_instr_q;
            compressed_n        = compressed_q;
            unaligned_address_n = unaligned_address_q;
        end

        if (flush_i) begin
            // clear the unaligned and compressed instruction
            unaligned_n  = 1'b0;
            compressed_n = 1'b0;
        end

        // assign the correct address for a potentially faulting unaligned instruction
        // we've already done the re-alignment for the instruction word so we
        // can just assign it here to tval (this overrides the default above)
        fetch_entry_o.ex.tval = fetch_entry_o.address;
    end

    // ---------
    // Registers
    // ---------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            unaligned_q         <= 1'b0;
            unaligned_instr_q   <= 16'b0;
            unaligned_address_q <= 64'b0;
            compressed_q        <= 1'b0;
        end else begin
            unaligned_q         <= unaligned_n;
            unaligned_instr_q   <= unaligned_instr_n;
            unaligned_address_q <= unaligned_address_n;
            compressed_q        <= compressed_n;
        end
    end

endmodule
|
|
@ -67,6 +67,9 @@ package ariane_soc;
|
|||
localparam logic [NrRegion-1:0][NB_PERIPHERALS-1:0] ValidRule = {{NrRegion * NB_PERIPHERALS}{1'b1}};
|
||||
|
||||
localparam ariane_pkg::ariane_cfg_t ArianeSocCfg = '{
|
||||
RASDepth: 2,
|
||||
BTBEntries: 32,
|
||||
BHTEntries: 128,
|
||||
// idempotent region
|
||||
NrNonIdempotentRules: 0,
|
||||
NonIdempotentAddrBase: {64'b0},
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue