Prepare for 64 bit fetch, reduce fetch fifo size

- Replace is_lower_16 in branch prediction with
  more generic information which extends to 64 bit
  fetches
- Reduce the size of the fetch fifo by eliminating
  redundant exception fields (only instruction
  page faults can occur).
- Introduce new struct which governs communication
  between frontend and decode
- Add various configurations to ariane_pkg
This commit is contained in:
Florian Zaruba 2018-10-18 20:56:27 +02:00
parent 7da347e17b
commit 569ca80d1c
No known key found for this signature in database
GPG key ID: E742FFE8EC38A792
9 changed files with 226 additions and 124 deletions

View file

@ -82,7 +82,6 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \
src/common_cells/src/stream_mux.sv \
src/common_cells/src/stream_demux.sv \
src/util/axi_connect.sv \
src/util/mock_mul.sv \
src/axi/src/axi_cut.sv \
src/axi/src/axi_join.sv \
src/fpga-support/rtl/SyncSpRamBeNx64.sv \

View file

@ -33,6 +33,7 @@ package ariane_pkg;
localparam ENABLE_RENAME = 1'b1;
localparam ISSUE_WIDTH = 1;
// amount of pipeline registers inserted for load/store return path
// this can be tuned to trade-off IPC vs. cycle time
localparam NR_LOAD_PIPE_REGS = 1;
@ -119,7 +120,7 @@ package ariane_pkg;
// if set to zero a flush will not invalidate the cache-lines, in a single core environment
// where coherence is not necessary this can improve performance. This needs to be switched on
// when more than one core is in a system
localparam logic INVALIDATE_ON_FLUSH = 1'b1;
localparam logic INVALIDATE_ON_FLUSH = 1'b0;
localparam NR_WB_PORTS = 4;
@ -130,6 +131,8 @@ package ariane_pkg;
// leave as is (fails with >8 entries and wider fetch width)
localparam int unsigned FETCH_FIFO_DEPTH = 8;
localparam int unsigned FETCH_WIDTH = 32;
// maximum instructions we can fetch on one request (we support compressed instructions)
localparam int unsigned INSTR_PER_FETCH = FETCH_WIDTH / 16;
// Only use struct when signals have same direction
// exception
@ -150,7 +153,6 @@ package ariane_pkg;
logic [63:0] target_address; // target address at which to jump, or not
logic is_mispredict; // set if this was a mis-predict
logic is_taken; // branch is taken
logic is_lower_16; // branch instruction is compressed and resides
// in the lower 16 bit of the word
logic valid; // prediction with all its values is valid
logic clear; // invalidate this entry
@ -164,7 +166,6 @@ package ariane_pkg;
logic valid; // this is a valid hint
logic [63:0] predict_address; // target address at which to jump, or not
logic predict_taken; // branch is taken
logic is_lower_16; // branch instruction is compressed and resides
// in the lower 16 bit of the word
cf_t cf_type; // Type of control flow change
} branchpredict_sbe_t;
@ -173,14 +174,12 @@ package ariane_pkg;
logic valid;
logic [63:0] pc; // update at PC
logic [63:0] target_address;
logic is_lower_16;
logic clear;
} btb_update_t;
typedef struct packed {
logic valid;
logic [63:0] target_address;
logic is_lower_16;
} btb_prediction_t;
typedef struct packed {
@ -369,12 +368,20 @@ package ariane_pkg;
// ---------------
// IF/ID Stage
// ---------------
// Entry type of the frontend's fetch FIFO and of the IF -> ID interface.
// The frontend fills this struct with a positional concatenation
// ({icache_vaddr_q, icache_data_q, bp_sbe, taken[...], icache_ex_valid_q}),
// so the field order below must stay in sync with that concatenation.
typedef struct packed {
logic [63:0] address; // virtual address of the fetched word (the frontend pushes icache_vaddr_q here)
logic [FETCH_WIDTH-1:0] instruction; // raw, not yet re-aligned fetch word; FETCH_WIDTH bits wide
branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path
logic [INSTR_PER_FETCH-1:0] bp_taken; // one bit per 16-bit slot of the fetch word: at which instruction is this branch taken?
logic page_fault; // an instruction page fault happened; the realigner expands this into a full exception_t (INSTR_PAGE_FAULT)
} frontend_fetch_t;
// store the decompressed instruction
typedef struct packed {
logic [63:0] address; // the address of the instructions from below
logic [31:0] instruction; // instruction word
branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path
exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions
logic [63:0] address; // the address of the instructions from below
logic [31:0] instruction; // instruction word
branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path
exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions
} fetch_entry_t;
// ---------------

View file

@ -57,7 +57,7 @@ module ariane #(
// --------------
// IF <-> ID
// --------------
fetch_entry_t fetch_entry_if_id;
frontend_fetch_t fetch_entry_if_id;
logic fetch_valid_if_id;
logic decode_ack_id_if;

View file

@ -44,7 +44,6 @@ module branch_unit (
resolved_branch_o.is_taken = 1'b0;
resolved_branch_o.valid = branch_valid_i;
resolved_branch_o.is_mispredict = 1'b0;
resolved_branch_o.is_lower_16 = 1'b0;
resolved_branch_o.clear = 1'b0;
resolved_branch_o.cf_type = branch_predict_i.cf_type;
// calculate next PC, depending on whether the instruction is compressed or not this may be different
@ -69,10 +68,6 @@ module branch_unit (
resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4);
if (branch_valid_i) begin
// save if the branch instruction was in the lower 16 bit of the instruction word
// the first case is a compressed instruction which is in slot 0
// the other case is a misaligned uncompressed instruction which we only predict in the next cycle (see notes above)
resolved_branch_o.is_lower_16 = (is_compressed_instr_i && pc_i[1] == 1'b0) || (!is_compressed_instr_i && pc_i[1] == 1'b1);
// write target address which goes to pc gen
resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;
resolved_branch_o.is_taken = branch_comp_res_i;

View file

@ -121,9 +121,9 @@ module std_icache (
assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2];
generate
for (genvar i=0;i<ICACHE_SET_ASSOC;i++) begin : g_tag_cmpsel
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : g_tag_cmpsel
assign hit[i] = (tag_rdata[i].tag == tag) ? tag_rdata[i].valid : 1'b0;
assign cl_sel[i] = (hit[i]) ? data_rdata[i][{idx,5'b0} +: FETCH_WIDTH] : '0;
assign cl_sel[i] = (hit[i]) ? data_rdata[i][{idx, 5'b0} +: FETCH_WIDTH] : '0;
assign way_valid[i] = tag_rdata[i].valid;
end
endgenerate
@ -131,7 +131,7 @@ module std_icache (
// OR reduction of selected cachelines
always_comb begin : p_reduction
dreq_o.data = cl_sel[0];
for(int i=1; i<ICACHE_SET_ASSOC;i++)
for(int i = 1; i < ICACHE_SET_ASSOC; i++)
dreq_o.data |= cl_sel[i];
end

View file

@ -57,8 +57,6 @@ module btb #(
btb_d[update_pc].valid = 1'b1;
// the target address is simply updated
btb_d[update_pc].target_address = btb_update_i.target_address;
// as is the information whether this was a compressed branch
btb_d[update_pc].is_lower_16 = btb_update_i.is_lower_16;
// check if we should invalidate this entry, this happens in case we predicted a branch
// where actually none-is (aliasing)
if (btb_update_i.clear) begin

View file

@ -39,22 +39,18 @@ module frontend (
input icache_dreq_o_t icache_dreq_i,
output icache_dreq_i_t icache_dreq_o,
// instruction output port -> to processor back-end
output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
output frontend_fetch_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
output logic fetch_entry_valid_o, // instruction in IF is valid
input logic fetch_ack_i // ID acknowledged this instruction
);
// maximum instructions we can fetch on one request
localparam int unsigned INSTR_PER_FETCH = FETCH_WIDTH/16;
// Registers
logic [31:0] icache_data_q;
logic icache_valid_q;
exception_t icache_ex_q;
exception_t icache_ex_valid_q;
logic instruction_valid;
logic [INSTR_PER_FETCH-1:0] instr_is_compressed;
logic [63:0] icache_vaddr_q;
// BHT, BTB and RAS prediction
bht_prediction_t bht_prediction;
btb_prediction_t btb_prediction;
@ -103,54 +99,160 @@ module frontend (
// register to save the unaligned address
logic [63:0] unaligned_address_d, unaligned_address_q;
// TODO: generalize to arbitrary instruction fetch width
// Mark every 16-bit slot of the fetch word whose two least-significant bits
// differ from 2'b11, i.e. a slot that would start a compressed instruction.
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
    assign instr_is_compressed[i] = (icache_data_q[i * 16 +: 2] != 2'b11);
end
// Soft-realignment to do branch-prediction
always_comb begin : re_align
unaligned_d = unaligned_q;
unaligned_address_d = unaligned_address_q;
unaligned_instr_d = unaligned_instr_q;
instruction_valid = icache_valid_q;
instr[1] = '0;
instr[0] = icache_data_q;
// 32-bit can contain 2 instructions
if (FETCH_WIDTH == 32) begin
instr[0] = icache_data_q;
addr[0] = icache_vaddr_q;
addr[1] = {icache_vaddr_q[63:2], 2'b10};
addr[0] = icache_vaddr_q;
instr[1] = '0;
addr[1] = {icache_vaddr_q[63:2], 2'b10};
// with 64-bit we can fetch up to 4 instructions/cycle
end else if (FETCH_WIDTH == 64) begin
instr[0] = icache_data_q;
addr[0] = icache_vaddr_q;
instr[1] = '0;
addr[1] = {icache_vaddr_q[63:3], 3'b010};
instr[2] = '0;
addr[2] = {icache_vaddr_q[63:3], 3'b100};
instr[3] = '0;
addr[3] = {icache_vaddr_q[63:3], 3'b110};
end
if (icache_valid_q) begin
// last instruction was unaligned
if (unaligned_q) begin
instr[0] = {icache_data_q[15:0], unaligned_instr_q};
addr[0] = unaligned_address_q;
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
unaligned_instr_d = icache_data_q[31:16]; // save the upper bits for next cycle
if (FETCH_WIDTH == 32) begin
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
unaligned_instr_d = icache_data_q[31:16]; // save the upper bits for next cycle
end else if (FETCH_WIDTH == 64) begin
unaligned_address_d = {icache_vaddr_q[63:3], 3'b110};
unaligned_instr_d = icache_data_q[63:48]; // save the upper bits for next cycle
end
// check if this is instruction is still unaligned e.g.: it is not compressed
// if its compressed re-set unaligned flag
if (icache_data_q[17:16] != 2'b11) begin
unaligned_d = 1'b0;
instr[1] = {16'b0, icache_data_q[31:16]};
if (FETCH_WIDTH == 32) begin
// for 32 bit we can simply check the next instruction and whether it is compressed or not
// if it is compressed the next fetch will contain an aligned instruction
if (instr_is_compressed[1]) begin
unaligned_d = 1'b0;
instr[1] = {16'b0, icache_data_q[31:16]};
end
end else if (FETCH_WIDTH === 64) begin
// for 64 bit there exist the following options:
// 64 32 0
// | I | I | U | -> again unaligned
// | * | C | I | U | -> aligned
// | * | I | C | U | -> aligned
// | I | C | C | U | -> again unaligned
// | * | C | C | C | U | -> aligned
// Legend: C = compressed, I = 32 bit instruction, U = unaligned upper half
// * = don't care
if (instr_is_compressed[1]) begin
instr[1] = {16'b0, icache_data_q[31:16]};
if (instr_is_compressed[2]) begin
if (instr_is_compressed[3]) begin
unaligned_d = 1'b0;
instr[3] = {16'b0, icache_data_q[63:48]};
end else begin
// continues to be unaligned
end
end else begin
unaligned_d = 1'b0;
instr[2] = icache_data_q[63:32];
end
// instruction 1 is not compressed
end else begin
instr[1] = icache_data_q[47:16];
if (instr_is_compressed[2]) begin
unaligned_d = 1'b0;
instr[2] = icache_data_q[63:48];
end else begin
// continues to be unaligned
end
end
end
end else if (is_rvc[0]) begin // instruction zero is RVC
// is instruction 1 also compressed
// yes? -> no problem, no -> we've got an unaligned instruction
if (icache_data_q[17:16] != 2'b11) begin
instr[1] = {16'b0, icache_data_q[31:16]};
end else begin
unaligned_instr_d = icache_data_q[31:16];
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
unaligned_d = 1'b1;
end else if (instr_is_compressed[0]) begin // instruction zero is RVC
if (FETCH_WIDTH == 32) begin
// is instruction 1 also compressed
// yes? -> no problem, no -> we've got an unaligned instruction
if (instr_is_compressed[1]) begin
instr[1] = {16'b0, icache_data_q[31:16]};
end else begin
unaligned_instr_d = icache_data_q[31:16];
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
unaligned_d = 1'b1;
end
end else if (FETCH_WIDTH == 64) begin
// 64 32 0
// | I | I | C | -> again unaligned
// | * | C | I | C | -> aligned
// | * | I | C | C | -> aligned
// | I | C | C | C | -> again unaligned
// | * | C | C | C | C | -> aligned
if (instr_is_compressed[1]) begin
instr[1] = {16'b0, icache_data_q[31:16]};
if (instr_is_compressed[2]) begin
if (instr_is_compressed[3]) begin
unaligned_d = 1'b0;
instr[3] = {16'b0, icache_data_q[63:48]};
end else begin
// continues to be unaligned
end
end else begin
unaligned_d = 1'b0;
instr[2] = icache_data_q[63:32];
end
// instruction 1 is not compressed
end else begin
instr[1] = icache_data_q[47:16];
if (instr_is_compressed[2]) begin
unaligned_d = 1'b0;
instr[2] = icache_data_q[63:48];
end else begin
// continues to be unaligned
end
end
end
end // else -> normal fetch
end
// we started to fetch on a unaligned boundary with a whole instruction -> wait until we've
// received the next instruction
if (icache_valid_q && icache_vaddr_q[1] && icache_data_q[17:16] == 2'b11) begin
instruction_valid = 1'b0;
unaligned_d = 1'b1;
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
unaligned_instr_d = icache_data_q[31:16];
// The fetch started on the last half-word of the fetch word and that half-word
// is the first half of a 32-bit instruction: nothing can be issued yet, so
// invalidate this response and remember the half-word and its address until
// the next fetch word arrives.
if (FETCH_WIDTH == 32) begin
    if (icache_valid_q && icache_vaddr_q[1] && !instr_is_compressed[1]) begin
        instruction_valid   = 1'b0;
        unaligned_d         = 1'b1;
        unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
        unaligned_instr_d   = icache_data_q[31:16];
    end
end else if (FETCH_WIDTH == 64) begin
    if (icache_valid_q && icache_vaddr_q[2] && icache_vaddr_q[1] && !instr_is_compressed[3]) begin
        instruction_valid   = 1'b0;
        unaligned_d         = 1'b1;
        // the last half-word of a 64-bit fetch word lives at byte offset 6
        unaligned_address_d = {icache_vaddr_q[63:3], 3'b110};
        // slot 3 (the one tested by instr_is_compressed[3]) is bits [63:48];
        // saving [31:16] here would store an unrelated half-word
        unaligned_instr_d   = icache_data_q[63:48];
    end
end
// if we killed the consecutive fetch we are starting on a clean slate
if (icache_dreq_o.kill_s2) begin
unaligned_d = 1'b0;
end
@ -178,7 +280,7 @@ module frontend (
// only predict if the response is valid
if (instruction_valid) begin
// look at instruction 0, 1, 2,...
// look at instruction 0, 1, 2, ...
for (int unsigned i = 0; i < INSTR_PER_FETCH; i++) begin
// only speculate if the previous instruction was not taken
if (!taken[i]) begin
@ -235,8 +337,20 @@ module frontend (
end
// we are not interested in the lower instruction
if (icache_vaddr_q[1]) begin
taken[1] = 1'b0;
if (FETCH_WIDTH == 32) begin
if (icache_vaddr_q[1]) begin
taken[1] = 1'b0;
// TODO(zarubaf): that seems to be overly pessimistic
ras_pop = 1'b0;
ras_push = 1'b0;
end
end else if (FETCH_WIDTH == 64) begin
// The selector is the 2-bit half-word index of the fetch address, so the case
// items must be 2-bit values as well. The former 3-bit byte-offset literals
// were compared against the zero-extended selector: 3'b100 and 3'b110 could
// never match, and 3'b010 matched half-word index 2 instead of 1.
// NOTE(review): as before, only a single slot is cleared per entry point;
// confirm whether all slots below the entry point should be cleared.
case (icache_vaddr_q[2:1])
    2'b01: taken[1] = 1'b0; // fetch address at byte offset 2
    2'b10: taken[2] = 1'b0; // fetch address at byte offset 4
    2'b11: taken[3] = 1'b0; // fetch address at byte offset 6
    default: ;              // byte offset 0: nothing to mask
endcase
// TODO(zarubaf): that seems to be overly pessimistic
ras_pop = 1'b0;
ras_push = 1'b0;
end
@ -249,29 +363,14 @@ module frontend (
bp_sbe.valid = bp_valid;
bp_sbe.predict_address = bp_vaddr;
bp_sbe.predict_taken = bp_valid;
bp_sbe.is_lower_16 = taken[1]; // the branch is on the lower 16 (in a 32-bit setup)
end
assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
always_comb begin : id_if
icache_dreq_o.kill_s1 = 1'b0;
icache_dreq_o.kill_s2 = 1'b0;
// we mis-predicted so kill the icache request and the fetch queue
if (is_mispredict || flush_i) begin
icache_dreq_o.kill_s1 = 1'b1;
icache_dreq_o.kill_s2 = 1'b1;
end
// if we have a valid branch-prediction we need to kill the last cache request
if (bp_valid) begin
icache_dreq_o.kill_s2 = 1'b1;
end
fifo_valid = icache_valid_q;
end
// we mis-predicted so kill the icache request and the fetch queue
assign icache_dreq_o.kill_s1 = is_mispredict | flush_i;
// if we have a valid branch-prediction we need to kill the last cache request
assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
assign fifo_valid = icache_valid_q;
// ----------------------------------------
// Update Control Flow Predictions
@ -285,7 +384,6 @@ module frontend (
assign btb_update.valid = resolved_branch_i.valid & (resolved_branch_i.cf_type == BTB);
assign btb_update.pc = resolved_branch_i.pc;
assign btb_update.target_address = resolved_branch_i.target_address;
assign btb_update.is_lower_16 = resolved_branch_i.is_lower_16;
assign btb_update.clear = resolved_branch_i.clear;
// -------------------
@ -329,7 +427,7 @@ module frontend (
// 0. Default assignment
// -------------------------------
if (if_ready) begin
npc_d = {fetch_address[63:2], 2'b0} + 64'h4;
npc_d = {fetch_address[63:2], 2'b0} + ((FETCH_WIDTH == 64) ? 'h8 : 'h4);
end
// -------------------------------
// 2. Control flow change request
@ -400,8 +498,8 @@ module frontend (
@(posedge clk_i) disable iff (~rst_ni) (fifo_credits_q <= FETCH_FIFO_DEPTH))
else $fatal("[frontend] fetch fifo credits must be <= FETCH_FIFO_DEPTH!");
initial begin
assert (FETCH_FIFO_DEPTH<=8) else $fatal("[frontend] fetch fifo deeper than 8 not supported");
assert (FETCH_WIDTH==32) else $fatal("[frontend] fetch width != not supported");
assert (FETCH_FIFO_DEPTH <= 8) else $fatal("[frontend] fetch fifo deeper than 8 not supported");
assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64) else $fatal("[frontend] fetch width != not supported");
end
`endif
//pragma translate_on
@ -413,7 +511,7 @@ module frontend (
icache_data_q <= '0;
icache_valid_q <= 1'b0;
icache_vaddr_q <= 'b0;
icache_ex_q <= '0;
icache_ex_valid_q <= 1'b0;
unaligned_q <= 1'b0;
unaligned_address_q <= '0;
unaligned_instr_q <= '0;
@ -425,7 +523,7 @@ module frontend (
icache_data_q <= icache_dreq_i.data;
icache_valid_q <= icache_dreq_i.valid;
icache_vaddr_q <= icache_dreq_i.vaddr;
icache_ex_q <= icache_dreq_i.ex;
icache_ex_valid_q <= icache_dreq_i.ex.valid;
unaligned_q <= unaligned_d;
unaligned_address_q <= unaligned_address_d;
unaligned_instr_q <= unaligned_instr_d;
@ -490,8 +588,8 @@ module frontend (
fifo_v2 #(
.DEPTH ( 8 ),
.dtype ( fetch_entry_t ))
i_fetch_fifo (
.dtype ( frontend_fetch_t )
) i_fetch_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
@ -500,7 +598,7 @@ module frontend (
.empty_o ( fifo_empty ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( {icache_vaddr_q, icache_data_q, bp_sbe, icache_ex_q} ),
.data_i ( {icache_vaddr_q, icache_data_q, bp_sbe, taken[INSTR_PER_FETCH:1], icache_ex_valid_q} ),
.push_i ( fifo_valid ),
.data_o ( fetch_entry_o ),
.pop_i ( fifo_pop )

View file

@ -21,7 +21,7 @@ module id_stage (
input logic flush_i,
// from IF
input fetch_entry_t fetch_entry_i,
input frontend_fetch_t fetch_entry_i,
input logic fetch_entry_valid_i,
output logic decoded_instr_ack_o, // acknowledge the instruction (fetch entry)
@ -45,7 +45,6 @@ module id_stage (
logic valid;
scoreboard_entry_t sbe;
logic is_ctrl_flow;
} issue_n, issue_q;
logic is_control_flow_instr;
@ -62,9 +61,9 @@ module id_stage (
// 1. Re-align instructions
// ---------------------------------------------------------
instr_realigner instr_realigner_i (
.fetch_entry_0_i ( fetch_entry_i ),
.fetch_entry_valid_0_i ( fetch_entry_valid_i ),
.fetch_ack_0_o ( decoded_instr_ack_o ),
.fetch_entry_i ( fetch_entry_i ),
.fetch_entry_valid_i ( fetch_entry_valid_i ),
.fetch_ack_o ( decoded_instr_ack_o ),
.fetch_entry_o ( fetch_entry ),
.fetch_entry_valid_o ( fetch_entry_valid ),

View file

@ -20,9 +20,9 @@ module instr_realigner (
// control signals
input logic flush_i,
input fetch_entry_t fetch_entry_0_i,
input logic fetch_entry_valid_0_i,
output logic fetch_ack_0_o,
input frontend_fetch_t fetch_entry_i,
input logic fetch_entry_valid_i,
output logic fetch_ack_o,
output fetch_entry_t fetch_entry_o,
output logic fetch_entry_valid_o,
@ -45,9 +45,9 @@ module instr_realigner (
// check if the lower compressed instruction was no branch otherwise we will need to squash this instruction
// but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction
logic kill_upper_16_bit;
assign kill_upper_16_bit = fetch_entry_0_i.branch_predict.valid &
fetch_entry_0_i.branch_predict.predict_taken &
fetch_entry_0_i.branch_predict.is_lower_16;
assign kill_upper_16_bit = fetch_entry_i.branch_predict.valid &
fetch_entry_i.branch_predict.predict_taken &
fetch_entry_i.bp_taken[0];
// ----------
// Registers
// ----------
@ -58,10 +58,16 @@ module instr_realigner (
compressed_n = compressed_q;
unaligned_address_n = unaligned_address_q;
// directly output this instruction. adoptions are made throughout the process
fetch_entry_o = fetch_entry_0_i;
fetch_entry_valid_o = fetch_entry_valid_0_i;
fetch_ack_0_o = fetch_ack_i;
// directly output this instruction. adoptions are made throughout the always comb block
fetch_entry_o.address = fetch_entry_i.address;
fetch_entry_o.instruction = fetch_entry_i.instruction;
fetch_entry_o.branch_predict = fetch_entry_i.branch_predict;
fetch_entry_o.ex.valid = fetch_entry_i.page_fault;
fetch_entry_o.ex.tval = (fetch_entry_i.page_fault) ? fetch_entry_i.address : '0;
fetch_entry_o.ex.cause = (fetch_entry_i.page_fault) ? riscv::INSTR_PAGE_FAULT : '0;
fetch_entry_valid_o = fetch_entry_valid_i;
fetch_ack_o = fetch_ack_i;
// we just jumped to a half word and encountered an unaligned 32-bit instruction
jump_unaligned_half_word = 1'b0;
// ---------------------------------
@ -69,21 +75,21 @@ module instr_realigner (
// ---------------------------------
// check if the entry if the fetch FIFO is valid and if we are currently not serving the second part
// of a compressed instruction
if (fetch_entry_valid_0_i && !compressed_q) begin
if (fetch_entry_valid_i && !compressed_q) begin
// ------------------------
// Access on Word Boundary
// ------------------------
if (fetch_entry_0_i.address[1] == 1'b0) begin
if (fetch_entry_i.address[1] == 1'b0) begin
// do we actually want the first instruction or was the address a half word access?
if (!unaligned_q) begin
// we got a valid instruction so we can satisfy the unaligned instruction
unaligned_n = 1'b0;
// check if the instruction is compressed
if (fetch_entry_0_i.instruction[1:0] != 2'b11) begin
if (fetch_entry_i.instruction[1:0] != 2'b11) begin
// it is compressed
fetch_entry_o.instruction = {15'b0, fetch_entry_0_i.instruction[15:0]};
fetch_entry_o.instruction = {15'b0, fetch_entry_i.instruction[15:0]};
// we need to kill the lower prediction
if (fetch_entry_0_i.branch_predict.valid && !fetch_entry_0_i.branch_predict.is_lower_16)
if (fetch_entry_i.branch_predict.valid && !fetch_entry_i.bp_taken[0])
fetch_entry_o.branch_predict.valid = 1'b0;
// should we even look at the upper instruction bits?
@ -93,20 +99,20 @@ module instr_realigner (
// _____________________________________________
// | compressed 2 [31:16] | compressed 1[15:0] |
// |____________________________________________
if (fetch_entry_0_i.instruction[17:16] != 2'b11) begin
if (fetch_entry_i.instruction[17:16] != 2'b11) begin
// yes, this was a compressed instruction
compressed_n = 1'b1;
// do not advance the queue pointer
fetch_ack_0_o = 1'b0;
fetch_ack_o = 1'b0;
// 2. or is it an unaligned 32 bit instruction like
// ____________________________________________________
// |instr [15:0] | instr [31:16] | compressed 1[15:0] |
// |____________________________________________________
end else begin
// save the lower 16 bit
unaligned_instr_n = fetch_entry_0_i.instruction[31:16];
unaligned_instr_n = fetch_entry_i.instruction[31:16];
// save the address
unaligned_address_n = {fetch_entry_0_i.address[63:2], 2'b10};
unaligned_address_n = {fetch_entry_i.address[63:2], 2'b10};
// and that it was unaligned
unaligned_n = 1'b1;
// this does not consume space in the FIFO
@ -124,7 +130,7 @@ module instr_realigner (
fetch_entry_o.address = unaligned_address_q;
fetch_entry_o.instruction = {fetch_entry_0_i.instruction[15:0], unaligned_instr_q};
fetch_entry_o.instruction = {fetch_entry_i.instruction[15:0], unaligned_instr_q};
// again should we look at the upper bits?
if (!kill_upper_16_bit) begin
@ -135,15 +141,15 @@ module instr_realigner (
// |____________________________________________
// check if the lower compressed instruction was no branch otherwise we will need to squash this instruction
// but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction
if (fetch_entry_0_i.instruction[17:16] != 2'b11) begin
if (fetch_entry_i.instruction[17:16] != 2'b11) begin
// this was a compressed instruction
compressed_n = 1'b1;
// do not advance the queue pointer
fetch_ack_0_o = 1'b0;
fetch_ack_o = 1'b0;
// unaligned access served
unaligned_n = 1'b0;
// we need to kill the lower prediction
if (fetch_entry_0_i.branch_predict.valid && !fetch_entry_0_i.branch_predict.is_lower_16)
if (fetch_entry_i.branch_predict.valid && !fetch_entry_i.bp_taken[0])
fetch_entry_o.branch_predict.valid = 1'b0;
// or is it an unaligned 32 bit instruction like
// ____________________________________________________
@ -151,15 +157,15 @@ module instr_realigner (
// |____________________________________________________
end else if (!kill_upper_16_bit) begin
// save the lower 16 bit
unaligned_instr_n = fetch_entry_0_i.instruction[31:16];
unaligned_instr_n = fetch_entry_i.instruction[31:16];
// save the address
unaligned_address_n = {fetch_entry_0_i.address[63:2], 2'b10};
unaligned_address_n = {fetch_entry_i.address[63:2], 2'b10};
// and that it was unaligned
unaligned_n = 1'b1;
end
end
// we've got a predicted taken branch we need to clear the unaligned flag if it was decoded as a lower 16 instruction
else if (fetch_entry_0_i.branch_predict.valid) begin
else if (fetch_entry_i.branch_predict.valid) begin
// the next fetch will start from a 4 byte boundary again
unaligned_n = 1'b0;
end
@ -168,26 +174,26 @@ module instr_realigner (
// ----------------------------
// Access on half-Word Boundary
// ----------------------------
else if (fetch_entry_0_i.address[1] == 1'b1) begin // address was a half word access
else if (fetch_entry_i.address[1] == 1'b1) begin // address was a half word access
// reset the unaligned flag as this is a completely new fetch (because consecutive fetches only happen on a word basis)
unaligned_n = 1'b0;
// this is a compressed instruction
if (fetch_entry_0_i.instruction[17:16] != 2'b11) begin
if (fetch_entry_i.instruction[17:16] != 2'b11) begin
// it is compressed
fetch_entry_o.instruction = {15'b0, fetch_entry_0_i.instruction[31:16]};
fetch_entry_o.instruction = {15'b0, fetch_entry_i.instruction[31:16]};
// this is the first part of a 32 bit unaligned instruction
end else begin
// save the lower 16 bit
unaligned_instr_n = fetch_entry_0_i.instruction[31:16];
unaligned_instr_n = fetch_entry_i.instruction[31:16];
// and that it was unaligned
unaligned_n = 1'b1;
// save the address
unaligned_address_n = {fetch_entry_0_i.address[63:2], 2'b10};
unaligned_address_n = {fetch_entry_i.address[63:2], 2'b10};
// we need to wait for the second instruction
fetch_entry_valid_o = 1'b0;
// so get it by acknowledging this instruction
fetch_ack_0_o = 1'b1;
fetch_ack_o = 1'b1;
// we got to an unaligned instruction -> get the next entry to full-fill the need
jump_unaligned_half_word = 1'b1;
end
@ -199,10 +205,10 @@ module instr_realigner (
// ----------------------------
// we are serving the second part of an instruction which was also compressed
if (compressed_q) begin
fetch_ack_0_o = fetch_ack_i;
fetch_ack_o = fetch_ack_i;
compressed_n = 1'b0;
fetch_entry_o.instruction = {16'b0, fetch_entry_0_i.instruction[31:16]};
fetch_entry_o.address = {fetch_entry_0_i.address[63:2], 2'b10};
fetch_entry_o.instruction = {16'b0, fetch_entry_i.instruction[31:16]};
fetch_entry_o.address = {fetch_entry_i.address[63:2], 2'b10};
fetch_entry_valid_o = 1'b1;
end