mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-20 04:07:36 -04:00
Prepare for 64 bit fetch, reduce fetch fifo size
- Replace is_lower_16 in branch prediction with more generic information which extends to 64-bit fetches - Reduce the size of the fetch FIFO by eliminating redundant exception fields (only instruction page faults can occur) - Introduce a new struct which governs communication between frontend and decode - Add various configurations to ariane_pkg
This commit is contained in:
parent
7da347e17b
commit
569ca80d1c
9 changed files with 226 additions and 124 deletions
1
Makefile
1
Makefile
|
@ -82,7 +82,6 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \
|
|||
src/common_cells/src/stream_mux.sv \
|
||||
src/common_cells/src/stream_demux.sv \
|
||||
src/util/axi_connect.sv \
|
||||
src/util/mock_mul.sv \
|
||||
src/axi/src/axi_cut.sv \
|
||||
src/axi/src/axi_join.sv \
|
||||
src/fpga-support/rtl/SyncSpRamBeNx64.sv \
|
||||
|
|
|
@ -33,6 +33,7 @@ package ariane_pkg;
|
|||
|
||||
localparam ENABLE_RENAME = 1'b1;
|
||||
|
||||
localparam ISSUE_WIDTH = 1;
|
||||
// amount of pipeline registers inserted for load/store return path
|
||||
// this can be tuned to trade-off IPC vs. cycle time
|
||||
localparam NR_LOAD_PIPE_REGS = 1;
|
||||
|
@ -119,7 +120,7 @@ package ariane_pkg;
|
|||
// if set to zero a flush will not invalidate the cache-lines, in a single core environment
|
||||
// where coherence is not necessary this can improve performance. This needs to be switched on
|
||||
// when more than one core is in a system
|
||||
localparam logic INVALIDATE_ON_FLUSH = 1'b1;
|
||||
localparam logic INVALIDATE_ON_FLUSH = 1'b0;
|
||||
|
||||
localparam NR_WB_PORTS = 4;
|
||||
|
||||
|
@ -130,6 +131,8 @@ package ariane_pkg;
|
|||
// leave as is (fails with >8 entries and wider fetch width)
|
||||
localparam int unsigned FETCH_FIFO_DEPTH = 8;
|
||||
localparam int unsigned FETCH_WIDTH = 32;
|
||||
// maximum instructions we can fetch on one request (we support compressed instructions)
|
||||
localparam int unsigned INSTR_PER_FETCH = FETCH_WIDTH / 16;
|
||||
|
||||
// Only use struct when signals have same direction
|
||||
// exception
|
||||
|
@ -150,7 +153,6 @@ package ariane_pkg;
|
|||
logic [63:0] target_address; // target address at which to jump, or not
|
||||
logic is_mispredict; // set if this was a mis-predict
|
||||
logic is_taken; // branch is taken
|
||||
logic is_lower_16; // branch instruction is compressed and resides
|
||||
// in the lower 16 bit of the word
|
||||
logic valid; // prediction with all its values is valid
|
||||
logic clear; // invalidate this entry
|
||||
|
@ -164,7 +166,6 @@ package ariane_pkg;
|
|||
logic valid; // this is a valid hint
|
||||
logic [63:0] predict_address; // target address at which to jump, or not
|
||||
logic predict_taken; // branch is taken
|
||||
logic is_lower_16; // branch instruction is compressed and resides
|
||||
// in the lower 16 bit of the word
|
||||
cf_t cf_type; // Type of control flow change
|
||||
} branchpredict_sbe_t;
|
||||
|
@ -173,14 +174,12 @@ package ariane_pkg;
|
|||
logic valid;
|
||||
logic [63:0] pc; // update at PC
|
||||
logic [63:0] target_address;
|
||||
logic is_lower_16;
|
||||
logic clear;
|
||||
} btb_update_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [63:0] target_address;
|
||||
logic is_lower_16;
|
||||
} btb_prediction_t;
|
||||
|
||||
typedef struct packed {
|
||||
|
@ -369,12 +368,20 @@ package ariane_pkg;
|
|||
// ---------------
|
||||
// IF/ID Stage
|
||||
// ---------------
|
||||
typedef struct packed {
|
||||
logic [63:0] address; // the address of the instructions from below
|
||||
logic [FETCH_WIDTH-1:0] instruction; // instruction word
|
||||
branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path
|
||||
logic [INSTR_PER_FETCH-1:0] bp_taken; // at which instruction is this branch taken?
|
||||
logic page_fault; // an instruction page fault happened
|
||||
} frontend_fetch_t;
|
||||
|
||||
// store the decompressed instruction
|
||||
typedef struct packed {
|
||||
logic [63:0] address; // the address of the instructions from below
|
||||
logic [31:0] instruction; // instruction word
|
||||
branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path
|
||||
exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions
|
||||
logic [63:0] address; // the address of the instructions from below
|
||||
logic [31:0] instruction; // instruction word
|
||||
branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path
|
||||
exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions
|
||||
} fetch_entry_t;
|
||||
|
||||
// ---------------
|
||||
|
|
|
@ -57,7 +57,7 @@ module ariane #(
|
|||
// --------------
|
||||
// IF <-> ID
|
||||
// --------------
|
||||
fetch_entry_t fetch_entry_if_id;
|
||||
frontend_fetch_t fetch_entry_if_id;
|
||||
logic fetch_valid_if_id;
|
||||
logic decode_ack_id_if;
|
||||
|
||||
|
|
|
@ -44,7 +44,6 @@ module branch_unit (
|
|||
resolved_branch_o.is_taken = 1'b0;
|
||||
resolved_branch_o.valid = branch_valid_i;
|
||||
resolved_branch_o.is_mispredict = 1'b0;
|
||||
resolved_branch_o.is_lower_16 = 1'b0;
|
||||
resolved_branch_o.clear = 1'b0;
|
||||
resolved_branch_o.cf_type = branch_predict_i.cf_type;
|
||||
// calculate next PC, depending on whether the instruction is compressed or not this may be different
|
||||
|
@ -69,10 +68,6 @@ module branch_unit (
|
|||
resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4);
|
||||
|
||||
if (branch_valid_i) begin
|
||||
// save if the branch instruction was in the lower 16 bit of the instruction word
|
||||
// the first case is a compressed instruction which is in slot 0
|
||||
// the other case is a misaligned uncompressed instruction which we only predict in the next cycle (see notes above)
|
||||
resolved_branch_o.is_lower_16 = (is_compressed_instr_i && pc_i[1] == 1'b0) || (!is_compressed_instr_i && pc_i[1] == 1'b1);
|
||||
// write target address which goes to pc gen
|
||||
resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;
|
||||
resolved_branch_o.is_taken = branch_comp_res_i;
|
||||
|
|
|
@ -121,9 +121,9 @@ module std_icache (
|
|||
assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2];
|
||||
|
||||
generate
|
||||
for (genvar i=0;i<ICACHE_SET_ASSOC;i++) begin : g_tag_cmpsel
|
||||
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : g_tag_cmpsel
|
||||
assign hit[i] = (tag_rdata[i].tag == tag) ? tag_rdata[i].valid : 1'b0;
|
||||
assign cl_sel[i] = (hit[i]) ? data_rdata[i][{idx,5'b0} +: FETCH_WIDTH] : '0;
|
||||
assign cl_sel[i] = (hit[i]) ? data_rdata[i][{idx, 5'b0} +: FETCH_WIDTH] : '0;
|
||||
assign way_valid[i] = tag_rdata[i].valid;
|
||||
end
|
||||
endgenerate
|
||||
|
@ -131,7 +131,7 @@ module std_icache (
|
|||
// OR reduction of selected cachelines
|
||||
always_comb begin : p_reduction
|
||||
dreq_o.data = cl_sel[0];
|
||||
for(int i=1; i<ICACHE_SET_ASSOC;i++)
|
||||
for(int i = 1; i < ICACHE_SET_ASSOC; i++)
|
||||
dreq_o.data |= cl_sel[i];
|
||||
end
|
||||
|
||||
|
|
|
@ -57,8 +57,6 @@ module btb #(
|
|||
btb_d[update_pc].valid = 1'b1;
|
||||
// the target address is simply updated
|
||||
btb_d[update_pc].target_address = btb_update_i.target_address;
|
||||
// as is the information whether this was a compressed branch
|
||||
btb_d[update_pc].is_lower_16 = btb_update_i.is_lower_16;
|
||||
// check if we should invalidate this entry, this happens in case we predicted a branch
|
||||
// where there actually is none (aliasing)
|
||||
if (btb_update_i.clear) begin
|
||||
|
|
|
@ -39,22 +39,18 @@ module frontend (
|
|||
input icache_dreq_o_t icache_dreq_i,
|
||||
output icache_dreq_i_t icache_dreq_o,
|
||||
// instruction output port -> to processor back-end
|
||||
output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
|
||||
output frontend_fetch_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
|
||||
output logic fetch_entry_valid_o, // instruction in IF is valid
|
||||
input logic fetch_ack_i // ID acknowledged this instruction
|
||||
);
|
||||
// maximum instructions we can fetch on one request
|
||||
localparam int unsigned INSTR_PER_FETCH = FETCH_WIDTH/16;
|
||||
|
||||
// Registers
|
||||
logic [31:0] icache_data_q;
|
||||
logic icache_valid_q;
|
||||
exception_t icache_ex_q;
|
||||
|
||||
exception_t icache_ex_valid_q;
|
||||
logic instruction_valid;
|
||||
logic [INSTR_PER_FETCH-1:0] instr_is_compressed;
|
||||
|
||||
logic [63:0] icache_vaddr_q;
|
||||
|
||||
// BHT, BTB and RAS prediction
|
||||
bht_prediction_t bht_prediction;
|
||||
btb_prediction_t btb_prediction;
|
||||
|
@ -103,54 +99,160 @@ module frontend (
|
|||
// register to save the unaligned address
|
||||
logic [63:0] unaligned_address_d, unaligned_address_q;
|
||||
|
||||
// TODO: generalize to arbitrary instruction fetch width
|
||||
for (genvar i = 0; i < INSTR_PER_FETCH; i ++) begin
|
||||
// LSB != 2'b11
|
||||
assign instr_is_compressed[i] = ~&icache_data_q[i * 16 +: 2];
|
||||
end
|
||||
|
||||
// Soft-realignment to do branch-prediction
|
||||
always_comb begin : re_align
|
||||
unaligned_d = unaligned_q;
|
||||
unaligned_address_d = unaligned_address_q;
|
||||
unaligned_instr_d = unaligned_instr_q;
|
||||
instruction_valid = icache_valid_q;
|
||||
|
||||
instr[1] = '0;
|
||||
instr[0] = icache_data_q;
|
||||
// 32-bit can contain 2 instructions
|
||||
if (FETCH_WIDTH == 32) begin
|
||||
instr[0] = icache_data_q;
|
||||
addr[0] = icache_vaddr_q;
|
||||
|
||||
addr[1] = {icache_vaddr_q[63:2], 2'b10};
|
||||
addr[0] = icache_vaddr_q;
|
||||
instr[1] = '0;
|
||||
addr[1] = {icache_vaddr_q[63:2], 2'b10};
|
||||
// with 64-bit we can fetch up to 4 instructions/cycle
|
||||
end else if (FETCH_WIDTH == 64) begin
|
||||
instr[0] = icache_data_q;
|
||||
addr[0] = icache_vaddr_q;
|
||||
|
||||
instr[1] = '0;
|
||||
addr[1] = {icache_vaddr_q[63:3], 3'b010};
|
||||
|
||||
instr[2] = '0;
|
||||
addr[2] = {icache_vaddr_q[63:3], 3'b100};
|
||||
|
||||
instr[3] = '0;
|
||||
addr[3] = {icache_vaddr_q[63:3], 3'b110};
|
||||
end
|
||||
|
||||
if (icache_valid_q) begin
|
||||
// last instruction was unaligned
|
||||
if (unaligned_q) begin
|
||||
instr[0] = {icache_data_q[15:0], unaligned_instr_q};
|
||||
addr[0] = unaligned_address_q;
|
||||
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
|
||||
unaligned_instr_d = icache_data_q[31:16]; // save the upper bits for next cycle
|
||||
|
||||
if (FETCH_WIDTH == 32) begin
|
||||
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
|
||||
unaligned_instr_d = icache_data_q[31:16]; // save the upper bits for next cycle
|
||||
end else if (FETCH_WIDTH == 64) begin
|
||||
unaligned_address_d = {icache_vaddr_q[63:3], 3'b110};
|
||||
unaligned_instr_d = icache_data_q[63:48]; // save the upper bits for next cycle
|
||||
end
|
||||
// check if this instruction is still unaligned, i.e. it is not compressed
|
||||
// if it is compressed, reset the unaligned flag
|
||||
if (icache_data_q[17:16] != 2'b11) begin
|
||||
unaligned_d = 1'b0;
|
||||
instr[1] = {16'b0, icache_data_q[31:16]};
|
||||
if (FETCH_WIDTH == 32) begin
|
||||
// for 32 bit we can simply check the next instruction and whether it is compressed or not
|
||||
// if it is compressed the next fetch will contain an aligned instruction
|
||||
if (instr_is_compressed[1]) begin
|
||||
unaligned_d = 1'b0;
|
||||
instr[1] = {16'b0, icache_data_q[31:16]};
|
||||
end
|
||||
end else if (FETCH_WIDTH === 64) begin
|
||||
// for 64 bit there exist the following options:
|
||||
// 64 32 0
|
||||
// | I | I | U | -> again unaligned
|
||||
// | * | C | I | U | -> aligned
|
||||
// | * | I | C | U | -> aligned
|
||||
// | I | C | C | U | -> again unaligned
|
||||
// | * | C | C | C | U | -> aligned
|
||||
// Legend: C = compressed, I = 32 bit instruction, U = unaligned upper half
|
||||
// * = don't care
|
||||
if (instr_is_compressed[1]) begin
|
||||
instr[1] = {16'b0, icache_data_q[31:16]};
|
||||
if (instr_is_compressed[2]) begin
|
||||
if (instr_is_compressed[3]) begin
|
||||
unaligned_d = 1'b0;
|
||||
instr[3] = {16'b0, icache_data_q[63:48]};
|
||||
end else begin
|
||||
// continues to be unaligned
|
||||
end
|
||||
end else begin
|
||||
unaligned_d = 1'b0;
|
||||
instr[2] = icache_data_q[63:32];
|
||||
end
|
||||
// instruction 1 is not compressed
|
||||
end else begin
|
||||
instr[1] = icache_data_q[47:16];
|
||||
if (instr_is_compressed[2]) begin
|
||||
unaligned_d = 1'b0;
|
||||
instr[2] = icache_data_q[63:48];
|
||||
end else begin
|
||||
// continues to be unaligned
|
||||
end
|
||||
end
|
||||
end
|
||||
end else if (is_rvc[0]) begin // instruction zero is RVC
|
||||
// is instruction 1 also compressed
|
||||
// yes? -> no problem, no -> we've got an unaligned instruction
|
||||
if (icache_data_q[17:16] != 2'b11) begin
|
||||
instr[1] = {16'b0, icache_data_q[31:16]};
|
||||
end else begin
|
||||
unaligned_instr_d = icache_data_q[31:16];
|
||||
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
|
||||
unaligned_d = 1'b1;
|
||||
end else if (instr_is_compressed[0]) begin // instruction zero is RVC
|
||||
if (FETCH_WIDTH == 32) begin
|
||||
// is instruction 1 also compressed
|
||||
// yes? -> no problem, no -> we've got an unaligned instruction
|
||||
if (instr_is_compressed[1]) begin
|
||||
instr[1] = {16'b0, icache_data_q[31:16]};
|
||||
end else begin
|
||||
unaligned_instr_d = icache_data_q[31:16];
|
||||
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
|
||||
unaligned_d = 1'b1;
|
||||
end
|
||||
end else if (FETCH_WIDTH == 64) begin
|
||||
// 64 32 0
|
||||
// | I | I | C | -> again unaligned
|
||||
// | * | C | I | C | -> aligned
|
||||
// | * | I | C | C | -> aligned
|
||||
// | I | C | C | C | -> again unaligned
|
||||
// | * | C | C | C | C | -> aligned
|
||||
if (instr_is_compressed[1]) begin
|
||||
instr[1] = {16'b0, icache_data_q[31:16]};
|
||||
if (instr_is_compressed[2]) begin
|
||||
if (instr_is_compressed[3]) begin
|
||||
unaligned_d = 1'b0;
|
||||
instr[3] = {16'b0, icache_data_q[63:48]};
|
||||
end else begin
|
||||
// continues to be unaligned
|
||||
end
|
||||
end else begin
|
||||
unaligned_d = 1'b0;
|
||||
instr[2] = icache_data_q[63:32];
|
||||
end
|
||||
// instruction 1 is not compressed
|
||||
end else begin
|
||||
instr[1] = icache_data_q[47:16];
|
||||
if (instr_is_compressed[2]) begin
|
||||
unaligned_d = 1'b0;
|
||||
instr[2] = icache_data_q[63:48];
|
||||
end else begin
|
||||
// continues to be unaligned
|
||||
end
|
||||
end
|
||||
end
|
||||
end // else -> normal fetch
|
||||
end
|
||||
|
||||
// we started to fetch on an unaligned boundary with a whole instruction -> wait until we've
|
||||
// received the next instruction
|
||||
if (icache_valid_q && icache_vaddr_q[1] && icache_data_q[17:16] == 2'b11) begin
|
||||
instruction_valid = 1'b0;
|
||||
unaligned_d = 1'b1;
|
||||
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
|
||||
unaligned_instr_d = icache_data_q[31:16];
|
||||
if (FETCH_WIDTH == 32) begin
|
||||
if (icache_valid_q && icache_vaddr_q[1] && !instr_is_compressed[1]) begin
|
||||
instruction_valid = 1'b0;
|
||||
unaligned_d = 1'b1;
|
||||
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
|
||||
unaligned_instr_d = icache_data_q[31:16];
|
||||
end
|
||||
end else if (FETCH_WIDTH == 64) begin
|
||||
if (icache_valid_q && icache_vaddr_q[2] && icache_vaddr_q[1] && !instr_is_compressed[3]) begin
|
||||
instruction_valid = 1'b0;
|
||||
unaligned_d = 1'b1;
|
||||
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
|
||||
unaligned_instr_d = icache_data_q[31:16];
|
||||
end
|
||||
end
|
||||
|
||||
// if we killed the consecutive fetch we are starting on a clean slate
|
||||
if (icache_dreq_o.kill_s2) begin
|
||||
unaligned_d = 1'b0;
|
||||
end
|
||||
|
@ -178,7 +280,7 @@ module frontend (
|
|||
|
||||
// only predict if the response is valid
|
||||
if (instruction_valid) begin
|
||||
// look at instruction 0, 1, 2,...
|
||||
// look at instruction 0, 1, 2, ...
|
||||
for (int unsigned i = 0; i < INSTR_PER_FETCH; i++) begin
|
||||
// only speculate if the previous instruction was not taken
|
||||
if (!taken[i]) begin
|
||||
|
@ -235,8 +337,20 @@ module frontend (
|
|||
end
|
||||
|
||||
// we are not interested in the lower instruction
|
||||
if (icache_vaddr_q[1]) begin
|
||||
taken[1] = 1'b0;
|
||||
if (FETCH_WIDTH == 32) begin
|
||||
if (icache_vaddr_q[1]) begin
|
||||
taken[1] = 1'b0;
|
||||
// TODO(zarubaf): that seems to be overly pessimistic
|
||||
ras_pop = 1'b0;
|
||||
ras_push = 1'b0;
|
||||
end
|
||||
end else if (FETCH_WIDTH == 64) begin
|
||||
case (icache_vaddr_q[2:1])
|
||||
3'b010: taken[1] = 0;
|
||||
3'b100: taken[2] = 0;
|
||||
3'b110: taken[3] = 0;
|
||||
endcase
|
||||
// TODO(zarubaf): that seems to be overly pessimistic
|
||||
ras_pop = 1'b0;
|
||||
ras_push = 1'b0;
|
||||
end
|
||||
|
@ -249,29 +363,14 @@ module frontend (
|
|||
bp_sbe.valid = bp_valid;
|
||||
bp_sbe.predict_address = bp_vaddr;
|
||||
bp_sbe.predict_taken = bp_valid;
|
||||
bp_sbe.is_lower_16 = taken[1]; // the branch is on the lower 16 (in a 32-bit setup)
|
||||
|
||||
end
|
||||
|
||||
assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
|
||||
|
||||
always_comb begin : id_if
|
||||
icache_dreq_o.kill_s1 = 1'b0;
|
||||
icache_dreq_o.kill_s2 = 1'b0;
|
||||
|
||||
// we mis-predicted so kill the icache request and the fetch queue
|
||||
if (is_mispredict || flush_i) begin
|
||||
icache_dreq_o.kill_s1 = 1'b1;
|
||||
icache_dreq_o.kill_s2 = 1'b1;
|
||||
end
|
||||
|
||||
// if we have a valid branch-prediction we need to kill the last cache request
|
||||
if (bp_valid) begin
|
||||
icache_dreq_o.kill_s2 = 1'b1;
|
||||
end
|
||||
|
||||
fifo_valid = icache_valid_q;
|
||||
end
|
||||
// we mis-predicted so kill the icache request and the fetch queue
|
||||
assign icache_dreq_o.kill_s1 = is_mispredict | flush_i;
|
||||
// if we have a valid branch-prediction we need to kill the last cache request
|
||||
assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
|
||||
assign fifo_valid = icache_valid_q;
|
||||
|
||||
// ----------------------------------------
|
||||
// Update Control Flow Predictions
|
||||
|
@ -285,7 +384,6 @@ module frontend (
|
|||
assign btb_update.valid = resolved_branch_i.valid & (resolved_branch_i.cf_type == BTB);
|
||||
assign btb_update.pc = resolved_branch_i.pc;
|
||||
assign btb_update.target_address = resolved_branch_i.target_address;
|
||||
assign btb_update.is_lower_16 = resolved_branch_i.is_lower_16;
|
||||
assign btb_update.clear = resolved_branch_i.clear;
|
||||
|
||||
// -------------------
|
||||
|
@ -329,7 +427,7 @@ module frontend (
|
|||
// 0. Default assignment
|
||||
// -------------------------------
|
||||
if (if_ready) begin
|
||||
npc_d = {fetch_address[63:2], 2'b0} + 64'h4;
|
||||
npc_d = {fetch_address[63:2], 2'b0} + ((FETCH_WIDTH == 64) ? 'h8 : 'h4);
|
||||
end
|
||||
// -------------------------------
|
||||
// 2. Control flow change request
|
||||
|
@ -400,8 +498,8 @@ module frontend (
|
|||
@(posedge clk_i) disable iff (~rst_ni) (fifo_credits_q <= FETCH_FIFO_DEPTH))
|
||||
else $fatal("[frontend] fetch fifo credits must be <= FETCH_FIFO_DEPTH!");
|
||||
initial begin
|
||||
assert (FETCH_FIFO_DEPTH<=8) else $fatal("[frontend] fetch fifo deeper than 8 not supported");
|
||||
assert (FETCH_WIDTH==32) else $fatal("[frontend] fetch width != not supported");
|
||||
assert (FETCH_FIFO_DEPTH <= 8) else $fatal("[frontend] fetch fifo deeper than 8 not supported");
|
||||
assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64) else $fatal("[frontend] fetch width != not supported");
|
||||
end
|
||||
`endif
|
||||
//pragma translate_on
|
||||
|
@ -413,7 +511,7 @@ module frontend (
|
|||
icache_data_q <= '0;
|
||||
icache_valid_q <= 1'b0;
|
||||
icache_vaddr_q <= 'b0;
|
||||
icache_ex_q <= '0;
|
||||
icache_ex_valid_q <= 1'b0;
|
||||
unaligned_q <= 1'b0;
|
||||
unaligned_address_q <= '0;
|
||||
unaligned_instr_q <= '0;
|
||||
|
@ -425,7 +523,7 @@ module frontend (
|
|||
icache_data_q <= icache_dreq_i.data;
|
||||
icache_valid_q <= icache_dreq_i.valid;
|
||||
icache_vaddr_q <= icache_dreq_i.vaddr;
|
||||
icache_ex_q <= icache_dreq_i.ex;
|
||||
icache_ex_valid_q <= icache_dreq_i.ex.valid;
|
||||
unaligned_q <= unaligned_d;
|
||||
unaligned_address_q <= unaligned_address_d;
|
||||
unaligned_instr_q <= unaligned_instr_d;
|
||||
|
@ -490,8 +588,8 @@ module frontend (
|
|||
|
||||
fifo_v2 #(
|
||||
.DEPTH ( 8 ),
|
||||
.dtype ( fetch_entry_t ))
|
||||
i_fetch_fifo (
|
||||
.dtype ( frontend_fetch_t )
|
||||
) i_fetch_fifo (
|
||||
.clk_i ( clk_i ),
|
||||
.rst_ni ( rst_ni ),
|
||||
.flush_i ( flush_i ),
|
||||
|
@ -500,7 +598,7 @@ module frontend (
|
|||
.empty_o ( fifo_empty ),
|
||||
.alm_full_o ( ),
|
||||
.alm_empty_o ( ),
|
||||
.data_i ( {icache_vaddr_q, icache_data_q, bp_sbe, icache_ex_q} ),
|
||||
.data_i ( {icache_vaddr_q, icache_data_q, bp_sbe, taken[INSTR_PER_FETCH:1], icache_ex_valid_q} ),
|
||||
.push_i ( fifo_valid ),
|
||||
.data_o ( fetch_entry_o ),
|
||||
.pop_i ( fifo_pop )
|
||||
|
|
|
@ -21,7 +21,7 @@ module id_stage (
|
|||
|
||||
input logic flush_i,
|
||||
// from IF
|
||||
input fetch_entry_t fetch_entry_i,
|
||||
input frontend_fetch_t fetch_entry_i,
|
||||
input logic fetch_entry_valid_i,
|
||||
output logic decoded_instr_ack_o, // acknowledge the instruction (fetch entry)
|
||||
|
||||
|
@ -45,7 +45,6 @@ module id_stage (
|
|||
logic valid;
|
||||
scoreboard_entry_t sbe;
|
||||
logic is_ctrl_flow;
|
||||
|
||||
} issue_n, issue_q;
|
||||
|
||||
logic is_control_flow_instr;
|
||||
|
@ -62,9 +61,9 @@ module id_stage (
|
|||
// 1. Re-align instructions
|
||||
// ---------------------------------------------------------
|
||||
instr_realigner instr_realigner_i (
|
||||
.fetch_entry_0_i ( fetch_entry_i ),
|
||||
.fetch_entry_valid_0_i ( fetch_entry_valid_i ),
|
||||
.fetch_ack_0_o ( decoded_instr_ack_o ),
|
||||
.fetch_entry_i ( fetch_entry_i ),
|
||||
.fetch_entry_valid_i ( fetch_entry_valid_i ),
|
||||
.fetch_ack_o ( decoded_instr_ack_o ),
|
||||
|
||||
.fetch_entry_o ( fetch_entry ),
|
||||
.fetch_entry_valid_o ( fetch_entry_valid ),
|
||||
|
|
|
@ -20,9 +20,9 @@ module instr_realigner (
|
|||
// control signals
|
||||
input logic flush_i,
|
||||
|
||||
input fetch_entry_t fetch_entry_0_i,
|
||||
input logic fetch_entry_valid_0_i,
|
||||
output logic fetch_ack_0_o,
|
||||
input frontend_fetch_t fetch_entry_i,
|
||||
input logic fetch_entry_valid_i,
|
||||
output logic fetch_ack_o,
|
||||
|
||||
output fetch_entry_t fetch_entry_o,
|
||||
output logic fetch_entry_valid_o,
|
||||
|
@ -45,9 +45,9 @@ module instr_realigner (
|
|||
// check if the lower compressed instruction was no branch otherwise we will need to squash this instruction
|
||||
// but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction
|
||||
logic kill_upper_16_bit;
|
||||
assign kill_upper_16_bit = fetch_entry_0_i.branch_predict.valid &
|
||||
fetch_entry_0_i.branch_predict.predict_taken &
|
||||
fetch_entry_0_i.branch_predict.is_lower_16;
|
||||
assign kill_upper_16_bit = fetch_entry_i.branch_predict.valid &
|
||||
fetch_entry_i.branch_predict.predict_taken &
|
||||
fetch_entry_i.bp_taken[0];
|
||||
// ----------
|
||||
// Registers
|
||||
// ----------
|
||||
|
@ -58,10 +58,16 @@ module instr_realigner (
|
|||
compressed_n = compressed_q;
|
||||
unaligned_address_n = unaligned_address_q;
|
||||
|
||||
// directly output this instruction. adoptions are made throughout the process
|
||||
fetch_entry_o = fetch_entry_0_i;
|
||||
fetch_entry_valid_o = fetch_entry_valid_0_i;
|
||||
fetch_ack_0_o = fetch_ack_i;
|
||||
// directly output this instruction. adaptations are made throughout the always_comb block
|
||||
fetch_entry_o.address = fetch_entry_i.address;
|
||||
fetch_entry_o.instruction = fetch_entry_i.instruction;
|
||||
fetch_entry_o.branch_predict = fetch_entry_i.branch_predict;
|
||||
fetch_entry_o.ex.valid = fetch_entry_i.page_fault;
|
||||
fetch_entry_o.ex.tval = (fetch_entry_i.page_fault) ? fetch_entry_i.address : '0;
|
||||
fetch_entry_o.ex.cause = (fetch_entry_i.page_fault) ? riscv::INSTR_PAGE_FAULT : '0;
|
||||
|
||||
fetch_entry_valid_o = fetch_entry_valid_i;
|
||||
fetch_ack_o = fetch_ack_i;
|
||||
// we just jumped to a half word and encountered an unaligned 32-bit instruction
|
||||
jump_unaligned_half_word = 1'b0;
|
||||
// ---------------------------------
|
||||
|
@ -69,21 +75,21 @@ module instr_realigner (
|
|||
// ---------------------------------
|
||||
// check if the entry of the fetch FIFO is valid and if we are currently not serving the second part
|
||||
// of a compressed instruction
|
||||
if (fetch_entry_valid_0_i && !compressed_q) begin
|
||||
if (fetch_entry_valid_i && !compressed_q) begin
|
||||
// ------------------------
|
||||
// Access on Word Boundary
|
||||
// ------------------------
|
||||
if (fetch_entry_0_i.address[1] == 1'b0) begin
|
||||
if (fetch_entry_i.address[1] == 1'b0) begin
|
||||
// do we actually want the first instruction or was the address a half word access?
|
||||
if (!unaligned_q) begin
|
||||
// we got a valid instruction so we can satisfy the unaligned instruction
|
||||
unaligned_n = 1'b0;
|
||||
// check if the instruction is compressed
|
||||
if (fetch_entry_0_i.instruction[1:0] != 2'b11) begin
|
||||
if (fetch_entry_i.instruction[1:0] != 2'b11) begin
|
||||
// it is compressed
|
||||
fetch_entry_o.instruction = {15'b0, fetch_entry_0_i.instruction[15:0]};
|
||||
fetch_entry_o.instruction = {15'b0, fetch_entry_i.instruction[15:0]};
|
||||
// we need to kill the lower prediction
|
||||
if (fetch_entry_0_i.branch_predict.valid && !fetch_entry_0_i.branch_predict.is_lower_16)
|
||||
if (fetch_entry_i.branch_predict.valid && !fetch_entry_i.bp_taken[0])
|
||||
fetch_entry_o.branch_predict.valid = 1'b0;
|
||||
|
||||
// should we even look at the upper instruction bits?
|
||||
|
@ -93,20 +99,20 @@ module instr_realigner (
|
|||
// _____________________________________________
|
||||
// | compressed 2 [31:16] | compressed 1[15:0] |
|
||||
// |____________________________________________
|
||||
if (fetch_entry_0_i.instruction[17:16] != 2'b11) begin
|
||||
if (fetch_entry_i.instruction[17:16] != 2'b11) begin
|
||||
// yes, this was a compressed instruction
|
||||
compressed_n = 1'b1;
|
||||
// do not advance the queue pointer
|
||||
fetch_ack_0_o = 1'b0;
|
||||
fetch_ack_o = 1'b0;
|
||||
// 2. or is it an unaligned 32 bit instruction like
|
||||
// ____________________________________________________
|
||||
// |instr [15:0] | instr [31:16] | compressed 1[15:0] |
|
||||
// |____________________________________________________
|
||||
end else begin
|
||||
// save the lower 16 bit
|
||||
unaligned_instr_n = fetch_entry_0_i.instruction[31:16];
|
||||
unaligned_instr_n = fetch_entry_i.instruction[31:16];
|
||||
// save the address
|
||||
unaligned_address_n = {fetch_entry_0_i.address[63:2], 2'b10};
|
||||
unaligned_address_n = {fetch_entry_i.address[63:2], 2'b10};
|
||||
// and that it was unaligned
|
||||
unaligned_n = 1'b1;
|
||||
// this does not consume space in the FIFO
|
||||
|
@ -124,7 +130,7 @@ module instr_realigner (
|
|||
|
||||
|
||||
fetch_entry_o.address = unaligned_address_q;
|
||||
fetch_entry_o.instruction = {fetch_entry_0_i.instruction[15:0], unaligned_instr_q};
|
||||
fetch_entry_o.instruction = {fetch_entry_i.instruction[15:0], unaligned_instr_q};
|
||||
|
||||
// again should we look at the upper bits?
|
||||
if (!kill_upper_16_bit) begin
|
||||
|
@ -135,15 +141,15 @@ module instr_realigner (
|
|||
// |____________________________________________
|
||||
// check if the lower compressed instruction was no branch otherwise we will need to squash this instruction
|
||||
// but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction
|
||||
if (fetch_entry_0_i.instruction[17:16] != 2'b11) begin
|
||||
if (fetch_entry_i.instruction[17:16] != 2'b11) begin
|
||||
// this was a compressed instruction
|
||||
compressed_n = 1'b1;
|
||||
// do not advance the queue pointer
|
||||
fetch_ack_0_o = 1'b0;
|
||||
fetch_ack_o = 1'b0;
|
||||
// unaligned access served
|
||||
unaligned_n = 1'b0;
|
||||
// we need to kill the lower prediction
|
||||
if (fetch_entry_0_i.branch_predict.valid && !fetch_entry_0_i.branch_predict.is_lower_16)
|
||||
if (fetch_entry_i.branch_predict.valid && !fetch_entry_i.bp_taken[0])
|
||||
fetch_entry_o.branch_predict.valid = 1'b0;
|
||||
// or is it an unaligned 32 bit instruction like
|
||||
// ____________________________________________________
|
||||
|
@ -151,15 +157,15 @@ module instr_realigner (
|
|||
// |____________________________________________________
|
||||
end else if (!kill_upper_16_bit) begin
|
||||
// save the lower 16 bit
|
||||
unaligned_instr_n = fetch_entry_0_i.instruction[31:16];
|
||||
unaligned_instr_n = fetch_entry_i.instruction[31:16];
|
||||
// save the address
|
||||
unaligned_address_n = {fetch_entry_0_i.address[63:2], 2'b10};
|
||||
unaligned_address_n = {fetch_entry_i.address[63:2], 2'b10};
|
||||
// and that it was unaligned
|
||||
unaligned_n = 1'b1;
|
||||
end
|
||||
end
|
||||
// we've got a predicted taken branch we need to clear the unaligned flag if it was decoded as a lower 16 instruction
|
||||
else if (fetch_entry_0_i.branch_predict.valid) begin
|
||||
else if (fetch_entry_i.branch_predict.valid) begin
|
||||
// the next fetch will start from a 4 byte boundary again
|
||||
unaligned_n = 1'b0;
|
||||
end
|
||||
|
@ -168,26 +174,26 @@ module instr_realigner (
|
|||
// ----------------------------
|
||||
// Access on half-Word Boundary
|
||||
// ----------------------------
|
||||
else if (fetch_entry_0_i.address[1] == 1'b1) begin // address was a half word access
|
||||
else if (fetch_entry_i.address[1] == 1'b1) begin // address was a half word access
|
||||
// reset the unaligned flag as this is a completely new fetch (because consecutive fetches only happen on a word basis)
|
||||
unaligned_n = 1'b0;
|
||||
// this is a compressed instruction
|
||||
if (fetch_entry_0_i.instruction[17:16] != 2'b11) begin
|
||||
if (fetch_entry_i.instruction[17:16] != 2'b11) begin
|
||||
// it is compressed
|
||||
fetch_entry_o.instruction = {15'b0, fetch_entry_0_i.instruction[31:16]};
|
||||
fetch_entry_o.instruction = {15'b0, fetch_entry_i.instruction[31:16]};
|
||||
|
||||
// this is the first part of a 32 bit unaligned instruction
|
||||
end else begin
|
||||
// save the lower 16 bit
|
||||
unaligned_instr_n = fetch_entry_0_i.instruction[31:16];
|
||||
unaligned_instr_n = fetch_entry_i.instruction[31:16];
|
||||
// and that it was unaligned
|
||||
unaligned_n = 1'b1;
|
||||
// save the address
|
||||
unaligned_address_n = {fetch_entry_0_i.address[63:2], 2'b10};
|
||||
unaligned_address_n = {fetch_entry_i.address[63:2], 2'b10};
|
||||
// we need to wait for the second instruction
|
||||
fetch_entry_valid_o = 1'b0;
|
||||
// so get it by acknowledging this instruction
|
||||
fetch_ack_0_o = 1'b1;
|
||||
fetch_ack_o = 1'b1;
|
||||
// we got to an unaligned instruction -> get the next entry to fulfill the need
|
||||
jump_unaligned_half_word = 1'b1;
|
||||
end
|
||||
|
@ -199,10 +205,10 @@ module instr_realigner (
|
|||
// ----------------------------
|
||||
// we are serving the second part of an instruction which was also compressed
|
||||
if (compressed_q) begin
|
||||
fetch_ack_0_o = fetch_ack_i;
|
||||
fetch_ack_o = fetch_ack_i;
|
||||
compressed_n = 1'b0;
|
||||
fetch_entry_o.instruction = {16'b0, fetch_entry_0_i.instruction[31:16]};
|
||||
fetch_entry_o.address = {fetch_entry_0_i.address[63:2], 2'b10};
|
||||
fetch_entry_o.instruction = {16'b0, fetch_entry_i.instruction[31:16]};
|
||||
fetch_entry_o.address = {fetch_entry_i.address[63:2], 2'b10};
|
||||
fetch_entry_valid_o = 1'b1;
|
||||
end
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue