Merge branch 'new-frontend' into ariane_next

This commit is contained in:
Florian Zaruba 2018-03-14 14:35:49 +01:00
commit 594d4687e9
No known key found for this signature in database
GPG key ID: E742FFE8EC38A792
40 changed files with 2154 additions and 1768 deletions

1
.gitignore vendored
View file

@ -18,3 +18,4 @@ obj_dir/*
*.dasm
/Bender.lock
/Bender.local
build/

View file

@ -37,6 +37,7 @@ env:
branches:
only:
- master
- ariane_next
before_install:
- export CXX=g++-4.8 CC=gcc-4.8

View file

@ -3,9 +3,58 @@ package:
authors: [ "Florian Zaruba <zarubaf@iis.ee.ethz.ch>" ]
dependencies:
axi2per: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi2per.git", version: 0.1.0 }
axi_mem_if: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_mem_if.git", version: 0.1.0 }
axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: 1.0.1 }
axi_slice: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_slice.git", version: 1.1.0 }
axi: { git: "git@iis-git.ee.ethz.ch:fschuiki/axi.git", rev: master }
# bender-vsim: { git: "git@iis-git.ee.ethz.ch:floce/bender-vsim.git", rev: master }
axi: { git: "git@iis-git.ee.ethz.ch:sasa/axi.git", rev: master }
axi2per: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi2per.git", rev: master }
axi_mem_if: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_mem_if.git", version: 0.1.1 }
axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: 1.0.3 }
axi_slice: { git: "git@iis-git.ee.ethz.ch:sasa/axi_slice.git", version: 1.1.2 }
tech_cells_generic: { git: "git@iis-git.ee.ethz.ch:pulp-open/tech_cells_generic.git", rev: master }
common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: 1.1.0 }
sources:
- target: not(synthesis)
files:
- src/util/instruction_tracer_pkg.sv
- src/util/instruction_tracer_if.sv
- include/ariane_pkg.sv
- include/nbdcache_pkg.sv
- src/alu.sv
- src/ariane.sv
- src/branch_unit.sv
- src/cache_ctrl.sv
- src/commit_stage.sv
- src/compressed_decoder.sv
- src/controller.sv
- src/csr_buffer.sv
- src/csr_regfile.sv
- src/debug_unit.sv
- src/decoder.sv
- src/ex_stage.sv
- src/fetch_fifo.sv
- src/ff1.sv
- src/fifo.sv
- src/frontend.sv
- src/icache.sv
- src/id_stage.sv
- src/if_stage.sv
- src/instr_realigner.sv
- src/issue_read_operands.sv
- src/issue_stage.sv
- src/lfsr.sv
- src/load_unit.sv
- src/lsu_arbiter.sv
- src/lsu.sv
- src/miss_handler.sv
- src/mmu.sv
- src/mult.sv
- src/nbdcache.sv
- src/pcgen_stage.sv
- src/perf_counters.sv
- src/ptw.sv
- src/regfile_ff.sv
- src/regfile.sv
- src/re_name.sv
- src/scoreboard.sv
- src/store_buffer.sv
- src/store_unit.sv
- src/tlb.sv

View file

@ -10,6 +10,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Added
- Improved instruction fetch front-end
- Add RAS
- Add support for Bender hardware management
### Changed
- Bugfix in non-detected illegal instruction #12
- Bugfix in non-detected illegal instruction JALR (funct3 != 0)
- Bugfix in non-detected illegal instruction FENCE (some bit-checks missing)
@ -83,8 +89,3 @@ Linux booting on FPGA.
### Added
- Initial development, getting to a stable point
[Unreleased]: https://iis-git.ee.ethz.ch/floce/ariane/compare/v0.3.0...HEAD
[0.3.0]: https://iis-git.ee.ethz.ch/floce/ariane/compare/v0.3.0...v0.4.0
[0.2.0]: https://iis-git.ee.ethz.ch/floce/ariane/compare/v0.2.0...v0.3.0
[0.1.0]: https://iis-git.ee.ethz.ch/floce/ariane/compare/v0.1.0...v0.2.0

10
Makefile Normal file → Executable file
View file

@ -12,7 +12,7 @@ max_cycles ?= 10000000
# Test case to run
test_case ?= core_test
# QuestaSim Version
questa_version ?= -10.6b
questa_version ?=
# verilator version
verilator ?= verilator
# preset which runs a single test
@ -53,8 +53,8 @@ riscv-tests := rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p-
rv64ui-p-sraiw rv64ui-p-sraw rv64ui-p-srl rv64ui-p-srli rv64ui-p-srliw rv64ui-p-srlw \
rv64ui-p-lb rv64ui-p-lbu rv64ui-p-ld rv64ui-p-lh rv64ui-p-lhu rv64ui-p-lui rv64ui-p-lw rv64ui-p-lwu \
rv64mi-p-csr rv64mi-p-mcsr rv64mi-p-illegal rv64mi-p-ma_addr rv64mi-p-ma_fetch rv64mi-p-sbreak rv64mi-p-scall \
rv64si-p-csr rv64si-p-ma_fetch rv64si-p-scall rv64si-p-wfi rv64si-p-sbreak rv64si-p-dirty \
rv64uc-p-rvc \
rv64si-p-csr rv64si-p-ma_fetch rv64si-p-scall rv64si-p-wfi rv64si-p-sbreak rv64si-p-dirty \
rv64uc-p-rvc \
rv64ui-v-add rv64ui-v-addi rv64ui-p-slli rv64ui-v-addiw rv64ui-v-addw rv64ui-v-and rv64ui-v-auipc \
rv64ui-v-beq rv64ui-v-bge rv64ui-v-bgeu rv64ui-v-andi rv64ui-v-blt rv64ui-v-bltu rv64ui-v-bne \
rv64ui-v-simple rv64ui-v-jal rv64ui-v-jalr rv64ui-v-or rv64ui-v-ori rv64ui-v-sub rv64ui-v-subw \
@ -145,7 +145,7 @@ run-asm-tests: build
$(library).$(test_top_level)_optimized;)
run-asm-tests-verilator: verilate
$(foreach test, $(riscv-tests), obj_dir/Variane_wrapped $(riscv-test-dir)/$(test);)
$(foreach test, $(riscv-tests), obj_dir/Variane_wrapped --label="Starting: $(riscv-test-dir)/$(test)" $(riscv-test-dir)/$(test);)
run-failed-tests: build
# make the tests
@ -175,7 +175,7 @@ $(tests): build
verilate:
$(verilator) $(ariane_pkg) $(filter-out src/regfile.sv, $(wildcard src/*.sv)) $(wildcard src/axi_slice/*.sv) \
src/util/cluster_clock_gating.sv src/util/behav_sram.sv src/axi_mem_if/axi2mem.sv tb/agents/axi_if/axi_if.sv \
--unroll-count 256 -Wno-fatal -LDFLAGS "-lfesvr" -CFLAGS "-std=c++11" -Wall --cc --trace \
--unroll-count 1024 -Wno-fatal -Wno-UNOPTFLAT -LDFLAGS "-lfesvr" -CFLAGS "-std=c++11" -Wall --cc --trace \
$(list_incdir) --top-module ariane_wrapped --exe tb/ariane_tb.cpp tb/simmem.cpp
cd obj_dir && make -j8 -f Variane_wrapped.mk

78
include/ariane_pkg.sv Normal file → Executable file
View file

@ -31,6 +31,7 @@ package ariane_pkg;
localparam ASID_WIDTH = 1;
localparam BTB_ENTRIES = 8;
localparam BITS_SATURATION_COUNTER = 2;
localparam NR_COMMIT_PORTS = 2;
localparam logic [63:0] ISA_CODE = (1 << 2) // C - Compressed extension
| (1 << 8) // I - RV32I/64I/128I base ISA
@ -41,6 +42,9 @@ package ariane_pkg;
| (0 << 23) // X - Non-standard extensions present
| (1 << 63); // RV64
// 32 registers + 1 bit for re-naming = 6
localparam REG_ADDR_SIZE = 6;
// ---------------
// Fetch Stage
// ---------------
@ -53,6 +57,8 @@ package ariane_pkg;
logic valid;
} exception_t;
typedef enum logic [1:0] { BHT, BTB, RAS } cf_t;
// branch-predict
// this is the struct we get back from ex stage and we will use it to update
// all the necessary data structures
@ -65,19 +71,53 @@ package ariane_pkg;
// in the lower 16 bit of the word
logic valid; // prediction with all its values is valid
logic clear; // invalidate this entry
cf_t cf_type; // Type of control flow change
} branchpredict_t;
// branchpredict scoreboard entry
// this is the struct which we will inject into the pipeline to guide the various
// units towards the correct branch decision and resolve
typedef struct packed {
logic valid; // this is a valid hint
logic [63:0] predict_address; // target address at which to jump, or not
logic predict_taken; // branch is taken
logic is_lower_16; // branch instruction is compressed and resides
// in the lower 16 bit of the word
logic valid; // this is a valid hint
cf_t cf_type; // Type of control flow change
} branchpredict_sbe_t;
typedef struct packed {
logic valid;
logic [63:0] pc; // update at PC
logic [63:0] target_address;
logic is_lower_16;
logic clear;
} btb_update_t;
typedef struct packed {
logic valid;
logic [63:0] target_address;
logic is_lower_16;
} btb_prediction_t;
typedef struct packed {
logic valid;
logic [63:0] ra;
} ras_t;
typedef struct packed {
logic valid;
logic [63:0] pc; // update at PC
logic mispredict;
logic taken;
} bht_update_t;
typedef struct packed {
logic valid;
logic taken;
logic strongly_taken;
} bht_prediction_t;
typedef enum logic[3:0] {
NONE, LOAD, STORE, ALU, CTRL_FLOW, MULT, CSR
} fu_t;
@ -136,6 +176,7 @@ package ariane_pkg;
fu_op operator;
logic [TRANS_ID_BITS-1:0] trans_id;
} lsu_ctrl_t;
// ---------------
// IF/ID Stage
// ---------------
@ -156,9 +197,9 @@ package ariane_pkg;
// with the transaction id in any case make the width more generic
fu_t fu; // functional unit to use
fu_op op; // operation to perform in each functional unit
logic [4:0] rs1; // register source address 1
logic [4:0] rs2; // register source address 2
logic [4:0] rd; // register destination address
logic [REG_ADDR_SIZE-1:0] rs1; // register source address 1
logic [REG_ADDR_SIZE-1:0] rs2; // register source address 2
logic [REG_ADDR_SIZE-1:0] rd; // register destination address
logic [63:0] result; // for unfinished instructions this field also holds the immediate
logic valid; // is the result valid
logic use_imm; // should we use the immediate as operand b?
@ -230,6 +271,10 @@ package ariane_pkg;
localparam OPCODE_AUIPC = 7'h17;
localparam OPCODE_LUI = 7'h37;
localparam OPCODE_AMO = 7'h2F;
localparam OPCODE_C_J = 3'b101;
localparam OPCODE_C_BEQZ = 3'b110;
localparam OPCODE_C_BNEZ = 3'b111;
// --------------------
// Atomics
// --------------------
@ -262,6 +307,16 @@ package ariane_pkg;
logic v;
} pte_t;
typedef struct packed {
logic valid; // valid flag
logic is_2M; //
logic is_1G; //
logic [26:0] vpn;
logic [ASID_WIDTH-1:0] asid;
pte_t content;
} tlb_update_t;
// Bits required for representation of physical address space as 4K pages
// (e.g. 27*4K == 39bit address space).
localparam PPN4K_WIDTH = 38;
@ -421,4 +476,19 @@ package ariane_pkg;
function automatic logic [63:0] sext32 (logic [31:0] operand);
return {{32{operand[31]}}, operand[31:0]};
endfunction
// ----------------------
// Immediate functions
// ----------------------
function automatic logic [63:0] uj_imm (logic [31:0] instruction_i);
return { {44 {instruction_i[31]}}, instruction_i[19:12], instruction_i[20], instruction_i[30:21], 1'b0 };
endfunction
function automatic logic [63:0] i_imm (logic [31:0] instruction_i);
return { {52 {instruction_i[31]}}, instruction_i[31:20] };
endfunction
function automatic logic [63:0] sb_imm (logic [31:0] instruction_i);
return { {51 {instruction_i[31]}}, instruction_i[31], instruction_i[7], instruction_i[30:25], instruction_i[11:8], 1'b0 };
endfunction
endpackage

View file

@ -72,7 +72,7 @@ module ariane #(
branchpredict_t resolved_branch;
logic [63:0] pc_commit;
logic eret;
logic commit_ack;
logic [NR_COMMIT_PORTS-1:0] commit_ack;
// --------------
// PCGEN <-> IF
@ -166,22 +166,21 @@ module ariane #(
// --------------
// ID <-> COMMIT
// --------------
scoreboard_entry_t commit_instr_id_commit;
scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_id_commit;
// --------------
// COMMIT <-> ID
// --------------
logic [4:0] waddr_a_commit_id;
logic [63:0] wdata_a_commit_id;
logic we_a_commit_id;
logic [NR_COMMIT_PORTS-1:0][4:0] waddr_commit_id;
logic [NR_COMMIT_PORTS-1:0][63:0] wdata_commit_id;
logic [NR_COMMIT_PORTS-1:0] we_commit_id;
// --------------
// IF <-> EX
// --------------
logic fetch_req_if_ex;
logic fetch_gnt_ex_if;
logic fetch_valid_ex_if;
logic [63:0] fetch_rdata_ex_if;
exception_t fetch_ex_ex_if;
logic [63:0] fetch_vaddr_if_ex;
logic fetch_valid_ex_if;
logic [63:0] fetch_paddr_ex_if;
exception_t fetch_ex_ex_if;
// --------------
// CSR <-> *
// --------------
@ -215,7 +214,7 @@ module ariane #(
// CTRL <-> *
// --------------
logic flush_bp_ctrl_pcgen;
logic flush_ctrl_pcgen;
logic set_pc_ctrl_pcgen;
logic flush_csr_ctrl;
logic flush_unissued_instr_ctrl_id;
logic flush_ctrl_if;
@ -250,66 +249,41 @@ module ariane #(
// ----------------
// ICache <-> *
// ----------------
logic [63:0] instr_if_address;
logic instr_if_data_req; // fetch request
logic [3:0] instr_if_data_be;
logic instr_if_data_gnt; // fetch request
logic instr_if_data_rvalid; // fetch data
logic [63:0] instr_if_data_rdata;
logic flush_icache_ctrl_icache;
logic bypass_icache_csr_icache;
logic flush_icache_ack_icache_ctrl;
assign sec_lvl_o = priv_lvl;
assign flush_dcache_ack_o = flush_dcache_ack_ex_ctrl;
// --------------
// NPC Generation
// Frontend
// --------------
pcgen_stage pcgen_stage_i (
.fetch_enable_i ( fetch_enable ),
.flush_i ( flush_ctrl_pcgen ),
.flush_bp_i ( flush_bp_ctrl_pcgen ),
.if_ready_i ( ~if_ready_if_pcgen ),
.resolved_branch_i ( resolved_branch ),
.fetch_address_o ( fetch_address_pcgen_if ),
.fetch_valid_o ( fetch_valid_pcgen_if ),
.branch_predict_o ( branch_predict_pcgen_if ),
.boot_addr_i ( boot_addr_i ),
.pc_commit_i ( pc_commit ),
.epc_i ( epc_commit_pcgen ),
.eret_i ( eret ),
.trap_vector_base_i ( trap_vector_base_commit_pcgen ),
.ex_valid_i ( ex_commit.valid ),
.debug_pc_i ( pc_debug_pcgen ),
.debug_set_pc_i ( set_pc_debug ),
.*
);
// ---------
// IF
// ---------
if_stage if_stage_i (
.flush_i ( flush_ctrl_if ),
.halt_i ( halt_ctrl ),
.if_busy_o ( if_ready_if_pcgen ),
.fetch_address_i ( fetch_address_pcgen_if ),
.fetch_valid_i ( fetch_valid_pcgen_if ),
.branch_predict_i ( branch_predict_pcgen_if ),
.instr_req_o ( fetch_req_if_ex ),
.instr_addr_o ( fetch_vaddr_if_ex ),
.instr_gnt_i ( fetch_gnt_ex_if ),
.instr_rvalid_i ( fetch_valid_ex_if ),
.instr_rdata_i ( fetch_rdata_ex_if ),
.instr_ex_i ( fetch_ex_ex_if ), // fetch exception
frontend #(
.fetch_entry_0_o ( fetch_entry_if_id ),
.fetch_entry_valid_0_o ( fetch_valid_if_id ),
.fetch_ack_0_i ( decode_ack_id_if ),
// Reserved for future use
.fetch_entry_1_o ( ),
.fetch_entry_valid_1_o ( ),
.fetch_ack_1_i ( ),
) i_frontend (
.flush_i ( flush_ctrl_if ), // not entirely correct
.flush_bp_i ( 1'b0 ),
.flush_icache_i ( flush_icache_ctrl_icache ),
.boot_addr_i ( boot_addr_i ),
.fetch_enable_i ( fetch_enable ),
.fetch_req_o ( fetch_req_if_ex ),
.fetch_vaddr_o ( fetch_vaddr_if_ex ),
.fetch_valid_i ( fetch_valid_ex_if ),
.fetch_paddr_i ( fetch_paddr_ex_if ),
.fetch_exception_i ( fetch_ex_ex_if ),
.resolved_branch_i ( resolved_branch ),
.pc_commit_i ( pc_commit ),
.set_pc_commit_i ( set_pc_ctrl_pcgen ),
.epc_i ( epc_commit_pcgen ),
.eret_i ( eret ),
.trap_vector_base_i ( trap_vector_base_commit_pcgen ),
.ex_valid_i ( ex_commit.valid ),
.debug_pc_i ( pc_debug_pcgen ),
.debug_set_pc_i ( set_pc_debug ),
.axi ( instr_if ),
.l1_icache_miss_o ( ), // performance counters
.fetch_entry_o ( fetch_entry_if_id ),
.fetch_entry_valid_o ( fetch_valid_if_id ),
.fetch_ack_i ( decode_ack_id_if ),
.*
);
@ -385,17 +359,16 @@ module ariane #(
.csr_valid_o ( csr_valid_id_ex ),
.trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, branch_trans_id_ex_id, csr_trans_id_ex_id, mult_trans_id_ex_id }),
.wdata_i ( {alu_result_ex_id, lsu_result_ex_id, branch_result_ex_id, csr_result_ex_id, mult_result_ex_id }),
.wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, branch_result_ex_id, csr_result_ex_id, mult_result_ex_id }),
.ex_ex_i ( {{$bits(exception_t){1'b0}}, lsu_exception_ex_id, branch_exception_ex_id, {$bits(exception_t){1'b0}}, {$bits(exception_t){1'b0}} }),
.wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, branch_valid_ex_id, csr_valid_ex_id, mult_valid_ex_id }),
.waddr_a_i ( waddr_a_commit_id ),
.wdata_a_i ( wdata_a_commit_id ),
.we_a_i ( we_a_commit_id ),
.commit_instr_o ( commit_instr_id_commit ),
.commit_ack_i ( commit_ack ),
.waddr_i ( waddr_commit_id ),
.wdata_i ( wdata_commit_id ),
.we_i ( we_commit_id ),
.commit_instr_o ( commit_instr_id_commit ),
.commit_ack_i ( commit_ack ),
.*
);
@ -460,12 +433,12 @@ module ariane #(
.enable_translation_i ( enable_translation_csr_ex ), // from CSR
.en_ld_st_translation_i ( en_ld_st_translation_csr_ex ),
.flush_tlb_i ( flush_tlb_ctrl_ex ),
.fetch_req_i ( fetch_req_if_ex ),
.fetch_gnt_o ( fetch_gnt_ex_if ),
.fetch_valid_o ( fetch_valid_ex_if ),
.fetch_vaddr_i ( fetch_vaddr_if_ex ),
.fetch_rdata_o ( fetch_rdata_ex_if ),
.fetch_ex_o ( fetch_ex_ex_if ), // fetch exception to IF
.fetch_paddr_o ( fetch_paddr_ex_if ),
.fetch_exception_o ( fetch_ex_ex_if ), // fetch exception to IF
.priv_lvl_i ( priv_lvl ), // from CSR
.ld_st_priv_lvl_i ( ld_st_priv_lvl_csr_ex ), // from CSR
.sum_i ( sum_csr_ex ), // from CSR
@ -479,13 +452,6 @@ module ariane #(
.mult_result_o ( mult_result_ex_id ),
.mult_valid_o ( mult_valid_ex_id ),
.instr_if_address_o ( instr_if_address ),
.instr_if_data_req_o ( instr_if_data_req ),
.instr_if_data_be_o ( instr_if_data_be ),
.instr_if_data_gnt_i ( instr_if_data_gnt ),
.instr_if_data_rvalid_i ( instr_if_data_rvalid ),
.instr_if_data_rdata_i ( instr_if_data_rdata ),
.data_if ( data_if ),
.dcache_en_i ( dcache_en_csr_nbdcache ),
.flush_dcache_i ( flush_dcache_ctrl_ex | flush_dcache_i ),
@ -503,9 +469,9 @@ module ariane #(
.commit_instr_i ( commit_instr_id_commit ),
.commit_ack_o ( commit_ack ),
.no_st_pending_i ( no_st_pending_ex_commit ),
.waddr_a_o ( waddr_a_commit_id ),
.wdata_a_o ( wdata_a_commit_id ),
.we_a_o ( we_a_commit_id ),
.waddr_o ( waddr_commit_id ),
.wdata_o ( wdata_commit_id ),
.we_o ( we_commit_id ),
.commit_lsu_o ( lsu_commit_commit_ex ),
.commit_lsu_ready_i ( lsu_commit_ready_ex_commit ),
.commit_csr_o ( csr_commit_commit_ex ),
@ -525,8 +491,7 @@ module ariane #(
// ---------
csr_regfile #(
.ASID_WIDTH ( ASID_WIDTH )
)
csr_regfile_i (
) csr_regfile_i (
.flush_o ( flush_csr_ctrl ),
.halt_csr_o ( halt_csr_ctrl ),
.debug_csr_req_i ( csr_req_debug_csr ),
@ -575,7 +540,7 @@ module ariane #(
.data_i ( data_csr_perf ),
.data_o ( data_perf_csr ),
.commit_instr_i ( commit_instr_id_commit ),
.commit_ack_o ( commit_ack ),
.commit_ack_i ( commit_ack ),
.l1_icache_miss_i ( 1'b0 ),
.l1_dcache_miss_i ( dcache_miss_ex_perf ),
@ -593,7 +558,7 @@ module ariane #(
controller controller_i (
// flush ports
.flush_bp_o ( flush_bp_ctrl_pcgen ),
.flush_pcgen_o ( flush_ctrl_pcgen ),
.set_pc_commit_o ( set_pc_ctrl_pcgen ),
.flush_unissued_instr_o ( flush_unissued_instr_ctrl_id ),
.flush_if_o ( flush_ctrl_if ),
.flush_id_o ( flush_ctrl_id ),
@ -616,7 +581,6 @@ module ariane #(
.sfence_vma_i ( sfence_vma_commit_controller ),
.flush_icache_o ( flush_icache_ctrl_icache ),
.flush_icache_ack_i ( flush_icache_ack_icache_ctrl ),
.*
);
@ -624,8 +588,8 @@ module ariane #(
// Debug
// ------------
debug_unit debug_unit_i (
.commit_instr_i ( commit_instr_id_commit ),
.commit_ack_i ( commit_ack ),
.commit_instr_i ( commit_instr_id_commit[0] ),
.commit_ack_i ( commit_ack[0] ),
.ex_i ( ex_commit ),
.halt_o ( halt_debug_ctrl ),
.fetch_enable_i ( fetch_enable ),
@ -647,31 +611,6 @@ module ariane #(
.*
);
// -------------------
// Instruction Cache
// -------------------
icache #(
.AXI_USER_WIDTH ( AXI_USER_WIDTH ),
.AXI_ID_WIDTH ( AXI_ID_WIDTH )
) i_icache (
.clk_i ( clk_i ),
.rst_n ( rst_ni ),
.test_en_i ( test_en_i ),
.fetch_req_i ( instr_if_data_req ),
.fetch_addr_i ( {instr_if_address[55:3], 3'b0} ),
.fetch_gnt_o ( instr_if_data_gnt ),
.fetch_rvalid_o ( instr_if_data_rvalid ),
.fetch_rdata_o ( instr_if_data_rdata ),
.axi ( instr_if ),
.bypass_icache_i ( ~bypass_icache_csr_icache ),
.cache_is_bypassed_o ( ),
.flush_icache_i ( flush_icache_ctrl_icache ),
.cache_is_flushed_o ( flush_icache_ack_icache_ctrl ),
.flush_set_ID_req_i ( 1'b0 ),
.flush_set_ID_addr_i ( '0 ),
.flush_set_ID_ack_o ( )
);
// -------------------
// Instruction Tracer
// -------------------
@ -688,15 +627,17 @@ module ariane #(
assign tracer_if.fetch_valid = id_stage_i.instr_realigner_i.fetch_entry_valid_o;
assign tracer_if.fetch_ack = id_stage_i.instr_realigner_i.fetch_ack_i;
// Issue
assign tracer_if.issue_ack = issue_stage_i.scoreboard_i.issue_ack_i;
assign tracer_if.issue_sbe = issue_stage_i.scoreboard_i.issue_instr_o;
assign tracer_if.issue_ack = issue_stage_i.i_scoreboard.issue_ack_i;
assign tracer_if.issue_sbe = issue_stage_i.i_scoreboard.issue_instr_o;
// write-back
assign tracer_if.waddr = waddr_a_commit_id;
assign tracer_if.wdata = wdata_a_commit_id;
assign tracer_if.we = we_a_commit_id;
assign tracer_if.waddr = waddr_commit_id;
assign tracer_if.wdata = wdata_commit_id;
assign tracer_if.we = we_commit_id;
// commit
assign tracer_if.commit_instr = commit_instr_id_commit;
assign tracer_if.commit_ack = commit_ack;
// branch predict
assign tracer_if.resolve_branch = resolved_branch;
// address translation
// stores
assign tracer_if.st_valid = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.valid_i;
@ -758,13 +699,15 @@ module ariane #(
if (~rst_ni) begin
cycles <= 0;
end else begin
if (commit_ack && !commit_stage_i.exception_o) begin
$fwrite(f, "%d 0x%0h (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit.pc, commit_instr_id_commit.ex.tval[31:0], commit_instr_id_commit.ex.tval[31:0]);
end else if (commit_ack) begin
if (commit_instr_id_commit.ex.cause == 2) begin
$fwrite(f, "Exception Cause: Illegal Instructions, DASM(%h)\n", commit_instr_id_commit.ex.tval[31:0]);
end else begin
$fwrite(f, "Exception Cause: %5d\n", commit_instr_id_commit.ex.cause);
for (int i = 0; i < NR_COMMIT_PORTS; i++) begin
if (commit_ack[i] && !commit_stage_i.exception_o) begin
$fwrite(f, "%d 0x%0h (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].ex.tval[31:0]);
end else if (commit_ack[i] && commit_instr_id_commit[i].ex.valid) begin
if (commit_instr_id_commit[i].ex.cause == 2) begin
$fwrite(f, "Exception Cause: Illegal Instructions, DASM(%h)\n", commit_instr_id_commit[i].ex.tval[31:0]);
end else begin
$fwrite(f, "Exception Cause: %5d\n", commit_instr_id_commit[i].ex.cause);
end
end
end
cycles <= cycles + 1;

View file

@ -50,13 +50,14 @@ module branch_unit (
jump_base = (operator_i == JALR) ? operand_a_i : pc_i;
target_address = 64'b0;
resolve_branch_o = 1'b0;
resolved_branch_o.target_address = 64'b0;
resolved_branch_o.is_taken = 1'b0;
resolved_branch_o.valid = branch_valid_i;
resolved_branch_o.is_mispredict = 1'b0;
resolved_branch_o.is_lower_16 = 1'b0;
resolved_branch_o.clear = 1'b0;
resolve_branch_o = 1'b0;
resolved_branch_o.cf_type = branch_predict_i.cf_type;
// calculate next PC, depending on whether the instruction is compressed or not this may be different
next_pc = pc_i + ((is_compressed_instr_i) ? 64'h2 : 64'h4);
// calculate target address simple 64 bit addition

View file

@ -1,130 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 19.04.2017
// Description: Branch Target Buffer implementation
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
import ariane_pkg::*;
// Branch Target Buffer with an integrated saturating-counter direction predictor.
//
// The table holds NR_ENTRIES entries and is indexed directly by PC bits
// [PREDICTION_BITS-1:OFFSET]. Each entry additionally stores ANTIALIAS_BITS of
// the PC located right above the index bits; a prediction is only reported as
// valid when those stored bits match the corresponding bits of the looked-up PC.
//
// Parameters:
//   NR_ENTRIES              - number of table entries (the index width is $clog2(NR_ENTRIES))
//   BITS_SATURATION_COUNTER - width of the per-entry saturating counter
//
// Lookup (vpc_i -> branch_predict_o) is purely combinational. The update
// (branch_predict_i) is computed combinationally into btb_n and registered into
// btb_q on the next rising clock edge, unless flush_i is asserted in that cycle.
module btb #(
parameter int NR_ENTRIES = 1024,
parameter int BITS_SATURATION_COUNTER = 2
)
(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush the btb
input logic [63:0] vpc_i, // virtual PC from IF stage
input branchpredict_t branch_predict_i, // resolved branch information -> update data structure (applied whenever .valid is set)
output branchpredict_sbe_t branch_predict_o // branch prediction for issuing to the pipeline
);
// number of low-order PC bits which are not used for indexing
localparam OFFSET = 2;
// number of PC bits (directly above the index bits) stored per entry to detect aliasing
localparam ANTIALIAS_BITS = 8;
// position of the first PC bit above the index field (index bits are [PREDICTION_BITS-1:OFFSET])
localparam PREDICTION_BITS = $clog2(NR_ENTRIES) + OFFSET;
// typedef for all branch target entries
// we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
// btb_q is the registered table, btb_n its combinationally computed next state
struct packed {
logic valid;
logic [63:0] target_address;
logic [BITS_SATURATION_COUNTER-1:0] saturation_counter;
logic is_lower_16;
logic [ANTIALIAS_BITS-1:0] anti_alias; // store some more PC information to prevent aliasing
} btb_n [NR_ENTRIES-1:0], btb_q [NR_ENTRIES-1:0];
// index: table position for the lookup PC, update_pc: table position for the entry being updated
logic [$clog2(NR_ENTRIES)-1:0] index, update_pc;
logic [ANTIALIAS_BITS-1:0] anti_alias_index, anti_alias_update_pc;
// current counter value of the entry that is about to be updated
logic [BITS_SATURATION_COUNTER-1:0] saturation_counter;
// get actual index positions
// the lowest OFFSET (= 2) bits of the PC are dropped, i.e. bits [1:0] do not take part in indexing
// NOTE(review): presumably is_lower_16 tells apart the two half-words that share an index - confirm
assign update_pc = branch_predict_i.pc[PREDICTION_BITS - 1:OFFSET];
assign index = vpc_i[PREDICTION_BITS - 1:OFFSET];
// anti-alias portion of PCs
assign anti_alias_update_pc = branch_predict_i.pc[PREDICTION_BITS + ANTIALIAS_BITS - 1:PREDICTION_BITS];
assign anti_alias_index = vpc_i[PREDICTION_BITS + ANTIALIAS_BITS - 1:PREDICTION_BITS];
// we combinatorially predict the branch and the target address
// check if we are potentially aliasing: the hint is only valid if the stored anti-alias bits match the lookup PC
assign branch_predict_o.valid = (btb_q[index].anti_alias == anti_alias_index) ? btb_q[index].valid : 1'b0;
// the most significant bit of the saturation counter is the taken/not-taken decision
assign branch_predict_o.predict_taken = btb_q[index].saturation_counter[BITS_SATURATION_COUNTER-1];
assign branch_predict_o.predict_address = btb_q[index].target_address;
assign branch_predict_o.is_lower_16 = btb_q[index].is_lower_16;
// -------------------------
// Update Branch Prediction
// -------------------------
// update whenever the incoming branch information is flagged valid
always_comb begin : update_branch_predict
// default: keep the table unchanged
btb_n = btb_q;
saturation_counter = btb_q[update_pc].saturation_counter;
if (branch_predict_i.valid) begin
btb_n[update_pc].valid = 1'b1;
btb_n[update_pc].anti_alias = anti_alias_update_pc;
// update saturation counter
// first check if counter is already saturated in the positive regime e.g.: branch taken
if (saturation_counter == {BITS_SATURATION_COUNTER{1'b1}}) begin
// we can safely decrease it (a taken branch leaves the counter at its maximum)
if (~branch_predict_i.is_taken)
btb_n[update_pc].saturation_counter = saturation_counter - 1;
// then check if it saturated in the negative regime e.g.: branch not taken
end else if (saturation_counter == {BITS_SATURATION_COUNTER{1'b0}}) begin
// we can safely increase it (a not-taken branch leaves the counter at zero)
if (branch_predict_i.is_taken)
btb_n[update_pc].saturation_counter = saturation_counter + 1;
end else begin // otherwise we are not in any boundaries and can decrease or increase it
if (branch_predict_i.is_taken)
btb_n[update_pc].saturation_counter = saturation_counter + 1;
else
btb_n[update_pc].saturation_counter = saturation_counter - 1;
end
// the target address is simply updated
btb_n[update_pc].target_address = branch_predict_i.target_address;
// as is the information whether this was a compressed branch
btb_n[update_pc].is_lower_16 = branch_predict_i.is_lower_16;
// check if we should invalidate this entry, this happens in case we predicted a branch
// where actually none-is (aliasing)
// this assignment comes last and therefore overrides the valid = 1'b1 set above
if (branch_predict_i.clear) begin
btb_n[update_pc].valid = 1'b0;
end
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
// Clear every entry. The saturation counters start at zero, so predict_taken
// (the counter MSB) is low for all entries after reset.
for (int i = 0; i < NR_ENTRIES; i++)
btb_q[i] <= '{default: 0};
// the counters are already zeroed by the loop above; this loop assigns zero again
// NOTE(review): the literal is 2 bits wide regardless of BITS_SATURATION_COUNTER
for (int unsigned i = 0; i < NR_ENTRIES; i++)
btb_q[i].saturation_counter <= 2'b0;
end else begin
// evict all entries: only valid and the saturation counter are cleared,
// the remaining fields keep their values; a pending update in btb_n is not applied in this cycle
if (flush_i) begin
for (int i = 0; i < NR_ENTRIES; i++) begin
btb_q[i].valid <= 1'b0;
btb_q[i].saturation_counter <= '{default: 0};
end
end else begin
btb_q <= btb_n;
end
end
end
endmodule

View file

@ -14,41 +14,44 @@
import ariane_pkg::*;
module commit_stage (
input logic clk_i,
input logic halt_i, // request to halt the core
input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline
output exception_t exception_o, // take exception to controller
module commit_stage #(
parameter int unsigned NR_COMMIT_PORTS = 2
)(
input logic clk_i,
input logic halt_i, // request to halt the core
input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline
output exception_t exception_o, // take exception to controller
// from scoreboard
input scoreboard_entry_t commit_instr_i, // the instruction we want to commit
output logic commit_ack_o, // acknowledge that we are indeed committing
input scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_i, // the instruction we want to commit
output logic [NR_COMMIT_PORTS-1:0] commit_ack_o, // acknowledge that we are indeed committing
// to register file
output logic[4:0] waddr_a_o, // register file write address
output logic[63:0] wdata_a_o, // register file write data
output logic we_a_o, // register file write enable
output logic [NR_COMMIT_PORTS-1:0][4:0] waddr_o, // register file write address
output logic [NR_COMMIT_PORTS-1:0][63:0] wdata_o, // register file write data
output logic [NR_COMMIT_PORTS-1:0] we_o, // register file write enable
// to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline)
output logic [63:0] pc_o,
output logic [63:0] pc_o,
// to/from CSR file
output fu_op csr_op_o, // decoded CSR operation
output logic [63:0] csr_wdata_o, // data to write to CSR
input logic [63:0] csr_rdata_i, // data to read from CSR
input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit)
output fu_op csr_op_o, // decoded CSR operation
output logic [63:0] csr_wdata_o, // data to write to CSR
input logic [63:0] csr_rdata_i, // data to read from CSR
input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit)
// commit signals to ex
output logic commit_lsu_o, // commit the pending store
input logic commit_lsu_ready_i, // commit buffer of LSU is ready
input logic no_st_pending_i, // there is no store pending
output logic commit_csr_o, // commit the pending CSR instruction
output logic fence_i_o, // flush I$ and pipeline
output logic fence_o, // flush D$ and pipeline
output logic sfence_vma_o // flush TLBs and pipeline
output logic commit_lsu_o, // commit the pending store
input logic commit_lsu_ready_i, // commit buffer of LSU is ready
input logic no_st_pending_i, // there is no store pending
output logic commit_csr_o, // commit the pending CSR instruction
output logic fence_i_o, // flush I$ and pipeline
output logic fence_o, // flush D$ and pipeline
output logic sfence_vma_o // flush TLBs and pipeline
);
assign waddr_a_o = commit_instr_i.rd;
assign pc_o = commit_instr_i.pc;
assign waddr_o[0] = commit_instr_i[0].rd[4:0];
assign waddr_o[1] = commit_instr_i[1].rd[4:0];
assign pc_o = commit_instr_i[0].pc;
// -------------------
// Commit Instruction
@ -56,22 +59,27 @@ module commit_stage (
// write register file or commit instruction in LSU or CSR Buffer
always_comb begin : commit
// default assignments
commit_ack_o = 1'b0;
we_a_o = 1'b0;
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
wdata_a_o = commit_instr_i.result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = 64'b0;
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
commit_ack_o[0] = 1'b0;
commit_ack_o[1] = 1'b0;
we_o[0] = 1'b0;
we_o[1] = 1'b0;
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
wdata_o[0] = commit_instr_i[0].result;
wdata_o[1] = commit_instr_i[1].result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = 64'b0;
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
// we will not commit the instruction if we took an exception
// but we do not commit the instruction if we requested a halt
if (commit_instr_i.valid && !halt_i) begin
if (commit_instr_i[0].valid && !halt_i) begin
commit_ack_o = 1'b1;
commit_ack_o[0] = 1'b1;
// register will be the all zero register.
// and also acknowledge the instruction, this is mainly done for the instruction tracer
// as it will listen on the instruction ack signal. For the overall result it does not make any
@ -79,17 +87,17 @@ module commit_stage (
if (!exception_o.valid) begin
// we can definitely write the register file
// if the instruction is not committing anything the destination
we_a_o = 1'b1;
we_o[0] = 1'b1;
// check whether the instruction we retire was a store
// do not commit the instruction if we got an exception since the store buffer will be cleared
// by the subsequent flush triggered by an exception
if (commit_instr_i.fu == STORE) begin
if (commit_instr_i[0].fu == STORE) begin
// check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store)
if (commit_lsu_ready_i)
commit_lsu_o = 1'b1;
else // if the LSU buffer is not ready - do not commit, wait
commit_ack_o = 1'b0;
commit_ack_o[0] = 1'b0;
end
end
@ -97,42 +105,52 @@ module commit_stage (
// CSR Logic
// ---------
// check whether the instruction we retire was a CSR instruction
if (commit_instr_i.fu == CSR) begin
if (commit_instr_i[0].fu == CSR) begin
// write the CSR file
commit_csr_o = 1'b1;
wdata_a_o = csr_rdata_i;
csr_op_o = commit_instr_i.op;
csr_wdata_o = commit_instr_i.result;
wdata_o[0] = csr_rdata_i;
csr_op_o = commit_instr_i[0].op;
csr_wdata_o = commit_instr_i[0].result;
end
// ------------------
// SFENCE.VMA Logic
// ------------------
// check if this instruction was a SFENCE_VMA
if (commit_instr_i.op == SFENCE_VMA) begin
if (commit_instr_i[0].op == SFENCE_VMA) begin
// no store pending so we can flush the TLBs and pipeline
sfence_vma_o = no_st_pending_i;
// wait for the store buffer to drain until flushing the pipeline
commit_ack_o = no_st_pending_i;
commit_ack_o[0] = no_st_pending_i;
end
// ------------------
// FENCE.I Logic
// ------------------
// Fence synchronizes data and instruction streams. That means that we need to flush the private icache
// and the private dcache. This is the most expensive instruction.
if (commit_instr_i.op == FENCE_I || (flush_dcache_i && commit_instr_i.fu != STORE)) begin
commit_ack_o = no_st_pending_i;
if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the I$
fence_i_o = no_st_pending_i;
end
// ------------------
// FENCE Logic
// ------------------
if (commit_instr_i.op == FENCE) begin
commit_ack_o = no_st_pending_i;
if (commit_instr_i[0].op == FENCE) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the D$
fence_o = no_st_pending_i;
end
end
// check if the second instruction can be committed as well and the first wasn't a CSR instruction
if (commit_ack_o[0] && commit_instr_i[1].valid && !halt_i && !(commit_instr_i[0].fu inside {CSR}) && !flush_dcache_i) begin
// only if the first instruction didn't throw an exception and this instruction won't throw an exception
// and the operator is of type ALU, LOAD, CTRL_FLOW, MULT
if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin
we_o[1] = 1'b1;
commit_ack_o[1] = 1'b1;
end
end
end
// -----------------------------
@ -150,7 +168,7 @@ module commit_stage (
// can happen anywhere in the execution flow and might just happen between two legal instructions - the PC would then
// be outdated. The solution here is to defer any exception/interrupt until we get a valid PC again (from where we can
// resume execution afterwards).
if (commit_instr_i.valid) begin
if (commit_instr_i[0].valid) begin
// ------------------------
// check for CSR exception
// ------------------------
@ -159,14 +177,14 @@ module commit_stage (
// if no earlier exception happened the commit instruction will still contain
// the instruction data from the ID stage. If an earlier exception happened we don't care
// as we will overwrite it anyway in the next if block
exception_o.tval = commit_instr_i.ex.tval;
exception_o.tval = commit_instr_i[0].ex.tval;
end
// ------------------------
// Earlier Exceptions
// ------------------------
// but we give precedence to exceptions which happened earlier
if (commit_instr_i.ex.valid) begin
exception_o = commit_instr_i.ex;
if (commit_instr_i[0].ex.valid) begin
exception_o = commit_instr_i[0].ex;
end
// ------------------------
// Interrupts
@ -175,7 +193,7 @@ module commit_stage (
// by putting interrupts here we give them precedence over any other exception
if (csr_exception_i.valid && csr_exception_i.cause[63]) begin
exception_o = csr_exception_i;
exception_o.tval = commit_instr_i.ex.tval;
exception_o.tval = commit_instr_i[0].ex.tval;
end
end
// If we halted the processor don't take any exceptions

View file

@ -90,7 +90,7 @@ module compressed_decoder
illegal_instr_o = 1'b1;
end
3'b101: begin
OPCODE_C_J: begin
// 101: c.j -> jal x0, imm
instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], OPCODE_JAL};
end
@ -174,7 +174,7 @@ module compressed_decoder
endcase
end
3'b110, 3'b111: begin
OPCODE_C_BEQZ, OPCODE_C_BNEZ: begin
// 0: c.beqz -> beq rs1', x0, imm
// 1: c.bnez -> bne rs1', x0, imm
instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], OPCODE_BRANCH};

View file

@ -18,13 +18,12 @@ module controller (
input logic clk_i,
input logic rst_ni,
output logic flush_bp_o, // Flush branch prediction data structures
output logic flush_pcgen_o, // Flush PC Generation Stage
output logic set_pc_commit_o, // Set PC om PC Gen
output logic flush_if_o, // Flush the IF stage
output logic flush_unissued_instr_o, // Flush un-issued instructions of the scoreboard
output logic flush_id_o, // Flush ID stage
output logic flush_ex_o, // Flush EX stage
output logic flush_icache_o, // Flush ICache
input logic flush_icache_ack_i, // Acknowledge the whole ICache Flush
output logic flush_dcache_o, // Flush DCache
input logic flush_dcache_ack_i, // Acknowledge the whole DCache Flush
output logic flush_tlb_o, // Flush TLBs
@ -45,15 +44,13 @@ module controller (
// active fence - high if we are currently flushing the dcache
logic fence_active_d, fence_active_q;
logic flush_dcache;
logic flush_icache_d, flush_icache_q;
assign flush_icache_o = flush_icache_q;
// ------------
// Flush CTRL
// ------------
always_comb begin : flush_ctrl
fence_active_d = fence_active_q;
flush_pcgen_o = 1'b0;
set_pc_commit_o = 1'b0;
flush_if_o = 1'b0;
flush_unissued_instr_o = 1'b0;
flush_id_o = 1'b0;
@ -61,7 +58,7 @@ module controller (
flush_tlb_o = 1'b0;
flush_dcache = 1'b0;
flush_bp_o = 1'b0; // flush branch prediction
flush_icache_d = (flush_icache_ack_i) ? 1'b0 : flush_icache_q;
flush_icache_o = 1'b0;
// ------------
// Mis-predict
// ------------
@ -78,7 +75,7 @@ module controller (
// ---------------------------------
if (fence_i) begin
// this can be seen as a CSR instruction with side-effect
flush_pcgen_o = 1'b1;
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
@ -92,12 +89,12 @@ module controller (
// FENCE.I
// ---------------------------------
if (fence_i_i) begin
flush_pcgen_o = 1'b1;
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_icache_d = 1'b1;
flush_icache_o = 1'b1;
flush_dcache = 1'b1;
fence_active_d = 1'b1;
@ -115,7 +112,7 @@ module controller (
// SFENCE.VMA
// ---------------------------------
if (sfence_vma_i) begin
flush_pcgen_o = 1'b1;
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
@ -127,7 +124,7 @@ module controller (
// CSR instruction with side-effect
// ---------------------------------
if (flush_csr_i) begin
flush_pcgen_o = 1'b1;
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
@ -142,7 +139,7 @@ module controller (
if (ex_valid_i || eret_i || debug_set_pc_i) begin
// don't flush pcgen as we want to take the exception: Flush PCGen is not a flush signal
// for the PC Gen stage but instead tells it to take the PC we gave it
flush_pcgen_o = 1'b0;
set_pc_commit_o = 1'b0;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
@ -171,12 +168,10 @@ module controller (
if(~rst_ni) begin
fence_active_q <= 1'b0;
flush_dcache_o <= 1'b0;
flush_icache_q <= 1'b0;
end else begin
fence_active_q <= fence_active_d;
// register on the flush signal, this signal might be critical
flush_dcache_o <= flush_dcache;
flush_icache_q <= flush_icache_d;
end
end
endmodule

View file

@ -15,7 +15,8 @@
import ariane_pkg::*;
module csr_regfile #(
parameter int ASID_WIDTH = 1
parameter int ASID_WIDTH = 1,
parameter int unsigned NR_COMMIT_PORTS = 2
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
@ -23,16 +24,16 @@ module csr_regfile #(
input logic time_irq_i, // Timer threw a interrupt
// send a flush request out if a CSR with a side effect has changed (e.g. written)
output logic flush_o,
output logic halt_csr_o, // halt requested
output logic flush_o,
output logic halt_csr_o, // halt requested
// Debug CSR Port
input logic debug_csr_req_i, // Request from debug to read the CSR regfile
input logic [11:0] debug_csr_addr_i, // Address of CSR
input logic debug_csr_we_i, // Is it a read or write?
input logic [63:0] debug_csr_wdata_i, // Data to write
output logic [63:0] debug_csr_rdata_o, // Read data
input logic debug_csr_req_i, // Request from debug to read the CSR regfile
input logic [11:0] debug_csr_addr_i, // Address of CSR
input logic debug_csr_we_i, // Is it a read or write?
input logic [63:0] debug_csr_wdata_i, // Data to write
output logic [63:0] debug_csr_rdata_o, // Read data
// commit acknowledge
input logic commit_ack_i, // Commit acknowledged a instruction -> increase instret CSR
input logic [NR_COMMIT_PORTS-1:0] commit_ack_i, // Commit acknowledged a instruction -> increase instret CSR
// Core and Cluster ID
input logic [3:0] core_id_i, // Core ID is considered static
input logic [5:0] cluster_id_i, // Cluster ID is considered static
@ -239,8 +240,11 @@ module csr_regfile #(
always_comb begin : csr_update
automatic satp_t sapt;
automatic logic [63:0] mip;
automatic logic [63:0] instret;
sapt = satp_q;
mip = csr_wdata & 64'h33;
instret = instret_q;
// only USIP, SSIP, UTIP, STIP are write-able
eret_o = 1'b0;
@ -359,7 +363,7 @@ module csr_regfile #(
CSR_MCAUSE: mcause_d = csr_wdata;
CSR_MTVAL: mtval_d = csr_wdata;
CSR_MCYCLE: cycle_d = csr_wdata;
CSR_MINSTRET: instret_d = csr_wdata;
CSR_MINSTRET: instret = csr_wdata;
CSR_DCACHE: dcache_d = csr_wdata[0]; // enable bit
CSR_ICACHE: icache_d = csr_wdata[0]; // enable bit
CSR_L1_ICACHE_MISS,
@ -488,13 +492,15 @@ module csr_regfile #(
// --------------------
// Counters
// --------------------
instret_d = instret_q;
// just increment the cycle count
cycle_d = cycle_q + 1'b1;
// increase instruction retired counter
if (commit_ack_i) begin
instret_d = instret_q + 1'b1;
for (int i = 0; i < NR_COMMIT_PORTS; i++) begin
if (commit_ack_i[i]) begin
instret++;
end
end
instret_d = instret;
end
// ---------------------------

View file

@ -55,12 +55,7 @@ module decoder (
logic [63:0] imm_sb_type;
logic [63:0] imm_u_type;
logic [63:0] imm_uj_type;
logic [63:0] imm_z_type;
logic [63:0] imm_s2_type;
logic [63:0] imm_bi_type;
logic [63:0] imm_s3_type;
logic [63:0] imm_vs_type;
logic [63:0] imm_vu_type;
always_comb begin : decoder
@ -498,17 +493,13 @@ module decoder (
// Sign extend immediate
// --------------------------------
always_comb begin : sign_extend
imm_i_type = { {52 {instruction_i[31]}}, instruction_i[31:20] };
imm_i_type = i_imm(instruction_i);
imm_iz_type = { 52'b0, instruction_i[31:20] };
imm_s_type = { {52 {instruction_i[31]}}, instruction_i[31:25], instruction_i[11:7] };
imm_sb_type = { {51 {instruction_i[31]}}, instruction_i[31], instruction_i[7], instruction_i[30:25], instruction_i[11:8], 1'b0 };
imm_sb_type = sb_imm(instruction_i);
imm_u_type = { {32 {instruction_i[31]}}, instruction_i[31:12], 12'b0 }; // JAL, AUIPC, sign extended to 64 bit
imm_uj_type = { {44 {instruction_i[31]}}, instruction_i[19:12], instruction_i[20], instruction_i[30:21], 1'b0 };
imm_s2_type = { 59'b0, instruction_i[24:20] };
imm_uj_type = uj_imm(instruction_i);
imm_bi_type = { {59{instruction_i[24]}}, instruction_i[24:20] };
imm_s3_type = { 59'b0, instruction_i[29:25] };
imm_vs_type = { {58 {instruction_i[24]}}, instruction_i[24:20], instruction_i[25] };
imm_vu_type = { 58'b0, instruction_i[24:20], instruction_i[25] };
// NOIMM, PCIMM, IIMM, SIMM, BIMM, BIMM, UIMM, JIMM
// select immediate

View file

@ -83,11 +83,10 @@ module ex_stage #(
input logic en_ld_st_translation_i,
input logic flush_tlb_i,
input logic fetch_req_i,
output logic fetch_gnt_o,
output logic fetch_valid_o,
input logic [63:0] fetch_vaddr_i,
output logic [63:0] fetch_rdata_o,
output exception_t fetch_ex_o,
output logic fetch_valid_o,
output logic [63:0] fetch_paddr_o,
output exception_t fetch_exception_o,
input priv_lvl_t priv_lvl_i,
input priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
@ -100,13 +99,6 @@ module ex_stage #(
output logic dtlb_miss_o,
output logic dcache_miss_o,
output logic [63:0] instr_if_address_o,
output logic instr_if_data_req_o,
output logic [3:0] instr_if_data_be_o,
input logic instr_if_data_gnt_i,
input logic instr_if_data_rvalid_i,
input logic [63:0] instr_if_data_rdata_i,
// DCache interface
input logic dcache_en_i,
input logic flush_dcache_i,

View file

@ -14,8 +14,7 @@
import ariane_pkg::*;
module fetch_fifo
(
module fetch_fifo (
input logic clk_i,
input logic rst_ni,
// control signals
@ -24,24 +23,25 @@ module fetch_fifo
// that we have two compressed instruction (or one compressed instruction and one unaligned instruction) so we
// only predict on one entry and discard (or keep) the other depending on its position and prediction.
// input port
input branchpredict_sbe_t branch_predict_i,
input exception_t ex_i, // fetch exception in
input logic [63:0] addr_i,
input logic [63:0] rdata_i,
input logic [31:0] rdata_i,
input logic valid_i,
output logic ready_o,
// Dual Port Fetch FIFO
// output port 0
output fetch_entry_t fetch_entry_0_o,
output logic fetch_entry_valid_0_o,
input logic fetch_ack_0_i,
// output port 1
output fetch_entry_t fetch_entry_1_o,
output logic fetch_entry_valid_1_o,
input logic fetch_ack_1_i
output fetch_entry_t fetch_entry_o,
output logic fetch_entry_valid_o,
input logic fetch_ack_i
// // output port 1
// output fetch_entry_t fetch_entry_1_o,
// output logic fetch_entry_valid_1_o,
// input logic fetch_ack_1_i
);
localparam int unsigned DEPTH = 4; // must be a power of two
localparam int unsigned DEPTH = 8; // must be a power of two
// status signals
logic full, empty;
@ -50,24 +50,10 @@ module fetch_fifo
logic [$clog2(DEPTH)-1:0] write_pointer_n, write_pointer_q;
logic [$clog2(DEPTH)-1:0] status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool
assign ready_o = (status_cnt_q < DEPTH-2);
assign ready_o = (status_cnt_q < DEPTH-3);
assign full = (status_cnt_q == DEPTH);
assign empty = (status_cnt_q == '0);
// -------------
// Downsize
// -------------
logic [31:0] in_rdata;
// downsize from 64 bit to 32 bit, simply ignore half of the incoming data
always_comb begin : downsize
// take the upper half
if (addr_i[2])
in_rdata = rdata_i[63:32];
// take the lower half of the instruction
else
in_rdata = rdata_i[31:0];
end
always_comb begin : fetch_fifo_logic
// counter
automatic logic [$clog2(DEPTH)-1:0] status_cnt;
@ -85,17 +71,17 @@ module fetch_fifo
if (valid_i) begin
status_cnt++;
// new input data
mem_n[write_pointer_q] = {addr_i, in_rdata, branch_predict_i, ex_i};
mem_n[write_pointer_q] = {addr_i, rdata_i, branch_predict_i, ex_i};
write_pointer++;
end
// -------------
// Fetch Port 0
// -------------
fetch_entry_valid_0_o = (status_cnt_q >= 1);
fetch_entry_0_o = mem_q[read_pointer_q];
fetch_entry_valid_o = (status_cnt_q >= 1);
fetch_entry_o = mem_q[read_pointer_q];
if (fetch_ack_0_i) begin
if (fetch_ack_i) begin
read_pointer++;
status_cnt--;
end
@ -103,13 +89,13 @@ module fetch_fifo
// -------------
// Fetch Port 1
// -------------
fetch_entry_valid_1_o = (status_cnt_q >= 2);
fetch_entry_1_o = mem_q[read_pointer_q + 1'b1];
// fetch_entry_valid_1_o = (status_cnt_q >= 2);
// fetch_entry_1_o = mem_q[read_pointer_q + 1'b1];
if (fetch_ack_1_i) begin
read_pointer++;
status_cnt--;
end
// if (fetch_ack_1_i) begin
// read_pointer++;
// status_cnt--;
// end
write_pointer_n = write_pointer;
status_cnt_n = status_cnt;

80
src/ff1.sv Normal file
View file

@ -0,0 +1,80 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Date: 05.06.2017
// Description: Finds first one
// -----------------
// Find First One
// -----------------
// Find-first-one: reports the index of the lowest-numbered set bit of `in_i`.
//
// Parameters:
//   LEN         - width of the input vector. It does not have to be a power of
//                 two: leaf positions beyond LEN-1 are tied off to zero.
// Ports:
//   in_i        - vector to scan
//   first_one_o - index of the lowest set bit; only meaningful while
//                 no_ones_o is low
//   no_ones_o   - high when no bit of in_i is set
//
// The result is computed by a binary tree. Every node carries a `sel` flag
// (some bit below this node is set) and the index of the lowest set bit below
// it. A parent forwards the index of its lower child whenever that child's
// `sel` flag is set, otherwise the index of its upper child.
module ff1 #(
    parameter int unsigned LEN = 32
)(
    input  logic [LEN-1:0]         in_i,
    output logic [$clog2(LEN)-1:0] first_one_o,
    output logic                   no_ones_o
);

    localparam int unsigned NUM_LEVELS = $clog2(LEN);

    // constant table: index_lut[j] == j, truncated to the output width
    logic [LEN-1:0] [NUM_LEVELS-1:0]           index_lut;
    // tree nodes stored heap-style: the nodes of level `l` live at
    // indices 2**l-1 ... 2**(l+1)-2, node 0 is the root
    logic [2**NUM_LEVELS-1:0]                  sel_nodes;
    logic [2**NUM_LEVELS-1:0] [NUM_LEVELS-1:0] index_nodes;

    // ----------------------------
    // Generate Tree Structure
    // ----------------------------
    generate
        for (genvar j = 0; j < LEN; j++) begin
            assign index_lut[j] = $unsigned(j[NUM_LEVELS-1:0]);
        end
    endgenerate

    generate
        for (genvar level = 0; level < NUM_LEVELS; level++) begin

            // inner levels: combine the two children of every node
            if (level < NUM_LEVELS-1) begin
                for (genvar l = 0; l < 2**level; l++) begin
                    assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
                    assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
                        index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
                end
            end

            // leaf level: every node looks at the input pair (2k, 2k+1).
            // The comparisons are made against LEN-1, the highest valid bit
            // index, so that in_i[k*2+1] and in_i[k*2] are never referenced
            // beyond the vector when LEN is not a power of two.
            if (level == NUM_LEVELS-1) begin
                for (genvar k = 0; k < 2**level; k++) begin
                    // both bits of the pair are still in the vector
                    if (k * 2 < LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = in_i[k*2] | in_i[k*2+1];
                        assign index_nodes[2**level-1+k] = (in_i[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
                    end
                    // only the first bit of the pair is still in the vector
                    if (k * 2 == LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = in_i[k*2];
                        assign index_nodes[2**level-1+k] = index_lut[k*2];
                    end
                    // the whole pair is out of range -> tie off
                    if (k * 2 > LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = 1'b0;
                        assign index_nodes[2**level-1+k] = '0;
                    end
                end
            end
        end
    endgenerate

    // --------------------
    // Connect Output
    // --------------------
    assign first_one_o = index_nodes[0];
    assign no_ones_o   = ~sel_nodes[0];

endmodule

726
src/frontend.sv Normal file
View file

@ -0,0 +1,726 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 08.02.2018
// Description: Ariane Instruction Fetch Frontend
import ariane_pkg::*;
module frontend #(
parameter int unsigned BTB_ENTRIES = 8,
parameter int unsigned BHT_ENTRIES = 1024,
parameter int unsigned RAS_DEPTH = 4,
parameter int unsigned SET_ASSOCIATIVITY = 4,
parameter int unsigned CACHE_LINE_WIDTH = 64, // in bit
parameter int unsigned FETCH_WIDTH = 32
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush request for PCGEN
input logic flush_bp_i, // flush branch prediction
input logic flush_icache_i, // instruction fence in
// global input
input logic [63:0] boot_addr_i,
input logic fetch_enable_i, // start fetching instructions
// Address translation interface
output logic fetch_req_o, // address translation request
output logic [63:0] fetch_vaddr_o, // virtual address out
input logic fetch_valid_i, // address translation valid
input logic [63:0] fetch_paddr_i, // physical address in
input exception_t fetch_exception_i, // exception occurred during fetch
// Set a new PC
// mispredict
input branchpredict_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB
// from commit, when flushing the whole pipeline
input logic set_pc_commit_i, // Take the PC from commit stage
input logic [63:0] pc_commit_i, // PC of instruction in commit stage
// CSR input
input logic [63:0] epc_i, // exception PC which we need to return to
input logic eret_i, // return from exception
input logic [63:0] trap_vector_base_i, // base of trap vector
input logic ex_valid_i, // exception is valid - from commit
// Debug
input logic [63:0] debug_pc_i, // PC from debug stage
input logic debug_set_pc_i, // Set PC request from debug
// Instruction Fetch
AXI_BUS.Master axi,
output logic l1_icache_miss_o, // instruction cache missed
//
// instruction output port -> to processor back-end
output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
output logic fetch_entry_valid_o, // instruction in IF is valid
input logic fetch_ack_i // ID acknowledged this instruction
);
// maximum instructions we can fetch on one request
localparam int unsigned INSTR_PER_FETCH = FETCH_WIDTH/16;
// Registers
logic [31:0] icache_data_d, icache_data_q;
logic icache_valid_d, icache_valid_q;
exception_t icache_ex_d, icache_ex_q;
logic instruction_valid;
logic icache_speculative_d, icache_speculative_q;
logic [63:0] icache_vaddr_d, icache_vaddr_q;
// BHT, BTB and RAS prediction
bht_prediction_t bht_prediction;
btb_prediction_t btb_prediction;
ras_t ras_predict;
bht_update_t bht_update;
btb_update_t btb_update;
logic ras_push, ras_pop;
logic [63:0] ras_update;
// icache control signals
logic icache_req, kill_s1, kill_s2, icache_ready;
// instruction fetch is ready
logic if_ready;
logic [63:0] npc_d, npc_q; // next PC
// -----------------------
// Ctrl Flow Speculation
// -----------------------
// RVI ctrl flow prediction
logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch,
rvi_jalr, rvi_jump;
logic [INSTR_PER_FETCH-1:0][63:0] rvi_imm;
// RVC branching
logic [INSTR_PER_FETCH-1:0] is_rvc;
logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return,
rvc_jalr, rvc_call;
logic [INSTR_PER_FETCH-1:0][63:0] rvc_imm;
// re-aligned instruction and address (coming from cache - combinationally)
logic [INSTR_PER_FETCH-1:0][31:0] instr;
logic [INSTR_PER_FETCH-1:0][63:0] addr;
// virtual address of current fetch
logic [63:0] fetch_vaddr;
logic [63:0] bp_vaddr;
logic bp_valid; // we have a valid branch-prediction
logic fetch_is_speculative; // is it a speculative fetch or a fetch which need to do for sure
// branch-prediction which we inject into the pipeline
branchpredict_sbe_t bp_sbe;
logic fifo_valid, fifo_ready; // fetch FIFO
// save the unaligned part of the instruction to this ff
logic [15:0] unaligned_instr_d, unaligned_instr_q;
// the last instruction was unaligned
logic unaligned_d, unaligned_q;
// register to save the unaligned address
logic [63:0] unaligned_address_d, unaligned_address_q;
// TODO: generalize to arbitrary instruction fetch width
// Re-alignment of the registered icache response.
// Splits the 32-bit word icache_data_q into up to two instruction slots
// (instr[0]/addr[0] and instr[1]/addr[1]) and keeps track of a 32-bit instruction
// whose lower half sits in the upper 16 bit of one fetch word and whose upper half
// arrives with the next fetch word (state: unaligned_q, unaligned_instr_q, unaligned_address_q).
always_comb begin : re_align
// by default keep the saved state of a pending unaligned instruction
unaligned_d = unaligned_q;
unaligned_address_d = unaligned_address_q;
unaligned_instr_d = unaligned_instr_q;
// the response is usable as soon as the icache flagged it valid
instruction_valid = icache_valid_q;
// default: slot 0 carries the whole fetch word, slot 1 is empty
instr[1] = '0;
instr[0] = icache_data_q;
// slot 1 always refers to the upper half-word of the fetched word
addr[1] = {icache_vaddr_q[63:2], 2'b10};
addr[0] = icache_vaddr_q;
if (icache_valid_q) begin
// last instruction was unaligned
if (unaligned_q) begin
// complete it: the saved half-word forms the lower 16 bit, the lower half of the new word the upper 16 bit
instr[0] = {icache_data_q[15:0], unaligned_instr_q};
addr[0] = unaligned_address_q;
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
unaligned_instr_d = icache_data_q[31:16]; // save the upper bits for next cycle
// check if the instruction in the upper half-word is still unaligned, i.e. it is not compressed
// (its two lowest bits are 2'b11); if it is compressed re-set the unaligned flag
if (icache_data_q[17:16] != 2'b11) begin
unaligned_d = 1'b0;
// the compressed instruction in the upper half-word goes to slot 1, zero-extended
instr[1] = {16'b0, icache_data_q[31:16]};
end
end else if (is_rvc[0]) begin // instruction zero is RVC
// is instruction 1 also compressed
// yes? -> no problem, no -> we've got an unaligned instruction
if (icache_data_q[17:16] != 2'b11) begin
instr[1] = {16'b0, icache_data_q[31:16]};
end else begin
// remember the first half of the 32-bit instruction until the next word arrives
unaligned_instr_d = icache_data_q[31:16];
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
unaligned_d = 1'b1;
end
end // else -> normal fetch
end
// we started to fetch on an unaligned boundary with a whole instruction -> wait until we've
// received the next instruction
// (this overrides the assignments above: the response is marked invalid and only the upper half-word is saved)
if (icache_valid_q && icache_vaddr_q[1] && icache_data_q[17:16] == 2'b11) begin
instruction_valid = 1'b0;
unaligned_d = 1'b1;
unaligned_address_d = {icache_vaddr_q[63:2], 2'b10};
unaligned_instr_d = icache_data_q[31:16];
end
// a kill of the second stage always has the last word: drop a pending unaligned instruction
if (kill_s2) begin
unaligned_d = 1'b0;
end
end
logic [INSTR_PER_FETCH:0] taken;
// control front-end + branch-prediction
// Front-end control and branch prediction.
// Scans the re-aligned instructions of the current fetch, decides whether one of them is a
// predicted-taken control flow change and, if so, produces the predicted target (bp_vaddr),
// the RAS push/pop requests and the branch-prediction scoreboard entry (bp_sbe).
// Also derives the icache request and the ready signal used by the PC selection.
// taken[i+1] is set when instruction i is predicted taken; taken[0] is never set.
always_comb begin : frontend_ctrl
automatic logic take_rvi_cf; // take the control flow change (non-compressed)
automatic logic take_rvc_cf; // take the control flow change (compressed)
// defaults: nothing predicted, RAS untouched
take_rvi_cf = 1'b0;
take_rvc_cf = 1'b0;
ras_pop = 1'b0;
ras_push = 1'b0;
ras_update = '0;
taken = '0;
// NOTE(review): take_rvi_cf was already cleared a few lines above, this second assignment looks redundant
take_rvi_cf = 1'b0;
// we can advance the PC only if both the icache and the fetch FIFO can take a new request
if_ready = icache_ready & fifo_ready;
// request the icache whenever the fetch FIFO signals ready
icache_req = fifo_ready;
bp_vaddr = '0; // predicted address
bp_valid = 1'b0; // prediction is valid
bp_sbe.cf_type = RAS;
// only predict if the response is valid
if (instruction_valid) begin
// look at instruction 0, 1, 2,...
for (int unsigned i = 0; i < INSTR_PER_FETCH; i++) begin
// only speculate if the previous instruction was not taken
if (!taken[i]) begin
// function call
// push the address following the call: +2 for a compressed call, +4 otherwise
ras_push = rvi_call[i] | rvc_call[i];
ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
// Branch Prediction - **speculative**
if (rvi_branch[i] || rvc_branch[i]) begin
bp_sbe.cf_type = BHT;
// dynamic prediction valid?
if (bht_prediction.valid) begin
take_rvi_cf = rvi_branch[i] & (bht_prediction.taken | bht_prediction.strongly_taken);
take_rvc_cf = rvc_branch[i] & (bht_prediction.taken | bht_prediction.strongly_taken);
// default to static prediction
end else begin
// set if immediate is negative - static prediction
take_rvi_cf = rvi_branch[i] & rvi_imm[i][63];
take_rvc_cf = rvc_branch[i] & rvc_imm[i][63];
end
end
// unconditional jumps
if (rvi_jump[i] || rvc_jump[i]) begin
take_rvi_cf = rvi_jump[i];
take_rvc_cf = rvc_jump[i];
end
// to take this jump we need a valid prediction target **speculative**
if ((rvi_jalr[i] || rvc_jalr[i]) && btb_prediction.valid) begin
bp_vaddr = btb_prediction.target_address;
taken[i+1] = 1'b1;
bp_sbe.cf_type = BTB;
end
// is it a return and the RAS contains a valid prediction? **speculative**
// (placed after the BTB case, so a return overrides the BTB target and type)
if ((rvi_return[i] || rvc_return[i]) && ras_predict.valid) begin
bp_vaddr = ras_predict.ra;
ras_pop = 1'b1;
taken[i+1] = 1'b1;
bp_sbe.cf_type = RAS;
end
// branches and direct jumps: the target is PC-relative, computed from the instruction's immediate
if (take_rvi_cf) begin
taken[i+1] = 1'b1;
bp_vaddr = addr[i] + rvi_imm[i];
end
if (take_rvc_cf) begin
taken[i+1] = 1'b1;
bp_vaddr = addr[i] + rvc_imm[i];
end
// we are not interested in the lower instruction
// (the fetch address points to the upper half-word): discard its prediction and RAS request
if (icache_vaddr_q[1]) begin
taken[1] = 1'b0;
ras_pop = 1'b0;
ras_push = 1'b0;
end
end
end
end
// the prediction is valid as soon as any instruction of this fetch is predicted taken
bp_valid = |taken;
// assemble scoreboard entry
bp_sbe.valid = bp_valid;
bp_sbe.predict_address = bp_vaddr;
bp_sbe.predict_taken = bp_valid;
bp_sbe.is_lower_16 = taken[1]; // the branch is on the lower 16 (in a 32-bit setup)
end
logic is_mispredict;
assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
// Kill and hand-over control between the icache stages and the fetch FIFO.
always_comb begin : id_if
    // every valid icache response is offered to the fetch FIFO
    fifo_valid = icache_valid_q;

    // by default no icache stage is killed
    kill_s1 = 1'b0;
    kill_s2 = 1'b0;

    // if we have a valid branch-prediction we need to kill the last cache request
    if (bp_valid)
        kill_s2 = 1'b1;

    // a mis-predict or a flush kills the icache request and the fetch queue
    if (flush_i || is_mispredict) begin
        kill_s1 = 1'b1;
        kill_s2 = 1'b1;
    end
end
// ----------------------------------------
// Update Control Flow Predictions
// ----------------------------------------
// BHT
assign bht_update.valid = resolved_branch_i.valid & (resolved_branch_i.cf_type == BHT);
assign bht_update.pc = resolved_branch_i.pc;
assign bht_update.mispredict = resolved_branch_i.is_mispredict;
assign bht_update.taken = resolved_branch_i.is_taken;
// BTB
assign btb_update.valid = resolved_branch_i.valid & (resolved_branch_i.cf_type == BTB);
assign btb_update.pc = resolved_branch_i.pc;
assign btb_update.target_address = resolved_branch_i.target_address;
assign btb_update.is_lower_16 = resolved_branch_i.is_lower_16;
assign btb_update.clear = resolved_branch_i.clear;
// -------------------
// Next PC
// -------------------
// next PC (NPC) can come from (in order of precedence):
// 0. Default assignment
// 1. Branch Predict taken
// 2. Control flow change request (misprediction)
// 3. Return from environment call
// 4. Exception/Interrupt
// 5. Pipeline Flush because of CSR side effects
// 6. Debug
// Mis-predict handling is a little bit different
// select PC a.k.a PC Gen
// Next-PC selection.
// Produces the address presented to the icache in this cycle (fetch_vaddr) and the
// next value of the PC register (npc_d). Priority is encoded purely by statement order:
// every `if` below overrides the npc_d written by the ones before it, so the last
// matching source (debug) wins.
always_comb begin : npc_select
automatic logic [63:0] fetch_address;
fetch_is_speculative = 1'b0;
// fetch from the current PC register unless a prediction redirects us below
fetch_address = npc_q;
// keep stable by default
npc_d = npc_q;
// -------------------------------
// 1. Branch Prediction
// -------------------------------
// redirect this cycle's fetch to the predicted target
if (bp_valid) begin
fetch_is_speculative = 1'b1;
fetch_address = bp_vaddr;
npc_d = bp_vaddr;
end
// -------------------------------
// 0. Default assignment
// -------------------------------
// the fetch is accepted: continue with the next 4-byte aligned word after the address fetched
// in this cycle (which is the predicted target if the branch prediction above was valid)
if (if_ready && fetch_enable_i) begin
npc_d = {fetch_address[63:2], 2'b0} + 64'h4;
fetch_is_speculative = 1'b1;
end
// -------------------------------
// 2. Control flow change request
// -------------------------------
if (is_mispredict) begin
npc_d = resolved_branch_i.target_address;
end
// -------------------------------
// 3. Return from environment call
// -------------------------------
if (eret_i) begin
npc_d = epc_i;
end
// -------------------------------
// 4. Exception/Interrupt
// -------------------------------
if (ex_valid_i) begin
npc_d = trap_vector_base_i;
end
// -----------------------------------------------
// 5. Pipeline Flush because of CSR side effects
// -----------------------------------------------
// On a pipeline flush start fetching from the next address
// of the instruction in the commit stage
if (set_pc_commit_i) begin
// we came here from a flush request of a CSR instruction,
// as CSR instructions do not exist in a compressed form
// we can unconditionally do PC + 4 here
// NOTE(review): the controller also raises this signal for FENCE, FENCE.I and SFENCE.VMA;
// presumably those are 32-bit instructions as well - confirm
npc_d = pc_commit_i + 64'h4;
end
// -------------------------------
// 6. Debug
// -------------------------------
if (debug_set_pc_i) begin
npc_d = debug_pc_i;
end
// only the branch prediction changes the address fetched in this cycle,
// all other sources take effect through npc_d in the next cycle
fetch_vaddr = fetch_address;
end
// Front-end state registers: next PC, the registered icache response and the
// bookkeeping for an instruction that straddles two fetch words.
// Asynchronous, active-low reset.
always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
        // program counter starts at the boot address
        npc_q                <= boot_addr_i;
        // registered icache response
        icache_valid_q       <= 1'b0;
        icache_speculative_q <= 1'b0;
        icache_vaddr_q       <= '0;
        icache_data_q        <= '0;
        icache_ex_q          <= '0;
        // unaligned instruction tracking
        unaligned_q          <= 1'b0;
        unaligned_instr_q    <= '0;
        unaligned_address_q  <= '0;
    end else begin
        // program counter
        npc_q                <= npc_d;
        // registered icache response
        icache_valid_q       <= icache_valid_d;
        icache_speculative_q <= icache_speculative_d;
        icache_vaddr_q       <= icache_vaddr_d;
        icache_data_q        <= icache_data_d;
        icache_ex_q          <= icache_ex_d;
        // unaligned instruction tracking
        unaligned_q          <= unaligned_d;
        unaligned_instr_q    <= unaligned_instr_d;
        unaligned_address_q  <= unaligned_address_d;
    end
end
ras #(
.DEPTH ( RAS_DEPTH )
) i_ras (
.push_i ( ras_push ),
.pop_i ( ras_pop ),
.data_i ( ras_update ),
.data_o ( ras_predict ),
.*
);
btb #(
.NR_ENTRIES ( BTB_ENTRIES )
) i_btb (
.flush_i ( flush_bp_i ),
.vpc_i ( icache_vaddr_q ),
.btb_update_i ( btb_update ),
.btb_prediction_o ( btb_prediction ),
.*
);
bht #(
.NR_ENTRIES ( BHT_ENTRIES )
) i_bht (
.flush_i ( flush_bp_i ),
.vpc_i ( icache_vaddr_q ),
.bht_update_i ( bht_update ),
.bht_prediction_o ( bht_prediction ),
.*
);
// Instruction cache. The request (address, speculative flag) is issued in the first cycle,
// the response (data, valid, exception, address) is captured in the icache_*_q registers.
// NOTE(review): set associativity and cache line width are hard-coded to 4 and 128 here, the module
// parameters SET_ASSOCIATIVITY and CACHE_LINE_WIDTH (default 64) are not forwarded - confirm whether
// this is intended.
icache #(
.SET_ASSOCIATIVITY ( 4 ),
.CACHE_LINE_WIDTH ( 128 ),
.FETCH_WIDTH ( FETCH_WIDTH )
) i_icache (
.flush_i ( flush_icache_i ),
.vaddr_i ( fetch_vaddr ), // 1st cycle
.is_speculative_i ( fetch_is_speculative ), // 1st cycle
.data_o ( icache_data_d ),
.req_i ( icache_req ),
.kill_s1_i ( kill_s1 ),
.kill_s2_i ( kill_s2 ),
.ready_o ( icache_ready ),
.valid_o ( icache_valid_d ),
.ex_o ( icache_ex_d ),
.is_speculative_o ( icache_speculative_d ),
.vaddr_o ( icache_vaddr_d ),
.axi ( axi ),
.miss_o ( l1_icache_miss_o ),
// remaining ports are connected implicitly to equally named signals of this module
.*
);
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
instr_scan i_instr_scan (
.instr_i ( instr[i] ),
.is_rvc_o ( is_rvc[i] ),
.rvi_return_o ( rvi_return[i] ),
.rvi_call_o ( rvi_call[i] ),
.rvi_branch_o ( rvi_branch[i] ),
.rvi_jalr_o ( rvi_jalr[i] ),
.rvi_jump_o ( rvi_jump[i] ),
.rvi_imm_o ( rvi_imm[i] ),
.rvc_branch_o ( rvc_branch[i] ),
.rvc_jump_o ( rvc_jump[i] ),
.rvc_jr_o ( rvc_jr[i] ),
.rvc_return_o ( rvc_return[i] ),
.rvc_jalr_o ( rvc_jalr[i] ),
.rvc_call_o ( rvc_call[i] ),
.rvc_imm_o ( rvc_imm[i] )
);
end
fetch_fifo i_fetch_fifo (
.flush_i ( flush_i ),
.branch_predict_i ( bp_sbe ),
.ex_i ( icache_ex_q ),
.addr_i ( icache_vaddr_q ),
.rdata_i ( icache_data_q ),
.valid_i ( fifo_valid ),
.ready_o ( fifo_ready ),
.fetch_entry_o ( fetch_entry_o ),
.fetch_entry_valid_o( fetch_entry_valid_o ),
.fetch_ack_i ( fetch_ack_i ),
.*
);
endmodule
// ------------------------------
// Instruction Scanner
// ------------------------------
// Purely combinational pre-decoder for one fetch word.
// It classifies instr_i as an uncompressed (RVI) or compressed (RVC) branch,
// jump, indirect jump, call or return and extracts the matching PC-relative
// immediate. All outputs are computed in parallel; the consumer is expected to
// qualify the rvi_* outputs with ~is_rvc_o where needed.
module instr_scan (
    input  logic [31:0] instr_i,      // expect aligned instruction, compressed or not
    output logic        is_rvc_o,     // instr_i[1:0] != 2'b11, i.e. a 16 bit instruction
    output logic        rvi_return_o, // JALR that looks like a function return
    output logic        rvi_call_o,   // JAL/JALR that looks like a function call
    output logic        rvi_branch_o, // opcode field equals OPCODE_BRANCH
    output logic        rvi_jalr_o,   // opcode field equals OPCODE_JALR
    output logic        rvi_jump_o,   // opcode field equals OPCODE_JAL
    output logic [63:0] rvi_imm_o,    // uj_imm() or sb_imm() of instr_i
    output logic        rvc_branch_o, // C.BEQZ / C.BNEZ
    output logic        rvc_jump_o,   // C.J
    output logic        rvc_jr_o,     // C.JR
    output logic        rvc_return_o, // C.JR through x1 or x5
    output logic        rvc_jalr_o,   // C.JALR
    output logic        rvc_call_o,   // currently identical to rvc_jalr_o
    output logic [63:0] rvc_imm_o     // sign-extended C.J or C.BEQZ/C.BNEZ offset
);
    assign is_rvc_o     = (instr_i[1:0] != 2'b11);

    // ------------------
    // Uncompressed (RVI)
    // ------------------
    // return: rs1 (instr_i[19:15]) matches 5'b00?01, i.e. x1 or x5, and bit 0 of rd (instr_i[7])
    // is clear so that rd itself cannot be x1 or x5, TODO: check the fact about bit 7
    assign rvi_return_o = rvi_jalr_o & ~instr_i[7] & ~instr_i[19] & ~instr_i[18] & ~instr_i[16] & instr_i[15];
    // call: JAL or JALR with bit 0 of rd set (approximates rd being x1 or x5)
    assign rvi_call_o   = (rvi_jalr_o | rvi_jump_o) & instr_i[7]; // TODO: check that this captures calls
    // instr_i[3] differentiates between the JAL and BRANCH opcode, the JALR target is not
    // computed here (per the original note it comes from the branch predictor)
    assign rvi_imm_o    = (instr_i[3]) ? uj_imm(instr_i) : sb_imm(instr_i);
    assign rvi_branch_o = (instr_i[6:0] == OPCODE_BRANCH) ? 1'b1 : 1'b0;
    assign rvi_jalr_o   = (instr_i[6:0] == OPCODE_JALR)   ? 1'b1 : 1'b0;
    assign rvi_jump_o   = (instr_i[6:0] == OPCODE_JAL)    ? 1'b1 : 1'b0;

    // ----------------
    // Compressed (RVC)
    // ----------------
    // C.J lives in quadrant 2'b01
    assign rvc_jump_o   = (instr_i[15:13] == OPCODE_C_J) & is_rvc_o & (instr_i[1:0] == 2'b01);
    // C.JR: funct4 == 4'b1000, rs2 field == 0, quadrant 2'b10
    assign rvc_jr_o     = (instr_i[15:12] == 4'b1000) & (instr_i[6:2] == 5'b00000) & is_rvc_o & (instr_i[1:0] == 2'b10);
    assign rvc_branch_o = ((instr_i[15:13] == OPCODE_C_BEQZ) | (instr_i[15:13] == OPCODE_C_BNEZ)) & is_rvc_o & (instr_i[1:0] == 2'b01);
    // check that rs1 (instr_i[11:7]) is x1 or x5, bit 9 is a don't care
    assign rvc_return_o = rvc_jr_o & ~instr_i[11] & ~instr_i[10] & ~instr_i[8] & instr_i[7];
    // C.JALR: funct4 == 4'b1001, rs2 field == 0, quadrant 2'b10.
    // The quadrant check is required: without it quadrant 2'b01 encodings that happen to have
    // instr_i[15:12] == 4'b1001 and instr_i[6:2] == 0 (e.g. c.andi rd, -32) are misclassified as
    // C.JALR and therefore as a call.
    // NOTE(review): C.EBREAK (rs1 field == 0) shares this encoding and is still matched here.
    assign rvc_jalr_o   = (instr_i[15:12] == 4'b1001) & (instr_i[6:2] == 5'b00000) & is_rvc_o & (instr_i[1:0] == 2'b10);
    assign rvc_call_o   = rvc_jalr_o;  // TODO: check that this captures calls

    // instr_i[14] differentiates between the compressed branches (set) and C.J (clear)
    assign rvc_imm_o    = (instr_i[14]) ? {{56{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0}
                                        : {{53{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0};
endmodule
// ------------------------------
// Branch Prediction
// ------------------------------
// branch target buffer
// Direct-mapped table of branch targets. The lookup is combinational on vpc_i,
// updates are registered and become visible one cycle after btb_update_i.valid.
module btb #(
    parameter int NR_ENTRIES = 8
)(
    input  logic            clk_i,           // Clock
    input  logic            rst_ni,          // Asynchronous reset active low
    input  logic            flush_i,         // flush the btb
    input  logic [63:0]     vpc_i,           // virtual PC from IF stage
    input  btb_update_t     btb_update_i,    // update btb with this information
    output btb_prediction_t btb_prediction_o // prediction from btb
);
    // lowest PC bit that takes part in indexing (PC bit 0 is skipped)
    localparam OFFSET = 1;
    // currently not referenced inside this module
    localparam ANTIALIAS_BITS = 8;
    // one past the highest PC bit that takes part in indexing
    localparam PREDICTION_BITS = $clog2(NR_ENTRIES) + OFFSET;

    // table storage; entries carry no tag, so different PCs with equal index bits alias
    btb_prediction_t btb_d [NR_ENTRIES-1:0], btb_q [NR_ENTRIES-1:0];
    // read (lookup) and write (update) row
    logic [$clog2(NR_ENTRIES)-1:0] rd_idx, wr_idx;

    assign rd_idx = vpc_i[PREDICTION_BITS - 1:OFFSET];
    assign wr_idx = btb_update_i.pc[PREDICTION_BITS - 1:OFFSET];

    // lookup: hand out whatever is stored in the addressed row
    assign btb_prediction_o = btb_q[rd_idx];

    // -------------------------
    // Next-state of the table
    // -------------------------
    always_comb begin : update_branch_predict
        // hold by default
        btb_d = btb_q;

        if (btb_update_i.valid) begin
            // target and position within the fetch word are always taken over
            btb_d[wr_idx].target_address = btb_update_i.target_address;
            btb_d[wr_idx].is_lower_16    = btb_update_i.is_lower_16;
            // a clear request (prediction on something that was no branch, i.e. aliasing)
            // invalidates the row, every other update validates it
            if (btb_update_i.clear) begin
                btb_d[wr_idx].valid = 1'b0;
            end else begin
                btb_d[wr_idx].valid = 1'b1;
            end
        end
    end

    // -------------------------
    // Table registers
    // -------------------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            // reset: zero every field of every row
            for (int row = 0; row < NR_ENTRIES; row++) begin
                btb_q[row] <= '{default: 0};
            end
        end else if (flush_i) begin
            // flush: only drop the valid bits, a pending update in the same cycle is discarded
            for (int row = 0; row < NR_ENTRIES; row++) begin
                btb_q[row].valid <= 1'b0;
            end
        end else begin
            btb_q <= btb_d;
        end
    end
endmodule
// return address stack
// Return address stack implemented as a shift register of DEPTH entries.
// Entry 0 is the top of the stack and is permanently visible on data_o.
//   push_i only        : shift everything one position down, new address becomes entry 0
//                        (the oldest entry drops out)
//   pop_i only         : shift everything one position up, the freed bottom entry is invalidated
//   push_i and pop_i   : the top entry is replaced by the new address, all other entries stay
module ras #(
    parameter int unsigned DEPTH = 2
)(
    input  logic        clk_i,
    input  logic        rst_ni,  // asynchronous reset, active low
    input  logic        push_i,  // store data_i as new top of stack
    input  logic        pop_i,   // remove the current top of stack
    input  logic [63:0] data_i,  // return address to push
    output ras_t        data_o   // current top of stack (entry 0)
);
    ras_t [DEPTH-1:0] stack_d, stack_q;

    assign data_o = stack_q[0];

    always_comb begin
        stack_d = stack_q;

        // push on the stack
        if (push_i) begin
            stack_d[0].ra = data_i;
            // mark the new return address as valid
            stack_d[0].valid = 1'b1;
            stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0];
        end

        if (pop_i) begin
            stack_d[DEPTH-2:0] = stack_q[DEPTH-1:1];
            // we popped the value so invalidate the end of the stack
            stack_d[DEPTH-1].valid = 1'b0;
            stack_d[DEPTH-1].ra = 'b0;
        end

        // Simultaneous push and pop: without this branch the pop assignment above, which is
        // computed from stack_q, would overwrite entry 0 and silently drop the pushed address.
        // Popping the old top and pushing a new one is the same as replacing the top in place,
        // so leave the remaining entries untouched.
        if (pop_i && push_i) begin
            stack_d = stack_q;
            stack_d[0].ra = data_i;
            stack_d[0].valid = 1'b1;
        end
    end

    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            stack_q <= '0;
        end else begin
            stack_q <= stack_d;
        end
    end
endmodule
// branch history table - 2 bit saturation counter
// Table of two bit saturating counters, one per row, indexed by PC bits.
// The lookup is combinational on vpc_i, updates are registered.
module bht #(
    parameter int unsigned NR_ENTRIES = 1024
)(
    input  logic            clk_i,
    input  logic            rst_ni,           // asynchronous reset, active low
    input  logic            flush_i,          // invalidate all rows
    input  logic [63:0]     vpc_i,            // PC used for the lookup
    input  bht_update_t     bht_update_i,     // resolved branch used to train the table
    output bht_prediction_t bht_prediction_o  // prediction for vpc_i
);
    // lowest PC bit that takes part in indexing (PC bits 1:0 are skipped)
    localparam OFFSET = 2;
    // currently not referenced inside this module
    localparam ANTIALIAS_BITS = 8;
    // one past the highest PC bit that takes part in indexing
    localparam PREDICTION_BITS = $clog2(NR_ENTRIES) + OFFSET;

    struct packed {
        logic       valid;
        logic [1:0] saturation_counter;
    } bht_d[NR_ENTRIES-1:0], bht_q[NR_ENTRIES-1:0];

    // read (lookup) and write (update) row
    logic [$clog2(NR_ENTRIES)-1:0] rd_idx, wr_idx;
    // counter value currently stored in the row that is being updated
    logic [1:0]                    cnt_now;

    assign rd_idx = vpc_i[PREDICTION_BITS - 1:OFFSET];
    assign wr_idx = bht_update_i.pc[PREDICTION_BITS - 1:OFFSET];

    // prediction assignment
    // note: taken is only set for counter value 2'b10, value 2'b11 is reported through
    // strongly_taken instead
    assign bht_prediction_o.valid          = bht_q[rd_idx].valid;
    assign bht_prediction_o.taken          = bht_q[rd_idx].saturation_counter == 2'b10;
    assign bht_prediction_o.strongly_taken = (bht_q[rd_idx].saturation_counter == 2'b11);

    always_comb begin : update_bht
        // hold by default
        bht_d   = bht_q;
        cnt_now = bht_q[wr_idx].saturation_counter;

        if (bht_update_i.valid) begin
            bht_d[wr_idx].valid = 1'b1;

            case (cnt_now)
                // saturated at the top: only counting down is possible
                2'b11: begin
                    if (~bht_update_i.taken)
                        bht_d[wr_idx].saturation_counter = cnt_now - 1;
                end
                // saturated at the bottom: only counting up is possible
                2'b00: begin
                    if (bht_update_i.taken)
                        bht_d[wr_idx].saturation_counter = cnt_now + 1;
                end
                // in between: follow the branch outcome in either direction
                default: begin
                    if (bht_update_i.taken)
                        bht_d[wr_idx].saturation_counter = cnt_now + 1;
                    else
                        bht_d[wr_idx].saturation_counter = cnt_now - 1;
                end
            endcase
        end
    end

    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (!rst_ni) begin
            // reset: valid and counter of every row are zeroed
            for (int unsigned row = 0; row < NR_ENTRIES; row++) begin
                bht_q[row] <= '0;
            end
        end else if (flush_i) begin
            // flush: invalidate every row and preset its counter to 2'b10,
            // a pending update in the same cycle is discarded
            for (int unsigned row = 0; row < NR_ENTRIES; row++) begin
                bht_q[row].valid              <= 1'b0;
                bht_q[row].saturation_counter <= 2'b10;
            end
        end else begin
            bht_q <= bht_d;
        end
    end
endmodule

File diff suppressed because it is too large Load diff

View file

@ -45,8 +45,8 @@ module instr_realigner (
// check if the lower compressed instruction was no branch otherwise we will need to squash this instruction
// but only if we predicted it to be taken, the predict was on the lower 16 bit compressed instruction
logic kill_upper_16_bit;
assign kill_upper_16_bit = fetch_entry_0_i.branch_predict.valid &&
fetch_entry_0_i.branch_predict.predict_taken &&
assign kill_upper_16_bit = fetch_entry_0_i.branch_predict.valid &
fetch_entry_0_i.branch_predict.predict_taken &
fetch_entry_0_i.branch_predict.is_lower_16;
// ----------
// Registers
@ -82,6 +82,9 @@ module instr_realigner (
if (fetch_entry_0_i.instruction[1:0] != 2'b11) begin
// it is compressed
fetch_entry_o.instruction = {15'b0, fetch_entry_0_i.instruction[15:0]};
// we need to kill the lower prediction
if (fetch_entry_0_i.branch_predict.valid && !fetch_entry_0_i.branch_predict.is_lower_16)
fetch_entry_o.branch_predict.valid = 1'b0;
// should we even look at the upper instruction bits?
if (!kill_upper_16_bit) begin
@ -139,6 +142,9 @@ module instr_realigner (
fetch_ack_0_o = 1'b0;
// unaligned access served
unaligned_n = 1'b0;
// we need to kill the lower prediction
if (fetch_entry_0_i.branch_predict.valid && !fetch_entry_0_i.branch_predict.is_lower_16)
fetch_entry_o.branch_predict.valid = 1'b0;
// or is it an unaligned 32 bit instruction like
// ____________________________________________________
// |instr [15:0] | instr [31:16] | compressed 1[15:0] |

View file

@ -15,7 +15,9 @@
import ariane_pkg::*;
module issue_read_operands (
module issue_read_operands #(
parameter int unsigned NR_COMMIT_PORTS = 2
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic test_en_i,
@ -32,14 +34,14 @@ module issue_read_operands (
input logic issue_instr_valid_i,
output logic issue_ack_o,
// lookup rd in scoreboard
output logic [4:0] rs1_o,
output logic [REG_ADDR_SIZE-1:0] rs1_o,
input logic [63:0] rs1_i,
input logic rs1_valid_i,
output logic [4:0] rs2_o,
output logic [REG_ADDR_SIZE-1:0] rs2_o,
input logic [63:0] rs2_i,
input logic rs2_valid_i,
// get clobber input
input fu_t [31:0] rd_clobber_i,
input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_i,
// To FU, just single issue for now
output fu_t fu_o,
output fu_op operator_o,
@ -66,9 +68,9 @@ module issue_read_operands (
input logic csr_ready_i, // FU is ready
output logic csr_valid_o, // Output is valid
// commit port
input logic [4:0] waddr_a_i,
input logic [63:0] wdata_a_i,
input logic we_a_i
input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i,
input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i,
input logic [NR_COMMIT_PORTS-1:0] we_i
// committing instruction
// from scoreboard
// input scoreboard_entry commit_instr_i,
@ -130,9 +132,10 @@ module issue_read_operands (
end
// or check that the target destination register will be written in this cycle by the
// commit stage
if (we_a_i && waddr_a_i == issue_instr_i.rd) begin
issue_ack_o = 1'b1;
end
for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++)
if (we_i[i] && waddr_i[i] == issue_instr_i.rd) begin
issue_ack_o = 1'b1;
end
end
// we can also issue the instruction under the following two circumstances:
// we can do this even if we are stalled or no functional unit is ready (as we don't need one)
@ -287,11 +290,11 @@ module issue_read_operands (
// get the address from the issue stage by default
// read port
debug_gpr_rdata_o = operand_a_regfile;
raddr_a = issue_instr_i.rs1;
raddr_a = issue_instr_i.rs1[4:0];
// write port
waddr = waddr_a_i;
wdata = wdata_a_i;
we = we_a_i;
waddr = waddr_i[0];
wdata = wdata_i[0];
we = we_i[0];
// we've got a debug request in
if (debug_gpr_req_i) begin
raddr_a = debug_gpr_addr_i;
@ -305,30 +308,33 @@ module issue_read_operands (
// Integer Register File
// ----------------------
regfile #(
.DATA_WIDTH ( 64 )
)
regfile_i (
.DATA_WIDTH ( 64 )
) regfile_i (
// Clock and Reset
.clk ( clk_i ),
.rst_n ( rst_ni ),
.test_en_i ( test_en_i ),
.clk ( clk_i ),
.rst_n ( rst_ni ),
.test_en_i ( test_en_i ),
.raddr_a_i ( raddr_a ),
.rdata_a_o ( operand_a_regfile ),
.raddr_a_i ( raddr_a ),
.rdata_a_o ( operand_a_regfile ),
.raddr_b_i ( issue_instr_i.rs2 ),
.rdata_b_o ( operand_b_regfile ),
.raddr_b_i ( issue_instr_i.rs2[4:0] ),
.rdata_b_o ( operand_b_regfile ),
.waddr_a_i ( waddr ),
.wdata_a_i ( wdata ),
.we_a_i ( we )
.waddr_a_i ( waddr ),
.wdata_a_i ( wdata ),
.we_a_i ( we ),
.waddr_b_i ( waddr_i[1] ),
.wdata_b_i ( wdata_i[1] ),
.we_b_i ( we_i[1] )
);
// ----------------------
// Registers (ID <-> EX)
// ----------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
if (~rst_ni) begin
operand_a_q <= '{default: 0};
operand_b_q <= '{default: 0};
imm_q <= 64'b0;
@ -366,6 +372,10 @@ module issue_read_operands (
assert property (
@(posedge clk_i) (branch_valid_q) |-> (!$isunknown(operand_a_q) && !$isunknown(operand_b_q)))
else $warning ("Got unknown value in one of the operands");
initial begin
assert (NR_COMMIT_PORTS == 2) else $error("Only two commit ports are supported at the moment!");
end
`endif
`endif
endmodule

View file

@ -16,8 +16,9 @@
import ariane_pkg::*;
module issue_stage #(
parameter int NR_ENTRIES = 8,
parameter int NR_WB_PORTS = 4
parameter int unsigned NR_ENTRIES = 8,
parameter int unsigned NR_WB_PORTS = 4,
parameter int unsigned NR_COMMIT_PORTS = 2
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
@ -66,72 +67,48 @@ module issue_stage #(
// write back port
input logic [NR_WB_PORTS-1:0][TRANS_ID_BITS-1:0] trans_id_i,
input logic [NR_WB_PORTS-1:0][63:0] wdata_i,
input logic [NR_WB_PORTS-1:0][63:0] wbdata_i,
input exception_t [NR_WB_PORTS-1:0] ex_ex_i, // exception from execute stage
input logic [NR_WB_PORTS-1:0] wb_valid_i,
// commit port
input logic[4:0] waddr_a_i,
input logic[63:0] wdata_a_i,
input logic we_a_i,
// commit port
input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i,
input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i,
input logic [NR_COMMIT_PORTS-1:0] we_i,
output scoreboard_entry_t commit_instr_o,
input logic commit_ack_i
output scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_o,
input logic [NR_COMMIT_PORTS-1:0] commit_ack_i
);
// ---------------------------------------------------
// Scoreboard (SB) <-> Issue and Read Operands (IRO)
// ---------------------------------------------------
fu_t [31:0] rd_clobber_sb_iro;
logic [4:0] rs1_iro_sb;
logic [63:0] rs1_sb_iro;
logic rs1_valid_sb_iro;
logic [4:0] rs2_iro_sb;
logic [63:0] rs2_sb_iro;
logic rs2_valid_iro_sb;
scoreboard_entry_t issue_instr_sb_iro;
logic issue_instr_valid_sb_iro;
logic issue_ack_iro_sb;
fu_t [2**REG_ADDR_SIZE:0] rd_clobber_sb_iro;
logic [REG_ADDR_SIZE-1:0] rs1_iro_sb;
logic [63:0] rs1_sb_iro;
logic rs1_valid_sb_iro;
// ---------------------------------------------------
// Branch (resolve) logic
// ---------------------------------------------------
// This should basically prevent the scoreboard from accepting
// instructions past a branch. We need to resolve the branch beforehand.
// This limitation is in place to ease the backtracking of mis-predicted branches as they
// can simply be in the front-end of the processor.
logic unresolved_branch_n, unresolved_branch_q;
logic [REG_ADDR_SIZE-1:0] rs2_iro_sb;
logic [63:0] rs2_sb_iro;
logic rs2_valid_iro_sb;
scoreboard_entry_t issue_instr_sb_rename;
logic issue_instr_valid_sb_rename;
logic issue_ack_rename_sb;
scoreboard_entry_t issue_instr_rename_iro;
logic issue_instr_valid_rename_iro;
logic issue_ack_iro_rename;
always_comb begin : unresolved_branch
unresolved_branch_n = unresolved_branch_q;
// we just resolved the branch
if (resolve_branch_i) begin
unresolved_branch_n = 1'b0;
end
// if the instruction is valid, it is a control flow instruction and the issue stage acknowledged its dispatch
// set the unresolved branch flag
if (issue_ack_iro_sb && decoded_instr_valid_i && is_ctrl_flow_i) begin
unresolved_branch_n = 1'b1;
end
// if we predicted a taken branch this means that we need to stall issue for one cycle to resolve the
// branch, otherwise we might issue a wrong instruction
if (issue_ack_iro_sb && decoded_instr_i.bp.valid && decoded_instr_i.bp.predict_taken) begin
unresolved_branch_n = 1'b1;
end
// if we are requested to flush also flush the unresolved branch flag because either the flush
// was requested by a branch or an exception. In any case: any unresolved branch will get evicted
if (flush_unissued_instr_i || flush_i) begin
unresolved_branch_n = 1'b0;
end
end
// ---------------------------------------------------------
// 1. Issue instruction and read operand
// ---------------------------------------------------------
issue_read_operands issue_read_operands_i (
issue_read_operands i_issue_read_operands (
.flush_i ( flush_unissued_instr_i ),
.issue_instr_i ( issue_instr_sb_iro ),
.issue_instr_valid_i ( issue_instr_valid_sb_iro ),
.issue_ack_o ( issue_ack_iro_sb ),
.issue_instr_i ( issue_instr_rename_iro ),
.issue_instr_valid_i ( issue_instr_valid_rename_iro ),
.issue_ack_o ( issue_ack_iro_rename ),
.rs1_o ( rs1_iro_sb ),
.rs1_i ( rs1_sb_iro ),
.rs1_valid_i ( rs1_valid_sb_iro ),
@ -141,40 +118,45 @@ module issue_stage #(
.rd_clobber_i ( rd_clobber_sb_iro ),
.*
);
// ---------------------------------------------------------
// 2. Manage issued instructions in a scoreboard
// 2. Re-name
// ---------------------------------------------------------
re_name i_re_name (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.issue_instr_i ( issue_instr_sb_rename ),
.issue_instr_valid_i ( issue_instr_valid_sb_rename ),
.issue_ack_o ( issue_ack_rename_sb ),
.issue_instr_o ( issue_instr_rename_iro ),
.issue_instr_valid_o ( issue_instr_valid_rename_iro ),
.issue_ack_i ( issue_ack_iro_rename )
);
// ---------------------------------------------------------
// 3. Manage issued instructions in a scoreboard
// ---------------------------------------------------------
scoreboard #(
.NR_ENTRIES ( NR_ENTRIES ),
.NR_WB_PORTS ( NR_WB_PORTS )
)
scoreboard_i
(
.unresolved_branch_i ( unresolved_branch_q ),
.rd_clobber_o ( rd_clobber_sb_iro ),
.rs1_i ( rs1_iro_sb ),
.rs1_o ( rs1_sb_iro ),
.rs1_valid_o ( rs1_valid_sb_iro ),
.rs2_i ( rs2_iro_sb ),
.rs2_o ( rs2_sb_iro ),
.rs2_valid_o ( rs2_valid_iro_sb ),
.NR_ENTRIES ( NR_ENTRIES ),
.NR_WB_PORTS ( NR_WB_PORTS )
) i_scoreboard (
.unresolved_branch_i ( 1'b0 ),
.rd_clobber_o ( rd_clobber_sb_iro ),
.rs1_i ( rs1_iro_sb ),
.rs1_o ( rs1_sb_iro ),
.rs1_valid_o ( rs1_valid_sb_iro ),
.rs2_i ( rs2_iro_sb ),
.rs2_o ( rs2_sb_iro ),
.rs2_valid_o ( rs2_valid_iro_sb ),
.issue_instr_o ( issue_instr_sb_iro ),
.issue_instr_valid_o ( issue_instr_valid_sb_iro ),
.issue_ack_i ( issue_ack_iro_sb ),
.issue_instr_o ( issue_instr_sb_rename ),
.issue_instr_valid_o ( issue_instr_valid_sb_rename ),
.issue_ack_i ( issue_ack_rename_sb ),
.trans_id_i ( trans_id_i ),
.wdata_i ( wdata_i ),
.ex_i ( ex_ex_i ),
.trans_id_i ( trans_id_i ),
.wbdata_i ( wbdata_i ),
.ex_i ( ex_ex_i ),
.*
);
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
unresolved_branch_q <= 1'b0;
end else begin
unresolved_branch_q <= unresolved_branch_n;
end
end
endmodule

View file

@ -48,7 +48,7 @@ module lfsr #(
// output assignment
refill_way_oh = 'b0;
refill_way_oh[shift_q[LOG_WIDTH-1:0]] = 1'b1;
refill_way_bin = shift_q;
refill_way_bin = shift_q[$clog2(WIDTH)-1:0];
end
always_ff @(posedge clk_i or negedge rst_ni) begin : proc_

View file

@ -15,11 +15,11 @@
import ariane_pkg::*;
module lsu #(
parameter int ASID_WIDTH = 1,
parameter int unsigned ASID_WIDTH = 1,
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000,
parameter int unsigned AXI_ID_WIDTH = 10,
parameter int unsigned AXI_USER_WIDTH = 1
)(
)(
input logic clk_i,
input logic rst_ni,
input logic flush_i,
@ -41,12 +41,12 @@ module lsu #(
input logic enable_translation_i, // enable virtual memory translation
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic fetch_req_i, // Instruction fetch interface
output logic fetch_gnt_o, // Instruction fetch interface
output logic fetch_valid_o, // Instruction fetch interface
input logic [63:0] fetch_vaddr_i, // Instruction fetch interface
output logic [63:0] fetch_rdata_o, // Instruction fetch interface
output exception_t fetch_ex_o, // Instruction fetch interface
output logic fetch_valid_o, // Instruction fetch interface
output logic [63:0] fetch_paddr_o, // Instruction fetch interface
output exception_t fetch_exception_o, // Instruction fetch interface
input priv_lvl_t priv_lvl_i, // From CSR register file
input priv_lvl_t ld_st_priv_lvl_i, // From CSR register file
@ -59,13 +59,6 @@ module lsu #(
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic dcache_miss_o,
// Instruction memory/cache
output logic [63:0] instr_if_address_o,
output logic instr_if_data_req_o,
output logic [3:0] instr_if_data_be_o,
input logic instr_if_data_gnt_i,
input logic instr_if_data_rvalid_i,
input logic [63:0] instr_if_data_rdata_i,
input logic dcache_en_i,
input logic flush_dcache_i,

View file

@ -17,11 +17,10 @@
import ariane_pkg::*;
module mmu #(
parameter int INSTR_TLB_ENTRIES = 4,
parameter int DATA_TLB_ENTRIES = 4,
parameter int ASID_WIDTH = 1
)
(
parameter int unsigned INSTR_TLB_ENTRIES = 4,
parameter int unsigned DATA_TLB_ENTRIES = 4,
parameter int unsigned ASID_WIDTH = 1
)(
input logic clk_i,
input logic rst_ni,
input logic flush_i,
@ -30,11 +29,11 @@ module mmu #(
// IF interface
input logic fetch_req_i,
output logic fetch_gnt_o,
output logic fetch_valid_o,
input logic [63:0] fetch_vaddr_i,
output logic [63:0] fetch_rdata_o, // pass-through because of interfaces
output exception_t fetch_ex_o, // write-back fetch exceptions (e.g.: bus faults, page faults, etc.)
output logic fetch_valid_o, // translation is valid
output logic [63:0] fetch_paddr_o,
output exception_t fetch_exception_o, // write-back fetch exceptions (e.g.: bus faults, page faults, etc.)
// LSU interface
// this is a more minimalistic interface because the actual addressing logic is handled
// in the LSU as we distinguish load and stores, what we do here is simple address translation
@ -61,15 +60,7 @@ module mmu #(
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
// Memory interfaces
// Instruction memory/cache
output logic [63:0] instr_if_address_o,
output logic instr_if_data_req_o,
output logic [3:0] instr_if_data_be_o,
input logic instr_if_data_gnt_i,
input logic instr_if_data_rvalid_i,
input logic [63:0] instr_if_data_rdata_i,
// Data memory/cache
// PTW memory interface
output logic [11:0] address_index_o,
output logic [43:0] address_tag_o,
output logic [63:0] data_wdata_o,
@ -83,9 +74,6 @@ module mmu #(
input logic data_rvalid_i,
input logic [63:0] data_rdata_i
);
// instruction error
// instruction error valid signal and exception, delayed one cycle
logic ierr_valid_q, ierr_valid_n;
logic iaccess_err; // insufficient privilege to access this instruction page
logic daccess_err; // insufficient privilege to access this data page
@ -94,11 +82,8 @@ module mmu #(
logic ptw_error; // PTW threw an exception
logic [63:0] faulting_address;
logic update_is_2M;
logic update_is_1G;
logic [38:0] update_vaddr;
logic [0:0] update_asid;
pte_t update_content;
tlb_update_t update_ptw_itlb, update_ptw_dtlb;
logic itlb_update;
logic itlb_lu_access;
@ -117,49 +102,42 @@ module mmu #(
// Assignments
assign itlb_lu_access = fetch_req_i;
assign dtlb_lu_access = lsu_req_i;
assign fetch_rdata_o = instr_if_data_rdata_i;
tlb #(
.TLB_ENTRIES ( INSTR_TLB_ENTRIES ),
.ASID_WIDTH ( ASID_WIDTH )
) itlb_i (
) i_itlb (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_tlb_i ),
.update_is_2M_i ( update_is_2M ),
.update_is_1G_i ( update_is_1G ),
.update_vpn_i ( update_vaddr[38:12] ),
.update_asid_i ( update_asid ),
.update_content_i ( update_content ),
.update_tlb_i ( itlb_update ),
.update_i ( update_ptw_itlb ),
.lu_access_i ( itlb_lu_access ),
.lu_asid_i ( asid_i ),
.lu_vaddr_i ( fetch_vaddr_i ),
.lu_content_o ( itlb_content ),
.lu_is_2M_o ( itlb_is_2M ),
.lu_is_1G_o ( itlb_is_1G ),
.lu_hit_o ( itlb_lu_hit )
);
tlb #(
.TLB_ENTRIES(DATA_TLB_ENTRIES),
.ASID_WIDTH(ASID_WIDTH))
dtlb_i (
.TLB_ENTRIES ( DATA_TLB_ENTRIES ),
.ASID_WIDTH ( ASID_WIDTH )
) i_dtlb (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_tlb_i ),
.update_is_2M_i ( update_is_2M ),
.update_is_1G_i ( update_is_1G ),
.update_vpn_i ( update_vaddr[38:12] ),
.update_asid_i ( update_asid ),
.update_content_i ( update_content ),
.update_tlb_i ( dtlb_update ),
.update_i ( update_ptw_dtlb ),
.lu_access_i ( dtlb_lu_access ),
.lu_asid_i ( asid_i ),
.lu_vaddr_i ( lsu_vaddr_i ),
.lu_content_o ( dtlb_content ),
.lu_is_2M_o ( dtlb_is_2M ),
.lu_is_1G_o ( dtlb_is_1G ),
.lu_hit_o ( dtlb_lu_hit )
@ -168,8 +146,7 @@ module mmu #(
ptw #(
.ASID_WIDTH ( ASID_WIDTH )
) ptw_i
(
) i_ptw (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.ptw_active_o ( ptw_active ),
@ -178,20 +155,16 @@ module mmu #(
.faulting_address_o ( faulting_address ),
.enable_translation_i ( enable_translation_i ),
.itlb_update_o ( itlb_update ),
.dtlb_update_o ( dtlb_update ),
.update_content_o ( update_content ),
.update_is_2M_o ( update_is_2M ),
.update_is_1G_o ( update_is_1G ),
.update_vaddr_o ( update_vaddr ),
.update_asid_o ( update_asid ),
.itlb_update_o ( update_ptw_itlb ),
.dtlb_update_o ( update_ptw_dtlb ),
.itlb_access_i ( itlb_lu_access ),
.itlb_miss_i ( ~itlb_lu_hit ),
.itlb_hit_i ( itlb_lu_hit ),
.itlb_vaddr_i ( fetch_vaddr_i ),
.dtlb_access_i ( dtlb_lu_access ),
.dtlb_miss_i ( ~dtlb_lu_hit ),
.dtlb_hit_i ( dtlb_lu_hit ),
.dtlb_vaddr_i ( lsu_vaddr_i ),
.*
);
@ -199,46 +172,39 @@ module mmu #(
//-----------------------
// Instruction Interface
//-----------------------
exception_t fetch_exception;
logic exception_fifo_empty;
// This is a full memory interface, e.g.: it handles all signals to the I$
// Exceptions are always signaled together with the fetch_valid_o signal
// The instruction interface is a simple request response interface
always_comb begin : instr_interface
// MMU disabled: just pass through
instr_if_data_req_o = fetch_req_i;
instr_if_address_o = fetch_vaddr_i; // play through in case we disabled address translation
fetch_gnt_o = instr_if_data_gnt_i;
fetch_valid_o = fetch_req_i;
fetch_paddr_o = fetch_vaddr_i; // play through in case we disabled address translation
// two potential exception sources:
// 1. HPTW threw an exception -> signal with a page fault exception
// 2. We got an access error because of insufficient permissions -> throw an access exception
fetch_exception = '0;
ierr_valid_n = 1'b0; // we keep a separate valid signal in case of an error
fetch_exception_o = '0;
// Check whether we are allowed to access this memory region from a fetch perspective
iaccess_err = fetch_req_i && (((priv_lvl_i == PRIV_LVL_U) && ~itlb_content.u)
|| ((priv_lvl_i == PRIV_LVL_S) && itlb_content.u));
// check that the upper-most bits (63-39) are the same, otherwise throw a page fault exception...
if (fetch_req_i && !((&fetch_vaddr_i[63:39]) == 1'b1 || (|fetch_vaddr_i[63:39]) == 1'b0)) begin
fetch_exception = {INSTR_PAGE_FAULT, fetch_vaddr_i, 1'b1};
ierr_valid_n = 1'b1;
fetch_gnt_o = 1'b1;
fetch_exception_o = {INSTR_PAGE_FAULT, fetch_vaddr_i, 1'b1};
end
// MMU enabled: address from TLB, request delayed until hit. Error when TLB
// hit and no access right or TLB hit and translated address not valid (e.g.
// AXI decode error), or when PTW performs walk due to ITLB miss and raises
// an error.
if (enable_translation_i) begin
instr_if_data_req_o = 1'b0;
fetch_valid_o = 1'b0;
// 4K page
instr_if_address_o = {itlb_content.ppn, fetch_vaddr_i[11:0]};
fetch_paddr_o = {itlb_content.ppn, fetch_vaddr_i[11:0]};
// Mega page
if (itlb_is_2M) begin
instr_if_address_o[20:12] = fetch_vaddr_i[20:12];
fetch_paddr_o[20:12] = fetch_vaddr_i[20:12];
end
// Giga page
if (itlb_is_1G) begin
instr_if_address_o[29:12] = fetch_vaddr_i[29:12];
fetch_paddr_o[29:12] = fetch_vaddr_i[29:12];
end
// ---------
@ -246,20 +212,11 @@ module mmu #(
// --------
// if we hit the ITLB output the request signal immediately
if (itlb_lu_hit) begin
instr_if_data_req_o = fetch_req_i;
fetch_valid_o = fetch_req_i;
// we got an access error
if (iaccess_err) begin
// immediately grant a fetch which threw an exception, and stop the request from happening
instr_if_data_req_o = 1'b0;
// in case we hit the TLB with an exception we need to order the memory request e.g.
// we need to wait until all outstanding request drained otherwise we get an out-of order result
// which will be wrong
if (exception_fifo_empty) begin
fetch_gnt_o = 1'b1;
ierr_valid_n = 1'b1;
end
// throw a page fault
fetch_exception = {INSTR_PAGE_FAULT, fetch_vaddr_i, 1'b1};
fetch_exception_o = {INSTR_PAGE_FAULT, fetch_vaddr_i, 1'b1};
end
end else
// ---------
@ -267,50 +224,10 @@ module mmu #(
// ---------
// watch out for exceptions happening during walking the page table
if (ptw_active && walking_instr) begin
// check that the fetch address is equal with the faulting address as it could be that the page table walker
// has walked an instruction the instruction fetch stage is no longer interested in as we didn't give a grant
// we should not propagate back the exception when the request is no longer high
if (faulting_address == fetch_vaddr_i && fetch_req_i) begin
// on an error pass through fetch with an error signaled
fetch_gnt_o = ptw_error;
ierr_valid_n = ptw_error; // signal valid/error on next cycle
end
fetch_exception = {INSTR_PAGE_FAULT, {25'b0, update_vaddr}, 1'b1};
fetch_valid_o = ptw_error;
fetch_exception_o = {INSTR_PAGE_FAULT, {25'b0, update_vaddr}, 1'b1};
end
end
// the fetch is valid if we either got an error in the previous cycle or the I$ gave us a valid signal.
fetch_valid_o = instr_if_data_rvalid_i || ierr_valid_q;
end
// ---------------------------
// Fetch exception register
// ---------------------------
// We can have two outstanding transactions
fifo #(
.dtype ( exception_t ),
.DEPTH ( 2 )
) i_exception_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.full_o ( ),
.empty_o ( exception_fifo_empty ),
.single_element_o ( ),
.data_i ( fetch_exception ),
.push_i ( fetch_gnt_o ),
.data_o ( fetch_ex_o ),
.pop_i ( fetch_valid_o ),
.*
);
// ----------
// Registers
// ----------
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
ierr_valid_q <= 1'b0;
end else begin
ierr_valid_q <= ierr_valid_n;
end
end
//-----------------------

View file

@ -140,7 +140,7 @@ module mult (
// Find First one
// ---------------------
// this unit is used to speed up the sequential division by shifting the dividend first
alu_ff #(
ff1 #(
.LEN ( 64 )
) i_ff1 (
.in_i ( ff1_input ), // signed = operand_b_rev_neg, unsigned operand_b_rev
@ -483,70 +483,3 @@ module mul (
end
end
endmodule
// -----------------
// Find First One
// -----------------
// Find-first-one built as a binary selection tree.
//   in_i        : vector to search
//   first_one_o : index of the least significant set bit of in_i, only meaningful while
//                 no_ones_o is low
//   no_ones_o   : high when no bit of in_i is set
// Nodes are stored heap-style: node 0 is the root and level L occupies
// indices 2**L-1 .. 2**(L+1)-2. At every node the child covering the lower indices wins.
module alu_ff #(
    parameter int unsigned LEN = 32
)(
    input  logic [LEN-1:0]         in_i,
    output logic [$clog2(LEN)-1:0] first_one_o,
    output logic                   no_ones_o
);
    localparam int unsigned NUM_LEVELS = $clog2(LEN);

    // constant table: index_lut[j] == j
    logic [LEN-1:0] [NUM_LEVELS-1:0]           index_lut;
    // per node: at least one bit is set below this node
    logic [2**NUM_LEVELS-1:0]                  sel_nodes;
    // per node: index selected below this node
    logic [2**NUM_LEVELS-1:0] [NUM_LEVELS-1:0] index_nodes;

    // ----------------------------
    // Generate Tree Structure
    // ----------------------------
    generate
        for (genvar j = 0; j < LEN; j++) begin
            assign index_lut[j] = $unsigned(j);
        end
    endgenerate

    generate
        for (genvar level = 0; level < NUM_LEVELS; level++) begin
            // inner levels: merge the two children, prefer the one covering the lower indices
            if (level < NUM_LEVELS-1) begin
                for (genvar l = 0; l < 2**level; l++) begin
                    assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
                    assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
                        index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
                end
            end
            // leaf level: every node looks at the input bits k*2 and k*2+1.
            // The highest valid input index is LEN-1, so all three range checks compare against
            // LEN-1. Comparing against LEN would read in_i / index_lut out of range as soon as
            // LEN is not a power of two (for powers of two both forms select the same branch).
            if (level == NUM_LEVELS-1) begin
                for (genvar k = 0; k < 2**level; k++) begin
                    // if two successive indices are still in the vector...
                    if (k * 2 < LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = in_i[k*2] | in_i[k*2+1];
                        assign index_nodes[2**level-1+k] = (in_i[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
                    end
                    // if only the first index is still in the vector...
                    if (k * 2 == LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = in_i[k*2];
                        assign index_nodes[2**level-1+k] = index_lut[k*2];
                    end
                    // if index is out of range
                    if (k * 2 > LEN-1) begin
                        assign sel_nodes[2**level-1+k]   = 1'b0;
                        assign index_nodes[2**level-1+k] = '0;
                    end
                end
            end
        end
    endgenerate

    // --------------------
    // Connect Output
    // --------------------
    assign first_one_o = index_nodes[0];
    assign no_ones_o   = ~sel_nodes[0];
endmodule

View file

@ -97,56 +97,54 @@ module nbdcache #(
// ------------------
// Cache Controller
// ------------------
generate
for (genvar i = 0; i < 3; i++) begin : master_ports
cache_ctrl #(
.SET_ASSOCIATIVITY ( SET_ASSOCIATIVITY ),
.INDEX_WIDTH ( INDEX_WIDTH ),
.TAG_WIDTH ( TAG_WIDTH ),
.CACHE_LINE_WIDTH ( CACHE_LINE_WIDTH ),
.CACHE_START_ADDR ( CACHE_START_ADDR )
) i_cache_ctrl (
.bypass_i ( ~enable_i ),
.busy_o ( busy [i] ),
.address_index_i ( address_index_i [i] ),
.address_tag_i ( address_tag_i [i] ),
.data_wdata_i ( data_wdata_i [i] ),
.data_req_i ( data_req_i [i] ),
.data_we_i ( data_we_i [i] ),
.data_be_i ( data_be_i [i] ),
.data_size_i ( data_size_i [i] ),
.kill_req_i ( kill_req_i [i] ),
.tag_valid_i ( tag_valid_i [i] ),
.data_gnt_o ( data_gnt_o [i] ),
.data_rvalid_o ( data_rvalid_o [i] ),
.data_rdata_o ( data_rdata_o [i] ),
.amo_op_i ( amo_op_i [i] ),
for (genvar i = 0; i < 3; i++) begin : master_ports
cache_ctrl #(
.SET_ASSOCIATIVITY ( SET_ASSOCIATIVITY ),
.INDEX_WIDTH ( INDEX_WIDTH ),
.TAG_WIDTH ( TAG_WIDTH ),
.CACHE_LINE_WIDTH ( CACHE_LINE_WIDTH ),
.CACHE_START_ADDR ( CACHE_START_ADDR )
) i_cache_ctrl (
.bypass_i ( ~enable_i ),
.busy_o ( busy [i] ),
.address_index_i ( address_index_i [i] ),
.address_tag_i ( address_tag_i [i] ),
.data_wdata_i ( data_wdata_i [i] ),
.data_req_i ( data_req_i [i] ),
.data_we_i ( data_we_i [i] ),
.data_be_i ( data_be_i [i] ),
.data_size_i ( data_size_i [i] ),
.kill_req_i ( kill_req_i [i] ),
.tag_valid_i ( tag_valid_i [i] ),
.data_gnt_o ( data_gnt_o [i] ),
.data_rvalid_o ( data_rvalid_o [i] ),
.data_rdata_o ( data_rdata_o [i] ),
.amo_op_i ( amo_op_i [i] ),
.req_o ( req [i+1] ),
.addr_o ( addr [i+1] ),
.gnt_i ( gnt [i+1] ),
.data_i ( rdata ),
.tag_o ( tag [i+1] ),
.data_o ( wdata [i+1] ),
.we_o ( we [i+1] ),
.be_o ( be [i+1] ),
.hit_way_i ( hit_way ),
.req_o ( req [i+1] ),
.addr_o ( addr [i+1] ),
.gnt_i ( gnt [i+1] ),
.data_i ( rdata ),
.tag_o ( tag [i+1] ),
.data_o ( wdata [i+1] ),
.we_o ( we [i+1] ),
.be_o ( be [i+1] ),
.hit_way_i ( hit_way ),
.miss_req_o ( miss_req [i] ),
.miss_gnt_i ( miss_gnt [i] ),
.active_serving_i ( active_serving [i] ),
.critical_word_i ( critical_word ),
.critical_word_valid_i ( critical_word_valid ),
.bypass_gnt_i ( bypass_gnt [i] ),
.bypass_valid_i ( bypass_valid [i] ),
.bypass_data_i ( bypass_data [i] ),
.miss_req_o ( miss_req [i] ),
.miss_gnt_i ( miss_gnt [i] ),
.active_serving_i ( active_serving [i] ),
.critical_word_i ( critical_word ),
.critical_word_valid_i ( critical_word_valid ),
.bypass_gnt_i ( bypass_gnt [i] ),
.bypass_valid_i ( bypass_valid [i] ),
.bypass_data_i ( bypass_data [i] ),
.mshr_addr_o ( mshr_addr [i] ), // TODO
.mshr_addr_matches_i ( mshr_addr_matches [i] ), // TODO
.*
);
end
endgenerate
.mshr_addr_o ( mshr_addr [i] ), // TODO
.mshr_addr_matches_i ( mshr_addr_matches [i] ), // TODO
.*
);
end
// ------------------
// Miss Handling Unit
@ -180,50 +178,46 @@ module nbdcache #(
// --------------
// Memory Arrays
// --------------
generate
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin : sram_block
sram #(
.DATA_WIDTH ( CACHE_LINE_WIDTH ),
.NUM_WORDS ( NUM_WORDS )
) data_sram (
.req_i ( req_ram [i] ),
.we_i ( we_ram ),
.addr_i ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
.wdata_i ( wdata_ram.data ),
.be_i ( be_ram.data ),
.rdata_o ( rdata_ram[i].data ),
.*
);
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin : sram_block
sram #(
.DATA_WIDTH ( CACHE_LINE_WIDTH ),
.NUM_WORDS ( NUM_WORDS )
) data_sram (
.req_i ( req_ram [i] ),
.we_i ( we_ram ),
.addr_i ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
.wdata_i ( wdata_ram.data ),
.be_i ( be_ram.data ),
.rdata_o ( rdata_ram[i].data ),
.*
);
sram #(
.DATA_WIDTH ( TAG_WIDTH ),
.NUM_WORDS ( NUM_WORDS )
) tag_sram (
.req_i ( req_ram [i] ),
.we_i ( we_ram ),
.addr_i ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
.wdata_i ( wdata_ram.tag ),
.be_i ( be_ram.tag ),
.rdata_o ( rdata_ram[i].tag ),
.*
);
sram #(
.DATA_WIDTH ( TAG_WIDTH ),
.NUM_WORDS ( NUM_WORDS )
) tag_sram (
.req_i ( req_ram [i] ),
.we_i ( we_ram ),
.addr_i ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
.wdata_i ( wdata_ram.tag ),
.be_i ( be_ram.tag ),
.rdata_o ( rdata_ram[i].tag ),
.*
);
end
endgenerate
end
// ----------------
// Dirty SRAM
// ----------------
logic [DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
generate
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin
assign dirty_wdata[i] = wdata_ram.dirty;
assign dirty_wdata[SET_ASSOCIATIVITY + i] = wdata_ram.valid;
assign rdata_ram[i].valid = dirty_rdata[SET_ASSOCIATIVITY + i];
assign rdata_ram[i].dirty = dirty_rdata[i];
end
endgenerate
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin
assign dirty_wdata[i] = wdata_ram.dirty;
assign dirty_wdata[SET_ASSOCIATIVITY + i] = wdata_ram.valid;
assign rdata_ram[i].valid = dirty_rdata[SET_ASSOCIATIVITY + i];
assign rdata_ram[i].dirty = dirty_rdata[i];
end
sram #(
.DATA_WIDTH ( DIRTY_WIDTH ),
@ -322,11 +316,9 @@ module tag_cmp #(
sel_tag = tag_i[i];
end
generate
for (genvar j = 0; j < SET_ASSOCIATIVITY; j++) begin : tag_cmp
assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0;
end
endgenerate
for (genvar j = 0; j < SET_ASSOCIATIVITY; j++) begin : tag_cmp
assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0;
end
always_comb begin

View file

@ -48,19 +48,7 @@ module pcgen_stage (
// branch-predict input register -> this path is critical
branchpredict_t resolved_branch_q;
btb #(
.NR_ENTRIES ( BTB_ENTRIES ),
.BITS_SATURATION_COUNTER ( BITS_SATURATION_COUNTER )
)
btb_i
(
// Use the PC from last cycle to perform branch lookup for the current cycle
.flush_i ( flush_bp_i ),
.vpc_i ( npc_q ),
.branch_predict_i ( resolved_branch_q ), // update port
.branch_predict_o ( branch_predict_btb ), // read port
.*
);
assign branch_predict_btb = '0;
// -------------------
// Next PC
// -------------------

View file

@ -17,27 +17,27 @@ import ariane_pkg::*;
module perf_counters #(
int unsigned NR_EXTERNAL_COUNTERS = 1
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// SRAM like interface
input logic [11:0] addr_i, // read/write address
input logic we_i, // write enable
input logic [63:0] data_i, // data to write
output logic [63:0] data_o, // data to read
input logic [11:0] addr_i, // read/write address
input logic we_i, // write enable
input logic [63:0] data_i, // data to write
output logic [63:0] data_o, // data to read
// from commit stage
input scoreboard_entry_t commit_instr_i,
input logic commit_ack_o,
input scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_i, // the instruction we want to commit
input logic [NR_COMMIT_PORTS-1:0] commit_ack_i, // acknowledge that we are indeed committing
// from L1 caches
input logic l1_icache_miss_i,
input logic l1_dcache_miss_i,
input logic l1_icache_miss_i,
input logic l1_dcache_miss_i,
// from MMU
input logic itlb_miss_i,
input logic dtlb_miss_i,
input logic itlb_miss_i,
input logic dtlb_miss_i,
// from PC Gen
input exception_t ex_i,
input logic eret_i,
input branchpredict_t resolved_branch_i
input exception_t ex_i,
input logic eret_i,
input branchpredict_t resolved_branch_i
);
logic [11:0][63:0] perf_counter_d, perf_counter_q;
@ -62,25 +62,26 @@ module perf_counters #(
perf_counter_d[PERF_DTLB_MISS] = perf_counter_q[PERF_DTLB_MISS] + 1'b1;
// instruction related perf counters
if (commit_ack_o) begin
if (commit_instr_i.fu == LOAD)
perf_counter_d[PERF_LOAD] = perf_counter_q[PERF_LOAD] + 1'b1;
for (int unsigned i = 0; i < NR_COMMIT_PORTS-1; i++) begin
if (commit_ack_i[i]) begin
if (commit_instr_i[i].fu == LOAD)
perf_counter_d[PERF_LOAD] = perf_counter_q[PERF_LOAD] + 1'b1;
if (commit_instr_i.fu == STORE)
perf_counter_d[PERF_STORE] = perf_counter_q[PERF_STORE] + 1'b1;
if (commit_instr_i[i].fu == STORE)
perf_counter_d[PERF_STORE] = perf_counter_q[PERF_STORE] + 1'b1;
if (commit_instr_i.fu == CTRL_FLOW)
perf_counter_d[PERF_BRANCH_JUMP] = perf_counter_q[PERF_BRANCH_JUMP] + 1'b1;
if (commit_instr_i[i].fu == CTRL_FLOW)
perf_counter_d[PERF_BRANCH_JUMP] = perf_counter_q[PERF_BRANCH_JUMP] + 1'b1;
// The standard software calling convention uses register x1 to hold the return address on a call
// the unconditional jump is decoded as ADD op
if (commit_instr_i.fu == CTRL_FLOW && commit_instr_i.op == '0 && commit_instr_i.rd == 'b1)
perf_counter_d[PERF_CALL] = perf_counter_q[PERF_CALL] + 1'b1;
// Return from call
if (commit_instr_i.op == JALR && commit_instr_i.rs1 == 'b1)
perf_counter_d[PERF_RET] = perf_counter_q[PERF_RET] + 1'b1;
// The standard software calling convention uses register x1 to hold the return address on a call
// the unconditional jump is decoded as ADD op
if (commit_instr_i[i].fu == CTRL_FLOW && commit_instr_i[i].op == '0 && commit_instr_i[i].rd == 'b1)
perf_counter_d[PERF_CALL] = perf_counter_q[PERF_CALL] + 1'b1;
// Return from call
if (commit_instr_i[i].op == JALR && commit_instr_i[i].rs1 == 'b1)
perf_counter_d[PERF_RET] = perf_counter_q[PERF_RET] + 1'b1;
end
end
if (ex_i.valid)

View file

@ -18,8 +18,7 @@ import ariane_pkg::*;
module ptw #(
parameter int ASID_WIDTH = 1
)
(
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush everything, we need to do this because
@ -47,23 +46,20 @@ module ptw #(
input logic data_rvalid_i,
input logic [63:0] data_rdata_i,
// to TLBs, update logic
output logic itlb_update_o,
output logic dtlb_update_o,
output pte_t update_content_o,
output tlb_update_t itlb_update_o,
output tlb_update_t dtlb_update_o,
output logic update_is_2M_o,
output logic update_is_1G_o,
output logic [38:0] update_vaddr_o,
output logic [ASID_WIDTH-1:0] update_asid_o,
input logic [ASID_WIDTH-1:0] asid_i,
// from TLBs
// did we miss?
input logic itlb_access_i,
input logic itlb_miss_i,
input logic itlb_hit_i,
input logic [63:0] itlb_vaddr_i,
input logic dtlb_access_i,
input logic dtlb_miss_i,
input logic dtlb_hit_i,
input logic [63:0] dtlb_vaddr_i,
// from CSR file
input logic [43:0] satp_ppn_i, // ppn from satp
@ -107,6 +103,7 @@ module ptw #(
// Assignments
assign update_vaddr_o = vaddr_q;
assign ptw_active_o = (CS != IDLE);
assign walking_instr_o = is_instr_ptw_q;
// directly output the correct physical address
@ -116,13 +113,23 @@ module ptw #(
assign kill_req_o = '0;
// we are never going to write with the HPTW
assign data_wdata_o = 64'b0;
// -----------
// TLB Update
// -----------
assign itlb_update_o.vpn = vaddr_q[38:12];
assign dtlb_update_o.vpn = vaddr_q[38:12];
// update the correct page table level
assign update_is_2M_o = (ptw_lvl_q == LVL2);
assign update_is_1G_o = (ptw_lvl_q == LVL1);
assign itlb_update_o.is_2M = (ptw_lvl_q == LVL2);
assign itlb_update_o.is_1G = (ptw_lvl_q == LVL1);
assign dtlb_update_o.is_2M = (ptw_lvl_q == LVL2);
assign dtlb_update_o.is_1G = (ptw_lvl_q == LVL1);
// output the correct ASID
assign update_asid_o = tlb_update_asid_q;
assign itlb_update_o.asid = tlb_update_asid_q;
assign dtlb_update_o.asid = tlb_update_asid_q;
// set the global mapping bit
assign update_content_o = pte | (global_mapping_q << 5);
assign itlb_update_o.content = pte | (global_mapping_q << 5);
assign dtlb_update_o.content = pte | (global_mapping_q << 5);
assign tag_valid_o = tag_valid_q;
//-------------------
@ -151,26 +158,26 @@ module ptw #(
always_comb begin : ptw
// default assignments
// PTW memory interface
tag_valid_n = 1'b0;
data_req_o = 1'b0;
data_be_o = 8'hFF;
data_size_o = 2'b11;
data_we_o = 1'b0;
ptw_error_o = 1'b0;
itlb_update_o = 1'b0;
dtlb_update_o = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
NS = CS;
global_mapping_n = global_mapping_q;
tag_valid_n = 1'b0;
data_req_o = 1'b0;
data_be_o = 8'hFF;
data_size_o = 2'b11;
data_we_o = 1'b0;
ptw_error_o = 1'b0;
itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
NS = CS;
global_mapping_n = global_mapping_q;
// input registers
tlb_update_asid_n = tlb_update_asid_q;
vaddr_n = vaddr_q;
faulting_address_o = '0;
tlb_update_asid_n = tlb_update_asid_q;
vaddr_n = vaddr_q;
faulting_address_o = '0;
itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0;
itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0;
case (CS)
@ -180,7 +187,7 @@ module ptw #(
global_mapping_n = 1'b0;
is_instr_ptw_n = 1'b0;
// if we got an ITLB miss
if (enable_translation_i & itlb_access_i & itlb_miss_i & ~dtlb_access_i) begin
if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
ptw_pptr_n = {satp_ppn_i, itlb_vaddr_i[38:30], 3'b0};
is_instr_ptw_n = 1'b1;
tlb_update_asid_n = asid_i;
@ -188,7 +195,7 @@ module ptw #(
NS = WAIT_GRANT;
itlb_miss_o = 1'b1;
// we got a DTLB miss
end else if (en_ld_st_translation_i & dtlb_access_i & dtlb_miss_i) begin
end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[38:30], 3'b0};
tlb_update_asid_n = asid_i;
vaddr_n = dtlb_vaddr_i;
@ -241,7 +248,7 @@ module ptw #(
if (!pte.x || !pte.a)
NS = PROPAGATE_ERROR;
else
itlb_update_o = 1'b1;
itlb_update_o.valid = 1'b1;
end else begin
// ------------
@ -253,7 +260,7 @@ module ptw #(
// we can directly raise an error. This doesn't put a useless
// entry into the TLB.
if (pte.a && (pte.r || (pte.x && mxr_i))) begin
dtlb_update_o = 1'b1;
dtlb_update_o.valid = 1'b1;
end else begin
NS = PROPAGATE_ERROR;
end
@ -261,7 +268,7 @@ module ptw #(
// If the request was a store and the page is not write-able, raise an error
// the same applies if the dirty flag is not set
if (lsu_is_store_i && (!pte.w || !pte.d)) begin
dtlb_update_o = 1'b0;
dtlb_update_o.valid = 1'b0;
NS = PROPAGATE_ERROR;
end
end
@ -270,12 +277,12 @@ module ptw #(
// exception.
if (ptw_lvl_q == LVL1 && pte.ppn[17:0] != '0) begin
NS = PROPAGATE_ERROR;
dtlb_update_o = 1'b0;
itlb_update_o = 1'b0;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end else if (ptw_lvl_q == LVL2 && pte.ppn[8:0] != '0) begin
NS = PROPAGATE_ERROR;
dtlb_update_o = 1'b0;
itlb_update_o = 1'b0;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end
// this is a pointer to the next TLB level
end else begin

75
src/re_name.sv Normal file
View file

@ -0,0 +1,75 @@
// Author: Florian Zaruba, ETH Zurich
// Date: 03.10.2017
// Description: Re-name registers
//
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
import ariane_pkg::*;
// Register re-naming stage.
//
// Sits in the issue path and passes the valid/ack handshake through
// unchanged. It keeps one "rename" bit per architectural register
// (32 entries). The bit of the destination register is toggled whenever an
// instruction is acknowledged, and the bit belonging to each source register
// is prepended to that source register's address on the outgoing instruction.
//
// NOTE(review): issue_instr_i and issue_instr_o share the type
// scoreboard_entry_t, so `{bit, issue_instr_i.rsX}` is one bit wider than the
// field it is assigned to. The prepended rename bit therefore looks like it is
// truncated away on assignment - confirm the intended field widths.
// NOTE(review): only rs1/rs2 are rewritten; rd leaves this module unchanged.
module re_name (
    input  logic              clk_i,    // Clock
    input  logic              rst_ni,   // Asynchronous reset active low
    // coming from scoreboard
    input  scoreboard_entry_t issue_instr_i,
    input  logic              issue_instr_valid_i,
    output logic              issue_ack_o,
    // re-named instruction and forwarded handshake
    output scoreboard_entry_t issue_instr_o,
    output logic              issue_instr_valid_o,
    input  logic              issue_ack_i
);

    // pass through handshaking signals
    assign issue_instr_valid_o = issue_instr_valid_i;
    assign issue_ack_o         = issue_ack_i;

    // keep track of re-naming data structures: one rename bit per register
    logic [31:0] re_name_table_n, re_name_table_q;

    // -------------------
    // Re-naming
    // -------------------
    always_comb begin
        // default assignments
        re_name_table_n = re_name_table_q;
        issue_instr_o   = issue_instr_i;

        if (issue_ack_i) begin
            // the instruction is acknowledged: toggle the rename bit of its destination register
            re_name_table_n[issue_instr_i.rd] = re_name_table_q[issue_instr_i.rd] ^ 1'b1;
        end

        // re-name the source registers, each one with its own table entry
        issue_instr_o.rs1 = { re_name_table_q[issue_instr_i.rs1], issue_instr_i.rs1 };
        issue_instr_o.rs2 = { re_name_table_q[issue_instr_i.rs2], issue_instr_i.rs2 };

        // we don't want to re-name register zero, it is non-writeable anyway
        re_name_table_n[0] = 1'b0;
    end

    // -------------------
    // Registers
    // -------------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            re_name_table_q <= '0;
        end else begin
            re_name_table_q <= re_name_table_n;
        end
    end

endmodule

View file

@ -46,107 +46,130 @@ module regfile
// Write port W1
input logic [4:0] waddr_a_i,
input logic [DATA_WIDTH-1:0] wdata_a_i,
input logic we_a_i
input logic we_a_i,
// Write port W2
input logic [4:0] waddr_b_i,
input logic [DATA_WIDTH-1:0] wdata_b_i,
input logic we_b_i
);
localparam ADDR_WIDTH = 5;;
localparam NUM_WORDS = 2**ADDR_WIDTH;
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH;
logic [DATA_WIDTH-1:0] mem[NUM_WORDS];
logic [DATA_WIDTH-1:0] mem[NUM_WORDS];
logic [NUM_WORDS-1:1] waddr_onehot_a;
logic [NUM_WORDS-1:1] waddr_onehot_b, waddr_onehot_b_q;
logic [NUM_WORDS-1:1] waddr_onehot_a;
logic [NUM_WORDS-1:1] mem_clocks;
logic [DATA_WIDTH-1:0] wdata_a_q;
logic [DATA_WIDTH-1:0] wdata_b_q;
logic [NUM_WORDS-1:1] mem_clocks;
logic [DATA_WIDTH-1:0] wdata_a_q;
// Write port W1
logic [ADDR_WIDTH-1:0] raddr_a_int, raddr_b_int, waddr_a_int;
assign raddr_a_int = raddr_a_i[ADDR_WIDTH-1:0];
assign raddr_b_int = raddr_b_i[ADDR_WIDTH-1:0];
assign waddr_a_int = waddr_a_i[ADDR_WIDTH-1:0];
// Write port W1
logic [ADDR_WIDTH-1:0] raddr_a_int, raddr_b_int, waddr_a_int;
int unsigned i;
int unsigned j;
int unsigned k;
int unsigned l;
genvar x;
assign raddr_a_int = raddr_a_i[ADDR_WIDTH-1:0];
assign raddr_b_int = raddr_b_i[ADDR_WIDTH-1:0];
assign waddr_a_int = waddr_a_i[ADDR_WIDTH-1:0];
logic clk_int;
//-----------------------------------------------------------------------------
//-- READ : Read address decoder RAD
//-----------------------------------------------------------------------------
assign rdata_a_o = mem[raddr_a_int];
assign rdata_b_o = mem[raddr_b_int];
logic clk_int;
//-----------------------------------------------------------------------------
// WRITE : SAMPLE INPUT DATA
//---------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-- READ : Read address decoder RAD
//-----------------------------------------------------------------------------
assign rdata_a_o = mem[raddr_a_int];
assign rdata_b_o = mem[raddr_b_int];
cluster_clock_gating CG_WE_GLOBAL
(
.clk_i ( clk ),
.en_i ( we_a_i ),
.test_en_i ( test_en_i ),
.clk_o ( clk_int )
);
//-----------------------------------------------------------------------------
// WRITE : SAMPLE INPUT DATA
//---------------------------------------------------------------------------
// use clk_int here, since otherwise we don't want to write anything anyway
always_ff @(posedge clk_int, negedge rst_n) begin : sample_waddr
if (~rst_n) begin
wdata_a_q <= '0;
wdata_b_q <= '0;
waddr_onehot_b_q <= '0;
end else begin
if (we_a_i)
wdata_a_q <= wdata_a_i;
if (we_b_i)
wdata_b_q <= wdata_b_i;
cluster_clock_gating CG_WE_GLOBAL
(
.clk_i ( clk ),
.en_i ( we_a_i ),
.test_en_i ( test_en_i ),
.clk_o ( clk_int )
);
// use clk_int here, since otherwise we don't want to write anything anyway
always_ff @(posedge clk_int, negedge rst_n) begin : sample_waddr
if (~rst_n) begin
wdata_a_q <= '0;
end else begin
if (we_a_i)
wdata_a_q <= wdata_a_i;
waddr_onehot_b_q <= waddr_onehot_b;
end
end
end
//-----------------------------------------------------------------------------
// WRITE : Write Address Decoder (WAD), combinatorial process
//-----------------------------------------------------------------------------
always_comb begin : p_WADa
for (int unsigned i = 1; i < NUM_WORDS; i++) begin : p_WordItera
if ( (we_a_i == 1'b1 ) && (waddr_a_int == i[4:0]) )
waddr_onehot_a[i] = 1'b1;
else
waddr_onehot_a[i] = 1'b0;
//-----------------------------------------------------------------------------
//-- WRITE : Write Address Decoder (WAD), combinatorial process
//-----------------------------------------------------------------------------
always_comb begin : p_WADa
for (i = 1; i < NUM_WORDS; i++) begin : p_WordItera
if ((we_a_i == 1'b1) && (waddr_a_i == i))
waddr_onehot_a[i] = 1'b1;
else
waddr_onehot_a[i] = 1'b0;
end
end
end
//-----------------------------------------------------------------------------
// WRITE : Clock gating (if integrated clock-gating cells are available)
//-----------------------------------------------------------------------------
genvar x;
generate
for (x = 1; x < NUM_WORDS; x++) begin : CG_CELL_WORD_ITER
cluster_clock_gating CG_Inst
(
.clk_i ( clk_int ),
.en_i ( waddr_onehot_a[x] ),
.test_en_i ( test_en_i ),
.clk_o ( mem_clocks[x] )
);
always_comb begin : p_WADb
for (j = 1; j < NUM_WORDS; j++) begin : p_WordIterb
if ((we_b_i == 1'b1) && (waddr_b_i == j))
waddr_onehot_b[j] = 1'b1;
else
waddr_onehot_b[j] = 1'b0;
end
end
endgenerate
//-----------------------------------------------------------------------------
// WRITE : Write operation
//-----------------------------------------------------------------------------
// Generate M = WORDS sequential processes, each of which describes one
// word of the memory. The processes are synchronized with the clocks
// ClocksxC(i), i = 0, 1, ..., M-1
// Use active low, i.e. transparent on low latches as storage elements
// Data is sampled on rising clock edge
//-----------------------------------------------------------------------------
//-- WRITE : Clock gating (if integrated clock-gating cells are available)
//-----------------------------------------------------------------------------
generate
for (x = 1; x < NUM_WORDS; x++)
begin : CG_CELL_WORD_ITER
cluster_clock_gating CG_Inst
(
.clk_i ( clk_int ),
.en_i ( waddr_onehot_a[x] | waddr_onehot_b[x] ),
.test_en_i ( test_en_i ),
.clk_o ( mem_clocks[x] )
);
end
endgenerate
always_latch begin : latch_wdata
// Note: The assignment has to be done inside this process or Modelsim complains about it
mem[0] = '0;
//-----------------------------------------------------------------------------
//-- WRITE : Write operation
//-----------------------------------------------------------------------------
//-- Generate M = WORDS sequential processes, each of which describes one
//-- word of the memory. The processes are synchronized with the clocks
//-- ClocksxC(i), i = 0, 1, ..., M-1
//-- Use active low, i.e. transparent on low latches as storage elements
//-- Data is sampled on rising clock edge
for (int unsigned k = 1; k < NUM_WORDS; k++) begin : w_WordIter
if (mem_clocks[k] == 1'b1)
mem[k] = wdata_a_q;
// Integer registers
always_latch begin : latch_wdata
// Note: The assignment has to be done inside this process or Modelsim complains about it
mem[0] = '0;
for(k = 1; k < NUM_WORDS; k++)
begin : w_WordIter
if (mem_clocks[k] == 1'b1)
mem[k] = waddr_onehot_b_q[k] ? wdata_b_q : wdata_a_q;
end
end
end
endmodule

View file

@ -45,18 +45,21 @@ module regfile
// Write port W1
input logic [4:0] waddr_a_i,
input logic [DATA_WIDTH-1:0] wdata_a_i,
input logic we_a_i
input logic we_a_i,
// Write port W2
input logic [4:0] waddr_b_i,
input logic [DATA_WIDTH-1:0] wdata_b_i,
input logic we_b_i
);
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH;
logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] rf_reg;
logic [NUM_WORDS-1:0] we_a_dec;
logic [NUM_WORDS-1:0] we_a_dec, we_b_dec;
always_comb
begin : we_a_decoder
always_comb begin : we_a_decoder
for (int i = 0; i < NUM_WORDS; i++) begin
if (waddr_a_i == i)
we_a_dec[i] = we_a_i;
@ -65,35 +68,40 @@ module regfile
end
end
always_comb begin : we_b_decoder
for (int i = 0; i < NUM_WORDS; i++) begin
if (waddr_b_i == i)
we_b_dec[i] = we_b_i;
else
we_b_dec[i] = 1'b0;
end
end
genvar i;
generate
// loop from 1 to NUM_WORDS-1 as R0 is nil
for (i = 1; i < NUM_WORDS; i++)
begin : rf_gen
for (genvar i = 1; i < NUM_WORDS; i++) begin : rf_gen
always_ff @(posedge clk, negedge rst_n)
begin : register_write_behavioral
always_ff @(posedge clk, negedge rst_n) begin : register_write_behavioral
if (rst_n==1'b0) begin
rf_reg[i] <= 'b0;
end else begin
if (we_a_dec[i])
rf_reg[i] <= wdata_a_i;
if (we_b_dec[i])
rf_reg[i] <= wdata_b_i;
end
end
end
// R0 is nil
`ifdef verilator
always_ff @(posedge clk, negedge rst_n)
begin
// R0 is nil
`ifdef verilator
always_ff @(posedge clk, negedge rst_n) begin
rf_reg[0] <= '0;
end
`else
`else
assign rf_reg[0] = '0;
`endif
`endif
endgenerate

View file

@ -16,28 +16,30 @@ import ariane_pkg::*;
module scoreboard #(
parameter int unsigned NR_ENTRIES = 8,
parameter int unsigned NR_WB_PORTS = 1
parameter int unsigned NR_WB_PORTS = 1,
parameter int unsigned NR_COMMIT_PORTS = 2
)
(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_unissued_instr_i, // flush only un-issued instructions
input logic flush_i, // flush whole scoreboard
input logic unresolved_branch_i, // we have an unresolved branch
// list of clobbered registers to issue stage
output fu_t [31:0] rd_clobber_o,
output fu_t [2**REG_ADDR_SIZE:0] rd_clobber_o,
// regfile like interface to operand read stage
input logic [4:0] rs1_i,
input logic [REG_ADDR_SIZE-1:0] rs1_i,
output logic [63:0] rs1_o,
output logic rs1_valid_o,
input logic [4:0] rs2_i,
input logic [REG_ADDR_SIZE-1:0] rs2_i,
output logic [63:0] rs2_o,
output logic rs2_valid_o,
// advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer
output scoreboard_entry_t commit_instr_o,
input logic commit_ack_i,
output scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_o,
input logic [NR_COMMIT_PORTS-1:0] commit_ack_i,
// instruction to put on top of scoreboard e.g. : top pointer
// we can always put this instruction to the top unless we signal with asserted full_o
@ -52,8 +54,8 @@ module scoreboard #(
// write-back port
input logic [NR_WB_PORTS-1:0][TRANS_ID_BITS-1:0] trans_id_i, // transaction ID at which to write the result back
input logic [NR_WB_PORTS-1:0][63:0] wdata_i, // write data in
input exception_t [NR_WB_PORTS-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception, divide by zero)
input logic [NR_WB_PORTS-1:0][63:0] wbdata_i, // write data in
input exception_t [NR_WB_PORTS-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception)
input logic [NR_WB_PORTS-1:0] wb_valid_i // data in is valid
);
localparam int unsigned BITS_ENTRIES = $clog2(NR_ENTRIES);
@ -73,7 +75,10 @@ module scoreboard #(
assign issue_full = (issue_cnt_q == NR_ENTRIES-1);
// output commit instruction directly
assign commit_instr_o = mem_q[commit_pointer_q].sbe;
always_comb begin : commit_ports
for (logic [$clog2(NR_ENTRIES)-1:0] i = 0; i < NR_COMMIT_PORTS; i++)
commit_instr_o[i] = mem_q[commit_pointer_q + i].sbe;
end
// an instruction is ready for issue if we have space in the issue FIFO and the decoder says it is valid
always_comb begin
@ -90,15 +95,17 @@ module scoreboard #(
// keep track of all issued instructions
always_comb begin : issue_fifo
automatic logic [$clog2(NR_ENTRIES)-1:0] issue_cnt;
automatic logic [$clog2(NR_ENTRIES)-1:0] commit_pointer;
commit_pointer = commit_pointer_q;
issue_cnt = issue_cnt_q;
// default assignment
mem_n = mem_q;
commit_pointer_n = commit_pointer_q;
issue_pointer_n = issue_pointer_q;
// if we got a acknowledge from the issue stage, put this scoreboard entry in the queue
if (decoded_instr_valid_i && decoded_instr_ack_o) begin
if (decoded_instr_valid_i && decoded_instr_ack_o && !flush_unissued_instr_i) begin
// the decoded instruction we put in there is valid (1st bit)
// increase the issue counter
issue_cnt++;
@ -115,7 +122,7 @@ module scoreboard #(
// something in the pipeline e.g. an incomplete memory operation)
if (wb_valid_i[i] && mem_n[trans_id_i[i]].issued) begin
mem_n[trans_id_i[i]].sbe.valid = 1'b1;
mem_n[trans_id_i[i]].sbe.result = wdata_i[i];
mem_n[trans_id_i[i]].sbe.result = wbdata_i[i];
// write the exception back if it is valid
if (ex_i[i].valid)
mem_n[trans_id_i[i]].sbe.ex = ex_i[i];
@ -126,14 +133,16 @@ module scoreboard #(
// Commit Port
// ------------
// we've got an acknowledge from commit
if (commit_ack_i) begin
// decrease the issue counter
issue_cnt--;
// this instruction is no longer in issue e.g.: it is considered finished
mem_n[commit_pointer_q].issued = 1'b0;
mem_n[commit_pointer_q].sbe.valid = 1'b0;
// advance commit pointer
commit_pointer_n = commit_pointer_n + 1'b1;
for (logic [$clog2(NR_ENTRIES)-1:0] i = 0; i < NR_COMMIT_PORTS; i++) begin
if (commit_ack_i[i]) begin
// decrease the issue counter
issue_cnt--;
// this instruction is no longer in issue e.g.: it is considered finished
mem_n[commit_pointer_q + i].issued = 1'b0;
mem_n[commit_pointer_q + i].sbe.valid = 1'b0;
// advance commit pointer
commit_pointer++;
end
end
// ------
// Flush
@ -147,11 +156,13 @@ module scoreboard #(
// set the pointer and counter back to zero
issue_cnt = '0;
issue_pointer_n = '0;
commit_pointer_n = '0;
commit_pointer = '0;
end
end
// update issue counter
issue_cnt_n = issue_cnt;
// update commit pointer
commit_pointer_n = commit_pointer;
end
// -------------------
@ -203,12 +214,12 @@ module scoreboard #(
// make sure that we are not forwarding a result that got an exception
for (int unsigned j = 0; j < NR_WB_PORTS; j++) begin
if (mem_q[trans_id_i[j]].sbe.rd == rs1_i && wb_valid_i[j] && ~ex_i[j].valid) begin
rs1_o = wdata_i[j];
rs1_o = wbdata_i[j];
rs1_valid_o = wb_valid_i[j];
break;
end
if (mem_q[trans_id_i[j]].sbe.rd == rs2_i && wb_valid_i[j] && ~ex_i[j].valid) begin
rs2_o = wdata_i[j];
rs2_o = wbdata_i[j];
rs2_valid_o = wb_valid_i[j];
break;
end
@ -247,8 +258,13 @@ module scoreboard #(
else $error ("RD 0 should not bet set");
// assert that we never acknowledge a commit if the instruction is not valid
assert property (
@(posedge clk_i) (rst_ni && commit_ack_i |-> commit_instr_o.valid))
@(posedge clk_i) (rst_ni && commit_ack_i[0] |-> commit_instr_o[0].valid))
else $error ("Commit acknowledged but instruction is not valid");
assert property (
@(posedge clk_i) (rst_ni && commit_ack_i[1] |-> commit_instr_o[1].valid))
else $error ("Commit acknowledged but instruction is not valid");
// assert that we never give an issue ack signal if the instruction is not valid
assert property (
@(posedge clk_i) (rst_ni && issue_ack_i |-> issue_instr_valid_o))

View file

@ -19,20 +19,12 @@ import ariane_pkg::*;
module tlb #(
parameter int unsigned TLB_ENTRIES = 4,
parameter int unsigned ASID_WIDTH = 1
)
(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // Flush signal
// Update signals
input logic update_is_2M_i,
input logic update_is_1G_i,
input logic [26:0] update_vpn_i,
input logic [ASID_WIDTH-1:0] update_asid_i,
input pte_t update_content_i,
input logic update_tlb_i,
// Update TLB
input tlb_update_t update_i,
// Lookup signals
input logic lu_access_i,
input logic [ASID_WIDTH-1:0] lu_asid_i,
@ -113,19 +105,19 @@ module tlb #(
tags_n[i].valid = 1'b0;
// normal replacement
end else if (update_tlb_i & replace_en[i]) begin
end else if (update_i.valid & replace_en[i]) begin
// update tag array
tags_n[i] = '{
asid: update_asid_i,
vpn2: update_vpn_i [26:18],
vpn1: update_vpn_i [17:9],
vpn0: update_vpn_i [8:0],
is_1G: update_is_1G_i,
is_2M: update_is_2M_i,
asid: update_i.asid,
vpn2: update_i.vpn [26:18],
vpn1: update_i.vpn [17:9],
vpn0: update_i.vpn [8:0],
is_1G: update_i.is_1G,
is_2M: update_i.is_2M,
valid: 1'b1
};
// and content as well
content_n[i] = update_content_i;
content_n[i] = update_i.content;
end
end
end

View file

@ -26,9 +26,10 @@ class instruction_trace_item;
logic [63:0] result;
logic [63:0] paddr;
string priv_lvl;
branchpredict_t bp;
// constructor creating a new instruction trace item, e.g.: a single instruction with all relevant information
function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] reg_file [32], logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl);
function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] reg_file [32], logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl, branchpredict_t bp);
this.simtime = simtime;
this.cycle = cycle;
this.pc = sbe.pc;
@ -37,6 +38,7 @@ class instruction_trace_item;
this.reg_file = reg_file;
this.result = result;
this.paddr = paddr;
this.bp = bp;
this.priv_lvl = getPrivLevel(priv_lvl);
endfunction
// convert register address to ABI compatible form
@ -104,9 +106,8 @@ class instruction_trace_item;
// Regular opcodes
INSTR_LUI: s = this.printUInstr("lui");
INSTR_AUIPC: s = this.printUInstr("auipc");
INSTR_J: s = this.printUJInstr("j");
INSTR_JAL: s = this.printUJInstr("jal");
INSTR_JALR: s = this.printIInstr("jalr");
INSTR_JAL: s = this.printJump();
INSTR_JALR: s = this.printJump();
// BRANCH
INSTR_BEQZ: s = this.printSBInstr("beqz");
INSTR_BEQ: s = this.printSBInstr("beq");
@ -185,10 +186,11 @@ class instruction_trace_item;
endcase
s = $sformatf("%10t %10d %s %h %h %-36s", simtime,
s = $sformatf("%10t %10d %s %h %h %h %-36s", simtime,
cycle,
priv_lvl,
sbe.pc,
bp.is_mispredict & bp.valid,
instr,
s);
@ -281,6 +283,28 @@ class instruction_trace_item;
return $sformatf("%-16s %s, 0x%0h", mnemonic, regAddrToStr(sbe.rd), sbe.result[31:12]);
endfunction // printUInstr
// Format a jump instruction for the trace, choosing the mnemonic from the
// opcode in instr[6:0] and the register fields of the scoreboard entry:
//   OPCODE_JALR, rd == 0 and rs1 == 1 or 5 -> "ret"  (via printMnemonic)
//   OPCODE_JALR, otherwise                 -> "jalr" (via printIInstr)
//   OPCODE_JAL,  rd == 0                   -> "j"    (via printUJInstr)
//   OPCODE_JAL,  otherwise                 -> "jal"  (via printUJInstr)
// Returns the formatted string; for any other opcode an empty string is
// returned.
function string printJump();
    case (instr[6:0])
        OPCODE_JALR: begin
            // is this a return?
            if (sbe.rd == 'b0 && (sbe.rs1 == 'h1 || sbe.rs1 == 'h5)) begin
                return this.printMnemonic("ret");
            end else begin
                return this.printIInstr("jalr");
            end
        end
        OPCODE_JAL: begin
            // a jump that discards the link address is printed as plain "j"
            if (sbe.rd == 'b0)
                return this.printUJInstr("j");
            else
                return this.printUJInstr("jal");
        end
        // Make the fall-through explicit: without a default the function
        // would end without a return statement. An empty string matches the
        // default value of the implicit string return variable.
        default: return "";
    endcase
endfunction
function string printUJInstr(input string mnemonic);
result_regs.push_back(sbe.rd);

View file

@ -23,7 +23,8 @@ class instruction_tracer;
// issue scoreboard entries
scoreboard_entry_t issue_sbe_queue [$];
scoreboard_entry_t issue_sbe;
// store resolved branches, get (mis-)predictions
branchpredict_t bp [$];
// shadow copy of the register file
logic [63:0] reg_file [32];
// 64 bit clock tick count
@ -35,7 +36,7 @@ class instruction_tracer;
logic display_instructions;
logic [63:0] store_mapping[$], load_mapping[$], address_mapping;
static uvm_cmdline_processor uvcl = uvm_cmdline_processor::get_inst();
// static uvm_cmdline_processor uvcl = uvm_cmdline_processor::get_inst();
function new(virtual instruction_tracer_if tracer_if, logic display_instructions);
@ -56,11 +57,11 @@ class instruction_tracer;
task trace();
logic [31:0] decode_instruction, issue_instruction, issue_commit_instruction;
scoreboard_entry_t commit_instruction;
// initialize register 0
reg_file [0] = 0;
forever begin
automatic branchpredict_t bp_instruction = '0;
// new cycle, we are only interested if reset is de-asserted
@(tracer_if.pck iff tracer_if.pck.rstn);
// increment clock tick
@ -79,7 +80,7 @@ class instruction_tracer;
// -------------------
// we got a new issue ack, so put the element from the decode queue to
// the issue queue
if (tracer_if.pck.issue_ack) begin
if (tracer_if.pck.issue_ack && !tracer_if.pck.flush_unissued) begin
issue_instruction = decode_queue.pop_front();
issue_queue.push_back(issue_instruction);
// also save the scoreboard entry to a separate issue queue
@ -96,42 +97,53 @@ class instruction_tracer;
if (tracer_if.pck.ld_valid && !tracer_if.pck.ld_kill) begin
load_mapping.push_back(tracer_if.pck.ld_paddr);
end
// ----------------------
// Store predictions
// ----------------------
if (tracer_if.pck.resolve_branch.valid) begin
bp.push_back(tracer_if.pck.resolve_branch);
end
// --------------
// Commit
// --------------
// we are committing an instruction
if (tracer_if.pck.commit_ack) begin
commit_instruction = scoreboard_entry_t'(tracer_if.pck.commit_instr);
issue_commit_instruction = issue_queue.pop_front();
issue_sbe = issue_sbe_queue.pop_front();
// check if the instruction retiring is a load or store, get the physical address accordingly
if (tracer_if.pck.commit_instr.fu == LOAD)
address_mapping = load_mapping.pop_front();
else if (tracer_if.pck.commit_instr.fu == STORE)
address_mapping = store_mapping.pop_front();
// the scoreboards issue entry still contains the immediate value as a result
// check if the write back is valid, if not we need to source the result from the register file
// as the most recent version of this register will be there.
if (tracer_if.pck.we) begin
printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata, address_mapping, tracer_if.pck.priv_lvl);
end else
printInstr(issue_sbe, issue_commit_instruction, reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl);
end
for (int i = 0; i < 2; i++) begin
if (tracer_if.pck.commit_ack[i]) begin
commit_instruction = scoreboard_entry_t'(tracer_if.pck.commit_instr[i]);
issue_commit_instruction = issue_queue.pop_front();
issue_sbe = issue_sbe_queue.pop_front();
// check if the instruction retiring is a load or store, get the physical address accordingly
if (tracer_if.pck.commit_instr[i].fu == LOAD)
address_mapping = load_mapping.pop_front();
else if (tracer_if.pck.commit_instr[i].fu == STORE)
address_mapping = store_mapping.pop_front();
if (tracer_if.pck.commit_instr[i].fu == CTRL_FLOW)
bp_instruction = bp.pop_front();
// the scoreboards issue entry still contains the immediate value as a result
// check if the write back is valid, if not we need to source the result from the register file
// as the most recent version of this register will be there.
if (tracer_if.pck.we[i]) begin
printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata[i], address_mapping, tracer_if.pck.priv_lvl, bp_instruction);
end else
printInstr(issue_sbe, issue_commit_instruction, reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, bp_instruction);
end
end
// --------------
// Exceptions
// --------------
if (tracer_if.pck.exception.valid) begin
// print exception
printException(tracer_if.pck.commit_instr.pc, tracer_if.pck.exception.cause, tracer_if.pck.exception.tval);
printException(tracer_if.pck.commit_instr[0].pc, tracer_if.pck.exception.cause, tracer_if.pck.exception.tval);
end
// ----------------------
// Commit Registers
// ----------------------
// update shadow reg file here
if (tracer_if.pck.we && tracer_if.pck.waddr != 5'b0) begin
reg_file[tracer_if.pck.waddr] = tracer_if.pck.wdata;
end
for (int i = 0; i < 2; i++)
if (tracer_if.pck.we[i] && tracer_if.pck.waddr[i] != 5'b0) begin
reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i];
end
// --------------
// Flush Signals
@ -162,10 +174,11 @@ class instruction_tracer;
// also clear mappings
store_mapping = {};
load_mapping = {};
bp = {};
endfunction
function void printInstr(scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl);
instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr, priv_lvl);
function void printInstr(scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl, branchpredict_t bp);
instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr, priv_lvl, bp);
// print instruction to console
string print_instr = iti.printInstr();
uvm_report_info( "Tracer", print_instr, UVM_HIGH);

View file

@ -15,7 +15,6 @@
parameter INSTR_LUI = { 25'b?, OPCODE_LUI };
parameter INSTR_AUIPC = { 25'b?, OPCODE_AUIPC };
parameter INSTR_JAL = { 25'b?, OPCODE_JAL };
parameter INSTR_J = { 20'b?, 5'b0, OPCODE_JAL };
parameter INSTR_JALR = { 17'b?, 3'b000, 5'b?, OPCODE_JALR };
// BRANCH
parameter INSTR_BEQZ = { 7'b?, 5'b0, 5'b?, 3'b000, 5'b?, OPCODE_BRANCH };

View file

@ -18,23 +18,24 @@ import ariane_pkg::*;
interface instruction_tracer_if (
input clk
);
logic rstn;
logic flush_unissued;
logic flush;
logic rstn;
logic flush_unissued;
logic flush;
// Decode
logic [31:0] instruction;
logic fetch_valid;
logic fetch_ack;
logic [31:0] instruction;
logic fetch_valid;
logic fetch_ack;
// Issue stage
logic issue_ack; // issue acknowledged
scoreboard_entry_t issue_sbe; // issue scoreboard entry
logic issue_ack; // issue acknowledged
scoreboard_entry_t issue_sbe; // issue scoreboard entry
// WB stage
logic [4:0] waddr;
logic [63:0] wdata;
logic we;
logic [1:0][4:0] waddr;
logic [1:0][63:0] wdata;
logic [1:0] we;
// commit stage
scoreboard_entry_t commit_instr; // commit instruction
logic commit_ack;
scoreboard_entry_t [1:0] commit_instr; // commit instruction
logic [1:0] commit_ack;
// address translation
// stores
@ -44,7 +45,8 @@ interface instruction_tracer_if (
logic ld_valid;
logic ld_kill;
logic [63:0] ld_paddr;
// misprediction
branchpredict_t resolve_branch;
// exceptions
exception_t exception;
// current privilege level
@ -53,7 +55,7 @@ interface instruction_tracer_if (
`ifndef SYNTHESIS
clocking pck @(posedge clk);
input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr,
st_valid, st_paddr, ld_valid, ld_kill, ld_paddr,
st_valid, st_paddr, ld_valid, ld_kill, ld_paddr, resolve_branch,
wdata, we, commit_instr, commit_ack, exception, priv_lvl;
endclocking
`endif

2
tb

@ -1 +1 @@
Subproject commit d2c8a8a9773b6b128c59a840328a84fbebb94a18
Subproject commit 346ab824aad1c20f7029434c55c92c76c974f3eb