diff --git a/Bender.yml b/Bender.yml index 8e9311a20..d46ccac1f 100644 --- a/Bender.yml +++ b/Bender.yml @@ -21,6 +21,18 @@ frozen: true sources: - files: + - target: cv64a6_imafdcv_sv39 + files: + - core/include/cv64a6_imafdcv_sv39_config_pkg.sv + - core/include/riscv_pkg.sv + - common/local/rvfi/rvfi_pkg.sv + - core/include/ariane_dm_pkg.sv + - core/include/ariane_pkg.sv + - core/mmu_sv39/tlb.sv + - core/mmu_sv39/mmu.sv + - core/mmu_sv39/ptw.sv + - corev_apu/tb/common/mock_uart.sv + - target: cv64a6_imafdc_sv39 files: - core/include/cv64a6_imafdc_sv39_config_pkg.sv @@ -31,6 +43,7 @@ sources: - core/mmu_sv39/tlb.sv - core/mmu_sv39/mmu.sv - core/mmu_sv39/ptw.sv + - core/cva6_accel_first_pass_decoder_stub.sv - target: cv32a6_imac_sv0 files: @@ -42,6 +55,7 @@ sources: - core/mmu_sv32/cva6_tlb_sv32.sv - core/mmu_sv32/cva6_mmu_sv32.sv - core/mmu_sv32/cva6_ptw_sv32.sv + - core/cva6_accel_first_pass_decoder_stub.sv - target: cv32a6_imac_sv32 files: @@ -53,6 +67,7 @@ sources: - core/mmu_sv32/cva6_tlb_sv32.sv - core/mmu_sv32/cva6_mmu_sv32.sv - core/mmu_sv32/cva6_ptw_sv32.sv + - core/cva6_accel_first_pass_decoder_stub.sv - target: cv32a6_imafc_sv32 files: @@ -63,6 +78,7 @@ sources: - core/mmu_sv32/cva6_tlb_sv32.sv - core/mmu_sv32/cva6_mmu_sv32.sv - core/mmu_sv32/cva6_ptw_sv32.sv + - core/cva6_accel_first_pass_decoder_stub.sv # included via target core/include/${TARGET_CFG}_config_pkg.sv # ariane_axi_pkg is dependent on this. 
@@ -73,6 +89,7 @@ sources: - core/include/wt_cache_pkg.sv - core/include/std_cache_pkg.sv - core/include/axi_intf.sv + - core/include/acc_pkg.sv # for all the below files use Flist.cva6 as baseline and also look at Makefile pd/synth @@ -141,6 +158,7 @@ sources: - core/instr_realign.sv - core/id_stage.sv - core/issue_read_operands.sv + - core/acc_dispatcher.sv - core/issue_stage.sv - core/load_unit.sv - core/load_store_unit.sv diff --git a/core/Flist.cva6 b/core/Flist.cva6 index 1478d9990..904310906 100644 --- a/core/Flist.cva6 +++ b/core/Flist.cva6 @@ -34,10 +34,32 @@ ${CVA6_REPO_DIR}/vendor/pulp-platform/fpga-support/rtl/AsyncThreePortRam.sv +incdir+${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/ +incdir+${CVA6_REPO_DIR}/common/local/util/ +// Floating point unit +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_top.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv 
+${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv +${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv + ${CVA6_REPO_DIR}/core/include/${TARGET_CFG}_config_pkg.sv ${CVA6_REPO_DIR}/core/include/riscv_pkg.sv ${CVA6_REPO_DIR}/common/local/rvfi/rvfi_pkg.sv ${CVA6_REPO_DIR}/core/include/ariane_dm_pkg.sv +// Note: depends on fpnew_pkg, above ${CVA6_REPO_DIR}/core/include/ariane_pkg.sv // TODO: ariane_axi_pkg is dependent on this. ${CVA6_REPO_DIR}/vendor/pulp-platform/axi/src/axi_pkg.sv @@ -48,6 +70,7 @@ ${CVA6_REPO_DIR}/core/include/wt_cache_pkg.sv ${CVA6_REPO_DIR}/core/include/std_cache_pkg.sv ${CVA6_REPO_DIR}/core/include/axi_intf.sv ${CVA6_REPO_DIR}/core/include/instr_tracer_pkg.sv +${CVA6_REPO_DIR}/core/include/acc_pkg.sv //CVXIF ${CVA6_REPO_DIR}/core/include/cvxif_pkg.sv @@ -76,27 +99,6 @@ ${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/exp_backoff.sv ${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/counter.sv ${CVA6_REPO_DIR}/vendor/pulp-platform/common_cells/src/delta_counter.sv -// Floating point unit -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_cast_multi.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_classifier.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_divsqrt_multi.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_fma_multi.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_fma.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_noncomp.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_block.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_fmt_slice.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_opgroup_multifmt_slice.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_rounding.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpnew_top.sv 
-${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv -${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv - // Top-level source files (not necessarily instantiated at the top of the cva6). ${CVA6_REPO_DIR}/core/cva6.sv ${CVA6_REPO_DIR}/core/alu.sv @@ -130,6 +132,8 @@ ${CVA6_REPO_DIR}/core/amo_buffer.sv ${CVA6_REPO_DIR}/core/store_unit.sv ${CVA6_REPO_DIR}/core/commit_stage.sv ${CVA6_REPO_DIR}/core/axi_shim.sv +${CVA6_REPO_DIR}/core/cva6_accel_first_pass_decoder_stub.sv +${CVA6_REPO_DIR}/core/acc_dispatcher.sv // What is "frontend"? ${CVA6_REPO_DIR}/core/frontend/btb.sv diff --git a/core/acc_dispatcher.sv b/core/acc_dispatcher.sv new file mode 100644 index 000000000..94bbd74db --- /dev/null +++ b/core/acc_dispatcher.sv @@ -0,0 +1,389 @@ +// Copyright 2020 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Authors: Matheus Cavalcante, ETH Zurich +// Nils Wistoff, ETH Zurich +// Date: 20.11.2020 +// Description: Functional unit that dispatches CVA6 instructions to accelerators. + +module acc_dispatcher import ariane_pkg::*; import riscv::*; ( + input logic clk_i, + input logic rst_ni, + // Interface with the CSR regfile + input logic acc_cons_en_i, // Accelerator memory consistent mode + // Interface with the CSRs + input logic [2:0] fcsr_frm_i, + output logic dirty_v_state_o, + // Interface with the issue stage + input scoreboard_entry_t issue_instr_i, + input logic issue_instr_hs_i, + output logic issue_stall_o, + input fu_data_t fu_data_i, + input scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_i, + output logic [TRANS_ID_BITS-1:0] acc_trans_id_o, + output xlen_t acc_result_o, + output logic acc_valid_o, + output exception_t acc_exception_o, + // Interface with the execute stage + output logic acc_valid_ex_o, // FU executed + // Interface with the commit stage + input logic [NR_COMMIT_PORTS-1:0] commit_ack_i, + input logic commit_st_barrier_i, // A store barrier was committed + // Interface with the load/store unit + input logic acc_no_st_pending_i, + // Interface with the controller + output logic ctrl_halt_o, + input logic flush_unissued_instr_i, + input logic flush_ex_i, + // Accelerator interface + output acc_pkg::accelerator_req_t acc_req_o, + input acc_pkg::accelerator_resp_t acc_resp_i + ); + + `include "common_cells/registers.svh" + + import cf_math_pkg::idx_width; + + /*********************** + * Common signals * + ***********************/ + + logic acc_ready; + logic acc_valid_d, acc_valid_q; + + /************************** + * Accelerator issue * + **************************/ + + // Issue accelerator instructions + `FF(acc_valid_q, acc_valid_d, '0) + + assign acc_valid_ex_o = acc_valid_q; + assign acc_valid_d = ~issue_instr_i.ex.valid & + issue_instr_hs_i & + (issue_instr_i.fu == ACCEL) & + ~flush_unissued_instr_i; + + // Accelerator 
load/store pending signals + logic acc_no_ld_pending; + logic acc_no_st_pending; + + // Stall issue stage in three cases: + always_comb begin : stall_issue + unique case (issue_instr_i.fu) + ACCEL: + // 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet + issue_stall_o = ~acc_ready; + LOAD: + // 2. We're issuing a scalar load but there is an inflight accelerator store. + issue_stall_o = acc_cons_en_i & ~acc_no_st_pending; + STORE: + // 3. We're issuing a scalar store but there is an inflight accelerator load or store. + issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending); + default: + issue_stall_o = 1'b0; + endcase + end + + /*********************** + * Instruction queue * + ***********************/ + + localparam InstructionQueueDepth = 3; + + fu_data_t acc_data; + fu_data_t acc_insn_queue_o; + logic acc_insn_queue_pop; + logic acc_insn_queue_empty; + logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage; + logic acc_commit; + logic [TRANS_ID_BITS-1:0] acc_commit_trans_id; + + assign acc_data = acc_valid_ex_o ? fu_data_i : '0; + + fifo_v3 #( + .DEPTH (InstructionQueueDepth), + .FALL_THROUGH(1'b1 ), + .dtype (fu_data_t ) + ) i_acc_insn_queue ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .flush_i (flush_ex_i ), + .testmode_i(1'b0 ), + .data_i (fu_data_i ), + .push_i (acc_valid_q ), + .full_o (/* Unused */ ), + .data_o (acc_insn_queue_o ), + .pop_i (acc_insn_queue_pop ), + .empty_o (acc_insn_queue_empty), + .usage_o (acc_insn_queue_usage) + ); + + // We are ready if the instruction queue is able to accept at least one more entry. + assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth-1); + + /********************************** + * Non-speculative instructions * + **********************************/ + + // Keep track of the instructions that were received by the dispatcher. 
+ logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q; + `FF(insn_pending_q, insn_pending_d, '0) + + // Only non-speculative instructions can be issued to the accelerators. + // The following block keeps track of which transaction IDs reached the + // top of the scoreboard, and are therefore no longer speculative. + logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q; + `FF(insn_ready_q, insn_ready_d, '0) + + always_comb begin: p_non_speculative_ff + // Maintain state + insn_pending_d = insn_pending_q; + insn_ready_d = insn_ready_q; + + // We received a new instruction + if (acc_valid_q) + insn_pending_d[acc_data.trans_id] = 1'b1; + // Flush all received instructions + if (flush_ex_i) + insn_pending_d = '0; + + // An accelerator instruction is no longer speculative. + if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin + insn_ready_d[acc_commit_trans_id] = 1'b1; + insn_pending_d[acc_commit_trans_id] = 1'b0; + end + + // An accelerator instruction was issued. + if (acc_req_o.req_valid) + insn_ready_d[acc_req_o.trans_id] = 1'b0; + end: p_non_speculative_ff + + /************************* + * Accelerator request * + *************************/ + + acc_pkg::accelerator_req_t acc_req; + logic acc_req_valid; + logic acc_req_ready; + + acc_pkg::accelerator_req_t acc_req_int; + fall_through_register #( + .T(acc_pkg::accelerator_req_t) + ) i_accelerator_req_register ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .clr_i (1'b0 ), + .testmode_i(1'b0 ), + .data_i (acc_req ), + .valid_i (acc_req_valid ), + .ready_o (acc_req_ready ), + .data_o (acc_req_int ), + .valid_o (acc_req_o.req_valid), + .ready_i (acc_resp_i.req_ready) + ); + + assign acc_req_o.insn = acc_req_int.insn; + assign acc_req_o.rs1 = acc_req_int.rs1; + assign acc_req_o.rs2 = acc_req_int.rs2; + assign acc_req_o.frm = acc_req_int.frm; + assign acc_req_o.trans_id = acc_req_int.trans_id; + assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i; + assign acc_req_o.acc_cons_en = acc_cons_en_i; 
+ // Will be overwritten by dcache + assign acc_req_o.inval_ready = '0; + + always_comb begin: accelerator_req_dispatcher + // Do not fetch from the instruction queue + acc_insn_queue_pop = 1'b0; + + // Default values + acc_req = '0; + acc_req_valid = 1'b0; + + // Unpack fu_data_t into accelerator_req_t + if (!acc_insn_queue_empty) begin + acc_req = '{ + // Instruction is forwarded from the decoder as an immediate + // - + // frm rounding information is up to date during a valid request to the accelerator + // The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes + // do not take place until the accelerator answers (Ariane commits in-order) + insn : acc_insn_queue_o.imm[31:0], + rs1 : acc_insn_queue_o.operand_a, + rs2 : acc_insn_queue_o.operand_b, + frm : fpnew_pkg::roundmode_e'(fcsr_frm_i), + trans_id: acc_insn_queue_o.trans_id, + default : '0 + }; + // Wait until the instruction is no longer speculative. + acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] || + (acc_commit && insn_pending_q[acc_commit_trans_id]); + acc_insn_queue_pop = acc_req_valid && acc_req_ready; + end + end + + /************************** + * Accelerator response * + **************************/ + + logic acc_ld_disp; + logic acc_st_disp; + + // Unpack the accelerator response + assign acc_trans_id_o = acc_resp_i.trans_id; + assign acc_result_o = acc_resp_i.result; + assign acc_valid_o = acc_resp_i.resp_valid; + assign acc_exception_o = '{ + cause: riscv::ILLEGAL_INSTR, + tval : '0, + valid: acc_resp_i.error + }; + // Always ready to receive responses + assign acc_req_o.resp_ready = 1'b1; + + // Signal dispatched load/store to issue stage + assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD); + assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE); + + /************************** + * Accelerator commit * + **************************/ + + // Instruction can be issued to the (in-order) back-end if + 
// it reached the top of the scoreboard and it hasn't been + // issued yet + always_comb begin: accelerator_commit + acc_commit = 1'b0; + if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) + acc_commit = 1'b1; + if (commit_instr_i[0].valid && + !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL) + acc_commit = 1'b1; + end + + // Dirty the V state if we are committing anything related to the vector accelerator + always_comb begin : dirty_v_state + dirty_v_state_o = 1'b0; + for (int i = 0; i < NR_COMMIT_PORTS; i++) begin + dirty_v_state_o |= commit_ack_i[i] & (commit_instr_i[i].fu == ACCEL); + end + end + + assign acc_commit_trans_id = !commit_instr_i[0].valid ? commit_instr_i[0].trans_id + : commit_instr_i[1].trans_id; + + /************************** + * Accelerator barriers * + **************************/ + + // On a store barrier (i.e. any barrier that requires preceding stores to complete + // before continuing execution), halt execution while there are pending stores in + // the accelerator pipeline. + logic wait_acc_store_d, wait_acc_store_q; + `FF(wait_acc_store_q, wait_acc_store_d, '0) + + // Set on store barrier. Clear when no store is pending. + assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending; + assign ctrl_halt_o = wait_acc_store_q; + + /************************** + * Load/Store tracking * + **************************/ + + // Loads + logic acc_spec_loads_overflow; + logic [2:0] acc_spec_loads_pending; + logic acc_disp_loads_overflow; + logic [2:0] acc_disp_loads_pending; + + assign acc_no_ld_pending = (acc_spec_loads_pending == 3'b0) && (acc_disp_loads_pending == 3'b0); + + // Count speculative loads. These can still be flushed. 
+ counter #( + .WIDTH (3), + .STICKY_OVERFLOW (0) + ) i_acc_spec_loads ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .clear_i (flush_ex_i ), + .en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp), + .load_i (1'b0 ), + .down_i (acc_ld_disp ), + .d_i ('0 ), + .q_o (acc_spec_loads_pending ), + .overflow_o (acc_spec_loads_overflow ) + ); + + // Count dispatched loads. These cannot be flushed anymore. + counter #( + .WIDTH (3), + .STICKY_OVERFLOW (0) + ) i_acc_disp_loads ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .clear_i (1'b0 ), + .en_i (acc_ld_disp ^ acc_resp_i.load_complete), + .load_i (1'b0 ), + .down_i (acc_resp_i.load_complete), + .d_i ('0 ), + .q_o (acc_disp_loads_pending ), + .overflow_o (acc_disp_loads_overflow ) + ); + + acc_dispatcher_no_load_overflow: assert property ( + @(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) ) + else $error("[acc_dispatcher] Too many pending loads."); + + // Stores + logic acc_spec_stores_overflow; + logic [2:0] acc_spec_stores_pending; + logic acc_disp_stores_overflow; + logic [2:0] acc_disp_stores_pending; + + assign acc_no_st_pending = (acc_spec_stores_pending == 3'b0) && (acc_disp_stores_pending == 3'b0); + + // Count speculative stores. These can still be flushed. + counter #( + .WIDTH (3), + .STICKY_OVERFLOW (0) + ) i_acc_spec_stores ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .clear_i (flush_ex_i ), + .en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp), + .load_i (1'b0 ), + .down_i (acc_st_disp ), + .d_i ('0 ), + .q_o (acc_spec_stores_pending ), + .overflow_o (acc_spec_stores_overflow) + ); + + // Count dispatched stores. These cannot be flushed anymore. 
+ counter #( + .WIDTH (3), + .STICKY_OVERFLOW (0) + ) i_acc_disp_stores ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + .clear_i (1'b0 ), + .en_i (acc_st_disp ^ acc_resp_i.store_complete), + .load_i (1'b0 ), + .down_i (acc_resp_i.store_complete), + .d_i ('0 ), + .q_o (acc_disp_stores_pending ), + .overflow_o (acc_disp_stores_overflow ) + ); + + acc_dispatcher_no_store_overflow: assert property ( + @(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) ) + else $error("[acc_dispatcher] Too many pending stores."); + +endmodule : acc_dispatcher diff --git a/core/cache_subsystem/wt_axi_adapter.sv b/core/cache_subsystem/wt_axi_adapter.sv index 2e97ecca6..773c49381 100644 --- a/core/cache_subsystem/wt_axi_adapter.sv +++ b/core/cache_subsystem/wt_axi_adapter.sv @@ -45,7 +45,12 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #( // AXI port output axi_req_t axi_req_o, - input axi_rsp_t axi_resp_i + input axi_rsp_t axi_resp_i, + + // Invalidations + input logic [63:0] inval_addr_i, + input logic inval_valid_i, + output logic inval_ready_o ); // support up to 512bit cache lines @@ -478,6 +483,15 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #( b_pop = 1'b0; dcache_sc_rtrn = 1'b0; + // External invalidation requests (from coprocessor). This is safe as + // there are no other transactions when a coprocessor has pending stores. 
+ inval_ready_o = 1'b0; + if (inval_valid_i) begin + inval_ready_o = 1'b1; + dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ; + dcache_rtrn_vld_d = 1'b1; + dcache_rtrn_inv_d.all = 1'b1; + dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; ////////////////////////////////////// // dcache needs some special treatment // for arbitration and decoding of atomics @@ -486,7 +500,7 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #( // note that this self invalidation is handled in this way due to the // write-through cache architecture, which is aligned with the openpiton // cache subsystem. - if (invalidate) begin + end else if (invalidate) begin dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ; dcache_rtrn_vld_d = 1'b1; diff --git a/core/cache_subsystem/wt_cache_subsystem.sv b/core/cache_subsystem/wt_cache_subsystem.sv index 17fc457ec..8250790fb 100644 --- a/core/cache_subsystem/wt_cache_subsystem.sv +++ b/core/cache_subsystem/wt_cache_subsystem.sv @@ -61,12 +61,16 @@ module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #( `ifdef PITON_ARIANE // L15 (memory side) output l15_req_t l15_req_o, - input l15_rtrn_t l15_rtrn_i + input l15_rtrn_t l15_rtrn_i, `else // memory side output axi_req_t axi_req_o, - input axi_rsp_t axi_resp_i + input axi_rsp_t axi_resp_i, `endif + // Invalidations + input logic [63:0] inval_addr_i, + input logic inval_valid_i, + output logic inval_ready_o // TODO: interrupt interface ); @@ -182,7 +186,10 @@ module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #( .dcache_rtrn_vld_o ( adapter_dcache_rtrn_vld ), .dcache_rtrn_o ( adapter_dcache ), .axi_req_o ( axi_req_o ), - .axi_resp_i ( axi_resp_i ) + .axi_resp_i ( axi_resp_i ), + .inval_addr_i ( inval_addr_i ), + .inval_valid_i ( inval_valid_i ), + .inval_ready_o ( inval_ready_o ) ); `endif diff --git a/core/commit_stage.sv b/core/commit_stage.sv index c1f1e2978..802fe6ef6 100644 --- a/core/commit_stage.sv +++ 
b/core/commit_stage.sv @@ -79,6 +79,8 @@ module commit_stage import ariane_pkg::*; #( dirty_fp_state_o = 1'b0; for (int i = 0; i < NR_COMMIT_PORTS; i++) begin dirty_fp_state_o |= commit_ack_o[i] & (commit_instr_i[i].fu inside {FPU, FPU_VEC} || is_rd_fpr(commit_instr_i[i].op)); + // Check if we issued a vector floating-point instruction to the accelerator + dirty_fp_state_o |= commit_instr_i[i].fu == ACCEL && commit_instr_i[i].vfp; end end diff --git a/core/controller.sv b/core/controller.sv index 2c503e5e3..ff3190838 100644 --- a/core/controller.sv +++ b/core/controller.sv @@ -30,6 +30,7 @@ module controller import ariane_pkg::*; #( output logic flush_tlb_o, // Flush TLBs input logic halt_csr_i, // Halt request from CSR (WFI instruction) + input logic halt_acc_i, // Halt request from accelerator dispatcher output logic halt_o, // Halt signal to commit stage input logic eret_i, // Return from exception input logic ex_valid_i, // We got an exception, flush the pipeline @@ -165,7 +166,7 @@ module controller import ariane_pkg::*; #( // ---------------------- always_comb begin // halt the core if the fence is active - halt_o = halt_csr_i || fence_active_q; + halt_o = halt_csr_i || halt_acc_i || fence_active_q; end // ---------------------- diff --git a/core/csr_regfile.sv b/core/csr_regfile.sv index f2019ea7d..5d1054523 100644 --- a/core/csr_regfile.sv +++ b/core/csr_regfile.sv @@ -42,6 +42,7 @@ module csr_regfile import ariane_pkg::*; #( output logic[riscv::XLEN-1:0] csr_rdata_o, // Read data out input logic dirty_fp_state_i, // Mark the FP sate as dirty input logic csr_write_fflags_i, // Write fflags register e.g.: we are retiring a floating point instruction + input logic dirty_v_state_i , // Mark the V state as dirty input logic [riscv::VLEN-1:0] pc_i, // PC of instruction accessing the CSR output exception_t csr_exception_o, // attempts to access a CSR without appropriate privilege // level or to write a read-only register also @@ -51,11 +52,16 @@ module 
csr_regfile import ariane_pkg::*; #( output logic eret_o, // Return from exception, set the PC of epc_o output logic [riscv::VLEN-1:0] trap_vector_base_o, // Output base of exception vector, correct CSR is output (mtvec, stvec) output riscv::priv_lvl_t priv_lvl_o, // Current privilege level the CPU is in + // FP Imprecise exceptions + input logic [4:0] acc_fflags_ex_i, // Imprecise FP exception from the accelerator (fcsr.fflags format) + input logic acc_fflags_ex_valid_i, // An FP exception from the accelerator occurred // FPU output riscv::xs_t fs_o, // Floating point extension status output logic [4:0] fflags_o, // Floating-Point Accured Exceptions output logic [2:0] frm_o, // Floating-Point Dynamic Rounding Mode output logic [6:0] fprec_o, // Floating-Point Precision Control + // Vector extension + output riscv::xs_t vs_o, // Vector extension status // Decoder output irq_ctrl_t irq_ctrl_o, // interrupt management to id stage // MMU @@ -80,6 +86,8 @@ module csr_regfile import ariane_pkg::*; #( // Caches output logic icache_en_o, // L1 ICache Enable output logic dcache_en_o, // L1 DCache Enable + // Accelerator + output logic acc_cons_en_o, // Accelerator memory consistent mode // Performance Counter output logic [11:0] perf_addr_o, // read/write address to performance counter module output logic[riscv::XLEN-1:0] perf_data_o, // write data to performance counter module @@ -136,6 +144,7 @@ module csr_regfile import ariane_pkg::*; #( riscv::xlen_t stval_q, stval_d; riscv::xlen_t dcache_q, dcache_d; riscv::xlen_t icache_q, icache_d; + riscv::xlen_t acc_cons_q, acc_cons_d; logic wfi_d, wfi_q; @@ -156,6 +165,7 @@ module csr_regfile import ariane_pkg::*; #( // ---------------- assign csr_addr = riscv::csr_t'(csr_addr_i); assign fs_o = mstatus_q.fs; + assign vs_o = mstatus_q.vs; // ---------------- // CSR Read logic // ---------------- @@ -412,6 +422,14 @@ module csr_regfile import ariane_pkg::*; #( // custom (non RISC-V) cache control riscv::CSR_DCACHE: csr_rdata = 
dcache_q; riscv::CSR_ICACHE: csr_rdata = icache_q; + // custom (non RISC-V) accelerator memory consistency mode + riscv::CSR_ACC_CONS: begin + if (ENABLE_ACCELERATOR) begin + csr_rdata = acc_cons_q; + end else begin + read_access_exception = 1'b1; + end + end // PMPs riscv::CSR_PMPCFG0: csr_rdata = pmpcfg_q[riscv::XLEN/8-1:0]; riscv::CSR_PMPCFG1: if (riscv::XLEN == 32) csr_rdata = pmpcfg_q[7:4]; else read_access_exception = 1'b1; @@ -521,6 +539,7 @@ module csr_regfile import ariane_pkg::*; #( mtval_d = mtval_q; dcache_d = dcache_q; icache_d = icache_q; + acc_cons_d = acc_cons_q; sepc_d = sepc_q; scause_d = scause_q; @@ -606,6 +625,10 @@ module csr_regfile import ariane_pkg::*; #( if (!FP_PRESENT) begin mstatus_d.fs = riscv::Off; end + // hardwire to zero if vector extension is not present + if (!RVV) begin + mstatus_d.vs = riscv::Off; + end // this instruction has side-effects flush_o = 1'b1; end @@ -652,6 +675,9 @@ module csr_regfile import ariane_pkg::*; #( if (!FP_PRESENT) begin mstatus_d.fs = riscv::Off; end + if (!RVV) begin + mstatus_d.vs = riscv::Off; + end mstatus_d.wpri3 = 8'b0; mstatus_d.wpri1 = 1'b0; mstatus_d.wpri2 = 1'b0; @@ -799,6 +825,13 @@ module csr_regfile import ariane_pkg::*; #( riscv::CSR_DCACHE: dcache_d = {{riscv::XLEN-1{1'b0}}, csr_wdata[0]}; // enable bit riscv::CSR_ICACHE: icache_d = {{riscv::XLEN-1{1'b0}}, csr_wdata[0]}; // enable bit + riscv::CSR_ACC_CONS: begin + if (ENABLE_ACCELERATOR) begin + acc_cons_d = {{riscv::XLEN-1{1'b0}}, csr_wdata[0]}; // enable bit + end else begin + update_access_exception = 1'b1; + end + end // PMP locked logic // 1. refuse to update any locked entry // 2. 
also refuse to update the entry below a locked TOR entry @@ -853,6 +886,10 @@ module csr_regfile import ariane_pkg::*; #( if (FP_PRESENT && (dirty_fp_state_csr || dirty_fp_state_i)) begin mstatus_d.fs = riscv::Dirty; end + // mark the vector extension register as dirty + if (RVV && dirty_v_state_i) begin + mstatus_d.vs = riscv::Dirty; + end // hardwired extension registers mstatus_d.sd = (mstatus_q.xs == riscv::Dirty) | (mstatus_q.fs == riscv::Dirty); @@ -860,6 +897,17 @@ module csr_regfile import ariane_pkg::*; #( if (csr_write_fflags_i) begin fcsr_d.fflags = csr_wdata_i[4:0] | fcsr_q.fflags; end + + // ---------------------------- + // Accelerator FP imprecise exceptions + // ---------------------------- + + // Update fflags as soon as a FP exception occurs in the accelerator + // The exception is imprecise, and the fcsr.fflags update always happens immediately + if (ENABLE_ACCELERATOR) begin + fcsr_d.fflags |= acc_fflags_ex_valid_i ? acc_fflags_ex_i : 5'b0; + end + // --------------------- // External Interrupts // --------------------- @@ -1274,6 +1322,7 @@ module csr_regfile import ariane_pkg::*; #( assign icache_en_o = icache_q[0] & (~debug_mode_q); `endif assign dcache_en_o = dcache_q[0]; + assign acc_cons_en_o = ENABLE_ACCELERATOR ? acc_cons_q[0] : 1'b0; // determine if mprv needs to be considered if in debug mode assign mprv = (debug_mode_q && !dcsr_q.mprven) ? 
1'b0 : mstatus_q.mprv; @@ -1312,6 +1361,7 @@ module csr_regfile import ariane_pkg::*; #( dcache_q <= {{riscv::XLEN-1{1'b0}}, 1'b1}; icache_q <= {{riscv::XLEN-1{1'b0}}, 1'b1}; mcountinhibit_q <= '0; + acc_cons_q <= {{riscv::XLEN-1{1'b0}}, ENABLE_ACCELERATOR}; // supervisor mode registers sepc_q <= {riscv::XLEN{1'b0}}; scause_q <= {riscv::XLEN{1'b0}}; @@ -1356,6 +1406,7 @@ module csr_regfile import ariane_pkg::*; #( dcache_q <= dcache_d; icache_q <= icache_d; mcountinhibit_q <= mcountinhibit_d; + acc_cons_q <= acc_cons_d; // supervisor mode registers sepc_q <= sepc_d; scause_q <= scause_d; diff --git a/core/cva6.sv b/core/cva6.sv index 51575b525..6ca08aa9a 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -15,6 +15,8 @@ module cva6 import ariane_pkg::*; #( parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig, + parameter type cvxif_req_t = cvxif_pkg::cvxif_req_t, + parameter type cvxif_resp_t = cvxif_pkg::cvxif_resp_t, parameter int unsigned AxiAddrWidth = ariane_axi::AddrWidth, parameter int unsigned AxiDataWidth = ariane_axi::DataWidth, parameter int unsigned AxiIdWidth = ariane_axi::IdWidth, @@ -39,8 +41,8 @@ module cva6 import ariane_pkg::*; #( // RISC-V formal interface port (`rvfi`): // Can be left open when formal tracing is not needed. 
output ariane_pkg::rvfi_port_t rvfi_o, - output cvxif_pkg::cvxif_req_t cvxif_req_o, - input cvxif_pkg::cvxif_resp_t cvxif_resp_i, + output cvxif_req_t cvxif_req_o, + input cvxif_resp_t cvxif_resp_i, // L15 (memory side) output wt_cache_pkg::l15_req_t l15_req_o, input wt_cache_pkg::l15_rtrn_t l15_rtrn_i, @@ -63,6 +65,8 @@ module cva6 import ariane_pkg::*; #( logic [NR_COMMIT_PORTS-1:0] commit_ack; localparam NumPorts = 3; + cvxif_pkg::cvxif_req_t cvxif_req; + cvxif_pkg::cvxif_resp_t cvxif_resp; // -------------- // PCGEN <-> CSR @@ -130,6 +134,17 @@ module cva6 import ariane_pkg::*; #( riscv::xlen_t fpu_result_ex_id; logic fpu_valid_ex_id; exception_t fpu_exception_ex_id; + // Accelerator + logic stall_acc_id; + scoreboard_entry_t issue_instr_id_acc; + logic issue_instr_hs_id_acc; + logic [TRANS_ID_BITS-1:0] acc_trans_id_ex_id; + riscv::xlen_t acc_result_ex_id; + logic acc_valid_ex_id; + exception_t acc_exception_ex_id; + logic halt_acc_ctrl; + logic [4:0] acc_resp_fflags; + logic acc_resp_fflags_valid; // CSR logic csr_valid_id_ex; // CVXIF @@ -147,6 +162,7 @@ module cva6 import ariane_pkg::*; #( // CSR Commit logic csr_commit_commit_ex; logic dirty_fp_state; + logic dirty_v_state; // LSU Commit logic lsu_commit_commit_ex; logic lsu_commit_ready_ex_commit; @@ -154,6 +170,8 @@ module cva6 import ariane_pkg::*; #( logic no_st_pending_ex; logic no_st_pending_commit; logic amo_valid_commit; + // ACCEL Commit + logic acc_valid_acc_ex; // -------------- // ID <-> COMMIT // -------------- @@ -172,6 +190,7 @@ module cva6 import ariane_pkg::*; #( riscv::xs_t fs; logic [2:0] frm_csr_id_issue_ex; logic [6:0] fprec_csr_ex; + riscv::xs_t vs; logic enable_translation_csr_ex; logic en_ld_st_translation_csr_ex; riscv::priv_lvl_t ld_st_priv_lvl_csr_ex; @@ -191,6 +210,7 @@ module cva6 import ariane_pkg::*; #( logic dcache_en_csr_nbdcache; logic csr_write_fflags_commit_cs; logic icache_en_csr; + logic acc_cons_en_csr; logic debug_mode; logic single_step_csr_commit; riscv::pmpcfg_t 
[15:0] pmpcfg; @@ -254,6 +274,11 @@ module cva6 import ariane_pkg::*; #( logic [(riscv::XLEN/8)-1:0] lsu_wmask; logic [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id; + // Accelerator port + logic [63:0] inval_addr; + logic inval_valid; + logic inval_ready; + // -------------- // Frontend // -------------- @@ -304,6 +329,7 @@ module cva6 import ariane_pkg::*; #( .priv_lvl_i ( priv_lvl ), .fs_i ( fs ), .frm_i ( frm_csr_id_issue_ex ), + .vs_i ( vs ), .irq_i ( irq_i ), .irq_ctrl_i ( irq_ctrl_csr_id ), .debug_mode_i ( debug_mode ), @@ -322,12 +348,22 @@ module cva6 import ariane_pkg::*; #( assign wbdata_ex_id = {x_result_ex_id, flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id}; assign ex_ex_ex_id = {x_exception_ex_id, flu_exception_ex_id, load_exception_ex_id, store_exception_ex_id, fpu_exception_ex_id}; assign wt_valid_ex_id = {x_valid_ex_id, flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id}; + end else if (ENABLE_ACCELERATOR) begin + assign trans_id_ex_id = {flu_trans_id_ex_id, load_trans_id_ex_id, store_trans_id_ex_id, fpu_trans_id_ex_id, acc_trans_id_ex_id}; + assign wbdata_ex_id = {flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id, acc_result_ex_id}; + assign ex_ex_ex_id = {flu_exception_ex_id, load_exception_ex_id, store_exception_ex_id, fpu_exception_ex_id, acc_exception_ex_id}; + assign wt_valid_ex_id = {flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id, acc_valid_ex_id}; end else begin assign trans_id_ex_id = {flu_trans_id_ex_id, load_trans_id_ex_id, store_trans_id_ex_id, fpu_trans_id_ex_id}; assign wbdata_ex_id = {flu_result_ex_id, load_result_ex_id, store_result_ex_id, fpu_result_ex_id}; assign ex_ex_ex_id = {flu_exception_ex_id, load_exception_ex_id, store_exception_ex_id, fpu_exception_ex_id}; assign wt_valid_ex_id = {flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id}; end + + if (CVXIF_PRESENT && ENABLE_ACCELERATOR) begin : gen_err_xif_and_acc + 
$error("X-interface and accelerator port cannot be enabled at the same time."); + end + // --------- // Issue // --------- @@ -342,6 +378,7 @@ module cva6 import ariane_pkg::*; #( .sb_full_o ( sb_full ), .flush_unissued_instr_i ( flush_unissued_instr_ctrl_id ), .flush_i ( flush_ctrl_id ), + .stall_i ( stall_acc_id ), // ID Stage .decoded_instr_i ( issue_entry_id_issue ), .decoded_instr_valid_i ( issue_entry_valid_id_issue ), @@ -377,6 +414,9 @@ module cva6 import ariane_pkg::*; #( .x_issue_valid_o ( x_issue_valid_id_ex ), .x_issue_ready_i ( x_issue_ready_ex_id ), .x_off_instr_o ( x_off_instr_id_ex ), + // Accelerator + .issue_instr_o ( issue_instr_id_acc ), + .issue_instr_hs_o ( issue_instr_hs_id_acc ), // Commit .resolved_branch_i ( resolved_branch ), .trans_id_i ( trans_id_ex_id ), @@ -478,8 +518,10 @@ module cva6 import ariane_pkg::*; #( .x_result_o ( x_result_ex_id ), .x_valid_o ( x_valid_ex_id ), .x_we_o ( x_we_ex_id ), - .cvxif_req_o ( cvxif_req_o ), - .cvxif_resp_i ( cvxif_resp_i ), + .cvxif_req_o ( cvxif_req ), + .cvxif_resp_i ( cvxif_resp ), + // Accelerator + .acc_valid_i ( acc_valid_acc_ex ), // Performance counters .itlb_miss_o ( itlb_miss_ex_perf ), .dtlb_miss_o ( dtlb_miss_ex_perf ), @@ -577,6 +619,7 @@ module cva6 import ariane_pkg::*; #( .csr_op_i ( csr_op_commit_csr ), .csr_write_fflags_i ( csr_write_fflags_commit_cs ), .dirty_fp_state_i ( dirty_fp_state ), + .dirty_v_state_i ( dirty_v_state ), .csr_addr_i ( csr_addr_ex_csr ), .csr_wdata_i ( csr_wdata_commit_csr ), .csr_rdata_o ( csr_rdata_csr_commit ), @@ -587,10 +630,13 @@ module cva6 import ariane_pkg::*; #( .set_debug_pc_o ( set_debug_pc ), .trap_vector_base_o ( trap_vector_base_commit_pcgen ), .priv_lvl_o ( priv_lvl ), + .acc_fflags_ex_i ( acc_resp_fflags ), + .acc_fflags_ex_valid_i ( acc_resp_fflags_valid ), .fs_o ( fs ), .fflags_o ( fflags_csr_commit ), .frm_o ( frm_csr_id_issue_ex ), .fprec_o ( fprec_csr_ex ), + .vs_o ( vs ), .irq_ctrl_o ( irq_ctrl_csr_id ), .ld_st_priv_lvl_o ( 
ld_st_priv_lvl_csr_ex ), .en_translation_o ( enable_translation_csr_ex ), @@ -606,6 +652,7 @@ module cva6 import ariane_pkg::*; #( .single_step_o ( single_step_csr_commit ), .dcache_en_o ( dcache_en_csr_nbdcache ), .icache_en_o ( icache_en_csr ), + .acc_cons_en_o ( acc_cons_en_csr ), .perf_addr_o ( addr_csr_perf ), .perf_data_o ( data_csr_perf ), .perf_data_i ( data_perf_csr ), @@ -675,6 +722,7 @@ module cva6 import ariane_pkg::*; #( .flush_dcache_ack_i ( dcache_flush_ack_cache_ctrl ), .halt_csr_i ( halt_csr_ctrl ), + .halt_acc_i ( halt_acc_ctrl ), .halt_o ( halt_ctrl ), // control ports .eret_i ( eret ), @@ -735,12 +783,15 @@ module cva6 import ariane_pkg::*; #( .wbuffer_not_ni_o ( dcache_commit_wbuffer_not_ni ), `ifdef PITON_ARIANE .l15_req_o ( l15_req_o ), - .l15_rtrn_i ( l15_rtrn_i ) + .l15_rtrn_i ( l15_rtrn_i ), `else // memory side .axi_req_o ( axi_req_o ), - .axi_resp_i ( axi_resp_i ) + .axi_resp_i ( axi_resp_i ), `endif + .inval_addr_i ( inval_addr ), + .inval_valid_i ( inval_valid ), + .inval_ready_o ( inval_ready ) ); end else begin @@ -789,8 +840,76 @@ module cva6 import ariane_pkg::*; #( .axi_resp_i ( axi_resp_i ) ); assign dcache_commit_wbuffer_not_ni = 1'b1; + assign inval_ready = 1'b1; end + // ---------------- + // Accelerator + // ---------------- + + if (ENABLE_ACCELERATOR) begin: gen_accelerator + acc_pkg::accelerator_req_t acc_req; + + acc_dispatcher i_acc_dispatcher ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .flush_unissued_instr_i ( flush_unissued_instr_ctrl_id ), + .flush_ex_i ( flush_ctrl_ex ), + .acc_cons_en_i ( acc_cons_en_csr ), + .fcsr_frm_i ( frm_csr_id_issue_ex ), + .dirty_v_state_o ( dirty_v_state ), + .issue_instr_i ( issue_instr_id_acc ), + .issue_instr_hs_i ( issue_instr_hs_id_acc ), + .issue_stall_o ( stall_acc_id ), + .fu_data_i ( fu_data_id_ex ), + .commit_instr_i ( commit_instr_id_commit ), + .commit_st_barrier_i ( fence_i_commit_controller | fence_commit_controller ), + .acc_trans_id_o ( acc_trans_id_ex_id ), + 
.acc_result_o ( acc_result_ex_id ), + .acc_valid_o ( acc_valid_ex_id ), + .acc_exception_o ( acc_exception_ex_id ), + .acc_valid_ex_o ( acc_valid_acc_ex ), + .commit_ack_i ( commit_ack ), + .acc_no_st_pending_i ( no_st_pending_commit ), + .ctrl_halt_o ( halt_acc_ctrl ), + .acc_req_o ( acc_req ), + .acc_resp_i ( cvxif_resp_i ) + ); + + assign acc_resp_fflags = cvxif_resp_i.fflags; + assign acc_resp_fflags_valid = cvxif_resp_i.fflags_valid; + + // Pack invalidation interface into accelerator interface + always_comb begin : pack_inval + inval_valid = cvxif_resp_i.inval_valid; + inval_addr = cvxif_resp_i.inval_addr; + cvxif_req_o = acc_req; + cvxif_req_o.inval_ready = inval_ready; + end + + // Tie off cvxif + assign cvxif_resp = '0; + end : gen_accelerator else begin: gen_no_accelerator + assign acc_trans_id_ex_id = '0; + assign acc_result_ex_id = '0; + assign acc_valid_ex_id = '0; + assign acc_exception_ex_id = '0; + assign acc_resp_fflags = '0; + assign acc_resp_fflags_valid = '0; + assign stall_acc_id = '0; + assign dirty_v_state = '0; + assign acc_valid_acc_ex = '0; + assign halt_acc_ctrl = '0; + + // No invalidation interface + assign inval_valid = '0; + assign inval_addr = '0; + + // Feed through cvxif + assign cvxif_req_o = cvxif_req; + assign cvxif_resp = cvxif_resp_i; + end : gen_no_accelerator + // ------------------- // Parameter Check // ------------------- diff --git a/core/cva6_accel_first_pass_decoder_stub.sv b/core/cva6_accel_first_pass_decoder_stub.sv new file mode 100644 index 000000000..5dcebae87 --- /dev/null +++ b/core/cva6_accel_first_pass_decoder_stub.sv @@ -0,0 +1,29 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Author: Nils Wistoff + +// Module stub for the cva6_accel_first_pass_decoder. Replace this with your accelerator's +// first pass decoder. 
+ +module cva6_accel_first_pass_decoder import ariane_pkg::*; ( + input logic [31:0] instruction_i, // instruction from IF + input riscv::xs_t fs_i, // floating point extension status + input riscv::xs_t vs_i, // vector extension status + output logic is_accel_o, // is an accelerator instruction + output scoreboard_entry_t instruction_o, // predecoded instruction + output logic illegal_instr_o, // is an illegal instruction + output logic is_control_flow_instr_o // is a control flow instruction +); + + assign is_accel_o = 1'b0; + assign instruction_o = '0; + assign illegal_instr_o = 1'b0; + assign is_control_flow_instr_o = 1'b0; + + $error("cva6_accel_first_pass_decoder: instantiated non-functional module stub.\ + Please replace this with your accelerator's first pass decoder \ + (or unset ENABLE_ACCELERATOR)."); + +endmodule : cva6_accel_first_pass_decoder diff --git a/core/decoder.sv b/core/decoder.sv index e989e2aad..9e2840c31 100644 --- a/core/decoder.sv +++ b/core/decoder.sv @@ -37,6 +37,7 @@ module decoder import ariane_pkg::*; #( input logic debug_mode_i, // we are in debug mode input riscv::xs_t fs_i, // floating point extension status input logic [2:0] frm_i, // floating-point dynamic rounding mode + input riscv::xs_t vs_i, // vector extension status input logic tvm_i, // trap virtual memory input logic tw_i, // timeout wait input logic tsr_i, // trap sret @@ -68,6 +69,34 @@ module decoder import ariane_pkg::*; #( riscv::xlen_t imm_uj_type; riscv::xlen_t imm_bi_type; + // --------------------------------------- + // Accelerator instructions' first-pass decoder + // --------------------------------------- + logic is_accel; + scoreboard_entry_t acc_instruction; + logic acc_illegal_instr; + logic acc_is_control_flow_instr; + + if (ENABLE_ACCELERATOR) begin: gen_accel_decoder + // This module is responsible for a light-weight decoding of accelerator instructions, + // identifying them, but also whether they read/write scalar registers. 
+ // Accelerators are supposed to define this module. + cva6_accel_first_pass_decoder i_accel_decoder ( + .instruction_i(instruction_i), + .fs_i(fs_i), + .vs_i(vs_i), + .is_accel_o(is_accel), + .instruction_o(acc_instruction), + .illegal_instr_o(acc_illegal_instr), + .is_control_flow_instr_o(acc_is_control_flow_instr) + ); + end: gen_accel_decoder else begin + assign is_accel = 1'b0; + assign acc_instruction = '0; + assign acc_illegal_instr = 1'b1; // this should never propagate + assign acc_is_control_flow_instr = 1'b0; + end + always_comb begin : decoder imm_select = NOIMM; @@ -86,6 +115,7 @@ module decoder import ariane_pkg::*; #( instruction_o.is_compressed = is_compressed_i; instruction_o.use_zimm = 1'b0; instruction_o.bp = branch_predict_i; + instruction_o.vfp = 1'b0; ecall = 1'b0; ebreak = 1'b0; check_fprm = 1'b0; @@ -1153,6 +1183,21 @@ module decoder import ariane_pkg::*; #( imm_select = RS3; end end + + // Accelerator instructions. + // These can overwrite the previous decoding entirely. 
+ if (ENABLE_ACCELERATOR) begin // only generate decoder if accelerators are enabled (static) + if (is_accel) begin + instruction_o.fu = acc_instruction.fu; + instruction_o.vfp = acc_instruction.vfp; + instruction_o.rs1 = acc_instruction.rs1; + instruction_o.rs2 = acc_instruction.rs2; + instruction_o.rd = acc_instruction.rd; + instruction_o.op = acc_instruction.op; + illegal_instr = acc_illegal_instr; + is_control_flow_instr_o = acc_is_control_flow_instr; + end + end end // -------------------------------- @@ -1199,6 +1244,13 @@ module decoder import ariane_pkg::*; #( instruction_o.use_imm = 1'b0; end endcase + + if (ENABLE_ACCELERATOR) begin + if (is_accel) begin + instruction_o.result = acc_instruction.result; + instruction_o.use_imm = acc_instruction.use_imm; + end + end end // --------------------- diff --git a/core/ex_stage.sv b/core/ex_stage.sv index 00569d996..5262aedc7 100644 --- a/core/ex_stage.sv +++ b/core/ex_stage.sv @@ -90,6 +90,7 @@ module ex_stage import ariane_pkg::*; #( output logic x_we_o, output cvxif_pkg::cvxif_req_t cvxif_req_o, input cvxif_pkg::cvxif_resp_t cvxif_resp_i, + input logic acc_valid_i, // Output is valid // Memory Management input logic enable_translation_i, input logic en_ld_st_translation_i, @@ -190,7 +191,7 @@ module ex_stage import ariane_pkg::*; #( .pc_i, .is_compressed_instr_i, // any functional unit is valid, check that there is no accidental mis-predict - .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i ) , + .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i ) , .branch_valid_i, .branch_comp_res_i ( alu_branch_res ), .branch_result_o ( branch_result ), diff --git a/core/id_stage.sv b/core/id_stage.sv index 398c6d3a6..7f2f1d4c1 100644 --- a/core/id_stage.sv +++ b/core/id_stage.sv @@ -34,6 +34,7 @@ module id_stage #( input riscv::priv_lvl_t priv_lvl_i, // current privilege level input riscv::xs_t fs_i, // floating point extension status 
input logic [2:0] frm_i, // floating-point dynamic rounding mode + input riscv::xs_t vs_i, // vector extension status input logic [1:0] irq_i, input ariane_pkg::irq_ctrl_t irq_ctrl_i, input logic debug_mode_i, // we are in debug mode @@ -93,6 +94,7 @@ module id_stage #( .debug_mode_i ( debug_mode_i ), .fs_i, .frm_i, + .vs_i, .tvm_i, .tw_i, .tsr_i, diff --git a/core/include/acc_pkg.sv b/core/include/acc_pkg.sv new file mode 100644 index 000000000..bcd3c70a6 --- /dev/null +++ b/core/include/acc_pkg.sv @@ -0,0 +1,47 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +// Authors: Matheus Cavalcante +// Nils Wistoff + +// Package defining the accelerator interface as used by Ara + CVA6 + +package acc_pkg; + + // ---------------------- + // Accelerator Interface + // ---------------------- + + typedef struct packed { + logic req_valid; + logic resp_ready; + riscv::instruction_t insn; + riscv::xlen_t rs1; + riscv::xlen_t rs2; + fpnew_pkg::roundmode_e frm; + logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id; + logic store_pending; + // Invalidation interface + logic acc_cons_en; + logic inval_ready; + } accelerator_req_t; + + typedef struct packed { + logic req_ready; + logic resp_valid; + riscv::xlen_t result; + logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id; + logic error; + // Metadata + logic store_pending; + logic store_complete; + logic load_complete; + logic [4:0] fflags; + logic fflags_valid; + // Invalidation interface + logic inval_valid; + logic [63:0] inval_addr; + } accelerator_resp_t; + +endpackage diff --git a/core/include/ariane_pkg.sv b/core/include/ariane_pkg.sv index 960f7cea2..a2f32a62c 100644 --- a/core/include/ariane_pkg.sv +++ b/core/include/ariane_pkg.sv @@ -173,9 +173,13 @@ package ariane_pkg; localparam bit RVD = (riscv::IS_XLEN64 ? 
1:0) & riscv::FPU_EN; // Is D extension enabled for only 64 bit CPU `endif localparam bit RVA = cva6_config_pkg::CVA6ConfigAExtEn; // Is A extension enabled + localparam bit RVV = cva6_config_pkg::CVA6ConfigVExtEn; + + // Is the accelerator enabled? + localparam bit ENABLE_ACCELERATOR = RVV; // Currently only used by V extension (Ara) // Transprecision floating-point extensions configuration - localparam bit XF16 = cva6_config_pkg::CVA6ConfigF16En; // Is half-precision float extension (Xf16) enabled + localparam bit XF16 = cva6_config_pkg::CVA6ConfigF16En | RVV; // Is half-precision float extension (Xf16) enabled localparam bit XF16ALT = cva6_config_pkg::CVA6ConfigF16AltEn; // Is alternative half-precision float extension (Xf16alt) enabled localparam bit XF8 = cva6_config_pkg::CVA6ConfigF8En; // Is quarter-precision float extension (Xf8) enabled localparam bit XFVEC = cva6_config_pkg::CVA6ConfigFVecEn; // Is vectorial float extension (Xfvec) enabled @@ -223,6 +227,7 @@ package ariane_pkg; | (riscv::XLEN'(0 ) << 13) // N - User level interrupts supported | (riscv::XLEN'(1 ) << 18) // S - Supervisor mode implemented | (riscv::XLEN'(1 ) << 20) // U - User mode implemented + | (riscv::XLEN'(RVV) << 21) // V - Vector extension | (riscv::XLEN'(NSX) << 23) // X - Non-standard extensions present | ((riscv::XLEN == 64 ? 2 : 1) << riscv::XLEN-2); // MXL @@ -231,8 +236,8 @@ package ariane_pkg; localparam bit CVXIF_PRESENT = cva6_config_pkg::CVA6ConfigCvxifEn; - // when cvx interface is present, use an additional writeback port - localparam NR_WB_PORTS = CVXIF_PRESENT ? 5 : 4; + // when cvx interface or the accelerator port is present, use an additional writeback port + localparam NR_WB_PORTS = (CVXIF_PRESENT || ENABLE_ACCELERATOR) ? 
5 : 4; // Read ports for general purpose register files localparam NR_RGPR_PORTS = 2; @@ -399,7 +404,8 @@ package ariane_pkg; CSR, // 6 FPU, // 7 FPU_VEC, // 8 - CVXIF // 9 + CVXIF, // 9 + ACCEL // 10 } fu_t; localparam EXC_OFF_RST = 8'h80; @@ -554,7 +560,9 @@ package ariane_pkg; // Shift with Add (Bitmanip) SH1ADD, SH2ADD, SH3ADD, // Bitmanip Logical with negate op (Bitmanip) - ANDN, ORN, XNOR + ANDN, ORN, XNOR, + // Accelerator operations + ACCEL_OP, ACCEL_OP_FS1, ACCEL_OP_FD, ACCEL_OP_LOAD, ACCEL_OP_STORE } fu_op; typedef struct packed { @@ -586,7 +594,8 @@ package ariane_pkg; FMV_F2X, // FPR-GPR Moves FCMP, // Comparisons FCLASS, // Classifications - [VFMIN:VFCPKCD_D] : return 1'b1; // Additional Vectorial FP ops + [VFMIN:VFCPKCD_D], // Additional Vectorial FP ops + ACCEL_OP_FS1 : return 1'b1; // Accelerator instructions default : return 1'b0; // all other ops endcase end else @@ -632,7 +641,8 @@ package ariane_pkg; FSGNJ, // Sign Injections FMV_X2F, // GPR-FPR Moves [VFMIN:VFSGNJX], // Vectorial MIN/MAX and SGNJ - [VFCPKAB_S:VFCPKCD_D] : return 1'b1; // Vectorial FP cast and pack ops + [VFCPKAB_S:VFCPKCD_D], // Vectorial FP cast and pack ops + ACCEL_OP_FD : return 1'b1; // Accelerator instructions default : return 1'b0; // all other ops endcase end else @@ -704,6 +714,7 @@ package ariane_pkg; logic [(riscv::XLEN/8)-1:0] lsu_rmask; // information needed by RVFI logic [(riscv::XLEN/8)-1:0] lsu_wmask; // information needed by RVFI riscv::xlen_t lsu_wdata; // information needed by RVFI + logic vfp; // is this a vector floating-point instruction? 
} scoreboard_entry_t; // --------------- diff --git a/core/include/cv32a60x_config_pkg.sv b/core/include/cv32a60x_config_pkg.sv index b23acbde0..071b472bf 100644 --- a/core/include/cv32a60x_config_pkg.sv +++ b/core/include/cv32a60x_config_pkg.sv @@ -27,6 +27,7 @@ package cva6_config_pkg; localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigAExtEn = 0; localparam CVA6ConfigBExtEn = 0; + localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiAddrWidth = 64; diff --git a/core/include/cv32a6_embedded_config_pkg.sv b/core/include/cv32a6_embedded_config_pkg.sv index 8d5362f62..79da7ffc9 100644 --- a/core/include/cv32a6_embedded_config_pkg.sv +++ b/core/include/cv32a6_embedded_config_pkg.sv @@ -26,6 +26,7 @@ package cva6_config_pkg; localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigAExtEn = 0; localparam CVA6ConfigBExtEn = 1; + localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiAddrWidth = 64; diff --git a/core/include/cv32a6_ima_sv32_fpga_config_pkg.sv b/core/include/cv32a6_ima_sv32_fpga_config_pkg.sv index 0b23e8671..1ca6badd9 100644 --- a/core/include/cv32a6_ima_sv32_fpga_config_pkg.sv +++ b/core/include/cv32a6_ima_sv32_fpga_config_pkg.sv @@ -27,6 +27,7 @@ package cva6_config_pkg; localparam CVA6ConfigCExtEn = 0; localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigBExtEn = 0; + localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiAddrWidth = 64; diff --git a/core/include/cv32a6_imac_sv0_config_pkg.sv b/core/include/cv32a6_imac_sv0_config_pkg.sv index 35d3caeea..4a8d434b7 100644 --- a/core/include/cv32a6_imac_sv0_config_pkg.sv +++ b/core/include/cv32a6_imac_sv0_config_pkg.sv @@ -27,6 +27,7 @@ package cva6_config_pkg; localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigBExtEn = 0; + localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiAddrWidth = 64; diff --git 
a/core/include/cv32a6_imac_sv32_config_pkg.sv b/core/include/cv32a6_imac_sv32_config_pkg.sv index fdc7dd7b1..c9ad04791 100644 --- a/core/include/cv32a6_imac_sv32_config_pkg.sv +++ b/core/include/cv32a6_imac_sv32_config_pkg.sv @@ -27,6 +27,7 @@ package cva6_config_pkg; localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigBExtEn = 0; + localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiAddrWidth = 64; diff --git a/core/include/cv32a6_imafc_sv32_config_pkg.sv b/core/include/cv32a6_imafc_sv32_config_pkg.sv index edaf3e16b..e1eba4738 100644 --- a/core/include/cv32a6_imafc_sv32_config_pkg.sv +++ b/core/include/cv32a6_imafc_sv32_config_pkg.sv @@ -27,6 +27,7 @@ package cva6_config_pkg; localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigBExtEn = 0; + localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiAddrWidth = 64; diff --git a/core/include/cv64a6_imafdc_sv39_config_pkg.sv b/core/include/cv64a6_imafdc_sv39_config_pkg.sv index 6f979615c..c08112228 100644 --- a/core/include/cv64a6_imafdc_sv39_config_pkg.sv +++ b/core/include/cv64a6_imafdc_sv39_config_pkg.sv @@ -27,6 +27,7 @@ package cva6_config_pkg; localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigBExtEn = 1; + localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiAddrWidth = 64; diff --git a/core/include/cv64a6_imafdc_sv39_openpiton_config_pkg.sv b/core/include/cv64a6_imafdc_sv39_openpiton_config_pkg.sv index 0fc5df85f..cf0cc1b22 100644 --- a/core/include/cv64a6_imafdc_sv39_openpiton_config_pkg.sv +++ b/core/include/cv64a6_imafdc_sv39_openpiton_config_pkg.sv @@ -27,6 +27,7 @@ package cva6_config_pkg; localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigBExtEn = 0; + localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigAxiIdWidth = 4; localparam 
CVA6ConfigAxiAddrWidth = 64; diff --git a/core/include/cv64a6_imafdcv_sv39_config_pkg.sv b/core/include/cv64a6_imafdcv_sv39_config_pkg.sv new file mode 100644 index 000000000..82e4675e6 --- /dev/null +++ b/core/include/cv64a6_imafdcv_sv39_config_pkg.sv @@ -0,0 +1,86 @@ +// Copyright 2021 Thales DIS design services SAS +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// You may obtain a copy of the License at https://solderpad.org/licenses/ +// +// Original Author: Jean-Roch COULON - Thales + + +package cva6_config_pkg; + + typedef enum logic { + WB = 0, + WT = 1 + } cache_type_t ; + + localparam CVA6ConfigXlen = 64; + + localparam CVA6ConfigFpuEn = 1; + localparam CVA6ConfigF16En = 0; + localparam CVA6ConfigF16AltEn = 0; + localparam CVA6ConfigF8En = 0; + localparam CVA6ConfigFVecEn = 0; + + localparam CVA6ConfigCvxifEn = 0; + localparam CVA6ConfigCExtEn = 1; + localparam CVA6ConfigAExtEn = 1; + localparam CVA6ConfigBExtEn = 0; + localparam CVA6ConfigVExtEn = 1; + + localparam CVA6ConfigAxiIdWidth = 4; + localparam CVA6ConfigAxiAddrWidth = 64; + localparam CVA6ConfigAxiDataWidth = 64; + localparam CVA6ConfigFetchUserEn = 0; + localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; + localparam CVA6ConfigDataUserEn = 0; + localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; + + localparam CVA6ConfigRenameEn = 0; + + localparam CVA6ConfigIcacheByteSize = 16384; + localparam CVA6ConfigIcacheSetAssoc = 4; + localparam CVA6ConfigIcacheLineWidth = 128; + localparam CVA6ConfigDcacheByteSize = 16384; + localparam CVA6ConfigDcacheSetAssoc = 4; + localparam CVA6ConfigDcacheLineWidth = 128; + + localparam CVA6ConfigDcacheIdWidth = 1; + localparam CVA6ConfigMemTidWidth = 2; + + localparam CVA6ConfigWtDcacheWbufDepth = 8; + + localparam CVA6ConfigNrCommitPorts = 2; + localparam CVA6ConfigNrScoreboardEntries = 8; + + 
localparam CVA6ConfigFPGAEn = 0; + + localparam CVA6ConfigNrLoadPipeRegs = 1; + localparam CVA6ConfigNrStorePipeRegs = 0; + + localparam CVA6ConfigInstrTlbEntries = 16; + localparam CVA6ConfigDataTlbEntries = 16; + + localparam CVA6ConfigRASDepth = 2; + localparam CVA6ConfigBTBEntries = 32; + localparam CVA6ConfigBHTEntries = 128; + + localparam CVA6ConfigNrPMPEntries = 8; + + localparam CVA6ConfigPerfCounterEn = 1; + + localparam CVA6ConfigDcacheType = WT; + + localparam CVA6ConfigMmuPresent = 1; + + `define RVFI_PORT + + // Do not modify + `ifdef RVFI_PORT + localparam CVA6ConfigRvfiTrace = 1; + `else + localparam CVA6ConfigRvfiTrace = 0; + `endif + +endpackage diff --git a/core/include/riscv_pkg.sv b/core/include/riscv_pkg.sv index 9f896e8d1..f25849b67 100644 --- a/core/include/riscv_pkg.sv +++ b/core/include/riscv_pkg.sv @@ -245,7 +245,7 @@ package riscv; localparam OpcodeNmsub = 7'b10_010_11; localparam OpcodeNmadd = 7'b10_011_11; localparam OpcodeOpFp = 7'b10_100_11; - localparam OpcodeRsrvd1 = 7'b10_101_11; + localparam OpcodeVec = 7'b10_101_11; localparam OpcodeCustom2 = 7'b10_110_11; // Quadrant 3 localparam OpcodeBranch = 7'b11_000_11; @@ -370,6 +370,14 @@ package riscv; CSR_FRM = 12'h002, CSR_FCSR = 12'h003, CSR_FTRAN = 12'h800, + // Vector CSRs + CSR_VSTART = 12'h008, + CSR_VXSAT = 12'h009, + CSR_VXRM = 12'h00A, + CSR_VCSR = 12'h00F, + CSR_VL = 12'hC20, + CSR_VTYPE = 12'hC21, + CSR_VLENB = 12'hC22, // Supervisor Mode CSRs CSR_SSTATUS = 12'h100, CSR_SIE = 12'h104, @@ -514,6 +522,8 @@ package riscv; // Cache Control (platform specifc) CSR_DCACHE = 12'h7C1, CSR_ICACHE = 12'h7C0, + // Accelerator memory consistency (platform specific) + CSR_ACC_CONS = 12'h7C2, // Triggers CSR_TSELECT = 12'h7A0, CSR_TDATA1 = 12'h7A1, diff --git a/core/issue_read_operands.sv b/core/issue_read_operands.sv index 2674e227f..408f3b8d7 100644 --- a/core/issue_read_operands.sv +++ b/core/issue_read_operands.sv @@ -22,6 +22,8 @@ module issue_read_operands import ariane_pkg::*; #( 
input logic rst_ni, // Asynchronous reset active low // flush input logic flush_i, + // stall + input logic stall_i, // coming from rename input scoreboard_entry_t issue_instr_i, input logic issue_instr_valid_i, @@ -161,7 +163,7 @@ module issue_read_operands import ariane_pkg::*; #( // check that all operands are available, otherwise stall // forward corresponding register always_comb begin : operands_available - stall = 1'b0; + stall = stall_i; // operand forwarding signals forward_rs1 = 1'b0; forward_rs2 = 1'b0; @@ -248,9 +250,9 @@ module issue_read_operands import ariane_pkg::*; #( // zero extend operand a operand_a_n = {{riscv::XLEN-5{1'b0}}, issue_instr_i.rs1[4:0]}; end - // or is it an immediate (including PC), this is not the case for a store and control flow instructions + // or is it an immediate (including PC), this is not the case for a store, control flow, and accelerator instructions // also make sure operand B is not already used as an FP operand - if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && !is_rs2_fpr(issue_instr_i.op)) begin + if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && (issue_instr_i.fu != ACCEL) && !is_rs2_fpr(issue_instr_i.op)) begin operand_b_n = issue_instr_i.result; end end diff --git a/core/issue_stage.sv b/core/issue_stage.sv index d4bd71019..81ef03dfd 100644 --- a/core/issue_stage.sv +++ b/core/issue_stage.sv @@ -26,6 +26,7 @@ module issue_stage import ariane_pkg::*; #( output logic sb_full_o, input logic flush_unissued_instr_i, input logic flush_i, + input logic stall_i, // Stall issue stage // from ISSUE input scoreboard_entry_t decoded_instr_i, input logic decoded_instr_valid_i, @@ -63,6 +64,10 @@ module issue_stage import ariane_pkg::*; #( input logic x_issue_ready_i, output logic [31:0] x_off_instr_o, + // to accelerator dispatcher + output scoreboard_entry_t issue_instr_o, + output logic issue_instr_hs_o, + // write back port input 
logic [NR_WB_PORTS-1:0][TRANS_ID_BITS-1:0] trans_id_i, input bp_resolve_t resolved_branch_i, @@ -120,6 +125,9 @@ module issue_stage import ariane_pkg::*; #( assign rs1_forwarding_o = rs1_forwarding_xlen[riscv::VLEN-1:0]; assign rs2_forwarding_o = rs2_forwarding_xlen[riscv::VLEN-1:0]; + assign issue_instr_o = issue_instr_sb_iro; + assign issue_instr_hs_o = issue_instr_valid_sb_iro & issue_ack_iro_sb; + // --------------------------------------------------------- // 1. Re-name // --------------------------------------------------------- diff --git a/core/serdiv.sv b/core/serdiv.sv index c0b8b27d7..dcfe47a08 100644 --- a/core/serdiv.sv +++ b/core/serdiv.sv @@ -17,7 +17,8 @@ module serdiv import ariane_pkg::*; #( parameter ariane_pkg::cva6_cfg_t cva6_cfg = ariane_pkg::cva6_cfg_empty, - parameter WIDTH = 64 + parameter WIDTH = 64, + parameter STABLE_HANDSHAKE = 0 // Guarantee a stable in_rdy_o during the input handshake. Keep it at 0 in CVA6 ) ( input logic clk_i, input logic rst_ni, @@ -162,7 +163,9 @@ module serdiv import ariane_pkg::*; #( in_rdy_o = 1'b1; if (in_vld_i) begin - in_rdy_o = 1'b0;// there is a cycle delay until the valid signal is asserted by the id stage + // CVA6: there is a cycle delay until the valid signal is asserted by the id stage + // Ara: we need a stable handshake + in_rdy_o = (STABLE_HANDSHAKE) ? 1'b1 : 1'b0; a_reg_en = 1'b1; b_reg_en = 1'b1; load_en = 1'b1; diff --git a/src_files.yml b/src_files.yml index 1afc440ad..84173c67c 100644 --- a/src_files.yml +++ b/src_files.yml @@ -51,6 +51,7 @@ ariane: src/store_unit.sv, src/mmu_sv39/tlb.sv, src/mmu_sv32/cva6_tlb_sv32.sv, + src/acc_dispatcher.sv, src/debug/dm_csrs.sv, src/debug/dm_mem.sv, src/debug/dm_top.sv,