diff --git a/Bender.yml b/Bender.yml index da96ec916..1294cc11e 100644 --- a/Bender.yml +++ b/Bender.yml @@ -54,9 +54,12 @@ sources: - src/compressed_decoder.sv - src/axi_shim.sv - src/ex_stage.sv - - src/mmu.sv - - src/ptw.sv - - src/mult.sv + - src/mmu_sv39/mmu.sv + - src/mmu_sv39/ptw.sv + - src/mmu_sv39/mult.sv + - src/mmu_sv32/cva6_mmu_sv32.sv + - src/mmu_sv32/cva6_ptw_sv32.sv + - src/mmu_sv32/cva6_mult_sv32.sv - src/load_unit.sv - src/issue_read_operands.sv - src/pmp/src/pmp_entry.sv diff --git a/Flist.ariane b/Flist.ariane index 940999ae0..e1249fc86 100644 --- a/Flist.ariane +++ b/Flist.ariane @@ -102,7 +102,8 @@ src/issue_read_operands.sv src/issue_stage.sv src/load_unit.sv src/load_store_unit.sv -src/mmu.sv +src/mmu_sv39/mmu.sv +src/mmu_sv32/cva6_mmu_sv32.sv src/mult.sv src/multiplier.sv src/serdiv.sv diff --git a/Makefile b/Makefile index a91a450c8..4328a0a83 100644 --- a/Makefile +++ b/Makefile @@ -153,6 +153,8 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ $(wildcard src/axi_riscv_atomics/src/*.sv) \ $(wildcard src/axi_mem_if/src/*.sv) \ $(wildcard src/pmp/src/*.sv) \ + $(wildcard src/mmu_sv32/*.sv) \ + $(wildcard src/mmu_sv39/*.sv) \ src/rv_plic/rtl/rv_plic_target.sv \ src/rv_plic/rtl/rv_plic_gateway.sv \ src/rv_plic/rtl/plic_regmap.sv \ diff --git a/ariane.core b/ariane.core index 44c9578e6..4d3524785 100644 --- a/ariane.core +++ b/ariane.core @@ -33,17 +33,20 @@ filesets: - src/lsu_arbiter.sv - src/lsu.sv - src/miss_handler.sv - - src/mmu.sv + - src/mmu_sv39/mmu.sv + - src/mmu_sv32/cva6_mmu_sv32.sv - src/mult.sv - src/nbdcache.sv - src/pcgen_stage.sv - src/perf_counters.sv - - src/ptw.sv + - src/mmu_sv39/ptw.sv + - src/mmu_sv32/cva6_ptw_sv32.sv - src/regfile_ff.sv - src/scoreboard.sv - src/store_buffer.sv - src/store_unit.sv - - src/tlb.sv + - src/mmu_sv39/tlb.sv + - src/mmu_sv32/cva6_tlb_sv32.sv file_type : systemVerilogSource depend : - pulp-platform.org::axi_mem_if diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv 
index 467ebd552..a95dbf71b 100644
--- a/include/ariane_pkg.sv
+++ b/include/ariane_pkg.sv
@@ -611,6 +611,11 @@ package ariane_pkg;
                                                  // we want jump accordingly e.g.: +4, +2
     } scoreboard_entry_t;
 
+    // ---------------
+    // MMU instantiation
+    // ---------------
+    localparam bit MMU_PRESENT = 1'b1;  // MMU is present
+
     // --------------------
     // Atomics
     // --------------------
@@ -635,7 +640,7 @@ package ariane_pkg;
         logic                  valid;      // valid flag
         logic                  is_2M;      //
         logic                  is_1G;      //
-        logic [26:0]           vpn;
+        logic [27-1:0]         vpn;        // Sv39 virtual address (39 bits) = 27-bit VPN + 12-bit page offset
         logic [ASID_WIDTH-1:0] asid;
         riscv::pte_t           content;
     } tlb_update_t;
@@ -644,6 +649,14 @@ package ariane_pkg;
     // (e.g. 27*4K == 39bit address space).
     localparam PPN4K_WIDTH = 38;
 
+    typedef struct packed {
+        logic                  valid;      // valid flag
+        logic                  is_4M;      //
+        logic [20-1:0]         vpn;        // Sv32 virtual address (32 bits) = 20-bit VPN + 12-bit page offset
+        logic [9-1:0]          asid;       //ASID length = 9 for Sv32 mmu
+        riscv::pte_sv32_t      content;
+    } tlb_update_sv32_t;
+
     typedef enum logic [1:0] {
       FE_NONE,
       FE_INSTR_ACCESS_FAULT,
diff --git a/include/riscv_pkg.sv b/include/riscv_pkg.sv
index 28e7ed001..01d23fe4a 100644
--- a/include/riscv_pkg.sv
+++ b/include/riscv_pkg.sv
@@ -33,7 +33,7 @@ package riscv;
     // Warning: When using STD_CACHE, configuration must be PLEN=56 and VLEN=64
     // Warning: VLEN must be superior or equal to PLEN
     localparam VLEN       = (XLEN == 32) ? 32 : 64;    // virtual address length
-    localparam PLEN       = (XLEN == 32) ? 32 : 56;    // physical address length
+    localparam PLEN       = (XLEN == 32) ? 34 : 56;    // physical address length; NOTE(review): 34 > VLEN for XLEN=32, which contradicts the warning above - confirm
     localparam IS_XLEN32  = (XLEN == 32) ? 1'b1 : 1'b0;
     localparam IS_XLEN64  = (XLEN == 32) ? 1'b0 : 1'b1;
 
@@ -267,10 +267,10 @@ package riscv;
     // ----------------------
     // Virtual Memory
     // ----------------------
-    // memory management, pte
+    // memory management, pte for sv39
     typedef struct packed {
         logic [9:0]  reserved;
-        logic [PLEN-12-1:0] ppn;
+        logic [44-1:0] ppn; // PPN length for Sv39 (44 bits)
         logic [1:0]  rsw;
         logic d;
         logic a;
@@ -282,6 +282,20 @@ package riscv;
         logic v;
     } pte_t;
 
+    // memory management, pte for sv32
+    typedef struct packed {
+        logic [22-1:0] ppn; // PPN length for Sv32 (22 bits)
+        logic [1:0]  rsw;
+        logic d;
+        logic a;
+        logic g;
+        logic u;
+        logic x;
+        logic w;
+        logic r;
+        logic v;
+    } pte_sv32_t;
+
     // ----------------------
     // Exception Cause Codes
     // ----------------------
@@ -637,10 +651,19 @@ package riscv;
         return {csr, 5'h0, 3'h2, dest, 7'h73};
     endfunction
 
+    function automatic logic [31:0] branch(logic [4:0] src2, logic [4:0] src1, logic [2:0] funct3, logic [11:0] offset);
+        // OpCode Branch
+        return {offset[11], offset[9:4], src2, src1, funct3, offset[3:0], offset[10], 7'b11_000_11};
+    endfunction
+
     function automatic logic [31:0] ebreak ();
         return 32'h00100073;
     endfunction
 
+    function automatic logic [31:0] wfi ();
+        return 32'h10500073;
+    endfunction
+
     function automatic logic [31:0] nop ();
         return 32'h00000013;
     endfunction
diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv
index d7e2ae7be..dbeca1e5e 100644
--- a/src/csr_regfile.sv
+++ b/src/csr_regfile.sv
@@ -1072,6 +1072,7 @@ module csr_regfile import ariane_pkg::*; #(
 `endif
             dcsr_q                 <= '0;
             dcsr_q.prv             <= riscv::PRIV_LVL_M;
+            dcsr_q.xdebugver       <= 4'h4;
             dpc_q                  <= '0;
             dscratch0_q            <= {riscv::XLEN{1'b0}};
             dscratch1_q            <= {riscv::XLEN{1'b0}};
diff --git a/src/load_store_unit.sv b/src/load_store_unit.sv
index 3790c89f3..3814314fb 100644
--- a/src/load_store_unit.sv
+++ b/src/load_store_unit.sv
@@ -130,34 +130,114 @@ module load_store_unit import ariane_pkg::*; #(
     // -------------------
     // MMU e.g.: TLBs/PTW
     // -------------------
-    mmu #(
-        .INSTR_TLB_ENTRIES      ( 16                     ),
-        .DATA_TLB_ENTRIES       ( 16                     ),
-        .ASID_WIDTH             ( ASID_WIDTH             ),
-        .ArianeCfg              ( ArianeCfg              )
-    ) i_mmu (
+    // The MMU flavour is selected at elaboration time: Sv39 for XLEN=64, Sv32 for XLEN=32,
+    // and a pass-through (physical == virtual address) otherwise.
+    if (MMU_PRESENT && (riscv::XLEN == 64)) begin : gen_mmu_sv39
+        mmu #(
+            .INSTR_TLB_ENTRIES      ( 16                     ),
+            .DATA_TLB_ENTRIES       ( 16                     ),
+            .ASID_WIDTH             ( ASID_WIDTH             ),
+            .ArianeCfg              ( ArianeCfg              )
+        ) i_cva6_mmu (
         // misaligned bypass
-        .misaligned_ex_i        ( misaligned_exception   ),
-        .lsu_is_store_i         ( st_translation_req     ),
-        .lsu_req_i              ( translation_req        ),
-        .lsu_vaddr_i            ( mmu_vaddr              ),
-        .lsu_valid_o            ( translation_valid      ),
-        .lsu_paddr_o            ( mmu_paddr              ),
-        .lsu_exception_o        ( mmu_exception          ),
-        .lsu_dtlb_hit_o         ( dtlb_hit               ), // send in the same cycle as the request
-        .lsu_dtlb_ppn_o         ( dtlb_ppn               ), // send in the same cycle as the request
-        // connecting PTW to D$ IF
-        .req_port_i             ( dcache_req_ports_i [0] ),
-        .req_port_o             ( dcache_req_ports_o [0] ),
-        // icache address translation requests
-        .icache_areq_i          ( icache_areq_i          ),
-        .asid_to_be_flushed_i,
-        .vaddr_to_be_flushed_i,
-        .icache_areq_o          ( icache_areq_o          ),
-        .pmpcfg_i,
-        .pmpaddr_i,
-        .*
-    );
+            .misaligned_ex_i        ( misaligned_exception   ),
+            .lsu_is_store_i         ( st_translation_req     ),
+            .lsu_req_i              ( translation_req        ),
+            .lsu_vaddr_i            ( mmu_vaddr              ),
+            .lsu_valid_o            ( translation_valid      ),
+            .lsu_paddr_o            ( mmu_paddr              ),
+            .lsu_exception_o        ( mmu_exception          ),
+            .lsu_dtlb_hit_o         ( dtlb_hit               ), // send in the same cycle as the request
+            .lsu_dtlb_ppn_o         ( dtlb_ppn               ), // send in the same cycle as the request
+            // connecting PTW to D$ IF
+            .req_port_i             ( dcache_req_ports_i [0] ),
+            .req_port_o             ( dcache_req_ports_o [0] ),
+            // icache address translation requests
+            .icache_areq_i          ( icache_areq_i          ),
+            .asid_to_be_flushed_i,
+            .vaddr_to_be_flushed_i,
+            .icache_areq_o          ( icache_areq_o          ),
+            .pmpcfg_i,
+            .pmpaddr_i,
+            .*
+        );
+    end else if (MMU_PRESENT && (riscv::XLEN == 32)) begin : gen_mmu_sv32
+        cva6_mmu_sv32 #(
+            .INSTR_TLB_ENTRIES      ( 16                     ),
+            .DATA_TLB_ENTRIES       ( 16                     ),
+            .ASID_WIDTH             ( ASID_WIDTH             ),
+            .ArianeCfg              ( ArianeCfg              )
+        ) i_cva6_mmu (
+            // misaligned bypass
+            .misaligned_ex_i        ( misaligned_exception   ),
+            .lsu_is_store_i         ( st_translation_req     ),
+            .lsu_req_i              ( translation_req        ),
+            .lsu_vaddr_i            ( mmu_vaddr              ),
+            .lsu_valid_o            ( translation_valid      ),
+            .lsu_paddr_o            ( mmu_paddr              ),
+            .lsu_exception_o        ( mmu_exception          ),
+            .lsu_dtlb_hit_o         ( dtlb_hit               ), // send in the same cycle as the request
+            .lsu_dtlb_ppn_o         ( dtlb_ppn               ), // send in the same cycle as the request
+            // connecting PTW to D$ IF
+            .req_port_i             ( dcache_req_ports_i [0] ),
+            .req_port_o             ( dcache_req_ports_o [0] ),
+            // icache address translation requests
+            .icache_areq_i          ( icache_areq_i          ),
+            .asid_to_be_flushed_i,
+            .vaddr_to_be_flushed_i,
+            .icache_areq_o          ( icache_areq_o          ),
+            .pmpcfg_i,
+            .pmpaddr_i,
+            .*
+        );
+    end else begin : gen_no_mmu
+        // Without an MMU the virtual address is used as the physical address. PLEN can be
+        // smaller than VLEN (XLEN=64: 56 vs 64) or larger (XLEN=32: 34 vs 32), so the address
+        // is truncated or zero-extended to PLEN instead of slicing [PLEN-1:0] unconditionally.
+        logic [riscv::PLEN-1:0] fetch_vaddr_plen, mmu_vaddr_plen;
+        if (riscv::VLEN >= riscv::PLEN) begin : gen_vaddr_trunc
+            assign fetch_vaddr_plen = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0];
+            assign mmu_vaddr_plen   = mmu_vaddr[riscv::PLEN-1:0];
+        end else begin : gen_vaddr_zext
+            assign fetch_vaddr_plen = {{riscv::PLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr};
+            assign mmu_vaddr_plen   = {{riscv::PLEN-riscv::VLEN{1'b0}}, mmu_vaddr};
+        end
+
+        assign icache_areq_o.fetch_valid     = icache_areq_i.fetch_req;
+        assign icache_areq_o.fetch_paddr     = fetch_vaddr_plen;
+        assign icache_areq_o.fetch_exception = '0;
+
+        // no page table walker: D$ request port 0 never issues a request
+        assign dcache_req_ports_o[0].address_index = '0;
+        assign dcache_req_ports_o[0].address_tag   = '0;
+        assign dcache_req_ports_o[0].data_wdata    = 64'b0;
+        assign dcache_req_ports_o[0].data_req      = 1'b0;
+        assign dcache_req_ports_o[0].data_be       = 8'hFF;
+        assign dcache_req_ports_o[0].data_size     = 2'b11;
+        assign dcache_req_ports_o[0].data_we       = 1'b0;
+        assign dcache_req_ports_o[0].kill_req      = '0;
+        assign dcache_req_ports_o[0].tag_valid     = 1'b0;
+
+        assign itlb_miss_o = 1'b0;
+        assign dtlb_miss_o = 1'b0;
+        assign dtlb_ppn    = mmu_vaddr_plen[riscv::PLEN-1:12];
+        assign dtlb_hit    = 1'b1;
+
+        assign mmu_exception = '0;
+
+        // the address and its valid flag are registered, i.e. answered one cycle after the request
+        always_ff @(posedge clk_i or negedge rst_ni) begin
+            if (~rst_ni) begin
+                mmu_paddr         <= '0;
+                translation_valid <= '0;
+            end else begin
+                mmu_paddr         <= mmu_vaddr_plen;
+                translation_valid <= translation_req;
+            end
+        end
+    end
+
+
     logic store_buffer_empty;
     // ------------------
     // Store Unit
diff --git a/src/mmu_sv32/cva6_mmu_sv32.sv b/src/mmu_sv32/cva6_mmu_sv32.sv
new file mode 100644
index 000000000..2c4293ac3
--- /dev/null
+++ b/src/mmu_sv32/cva6_mmu_sv32.sv
@@ -0,0 +1,450 @@
+// Copyright (c) 2021 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Sebastien Jacq Thales Research & Technology
+// Date: 17/07/2021
+//
+// Additional contributions by:
+// Sebastien Jacq - sjthales on github.com
+//
+// Description: Memory Management Unit for CV32A6, contains TLB and
+// address translation unit. Sv32 as defined in RISC-V
+// privilege specification 1.11-WIP.
+// This module is an adaptation of the MMU Sv39 developed
+// by Florian Zaruba to the Sv32 standard.
+//
+// =========================================================================== //
+// Revisions :
+// Date Version Author Description
+// 2020-02-17 0.1 S.Jacq MMU Sv32 for CV32A6
+// =========================================================================== //
+
+
+module cva6_mmu_sv32 import ariane_pkg::*; #(
+    parameter int unsigned INSTR_TLB_ENTRIES = 4,
+    parameter int unsigned DATA_TLB_ENTRIES  = 4,
+    parameter int unsigned ASID_WIDTH        = 1,
+    parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig
+) (
+    input  logic                            clk_i,
+    input  logic                            rst_ni,
+    input  logic                            flush_i,
+    input  logic                            enable_translation_i,
+    input  logic                            en_ld_st_translation_i, // enable virtual memory translation for load/stores
+    // IF interface
+    input  icache_areq_o_t                  icache_areq_i,
+    output icache_areq_i_t                  icache_areq_o,
+    // LSU interface
+    // this is a more minimalistic interface because the actual addressing logic is handled
+    // in the LSU as we distinguish load and stores, what we do here is simple address translation
+    input  exception_t                      misaligned_ex_i,
+    input  logic                            lsu_req_i, // request address translation
+    input  logic [riscv::VLEN-1:0]          lsu_vaddr_i, // virtual address in
+    input  logic                            lsu_is_store_i, // the translation is requested by a store
+    // if we need to walk the page table we can't grant in the same cycle
+    // Cycle 0
+    output logic                            lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB
+    output logic [riscv::PPNW-1:0]          lsu_dtlb_ppn_o, // ppn (send same cycle as hit)
+    // Cycle 1
+    output logic                            lsu_valid_o, // translation is valid
+    output logic [riscv::PLEN-1:0]          lsu_paddr_o, // translated address
+    output exception_t                      lsu_exception_o, // address translation threw an exception
+    // General control signals
+    input  riscv::priv_lvl_t                priv_lvl_i,
+    input  riscv::priv_lvl_t                ld_st_priv_lvl_i,
+    input  logic                            sum_i,
+    input  logic                            mxr_i,
+    // input logic flag_mprv_i,
+    input  logic [riscv::PPNW-1:0]          satp_ppn_i,
+    input  logic [ASID_WIDTH-1:0]           asid_i,
+    input  logic [ASID_WIDTH-1:0]           asid_to_be_flushed_i,
+    input  logic [riscv::VLEN-1:0]          vaddr_to_be_flushed_i,
+    input  logic                            flush_tlb_i,
+    // Performance counters
+    output logic                            itlb_miss_o,
+    output logic                            dtlb_miss_o,
+    // PTW memory interface
+    input  dcache_req_o_t                   req_port_i,
+    output dcache_req_i_t                   req_port_o,
+    // PMP
+    input  riscv::pmpcfg_t [15:0]           pmpcfg_i,
+    input  logic [15:0][riscv::PLEN-3:0]    pmpaddr_i
+);
+
+    logic                   iaccess_err; // insufficient privilege to access this instruction page
+    logic                   daccess_err; // insufficient privilege to access this data page
+    logic                   ptw_active; // PTW is currently walking a page table
+    logic                   walking_instr; // PTW is walking because of an ITLB miss
+    logic                   ptw_error; // PTW threw an exception
+    logic                   ptw_access_exception; // PTW threw an access exception (PMPs)
+    logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr
+
+    logic [riscv::VLEN-1:0] update_vaddr;
+    tlb_update_sv32_t       update_ptw_itlb, update_ptw_dtlb;
+
+    logic                   itlb_lu_access;
+    riscv::pte_sv32_t       itlb_content;
+    logic                   itlb_is_4M;
+    logic                   itlb_lu_hit;
+
+    logic                   dtlb_lu_access;
+    riscv::pte_sv32_t       dtlb_content;
+    logic                   dtlb_is_4M;
+    logic                   dtlb_lu_hit;
+
+
+    // Assignments
+    assign itlb_lu_access = icache_areq_i.fetch_req;
+    assign dtlb_lu_access = lsu_req_i;
+
+
+    cva6_tlb_sv32 #(
+        .TLB_ENTRIES ( INSTR_TLB_ENTRIES ),
+        .ASID_WIDTH  ( ASID_WIDTH        )
+    ) i_itlb (
+        .clk_i                 ( clk_i                     ),
+        .rst_ni                ( rst_ni                    ),
+        .flush_i               ( flush_tlb_i               ),
+
+        .update_i              ( update_ptw_itlb           ),
+
+        .lu_access_i           ( itlb_lu_access            ),
+        .lu_asid_i             ( asid_i                    ),
+        .asid_to_be_flushed_i  ( asid_to_be_flushed_i      ),
+        .vaddr_to_be_flushed_i ( vaddr_to_be_flushed_i     ),
+        .lu_vaddr_i            ( icache_areq_i.fetch_vaddr ),
+        .lu_content_o          ( itlb_content              ),
+
+        .lu_is_4M_o            ( itlb_is_4M                ),
+        .lu_hit_o              ( itlb_lu_hit               )
+    );
+
+    cva6_tlb_sv32 #(
+        .TLB_ENTRIES ( DATA_TLB_ENTRIES ),
+        .ASID_WIDTH  ( ASID_WIDTH       )
+    ) i_dtlb (
+        .clk_i                 ( clk_i                 ),
+        .rst_ni                ( rst_ni                ),
+        .flush_i               ( flush_tlb_i           ),
+
+        .update_i              ( update_ptw_dtlb       ),
+
+        .lu_access_i           ( dtlb_lu_access        ),
+        .lu_asid_i             ( asid_i                ),
+        .asid_to_be_flushed_i  ( asid_to_be_flushed_i  ),
+        .vaddr_to_be_flushed_i ( vaddr_to_be_flushed_i ),
+        .lu_vaddr_i            ( lsu_vaddr_i           ),
+        .lu_content_o          ( dtlb_content          ),
+
+        .lu_is_4M_o            ( dtlb_is_4M            ),
+        .lu_hit_o              ( dtlb_lu_hit           )
+    );
+
+
+    cva6_ptw_sv32 #(
+        .ASID_WIDTH ( ASID_WIDTH ),
+        .ArianeCfg  ( ArianeCfg  )
+    ) i_ptw (
+        .clk_i                  ( clk_i                     ),
+        .rst_ni                 ( rst_ni                    ),
+        .ptw_active_o           ( ptw_active                ),
+        .walking_instr_o        ( walking_instr             ),
+        .ptw_error_o            ( ptw_error                 ),
+        .ptw_access_exception_o ( ptw_access_exception      ),
+        .enable_translation_i   ( enable_translation_i      ),
+
+        .update_vaddr_o         ( update_vaddr              ),
+        .itlb_update_o          ( update_ptw_itlb           ),
+        .dtlb_update_o          ( update_ptw_dtlb           ),
+
+        .itlb_access_i          ( itlb_lu_access            ),
+        .itlb_hit_i             ( itlb_lu_hit               ),
+        .itlb_vaddr_i           ( icache_areq_i.fetch_vaddr ),
+
+        .dtlb_access_i          ( dtlb_lu_access            ),
+        .dtlb_hit_i             ( dtlb_lu_hit               ),
+        .dtlb_vaddr_i           ( lsu_vaddr_i               ),
+
+        .req_port_i             ( req_port_i                ),
+        .req_port_o             ( req_port_o                ),
+        .pmpcfg_i,
+        .pmpaddr_i,
+        .bad_paddr_o            ( ptw_bad_paddr             ),
+        .*
+    );
+
+    // ila_1 i_ila_1 (
+    //     .clk(clk_i), // input wire clk
+    //     .probe0({req_port_o.address_tag, req_port_o.address_index}),
+    //     .probe1(req_port_o.data_req), // input wire [63:0] probe1
+    //     .probe2(req_port_i.data_gnt), // input wire [0:0] probe2
+    //     .probe3(req_port_i.data_rdata), // input wire [0:0] probe3
+    //     .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4
+    //     .probe5(ptw_error), // input wire [1:0] probe5
+    //     .probe6(update_vaddr), // input wire [0:0] probe6
+    //     .probe7(update_ptw_itlb.valid), // input wire [0:0] probe7
+    //     .probe8(update_ptw_dtlb.valid), // input wire [0:0] probe8
+    //     .probe9(dtlb_lu_access), // input wire [0:0] probe9
+    //     .probe10(lsu_vaddr_i), // input wire [0:0] probe10
+    //     .probe11(dtlb_lu_hit), // input wire [0:0] probe11
+    //     .probe12(itlb_lu_access), // input wire [0:0] probe12
+    //     .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13
+    //     .probe14(itlb_lu_hit) // input wire [0:0] probe13
+    // );
+
+    //-----------------------
+    // Instruction Interface
+    //-----------------------
+    logic match_any_execute_region;
+    logic pmp_instr_allow;
+
+    // The instruction interface is a simple request response interface
+    always_comb begin : instr_interface
+        // MMU disabled: just pass through
+        icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+        icache_areq_o.fetch_paddr = {{riscv::PLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr};// play through in case we disabled address translation
+        // two potential exception sources:
+        // 1. HPTW threw an exception -> signal with a page fault exception
+        // 2. We got an access error because of insufficient permissions -> throw an access exception
+        icache_areq_o.fetch_exception = '0;
+        // Check whether we are allowed to access this memory region from a fetch perspective
+        iaccess_err = icache_areq_i.fetch_req && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
+                                               || ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
+
+        // MMU enabled: address from TLB, request delayed until hit. Error when TLB
+        // hit and no access right or TLB hit and translated address not valid (e.g.
+        // AXI decode error), or when PTW performs walk due to ITLB miss and raises
+        // an error.
+        if (enable_translation_i) begin
+            // we work with SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
+            if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
+                icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1};
+            end
+
+            icache_areq_o.fetch_valid = 1'b0;
+
+            // 4K page
+            icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
+            // Mega page
+            if (itlb_is_4M) begin
+                icache_areq_o.fetch_paddr[21:12] = icache_areq_i.fetch_vaddr[21:12];
+            end
+
+
+            // ---------
+            // ITLB Hit
+            // --------
+            // if we hit the ITLB output the request signal immediately
+            if (itlb_lu_hit) begin
+                icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
+                // we got an access error
+                if (iaccess_err) begin
+                    // throw a page fault
+                    icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1};//to check on wave --> not connected
+                end else if (!pmp_instr_allow) begin
+                    icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1};//to check on wave --> not connected
+                end
+            end else
+            // ---------
+            // ITLB Miss
+            // ---------
+            // watch out for exceptions happening during walking the page table
+            if (ptw_active && walking_instr) begin
+                icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
+                if (ptw_error) icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, update_vaddr}, 1'b1};//to check on wave
+                // TODO(moschn,zarubaf): What should the value of tval be in this case?
+                else icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};//to check on wave --> not connected
+            end
+        end
+        // if it didn't match any execute region throw an `Instruction Access Fault`
+        // or: if we are not translating, check PMPs immediately on the paddr
+        if (!match_any_execute_region || (!enable_translation_i && !pmp_instr_allow)) begin
+            icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, icache_areq_o.fetch_paddr[riscv::PLEN-1:2], 1'b1};//to check on wave --> not connected
+        end
+    end
+
+    // check for execute flag on memory
+    assign match_any_execute_region = ariane_pkg::is_inside_execute_regions(ArianeCfg, {{64-riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr});
+
+    // Instruction fetch
+    pmp #(
+        .PLEN       ( riscv::PLEN            ),
+        .PMP_LEN    ( riscv::PLEN - 2        ),
+        .NR_ENTRIES ( ArianeCfg.NrPMPEntries )
+    ) i_pmp_if (
+        .addr_i        ( icache_areq_o.fetch_paddr ),
+        .priv_lvl_i,
+        // we will always execute on the instruction fetch port
+        .access_type_i ( riscv::ACCESS_EXEC        ),
+        // Configuration
+        .conf_addr_i   ( pmpaddr_i                 ),
+        .conf_i        ( pmpcfg_i                  ),
+        .allow_o       ( pmp_instr_allow           )
+    );
+
+    //-----------------------
+    // Data Interface
+    //-----------------------
+    logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
+    riscv::pte_sv32_t       dtlb_pte_n, dtlb_pte_q;
+    exception_t             misaligned_ex_n, misaligned_ex_q;
+    logic                   lsu_req_n, lsu_req_q;
+    logic                   lsu_is_store_n, lsu_is_store_q;
+    logic                   dtlb_hit_n, dtlb_hit_q;
+    logic                   dtlb_is_4M_n, dtlb_is_4M_q;
+
+    // check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
+    assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
+
+    // Wires to PMP checks
+    riscv::pmp_access_t pmp_access_type;
+    logic               pmp_data_allow;
+    localparam PPNWMin = (riscv::PPNW-1 > 29) ? 29 : riscv::PPNW-1;
+    // The data interface is simpler and only consists of a request/response interface
+    always_comb begin : data_interface
+        // save request and DTLB response
+        lsu_vaddr_n     = lsu_vaddr_i;
+        lsu_req_n       = lsu_req_i;
+        misaligned_ex_n = misaligned_ex_i;
+        dtlb_pte_n      = dtlb_content;
+        dtlb_hit_n      = dtlb_lu_hit;
+        lsu_is_store_n  = lsu_is_store_i;
+        dtlb_is_4M_n    = dtlb_is_4M;
+
+        lsu_paddr_o     = {{riscv::PLEN-riscv::VLEN{1'b0}}, lsu_vaddr_q};
+        lsu_dtlb_ppn_o  = {{riscv::PLEN-riscv::VLEN{1'b0}},lsu_vaddr_n[riscv::VLEN-1:12]};
+        lsu_valid_o     = lsu_req_q;
+        lsu_exception_o = misaligned_ex_q;
+        pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
+
+        // mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
+        misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
+
+        // Check if the User flag is set, then we may only access it in supervisor mode
+        // if SUM is enabled
+        daccess_err = (ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
+                      (ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u); // this is not a user page but we are in user mode and trying to access it
+        // translation is enabled and no misaligned exception occurred
+        if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
+            lsu_valid_o = 1'b0;
+            // 4K page
+            lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
+            lsu_dtlb_ppn_o = dtlb_content.ppn;
+            // Mega page: PPN[0] (physical address bits [21:12]) is taken from VPN[0] of the virtual address
+            if (dtlb_is_4M_q) begin // NOTE(review): the cycle-0 ppn below is qualified by the registered page size - confirm dtlb_is_4M is not intended
+                lsu_paddr_o[21:12] = lsu_vaddr_q[21:12];
+                lsu_dtlb_ppn_o[9:0] = lsu_vaddr_n[21:12]; // lsu_dtlb_ppn_o is indexed [PPNW-1:0], so paddr[21:12] is ppn[9:0]
+            end
+            // ---------
+            // DTLB Hit
+            // --------
+            if (dtlb_hit_q && lsu_req_q) begin
+                lsu_valid_o = 1'b1;
+                // exception priority:
+                // PAGE_FAULTS have higher priority than ACCESS_FAULTS
+                // virtual memory based exceptions are PAGE_FAULTS
+                // physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
+
+                // this is a store
+                if (lsu_is_store_q) begin
+                    // check if the page is write-able and we are not violating privileges
+                    // also check if the dirty flag is set
+                    if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
+                        lsu_exception_o = {riscv::STORE_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1}; //to check on wave
+                    // Check if any PMPs are violated
+                    end else if (!pmp_data_allow) begin
+                        lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1}; //only 32 bits on 34b of lsu_paddr_o are returned.
+                    end
+
+                // this is a load
+                end else begin
+                    // check for sufficient access privileges - throw a page fault if necessary
+                    if (daccess_err) begin
+                        lsu_exception_o = {riscv::LOAD_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
+                    // Check if any PMPs are violated
+                    end else if (!pmp_data_allow) begin
+                        lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1}; //only 32 bits on 34b of lsu_paddr_o are returned.
+                    end
+                end
+            end else
+
+            // ---------
+            // DTLB Miss
+            // ---------
+            // watch out for exceptions
+            if (ptw_active && !walking_instr) begin
+                // page table walker threw an exception
+                if (ptw_error) begin
+                    // an error makes the translation valid
+                    lsu_valid_o = 1'b1;
+                    // the page table walker can only throw page faults
+                    if (lsu_is_store_q) begin
+                        lsu_exception_o = {riscv::STORE_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},update_vaddr}, 1'b1};
+                    end else begin
+                        lsu_exception_o = {riscv::LOAD_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},update_vaddr}, 1'b1};
+                    end
+                end
+
+                if (ptw_access_exception) begin
+                    // an error makes the translation valid
+                    lsu_valid_o = 1'b1;
+                    // a PMP violation during the walk is reported as an access fault of the original access type
+                    lsu_exception_o = {(lsu_is_store_q ? riscv::ST_ACCESS_FAULT : riscv::LD_ACCESS_FAULT), ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};
+                end
+            end
+        end
+        // If translation is not enabled, check the paddr immediately against PMPs
+        else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
+            if (lsu_is_store_q) begin
+                lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
+            end else begin
+                lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
+            end
+        end
+    end
+
+    // Load/store PMP check
+    pmp #(
+        .PLEN       ( riscv::PLEN            ),
+        .PMP_LEN    ( riscv::PLEN - 2        ),
+        .NR_ENTRIES ( ArianeCfg.NrPMPEntries )
+    ) i_pmp_data (
+        .addr_i        ( lsu_paddr_o      ),
+        .priv_lvl_i    ( ld_st_priv_lvl_i ),
+        .access_type_i ( pmp_access_type  ),
+        // Configuration
+        .conf_addr_i   ( pmpaddr_i        ),
+        .conf_i        ( pmpcfg_i         ),
+        .allow_o       ( pmp_data_allow   )
+    );
+
+    // ----------
+    // Registers
+    // ----------
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            lsu_vaddr_q     <= '0;
+            lsu_req_q       <= '0;
+            misaligned_ex_q <= '0;
+            dtlb_pte_q      <= '0;
+            dtlb_hit_q      <= '0;
+            lsu_is_store_q  <= '0;
+            dtlb_is_4M_q    <= '0;
+        end else begin
+            lsu_vaddr_q     <= lsu_vaddr_n;
+            lsu_req_q       <= lsu_req_n;
+            misaligned_ex_q <= misaligned_ex_n;
+            dtlb_pte_q      <= dtlb_pte_n;
+            dtlb_hit_q      <= dtlb_hit_n;
+            lsu_is_store_q  <= lsu_is_store_n;
+            dtlb_is_4M_q    <= dtlb_is_4M_n;
+        end
+    end
+endmodule
diff --git a/src/mmu_sv32/cva6_ptw_sv32.sv b/src/mmu_sv32/cva6_ptw_sv32.sv
new file mode 100644
index 000000000..db6612823
--- /dev/null
+++ b/src/mmu_sv32/cva6_ptw_sv32.sv
@@ -0,0 +1,408 @@
+// Copyright (c) 2021 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Author: Sebastien Jacq Thales Research & Technology +// Date: 17/07/2021 +// +// Additional contributions by: +// Sebastien Jacq - sjthales on github.com +// +// Description: Hardware-PTW (Page-Table-Walker) for MMU Sv32. +// This module is an adaptation of the Sv39 PTW developed +// by Florian Zaruba and David Schaffenrath to the Sv32 standard. +// +// =========================================================================== // +// Revisions : +// Date Version Author Description +// 2020-02-17 0.1 S.Jacq PTW Sv32 for CV32A6 +// =========================================================================== // + +/* verilator lint_off WIDTH */ + +module cva6_ptw_sv32 import ariane_pkg::*; #( + parameter int ASID_WIDTH = 1, + parameter ariane_pkg::ariane_cfg_t ArianeCfg = ariane_pkg::ArianeDefaultConfig +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // flush everything, we need to do this because + // actually everything we do is speculative at this stage + // e.g.: there could be a CSR instruction that changes everything + output logic ptw_active_o, + output logic walking_instr_o, // set when walking for TLB + output logic ptw_error_o, // set when an error occurred + output logic ptw_access_exception_o, // set when an PMP access exception occured + input logic enable_translation_i, // CSRs indicate to enable SV32 + input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores + + input logic lsu_is_store_i, // this translation was triggered by a store + // PTW memory interface + input dcache_req_o_t req_port_i, + output dcache_req_i_t req_port_o, + + + // to TLBs, update logic + output tlb_update_sv32_t itlb_update_o, + output tlb_update_sv32_t dtlb_update_o, + + output logic [riscv::VLEN-1:0] update_vaddr_o, + + input logic [ASID_WIDTH-1:0] asid_i, + // from TLBs + // did we miss? 
+ input logic itlb_access_i, + input logic itlb_hit_i, + input logic [riscv::VLEN-1:0] itlb_vaddr_i, + + input logic dtlb_access_i, + input logic dtlb_hit_i, + input logic [riscv::VLEN-1:0] dtlb_vaddr_i, + // from CSR file + input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp + input logic mxr_i, + // Performance counters + output logic itlb_miss_o, + output logic dtlb_miss_o, + // PMP + + input riscv::pmpcfg_t [15:0] pmpcfg_i, + input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, + output logic [riscv::PLEN-1:0] bad_paddr_o + +); + + // input registers + logic data_rvalid_q; + logic [31:0] data_rdata_q, data_rdata_n; + + riscv::pte_sv32_t pte; + assign pte = riscv::pte_sv32_t'(data_rdata_q); + + assign data_rdata_n = (req_port_o.address_index[2] == 1'b1) ? req_port_i.data_rdata[63:32] : req_port_i.data_rdata[31:0]; + + enum logic[2:0] { + IDLE, + WAIT_GRANT, + PTE_LOOKUP, + WAIT_RVALID, + PROPAGATE_ERROR, + PROPAGATE_ACCESS_ERROR + } state_q, state_d; + + // SV32 defines two levels of page tables + enum logic { + LVL1, LVL2 + } ptw_lvl_q, ptw_lvl_n; + + // is this an instruction page table walk? 
+ logic is_instr_ptw_q, is_instr_ptw_n; + logic global_mapping_q, global_mapping_n; + // latched tag signal + logic tag_valid_n, tag_valid_q; + // register the ASID + logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n; + // register the VPN we need to walk, SV32 defines a 32 bit virtual address + logic [riscv::VLEN-1:0] vaddr_q, vaddr_n; + // 4 byte aligned physical pointer + logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n; + + // Assignments + assign update_vaddr_o = vaddr_q; + + assign ptw_active_o = (state_q != IDLE); + assign walking_instr_o = is_instr_ptw_q; + // directly output the correct physical address + assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0]; + assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH]; + // we are never going to kill this request + assign req_port_o.kill_req = '0; + // we are never going to write with the HPTW + assign req_port_o.data_wdata = 64'b0; + // ----------- + // TLB Update + // ----------- + assign itlb_update_o.vpn = vaddr_q[riscv::SV-1:12]; + assign dtlb_update_o.vpn = vaddr_q[riscv::SV-1:12]; + // update the correct page table level + assign itlb_update_o.is_4M = (ptw_lvl_q == LVL1); + assign dtlb_update_o.is_4M = (ptw_lvl_q == LVL1); + // output the correct ASID + assign itlb_update_o.asid = tlb_update_asid_q; + assign dtlb_update_o.asid = tlb_update_asid_q; + // set the global mapping bit + assign itlb_update_o.content = pte | (global_mapping_q << 5); + assign dtlb_update_o.content = pte | (global_mapping_q << 5); + + assign req_port_o.tag_valid = tag_valid_q; + + logic allow_access; + + assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0; + + pmp #( + .PLEN ( riscv::PLEN ), + .PMP_LEN ( riscv::PLEN - 2 ), + .NR_ENTRIES ( ArianeCfg.NrPMPEntries ) + ) i_pmp_ptw ( + .addr_i ( ptw_pptr_q ), + // PTW access are always checked as if in S-Mode... 
+ .priv_lvl_i ( riscv::PRIV_LVL_S ), + // ...and they are always loads + .access_type_i ( riscv::ACCESS_READ ), + // Configuration + .conf_addr_i ( pmpaddr_i ), + .conf_i ( pmpcfg_i ), + .allow_o ( allow_access ) + ); + + + assign req_port_o.data_be = be_gen(req_port_o.address_index[2:0],req_port_o.data_size ); + + //------------------- + // Page table walker + //------------------- + // A virtual address va is translated into a physical address pa as follows: + // 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39, + // PAGESIZE=2^12 and LEVELS=3.) + // 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For + // Sv32, PTESIZE=4.) + // 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access + // exception. + // 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5. + // Otherwise, this PTE is a pointer to the next level of the page table. + // Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let + // a = pte.ppn × PAGESIZE and go to step 2. + // 5. A leaf PTE has been found. Determine if the requested memory access + // is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and + // raise an access exception. Otherwise, the translation is successful. + // Set pte.a to 1, and, if the memory access is a store, set pte.d to 1. + // The translated physical address is given as follows: + // - pa.pgoff = va.pgoff. + // - If i > 0, then this is a superpage translation and + // pa.ppn[i-1:0] = va.vpn[i-1:0]. + // - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i]. 
+    always_comb begin : ptw
+        // Next-state and output logic of the walker FSM. Defaults are assigned first and are
+        // overridden further down (last assignment wins). PTW memory interface defaults:
+        tag_valid_n            = 1'b0;
+        req_port_o.data_req    = 1'b0;
+        req_port_o.data_size   = 2'b10;
+        req_port_o.data_we     = 1'b0;
+        ptw_error_o            = 1'b0;
+        ptw_access_exception_o = 1'b0;
+        itlb_update_o.valid    = 1'b0;
+        dtlb_update_o.valid    = 1'b0;
+        is_instr_ptw_n         = is_instr_ptw_q;
+        ptw_lvl_n              = ptw_lvl_q;
+        ptw_pptr_n             = ptw_pptr_q;
+        state_d                = state_q;
+        global_mapping_n       = global_mapping_q;
+        // input registers
+        tlb_update_asid_n      = tlb_update_asid_q;
+        vaddr_n                = vaddr_q;
+
+        itlb_miss_o            = 1'b0;
+        dtlb_miss_o            = 1'b0;
+
+        case (state_q)
+
+            IDLE: begin
+                // by default we start with the top-most page table
+                ptw_lvl_n        = LVL1;
+                global_mapping_n = 1'b0;
+                is_instr_ptw_n   = 1'b0;
+                // if we got an ITLB miss (only taken when there is no concurrent DTLB access)
+                if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
+                    ptw_pptr_n          = {satp_ppn_i, itlb_vaddr_i[riscv::SV-1:22], 2'b0}; // SATP.PPN * PAGESIZE + VPN*PTESIZE = SATP.PPN * 2^(12) + VPN*4
+                    is_instr_ptw_n      = 1'b1;
+                    tlb_update_asid_n   = asid_i;
+                    vaddr_n             = itlb_vaddr_i;
+                    state_d             = WAIT_GRANT;
+                    itlb_miss_o         = 1'b1;
+                // we got a DTLB miss
+                end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
+                    ptw_pptr_n          = {satp_ppn_i, dtlb_vaddr_i[riscv::SV-1:22], 2'b0}; // SATP.PPN * PAGESIZE + VPN*PTESIZE = SATP.PPN * 2^(12) + VPN*4
+                    tlb_update_asid_n   = asid_i;
+                    vaddr_n             = dtlb_vaddr_i;
+                    state_d             = WAIT_GRANT;
+                    dtlb_miss_o         = 1'b1;
+                end
+            end
+
+            WAIT_GRANT: begin
+                // send a request out
+                req_port_o.data_req = 1'b1;
+                // wait for the grant (req_port_i.data_gnt)
+                if (req_port_i.data_gnt) begin
+                    // send the tag valid signal one cycle later
+                    tag_valid_n = 1'b1;
+                    state_d     = PTE_LOOKUP;
+                end
+            end
+
+            PTE_LOOKUP: begin
+                // we wait for the registered read-valid (data_rvalid_q); pte is decoded from data_rdata_q
+                if (data_rvalid_q) begin
+
+                    // check if the global mapping bit is set
+                    if (pte.g)
+                        global_mapping_n = 1'b1;
+
+                    // -------------
+                    // Invalid PTE
+                    // -------------
+                    // If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
+                    if (!pte.v || (!pte.r && pte.w))
+                        state_d = PROPAGATE_ERROR;
+                    // -----------
+                    // Valid PTE
+                    // -----------
+                    else begin
+                        state_d = IDLE;
+                        // the PTE is valid
+                        // if pte.r = 1 or pte.x = 1 it is a leaf PTE
+                        if (pte.r || pte.x) begin
+                            // Valid translation found (either 4M or 4K entry)
+                            if (is_instr_ptw_q) begin
+                                // ------------
+                                // Update ITLB
+                                // ------------
+                                // If page is not executable, we can directly raise an error. This
+                                // doesn't put a useless entry into the TLB. The same idea applies
+                                // to the access flag since we let the access flag be managed by SW.
+                                if (!pte.x || !pte.a)
+                                  state_d = PROPAGATE_ERROR;
+                                else
+                                  itlb_update_o.valid = 1'b1;
+
+                            end else begin
+                                // ------------
+                                // Update DTLB
+                                // ------------
+                                // Check if the access flag has been set, otherwise throw a page-fault
+                                // and let the software handle those bits.
+                                // If page is not readable (there are no write-only pages)
+                                // we can directly raise an error. This doesn't put a useless
+                                // entry into the TLB. With mxr_i set, an executable page also counts as readable.
+                                if (pte.a && (pte.r || (pte.x && mxr_i))) begin
+                                  dtlb_update_o.valid = 1'b1;
+                                end else begin
+                                  state_d   = PROPAGATE_ERROR;
+                                end
+                                // Request is a store: perform some additional checks
+                                // If the request was a store and the page is not write-able, raise an error
+                                // the same applies if the dirty flag is not set
+                                if (lsu_is_store_i && (!pte.w || !pte.d)) begin
+                                    dtlb_update_o.valid = 1'b0;
+                                    state_d   = PROPAGATE_ERROR;
+                                end
+                            end
+                            // check if the ppn is correctly aligned:
+                            // 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
+                            // exception. For a leaf found at LVL1 (4M page) this means pte.ppn[9:0] must be zero.
+                            if (ptw_lvl_q == LVL1 && pte.ppn[9:0] != '0) begin
+                                state_d             = PROPAGATE_ERROR;
+                                dtlb_update_o.valid = 1'b0;
+                                itlb_update_o.valid = 1'b0;
+                            end
+                        // neither r nor x set: this PTE is a pointer to the next page-table level
+                        end else begin
+                            // pointer to next level of page table
+                            if (ptw_lvl_q == LVL1) begin
+                                // we are in the second level now
+                                ptw_lvl_n  = LVL2;
+                                ptw_pptr_n = {pte.ppn, vaddr_q[21:12], 2'b0};
+                            end
+
+                            state_d = WAIT_GRANT;
+
+                            if (ptw_lvl_q == LVL2) begin
+                                // Should already be the last level page table => Error
+                                ptw_lvl_n   = LVL2;
+                                state_d = PROPAGATE_ERROR;
+                            end
+                        end
+                    end
+
+                    // Check if this access was actually allowed from a PMP perspective (overrides the decisions above)
+                    if (!allow_access) begin
+                        itlb_update_o.valid = 1'b0;
+                        dtlb_update_o.valid = 1'b0;
+                        // keep the failing address in ptw_pptr: it is what bad_paddr_o reports
+                        ptw_pptr_n = ptw_pptr_q;
+                        state_d = PROPAGATE_ACCESS_ERROR;
+                    end
+                end
+                // without rvalid nothing is assigned here, so state_d keeps its default (state_q)
+            end
+            // Propagate error to MMU/LSU
+            PROPAGATE_ERROR: begin
+                state_d     = IDLE;
+                ptw_error_o = 1'b1;
+            end
+            PROPAGATE_ACCESS_ERROR: begin
+                state_d     = IDLE;
+                ptw_access_exception_o = 1'b1;
+            end
+            // wait for the rvalid before going back to IDLE
+            WAIT_RVALID: begin
+                if (data_rvalid_q)
+                    state_d = IDLE;
+            end
+            default: begin
+                state_d = IDLE;
+            end
+        endcase
+
+        // -------
+        // Flush
+        // -------
+        // should we have flushed before we got an rvalid, wait for it until going back to IDLE
+        if (flush_i) begin
+            // on a flush (this overrides state_d chosen by the case statement above) check whether we are
+            // 1. in the PTE Lookup check whether we still need to wait for an rvalid
+            // 2. in WAIT_GRANT and the grant arrives in this cycle: wait for the rvalid of that request
+            // if not, go back to idle
+            if ((state_q == PTE_LOOKUP && !data_rvalid_q) || ((state_q == WAIT_GRANT) && req_port_i.data_gnt))
+                state_d = WAIT_RVALID;
+            else
+                state_d = IDLE;
+        end
+    end
+
+    // sequential process: state registers with asynchronous, active-low reset (rst_ni)
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            state_q            <= IDLE;
+            is_instr_ptw_q     <= 1'b0;
+            ptw_lvl_q          <= LVL1;
+            tag_valid_q        <= 1'b0;
+            tlb_update_asid_q  <= '0;
+            vaddr_q            <= '0;
+            ptw_pptr_q         <= '0;
+            global_mapping_q   <= 1'b0;
+            data_rdata_q       <= '0;
+            data_rvalid_q      <= 1'b0;
+        end else begin
+            state_q            <= state_d;
+            ptw_pptr_q         <= ptw_pptr_n;
+            is_instr_ptw_q     <= is_instr_ptw_n;
+            ptw_lvl_q          <= ptw_lvl_n;
+            tag_valid_q        <= tag_valid_n;
+            tlb_update_asid_q  <= tlb_update_asid_n;
+            vaddr_q            <= vaddr_n;
+            global_mapping_q   <= global_mapping_n;
+            data_rdata_q       <= data_rdata_n;
+            data_rvalid_q      <= req_port_i.data_rvalid;
+        end
+    end
+
+endmodule
+/* verilator lint_on WIDTH */
diff --git a/src/mmu_sv32/cva6_tlb_sv32.sv b/src/mmu_sv32/cva6_tlb_sv32.sv
new file mode 100644
index 000000000..a2975a0d0
--- /dev/null
+++ b/src/mmu_sv32/cva6_tlb_sv32.sv
@@ -0,0 +1,261 @@
+// Copyright (c) 2021 Thales.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Sebastien Jacq Thales Research & Technology
+// Date: 17/07/2021
+//
+// Additional contributions by:
+//         Sebastien Jacq - sjthales on github.com
+//
+// Description: Translation Lookaside Buffer, Sv32, fully associative, pseudo-LRU replacement.
+//              This module is an adaptation of the Sv39 TLB developed
+//              by Florian Zaruba and David Schaffenrath to the Sv32 standard.
+//
+// =========================================================================== //
+// Revisions  :
+// Date        Version  Author       Description
+// 2020-02-17  0.1      S.Jacq       TLB Sv32 for CV32A6
+// =========================================================================== //
+
+module cva6_tlb_sv32 import ariane_pkg::*; #(
+      parameter int unsigned TLB_ENTRIES = 4, // number of entries, must be a power of two (PLRU tree)
+      parameter int unsigned ASID_WIDTH  = 1  // width of the ASID ports (the stored tag is always 9 bits)
+  )(
+    input  logic                    clk_i,    // Clock
+    input  logic                    rst_ni,   // Asynchronous reset active low
+    input  logic                    flush_i,  // Flush signal
+    // Update TLB (entry to write when update_i.valid is set and no flush is in progress)
+    input  tlb_update_sv32_t        update_i,
+    // Lookup signals
+    input  logic                    lu_access_i,           // a lookup is performed; gates the PLRU update
+    input  logic [ASID_WIDTH-1:0]   lu_asid_i,             // ASID of the lookup
+    input  logic [riscv::VLEN-1:0]  lu_vaddr_i,            // virtual address to look up
+    output riscv::pte_sv32_t        lu_content_o,          // PTE of the matching entry, all zero on a miss
+    input  logic [ASID_WIDTH-1:0]   asid_to_be_flushed_i,  // ASID operand of the flush (0: all address spaces)
+    input  logic [riscv::VLEN-1:0]  vaddr_to_be_flushed_i, // address operand of the flush (0: all addresses)
+    output logic                    lu_is_4M_o,            // the matching entry is a 4M page
+    output logic                    lu_hit_o               // at least one entry matched
+);
+
+    // Sv32 defines two levels of page tables: the tag stores VPN[1] and VPN[0] separately
+    struct packed {
+      logic [8:0]  asid; // 9 bits wide, independent of ASID_WIDTH
+      logic [9:0]  vpn1; // 10 bits wide
+      logic [9:0]  vpn0; // 10 bits wide, ignored on lookup when is_4M is set
+      logic        is_4M;
+      logic        valid;
+    } [TLB_ENTRIES-1:0] tags_q, tags_n;
+
+    riscv::pte_sv32_t [TLB_ENTRIES-1:0] content_q, content_n;
+    logic [9:0] vpn0, vpn1;
+    logic [TLB_ENTRIES-1:0] lu_hit;     // to replacement logic
+    logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy
+    //-------------
+    // Translation
+    //-------------
+    always_comb begin : translation
+        vpn0 = lu_vaddr_i[21:12];
+        vpn1 = lu_vaddr_i[31:22];
+
+
+        // default assignment
+        lu_hit       = '{default: 0};
+        lu_hit_o     = 1'b0;
+        lu_content_o = '{default: 0};
+        lu_is_4M_o   = 1'b0;
+
+        for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
+            // first level match, this may be a mega page, check the ASID flags as well
+            // if the entry is associated to a global address, don't match the ASID (ASID is don't care)
+            if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid) || content_q[i].g)  && vpn1 == tags_q[i].vpn1) begin
+                if (tags_q[i].is_4M || vpn0 == tags_q[i].vpn0) begin
+                    lu_is_4M_o   = tags_q[i].is_4M;
+                    lu_content_o = content_q[i];
+                    lu_hit_o     = 1'b1;
+                    lu_hit[i]    = 1'b1;
+                end
+            end
+        end
+    end
+
+    logic asid_to_be_flushed_is0;  // indicates that the ASID provided by SFENCE.VMA (rs2) is 0, active high
+    logic vaddr_to_be_flushed_is0;  // indicates that the VADDR provided by SFENCE.VMA (rs1) is 0, active high
+    logic  [TLB_ENTRIES-1:0] vaddr_vpn0_match;
+    logic  [TLB_ENTRIES-1:0] vaddr_vpn1_match;
+
+
+    assign asid_to_be_flushed_is0 =  ~(|asid_to_be_flushed_i);
+    assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i);
+
+    // ------------------
+    // Update and Flush
+    // ------------------
+    always_comb begin : update_flush
+        tags_n    = tags_q;
+        content_n = content_q;
+
+        for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
+
+            vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[21:12] == tags_q[i].vpn0);
+            vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[31:22] == tags_q[i].vpn1);
+
+            if (flush_i) begin
+                // invalidate logic
+                // flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case)
+                if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0 )
+                    tags_n[i].valid = 1'b0;
+                // flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages
+                else if (asid_to_be_flushed_is0 && ( (vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M) ) && (~vaddr_to_be_flushed_is0))
+                    tags_n[i].valid = 1'b0;
+                // the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case)
+                else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M)) && (asid_to_be_flushed_i == tags_q[i].asid) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0))
+                    tags_n[i].valid = 1'b0;
+                // the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case)
+                else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid) && (!asid_to_be_flushed_is0))
+                    tags_n[i].valid = 1'b0;
+            // normal replacement (flush_i has priority: an update arriving during a flush is not written)
+            end else if (update_i.valid & replace_en[i]) begin
+                // update tag array
+                tags_n[i] = '{
+                    asid:  update_i.asid,
+                    vpn1:  update_i.vpn [19:10],
+                    vpn0:  update_i.vpn [9:0],
+                    is_4M: update_i.is_4M,
+                    valid: 1'b1
+                };
+                // and content as well
+                content_n[i] = update_i.content;
+            end
+        end
+    end
+
+    // -----------------------------------------------
+    // PLRU - Pseudo Least Recently Used Replacement
+    // -----------------------------------------------
+    logic[2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n;
+    always_comb begin : plru_replacement
+        plru_tree_n = plru_tree_q;
+        // The PLRU-tree indexing:
+        // lvl0        0
+        //            / \
+        //           /   \
+        // lvl1     1     2
+        //         / \   / \
+        // lvl2   3   4 5   6
+        //       / \ /\/\  /\
+        //      ... ... ... ...
+        // On a hit every node on the path to the entry is set to the complement of the entry's index bit (it points away from it).
+        // E.g. for a TLB with 8 entries, the for-loop is semantically
+        // equivalent to the following pseudo-code:
+        // unique case (1'b1)
+        // lu_hit[7]: plru_tree_n[0, 2, 6] = {0, 0, 0};
+        // lu_hit[6]: plru_tree_n[0, 2, 6] = {0, 0, 1};
+        // lu_hit[5]: plru_tree_n[0, 2, 5] = {0, 1, 0};
+        // lu_hit[4]: plru_tree_n[0, 2, 5] = {0, 1, 1};
+        // lu_hit[3]: plru_tree_n[0, 1, 4] = {1, 0, 0};
+        // lu_hit[2]: plru_tree_n[0, 1, 4] = {1, 0, 1};
+        // lu_hit[1]: plru_tree_n[0, 1, 3] = {1, 1, 0};
+        // lu_hit[0]: plru_tree_n[0, 1, 3] = {1, 1, 1};
+        // default: begin /* No hit */ end
+        // endcase
+        for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
+            automatic int unsigned idx_base, shift, new_index;
+            // we got a hit: this entry is now the most recently used, so steer the tree away from it
+            if (lu_hit[i] & lu_access_i) begin
+                // Set the nodes to the values we would expect
+                for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
+                  idx_base = $unsigned((2**lvl)-1);
+                  // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                  shift = $clog2(TLB_ENTRIES) - lvl;
+                  // to circumvent the 32 bit integer arithmetic assignment
+                  new_index =  ~((i >> (shift-1)) & 32'b1);
+                  plru_tree_n[idx_base + (i >> shift)] = new_index[0];
+                end
+            end
+        end
+        // Decode tree to write enable signals
+        // Next for-loop basically creates the following logic for e.g. an 8 entry
+        // TLB (note: pseudo-code obviously):
+        // replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
+        // replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
+        // replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
+        // replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
+        // replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
+        // replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
+        // replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
+        // replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
+        // For each entry traverse the tree. If every tree-node matches,
+        // the corresponding bit of the entry's index, this is
+        // the next entry to replace.
+        for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
+            automatic logic en;
+            automatic int unsigned idx_base, shift, new_index;
+            en = 1'b1;
+            for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
+                idx_base = $unsigned((2**lvl)-1);
+                // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                shift = $clog2(TLB_ENTRIES) - lvl;
+
+                // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
+                new_index =  (i >> (shift-1)) & 32'b1;
+                if (new_index[0]) begin
+                  en &= plru_tree_q[idx_base + (i>>shift)];
+                end else begin
+                  en &= ~plru_tree_q[idx_base + (i>>shift)];
+                end
+            end
+            replace_en[i] = en;
+        end
+    end
+
+    // sequential process: tag, content and PLRU registers with asynchronous, active-low reset
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if(~rst_ni) begin
+            tags_q      <= '{default: 0};
+            content_q   <= '{default: 0};
+            plru_tree_q <= '{default: 0};
+        end else begin
+            tags_q      <= tags_n;
+            content_q   <= content_n;
+            plru_tree_q <= plru_tree_n;
+        end
+    end
+    //--------------
+    // Sanity checks
+    //--------------
+
+    //pragma translate_off
+    `ifndef VERILATOR
+
+    initial begin : p_assertions
+      assert ((TLB_ENTRIES > 1) && ((TLB_ENTRIES & (TLB_ENTRIES - 1)) == 0)) // the PLRU tree decodes 2**$clog2(TLB_ENTRIES) leaves
+        else begin $error("TLB size must be a power of 2 and greater than 1"); $stop(); end
+      assert (ASID_WIDTH >= 1)
+        else begin $error("ASID width must be at least 1"); $stop(); end
+    end
+
+    // Just for checking: returns the number of set bits in vector
+    function int countSetBits(logic[TLB_ENTRIES-1:0] vector);
+      automatic int count = 0;
+      foreach (vector[idx]) begin
+        count += vector[idx];
+      end
+      return count;
+    endfunction
+
+    assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
+      else begin $error("More then one hit in TLB!"); $stop(); end
+    assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
+      else begin $error("More then one TLB entry selected for next replace!"); $stop(); end
+
+    `endif
+    //pragma translate_on
+
+endmodule
diff --git a/src/mmu.sv b/src/mmu_sv39/mmu.sv
similarity index 100% rename from src/mmu.sv rename to src/mmu_sv39/mmu.sv diff --git a/src/ptw.sv b/src/mmu_sv39/ptw.sv similarity index 97% rename from src/ptw.sv rename to src/mmu_sv39/ptw.sv index d05113105..5ba0feb70 100644 --- a/src/ptw.sv +++ b/src/mmu_sv39/ptw.sv @@ -78,6 +78,7 @@ module ptw import ariane_pkg::*; #( IDLE, WAIT_GRANT, PTE_LOOKUP, + WAIT_RVALID, PROPAGATE_ERROR, PROPAGATE_ACCESS_ERROR } state_q, state_d; @@ -92,8 +93,6 @@ module ptw import ariane_pkg::*; #( logic global_mapping_q, global_mapping_n; // latched tag signal logic tag_valid_n, tag_valid_q; - // latched kill signal - logic kill_req_q, kill_req_d; // register the ASID logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n; // register the VPN we need to walk, SV39 defines a 39 bit virtual address @@ -109,8 +108,8 @@ module ptw import ariane_pkg::*; #( // directly output the correct physical address assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0]; assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH]; - // kill this request - assign req_port_o.kill_req = kill_req_q; + // we are never going to kill this request + assign req_port_o.kill_req = '0; // we are never going to write with the HPTW assign req_port_o.data_wdata = 64'b0; // ----------- @@ -179,7 +178,6 @@ module ptw import ariane_pkg::*; #( // default assignments // PTW memory interface tag_valid_n = 1'b0; - kill_req_d = 1'b0; req_port_o.data_req = 1'b0; req_port_o.data_be = 8'hFF; req_port_o.data_size = 2'b11; @@ -329,13 +327,13 @@ module ptw import ariane_pkg::*; #( end end end - + // Check if this access was actually allowed from a PMP perspective if (!allow_access) begin itlb_update_o.valid = 1'b0; dtlb_update_o.valid = 1'b0; // we have to return the failed address in bad_addr - ptw_pptr_n = ptw_pptr_q; + ptw_pptr_n = ptw_pptr_q; state_d = PROPAGATE_ACCESS_ERROR; end end @@ -350,6 +348,11 @@ module ptw import ariane_pkg::*; #( state_d = IDLE; 
ptw_access_exception_o = 1'b1; end + // wait for the rvalid before going back to IDLE + WAIT_RVALID: begin + if (data_rvalid_q) + state_d = IDLE; + end default: begin state_d = IDLE; end @@ -364,11 +367,10 @@ module ptw import ariane_pkg::*; #( // 1. in the PTE Lookup check whether we still need to wait for an rvalid // 2. waiting for a grant, if so: wait for it // if not, go back to idle - if ((state_q == PTE_LOOKUP && !data_rvalid_q) || ((state_q == WAIT_GRANT) && req_port_i.data_gnt)) begin - tag_valid_n = 1'b1; - kill_req_d = 1'b1; - end - state_d = IDLE; + if ((state_q == PTE_LOOKUP && !data_rvalid_q) || ((state_q == WAIT_GRANT) && req_port_i.data_gnt)) + state_d = WAIT_RVALID; + else + state_d = IDLE; end end @@ -385,7 +387,6 @@ module ptw import ariane_pkg::*; #( global_mapping_q <= 1'b0; data_rdata_q <= '0; data_rvalid_q <= 1'b0; - kill_req_q <= 1'b0; end else begin state_q <= state_d; ptw_pptr_q <= ptw_pptr_n; @@ -397,7 +398,6 @@ module ptw import ariane_pkg::*; #( global_mapping_q <= global_mapping_n; data_rdata_q <= req_port_i.data_rdata; data_rvalid_q <= req_port_i.data_rvalid; - kill_req_q <= kill_req_d; end end diff --git a/src/tlb.sv b/src/mmu_sv39/tlb.sv similarity index 97% rename from src/tlb.sv rename to src/mmu_sv39/tlb.sv index 5a0aeac51..d540e55fb 100644 --- a/src/tlb.sv +++ b/src/mmu_sv39/tlb.sv @@ -125,7 +125,7 @@ module tlb import ariane_pkg::*; #( else if (asid_to_be_flushed_is0 && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M) ) && (~vaddr_to_be_flushed_is0)) tags_n[i].valid = 1'b0; // the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case) - else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && 
tags_q[i].is_2M)) && (asid_to_be_flushed_i == tags_q[i].asid) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0)) + else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M)) && (asid_to_be_flushed_i == tags_q[i].asid) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0)) tags_n[i].valid = 1'b0; // the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case) else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid) && (!asid_to_be_flushed_is0)) diff --git a/src_files.yml b/src_files.yml index 0a150f3aa..1afc440ad 100644 --- a/src_files.yml +++ b/src_files.yml @@ -34,7 +34,8 @@ ariane: src/load_unit.sv, src/load_store_unit.sv, src/miss_handler.sv, - src/mmu.sv, + src/mmu_sv39/mmu.sv, + src/mmu_sv32/cva6_mmu_sv32.sv, src/mult.sv, src/nbdcache.sv, src/vdregs.sv, @@ -42,12 +43,14 @@ ariane: src/sram_wrapper.sv, src/pcgen_stage.sv, src/perf_counters.sv, - src/ptw.sv, + src/mmu_sv39/ptw.sv, + src/mmu_sv32/cva6_ptw_sv32.sv, src/re_name.sv, src/scoreboard.sv, src/store_buffer.sv, src/store_unit.sv, - src/tlb.sv, + src/mmu_sv39/tlb.sv, + src/mmu_sv32/cva6_tlb_sv32.sv, src/debug/dm_csrs.sv, src/debug/dm_mem.sv, src/debug/dm_top.sv,