From 5b29462bc4d2fa776437fd678e52499675d131fb Mon Sep 17 00:00:00 2001 From: Michael Schaffner Date: Fri, 31 Aug 2018 12:55:59 +0200 Subject: [PATCH] implement l15 adapter --- Makefile | 1 + include/piton_cache_pkg.sv | 184 ++++++++ src/cache_subsystem/piton_cache_subsystem.sv | 146 ++++++ src/cache_subsystem/piton_dcache.sv | 349 ++++++++++++++ src/cache_subsystem/piton_icache.sv | 459 +++++++++++++++++++ src/cache_subsystem/piton_l15_adapter.sv | 440 ++++++++++++++++++ 6 files changed, 1579 insertions(+) create mode 100644 include/piton_cache_pkg.sv create mode 100644 src/cache_subsystem/piton_cache_subsystem.sv create mode 100644 src/cache_subsystem/piton_dcache.sv create mode 100644 src/cache_subsystem/piton_icache.sv create mode 100644 src/cache_subsystem/piton_l15_adapter.sv diff --git a/Makefile b/Makefile index 46a1a46ee..61007ccd7 100755 --- a/Makefile +++ b/Makefile @@ -23,6 +23,7 @@ ariane_pkg := include/riscv_pkg.sv \ src/debug/dm_pkg.sv \ include/ariane_pkg.sv \ include/std_cache_pkg.sv \ + include/piton_cache_pkg.sv \ include/axi_if.sv # utility modules diff --git a/include/piton_cache_pkg.sv b/include/piton_cache_pkg.sv new file mode 100644 index 000000000..d0aeac06e --- /dev/null +++ b/include/piton_cache_pkg.sv @@ -0,0 +1,184 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna.
+// +// Author: Michael Schaffner , ETH Zurich +// Date: 15.08.2018 +// Description: Package for OpenPiton compatible L1 cache subsystem + +package piton_cache_pkg; + + localparam L15_SET_ASSOC = 4; + + // these params need to coincide with the current L1.5 parameterization + // do not change + localparam L15_TID_WIDTH = 2; + localparam L15_TLB_CSM_WIDTH = 33; + + localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC); + localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC); + localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC); + + // FIFO depths of L15 adapter + localparam ADAPTER_REQ_FIFO_DEPTH = 2; + // since packets have to be consumed immediately, + // we need not have a deeper FIFO + localparam ADAPTER_RTRN_FIFO_DEPTH = 1; + + // local interfaces between caches and L15 adapter + typedef enum logic [1:0] { DCACHE_STORE_REQ, + DCACHE_LOAD_REQ, + DCACHE_ATOMIC_REQ, + DCACHE_INT_REQ } dcache_out_t; + + typedef enum logic [2:0] { DCACHE_INV_REQ, // no ack from the core required + DCACHE_STORE_ACK,// note: this may contain an invalidation vector, too + DCACHE_LOAD_ACK, + DCACHE_ATOMIC_ACK, + DCACHE_INT_ACK } dcache_in_t; + + typedef enum logic [0:0] { ICACHE_INV_REQ, // no ack from the core required + ICACHE_IFILL_ACK} icache_in_t; + + typedef struct packed { + logic vld; // invalidate only affected way + logic all; // invalidate all ways + logic [ariane_pkg::ICACHE_INDEX_WIDTH-1:0] addr; // physical address to invalidate + logic [L15_WAY_WIDTH-1:0] way; // way to invalidate + } cache_inval_t; + + // icache interface + typedef struct packed { + logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] way; // way to replace + logic [63:0] paddr; // physical address + logic nc; // noncacheable + logic [L15_TID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + } icache_req_t; + + typedef struct packed { + icache_in_t rtype; // see definitions above + logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] data; // full cache line width + cache_inval_t
inv; // invalidation vector + logic nc; // noncacheable + logic [L15_TID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + } icache_rtrn_t; + + // dcache interface + typedef struct packed { + dcache_out_t rtype; // see definitions above + logic [2:0] size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) + logic [L1D_WAY_WIDTH-1:0] way; // way to replace + logic [63:0] paddr; // physical address + logic [63:0] data; // word width of processor (no block stores at the moment) + logic nc; // noncacheable + logic [L15_TID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + } dcache_req_t; + + typedef struct packed { + dcache_in_t rtype; // see definitions above + logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // full cache line width + cache_inval_t inv; // invalidation vector + logic nc; // noncacheable + logic [L15_TID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane) + } dcache_rtrn_t; + + + // taken from iop.h in openpiton + // this is a work around, need to include files properly + // to l1.5 (only marked subset is used) + typedef enum logic [4:0] {LOAD_RQ = 5'b00000, // load request + IMISS_RQ = 5'b10000, // instruction fill request + STORE_RQ = 5'b00001, // store request + CAS1_RQ = 5'b00010, // compare and swap1 packet (OpenSparc atomics) + CAS2_RQ = 5'b00011, // compare and swap2 packet (OpenSparc atomics) + SWAP_RQ = 5'b00110, // swap packet (OpenSparc atomics) + STRLOAD_RQ = 5'b00100, // unused + STRST_RQ = 5'b00101, // unused + STQ_RQ = 5'b00111, // unused + INT_RQ = 5'b01001, // interrupt request + FWD_RQ = 5'b01101, // unused + FWD_RPY = 5'b01110, // unused + RSVD_RQ = 5'b11111 // unused + } l15_reqtypes_t; + + // from l1.5 (only marked subset is used) + typedef enum logic [3:0] {LOAD_RET = 4'b0000, // load packet + // INV_RET = 4'b0011, // invalidate packet, not unique... + ST_ACK = 4'b0100, // store ack packet + //AT_ACK = 4'b0011, // unused, not unique...
+ INT_RET = 4'b0111, // interrupt packet + TEST_RET = 4'b0101, // unused + FP_RET = 4'b1000, // unused + IFILL_RET = 4'b0001, // instruction fill packet + EVICT_REQ = 4'b0011, // eviction request + ERR_RET = 4'b1100, // unused + STRLOAD_RET = 4'b0010, // unused + STRST_ACK = 4'b0110, // unused + FWD_RQ_RET = 4'b1010, // unused + FWD_RPY_RET = 4'b1011, // unused + RSVD_RET = 4'b1111, // unused + CPX_RESTYPE_ATOMIC_RES = 4'b1110 // custom type for atomic responses + } l15_rtrntypes_t; + + + // l15 interface uses reg for compatibility with verilog + typedef struct packed { + l15_reqtypes_t l15_rqtype; // see l15_reqtypes_t above for encoding + logic l15_nc; // non-cacheable bit + logic [2:0] l15_size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) + logic [L15_TID_WIDTH-1:0] l15_threadid; // currently 0 or 1 + logic l15_prefetch; // unused in openpiton + logic l15_invalidate_cacheline; // unused by Ariane as L1 has no ECC at the moment + logic l15_blockstore; // unused in openpiton + logic l15_blockinitstore; // unused in openpiton + logic [L15_WAY_WIDTH-1:0] l15_l1rplway; // way to replace + logic [39:0] l15_address; // physical address + logic [63:0] l15_data; // word to write + logic [63:0] l15_data_next_entry; // unused in Ariane (only used for CAS atomic requests) + logic [L15_TLB_CSM_WIDTH-1:0] l15_csm_data; // unused in Ariane + } l15_req_t; + + typedef struct packed { + l15_rtrntypes_t l15_returntype; // see l15_rtrntypes_t above for encoding + logic l15_l2miss; // unused in Ariane + logic [1:0] l15_error; // unused in openpiton + logic l15_noncacheable; // non-cacheable bit + logic l15_atomic; // asserted in load return and store ack packets of atomic tx + logic [L15_TID_WIDTH-1:0] l15_threadid; // used as transaction ID + logic l15_prefetch; // unused in openpiton + logic l15_f4b; // 4byte instruction fill. not used in Ariane (always requests a full cache line).
+ logic [63:0] l15_data_0; // used for both caches + logic [63:0] l15_data_1; // used for both caches + logic [63:0] l15_data_2; // currently only used for I$ + logic [63:0] l15_data_3; // currently only used for I$ + logic l15_inval_icache_all_way; // invalidate all ways + logic l15_inval_dcache_all_way; // unused in openpiton + logic [15:4] l15_inval_address_15_4; // invalidate selected cacheline + logic l15_cross_invalidate; // unused in openpiton + logic [L15_WAY_WIDTH-1:0] l15_cross_invalidate_way; // unused in openpiton + logic l15_inval_dcache_inval; // invalidate selected cacheline and way + logic l15_inval_icache_inval; // unused in openpiton + logic [L15_WAY_WIDTH-1:0] l15_inval_way; // way to invalidate + logic l15_blockinitstore; // unused in openpiton + } l15_rtrn_t; + + +function automatic logic[63:0] swendian64(input logic[63:0] in); // returns 'in' with its eight bytes in reversed order (out[7:0] = in[63:56], ..., out[63:56] = in[7:0]) + automatic logic[63:0] out; + for(int k=0; k<64;k+=8)begin + out[k +: 8] = in[63-k -: 8]; + end + return out; +endfunction + +endpackage : piton_cache_pkg diff --git a/src/cache_subsystem/piton_cache_subsystem.sv b/src/cache_subsystem/piton_cache_subsystem.sv new file mode 100644 index 000000000..218aa6689 --- /dev/null +++ b/src/cache_subsystem/piton_cache_subsystem.sv @@ -0,0 +1,146 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna.
+//
+// Author: Florian Zaruba , ETH Zurich
+//         Michael Schaffner , ETH Zurich
+// Date: 15.08.2018
+// Description: Ariane cache subsystem that is compatible with the OpenPiton
+//              coherent memory system. Instantiates piton_icache, piton_dcache and piton_l15_adapter; both caches talk to the adapter, and the adapter drives the L15 ports of this module.
+
+import ariane_pkg::*;
+import piton_cache_pkg::*;
+
+module piton_cache_subsystem #(
+  parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000
+)(
+  input  logic                 clk_i,
+  input  logic                 rst_ni,
+
+  // I$
+  input  logic                 icache_en_i,            // enable icache (or bypass e.g: in debug mode)
+  input  logic                 icache_flush_i,         // flush the icache, flush and kill have to be asserted together
+  output logic                 icache_miss_o,          // to performance counter
+
+  // address translation requests
+  input  icache_areq_i_t       icache_areq_i,          // to/from frontend
+  output icache_areq_o_t       icache_areq_o,
+  // data requests
+  input  icache_dreq_i_t       icache_dreq_i,          // to/from frontend
+  output icache_dreq_o_t       icache_dreq_o,
+
+  // D$
+  // Cache management
+  input  logic                 dcache_enable_i,        // from CSR
+  input  logic                 dcache_flush_i,         // high until acknowledged
+  output logic                 dcache_flush_ack_o,     // send a single cycle acknowledge signal when the cache is flushed
+  output logic                 dcache_miss_o,          // we missed on a ld/st
+  // AMO interface (not functional yet)
+  input  logic                 dcache_amo_commit_i,    // commit atomic memory operation
+  output logic                 dcache_amo_valid_o,     // we have a valid AMO result
+  output logic [63:0]          dcache_amo_result_o,    // result of atomic memory operation
+  input  logic                 dcache_amo_flush_i,     // forget about AMO
+  // Request ports
+  input  dcache_req_i_t [2:0]  dcache_req_ports_i,     // to/from LSU
+  output dcache_req_o_t [2:0]  dcache_req_ports_o,     // to/from LSU
+
+  // L15 (memory side)
+  output logic                 l15_val_o,
+  input  logic                 l15_ack_i,
+  input  logic                 l15_header_ack_i,
+  output l15_req_t             l15_data_o,
+
+  input  logic                 l15_val_i,
+  output logic                 l15_req_ack_o,
+  input  l15_rtrn_t            l15_rtrn_i
+
+  // TODO: interrupt interface
+);
+
+  logic         icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld; // I$ <-> adapter: request, ack, return-valid (name must match the connections below, otherwise an implicit net is created)
+  icache_req_t  icache_adapter;  // I$ -> adapter request payload
+  icache_rtrn_t adapter_icache;  // adapter -> I$ return payload
+  logic         dcache_adapter_data_req, adapter_dcache_data_ack, adapter_dcache_rtrn_vld; // D$ <-> adapter: request, ack, return-valid
+  dcache_req_t  dcache_adapter;  // D$ -> adapter request payload
+  dcache_rtrn_t adapter_dcache;  // adapter -> D$ return payload
+
+
+  piton_icache #(
+  ) i_piton_icache (
+    .clk_i          ( clk_i                   ),
+    .rst_ni         ( rst_ni                  ),
+    .flush_i        ( icache_flush_i          ),
+    .en_i           ( icache_en_i             ),
+    .miss_o         ( icache_miss_o           ),
+    .areq_i         ( icache_areq_i           ),
+    .areq_o         ( icache_areq_o           ),
+    .dreq_i         ( icache_dreq_i           ),
+    .dreq_o         ( icache_dreq_o           ),
+    .mem_rtrn_vld_i ( adapter_icache_rtrn_vld ),
+    .mem_rtrn_i     ( adapter_icache          ),
+    .mem_data_req_o ( icache_adapter_data_req ),
+    .mem_data_ack_i ( adapter_icache_data_ack ),
+    .mem_data_o     ( icache_adapter          )
+  );
+
+  // decreasing priority
+  // Port 0: PTW
+  // Port 1: Load Unit
+  // Port 2: Store Unit
+  piton_dcache #(
+    .CACHE_START_ADDR ( CACHE_START_ADDR )
+  ) i_piton_dcache (
+    .clk_i          ( clk_i                   ),
+    .rst_ni         ( rst_ni                  ),
+    .enable_i       ( dcache_enable_i         ),
+    .flush_i        ( dcache_flush_i          ),
+    .flush_ack_o    ( dcache_flush_ack_o      ),
+    .miss_o         ( dcache_miss_o           ),
+    .amo_commit_i   ( dcache_amo_commit_i     ),
+    .amo_valid_o    ( dcache_amo_valid_o      ),
+    .amo_result_o   ( dcache_amo_result_o     ),
+    .amo_flush_i    ( dcache_amo_flush_i      ),
+    .req_ports_i    ( dcache_req_ports_i      ),
+    .req_ports_o    ( dcache_req_ports_o      ),
+    .mem_rtrn_vld_i ( adapter_dcache_rtrn_vld ),
+    .mem_rtrn_i     ( adapter_dcache          ),
+    .mem_data_req_o ( dcache_adapter_data_req ),
+    .mem_data_ack_i ( adapter_dcache_data_ack ),
+    .mem_data_o     ( dcache_adapter          )
+  );
+
+
+  piton_l15_adapter #(
+  ) i_adapter (
+    .clk_i             ( clk_i                   ),
+    .rst_ni            ( rst_ni                  ),
+    .icache_data_req_i ( icache_adapter_data_req ),
+    .icache_data_ack_o ( adapter_icache_data_ack ),
+    .icache_data_i     ( icache_adapter          ),
+    .icache_rtrn_vld_o ( adapter_icache_rtrn_vld ),
+    .icache_rtrn_o     ( adapter_icache          ),
+    .dcache_data_req_i ( dcache_adapter_data_req ),
+    .dcache_data_ack_o ( adapter_dcache_data_ack ),
+    .dcache_data_i     ( dcache_adapter          ),
+    .dcache_rtrn_vld_o ( adapter_dcache_rtrn_vld ),
+    .dcache_rtrn_o     ( adapter_dcache          ),
+    .l15_val_o         ( l15_val_o               ),
+    .l15_ack_i         ( l15_ack_i               ),
+    .l15_header_ack_i  ( l15_header_ack_i        ),
+    .l15_data_o        ( l15_data_o              ), // drive the module's l15_req_t output port (was an undeclared 1-bit implicit net)
+    .l15_val_i         ( l15_val_i               ),
+    .l15_req_ack_o     ( l15_req_ack_o           ),
+    .l15_rtrn_i        ( l15_rtrn_i              )  // forward the module's l15_rtrn_t input port (was an undeclared 1-bit implicit net)
+  );
+
+endmodule // piton_cache_subsystem
diff --git a/src/cache_subsystem/piton_dcache.sv b/src/cache_subsystem/piton_dcache.sv
new file mode 100644
index 000000000..63647492c
--- /dev/null
+++ b/src/cache_subsystem/piton_dcache.sv
@@ -0,0 +1,349 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License. You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+// +// Author: Florian Zaruba, ETH Zurich +// Date: 13.10.2017 +// Description: Nonblocking private L1 dcache (currently a stub: only the port list is live, the whole body below - including the tag_cmp helper module - sits inside one block comment, so no output of this module is driven yet) + +import ariane_pkg::*; +import piton_cache_pkg::*; + +module piton_dcache #( + parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000 +)( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // Cache management + input logic enable_i, // from CSR + input logic flush_i, // high until acknowledged + output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed + output logic miss_o, // we missed on a ld/st + // AMO interface + input logic amo_commit_i, // commit atomic memory operation + output logic amo_valid_o, // we have a valid AMO result + output logic [63:0] amo_result_o, // result of atomic memory operation + input logic amo_flush_i, // forget about AMO + // Request ports + input dcache_req_i_t [2:0] req_ports_i, // request ports + output dcache_req_o_t [2:0] req_ports_o, // request ports + + input logic mem_rtrn_vld_i, + input dcache_rtrn_t mem_rtrn_i, + output logic mem_data_req_o, + input logic mem_data_ack_i, + output dcache_req_t mem_data_o +); +/* + // ------------------------------- + // Controller <-> Arbiter + // ------------------------------- + // 1. Miss handler + // 2. PTW + // 3. Load Unit + // 4.
Store unit + logic [3:0][DCACHE_SET_ASSOC-1:0] req; + logic [3:0][DCACHE_INDEX_WIDTH-1:0]addr; + logic [3:0] gnt; + cache_line_t [DCACHE_SET_ASSOC-1:0] rdata; + logic [3:0][DCACHE_TAG_WIDTH-1:0] tag; + + cache_line_t [3:0] wdata; + logic [3:0] we; + cl_be_t [3:0] be; + logic [DCACHE_SET_ASSOC-1:0] hit_way; + // ------------------------------- + // Controller <-> Miss unit + // ------------------------------- + logic [2:0] busy; + logic [2:0][55:0] mshr_addr; + logic [2:0] mshr_addr_matches; + logic [2:0] mshr_index_matches; + logic [63:0] critical_word; + logic critical_word_valid; + + logic [2:0][$bits(miss_req_t)-1:0] miss_req; + logic [2:0] miss_gnt; + logic [2:0] active_serving; + + logic [2:0] bypass_gnt; + logic [2:0] bypass_valid; + logic [2:0][63:0] bypass_data; + // ------------------------------- + // Arbiter <-> Datram, + // ------------------------------- + logic [DCACHE_SET_ASSOC-1:0] req_ram; + logic [DCACHE_INDEX_WIDTH-1:0] addr_ram; + logic we_ram; + cache_line_t wdata_ram; + cache_line_t [DCACHE_SET_ASSOC-1:0] rdata_ram; + cl_be_t be_ram; + + // ------------------ + // Cache Controller + // ------------------ + generate + for (genvar i = 0; i < 3; i++) begin : master_ports + cache_ctrl #( + .CACHE_START_ADDR ( CACHE_START_ADDR ) + ) i_cache_ctrl ( + .bypass_i ( ~enable_i ), + + .busy_o ( busy [i] ), + + .req_port_i ( req_ports_i [i] ), + .req_port_o ( req_ports_o [i] ), + + + .req_o ( req [i+1] ), + .addr_o ( addr [i+1] ), + .gnt_i ( gnt [i+1] ), + .data_i ( rdata ), + .tag_o ( tag [i+1] ), + .data_o ( wdata [i+1] ), + .we_o ( we [i+1] ), + .be_o ( be [i+1] ), + .hit_way_i ( hit_way ), + + .miss_req_o ( miss_req [i] ), + .miss_gnt_i ( miss_gnt [i] ), + .active_serving_i ( active_serving [i] ), + .critical_word_i ( critical_word ), + .critical_word_valid_i ( critical_word_valid ), + .bypass_gnt_i ( bypass_gnt [i] ), + .bypass_valid_i ( bypass_valid [i] ), + .bypass_data_i ( bypass_data [i] ), + + .mshr_addr_o ( mshr_addr [i] ), // TODO +
.mshr_addr_matches_i ( mshr_addr_matches [i] ), // TODO + .mshr_index_matches_i ( mshr_index_matches[i] ), // TODO + .* + ); + end + endgenerate + + // ------------------ + // Miss Handling Unit + // ------------------ + miss_handler #( + .NR_PORTS ( 3 ) + ) i_miss_handler ( + .busy_i ( |busy ), + .miss_req_i ( miss_req ), + .miss_gnt_o ( miss_gnt ), + .bypass_gnt_o ( bypass_gnt ), + .bypass_valid_o ( bypass_valid ), + .bypass_data_o ( bypass_data ), + .critical_word_o ( critical_word ), + .critical_word_valid_o ( critical_word_valid ), + .mshr_addr_i ( mshr_addr ), + .mshr_addr_matches_o ( mshr_addr_matches ), + .mshr_index_matches_o ( mshr_index_matches ), + .active_serving_o ( active_serving ), + .req_o ( req [0] ), + .addr_o ( addr [0] ), + .data_i ( rdata ), + .be_o ( be [0] ), + .data_o ( wdata [0] ), + .we_o ( we [0] ), + .* + ); + + assign tag[0] = '0; + + // -------------- + // Memory Arrays + // -------------- + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block + sram #( + .DATA_WIDTH ( DCACHE_LINE_WIDTH ), + .NUM_WORDS ( DCACHE_NUM_WORDS ) + ) data_sram ( + .req_i ( req_ram [i] ), + .rst_ni ( rst_ni ), + .we_i ( we_ram ), + .addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ), + .wdata_i ( wdata_ram.data ), + .be_i ( be_ram.data ), + .rdata_o ( rdata_ram[i].data ), + .* + ); + + sram #( + .DATA_WIDTH ( DCACHE_TAG_WIDTH ), + .NUM_WORDS ( DCACHE_NUM_WORDS ) + ) tag_sram ( + .req_i ( req_ram [i] ), + .rst_ni ( rst_ni ), + .we_i ( we_ram ), + .addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ), + .wdata_i ( wdata_ram.tag ), + .be_i ( be_ram.tag ), + .rdata_o ( rdata_ram[i].tag ), + .* + ); + + end + + // ---------------- + // Valid/Dirty Regs + // ---------------- + logic [DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata; + + for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin + assign dirty_wdata[i] = wdata_ram.dirty; + assign dirty_wdata[DCACHE_SET_ASSOC + i] = wdata_ram.valid; + assign rdata_ram[i].valid =
dirty_rdata[DCACHE_SET_ASSOC + i]; + assign rdata_ram[i].dirty = dirty_rdata[i]; + end + + vdregs #( + .DATA_WIDTH ( DCACHE_DIRTY_WIDTH ), + .DATA_DEPTH ( DCACHE_NUM_WORDS ) + ) i_vdregs ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( |req_ram ), + .we_i ( we_ram ), + .addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ), + .wdata_i ( dirty_wdata ), + .biten_i ( {be_ram.valid, be_ram.dirty} ), + .rdata_o ( dirty_rdata ) + ); + + // ------------------------------------------------ + // Tag Comparison and memory arbitration + // ------------------------------------------------ + tag_cmp #( + .NR_PORTS ( 4 ), + .ADDR_WIDTH ( DCACHE_INDEX_WIDTH ), + .DCACHE_SET_ASSOC ( DCACHE_SET_ASSOC ) + ) i_tag_cmp ( + .req_i ( req ), + .gnt_o ( gnt ), + .addr_i ( addr ), + .wdata_i ( wdata ), + .we_i ( we ), + .be_i ( be ), + .rdata_o ( rdata ), + .tag_i ( tag ), + .hit_way_o ( hit_way ), + + .req_o ( req_ram ), + .addr_o ( addr_ram ), + .wdata_o ( wdata_ram ), + .we_o ( we_ram ), + .be_o ( be_ram ), + .rdata_i ( rdata_ram ), + .* + ); + + +`ifndef SYNTHESIS + initial begin + assert ($bits(data_if.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus"); + assert (DCACHE_LINE_WIDTH/64 inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of 64"); + end +`endif +endmodule + +// -------------- +// Tag Compare +// -------------- +// +// Description: Arbitrates access to cache memories, simplified request grant protocol +// checks for hit or miss on cache +// +module tag_cmp #( + parameter int unsigned NR_PORTS = 3, + parameter int unsigned ADDR_WIDTH = 64, + parameter type data_t = cache_line_t, + parameter type be_t = cl_be_t, + parameter int unsigned DCACHE_SET_ASSOC = 8 + )( + input logic clk_i, + input logic rst_ni, + + input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i, + output logic [NR_PORTS-1:0] gnt_o, + input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i, + input data_t [NR_PORTS-1:0] wdata_i, + input logic
[NR_PORTS-1:0] we_i, + input be_t [NR_PORTS-1:0] be_i, + output data_t [DCACHE_SET_ASSOC-1:0] rdata_o, + input logic [NR_PORTS-1:0][DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later + output logic [DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way + + + output logic [DCACHE_SET_ASSOC-1:0] req_o, + output logic [ADDR_WIDTH-1:0] addr_o, + output data_t wdata_o, + output logic we_o, + output be_t be_o, + input data_t [DCACHE_SET_ASSOC-1:0] rdata_i + ); + + assign rdata_o = rdata_i; + // one hot encoded + logic [NR_PORTS-1:0] id_d, id_q; + logic [DCACHE_TAG_WIDTH-1:0] sel_tag; + + always_comb begin : tag_sel + sel_tag = '0; + for (int unsigned i = 0; i < NR_PORTS; i++) + if (id_q[i]) + sel_tag = tag_i[i]; + end + + for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp + assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0; + end + + always_comb begin + + gnt_o = '0; + id_d = '0; + wdata_o = '0; + req_o = '0; + addr_o = '0; + be_o = '0; + we_o = '0; + // Request Side + // priority select + for (int unsigned i = 0; i < NR_PORTS; i++) begin + req_o = req_i[i]; + id_d = (1'b1 << i); + gnt_o[i] = 1'b1; + addr_o = addr_i[i]; + be_o = be_i[i]; + we_o = we_i[i]; + wdata_o = wdata_i[i]; + + if (req_i[i]) + break; + end + + `ifndef SYNTHESIS + `ifndef VERILATOR + // assert that cache only hits on one way + assert property ( + @(posedge clk_i) $onehot0(hit_way_o)) else begin $error("Hit should be one-hot encoded"); $stop(); end + `endif + `endif + end + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + id_q <= 0; + end else begin + id_q <= id_d; + end + end*/ + +endmodule diff --git a/src/cache_subsystem/piton_icache.sv b/src/cache_subsystem/piton_icache.sv new file mode 100644 index 000000000..4a64edfb2 --- /dev/null +++ b/src/cache_subsystem/piton_icache.sv @@ -0,0 +1,459 @@ +// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Florian Zaruba, ETH Zurich +// Date: 12.02.2018 +// ------------------------------ +// Instruction Cache +// ------------------------------ +import ariane_pkg::*; +import piton_cache_pkg::*; + +module piton_icache #( +)( + input logic clk_i, + input logic rst_ni, + + input logic flush_i, // flush the icache, flush and kill have to be asserted together + input logic en_i, // enable icache + output logic miss_o, // to performance counter + // address translation requests + input icache_areq_i_t areq_i, + output icache_areq_o_t areq_o, + // data requests + input icache_dreq_i_t dreq_i, + output icache_dreq_o_t dreq_o, + // refill port + input logic mem_rtrn_vld_i, + input icache_rtrn_t mem_rtrn_i, + output logic mem_data_req_o, + input logic mem_data_ack_i, + output icache_req_t mem_data_o +); + + /*localparam int unsigned ICACHE_BYTE_OFFSET = $clog2(ICACHE_LINE_WIDTH/8); // 3 + localparam int unsigned ICACHE_NUM_WORD = 2**(ICACHE_INDEX_WIDTH - ICACHE_BYTE_OFFSET); + localparam int unsigned NR_AXI_REFILLS = ($clog2(ICACHE_LINE_WIDTH/64) == 0) ? 
1 : $clog2(ICACHE_LINE_WIDTH/64); + // registers + enum logic [3:0] { FLUSH, IDLE, TAG_CMP, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP, + REDO_REQ, TAG_CMP_SAVED, REFILL, + WAIT_ADDRESS_TRANSLATION, WAIT_ADDRESS_TRANSLATION_KILLED + } state_d, state_q; + logic [$clog2(ICACHE_NUM_WORD)-1:0] cnt_d, cnt_q; + logic [NR_AXI_REFILLS-1:0] burst_cnt_d, burst_cnt_q; // counter for AXI transfers + logic [63:0] vaddr_d, vaddr_q; + logic [ICACHE_TAG_WIDTH-1:0] tag_d, tag_q; + logic [ICACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q; + logic flushing_d, flushing_q; + + // signals + logic [ICACHE_SET_ASSOC-1:0] req; // request to memory array + logic [(ICACHE_LINE_WIDTH+7)/8-1:0] data_be; // byte enable for data array + logic [(2**NR_AXI_REFILLS-1):0][7:0] be; // byte enable + logic [$clog2(ICACHE_NUM_WORD)-1:0] addr; // this is a cache-line address, to memory array + logic we; // write enable to memory array + logic [ICACHE_SET_ASSOC-1:0] hit; // hit from tag compare + logic [ICACHE_BYTE_OFFSET-1:2] idx; // index in cache line + logic update_lfsr; // shift the LFSR + logic [ICACHE_SET_ASSOC-1:0] random_way; // random way select from LFSR + logic [ICACHE_SET_ASSOC-1:0] way_valid; // bit string which contains the zapped valid bits + logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_invalid; // first non-valid encountered + logic repl_w_random; // we need to switch repl strategy since all are valid + logic [ICACHE_TAG_WIDTH-1:0] tag; // tag to do comparison with + + // tag + valid bit read/write data + struct packed { + logic valid; + logic [ICACHE_TAG_WIDTH-1:0] tag; + } tag_rdata [ICACHE_SET_ASSOC-1:0], tag_wdata; + + logic [ICACHE_LINE_WIDTH-1:0] data_rdata [ICACHE_SET_ASSOC-1:0], data_wdata; + logic [(2**NR_AXI_REFILLS-1):0][63:0] wdata; + + for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : sram_block + // ------------ + // Tag RAM + // ------------ + sram #( + // tag + valid bit + .DATA_WIDTH ( ICACHE_TAG_WIDTH + 1 ), + .NUM_WORDS ( ICACHE_NUM_WORD ) + ) tag_sram ( 
+ .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( req[i] ), + .we_i ( we ), + .addr_i ( addr ), + .wdata_i ( tag_wdata ), + .be_i ( '1 ), + .rdata_o ( tag_rdata[i] ) + ); + // ------------ + // Data RAM + // ------------ + sram #( + .DATA_WIDTH ( ICACHE_LINE_WIDTH ), + .NUM_WORDS ( ICACHE_NUM_WORD ) + ) data_sram ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( req[i] ), + .we_i ( we ), + .addr_i ( addr ), + .wdata_i ( data_wdata ), + .be_i ( data_be ), + .rdata_o ( data_rdata[i] ) + ); + end + // -------------------- + // Tag Comparison + // -------------------- + for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin + assign hit[i] = (tag_rdata[i].tag == tag) ? tag_rdata[i].valid : 1'b0; + end + + `ifndef SYNTHESIS + `ifndef VERILATOR + // assert that cache only hits on one way + assert property ( + @(posedge clk_i) $onehot0(hit)) else begin $error("[icache] Hit should be one-hot encoded"); $stop(); end + `endif + `endif + + // ------------------ + // Way Select + // ------------------ + assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2]; + // cacheline selected by hit + logic [ICACHE_LINE_WIDTH/FETCH_WIDTH-1:0][FETCH_WIDTH-1:0] selected_cl; + logic [ICACHE_LINE_WIDTH-1:0] selected_cl_flat; + + for (genvar i = 0; i < ICACHE_LINE_WIDTH; i++) begin + logic [ICACHE_SET_ASSOC-1:0] hit_masked_cl; + + for (genvar j = 0; j < ICACHE_SET_ASSOC; j++) + assign hit_masked_cl[j] = data_rdata[j][i] & hit[j]; + + assign selected_cl_flat[i] = |hit_masked_cl; + end + + assign selected_cl = selected_cl_flat; + // maybe re-work if critical + assign dreq_o.data = selected_cl[idx]; + + for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin + assign way_valid[i] = tag_rdata[i].valid; + end + + // ------------------ + // AXI Plumbing + // ------------------ + assign axi.aw_valid = '0; + assign axi.aw_addr = '0; + assign axi.aw_prot = '0; + assign axi.aw_region = '0; + assign axi.aw_len = '0; + assign axi.aw_size = 3'b000; + assign axi.aw_burst = 2'b00; + assign axi.aw_lock = '0; + assign 
axi.aw_cache = '0; + assign axi.aw_qos = '0; + assign axi.aw_id = '0; + assign axi.aw_user = '0; + + assign axi.w_valid = '0; + assign axi.w_data = '0; + assign axi.w_strb = '0; + assign axi.w_user = '0; + assign axi.w_last = 1'b0; + assign axi.b_ready = 1'b0; + + assign axi.ar_prot = '0; + assign axi.ar_region = '0; + assign axi.ar_len = (2**NR_AXI_REFILLS) - 1; + assign axi.ar_size = 3'b011; + assign axi.ar_burst = 2'b01; + assign axi.ar_lock = '0; + assign axi.ar_cache = '0; + assign axi.ar_qos = '0; + assign axi.ar_id = '0; + assign axi.ar_user = '0; + + assign axi.r_ready = 1'b1; + + assign data_be = be; + assign data_wdata = wdata; + + assign dreq_o.ex = areq_i.fetch_exception; + // ------------------ + // Cache Ctrl + // ------------------ + // for bypassing we use the existing infrastructure of the cache + // but on every access we are re-fetching the cache-line + always_comb begin : cache_ctrl + // default assignments + state_d = state_q; + cnt_d = cnt_q; + vaddr_d = vaddr_q; + tag_d = tag_q; + evict_way_d = evict_way_q; + flushing_d = flushing_q; + burst_cnt_d = burst_cnt_q; + + dreq_o.vaddr = vaddr_q; + + req = '0; + addr = dreq_i.vaddr[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET]; + we = 1'b0; + be = '0; + wdata = '0; + tag_wdata = '0; + dreq_o.ready = 1'b0; + tag = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH]; + dreq_o.valid = 1'b0; + update_lfsr = 1'b0; + miss_o = 1'b0; + + axi.ar_valid = 1'b0; + axi.ar_addr = '0; + + areq_o.fetch_req = 1'b0; + areq_o.fetch_vaddr = vaddr_q; + + case (state_q) + // ~> we are ready to receive a new request + IDLE: begin + dreq_o.ready = 1'b1; + // we are getting a new request + if (dreq_i.req) begin + // request the content of all arrays + req = '1; + // save the virtual address + vaddr_d = dreq_i.vaddr; + state_d = TAG_CMP; + end + + // go to flushing state + if (flush_i || flushing_q) + state_d = FLUSH; + + if (dreq_i.kill_s1) + state_d = IDLE; + end + // ~> compare the tag + TAG_CMP, 
TAG_CMP_SAVED: begin + areq_o.fetch_req = 1'b1; // request address translation + // use the saved tag + if (state_q == TAG_CMP_SAVED) + tag = tag_q; + // ------- + // Hit + // ------- + // disabling the icache just makes it fetch on every request + if (|hit && areq_i.fetch_valid && (en_i || (state_q != TAG_CMP))) begin + dreq_o.ready = 1'b1; + dreq_o.valid = 1'b1; + + // we've got another request + if (dreq_i.req) begin + // request the content of all arrays + req = '1; + // save the index and stay in compare mode + vaddr_d = dreq_i.vaddr; + state_d = TAG_CMP; + // no new request -> go back to idle + end else begin + state_d = IDLE; + end + + if (dreq_i.kill_s1) + state_d = IDLE; + // ------- + // Miss + // ------- + end else begin + state_d = REFILL; + // hit gonna be zero in most cases except for when the cache is disabled + evict_way_d = hit; + // save tag + tag_d = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH]; + miss_o = en_i; + // get way which to replace + // only if there is no hit we should fall back to real replacement. If there was a hit then + // it means we are in bypass mode (!en_i) and should update the cache-line with the most recent + // value fetched from memory. 
+ if (!(|hit)) begin + // all ways are currently full, randomly replace one of them + if (repl_w_random) begin + evict_way_d = random_way; + // shift the lfsr + update_lfsr = 1'b1; + // there is still one cache-line which is not valid ~> replace that one + end else begin + evict_way_d[repl_invalid] = 1'b1; + end + end + end + // if we didn't hit on the TLB we need to wait until the request has been completed + if (!areq_i.fetch_valid) begin + state_d = WAIT_ADDRESS_TRANSLATION; + end + end + // ~> wait here for a valid address translation, or on a translation even if the request has been killed + WAIT_ADDRESS_TRANSLATION, WAIT_ADDRESS_TRANSLATION_KILLED: begin + areq_o.fetch_req = 1'b1; + // retry the request if no exception occurred + if (areq_i.fetch_valid && (state_q == WAIT_ADDRESS_TRANSLATION)) begin + if (areq_i.fetch_exception.valid) begin + dreq_o.valid = 1'b1; + state_d = IDLE; + end else begin + state_d = REDO_REQ; + tag_d = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH]; + end + end else if (areq_i.fetch_valid) begin + state_d = IDLE; + end + + if (dreq_i.kill_s2) + state_d = WAIT_ADDRESS_TRANSLATION_KILLED; + end + // ~> request a cache-line refill + REFILL, WAIT_KILLED_REFILL: begin + axi.ar_valid = 1'b1; + axi.ar_addr[ICACHE_INDEX_WIDTH+ICACHE_TAG_WIDTH-1:0] = {tag_q, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET], {ICACHE_BYTE_OFFSET{1'b0}}}; + burst_cnt_d = '0; + + if (dreq_i.kill_s2) + state_d = WAIT_KILLED_REFILL; + + // we need to finish this AXI transfer + if (axi.ar_ready) + state_d = (dreq_i.kill_s2 || (state_q == WAIT_KILLED_REFILL)) ? 
WAIT_KILLED_AXI_R_RESP : WAIT_AXI_R_RESP; + end + // ~> wait for the read response + WAIT_AXI_R_RESP, WAIT_KILLED_AXI_R_RESP: begin + + req = evict_way_q; + addr = vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET]; + + if (axi.r_valid) begin + we = 1'b1; + tag_wdata.tag = tag_q; + tag_wdata.valid = 1'b1; + wdata[burst_cnt_q] = axi.r_data; + // enable the right write path + be[burst_cnt_q] = '1; + // increase burst count + burst_cnt_d = burst_cnt_q + 1; + end + + if (dreq_i.kill_s2) + state_d = WAIT_KILLED_AXI_R_RESP; + + if (axi.r_valid && axi.r_last) begin + state_d = (dreq_i.kill_s2) ? IDLE : REDO_REQ; + end + + if ((state_q == WAIT_KILLED_AXI_R_RESP) && axi.r_last && axi.r_valid) + state_d = IDLE; + end + // ~> redo the request, + REDO_REQ: begin + req = '1; + addr = vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET]; + tag = tag_q; + state_d = TAG_CMP_SAVED; // do tag comparison on the saved tag + end + // we need to wait for some AXI responses to come back + // here for the AW valid + WAIT_KILLED_REFILL: begin + if (axi.aw_valid) + state_d = IDLE; + end + // ~> we are coming here after reset or when a flush was requested + FLUSH: begin + addr = cnt_q; + cnt_d = cnt_q + 1; + req = '1; + we = 1; + // we've finished flushing, go back to idle + if (cnt_q == ICACHE_NUM_WORD - 1) begin + state_d = IDLE; + flushing_d = 1'b0; + end + end + + default : state_d = IDLE; + endcase + + // those are the states where we need to wait a little longer until we can safely exit + if (dreq_i.kill_s2 && !(state_q inside {REFILL, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP}) && !dreq_o.ready) begin + state_d = IDLE; + end + + // if we are killing we can never give a valid response + if (dreq_i.kill_s2) + dreq_o.valid = 1'b0; + + if (flush_i) begin + flushing_d = 1'b1; + dreq_o.ready = 1'b0; // we are not ready to accept a further request here + end + // if we are going to flush -> do not accept any new requests + if (flushing_q) + dreq_o.ready = 1'b0; + end + + lzc #( + 
.WIDTH ( ICACHE_SET_ASSOC ) + ) i_lzc ( + .in_i ( ~way_valid ), + .cnt_o ( repl_invalid ), + .empty_o ( repl_w_random ) + ); + + // ----------------- + // Replacement LFSR + // ----------------- + lfsr #(.WIDTH (ICACHE_SET_ASSOC)) i_lfsr ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .en_i ( update_lfsr ), + .refill_way_oh ( random_way ), + .refill_way_bin ( ) // left open + ); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + state_q <= FLUSH; + cnt_q <= '0; + vaddr_q <= '0; + tag_q <= '0; + evict_way_q <= '0; + flushing_q <= 1'b0; + burst_cnt_q <= '0;; + end else begin + state_q <= state_d; + cnt_q <= cnt_d; + vaddr_q <= vaddr_d; + tag_q <= tag_d; + evict_way_q <= evict_way_d; + flushing_q <= flushing_d; + burst_cnt_q <= burst_cnt_d; + end + end + + `ifndef SYNTHESIS + initial begin + assert ($bits(axi.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus"); + end + `endif*/ +endmodule diff --git a/src/cache_subsystem/piton_l15_adapter.sv b/src/cache_subsystem/piton_l15_adapter.sv new file mode 100644 index 000000000..ebcc92239 --- /dev/null +++ b/src/cache_subsystem/piton_l15_adapter.sv @@ -0,0 +1,440 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. 
+// +// Author: Michael Schaffner (schaffner@iis.ee.ethz.ch), ETH Zurich +// Date: 08.08.2018 +// Description: adapter module to connect the L1D$ and L1I$ to the native +// interface of the OpenPiton L1.5 cache. +// +// A couple of notes: +// +// 1) the L15 has been designed for an OpenSparc T1 core with 2 threads and can serve only +// 1 ld and rd request per thread. Ariane has only one hart, but the LSU can issue several write +// requests to optimize bandwidth. Hence, we reuse the threadid field to issue and track multiple +// requests (up to 2 in this case). +// +// 2) the CSM (clumped shared memory = coherence domain restriction in OpenPiton) +// feature is currently not supported by Ariane. +// +// 3) some features like blockinitstore, prefetch, ECC errors are not used (see interface below) +// +// 4) the arbiter can store up to two outgoing requests per cache. Incoming responses are passed +// through one streaming register, and need to be consumed unconditionally by the caches. +// +// 5) The L1.5 protocol is closely related to the CPX bus of openSPARC, see also [1,2] +// +// 6) Note on transaction data and size: if a store packet is less than 64 bits, then +// the field is filled with copies of the data. In case of an interrupt vector, +// an 18bit interrupt vector is expected. +// +// 7) L1I$ refill requests always have precedence over L1D$ requests. +// +// 8) L1I$ fill requests are always complete cache lines at the moment +// +// 9) the adapter converts from little endian (Ariane) to big endian (openpiton), and vice versa. 
+//
+// Refs: [1] OpenSPARC T1 Microarchitecture Specification
+//           https://www.oracle.com/technetwork/systems/opensparc/t1-01-opensparct1-micro-arch-1538959.html
+//       [2] OpenPiton Microarchitecture Specification
+//           https://parallel.princeton.edu/openpiton/docs/micro_arch.pdf
+//
+
+import ariane_pkg::*;
+import piton_cache_pkg::*;
+
+// Bridges the L1I$/L1D$ request and return interfaces to the L1.5 interface.
+//
+// Request path: each cache pushes into its own FIFO (a push happens exactly
+// when the corresponding *_data_ack_o is asserted). A fixed-priority arbiter
+// (icache first) selects one FIFO head, presents it on l15_data_o and pops it
+// when l15_ack_i arrives. The selection is locked while a request is
+// outstanding so that the presented packet cannot change before it is acked.
+//
+// Return path: packets from the L1.5 pass through one FIFO stage and are
+// presented to the caches for exactly one cycle (they are popped
+// unconditionally).
+module piton_l15_adapter #(
+
+) (
+  input  logic          clk_i,
+  input  logic          rst_ni,
+
+  // icache
+  input  logic          icache_data_req_i,
+  output logic          icache_data_ack_o,
+  input  icache_req_t   icache_data_i,
+  // returning packets must be consumed immediately
+  output logic          icache_rtrn_vld_o,
+  output icache_rtrn_t  icache_rtrn_o,
+
+
+  // dcache
+  input  logic          dcache_data_req_i,
+  output logic          dcache_data_ack_o,
+  input  dcache_req_t   dcache_data_i,
+  // returning packets must be consumed immediately
+  output logic          dcache_rtrn_vld_o,
+  output dcache_rtrn_t  dcache_rtrn_o,
+
+  // TODO: amops interface
+  // TODO: interrupt interface
+
+  // L15
+  output logic          l15_val_o,
+  input  logic          l15_ack_i,
+  input  logic          l15_header_ack_i,
+  output l15_req_t      l15_data_o,
+
+  input  logic          l15_val_i,
+  output logic          l15_req_ack_o,
+  input  l15_rtrn_t     l15_rtrn_i
+);
+
+// request path
+icache_req_t icache_data;
+logic icache_data_full, icache_data_empty, icache_data_pop, icache_data_push;
+
+dcache_req_t dcache_data;
+logic dcache_data_full, dcache_data_empty, dcache_data_pop, dcache_data_push;
+
+logic [1:0] arb_req;                 // {dcache, icache} request pending
+logic [1:0] arb_ack;                 // {dcache, icache} FIFO pop strobes
+logic       arb_idx;                 // 0: icache selected, 1: dcache selected
+logic       arb_idx_d,  arb_idx_q;   // selection held during a transaction
+logic       arb_lock_d, arb_lock_q;  // set while a request awaits l15_ack_i
+
+logic header_ack_d, header_ack_q;
+
+// return path
+logic rtrn_fifo_empty, rtrn_fifo_full, rtrn_fifo_pop;
+l15_rtrn_t rtrn_fifo_data;
+
+
+///////////////////////////////////////////////////////
+// request path to L15
+///////////////////////////////////////////////////////
+
+// relevant l15 signals
+// l15_req_t                       l15_data_o.l15_rqtype;                // see below for encoding
+// logic                           l15_data_o.l15_nc;                    // non-cacheable bit
+// logic [2:0]                     l15_data_o.l15_size;                  // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
+// logic [L15_TID_WIDTH-1:0]       l15_data_o.l15_threadid;              // currently 0 or 1
+// logic                           l15_data_o.l15_invalidate_cacheline;  // unused by Ariane as L1 has no ECC at the moment
+// logic [L15_WAY_WIDTH-1:0]       l15_data_o.l15_l1rplway;              // way to replace
+// logic [39:0]                    l15_data_o.l15_address;               // physical address
+// logic [63:0]                    l15_data_o.l15_data;                  // word to write
+// logic [63:0]                    l15_data_o.l15_data_next_entry;       // unused in Ariane (only used for CAS atomic requests)
+// logic [L15_TLB_CSM_WIDTH-1:0]   l15_data_o.l15_csm_data;
+
+
+// need to deassert valid signal when header is acked
+// can move on when packet is acked (need to clear header ack)
+assign l15_val_o    = (|arb_req) & ~header_ack_q;
+assign header_ack_d = (l15_ack_i) ? 1'b0 : (header_ack_q | l15_header_ack_i);
+
+assign arb_req = {~dcache_data_empty,
+                  ~icache_data_empty};
+
+assign dcache_data_pop = arb_ack[1];
+assign icache_data_pop = arb_ack[0];
+
+// a request is accepted whenever the corresponding FIFO has space ...
+assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
+assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;
+
+// ... and every accepted request must actually be written into the FIFO
+assign icache_data_push  = icache_data_ack_o;
+assign dcache_data_push  = dcache_data_ack_o;
+
+// data mux
+assign l15_data_o.l15_nc                   = (arb_idx) ? dcache_data.nc    : icache_data.nc;
+assign l15_data_o.l15_size                 = (arb_idx) ? dcache_data.size  : 3'b111;// always request full cache line for icache
+assign l15_data_o.l15_threadid             = (arb_idx) ? dcache_data.tid   : icache_data.tid;
+assign l15_data_o.l15_invalidate_cacheline = 1'b0; // unused by Ariane as L1 has no ECC at the moment
+assign l15_data_o.l15_blockstore           = 1'b0; // never issued; checked by the blockstore_o assertion below
+assign l15_data_o.l15_l1rplway             = (arb_idx) ? dcache_data.way   : icache_data.way;
+assign l15_data_o.l15_address              = (arb_idx) ? dcache_data.paddr : icache_data.paddr;
+assign l15_data_o.l15_data_next_entry      = '0; // unused in Ariane (only used for CAS atomic requests)
+assign l15_data_o.l15_csm_data             = '0; // unused in Ariane (only used for coherence domain restriction features)
+// NOTE(review): l15_req_t is declared in piton_cache_pkg; confirm that it has no
+// further members (e.g. prefetch / blockinitstore) that are left undriven here.
+
+// swap endianess and replicate datawords if necessary
+// (sub-word stores fill the 64 bit field with copies of the low byte / half word / word)
+// NOTE(review): assumes dcache_data.data is 64 bit wide, as implied by swendian64() - confirm.
+always_comb begin : p_datarepl
+  unique case (dcache_data.size)
+    3'b000: begin // 1byte
+      l15_data_o.l15_data = swendian64({8{dcache_data.data[7:0]}});
+    end
+    3'b001: begin // 2byte
+      l15_data_o.l15_data = swendian64({4{dcache_data.data[15:0]}});
+    end
+    3'b010: begin // 4byte
+      l15_data_o.l15_data = swendian64({2{dcache_data.data[31:0]}});
+    end
+    default: begin // 8 byte
+      l15_data_o.l15_data = swendian64(dcache_data.data);
+    end
+  endcase // dcache_data.size
+end
+
+// arbiter
+// ifills always have priority when a new request is selected. The selection does
+// not depend on l15_ack_i, so that the packet fields are already valid while
+// l15_val_o is asserted, and it is held until the ack arrives.
+always_comb begin : p_arb
+  arb_idx = (arb_lock_q) ? arb_idx_q : (arb_req[1] & ~arb_req[0]);
+  arb_ack = '0;
+  // pop the selected FIFO once the L1.5 has consumed the packet
+  if (l15_ack_i & arb_req[arb_idx]) begin
+    arb_ack[arb_idx] = 1'b1;
+  end
+end // p_arb
+
+assign arb_idx_d  = arb_idx;
+// lock as long as a request is pending and has not been acked
+assign arb_lock_d = (|arb_req) & ~l15_ack_i;
+
+// encode packet type
+always_comb begin : p_req
+  l15_data_o.l15_rqtype = LOAD_RQ;
+
+  if (arb_idx) begin // dcache
+    unique case (dcache_data.rtype)
+      DCACHE_STORE_REQ: begin
+        l15_data_o.l15_rqtype = STORE_RQ;
+      end
+      DCACHE_LOAD_REQ: begin
+        l15_data_o.l15_rqtype = LOAD_RQ;
+      end
+      // DCACHE_ATOMIC_REQ: begin
+      //   //TODO
+      // end
+      // DCACHE_INT_REQ: begin
+      //   //TODO
+      // end
+      // TODO: atomics
+      // CAS1_RQ
+      // CAS2_RQ
+      // SWAP_RQ
+      // TODO: interrupt request
+      // INT_RQ
+      default: begin
+        ;
+      end
+    endcase // dcache_data.rtype
+  end else begin // icache
+    l15_data_o.l15_rqtype = IMISS_RQ;
+  end
+end // p_req
+
+
+always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
+  if (~rst_ni) begin
+    header_ack_q <= 1'b0;
+    arb_idx_q    <= 1'b0;
+    arb_lock_q   <= 1'b0;
+  end else begin
+    header_ack_q <= header_ack_d;
+    arb_idx_q    <= arb_idx_d;
+    arb_lock_q   <= arb_lock_d;
+  end
+end
+
+
+fifo_v2 #(
+  .dtype       (  icache_req_t             ),
+  .DEPTH       (  ADAPTER_REQ_FIFO_DEPTH   )
+) i_icache_data_fifo (
+  .clk_i       (  clk_i                    ),
+  .rst_ni      (  rst_ni                   ),
+  .flush_i     (  1'b0                     ),
+  .testmode_i  (  1'b0                     ),
+  .full_o      (  icache_data_full         ),
+  .empty_o     (  icache_data_empty        ),
+  .alm_full_o  (                           ),
+  .alm_empty_o (                           ),
+  .data_i      (  icache_data_i            ),
+  .push_i      (  icache_data_push         ),
+  .data_o      (  icache_data              ),
+  .pop_i       (  icache_data_pop          )
+);
+
+fifo_v2 #(
+  .dtype       (  dcache_req_t             ),
+  .DEPTH       (  ADAPTER_REQ_FIFO_DEPTH   )
+) i_dcache_data_fifo (
+  .clk_i       (  clk_i                    ),
+  .rst_ni      (  rst_ni                   ),
+  .flush_i     (  1'b0                     ),
+  .testmode_i  (  1'b0                     ),
+  .full_o      (  dcache_data_full         ),
+  .empty_o     (  dcache_data_empty        ),
+  .alm_full_o  (                           ),
+  .alm_empty_o (                           ),
+  .data_i      (  dcache_data_i            ),
+  .push_i      (  dcache_data_push         ),
+  .data_o      (  dcache_data              ),
+  .pop_i       (  dcache_data_pop          )
+);
+
+///////////////////////////////////////////////////////
+// return path from L15
+///////////////////////////////////////////////////////
+
+// relevant l15 signals
+// l15_rtrn_i.l15_returntype;            // see below for encoding
+// l15_rtrn_i.l15_noncacheable;          // non-cacheable bit
+// l15_rtrn_i.l15_atomic;                // asserted in load return and store ack pack
+// l15_rtrn_i.l15_threadid;              // used as transaction ID
+// l15_rtrn_i.l15_f4b;                   // 4byte instruction fill. not used in Ariane
+// l15_rtrn_i.l15_data_0;                // used for both caches
+// l15_rtrn_i.l15_data_1;                // used for both caches
+// l15_rtrn_i.l15_data_2;                // currently only used for I$
+// l15_rtrn_i.l15_data_3;                // currently only used for I$
+// l15_rtrn_i.l15_inval_icache_all_way;  // invalidate all ways
+// l15_rtrn_i.l15_inval_address_15_4;    // invalidate selected cacheline
+// l15_rtrn_i.l15_inval_dcache_inval;    // invalidate selected cacheline and way
+// l15_rtrn_i.l15_inval_way;             // way to invalidate
+
+// acknowledge if we have space to hold this packet
+assign l15_req_ack_o = l15_val_i & ~rtrn_fifo_full;
+// packets have to be consumed immediately
+assign rtrn_fifo_pop = ~rtrn_fifo_empty;
+
+// decode packet type
+always_comb begin : p_rtrn_logic
+  icache_rtrn_o.rtype = ICACHE_IFILL_ACK;
+  dcache_rtrn_o.rtype = DCACHE_LOAD_ACK;
+  icache_rtrn_vld_o   = 1'b0;
+  dcache_rtrn_vld_o   = 1'b0;
+  if (~rtrn_fifo_empty) begin
+    unique case (rtrn_fifo_data.l15_returntype)
+      LOAD_RET: begin
+        dcache_rtrn_o.rtype = DCACHE_LOAD_ACK;
+        dcache_rtrn_vld_o   = 1'b1;
+      end
+      ST_ACK: begin
+        dcache_rtrn_o.rtype = DCACHE_STORE_ACK;
+        dcache_rtrn_vld_o   = 1'b1;
+      end
+      // INT_RET: begin
+      // TODO: implement this
+      // dcache_rtrn_o.reqType = DCACHE_INT_ACK;
+      // end
+      IFILL_RET: begin
+        icache_rtrn_o.rtype = ICACHE_IFILL_ACK;
+        icache_rtrn_vld_o   = 1'b1;
+      end
+      EVICT_REQ: begin
+        // forwarded to both caches, each one looks at its own inv flags
+        icache_rtrn_o.rtype = ICACHE_INV_REQ;
+        dcache_rtrn_o.rtype = DCACHE_INV_REQ;
+        icache_rtrn_vld_o   = 1'b1;
+        dcache_rtrn_vld_o   = 1'b1;
+      end
+      // CPX_RESTYPE_ATOMIC_RES: begin
+      // TODO: implement this
+      // dcache_rtrn_o.reqType = DCACHE_INT_ACK;
+      // end
+      default: begin
+        ;
+      end
+    endcase // rtrn_fifo_data.l15_returntype
+  end
+end
+
+
+// icache fifo signal mapping
+// swap endianess here since openpiton is big endian
+assign icache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_3),
+                              swendian64(rtrn_fifo_data.l15_data_2),
+                              swendian64(rtrn_fifo_data.l15_data_1),
+                              swendian64(rtrn_fifo_data.l15_data_0) };
+assign icache_rtrn_o.tid  = rtrn_fifo_data.l15_threadid;
+assign icache_rtrn_o.nc   = rtrn_fifo_data.l15_noncacheable;
+
+
+// dcache fifo signal mapping
+assign dcache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_1),
+                              swendian64(rtrn_fifo_data.l15_data_0) };
+
+assign dcache_rtrn_o.tid  = rtrn_fifo_data.l15_threadid;
+assign dcache_rtrn_o.nc   = rtrn_fifo_data.l15_noncacheable;
+
+
+// invalidation signal mapping
+assign icache_rtrn_o.inv.addr = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
+assign icache_rtrn_o.inv.way  = rtrn_fifo_data.l15_inval_way;
+assign icache_rtrn_o.inv.vld  = rtrn_fifo_data.l15_inval_icache_inval;
+assign icache_rtrn_o.inv.all  = rtrn_fifo_data.l15_inval_icache_all_way;
+
+assign dcache_rtrn_o.inv.addr = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
+assign dcache_rtrn_o.inv.way  = rtrn_fifo_data.l15_inval_way;
+assign dcache_rtrn_o.inv.vld  = rtrn_fifo_data.l15_inval_dcache_inval;
+assign dcache_rtrn_o.inv.all  = rtrn_fifo_data.l15_inval_dcache_all_way;
+
+fifo_v2 #(
+  .dtype       (  l15_rtrn_t               ),
+  .DEPTH       (  ADAPTER_RTRN_FIFO_DEPTH  )
+) i_rtrn_fifo (
+  .clk_i       (  clk_i                    ),
+  .rst_ni      (  rst_ni                   ),
+  .flush_i     (  1'b0                     ),
+  .testmode_i  (  1'b0                     ),
+  .full_o      (  rtrn_fifo_full           ),
+  .empty_o     (  rtrn_fifo_empty          ),
+  .alm_full_o  (                           ),
+  .alm_empty_o (                           ),
+  .data_i      (  l15_rtrn_i               ),
+  .push_i      (  l15_req_ack_o            ),
+  .data_o      (  rtrn_fifo_data           ),
+  .pop_i       (  rtrn_fifo_pop            )
+);
+
+
+///////////////////////////////////////////////////////
+// assertions
+///////////////////////////////////////////////////////
+
+//pragma translate_off
+`ifndef VERILATOR
+  // an eviction request without any invalidation flag set is meaningless
+  invalidations: assert property (
+    @(posedge clk_i) disable iff (~rst_ni)
+    (l15_val_i && (l15_rtrn_i.l15_returntype == EVICT_REQ)) |->
+    (l15_rtrn_i.l15_inval_icache_inval | l15_rtrn_i.l15_inval_icache_all_way |
+     l15_rtrn_i.l15_inval_dcache_inval | l15_rtrn_i.l15_inval_dcache_all_way))
+      else $fatal(1, "[l15_adapter] got invalidation package with zero invalidation flags");
+
+  blockstore_o: assert property (
+    @(posedge clk_i) disable iff (~rst_ni) l15_val_o |-> !l15_data_o.l15_blockstore)
+      else $fatal(1, "[l15_adapter] blockstores are not supported");
+
+  blockstore_i: assert property (
+    @(posedge clk_i) disable iff (~rst_ni) l15_val_i |-> !l15_rtrn_i.l15_blockinitstore)
+      else $fatal(1, "[l15_adapter] blockstores are not supported");
+
+  // only check the f4b flag while the return packet is actually valid
+  instr_fill_size: assert property (
+    @(posedge clk_i) disable iff (~rst_ni) l15_val_i |-> !l15_rtrn_i.l15_f4b)
+      else $fatal(1, "[l15_adapter] 4b instruction fills not supported");
+
+  unsuported_rtrn_types: assert property (
+    @(posedge clk_i) disable iff (~rst_ni) (l15_val_i |-> l15_rtrn_i.l15_returntype inside {LOAD_RET, ST_ACK, IFILL_RET, EVICT_REQ}))
+      else $fatal(1, "[l15_adapter] unsupported rtrn type");
+
+
+  initial begin
+    // assert wrong parameterizations
+    assert (L15_SET_ASSOC == ICACHE_SET_ASSOC)
+      else $fatal(1, "[l15_adapter] number of icache ways not aligned with L15");
+    // assert wrong parameterizations
+    assert (L15_SET_ASSOC == DCACHE_SET_ASSOC)
+      else $fatal(1, "[l15_adapter] number of dcache ways not aligned with L15");
+    // invalidation address returned by L1.5 is 16 bit
+    assert ((ICACHE_INDEX_WIDTH <= 16) && (DCACHE_INDEX_WIDTH <= 16))
+      else $fatal(1, "[l15_adapter] maximum number of index bits supported by L1.5 is 16");
+    // assert mismatch of cache line width
+    assert (ICACHE_LINE_WIDTH==256)
+      else $fatal(1, "[l15_adapter] ichache lines are currently restricted to 256 bits");
+    assert (DCACHE_LINE_WIDTH==128)
+      else $fatal(1, "[l15_adapter] dchache lines are currently restricted to 128 bits");
+  end
+`endif
+//pragma translate_on
+
+endmodule // piton_l15_adapter
\ No newline at end of file