implement l15 adapter

This commit is contained in:
Michael Schaffner 2018-08-31 12:55:59 +02:00
parent 551b400980
commit 5b29462bc4
6 changed files with 1579 additions and 0 deletions

View file

@ -23,6 +23,7 @@ ariane_pkg := include/riscv_pkg.sv \
src/debug/dm_pkg.sv \
include/ariane_pkg.sv \
include/std_cache_pkg.sv \
include/piton_cache_pkg.sv \
include/axi_if.sv
# utility modules

184
include/piton_cache_pkg.sv Normal file
View file

@ -0,0 +1,184 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Package for OpenPiton compatible L1 cache subsystem
package piton_cache_pkg;

  // ---------------------------------------------------------------------------
  // Parameters
  // ---------------------------------------------------------------------------

  // associativity of the L1.5
  localparam L15_SET_ASSOC           = 4;

  // these parameters need to coincide with the current L1.5 parameterization
  // do not change
  localparam L15_TID_WIDTH           = 2;
  localparam L15_TLB_CSM_WIDTH       = 33;

  // number of bits needed to encode a way index in the L1.5 / L1I / L1D
  localparam L15_WAY_WIDTH           = $clog2(L15_SET_ASSOC);
  localparam L1I_WAY_WIDTH           = $clog2(ariane_pkg::ICACHE_SET_ASSOC);
  localparam L1D_WAY_WIDTH           = $clog2(ariane_pkg::DCACHE_SET_ASSOC);

  // FIFO depths of L15 adapter
  localparam ADAPTER_REQ_FIFO_DEPTH  = 2;
  // since packets have to be consumed immediately,
  // we need not have a deeper FIFO
  localparam ADAPTER_RTRN_FIFO_DEPTH = 1;

  // ---------------------------------------------------------------------------
  // Local interfaces between the L1 caches and the L15 adapter
  // ---------------------------------------------------------------------------

  // request types issued by the D$
  typedef enum logic [1:0] {
    DCACHE_STORE_REQ,
    DCACHE_LOAD_REQ,
    DCACHE_ATOMIC_REQ,
    DCACHE_INT_REQ
  } dcache_out_t;

  // return types delivered to the D$
  typedef enum logic [2:0] {
    DCACHE_INV_REQ,    // no ack from the core required
    DCACHE_STORE_ACK,  // note: this may contain an invalidation vector, too
    DCACHE_LOAD_ACK,
    DCACHE_ATOMIC_ACK,
    DCACHE_INT_ACK
  } dcache_in_t;

  // return types delivered to the I$
  typedef enum logic [0:0] {
    ICACHE_INV_REQ,    // no ack from the core required
    ICACHE_IFILL_ACK
  } icache_in_t;

  // invalidation vector carried in the return packets of both caches
  typedef struct packed {
    logic                                      vld;  // invalidate only affected way
    logic                                      all;  // invalidate all ways
    logic [ariane_pkg::ICACHE_INDEX_WIDTH-1:0] addr; // physical address to invalidate
    logic [L15_WAY_WIDTH-1:0]                  way;  // way to invalidate
  } cache_inval_t;

  // ---------------------------------------------------------------------------
  // I$ interface
  // ---------------------------------------------------------------------------

  // I$ -> adapter
  typedef struct packed {
    logic [L1I_WAY_WIDTH-1:0]                  way;   // way to replace
    logic [63:0]                               paddr; // physical address
    logic                                      nc;    // noncacheable
    logic [L15_TID_WIDTH-1:0]                  tid;   // thread id (used as transaction id in Ariane)
  } icache_req_t;

  // adapter -> I$
  typedef struct packed {
    icache_in_t                                rtype; // one of the icache_in_t return types
    logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0]  data;  // full cache line width
    cache_inval_t                              inv;   // invalidation vector
    logic                                      nc;    // noncacheable
    logic [L15_TID_WIDTH-1:0]                  tid;   // thread id (used as transaction id in Ariane)
  } icache_rtrn_t;

  // ---------------------------------------------------------------------------
  // D$ interface
  // ---------------------------------------------------------------------------

  // D$ -> adapter
  typedef struct packed {
    dcache_out_t                               rtype; // one of the dcache_out_t request types
    logic [2:0]                                size;  // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
    logic [L1D_WAY_WIDTH-1:0]                  way;   // way to replace
    logic [63:0]                               paddr; // physical address
    logic [63:0]                               data;  // word width of processor (no block stores at the moment)
    logic                                      nc;    // noncacheable
    logic [L15_TID_WIDTH-1:0]                  tid;   // thread id (used as transaction id in Ariane)
  } dcache_req_t;

  // adapter -> D$
  typedef struct packed {
    dcache_in_t                                rtype; // one of the dcache_in_t return types
    logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0]  data;  // full cache line width
    cache_inval_t                              inv;   // invalidation vector
    logic                                      nc;    // noncacheable
    logic [L15_TID_WIDTH-1:0]                  tid;   // thread id (used as transaction id in Ariane)
  } dcache_rtrn_t;

  // ---------------------------------------------------------------------------
  // L1.5 packet type encodings
  // taken from iop.h in openpiton
  // this is a work around, need to include files properly
  // ---------------------------------------------------------------------------

  // to l1.5 (only marked subset is used)
  typedef enum logic [4:0] {
    LOAD_RQ    = 5'b00000, // load request
    IMISS_RQ   = 5'b10000, // instruction fill request
    STORE_RQ   = 5'b00001, // store request
    CAS1_RQ    = 5'b00010, // compare and swap1 packet (OpenSparc atomics)
    CAS2_RQ    = 5'b00011, // compare and swap2 packet (OpenSparc atomics)
    SWAP_RQ    = 5'b00110, // swap packet (OpenSparc atomics)
    STRLOAD_RQ = 5'b00100, // unused
    STRST_RQ   = 5'b00101, // unused
    STQ_RQ     = 5'b00111, // unused
    INT_RQ     = 5'b01001, // interrupt request
    FWD_RQ     = 5'b01101, // unused
    FWD_RPY    = 5'b01110, // unused
    RSVD_RQ    = 5'b11111  // unused
  } l15_reqtypes_t;

  // from l1.5 (only marked subset is used)
  typedef enum logic [3:0] {
    LOAD_RET               = 4'b0000, // load packet
    // INV_RET             = 4'b0011, // invalidate packet, not unique...
    ST_ACK                 = 4'b0100, // store ack packet
    // AT_ACK              = 4'b0011, // unused, not unique...
    INT_RET                = 4'b0111, // interrupt packet
    TEST_RET               = 4'b0101, // unused
    FP_RET                 = 4'b1000, // unused
    IFILL_RET              = 4'b0001, // instruction fill packet
    EVICT_REQ              = 4'b0011, // eviction request
    ERR_RET                = 4'b1100, // unused
    STRLOAD_RET            = 4'b0010, // unused
    STRST_ACK              = 4'b0110, // unused
    FWD_RQ_RET             = 4'b1010, // unused
    FWD_RPY_RET            = 4'b1011, // unused
    RSVD_RET               = 4'b1111, // unused
    CPX_RESTYPE_ATOMIC_RES = 4'b1110  // custom type for atomic responses
  } l15_rtrntypes_t;

  // ---------------------------------------------------------------------------
  // L1.5 interface
  // l15 interface uses reg for compatibility with verilog
  // ---------------------------------------------------------------------------

  // core -> l1.5
  typedef struct packed {
    l15_reqtypes_t                     l15_rqtype;               // request type, encoded as l15_reqtypes_t
    logic                              l15_nc;                   // non-cacheable bit
    logic [2:0]                        l15_size;                 // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
    logic [L15_TID_WIDTH-1:0]          l15_threadid;             // currently 0 or 1
    logic                              l15_prefetch;             // unused in openpiton
    logic                              l15_invalidate_cacheline; // unused by Ariane as L1 has no ECC at the moment
    logic                              l15_blockstore;           // unused in openpiton
    logic                              l15_blockinitstore;       // unused in openpiton
    logic [L15_WAY_WIDTH-1:0]          l15_l1rplway;             // way to replace
    logic [39:0]                       l15_address;              // physical address
    logic [63:0]                       l15_data;                 // word to write
    logic [63:0]                       l15_data_next_entry;      // unused in Ariane (only used for CAS atomic requests)
    logic [L15_TLB_CSM_WIDTH-1:0]      l15_csm_data;             // unused in Ariane
  } l15_req_t;

  // l1.5 -> core
  typedef struct packed {
    l15_rtrntypes_t                    l15_returntype;           // return type, encoded as l15_rtrntypes_t
    logic                              l15_l2miss;               // unused in Ariane
    logic [1:0]                        l15_error;                // unused in openpiton
    logic                              l15_noncacheable;         // non-cacheable bit
    logic                              l15_atomic;               // asserted in load return and store ack packets of atomic tx
    logic [L15_TID_WIDTH-1:0]          l15_threadid;             // used as transaction ID
    logic                              l15_prefetch;             // unused in openpiton
    logic                              l15_f4b;                  // 4byte instruction fill. not used in Ariane (always requests a full cache line).
    logic [63:0]                       l15_data_0;               // used for both caches
    logic [63:0]                       l15_data_1;               // used for both caches
    logic [63:0]                       l15_data_2;               // currently only used for I$
    logic [63:0]                       l15_data_3;               // currently only used for I$
    logic                              l15_inval_icache_all_way; // invalidate all ways
    logic                              l15_inval_dcache_all_way; // unused in openpiton
    logic [15:4]                       l15_inval_address_15_4;   // invalidate selected cacheline
    logic                              l15_cross_invalidate;     // unused in openpiton
    logic [L15_WAY_WIDTH-1:0]          l15_cross_invalidate_way; // unused in openpiton
    logic                              l15_inval_dcache_inval;   // invalidate selected cacheline and way
    logic                              l15_inval_icache_inval;   // unused in openpiton
    logic [L15_WAY_WIDTH-1:0]          l15_inval_way;            // way to invalidate
    logic                              l15_blockinitstore;       // unused in openpiton
  } l15_rtrn_t;

  // ---------------------------------------------------------------------------
  // Helper functions
  // ---------------------------------------------------------------------------

  // Reverses the byte order of a 64-bit word: byte 0 of the result is byte 7
  // of the input, byte 1 is byte 6, and so on. Bit order within a byte is kept.
  function automatic logic [63:0] swendian64(input logic [63:0] in);
    logic [63:0] swapped;
    for (int b = 0; b < 8; b++) begin
      swapped[8*b +: 8] = in[8*(7-b) +: 8];
    end
    return swapped;
  endfunction

endpackage : piton_cache_pkg

View file

@ -0,0 +1,146 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Ariane cache subsystem that is compatible with the OpenPiton
// coherent memory system.
import ariane_pkg::*;
import piton_cache_pkg::*;
// Top level of the OpenPiton-compatible L1 cache subsystem.
//
// Instantiates the instruction cache (piton_icache), the data cache
// (piton_dcache) and the L15 adapter (piton_l15_adapter) and wires the
// request/return channel of each cache to the adapter. The adapter in turn
// drives the L15 request port (l15_val_o / l15_data_o) and consumes the L15
// return port (l15_val_i / l15_rtrn_i) of this module.
//
// Parameters:
//   CACHE_START_ADDR - forwarded unchanged to piton_dcache.
module piton_cache_subsystem #(
  parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000
)(
  input  logic                 clk_i,
  input  logic                 rst_ni,
  // I$
  input  logic                 icache_en_i,         // enable icache (or bypass e.g: in debug mode)
  input  logic                 icache_flush_i,      // flush the icache, flush and kill have to be asserted together
  output logic                 icache_miss_o,       // to performance counter
  // address translation requests
  input  icache_areq_i_t       icache_areq_i,       // to/from frontend
  output icache_areq_o_t       icache_areq_o,
  // data requests
  input  icache_dreq_i_t       icache_dreq_i,       // to/from frontend
  output icache_dreq_o_t       icache_dreq_o,
  // D$
  // Cache management
  input  logic                 dcache_enable_i,     // from CSR
  input  logic                 dcache_flush_i,      // high until acknowledged
  output logic                 dcache_flush_ack_o,  // send a single cycle acknowledge signal when the cache is flushed
  output logic                 dcache_miss_o,       // we missed on a ld/st
  // AMO interface (not functional yet)
  input  logic                 dcache_amo_commit_i, // commit atomic memory operation
  output logic                 dcache_amo_valid_o,  // we have a valid AMO result
  output logic [63:0]          dcache_amo_result_o, // result of atomic memory operation
  input  logic                 dcache_amo_flush_i,  // forget about AMO
  // Request ports
  input  dcache_req_i_t [2:0]  dcache_req_ports_i,  // to/from LSU
  output dcache_req_o_t [2:0]  dcache_req_ports_o,  // to/from LSU
  // L15 (memory side)
  output logic                 l15_val_o,
  input  logic                 l15_ack_i,
  input  logic                 l15_header_ack_i,
  output l15_req_t             l15_data_o,
  input  logic                 l15_val_i,
  output logic                 l15_req_ack_o,
  input  l15_rtrn_t            l15_rtrn_i
  // TODO: interrupt interface
);

  // I$ <-> adapter
  // Note: the return-valid net is named adapter_icache_rtrn_vld so that the
  // declaration matches the name used at both instance connections below
  // (previously the declaration carried a stray "_o" suffix and the
  // connections silently created an implicit net).
  logic         icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld;
  icache_req_t  icache_adapter;  // request packet,  I$ -> adapter
  icache_rtrn_t adapter_icache;  // return packet,   adapter -> I$

  // D$ <-> adapter
  logic         dcache_adapter_data_req, adapter_dcache_data_ack, adapter_dcache_rtrn_vld;
  dcache_req_t  dcache_adapter;  // request packet,  D$ -> adapter
  dcache_rtrn_t adapter_dcache;  // return packet,   adapter -> D$

  // ------------------
  // Instruction cache
  // ------------------
  piton_icache #(
  ) i_piton_icache (
    .clk_i          ( clk_i                   ),
    .rst_ni         ( rst_ni                  ),
    .flush_i        ( icache_flush_i          ),
    .en_i           ( icache_en_i             ),
    .miss_o         ( icache_miss_o           ),
    .areq_i         ( icache_areq_i           ),
    .areq_o         ( icache_areq_o           ),
    .dreq_i         ( icache_dreq_i           ),
    .dreq_o         ( icache_dreq_o           ),
    .mem_rtrn_vld_i ( adapter_icache_rtrn_vld ),
    .mem_rtrn_i     ( adapter_icache          ),
    .mem_data_req_o ( icache_adapter_data_req ),
    .mem_data_ack_i ( adapter_icache_data_ack ),
    .mem_data_o     ( icache_adapter          )
  );

  // ------------------
  // Data cache
  // ------------------
  // decreasing priority
  // Port 0: PTW
  // Port 1: Load Unit
  // Port 2: Store Unit
  piton_dcache #(
    .CACHE_START_ADDR ( CACHE_START_ADDR )
  ) i_piton_dcache (
    .clk_i          ( clk_i                   ),
    .rst_ni         ( rst_ni                  ),
    .enable_i       ( dcache_enable_i         ),
    .flush_i        ( dcache_flush_i          ),
    .flush_ack_o    ( dcache_flush_ack_o      ),
    .miss_o         ( dcache_miss_o           ),
    .amo_commit_i   ( dcache_amo_commit_i     ),
    .amo_valid_o    ( dcache_amo_valid_o      ),
    .amo_result_o   ( dcache_amo_result_o     ),
    .amo_flush_i    ( dcache_amo_flush_i      ),
    .req_ports_i    ( dcache_req_ports_i      ),
    .req_ports_o    ( dcache_req_ports_o      ),
    .mem_rtrn_vld_i ( adapter_dcache_rtrn_vld ),
    .mem_rtrn_i     ( adapter_dcache          ),
    .mem_data_req_o ( dcache_adapter_data_req ),
    .mem_data_ack_i ( adapter_dcache_data_ack ),
    .mem_data_o     ( dcache_adapter          )
  );

  // ------------------
  // L15 adapter
  // ------------------
  // The L15 request/return structs are connected straight to this module's
  // l15_data_o / l15_rtrn_i ports. They were previously hooked up to the
  // undeclared names l15_port_o / l15_port_i, which left l15_data_o undriven
  // and l15_rtrn_i unconnected.
  piton_l15_adapter #(
  ) i_adapter (
    .clk_i             ( clk_i                   ),
    .rst_ni            ( rst_ni                  ),
    .icache_data_req_i ( icache_adapter_data_req ),
    .icache_data_ack_o ( adapter_icache_data_ack ),
    .icache_data_i     ( icache_adapter          ),
    .icache_rtrn_vld_o ( adapter_icache_rtrn_vld ),
    .icache_rtrn_o     ( adapter_icache          ),
    .dcache_data_req_i ( dcache_adapter_data_req ),
    .dcache_data_ack_o ( adapter_dcache_data_ack ),
    .dcache_data_i     ( dcache_adapter          ),
    .dcache_rtrn_vld_o ( adapter_dcache_rtrn_vld ),
    .dcache_rtrn_o     ( adapter_dcache          ),
    .l15_val_o         ( l15_val_o               ),
    .l15_ack_i         ( l15_ack_i               ),
    .l15_header_ack_i  ( l15_header_ack_i        ),
    .l15_data_o        ( l15_data_o              ),
    .l15_val_i         ( l15_val_i               ),
    .l15_req_ack_o     ( l15_req_ack_o           ),
    .l15_rtrn_i        ( l15_rtrn_i              )
  );

endmodule // piton_cache_subsystem

View file

@ -0,0 +1,349 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 13.10.2017
// Description: Nonblocking private L1 dcache
import ariane_pkg::*;
import piton_cache_pkg::*;
// L1 data cache wrapper with the OpenPiton-style memory interface
// (dcache_req_t / dcache_rtrn_t from piton_cache_pkg).
//
// NOTE(review): in its current state this module is a port-only stub. The
// whole implementation between the port list and the final `endmodule` is
// enclosed in a single block comment, so no output of this module is driven.
// The commented-out region also contains an `endmodule` followed by a complete
// `tag_cmp` module; both are inactive as long as the block comment is in place.
module piton_dcache #(
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// Cache management
input logic enable_i, // from CSR
input logic flush_i, // high until acknowledged
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st
// AMO interface
input logic amo_commit_i, // commit atomic memory operation
output logic amo_valid_o, // we have a valid AMO result
output logic [63:0] amo_result_o, // result of atomic memory operation
input logic amo_flush_i, // forget about AMO
// Request ports
input dcache_req_i_t [2:0] req_ports_i, // request ports
output dcache_req_o_t [2:0] req_ports_o, // request ports
// Memory-side interface (connected to piton_l15_adapter in piton_cache_subsystem)
input logic mem_rtrn_vld_i, // return packet valid
input dcache_rtrn_t mem_rtrn_i, // return packet
output logic mem_data_req_o, // request valid
input logic mem_data_ack_i, // request acknowledge
output dcache_req_t mem_data_o // request packet
);
// Everything from here up to the closing comment delimiter just before the
// final `endmodule` is disabled code (block comment).
/*
// -------------------------------
// Controller <-> Arbiter
// -------------------------------
// 1. Miss handler
// 2. PTW
// 3. Load Unit
// 4. Store unit
logic [3:0][DCACHE_SET_ASSOC-1:0] req;
logic [3:0][DCACHE_INDEX_WIDTH-1:0]addr;
logic [3:0] gnt;
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata;
logic [3:0][DCACHE_TAG_WIDTH-1:0] tag;
cache_line_t [3:0] wdata;
logic [3:0] we;
cl_be_t [3:0] be;
logic [DCACHE_SET_ASSOC-1:0] hit_way;
// -------------------------------
// Controller <-> Miss unit
// -------------------------------
logic [2:0] busy;
logic [2:0][55:0] mshr_addr;
logic [2:0] mshr_addr_matches;
logic [2:0] mshr_index_matches;
logic [63:0] critical_word;
logic critical_word_valid;
logic [2:0][$bits(miss_req_t)-1:0] miss_req;
logic [2:0] miss_gnt;
logic [2:0] active_serving;
logic [2:0] bypass_gnt;
logic [2:0] bypass_valid;
logic [2:0][63:0] bypass_data;
// -------------------------------
// Arbiter <-> Datram,
// -------------------------------
logic [DCACHE_SET_ASSOC-1:0] req_ram;
logic [DCACHE_INDEX_WIDTH-1:0] addr_ram;
logic we_ram;
cache_line_t wdata_ram;
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata_ram;
cl_be_t be_ram;
// ------------------
// Cache Controller
// ------------------
generate
for (genvar i = 0; i < 3; i++) begin : master_ports
cache_ctrl #(
.CACHE_START_ADDR ( CACHE_START_ADDR )
) i_cache_ctrl (
.bypass_i ( ~enable_i ),
.busy_o ( busy [i] ),
.req_port_i ( req_ports_i [i] ),
.req_port_o ( req_ports_o [i] ),
.req_o ( req [i+1] ),
.addr_o ( addr [i+1] ),
.gnt_i ( gnt [i+1] ),
.data_i ( rdata ),
.tag_o ( tag [i+1] ),
.data_o ( wdata [i+1] ),
.we_o ( we [i+1] ),
.be_o ( be [i+1] ),
.hit_way_i ( hit_way ),
.miss_req_o ( miss_req [i] ),
.miss_gnt_i ( miss_gnt [i] ),
.active_serving_i ( active_serving [i] ),
.critical_word_i ( critical_word ),
.critical_word_valid_i ( critical_word_valid ),
.bypass_gnt_i ( bypass_gnt [i] ),
.bypass_valid_i ( bypass_valid [i] ),
.bypass_data_i ( bypass_data [i] ),
.mshr_addr_o ( mshr_addr [i] ), // TODO
.mshr_addr_matches_i ( mshr_addr_matches [i] ), // TODO
.mshr_index_matches_i ( mshr_index_matches[i] ), // TODO
.*
);
end
endgenerate
// ------------------
// Miss Handling Unit
// ------------------
miss_handler #(
.NR_PORTS ( 3 )
) i_miss_handler (
.busy_i ( |busy ),
.miss_req_i ( miss_req ),
.miss_gnt_o ( miss_gnt ),
.bypass_gnt_o ( bypass_gnt ),
.bypass_valid_o ( bypass_valid ),
.bypass_data_o ( bypass_data ),
.critical_word_o ( critical_word ),
.critical_word_valid_o ( critical_word_valid ),
.mshr_addr_i ( mshr_addr ),
.mshr_addr_matches_o ( mshr_addr_matches ),
.mshr_index_matches_o ( mshr_index_matches ),
.active_serving_o ( active_serving ),
.req_o ( req [0] ),
.addr_o ( addr [0] ),
.data_i ( rdata ),
.be_o ( be [0] ),
.data_o ( wdata [0] ),
.we_o ( we [0] ),
.*
);
assign tag[0] = '0;
// --------------
// Memory Arrays
// --------------
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
sram #(
.DATA_WIDTH ( DCACHE_LINE_WIDTH ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) data_sram (
.req_i ( req_ram [i] ),
.rst_ni ( rst_ni ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wdata_i ( wdata_ram.data ),
.be_i ( be_ram.data ),
.rdata_o ( rdata_ram[i].data ),
.*
);
sram #(
.DATA_WIDTH ( DCACHE_TAG_WIDTH ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) tag_sram (
.req_i ( req_ram [i] ),
.rst_ni ( rst_ni ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wdata_i ( wdata_ram.tag ),
.be_i ( be_ram.tag ),
.rdata_o ( rdata_ram[i].tag ),
.*
);
end
// ----------------
// Valid/Dirty Regs
// ----------------
logic [DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign dirty_wdata[i] = wdata_ram.dirty;
assign dirty_wdata[DCACHE_SET_ASSOC + i] = wdata_ram.valid;
assign rdata_ram[i].valid = dirty_rdata[DCACHE_SET_ASSOC + i];
assign rdata_ram[i].dirty = dirty_rdata[i];
end
vdregs #(
.DATA_WIDTH ( DCACHE_DIRTY_WIDTH ),
.DATA_DEPTH ( DCACHE_NUM_WORDS )
) i_vdregs (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( |req_ram ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wdata_i ( dirty_wdata ),
.biten_i ( {be_ram.valid, be_ram.dirty} ),
.rdata_o ( dirty_rdata )
);
// ------------------------------------------------
// Tag Comparison and memory arbitration
// ------------------------------------------------
tag_cmp #(
.NR_PORTS ( 4 ),
.ADDR_WIDTH ( DCACHE_INDEX_WIDTH ),
.DCACHE_SET_ASSOC ( DCACHE_SET_ASSOC )
) i_tag_cmp (
.req_i ( req ),
.gnt_o ( gnt ),
.addr_i ( addr ),
.wdata_i ( wdata ),
.we_i ( we ),
.be_i ( be ),
.rdata_o ( rdata ),
.tag_i ( tag ),
.hit_way_o ( hit_way ),
.req_o ( req_ram ),
.addr_o ( addr_ram ),
.wdata_o ( wdata_ram ),
.we_o ( we_ram ),
.be_o ( be_ram ),
.rdata_i ( rdata_ram ),
.*
);
`ifndef SYNTHESIS
initial begin
assert ($bits(data_if.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus");
assert (DCACHE_LINE_WIDTH/64 inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of 64");
end
`endif
endmodule
// --------------
// Tag Compare
// --------------
//
// Description: Arbitrates access to cache memories, simplified request grant protocol
// checks for hit or miss on cache
//
module tag_cmp #(
parameter int unsigned NR_PORTS = 3,
parameter int unsigned ADDR_WIDTH = 64,
parameter type data_t = cache_line_t,
parameter type be_t = cl_be_t,
parameter int unsigned DCACHE_SET_ASSOC = 8
)(
input logic clk_i,
input logic rst_ni,
input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i,
output logic [NR_PORTS-1:0] gnt_o,
input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i,
input data_t [NR_PORTS-1:0] wdata_i,
input logic [NR_PORTS-1:0] we_i,
input be_t [NR_PORTS-1:0] be_i,
output data_t [DCACHE_SET_ASSOC-1:0] rdata_o,
input logic [NR_PORTS-1:0][DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later
output logic [DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way
output logic [DCACHE_SET_ASSOC-1:0] req_o,
output logic [ADDR_WIDTH-1:0] addr_o,
output data_t wdata_o,
output logic we_o,
output be_t be_o,
input data_t [DCACHE_SET_ASSOC-1:0] rdata_i
);
assign rdata_o = rdata_i;
// one hot encoded
logic [NR_PORTS-1:0] id_d, id_q;
logic [DCACHE_TAG_WIDTH-1:0] sel_tag;
always_comb begin : tag_sel
sel_tag = '0;
for (int unsigned i = 0; i < NR_PORTS; i++)
if (id_q[i])
sel_tag = tag_i[i];
end
for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp
assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0;
end
always_comb begin
gnt_o = '0;
id_d = '0;
wdata_o = '0;
req_o = '0;
addr_o = '0;
be_o = '0;
we_o = '0;
// Request Side
// priority select
for (int unsigned i = 0; i < NR_PORTS; i++) begin
req_o = req_i[i];
id_d = (1'b1 << i);
gnt_o[i] = 1'b1;
addr_o = addr_i[i];
be_o = be_i[i];
we_o = we_i[i];
wdata_o = wdata_i[i];
if (req_i[i])
break;
end
`ifndef SYNTHESIS
`ifndef VERILATOR
// assert that cache only hits on one way
assert property (
@(posedge clk_i) $onehot0(hit_way_o)) else begin $error("Hit should be one-hot encoded"); $stop(); end
`endif
`endif
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
id_q <= 0;
end else begin
id_q <= id_d;
end
end*/
// End of disabled code; this endmodule closes piton_dcache.
endmodule

View file

@ -0,0 +1,459 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 12.02.2018
// ------------------------------
// Instruction Cache
// ------------------------------
import ariane_pkg::*;
import piton_cache_pkg::*;
module piton_icache #(
)(
input logic clk_i,
input logic rst_ni,
input logic flush_i, // flush the icache, flush and kill have to be asserted together
input logic en_i, // enable icache
output logic miss_o, // to performance counter
// address translation requests
input icache_areq_i_t areq_i,
output icache_areq_o_t areq_o,
// data requests
input icache_dreq_i_t dreq_i,
output icache_dreq_o_t dreq_o,
// refill port
input logic mem_rtrn_vld_i,
input icache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output icache_req_t mem_data_o
);
/*localparam int unsigned ICACHE_BYTE_OFFSET = $clog2(ICACHE_LINE_WIDTH/8); // 3
localparam int unsigned ICACHE_NUM_WORD = 2**(ICACHE_INDEX_WIDTH - ICACHE_BYTE_OFFSET);
localparam int unsigned NR_AXI_REFILLS = ($clog2(ICACHE_LINE_WIDTH/64) == 0) ? 1 : $clog2(ICACHE_LINE_WIDTH/64);
// registers
enum logic [3:0] { FLUSH, IDLE, TAG_CMP, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP,
REDO_REQ, TAG_CMP_SAVED, REFILL,
WAIT_ADDRESS_TRANSLATION, WAIT_ADDRESS_TRANSLATION_KILLED
} state_d, state_q;
logic [$clog2(ICACHE_NUM_WORD)-1:0] cnt_d, cnt_q;
logic [NR_AXI_REFILLS-1:0] burst_cnt_d, burst_cnt_q; // counter for AXI transfers
logic [63:0] vaddr_d, vaddr_q;
logic [ICACHE_TAG_WIDTH-1:0] tag_d, tag_q;
logic [ICACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
logic flushing_d, flushing_q;
// signals
logic [ICACHE_SET_ASSOC-1:0] req; // request to memory array
logic [(ICACHE_LINE_WIDTH+7)/8-1:0] data_be; // byte enable for data array
logic [(2**NR_AXI_REFILLS-1):0][7:0] be; // byte enable
logic [$clog2(ICACHE_NUM_WORD)-1:0] addr; // this is a cache-line address, to memory array
logic we; // write enable to memory array
logic [ICACHE_SET_ASSOC-1:0] hit; // hit from tag compare
logic [ICACHE_BYTE_OFFSET-1:2] idx; // index in cache line
logic update_lfsr; // shift the LFSR
logic [ICACHE_SET_ASSOC-1:0] random_way; // random way select from LFSR
logic [ICACHE_SET_ASSOC-1:0] way_valid; // bit string which contains the zapped valid bits
logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_invalid; // first non-valid encountered
logic repl_w_random; // we need to switch repl strategy since all are valid
logic [ICACHE_TAG_WIDTH-1:0] tag; // tag to do comparison with
// tag + valid bit read/write data
struct packed {
logic valid;
logic [ICACHE_TAG_WIDTH-1:0] tag;
} tag_rdata [ICACHE_SET_ASSOC-1:0], tag_wdata;
logic [ICACHE_LINE_WIDTH-1:0] data_rdata [ICACHE_SET_ASSOC-1:0], data_wdata;
logic [(2**NR_AXI_REFILLS-1):0][63:0] wdata;
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : sram_block
// ------------
// Tag RAM
// ------------
sram #(
// tag + valid bit
.DATA_WIDTH ( ICACHE_TAG_WIDTH + 1 ),
.NUM_WORDS ( ICACHE_NUM_WORD )
) tag_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( req[i] ),
.we_i ( we ),
.addr_i ( addr ),
.wdata_i ( tag_wdata ),
.be_i ( '1 ),
.rdata_o ( tag_rdata[i] )
);
// ------------
// Data RAM
// ------------
sram #(
.DATA_WIDTH ( ICACHE_LINE_WIDTH ),
.NUM_WORDS ( ICACHE_NUM_WORD )
) data_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( req[i] ),
.we_i ( we ),
.addr_i ( addr ),
.wdata_i ( data_wdata ),
.be_i ( data_be ),
.rdata_o ( data_rdata[i] )
);
end
// --------------------
// Tag Comparison
// --------------------
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin
assign hit[i] = (tag_rdata[i].tag == tag) ? tag_rdata[i].valid : 1'b0;
end
`ifndef SYNTHESIS
`ifndef VERILATOR
// assert that cache only hits on one way
assert property (
@(posedge clk_i) $onehot0(hit)) else begin $error("[icache] Hit should be one-hot encoded"); $stop(); end
`endif
`endif
// ------------------
// Way Select
// ------------------
assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2];
// cacheline selected by hit
logic [ICACHE_LINE_WIDTH/FETCH_WIDTH-1:0][FETCH_WIDTH-1:0] selected_cl;
logic [ICACHE_LINE_WIDTH-1:0] selected_cl_flat;
for (genvar i = 0; i < ICACHE_LINE_WIDTH; i++) begin
logic [ICACHE_SET_ASSOC-1:0] hit_masked_cl;
for (genvar j = 0; j < ICACHE_SET_ASSOC; j++)
assign hit_masked_cl[j] = data_rdata[j][i] & hit[j];
assign selected_cl_flat[i] = |hit_masked_cl;
end
assign selected_cl = selected_cl_flat;
// maybe re-work if critical
assign dreq_o.data = selected_cl[idx];
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin
assign way_valid[i] = tag_rdata[i].valid;
end
// ------------------
// AXI Plumbing
// ------------------
assign axi.aw_valid = '0;
assign axi.aw_addr = '0;
assign axi.aw_prot = '0;
assign axi.aw_region = '0;
assign axi.aw_len = '0;
assign axi.aw_size = 3'b000;
assign axi.aw_burst = 2'b00;
assign axi.aw_lock = '0;
assign axi.aw_cache = '0;
assign axi.aw_qos = '0;
assign axi.aw_id = '0;
assign axi.aw_user = '0;
assign axi.w_valid = '0;
assign axi.w_data = '0;
assign axi.w_strb = '0;
assign axi.w_user = '0;
assign axi.w_last = 1'b0;
assign axi.b_ready = 1'b0;
assign axi.ar_prot = '0;
assign axi.ar_region = '0;
assign axi.ar_len = (2**NR_AXI_REFILLS) - 1;
assign axi.ar_size = 3'b011;
assign axi.ar_burst = 2'b01;
assign axi.ar_lock = '0;
assign axi.ar_cache = '0;
assign axi.ar_qos = '0;
assign axi.ar_id = '0;
assign axi.ar_user = '0;
assign axi.r_ready = 1'b1;
assign data_be = be;
assign data_wdata = wdata;
assign dreq_o.ex = areq_i.fetch_exception;
// ------------------
// Cache Ctrl
// ------------------
// for bypassing we use the existing infrastructure of the cache
// but on every access we are re-fetching the cache-line
always_comb begin : cache_ctrl
// default assignments
state_d = state_q;
cnt_d = cnt_q;
vaddr_d = vaddr_q;
tag_d = tag_q;
evict_way_d = evict_way_q;
flushing_d = flushing_q;
burst_cnt_d = burst_cnt_q;
dreq_o.vaddr = vaddr_q;
req = '0;
addr = dreq_i.vaddr[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET];
we = 1'b0;
be = '0;
wdata = '0;
tag_wdata = '0;
dreq_o.ready = 1'b0;
tag = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
dreq_o.valid = 1'b0;
update_lfsr = 1'b0;
miss_o = 1'b0;
axi.ar_valid = 1'b0;
axi.ar_addr = '0;
areq_o.fetch_req = 1'b0;
areq_o.fetch_vaddr = vaddr_q;
case (state_q)
// ~> we are ready to receive a new request
IDLE: begin
dreq_o.ready = 1'b1;
// we are getting a new request
if (dreq_i.req) begin
// request the content of all arrays
req = '1;
// save the virtual address
vaddr_d = dreq_i.vaddr;
state_d = TAG_CMP;
end
// go to flushing state
if (flush_i || flushing_q)
state_d = FLUSH;
if (dreq_i.kill_s1)
state_d = IDLE;
end
// ~> compare the tag
TAG_CMP, TAG_CMP_SAVED: begin
areq_o.fetch_req = 1'b1; // request address translation
// use the saved tag
if (state_q == TAG_CMP_SAVED)
tag = tag_q;
// -------
// Hit
// -------
// disabling the icache just makes it fetch on every request
if (|hit && areq_i.fetch_valid && (en_i || (state_q != TAG_CMP))) begin
dreq_o.ready = 1'b1;
dreq_o.valid = 1'b1;
// we've got another request
if (dreq_i.req) begin
// request the content of all arrays
req = '1;
// save the index and stay in compare mode
vaddr_d = dreq_i.vaddr;
state_d = TAG_CMP;
// no new request -> go back to idle
end else begin
state_d = IDLE;
end
if (dreq_i.kill_s1)
state_d = IDLE;
// -------
// Miss
// -------
end else begin
state_d = REFILL;
// hit gonna be zero in most cases except for when the cache is disabled
evict_way_d = hit;
// save tag
tag_d = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
miss_o = en_i;
// get way which to replace
// only if there is no hit we should fall back to real replacement. If there was a hit then
// it means we are in bypass mode (!en_i) and should update the cache-line with the most recent
// value fetched from memory.
if (!(|hit)) begin
// all ways are currently full, randomly replace one of them
if (repl_w_random) begin
evict_way_d = random_way;
// shift the lfsr
update_lfsr = 1'b1;
// there is still one cache-line which is not valid ~> replace that one
end else begin
evict_way_d[repl_invalid] = 1'b1;
end
end
end
// if we didn't hit on the TLB we need to wait until the request has been completed
if (!areq_i.fetch_valid) begin
state_d = WAIT_ADDRESS_TRANSLATION;
end
end
// ~> wait here for a valid address translation, or on a translation even if the request has been killed
WAIT_ADDRESS_TRANSLATION, WAIT_ADDRESS_TRANSLATION_KILLED: begin
areq_o.fetch_req = 1'b1;
// retry the request if no exception occurred
if (areq_i.fetch_valid && (state_q == WAIT_ADDRESS_TRANSLATION)) begin
if (areq_i.fetch_exception.valid) begin
dreq_o.valid = 1'b1;
state_d = IDLE;
end else begin
state_d = REDO_REQ;
tag_d = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
end
end else if (areq_i.fetch_valid) begin
state_d = IDLE;
end
if (dreq_i.kill_s2)
state_d = WAIT_ADDRESS_TRANSLATION_KILLED;
end
// ~> request a cache-line refill
REFILL, WAIT_KILLED_REFILL: begin
axi.ar_valid = 1'b1;
axi.ar_addr[ICACHE_INDEX_WIDTH+ICACHE_TAG_WIDTH-1:0] = {tag_q, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET], {ICACHE_BYTE_OFFSET{1'b0}}};
burst_cnt_d = '0;
if (dreq_i.kill_s2)
state_d = WAIT_KILLED_REFILL;
// we need to finish this AXI transfer
if (axi.ar_ready)
state_d = (dreq_i.kill_s2 || (state_q == WAIT_KILLED_REFILL)) ? WAIT_KILLED_AXI_R_RESP : WAIT_AXI_R_RESP;
end
// ~> wait for the read response
WAIT_AXI_R_RESP, WAIT_KILLED_AXI_R_RESP: begin
req = evict_way_q;
addr = vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET];
if (axi.r_valid) begin
we = 1'b1;
tag_wdata.tag = tag_q;
tag_wdata.valid = 1'b1;
wdata[burst_cnt_q] = axi.r_data;
// enable the right write path
be[burst_cnt_q] = '1;
// increase burst count
burst_cnt_d = burst_cnt_q + 1;
end
if (dreq_i.kill_s2)
state_d = WAIT_KILLED_AXI_R_RESP;
if (axi.r_valid && axi.r_last) begin
state_d = (dreq_i.kill_s2) ? IDLE : REDO_REQ;
end
if ((state_q == WAIT_KILLED_AXI_R_RESP) && axi.r_last && axi.r_valid)
state_d = IDLE;
end
// ~> redo the request,
REDO_REQ: begin
req = '1;
addr = vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET];
tag = tag_q;
state_d = TAG_CMP_SAVED; // do tag comparison on the saved tag
end
// we need to wait for some AXI responses to come back
// here for the AW valid
WAIT_KILLED_REFILL: begin
if (axi.aw_valid)
state_d = IDLE;
end
// ~> we are coming here after reset or when a flush was requested
FLUSH: begin
addr = cnt_q;
cnt_d = cnt_q + 1;
req = '1;
we = 1;
// we've finished flushing, go back to idle
if (cnt_q == ICACHE_NUM_WORD - 1) begin
state_d = IDLE;
flushing_d = 1'b0;
end
end
default : state_d = IDLE;
endcase
// those are the states where we need to wait a little longer until we can safely exit
if (dreq_i.kill_s2 && !(state_q inside {REFILL, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP}) && !dreq_o.ready) begin
state_d = IDLE;
end
// if we are killing we can never give a valid response
if (dreq_i.kill_s2)
dreq_o.valid = 1'b0;
if (flush_i) begin
flushing_d = 1'b1;
dreq_o.ready = 1'b0; // we are not ready to accept a further request here
end
// if we are going to flush -> do not accept any new requests
if (flushing_q)
dreq_o.ready = 1'b0;
end
lzc #(
.WIDTH ( ICACHE_SET_ASSOC )
) i_lzc (
.in_i ( ~way_valid ),
.cnt_o ( repl_invalid ),
.empty_o ( repl_w_random )
);
// -----------------
// Replacement LFSR
// -----------------
lfsr #(.WIDTH (ICACHE_SET_ASSOC)) i_lfsr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( update_lfsr ),
.refill_way_oh ( random_way ),
.refill_way_bin ( ) // left open
);
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= FLUSH;
cnt_q <= '0;
vaddr_q <= '0;
tag_q <= '0;
evict_way_q <= '0;
flushing_q <= 1'b0;
burst_cnt_q <= '0;;
end else begin
state_q <= state_d;
cnt_q <= cnt_d;
vaddr_q <= vaddr_d;
tag_q <= tag_d;
evict_way_q <= evict_way_d;
flushing_q <= flushing_d;
burst_cnt_q <= burst_cnt_d;
end
end
`ifndef SYNTHESIS
initial begin
assert ($bits(axi.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus");
end
`endif*/
endmodule

View file

@ -0,0 +1,440 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner (schaffner@iis.ee.ethz.ch), ETH Zurich
// Date: 08.08.2018
// Description: adapter module to connect the L1D$ and L1I$ to the native
// interface of the OpenPiton L1.5 cache.
//
// A couple of notes:
//
// 1) the L15 has been designed for an OpenSparc T1 core with 2 threads and can serve only
// 1 ld and rd request per thread. Ariane has only one hart, but the LSU can issue several write
// requests to optimize bandwidth. hence, we reuse the threadid field to issue and track multiple
// requests (up to 2 in this case).
//
// 2) the CSM (clumped shared memory = coherence domain restriction in OpenPiton)
// feature is currently not supported by Ariane.
//
// 3) some features like blockinitstore, prefetch, ECC errors are not used (see interface below)
//
// 4) the arbiter can store up to two outgoing requests per cache. incoming responses are passed
// through one streaming register, and need to be consumed unconditionally by the caches.
//
// 5) The L1.5 protocol is closely related to the CPX bus of openSPARC, see also [1,2]
//
// 6) Note on transaction data and size: if a store packet is less than 64 bits, then
// the field is filled with copies of the data. in case of an interrupt vector,
// an 18bit interrupt vector is expected.
//
// 7) L1I$ refill requests always have precedence over L1D$ requests.
//
// 8) L1I$ fill requests are always complete cache lines at the moment
//
// 9) the adapter converts from little endian (Ariane) to big endian (openpiton), and vice versa.
//
// Refs: [1] OpenSPARC T1 Microarchitecture Specification
// https://www.oracle.com/technetwork/systems/opensparc/t1-01-opensparct1-micro-arch-1538959.html
// [2] OpenPiton Microarchitecture Specification
// https://parallel.princeton.edu/openpiton/docs/micro_arch.pdf
//
import ariane_pkg::*;
import piton_cache_pkg::*;
// Adapter between the L1 I$/D$ request/return interfaces and the native
// OpenPiton L1.5 interface.
//
// Request path : one FIFO per cache buffers outgoing requests; a fixed-priority
//                arbiter (I$ before D$) selects which FIFO head is presented to
//                the L1.5 and pops it once the L1.5 acknowledges the packet.
// Return path  : one FIFO stage buffers the L1.5 response, which is decoded and
//                forwarded to the I$ and/or D$ in the following cycle.
module piton_l15_adapter #(
) (
  input  logic         clk_i,
  input  logic         rst_ni,             // asynchronous reset, active low

  // icache request
  input  logic         icache_data_req_i,  // I$ wants to enqueue a request
  output logic         icache_data_ack_o,  // request accepted (FIFO not full)
  input  icache_req_t  icache_data_i,
  // returning packets must be consumed immediately
  output logic         icache_rtrn_vld_o,
  output icache_rtrn_t icache_rtrn_o,

  // dcache request
  input  logic         dcache_data_req_i,  // D$ wants to enqueue a request
  output logic         dcache_data_ack_o,  // request accepted (FIFO not full)
  input  dcache_req_t  dcache_data_i,
  // returning packets must be consumed immediately
  output logic         dcache_rtrn_vld_o,
  output dcache_rtrn_t dcache_rtrn_o,

  // TODO: amops interface
  // TODO: interrupt interface

  // L15
  output logic         l15_val_o,          // request valid towards L1.5
  input  logic         l15_ack_i,          // L1.5 has consumed the packet
  input  logic         l15_header_ack_i,   // L1.5 has accepted the header
  output l15_req_t     l15_data_o,
  input  logic         l15_val_i,          // response valid from L1.5
  output logic         l15_req_ack_o,      // response accepted
  input  l15_rtrn_t    l15_rtrn_i
);

  // request path
  icache_req_t icache_data;
  logic        icache_data_full, icache_data_empty, icache_data_push, icache_data_pop;
  dcache_req_t dcache_data;
  logic        dcache_data_full, dcache_data_empty, dcache_data_push, dcache_data_pop;

  logic [1:0]  arb_req;     // [0]: icache, [1]: dcache
  logic [1:0]  arb_ack;     // one-hot pop strobe for the request FIFOs
  logic [1:0]  arb_idx;     // index of the currently selected requester
  logic        sel_dcache;  // high when the dcache request drives the L1.5 port

  logic        header_ack_d, header_ack_q;

  // return path
  logic        rtrn_fifo_empty, rtrn_fifo_full, rtrn_fifo_pop;
  l15_rtrn_t   rtrn_fifo_data;

  ///////////////////////////////////////////////////////
  // request path to L15
  ///////////////////////////////////////////////////////

  // relevant l15 signals
  // l15_req_t                       l15_data_o.l15_rqtype;                // see below for encoding
  // logic                           l15_data_o.l15_nc;                    // non-cacheable bit
  // logic [2:0]                     l15_data_o.l15_size;                  // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
  // logic [L15_TID_WIDTH-1:0]       l15_data_o.l15_threadid;              // currently 0 or 1
  // logic                           l15_data_o.l15_invalidate_cacheline;  // unused by Ariane as L1 has no ECC at the moment
  // logic [L15_WAY_WIDTH-1:0]       l15_data_o.l15_l1rplway;              // way to replace
  // logic [39:0]                    l15_data_o.l15_address;               // physical address
  // logic [63:0]                    l15_data_o.l15_data;                  // word to write
  // logic [63:0]                    l15_data_o.l15_data_next_entry;       // unused in Ariane (only used for CAS atomic requests)
  // logic [L15_TLB_CSM_WIDTH-1:0]   l15_data_o.l15_csm_data;

  // need to deassert valid signal when header is acked
  // can move on when packet is acked (need to clear header ack)
  assign l15_val_o    = (|arb_req) & ~header_ack_q;
  assign header_ack_d = (l15_ack_i) ? 1'b0 : (header_ack_q | l15_header_ack_i);

  assign arb_req = {~dcache_data_empty,
                    ~icache_data_empty};

  assign dcache_data_pop = arb_ack[1];
  assign icache_data_pop = arb_ack[0];

  // a request is accepted whenever the corresponding FIFO has space ...
  assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
  assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;

  // ... and every accepted request must actually be written into the FIFO.
  // These push strobes were previously left undriven.
  assign icache_data_push = icache_data_ack_o;
  assign dcache_data_push = dcache_data_ack_o;

  // data mux
  assign sel_dcache = (arb_idx != '0);

  assign l15_data_o.l15_nc                   = (sel_dcache) ? dcache_data.nc    : icache_data.nc;
  assign l15_data_o.l15_size                 = (sel_dcache) ? dcache_data.size  : 3'b111; // always request full cache line for icache
  assign l15_data_o.l15_threadid             = (sel_dcache) ? dcache_data.tid   : icache_data.tid;
  assign l15_data_o.l15_invalidate_cacheline = 1'b0; // unused by Ariane as L1 has no ECC at the moment
  // The blockstore_o assertion below requires this bit to be low; drive it so
  // that it is not X in simulation.
  // NOTE(review): l15_req_t is not visible here - confirm whether further unused
  // request fields need an explicit tie-off as well.
  assign l15_data_o.l15_blockstore           = 1'b0;
  assign l15_data_o.l15_l1rplway             = (sel_dcache) ? dcache_data.way   : icache_data.way;
  assign l15_data_o.l15_address              = (sel_dcache) ? dcache_data.paddr : icache_data.paddr;
  assign l15_data_o.l15_data_next_entry      = '0; // unused in Ariane (only used for CAS atomic requests)
  assign l15_data_o.l15_csm_data             = '0; // unused in Ariane (only used for coherence domain restriction features)

  // swap endianess and replicate datawords if necessary
  // NOTE(review): the element selects below only pick whole bytes if
  // dcache_req_t.data is declared as an array of bytes. If it is a flat
  // vector, [0] / [1:0] / [3:0] select single bits - confirm against the
  // type definition in piton_cache_pkg.
  always_comb begin : p_datarepl
    unique case (dcache_data.size)
      3'b000: begin // 1byte
        l15_data_o.l15_data = swendian64({dcache_data.data[0],
                                          dcache_data.data[0],
                                          dcache_data.data[0],
                                          dcache_data.data[0],
                                          dcache_data.data[0],
                                          dcache_data.data[0],
                                          dcache_data.data[0],
                                          dcache_data.data[0]});
      end
      3'b001: begin // 2byte
        l15_data_o.l15_data = swendian64({dcache_data.data[1:0],
                                          dcache_data.data[1:0],
                                          dcache_data.data[1:0],
                                          dcache_data.data[1:0]});
      end
      3'b010: begin // 4byte
        l15_data_o.l15_data = swendian64({dcache_data.data[3:0],
                                          dcache_data.data[3:0]});
      end
      default: begin // 8 byte
        l15_data_o.l15_data = swendian64(dcache_data.data);
      end
    endcase // dcache_data.size
  end

  // arbiter
  // ifills always have priority
  //
  // The selection (arb_idx) depends on the pending requests only, so that the
  // request fields are valid for as long as l15_val_o is asserted. Only the pop
  // strobe (arb_ack) is qualified with the L1.5 acknowledge.
  // NOTE(review): the selection is not locked while a packet is in flight; an
  // icache request arriving between header ack and final ack of a dcache
  // request would take over the mux. Consider adding lock-in.
  always_comb begin : p_arb
    arb_idx = '0;
    arb_ack = '0;
    if (arb_req[0]) begin
      arb_idx    = 2'd0;
      arb_ack[0] = l15_ack_i;
    end else if (arb_req[1]) begin
      arb_idx    = 2'd1;
      arb_ack[1] = l15_ack_i;
    end
  end // p_arb

  // encode packet type
  always_comb begin : p_req
    l15_data_o.l15_rqtype = LOAD_RQ;
    unique case (arb_idx)
      0: begin // icache
        l15_data_o.l15_rqtype = IMISS_RQ;
      end
      1: begin // dcache
        unique case (dcache_data.rtype)
          DCACHE_STORE_REQ: begin
            l15_data_o.l15_rqtype = STORE_RQ;
          end
          DCACHE_LOAD_REQ: begin
            l15_data_o.l15_rqtype = LOAD_RQ;
          end
          // DCACHE_ATOMIC_REQ: begin
          //   //TODO
          // end
          // DCACHE_INT_REQ: begin
          //   //TODO
          // end
          // TODO: atomics
          // CAS1_RQ
          // CAS2_RQ
          // SWAP_RQ
          // TODO: interrupt request
          // INT_RQ
          default: begin
            ;
          end
        endcase // dcache_data.rtype
      end
      default: begin
        ;
      end
    endcase
  end // p_req

  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
    if (~rst_ni) begin
      header_ack_q <= 1'b0;
    end else begin
      header_ack_q <= header_ack_d;
    end
  end

  fifo_v2 #(
    .dtype ( icache_req_t           ),
    .DEPTH ( ADAPTER_REQ_FIFO_DEPTH )
  ) i_icache_data_fifo (
    .clk_i       ( clk_i             ),
    .rst_ni      ( rst_ni            ),
    .flush_i     ( 1'b0              ),
    .testmode_i  ( 1'b0              ),
    .full_o      ( icache_data_full  ),
    .empty_o     ( icache_data_empty ),
    .alm_full_o  (                   ),
    .alm_empty_o (                   ),
    .data_i      ( icache_data_i     ),
    .push_i      ( icache_data_push  ),
    .data_o      ( icache_data       ),
    .pop_i       ( icache_data_pop   )
  );

  fifo_v2 #(
    .dtype ( dcache_req_t           ),
    .DEPTH ( ADAPTER_REQ_FIFO_DEPTH )
  ) i_dcache_data_fifo (
    .clk_i       ( clk_i             ),
    .rst_ni      ( rst_ni            ),
    .flush_i     ( 1'b0              ),
    .testmode_i  ( 1'b0              ),
    .full_o      ( dcache_data_full  ),
    .empty_o     ( dcache_data_empty ),
    .alm_full_o  (                   ),
    .alm_empty_o (                   ),
    .data_i      ( dcache_data_i     ),
    .push_i      ( dcache_data_push  ),
    .data_o      ( dcache_data       ),
    .pop_i       ( dcache_data_pop   )
  );

  ///////////////////////////////////////////////////////
  // return path from L15
  ///////////////////////////////////////////////////////

  // relevant l15 signals
  // l15_rtrn_i.l15_returntype;            // see below for encoding
  // l15_rtrn_i.l15_noncacheable;          // non-cacheable bit
  // l15_rtrn_i.l15_atomic;                // asserted in load return and store ack pack
  // l15_rtrn_i.l15_threadid;              // used as transaction ID
  // l15_rtrn_i.l15_f4b;                   // 4byte instruction fill. not used in Ariane
  // l15_rtrn_i.l15_data_0;                // used for both caches
  // l15_rtrn_i.l15_data_1;                // used for both caches
  // l15_rtrn_i.l15_data_2;                // currently only used for I$
  // l15_rtrn_i.l15_data_3;                // currently only used for I$
  // l15_rtrn_i.l15_inval_icache_all_way;  // invalidate all ways
  // l15_rtrn_i.l15_inval_address_15_4;    // invalidate selected cacheline
  // l15_rtrn_i.l15_inval_dcache_inval;    // invalidate selected cacheline and way
  // l15_rtrn_i.l15_inval_way;             // way to invalidate

  // acknowledge if we have space to hold this packet
  assign l15_req_ack_o = l15_val_i & ~rtrn_fifo_full;
  // packets have to be consumed immediately
  assign rtrn_fifo_pop = ~rtrn_fifo_empty;

  // decode packet type
  always_comb begin : p_rtrn_logic
    icache_rtrn_o.rtype = ICACHE_IFILL_ACK;
    dcache_rtrn_o.rtype = DCACHE_LOAD_ACK;
    icache_rtrn_vld_o   = 1'b0;
    dcache_rtrn_vld_o   = 1'b0;
    if (~rtrn_fifo_empty) begin
      unique case (rtrn_fifo_data.l15_returntype)
        LOAD_RET: begin
          dcache_rtrn_o.rtype = DCACHE_LOAD_ACK;
          dcache_rtrn_vld_o   = 1'b1;
        end
        ST_ACK: begin
          dcache_rtrn_o.rtype = DCACHE_STORE_ACK;
          dcache_rtrn_vld_o   = 1'b1;
        end
        // INT_RET: begin
        // TODO: implement this
        // dcache_rtrn_o.reqType = DCACHE_INT_ACK;
        // end
        IFILL_RET: begin
          icache_rtrn_o.rtype = ICACHE_IFILL_ACK;
          icache_rtrn_vld_o   = 1'b1;
        end
        EVICT_REQ: begin
          // invalidations are forwarded to both caches
          icache_rtrn_o.rtype = ICACHE_INV_REQ;
          dcache_rtrn_o.rtype = DCACHE_INV_REQ;
          icache_rtrn_vld_o   = 1'b1;
          dcache_rtrn_vld_o   = 1'b1;
        end
        // CPX_RESTYPE_ATOMIC_RES: begin
        // TODO: implement this
        // dcache_rtrn_o.reqType = DCACHE_INT_ACK;
        // end
        default: begin
          ;
        end
      endcase // rtrn_fifo_data.l15_returntype
    end
  end

  // icache fifo signal mapping
  // swap endianess here since openpiton is big endian
  assign icache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_3),
                                swendian64(rtrn_fifo_data.l15_data_2),
                                swendian64(rtrn_fifo_data.l15_data_1),
                                swendian64(rtrn_fifo_data.l15_data_0) };
  assign icache_rtrn_o.tid  = rtrn_fifo_data.l15_threadid;
  assign icache_rtrn_o.nc   = rtrn_fifo_data.l15_noncacheable;

  // dcache fifo signal mapping
  assign dcache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_1),
                                swendian64(rtrn_fifo_data.l15_data_0) };
  assign dcache_rtrn_o.tid  = rtrn_fifo_data.l15_threadid;
  assign dcache_rtrn_o.nc   = rtrn_fifo_data.l15_noncacheable;

  // invalidation signal mapping
  assign icache_rtrn_o.inv.addr = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
  assign icache_rtrn_o.inv.way  = rtrn_fifo_data.l15_inval_way;
  assign icache_rtrn_o.inv.vld  = rtrn_fifo_data.l15_inval_icache_inval;
  assign icache_rtrn_o.inv.all  = rtrn_fifo_data.l15_inval_icache_all_way;

  assign dcache_rtrn_o.inv.addr = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
  assign dcache_rtrn_o.inv.way  = rtrn_fifo_data.l15_inval_way;
  assign dcache_rtrn_o.inv.vld  = rtrn_fifo_data.l15_inval_dcache_inval;
  assign dcache_rtrn_o.inv.all  = rtrn_fifo_data.l15_inval_dcache_all_way;

  fifo_v2 #(
    .dtype ( l15_rtrn_t              ),
    .DEPTH ( ADAPTER_RTRN_FIFO_DEPTH )
  ) i_rtrn_fifo (
    .clk_i       ( clk_i           ),
    .rst_ni      ( rst_ni          ),
    .flush_i     ( 1'b0            ),
    .testmode_i  ( 1'b0            ),
    .full_o      ( rtrn_fifo_full  ),
    .empty_o     ( rtrn_fifo_empty ),
    .alm_full_o  (                 ),
    .alm_empty_o (                 ),
    .data_i      ( l15_rtrn_i      ),
    .push_i      ( l15_req_ack_o   ),
    .data_o      ( rtrn_fifo_data  ),
    .pop_i       ( rtrn_fifo_pop   )
  );

  ///////////////////////////////////////////////////////
  // assertions
  ///////////////////////////////////////////////////////

  //pragma translate_off
  `ifndef VERILATOR
  // an invalidation packet must carry at least one invalidation flag
  invalidations: assert property (
    @(posedge clk_i) disable iff (~rst_ni) l15_val_i |-> l15_rtrn_i.l15_returntype == EVICT_REQ |-> (l15_rtrn_i.l15_inval_icache_inval   |
                                                                                                    l15_rtrn_i.l15_inval_dcache_inval   |
                                                                                                    l15_rtrn_i.l15_inval_icache_all_way |
                                                                                                    l15_rtrn_i.l15_inval_dcache_all_way))
    else $fatal(1, "[l15_adapter] got invalidation package with zero invalidation flags");

  blockstore_o: assert property (
    @(posedge clk_i) disable iff (~rst_ni) l15_val_o |-> !l15_data_o.l15_blockstore)
    else $fatal(1, "[l15_adapter] blockstores are not supported");

  blockstore_i: assert property (
    @(posedge clk_i) disable iff (~rst_ni) l15_val_i |-> !l15_rtrn_i.l15_blockinitstore)
    else $fatal(1, "[l15_adapter] blockstores are not supported");

  // only check the f4b flag while the return packet is valid
  instr_fill_size: assert property (
    @(posedge clk_i) disable iff (~rst_ni) l15_val_i |-> !l15_rtrn_i.l15_f4b)
    else $fatal(1, "[l15_adapter] 4b instruction fills not supported");

  unsuported_rtrn_types: assert property (
    @(posedge clk_i) disable iff (~rst_ni) (l15_val_i |-> l15_rtrn_i.l15_returntype inside {LOAD_RET, ST_ACK, IFILL_RET, EVICT_REQ}))
    else $fatal(1, "[l15_adapter] unsupported rtrn type");

  initial begin
    // assert wrong parameterizations
    assert (L15_SET_ASSOC == ICACHE_SET_ASSOC)
      else $fatal(1, "[l15_adapter] number of icache ways not aligned with L15");
    // assert wrong parameterizations
    assert (L15_SET_ASSOC == DCACHE_SET_ASSOC)
      else $fatal(1, "[l15_adapter] number of dcache ways not aligned with L15");
    // invalidation address returned by L1.5 is 16 bit
    assert ((ICACHE_INDEX_WIDTH <= 16) && (DCACHE_INDEX_WIDTH <= 16))
      else $fatal(1, "[l15_adapter] maximum number of index bits supported by L1.5 is 16");
    // assert mismatch of cache line width
    assert (ICACHE_LINE_WIDTH==256)
      else $fatal(1, "[l15_adapter] ichache lines are currently restricted to 256 bits");
    assert (DCACHE_LINE_WIDTH==128)
      else $fatal(1, "[l15_adapter] dchache lines are currently restricted to 128 bits");
  end
  `endif
  //pragma translate_on

endmodule // piton_l15_adapter