cva6/src/cache_ctrl.sv

// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License.  You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// File:   cache_ctrl.svh
// Author: Florian Zaruba <zarubaf@ethz.ch>
// Date:   14.10.2017
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// Description: Cache controller


import ariane_pkg::*;
import nbdcache_pkg::*;

module cache_ctrl #(
        parameter int unsigned SET_ASSOCIATIVITY = 8,
        parameter int unsigned INDEX_WIDTH       = 12,
        parameter int unsigned TAG_WIDTH         = 44,
        parameter int unsigned CACHE_LINE_WIDTH  = 100,
        parameter logic [63:0] CACHE_START_ADDR  = 64'h4000_0000
    )(
        input  logic                                               clk_i,     // Clock
        input  logic                                               rst_ni,    // Asynchronous reset active low
        input  logic                                               bypass_i,  // enable cache
        output logic                                               busy_o,
        // Core request ports
        input  logic [INDEX_WIDTH-1:0]                             address_index_i,
        input  logic [TAG_WIDTH-1:0]                               address_tag_i,
        input  logic [63:0]                                        data_wdata_i,
        input  logic                                               data_req_i,
        input  logic                                               data_we_i,
        input  logic [7:0]                                         data_be_i,
        input  logic [1:0]                                         data_size_i,
        input  logic                                               kill_req_i,
        input  logic                                               tag_valid_i,
        output logic                                               data_gnt_o,
        output logic                                               data_rvalid_o,
        output logic [63:0]                                        data_rdata_o,
        input  amo_t                                               amo_op_i,
        // SRAM interface
        output logic [SET_ASSOCIATIVITY-1:0]                       req_o,  // req is valid
        output logic [INDEX_WIDTH-1:0]                             addr_o, // address into cache array
        input  logic                                               gnt_i,
        output cache_line_t                                        data_o,
        output cl_be_t                                             be_o,
        output logic [TAG_WIDTH-1:0]                               tag_o, //valid one cycle later
        input  cache_line_t [SET_ASSOCIATIVITY-1:0]                data_i,
        output logic                                               we_o,
        input  logic [SET_ASSOCIATIVITY-1:0]                       hit_way_i,
        // Miss handling
        output miss_req_t                                          miss_req_o,
        // return
        input  logic                                               miss_gnt_i,
        input  logic                                               active_serving_i, // the miss unit is currently active for this unit, serving the miss
        input  logic [63:0]                                        critical_word_i,
        input  logic                                               critical_word_valid_i,

        input  logic                                               bypass_gnt_i,
        input  logic                                               bypass_valid_i,
        input  logic [63:0]                                        bypass_data_i,
        // check MSHR for aliasing
        output logic [55:0]                                        mshr_addr_o,
        input  logic                                               mshr_addr_matches_i
);

    enum logic [3:0] {
        IDLE, WAIT_TAG, WAIT_TAG_BYPASSED, STORE_REQ, WAIT_REFILL_VALID, WAIT_REFILL_GNT, WAIT_TAG_SAVED, WAIT_MSHR, WAIT_CRITICAL_WORD
    } state_d, state_q;

    typedef struct packed {
        logic [INDEX_WIDTH-1:0] index;
        logic [TAG_WIDTH-1:0]   tag;
        logic [7:0]             be;
        logic [1:0]             size;
        logic                   we;
        logic [63:0]            wdata;
        logic                   bypass;
    } mem_req_t;

    logic [SET_ASSOCIATIVITY-1:0] hit_way_d, hit_way_q;

    assign busy_o = (state_q != IDLE);

    mem_req_t mem_req_d, mem_req_q;

    logic [CACHE_LINE_WIDTH-1:0] cl_i;

    always_comb begin : way_select
        cl_i = '0;
        for (int unsigned i = 0; i < SET_ASSOCIATIVITY; i++)
            if (hit_way_i[i])
                cl_i = data_i[i].data;

        // cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
    end

    // --------------
    // Cache FSM
    // --------------
    always_comb begin : cache_ctrl_fsm
        automatic logic [$clog2(CACHE_LINE_WIDTH)-1:0] cl_offset;
        // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
        // cache-line offset -> multiple of 64
        cl_offset = mem_req_q.index[BYTE_OFFSET-1:3] << 6; // shift by 6 to the left

        // default assignments
        state_d   = state_q;
        mem_req_d = mem_req_q;
        hit_way_d = hit_way_q;

        // output assignments
        data_gnt_o    = 1'b0;
        data_rvalid_o = 1'b0;
        data_rdata_o  = '0;
        miss_req_o    = '0;
        mshr_addr_o   = '0;
        // Memory array communication
        req_o  = '0;
        addr_o = address_index_i;
        data_o = '0;
        be_o   = '0;
        tag_o  = '0;
        we_o   = '0;
        tag_o  = 'b0;

        case (state_q)

            IDLE: begin
                // a new request arrived
                if (data_req_i) begin
                    // request the cache line - we can do this specualtive
                    req_o = '1;

                    // save index, be and we
                    mem_req_d.index = address_index_i;
                    mem_req_d.tag   = address_tag_i;
                    mem_req_d.be    = data_be_i;
                    mem_req_d.size  = data_size_i;
                    mem_req_d.we    = data_we_i;
                    mem_req_d.wdata = data_wdata_i;

                    // Bypass mode, check for uncacheable address here as well
                    if (bypass_i) begin
                        state_d = WAIT_TAG_BYPASSED;
                        // grant this access
                        data_gnt_o = 1'b1;
                        mem_req_d.bypass = 1'b1;
                    // ------------------
                    // Cache is enabled
                    // ------------------
                    end else begin
                        // Wait that we have access on the memory array
                        if (gnt_i) begin
                            state_d = WAIT_TAG;
                            mem_req_d.bypass = 1'b0;
                            // only for a read
                            if (!data_we_i)
                                data_gnt_o = 1'b1;
                        end
                    end
                end
            end

            // cache enabled and waiting for tag
            WAIT_TAG, WAIT_TAG_SAVED: begin
                // depending on where we come from
                // For the store case the tag comes in the same cycle
                tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag :  address_tag_i;

                // we speculatively request another transfer
                if (data_req_i) begin
                    req_o      = '1;
                end

                // check that the client really wants to do the request
                if (!kill_req_i) begin
                    // ------------
                    // HIT CASE
                    // ------------
                    if (|hit_way_i) begin
                        // we can request another cache-line if this was a load
                        // make another request
                        if (data_req_i && !mem_req_q.we) begin
                            state_d          = WAIT_TAG; // switch back to WAIT_TAG
                            mem_req_d.index  = address_index_i;
                            mem_req_d.be     = data_be_i;
                            mem_req_d.size   = data_size_i;
                            mem_req_d.we     = data_we_i;
                            mem_req_d.wdata  = data_wdata_i;
                            mem_req_d.tag    = address_tag_i;
                            mem_req_d.bypass = 1'b0;
                            data_gnt_o = gnt_i;

                            if (!gnt_i) begin
                                state_d = IDLE;
                            end

                        end else begin
                            state_d = IDLE;
                        end

                        // this is timing critical
                        // data_rdata_o = cl_i[cl_offset +: 64];
                        case (mem_req_q.index[3])
                            1'b0: data_rdata_o = cl_i[63:0];
                            1'b1: data_rdata_o = cl_i[127:64];
                        endcase

                        // report data for a read
                        if (!mem_req_q.we) begin
                            data_rvalid_o = 1'b1;

                        // else this was a store so we need an extra step to handle it
                        end else begin
                            state_d = STORE_REQ;
                            hit_way_d = hit_way_i;
                        end
                    // ------------
                    // MISS CASE
                    // ------------
                    end else begin
                        // also save tag
                        mem_req_d.tag = address_tag_i;
                        // make a miss request
                        state_d = WAIT_REFILL_GNT;
                    end
                    // ---------------
                    // Check MSHR
                    // ---------------
                    mshr_addr_o = {address_tag_i, mem_req_q.index};
                    // we've got a match on MSHR
                    if (mshr_addr_matches_i) begin
                        state_d = WAIT_MSHR;
                        // save tag if we didn't already save it e.g.: we are not in in the Tag saved state
                        if (state_q != WAIT_TAG_SAVED)
                            mem_req_d.tag = address_tag_i;
                    end
                    // -------------------------
                    // Check for cache-ability
                    // -------------------------
                    if (tag_o < CACHE_START_ADDR[TAG_WIDTH+INDEX_WIDTH-1:INDEX_WIDTH]) begin
                        mem_req_d.tag = address_tag_i;
                        mem_req_d.bypass = 1'b1;
                        state_d = WAIT_REFILL_GNT;
                    end
                end else begin
                    // we can potentially accept a new request -> I don't know how this works out timing vise
                    // as this will chain some paths together...
                    // For now this should not happen to frequently and we spare another cycle
                    // go back to idle
                    state_d = IDLE;
                    data_rvalid_o = 1'b1;
                end
            end

            // ~> we are here as we need a second round of memory access for a store
            STORE_REQ: begin
                // store data, write dirty bit
                req_o = hit_way_q;
                addr_o = mem_req_q.index;
                we_o  = 1'b1;

                be_o.dirty = hit_way_q;
                be_o.valid = hit_way_q;

                // set the correct byte enable
                for (int unsigned i = 0; i < 8; i++) begin
                    if (mem_req_q.be[i])
                        be_o.data[cl_offset + i*8 +: 8] = '1;
                end

                data_o.data[cl_offset +: 64] = mem_req_q.wdata;
                // ~> change the state
                data_o.dirty = 1'b1;
                data_o.valid = 1'b1;

                // got a grant ~> this is finished now
                if (gnt_i) begin
                    data_gnt_o = 1'b1;
                    state_d = IDLE;
                end
            end

            // we've got a match on MSHR ~> miss unit is scurrently serving a request
            WAIT_MSHR: begin
                mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
                // we can start a new request
                if (!mshr_addr_matches_i) begin
                    req_o = '1;

                    addr_o = mem_req_q.index;

                    if (gnt_i)
                        state_d = WAIT_TAG_SAVED;
                end
            end

            // its for sure a miss
            WAIT_TAG_BYPASSED: begin
                // the request was killed
                if (kill_req_i) begin
                    state_d = IDLE;
                    // we need to ack the killing
                    data_rvalid_o = 1'b1;
                end else begin
                    // save tag
                    mem_req_d.tag = address_tag_i;
                    state_d = WAIT_REFILL_GNT;
                end
            end

            // ~> wait for grant from miss unit
            WAIT_REFILL_GNT: begin

                mshr_addr_o = {mem_req_q.tag, mem_req_q.index};

                miss_req_o.valid = 1'b1;
                miss_req_o.bypass = mem_req_q.bypass;
                miss_req_o.addr = {mem_req_q.tag, mem_req_q.index};
                miss_req_o.be = mem_req_q.be;
                miss_req_o.size = mem_req_q.size;
                miss_req_o.we = mem_req_q.we;
                miss_req_o.wdata = mem_req_q.wdata;

                // got a grant so go to valid
                if (bypass_gnt_i) begin
                    state_d = WAIT_REFILL_VALID;
                    // if this was a write we still need to give a grant to the store unit
                    if (mem_req_q.we)
                        data_gnt_o = 1'b1;
                end

                if (miss_gnt_i && !mem_req_q.we)
                    state_d = WAIT_CRITICAL_WORD;
                else if (miss_gnt_i) begin
                    state_d = IDLE;
                    data_gnt_o = 1'b1;
                end

                // it can be the case that the miss unit is currently serving a request which matches ours
                // so we need to check the mshr for matching continously
                // if the mshr matches we need to go to a different state -> we should never get a matching mshr and a high miss_gnt_i
                if (mshr_addr_matches_i && !active_serving_i) begin
                    state_d = WAIT_MSHR;
                end
            end

            // ~> wait for critical word to arrive
            WAIT_CRITICAL_WORD: begin
                // speculatively request another word
                if (data_req_i) begin
                    // request the cache line
                    req_o = '1;
                end

                if (critical_word_valid_i) begin
                    data_rvalid_o = 1'b1;
                    data_rdata_o = critical_word_i;
                    // we can make another request
                    if (data_req_i) begin
                        // save index, be and we
                        mem_req_d.index = address_index_i;
                        mem_req_d.be    = data_be_i;
                        mem_req_d.size  = data_size_i;
                        mem_req_d.we    = data_we_i;
                        mem_req_d.wdata = data_wdata_i;
                        mem_req_d.tag   = address_tag_i;


                        state_d = IDLE;

                        // Wait until we have access on the memory array
                        if (gnt_i) begin
                            state_d = WAIT_TAG;
                            mem_req_d.bypass = 1'b0;
                            data_gnt_o = 1'b1;
                        end

                    end else begin
                        state_d = IDLE;
                    end
                end
            end
            // ~> wait until the bypass request is valid
            WAIT_REFILL_VALID: begin
                // got a valid answer
                if (bypass_valid_i) begin
                    data_rdata_o = bypass_data_i;
                    data_rvalid_o = 1'b1;
                    state_d = IDLE;
                end
            end

        endcase
    end

    // --------------
    // Registers
    // --------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            state_q   <= IDLE;
            mem_req_q <= '0;
            hit_way_q <= '0;
        end else begin
            state_q   <= state_d;
            mem_req_q <= mem_req_d;
            hit_way_q <= hit_way_d;
        end
    end

    `ifndef SYNTHESIS
        initial begin
            assert (CACHE_LINE_WIDTH == 128) else $error ("Cacheline width has to be 128 for the moment. But only small changes required in data select logic");
        end
    `endif
endmodule

module AMO_alu (
        input logic         clk_i,
        input logic         rst_ni,
        // AMO interface
        input  logic        amo_commit_i, // commit atomic memory operation
        output logic        amo_valid_o,  // we have a valid AMO result
        output logic [63:0] amo_result_o, // result of atomic memory operation
        input  logic        amo_flush_i   // forget about AMO
    );

endmodule