diff --git a/Makefile b/Makefile
index d5d5469d5..92eaf8d84 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ top_level = core_tb
 test_top_level = core_tb
 
 # Ariane PKG
-ariane_pkg = include/ariane_pkg.sv
+ariane_pkg = include/ariane_pkg.sv include/nbdcache_pkg.sv
 # utility modules
 util = $(wildcard src/util/*.sv*)
 # test targets
@@ -50,7 +50,7 @@ riscv-tests = rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p-
                 rv64ui-v-sraiw rv64ui-v-sraw rv64ui-v-srl rv64ui-v-srli rv64ui-v-srliw rv64ui-v-srlw \
                 rv64ui-v-lb rv64ui-v-lbu rv64ui-v-ld rv64ui-v-lh rv64ui-v-lhu rv64ui-v-lui
 
-        # rv64um-p-mul rv64um-p-mulh rv64um-p-mulhsu rv64um-p-mulhu rv64um-p-div rv64um-p-divu rv64um-p-rem \
+                # rv64um-p-mul rv64um-p-mulh rv64um-p-mulhsu rv64um-p-mulhu rv64um-p-div rv64um-p-divu rv64um-p-rem \
                 # rv64um-p-remu rv64um-p-mulw rv64um-p-divw rv64um-p-divuw rv64um-p-remw rv64um-p-remuw \
                 # rv64um-v-mul rv64um-v-mulh rv64um-v-mulhsu rv64um-v-mulhu rv64um-v-div rv64um-v-divu rv64um-v-rem \
                 # rv64um-v-remu rv64um-v-mulw rv64um-v-divw rv64um-v-divuw rv64um-v-remw rv64um-v-remuw
diff --git a/include/nbdcache_pkg.sv b/include/nbdcache_pkg.sv
new file mode 100644
index 000000000..5ce6701ac
--- /dev/null
+++ b/include/nbdcache_pkg.sv
@@ -0,0 +1,19 @@
+/* File:   nbdcache_pkg.sv
+ * Author: Florian Zaruba
+ * Date:   13.10.2017
+ *
+ * Copyright (C) 2017 ETH Zurich, University of Bologna
+ * All rights reserved.
+ *
+ * Description: Contains all the necessary defines for the non-blocking DCache
+ *              of Ariane in one package.
+ */
+
+package nbdcache_pkg;
+
+    // Kind of memory request handed to the AXI adapter:
+    //   SINGLE_REQ     - one 64-bit word (single-beat AXI transaction)
+    //   CACHE_LINE_REQ - a complete cache line (multi-beat AXI burst)
+    typedef enum logic { SINGLE_REQ, CACHE_LINE_REQ } req_t;
+
+endpackage
diff --git a/src/axi_adapter.sv b/src/axi_adapter.sv
new file mode 100644
index 000000000..514ee230d
--- /dev/null
+++ b/src/axi_adapter.sv
@@ -0,0 +1,341 @@
+// Author: Florian Zaruba, ETH Zurich
+// Date: 13.10.2017
+// Description: This module handles the AXI transactions
+//
+// Copyright (C) 2017 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+import nbdcache_pkg::*;
+
+// Translates single-word and whole-cache-line requests into AXI transactions.
+//   - writes: single-beat, or an INCR burst of CACHE_LINE_WIDTH/64 beats
+//   - reads : single-beat, or a WRAP burst starting at the requested word;
+//             the first returned beat is forwarded as the critical word
+// A request is acknowledged with gnt_o; completion is signalled with valid_o.
+module axi_adapter #(
+    parameter int unsigned CACHE_LINE_WIDTH = 256, // cache line size in bits (multiple of 64)
+    parameter int unsigned AXI_ID_WIDTH     = 10,
+    parameter int unsigned AXI_USER_WIDTH   = 10
+)(
+    input  logic                                        clk_i,  // Clock
+    input  logic                                        rst_ni, // Asynchronous reset active low
+
+    input  logic                                        req_i,   // request valid
+    input  req_t                                        type_i,  // single word or whole cache line
+    output logic                                        gnt_o,   // request accepted
+    input  logic [63:0]                                 addr_i,
+    input  logic                                        we_i,    // 1: write, 0: read
+    input  logic [(CACHE_LINE_WIDTH/64)-1:0][63:0]      wdata_i,
+    input  logic [(CACHE_LINE_WIDTH/64)-1:0][7:0]       be_i,
+    input  logic [AXI_ID_WIDTH-1:0]                     id_i,
+    // read port
+    output logic                                        valid_o,
+    // NOTE(review): rdata_o is never driven in this module (the only assignment
+    // is commented out below) and its fixed 128-bit width is independent of
+    // CACHE_LINE_WIDTH - decide on the intended width and drive it.
+    output logic [127:0]                                rdata_o,
+    output logic [AXI_ID_WIDTH-1:0]                     id_o,
+    // critical word - read port
+    output logic [63:0]                                 critical_word_o,
+    output logic                                        critical_word_valid,
+    // AXI port
+    AXI_BUS.Master                                      axi
+);
+    // AXI encodes the burst length as "number of beats - 1" (AxLEN), so
+    // BURST_SIZE is both the AxLEN value and the index of the last beat/word.
+    localparam BURST_SIZE = CACHE_LINE_WIDTH/64-1;
+
+    enum logic [3:0] {
+        IDLE, WAIT_B_VALID, WAIT_AW_READY, WAIT_LAST_W_READY, WAIT_LAST_W_READY_AW_READY, WAIT_AW_READY_BURST,
+        WAIT_R_VALID, WAIT_R_VALID_MULTIPLE, COMPLETE_READ
+    } state_q, state_d;
+
+    // counter for AXI transfers: number of beats still outstanding after the current one
+    logic [$clog2(CACHE_LINE_WIDTH/64)-1:0] cnt_d, cnt_q;
+    logic [(CACHE_LINE_WIDTH/64)-1:0][63:0] cache_line_d, cache_line_q;
+    // save the address for a read, as we allow for non-cacheline aligned accesses
+    logic [$clog2(CACHE_LINE_WIDTH/64)-1:0] addr_offset_d, addr_offset_q;
+    // word slot of the current read beat; truncated on purpose so that it wraps
+    // around the cache line exactly like the wrapping AXI burst does
+    logic [$clog2(CACHE_LINE_WIDTH/64)-1:0] rd_index;
+
+    always_comb begin : axi_fsm
+        // Default assignments
+        axi.aw_valid  = 1'b0;
+        axi.aw_addr   = addr_i;
+        axi.aw_prot   = 3'b0;
+        axi.aw_region = 4'b0;
+        axi.aw_len    = 8'b0;
+        axi.aw_size   = 3'b011; // 8 bytes
+        axi.aw_burst  = 2'b01;  // incremental transfer
+        axi.aw_lock   = 1'b0;
+        axi.aw_cache  = 4'b0;
+        axi.aw_qos    = 4'b0;
+        axi.aw_id     = id_i;
+        axi.aw_user   = '0;
+
+        axi.ar_valid  = 1'b0;
+        axi.ar_addr   = addr_i;
+        axi.ar_prot   = 3'b0;
+        axi.ar_region = 4'b0;
+        axi.ar_len    = 8'b0;
+        axi.ar_size   = 3'b011; // 8 bytes
+        axi.ar_burst  = 2'b10;  // wrapping transfer
+        axi.ar_lock   = 1'b0;
+        axi.ar_cache  = 4'b0;
+        axi.ar_qos    = 4'b0;
+        axi.ar_id     = id_i;
+        axi.ar_user   = '0;
+
+        axi.w_valid   = 1'b0;
+        axi.w_data    = wdata_i[0];
+        axi.w_strb    = be_i[0];
+        axi.w_user    = '0;
+        axi.w_last    = 1'b0;
+
+        axi.b_ready   = 1'b0;
+        axi.r_ready   = 1'b0;
+
+        gnt_o    = 1'b0;
+        valid_o  = 1'b0;
+        id_o     = axi.r_id;
+
+        // rdata_o = axi.r_data;
+        critical_word_o     = axi.r_data;
+        critical_word_valid = 1'b0;
+
+        state_d       = state_q;
+        cnt_d         = cnt_q;
+        cache_line_d  = cache_line_q;
+        addr_offset_d = addr_offset_q;
+        // beat number (BURST_SIZE - cnt_q) rotated by the offset of the first word
+        rd_index      = addr_offset_q + (BURST_SIZE - cnt_q);
+
+        case (state_q)
+
+            IDLE: begin
+                cnt_d = '0;
+                // we have an incoming request
+                if (req_i) begin
+                    // is this a read or write?
+                    // write
+                    if (we_i) begin
+                        // the data is valid
+                        axi.aw_valid = 1'b1;
+                        axi.w_valid  = 1'b1;
+                        // its a single write
+                        if (type_i == SINGLE_REQ) begin
+                            // a single-beat write: the only beat is also the last one
+                            axi.w_last = 1'b1;
+                            // single req can be granted here
+                            gnt_o = axi.aw_ready & axi.w_ready;
+
+                            case ({axi.aw_ready, axi.w_ready})
+                                2'b11: state_d = WAIT_B_VALID;
+                                2'b01: state_d = WAIT_AW_READY;
+                                2'b10: state_d = WAIT_LAST_W_READY;
+                                default: state_d = IDLE;
+                            endcase
+                        // its a request for the whole cache line
+                        end else begin
+                            axi.aw_len = BURST_SIZE; // AWLEN = number of beats - 1
+                            axi.w_last = 1'b0;
+                            axi.w_data = wdata_i[0];
+                            axi.w_strb = be_i[0];
+
+                            if (axi.w_ready)
+                                cnt_d = BURST_SIZE - 1;
+                            else
+                                cnt_d = BURST_SIZE;
+
+                            case ({axi.aw_ready, axi.w_ready})
+                                2'b11: state_d = WAIT_LAST_W_READY;
+                                2'b01: state_d = WAIT_LAST_W_READY_AW_READY;
+                                2'b10: state_d = WAIT_LAST_W_READY;
+                                default:;
+                            endcase
+                        end
+                    // read
+                    end else begin
+
+                        axi.ar_valid = 1'b1;
+                        gnt_o = axi.ar_ready;
+
+                        if (type_i != SINGLE_REQ) begin
+                            axi.ar_len = BURST_SIZE; // ARLEN = number of beats - 1
+                            cnt_d = BURST_SIZE;
+                        end else begin
+                            // AXI only allows wrapping bursts of 2, 4, 8 or 16 beats
+                            axi.ar_burst = 2'b01;
+                        end
+
+                        if (axi.ar_ready) begin
+                            state_d = (type_i == SINGLE_REQ) ? WAIT_R_VALID : WAIT_R_VALID_MULTIPLE;
+                            // index of the requested 64-bit word within the cache line (skip the 3 byte-offset bits)
+                            addr_offset_d = addr_i[3 +: $clog2(CACHE_LINE_WIDTH/64)];
+                        end
+                    end
+                end
+            end
+
+            // ~> from single write: the W beat was accepted, only AW is outstanding
+            WAIT_AW_READY: begin
+                axi.aw_valid = 1'b1;
+                axi.aw_len   = 8'b0;
+
+                if (axi.aw_ready) begin
+                    // the request could not be granted in IDLE (AW was not ready), so do it now
+                    gnt_o   = 1'b1;
+                    state_d = WAIT_B_VALID;
+                end
+            end
+
+            // ~> we need to wait for an aw_ready and there is at least one outstanding write
+            WAIT_LAST_W_READY_AW_READY: begin
+
+                axi.w_valid  = 1'b1;
+                axi.w_last   = (cnt_q == '0) ? 1'b1 : 1'b0;
+                axi.w_data   = wdata_i[BURST_SIZE-cnt_q];
+                axi.w_strb   = be_i[BURST_SIZE-cnt_q];
+
+                axi.aw_valid = 1'b1;
+                // we are here because we want to write a cache line
+                axi.aw_len   = BURST_SIZE;
+                // {aw_ready, w_ready}
+                case ({axi.aw_ready, axi.w_ready})
+                    // only the W channel accepted a beat, AW is still outstanding
+                    2'b01: begin
+                        // was this the last data beat?
+                        if (cnt_q == 0)
+                            state_d = WAIT_AW_READY_BURST;
+                        else // no, so reduce the count and stay here
+                            cnt_d = cnt_q - 1;
+                    end
+                    2'b10: state_d = WAIT_LAST_W_READY;
+                    2'b11: begin
+                        // we are finished
+                        if (cnt_q == 0) begin
+                            state_d = WAIT_B_VALID;
+                            gnt_o   = 1'b1;
+                        // there are outstanding transactions
+                        end else begin
+                            state_d = WAIT_LAST_W_READY;
+                            cnt_d = cnt_q - 1;
+                        end
+                    end
+                    default:;
+                endcase
+
+            end
+
+            // ~> all data has already been sent, we are only waiting for the aw_ready
+            WAIT_AW_READY_BURST: begin
+                axi.aw_valid = 1'b1;
+                axi.aw_len   = BURST_SIZE;
+
+                if (axi.aw_ready) begin
+                    state_d  = WAIT_B_VALID;
+                    gnt_o    = 1'b1;
+                end
+            end
+
+            // ~> from write, there is an outstanding write
+            WAIT_LAST_W_READY: begin
+                axi.w_valid = 1'b1;
+                // this is the last write
+                axi.w_last  = (cnt_q == '0) ? 1'b1 : 1'b0;
+
+                // a single write always carries word 0, a burst the word of the current beat
+                if (type_i == SINGLE_REQ) begin
+                    axi.w_data = wdata_i[0];
+                    axi.w_strb = be_i[0];
+                end else begin
+                    axi.w_data = wdata_i[BURST_SIZE-cnt_q];
+                    axi.w_strb = be_i[BURST_SIZE-cnt_q];
+                end
+
+                if (axi.w_ready) begin
+                    // last write -> go to WAIT_B_VALID
+                    if (cnt_q == '0) begin
+                        state_d = WAIT_B_VALID;
+                        // grant only once the final beat has really been accepted
+                        gnt_o   = 1'b1;
+                    end else begin
+                        cnt_d = cnt_q - 1;
+                    end
+                end
+            end
+
+            // ~> finish write transaction
+            WAIT_B_VALID: begin
+                axi.b_ready = 1'b1;
+                id_o        = axi.b_id;
+
+                // Write is valid
+                if (axi.b_valid) begin
+                    state_d = IDLE;
+                    valid_o = 1'b1;
+                end
+            end
+
+            // ~> cacheline read, single read
+            WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin
+                // cache line reads are wrapping bursts, single reads consist of one beat
+                axi.r_ready = 1'b1;
+                if (axi.r_valid) begin
+                    // only the first beat of a cacheline read is the critical word
+                    if (state_q == WAIT_R_VALID_MULTIPLE && cnt_q == BURST_SIZE) begin
+                        critical_word_valid = 1'b1;
+                        critical_word_o     = axi.r_data;
+                    end
+                    // this is the last read
+                    if (axi.r_last) begin
+                        state_d = COMPLETE_READ;
+                    end
+
+                    // save the word
+                    if (state_q == WAIT_R_VALID_MULTIPLE) begin
+                        cache_line_d[rd_index] = axi.r_data;
+                        // one beat less to go
+                        cnt_d = cnt_q - 1;
+                    end else begin
+                        cache_line_d[0] = axi.r_data;
+                    end
+                end
+            end
+            // ~> read is complete
+            COMPLETE_READ: begin
+                valid_o = 1'b1;
+                state_d = IDLE;
+            end
+        endcase
+    end
+
+    // ----------------
+    // Registers
+    // ----------------
+    always_ff @(posedge clk_i or negedge rst_ni) begin
+        if (~rst_ni) begin
+            state_q       <= IDLE;
+            cnt_q         <= '0;
+            cache_line_q  <= '0;
+            addr_offset_q <= '0;
+        end else begin
+            state_q       <= state_d;
+            cnt_q         <= cnt_d;
+            cache_line_q  <= cache_line_d;
+            addr_offset_q <= addr_offset_d;
+        end
+    end
+
+endmodule
diff --git a/src/nb_dcache.sv b/src/nb_dcache.sv
index b8be4b8c7..e75282e14 100644
--- a/src/nb_dcache.sv
+++ b/src/nb_dcache.sv
@@ -1,43 +1,121 @@
+// Author: Florian Zaruba, ETH Zurich
+// Date: 13.10.2017
+// Description: Nonblocking private L1 dcache
+//
+// Copyright (C) 2017 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
 import ariane_pkg::*;
+import nbdcache_pkg::*;
 
-module nb_dcache (
-    input  logic                 clk_i,     // Clock
-    input  logic                 rst_ni,    // Asynchronous reset active low
+// Skeleton of the non-blocking L1 data cache: so far it only instantiates the
+// data, tag and dirty memories; their ports are still unconnected (see the
+// TODO at the memories) and none of the outputs is driven yet.
+module nb_dcache #(
+    parameter int unsigned INDEX_WIDTH       = 12,  // address index bits, the memories hold 2**INDEX_WIDTH entries
+    parameter int unsigned TAG_WIDTH         = 44,  // width of one tag memory entry
+    parameter int unsigned CACHE_LINE_WIDTH  = 256, // width of one data memory entry in bits
+    parameter int unsigned SET_ASSOCIATIVITY = 8,   // number of data/tag memory pairs
+    parameter int unsigned AXI_ID_WIDTH      = 10,
+    parameter int unsigned AXI_USER_WIDTH    = 10
+)(
+    input  logic                           clk_i,     // Clock
+    input  logic                           rst_ni,    // Asynchronous reset active low
     // AXI refill port
-    AXI_BUS.Master               data_if,
+    AXI_BUS.Master                         data_if,
     // AMO interface
-    input  logic                 amo_commit_i, // commit atomic memory operation
-    output logic                 amo_valid_o,  // we have a valid AMO result
-    output logic [63:0]          amo_result_o, // result of atomic memory operation
+    input  logic                           amo_commit_i, // commit atomic memory operation
+    output logic                           amo_valid_o,  // we have a valid AMO result
+    output logic [63:0]                    amo_result_o, // result of atomic memory operation
     // Request ports
-    input  logic [2:0][11:0]     address_index_i,
-    input  logic [2:0][43:0]     address_tag_i,
-    input  logic [2:0][63:0]     data_wdata_i,
-    input  logic [2:0]           data_req_i,
-    input  logic [2:0]           data_we_i,
-    input  logic [2:0][7:0]      data_be_i,
-    input  logic [2:0]           kill_req_i,
-    input  logic [2:0]           tag_valid_i,
-    output logic [2:0]           data_gnt_o,
-    output logic [2:0]           data_rvalid_o,
-    output logic [2:0][63:0]     data_rdata_o,
-    input  amo_t [2:0]           amo_op_i
+    input  logic [2:0][INDEX_WIDTH-1:0]    address_index_i,
+    input  logic [2:0][TAG_WIDTH-1:0]      address_tag_i,
+    input  logic [2:0][63:0]               data_wdata_i,
+    input  logic [2:0]                     data_req_i,
+    input  logic [2:0]                     data_we_i,
+    input  logic [2:0][7:0]                data_be_i,
+    input  logic [2:0]                     kill_req_i,
+    input  logic [2:0]                     tag_valid_i,
+    output logic [2:0]                     data_gnt_o,
+    output logic [2:0]                     data_rvalid_o,
+    output logic [2:0][63:0]               data_rdata_o,
+    input  amo_t [2:0]                     amo_op_i
 );
+
+    localparam NUM_WORDS   = 2**INDEX_WIDTH; // entries per memory
+    localparam DIRTY_WIDTH = (CACHE_LINE_WIDTH/64)*SET_ASSOCIATIVITY; // presumably one bit per 64-bit word and way - TODO confirm
+
     // AMO ALU
 
 
     // Cache FSM
 
 
+    // --------------
     // Memories
+    // --------------
+    // TODO: Re-work
+    generate
+        for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin : set_associativity
+            sram #(
+                .DATA_WIDTH ( CACHE_LINE_WIDTH ),
+                .NUM_WORDS  ( NUM_WORDS        )
+            ) data_sram (
+                .clk_i   ( clk_i ),
+                .req_i   (       ),
+                .we_i    (       ),
+                .addr_i  (       ),
+                .wdata_i (       ),
+                .be_i    (       ),
+                .rdata_o (       )
+            );
+            sram #(
+                .DATA_WIDTH ( TAG_WIDTH ),
+                .NUM_WORDS  ( NUM_WORDS )
+            ) tag_sram (
+                .clk_i   ( clk_i ),
+                .req_i   (       ),
+                .we_i    (       ),
+                .addr_i  (       ),
+                .wdata_i (       ),
+                .be_i    (       ),
+                .rdata_o (       )
+            );
+
+        end
+    endgenerate
+
+    sram #(
+        .DATA_WIDTH ( DIRTY_WIDTH ),
+        .NUM_WORDS  ( NUM_WORDS   )
+    ) dirty_sram (
+        .clk_i   ( clk_i ),
+        .req_i   (       ),
+        .we_i    (       ),
+        .addr_i  (       ),
+        .wdata_i (       ),
+        .be_i    (       ),
+        .rdata_o (       )
+    );
 
 
     // AXI Module
 
 
 `ifndef SYNTHESIS
     initial begin
         assert ($bits(data_if.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus");
+        assert (CACHE_LINE_WIDTH/64 inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of 64");
     end
 `endif
 endmodule
diff --git a/src/util/behav_sram.sv b/src/util/behav_sram.sv
new file mode 100644
index 000000000..aad79c1da
--- /dev/null
+++ b/src/util/behav_sram.sv
@@ -0,0 +1,58 @@
+// Author: Florian Zaruba, ETH Zurich
+// Date: 13.10.2017
+// Description: SRAM Behavioral Model
+//
+// Copyright (C) 2017 ETH Zurich, University of Bologna
+// All rights reserved.
+//
+// This code is under development and not yet released to the public.
+// Until it is released, the code is under the copyright of ETH Zurich and
+// the University of Bologna, and may contain confidential and/or unpublished
+// work. Any reuse/redistribution is strictly forbidden without written
+// permission from ETH Zurich.
+//
+// Bug fixes and contributions will eventually be released under the
+// SolderPad open hardware license in the context of the PULP platform
+// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
+// University of Bologna.
+
+// Behavioral single-port SRAM with a registered read address and per-bit write enable.
+//   req_i && !we_i : latches addr_i; rdata_o shows that word from the next cycle on
+//   req_i &&  we_i : writes every bit of wdata_i whose be_i bit is set
+module sram #(
+    int unsigned DATA_WIDTH = 64,   // width of one word in bits
+    int unsigned NUM_WORDS  = 1024  // number of words
+)(
+    input  logic                          clk_i,
+
+    input  logic                          req_i,
+    input  logic                          we_i,
+    input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,
+    input  logic [DATA_WIDTH-1:0]         wdata_i,
+    input  logic [DATA_WIDTH-1:0]         be_i,   // bit-wise (not byte-wise) write enable
+    output logic [DATA_WIDTH-1:0]         rdata_o
+);
+    localparam ADDR_WIDTH = $clog2(NUM_WORDS);
+
+    // NUM_WORDS words of DATA_WIDTH bits each
+    logic [DATA_WIDTH-1:0] ram [NUM_WORDS-1:0];
+    logic [ADDR_WIDTH-1:0] raddr_q;
+
+    // 1. randomize array
+    // 2. randomize output when no request is active
+
+    always @(posedge clk_i) begin
+        if (req_i && !we_i) raddr_q <= addr_i;
+
+    end
+
+    always @(posedge clk_i) begin
+        if (we_i && req_i) begin
+            for (int i = 0; i < DATA_WIDTH; i++)
+                if (be_i[i]) ram[addr_i][i] <= wdata_i[i];
+        end
+    end
+
+    assign rdata_o = ram[raddr_q];
+
+endmodule