diff --git a/.gitmodules b/.gitmodules index 15a256062..d4a445c13 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,6 @@ [submodule "src/fpga-support"] path = src/fpga-support url = https://github.com/pulp-platform/fpga-support.git +[submodule "src/common_cells"] + path = src/common_cells + url = https://github.com/pulp-platform/common_cells.git diff --git a/Bender.yml b/Bender.yml index beb17a84b..de1b40958 100644 --- a/Bender.yml +++ b/Bender.yml @@ -3,14 +3,14 @@ package: authors: [ "Florian Zaruba " ] dependencies: - axi: { git: "git@iis-git.ee.ethz.ch:sasa/axi.git", rev: master } - axi2per: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi2per.git", rev: master } - axi_mem_if: { git: "git@github.com:pulp-platform/axi_mem_if.git", rev: master } - axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: 1.0.3 } - axi_slice: { git: "git@iis-git.ee.ethz.ch:sasa/axi_slice.git", version: 1.1.2 } - tech_cells_generic: { git: "git@iis-git.ee.ethz.ch:pulp-open/tech_cells_generic.git", rev: master } - common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: 1.1.0 } - + axi: { git: "git@iis-git.ee.ethz.ch:sasa/axi.git", rev: master } + axi2per: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi2per.git", rev: master } + axi_mem_if: { git: "git@github.com:pulp-platform/axi_mem_if.git", rev: master } + axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: 1.0.3 } + axi_slice: { git: "git@iis-git.ee.ethz.ch:sasa/axi_slice.git", version: 1.1.2 } + tech_cells_generic: { git: "git@iis-git.ee.ethz.ch:pulp-open/tech_cells_generic.git", rev: master } + common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: 1.7.0 } + fpga-support: { git: "https://github.com/pulp-platform/fpga-support.git", version: v0.3.2 } sources: - include/riscv_pkg.sv - src/debug/dm_pkg.sv @@ -32,7 +32,6 @@ sources: - src/decoder.sv - src/ex_stage.sv - src/fetch_fifo.sv - - src/ff1.sv - src/frontend.sv - src/icache.sv - src/id_stage.sv @@ -47,9 +46,11 @@ sources: - src/mmu.sv - src/mult.sv - src/nbdcache.sv + - src/vdregs.sv - src/perf_counters.sv - src/ptw.sv - src/std_cache_subsystem.sv + - src/sram_wrapper.sv # - src/ariane_regfile_ff.sv - src/ariane_regfile.sv - src/re_name.sv diff --git a/Makefile b/Makefile index 56e82e02f..964c3f325 100755 --- a/Makefile +++ b/Makefile @@ -24,13 +24,12 @@ ariane_pkg := include/riscv_pkg.sv \ include/ariane_pkg.sv \ include/std_cache_pkg.sv \ include/axi_if.sv + # utility modules util := $(wildcard src/util/*.svh) \ src/util/instruction_tracer_pkg.sv \ src/util/instruction_tracer_if.sv \ - src/util/generic_fifo.sv \ - src/util/cluster_clock_gating.sv \ - src/util/sram_wrap.sv + src/util/cluster_clock_gating.sv # Test packages test_pkg := $(wildcard tb/test/*/*sequence_pkg.sv*) \ @@ -39,12 +38,27 @@ test_pkg := $(wildcard tb/test/*/*sequence_pkg.sv*) \ dpi := $(patsubst tb/dpi/%.cc,work/%.o,$(wildcard tb/dpi/*.cc)) dpi_hdr := $(wildcard tb/dpi/*.h) # this list contains the standalone components -src := $(wildcard src/*.sv) $(wildcard tb/common/*.sv) \ - $(wildcard tb/common/*.v) $(wildcard bootrom/*.sv) \ - $(wildcard src/axi_slice/*.sv) $(wildcard src/clint/*.sv) \ - $(wildcard src/axi_node/*.sv) $(wildcard src/axi_mem_if/src/*.sv) \ - $(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) \ - $(wildcard src/debug/debug_rom/*.sv) src/fpga-support/rtl/SyncSpRamBeNx64.sv +src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ + $(wildcard bootrom/*.sv) \ + $(wildcard src/axi_slice/*.sv) \ + $(wildcard src/clint/*.sv) \ + $(wildcard src/axi_node/*.sv) \ + $(wildcard src/axi_mem_if/src/*.sv) \ + $(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) \ + $(wildcard src/debug/debug_rom/*.sv) \ + src/fpga-support/rtl/SyncSpRamBeNx64.sv \ + src/common_cells/src/deprecated/generic_fifo.sv \ + src/common_cells/src/deprecated/pulp_sync.sv \ + src/common_cells/src/fifo.sv \ + src/common_cells/src/lzc.sv \ + src/common_cells/src/rrarbiter.sv \ + tb/ariane_testharness.sv \ + tb/common/SimDTM.sv \ + tb/common/SimJTAG.sv + + + + # look for testbenches tbs := tb/ariane_tb.sv tb/ariane_testharness.sv # RISCV asm tests and benchmark setup (used for CI) @@ -70,7 +84,7 @@ list_incdir := $(foreach dir, ${incdir}, +incdir+$(dir)) # Build the TB and module using QuestaSim build: $(library) $(library)/.build-srcs $(library)/.build-tb $(library)/ariane_dpi.so - # Optimize top level + # Optimize top level vopt$(questa_version) $(compile_flag) -work $(library) $(test_top_level) -o $(test_top_level)_optimized +acc -check_synthesis # src files @@ -143,45 +157,31 @@ check-benchmarks: ci/check-tests.sh tmp/riscv-benchmarks- $(riscv-benchmarks-list) -verilate_command := $(verilator) \ - $(ariane_pkg) \ - tb/ariane_testharness.sv \ - $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ - $(wildcard src/axi_slice/*.sv) \ - $(wildcard src/clint/*.sv) \ - $(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) \ - src/debug/debug_rom/debug_rom.sv \ - src/util/generic_fifo.sv \ - tb/common/SimDTM.sv \ - tb/common/SimJTAG.sv \ - tb/common/pulp_sync.sv \ - bootrom/bootrom.sv \ - src/util/cluster_clock_gating.sv \ - src/util/sram_wrap.sv \ - src/fpga-support/rtl/SyncSpRamBeNx64.sv \ - src/axi_mem_if/src/axi2mem.sv \ - +incdir+src/axi_node \ - --unroll-count 256 \ - -Werror-PINMISSING \ - -Werror-IMPLICIT \ - -Wno-fatal \ - -Wno-PINCONNECTEMPTY \ - -Wno-ASSIGNDLY \ - -Wno-DECLFILENAME \ - -Wno-UNOPTFLAT \ - -Wno-UNUSED \ - -Wno-style \ - -Wno-lint \ - $(if $(DEBUG),--trace-structs --trace,) \ +verilate_command := $(verilator) \ + $(ariane_pkg) \ + $(filter-out tb/ariane_bt.sv,$(src)) \ + +incdir+src/axi_node \ + --unroll-count 256 \ + -Werror-PINMISSING \ + -Werror-IMPLICIT \ + -Wno-fatal \ + -Wno-PINCONNECTEMPTY \ + -Wno-ASSIGNDLY \ + -Wno-DECLFILENAME \ + -Wno-UNOPTFLAT \ + -Wno-UNUSED \ + -Wno-style \ + -Wno-lint \ + $(if $(DEBUG),--trace-structs --trace,) \ -LDFLAGS "-lfesvr" -CFLAGS "-std=c++11 -I../tb/dpi" -Wall --cc --vpi \ - $(list_incdir) --top-module ariane_testharness \ - --Mdir build -O3 \ + $(list_incdir) --top-module ariane_testharness \ + --Mdir build -O3 \ --exe tb/ariane_tb.cpp tb/dpi/SimDTM.cc tb/dpi/SimJTAG.cc tb/dpi/remote_bitbang.cc # User Verilator, at some point in the future this will be auto-generated verilate: $(verilate_command) - cd build && make -j4 -f Variane_testharness.mk + cd build && make -j${NUM_JOBS} -f Variane_testharness.mk $(addsuffix -verilator,$(riscv-asm-tests)): verilate build/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@) diff --git a/src/common_cells b/src/common_cells new file mode 160000 index 000000000..62e218d96 --- /dev/null +++ b/src/common_cells @@ -0,0 +1 @@ +Subproject commit 62e218d962f0c95a9d4ee645ffafea7307412388 diff --git a/src/ff1.sv b/src/ff1.sv deleted file mode 100644 index e098db288..000000000 --- a/src/ff1.sv +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018 ETH Zurich and University of Bologna. -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// -// Author: Florian Zaruba -// Date: 05.06.2017 -// Description: Finds first one - -// ----------------- -// Find First One -// ----------------- -module ff1 #( - parameter int unsigned LEN = 32 -)( - input logic [LEN-1:0] in_i, - output logic [$clog2(LEN)-1:0] first_one_o, - output logic no_ones_o -); - -localparam int unsigned NUM_LEVELS = $clog2(LEN); - -logic [LEN-1:0] [NUM_LEVELS-1:0] index_lut; -logic [2**NUM_LEVELS-1:0] sel_nodes; -logic [2**NUM_LEVELS-1:0] [NUM_LEVELS-1:0] index_nodes; - -// ---------------------------- -// Generate Tree Structure -// ---------------------------- -generate - for (genvar j = 0; j < LEN; j++) begin - assign index_lut[j] = $unsigned(j[NUM_LEVELS-1:0]); - end -endgenerate - -generate - for (genvar level = 0; level < NUM_LEVELS; level++) begin - - if (level < NUM_LEVELS-1) begin - for (genvar l = 0; l < 2**level; l++) begin - assign sel_nodes[2**level-1+l] = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1]; - assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? - index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1]; - end - end - - if (level == NUM_LEVELS-1) begin - for (genvar k = 0; k < 2**level; k++) begin - // if two successive indices are still in the vector... - if (k * 2 < LEN) begin - assign sel_nodes[2**level-1+k] = in_i[k*2] | in_i[k*2+1]; - assign index_nodes[2**level-1+k] = (in_i[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1]; - end - // if only the first index is still in the vector... - if (k * 2 == LEN) begin - assign sel_nodes[2**level-1+k] = in_i[k*2]; - assign index_nodes[2**level-1+k] = index_lut[k*2]; - end - // if index is out of range - if (k * 2 > LEN) begin - assign sel_nodes[2**level-1+k] = 1'b0; - assign index_nodes[2**level-1+k] = '0; - end - end - end - end -endgenerate - -// -------------------- -// Connect Output -// -------------------- -assign first_one_o = index_nodes[0]; -assign no_ones_o = ~sel_nodes[0]; - -endmodule diff --git a/src/fifo.sv b/src/fifo.sv deleted file mode 100644 index a77ee935b..000000000 --- a/src/fifo.sv +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2018 ETH Zurich and University of Bologna. -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Author: Florian Zaruba - -module fifo #( - parameter bit FALL_THROUGH = 1'b0, // fifo is in fall-through mode - parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic - parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32 - parameter int unsigned ALM_EMPTY_TH = 1, // almost empty threshold (when to assert alm_empty_o) - parameter int unsigned ALM_FULL_TH = 1, // almost full threshold (when to assert alm_full_o) - parameter type dtype = logic [DATA_WIDTH-1:0] -)( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low - input logic flush_i, // flush the queue - input logic testmode_i, // test_mode to bypass clock gating - // status flags - output logic full_o, // queue is full - output logic empty_o, // queue is empty - output logic alm_full_o, // FIFO fillstate >= the specified threshold - output logic alm_empty_o, // FIFO fillstate <= the specified threshold - // as long as the queue is not full we can push new data - input dtype data_i, // data to push into the queue - input logic push_i, // data is valid and can be pushed to the queue - // as long as the queue is not empty we can pop new elements - output dtype data_o, // output data - input logic pop_i // pop head from queue -); - // local parameter - // FIFO depth - handle the case of pass-through, synthesizer will do constant propagation - localparam int unsigned FIFO_DEPTH = (DEPTH > 0) ? DEPTH : 1; - localparam int unsigned ADDR_DEPTH = (DEPTH > 1) ? $clog2(DEPTH) : 1; - // clock gating control - logic gate_clock; - // pointer to the read and write section of the queue - logic [ADDR_DEPTH - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q; - // keep a counter to keep track of the current queue status - logic [ADDR_DEPTH:0] status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool - // actual memory - dtype [FIFO_DEPTH - 1:0] mem_n, mem_q; - - if (DEPTH == 0) begin - assign empty_o = ~push_i; - assign full_o = ~pop_i; - assign alm_full_o = 1'b0; // that signal does not make any sense in a FIFO of depth 0 - assign alm_empty_o = 1'b0; // that signal does not make any sense in a FIFO of depth 0 - end else begin - assign full_o = (status_cnt_q == FIFO_DEPTH); - assign empty_o = (status_cnt_q == 0) & ~(FALL_THROUGH & push_i); - assign alm_full_o = (status_cnt_q >= ALM_FULL_TH); - assign alm_empty_o = (status_cnt_q <= ALM_EMPTY_TH); - end - // status flags - - // read and write queue logic - always_comb begin : read_write_comb - // default assignment - read_pointer_n = read_pointer_q; - write_pointer_n = write_pointer_q; - status_cnt_n = status_cnt_q; - data_o = (DEPTH == 0) ? data_i : mem_q[read_pointer_q]; - mem_n = mem_q; - gate_clock = 1'b1; - - // push a new element to the queue - if (push_i && ~full_o) begin - // push the data onto the queue - mem_n[write_pointer_q] = data_i; - // un-gate the clock, we want to write something - gate_clock = 1'b0; - // increment the write counter - if (write_pointer_q == FIFO_DEPTH-1) - write_pointer_n = '0; - else - write_pointer_n = write_pointer_q + 1; - // increment the overall counter - status_cnt_n = status_cnt_q + 1; - end - - if (pop_i && ~empty_o) begin - // read from the queue is a default assignment - // but increment the read pointer... - if (read_pointer_n == FIFO_DEPTH-1) - read_pointer_n = '0; - else - read_pointer_n = read_pointer_q + 1; - // ... and decrement the overall count - status_cnt_n = status_cnt_q - 1; - end - - // keep the count pointer stable if we push and pop at the same time - if (push_i && pop_i && ~full_o && ~empty_o) - status_cnt_n = status_cnt_q; - - // FIFO is in pass through mode -> do not change the pointers - if (FALL_THROUGH && (status_cnt_q == 0) && push_i && pop_i) begin - data_o = data_i; - status_cnt_n = status_cnt_q; - read_pointer_n = read_pointer_q; - write_pointer_n = write_pointer_q; - end - end - - // sequential process - always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin - read_pointer_q <= '0; - write_pointer_q <= '0; - status_cnt_q <= '0; - end else begin - if (flush_i) begin - read_pointer_q <= '0; - write_pointer_q <= '0; - status_cnt_q <= '0; - end else begin - read_pointer_q <= read_pointer_n; - write_pointer_q <= write_pointer_n; - status_cnt_q <= status_cnt_n; - end - end - end - - always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin - mem_q <= '0; - end else if (!gate_clock) begin - mem_q <= mem_n; - end - end - `ifndef SYNTHESIS - `ifndef verilator - initial begin - // assert ((THRESHOLD - 1) > DEPTH) else $error("Threshold can't be bigger than the DEPTH."); - - assert property( - @(posedge clk_i) (rst_ni && full_o |-> ~push_i)) - else $warning ("Trying to push new data although the FIFO is full."); - - assert property( - @(posedge clk_i) (rst_ni && empty_o |-> ~pop_i)) - else $warning ("Trying to pop data although the FIFO is empty."); - end - `endif - `endif -endmodule // generic_fifo diff --git a/src/icache.sv b/src/icache.sv index 20ccac8a4..31c65c733 100644 --- a/src/icache.sv +++ b/src/icache.sv @@ -408,12 +408,12 @@ module icache #( dreq_o.ready = 1'b0; end - ff1 #( - .LEN ( ICACHE_SET_ASSOC ) - ) i_ff1 ( - .in_i ( ~way_valid ), - .first_one_o ( repl_invalid ), - .no_ones_o ( repl_w_random ) + lzc #( + .WIDTH ( ICACHE_SET_ASSOC ) + ) i_lzc ( + .in_i ( ~way_valid ), + .cnt_o ( repl_invalid ), + .empty_o ( repl_w_random ) ); // ----------------- diff --git a/src/mult.sv b/src/mult.sv index 5adb75d1e..4aa711e49 100644 --- a/src/mult.sv +++ b/src/mult.sv @@ -69,9 +69,9 @@ module mult ( // --------------------- // Division // --------------------- - logic [5:0] ff1_result; // holds the index of the last '1' (as the input operand is reversed) - logic ff1_no_one; // no one was found by find first one - logic [63:0] ff1_input; // input to find first one + logic [5:0] lzc_result; // holds the index of the last '1' (as the input operand is reversed) + logic lzc_no_one; // no one was found by find first one + logic [63:0] lzc_input; // input to find first one logic [63:0] operand_b_rev, operand_b_rev_neg, operand_b_shift; // couple of different representations for the dividend logic [6:0] div_shift; // amount of which to shift to left logic div_signed; // should this operation be performed as a signed or unsigned division @@ -95,7 +95,7 @@ module mult ( endgenerate // negated reverse input operand, used for signed divisions assign operand_b_rev_neg = ~operand_b_rev; - assign ff1_input = (div_op_signed) ? operand_b_rev_neg : operand_b_rev; + assign lzc_input = (div_op_signed) ? operand_b_rev_neg : operand_b_rev; // prepare the input operands and control divider always_comb begin @@ -139,19 +139,19 @@ module mult ( end // --------------------- - // Find First one + // Leading Zero Counter // --------------------- // this unit is used to speed up the sequential division by shifting the dividend first - ff1 #( - .LEN ( 64 ) - ) i_ff1 ( - .in_i ( ff1_input ), // signed = operand_b_rev_neg, unsigned operand_b_rev - .first_one_o ( ff1_result ), - .no_ones_o ( ff1_no_one ) + lzc #( + .WIDTH ( 64 ) + ) i_lzc ( + .in_i ( lzc_input ), // signed = operand_b_rev_neg, unsigned operand_b_rev + .cnt_o ( lzc_result ), + .empty_o ( lzc_no_one ) ); // if the dividend is all zero go for the full length - assign div_shift = ff1_no_one ? 7'd64 : ff1_result; + assign div_shift = lzc_no_one ? 7'd64 : lzc_result; // prepare dividend by shifting assign operand_b_shift = operand_b <<< div_shift; diff --git a/src/rrarbiter.sv b/src/rrarbiter.sv deleted file mode 100644 index 7c85ae412..000000000 --- a/src/rrarbiter.sv +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright (c) 2018 ETH Zurich, University of Bologna -// All rights reserved. -// -// This code is under development and not yet released to the public. -// Until it is released, the code is under the copyright of ETH Zurich and -// the University of Bologna, and may contain confidential and/or unpublished -// work. Any reuse/redistribution is strictly forbidden without written -// permission from ETH Zurich. -// -// Bug fixes and contributions will eventually be released under the -// SolderPad open hardware license in the context of the PULP platform -// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the -// University of Bologna. -// -// Author: Michael Schaffner , ETH Zurich -// Date: 16.08.2018 -// Description: Round robin arbiter with lookahead -// -// this unit is a generic round robin arbiter with "look ahead" - i.e. it jumps -// to the next valid request signal instead of stepping around with stepsize 1. -// if the current req signal has been acknowledged in the last cycle, and it is -// again valid in the current cycle, the arbiter will first serve the other req -// signals (if there is a valid one) in the req vector before acknowledging the -// same signal again (this prevents starvation). -// -// the arbiter has a request signal vector input (req_i) and an ack -// signal vector ouput (ack_o). to enable the arbiter the signal -// en_i has to be asserted. vld_o is high when one of the -// req_i signals is acknowledged. -// -// the entity has one register which stores the index of the last request signal -// that was served. -// -// dependencies: relies on fast leading zero counter tree "ff1" in common_cells -// - -module rrarbiter #( - parameter NUM_REQ = 13 -)( - input logic clk_i, - input logic rst_ni, - - input logic flush_i, // clears the fsm and control signal registers - input logic en_i, // arbiter enable - input logic [NUM_REQ-1:0] req_i, // request signals - - output logic [NUM_REQ-1:0] ack_o, // acknowledge signals - output logic vld_o, // valid signals - output logic [$clog2(NUM_REQ)-1:0] idx_o // idx output - ); - -localparam SEL_WIDTH = $clog2(NUM_REQ); - -logic [SEL_WIDTH-1:0] arb_sel_d; -logic [SEL_WIDTH-1:0] arb_sel_q; - - -// only used in case of more than 2 requesters -logic [NUM_REQ-1:0] mask_lut[NUM_REQ-1:0]; -logic [NUM_REQ-1:0] mask; -logic [NUM_REQ-1:0] masked_lower; -logic [NUM_REQ-1:0] masked_upper; -logic [SEL_WIDTH-1:0] lower_idx; -logic [SEL_WIDTH-1:0] upper_idx; -logic [SEL_WIDTH-1:0] next_idx; -logic [SEL_WIDTH-1:0] idx; -logic no_lower_ones; - - -// shared -assign idx_o = arb_sel_d; -assign vld_o = (|req_i) & en_i; - -// only 2 input requesters -generate - if (NUM_REQ == 2) begin - - assign ack_o[0] = ((~arb_sel_q) | ( arb_sel_q & ~req_i[1])) & req_i[0] & en_i; - assign arb_sel_d = (( arb_sel_q) | (~arb_sel_q & ~req_i[0])) & req_i[1]; - assign ack_o[1] = arb_sel_d & en_i; - - end -endgenerate - -// more than 2 requesters -generate - if (NUM_REQ > 2) begin - - // this mask is used to mask the incoming req vector - // depending on the index of the last served index - assign mask = mask_lut[arb_sel_q]; - - // get index from masked vectors - ff1 #( - .LEN(NUM_REQ) - ) i_lower_ff1 ( - .in_i ( masked_lower ), - .first_one_o ( lower_idx ), - .no_ones_o ( no_lower_ones ) - ); - - ff1 #( - .LEN(NUM_REQ) - ) i_upper_ff1 ( - .in_i ( masked_upper ), - .first_one_o ( upper_idx ), - .no_ones_o ( ) - ); - - // wrap around - assign next_idx = (no_lower_ones) ? upper_idx : lower_idx; - assign arb_sel_d = (next_idx < NUM_REQ) ? next_idx : NUM_REQ-1; - - end -endgenerate - -genvar k; -generate - for (k=0; (k < NUM_REQ) && NUM_REQ > 2; k++) begin - assign mask_lut[k] = 2**(k+1)-1; - assign masked_lower[k] = (~ mask[k]) & req_i[k]; - assign masked_upper[k] = mask[k] & req_i[k]; - assign ack_o[k] = ((arb_sel_d == k) & vld_o ) ? 1'b1 : 1'b0; - end -endgenerate - -// regs -always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs - if(~rst_ni) begin - arb_sel_q <= 0; - end else begin - if (flush_i) begin - arb_sel_q <= 0; - end else if (vld_o) begin - arb_sel_q <= arb_sel_d; - end - end -end - - -`ifndef SYNTHESIS -`ifndef VERILATOR - // check parameterization, enable and hot1 property of acks - // todo: check RR fairness with sequence assertion - initial begin - assert (NUM_REQ>=2) else $fatal ("minimum input width of req vecor is 2"); - end - ack_implies_vld: assert property (@(posedge clk_i) disable iff (~rst_ni) |ack_o |-> vld_o) else $fatal ("an asserted ack signal implies that vld_o must be asserted, too"); - vld_implies_ack: assert property (@(posedge clk_i) disable iff (~rst_ni) vld_o |-> |ack_o) else $fatal ("an asserted vld_o signal implies that one ack_o must be asserted, too"); - en_vld_check: assert property (@(posedge clk_i) disable iff (~rst_ni) !en_i |-> !vld_o) else $fatal ("vld must not be asserted when arbiter is disabled"); - en_ack_check: assert property (@(posedge clk_i) disable iff (~rst_ni) !en_i |-> !ack_o) else $fatal ("ack_o must not be asserted when arbiter is disabled"); - ack_idx_check: assert property (@(posedge clk_i) disable iff (~rst_ni) vld_o |-> ack_o[idx_o]) else $fatal ("index / ack_o do not match"); - hot1_check: assert property (@(posedge clk_i) disable iff (~rst_ni) ((~(1< - -module generic_fifo - #( - parameter int unsigned DATA_WIDTH = 32, - parameter int unsigned DATA_DEPTH = 8 - ) - ( - input logic clk, - input logic rst_n, - //PUSH SIDE - input logic [DATA_WIDTH-1:0] data_i, - input logic valid_i, - output logic grant_o, - //POP SIDE - output logic [DATA_WIDTH-1:0] data_o, - output logic valid_o, - input logic grant_i, - - input logic test_mode_i - ); - - - // Local Parameter - localparam int unsigned ADDR_DEPTH = $clog2(DATA_DEPTH); - enum logic [1:0] { EMPTY, FULL, MIDDLE } CS, NS; - // Internal Signals - - logic gate_clock; - logic clk_gated; - - logic [ADDR_DEPTH-1:0] Pop_Pointer_CS, Pop_Pointer_NS; - logic [ADDR_DEPTH-1:0] Push_Pointer_CS, Push_Pointer_NS; - logic [DATA_WIDTH-1:0] FIFO_REGISTERS[DATA_DEPTH-1:0]; - int unsigned i; - - - - // Parameter Check - // synopsys translate_off - initial - begin : parameter_check - integer param_err_flg; - param_err_flg = 0; - - if (DATA_WIDTH < 1) - begin - param_err_flg = 1; - $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_WIDTH (legal range: greater than 1)", DATA_WIDTH ); - end - - if (DATA_DEPTH < 1) - begin - param_err_flg = 1; - $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_DEPTH (legal range: greater than 1)", DATA_DEPTH ); - end - end - // synopsys translate_on - -`ifndef PULP_FPGA_EMUL - cluster_clock_gating cg_cell - ( - .clk_i ( clk ), - .en_i (~gate_clock ), - .test_en_i ( test_mode_i ), - .clk_o ( clk_gated ) - ); -`else - assign clk_gated = clk; -`endif - - // UPDATE THE STATE - always_ff @(posedge clk, negedge rst_n) - begin - if(rst_n == 1'b0) - begin - CS <= EMPTY; - Pop_Pointer_CS <= {ADDR_DEPTH {1'b0}}; - Push_Pointer_CS <= {ADDR_DEPTH {1'b0}}; - end - else - begin - CS <= NS; - Pop_Pointer_CS <= Pop_Pointer_NS; - Push_Pointer_CS <= Push_Pointer_NS; - end - end - - - // Compute Next State - always_comb - begin - gate_clock = 1'b0; - - case(CS) - - EMPTY: - begin - grant_o = 1'b1; - valid_o = 1'b0; - - case(valid_i) - 1'b0 : - begin - NS = EMPTY; - Push_Pointer_NS = Push_Pointer_CS; - Pop_Pointer_NS = Pop_Pointer_CS; - gate_clock = 1'b1; - end - - 1'b1: - begin - NS = MIDDLE; - Push_Pointer_NS = Push_Pointer_CS + 1'b1; - Pop_Pointer_NS = Pop_Pointer_CS; - end - - endcase - end//~EMPTY - - MIDDLE: - begin - grant_o = 1'b1; - valid_o = 1'b1; - - case({valid_i,grant_i}) - - 2'b01: - begin - gate_clock = 1'b1; - - if((Pop_Pointer_CS == Push_Pointer_CS -1 ) || ((Pop_Pointer_CS == DATA_DEPTH-1) && (Push_Pointer_CS == 0) )) - NS = EMPTY; - else - NS = MIDDLE; - - Push_Pointer_NS = Push_Pointer_CS; - - if(Pop_Pointer_CS == DATA_DEPTH-1) - Pop_Pointer_NS = 0; - else - Pop_Pointer_NS = Pop_Pointer_CS + 1'b1; - end - - 2'b00 : - begin - gate_clock = 1'b1; - NS = MIDDLE; - Push_Pointer_NS = Push_Pointer_CS; - Pop_Pointer_NS = Pop_Pointer_CS; - end - - 2'b11: - begin - NS = MIDDLE; - - if(Push_Pointer_CS == DATA_DEPTH-1) - Push_Pointer_NS = 0; - else - Push_Pointer_NS = Push_Pointer_CS + 1'b1; - - if(Pop_Pointer_CS == DATA_DEPTH-1) - Pop_Pointer_NS = 0; - else - Pop_Pointer_NS = Pop_Pointer_CS + 1'b1; - end - - 2'b10: - begin - if(( Push_Pointer_CS == Pop_Pointer_CS - 1) || ( (Push_Pointer_CS == DATA_DEPTH-1) && (Pop_Pointer_CS == 0) )) - NS = FULL; - else - NS = MIDDLE; - - if(Push_Pointer_CS == DATA_DEPTH - 1) - Push_Pointer_NS = 0; - else - Push_Pointer_NS = Push_Pointer_CS + 1'b1; - - Pop_Pointer_NS = Pop_Pointer_CS; - end - - endcase - end - - FULL: - begin - grant_o = 1'b0; - valid_o = 1'b1; - gate_clock = 1'b1; - - case(grant_i) - 1'b1: - begin - NS = MIDDLE; - - Push_Pointer_NS = Push_Pointer_CS; - - if(Pop_Pointer_CS == DATA_DEPTH-1) - Pop_Pointer_NS = 0; - else - Pop_Pointer_NS = Pop_Pointer_CS + 1'b1; - end - - 1'b0: - begin - NS = FULL; - Push_Pointer_NS = Push_Pointer_CS; - Pop_Pointer_NS = Pop_Pointer_CS; - end - endcase - - end // end of FULL - - default : - begin - gate_clock = 1'b1; - grant_o = 1'b0; - valid_o = 1'b0; - NS = EMPTY; - Pop_Pointer_NS = 0; - Push_Pointer_NS = 0; - end - - endcase - end - - always_ff @(posedge clk_gated, negedge rst_n) - begin - if(rst_n == 1'b0) - begin - for (i=0; i< DATA_DEPTH; i++) - FIFO_REGISTERS[i] <= {DATA_WIDTH {1'b0}}; - end - else - begin - if((grant_o == 1'b1) && (valid_i == 1'b1)) - FIFO_REGISTERS[Push_Pointer_CS] <= data_i; - end - end - - assign data_o = FIFO_REGISTERS[Pop_Pointer_CS]; - -endmodule // generic_fifo diff --git a/src/util/regfile.sv b/src/util/regfile.sv deleted file mode 100644 index c90e7c28d..000000000 --- a/src/util/regfile.sv +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright 2018 ETH Zurich and University of Bologna. -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// -// Author: Florian Zaruba , ETH Zurich -// Michael Schaffner , ETH Zurich -// Date: 15.08.2018 -// Description: SRAM wrapper for FPGA (requires the fpga-support submodule) -// -// Note: the wrapped module contains two different implementations for -// ALTERA and XILINX tools, since these follow different coding styles for -// inferrable RAMS with byte enable. define `FPGA_TARGET_XILINX or -// `FPGA_TARGET_ALTERA in your build environment (default is ALTERA) - -module regfile #( - parameter DATA_WIDTH = 64, - parameter DATA_DEPTH = 1024 -)( - input logic clk_i, - input logic rst_ni, - input logic we_i, - input logic [$clog2(DATA_DEPTH)-1:0] addr_i, - input logic [DATA_WIDTH-1:0] wdata_i, - input logic [DATA_WIDTH-1:0] biten_i, // bit enable - output logic [DATA_WIDTH-1:0] rdata_o -); - -logic [DATA_DEPTH-1:0] regs_d[DATA_WIDTH-1:0]; -logic [DATA_DEPTH-1:0] regs_q[DATA_WIDTH-1:0]; - -genvar k; -generate - for(k=0;k