Merge pull request #142 from msfschaffner/ariane_next

This adds preliminary support for the OpenPiton cache system.
This commit is contained in:
Florian Zaruba 2018-11-23 21:19:19 +01:00 committed by GitHub
commit f5af3df4a3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
84 changed files with 11550 additions and 1000 deletions

View file

@ -1,12 +1,12 @@
before_script:
# paths to local or network installations (the riscv toolchain and
# paths to local or network installations (the riscv toolchain and
# verilator are not built in the ci job as in travis)
- export QUESTASIM_HOME=/usr/pack/modelsim-10.6b-kgf/questasim/
- export QUESTASIM_VERSION=-10.6b
- export QUESTASIM_FLAGS=-noautoldlibpath
- export CXX=g++-7.2.0 CC=gcc-7.2.0
- export RISCV=/usr/scratch2/larain1/gitlabci/riscv_install
- export VERILATOR_ROOT=/usr/scratch2/larain1/gitlabci/verilator-3.924
- export RISCV=/scratch2/gitlabci/riscv_install
- export VERILATOR_ROOT=/scratch2/gitlabci/verilator-3.924
# setup dependent paths
- export PATH=${RISCV}/bin:$VERILATOR_ROOT/bin:${PATH}
- export LIBRARY_PATH=$RISCV/lib
@ -15,9 +15,14 @@ before_script:
- export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include:/usr/pack/gcc-7.2.0-af/linux-x64/include
# number of parallel jobs to use for make commands and simulation
- export NUM_JOBS=4
- which java
- java -version
- which git
- git --version
- ci/make-tmp.sh
- git submodule update --init --recursive
- git submodule init
- git submodule update --recursive
variables:
GIT_SUBMODULE_STRATEGY: recursive
@ -26,6 +31,7 @@ stages:
- standard
- serpent
###################################
# prepare
build:
stage: build
@ -33,11 +39,16 @@ build:
- ci/build-riscv-tests.sh
- ci/get-torture.sh
- make clean
- make torture-gen
# this currently does not work with the current runner version...
#- make torture-gen
artifacts:
paths:
- tmp
- tmp
###################################
# tests with standard cache system
# rv64ui-p-* and rv64ui-v-* tests
asm-quest:
stage: standard
script:
@ -51,19 +62,19 @@ amo-quest:
- make -j${NUM_JOBS} run-amo-tests batch-mode=1
dependencies:
- build
bench-quest:
stage: standard
script:
- make -j${NUM_JOBS} run-benchmarks batch-mode=1
dependencies:
- build
- build
# rv64ui-p-* tests
asm1-ver:
stage: standard
script:
- make -j${NUM_JOBS} run-asm-tests1-verilator
- make -j${NUM_JOBS} run-asm-tests1-verilator
dependencies:
- build
@ -71,33 +82,40 @@ asm1-ver:
asm2-ver:
stage: standard
script:
- make -j${NUM_JOBS} run-asm-tests2-verilator
- make -j${NUM_JOBS} run-asm-tests2-verilator
dependencies:
- build
- build
# atomics
amo-ver:
stage: standard
script:
- make -j${NUM_JOBS} run-amo-verilator
- make -j${NUM_JOBS} run-amo-verilator
dependencies:
- build
- build
bench-ver:
stage: standard
script:
- make -j${NUM_JOBS} run-benchmarks-verilator
- make -j${NUM_JOBS} run-benchmarks-verilator
dependencies:
- build
torture:
bench-ver:
stage: standard
script:
- make torture-rtest
- make torture-rtest-verilator
- make -j${NUM_JOBS} run-benchmarks-verilator
dependencies:
- build
# torture:
# stage: standard
# script:
# - make torture-rtest batch-mode=1
# - make torture-rtest-verilator
# dependencies:
# - build
serdiv-quest:
stage: standard
script:
@ -107,4 +125,69 @@ serdiv-quest:
dependencies:
- build
###################################
# tests with serpent cache system
# rv64ui-p-* and rv64ui-v-* tests
s-asm-quest:
stage: serpent
script:
- make -j${NUM_JOBS} run-asm-tests defines=SERPENT_PULP+AXI64_CACHE_PORTS batch-mode=1
dependencies:
- build
s-bench-quest:
stage: serpent
script:
- make -j${NUM_JOBS} run-benchmarks defines=SERPENT_PULP+AXI64_CACHE_PORTS batch-mode=1
dependencies:
- build
# rv64ui-p-* tests
s-asm1-ver:
stage: serpent
script:
- make -j${NUM_JOBS} run-asm-tests1-verilator defines=SERPENT_PULP+AXI64_CACHE_PORTS
dependencies:
- build
# rv64ui-v-* tests
s-asm2-ver:
stage: serpent
script:
- make -j${NUM_JOBS} run-asm-tests2-verilator defines=SERPENT_PULP+AXI64_CACHE_PORTS
dependencies:
- build
s-bench-ver:
stage: serpent
script:
- make -j${NUM_JOBS} run-benchmarks-verilator defines=SERPENT_PULP+AXI64_CACHE_PORTS
dependencies:
- build
s-icache-quest:
stage: serpent
script:
- cd tb/tb_serpent_icache/
- make simc
- "grep 'CI: PASSED' summary.rep"
s-dcache-quest:
stage: serpent
script:
- cd tb/tb_serpent_dcache/
- make simc
- "grep 'CI: PASSED' RD0_summary.rep"
- "grep 'CI: PASSED' RD1_summary.rep"
- "grep 'CI: PASSED' TB_MEM_summary.rep"
dependencies:
- build
# s-torture:
# stage: serpent
# script:
# - make torture-rtest defines=SERPENT_PULP+AXI64_CACHE_PORTS batch-mode=1
# - make torture-rtest-verilator defines=SERPENT_PULP+AXI64_CACHE_PORTS
# dependencies:
# - build

View file

@ -30,6 +30,8 @@ addons:
- python-pexpect
- libusb-1.0-0-dev
- default-jdk
- zlib1g-dev
- valgrind
env:
global:
- RISCV="/home/travis/riscv_install"
@ -75,32 +77,60 @@ jobs:
name: run riscv benchmarks
script:
- ci/build-riscv-tests.sh
- make -j${NUM_JOBS} run-benchmarks-verilator
- make -j${NUM_JOBS} run-benchmarks-verilator
# rv64ui-p-* tests
- stage: test
name: run asm tests1
script:
- ci/build-riscv-tests.sh
- make -j${NUM_JOBS} run-asm-tests1-verilator
- make -j${NUM_JOBS} run-asm-tests1-verilator
# rv64ui-v-* tests
- stage: test
name: run asm tests2
script:
- ci/build-riscv-tests.sh
- make -j${NUM_JOBS} run-asm-tests2-verilator
- make -j${NUM_JOBS} run-asm-tests2-verilator
# amo tests
- stage: test
name: run amo tests
script:
- ci/build-riscv-tests.sh
- make -j${NUM_JOBS} run-amo-verilator
- make -j${NUM_JOBS} run-amo-verilator
- stage: test
name: run torture
script:
- ci/get-torture.sh
- make clean
- make torture-gen
- make torture-rtest-verilator
- make torture-rtest-verilator
- stage: test
name: run riscv benchmarks (serpent)
script:
- ci/build-riscv-tests.sh
- make -j${NUM_JOBS} run-benchmarks-verilator defines=SERPENT_PULP+AXI64_CACHE_PORTS
# rv64ui-p-* tests
- stage: test
name: run asm tests1 (serpent)
script:
- ci/build-riscv-tests.sh
- make -j${NUM_JOBS} run-asm-tests1-verilator defines=SERPENT_PULP+AXI64_CACHE_PORTS
# rv64ui-v-* tests
- stage: test
name: run asm tests2 (serpent)
script:
- ci/build-riscv-tests.sh
- make -j${NUM_JOBS} run-asm-tests2-verilator defines=SERPENT_PULP+AXI64_CACHE_PORTS
- stage: test
name: run torture (serpent)
script:
- ci/get-torture.sh
- make clean
- make torture-gen defines=SERPENT_PULP+AXI64_CACHE_PORTS
- make torture-rtest-verilator defines=SERPENT_PULP+AXI64_CACHE_PORTS
# extra time during long builds
install: travis_wait

View file

@ -73,8 +73,7 @@ sources:
- src/issue_read_operands.sv
- src/issue_stage.sv
- src/load_unit.sv
- src/lsu_arbiter.sv
- src/lsu.sv
- src/load_store_unit.sv
- src/mmu.sv
- src/mult.sv
- src/serdiv.sv

View file

@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Commit log feature
- Support for A-Extension
- Preliminary FP support
- Preliminary support for OpenPiton cache system
- Provisioned `aw_top` signal for close to memory atomics
- FPGA Support
- Misc bug-fixes

157
Flist.ariane Normal file
View file

@ -0,0 +1,157 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: File list for OpenPiton flow
// src/fpu_div_sqrt_mvp/hdl/fpu_ff.sv
// src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
// src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
// src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
// src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
// src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
// src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
// src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
// src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
// src/fpu/src/pkg/fpnew_pkg.vhd
// src/fpu/src/pkg/fpnew_fmts_pkg.vhd
// src/fpu/src/pkg/fpnew_comps_pkg.vhd
// src/fpu/src/pkg/fpnew_pkg_constants.vhd
// src/fpu/src/utils/fp_pipe.vhd
// src/fpu/src/utils/fp_rounding.vhd
// src/fpu/src/utils/fp_arbiter.vhd
// src/fpu/src/ops/fma_core.vhd
// src/fpu/src/ops/fp_fma.vhd
// src/fpu/src/ops/fp_divsqrt_multi.vhd
// src/fpu/src/ops/fp_noncomp.vhd
// src/fpu/src/ops/fp_f2fcasts_fmt.vhd
// src/fpu/src/ops/fp_f2icasts_fmt.vhd
// src/fpu/src/ops/fp_i2fcasts_fmt.vhd
// src/fpu/src/subunits/addmul_fmt_slice.vhd
// src/fpu/src/subunits/addmul_block.vhd
// src/fpu/src/subunits/divsqrt_multifmt_slice.vhd
// src/fpu/src/subunits/divsqrt_block.vhd
// src/fpu/src/subunits/noncomp_fmt_slice.vhd
// src/fpu/src/subunits/noncomp_block.vhd
// src/fpu/src/subunits/conv_fmt_slice.vhd
// src/fpu/src/subunits/conv_ifmt_slice.vhd
// src/fpu/src/subunits/conv_block.vhd
// src/fpu/src/fpnew.vhd
// src/fpu/src/fpnew_top.vhd
src/axi/src/axi_pkg.sv
src/debug/dm_pkg.sv
include/riscv_pkg.sv
include/ariane_pkg.sv
include/ariane_axi_pkg.sv
include/serpent_cache_pkg.sv
//include/std_cache_pkg.sv
include/axi_intf.sv
src/util/instruction_tracer_pkg.sv
src/util/instruction_tracer_if.sv
src/util/sram.sv
src/util/axi_master_connect.sv
src/util/axi_master_connect_rev.sv
src/util/axi_slave_connect.sv
src/util/axi_slave_connect_rev.sv
src/common_cells/src/fifo_v1.sv
src/common_cells/src/fifo_v2.sv
src/common_cells/src/fifo_v3.sv
src/common_cells/src/lfsr_8bit.sv
src/common_cells/src/lzc.sv
src/common_cells/src/rrarbiter.sv
src/common_cells/src/rstgen_bypass.sv
src/common_cells/src/sync_wedge.sv
src/common_cells/src/cdc_2phase.sv
src/common_cells/src/pipe_reg_simple.sv
src/fpga-support/rtl/SyncSpRamBeNx64.sv
src/axi_mem_if/src/axi2mem.sv
src/tech_cells_generic/src/cluster_clock_inverter.sv
src/tech_cells_generic/src/pulp_clock_mux2.sv
src/axi_adapter.sv
src/alu.sv
src/fpu_wrap.sv
src/ariane.sv
src/branch_unit.sv
src/compressed_decoder.sv
src/controller.sv
src/csr_buffer.sv
src/csr_regfile.sv
src/decoder.sv
src/ex_stage.sv
src/frontend/btb.sv
src/frontend/bht.sv
src/frontend/ras.sv
src/frontend/instr_scan.sv
src/frontend/frontend.sv
src/id_stage.sv
src/instr_realigner.sv
src/issue_read_operands.sv
src/issue_stage.sv
src/load_unit.sv
src/load_store_unit.sv
src/mmu.sv
src/mult.sv
src/multiplier.sv
src/serdiv.sv
src/perf_counters.sv
src/ptw.sv
src/ariane_regfile_ff.sv
src/re_name.sv
src/scoreboard.sv
src/store_buffer.sv
src/amo_buffer.sv
src/store_unit.sv
src/tlb.sv
src/commit_stage.sv
src/cache_subsystem/serpent_dcache_ctrl.sv
src/cache_subsystem/serpent_dcache_mem.sv
src/cache_subsystem/serpent_dcache_missunit.sv
src/cache_subsystem/serpent_dcache_wbuffer.sv
src/cache_subsystem/serpent_dcache.sv
src/cache_subsystem/serpent_icache.sv
src/cache_subsystem/serpent_l15_adapter.sv
src/cache_subsystem/serpent_cache_subsystem.sv
src/debug/debug_rom/debug_rom.sv
src/debug/dm_csrs.sv
src/clint/clint.sv
src/clint/axi_lite_interface.sv
src/debug/dm_mem.sv
src/debug/dm_top.sv
src/debug/dmi_cdc.sv
src/debug/dmi_jtag.sv
src/debug/dm_sba.sv
src/debug/dmi_jtag_tap.sv
openpiton/ariane_verilog_wrap.sv
openpiton/serpent_peripherals.sv
bootrom/bootrom.sv
src/plic/plic.sv
src/plic/plic_claim_complete_tracker.sv
src/plic/plic_comparator.sv
src/plic/plic_find_max.sv
src/plic/plic_gateway.sv
src/plic/plic_interface.sv
src/plic/plic_target_slice.sv
fpga/src/axi2apb/src/axi2apb_wrap.sv
fpga/src/axi2apb/src/axi2apb.sv
fpga/src/axi2apb/src/axi2apb_64_32.sv
fpga/src/axi_slice/src/axi_w_buffer.sv
fpga/src/axi_slice/src/axi_b_buffer.sv
fpga/src/axi_slice/src/axi_slice_wrap.sv
fpga/src/axi_slice/src/axi_slice.sv
fpga/src/axi_slice/src/axi_single_slice.sv
fpga/src/axi_slice/src/axi_ar_buffer.sv
fpga/src/axi_slice/src/axi_r_buffer.sv
fpga/src/axi_slice/src/axi_aw_buffer.sv
src/register_interface/src/apb_to_reg.sv
src/register_interface/src/reg_intf.sv

View file

@ -38,6 +38,7 @@ ariane_pkg := include/riscv_pkg.sv \
src/debug/dm_pkg.sv \
include/ariane_pkg.sv \
include/std_cache_pkg.sv \
include/serpent_cache_pkg.sv \
src/axi/src/axi_pkg.sv \
src/register_interface/src/reg_intf.sv \
include/axi_intf.sv \
@ -99,7 +100,10 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \
src/common_cells/src/rstgen.sv \
src/common_cells/src/stream_mux.sv \
src/common_cells/src/stream_demux.sv \
src/util/axi_connect.sv \
src/util/axi_master_connect.sv \
src/util/axi_slave_connect.sv \
src/util/axi_master_connect_rev.sv \
src/util/axi_slave_connect_rev.sv \
src/axi/src/axi_cut.sv \
src/axi/src/axi_join.sv \
src/axi/src/axi_delayer.sv \
@ -152,11 +156,10 @@ riscv-benchmarks := $(shell xargs printf '\n%s' < $(riscv-benchmarks-li
# Search here for include files (e.g.: non-standalone components)
incdir :=
# Compile and sim flags
compile_flag += +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive +define+$(defines)
compile_flag += +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive +define+$(defines)
uvm-flags += +UVM_NO_RELNOTES +UVM_VERBOSITY=LOW
questa-flags += -t 1ns -64 -coverage -classdebug $(gui-sim) $(QUESTASIM_FLAGS)
compile_flag_vhd += -64 -nologo -quiet -2008
uvm-flags += +UVM_NO_RELNOTES +UVM_VERBOSITY=LOW
questa-flags += -t 1ns -64 -coverage -classdebug $(gui-sim) $(QUESTASIM_FLAGS)
# Iterate over all include directories and write them with +incdir+ prefixed
# +incdir+ works for Verilator and QuestaSim
@ -164,7 +167,9 @@ list_incdir := $(foreach dir, ${incdir}, +incdir+$(dir))
# RISCV torture setup
riscv-torture-dir := tmp/riscv-torture
riscv-torture-bin := java -Xmx1G -Xss8M -XX:MaxPermSize=128M -jar sbt-launch.jar
# old java flags -Xmx1G -Xss8M -XX:MaxPermSize=128M
# -XshowSettings -Xdiag
riscv-torture-bin := java -jar sbt-launch.jar
# if defined, calls the questa targets in batch mode
ifdef batch-mode
@ -197,7 +202,7 @@ build: $(library) $(library)/.build-srcs $(library)/.build-tb $(dpi-library)/ari
vopt$(questa_version) $(compile_flag) -work $(library) $(top_level) -o $(top_level)_optimized +acc -check_synthesis
# src files
$(library)/.build-srcs: $(ariane_pkg) $(util) $(src) $(library) $(uart_src)
$(library)/.build-srcs: $(util) $(library)
vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(ariane_pkg)) $(list_incdir) -suppress 2583
vcom$(questa_version) $(compile_flag_vhd) -work $(library) -pedanticerrors $(filter %.vhd,$(ariane_pkg))
vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(util)) $(list_incdir) -suppress 2583
@ -208,9 +213,9 @@ $(library)/.build-srcs: $(ariane_pkg) $(util) $(src) $(library) $(uart_src)
touch $(library)/.build-srcs
# build TBs
$(library)/.build-tb: $(dpi) $(tbs)
$(library)/.build-tb: $(dpi)
# Compile top level
vlog$(questa_version) -sv $(tbs) -work $(library)
vlog$(questa_version) $(compile_flag) -sv $(tbs) -work $(library)
touch $(library)/.build-tb
$(library):
@ -333,33 +338,31 @@ torture-itest:
cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -a output/test.S'
# torture-rtest: writes a call.sh wrapper into $(riscv-torture-dir) that
# re-enters this Makefile's run-torture$(torture-logs) target, hands that
# wrapper to the torture test runner (testrun/run -r ./call.sh), logs the
# output to $(test-location).log and finally runs check-torture.
torture-rtest: build
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture$(torture-logs) defines=$(defines) test-location=$(test-location)" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture$(torture-logs) batch-mode=1 defines=$(defines) test-location=$(test-location)" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a $(test-location).S' | tee $(test-location).log
# use $(MAKE) (not a literal `make`) so that -j/-n and the jobserver are
# propagated to the sub-make, consistent with the other torture targets
$(MAKE) check-torture test-location=$(test-location)
torture-dummy: build
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture defines=$(defines) test-location=\$${@: -1}" > call.sh
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture batch-mode=1 defines=$(defines) test-location=\$${@: -1}" > call.sh
torture-rnight: build
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture$(torture-logs) defines=$(defines) test-location=\$${@: -1}" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture$(torture-logs) batch-mode=1 defines=$(defines) test-location=\$${@: -1}" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && $(riscv-torture-bin) 'overnight/run -r ./call.sh -g none' | tee output/overnight.log
$(MAKE) check-torture
torture-rtest-verilator: verilate
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture-verilator defines=$(defines)" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture-verilator batch-mode=1 defines=$(defines)" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a output/test.S' | tee output/test.log
$(MAKE) check-torture
run-torture: build
vsim${questa_version} +permissive $(questa-flags) -c -lib $(library) +max-cycles=$(max_cycles)+UVM_TESTNAME=$(test_case) \
+BASEDIR=$(riscv-torture-dir) $(uvm-flags) +jtag_rbb_enable=0 -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \
-do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
vsim${questa_version} +permissive $(questa-flags) $(questa-cmd) -lib $(library) +max-cycles=$(max_cycles)+UVM_TESTNAME=$(test_case) \
+BASEDIR=$(riscv-torture-dir) $(uvm-flags) +jtag_rbb_enable=0 -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \
${top_level}_optimized +permissive-off +signature=$(riscv-torture-dir)/$(test-location).rtlsim.sig ++$(riscv-torture-dir)/$(test-location) ++$(target-options)
run-torture-log: build
vsim${questa_version} +permissive $(questa-flags) -c -lib $(library) +max-cycles=$(max_cycles)+UVM_TESTNAME=$(test_case) \
+BASEDIR=$(riscv-torture-dir) $(uvm-flags) +jtag_rbb_enable=0 -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \
-do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; log -r /*; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
vsim${questa_version} +permissive $(questa-flags) $(questa-cmd) -lib $(library) +max-cycles=$(max_cycles)+UVM_TESTNAME=$(test_case) \
+BASEDIR=$(riscv-torture-dir) $(uvm-flags) +jtag_rbb_enable=0 -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \
${top_level}_optimized +permissive-off +signature=$(riscv-torture-dir)/$(test-location).rtlsim.sig ++$(riscv-torture-dir)/$(test-location) ++$(target-options)
cp vsim.wlf $(riscv-torture-dir)/$(test-location).wlf
cp trace_hart_0000.log $(riscv-torture-dir)/$(test-location).trace

View file

@ -213,11 +213,21 @@ If you are on an Ubuntu based system you need to add the following udev rule to
> SUBSYSTEM=="usb", ACTION=="add", ATTRS{idProduct}=="002a", ATTRS{idVendor}=="15ba", MODE="664", GROUP="plugdev"
>```
### Preliminary Support for OpenPiton Cache System
Ariane version 4.0 has preliminary support for the OpenPiton distributed cache system from Princeton University. To this end, a different L1 cache subsystem (`src/cache_subsystem/serpent_cache_subsystem.sv`) has been developed that follows a write-through protocol and that has support for cache invalidations and atomics.
The corresponding integration patches will soon be released on the [OpenPiton GitHub repository](https://github.com/PrincetonUniversity/openpiton).
To activate the different cache system, compile your code with the macro `SERPENT_PULP`.
Note that this feature is still in beta and may therefore not be completely bug-free.
## Planned Improvements
Check out the issue tab, which also loosely tracks planned improvements.
> Atomics are implemented for a single core environment. They will semantically fail in a multi-core setup.
> Atomics are implemented for a single core environment. They will semantically fail in a multi-core setup (unless you are using the serpent flavor of Ariane in combination with the OpenPiton cache subsystem, see previous section).
## Going Beyond

1475
bootrom/encoding.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,7 @@
torture.generator.nseqs 200
torture.generator.memsize 1024
torture.generator.fprnd 0
torture.generator.amo true
torture.generator.amo false
torture.generator.mul true
torture.generator.divider true
torture.generator.segment true

View file

@ -17,5 +17,6 @@ git submodule update --init --recursive
# copy ariane specific config
cp config/default.config config/default.config.bak
cp $ROOT/ci/default.config config/default.config
git checkout ./output/Makefile
git apply $ROOT/ci/torture_make.patch

View file

@ -19,7 +19,9 @@ sudo apt install \
texinfo \
python-pexpect \
libusb-1.0-0-dev \
default-jdk
default-jdk \
zlib1g-dev \
valgrind
# customize your paths here
source ci/path-setup.sh
@ -40,13 +42,13 @@ make clean
make torture-gen
# run asm tests on verilator
make -j${NUM_JOBS} verilate
make -j${NUM_JOBS} run-asm-tests-verilator
make -j${NUM_JOBS} run-benchmarks-verilator
make -j${NUM_JOBS} torture-rtest-verilator
make -j${NUM_JOBS} verilate
make -j${NUM_JOBS} run-asm-tests-verilator
make -j${NUM_JOBS} run-benchmarks-verilator
make -j${NUM_JOBS} torture-rtest-verilator
# run asm tests on questa
make -j${NUM_JOBS} build batch-mode=1
make -j${NUM_JOBS} run-asm-tests batch-mode=1
make -j${NUM_JOBS} run-benchmarks batch-mode=1
make -j${NUM_JOBS} torture-rtest batch-mode=1
make -j${NUM_JOBS} build batch-mode=1
make -j${NUM_JOBS} run-asm-tests batch-mode=1
make -j${NUM_JOBS} run-benchmarks batch-mode=1
make -j${NUM_JOBS} torture-rtest batch-mode=1

View file

@ -9,7 +9,7 @@ if [ -z ${NUM_JOBS} ]; then
NUM_JOBS=1
fi
if [ ! -e "$RISCV/dtc/dtc" ]; then
if [ ! -e "$RISCV/bin/dtc" ]; then
echo "Installing DTC"
git clone https://git.kernel.org/pub/scm/utils/dtc/dtc.git
cd dtc

View file

@ -10,7 +10,9 @@ fi
if [ ! -e "$VERILATOR_ROOT/bin/verilator" ]; then
echo "Installing Verilator"
wget https://www.veripool.org/ftp/verilator-3.924.tgz
tar xzf verilator*.t*gz && cd verilator-*
tar xzf verilator*.t*gz
rm verilator*.t*gz
cd verilator-*
mkdir -p $VERILATOR_ROOT
# copy scripts
autoconf && ./configure --prefix="$VERILATOR_ROOT" && make -j${NUM_JOBS}

View file

@ -4,38 +4,38 @@ index cf1214f..c81bccc 100644
+++ b/output/Makefile
@@ -20,9 +20,9 @@ extra_files =
#--------------------------------------------------------------------
RISCV_GCC = riscv64-unknown-elf-gcc
-RISCV_GCC_OPTS = -nostdlib -nostartfiles -Wa,-march=RVIMAFDXhwacha
+RISCV_GCC_OPTS = -nostdlib -nostartfiles -Wa,-march=rv64imc
RISCV_OBJDUMP = riscv64-unknown-elf-objdump --disassemble-all --section=.text --section=.data --section=.bss
-RISCV_SIM = spike --extension=hwacha
+RISCV_SIM = spike
+RISCV_SIM = spike
#------------------------------------------------------------
# Build assembly tests
@@ -38,9 +38,6 @@ $(asm_tests_dump): %.dump: %
$(asm_tests_bin): %: %.S $(extra_files)
$(RISCV_GCC) $(RISCV_GCC_OPTS) -I../env/p -T../env/p/link.ld $< -o $@
-$(asm_tests_hex): %.hex: % $(extra_files)
- elf2hex 16 16384 $< > $@
-
$(asm_tests_sig): %.sig: %
$(RISCV_SIM) +signature=$@ $<
@@ -51,12 +48,12 @@ run: $(asm_tests_sig)
echo; perl -ne 'print " [$$1] $$ARGV \t$$2\n" if /\*{3}(.{8})\*{3}(.*)/' \
$(asm_tests_sig); echo;
-junk += $(asm_tests_bin) $(asm_tests_dump) $(asm_tests_sig) $(asm_tests_hex)
+junk += $(asm_tests_bin) $(asm_tests_dump) $(asm_tests_sig)
+junk += $(asm_tests_bin) $(asm_tests_dump) $(asm_tests_sig)
#------------------------------------------------------------
# Default
-all: $(asm_tests_dump) $(asm_tests_hex)
+all: $(asm_tests_dump)
+all: $(asm_tests_dump)
#------------------------------------------------------------
# Clean up

View file

@ -19,7 +19,9 @@ sudo apt install \
texinfo \
python-pexpect \
libusb-1.0-0-dev \
default-jdk
default-jdk \
zlib1g-dev \
valgrind
# customize your paths here
source ci/path-setup.sh
@ -40,7 +42,7 @@ make clean
make torture-gen
# run asm tests on verilator
make -j${NUM_JOBS} verilate
make -j${NUM_JOBS} run-asm-tests-verilator
make -j${NUM_JOBS} run-benchmarks-verilator
make -j${NUM_JOBS} torture-rtest-verilator
make -j${NUM_JOBS} verilate
make -j${NUM_JOBS} run-asm-tests-verilator
make -j${NUM_JOBS} run-benchmarks-verilator
make -j${NUM_JOBS} torture-rtest-verilator

View file

@ -16,6 +16,10 @@
*/
package ariane_axi;
// used in axi_adapter.sv
typedef enum logic { SINGLE_REQ, CACHE_LINE_REQ } ad_req_t;
// 4 is recommended by the AXI standard, so let's stick to it, do not change
localparam IdWidth = 4;
localparam UserWidth = 1;

View file

@ -16,6 +16,13 @@
* in one package.
*/
// this is needed to propagate the
// configuration in case Ariane is
// instantiated in OpenPiton
`ifdef PITON_ARIANE
`include "l15.tmp.h"
`endif
package ariane_pkg;
// ---------------
@ -41,8 +48,16 @@ package ariane_pkg;
// depth of store-buffers, this needs to be a power of two
localparam int unsigned DEPTH_SPEC = 4;
`ifdef SERPENT_PULP
// in this case we can use a small commit queue since we have a write buffer in the dcache
// we could in principle do without the commit queue in this case, but the timing degrades if we do that due
// to longer paths into the commit stage
localparam int unsigned DEPTH_COMMIT = 2;
`else
// allocate more space for the commit buffer to be on the safe side, this needs to be a power of two
localparam int unsigned DEPTH_COMMIT = 8;
`endif
// Floating-point extensions configuration
localparam bit RVF = 1'b0; // Is F extension enabled
@ -107,7 +122,7 @@ package ariane_pkg;
// static debug hartinfo
localparam dm::hartinfo_t DebugHartInfo = '{
zero1: '0,
nscratch: 1, // DTM currently needs at least one scratch register
nscratch: 2, // Debug module needs at least two scratch regs
zero0: '0,
dataaccess: 1'b1, // data registers are memory mapped in the debugger
datasize: dm::DataCount,
@ -244,17 +259,33 @@ package ariane_pkg;
// Cache config
// ---------------
// I$
localparam int unsigned ICACHE_INDEX_WIDTH = 12; // in bit
localparam int unsigned ICACHE_TAG_WIDTH = 44; // in bit
localparam int unsigned ICACHE_SET_ASSOC = 4;
localparam int unsigned ICACHE_LINE_WIDTH = 128; // in bit
// D$
localparam int unsigned DCACHE_INDEX_WIDTH = 12;
localparam int unsigned DCACHE_TAG_WIDTH = 44;
localparam int unsigned DCACHE_LINE_WIDTH = 128;
localparam int unsigned DCACHE_SET_ASSOC = 8;
// if serpent pulp is used standalone (outside of openpiton)
// we just use the default config of ariane
// otherwise we have to propagate the openpiton L15 configuration from l15.h
`ifdef PITON_ARIANE
// I$
localparam int unsigned ICACHE_LINE_WIDTH = `CONFIG_L1I_CACHELINE_WIDTH;
localparam int unsigned ICACHE_SET_ASSOC = `CONFIG_L1I_ASSOCIATIVITY;
localparam int unsigned ICACHE_INDEX_WIDTH = $clog2(`CONFIG_L1I_SIZE / ICACHE_SET_ASSOC);
localparam int unsigned ICACHE_TAG_WIDTH = 56 - ICACHE_INDEX_WIDTH;
// D$
localparam int unsigned DCACHE_LINE_WIDTH = `CONFIG_L1D_CACHELINE_WIDTH;
localparam int unsigned DCACHE_SET_ASSOC = `CONFIG_L1D_ASSOCIATIVITY;
localparam int unsigned DCACHE_INDEX_WIDTH = $clog2(`CONFIG_L1D_SIZE / DCACHE_SET_ASSOC);
localparam int unsigned DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH;
`else
// align to openpiton for the time being (this should be more configurable in the future)
// I$
localparam int unsigned ICACHE_INDEX_WIDTH = 12; // in bit
localparam int unsigned ICACHE_TAG_WIDTH = 44; // in bit
localparam int unsigned ICACHE_LINE_WIDTH = 128; // in bit
localparam int unsigned ICACHE_SET_ASSOC = 4;
// D$
localparam int unsigned DCACHE_INDEX_WIDTH = 12; // in bit
localparam int unsigned DCACHE_TAG_WIDTH = 44; // in bit
localparam int unsigned DCACHE_LINE_WIDTH = 128; // in bit
localparam int unsigned DCACHE_SET_ASSOC = 8;
`endif
// ---------------
// EX Stage
@ -440,8 +471,20 @@ package ariane_pkg;
// Atomics
// --------------------
typedef enum logic [3:0] {
AMO_NONE, AMO_LR, AMO_SC, AMO_SWAP, AMO_ADD, AMO_AND,
AMO_OR, AMO_XOR, AMO_MAX, AMO_MAXU, AMO_MIN, AMO_MINU
AMO_NONE =4'b0000,
AMO_LR =4'b0001,
AMO_SC =4'b0010,
AMO_SWAP =4'b0011,
AMO_ADD =4'b0100,
AMO_AND =4'b0101,
AMO_OR =4'b0110,
AMO_XOR =4'b0111,
AMO_MAX =4'b1000,
AMO_MAXU =4'b1001,
AMO_MIN =4'b1010,
AMO_MINU =4'b1011,
AMO_CAS1 =4'b1100, // unused, not part of riscv spec, but provided in OpenPiton
AMO_CAS2 =4'b1101 // unused, not part of riscv spec, but provided in OpenPiton
} amo_t;
typedef struct packed {
@ -523,9 +566,9 @@ package ariane_pkg;
} dcache_req_i_t;
typedef struct packed {
logic data_gnt;
logic data_rvalid;
logic [63:0] data_rdata;
logic data_gnt;
logic data_rvalid;
logic [63:0] data_rdata;
} dcache_req_o_t;
// ----------------------

View file

@ -0,0 +1,353 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Package for OpenPiton compatible L1 cache subsystem
// this is needed to propagate the
// configuration in case Ariane is
// instantiated in OpenPiton
`ifdef PITON_ARIANE
`include "l15.tmp.h"
`include "define.tmp.h"
`endif
package serpent_cache_pkg;
// these parameters need to coincide with the
// L1.5 parameterization, do not change
`ifdef PITON_ARIANE
localparam L15_SET_ASSOC = `CONFIG_L15_ASSOCIATIVITY;
localparam L15_TID_WIDTH = `L15_THREADID_WIDTH;
localparam L15_TLB_CSM_WIDTH = `TLB_CSM_WIDTH;
`else
localparam L15_SET_ASSOC = ariane_pkg::DCACHE_SET_ASSOC;// align with dcache for compatibility with the standard Ariane setup
localparam L15_TID_WIDTH = 2;
localparam L15_TLB_CSM_WIDTH = 33;
`endif
localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC);
localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC);
localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC);
// FIFO depths of L15 adapter
localparam ADAPTER_REQ_FIFO_DEPTH = 2;
localparam ADAPTER_RTRN_FIFO_DEPTH = 2;
// Calculated parameter
localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH/8);
localparam ICACHE_NUM_WORDS = 2**(ariane_pkg::ICACHE_INDEX_WIDTH-ICACHE_OFFSET_WIDTH);
localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS);// excluding byte offset
localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8);
localparam DCACHE_NUM_WORDS = 2**(ariane_pkg::DCACHE_INDEX_WIDTH-DCACHE_OFFSET_WIDTH);
localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS);// excluding byte offset
localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH/64;
// write buffer parameterization
localparam DCACHE_WBUF_DEPTH = 8;
localparam DCACHE_MAX_TX = 2**L15_TID_WIDTH;
localparam DCACHE_ID_WIDTH = $clog2(DCACHE_MAX_TX);
// Write buffer entry: one 64 bit data word plus per-byte status flags and
// the cache lookup state of that word.
typedef struct packed {
logic [ariane_pkg::DCACHE_INDEX_WIDTH+ariane_pkg::DCACHE_TAG_WIDTH-1:0] wtag; // DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH bits wide; presumably the word address of this entry -- TODO confirm against the write buffer implementation
logic [63:0] data; // buffered data word
logic [7:0] dirty; // byte is dirty
logic [7:0] valid; // byte is valid
logic [7:0] txblock; // byte is part of transaction in-flight
logic checked; // if cache state of this word has been checked
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] hit_oh; // valid way in the cache (one bit per dcache way)
} wbuffer_t;
// TX status registers are indexed with the transaction ID
// they basically store which bytes from which buffer entry are part
// of that transaction
typedef struct packed {
logic vld;
logic [7:0] be;
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] ptr;
} tx_stat_t;
// local interfaces between caches and L15 adapter
// request types issued by the D$ towards the adapter
typedef enum logic [1:0] {
DCACHE_STORE_REQ,
DCACHE_LOAD_REQ,
DCACHE_ATOMIC_REQ,
DCACHE_INT_REQ } dcache_out_t;
// return types delivered by the adapter to the D$
typedef enum logic [2:0] {
DCACHE_INV_REQ, // no ack from the core required
DCACHE_STORE_ACK,// note: this may contain an invalidation vector, too
DCACHE_LOAD_ACK,
DCACHE_ATOMIC_ACK,
DCACHE_INT_ACK } dcache_in_t;
// return types delivered by the adapter to the I$
typedef enum logic [0:0] {
ICACHE_INV_REQ, // no ack from the core required
ICACHE_IFILL_ACK} icache_in_t;
// invalidation vector, embedded in both the I$ and the D$ return structs below
typedef struct packed {
logic vld; // invalidate only affected way
logic all; // invalidate all ways
logic [ariane_pkg::ICACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate (ICACHE_INDEX_WIDTH bits, also when used for the D$)
logic [L15_WAY_WIDTH-1:0] way; // way to invalidate
} cache_inval_t;
// icache interface
// request from the I$ to the adapter
typedef struct packed {
logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] way; // way to replace
logic [63:0] paddr; // physical address
logic nc; // noncacheable
logic [L15_TID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane)
} icache_req_t;
// return from the adapter to the I$
typedef struct packed {
icache_in_t rtype; // see definitions above
logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] data; // full cache line width
cache_inval_t inv; // invalidation vector
logic nc; // noncacheable
logic [L15_TID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane)
logic f4b; // fetch 4 bytes only (from I/O space)
} icache_rtrn_t;
// dcache interface
// request from the D$ to the adapter
typedef struct packed {
dcache_out_t rtype; // see definitions above
logic [2:0] size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
logic [L1D_WAY_WIDTH-1:0] way; // way to replace
logic [63:0] paddr; // physical address
logic [63:0] data; // word width of processor (no block stores at the moment)
logic nc; // noncacheable
logic [L15_TID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane)
ariane_pkg::amo_t amo_op; // amo opcode
} dcache_req_t;
// return from the adapter to the D$
typedef struct packed {
dcache_in_t rtype; // see definitions above
logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // full cache line width
cache_inval_t inv; // invalidation vector
logic nc; // noncacheable
logic [L15_TID_WIDTH-1:0] tid; // thread id (used as transaction id in Ariane)
} dcache_rtrn_t;
// taken from iop.h in openpiton
// to l1.5 (only marked subset is used)
// the encodings are explicit since they have to match the OpenPiton side, do not change
typedef enum logic [4:0] {
L15_LOAD_RQ = 5'b00000, // load request
L15_IMISS_RQ = 5'b10000, // instruction fill request
L15_STORE_RQ = 5'b00001, // store request
L15_ATOMIC_RQ = 5'b00110, // atomic op
//L15_CAS1_RQ = 5'b00010, // compare and swap1 packet (OpenSparc atomics)
//L15_CAS2_RQ = 5'b00011, // compare and swap2 packet (OpenSparc atomics)
//L15_SWAP_RQ = 5'b00110, // swap packet (OpenSparc atomics), same encoding as L15_ATOMIC_RQ
L15_STRLOAD_RQ = 5'b00100, // unused
L15_STRST_RQ = 5'b00101, // unused
L15_STQ_RQ = 5'b00111, // unused
L15_INT_RQ = 5'b01001, // interrupt request
L15_FWD_RQ = 5'b01101, // unused
L15_FWD_RPY = 5'b01110, // unused
L15_RSVD_RQ = 5'b11111 // unused
} l15_reqtypes_t;
// from l1.5 (only marked subset is used)
// entries that are commented out share the encoding 4'b0011 with L15_EVICT_REQ
// and can therefore not be enum members at the same time
typedef enum logic [3:0] {
L15_LOAD_RET = 4'b0000, // load packet
// L15_INV_RET = 4'b0011, // invalidate packet, not unique...
L15_ST_ACK = 4'b0100, // store ack packet
//L15_AT_ACK = 4'b0011, // unused, not unique...
L15_INT_RET = 4'b0111, // interrupt packet
L15_TEST_RET = 4'b0101, // unused
L15_FP_RET = 4'b1000, // unused
L15_IFILL_RET = 4'b0001, // instruction fill packet
L15_EVICT_REQ = 4'b0011, // eviction request
L15_ERR_RET = 4'b1100, // unused
L15_STRLOAD_RET = 4'b0010, // unused
L15_STRST_ACK = 4'b0110, // unused
L15_FWD_RQ_RET = 4'b1010, // unused
L15_FWD_RPY_RET = 4'b1011, // unused
L15_RSVD_RET = 4'b1111, // unused
L15_CPX_RESTYPE_ATOMIC_RES = 4'b1110 // custom type for atomic responses
} l15_rtrntypes_t;
// request struct driven towards the L1.5
// packed struct: the field order defines the bit layout of the flattened port vector, do not reorder
typedef struct packed {
logic l15_val; // valid signal, asserted with request
logic l15_req_ack; // ack for response
l15_reqtypes_t l15_rqtype; // see l15_reqtypes_t above for encoding
logic l15_nc; // non-cacheable bit
logic [2:0] l15_size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
logic [L15_TID_WIDTH-1:0] l15_threadid; // currently 0 or 1
logic l15_prefetch; // unused in openpiton
logic l15_invalidate_cacheline; // unused by Ariane as L1 has no ECC at the moment
logic l15_blockstore; // unused in openpiton
logic l15_blockinitstore; // unused in openpiton
logic [L15_WAY_WIDTH-1:0] l15_l1rplway; // way to replace
logic [39:0] l15_address; // physical address (40bit)
logic [63:0] l15_data; // word to write
logic [63:0] l15_data_next_entry; // unused in Ariane (only used for CAS atomic requests)
logic [L15_TLB_CSM_WIDTH-1:0] l15_csm_data; // unused in Ariane
logic [3:0] l15_amo_op; // atomic operation type
} l15_req_t;
// return struct received from the L1.5
// packed struct: the field order defines the bit layout of the flattened port vector, do not reorder
typedef struct packed {
logic l15_ack; // ack for request struct
logic l15_header_ack; // ack for request struct (NOTE(review): same description as l15_ack, the difference is not visible here - confirm)
logic l15_val; // valid signal for return struct
l15_rtrntypes_t l15_returntype; // see l15_rtrntypes_t above for encoding
logic l15_l2miss; // unused in Ariane
logic [1:0] l15_error; // unused in openpiton
logic l15_noncacheable; // non-cacheable bit
logic l15_atomic; // asserted in load return and store ack packets of atomic tx
logic [L15_TID_WIDTH-1:0] l15_threadid; // used as transaction ID
logic l15_prefetch; // unused in openpiton
logic l15_f4b; // 4byte instruction fill from I/O space (nc).
logic [63:0] l15_data_0; // used for both caches
logic [63:0] l15_data_1; // used for both caches
logic [63:0] l15_data_2; // currently only used for I$
logic [63:0] l15_data_3; // currently only used for I$
logic l15_inval_icache_all_way; // invalidate all ways
logic l15_inval_dcache_all_way; // unused in openpiton
logic [15:4] l15_inval_address_15_4; // invalidate selected cacheline
logic l15_cross_invalidate; // unused in openpiton
logic [L15_WAY_WIDTH-1:0] l15_cross_invalidate_way; // unused in openpiton
logic l15_inval_dcache_inval; // invalidate selected cacheline and way
logic l15_inval_icache_inval; // unused in openpiton
logic [L15_WAY_WIDTH-1:0] l15_inval_way; // way to invalidate
logic l15_blockinitstore; // unused in openpiton
} l15_rtrn_t;
// Swap the endianness of a 64bit word:
// byte b of the result is byte (7-b) of the input.
function automatic logic [63:0] swendian64(input logic [63:0] in);
  automatic logic [63:0] swapped;
  for (int b = 0; b < 8; b++) begin
    // mirror the byte lane around the middle of the word
    swapped[b*8 +: 8] = in[(7-b)*8 +: 8];
  end
  return swapped;
endfunction
// Convert a binary I$ way index into a one-hot way vector
// (bit 'in' of the result is set, all other bits are zero).
function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh (
  input logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] in
);
  // start from an all-zero vector, then raise the selected way
  logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] onehot = '0;
  onehot[in] = 1'b1;
  return onehot;
endfunction
// Convert a binary D$ way index into a one-hot way vector
// (bit 'in' of the result is set, all other bits are zero).
function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh (
  input logic [$clog2(ariane_pkg::DCACHE_SET_ASSOC)-1:0] in
);
  // start from an all-zero vector, then raise the selected way
  logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] onehot = '0;
  onehot[in] = 1'b1;
  return onehot;
endfunction
// Convert a binary bank index (64bit word within a D$ cache line)
// into a one-hot vector with DCACHE_NUM_BANKS bits.
function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh (
  input logic [$clog2(DCACHE_NUM_BANKS)-1:0] in
);
  // start from an all-zero vector, then raise the selected bank
  logic [DCACHE_NUM_BANKS-1:0] onehot = '0;
  onehot[in] = 1'b1;
  return onehot;
endfunction
// Count the number of set bits in a 64bit vector.
// Note: the result is only 6 bits wide, so an input with all 64 bits set
// wraps around and yields 0.
function automatic logic [5:0] popcnt64 (
  input logic [63:0] in
);
  logic [5:0] ones;
  ones = '0;
  for (int unsigned pos = 0; pos < 64; pos++) begin
    // zero-extend the single bit to the accumulator width before adding
    ones = ones + 6'(in[pos]);
  end
  return ones;
endfunction : popcnt64
// Build the byte enable mask of an access within a 64bit word.
//   offset: byte offset of the access within the word
//   size:   00=byte, 01=hword, 10=word, 11=dword
// For dwords the offset is ignored and all bytes are enabled.
function automatic logic [7:0] toByteEnable8(
  input logic [2:0] offset,
  input logic [1:0] size
);
  logic [7:0] mask = 8'h00;
  unique case (size)
    2'b00:   mask[offset]      = 1'b1;    // byte
    2'b01:   mask[offset +: 2] = 2'b11;   // hword
    2'b10:   mask[offset +: 4] = 4'b1111; // word
    default: mask              = 8'hFF;   // dword
  endcase
  return mask;
endfunction : toByteEnable8
// OpenPiton requires the data to be replicated over the whole 64bit word
// for accesses that are smaller than a dword.
//   data:   source word
//   offset: byte offset of the element to replicate
//   size:   00=byte, 01=hword, 10=word, 11=dword (data is passed through)
function automatic logic [63:0] repData64(
  input logic [63:0] data,
  input logic [2:0] offset,
  input logic [1:0] size
);
  logic [63:0] rep;
  unique case (size)
    2'b00:   rep = {8{data[offset*8 +: 8]}};  // byte
    2'b01:   rep = {4{data[offset*8 +: 16]}}; // hword
    2'b10:   rep = {2{data[offset*8 +: 32]}}; // word
    default: rep = data;                      // dword
  endcase
  return rep;
endfunction : repData64
// Derive the transaction size from a byte enable mask.
// Note: this is OpenPiton specific, unaligned words cannot be transmitted.
// Every mask that is not a naturally aligned hword, word or dword therefore
// maps to the byte size, and the bytes have to be transmitted one after the other.
function automatic logic [1:0] toSize64(
  input logic [7:0] be
);
  unique case (be)
    // dword
    8'b1111_1111:
      return 2'b11;
    // word
    8'b0000_1111, 8'b1111_0000:
      return 2'b10;
    // hword
    8'b1100_0000, 8'b0011_0000, 8'b0000_1100, 8'b0000_0011:
      return 2'b01;
    // individual bytes
    default:
      return 2'b00;
  endcase
endfunction : toSize64
// Align the physical address to the specified size by clearing its LSBs:
//   000: bytes        (unchanged)
//   001: hword        (bit 0 cleared)
//   010: word         (bits 1:0 cleared)
//   011: dword        (bits 2:0 cleared)
//   111: DCACHE line  (the DCACHE_OFFSET_WIDTH lowest bits cleared)
// All other size encodings leave the address unchanged.
function automatic logic [63:0] paddrSizeAlign(
  input logic [63:0] paddr,
  input logic [2:0] size
);
  logic [63:0] aligned;
  unique case (size)
    3'b001:  aligned = {paddr[63:1], 1'b0};
    3'b010:  aligned = {paddr[63:2], 2'b00};
    3'b011:  aligned = {paddr[63:3], 3'b000};
    3'b111:  aligned = {paddr[63:DCACHE_OFFSET_WIDTH], {DCACHE_OFFSET_WIDTH{1'b0}}};
    default: aligned = paddr;
  endcase
  return aligned;
endfunction : paddrSizeAlign
endpackage : serpent_cache_pkg

View file

@ -23,9 +23,6 @@ package std_cache_pkg;
localparam DCACHE_DIRTY_WIDTH = ariane_pkg::DCACHE_SET_ASSOC*2;
// localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not
typedef enum logic { SINGLE_REQ, CACHE_LINE_REQ } req_t;
typedef struct packed {
logic [1:0] id; // id for which we handle the miss
logic valid;

View file

@ -0,0 +1,129 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner, ETH Zurich
// Date: 19.03.2017
// Description: Ariane Top-level wrapper to break out SV structs to logic vectors.
// Select the AXI64 cache ports by default, unless the serpent PULP
// extension is used (SERPENT_PULP defined).
`ifndef SERPENT_PULP
`ifndef AXI64_CACHE_PORTS
`define AXI64_CACHE_PORTS
`endif
`endif

// Wrapper around the ariane core that exposes the memory-side struct ports as
// plain bit vectors and holds the core in reset for a fixed number of cycles
// after reset_l has been released.
module ariane_verilog_wrap #(
  parameter bit          SwapEndianess = 1,                   // swap endianess in l15 adapter
  parameter logic [63:0] CachedAddrEnd = 64'h80_0000_0000,    // end of cached region
  parameter logic [63:0] CachedAddrBeg = 64'h00_8000_0000     // begin of cached region
) (
  input                  clk_i,
  input                  reset_l,      // this is an openpiton-specific name, do not change (hier. paths in TB use this)
  // Core ID, Cluster ID and boot address are considered more or less static
  input [63:0]           boot_addr_i,  // reset boot address
  input [63:0]           hart_id_i,    // hart id in a multicore environment (reflected in a CSR)
  // Interrupt inputs
  input [1:0]            irq_i,        // level sensitive IR lines, mip & sip (async)
  input                  ipi_i,        // inter-processor interrupts (async)
  // Timer facilities
  input                  time_irq_i,   // timer interrupt in (async)
  input                  debug_req_i,  // debug request (async)
`ifdef AXI64_CACHE_PORTS
  // AXI (memory side)
  output [$size(ariane_axi::req_t)-1:0]             axi_req_o,
  input  [$size(ariane_axi::resp_t)-1:0]            axi_resp_i
`else
  // L15 (memory side)
  output [$size(serpent_cache_pkg::l15_req_t)-1:0]  l15_req_o,
  input  [$size(serpent_cache_pkg::l15_rtrn_t)-1:0] l15_rtrn_i
`endif
);

  /////////////////////////////
  // struct <-> bit vector conversion
  /////////////////////////////

`ifdef AXI64_CACHE_PORTS
  ariane_axi::req_t  axi_req;
  ariane_axi::resp_t axi_resp;

  assign axi_resp  = axi_resp_i;
  assign axi_req_o = axi_req;
`else
  // L15 (memory side)
  serpent_cache_pkg::l15_req_t  l15_req;
  serpent_cache_pkg::l15_rtrn_t l15_rtrn;

  assign l15_rtrn  = l15_rtrn_i;
  assign l15_req_o = l15_req;
`endif

  /////////////////////////////
  // delayed reset release
  /////////////////////////////

  // This is a workaround: we basically wait for 32k cycles such that the
  // SRAMs in openpiton can initialize.
  //   128KB..8K cycles
  //   256KB..16K cycles
  //   etc, so this should be enough for 512k per tile
  // (A former variant of this workaround, which released the core upon the
  // first L15_INT_RET packet since interrupts are not fully supported yet,
  // is not in use.)
  localparam int unsigned WakeUpCntWidth = 16;

  logic [WakeUpCntWidth-1:0] wake_up_cnt_d, wake_up_cnt_q;
  logic                      wake_up_done;
  logic                      rst_n;

  // the counter saturates as soon as its MSB is set, i.e., after 2**15 cycles
  assign wake_up_done  = wake_up_cnt_q[WakeUpCntWidth-1];
  assign wake_up_cnt_d = (wake_up_done) ? wake_up_cnt_q : wake_up_cnt_q + 1;

  always_ff @(posedge clk_i or negedge reset_l) begin : p_regs
    if (!reset_l) begin
      wake_up_cnt_q <= 0;
    end else begin
      wake_up_cnt_q <= wake_up_cnt_d;
    end
  end

  // reset gate this
  assign rst_n = wake_up_done & reset_l;

  /////////////////////////////
  // core
  /////////////////////////////

  ariane #(
    .SwapEndianess ( SwapEndianess ),
    .CachedAddrEnd ( CachedAddrEnd ),
    .CachedAddrBeg ( CachedAddrBeg )
  ) ariane (
    .clk_i       ( clk_i       ),
    .rst_ni      ( rst_n       ),
    .boot_addr_i ( boot_addr_i ),
    .hart_id_i   ( hart_id_i   ),
    .irq_i       ( irq_i       ),
    .ipi_i       ( ipi_i       ),
    .time_irq_i  ( time_irq_i  ),
    .debug_req_i ( debug_req_i ),
`ifdef AXI64_CACHE_PORTS
    .axi_req_o   ( axi_req     ),
    .axi_resp_i  ( axi_resp    )
`else
    .l15_req_o   ( l15_req     ),
    .l15_rtrn_i  ( l15_rtrn    )
`endif
  );

endmodule // ariane_verilog_wrap

View file

@ -0,0 +1,593 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 14.11.2018
// Description: Ariane chipset for OpenPiton that includes the bootrom (with DTB),
// debug module, clint and plic.
//
// Note that direct system bus accesses are not yet possible due to a missing
// AXI-lite br_master <-> NOC converter module.
//
// The address bases for the individual peripherals are defined in the
// devices.xml file in OpenPiton, and should be set to
//
// Debug 40'h90_0000_0000 <length 0x1000>
// Boot Rom 40'h90_0001_0000 <length 0x10000>
// CLINT 40'h90_0200_0000 <length 0x1000000>
// PLIC 40'h90_0300_0000 <length 0x1000000>
//
// Peripheral subsystem for Ariane in OpenPiton: debug module with JTAG DMI,
// boot ROM, CLINT and PLIC. Each peripheral is attached to its own NoC2/NoC3
// port pair via a noc_axilite_bridge.
//
// Parameters:
//   DataWidth     - width of the NoC data ports
//   NumHarts      - number of harts served by debug module, CLINT and PLIC
//   NumSources    - number of PLIC interrupt sources
//   SwapEndianess - forwarded to the SWAP_ENDIANESS parameter of all bridges
module serpent_peripherals #(
  parameter int unsigned DataWidth     = 64,
  parameter int unsigned NumHarts      = 1,
  parameter int unsigned NumSources    = 1,
  parameter bit          SwapEndianess = 0
) (
  input                            clk_i,
  input                            rst_ni,
  input                            testmode_i,
  // connections to OpenPiton NoC filters
  // Debug/JTAG
  input  [DataWidth-1:0]           buf_ariane_debug_noc2_data_i,
  input                            buf_ariane_debug_noc2_valid_i,
  output                           ariane_debug_buf_noc2_ready_o,
  output [DataWidth-1:0]           ariane_debug_buf_noc3_data_o,
  output                           ariane_debug_buf_noc3_valid_o,
  input                            buf_ariane_debug_noc3_ready_i,
  // Bootrom
  input  [DataWidth-1:0]           buf_ariane_bootrom_noc2_data_i,
  input                            buf_ariane_bootrom_noc2_valid_i,
  output                           ariane_bootrom_buf_noc2_ready_o,
  output [DataWidth-1:0]           ariane_bootrom_buf_noc3_data_o,
  output                           ariane_bootrom_buf_noc3_valid_o,
  input                            buf_ariane_bootrom_noc3_ready_i,
  // CLINT
  input  [DataWidth-1:0]           buf_ariane_clint_noc2_data_i,
  input                            buf_ariane_clint_noc2_valid_i,
  output                           ariane_clint_buf_noc2_ready_o,
  output [DataWidth-1:0]           ariane_clint_buf_noc3_data_o,
  output                           ariane_clint_buf_noc3_valid_o,
  input                            buf_ariane_clint_noc3_ready_i,
  // PLIC
  input  [DataWidth-1:0]           buf_ariane_plic_noc2_data_i,
  input                            buf_ariane_plic_noc2_valid_i,
  output                           ariane_plic_buf_noc2_ready_o,
  output [DataWidth-1:0]           ariane_plic_buf_noc3_data_o,
  output                           ariane_plic_buf_noc3_valid_o,
  input                            buf_ariane_plic_noc3_ready_i,
  // Debug sigs to cores
  output                           ndmreset_o,    // non-debug module reset
  output                           dmactive_o,    // debug module is active
  output [NumHarts-1:0]            debug_req_o,   // async debug request
  input  [NumHarts-1:0]            unavailable_i, // communicate whether the hart is unavailable (e.g.: power down)
  // JTAG
  input                            tck_i,
  input                            tms_i,
  input                            trst_ni,
  input                            td_i,
  output                           td_o,
  output                           tdo_oe_o,
  // CLINT
  input                            rtc_i,         // Real-time clock in (usually 32.768 kHz)
  output [NumHarts-1:0]            timer_irq_o,   // Timer interrupts
  output [NumHarts-1:0]            ipi_o,         // software interrupt (a.k.a inter-process-interrupt)
  // PLIC
  // TODO
  input  [NumSources-1:0]          irq_sources_i,
  output [NumHarts-1:0][1:0]       irq_o          // level sensitive IR lines, mip & sip (async)
);

  // AXI parameterization shared by all peripherals (AXI-lite: no IDs, no user signals)
  localparam int unsigned AxiIdWidth   = 0;
  localparam int unsigned AxiAddrWidth = 64;
  localparam int unsigned AxiDataWidth = 64;
  localparam int unsigned AxiUserWidth = 0;

  /////////////////////////////
  // Debug module and JTAG
  /////////////////////////////

  // DMI handshake between the JTAG TAP and the debug module
  logic          jtag_req_valid;
  logic          debug_req_ready; // explicitly declared, was an implicit net before
  logic          jtag_resp_ready;
  logic          jtag_resp_valid;

  dm::dmi_req_t  jtag_dmi_req;
  dm::dmi_resp_t debug_resp;

  dmi_jtag i_dmi_jtag (
    .clk_i,
    .rst_ni,
    .testmode_i,
    .dmi_req_o        ( jtag_dmi_req    ),
    .dmi_req_valid_o  ( jtag_req_valid  ),
    .dmi_req_ready_i  ( debug_req_ready ),
    .dmi_resp_i       ( debug_resp      ),
    .dmi_resp_ready_o ( jtag_resp_ready ),
    .dmi_resp_valid_i ( jtag_resp_valid ),
    .dmi_rst_no       (                 ), // not connected
    .tck_i,
    .tms_i,
    .trst_ni,
    .td_i,
    .td_o,
    .tdo_oe_o
  );

  ariane_axi::req_t  dm_axi_m_req,  dm_axi_s_req;
  ariane_axi::resp_t dm_axi_m_resp, dm_axi_s_resp;

  // debug module
  dm_top #(
    // current implementation only supports 1 hart
    .NrHarts      ( NumHarts     ),
    .AxiIdWidth   ( AxiIdWidth   ),
    .AxiAddrWidth ( AxiAddrWidth ),
    .AxiDataWidth ( AxiDataWidth ),
    .AxiUserWidth ( AxiUserWidth )
  ) i_dm_top (
    .clk_i,
    .rst_ni,                               // PoR
    .testmode_i,
    .ndmreset_o,
    .dmactive_o,                           // active debug session
    .debug_req_o,
    .unavailable_i,
    .axi_s_req_i      ( dm_axi_s_req    ),
    .axi_s_resp_o     ( dm_axi_s_resp   ),
    .axi_m_req_o      ( dm_axi_m_req    ),
    .axi_m_resp_i     ( dm_axi_m_resp   ),
    .dmi_rst_ni       ( rst_ni          ),
    .dmi_req_valid_i  ( jtag_req_valid  ),
    .dmi_req_ready_o  ( debug_req_ready ),
    .dmi_req_i        ( jtag_dmi_req    ),
    .dmi_resp_valid_o ( jtag_resp_valid ),
    .dmi_resp_ready_i ( jtag_resp_ready ),
    .dmi_resp_o       ( debug_resp      )
  );

  noc_axilite_bridge #(
    .SLAVE_RESP_BYTEWIDTH ( 8             ),
    .SWAP_ENDIANESS       ( SwapEndianess )
  ) i_debug_axilite_bridge (
    .clk                  ( clk_i                         ),
    .rst                  ( ~rst_ni                       ), // the bridge uses an active-high reset
    // to/from NOC
    .splitter_bridge_val  ( buf_ariane_debug_noc2_valid_i ),
    .splitter_bridge_data ( buf_ariane_debug_noc2_data_i  ),
    .bridge_splitter_rdy  ( ariane_debug_buf_noc2_ready_o ),
    .bridge_splitter_val  ( ariane_debug_buf_noc3_valid_o ),
    .bridge_splitter_data ( ariane_debug_buf_noc3_data_o  ),
    .splitter_bridge_rdy  ( buf_ariane_debug_noc3_ready_i ),
    //axi lite signals
    //write address channel
    .m_axi_awaddr         ( dm_axi_s_req.aw.addr          ),
    .m_axi_awvalid        ( dm_axi_s_req.aw_valid         ),
    .m_axi_awready        ( dm_axi_s_resp.aw_ready        ),
    //write data channel
    .m_axi_wdata          ( dm_axi_s_req.w.data           ),
    .m_axi_wstrb          ( dm_axi_s_req.w.strb           ),
    .m_axi_wvalid         ( dm_axi_s_req.w_valid          ),
    .m_axi_wready         ( dm_axi_s_resp.w_ready         ),
    //read address channel
    .m_axi_araddr         ( dm_axi_s_req.ar.addr          ),
    .m_axi_arvalid        ( dm_axi_s_req.ar_valid         ),
    .m_axi_arready        ( dm_axi_s_resp.ar_ready        ),
    //read data channel
    .m_axi_rdata          ( dm_axi_s_resp.r.data          ),
    .m_axi_rresp          ( dm_axi_s_resp.r.resp          ),
    .m_axi_rvalid         ( dm_axi_s_resp.r_valid         ),
    .m_axi_rready         ( dm_axi_s_req.r_ready          ),
    //write response channel
    .m_axi_bresp          ( dm_axi_s_resp.b.resp          ),
    .m_axi_bvalid         ( dm_axi_s_resp.b_valid         ),
    .m_axi_bready         ( dm_axi_s_req.b_ready          )
  );

  // tie off system bus accesses (not supported yet due to
  // missing AXI-lite br_master <-> NOC converter)
  assign dm_axi_m_resp = '0;

  // tie off signals not used by AXI-lite
  assign dm_axi_s_req.aw.id     = '0;
  assign dm_axi_s_req.aw.len    = '0;
  assign dm_axi_s_req.aw.size   = 2'b11;// 8byte
  assign dm_axi_s_req.aw.burst  = '0;
  assign dm_axi_s_req.aw.lock   = '0;
  assign dm_axi_s_req.aw.cache  = '0;
  assign dm_axi_s_req.aw.prot   = '0;
  assign dm_axi_s_req.aw.qos    = '0;
  assign dm_axi_s_req.aw.region = '0;
  assign dm_axi_s_req.aw.atop   = '0;
  assign dm_axi_s_req.w.last    = 1'b1;
  assign dm_axi_s_req.ar.id     = '0;
  assign dm_axi_s_req.ar.len    = '0;
  assign dm_axi_s_req.ar.size   = 2'b11;// 8byte
  assign dm_axi_s_req.ar.burst  = '0;
  assign dm_axi_s_req.ar.lock   = '0;
  assign dm_axi_s_req.ar.cache  = '0;
  assign dm_axi_s_req.ar.prot   = '0;
  assign dm_axi_s_req.ar.qos    = '0;
  assign dm_axi_s_req.ar.region = '0;
  // assign dm_axi_s_resp.r.id = '0;
  // assign dm_axi_s_resp.r.last = 1'b1;
  // assign dm_axi_s_resp.b.id = '0;

  /////////////////////////////
  // Bootrom
  /////////////////////////////

  logic                    rom_req;
  logic [AxiAddrWidth-1:0] rom_addr;
  logic [AxiDataWidth-1:0] rom_rdata;

  AXI_BUS #(
    .AXI_ID_WIDTH   ( AxiIdWidth   ),
    .AXI_ADDR_WIDTH ( AxiAddrWidth ),
    .AXI_DATA_WIDTH ( AxiDataWidth ),
    .AXI_USER_WIDTH ( AxiUserWidth )
  ) br_master();

  // the ROM is read-only, hence the write related outputs are left open
  axi2mem #(
    .AXI_ID_WIDTH   ( AxiIdWidth   ),
    .AXI_ADDR_WIDTH ( AxiAddrWidth ),
    .AXI_DATA_WIDTH ( AxiDataWidth ),
    .AXI_USER_WIDTH ( AxiUserWidth )
  ) i_axi2rom (
    .clk_i,
    .rst_ni,
    .slave  ( br_master ),
    .req_o  ( rom_req   ),
    .we_o   (           ),
    .addr_o ( rom_addr  ),
    .be_o   (           ),
    .data_o (           ),
    .data_i ( rom_rdata )
  );

  bootrom i_bootrom (
    .clk_i,
    .req_i   ( rom_req   ),
    .addr_i  ( rom_addr  ),
    .rdata_o ( rom_rdata )
  );

  noc_axilite_bridge #(
    .SLAVE_RESP_BYTEWIDTH ( 8             ),
    .SWAP_ENDIANESS       ( SwapEndianess )
  ) i_bootrom_axilite_bridge (
    .clk                  ( clk_i                           ),
    .rst                  ( ~rst_ni                         ), // the bridge uses an active-high reset
    // to/from NOC
    .splitter_bridge_val  ( buf_ariane_bootrom_noc2_valid_i ),
    .splitter_bridge_data ( buf_ariane_bootrom_noc2_data_i  ),
    .bridge_splitter_rdy  ( ariane_bootrom_buf_noc2_ready_o ),
    .bridge_splitter_val  ( ariane_bootrom_buf_noc3_valid_o ),
    .bridge_splitter_data ( ariane_bootrom_buf_noc3_data_o  ),
    .splitter_bridge_rdy  ( buf_ariane_bootrom_noc3_ready_i ),
    //axi lite signals
    //write address channel
    .m_axi_awaddr         ( br_master.aw_addr               ),
    .m_axi_awvalid        ( br_master.aw_valid              ),
    .m_axi_awready        ( br_master.aw_ready              ),
    //write data channel
    .m_axi_wdata          ( br_master.w_data                ),
    .m_axi_wstrb          ( br_master.w_strb                ),
    .m_axi_wvalid         ( br_master.w_valid               ),
    .m_axi_wready         ( br_master.w_ready               ),
    //read address channel
    .m_axi_araddr         ( br_master.ar_addr               ),
    .m_axi_arvalid        ( br_master.ar_valid              ),
    .m_axi_arready        ( br_master.ar_ready              ),
    //read data channel
    .m_axi_rdata          ( br_master.r_data                ),
    .m_axi_rresp          ( br_master.r_resp                ),
    .m_axi_rvalid         ( br_master.r_valid               ),
    .m_axi_rready         ( br_master.r_ready               ),
    //write response channel
    .m_axi_bresp          ( br_master.b_resp                ),
    .m_axi_bvalid         ( br_master.b_valid               ),
    .m_axi_bready         ( br_master.b_ready               )
  );

  // tie off signals not used by AXI-lite
  assign br_master.aw_id     = '0;
  assign br_master.aw_len    = '0;
  assign br_master.aw_size   = 2'b11;// 8byte
  assign br_master.aw_burst  = '0;
  assign br_master.aw_lock   = '0;
  assign br_master.aw_cache  = '0;
  assign br_master.aw_prot   = '0;
  assign br_master.aw_qos    = '0;
  assign br_master.aw_region = '0;
  assign br_master.w_last    = 1'b1;
  assign br_master.ar_id     = '0;
  assign br_master.ar_len    = '0;
  assign br_master.ar_size   = 2'b11;// 8byte
  assign br_master.ar_burst  = '0;
  assign br_master.ar_lock   = '0;
  assign br_master.ar_cache  = '0;
  assign br_master.ar_prot   = '0;
  assign br_master.ar_qos    = '0;
  assign br_master.ar_region = '0;
  // assign br_master.r_id = '0;
  // assign br_master.r_last = 1'b1;
  // assign br_master.b_id = '0;

  /////////////////////////////
  // CLINT
  /////////////////////////////

  ariane_axi::req_t  clint_axi_req;
  ariane_axi::resp_t clint_axi_resp;

  clint #(
    .AXI_ADDR_WIDTH ( AxiAddrWidth ),
    .AXI_DATA_WIDTH ( AxiDataWidth ),
    .AXI_ID_WIDTH   ( AxiIdWidth   ),
    .NR_CORES       ( NumHarts     )
  ) i_clint (
    .clk_i,
    .rst_ni,
    .testmode_i,
    .axi_req_i  ( clint_axi_req  ),
    .axi_resp_o ( clint_axi_resp ),
    .rtc_i,
    .timer_irq_o,
    .ipi_o
  );

  noc_axilite_bridge #(
    .SLAVE_RESP_BYTEWIDTH ( 8             ),
    .SWAP_ENDIANESS       ( SwapEndianess )
  ) i_clint_axilite_bridge (
    .clk                  ( clk_i                         ),
    .rst                  ( ~rst_ni                       ), // the bridge uses an active-high reset
    // to/from NOC
    .splitter_bridge_val  ( buf_ariane_clint_noc2_valid_i ),
    .splitter_bridge_data ( buf_ariane_clint_noc2_data_i  ),
    .bridge_splitter_rdy  ( ariane_clint_buf_noc2_ready_o ),
    .bridge_splitter_val  ( ariane_clint_buf_noc3_valid_o ),
    .bridge_splitter_data ( ariane_clint_buf_noc3_data_o  ),
    .splitter_bridge_rdy  ( buf_ariane_clint_noc3_ready_i ),
    //axi lite signals
    //write address channel
    .m_axi_awaddr         ( clint_axi_req.aw.addr         ),
    .m_axi_awvalid        ( clint_axi_req.aw_valid        ),
    .m_axi_awready        ( clint_axi_resp.aw_ready       ),
    //write data channel
    .m_axi_wdata          ( clint_axi_req.w.data          ),
    .m_axi_wstrb          ( clint_axi_req.w.strb          ),
    .m_axi_wvalid         ( clint_axi_req.w_valid         ),
    .m_axi_wready         ( clint_axi_resp.w_ready        ),
    //read address channel
    .m_axi_araddr         ( clint_axi_req.ar.addr         ),
    .m_axi_arvalid        ( clint_axi_req.ar_valid        ),
    .m_axi_arready        ( clint_axi_resp.ar_ready       ),
    //read data channel
    .m_axi_rdata          ( clint_axi_resp.r.data         ),
    .m_axi_rresp          ( clint_axi_resp.r.resp         ),
    .m_axi_rvalid         ( clint_axi_resp.r_valid        ),
    .m_axi_rready         ( clint_axi_req.r_ready         ),
    //write response channel
    .m_axi_bresp          ( clint_axi_resp.b.resp         ),
    .m_axi_bvalid         ( clint_axi_resp.b_valid        ),
    .m_axi_bready         ( clint_axi_req.b_ready         )
  );

  // tie off signals not used by AXI-lite
  assign clint_axi_req.aw.id     = '0;
  assign clint_axi_req.aw.len    = '0;
  assign clint_axi_req.aw.size   = 2'b11;// 8byte
  assign clint_axi_req.aw.burst  = '0;
  assign clint_axi_req.aw.lock   = '0;
  assign clint_axi_req.aw.cache  = '0;
  assign clint_axi_req.aw.prot   = '0;
  assign clint_axi_req.aw.qos    = '0;
  assign clint_axi_req.aw.region = '0;
  assign clint_axi_req.aw.atop   = '0;
  assign clint_axi_req.w.last    = 1'b1;
  assign clint_axi_req.ar.id     = '0;
  assign clint_axi_req.ar.len    = '0;
  assign clint_axi_req.ar.size   = 2'b11;// 8byte
  assign clint_axi_req.ar.burst  = '0;
  assign clint_axi_req.ar.lock   = '0;
  assign clint_axi_req.ar.cache  = '0;
  assign clint_axi_req.ar.prot   = '0;
  assign clint_axi_req.ar.qos    = '0;
  assign clint_axi_req.ar.region = '0;

  /////////////////////////////
  // PLIC
  /////////////////////////////

  AXI_BUS #(
    .AXI_ID_WIDTH   ( AxiIdWidth   ),
    .AXI_ADDR_WIDTH ( AxiAddrWidth ),
    .AXI_DATA_WIDTH ( AxiDataWidth ),
    .AXI_USER_WIDTH ( AxiUserWidth )
  ) plic_master();

  noc_axilite_bridge #(
    .SLAVE_RESP_BYTEWIDTH ( 8             ),
    .SWAP_ENDIANESS       ( SwapEndianess )
  ) i_plic_axilite_bridge (
    .clk                  ( clk_i                        ),
    .rst                  ( ~rst_ni                      ), // the bridge uses an active-high reset
    // to/from NOC
    .splitter_bridge_val  ( buf_ariane_plic_noc2_valid_i ),
    .splitter_bridge_data ( buf_ariane_plic_noc2_data_i  ),
    .bridge_splitter_rdy  ( ariane_plic_buf_noc2_ready_o ),
    .bridge_splitter_val  ( ariane_plic_buf_noc3_valid_o ),
    .bridge_splitter_data ( ariane_plic_buf_noc3_data_o  ),
    .splitter_bridge_rdy  ( buf_ariane_plic_noc3_ready_i ),
    //axi lite signals
    //write address channel
    .m_axi_awaddr         ( plic_master.aw_addr          ),
    .m_axi_awvalid        ( plic_master.aw_valid         ),
    .m_axi_awready        ( plic_master.aw_ready         ),
    //write data channel
    .m_axi_wdata          ( plic_master.w_data           ),
    .m_axi_wstrb          ( plic_master.w_strb           ),
    .m_axi_wvalid         ( plic_master.w_valid          ),
    .m_axi_wready         ( plic_master.w_ready          ),
    //read address channel
    .m_axi_araddr         ( plic_master.ar_addr          ),
    .m_axi_arvalid        ( plic_master.ar_valid         ),
    .m_axi_arready        ( plic_master.ar_ready         ),
    //read data channel
    .m_axi_rdata          ( plic_master.r_data           ),
    .m_axi_rresp          ( plic_master.r_resp           ),
    .m_axi_rvalid         ( plic_master.r_valid          ),
    .m_axi_rready         ( plic_master.r_ready          ),
    //write response channel
    .m_axi_bresp          ( plic_master.b_resp           ),
    .m_axi_bvalid         ( plic_master.b_valid          ),
    .m_axi_bready         ( plic_master.b_ready          )
  );

  // tie off signals not used by AXI-lite
  assign plic_master.aw_id     = '0;
  assign plic_master.aw_len    = '0;
  assign plic_master.aw_size   = 2'b11;// 8byte
  assign plic_master.aw_burst  = '0;
  assign plic_master.aw_lock   = '0;
  assign plic_master.aw_cache  = '0;
  assign plic_master.aw_prot   = '0;
  assign plic_master.aw_qos    = '0;
  assign plic_master.aw_region = '0;
  assign plic_master.w_last    = 1'b1;
  assign plic_master.ar_id     = '0;
  assign plic_master.ar_len    = '0;
  assign plic_master.ar_size   = 2'b11;// 8byte
  assign plic_master.ar_burst  = '0;
  assign plic_master.ar_lock   = '0;
  assign plic_master.ar_cache  = '0;
  assign plic_master.ar_prot   = '0;
  assign plic_master.ar_qos    = '0;
  assign plic_master.ar_region = '0;
  // the user signals are inputs of the AXI to APB converter below, but the
  // AXI-lite bridge does not drive them; tie them off so they do not float
  assign plic_master.aw_user   = '0;
  assign plic_master.w_user    = '0;
  assign plic_master.ar_user   = '0;

  // register interface between the APB converter and the PLIC
  REG_BUS #(
    .ADDR_WIDTH ( 32 ),
    .DATA_WIDTH ( 32 )
  ) reg_bus (clk_i);

  // APB signals between axi2apb_64_32 and apb_to_reg
  logic        plic_penable;
  logic        plic_pwrite;
  logic [31:0] plic_paddr;
  logic        plic_psel;
  logic [31:0] plic_pwdata;
  logic [31:0] plic_prdata;
  logic        plic_pready;
  logic        plic_pslverr;

  axi2apb_64_32 #(
    .AXI4_ADDRESS_WIDTH ( AxiAddrWidth ),
    .AXI4_RDATA_WIDTH   ( AxiDataWidth ),
    .AXI4_WDATA_WIDTH   ( AxiDataWidth ),
    .AXI4_ID_WIDTH      ( AxiIdWidth   ),
    .AXI4_USER_WIDTH    ( AxiUserWidth ),
    .BUFF_DEPTH_SLAVE   ( 2            ),
    .APB_ADDR_WIDTH     ( 32           )
  ) i_axi2apb_64_32_plic (
    .ACLK       ( clk_i                 ),
    .ARESETn    ( rst_ni                ),
    .test_en_i  ( testmode_i            ),
    .AWID_i     ( plic_master.aw_id     ),
    .AWADDR_i   ( plic_master.aw_addr   ),
    .AWLEN_i    ( plic_master.aw_len    ),
    .AWSIZE_i   ( plic_master.aw_size   ),
    .AWBURST_i  ( plic_master.aw_burst  ),
    .AWLOCK_i   ( plic_master.aw_lock   ),
    .AWCACHE_i  ( plic_master.aw_cache  ),
    .AWPROT_i   ( plic_master.aw_prot   ),
    .AWREGION_i ( plic_master.aw_region ),
    .AWUSER_i   ( plic_master.aw_user   ),
    .AWQOS_i    ( plic_master.aw_qos    ),
    .AWVALID_i  ( plic_master.aw_valid  ),
    .AWREADY_o  ( plic_master.aw_ready  ),
    .WDATA_i    ( plic_master.w_data    ),
    .WSTRB_i    ( plic_master.w_strb    ),
    .WLAST_i    ( plic_master.w_last    ),
    .WUSER_i    ( plic_master.w_user    ),
    .WVALID_i   ( plic_master.w_valid   ),
    .WREADY_o   ( plic_master.w_ready   ),
    .BID_o      ( plic_master.b_id      ),
    .BRESP_o    ( plic_master.b_resp    ),
    .BVALID_o   ( plic_master.b_valid   ),
    .BUSER_o    ( plic_master.b_user    ),
    .BREADY_i   ( plic_master.b_ready   ),
    .ARID_i     ( plic_master.ar_id     ),
    .ARADDR_i   ( plic_master.ar_addr   ),
    .ARLEN_i    ( plic_master.ar_len    ),
    .ARSIZE_i   ( plic_master.ar_size   ),
    .ARBURST_i  ( plic_master.ar_burst  ),
    .ARLOCK_i   ( plic_master.ar_lock   ),
    .ARCACHE_i  ( plic_master.ar_cache  ),
    .ARPROT_i   ( plic_master.ar_prot   ),
    .ARREGION_i ( plic_master.ar_region ),
    .ARUSER_i   ( plic_master.ar_user   ),
    .ARQOS_i    ( plic_master.ar_qos    ),
    .ARVALID_i  ( plic_master.ar_valid  ),
    .ARREADY_o  ( plic_master.ar_ready  ),
    .RID_o      ( plic_master.r_id      ),
    .RDATA_o    ( plic_master.r_data    ),
    .RRESP_o    ( plic_master.r_resp    ),
    .RLAST_o    ( plic_master.r_last    ),
    .RUSER_o    ( plic_master.r_user    ),
    .RVALID_o   ( plic_master.r_valid   ),
    .RREADY_i   ( plic_master.r_ready   ),
    .PENABLE    ( plic_penable          ),
    .PWRITE     ( plic_pwrite           ),
    .PADDR      ( plic_paddr            ),
    .PSEL       ( plic_psel             ),
    .PWDATA     ( plic_pwdata           ),
    .PRDATA     ( plic_prdata           ),
    .PREADY     ( plic_pready           ),
    .PSLVERR    ( plic_pslverr          )
  );

  apb_to_reg i_apb_to_reg (
    .clk_i,
    .rst_ni,
    .penable_i ( plic_penable ),
    .pwrite_i  ( plic_pwrite  ),
    .paddr_i   ( plic_paddr   ),
    .psel_i    ( plic_psel    ),
    .pwdata_i  ( plic_pwdata  ),
    .prdata_o  ( plic_prdata  ),
    .pready_o  ( plic_pready  ),
    .pslverr_o ( plic_pslverr ),
    .reg_o     ( reg_bus      )
  );

  // two interrupt targets per hart, matching the [1:0] dimension of irq_o
  plic #(
    .ADDR_WIDTH         ( 32         ),
    .DATA_WIDTH         ( 32         ),
    .ID_BITWIDTH        ( 3          ), // TODO (zarubaf): Find proper width
    .PARAMETER_BITWIDTH ( 3          ), // TODO (zarubaf): Find proper width
    .NUM_TARGETS        ( 2*NumHarts ),
    .NUM_SOURCES        ( NumSources )
  ) i_plic (
    .clk_i,
    .rst_ni,
    .irq_sources_i,
    .eip_targets_o   ( irq_o   ),
    .external_bus_io ( reg_bus )
  );

endmodule

File diff suppressed because it is too large Load diff

View file

@ -14,18 +14,19 @@
*
* Description: Manages communication with the AXI Bus
*/
import std_cache_pkg::*;
//import std_cache_pkg::*;
module axi_adapter #(
parameter int unsigned DATA_WIDTH = 256,
parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature
parameter int unsigned AXI_ID_WIDTH = 10
parameter int unsigned DATA_WIDTH = 256,
parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature
parameter int unsigned AXI_ID_WIDTH = 10,
parameter int unsigned CACHELINE_BYTE_OFFSET = 8
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic req_i,
input req_t type_i,
input ariane_axi::ad_req_t type_i,
output logic gnt_o,
output logic [AXI_ID_WIDTH-1:0] gnt_id_o,
input logic [63:0] addr_i,
@ -69,7 +70,7 @@ module axi_adapter #(
axi_req_o.aw.region = 4'b0;
axi_req_o.aw.len = 8'b0;
axi_req_o.aw.size = {1'b0, size_i};
axi_req_o.aw.burst = (type_i == SINGLE_REQ) ? 2'b00 : 2'b01; // fixed size for single request and incremental transfer for everything else
axi_req_o.aw.burst = (type_i == ariane_axi::SINGLE_REQ) ? 2'b00 : 2'b01; // fixed size for single request and incremental transfer for everything else
axi_req_o.aw.lock = 1'b0;
axi_req_o.aw.cache = 4'b0;
axi_req_o.aw.qos = 4'b0;
@ -79,12 +80,12 @@ module axi_adapter #(
axi_req_o.ar_valid = 1'b0;
// in case of a single request or wrapping transfer we can simply begin at the address, if we want to request a cache-line
// with an incremental transfer we need to output the corresponding base address of the cache line
axi_req_o.ar.addr = (CRITICAL_WORD_FIRST || type_i == SINGLE_REQ) ? addr_i : { addr_i[63:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET}{1'b0}}};
axi_req_o.ar.addr = (CRITICAL_WORD_FIRST || type_i == ariane_axi::SINGLE_REQ) ? addr_i : { addr_i[63:CACHELINE_BYTE_OFFSET], {{CACHELINE_BYTE_OFFSET}{1'b0}}};
axi_req_o.ar.prot = 3'b0;
axi_req_o.ar.region = 4'b0;
axi_req_o.ar.len = 8'b0;
axi_req_o.ar.size = {1'b0, size_i}; // 8 bytes
axi_req_o.ar.burst = (type_i == SINGLE_REQ) ? 2'b00 : (CRITICAL_WORD_FIRST ? 2'b10 : 2'b01); // wrapping transfer in case of a critical word first strategy
axi_req_o.ar.burst = (type_i == ariane_axi::SINGLE_REQ) ? 2'b00 : (CRITICAL_WORD_FIRST ? 2'b10 : 2'b01); // wrapping transfer in case of a critical word first strategy
axi_req_o.ar.lock = 1'b0;
axi_req_o.ar.cache = 4'b0;
axi_req_o.ar.qos = 4'b0;
@ -127,7 +128,7 @@ module axi_adapter #(
axi_req_o.aw_valid = 1'b1;
axi_req_o.w_valid = 1'b1;
// its a single write
if (type_i == SINGLE_REQ) begin
if (type_i == ariane_axi::SINGLE_REQ) begin
// only a single write so the data is already the last one
axi_req_o.w.last = 1'b1;
// single req can be granted here
@ -162,14 +163,13 @@ module axi_adapter #(
axi_req_o.ar_valid = 1'b1;
gnt_o = axi_resp_i.ar_ready;
if (type_i != SINGLE_REQ) begin
if (type_i != ariane_axi::SINGLE_REQ) begin
axi_req_o.ar.len = BURST_SIZE;
cnt_d = BURST_SIZE;
end
if (axi_resp_i.ar_ready) begin
state_d = (type_i == SINGLE_REQ) ? WAIT_R_VALID : WAIT_R_VALID_MULTIPLE;
state_d = (type_i == ariane_axi::SINGLE_REQ) ? WAIT_R_VALID : WAIT_R_VALID_MULTIPLE;
addr_offset_d = addr_i[ADDR_INDEX-1+3:3];
end
end
@ -184,16 +184,13 @@ module axi_adapter #(
gnt_o = 1'b1;
state_d = WAIT_B_VALID;
end
end
// ~> we need to wait for an aw_ready and there is at least one outstanding write
WAIT_LAST_W_READY_AW_READY: begin
axi_req_o.w_valid = 1'b1;
axi_req_o.w.last = (cnt_q == '0);
if (type_i == SINGLE_REQ) begin
if (type_i == ariane_axi::SINGLE_REQ) begin
axi_req_o.w.data = wdata_i[0];
axi_req_o.w.strb = be_i[0];
end else begin
@ -245,7 +242,7 @@ module axi_adapter #(
WAIT_LAST_W_READY: begin
axi_req_o.w_valid = 1'b1;
if (type_i != SINGLE_REQ) begin
if (type_i != ariane_axi::SINGLE_REQ) begin
axi_req_o.w.data = wdata_i[BURST_SIZE-cnt_q];
axi_req_o.w.strb = be_i[BURST_SIZE-cnt_q];
end

327
src/axi_adapter2.sv Normal file
View file

@ -0,0 +1,327 @@
/* Copyright 2018 ETH Zurich and University of Bologna.
* Copyright and related rights are licensed under the Solderpad Hardware
* License, Version 0.51 (the License); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
* or agreed to in writing, software, hardware and materials distributed under
* this License is distributed on an AS IS BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* File: axi_adapter2.sv
* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
* Date: 1.8.2018
*
* Description: Manages communication with the AXI Bus. Note that if you intend
* to use read bursts with BLEN>0, you have to either use the same ID for all reads
* to ensure ordering of the transactions, or you have to make sure that only one read
* is in flight. otherwise, the read response deserialization mechanism may not work
* correctly due to axi beat interleaving.
*/
import std_cache_pkg::*;
// AXI master adapter with independent write and read request channels.
//
// Write side: a small FSM drives AW and W. The request signals (wr_addr_i,
// wr_data_i, wr_be_i, wr_blen_i, ...) are forwarded combinationally, and
// wr_gnt_o pulses once the AW beat and all W beats of the request have been
// accepted. The B response is passed through (wr_valid_o / wr_id_o / wr_rdy_i).
//
// Read side: AR is driven directly from rd_req_i; rd_gnt_o is the AR handshake.
// Returning R beats are collected word by word into rd_data_o, and rd_valid_o
// is asserted for one cycle, one cycle after the last beat has been transferred.
module axi_adapter2 #(
  parameter int unsigned DATA_WORDS          = 4,  // data width in dwords, this is also the maximum burst length, must be >=2
  parameter int unsigned AXI_ID_WIDTH        = 10,
  parameter int unsigned CRITICAL_WORD_FIRST = 0   // this must be supported by the AXI subsystem, note that the data will be shifted by the word offset when this is enabled
) (
  input  logic                           clk_i,   // Clock
  input  logic                           rst_ni,  // Asynchronous reset active low
  // read channel
  // request
  input  logic                           rd_req_i,
  output logic                           rd_gnt_o,
  input  logic [63:0]                    rd_addr_i,
  input  logic [$clog2(DATA_WORDS)-1:0]  rd_blen_i, // axi convention: LEN-1
  input  logic [1:0]                     rd_size_i,
  input  logic [AXI_ID_WIDTH-1:0]        rd_id_i,   // use same ID for reads, or make sure you only have one outstanding read tx
  // read response
  input  logic                           rd_rdy_i,
  output logic                           rd_valid_o,
  output logic [DATA_WORDS-1:0][63:0]    rd_data_o,
  output logic [AXI_ID_WIDTH-1:0]        rd_id_o,
  // can be used to determine critical word
  output logic [63:0]                    rd_word_o,
  output logic                           rd_word_valid_o,
  output logic                           rd_word_cnt_o,
  // write channel
  input  logic                           wr_req_i,
  output logic                           wr_gnt_o,
  input  logic [63:0]                    wr_addr_i,
  input  logic [DATA_WORDS-1:0][63:0]    wr_data_i,
  input  logic [DATA_WORDS-1:0][7:0]     wr_be_i,
  input  logic [$clog2(DATA_WORDS)-1:0]  wr_blen_i, // axi convention: LEN-1
  input  logic [1:0]                     wr_size_i,
  input  logic [AXI_ID_WIDTH-1:0]        wr_id_i,
  // write response
  input  logic                           wr_rdy_i,
  output logic                           wr_valid_o,
  output logic [AXI_ID_WIDTH-1:0]        wr_id_o,
  // AXI port
  output ariane_axi::req_t               axi_req_o,
  input  ariane_axi::resp_t              axi_resp_i
);

  // width of the beat counters (at least one bit)
  localparam ADDR_INDEX = ($clog2(DATA_WORDS) > 0) ? $clog2(DATA_WORDS) : 1;

  ///////////////////////////////////////////////////////
  // write channel
  ///////////////////////////////////////////////////////

  enum logic [3:0] {
    IDLE, WAIT_AW_READY, WAIT_LAST_W_READY, WAIT_LAST_W_READY_AW_READY, WAIT_AW_READY_BURST
  } wr_state_q, wr_state_d;

  // AXI tx counter: index of the W beat that is currently being presented
  logic [ADDR_INDEX-1:0] wr_cnt_d, wr_cnt_q;
  logic wr_single_req, wr_cnt_done, wr_cnt_clr, wr_cnt_en;

  assign wr_single_req = (wr_blen_i == 0);

  // address
  assign axi_req_o.aw.burst  = (wr_single_req) ? 2'b00 : 2'b01; // fixed size for single request and incremental transfer for everything else
  assign axi_req_o.aw.addr   = wr_addr_i;
  assign axi_req_o.aw.size   = wr_size_i;
  assign axi_req_o.aw.len    = wr_blen_i;
  assign axi_req_o.aw.id     = wr_id_i;
  assign axi_req_o.aw.prot   = 3'b0;
  assign axi_req_o.aw.region = 4'b0;
  assign axi_req_o.aw.lock   = 1'b0;
  assign axi_req_o.aw.cache  = 4'b0;
  assign axi_req_o.aw.qos    = 4'b0;
  assign axi_req_o.aw.atop   = '0; // currently not used

  // data: the beat counter selects the word/strobe that is put on the W channel
  assign axi_req_o.w.data    = wr_data_i[wr_cnt_q];
  assign axi_req_o.w.strb    = wr_be_i[wr_cnt_q];
  assign axi_req_o.w.last    = wr_cnt_done;

  // response: B channel is passed through unmodified
  assign axi_req_o.b_ready   = wr_rdy_i;
  assign wr_valid_o          = axi_resp_i.b_valid;
  assign wr_id_o             = axi_resp_i.b.id;

  // tx counter (clear has priority over increment)
  assign wr_cnt_done = (wr_cnt_q == wr_blen_i);
  assign wr_cnt_d    = (wr_cnt_clr) ? '0         :
                       (wr_cnt_en)  ? wr_cnt_q+1 :
                                      wr_cnt_q;

  // Note: the case selectors below are {aw_ready, w_ready}, i.e.
  // 2'b10 = only AW accepted, 2'b01 = only W accepted, 2'b11 = both accepted.
  always_comb begin : p_axi_write_fsm
    // default
    wr_state_d         = wr_state_q;

    axi_req_o.aw_valid = 1'b0;
    axi_req_o.w_valid  = 1'b0;
    wr_gnt_o           = 1'b0;

    wr_cnt_en          = 1'b0;
    wr_cnt_clr         = 1'b0;

    case (wr_state_q)
      ///////////////////////////////////
      IDLE: begin
        // we have an incoming write request: present AW and the first W beat
        if (wr_req_i) begin
          axi_req_o.aw_valid = 1'b1;
          axi_req_o.w_valid  = 1'b1;

          // its a single write
          if (wr_single_req) begin
            wr_cnt_clr = 1'b1;
            // single req can be granted here if both channels accept in the same cycle
            wr_gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready;
            case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
              2'b01:   wr_state_d = WAIT_AW_READY;     // W accepted, AW still pending
              2'b10:   wr_state_d = WAIT_LAST_W_READY; // AW accepted, W still pending
              default: wr_state_d = IDLE;              // both or none accepted
            endcase
          // its a burst request (more than one beat)
          end else begin
            // advance to the next word as soon as the first W beat is accepted
            wr_cnt_en = axi_resp_i.w_ready;

            case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
              2'b11:   wr_state_d = WAIT_LAST_W_READY;
              2'b01:   wr_state_d = WAIT_LAST_W_READY_AW_READY;
              2'b10:   wr_state_d = WAIT_LAST_W_READY;
              default:;
            endcase
          end
        end
      end
      ///////////////////////////////////
      // ~> from single write, the W beat is done and only AW is pending
      WAIT_AW_READY: begin
        axi_req_o.aw_valid = 1'b1;

        if (axi_resp_i.aw_ready) begin
          wr_state_d = IDLE;
          wr_gnt_o   = 1'b1;
        end
      end
      ///////////////////////////////////
      // ~> we need to wait for an aw_ready and there is at least one outstanding write beat
      WAIT_LAST_W_READY_AW_READY: begin
        axi_req_o.w_valid  = 1'b1;
        axi_req_o.aw_valid = 1'b1;
        case ({axi_resp_i.aw_ready, axi_resp_i.w_ready})
          // only the W beat was accepted, AW is still pending
          2'b01: begin
            // was this the last beat?
            if (wr_cnt_done) begin
              wr_state_d = WAIT_AW_READY_BURST;
              wr_cnt_clr = 1'b1;
            end else begin
              // no, so advance to the next beat and stay here
              wr_cnt_en = 1'b1;
            end
          end
          // only AW was accepted, continue with the remaining W beats
          2'b10: wr_state_d = WAIT_LAST_W_READY;
          // both AW and the current W beat were accepted
          2'b11: begin
            // we are finished
            if (wr_cnt_done) begin
              wr_state_d = IDLE;
              wr_gnt_o   = 1'b1;
              wr_cnt_clr = 1'b1;
            // there are outstanding beats
            end else begin
              wr_state_d = WAIT_LAST_W_READY;
              wr_cnt_en  = 1'b1;
            end
          end
          default:;
        endcase
      end
      ///////////////////////////////////
      // ~> all data has already been sent, we are only waiting for the aw_ready
      WAIT_AW_READY_BURST: begin
        axi_req_o.aw_valid = 1'b1;

        if (axi_resp_i.aw_ready) begin
          wr_state_d = IDLE;
          wr_gnt_o   = 1'b1;
        end
      end
      ///////////////////////////////////
      // ~> AW has been accepted, there are outstanding W beats
      WAIT_LAST_W_READY: begin
        axi_req_o.w_valid = 1'b1;

        // this is the last write beat
        if (wr_cnt_done) begin
          if (axi_resp_i.w_ready) begin
            wr_state_d = IDLE;
            wr_cnt_clr = 1'b1;
            wr_gnt_o   = 1'b1;
          end
        end else if (axi_resp_i.w_ready) begin
          wr_cnt_en = 1'b1;
        end
      end
      ///////////////////////////////////
      default: begin
        wr_state_d = IDLE;
      end
    endcase
  end

  ///////////////////////////////////////////////////////
  // read channel
  ///////////////////////////////////////////////////////

  // AXI tx counter: index of the R beat that is expected next
  logic [ADDR_INDEX-1:0] rd_cnt_d, rd_cnt_q;
  logic rd_single_req, rd_cnt_clr, rd_cnt_en;
  logic rd_beat;

  logic [DATA_WORDS-1:0][63:0] rd_data_d, rd_data_q;
  logic                        rd_valid_d, rd_valid_q;
  logic [AXI_ID_WIDTH-1:0]     rd_id_d, rd_id_q;

  assign rd_single_req = (rd_blen_i == 0);

  // address
  // rd_addr_i is forwarded unmodified, i.e. the requester has to supply a
  // suitable start address for the selected burst type
  assign axi_req_o.ar.burst  = (rd_single_req)       ? 2'b00 :
                               (CRITICAL_WORD_FIRST) ? 2'b10 :
                                                       2'b01;  // wrapping transfer in case of a critical word first strategy
  assign axi_req_o.ar.addr   = rd_addr_i;
  assign axi_req_o.ar.size   = rd_size_i;
  assign axi_req_o.ar.len    = rd_blen_i;
  assign axi_req_o.ar.id     = rd_id_i;
  assign axi_req_o.ar.prot   = 3'b0;
  assign axi_req_o.ar.region = 4'b0;
  assign axi_req_o.ar.lock   = 1'b0;
  assign axi_req_o.ar.cache  = 4'b0;
  assign axi_req_o.ar.qos    = 4'b0;

  // make the read request
  assign axi_req_o.ar_valid  = rd_req_i;
  assign rd_gnt_o            = rd_req_i & axi_resp_i.ar_ready;

  // return path
  // readiness is controlled by the consumer via rd_rdy_i
  assign axi_req_o.r_ready   = rd_rdy_i;

  // A beat is only transferred when valid and ready are both high. Counting,
  // capturing and completion must be qualified with this handshake, otherwise
  // a beat that is stalled by rd_rdy_i would be counted multiple times, and an
  // r.last value seen while r_valid is low could reset the beat counter.
  assign rd_beat             = axi_resp_i.r_valid & rd_rdy_i;
  assign rd_cnt_en           = rd_beat;
  assign rd_cnt_clr          = rd_beat & axi_resp_i.r.last;
  assign rd_valid_d          = rd_beat & axi_resp_i.r.last;
  assign rd_valid_o          = rd_valid_q;

  assign rd_id_d             = axi_resp_i.r.id;
  assign rd_id_o             = rd_id_q;
  assign rd_data_o           = rd_data_q;
  // used to determine critical word
  assign rd_word_o           = axi_resp_i.r.data;
  assign rd_word_valid_o     = axi_resp_i.r_valid;
  // NOTE(review): rd_word_cnt_o is declared as a single bit, hence only the
  // LSB of the beat counter is visible here (this matches the implicit
  // truncation of the previous plain assignment). Widening the port to
  // ADDR_INDEX bits would be an interface change - confirm what is intended.
  assign rd_word_cnt_o       = rd_cnt_q[0];

  // tx counter (clear has priority over increment)
  assign rd_cnt_d = (rd_cnt_clr) ? '0         :
                    (rd_cnt_en)  ? rd_cnt_q+1 :
                                   rd_cnt_q;

  // deserialize the returning beats into the wide read data register
  generate
    for(genvar k=0; k<DATA_WORDS; k++) begin : g_rd_data
      assign rd_data_d[k] = (rd_cnt_q==k && rd_cnt_en) ? axi_resp_i.r.data : rd_data_q[k];
    end
  endgenerate

  // ----------------
  // Registers
  // ----------------
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
      // write FSM starts in IDLE, counters and read registers are cleared
      wr_state_q <= IDLE;
      wr_cnt_q   <= '0;
      rd_cnt_q   <= '0;
      rd_data_q  <= '0;
      rd_valid_q <= '0;
      rd_id_q    <= '0;
    end else begin
      wr_state_q <= wr_state_d;
      wr_cnt_q   <= wr_cnt_d;
      rd_cnt_q   <= rd_cnt_d;
      rd_data_q  <= rd_data_d;
      rd_valid_q <= rd_valid_d;
      rd_id_q    <= rd_id_d;
    end
  end

  // ----------------
  // Assertions
  // ----------------

  //pragma translate_off
  `ifndef VERILATOR
  initial begin
    // the burst length ports are $clog2(DATA_WORDS) bits wide, which requires
    // at least two data words (see parameter description)
    assert (DATA_WORDS >= 2) else
      $fatal(1,"[axi adapter] DATA_WORDS must be >= 2");
  end
  `endif
  //pragma translate_on

endmodule // axi_adapter2

@ -1 +1 @@
Subproject commit 6338af6ee3065c4de22b555a67f64755745b7129
Subproject commit 1f77f634b65fdee56dfc928cadadd66e9fafc485

View file

@ -104,7 +104,7 @@ module miss_handler #(
logic [DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata;
logic req_fsm_miss_we;
logic [(DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be;
req_t req_fsm_miss_req;
ariane_axi::ad_req_t req_fsm_miss_req;
logic [1:0] req_fsm_miss_size;
logic gnt_miss_fsm;
@ -153,7 +153,7 @@ module miss_handler #(
req_fsm_miss_wdata = '0;
req_fsm_miss_we = 1'b0;
req_fsm_miss_be = '0;
req_fsm_miss_req = CACHE_LINE_REQ;
req_fsm_miss_req = ariane_axi::CACHE_LINE_REQ;
req_fsm_miss_size = 2'b11;
// core
flush_ack_o = 1'b0;
@ -384,7 +384,7 @@ module miss_handler #(
req_fsm_miss_valid = 1'b1;
// address is in operand a
req_fsm_miss_addr = amo_req_i.operand_a;
req_fsm_miss_req = SINGLE_REQ;
req_fsm_miss_req = ariane_axi::SINGLE_REQ;
req_fsm_miss_size = amo_req_i.size;
// the request has been granted
if (gnt_miss_fsm) begin
@ -434,7 +434,7 @@ module miss_handler #(
end
req_fsm_miss_we = 1'b1;
req_fsm_miss_req = SINGLE_REQ;
req_fsm_miss_req = ariane_axi::SINGLE_REQ;
req_fsm_miss_size = amo_req_i.size;
req_fsm_miss_addr = amo_req_i.operand_a;
@ -562,13 +562,14 @@ module miss_handler #(
);
axi_adapter #(
.DATA_WIDTH ( 64 ),
.AXI_ID_WIDTH ( 4 )
.DATA_WIDTH ( 64 ),
.AXI_ID_WIDTH ( 4 ),
.CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET )
) i_bypass_axi_adapter (
.clk_i,
.rst_ni,
.req_i ( req_fsm_bypass_valid ),
.type_i ( SINGLE_REQ ),
.type_i ( ariane_axi::SINGLE_REQ ),
.gnt_o ( gnt_bypass_fsm ),
.addr_i ( req_fsm_bypass_addr ),
.we_i ( req_fsm_bypass_we ),
@ -590,8 +591,9 @@ module miss_handler #(
// Cache Line AXI Refill
// ----------------------
axi_adapter #(
.DATA_WIDTH ( DCACHE_LINE_WIDTH ),
.AXI_ID_WIDTH ( 4 )
.DATA_WIDTH ( DCACHE_LINE_WIDTH ),
.AXI_ID_WIDTH ( 4 ),
.CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET )
) i_miss_axi_adapter (
.clk_i,
.rst_ni,

View file

@ -0,0 +1,342 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Ariane cache subsystem that is compatible with the OpenPiton
// coherent memory system.
//
// Define SERPENT_PULP if you want to use this cache.
// Define AXI64_CACHE_PORTS if you want to use this cache
// with a standard 64bit AXI interface instead of the openpiton
// L1.5 interface.
import ariane_pkg::*;
import serpent_cache_pkg::*;
// Cache subsystem wrapper: instantiates the serpent instruction cache, the
// serpent data cache and the L1.5 adapter that arbitrates between them.
// When AXI64_CACHE_PORTS is defined, the L1.5 request/return structs are kept
// local and translated to/from a 64bit AXI port by means of axi_adapter2;
// otherwise the L1.5 interface is exposed directly on the module ports.
module serpent_cache_subsystem #(
`ifdef AXI64_CACHE_PORTS
  parameter int unsigned AxiIdWidth    = 10,
`endif
  parameter logic [63:0] CachedAddrBeg = 64'h00_8000_0000, // begin of cached region
  parameter logic [63:0] CachedAddrEnd = 64'h80_0000_0000, // end of cached region
  parameter bit          SwapEndianess = 0                 // swap endianess in l15 adapter
) (
  input  logic                           clk_i,
  input  logic                           rst_ni,
  // I$
  input  logic                           icache_en_i,        // enable icache (or bypass e.g: in debug mode)
  input  logic                           icache_flush_i,     // flush the icache, flush and kill have to be asserted together
  output logic                           icache_miss_o,      // to performance counter
  // address translation requests
  input  icache_areq_i_t                 icache_areq_i,      // to/from frontend
  output icache_areq_o_t                 icache_areq_o,
  // data requests
  input  icache_dreq_i_t                 icache_dreq_i,      // to/from frontend
  output icache_dreq_o_t                 icache_dreq_o,
  // D$
  // Cache management
  input  logic                           dcache_enable_i,    // from CSR
  input  logic                           dcache_flush_i,     // high until acknowledged
  output logic                           dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
  output logic                           dcache_miss_o,      // we missed on a ld/st
  // AMO interface
  input  amo_req_t                       dcache_amo_req_i,
  output amo_resp_t                      dcache_amo_resp_o,
  // Request ports
  input  dcache_req_i_t   [2:0]          dcache_req_ports_i, // to/from LSU
  output dcache_req_o_t   [2:0]          dcache_req_ports_o, // to/from LSU
  // writebuffer status
  output logic                           wbuffer_empty_o,
`ifdef AXI64_CACHE_PORTS
  // memory side
  output ariane_axi::req_t               axi_req_o,
  input  ariane_axi::resp_t              axi_resp_i
`else
  // L15 (memory side)
  output l15_req_t                       l15_req_o,
  input  l15_rtrn_t                      l15_rtrn_i
`endif
  // TODO: interrupt interface
);

  // icache <-> adapter
  logic icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld;
  serpent_cache_pkg::icache_req_t  icache_adapter;
  serpent_cache_pkg::icache_rtrn_t adapter_icache;

  // dcache <-> adapter
  logic dcache_adapter_data_req, adapter_dcache_data_ack, adapter_dcache_rtrn_vld;
  serpent_cache_pkg::dcache_req_t  dcache_adapter;
  serpent_cache_pkg::dcache_rtrn_t adapter_dcache;

`ifdef AXI64_CACHE_PORTS
  // used for local plumbing in this case
  l15_req_t                        l15_req;
  l15_rtrn_t                       l15_rtrn;
`endif

  serpent_icache #(
`ifdef AXI64_CACHE_PORTS
    .Axi64BitCompliant  ( 1'b1          ),
`endif
    // use ID 0 for icache reads
    .RdTxId             ( 0             ),
    .CachedAddrBeg      ( CachedAddrBeg ),
    .CachedAddrEnd      ( CachedAddrEnd )
  ) i_serpent_icache (
    .clk_i              ( clk_i                   ),
    .rst_ni             ( rst_ni                  ),
    .flush_i            ( icache_flush_i          ),
    .en_i               ( icache_en_i             ),
    .miss_o             ( icache_miss_o           ),
    .areq_i             ( icache_areq_i           ),
    .areq_o             ( icache_areq_o           ),
    .dreq_i             ( icache_dreq_i           ),
    .dreq_o             ( icache_dreq_o           ),
    .mem_rtrn_vld_i     ( adapter_icache_rtrn_vld ),
    .mem_rtrn_i         ( adapter_icache          ),
    .mem_data_req_o     ( icache_adapter_data_req ),
    .mem_data_ack_i     ( adapter_icache_data_ack ),
    .mem_data_o         ( icache_adapter          )
  );

  // Note:
  // Ports 0/1 for PTW and LD unit are read only.
  // they have equal prio and are RR arbitrated
  // Port 2 is write only and goes into the merging write buffer
  serpent_dcache #(
    // use ID 1 for dcache reads and amos. note that the writebuffer
    // uses all IDs up to DCACHE_MAX_TX-1 for write transactions.
    .RdAmoTxId       ( 1             ),
    .CachedAddrBeg   ( CachedAddrBeg ),
    .CachedAddrEnd   ( CachedAddrEnd )
  ) i_serpent_dcache (
    .clk_i           ( clk_i                   ),
    .rst_ni          ( rst_ni                  ),
    .enable_i        ( dcache_enable_i         ),
    .flush_i         ( dcache_flush_i          ),
    .flush_ack_o     ( dcache_flush_ack_o      ),
    .miss_o          ( dcache_miss_o           ),
    .wbuffer_empty_o ( wbuffer_empty_o         ),
    .amo_req_i       ( dcache_amo_req_i        ),
    .amo_resp_o      ( dcache_amo_resp_o       ),
    .req_ports_i     ( dcache_req_ports_i      ),
    .req_ports_o     ( dcache_req_ports_o      ),
    .mem_rtrn_vld_i  ( adapter_dcache_rtrn_vld ),
    .mem_rtrn_i      ( adapter_dcache          ),
    .mem_data_req_o  ( dcache_adapter_data_req ),
    .mem_data_ack_i  ( adapter_dcache_data_ack ),
    .mem_data_o      ( dcache_adapter          )
  );

  // arbiter/adapter
  serpent_l15_adapter #(
    .SwapEndianess   ( SwapEndianess ),
    .CachedAddrBeg   ( CachedAddrBeg ),
    .CachedAddrEnd   ( CachedAddrEnd )
  ) i_adapter (
    .clk_i              ( clk_i                   ),
    .rst_ni             ( rst_ni                  ),
    .icache_data_req_i  ( icache_adapter_data_req ),
    .icache_data_ack_o  ( adapter_icache_data_ack ),
    .icache_data_i      ( icache_adapter          ),
    .icache_rtrn_vld_o  ( adapter_icache_rtrn_vld ),
    .icache_rtrn_o      ( adapter_icache          ),
    .dcache_data_req_i  ( dcache_adapter_data_req ),
    .dcache_data_ack_o  ( adapter_dcache_data_ack ),
    .dcache_data_i      ( dcache_adapter          ),
    .dcache_rtrn_vld_o  ( adapter_dcache_rtrn_vld ),
    .dcache_rtrn_o      ( adapter_dcache          ),
`ifdef AXI64_CACHE_PORTS
    .l15_req_o          ( l15_req                 ),
    .l15_rtrn_i         ( l15_rtrn                )
`else
    .l15_req_o          ( l15_req_o               ),
    .l15_rtrn_i         ( l15_rtrn_i              )
`endif
  );

///////////////////////////////////////////////////////
// different memory plumbing to allow for using the
// serpent cache subsystem in a standard AXI setting
// for verification purposes.
///////////////////////////////////////////////////////

`ifdef AXI64_CACHE_PORTS

  // support up to 512bit cache lines
  localparam AxiNumWords = 8;

  logic axi_rd_req, axi_rd_gnt;
  logic [63:0]                    axi_rd_addr, axi_wr_addr;
  logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen;
  logic [1:0] axi_rd_size, axi_wr_size;
  logic [AxiIdWidth-1:0] axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out;
  logic axi_rd_valid;
  logic [AxiNumWords-1:0][63:0] axi_rd_data, axi_wr_data;
  logic [AxiNumWords-1:0][7:0]  axi_wr_be;
  logic axi_wr_req, axi_wr_gnt;
  logic axi_wr_valid, axi_rd_rdy, axi_wr_rdy;

  logic ifill;
  logic [serpent_cache_pkg::L15_TID_WIDTH+2-1:0] id_tmp;
  logic rd_pending_d, rd_pending_q;

  // request side
  assign ifill = (l15_req.l15_rqtype==serpent_cache_pkg::L15_IMISS_RQ);

  // reads (loads and instruction fills) are held back while another read is pending
  assign axi_rd_req = l15_req.l15_val && (l15_req.l15_rqtype==serpent_cache_pkg::L15_LOAD_RQ | ifill) && !rd_pending_q;
  assign axi_wr_req = l15_req.l15_val && (l15_req.l15_rqtype==serpent_cache_pkg::L15_STORE_RQ);

  assign axi_rd_addr = l15_req.l15_address;
  assign axi_wr_addr = axi_rd_addr;

  // the axi interconnect does not correctly handle the ordering of read responses.
  // workaround: only allow for one outstanding TX. need to improve this.
  assign rd_pending_d = (axi_rd_valid ) ? '0 : rd_pending_q | axi_rd_gnt;

  // the AXI ID carries {thread id, ifill flag, non-cacheable flag}; it is
  // decoded again on the return path below
  assign axi_rd_id_in = {l15_req.l15_threadid, ifill, l15_req.l15_nc};
  assign axi_wr_id_in = axi_rd_id_in;

  assign axi_rd_size = (ifill) ? 2'b11 : l15_req.l15_size[1:0];// always request 64bit words in case of ifill
  assign axi_wr_size = l15_req.l15_size[1:0];

  // l15_size[2] selects a whole cache line burst, otherwise a single beat is requested
  assign axi_rd_blen = (l15_req.l15_size[2]) ? ((ifill) ? ariane_pkg::ICACHE_LINE_WIDTH/64-1  :
                                                          ariane_pkg::DCACHE_LINE_WIDTH/64-1) : '0;
  assign axi_wr_blen = '0;// single word writes

  assign axi_wr_data = l15_req.l15_data;
  assign axi_wr_be   = (axi_wr_req) ? serpent_cache_pkg::toByteEnable8(axi_wr_addr[2:0], axi_wr_size) : '0;

  // return path
  always_comb begin : p_axi_rtrn
    // default
    l15_rtrn = '0;
    // from request path
    l15_rtrn.l15_ack = axi_rd_gnt | axi_wr_gnt;
    l15_rtrn.l15_header_ack = axi_rd_gnt | axi_wr_gnt;

    // we are always ready to consume packets unconditionally,
    // but in case of returning reads, we have to stall the write response
    axi_rd_rdy = 1'b1;
    axi_wr_rdy = ~axi_rd_valid;// this vld signal comes directly from a register

    // unconditionally consume packets
    l15_rtrn.l15_val = axi_rd_valid | axi_wr_valid;

    // encode packet type (read responses take precedence over write responses)
    id_tmp = (axi_rd_valid) ? axi_rd_id_out : axi_wr_id_out;
    l15_rtrn.l15_returntype = (axi_rd_valid && id_tmp[1]) ? L15_IFILL_RET :
                              (axi_rd_valid)              ? L15_LOAD_RET  :
                                                            L15_ST_ACK;

    // decode id and set flags accordingly
    l15_rtrn.l15_noncacheable = id_tmp[0];
    l15_rtrn.l15_threadid     = id_tmp>>2;

    // 4B non-cacheable ifill
    l15_rtrn.l15_f4b = id_tmp[0] & id_tmp[1] & axi_rd_valid;

    l15_rtrn.l15_data_0 = axi_rd_data[0];
    l15_rtrn.l15_data_1 = axi_rd_data[1];
    l15_rtrn.l15_data_2 = axi_rd_data[2];
    l15_rtrn.l15_data_3 = axi_rd_data[3];
  end

  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
    if(~rst_ni) begin
      rd_pending_q <= '0;
    end else begin
      rd_pending_q <= rd_pending_d;
    end
  end

  axi_adapter2 #(
    .DATA_WORDS      ( AxiNumWords   ),
    .AXI_ID_WIDTH    ( AxiIdWidth    )
  ) i_axi_adapter (
    .clk_i           ( clk_i         ),
    .rst_ni          ( rst_ni        ),
    .rd_req_i        ( axi_rd_req    ),
    .rd_gnt_o        ( axi_rd_gnt    ),
    .rd_addr_i       ( axi_rd_addr   ),
    .rd_blen_i       ( axi_rd_blen   ),
    .rd_size_i       ( axi_rd_size   ),
    .rd_id_i         ( axi_rd_id_in  ),
    .rd_rdy_i        ( axi_rd_rdy    ),
    .rd_valid_o      ( axi_rd_valid  ),
    .rd_data_o       ( axi_rd_data   ),
    .rd_id_o         ( axi_rd_id_out ),
    .rd_word_o       (               ),
    .rd_word_valid_o (               ),
    .rd_word_cnt_o   (               ),
    .wr_req_i        ( axi_wr_req    ),
    .wr_gnt_o        ( axi_wr_gnt    ),
    .wr_addr_i       ( axi_wr_addr   ),
    .wr_data_i       ( axi_wr_data   ),
    .wr_be_i         ( axi_wr_be     ),
    .wr_blen_i       ( axi_wr_blen   ),
    .wr_size_i       ( axi_wr_size   ),
    .wr_id_i         ( axi_wr_id_in  ),
    .wr_rdy_i        ( axi_wr_rdy    ),
    .wr_valid_o      ( axi_wr_valid  ),
    .wr_id_o         ( axi_wr_id_out ),
    .axi_req_o       ( axi_req_o     ),
    .axi_resp_i      ( axi_resp_i    )
  );

`endif

///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////

//pragma translate_off
`ifndef VERILATOR

`ifdef AXI64_CACHE_PORTS
  initial begin
    // $fatal takes the format string and its arguments directly, no need for
    // the non-standard $psprintf
    assert (AxiIdWidth >= $clog2(serpent_cache_pkg::DCACHE_MAX_TX)+2) else
      $fatal(1,"[l1 cache] AXI ID must be at least %01d bit wide", $clog2(serpent_cache_pkg::DCACHE_MAX_TX)+2);
  end
`endif

  // Note: unlike $fatal, $warning does not take a leading finish number, its
  // argument list starts with the message.
  // NOTE(review): the message below is tagged "[l1 dcache]" although it
  // checks the instruction cache output - confirm whether the tag should
  // read "[l1 icache]".
  a_invalid_instruction_fetch: assert property (
    @(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
      else $warning("[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
        icache_dreq_o.vaddr, icache_dreq_o.data);

  a_invalid_write_data: assert property (
    @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX)
      else $warning("[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
        {dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index}, dcache_req_ports_i[2].data_be, dcache_req_ports_i[2].data_wdata);

  for(genvar j=0; j<2; j++) begin : g_assertion
    a_invalid_read_data: assert property (
      @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
        else $warning("[l1 dcache] reading invalid data on port %01d: data=%016X",
          j, dcache_req_ports_o[j].data_rdata);
  end
`endif
//pragma translate_on

endmodule // serpent_cache_subsystem

View file

@ -0,0 +1,329 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: Data cache that is compatible with openpiton.
import ariane_pkg::*;
import serpent_cache_pkg::*;
module serpent_dcache #(
// ID to be used for read and AMO transactions.
// note that the write buffer uses all IDs up to DCACHE_MAX_TX-1 for write transactions
parameter logic [DCACHE_ID_WIDTH-1:0] RdAmoTxId = 1,
parameter logic [63:0] CachedAddrBeg = 64'h00_8000_0000, // begin of cached region
parameter logic [63:0] CachedAddrEnd = 64'h80_0000_0000 // end of cached region
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// Cache management
input logic enable_i, // from CSR
input logic flush_i, // high until acknowledged
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st
output logic wbuffer_empty_o,
// AMO interface
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
// Request ports
input dcache_req_i_t [2:0] req_ports_i,
output dcache_req_o_t [2:0] req_ports_o,
input logic mem_rtrn_vld_i,
input dcache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output dcache_req_t mem_data_o
);
// LD unit and PTW
localparam NumPorts = 3;
// miss unit <-> read controllers
logic cache_en;
// miss unit <-> memory
logic wr_cl_vld;
logic wr_cl_nc;
logic [DCACHE_SET_ASSOC-1:0] wr_cl_we;
logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag;
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx;
logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off;
logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data;
logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be;
logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits;
logic [DCACHE_SET_ASSOC-1:0] wr_req;
logic wr_ack;
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx;
logic [DCACHE_OFFSET_WIDTH-1:0] wr_off;
logic [63:0] wr_data;
logic [7:0] wr_data_be;
// miss unit <-> controllers/wbuffer
logic [NumPorts-1:0] miss_req;
logic [NumPorts-1:0] miss_ack;
logic [NumPorts-1:0] miss_nc;
logic [NumPorts-1:0] miss_we;
logic [NumPorts-1:0][63:0] miss_wdata;
logic [NumPorts-1:0][63:0] miss_paddr;
logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits;
logic [NumPorts-1:0][2:0] miss_size;
logic [NumPorts-1:0][DCACHE_ID_WIDTH-1:0] miss_id;
logic [NumPorts-1:0] miss_replay;
logic [NumPorts-1:0] miss_rtrn_vld;
logic [DCACHE_ID_WIDTH-1:0] miss_rtrn_id;
// memory <-> read controllers/miss unit
logic [NumPorts-1:0] rd_prio;
logic [NumPorts-1:0] rd_tag_only;
logic [NumPorts-1:0] rd_req;
logic [NumPorts-1:0] rd_ack;
logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag;
logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx;
logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off;
logic [63:0] rd_data;
logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits;
logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh;
// miss unit <-> wbuffer
logic [DCACHE_MAX_TX-1:0][63:0] tx_paddr;
logic [DCACHE_MAX_TX-1:0] tx_vld;
// wbuffer <-> memory
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data;
///////////////////////////////////////////////////////
// miss handling unit
///////////////////////////////////////////////////////
serpent_dcache_missunit #(
.AmoTxId ( RdAmoTxId ),
.NumPorts ( NumPorts )
) i_serpent_dcache_missunit (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.enable_i ( enable_i ),
.flush_i ( flush_i ),
.flush_ack_o ( flush_ack_o ),
.miss_o ( miss_o ),
.wbuffer_empty_i ( wbuffer_empty_o ),
.cache_en_o ( cache_en ),
// amo interface
.amo_req_i ( amo_req_i ),
.amo_resp_o ( amo_resp_o ),
// miss handling interface
.miss_req_i ( miss_req ),
.miss_ack_o ( miss_ack ),
.miss_nc_i ( miss_nc ),
.miss_we_i ( miss_we ),
.miss_wdata_i ( miss_wdata ),
.miss_paddr_i ( miss_paddr ),
.miss_vld_bits_i ( miss_vld_bits ),
.miss_size_i ( miss_size ),
.miss_id_i ( miss_id ),
.miss_replay_o ( miss_replay ),
.miss_rtrn_vld_o ( miss_rtrn_vld ),
.miss_rtrn_id_o ( miss_rtrn_id ),
// from writebuffer
.tx_paddr_i ( tx_paddr ),
.tx_vld_i ( tx_vld ),
// cache memory interface
.wr_cl_vld_o ( wr_cl_vld ),
.wr_cl_nc_o ( wr_cl_nc ),
.wr_cl_we_o ( wr_cl_we ),
.wr_cl_tag_o ( wr_cl_tag ),
.wr_cl_idx_o ( wr_cl_idx ),
.wr_cl_off_o ( wr_cl_off ),
.wr_cl_data_o ( wr_cl_data ),
.wr_cl_data_be_o ( wr_cl_data_be ),
.wr_vld_bits_o ( wr_vld_bits ),
// memory interface
.mem_rtrn_vld_i ( mem_rtrn_vld_i ),
.mem_rtrn_i ( mem_rtrn_i ),
.mem_data_req_o ( mem_data_req_o ),
.mem_data_ack_i ( mem_data_ack_i ),
.mem_data_o ( mem_data_o )
);
///////////////////////////////////////////////////////
// read controllers (LD unit and PTW/MMU)
///////////////////////////////////////////////////////
generate
// note: last read port is used by the write buffer
for(genvar k=0; k<NumPorts-1; k++) begin
// set these to high prio ports
assign rd_prio[k] = 1'b1;
serpent_dcache_ctrl #(
.RdTxId ( RdAmoTxId ),
.CachedAddrBeg ( CachedAddrBeg ),
.CachedAddrEnd ( CachedAddrEnd ))
i_serpent_dcache_ctrl (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.cache_en_i ( cache_en ),
// reqs from core
.req_port_i ( req_ports_i [k] ),
.req_port_o ( req_ports_o [k] ),
// miss interface
.miss_req_o ( miss_req [k] ),
.miss_ack_i ( miss_ack [k] ),
.miss_we_o ( miss_we [k] ),
.miss_wdata_o ( miss_wdata [k] ),
.miss_vld_bits_o ( miss_vld_bits [k] ),
.miss_paddr_o ( miss_paddr [k] ),
.miss_nc_o ( miss_nc [k] ),
.miss_size_o ( miss_size [k] ),
.miss_id_o ( miss_id [k] ),
.miss_replay_i ( miss_replay [k] ),
.miss_rtrn_vld_i ( miss_rtrn_vld [k] ),
// used to detect readout mux collisions
.wr_cl_vld_i ( wr_cl_vld ),
// cache mem interface
.rd_tag_o ( rd_tag [k] ),
.rd_idx_o ( rd_idx [k] ),
.rd_off_o ( rd_off [k] ),
.rd_req_o ( rd_req [k] ),
.rd_tag_only_o ( rd_tag_only [k] ),
.rd_ack_i ( rd_ack [k] ),
.rd_data_i ( rd_data ),
.rd_vld_bits_i ( rd_vld_bits ),
.rd_hit_oh_i ( rd_hit_oh )
);
end
endgenerate
///////////////////////////////////////////////////////
// store unit controller
///////////////////////////////////////////////////////
// set read port to low priority
assign rd_prio[2] = 1'b0;
serpent_dcache_wbuffer #(
.CachedAddrBeg ( CachedAddrBeg ),
.CachedAddrEnd ( CachedAddrEnd ))
i_serpent_dcache_wbuffer (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.empty_o ( wbuffer_empty_o ),
// TODO: fix this
.cache_en_i ( cache_en ),
// .cache_en_i ( '0 ),
// request ports from core (store unit)
.req_port_i ( req_ports_i [2] ),
.req_port_o ( req_ports_o [2] ),
// miss unit interface
.miss_req_o ( miss_req [2] ),
.miss_ack_i ( miss_ack [2] ),
.miss_we_o ( miss_we [2] ),
.miss_wdata_o ( miss_wdata [2] ),
.miss_vld_bits_o ( miss_vld_bits [2] ),
.miss_paddr_o ( miss_paddr [2] ),
.miss_nc_o ( miss_nc [2] ),
.miss_size_o ( miss_size [2] ),
.miss_id_o ( miss_id [2] ),
.miss_rtrn_vld_i ( miss_rtrn_vld [2] ),
.miss_rtrn_id_i ( miss_rtrn_id ),
// cache read interface
.rd_tag_o ( rd_tag [2] ),
.rd_idx_o ( rd_idx [2] ),
.rd_off_o ( rd_off [2] ),
.rd_req_o ( rd_req [2] ),
.rd_tag_only_o ( rd_tag_only [2] ),
.rd_ack_i ( rd_ack [2] ),
.rd_data_i ( rd_data ),
.rd_vld_bits_i ( rd_vld_bits ),
.rd_hit_oh_i ( rd_hit_oh ),
// incoming invalidations/cache refills
.wr_cl_vld_i ( wr_cl_vld ),
.wr_cl_idx_i ( wr_cl_idx ),
// single word write interface
.wr_req_o ( wr_req ),
.wr_ack_i ( wr_ack ),
.wr_idx_o ( wr_idx ),
.wr_off_o ( wr_off ),
.wr_data_o ( wr_data ),
.wr_data_be_o ( wr_data_be ),
// write buffer forwarding
.wbuffer_data_o ( wbuffer_data ),
.tx_paddr_o ( tx_paddr ),
.tx_vld_o ( tx_vld )
);
///////////////////////////////////////////////////////
// memory arrays, arbitration and tag comparison
///////////////////////////////////////////////////////
serpent_dcache_mem #(
.NumPorts(NumPorts)
) i_serpent_dcache_mem (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
// read ports
.rd_prio_i ( rd_prio ),
.rd_tag_i ( rd_tag ),
.rd_idx_i ( rd_idx ),
.rd_off_i ( rd_off ),
.rd_req_i ( rd_req ),
.rd_tag_only_i ( rd_tag_only ),
.rd_ack_o ( rd_ack ),
.rd_vld_bits_o ( rd_vld_bits ),
.rd_hit_oh_o ( rd_hit_oh ),
.rd_data_o ( rd_data ),
// cacheline write port
.wr_cl_vld_i ( wr_cl_vld ),
.wr_cl_nc_i ( wr_cl_nc ),
.wr_cl_we_i ( wr_cl_we ),
.wr_cl_tag_i ( wr_cl_tag ),
.wr_cl_idx_i ( wr_cl_idx ),
.wr_cl_off_i ( wr_cl_off ),
.wr_cl_data_i ( wr_cl_data ),
.wr_cl_data_be_i ( wr_cl_data_be ),
.wr_vld_bits_i ( wr_vld_bits ),
// single word write port
.wr_req_i ( wr_req ),
.wr_ack_o ( wr_ack ),
.wr_idx_i ( wr_idx ),
.wr_off_i ( wr_off ),
.wr_data_i ( wr_data ),
.wr_data_be_i ( wr_data_be ),
// write buffer forwarding
.wbuffer_data_i ( wbuffer_data )
);
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
// check for concurrency issues
//pragma translate_off
`ifndef VERILATOR
flush: assert property (
@(posedge clk_i) disable iff (~rst_ni) flush_i |-> flush_ack_o |-> wbuffer_empty_o)
else $fatal(1,"[l1 dcache] flushed cache implies flushed wbuffer");
initial begin
// assert wrong parameterizations
assert (DCACHE_INDEX_WIDTH<=12)
else $fatal(1,"[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages");
end
`endif
//pragma translate_on
endmodule // serpent_dcache

View file

@ -0,0 +1,267 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: DCache controller for read port
import ariane_pkg::*;
import serpent_cache_pkg::*;
// Read-port controller of the serpent L1 data cache.
//
// Purpose: accepts a request on req_port_i, arbitrates for a cache memory
// readout (rd_req_o/rd_ack_i) and, once the tag is available, evaluates the
// lookup result (rd_hit_oh_i). Hits are answered with rd_data_i. Misses, accesses
// whose tag lies outside [CachedAddrBeg, CachedAddrEnd) and all accesses while
// cache_en_i is low are forwarded to the miss handler through the miss_* signals.
// Requests that collide with a cacheline write or a pending miss are replayed.
//
// Parameters:
// RdTxId        - constant transaction ID driven onto miss_id_o
// CachedAddrBeg - first address of the cached region (compared at tag granularity)
// CachedAddrEnd - first address after the cached region (compared at tag granularity)
module serpent_dcache_ctrl #(
parameter logic [DCACHE_ID_WIDTH-1:0] RdTxId = 1, // ID to use for read transactions
parameter logic [63:0] CachedAddrBeg = 64'h00_8000_0000, // begin of cached region
parameter logic [63:0] CachedAddrEnd = 64'h80_0000_0000 // end of cached region
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic cache_en_i, // cache enable; when low, lookups are never treated as hits and miss_nc_o is forced high
// core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// interface to miss handler
output logic miss_req_o,
input logic miss_ack_i,
output logic miss_we_o, // unused (set to 0)
output logic [63:0] miss_wdata_o, // unused (set to 0)
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index
output logic [63:0] miss_paddr_o,
output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline
output logic [DCACHE_ID_WIDTH-1:0] miss_id_o, // set to constant ID
input logic miss_replay_i, // request collided with pending miss - have to replay the request
input logic miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory
// used to detect readout mux collisions
input logic wr_cl_vld_i,
// cache memory interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to zero here
input logic rd_ack_i,
input logic [63:0] rd_data_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i
);
// controller FSM
// IDLE        : wait for a core request and for the cache memory to grant the readout
// READ        : wait for the tag, then evaluate hit/miss
// MISS_REQ    : hand the request to the miss handler
// MISS_WAIT   : wait for the miss handler to signal the return
// KILL_MISS   : request was killed while a miss was outstanding, wait for the return
// REPLAY_REQ  : re-arbitrate for a readout after a collision
// REPLAY_READ : same evaluation as READ, but the tag saved earlier is reused
typedef enum logic[2:0] {IDLE, READ, MISS_REQ, MISS_WAIT, KILL_MISS, REPLAY_REQ, REPLAY_READ} state_t;
state_t state_d, state_q;
// saved request address, split into tag / cacheline index / byte offset
logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q;
logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q;
logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q;
// valid bits returned by the last readout, forwarded to the miss handler
logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q;
// rd_req_q / rd_ack_q are rd_req_o / rd_ack_i delayed by one cycle
logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q;
logic [1:0] data_size_d, data_size_q;
///////////////////////////////////////////////////////
// misc
///////////////////////////////////////////////////////
// map address to tag/idx/offset and save
// the valid bits are captured in the cycle after a read request was issued
assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q;
// the tag is captured when the FSM asserts save_tag (READ state with tag_valid set)
assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q;
// index, offset and size are captured in the cycle the request is granted
assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q;
assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0] : address_off_q;
assign data_size_d = (req_port_o.data_gnt) ? req_port_i.data_size : data_size_q;
// the cache memory is driven with the unregistered (_d) values, i.e. a new address
// is visible to the memory in the same cycle in which it is captured
assign rd_tag_o = address_tag_d;
assign rd_idx_o = address_idx_d;
assign rd_off_o = address_off_d;
assign req_port_o.data_rdata = rd_data_i;
// to miss unit
assign miss_vld_bits_o = vld_data_q;
// narrower than 64bit; the concatenation is zero-extended by the assignment
assign miss_paddr_o = {address_tag_q, address_idx_q, address_off_q};
// NC accesses use the (zero-extended) request size, all others request 3'b111
assign miss_size_o = (miss_nc_o) ? data_size_q : 3'b111;
// non-cacheable if the saved tag is outside the cached region or the cache is disabled
assign miss_nc_o = (address_tag_q < (CachedAddrBeg>>DCACHE_INDEX_WIDTH)) ||
(address_tag_q >= (CachedAddrEnd>>DCACHE_INDEX_WIDTH)) ||
(!cache_en_i);
// this controller only issues reads, hence no write data
assign miss_we_o = '0;
assign miss_wdata_o = '0;
assign miss_id_o = RdTxId;
assign rd_req_d = rd_req_o;
assign rd_ack_d = rd_ack_i;
assign rd_tag_only_o = '0;
///////////////////////////////////////////////////////
// main control logic
///////////////////////////////////////////////////////
always_comb begin : p_fsm
// default assignment
// (the state branches below override these defaults, statement order matters)
state_d = state_q;
save_tag = 1'b0;
rd_req_o = 1'b0;
miss_req_o = 1'b0;
req_port_o.data_rvalid = 1'b0;
req_port_o.data_gnt = 1'b0;
// interfaces
unique case (state_q)
//////////////////////////////////
// wait for an incoming request
IDLE: begin
if (req_port_i.data_req) begin
rd_req_o = 1'b1;
// the request is only granted once the cache memory acknowledges the readout
if (rd_ack_i) begin
state_d = READ;
req_port_o.data_gnt = 1'b1;
end
end
end
//////////////////////////////////
// check whether we have a hit
// in case the cache is disabled,
// or in case the address is NC, we
// reuse the miss mechanism to handle
// the request
READ, REPLAY_READ: begin
// speculatively request cache line
rd_req_o = 1'b1;
// kill -> go back to IDLE
// (a killed request is still answered with data_rvalid)
if(req_port_i.kill_req) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end else if(req_port_i.tag_valid | state_q==REPLAY_READ) begin
// in REPLAY_READ the previously saved tag is reused
save_tag = (state_q!=REPLAY_READ);
// replay if a cacheline write is in progress or if the read request
// issued in the previous cycle was not acknowledged
if(wr_cl_vld_i | ~rd_ack_q) begin
state_d = REPLAY_REQ;
// we've got a hit
end else if((|rd_hit_oh_i) & cache_en_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
// we can handle another request
if (rd_ack_i && req_port_i.data_req) begin
state_d = READ;
req_port_o.data_gnt = 1'b1;
end
// we've got a miss
end else begin
state_d = MISS_REQ;
end
end
end
//////////////////////////////////
// issue request
MISS_REQ: begin
miss_req_o = 1'b1;
if(req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
// if the miss handler accepted the request in this cycle, its return
// has to be awaited in KILL_MISS before a new request can be served
if(miss_ack_i) begin
state_d = KILL_MISS;
end else begin
state_d = IDLE;
end
end else if(miss_replay_i) begin
state_d = REPLAY_REQ;
end else if(miss_ack_i) begin
state_d = MISS_WAIT;
end
end
//////////////////////////////////
// wait until the memory transaction
// returns.
MISS_WAIT: begin
if(req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if(miss_rtrn_vld_i) begin
state_d = IDLE;
end else begin
state_d = KILL_MISS;
end
end else if(miss_rtrn_vld_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end
end
//////////////////////////////////
// replay read request
REPLAY_REQ: begin
rd_req_o = 1'b1;
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
state_d = IDLE;
end else if(rd_ack_i) begin
state_d = REPLAY_READ;
end
end
//////////////////////////////////
// killed miss,
// wait until miss unit responds and
// go back to idle
// (no data_rvalid here, it was already signalled when the kill was seen)
KILL_MISS: begin
if (miss_rtrn_vld_i) begin
state_d = IDLE;
end
end
default: begin
// we should never get here
state_d = IDLE;
end
endcase // state_q
end
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if(~rst_ni) begin
state_q <= IDLE;
address_tag_q <= '0;
address_idx_q <= '0;
address_off_q <= '0;
vld_data_q <= '0;
data_size_q <= '0;
rd_req_q <= '0;
rd_ack_q <= '0;
end else begin
state_q <= state_d;
address_tag_q <= address_tag_d;
address_idx_q <= address_idx_d;
address_off_q <= address_off_d;
vld_data_q <= vld_data_d;
data_size_q <= data_size_d;
rd_req_q <= rd_req_d;
rd_ack_q <= rd_ack_d;
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
// checked one cycle after a cycle in which rd_ack_i was low: with the cache enabled,
// at most one way may report a hit
hot1: assert property (
@(posedge clk_i) disable iff (~rst_ni) (~rd_ack_i) |=> cache_en_i |-> $onehot0(rd_hit_oh_i))
else $fatal(1,"[l1 dcache ctrl] rd_hit_oh_i signal must be hot1");
initial begin
// assert wrong parameterizations
assert (DCACHE_INDEX_WIDTH<=12)
else $fatal(1,"[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages");
end
`endif
//pragma translate_on
endmodule // serpent_dcache_ctrl

View file

@ -0,0 +1,373 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: Memory arrays, arbiter and tag comparison for serpent dcache.
//
//
// Notes: 1) all ports can trigger a readout of all ways, and the way where the tag hits is selected
//
// 2) only port0 can write full cache lines. higher ports are read only. also, port0 can only read the tag array,
// and does not trigger a cache line readout.
//
// 3) the single word write port is a separate port without access to the tag memory.
// these single word writes can interleave with read operations if they go to different
// cacheline offsets, since each word offset is placed into a different SRAM bank.
//
// 4) Read ports with the same priority are round-robin arbitrated, but high prio ports (rd_prio_i[port_nr] = 1'b1) will stall
// low prio ports (rd_prio_i[port_nr] = 1'b0)
import ariane_pkg::*;
import serpent_cache_pkg::*;
// Memory arrays, read-port arbitration and tag comparison of the serpent L1 data cache.
//
// Structure:
//   - DCACHE_NUM_BANKS data SRAMs. Bank k holds the 64bit word k of a cacheline for all
//     ways (each bank is DCACHE_SET_ASSOC*64 bit wide).
//   - DCACHE_SET_ASSOC tag SRAMs, each entry holds {valid bit, tag}.
//   - NumPorts read ports that are arbitrated by i_rrarbiter. Requests on ports with
//     rd_prio_i = 1 mask requests of ports with rd_prio_i = 0. The arbiter is disabled
//     while wr_cl_vld_i is high.
//   - One cacheline write port (wr_cl_*) that has access to the tag/valid arrays and
//     all data banks.
//   - One single word write port (wr_*) that only accesses one data bank. It is
//     acknowledged unless a full cacheline is written or the acknowledged read accesses
//     the same bank in the same cycle.
//   - The read data is overlaid bytewise with matching data from the write buffer
//     (wbuffer_data_i).
//
// Timing: the SRAMs are addressed in the request cycle; the tag of the acknowledged
// read port (rd_tag_i) is sampled one cycle later and compared against the tag SRAM
// outputs, hence rd_hit_oh_o / rd_vld_bits_o / rd_data_o belong to the request that
// was acknowledged in the previous cycle.
//
// Parameters:
//   NumPorts - number of read ports
module serpent_dcache_mem #(
  parameter int unsigned NumPorts = 3
)(
  input  logic                                           clk_i,
  input  logic                                           rst_ni,
  // ports
  input  logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0]      rd_tag_i,        // tag in - comes one cycle later
  input  logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0]   rd_idx_i,
  input  logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0]   rd_off_i,
  input  logic [NumPorts-1:0]                            rd_req_i,        // read the word at offset off_i[:3] in all ways
  input  logic [NumPorts-1:0]                            rd_tag_only_i,   // only do a tag/valid lookup, no access to data arrays
  input  logic [NumPorts-1:0]                            rd_prio_i,       // 0: low prio, 1: high prio
  output logic [NumPorts-1:0]                            rd_ack_o,
  output logic [DCACHE_SET_ASSOC-1:0]                    rd_vld_bits_o,
  output logic [DCACHE_SET_ASSOC-1:0]                    rd_hit_oh_o,
  output logic [63:0]                                    rd_data_o,
  // only available on port 0, uses address signals of port 0
  input  logic                                           wr_cl_vld_i,
  input  logic                                           wr_cl_nc_i,      // noncacheable access
  input  logic [DCACHE_SET_ASSOC-1:0]                    wr_cl_we_i,      // writes a full cacheline
  input  logic [DCACHE_TAG_WIDTH-1:0]                    wr_cl_tag_i,
  input  logic [DCACHE_CL_IDX_WIDTH-1:0]                 wr_cl_idx_i,
  input  logic [DCACHE_OFFSET_WIDTH-1:0]                 wr_cl_off_i,
  input  logic [DCACHE_LINE_WIDTH-1:0]                   wr_cl_data_i,
  input  logic [DCACHE_LINE_WIDTH/8-1:0]                 wr_cl_data_be_i,
  input  logic [DCACHE_SET_ASSOC-1:0]                    wr_vld_bits_i,
  // separate port for single word write, no tag access
  input  logic [DCACHE_SET_ASSOC-1:0]                    wr_req_i,        // write a single word to offset off_i[:3]
  output logic                                           wr_ack_o,
  input  logic [DCACHE_CL_IDX_WIDTH-1:0]                 wr_idx_i,
  input  logic [DCACHE_OFFSET_WIDTH-1:0]                 wr_off_i,
  input  logic [63:0]                                    wr_data_i,
  input  logic [7:0]                                     wr_data_be_i,
  // forwarded wbuffer
  input  wbuffer_t [DCACHE_WBUF_DEPTH-1:0]               wbuffer_data_i
);

  logic [DCACHE_NUM_BANKS-1:0]                                  bank_req;
  logic [DCACHE_NUM_BANKS-1:0]                                  bank_we;
  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][7:0]       bank_be;
  logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0]         bank_idx;
  logic [DCACHE_CL_IDX_WIDTH-1:0]                               bank_idx_d, bank_idx_q;
  logic [DCACHE_OFFSET_WIDTH-1:0]                               bank_off_d, bank_off_q;

  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][63:0]      bank_wdata;  //
  logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][63:0]      bank_rdata;  //
  logic [DCACHE_SET_ASSOC-1:0][63:0]                            rdata_cl;    // selected word from each cacheline

  logic [DCACHE_TAG_WIDTH-1:0]                                  rd_tag;
  logic [DCACHE_SET_ASSOC-1:0]                                  vld_req;     // bit enable for valid regs
  logic                                                         vld_we;      // valid bits write enable
  logic [DCACHE_SET_ASSOC-1:0]                                  vld_wdata;   // valid bits to write
  logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0]            tag_rdata;   // these are the tags coming from the tagmem
  logic [DCACHE_CL_IDX_WIDTH-1:0]                               vld_addr;    // valid bit

  logic [$clog2(NumPorts)-1:0]                                  vld_sel_d, vld_sel_q;

  logic [DCACHE_WBUF_DEPTH-1:0]                                 wbuffer_hit_oh;
  logic [7:0]                                                   wbuffer_be;
  logic [63:0]                                                  wbuffer_rdata, rdata;
  logic [63:0]                                                  wbuffer_cmp_addr;

  logic                                                         cmp_en_d, cmp_en_q;
  logic                                                         rd_acked;
  logic [NumPorts-1:0]                                          bank_collision, rd_req_masked, rd_req_prio;

  ///////////////////////////////////////////////////////
  // arbiter
  ///////////////////////////////////////////////////////

  // Read ports are arbitrated by i_rrarbiter below; high prio requests mask low prio
  // requests (see rd_req_masked).
  //
  // SRAM bank mapping (bank k holds word k of all ways):
  //
  // Bank 0                   Bank 1
  // [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] ..

  // byte enable and write data mapping
  generate
    for (genvar k=0;k<DCACHE_NUM_BANKS;k++) begin : g_bank
      for (genvar j=0;j<DCACHE_SET_ASSOC;j++) begin : g_bank_way
        assign bank_be[k][j]    = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*8 +: 8] :
                                  (wr_req_i[j]   & wr_ack_o)    ? wr_data_be_i              :
                                                                  '0;

        // The data select must use the same condition as the byte enable select above.
        // wr_cl_vld_i can be high while wr_cl_we_i is all zero; p_bank_req then still
        // acknowledges single word writes, which would otherwise store wr_cl_data_i
        // under the byte enables of the single word write.
        assign bank_wdata[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_i[k*64 +: 64] :
                                                                  wr_data_i;
      end
    end
  endgenerate

  assign vld_wdata  = wr_vld_bits_i;
  // cacheline writes have precedence over reads on the tag/valid arrays
  assign vld_addr   = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
  assign rd_tag     = rd_tag_i[vld_sel_q]; //delayed by one cycle
  assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d];
  assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
  // writes only access the ways selected by wr_cl_we_i, reads access all ways
  assign vld_req    = (wr_cl_vld_i) ? wr_cl_we_i  : (rd_acked) ? '1 : '0;

  // priority masking
  // disable low prio requests when any of the high prio reqs is present
  assign rd_req_prio   = rd_req_i & rd_prio_i;
  assign rd_req_masked = (|rd_req_prio) ? rd_req_prio : rd_req_i;

  // read port arbiter
  rrarbiter #(
    .NUM_REQ(NumPorts)
  ) i_rrarbiter (
    .clk_i  ( clk_i         ),
    .rst_ni ( rst_ni        ),
    .flush_i( 1'b0          ),
    .en_i   ( ~wr_cl_vld_i  ),
    .req_i  ( rd_req_masked ),
    .ack_o  ( rd_ack_o      ),
    .vld_o  ( rd_acked      ),
    .idx_o  ( vld_sel_d     )
  );

  // generates the request, write enable and index signals of the data banks
  always_comb begin : p_bank_req
    vld_we   = wr_cl_vld_i;
    bank_req = '0;
    wr_ack_o = '0;
    bank_we  = '0;
    bank_idx = '{default:wr_idx_i};

    // a read collides with the single word write if both address the same bank
    for(int k=0; k<NumPorts; k++) begin
      bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:3] == wr_off_i[DCACHE_OFFSET_WIDTH-1:3];
    end

    // full cacheline write: occupies all banks, nothing else is served
    if(wr_cl_vld_i & |wr_cl_we_i) begin
      bank_req = '1;
      bank_we  = '1;
      bank_idx = '{default:wr_cl_idx_i};
    end else begin
      // acknowledged read: access the bank holding the requested word (unless tag only)
      if(rd_acked) begin
        if(~rd_tag_only_i[vld_sel_d]) begin
          bank_req = dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:3]);
          bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:3]] = rd_idx_i[vld_sel_d];
        end
      end

      // single word write: can interleave with the read if it goes to a different bank,
      // or if the read does not access the data arrays at all
      if(|wr_req_i) begin
        if(rd_tag_only_i[vld_sel_d] | ~(rd_ack_o[vld_sel_d] & bank_collision[vld_sel_d])) begin
          wr_ack_o = 1'b1;
          bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:3]);
          bank_we   = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:3]);
        end
      end
    end
  end

  ///////////////////////////////////////////////////////
  // tag comparison, hit generation, readout muxes
  ///////////////////////////////////////////////////////

  logic [DCACHE_OFFSET_WIDTH-1:0]       wr_cl_off;
  logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx;
  logic [$clog2(DCACHE_SET_ASSOC)-1:0]  rd_hit_idx;

  // tags are only compared in the cycle after a tag readout (not after a tag write)
  assign cmp_en_d = (|vld_req) & ~vld_we;

  // word tag comparison in write buffer
  assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} :
                                            {rd_tag, bank_idx_q, bank_off_q};
  // hit generation
  generate
    for (genvar i=0;i<DCACHE_SET_ASSOC;i++) begin : g_tag_cmpsel
      // tag comparison of way i
      assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i]  & cmp_en_q;
      // word select (bank mux) of way i
      assign rdata_cl[i]    = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:3]][i];
    end

    // a write buffer entry hits if it holds valid bytes and its word tag matches
    for(genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin : g_wbuffer_hit
      assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & (wbuffer_data_i[k].wtag == (wbuffer_cmp_addr >> 3));
    end
  endgenerate

  // one-hot to binary conversion of the hit vectors
  // NOTE(review): presumably lzc returns the index of the set bit - confirm against lzc
  lzc #(
    .WIDTH ( DCACHE_WBUF_DEPTH )
  ) i_lzc_wbuffer_hit (
    .in_i    ( wbuffer_hit_oh   ),
    .cnt_o   ( wbuffer_hit_idx  ),
    .empty_o (                  )
  );

  lzc #(
    .WIDTH ( DCACHE_SET_ASSOC )
  ) i_lzc_rd_hit (
    .in_i    ( rd_hit_oh_o  ),
    .cnt_o   ( rd_hit_idx   ),
    .empty_o (              )
  );

  assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data;
  assign wbuffer_be    = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0;

  // word select within the returning cacheline; word 0 is used for NC accesses
  assign wr_cl_off     = (wr_cl_nc_i) ? '0 : wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3];
  // data returning on the cacheline write port is bypassed to the read data output
  assign rdata         = (wr_cl_vld_i)  ? wr_cl_data_i[wr_cl_off*64 +: 64] :
                                          rdata_cl[rd_hit_idx];

  // overlay bytes that hit in the write buffer
  generate
    for(genvar k=0; k<8; k++) begin : g_rd_data
      assign rd_data_o[8*k +: 8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k +: 8] : rdata[8*k +: 8];
    end
  endgenerate

  ///////////////////////////////////////////////////////
  // memory arrays and regs
  ///////////////////////////////////////////////////////

  // raw tag SRAM outputs: {valid bit, tag} per way
  logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata [DCACHE_SET_ASSOC-1:0];

  generate
    for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : g_data_banks
      // Data RAM
      sram #(
        .DATA_WIDTH ( ariane_pkg::DCACHE_SET_ASSOC * 64 ),
        .NUM_WORDS  ( serpent_cache_pkg::DCACHE_NUM_WORDS )
      ) i_data_sram (
        .clk_i      ( clk_i               ),
        .rst_ni     ( rst_ni              ),
        .req_i      ( bank_req   [k]      ),
        .we_i       ( bank_we    [k]      ),
        .addr_i     ( bank_idx   [k]      ),
        .wdata_i    ( bank_wdata [k]      ),
        .be_i       ( bank_be    [k]      ),
        .rdata_o    ( bank_rdata [k]      )
      );
    end

    for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : g_tag_srams

      assign tag_rdata[i]     = vld_tag_rdata[i][DCACHE_TAG_WIDTH-1:0];
      assign rd_vld_bits_o[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH];

      // Tag RAM
      sram #(
        // tag + valid bit
        .DATA_WIDTH ( ariane_pkg::DCACHE_TAG_WIDTH + 1 ),
        .NUM_WORDS  ( serpent_cache_pkg::DCACHE_NUM_WORDS )
      ) i_tag_sram (
        .clk_i     ( clk_i                        ),
        .rst_ni    ( rst_ni                       ),
        .req_i     ( vld_req[i]                   ),
        .we_i      ( vld_we                       ),
        .addr_i    ( vld_addr                     ),
        .wdata_i   ( {vld_wdata[i], wr_cl_tag_i}  ),
        .be_i      ( '1                           ),
        .rdata_o   ( vld_tag_rdata[i]             )
      );
    end
  endgenerate

  // registers the address of the current access for the comparison/mux stage
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
    if(~rst_ni) begin
      bank_idx_q <= '0;
      bank_off_q <= '0;
      vld_sel_q  <= '0;
      cmp_en_q   <= '0;
    end else begin
      bank_idx_q <= bank_idx_d;
      bank_off_q <= bank_off_d;
      vld_sel_q  <= vld_sel_d ;
      cmp_en_q   <= cmp_en_d;
    end
  end

  ///////////////////////////////////////////////////////
  // assertions
  ///////////////////////////////////////////////////////

  //pragma translate_off
  `ifndef VERILATOR

  // at most one way may hit in the cycle after a readout of all ways
  hit_hot1: assert property (
    @(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(rd_hit_oh_o))
      else $fatal(1,"[l1 dcache] rd_hit_oh_o signal must be hot1");

  // an acknowledged single word write may target at most one way
  word_write_hot1: assert property (
    @(posedge clk_i) disable iff (~rst_ni) wr_ack_o |-> $onehot0(wr_req_i))
      else $fatal(1,"[l1 dcache] wr_req_i signal must be hot1");

  // at most one write buffer entry may match the address of a readout
  wbuffer_hit_hot1: assert property (
    @(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(wbuffer_hit_oh))
      else $fatal(1,"[l1 dcache] wbuffer_hit_oh signal must be hot1");

  // this is only used for verification!
  // shadow copies of the valid bits and tags, updated on every tag write
  logic                                    vld_mirror[serpent_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
  logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[serpent_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
  logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test;

  always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
    if(~rst_ni) begin
      vld_mirror <= '{default:'0};
      tag_mirror <= '{default:'0};
    end else begin
      for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin
        if(vld_req[i] & vld_we) begin
          vld_mirror[vld_addr][i] <= vld_wdata[i];
          tag_mirror[vld_addr][i] <= wr_cl_tag_i;
        end
      end
    end
  end

  // flags ways that already hold a valid line with the tag that is about to be allocated
  generate
    for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
      assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata);
    end
  endgenerate

  tag_write_duplicate: assert property (
    @(posedge clk_i) disable iff (~rst_ni) |vld_req |-> vld_we |-> ~(|tag_write_duplicate_test))
      else $fatal(1,"[l1 dcache] cannot allocate a CL that is already present in the cache");

  `endif
  //pragma translate_on

endmodule // serpent_dcache_mem

View file

@ -0,0 +1,504 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: miss controller for serpent dcache. Note that the current assumption
// is that the port with the highest index issues writes instead of reads.
import ariane_pkg::*;
import serpent_cache_pkg::*;
module serpent_dcache_missunit #(
parameter logic [DCACHE_ID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs
parameter int unsigned NumPorts = 3 // number of miss ports
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// cache management, signals from/to core
input logic enable_i, // from CSR
input logic flush_i, // flush request, this waits for pending tx (write, read) to finish and will clear the cache
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st
// local cache management signals
input logic wbuffer_empty_i,
output logic cache_en_o, // local cache enable signal
// AMO interface
input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o,
// miss handling interface (ld, ptw, wbuffer)
input logic [NumPorts-1:0] miss_req_i,
output logic [NumPorts-1:0] miss_ack_o,
input logic [NumPorts-1:0] miss_nc_i,
input logic [NumPorts-1:0] miss_we_i,
input logic [NumPorts-1:0][63:0] miss_wdata_i,
input logic [NumPorts-1:0][63:0] miss_paddr_i,
input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,
input logic [NumPorts-1:0][2:0] miss_size_i,
input logic [NumPorts-1:0][DCACHE_ID_WIDTH-1:0] miss_id_i, // used as transaction ID
// signals that the request collided with a pending read
output logic [NumPorts-1:0] miss_replay_o,
// signals response from memory
output logic [NumPorts-1:0] miss_rtrn_vld_o,
output logic [DCACHE_ID_WIDTH-1:0] miss_rtrn_id_o, // only used for writes, set to zero fro reads
// from writebuffer
input logic [DCACHE_MAX_TX-1:0][63:0] tx_paddr_i, // used to check for address collisions with read operations
input logic [DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations
// write interface to cache memory
output logic wr_cl_vld_o, // writes a full cacheline
output logic wr_cl_nc_o, // writes a full cacheline
output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // writes a full cacheline
output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o,
output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o,
output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o,
output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o,
// memory interface
input logic mem_rtrn_vld_i,
input dcache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output dcache_req_t mem_data_o
);
// controller FSM
typedef enum logic[2:0] {IDLE, DRAIN, AMO, FLUSH, STORE_WAIT, LOAD_WAIT, AMO_WAIT} state_t;
state_t state_d, state_q;
// MSHR for reads
typedef struct packed {
logic [63:0] paddr ;
logic [2:0] size ;
logic [DCACHE_SET_ASSOC-1:0] vld_bits;
logic [DCACHE_ID_WIDTH-1:0] id ;
logic nc ;
logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way;
logic [$clog2(NumPorts)-1:0] miss_port_idx;
} mshr_t;
mshr_t mshr_d, mshr_q;
logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way, inv_way, rnd_way;
logic mshr_vld_d, mshr_vld_q, mshr_vld_q1;
logic mshr_allocate;
logic update_lfsr, all_ways_valid;
logic enable_d, enable_q;
logic flush_ack_d, flush_ack_q;
logic flush_en, flush_done;
logic mask_reads, lock_reqs;
logic amo_sel, miss_is_write;
logic [63:0] amo_data, tmp_paddr;
logic [$clog2(NumPorts)-1:0] miss_port_idx;
logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q;
logic [NumPorts-1:0] miss_req_masked_d, miss_req_masked_q;
logic inv_vld, inv_vld_all, cl_write_en;
logic load_ack, store_ack, amo_ack;
logic [NumPorts-1:0] mshr_rdrd_collision_d, mshr_rdrd_collision_q;
logic [NumPorts-1:0] mshr_rdrd_collision;
logic tx_rdwr_collision, mshr_rdwr_collision;
///////////////////////////////////////////////////////
// input arbitration and general control sigs
///////////////////////////////////////////////////////
assign cache_en_o = enable_q;
assign cnt_d = (flush_en) ? cnt_q + 1 : '0;
assign flush_done = (cnt_q == serpent_cache_pkg::DCACHE_NUM_WORDS-1);
assign miss_req_masked_d = ( lock_reqs ) ? miss_req_masked_q :
( mask_reads ) ? miss_we_i & miss_req_i : miss_req_i;
assign miss_is_write = miss_we_i[miss_port_idx];
// read port arbiter
lzc #(
.WIDTH ( NumPorts )
) i_lzc_reqs (
.in_i ( miss_req_masked_d ),
.cnt_o ( miss_port_idx ),
.empty_o ( )
);
always_comb begin : p_ack
miss_ack_o = '0;
miss_ack_o[miss_port_idx] = mem_data_ack_i & mem_data_req_o;
end
///////////////////////////////////////////////////////
// MSHR and way replacement logic (only for read ops)
///////////////////////////////////////////////////////
// find invalid cache line
lzc #(
.WIDTH ( ariane_pkg::DCACHE_SET_ASSOC )
) i_lzc_inv (
.in_i ( ~miss_vld_bits_i[miss_port_idx] ),
.cnt_o ( inv_way ),
.empty_o ( all_ways_valid )
);
// generate random cacheline index
lfsr_8bit #(
.WIDTH ( ariane_pkg::DCACHE_SET_ASSOC )
) i_lfsr_inv (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( update_lfsr ),
.refill_way_oh ( ),
.refill_way_bin ( rnd_way )
);
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
assign mshr_d.size = (mshr_allocate) ? miss_size_i [miss_port_idx] : mshr_q.size;
assign mshr_d.paddr = (mshr_allocate) ? miss_paddr_i [miss_port_idx] : mshr_q.paddr;
assign mshr_d.vld_bits = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits;
assign mshr_d.id = (mshr_allocate) ? miss_id_i [miss_port_idx] : mshr_q.id;
assign mshr_d.nc = (mshr_allocate) ? miss_nc_i [miss_port_idx] : mshr_q.nc;
assign mshr_d.repl_way = (mshr_allocate) ? repl_way : mshr_q.repl_way;
assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx;
// currently we only have one outstanding read TX, hence an incoming load clears the MSHR
assign mshr_vld_d = (mshr_allocate) ? 1'b1 :
(load_ack) ? 1'b0 :
mshr_vld_q;
assign miss_o = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0;
generate
for(genvar k=0; k<NumPorts; k++) begin
assign mshr_rdrd_collision[k] = (mshr_q.paddr[63:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][63:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1);
assign mshr_rdrd_collision_d[k] = (~miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k];
end
endgenerate
// read/write collision, stalls the corresponding request
// write collides with MSHR
assign mshr_rdwr_collision = (mshr_q.paddr[63:DCACHE_OFFSET_WIDTH] == miss_paddr_i[NumPorts-1][63:DCACHE_OFFSET_WIDTH]) && mshr_vld_q;
// read collides with inflight TX
always_comb begin : p_tx_coll
tx_rdwr_collision = 1'b0;
for(int k=0; k<DCACHE_MAX_TX; k++) begin
tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][63:DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][63:DCACHE_OFFSET_WIDTH]) && tx_vld_i[k];
end
end
///////////////////////////////////////////////////////
// to memory
///////////////////////////////////////////////////////
// if size = 32bit word, select appropriate offset, replicate for openpiton...
assign amo_data = (amo_req_i.size==2'b10) ? {amo_req_i.operand_b[amo_req_i.operand_a[2]*32 +: 32],
amo_req_i.operand_b[amo_req_i.operand_a[2]*32 +: 32]} :
amo_req_i.operand_b;
// always sign extend 32bit values
assign amo_resp_o.result = (amo_req_i.size==2'b10) ? {{32{mem_rtrn_i.data[amo_req_i.operand_a[2]*32 + 31]}}, mem_rtrn_i.data[amo_req_i.operand_a[2]*32 +: 32]} :
mem_rtrn_i.data[63:0];
// outgoing memory requests (AMOs are always uncached)
assign mem_data_o.tid = (amo_sel) ? AmoTxId : miss_id_i[miss_port_idx];
assign mem_data_o.nc = (amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx];
assign mem_data_o.way = (amo_sel) ? '0 : repl_way;
assign mem_data_o.data = (amo_sel) ? amo_data : miss_wdata_i[miss_port_idx];
assign mem_data_o.size = (amo_sel) ? amo_req_i.size : miss_size_i [miss_port_idx];
assign mem_data_o.amo_op = (amo_sel) ? amo_req_i.amo_op : AMO_NONE;
assign tmp_paddr = (amo_sel) ? amo_req_i.operand_a : miss_paddr_i[miss_port_idx];
assign mem_data_o.paddr = serpent_cache_pkg::paddrSizeAlign(tmp_paddr, mem_data_o.size);
///////////////////////////////////////////////////////
// responses from memory
///////////////////////////////////////////////////////
// Response decoder: translates the type of a valid memory response into
// individual strobes. All strobes stay low when no valid response is present.
always_comb begin : p_rtrn_logic
  // defaults
  miss_rtrn_vld_o = '0;
  inv_vld_all     = 1'b0;
  inv_vld         = 1'b0;
  amo_ack         = 1'b0;
  store_ack       = 1'b0;
  load_ack        = 1'b0;
  if (mem_rtrn_vld_i) begin
    unique case (mem_rtrn_i.rtype)
      DCACHE_INV_REQ: begin
        // an "invalidate all" request also raises the generic invalidation strobe
        inv_vld_all = mem_rtrn_i.inv.all;
        inv_vld     = mem_rtrn_i.inv.vld | mem_rtrn_i.inv.all;
      end
      DCACHE_ATOMIC_ACK: begin
        amo_ack = 1'b1;
      end
      DCACHE_STORE_ACK: begin
        // store responses always go back to the last port (the write port)
        store_ack                   = 1'b1;
        miss_rtrn_vld_o[NumPorts-1] = 1'b1;
      end
      DCACHE_LOAD_ACK: begin
        // load responses go back to the port recorded in the MSHR
        load_ack                              = 1'b1;
        miss_rtrn_vld_o[mshr_q.miss_port_idx] = 1'b1;
      end
      // TODO:
      // DCACHE_INT_REQ: begin
      // end
      default: begin
        // all other response types are ignored here
      end
    endcase
  end
end
// to write buffer
// the transaction ID of the response is forwarded unconditionally; its validity is
// signalled separately via miss_rtrn_vld_o
assign miss_rtrn_id_o = mem_rtrn_i.tid;
///////////////////////////////////////////////////////
// writes to cache memory
///////////////////////////////////////////////////////
// cacheline write port
assign wr_cl_nc_o = mshr_q.nc;
// the port is active for every load response and whenever at least one way is written
assign wr_cl_vld_o = load_ack | |wr_cl_we_o;
// way write enables, in priority order:
// flush / invalidate-all -> all ways, single invalidation -> the way given by the request,
// refill -> the replacement way stored in the MSHR
// NOTE(review): dcache_way_bin2oh presumably converts a binary way index to onehot - confirm in package
assign wr_cl_we_o = ( flush_en ) ? '1 :
( inv_vld_all ) ? '1 :
( inv_vld ) ? dcache_way_bin2oh(mem_rtrn_i.inv.way) :
( cl_write_en ) ? dcache_way_bin2oh(mshr_q.repl_way) :
'0;
// valid bits written to the enabled ways: cleared on flush/invalidation, set for the
// replacement way on a refill
assign wr_vld_bits_o = ( flush_en ) ? '0 :
( inv_vld ) ? '0 :
( cl_write_en ) ? dcache_way_bin2oh(mshr_q.repl_way) :
'0;
// cacheline index: flush counter, invalidation index, or the index of the MSHR address
assign wr_cl_idx_o = ( flush_en ) ? cnt_q :
( inv_vld ) ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] :
mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
// tag and offset always come from the address stored in the MSHR
assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_cl_data_o = mem_rtrn_i.data;
assign wr_cl_data_be_o = ( cl_write_en ) ? '1 : '0;// we only write complete cachelines into the memory
// only cacheable (non-NC) load responses write to the cache
assign cl_write_en = load_ack & ~mshr_q.nc;
///////////////////////////////////////////////////////
// main control logic for generating tx
///////////////////////////////////////////////////////
// Main control FSM of the miss unit. It arbitrates between flush/enable requests,
// AMOs and the masked miss requests, and generates the outgoing memory transactions.
//   IDLE       : accept a flush/enable request, an AMO, or one miss request
//   STORE_WAIT : keep a store request asserted until it is acked
//   LOAD_WAIT  : keep a load request asserted until it is acked, then allocate the MSHR
//   DRAIN      : mask reads and keep serving writes until wbuffer and MSHR are empty
//   FLUSH      : assert flush_en until flush_done, then take over enable_i
//   AMO        : issue the atomic request until it is acked
//   AMO_WAIT   : wait for the atomic response and ack the AMO requester
// Priority in IDLE is flush/enable > AMO > miss. Flush and AMO are only started once
// the write buffer is empty and the MSHR is free; otherwise the FSM goes via DRAIN and
// re-evaluates the request when it is back in IDLE.
always_comb begin : p_fsm
  // default assignment
  state_d          = state_q;
  flush_ack_o      = 1'b0;
  mem_data_o.rtype = DCACHE_LOAD_REQ;
  mem_data_req_o   = 1'b0;
  amo_resp_o.ack   = 1'b0;
  miss_replay_o    = '0;
  // disabling cache is possible anytime, enabling goes via flush
  enable_d         = enable_q & enable_i;
  flush_ack_d      = flush_ack_q;
  flush_en         = 1'b0;
  amo_sel          = 1'b0;
  update_lfsr      = 1'b0;
  mshr_allocate    = 1'b0;
  lock_reqs        = 1'b0;
  // by default, mask_reads follows the MSHR valid flag
  mask_reads       = mshr_vld_q;
  // interfaces
  unique case (state_q)
    //////////////////////////////////
    // wait for misses / amo ops
    IDLE: begin
      if(flush_i | (enable_i & ~enable_q)) begin
        if(wbuffer_empty_i && ~mshr_vld_q) begin
          // remember whether this flush was requested explicitly (and hence must be
          // acknowledged) or only results from enabling the cache
          flush_ack_d = flush_i;
          state_d     = FLUSH;
        end else begin
          state_d     = DRAIN;
        end
      end else if(amo_req_i.req) begin
        if(wbuffer_empty_i && ~mshr_vld_q) begin
          state_d = AMO;
        end else begin
          state_d = DRAIN;
        end
      // we've got a miss to handle
      end else if(|miss_req_masked_d) begin
        // this is a write miss, just pass through (but check whether write collides with MSHR)
        if(miss_is_write) begin
          // stall in case this write collides with the MSHR address
          if(~mshr_rdwr_collision) begin
            mem_data_req_o   = 1'b1;
            mem_data_o.rtype = DCACHE_STORE_REQ;
            if(~mem_data_ack_i) begin
              state_d = STORE_WAIT;
            end
          end
        // this is a read miss, can only allocate 1 MSHR
        // in case of a load_ack we can accept a new miss, since the MSHR is being cleared
        end else if(~mshr_vld_q | load_ack) begin
          // replay the read request in case the address has collided with MSHR during the time the request was pending
          // i.e., the cache state may have been updated in the mean time due to a refill at the same CL address
          if(mshr_rdrd_collision_d[miss_port_idx]) begin
            miss_replay_o[miss_port_idx] = 1'b1;
          // stall in case this CL address overlaps with a write TX that is in flight
          end else if(~tx_rdwr_collision) begin
            mem_data_req_o   = 1'b1;
            mem_data_o.rtype = DCACHE_LOAD_REQ;
            update_lfsr      = all_ways_valid & mem_data_ack_i;// need to evict a random way
            mshr_allocate    = mem_data_ack_i;
            if(~mem_data_ack_i) begin
              state_d = LOAD_WAIT;
            end
          end
        end
      end
    end
    //////////////////////////////////
    // wait until this request is acked
    STORE_WAIT: begin
      lock_reqs        = 1'b1;
      mem_data_req_o   = 1'b1;
      mem_data_o.rtype = DCACHE_STORE_REQ;
      if(mem_data_ack_i) begin
        state_d = IDLE;
      end
    end
    //////////////////////////////////
    // wait until this request is acked
    LOAD_WAIT: begin
      lock_reqs        = 1'b1;
      mem_data_req_o   = 1'b1;
      mem_data_o.rtype = DCACHE_LOAD_REQ;
      if(mem_data_ack_i) begin
        update_lfsr   = all_ways_valid;// need to evict a random way
        mshr_allocate = 1'b1;
        state_d       = IDLE;
      end
    end
    //////////////////////////////////
    // only handle stores, do not accept new read requests
    // wait until MSHR is cleared and wbuffer is empty
    DRAIN: begin
      mask_reads = 1'b1;
      // these are writes, check whether they collide with MSHR
      if(|miss_req_masked_d && ~mshr_rdwr_collision) begin
        mem_data_req_o   = 1'b1;
        mem_data_o.rtype = DCACHE_STORE_REQ;
      end
      if(wbuffer_empty_i && ~mshr_vld_q) begin
        state_d = IDLE;
      end
    end
    //////////////////////////////////
    // flush the cache
    FLUSH: begin
      // internal flush signal
      flush_en = 1'b1;
      if(flush_done) begin
        state_d     = IDLE;
        // only acknowledge if the flush was requested via flush_i (see IDLE)
        flush_ack_o = flush_ack_q;
        flush_ack_d = 1'b0;
        // after a completed flush the cache enable follows enable_i
        enable_d    = enable_i;
      end
    end
    //////////////////////////////////
    // send out amo op request
    AMO: begin
      mem_data_o.rtype = DCACHE_ATOMIC_REQ;
      mem_data_req_o   = 1'b1;
      amo_sel          = 1'b1;
      if(mem_data_ack_i) begin
        state_d = AMO_WAIT;
      end
    end
    //////////////////////////////////
    // block and wait until AMO OP returns
    AMO_WAIT: begin
      amo_sel = 1'b1;
      if(amo_ack) begin
        amo_resp_o.ack = 1'b1;
        state_d        = IDLE;
      end
    end
    //////////////////////////////////
    default: begin
      // we should never get here
      state_d = IDLE;
    end
  endcase // state_q
end
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
// State registers of the miss unit (asynchronous, active-low reset).
// The FSM leaves reset in the FLUSH state; everything else resets to zero.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
  if (!rst_ni) begin
    // control
    state_q               <= FLUSH;
    enable_q              <= '0;
    flush_ack_q           <= '0;
    cnt_q                 <= '0;
    // miss status holding register and its valid flag (plus a one cycle delayed copy)
    mshr_q                <= '0;
    mshr_vld_q            <= '0;
    mshr_vld_q1           <= '0;
    // request bookkeeping
    miss_req_masked_q     <= '0;
    mshr_rdrd_collision_q <= '0;
  end else begin
    // control
    state_q               <= state_d;
    enable_q              <= enable_d;
    flush_ack_q           <= flush_ack_d;
    cnt_q                 <= cnt_d;
    // miss status holding register and its valid flag (plus a one cycle delayed copy)
    mshr_q                <= mshr_d;
    mshr_vld_q            <= mshr_vld_d;
    mshr_vld_q1           <= mshr_vld_q;
    // request bookkeeping
    miss_req_masked_q     <= miss_req_masked_d;
    mshr_rdrd_collision_q <= mshr_rdrd_collision_d;
  end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
// simulation-only checks (excluded from synthesis and from Verilator runs)
// a load response arriving while the valid MSHR entry is marked NC must itself be flagged NC
nc_response : assert property (
@(posedge clk_i) disable iff (~rst_ni) mshr_vld_q |-> mshr_q.nc |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.nc)
else $fatal(1,"[l1 dcache missunit] NC load response implies NC load response");
// the transaction ID of a load response must match the ID stored in the MSHR
read_tid : assert property (
@(posedge clk_i) disable iff (~rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id)
else $fatal(1,"[l1 dcache missunit] TID of load response doesn't match");
// ports 0..NumPorts-2 are read ports and must never assert write enable while requesting
read_ports : assert property (
@(posedge clk_i) disable iff (~rst_ni) |miss_req_i[NumPorts-2:0] |-> miss_we_i[NumPorts-2:0] == 0)
else $fatal(1,"[l1 dcache missunit] only last port can issue write requests");
// port NumPorts-1 is the write port and must always assert write enable while requesting
write_port : assert property (
@(posedge clk_i) disable iff (~rst_ni) miss_req_i[NumPorts-1] |-> miss_we_i[NumPorts-1])
else $fatal(1,"[l1 dcache missunit] last port can only issue write requests");
initial begin
// assert wrong parameterizations
assert (NumPorts>=2)
else $fatal(1,"[l1 dcache missunit] at least two ports are required (one read port, one write port)");
end
`endif
//pragma translate_on
endmodule // serpent_dcache_missunit

View file

@ -0,0 +1,550 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 13.09.2018
// Description: coalescing write buffer for serpent dcache
//
// A couple of notes:
//
// 1) the write buffer behaves as a fully-associative cache, and is therefore coalescing.
// this cache is used by the cache readout logic to forward data to the load unit.
//
// each byte can be in the following states (valid/dirty/txblock):
//
// 0/0/0: invalid -> free entry in the buffer
// 1/1/0: valid and dirty, Byte is hence not part of TX in-flight
// 1/0/1: valid and not dirty, Byte is part of a TX in-flight
// 1/1/1: valid, part of a TX in-flight and dirty. this means that the byte has been
// overwritten while in TX and needs to be retransmitted once the write of that byte returns.
// 1/0/0: this would represent a clean state, but is never reached in the wbuffer in the current implementation.
// this is because when a TX returns, and the byte is in state [1/0/1], it is written to cache if needed and
// its state is immediately cleared to 0/x/x.
//
// this state is used to distinguish between bytes that have been written and not
// yet sent to the memory subsystem, and bytes that are part of a transaction.
//
// 2) further, each word in the write buffer has a cache state (checked, hit_oh)
//
// checked == 0: unknown cache state
// checked == 1: cache state has been looked up, valid way is stored in "hit_oh"
//
// cache invalidations/refills affecting a particular word will clear its word state to 0,
// so another lookup has to be done. note that these lookups are triggered as soon as there is
// a valid word with checked == 0 in the write buffer.
//
// 3) returning write ACKs trigger a cache update if the word is present in the cache, and evict that
// word from the write buffer. if the word is not allocated to the cache, it is just evicted from the write buffer.
// if the word cache state is VOID, the pipeline is stalled until it is clear whether that word is in the cache or not.
//
// 4) we handle NC writes using the writebuffer circuitry. upon an NC request, the writebuffer will first be drained.
// then, only the NC word is written into the write buffer and no further write requests are acknowledged until that
// word has been evicted from the write buffer.
import ariane_pkg::*;
import serpent_cache_pkg::*;
module serpent_dcache_wbuffer #(
parameter logic [63:0] CachedAddrBeg = 64'h00_8000_0000, // begin of cached region
parameter logic [63:0] CachedAddrEnd = 64'h80_0000_0000 // end of cached region
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic cache_en_i, // writes are treated as NC if disabled
output logic empty_o, // asserted if no data is present in write buffer
// core request ports
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// interface to miss handler
input logic miss_ack_i,
output logic [63:0] miss_paddr_o,
output logic miss_req_o,
output logic miss_we_o, // always 1 here
output logic [63:0] miss_wdata_o,
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0)
output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, //
output logic [DCACHE_ID_WIDTH-1:0] miss_id_o, // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1)
// write responses from memory
input logic miss_rtrn_vld_i,
input logic [DCACHE_ID_WIDTH-1:0] miss_rtrn_id_i, // transaction ID to clear
// cache read interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to 1 here as we do not have to read the data arrays
input logic rd_ack_i,
input logic [63:0] rd_data_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i,
// cacheline writes
input logic wr_cl_vld_i,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
// cache word write interface
output logic [DCACHE_SET_ASSOC-1:0] wr_req_o,
input logic wr_ack_i,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o,
output logic [63:0] wr_data_o,
output logic [7:0] wr_data_be_o,
// to forwarding logic and miss unit
output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o,
output logic [DCACHE_MAX_TX-1:0][63:0] tx_paddr_o, // used to check for address collisions with read operations
output logic [DCACHE_MAX_TX-1:0] tx_vld_o
);
// status of each write transaction slot (fields used in this module: vld, ptr, be)
tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q;
// write buffer storage
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q;
// per-entry flags: at least one byte valid
logic [DCACHE_WBUF_DEPTH-1:0] valid;
// per-entry flags: at least one dirty byte that is ready for transmission
logic [DCACHE_WBUF_DEPTH-1:0] dirty;
// per-entry flags: valid entry whose cache state still has to be looked up
logic [DCACHE_WBUF_DEPTH-1:0] tocheck;
// per-entry flags: incoming write hits this entry / cacheline write hits this entry
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit;
// per-entry byte mask of dirty bytes that are ready for transmission
logic [DCACHE_WBUF_DEPTH-1:0][7:0] bdirty;
// entry pointers: next free entry, next dirty entry to transmit, entry hit by the incoming
// write, entry written by the incoming write, entry under tag lookup (with two delayed
// copies), and entry belonging to the returning transaction
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr;
// TX slot used for the next outgoing write / TX slot of the returning write
logic [DCACHE_ID_WIDTH-1:0] tx_id, rtrn_id;
// byte offset of the outgoing write within its 64bit word
logic [2:0] bdirty_off;
// byte enables covered by the outgoing write
logic [7:0] tx_be;
// byte addresses of the word written back to the cache / the word being looked up
logic [63:0] wr_paddr, rd_paddr;
// tag of the lookup address, registered to be presented one cycle after the index
logic [DCACHE_TAG_WIDTH-1:0] rd_tag_d, rd_tag_q;
// registered hit vector returned by the tag lookup
logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_d, rd_hit_oh_q;
// lookup accepted strobe and its delayed copies
logic check_en_d, check_en_q, check_en_q1;
// buffer full / a dirty word is handed to the miss unit this cycle / buffer can take a write
logic full, dirty_rd_en, rdy;
// no returned TX pending / retire the returned TX
logic rtrn_empty, evict;
// an NC write is in the buffer / incoming write address is non-cacheable
logic nc_pending_d, nc_pending_q, addr_is_nc;
// set when an incoming write is accepted (not read anywhere in this module)
logic wbuffer_wren;
// at least one TX slot is free
logic free_tx_slots;
// one cycle delayed copy of the cacheline write strobe and index
logic wr_cl_vld_q, wr_cl_vld_d;
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d;
// byte address of each buffer entry (debug only)
logic [63:0] debug_paddr [DCACHE_WBUF_DEPTH-1:0];
///////////////////////////////////////////////////////
// misc
///////////////////////////////////////////////////////
// constant request attributes: the write buffer only issues writes and never
// provides valid bits
assign miss_vld_bits_o = '0;
assign miss_we_o       = 1'b1;
// requests are flagged NC while an NC write is being served
assign miss_nc_o       = nc_pending_q;
// buffer contents are exported as they are
assign wbuffer_data_o  = wbuffer_q;
// a write is non-cacheable if the cache is disabled, or if its tag lies outside of
// the cached region [CachedAddrBeg, CachedAddrEnd)
assign addr_is_nc = (!cache_en_i) ||
                    (req_port_i.address_tag >= (CachedAddrEnd>>DCACHE_INDEX_WIDTH)) ||
                    (req_port_i.address_tag <  (CachedAddrBeg>>DCACHE_INDEX_WIDTH));
// per TX slot: valid flag and the byte address of the buffer entry it refers to
generate
  for(genvar tx=0; tx<DCACHE_MAX_TX; tx++) begin
    assign tx_paddr_o[tx] = wbuffer_q[tx_stat_q[tx].ptr].wtag<<3;
    assign tx_vld_o[tx]   = tx_stat_q[tx].vld;
  end
endgenerate
///////////////////////////////////////////////////////
// openpiton does not understand byte enable sigs
// need to convert to the four cases:
// 00: byte
// 01: halfword
// 10: word
// 11: dword
// non-contiguous writes need to be serialized!
// e.g. merged dwords with BE like this: 8'b01001100
///////////////////////////////////////////////////////
// get byte offset
// NOTE(review): presumably lzc returns in cnt_o the index of the first set bit of in_i, i.e. the
// offset of the first transmittable dirty byte of the selected entry - confirm against lzc module
lzc #(
.WIDTH ( 8 )
) i_vld_bdirty (
.in_i ( bdirty[dirty_ptr] ),
.cnt_o ( bdirty_off ),
.empty_o ( )
);
// add the offset to the physical base address of this buffer entry
// (wtag is the word address, the 3 LSBs are the byte offset within the word)
assign miss_paddr_o = {wbuffer_q[dirty_ptr].wtag, bdirty_off};
assign miss_id_o = tx_id;
// is there any dirty word to be transmitted, and is there a free TX slot?
assign miss_req_o = (|dirty) && free_tx_slots;
// get size of aligned words, and the corresponding byte enables
// note: openpiton can only handle aligned offsets + size, and hence
// we have to split unaligned data into multiple transfers (see toSize64)
// e.g. if we have the following valid bytes: 0011_1001 -> TX0: 0000_0001, TX1: 0000_1000, TX2: 0011_0000
assign miss_size_o = toSize64(bdirty[dirty_ptr]);
// replicate transfers shorter than a dword
assign miss_wdata_o = repData64(wbuffer_q[dirty_ptr].data,
bdirty_off,
miss_size_o[1:0]);
// byte enables covered by this transfer; they are stored in the TX slot and used in
// p_buffer to move exactly these bytes from dirty to txblock
// NOTE(review): toSize64 / repData64 / toByteEnable8 are defined outside this file - confirm semantics there
assign tx_be = toByteEnable8(bdirty_off,
miss_size_o[1:0]);
///////////////////////////////////////////////////////
// TX status registers and ID counters
///////////////////////////////////////////////////////
// queue of returned transaction IDs: every write response pushes its ID, and the ID at
// the head (rtrn_id) is popped once the corresponding buffer entry is retired (evict)
// NOTE(review): full_o is left unconnected, i.e. pushes are never back-pressured. the depth equals
// DCACHE_MAX_TX, which presumably bounds the number of outstanding responses - confirm
// NOTE(review): DATA_WIDTH is $clog2(DCACHE_MAX_TX) while data_i/data_o are DCACHE_ID_WIDTH bits
// wide - confirm that both widths agree for all configurations
// TODO: todo: make this fall through if timing permits it
fifo_v2 #(
.FALL_THROUGH ( 1'b0 ),
.DATA_WIDTH ( $clog2(DCACHE_MAX_TX) ),
.DEPTH ( DCACHE_MAX_TX )
) i_rtrn_id_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( ),
.empty_o ( rtrn_empty ),
.alm_full_o ( ),
.alm_empty_o( ),
.data_i ( miss_rtrn_id_i ),
.push_i ( miss_rtrn_vld_i ),
.data_o ( rtrn_id ),
.pop_i ( evict )
);
// TX slot bookkeeping and retirement of returned writes.
// A returned write is retired (evict) once the cache state of its buffer entry is known:
// if the written bytes are still clean and the word hits in the cache, the cache is
// updated first and the TX is retired when that write is acked; otherwise it is retired
// right away. The slot allocation below is evaluated after the clearing, so it would take
// precedence if both targeted the same slot (the tx_status assertion excludes this case).
always_comb begin : p_tx_stat
tx_stat_d = tx_stat_q;
evict = 1'b0;
wr_req_o = '0;
// clear entry if it is clear whether it can be pushed to the cache or not
if((~rtrn_empty) && wbuffer_q[rtrn_ptr].checked) begin
// check if data is clean and can be written, otherwise skip
// check if CL is present, otherwise skip
if((|wr_data_be_o) && (|wbuffer_q[rtrn_ptr].hit_oh)) begin
// request a word write to the way(s) flagged in hit_oh and wait for the ack
wr_req_o = wbuffer_q[rtrn_ptr].hit_oh;
if(wr_ack_i) begin
evict = 1'b1;
tx_stat_d[rtrn_id].vld = 1'b0;
end
end else begin
// nothing to write to the cache, retire immediately
evict = 1'b1;
tx_stat_d[rtrn_id].vld = 1'b0;
end
end
// allocate a new entry
// (records which buffer entry and which bytes the outgoing write covers)
if(dirty_rd_en) begin
tx_stat_d[tx_id].vld = 1'b1;
tx_stat_d[tx_id].ptr = dirty_ptr;
tx_stat_d[tx_id].be = tx_be;
end
end
// at least one TX slot is currently not valid
assign free_tx_slots = |(~tx_vld_o);
// get free TX slot
// all invalid slots request, the selected index becomes the ID of the next outgoing write.
// the arbiter is enabled in the cycle in which a dirty word is handed to the miss unit.
rrarbiter #(
.NUM_REQ ( DCACHE_MAX_TX ),
.LOCK_IN ( 1 )// lock the decision, once request is asserted
) i_tx_id_rr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.en_i ( dirty_rd_en ),
.req_i ( ~tx_vld_o ),
.ack_o ( ),
.vld_o ( ),
.idx_o ( tx_id )
);
///////////////////////////////////////////////////////
// cache readout & update
///////////////////////////////////////////////////////
// ---- tag lookup port ----
// a lookup is requested as long as any valid word has an unknown cache state;
// only the tags are needed, hence the data arrays are not read
assign rd_req_o      = |tocheck;
assign rd_tag_only_o = 1'b1;
// a lookup is issued in the cycle in which the request is acknowledged
assign check_en_d    = rd_req_o & rd_ack_i;
// byte address of the word selected for lookup, split into index / offset / tag
assign rd_paddr      = wbuffer_q[check_ptr_d].wtag<<3;
assign rd_idx_o      = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign rd_off_o      = rd_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign rd_tag_d      = rd_paddr>>DCACHE_INDEX_WIDTH;
// the tag is presented one cycle after index and offset
assign rd_tag_o      = rd_tag_q;
// ---- cache update port ----
// buffer entry that belongs to the returning transaction
assign rtrn_ptr      = tx_stat_q[rtrn_id].ptr;
// byte address of that entry, split into index / offset
assign wr_paddr      = wbuffer_q[rtrn_ptr].wtag<<3;
assign wr_idx_o      = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign wr_off_o      = wr_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_data_o     = wbuffer_q[rtrn_ptr].data;
// bytes that were overwritten (and are hence dirty again) while the TX was in-flight
// must not be written to the cache when the TX returns
assign wr_data_be_o  = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty);
///////////////////////////////////////////////////////
// readout of status bits, index calculation
///////////////////////////////////////////////////////
// cacheline write strobe / index, also kept for one more cycle (see inval_hit below)
assign wr_cl_vld_d = wr_cl_vld_i;
assign wr_cl_idx_d = wr_cl_idx_i;
generate
  for(genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin
    // only for debug, will be pruned
    assign debug_paddr[k] = wbuffer_q[k].wtag << 3;
    // dirty bytes that are ready for transmission.
    // note that we cannot retransmit a word that is already in-flight
    // since the multiple transactions might overtake each other in the memory system!
    assign bdirty[k] = (|wbuffer_q[k].txblock) ? '0 : wbuffer_q[k].dirty & wbuffer_q[k].valid;
    assign dirty[k]  = |bdirty[k];
    assign valid[k]  = |wbuffer_q[k].valid;
    // an incoming write hits this entry if the entry is valid and the word addresses match
    assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:3]});
    // checks if an invalidation/cache refill hits a particular word
    // note: an invalidation can hit multiple words!
    // need to respect previous cycle, too, since we add a cycle of latency to the rd_hit_oh_i signal...
    // wtag is a 64bit word address (paddr[63:3]), so the cacheline index
    // paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] corresponds to
    // wtag[DCACHE_INDEX_WIDTH-4:DCACHE_OFFSET_WIDTH-3]. the index is compared directly in
    // order to hit on ALL words of the cacheline; comparing the word-aligned address with
    // the line-aligned address would only ever match the word at line offset 0.
    assign inval_hit[k] = (wr_cl_vld_d & valid[k] & (wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-4:DCACHE_OFFSET_WIDTH-3] == wr_cl_idx_d)) |
                          (wr_cl_vld_q & valid[k] & (wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-4:DCACHE_OFFSET_WIDTH-3] == wr_cl_idx_q));
    // these word have to be looked up in the cache
    assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k];
  end
endgenerate
// an incoming write goes to the entry it hits (coalescing), otherwise to the next free entry
assign wr_ptr = (|wbuffer_hit_oh) ? hit_ptr : next_ptr;
// buffer is empty if no entry holds a valid byte
assign empty_o = ~(|valid);
// a write can be accepted if it hits an existing entry or if there is a free entry
assign rdy = (|wbuffer_hit_oh) | (~full);
// next free entry in the buffer
// (empty_o of the lzc is asserted when no entry is free, i.e. the buffer is full)
// NOTE(review): presumably lzc returns the index of the first set bit of in_i - confirm against lzc module
lzc #(
.WIDTH ( DCACHE_WBUF_DEPTH )
) i_vld_lzc (
.in_i ( ~valid ),
.cnt_o ( next_ptr ),
.empty_o ( full )
);
// get index of hit
lzc #(
.WIDTH ( DCACHE_WBUF_DEPTH )
) i_hit_lzc (
.in_i ( wbuffer_hit_oh ),
.cnt_o ( hit_ptr ),
.empty_o ( )
);
// next dirty word to serve
// all entries with transmittable dirty bytes request; the arbiter is enabled in the
// cycle in which the selected word is handed to the miss unit
rrarbiter #(
.NUM_REQ ( DCACHE_WBUF_DEPTH ),
.LOCK_IN ( 1 )// lock the decision, once request is asserted
) i_dirty_rr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.en_i ( dirty_rd_en ),
.req_i ( dirty ),
.ack_o ( ),
.vld_o ( ),
.idx_o ( dirty_ptr )
);
// next word to lookup in the cache
// all valid entries with unknown cache state request; the arbiter is enabled when
// the tag lookup is accepted by the cache. LOCK_IN is not overridden here.
rrarbiter #(
.NUM_REQ ( DCACHE_WBUF_DEPTH )
) i_clean_rr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.en_i ( check_en_d ),
.req_i ( tocheck ),
.ack_o ( ),
.vld_o ( ),
.idx_o ( check_ptr_d )
);
///////////////////////////////////////////////////////
// update logic
///////////////////////////////////////////////////////
// the write buffer never returns read data on its request port
assign req_port_o.data_rvalid = '0;
assign req_port_o.data_rdata = '0;
assign rd_hit_oh_d = rd_hit_oh_i;
// Next-state logic of the write buffer entries. The updates below are applied in program
// order to wbuffer_d, so later sections take precedence over earlier ones for the same field:
//   1) returning tag lookup sets checked/hit_oh
//   2) invalidation/refill hits clear checked again
//   3) retiring a returned write clears txblock (and valid for bytes that are not dirty)
//   4) handing a word to the miss unit moves the transmitted bytes from dirty to txblock
//   5) an accepted core write (re)marks its bytes valid+dirty and clears checked
// TODO: rewrite and separate into MUXES and write strobe logic
always_comb begin : p_buffer
wbuffer_d = wbuffer_q;
nc_pending_d = nc_pending_q;
dirty_rd_en = 1'b0;
req_port_o.data_gnt = 1'b0;
wbuffer_wren = 1'b0;
// TAG lookup returns, mark corresponding word
// (the result is only stored if the looked up entry still holds valid bytes)
if(check_en_q1) begin
if(wbuffer_q[check_ptr_q1].valid) begin
wbuffer_d[check_ptr_q1].checked = 1'b1;
wbuffer_d[check_ptr_q1].hit_oh = rd_hit_oh_q;
end
end
// if an invalidation or cache line refill comes in and hits on the write buffer,
// we have to discard our knowledge of the corresponding cacheline state
for(int k=0; k<DCACHE_WBUF_DEPTH; k++) begin
if(inval_hit[k]) begin
wbuffer_d[k].checked = 1'b0;
end
end
// once TX write response came back, we can clear the TX block. if it was not dirty, we
// can completely evict it - otherwise we have to leave it there for retransmission
if(evict) begin
// only the bytes covered by the returning transaction are affected
for(int k=0; k<8; k++) begin
if(tx_stat_q[rtrn_id].be[k]) begin
wbuffer_d[rtrn_ptr].txblock[k] = 1'b0;
if(~wbuffer_q[rtrn_ptr].dirty[k]) begin
wbuffer_d[rtrn_ptr].valid[k] = 1'b0;
// NOTE: this is not strictly needed, but makes it much
// easier to debug, since no invalid data remains in the buffer
// wbuffer_d[rtrn_ptr].data[k*8 +:8] = '0;
end
end
end
// if all bytes are evicted, clear the cache status flag
if(wbuffer_d[rtrn_ptr].valid == 0) begin
wbuffer_d[rtrn_ptr].checked = 1'b0;
end
end
// mark bytes sent out to the memory system
// (dirty_rd_en also advances both arbiters and allocates the TX slot in p_tx_stat)
if(miss_req_o & miss_ack_i) begin
dirty_rd_en = 1'b1;
for(int k=0; k<8; k++) begin
if(tx_be[k]) begin
wbuffer_d[dirty_ptr].dirty[k] = 1'b0;
wbuffer_d[dirty_ptr].txblock[k] = 1'b1;
end
end
end
// write new word into the buffer
if(req_port_i.data_req & rdy) begin
// in case we have an NC address, need to drain the buffer first
// in case we are serving an NC address, we block until it is written to memory
if(empty_o | ~(addr_is_nc | nc_pending_q)) begin
wbuffer_wren = 1'b1;
req_port_o.data_gnt = 1'b1;
nc_pending_d = addr_is_nc;
// the cache state of the written word is treated as unknown again
wbuffer_d[wr_ptr].checked = 1'b0;
// the tag is the 64bit word address (address bits above the 3 byte offset bits)
wbuffer_d[wr_ptr].wtag = {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:3]};
// mark bytes as dirty
for(int k=0; k<8; k++) begin
if(req_port_i.data_be[k]) begin
wbuffer_d[wr_ptr].valid[k] = 1'b1;
wbuffer_d[wr_ptr].dirty[k] = 1'b1;
wbuffer_d[wr_ptr].data[k*8 +: 8] = req_port_i.data_wdata[k*8 +: 8];
end
end
end
end
end
///////////////////////////////////////////////////////
// ff's
///////////////////////////////////////////////////////
// State registers of the write buffer (asynchronous, active-low reset, all cleared).
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
  if (!rst_ni) begin
    // buffer contents and TX slot status
    wbuffer_q    <= '{default: '0};
    tx_stat_q    <= '{default: '0};
    nc_pending_q <= '0;
    // tag lookup pipeline
    check_en_q   <= '0;
    check_en_q1  <= '0;
    check_ptr_q  <= '0;
    check_ptr_q1 <= '0;
    rd_tag_q     <= '0;
    rd_hit_oh_q  <= '0;
    // delayed cacheline write info
    wr_cl_vld_q  <= '0;
    wr_cl_idx_q  <= '0;
  end else begin
    // buffer contents and TX slot status
    wbuffer_q    <= wbuffer_d;
    tx_stat_q    <= tx_stat_d;
    nc_pending_q <= nc_pending_d;
    // tag lookup pipeline (enable and pointer are delayed by two cycles in total)
    check_en_q   <= check_en_d;
    check_en_q1  <= check_en_q;
    check_ptr_q  <= check_ptr_d;
    check_ptr_q1 <= check_ptr_q;
    rd_tag_q     <= rd_tag_d;
    rd_hit_oh_q  <= rd_hit_oh_d;
    // delayed cacheline write info
    wr_cl_vld_q  <= wr_cl_vld_d;
    wr_cl_idx_q  <= wr_cl_idx_d;
  end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
// simulation-only checks (excluded from synthesis and from Verilator runs)
// an incoming write may hit at most one buffer entry
hot1: assert property (
@(posedge clk_i) disable iff (~rst_ni) req_port_i.data_req |-> $onehot0(wbuffer_hit_oh))
else $fatal(1,"[l1 dcache wbuffer] wbuffer_hit_oh signal must be hot1");
// a TX slot must not be retired and allocated in the same cycle
tx_status: assert property (
@(posedge clk_i) disable iff (~rst_ni) evict & miss_ack_i & miss_req_o |-> (tx_id != rtrn_id))
else $fatal(1,"[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle");
// only valid TX slots can be retired
tx_valid0: assert property (
@(posedge clk_i) disable iff (~rst_ni) evict |-> tx_stat_q[rtrn_id].vld)
else $fatal(1,"[l1 dcache wbuffer] evicting invalid transaction slot");
// the buffer entry of a retiring TX must still hold valid bytes
tx_valid1: assert property (
@(posedge clk_i) disable iff (~rst_ni) evict |-> |wbuffer_q[rtrn_ptr].valid)
else $fatal(1,"[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid");
// a write is only granted if it hits an entry or if a free entry exists
write_full: assert property (
@(posedge clk_i) disable iff (~rst_ni) req_port_i.data_req |-> req_port_o.data_gnt |-> ((~full) | (|wbuffer_hit_oh)))
else $fatal(1,"[l1 dcache wbuffer] cannot write if full or no hit");
// the following request port inputs are not used by the write buffer and must stay low
unused0: assert property (
@(posedge clk_i) disable iff (~rst_ni) ~req_port_i.tag_valid)
else $fatal(1,"[l1 dcache wbuffer] req_port_i.tag_valid should not be asserted");
unused1: assert property (
@(posedge clk_i) disable iff (~rst_ni) ~req_port_i.kill_req)
else $fatal(1,"[l1 dcache wbuffer] req_port_i.kill_req should not be asserted");
// every byte of every entry must be in one of the legal valid/dirty/txblock states
// (0/0/0, 1/1/0, 1/0/1 or 1/1/1)
generate
for(genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin
for(genvar j=0; j<8; j++) begin
byteStates: assert property (
@(posedge clk_i) disable iff (~rst_ni) {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]} inside {3'b000, 3'b110, 3'b101, 3'b111} )
else $fatal(1,"[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b",
j,k,
wbuffer_q[k].valid[j],
wbuffer_q[k].dirty[j],
wbuffer_q[k].txblock[j]);
end
end
endgenerate
`endif
//pragma translate_on
endmodule // serpent_dcache_wbuffer

View file

@ -0,0 +1,552 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Instruction cache that is compatible with openpiton.
//
// Some notes:
//
// 1) refills always have the size of one cache line, except for accesses to the I/O region, which is mapped
// to the top half of the physical address space (bit 39 = 1). the data width of the interface has the width
// of one cache line, and hence the ifills can be transferred in a single cycle. note that the ifills must be
// consumed unconditionally.
//
// 2) instruction fetches are always assumed to be aligned to 32bit (lower 2 bits are ignored)
//
// 3) NC accesses to I/O space are expected to return 32bit from memory.
//
import ariane_pkg::*;
import serpent_cache_pkg::*;
module serpent_icache #(
parameter logic [DCACHE_ID_WIDTH-1:0] RdTxId = 0, // ID to be used for read transactions
parameter bit Axi64BitCompliant = 1'b0, // set this to 1 when using in conjunction with 64bit AXI bus adapter
parameter logic [63:0] CachedAddrBeg = 64'h00_8000_0000, // begin of cached region
parameter logic [63:0] CachedAddrEnd = 64'h80_0000_0000 // end of cached region
) (
input logic clk_i,
input logic rst_ni,
input logic flush_i, // flush the icache, flush and kill have to be asserted together
input logic en_i, // enable icache
output logic miss_o, // to performance counter
// address translation requests
input icache_areq_i_t areq_i,
output icache_areq_o_t areq_o,
// data requests
input icache_dreq_i_t dreq_i,
output icache_dreq_o_t dreq_o,
// refill port
input logic mem_rtrn_vld_i,
input icache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o,
input logic mem_data_ack_i,
output icache_req_t mem_data_o
);
// signals
logic cache_en_d, cache_en_q; // cache is enabled
logic [63:0] vaddr_d, vaddr_q;
logic paddr_is_nc; // asserted if physical address is non-cacheable
logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare
logic cache_rden; // triggers cache lookup
logic cache_wren; // triggers write to cacheline
logic cmp_en_d, cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal.
logic flush_d, flush_q; // used to register and signal pending flushes
// replacement strategy
logic update_lfsr; // shift the LFSR
logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered
logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement
logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace
logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot)
logic all_ways_valid; // we need to switch repl strategy since all are valid
// invalidations / flushing
logic inv_en; // incoming invalidations
logic flush_en, flush_done; // used to flush cache entries
logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries
// mem arrays
logic cl_we; // write enable to memory array
logic [ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array
logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array
logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem
logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0]cl_sel; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable
logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs
logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
// controller FSM
typedef enum logic[2:0] {FLUSH, IDLE, READ, MISS, TLB_MISS, KILL_ATRANS, KILL_MISS} state_t;
state_t state_d, state_q;
///////////////////////////////////////////////////////
// address -> cl_index mapping, interface plumbing
///////////////////////////////////////////////////////
// extract tag from physical address, check if NC
// (the previously registered tag is used while no valid translation is presented)
assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q;
// noncacheable if request goes to I/O space, or if cache is disabled
// (I/O space is everything outside of the cached region [CachedAddrBeg, CachedAddrEnd))
assign paddr_is_nc = (cl_tag_d < (CachedAddrBeg>>ICACHE_INDEX_WIDTH)) ||
(cl_tag_d >= (CachedAddrEnd>>ICACHE_INDEX_WIDTH)) ||
(!cache_en_q);
// pass exception through
assign dreq_o.ex = areq_i.fetch_exception;
// latch this in case we have to stall later on
// (a new virtual address is only taken over when a request is accepted)
assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q;
// make sure this is 32bit aligned
// (the two LSBs of the address sent to the address translation are forced to zero)
assign areq_o.fetch_vaddr = {vaddr_q>>2, 2'b0};
// split virtual address into index and offset to address cache arrays
assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH];
// Cacheline offset and refill address, in two flavours selected by Axi64BitCompliant.
// In both cases cacheable refills request the cacheline-aligned address; the flavours
// differ in how noncacheable (NC) accesses are aligned and indexed.
generate
if(Axi64BitCompliant)begin
// if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} :
( paddr_is_nc & mem_data_req_o ) ? cl_offset_q[2]<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case
cl_offset_q;
// request dword address instead of cl address in case of NC access
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end
if(!Axi64BitCompliant)begin
// icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
// since the piton cache system replicates the data, we can always index the full CL
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} :
cl_offset_q;
// request word address instead of cl address in case of NC access
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end
endgenerate
assign mem_data_o.tid = RdTxId;
assign mem_data_o.nc = paddr_is_nc;
// way that is being replaced
assign mem_data_o.way = repl_way;
assign dreq_o.vaddr = vaddr_q;
///////////////////////////////////////////////////////
// main control logic
///////////////////////////////////////////////////////
// Combinational next-state and output logic of the icache controller.
// States: FLUSH (clear valid bits), IDLE (accept request), READ (tag compare /
// issue refill), TLB_MISS (wait for address translation), MISS (wait for
// refill), KILL_ATRANS / KILL_MISS (drain a killed translation / refill).
// All outputs get a default value first and are overridden per state.
always_comb begin : p_fsm
// default assignment
state_d = state_q;
cache_en_d = cache_en_q & en_i;// disabling the cache is always possible, enable needs to go via flush
flush_en = 1'b0;
cmp_en_d = 1'b0;
cache_rden = 1'b0;
cache_wren = 1'b0;
inv_en = 1'b0;
flush_d = flush_q | flush_i; // register incoming flush
// interfaces
dreq_o.ready = 1'b0;
areq_o.fetch_req = 1'b0;
dreq_o.valid = 1'b0;
mem_data_req_o = 1'b0;
// performance counter
miss_o = 1'b0;
// handle invalidations unconditionally
// note: invals are mutually exclusive with
// ifills, since both arrive over the same IF
// however, we need to make sure below that we
// do not trigger a cache readout at the same time...
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_INV_REQ) begin
inv_en = 1'b1;
end
unique case (state_q)
//////////////////////////////////
// this clears all valid bits
FLUSH: begin
flush_en = 1'b1;
if (flush_done) begin
state_d = IDLE;
// the pending flush has been served
flush_d = 1'b0;
// if the cache was not enabled set this
cache_en_d = en_i;
end
end
//////////////////////////////////
// wait for an incoming request
IDLE: begin
// only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q;
// handle pending flushes, or perform cache clear upon enable
if (flush_d | (en_i & ~cache_en_q)) begin
state_d = FLUSH;
// wait for incoming requests
end else begin
// mem requests are for sure invals here
// (do not accept a request / read the arrays while one is arriving)
if (~mem_rtrn_vld_i) begin
dreq_o.ready = 1'b1;
// we have a new request
if (dreq_i.req) begin
cache_rden = 1'b1;
state_d = READ;
end
end
// kill_s1 overrides the READ transition above: stay in IDLE
if (dreq_i.kill_s1) begin
state_d = IDLE;
end
end
end
//////////////////////////////////
// check whether we have a hit
// in case the cache is disabled,
// or in case the address is NC, we
// reuse the miss mechanism to handle
// the request
READ: begin
// default: translation result not available yet, overridden below
// whenever areq_i.fetch_valid is set
state_d = TLB_MISS;
areq_o.fetch_req = '1;
// only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q;
// readout speculatively
cache_rden = cache_en_q;
if (areq_i.fetch_valid) begin
// check if we have to flush
if (flush_d) begin
state_d = IDLE;
// we have a hit or an exception output valid result
end else if ((|cl_hit & cache_en_q) | areq_i.fetch_exception.valid) begin
dreq_o.valid = ~dreq_i.kill_s2;// just don't output in this case
state_d = IDLE;
// we can accept another request
// and stay here, but only if no inval is coming in
// note: we are not expecting ifill return packets here...
if (~mem_rtrn_vld_i) begin
dreq_o.ready = 1'b1;
if (dreq_i.req) begin
state_d = READ;
end
end
// kill_s1 overrides the READ transition above:
// drop the request and go back to IDLE
if (dreq_i.kill_s1) begin
state_d = IDLE;
end
// no hit, but the request is being killed: do not issue a refill
end else if (dreq_i.kill_s2) begin
state_d = IDLE;
// we have a miss / NC transaction
end else begin
cmp_en_d = 1'b0;
// send out ifill request
mem_data_req_o = 1'b1;
if (mem_data_ack_i) begin
// only count this as a miss if the cache is enabled, and
// the address is cacheable (paddr_is_nc covers both cases)
miss_o = (~paddr_is_nc);
state_d = MISS;
end
end
// bail out if this request is being killed (and we missed on the TLB)
end else if (dreq_i.kill_s2 | flush_d) begin
state_d = KILL_ATRANS;
end
end
//////////////////////////////////
// wait until the address translation
// completes (areq_i.fetch_valid), then
// go back to READ for the tag comparison
TLB_MISS: begin
areq_o.fetch_req = '1;
// only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q;
// readout speculatively
cache_rden = cache_en_q;
if (areq_i.fetch_valid) begin
// check if we have to kill this request
if (dreq_i.kill_s2 | flush_d) begin
state_d = IDLE;
// check whether we got an exception
end else if (areq_i.fetch_exception.valid) begin
dreq_o.valid = 1'b1;
state_d = IDLE;
// re-trigger cache readout for tag comparison and cache line selection
// but if we got an invalidation, we have to wait another cycle
end else if (~mem_rtrn_vld_i) begin
state_d = READ;
end
// bail out if this request is being killed
end else if (dreq_i.kill_s2 | flush_d) begin
state_d = KILL_ATRANS;
end
end
//////////////////////////////////
// wait until the memory transaction
// returns. do not write to memory
// if the nc bit is set.
MISS: begin
// note: this is mutually exclusive with ICACHE_INV_REQ,
// so we do not have to check for invals here
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
state_d = IDLE;
// only return data if request is not being killed
if (~(dreq_i.kill_s2 | flush_d)) begin
dreq_o.valid = 1'b1;
// only write to cache if this address is cacheable
cache_wren = ~paddr_is_nc;
end
// bail out if this request is being killed
end else if (dreq_i.kill_s2 | flush_d) begin
state_d = KILL_MISS;
end
end
//////////////////////////////////
// killed address translation,
// wait until paddr is valid, and go
// back to idle
KILL_ATRANS: begin
areq_o.fetch_req = '1;
if (areq_i.fetch_valid) begin
state_d = IDLE;
end
end
//////////////////////////////////
// killed miss,
// wait until memory responds and
// go back to idle
KILL_MISS: begin
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
state_d = IDLE;
end
end
default: begin
// we should never get here
// (recover by clearing the cache)
state_d = FLUSH;
end
endcase // state_q
end
///////////////////////////////////////////////////////
// valid bit invalidation and replacement strategy
///////////////////////////////////////////////////////
// note: it cannot happen that we get an invalidation + a cl replacement
// in the same cycle as these requests arrive via the same interface
// flushes take precedence over invalidations (it is ok if we ignore
// the inval since the cache is cleared anyway)
// flush counter: advances while flush_en is set and wraps to zero once the
// last word (ICACHE_NUM_WORDS-1) has been reached
assign flush_cnt_d = (flush_done) ? '0 :
(flush_en) ? flush_cnt_q + 1 :
flush_cnt_q;
assign flush_done = (flush_cnt_q==(ICACHE_NUM_WORDS-1));
// invalidation/clearing address
// flushing takes precedence over invals
assign vld_addr = (flush_en) ? flush_cnt_q :
(inv_en) ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] :
cl_index;
// per-way request for the tag/valid arrays (priority encoded):
// all ways on flush or readout, all ways on inv.all, a single way on inv.vld,
// otherwise the latched replacement way
assign vld_req = (flush_en | cache_rden) ? '1 :
(mem_rtrn_i.inv.all & inv_en) ? '1 :
(mem_rtrn_i.inv.vld & inv_en) ? icache_way_bin2oh(mem_rtrn_i.inv.way) :
repl_way_oh_q;
// valid bits are set on a cache write and cleared otherwise (inval / flush)
assign vld_wdata = (cache_wren) ? '1 : '0;
assign vld_we = (cache_wren | inv_en | flush_en);
// chose random replacement if all are valid
assign update_lfsr = cache_wren & all_ways_valid;
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
// latch the one-hot replacement way while tag comparison is enabled
assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q;
// enable signals for memory arrays
assign cl_req = (cache_rden) ? '1 :
(cache_wren) ? repl_way_oh_q :
'0;
assign cl_we = cache_wren;
// find invalid cache line
// NOTE(review): relies on lzc returning the index of a set bit in in_i and
// asserting empty_o when in_i is all-zero - confirm against the lzc module
lzc #(
.WIDTH ( ICACHE_SET_ASSOC )
) i_lzc (
.in_i ( ~vld_rdata ),
.cnt_o ( inv_way ),
.empty_o ( all_ways_valid )
);
// generate random cacheline index
// (only advanced when a line is written while all ways are valid)
lfsr_8bit #(
.WIDTH (ICACHE_SET_ASSOC)
) i_lfsr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( update_lfsr ),
.refill_way_oh ( ),
.refill_way_bin ( rnd_way )
);
///////////////////////////////////////////////////////
// tag comparison, hit generation
///////////////////////////////////////////////////////
// binary index of the hitting way, used to select the output word
logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx;
generate
for (genvar i=0;i<ICACHE_SET_ASSOC;i++) begin : g_tag_cmpsel
// a way hits if its stored tag matches the current tag and its valid bit is set
assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
// select FETCH_WIDTH bits from the line; {cl_offset_q,3'b0} scales the
// offset by 8 (presumably byte offset -> bit offset)
assign cl_sel[i] = cl_rdata[i][{cl_offset_q,3'b0} +: FETCH_WIDTH];
end
endgenerate
// convert the hit vector into a way index
// NOTE(review): assumes lzc returns the index of the set bit in cl_hit - confirm
lzc #(
.WIDTH ( ICACHE_SET_ASSOC )
) i_lzc_hit (
.in_i ( cl_hit ),
.cnt_o ( hit_idx ),
.empty_o ( )
);
// with tag comparison enabled the data comes from the hitting way, otherwise
// it is taken directly from the memory return packet
assign dreq_o.data = ( cmp_en_q ) ? cl_sel[hit_idx] :
mem_rtrn_i.data[{cl_offset_q,3'b0} +: FETCH_WIDTH];
///////////////////////////////////////////////////////
// memory arrays and regs
///////////////////////////////////////////////////////
// raw tag RAM read data per way: {valid bit, tag}
logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata [ICACHE_SET_ASSOC-1:0];
// one tag RAM and one data RAM per way
generate
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : g_sram
// Tag RAM
sram #(
// tag + valid bit
.DATA_WIDTH ( ICACHE_TAG_WIDTH+1 ),
.NUM_WORDS ( ICACHE_NUM_WORDS )
) tag_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( vld_req[i] ),
.we_i ( vld_we ),
.addr_i ( vld_addr ),
// we can always use the saved tag here since it takes a
// couple of cycle until we write to the cache upon a miss
.wdata_i ( {vld_wdata[i], cl_tag_q} ),
.be_i ( '1 ),
.rdata_o ( cl_tag_valid_rdata[i] )
);
// split the read word into tag (lower bits) and valid bit (MSB)
assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0];
assign vld_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH];
// Data RAM
// (written with the full line from the memory return packet)
sram #(
.DATA_WIDTH ( ICACHE_LINE_WIDTH ),
.NUM_WORDS ( ICACHE_NUM_WORDS )
) data_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( cl_req[i] ),
.we_i ( cl_we ),
.addr_i ( cl_index ),
.wdata_i ( mem_rtrn_i.data ),
.be_i ( '1 ),
.rdata_o ( cl_rdata[i] )
);
end
endgenerate
// State registers of the icache controller. Every register is cleared by the
// asynchronous, active-low reset (the FSM starts in IDLE) and otherwise takes
// over its corresponding *_d value on each rising clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
  if (~rst_ni) begin
    // control state
    state_q       <= IDLE;
    cache_en_q    <= '0;
    cmp_en_q      <= '0;
    flush_q       <= '0;
    flush_cnt_q   <= '0;
    // latched request address parts
    vaddr_q       <= '0;
    cl_tag_q      <= '0;
    cl_offset_q   <= '0;
    // latched replacement way
    repl_way_oh_q <= '0;
  end else begin
    // control state
    state_q       <= state_d;
    cache_en_q    <= cache_en_d;
    cmp_en_q      <= cmp_en_d;
    flush_q       <= flush_d;
    flush_cnt_q   <= flush_cnt_d;
    // latched request address parts
    vaddr_q       <= vaddr_d;
    cl_tag_q      <= cl_tag_d;
    cl_offset_q   <= cl_offset_d;
    // latched replacement way
    repl_way_oh_q <= repl_way_oh_d;
  end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
// simulation-only checks (excluded from synthesis and from Verilator builds)
//pragma translate_off
`ifndef VERILATOR
// note: chained |-> operators nest to the right, i.e. a |-> b |-> c is a |-> (b |-> c)
// an ifill ack returning for a noncacheable paddr (outside of KILL_MISS) must carry the nc flag
noncacheable0: assert property (
@(posedge clk_i) disable iff (~rst_ni) paddr_is_nc |-> mem_rtrn_vld_i |-> state_q != KILL_MISS |-> mem_rtrn_i.rtype == ICACHE_IFILL_ACK |-> mem_rtrn_i.nc)
else $fatal(1,"[l1 icache] NC paddr implies nc ifill");
// a return packet flagged as 4 byte fill (outside of KILL_MISS) must carry the nc flag
noncacheable1: assert property (
@(posedge clk_i) disable iff (~rst_ni) mem_rtrn_vld_i |-> state_q != KILL_MISS |-> mem_rtrn_i.f4b |-> mem_rtrn_i.nc)
else $fatal(1,"[l1 icache] 4b ifill implies NC");
// cache writes and invalidation flags must never coincide
// (repl_inval0 and repl_inval1 are contrapositives of each other)
repl_inval0: assert property (
@(posedge clk_i) disable iff (~rst_ni) cache_wren |-> ~(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
repl_inval1: assert property (
@(posedge clk_i) disable iff (~rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> ~cache_wren)
else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
// the state register must always hold one of the declared states
invalid_state: assert property (
@(posedge clk_i) disable iff (~rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, TLB_MISS, KILL_ATRANS, KILL_MISS}))
else $fatal(1,"[l1 icache] fsm reached an invalid state");
// in the cycle after a cycle without invalidation, at most one way may hit
// while tag comparison is enabled (note: $onehot0 also accepts zero hits)
hot1: assert property (
@(posedge clk_i) disable iff (~rst_ni) (~inv_en) |=> cmp_en_q |-> $onehot0(cl_hit))
else $fatal(1,"[l1 icache] cl_hit signal must be hot1");
initial begin
// assert wrong parameterizations
assert (ICACHE_INDEX_WIDTH<=12)
else $fatal(1,"[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
end
`endif
//pragma translate_on
endmodule // serpent_icache

View file

@ -0,0 +1,410 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner (schaffner@iis.ee.ethz.ch), ETH Zurich
// Date: 08.08.2018
// Description: adapter module to connect the L1D$ and L1I$ to the native
// interface of the OpenPiton L1.5 cache.
//
// A couple of notes:
//
// 1) the L15 has been designed for an OpenSparc T1 core with 2 threads and can serve only
// 1 ld and rd request per thread. Ariane has only one hart, but the LSU can issue several write
// requests to optimize bandwidth. hence, we reuse the threadid field to issue and track multiple
// requests (up to 2 in this case).
//
// 2) the CSM (clumped shared memory = coherence domain restriction in OpenPiton)
// feature is currently not supported by Ariane.
//
// 3) some features like blockinitstore, prefetch, ECC errors are not used (see interface below)
//
// 4) the arbiter can store up to two outgoing requests per cache. incoming responses are passed
// through one streaming register, and need to be consumed unconditionally by the caches.
//
// 5) The L1.5 protocol is closely related to the CPX bus of openSPARC, see also [1,2]
//
// 6) Note on transaction data and size: if a store packet is less than 64 bits, then
// the field is filled with copies of the data. in case of an interrupt vector,
// an 18bit interrupt vector is expected.
//
// 7) L1I$ refill requests always have precedence over L1D$ requests.
//
// 8) L1I$ fill requests are always complete cache lines at the moment
//
// 9) the adapter converts from little endian (Ariane) to big endian (openpiton), and vice versa.
//
// 10) L1I$ requests to I.O space (bit39 of address = 1'b1) always return 32bit nc data
//
// Refs: [1] OpenSPARC T1 Microarchitecture Specification
// https://www.oracle.com/technetwork/systems/opensparc/t1-01-opensparct1-micro-arch-1538959.html
// [2] OpenPiton Microarchitecture Specification
// https://parallel.princeton.edu/openpiton/docs/micro_arch.pdf
//
import ariane_pkg::*;
import serpent_cache_pkg::*;
// Adapter between the L1 instruction/data caches and the L1.5 request/return
// interface.
//
// Request path : icache and dcache requests are buffered in one FIFO each and
//                arbitrated onto the single L1.5 request port.
// Return path  : L1.5 return packets are buffered in one FIFO, decoded, and
//                presented to the icache and/or dcache for exactly one cycle.
//
// Parameters:
//   CachedAddrBeg / CachedAddrEnd : bounds of the cached address region. Within
//                   this module they are only referenced by the currently
//                   disabled I/O remapping code below.
//   SwapEndianess : if set, 64bit data words are byte-swapped (swendian64) in
//                   both directions.
//   PitonRemapIO  : only referenced by the currently disabled I/O remapping code.
module serpent_l15_adapter #(
  parameter logic [63:0] CachedAddrBeg = 64'h00_8000_0000, // begin of cached region
  parameter logic [63:0] CachedAddrEnd = 64'h80_0000_0000, // end of cached region
  parameter bit          SwapEndianess = 1               ,
  parameter bit          PitonRemapIO  = 1                 // for OpenPiton
) (
  input  logic         clk_i,
  input  logic         rst_ni,
  // icache
  input  logic         icache_data_req_i,
  output logic         icache_data_ack_o,
  input  icache_req_t  icache_data_i,
  // returning packets must be consumed immediately
  output logic         icache_rtrn_vld_o,
  output icache_rtrn_t icache_rtrn_o,
  // dcache
  input  logic         dcache_data_req_i,
  output logic         dcache_data_ack_o,
  input  dcache_req_t  dcache_data_i,
  // returning packets must be consumed immediately
  output logic         dcache_rtrn_vld_o,
  output dcache_rtrn_t dcache_rtrn_o,
  // TODO: interrupt interface
  // L15
  output l15_req_t     l15_req_o,
  input  l15_rtrn_t    l15_rtrn_i
);

  // request path
  icache_req_t icache_data;
  logic        icache_data_full, icache_data_empty;
  dcache_req_t dcache_data;
  logic        dcache_data_full, dcache_data_empty;
  logic [1:0]  arb_req, arb_ack;
  logic        arb_idx;   // 0: icache request selected, 1: dcache request selected

  // return path
  logic        rtrn_fifo_empty, rtrn_fifo_full, rtrn_fifo_pop;
  l15_rtrn_t   rtrn_fifo_data;

  ///////////////////////////////////////////////////////
  // request path to L15
  ///////////////////////////////////////////////////////

  // relevant l15 signals
  // l15_req_t l15_req_o.l15_rqtype; // see below for encoding
  // logic l15_req_o.l15_nc; // non-cacheable bit
  // logic [2:0] l15_req_o.l15_size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
  // logic [L15_TID_WIDTH-1:0] l15_req_o.l15_threadid; // currently 0 or 1
  // logic l15_req_o.l15_invalidate_cacheline; // unused by Ariane as L1 has no ECC at the moment
  // logic [L15_WAY_WIDTH-1:0] l15_req_o.l15_l1rplway; // way to replace
  // logic [39:0] l15_req_o.l15_address; // physical address
  // logic [63:0] l15_req_o.l15_data; // word to write
  // logic [63:0] l15_req_o.l15_data_next_entry; // unused in Ariane (only used for CAS atomic requests)
  // logic [L15_TLB_CSM_WIDTH-1:0] l15_req_o.l15_csm_data;

  // only referenced by the disabled I/O remapping code further below
  logic [63:0] tmp_paddr;

  // a request is accepted whenever the corresponding request FIFO has space
  assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
  assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;

  // data mux
  assign l15_req_o.l15_nc                   = (arb_idx) ? dcache_data.nc : icache_data.nc;
  // icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
  assign l15_req_o.l15_size                 = (arb_idx)        ? dcache_data.size :
                                              (icache_data.nc) ? 3'b010 : 3'b111;
  assign l15_req_o.l15_threadid             = (arb_idx) ? dcache_data.tid : icache_data.tid;
  assign l15_req_o.l15_prefetch             = '0; // unused in openpiton
  assign l15_req_o.l15_invalidate_cacheline = '0; // unused by Ariane as L1 has no ECC at the moment
  assign l15_req_o.l15_blockstore           = '0; // unused in openpiton
  assign l15_req_o.l15_blockinitstore       = '0; // unused in openpiton
  assign l15_req_o.l15_l1rplway             = (arb_idx) ? dcache_data.way : icache_data.way;

  // disabled I/O remapping (would use CachedAddrBeg / PitonRemapIO):
  // assign tmp_paddr = (arb_idx) ? dcache_data.paddr :
  // icache_data.paddr;
  // assign l15_req_o.l15_address = ((tmp_paddr < CachedAddrBeg) && PitonRemapIO) ? {25'b1, tmp_paddr[38:0]} : tmp_paddr;
  assign l15_req_o.l15_address              = (arb_idx) ? dcache_data.paddr :
                                                          icache_data.paddr;
  assign l15_req_o.l15_data_next_entry      = '0; // unused in Ariane (only used for CAS atomic requests)
  assign l15_req_o.l15_csm_data             = '0; // unused in Ariane (only used for coherence domain restriction features)
  // the AMO opcode is always taken from the dcache request, independent of arb_idx
  assign l15_req_o.l15_amo_op               = dcache_data.amo_op;

  // openpiton is big endian
  generate
    if (SwapEndianess) assign l15_req_o.l15_data = swendian64(dcache_data.data);
    else               assign l15_req_o.l15_data = dcache_data.data;
  endgenerate

  // arbiter
  // NOTE(review): arbitration policy and lock-in behaviour are defined by the
  // rrarbiter module - confirm against its implementation; it is enabled by
  // the l15_ack of the L1.5
  rrarbiter #(
    .NUM_REQ(2),
    .LOCK_IN(1)
  ) i_rrarbiter (
    .clk_i  ( clk_i               ),
    .rst_ni ( rst_ni              ),
    .flush_i( '0                  ),
    .en_i   ( l15_rtrn_i.l15_ack  ),
    .req_i  ( arb_req             ),
    .ack_o  ( arb_ack             ),
    .vld_o  (                     ),
    .idx_o  ( arb_idx             )
  );

  // bit 0: icache request pending, bit 1: dcache request pending
  assign arb_req           = {~dcache_data_empty, ~icache_data_empty};
  assign l15_req_o.l15_val = (|arb_req);// & ~header_ack_q;

  // encode packet type
  always_comb begin : p_req
    // default (also used for unhandled dcache request types)
    l15_req_o.l15_rqtype = L15_LOAD_RQ;

    unique case (arb_idx)
      0: begin// icache
        l15_req_o.l15_rqtype = L15_IMISS_RQ;
      end
      1: begin
        unique case (dcache_data.rtype)
          DCACHE_STORE_REQ: begin
            l15_req_o.l15_rqtype = L15_STORE_RQ;
          end
          DCACHE_LOAD_REQ: begin
            l15_req_o.l15_rqtype = L15_LOAD_RQ;
          end
          DCACHE_ATOMIC_REQ: begin
            l15_req_o.l15_rqtype = L15_ATOMIC_RQ;
          end
          // DCACHE_INT_REQ: begin
          // //TODO interrupt requests
          // end
          default: begin
            ;
          end
        endcase // dcache_data.rtype
      end
      default: begin
        ;
      end
    endcase
  end // p_req

  // request FIFO for the icache: pushed on request acknowledge, popped when the
  // arbiter acknowledges the icache request
  fifo_v2 #(
    .dtype ( icache_req_t           ),
    .DEPTH ( ADAPTER_REQ_FIFO_DEPTH )
  ) i_icache_data_fifo (
    .clk_i       ( clk_i             ),
    .rst_ni      ( rst_ni            ),
    .flush_i     ( 1'b0              ),
    .testmode_i  ( 1'b0              ),
    .full_o      ( icache_data_full  ),
    .empty_o     ( icache_data_empty ),
    .alm_full_o  (                   ),
    .alm_empty_o (                   ),
    .data_i      ( icache_data_i     ),
    .push_i      ( icache_data_ack_o ),
    .data_o      ( icache_data       ),
    .pop_i       ( arb_ack[0]        )
  );

  // request FIFO for the dcache: pushed on request acknowledge, popped when the
  // arbiter acknowledges the dcache request
  fifo_v2 #(
    .dtype ( dcache_req_t           ),
    .DEPTH ( ADAPTER_REQ_FIFO_DEPTH )
  ) i_dcache_data_fifo (
    .clk_i       ( clk_i             ),
    .rst_ni      ( rst_ni            ),
    .flush_i     ( 1'b0              ),
    .testmode_i  ( 1'b0              ),
    .full_o      ( dcache_data_full  ),
    .empty_o     ( dcache_data_empty ),
    .alm_full_o  (                   ),
    .alm_empty_o (                   ),
    .data_i      ( dcache_data_i     ),
    .push_i      ( dcache_data_ack_o ),
    .data_o      ( dcache_data       ),
    .pop_i       ( arb_ack[1]        )
  );

  ///////////////////////////////////////////////////////
  // return path from L15
  ///////////////////////////////////////////////////////

  // relevant l15 signals
  // l15_rtrn_i.l15_returntype; // see below for encoding
  // l15_rtrn_i.l15_noncacheable; // non-cacheable bit
  // l15_rtrn_i.l15_atomic; // asserted in load return and store ack pack
  // l15_rtrn_i.l15_threadid; // used as transaction ID
  // l15_rtrn_i.l15_f4b; // 4byte instruction fill from I/O space (nc).
  // l15_rtrn_i.l15_data_0; // used for both caches
  // l15_rtrn_i.l15_data_1; // used for both caches
  // l15_rtrn_i.l15_data_2; // currently only used for I$
  // l15_rtrn_i.l15_data_3; // currently only used for I$
  // l15_rtrn_i.l15_inval_icache_all_way; // invalidate all ways
  // l15_rtrn_i.l15_inval_address_15_4; // invalidate selected cacheline
  // l15_rtrn_i.l15_inval_dcache_inval; // invalidate selected cacheline and way
  // l15_rtrn_i.l15_inval_way; // way to invalidate

  // acknowledge if we have space to hold this packet
  assign l15_req_o.l15_req_ack = l15_rtrn_i.l15_val & ~rtrn_fifo_full;
  // packets have to be consumed immediately
  assign rtrn_fifo_pop         = ~rtrn_fifo_empty;

  // decode packet type
  always_comb begin : p_rtrn_logic
    // defaults: no valid return packet for either cache
    icache_rtrn_o.rtype = ICACHE_IFILL_ACK;
    dcache_rtrn_o.rtype = DCACHE_LOAD_ACK;
    icache_rtrn_vld_o   = 1'b0;
    dcache_rtrn_vld_o   = 1'b0;
    if(~rtrn_fifo_empty) begin
      unique case (rtrn_fifo_data.l15_returntype)
        L15_LOAD_RET: begin
          dcache_rtrn_o.rtype = DCACHE_LOAD_ACK;
          dcache_rtrn_vld_o   = 1'b1;
        end
        L15_ST_ACK: begin
          dcache_rtrn_o.rtype = DCACHE_STORE_ACK;
          dcache_rtrn_vld_o   = 1'b1;
        end
        L15_IFILL_RET: begin
          icache_rtrn_o.rtype = ICACHE_IFILL_ACK;
          icache_rtrn_vld_o   = 1'b1;
        end
        L15_EVICT_REQ: begin
          // an evict packet is forwarded to each cache whose invalidation
          // flags (single way or all ways) are set
          icache_rtrn_o.rtype = ICACHE_INV_REQ;
          dcache_rtrn_o.rtype = DCACHE_INV_REQ;
          icache_rtrn_vld_o   = icache_rtrn_o.inv.vld | icache_rtrn_o.inv.all;
          dcache_rtrn_vld_o   = dcache_rtrn_o.inv.vld | dcache_rtrn_o.inv.all;
        end
        L15_CPX_RESTYPE_ATOMIC_RES: begin
          dcache_rtrn_o.rtype = DCACHE_ATOMIC_ACK;
          dcache_rtrn_vld_o   = 1'b1;
        end
        // L15_INT_RET: begin
        // TODO: implement this
        // dcache_rtrn_o.reqType = DCACHE_INT_ACK;
        // end
        default: begin
          ;
        end
      endcase // rtrn_fifo_data.l15_returntype
    end
  end

  // openpiton is big endian
  // (the dcache receives data words 0..1, the icache data words 0..3)
  generate
    if (SwapEndianess) begin
      assign dcache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_1),
                                    swendian64(rtrn_fifo_data.l15_data_0) };
      assign icache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_3),
                                    swendian64(rtrn_fifo_data.l15_data_2),
                                    swendian64(rtrn_fifo_data.l15_data_1),
                                    swendian64(rtrn_fifo_data.l15_data_0) };
    end else begin
      assign dcache_rtrn_o.data = { rtrn_fifo_data.l15_data_1,
                                    rtrn_fifo_data.l15_data_0 };
      assign icache_rtrn_o.data = { rtrn_fifo_data.l15_data_3,
                                    rtrn_fifo_data.l15_data_2,
                                    rtrn_fifo_data.l15_data_1,
                                    rtrn_fifo_data.l15_data_0 };
    end
  endgenerate

  // fifo signals
  assign icache_rtrn_o.tid = rtrn_fifo_data.l15_threadid;
  assign icache_rtrn_o.nc  = rtrn_fifo_data.l15_noncacheable;
  assign icache_rtrn_o.f4b = rtrn_fifo_data.l15_f4b;
  assign dcache_rtrn_o.tid = rtrn_fifo_data.l15_threadid;
  assign dcache_rtrn_o.nc  = rtrn_fifo_data.l15_noncacheable;

  // invalidation signal mapping
  // (the returned address bits 15:4 are extended with four zero bits)
  assign icache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
  assign icache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way;
  assign icache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_icache_inval;
  assign icache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_icache_all_way;
  assign dcache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
  assign dcache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way;
  assign dcache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_dcache_inval;
  assign dcache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_dcache_all_way;

  // return FIFO: pushed whenever an incoming packet is acknowledged, popped
  // as soon as it holds a packet
  fifo_v2 #(
    .dtype ( l15_rtrn_t              ),
    .DEPTH ( ADAPTER_RTRN_FIFO_DEPTH )
  ) i_rtrn_fifo (
    .clk_i       ( clk_i                 ),
    .rst_ni      ( rst_ni                ),
    .flush_i     ( 1'b0                  ),
    .testmode_i  ( 1'b0                  ),
    .full_o      ( rtrn_fifo_full        ),
    .empty_o     ( rtrn_fifo_empty       ),
    .alm_full_o  (                       ),
    .alm_empty_o (                       ),
    .data_i      ( l15_rtrn_i            ),
    .push_i      ( l15_req_o.l15_req_ack ),
    .data_o      ( rtrn_fifo_data        ),
    .pop_i       ( rtrn_fifo_pop         )
  );

  ///////////////////////////////////////////////////////
  // assertions
  ///////////////////////////////////////////////////////

  //pragma translate_off
  `ifndef VERILATOR

  // an evict packet must have at least one invalidation flag set
  invalidations: assert property (
    @(posedge clk_i) disable iff (~rst_ni) l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype == L15_EVICT_REQ |-> (l15_rtrn_i.l15_inval_icache_inval |
                                                                                                                   l15_rtrn_i.l15_inval_dcache_inval |
                                                                                                                   l15_rtrn_i.l15_inval_icache_all_way |
                                                                                                                   l15_rtrn_i.l15_inval_dcache_all_way))
    else $fatal(1,"[l15_adapter] got invalidation package with zero invalidation flags");

  // outgoing stores must never be flagged as block(init)stores
  blockstore_o: assert property (
    @(posedge clk_i) disable iff (~rst_ni) l15_req_o.l15_val |-> l15_req_o.l15_rqtype == L15_STORE_RQ |-> !(l15_req_o.l15_blockstore || l15_req_o.l15_blockinitstore))
    else $fatal(1,"[l15_adapter] blockstores are not supported (out)");

  // incoming store acks must never be flagged as blockinitstores
  // (the original set membership listed L15_ST_ACK twice; a plain comparison is equivalent)
  blockstore_i: assert property (
    @(posedge clk_i) disable iff (~rst_ni) l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype == L15_ST_ACK |-> !l15_rtrn_i.l15_blockinitstore)
    else $fatal(1,"[l15_adapter] blockstores are not supported (in)");

  // warn about return types that p_rtrn_logic does not decode
  unsuported_rtrn_types: assert property (
    @(posedge clk_i) disable iff (~rst_ni) (l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_LOAD_RET, L15_ST_ACK, L15_IFILL_RET, L15_EVICT_REQ, L15_CPX_RESTYPE_ATOMIC_RES}))
    else $warning("[l15_adapter] return type %04X is not (yet) supported by l15 adapter.", l15_rtrn_i.l15_returntype);

  // atomic responses must have the l15_atomic flag set
  amo_type: assert property (
    @(posedge clk_i) disable iff (~rst_ni) (l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_CPX_RESTYPE_ATOMIC_RES} |-> l15_rtrn_i.l15_atomic ))
    else $fatal(1,"[l15_adapter] l15_atomic must be asserted when the return type is an ATOMIC_RES");

  initial begin
    // assert wrong parameterizations
    assert (L15_SET_ASSOC >= ICACHE_SET_ASSOC)
      else $fatal(1,"[l15_adapter] number of icache ways must be smaller or equal the number of L15 ways");
    // assert wrong parameterizations
    assert (L15_SET_ASSOC >= DCACHE_SET_ASSOC)
      else $fatal(1,"[l15_adapter] number of dcache ways must be smaller or equal the number of L15 ways");
    // invalidation address returned by L1.5 is 16 bit
    assert (16 >= DCACHE_INDEX_WIDTH && 16 >= ICACHE_INDEX_WIDTH)
      else $fatal(1,"[l15_adapter] maximum number of index bits supported by L1.5 is 16");
  end
  `endif
  //pragma translate_on

endmodule // serpent_l15_adapter

View file

@ -42,6 +42,7 @@ module std_cache_subsystem #(
input logic dcache_flush_i, // high until acknowledged
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic dcache_miss_o, // we missed on a ld/st
output logic wbuffer_empty_o, // statically set to 1, as there is no wbuffer in this cache system
// Request ports
input dcache_req_i_t [2:0] dcache_req_ports_i, // to/from LSU
output dcache_req_o_t [2:0] dcache_req_ports_o, // to/from LSU
@ -50,6 +51,8 @@ module std_cache_subsystem #(
input ariane_axi::resp_t axi_resp_i
);
assign wbuffer_empty_o = 1'b1;
ariane_axi::req_t axi_req_icache;
ariane_axi::resp_t axi_resp_icache;
ariane_axi::req_t axi_req_bypass;
@ -273,9 +276,11 @@ module std_cache_subsystem #(
.oup_ready_i ( {axi_req_icache.b_ready, axi_req_bypass.b_ready, axi_req_data.b_ready} )
);
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR

View file

@ -21,7 +21,8 @@ module axi_lite_interface #(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
AXI_BUS.Slave slave,
input ariane_axi::req_t axi_req_i,
output ariane_axi::resp_t axi_resp_o,
output logic [AXI_ADDR_WIDTH-1:0] address_o,
output logic en_o, // transaction is valid
@ -38,17 +39,17 @@ module axi_lite_interface #(
logic [AXI_ADDR_WIDTH-1:0] address_n, address_q;
// pass through read data on the read data channel
assign slave.r_data = data_i;
assign axi_resp_o.r.data = data_i;
// send back the transaction id we've latched
assign slave.r_id = trans_id_q;
assign slave.b_id = trans_id_q;
assign axi_resp_o.r.id = trans_id_q;
assign axi_resp_o.b.id = trans_id_q;
// set r_last to one as defined by the AXI4 - Lite standard
assign slave.r_last = 1'b1;
assign axi_resp_o.r.last = 1'b1;
// we do not support any errors so set response flag to all zeros
assign slave.b_resp = 2'b0;
assign slave.r_resp = 2'b0;
assign axi_resp_o.b.resp = 2'b0;
assign axi_resp_o.r.resp = 2'b0;
// output data which we want to write to the slave
assign data_o = slave.w_data;
assign data_o = axi_req_i.w.data;
// ------------------------
// AXI4-Lite State Machine
// ------------------------
@ -59,14 +60,12 @@ module axi_lite_interface #(
trans_id_n = trans_id_q;
// we'll answer a write request only if we got address and data
slave.aw_ready = 1'b0;
slave.w_ready = 1'b0;
slave.b_valid = 1'b0;
slave.b_user = 1'b0;
axi_resp_o.aw_ready = 1'b0;
axi_resp_o.w_ready = 1'b0;
axi_resp_o.b_valid = 1'b0;
slave.ar_ready = 1'b1;
slave.r_valid = 1'b0;
slave.r_user = 1'b0;
axi_resp_o.ar_ready = 1'b1;
axi_resp_o.r_valid = 1'b0;
address_o = '0;
we_o = 1'b0;
@ -76,24 +75,24 @@ module axi_lite_interface #(
// we are ready to accept a new request
IDLE: begin
// we've got a valid write request, we also know that we have asserted the aw_ready
if (slave.aw_valid) begin
if (axi_req_i.aw_valid) begin
slave.aw_ready = 1'b1;
axi_resp_o.aw_ready = 1'b1;
// this costs performance but the interconnect does not obey the AXI standard
NS = WRITE;
// save address
address_n = slave.aw_addr;
address_n = axi_req_i.aw.addr;
// save the transaction id for reflection
trans_id_n = slave.aw_id;
trans_id_n = axi_req_i.aw.id;
// we've got a valid read request, we also know that we have asserted the ar_ready
end else if (slave.ar_valid) begin
end else if (axi_req_i.ar_valid) begin
NS = READ;
address_n = slave.ar_addr;
address_n = axi_req_i.ar.addr;
// also request the word from the memory-like interface
address_o = slave.ar_addr;
address_o = axi_req_i.ar.addr;
// save the transaction id for reflection
trans_id_n = slave.ar_id;
trans_id_n = axi_req_i.ar.id;
end
end
@ -103,22 +102,22 @@ module axi_lite_interface #(
// enable the ram-like
en_o = 1'b1;
// we are not ready for another request here
slave.ar_ready = 1'b0;
axi_resp_o.ar_ready = 1'b0;
// further assert the correct address
address_o = address_q;
// the read is valid
slave.r_valid = 1'b1;
axi_resp_o.r_valid = 1'b1;
// check if we got a valid r_ready and go back to IDLE
if (slave.r_ready)
if (axi_req_i.r_ready)
NS = IDLE;
end
// We've got a write request at least one cycle earlier
// wait here for the data
WRITE: begin
if (slave.w_valid) begin
if (axi_req_i.w_valid) begin
// we are not ready for another request here
slave.ar_ready = 1'b0;
slave.w_ready = 1'b1;
axi_resp_o.ar_ready = 1'b0;
axi_resp_o.w_ready = 1'b1;
// use the latched address
address_o = address_q;
en_o = 1'b1;
@ -129,9 +128,9 @@ module axi_lite_interface #(
end
WRITE_B: begin
slave.b_valid = 1'b1;
axi_resp_o.b_valid = 1'b1;
// we've already performed the write here so wait for the ready signal
if (slave.b_ready)
if (axi_req_i.b_ready)
NS = IDLE;
end
default:;
@ -161,10 +160,10 @@ module axi_lite_interface #(
//pragma translate_off
`ifndef VERILATOR
// check that burst length is just one
assert property (@(posedge clk_i) slave.ar_valid |-> ((slave.ar_len == 8'b0)))
assert property (@(posedge clk_i) axi_req_i.ar_valid |-> ((axi_req_i.ar.len == 8'b0)))
else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end
// do the same for the write channel
assert property (@(posedge clk_i) slave.aw_valid |-> ((slave.aw_len == 8'b0)))
assert property (@(posedge clk_i) axi_req_i.aw_valid |-> ((axi_req_i.aw.len == 8'b0)))
else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end
`endif
//pragma translate_on

View file

@ -21,12 +21,12 @@ module clint #(
parameter int unsigned AXI_DATA_WIDTH = 64,
parameter int unsigned AXI_ID_WIDTH = 10,
parameter int unsigned NR_CORES = 1 // Number of cores therefore also the number of timecmp registers and timer interrupts
)(
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic testmode_i,
AXI_BUS.Slave slave,
input ariane_axi::req_t axi_req_i,
output ariane_axi::resp_t axi_resp_o,
input logic rtc_i, // Real-time clock in (usually 32.768 kHz)
output logic [NR_CORES-1:0] timer_irq_o, // Timer interrupts
output logic [NR_CORES-1:0] ipi_o // software interrupt (a.k.a inter-process-interrupt)
@ -35,6 +35,9 @@ module clint #(
localparam logic [15:0] MSIP_BASE = 16'h0;
localparam logic [15:0] MTIMECMP_BASE = 16'h4000;
localparam logic [15:0] MTIME_BASE = 16'hbff8;
localparam AddrSelWidth = (NR_CORES == 1) ? 1 : $clog2(NR_CORES);
// signals from AXI 4 Lite
logic [AXI_ADDR_WIDTH-1:0] address;
logic en;
@ -60,14 +63,15 @@ module clint #(
.AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
.AXI_ID_WIDTH ( AXI_ID_WIDTH )
) axi_lite_interface_i (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.slave ( slave ),
.address_o ( address ),
.en_o ( en ),
.we_o ( we ),
.data_i ( rdata ),
.data_o ( wdata )
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.axi_req_i ( axi_req_i ),
.axi_resp_o ( axi_resp_o ),
.address_o ( address ),
.en_o ( en ),
.we_o ( we ),
.data_i ( rdata ),
.data_o ( wdata )
);
// -----------------------------
@ -86,11 +90,11 @@ module clint #(
if (en && we) begin
case (register_address) inside
[MSIP_BASE:MSIP_BASE+8*NR_CORES]: begin
msip_n[$unsigned(address[NR_CORES-1+3:3])] = wdata[0];
msip_n[$unsigned(address[AddrSelWidth-1+3:3])] = wdata[0];
end
[MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES]: begin
mtimecmp_n[$unsigned(address[NR_CORES-1+3:3])] = wdata;
mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])] = wdata;
end
MTIME_BASE: begin
@ -108,11 +112,11 @@ module clint #(
if (en && !we) begin
case (register_address) inside
[MSIP_BASE:MSIP_BASE+8*NR_CORES]: begin
rdata = msip_q[$unsigned(address[NR_CORES-1+3:3])];
rdata = msip_q[$unsigned(address[AddrSelWidth-1+3:3])];
end
[MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES]: begin
rdata = mtimecmp_q[$unsigned(address[NR_CORES-1+3:3])];
rdata = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])];
end
MTIME_BASE: begin

View file

@ -139,6 +139,7 @@ module commit_stage #(
else // if the LSU buffer is not ready - do not commit, wait
commit_ack_o[0] = 1'b0;
end
// ---------
// FPU Flags
// ---------
@ -149,6 +150,7 @@ module commit_stage #(
end
end
// ---------
// CSR Logic
// ---------

@ -1 +1 @@
Subproject commit 6de11b90cde4b408adc1f27c655844c54de8080d
Subproject commit cb801849915b6bcd7af33f4aa8389c627324a31b

View file

@ -116,6 +116,7 @@ module csr_regfile #(
logic [63:0] dpc_q, dpc_d;
logic [63:0] dscratch0_q, dscratch0_d;
logic [63:0] dscratch1_q, dscratch1_d;
logic [63:0] mtvec_q, mtvec_d;
logic [63:0] medeleg_q, medeleg_d;
logic [63:0] mideleg_q, mideleg_d;
@ -186,6 +187,7 @@ module csr_regfile #(
riscv::CSR_DCSR: csr_rdata = {32'b0, dcsr_q};
riscv::CSR_DPC: csr_rdata = dpc_q;
riscv::CSR_DSCRATCH0: csr_rdata = dscratch0_q;
riscv::CSR_DSCRATCH1: csr_rdata = dscratch1_q;
// trigger module registers
riscv::CSR_TSELECT:; // not implemented
riscv::CSR_TDATA1:; // not implemented
@ -296,6 +298,7 @@ module csr_regfile #(
dcsr_d = dcsr_q;
dpc_d = dpc_q;
dscratch0_d = dscratch0_q;
dscratch1_d = dscratch1_q;
mstatus_d = mstatus_q;
// check whether we come out of reset
@ -389,6 +392,7 @@ module csr_regfile #(
end
riscv::CSR_DPC: dpc_d = csr_wdata;
riscv::CSR_DSCRATCH0: dscratch0_d = csr_wdata;
riscv::CSR_DSCRATCH1: dscratch1_d = csr_wdata;
// trigger module CSRs
riscv::CSR_TSELECT:; // not implemented
riscv::CSR_TDATA1:; // not implemented
@ -980,6 +984,7 @@ module csr_regfile #(
dcsr_q.prv <= riscv::PRIV_LVL_M;
dpc_q <= 64'b0;
dscratch0_q <= 64'b0;
dscratch1_q <= 64'b0;
// machine mode registers
mstatus_q <= 64'b0;
// set to boot address + direct mode + 4 byte offset which is the initial trap
@ -1018,6 +1023,7 @@ module csr_regfile #(
dcsr_q <= dcsr_d;
dpc_q <= dpc_d;
dscratch0_q <= dscratch0_d;
dscratch1_q <= dscratch1_d;
// machine mode registers
mstatus_q <= mstatus_d;
mtvec_rst_load_q <= 1'b0;

View file

@ -0,0 +1 @@
../../../bootrom/encoding.h

View file

@ -17,7 +17,7 @@
module dm_csrs #(
parameter int NrHarts = -1
)(
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic testmode_i,
@ -89,20 +89,43 @@ module dm_csrs #(
localparam dm::dm_csr_t ProgBufEnd = dm::dm_csr_t'((dm::ProgBuf0 + {4'b0, dm::ProgBufSize}));
logic [31:0] haltsum0, haltsum1, haltsum2, haltsum3;
// TODO(zarubaf) Need an elegant way to calculate haltsums
// remove assertions below when implemented...
assign haltsum0 = '0;
assign haltsum1 = '0;
assign haltsum2 = '0;
assign haltsum3 = '0;
for (genvar i = 0; i < 32; i++) begin
// assign haltsum0[i] = halted_i[i];
// TODO(zarubaf) Implement correct haltsum logic
// assign haltsum0[i] = halted_i[hartsel[19:5]];
// assign haltsum1[i] = (NrHarts > 32) ? &halted_i[hartsel[19:10] +: 32] : 1'b0;
// assign haltsum2[i] = (NrHarts > 1024) ? &halted_i[hartsel[19:15] +: 1024] : 1'b0;
// assign haltsum3[i] = (NrHarts > 32768) ? &halted_i[hartsel[19:19] +: 32768] : 1'b0;
logic [NrHarts/2**5 :0][31:0] halted_reshaped0;
logic [NrHarts/2**10:0][31:0] halted_reshaped1;
logic [NrHarts/2**15:0][31:0] halted_reshaped2;
logic [(NrHarts/2**10+1)*32-1:0] halted_flat1;
logic [(NrHarts/2**15+1)*32-1:0] halted_flat2;
logic [32-1:0] halted_flat3;
// haltsum0
assign halted_reshaped0 = halted_i;
assign haltsum0 = halted_reshaped0[hartsel_o[19:5]];
// haltsum1
// Builds haltsum1: bit k of halted_flat1 summarizes row k of halted_reshaped0,
// i.e. one group of 32 harts. The debug specification defines a haltsum1 bit
// as set when ANY hart of its group is halted, hence the OR reduction (an AND
// reduction could also never report a partially populated last group, because
// the unused upper bits of that row are zero).
always_comb begin : p_reduction1
    halted_flat1 = '0;
    // halted_reshaped0 is declared with NrHarts/2**5 + 1 rows; visit all of
    // them so the last (possibly partial) group is summarized as well
    for (int k = 0; k < NrHarts/2**5 + 1; k++) begin
        halted_flat1[k] = |halted_reshaped0[k];
    end
    // regroup the per-group summary bits into 32 bit words and select the
    // word addressed by the upper hart select bits
    halted_reshaped1 = halted_flat1;
    haltsum1 = halted_reshaped1[hartsel_o[19:10]];
end
// haltsum2
// Builds haltsum2: bit k of halted_flat2 summarizes row k of halted_reshaped1,
// i.e. 32 haltsum1-level summary bits. The debug specification defines a
// haltsum2 bit as set when ANY hart of its group is halted, hence the OR
// reduction.
always_comb begin : p_reduction2
    halted_flat2 = '0;
    // halted_reshaped1 is declared with NrHarts/2**10 + 1 rows; visit all of
    // them so the last (possibly partial) group is summarized as well
    for (int k = 0; k < NrHarts/2**10 + 1; k++) begin
        halted_flat2[k] = |halted_reshaped1[k];
    end
    // regroup the summary bits into 32 bit words and select the word
    // addressed by the upper hart select bits
    halted_reshaped2 = halted_flat2;
    haltsum2 = halted_reshaped2[hartsel_o[19:15]];
end
// haltsum3
// Builds haltsum3: bit k of halted_flat3 summarizes row k of halted_reshaped2.
// The debug specification defines a haltsum3 bit as set when ANY hart of its
// group is halted, hence the OR reduction.
always_comb begin : p_reduction3
    halted_flat3 = '0;
    // halted_reshaped2 is declared with NrHarts/2**15 + 1 rows; visit all of
    // them so the last (possibly partial) group is summarized as well
    for (int k = 0; k < NrHarts/2**15 + 1; k++) begin
        // halted_flat3 is only 32 bit wide: never index beyond its range
        if (k < $bits(halted_flat3)) begin
            halted_flat3[k] = |halted_reshaped2[k];
        end
    end
    haltsum3 = halted_flat3;
end
dm::dmstatus_t dmstatus;
dm::dmcontrol_t dmcontrol_d, dmcontrol_q;
@ -120,7 +143,7 @@ module dm_csrs #(
// because first data address starts at 0x04
logic [({3'b0, dm::DataCount} + dm::Data0 - 1):(dm::Data0)][31:0] data_d, data_q;
logic [NrHarts-1:0] selected_hart;
logic [HartSelLen-1:0] selected_hart;
// a successful response returns zero
assign dmi_resp_o.resp = dm::DTM_SUCCESS;
@ -149,25 +172,25 @@ module dm_csrs #(
// we do not support halt-on-reset sequence
dmstatus.hasresethaltreq = 1'b0;
// TODO(zarubaf) things need to change here if we implement the array mask
dmstatus.allhavereset = havereset_q[hartsel_o[HartSelLen-1:0]];
dmstatus.anyhavereset = havereset_q[hartsel_o[HartSelLen-1:0]];
dmstatus.allhavereset = havereset_q[selected_hart];
dmstatus.anyhavereset = havereset_q[selected_hart];
dmstatus.allresumeack = resumeack_i[hartsel_o[HartSelLen-1:0]];
dmstatus.anyresumeack = resumeack_i[hartsel_o[HartSelLen-1:0]];
dmstatus.allresumeack = resumeack_i[selected_hart];
dmstatus.anyresumeack = resumeack_i[selected_hart];
dmstatus.allunavail = unavailable_i[hartsel_o[HartSelLen-1:0]];
dmstatus.anyunavail = unavailable_i[hartsel_o[HartSelLen-1:0]];
dmstatus.allunavail = unavailable_i[selected_hart];
dmstatus.anyunavail = unavailable_i[selected_hart];
// as soon as we are out of the legal Hart region tell the debugger
// that there are only non-existent harts
dmstatus.allnonexistent = (hartsel_o > NrHarts[19:0] - 1) ? 1'b1 : 1'b0;
dmstatus.anynonexistent = (hartsel_o > NrHarts[19:0] - 1) ? 1'b1 : 1'b0;
dmstatus.allhalted = halted_i[hartsel_o[HartSelLen-1:0]];
dmstatus.anyhalted = halted_i[hartsel_o[HartSelLen-1:0]];
dmstatus.allhalted = halted_i[selected_hart];
dmstatus.anyhalted = halted_i[selected_hart];
dmstatus.allrunning = ~halted_i[hartsel_o[HartSelLen-1:0]];
dmstatus.anyrunning = ~halted_i[hartsel_o[HartSelLen-1:0]];
dmstatus.allrunning = ~halted_i[selected_hart];
dmstatus.anyrunning = ~halted_i[selected_hart];
// abstractcs
abstractcs = '0;
@ -436,7 +459,7 @@ module dm_csrs #(
// output multiplexer
always_comb begin
selected_hart = hartsel_o[NrHarts-1:0];
selected_hart = hartsel_o[HartSelLen-1:0];
// default assignment
haltreq_o = '0;
resumereq_o = '0;
@ -528,9 +551,6 @@ module dm_csrs #(
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////

View file

@ -137,7 +137,7 @@ module dm_sba (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( req ),
.type_i ( std_cache_pkg::SINGLE_REQ ),
.type_i ( ariane_axi::SINGLE_REQ ),
.gnt_o ( gnt ),
.gnt_id_o ( ),
.addr_i ( address ),

View file

@ -23,7 +23,7 @@ module dm_top #(
parameter int AxiAddrWidth = -1,
parameter int AxiDataWidth = -1,
parameter int AxiUserWidth = -1
)(
) (
input logic clk_i, // clock
input logic rst_ni, // asynchronous reset active low, connect PoR here, not the system reset
input logic testmode_i,
@ -32,10 +32,13 @@ module dm_top #(
output logic [NrHarts-1:0] debug_req_o, // async debug request
input logic [NrHarts-1:0] unavailable_i, // communicate whether the hart is unavailable (e.g.: power down)
AXI_BUS.Slave axi_slave, // bus slave, for an execution based technique
// bus slave, for an execution based technique
input ariane_axi::req_t axi_s_req_i,
output ariane_axi::resp_t axi_s_resp_o,
// bus master, for system bus accesses
output ariane_axi::req_t axi_req_o,
input ariane_axi::resp_t axi_resp_i,
output ariane_axi::req_t axi_m_req_o,
input ariane_axi::resp_t axi_m_resp_i,
// Connection to DTM - compatible to RocketChip Debug Module
input logic dmi_rst_ni,
@ -148,8 +151,8 @@ module dm_top #(
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.dmactive_i ( dmactive_o ),
.axi_req_o,
.axi_resp_i,
.axi_req_o ( axi_m_req_o ),
.axi_resp_i ( axi_m_resp_i ),
.sbaddress_i ( sbaddress_csrs_sba ),
.sbaddress_o ( sbaddress_sba_csrs ),
.sbaddress_write_valid_i ( sbaddress_write_valid ),
@ -195,20 +198,33 @@ module dm_top #(
.rdata_o ( rdata )
);
AXI_BUS #(
.AXI_ID_WIDTH ( AxiIdWidth ),
.AXI_ADDR_WIDTH ( AxiAddrWidth ),
.AXI_DATA_WIDTH ( AxiDataWidth ),
.AXI_USER_WIDTH ( AxiUserWidth )
) slave();
axi_slave_connect_rev i_axi_slave_connect_rev (
.axi_req_i (axi_s_req_i),
.axi_resp_o(axi_s_resp_o),
.slave(slave));
axi2mem #(
.AXI_ID_WIDTH ( AxiIdWidth ),
.AXI_ADDR_WIDTH ( AxiAddrWidth ),
.AXI_DATA_WIDTH ( AxiDataWidth ),
.AXI_USER_WIDTH ( AxiUserWidth )
) i_axi2mem (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.slave ( axi_slave ),
.req_o ( req ),
.we_o ( we ),
.addr_o ( addr ),
.be_o ( be ),
.data_o ( wdata ),
.data_i ( rdata )
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.slave ( slave ),
.req_o ( req ),
.we_o ( we ),
.addr_o ( addr ),
.be_o ( be ),
.data_o ( wdata ),
.data_i ( rdata )
);
endmodule

View file

@ -249,7 +249,7 @@ module ex_stage (
assign lsu_data = lsu_valid_i ? fu_data_i : '0;
lsu lsu_i (
load_store_unit lsu_i (
.clk_i,
.rst_ni,
.flush_i,

@ -1 +1 @@
Subproject commit 00e2579173f1412f06d4eb95d6b98d0eb1cd2e94
Subproject commit 1801c5e0ea231f83e9ba5422b9a7a4feaaad879f

View file

@ -85,7 +85,6 @@ module frontend (
logic is_mispredict;
// branch-prediction which we inject into the pipeline
branchpredict_sbe_t bp_sbe;
// fetch fifo credit system
logic fifo_valid, fifo_ready, fifo_empty, fifo_pop;
logic s2_eff_kill, issue_req, s2_in_flight_d, s2_in_flight_q;
@ -386,14 +385,15 @@ module frontend (
assign icache_dreq_o.req = fifo_ready;
assign fetch_entry_valid_o = ~fifo_empty;
//pragma translate_off
`ifndef VERILATOR
fetch_fifo_credits0 : assert property (
@(posedge clk_i) disable iff (~rst_ni) (fifo_credits_q <= FETCH_FIFO_DEPTH))
else $fatal(1, "[frontend] fetch fifo credits must be <= FETCH_FIFO_DEPTH!");
else $fatal(1,"[frontend] fetch fifo credits must be <= FETCH_FIFO_DEPTH!");
initial begin
assert (FETCH_FIFO_DEPTH <= 8) else $fatal("[frontend] fetch fifo deeper than 8 not supported");
assert (FETCH_WIDTH == 32) else $fatal("[frontend] fetch width != not supported");
assert (FETCH_FIFO_DEPTH <= 8) else $fatal(1,"[frontend] fetch fifo deeper than 8 not supported");
assert (FETCH_WIDTH == 32) else $fatal(1,"[frontend] fetch width != not supported");
end
`endif
//pragma translate_on
@ -480,6 +480,7 @@ module frontend (
);
end
fifo_v2 #(
.DEPTH ( 8 ),
.dtype ( frontend_fetch_t )

View file

@ -14,7 +14,7 @@
import ariane_pkg::*;
module lsu #(
module load_store_unit #(
parameter int unsigned ASID_WIDTH = 1
)(
input logic clk_i,

View file

@ -63,9 +63,11 @@ module load_unit (
assign in_data = {lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.operator};
// output address
// we can now output the lower 12 bit as the index to the cache
assign req_port_o.address_index = lsu_ctrl_i.vaddr[11:0];
assign req_port_o.address_index = lsu_ctrl_i.vaddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
// translation from last cycle, again: control is handled in the FSM
assign req_port_o.address_tag = paddr_i[55:12];
assign req_port_o.address_tag = paddr_i[ariane_pkg::DCACHE_TAG_WIDTH +
ariane_pkg::DCACHE_INDEX_WIDTH-1 :
ariane_pkg::DCACHE_INDEX_WIDTH];
// directly output an exception
assign ex_o = ex_i;
@ -341,17 +343,20 @@ module load_unit (
end
// end result mux fast
`ifndef SYNTHESIS
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
// check invalid offsets
assert property (@(posedge clk_i) disable iff (~rst_ni)
(load_data_q.operator inside {LW, LWU}) |-> load_data_q.address_offset < 5) else $fatal ("invalid address offset used with {LW, LWU}");
assert property (@(posedge clk_i) disable iff (~rst_ni)
(load_data_q.operator inside {LH, LHU}) |-> load_data_q.address_offset < 7) else $fatal ("invalid address offset used with {LH, LHU}");
assert property (@(posedge clk_i) disable iff (~rst_ni)
(load_data_q.operator inside {LB, LBU}) |-> load_data_q.address_offset < 8) else $fatal ("invalid address offset used with {LB, LBU}");
addr_offset0: assert property (@(posedge clk_i) disable iff (~rst_ni)
valid_o |-> (load_data_q.operator inside {LW, LWU}) |-> load_data_q.address_offset < 5) else $fatal (1,"invalid address offset used with {LW, LWU}");
addr_offset1: assert property (@(posedge clk_i) disable iff (~rst_ni)
valid_o |-> (load_data_q.operator inside {LH, LHU}) |-> load_data_q.address_offset < 7) else $fatal (1,"invalid address offset used with {LH, LHU}");
addr_offset2: assert property (@(posedge clk_i) disable iff (~rst_ni)
valid_o |-> (load_data_q.operator inside {LB, LBU}) |-> load_data_q.address_offset < 8) else $fatal (1,"invalid address offset used with {LB, LBU}");
`endif
`endif
//pragma translate_on
endmodule

View file

@ -40,7 +40,7 @@ module mult (
// ---------------------
// Multiplication
// ---------------------
mul i_mul (
multiplier i_multiplier (
.clk_i,
.rst_ni,
.trans_id_i ( fu_data_i.trans_id ),

View file

@ -16,7 +16,7 @@
import ariane_pkg::*;
module mul (
module multiplier (
input logic clk_i,
input logic rst_ni,
input logic [TRANS_ID_BITS-1:0] trans_id_i,

View file

@ -28,6 +28,8 @@
//-------------------------------------------------------------------------------
module plic #(
parameter int ADDR_WIDTH = 32, // can be either 32 or 64 bits (don't use 64bit at the moment as this causes memory map issues)
parameter int DATA_WIDTH = 32, // can be either 32 or 64 bits (don't use 64bit at the moment as this causes memory map issues)
parameter int ID_BITWIDTH = -1, // width of the gateway identifiers
parameter int PARAMETER_BITWIDTH = -1, // width of the internal parameter e.g. priorities
parameter int NUM_TARGETS = -1, // number of target slices
@ -39,8 +41,6 @@ module plic #(
output logic [NUM_TARGETS-1:0] eip_targets_o,
REG_BUS.in external_bus_io
);
localparam int ADDR_WIDTH = 32;
localparam int DATA_WIDTH = 32;
// declare all local variables
// gateway arrays always go from NUM_SOURCES to 1 because gateway ids start at 1
logic gateway_irq_pendings [NUM_SOURCES]; //for pending irqs of the gateways

View file

@ -99,8 +99,8 @@ module ptw #(
assign ptw_active_o = (state_q != IDLE);
assign walking_instr_o = is_instr_ptw_q;
// directly output the correct physical address
assign req_port_o.address_index = ptw_pptr_q[11:0];
assign req_port_o.address_tag = ptw_pptr_q[55:12];
assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
// we are never going to kill this request
assign req_port_o.kill_req = '0;
// we are never going to write with the HPTW

View file

@ -65,6 +65,7 @@ module store_buffer (
logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q;
logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q;
// ----------------------------------------
// Speculative Queue - Core Interface
// ----------------------------------------
@ -119,18 +120,22 @@ module store_buffer (
// ----------------------------------------
// Commit Queue - Memory Interface
// ----------------------------------------
// we will never kill a request in the store buffer since we already know that the translation is valid
// e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
assign req_port_o.kill_req = 1'b0;
assign req_port_o.data_we = 1'b1; // we will always write in the store queue
assign req_port_o.tag_valid = 1'b0;
// those signals can directly be output to the memory
assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[11:0];
assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
// if we got a new request we already saved the tag from the previous cycle
assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[55:12];
assign req_port_o.tag_valid = 1'b0;
assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_TAG_WIDTH +
ariane_pkg::DCACHE_INDEX_WIDTH-1 :
ariane_pkg::DCACHE_INDEX_WIDTH];
assign req_port_o.data_wdata = commit_queue_q[commit_read_pointer_q].data;
assign req_port_o.data_be = commit_queue_q[commit_read_pointer_q].be;
assign req_port_o.data_size = commit_queue_q[commit_read_pointer_q].data_size;
// we will never kill a request in the store buffer since we already know that the translation is valid
// e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
assign req_port_o.kill_req = 1'b0;
assign req_port_o.data_we = 1'b1; // we will always write in the store queue
always_comb begin : store_if
automatic logic [DEPTH_COMMIT:0] commit_status_cnt;
@ -189,6 +194,7 @@ module store_buffer (
// page offsets are virtually and physically the same
always_comb begin : address_checker
page_offset_matches_o = 1'b0;
// check if the LSBs are identical and the entry is valid
for (int unsigned i = 0; i < DEPTH_COMMIT; i++) begin
// Check if the page offset matches and whether the entry is valid, for the commit queue
@ -197,6 +203,7 @@ module store_buffer (
break;
end
end
for (int unsigned i = 0; i < DEPTH_SPEC; i++) begin
// do the same for the speculative queue
if ((page_offset_i[11:3] == speculative_queue_q[i].address[11:3]) && speculative_queue_q[i].valid) begin
@ -212,31 +219,41 @@ module store_buffer (
// registers
always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
always_ff @(posedge clk_i or negedge rst_ni) begin : p_spec
if (~rst_ni) begin
// initialize the queues
speculative_queue_q <= '{default: 0};
commit_queue_q <= '{default: 0};
commit_read_pointer_q <= '0;
commit_write_pointer_q <= '0;
commit_status_cnt_q <= '0;
speculative_read_pointer_q <= '0;
speculative_write_pointer_q <= '0;
speculative_status_cnt_q <= '0;
end else begin
speculative_queue_q <= speculative_queue_n;
commit_queue_q <= commit_queue_n;
commit_read_pointer_q <= commit_read_pointer_n;
commit_write_pointer_q <= commit_write_pointer_n;
commit_status_cnt_q <= commit_status_cnt_n;
speculative_read_pointer_q <= speculative_read_pointer_n;
speculative_write_pointer_q <= speculative_write_pointer_n;
speculative_status_cnt_q <= speculative_status_cnt_n;
end
end
`ifndef SYNTHESIS
`ifndef verilator
// registers
// State registers of the commit queue: the queue storage, its read and write
// pointers and its fill counter. Cleared by the asynchronous active-low reset,
// otherwise loaded with the corresponding next-state (_n) values every cycle.
always_ff @(posedge clk_i or negedge rst_ni) begin : p_commit
    if (~rst_ni) begin
        // pointers and fill level start at zero ...
        commit_status_cnt_q    <= '0;
        commit_write_pointer_q <= '0;
        commit_read_pointer_q  <= '0;
        // ... and every queue entry is cleared
        commit_queue_q         <= '{default: 0};
    end else begin
        commit_status_cnt_q    <= commit_status_cnt_n;
        commit_write_pointer_q <= commit_write_pointer_n;
        commit_read_pointer_q  <= commit_read_pointer_n;
        commit_queue_q         <= commit_queue_n;
    end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef VERILATOR
// assert that commit is never set when we are flushing this would be counter intuitive
// as flush and commit is decided in the same stage
commit_and_flush: assert property (
@ -254,7 +271,9 @@ module store_buffer (
commit_buffer_overflow: assert property (
@(posedge clk_i) rst_ni && (commit_status_cnt_q == DEPTH_COMMIT) |-> !commit_i)
else $error("[Commit Queue] You are trying to commit a store although the buffer is full");
`endif
`endif
//pragma translate_on
endmodule

View file

@ -1,67 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Description: Connects SV AXI interface to structs used by Ariane
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Pure wiring module: drives the request-side signals of the `master`
// interface from the fields of `axi_req_i` and collects the response-side
// signals of the interface into `axi_resp_o`. Contains no logic or state.
module axi_connect (
    input  ariane_axi::req_t  axi_req_i,
    output ariane_axi::resp_t axi_resp_o,
    AXI_BUS.out               master
);

    // ---------------------------------------------
    // request struct -> interface
    // ---------------------------------------------
    // write address channel
    assign master.aw_valid  = axi_req_i.aw_valid;
    assign master.aw_id     = axi_req_i.aw.id;
    assign master.aw_addr   = axi_req_i.aw.addr;
    assign master.aw_len    = axi_req_i.aw.len;
    assign master.aw_size   = axi_req_i.aw.size;
    assign master.aw_burst  = axi_req_i.aw.burst;
    assign master.aw_lock   = axi_req_i.aw.lock;
    assign master.aw_cache  = axi_req_i.aw.cache;
    assign master.aw_prot   = axi_req_i.aw.prot;
    assign master.aw_qos    = axi_req_i.aw.qos;
    assign master.aw_region = axi_req_i.aw.region;
    assign master.aw_user   = '0; // user signal is tied off
    // write data channel
    assign master.w_valid   = axi_req_i.w_valid;
    assign master.w_data    = axi_req_i.w.data;
    assign master.w_strb    = axi_req_i.w.strb;
    assign master.w_last    = axi_req_i.w.last;
    assign master.w_user    = '0; // user signal is tied off
    // write response channel (handshake only)
    assign master.b_ready   = axi_req_i.b_ready;
    // read address channel
    assign master.ar_valid  = axi_req_i.ar_valid;
    assign master.ar_id     = axi_req_i.ar.id;
    assign master.ar_addr   = axi_req_i.ar.addr;
    assign master.ar_len    = axi_req_i.ar.len;
    assign master.ar_size   = axi_req_i.ar.size;
    assign master.ar_burst  = axi_req_i.ar.burst;
    assign master.ar_lock   = axi_req_i.ar.lock;
    assign master.ar_cache  = axi_req_i.ar.cache;
    assign master.ar_prot   = axi_req_i.ar.prot;
    assign master.ar_qos    = axi_req_i.ar.qos;
    assign master.ar_region = axi_req_i.ar.region;
    assign master.ar_user   = '0; // user signal is tied off
    // read data channel (handshake only)
    assign master.r_ready   = axi_req_i.r_ready;

    // ---------------------------------------------
    // interface -> response struct
    // ---------------------------------------------
    // address / data channel handshakes
    assign axi_resp_o.aw_ready = master.aw_ready;
    assign axi_resp_o.w_ready  = master.w_ready;
    assign axi_resp_o.ar_ready = master.ar_ready;
    // write response channel
    assign axi_resp_o.b_valid  = master.b_valid;
    assign axi_resp_o.b.id     = master.b_id;
    assign axi_resp_o.b.resp   = master.b_resp;
    // read data channel
    assign axi_resp_o.r_valid  = master.r_valid;
    assign axi_resp_o.r.id     = master.r_id;
    assign axi_resp_o.r.data   = master.r_data;
    assign axi_resp_o.r.resp   = master.r_resp;
    assign axi_resp_o.r.last   = master.r_last;

endmodule

View file

@ -0,0 +1,67 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Description: Connects SV AXI interface to structs used by Ariane
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Pure wiring module: drives the request-side signals of the `master`
// interface from the fields of `axi_req_i` and collects the response-side
// signals of the interface into `axi_resp_o`. Contains no logic or state.
module axi_master_connect (
    input  ariane_axi::req_t  axi_req_i,
    output ariane_axi::resp_t axi_resp_o,
    AXI_BUS.out               master
);

    // ---------------------------------------------
    // request struct -> interface
    // ---------------------------------------------
    // write address channel
    assign master.aw_valid  = axi_req_i.aw_valid;
    assign master.aw_id     = axi_req_i.aw.id;
    assign master.aw_addr   = axi_req_i.aw.addr;
    assign master.aw_len    = axi_req_i.aw.len;
    assign master.aw_size   = axi_req_i.aw.size;
    assign master.aw_burst  = axi_req_i.aw.burst;
    assign master.aw_lock   = axi_req_i.aw.lock;
    assign master.aw_cache  = axi_req_i.aw.cache;
    assign master.aw_prot   = axi_req_i.aw.prot;
    assign master.aw_qos    = axi_req_i.aw.qos;
    assign master.aw_region = axi_req_i.aw.region;
    assign master.aw_user   = '0; // user signal is tied off
    // write data channel
    assign master.w_valid   = axi_req_i.w_valid;
    assign master.w_data    = axi_req_i.w.data;
    assign master.w_strb    = axi_req_i.w.strb;
    assign master.w_last    = axi_req_i.w.last;
    assign master.w_user    = '0; // user signal is tied off
    // write response channel (handshake only)
    assign master.b_ready   = axi_req_i.b_ready;
    // read address channel
    assign master.ar_valid  = axi_req_i.ar_valid;
    assign master.ar_id     = axi_req_i.ar.id;
    assign master.ar_addr   = axi_req_i.ar.addr;
    assign master.ar_len    = axi_req_i.ar.len;
    assign master.ar_size   = axi_req_i.ar.size;
    assign master.ar_burst  = axi_req_i.ar.burst;
    assign master.ar_lock   = axi_req_i.ar.lock;
    assign master.ar_cache  = axi_req_i.ar.cache;
    assign master.ar_prot   = axi_req_i.ar.prot;
    assign master.ar_qos    = axi_req_i.ar.qos;
    assign master.ar_region = axi_req_i.ar.region;
    assign master.ar_user   = '0; // user signal is tied off
    // read data channel (handshake only)
    assign master.r_ready   = axi_req_i.r_ready;

    // ---------------------------------------------
    // interface -> response struct
    // ---------------------------------------------
    // address / data channel handshakes
    assign axi_resp_o.aw_ready = master.aw_ready;
    assign axi_resp_o.w_ready  = master.w_ready;
    assign axi_resp_o.ar_ready = master.ar_ready;
    // write response channel
    assign axi_resp_o.b_valid  = master.b_valid;
    assign axi_resp_o.b.id     = master.b_id;
    assign axi_resp_o.b.resp   = master.b_resp;
    // read data channel
    assign axi_resp_o.r_valid  = master.r_valid;
    assign axi_resp_o.r.id     = master.r_id;
    assign axi_resp_o.r.data   = master.r_data;
    assign axi_resp_o.r.resp   = master.r_resp;
    assign axi_resp_o.r.last   = master.r_last;

endmodule

View file

@ -0,0 +1,68 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Description: Connects SV AXI interface to structs used by Ariane
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Pure wiring module, reverse direction: collects the request-side signals
// of the `master` interface into the `axi_req_o` struct and drives the
// response-side signals of the interface from `axi_resp_i`.
// Contains no logic or state.
module axi_master_connect_rev (
    output ariane_axi::req_t  axi_req_o,
    input  ariane_axi::resp_t axi_resp_i,
    AXI_BUS.in                master
);

    // ---------------------------------------------
    // interface -> request struct
    // (the aw/w/ar user signals of the interface are not forwarded)
    // ---------------------------------------------
    // write address channel
    assign axi_req_o.aw_valid  = master.aw_valid;
    assign axi_req_o.aw.id     = master.aw_id;
    assign axi_req_o.aw.addr   = master.aw_addr;
    assign axi_req_o.aw.len    = master.aw_len;
    assign axi_req_o.aw.size   = master.aw_size;
    assign axi_req_o.aw.burst  = master.aw_burst;
    assign axi_req_o.aw.lock   = master.aw_lock;
    assign axi_req_o.aw.cache  = master.aw_cache;
    assign axi_req_o.aw.prot   = master.aw_prot;
    assign axi_req_o.aw.qos    = master.aw_qos;
    assign axi_req_o.aw.region = master.aw_region;
    assign axi_req_o.aw.atop   = '0; // not supported at the moment
    // write data channel
    assign axi_req_o.w_valid   = master.w_valid;
    assign axi_req_o.w.data    = master.w_data;
    assign axi_req_o.w.strb    = master.w_strb;
    assign axi_req_o.w.last    = master.w_last;
    // write response channel (handshake only)
    assign axi_req_o.b_ready   = master.b_ready;
    // read address channel
    assign axi_req_o.ar_valid  = master.ar_valid;
    assign axi_req_o.ar.id     = master.ar_id;
    assign axi_req_o.ar.addr   = master.ar_addr;
    assign axi_req_o.ar.len    = master.ar_len;
    assign axi_req_o.ar.size   = master.ar_size;
    assign axi_req_o.ar.burst  = master.ar_burst;
    assign axi_req_o.ar.lock   = master.ar_lock;
    assign axi_req_o.ar.cache  = master.ar_cache;
    assign axi_req_o.ar.prot   = master.ar_prot;
    assign axi_req_o.ar.qos    = master.ar_qos;
    assign axi_req_o.ar.region = master.ar_region;
    // read data channel (handshake only)
    assign axi_req_o.r_ready   = master.r_ready;

    // ---------------------------------------------
    // response struct -> interface
    // ---------------------------------------------
    // address / data channel handshakes
    assign master.aw_ready = axi_resp_i.aw_ready;
    assign master.w_ready  = axi_resp_i.w_ready;
    assign master.ar_ready = axi_resp_i.ar_ready;
    // write response channel
    assign master.b_valid  = axi_resp_i.b_valid;
    assign master.b_id     = axi_resp_i.b.id;
    assign master.b_resp   = axi_resp_i.b.resp;
    // read data channel
    assign master.r_valid  = axi_resp_i.r_valid;
    assign master.r_id     = axi_resp_i.r.id;
    assign master.r_data   = axi_resp_i.r.data;
    assign master.r_resp   = axi_resp_i.r.resp;
    assign master.r_last   = axi_resp_i.r.last;

endmodule

View file

@ -0,0 +1,68 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Description: Connects SV AXI interface to structs used by Ariane
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Pure wiring module: collects the request-side signals of the `slave`
// interface into the `axi_req_o` struct and drives the response-side signals
// of the interface from `axi_resp_i`. Contains no logic or state.
module axi_slave_connect (
    output ariane_axi::req_t  axi_req_o,
    input  ariane_axi::resp_t axi_resp_i,
    AXI_BUS.in                slave
);

    // ---------------------------------------------
    // interface -> request struct
    // (the aw/w/ar user signals of the interface are not forwarded)
    // ---------------------------------------------
    // write address channel
    assign axi_req_o.aw_valid  = slave.aw_valid;
    assign axi_req_o.aw.id     = slave.aw_id;
    assign axi_req_o.aw.addr   = slave.aw_addr;
    assign axi_req_o.aw.len    = slave.aw_len;
    assign axi_req_o.aw.size   = slave.aw_size;
    assign axi_req_o.aw.burst  = slave.aw_burst;
    assign axi_req_o.aw.lock   = slave.aw_lock;
    assign axi_req_o.aw.cache  = slave.aw_cache;
    assign axi_req_o.aw.prot   = slave.aw_prot;
    assign axi_req_o.aw.qos    = slave.aw_qos;
    assign axi_req_o.aw.region = slave.aw_region;
    assign axi_req_o.aw.atop   = '0; // not supported at the moment
    // write data channel
    assign axi_req_o.w_valid   = slave.w_valid;
    assign axi_req_o.w.data    = slave.w_data;
    assign axi_req_o.w.strb    = slave.w_strb;
    assign axi_req_o.w.last    = slave.w_last;
    // write response channel (handshake only)
    assign axi_req_o.b_ready   = slave.b_ready;
    // read address channel
    assign axi_req_o.ar_valid  = slave.ar_valid;
    assign axi_req_o.ar.id     = slave.ar_id;
    assign axi_req_o.ar.addr   = slave.ar_addr;
    assign axi_req_o.ar.len    = slave.ar_len;
    assign axi_req_o.ar.size   = slave.ar_size;
    assign axi_req_o.ar.burst  = slave.ar_burst;
    assign axi_req_o.ar.lock   = slave.ar_lock;
    assign axi_req_o.ar.cache  = slave.ar_cache;
    assign axi_req_o.ar.prot   = slave.ar_prot;
    assign axi_req_o.ar.qos    = slave.ar_qos;
    assign axi_req_o.ar.region = slave.ar_region;
    // read data channel (handshake only)
    assign axi_req_o.r_ready   = slave.r_ready;

    // ---------------------------------------------
    // response struct -> interface
    // ---------------------------------------------
    // address / data channel handshakes
    assign slave.aw_ready = axi_resp_i.aw_ready;
    assign slave.w_ready  = axi_resp_i.w_ready;
    assign slave.ar_ready = axi_resp_i.ar_ready;
    // write response channel
    assign slave.b_valid  = axi_resp_i.b_valid;
    assign slave.b_id     = axi_resp_i.b.id;
    assign slave.b_resp   = axi_resp_i.b.resp;
    // read data channel
    assign slave.r_valid  = axi_resp_i.r_valid;
    assign slave.r_id     = axi_resp_i.r.id;
    assign slave.r_data   = axi_resp_i.r.data;
    assign slave.r_resp   = axi_resp_i.r.resp;
    assign slave.r_last   = axi_resp_i.r.last;

endmodule

View file

@ -0,0 +1,67 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Description: Connects the structs used by Ariane to an SV AXI interface (reverse direction of axi_slave_connect)
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
// Drives the request-side signals of an AXI_BUS interface (modport `out`) from
// the request struct and collects the interface's response-side signals into
// the response struct.
module axi_slave_connect_rev (
    input  ariane_axi::req_t  axi_req_i,   // request struct unpacked onto `slave`
    output ariane_axi::resp_t axi_resp_o,  // response struct assembled from `slave`
    AXI_BUS.out               slave
);

    // ------------------------------------------------------------------
    // AW: write address channel
    // ------------------------------------------------------------------
    assign slave.aw_valid      = axi_req_i.aw_valid;
    assign axi_resp_o.aw_ready = slave.aw_ready;

    assign slave.aw_id         = axi_req_i.aw.id;
    assign slave.aw_addr       = axi_req_i.aw.addr;
    assign slave.aw_len        = axi_req_i.aw.len;
    assign slave.aw_size       = axi_req_i.aw.size;
    assign slave.aw_burst      = axi_req_i.aw.burst;
    assign slave.aw_lock       = axi_req_i.aw.lock;
    assign slave.aw_cache      = axi_req_i.aw.cache;
    assign slave.aw_prot       = axi_req_i.aw.prot;
    assign slave.aw_qos        = axi_req_i.aw.qos;
    assign slave.aw_region     = axi_req_i.aw.region;
    assign slave.aw_user       = '0; // user bits are tied off

    // ------------------------------------------------------------------
    // W: write data channel
    // ------------------------------------------------------------------
    assign slave.w_valid       = axi_req_i.w_valid;
    assign axi_resp_o.w_ready  = slave.w_ready;

    assign slave.w_data        = axi_req_i.w.data;
    assign slave.w_strb        = axi_req_i.w.strb;
    assign slave.w_last        = axi_req_i.w.last;
    assign slave.w_user        = '0; // user bits are tied off

    // ------------------------------------------------------------------
    // B: write response channel
    // ------------------------------------------------------------------
    assign axi_resp_o.b_valid  = slave.b_valid;
    assign slave.b_ready       = axi_req_i.b_ready;

    assign axi_resp_o.b.id     = slave.b_id;
    assign axi_resp_o.b.resp   = slave.b_resp;

    // ------------------------------------------------------------------
    // AR: read address channel
    // ------------------------------------------------------------------
    assign slave.ar_valid      = axi_req_i.ar_valid;
    assign axi_resp_o.ar_ready = slave.ar_ready;

    assign slave.ar_id         = axi_req_i.ar.id;
    assign slave.ar_addr       = axi_req_i.ar.addr;
    assign slave.ar_len        = axi_req_i.ar.len;
    assign slave.ar_size       = axi_req_i.ar.size;
    assign slave.ar_burst      = axi_req_i.ar.burst;
    assign slave.ar_lock       = axi_req_i.ar.lock;
    assign slave.ar_cache      = axi_req_i.ar.cache;
    assign slave.ar_prot       = axi_req_i.ar.prot;
    assign slave.ar_qos        = axi_req_i.ar.qos;
    assign slave.ar_region     = axi_req_i.ar.region;
    assign slave.ar_user       = '0; // user bits are tied off

    // ------------------------------------------------------------------
    // R: read data channel
    // ------------------------------------------------------------------
    assign axi_resp_o.r_valid  = slave.r_valid;
    assign slave.r_ready       = axi_req_i.r_ready;

    assign axi_resp_o.r.id     = slave.r_id;
    assign axi_resp_o.r.data   = slave.r_data;
    assign axi_resp_o.r.resp   = slave.r_resp;
    assign axi_resp_o.r.last   = slave.r_last;

endmodule

View file

@ -62,6 +62,7 @@ class instruction_tracer;
scoreboard_entry_t commit_instruction;
// initialize register 0
gp_reg_file = '{default:0};
fp_reg_file = '{default:0};
forever begin
automatic branchpredict_t bp_instruction = '0;

View file

@ -21,7 +21,7 @@
module sram #(
parameter DATA_WIDTH = 64,
parameter NUM_WORDS = 1024,
parameter OUT_REGS = 0 // enables output registers in FPGA macro (read lat = 2)
parameter OUT_REGS = 0 // enables output registers in FPGA macro (read lat = 2)
)(
input logic clk_i,
input logic rst_ni,

View file

@ -32,8 +32,7 @@ ariane:
src/issue_stage.sv,
src/lfsr.sv,
src/load_unit.sv,
src/lsu_arbiter.sv,
src/lsu.sv,
src/load_store_unit.sv,
src/miss_handler.sv,
src/mmu.sv,
src/mult.sv,

View file

@ -14,7 +14,6 @@
// Instantiates an AXI-Bus and memories
module ariane_testharness #(
parameter logic [63:0] CACHE_START_ADDR = 64'h8000_0000, // address on which to decide whether the request is cache-able or not
parameter int unsigned AXI_ID_WIDTH = 4,
parameter int unsigned AXI_USER_WIDTH = 1,
parameter int unsigned AXI_ADDRESS_WIDTH = 64,
@ -173,8 +172,8 @@ module ariane_testharness #(
assign dmi_exit = 1'b0;
end
ariane_axi::req_t axi_sba_req;
ariane_axi::resp_t axi_sba_resp;
ariane_axi::req_t dm_axi_m_req, dm_axi_s_req;
ariane_axi::resp_t dm_axi_m_resp, dm_axi_s_resp;
// debug module
dm_top #(
@ -185,26 +184,30 @@ module ariane_testharness #(
.AxiDataWidth ( AXI_DATA_WIDTH ),
.AxiUserWidth ( AXI_USER_WIDTH )
) i_dm_top (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ), // PoR
.testmode_i ( test_en ),
.ndmreset_o ( ndmreset ),
.dmactive_o ( ), // active debug session
.debug_req_o ( debug_req_core ),
.unavailable_i ( '0 ),
.axi_slave ( master[ariane_soc::Debug] ),
.axi_req_o ( axi_sba_req ),
.axi_resp_i ( axi_sba_resp ),
.dmi_rst_ni ( rst_ni ),
.dmi_req_valid_i ( debug_req_valid ),
.dmi_req_ready_o ( debug_req_ready ),
.dmi_req_i ( debug_req ),
.dmi_resp_valid_o ( debug_resp_valid ),
.dmi_resp_ready_i ( debug_resp_ready ),
.dmi_resp_o ( debug_resp )
.clk_i ( clk_i ),
.rst_ni ( rst_ni ), // PoR
.testmode_i ( test_en ),
.ndmreset_o ( ndmreset ),
.dmactive_o ( ), // active debug session
.debug_req_o ( debug_req_core ),
.unavailable_i ( '0 ),
.axi_s_req_i ( dm_axi_s_req ),
.axi_s_resp_o ( dm_axi_s_resp ),
.axi_m_req_o ( dm_axi_m_req ),
.axi_m_resp_i ( dm_axi_m_resp ),
.dmi_rst_ni ( rst_ni ),
.dmi_req_valid_i ( debug_req_valid ),
.dmi_req_ready_o ( debug_req_ready ),
.dmi_req_i ( debug_req ),
.dmi_resp_valid_o ( debug_resp_valid ),
.dmi_resp_ready_i ( debug_resp_ready ),
.dmi_resp_o ( debug_resp )
);
axi_connect i_axi_connect_sba (.axi_req_i(axi_sba_req), .axi_resp_o(axi_sba_resp), .master(slave[1]));
axi_master_connect i_axi_master_dm (.axi_req_i(dm_axi_m_req), .axi_resp_o(dm_axi_m_resp), .master(slave[1]));
axi_slave_connect i_axi_slave_dm (.axi_req_o(dm_axi_s_req), .axi_resp_i(dm_axi_s_resp), .slave(master[ariane_soc::Debug]));
// ---------------
// ROM
@ -311,6 +314,7 @@ module ariane_testharness #(
.r_ready_o ( dram.r_ready )
);
assign aw_chan_i.atop = '0;
assign aw_chan_i.id = master[ariane_soc::DRAM].aw_id;
assign aw_chan_i.addr = master[ariane_soc::DRAM].aw_addr;
assign aw_chan_i.len = master[ariane_soc::DRAM].aw_len;
@ -463,21 +467,27 @@ module ariane_testharness #(
logic ipi;
logic timer_irq;
ariane_axi::req_t axi_clint_req;
ariane_axi::resp_t axi_clint_resp;
clint #(
.AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ),
.AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
.AXI_ID_WIDTH ( AXI_ID_WIDTH_SLAVES ),
.NR_CORES ( 1 )
) i_clint (
.clk_i ( clk_i ),
.rst_ni ( ndmreset_n ),
.testmode_i ( test_en ),
.slave ( master[ariane_soc::CLINT] ),
.rtc_i ( rtc_i ),
.timer_irq_o ( timer_irq ),
.ipi_o ( ipi )
.clk_i ( clk_i ),
.rst_ni ( ndmreset_n ),
.testmode_i ( test_en ),
.axi_req_i ( axi_clint_req ),
.axi_resp_o ( axi_clint_resp ),
.rtc_i ( rtc_i ),
.timer_irq_o ( timer_irq ),
.ipi_o ( ipi )
);
axi_slave_connect i_axi_slave_connect_clint (.axi_req_o(axi_clint_req), .axi_resp_i(axi_clint_resp), .slave(master[ariane_soc::CLINT]));
// ---------------
// Peripherals
// ---------------
@ -527,7 +537,11 @@ module ariane_testharness #(
ariane_axi::resp_t axi_ariane_resp;
ariane #(
.CACHE_START_ADDR ( CACHE_START_ADDR )
`ifdef SERPENT_PULP
.SwapEndianess ( 0 ),
.CachedAddrEnd ( (ariane_soc::DRAMBase + ariane_soc::DRAMLength) ),
`endif
.CachedAddrBeg ( ariane_soc::DRAMBase )
) i_ariane (
.clk_i ( clk_i ),
.rst_ni ( ndmreset_n ),
@ -541,6 +555,6 @@ module ariane_testharness #(
.axi_resp_i ( axi_ariane_resp )
);
axi_connect i_axi_connect_ariane (.axi_req_i(axi_ariane_req), .axi_resp_o(axi_ariane_resp), .master(slave[0]));
axi_master_connect i_axi_master_connect_ariane (.axi_req_i(axi_ariane_req), .axi_resp_o(axi_ariane_resp), .master(slave[0]));
endmodule

0
tb/common/core_mem.sv Executable file → Normal file
View file

0
tb/common/dp_ram.sv Executable file → Normal file
View file

0
tb/common/string_buffer.svh Executable file → Normal file
View file

3
tb/tb_serpent_dcache/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
work
modelsim.ini
*.rep

27
tb/tb_serpent_dcache/Makefile Executable file
View file

@ -0,0 +1,27 @@
# Questa compile / simulation flow for this testbench.
#   build : wipe and recompile the work library
#   sim   : interactive simulation (loads wave.do)
#   simc  : batch simulation, runs to completion and exits

# --- user-overridable settings ---------------------------------------------
library        ?= work
toplevel       ?= tb
# suffix appended to the vlib/vlog/vsim command names
questa_version ?= ${QUESTASIM_VERSION}

# --- sources ------------------------------------------------------------------
src-list := tb.list
inc-path := $(shell pwd)/hdl/
# file names are read from $(src-list)
src      := $(shell xargs printf '\n%s' < $(src-list) | cut -b 1-)

# --- tool options (appended to whatever is already set) ---------------------
compile_flag += +cover+i_dut -incr -64 -nologo
sim_opts     += -64 -coverage -classdebug -voptargs="+acc"

.PHONY: clean simc sim build

build: clean
	vlib${questa_version} $(library)
	vlog${questa_version} -work $(library) -pedanticerrors $(src) $(compile_flag) +incdir+$(inc-path)
	touch $(library)/.build

sim: build
	vsim${questa_version} -lib $(library) $(toplevel) -do "do wave.do" $(sim_opts)

simc: build
	vsim${questa_version} -lib $(library) $(toplevel) -c -do "run -all; exit" $(sim_opts)

clean:
	rm -rf $(library)

View file

@ -0,0 +1,637 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: testbench for serpent_dcache. includes the following tests:
//
// 0) random accesses with disabled cache
// 1) random accesses with enabled cache to cacheable and noncacheable memory
// 2) linear, wrapping sweep with enabled cache
// 3) 1) with random stalls on the memory side and TLB side
// 4) nr 3) with random invalidations
//
// note that we use a simplified address translation scheme to emulate the TLB.
// (random offsets).
`include "tb.svh"
import ariane_pkg::*;
import serpent_cache_pkg::*;
import tb_pkg::*;
module tb;
// leave this
timeunit 1ps;
timeprecision 1ps;
// memory configuration (64bit words)
parameter MemBytes = 2**DCACHE_INDEX_WIDTH * 4 * 32;
parameter MemWords = MemBytes>>3;
// noncacheable portion
parameter logic [63:0] CachedAddrBeg = MemBytes>>3;//1/8th of the memory is NC
parameter logic [63:0] CachedAddrEnd = 64'hFFFF_FFFF_FFFF_FFFF;
// contention and invalidation rates (in %)
parameter MemRandHitRate = 75;
parameter MemRandInvRate = 10;
parameter TlbHitRate = 95;
// parameters for random read sequences (in %)
parameter FlushRate = 10;
parameter KillRate = 5;
parameter Verbose = 0;
///////////////////////////////////////////////////////////////////////////////
// MUT signal declarations
///////////////////////////////////////////////////////////////////////////////
// these signals are connected 1:1 to the equally named ports of i_dut
logic enable_i;
logic flush_i;
logic flush_ack_o;
logic miss_o;
logic wbuffer_empty_o;
amo_req_t amo_req_i;
amo_resp_t amo_resp_o;
// request ports: index 0 and 1 are driven by the read port agents, index 2 by the write port agent
dcache_req_i_t [2:0] req_ports_i;
dcache_req_o_t [2:0] req_ports_o;
// memory side interface, connected to i_tb_mem
logic mem_rtrn_vld_i;
dcache_rtrn_t mem_rtrn_i;
logic mem_data_req_o;
logic mem_data_ack_i;
dcache_req_t mem_data_o;
///////////////////////////////////////////////////////////////////////////////
// TB signal declarations
///////////////////////////////////////////////////////////////////////////////
// memory view exported by i_tb_mem, used to look up expected read data
logic [63:0] mem_array[MemWords-1:0];
string test_name;
logic clk_i, rst_ni;
// sequence control (set by runSeq); per-agent signals use index 0/1 for the read ports and 2 for the write port
logic [31:0] seq_num_resp, seq_num_write;
seq_t [2:0] seq_type;
logic [2:0] seq_done;
logic [6:0] req_rate[2:0];
logic seq_run, seq_last;
// set at the end of p_stim, stops the clock generator after a few extra cycles
logic end_of_sim;
// randomization enables
logic mem_rand_en;
logic inv_rand_en;
logic amo_rand_en;
logic tlb_rand_en;
// write and commit information forwarded to i_tb_mem
logic write_en;
logic [63:0] write_paddr, write_data;
logic [7:0] write_be;
logic check_en;
logic [7:0] commit_be;
logic [63:0] commit_paddr;
logic commit_en;
// entry of the expected-response FIFOs (one FIFO per read port)
typedef struct packed {
logic [1:0] size;
logic [63:0] paddr;
} resp_fifo_t;
logic [63:0] act_paddr[1:0];
logic [63:0] exp_rdata[1:0];
logic [63:0] exp_paddr[1:0];
resp_fifo_t fifo_data_in[1:0];
resp_fifo_t fifo_data[1:0];
logic [1:0] fifo_push, fifo_pop, fifo_flush;
// flush requests: [0] and [1] come from the read port agents, [2] from the coordinator (flushCache); OR-ed into flush_i
logic [2:0] flush;
logic flush_rand_en;
///////////////////////////////////////////////////////////////////////////////
// helper tasks
///////////////////////////////////////////////////////////////////////////////
// Starts one test sequence on all port agents and blocks until every agent
// reports completion.
//   nReadVectors  : value applied to seq_num_resp
//   nWriteVectors : value applied to seq_num_write
//   last          : value applied to seq_last
task automatic runSeq(input int nReadVectors, input int nWriteVectors = 0, input logic last =1'b0);
  // sequence parameters
  seq_num_resp  = nReadVectors;
  seq_num_write = nWriteVectors;
  seq_last      = last;
  // pulse the run signal for one cycle
  seq_run       = 1'b1;
  `APPL_WAIT_CYC(clk_i,1)
  seq_run       = 1'b0;
  // wait until all three agents have raised their done flag
  `APPL_WAIT_SIG(clk_i, &seq_done)
  `APPL_WAIT_CYC(clk_i,1)
endtask : runSeq
// Requests a cache flush via the coordinator's flush bit (flush[2]) and waits
// for the DUT to acknowledge it.
task automatic flushCache();
  flush[2] = 1'b1;
  `APPL_WAIT_SIG(clk_i, flush_ack_o)
  // was 0'b0: a literal size of zero is not legal, use a proper 1-bit literal
  flush[2] = 1'b0;
  `APPL_WAIT_CYC(clk_i,1)
endtask : flushCache
// Pulses check_en (forwarded to i_tb_mem.check_en_i) for one clock cycle.
task automatic memCheck();
  check_en = 1'b1;
  `APPL_WAIT_CYC(clk_i,1)
  // was 0'b0: a literal size of zero is not legal, use a proper 1-bit literal
  check_en = 1'b0;
  `APPL_WAIT_CYC(clk_i,1)
endtask : memCheck
///////////////////////////////////////////////////////////////////////////////
// Clock Process
///////////////////////////////////////////////////////////////////////////////
// Clock generator: toggles clk_i until end_of_sim is raised, then produces
// 100 more cycles and stops.
// This is an initial process rather than `always @*`: the latter only starts
// once it observes a change on end_of_sim, which depends on the scheduling
// order relative to the stimulus process at time 0.
initial begin
  do begin
    clk_i = 1;#(CLK_HI);
    clk_i = 0;#(CLK_LO);
  end while (end_of_sim == 1'b0);
  repeat (100) begin
    // generate a few extra cycles to allow
    // response acquisition to complete
    clk_i = 1;#(CLK_HI);
    clk_i = 0;#(CLK_LO);
  end
end
///////////////////////////////////////////////////////////////////////////////
// memory emulation
///////////////////////////////////////////////////////////////////////////////
// memory model: serves the DUT's memory interface and exports its memory
// view (mem_array) for the generation of expected read data
tb_mem #(
  .MemWords       ( MemWords       ),
  .CachedAddrBeg  ( CachedAddrBeg  ),
  .CachedAddrEnd  ( CachedAddrEnd  ),
  .MemRandHitRate ( MemRandHitRate ),
  .MemRandInvRate ( MemRandInvRate )
) i_tb_mem (
  .clk_i          ( clk_i          ),
  .rst_ni         ( rst_ni         ),
  // randomization enables
  .mem_rand_en_i  ( mem_rand_en    ),
  .inv_rand_en_i  ( inv_rand_en    ),
  .amo_rand_en_i  ( amo_rand_en    ),
  // requests from the DUT
  .mem_data_req_i ( mem_data_req_o ),
  .mem_data_ack_o ( mem_data_ack_i ),
  .mem_data_i     ( mem_data_o     ),
  // responses to the DUT
  .mem_rtrn_vld_o ( mem_rtrn_vld_i ),
  .mem_rtrn_o     ( mem_rtrn_i     ),
  // for verification
  .seq_last_i     ( seq_last       ),
  .check_en_i     ( check_en       ),
  .write_en_i     ( write_en       ),
  .write_be_i     ( write_be       ),
  .write_data_i   ( write_data     ),
  .write_paddr_i  ( write_paddr    ),
  .commit_en_i    ( commit_en      ),
  .commit_be_i    ( commit_be      ),
  .commit_paddr_i ( commit_paddr   ),
  .mem_array_o    ( mem_array      )
);
///////////////////////////////////////////////////////////////////////////////
// MUT
///////////////////////////////////////////////////////////////////////////////
// module under test
serpent_dcache #(
  .CachedAddrBeg   ( CachedAddrBeg   ),
  .CachedAddrEnd   ( CachedAddrEnd   )
) i_dut (
  .clk_i           ( clk_i           ),
  .rst_ni          ( rst_ni          ),
  // cache control and status
  .enable_i        ( enable_i        ),
  .flush_i         ( flush_i         ),
  .flush_ack_o     ( flush_ack_o     ),
  .miss_o          ( miss_o          ),
  .wbuffer_empty_o ( wbuffer_empty_o ),
  // AMO interface
  .amo_req_i       ( amo_req_i       ),
  .amo_resp_o      ( amo_resp_o      ),
  // request ports (driven by the TB port agents)
  .req_ports_i     ( req_ports_i     ),
  .req_ports_o     ( req_ports_o     ),
  // memory side (connected to i_tb_mem)
  .mem_data_req_o  ( mem_data_req_o  ),
  .mem_data_ack_i  ( mem_data_ack_i  ),
  .mem_data_o      ( mem_data_o      ),
  .mem_rtrn_vld_i  ( mem_rtrn_vld_i  ),
  .mem_rtrn_i      ( mem_rtrn_i      )
);
///////////////////////////////////////////////////////////////////////////////
// port emulation programs
///////////////////////////////////////////////////////////////////////////////
// get actual paddr from read controllers
// The actual physical address of each read port is assembled from internal
// signals of the DUT (tag, index, offset) via hierarchical references.
// NOTE: the paths rely on the tool-generated block name `genblk1` inside i_dut
// and need to be updated if that generate block gets labelled or reordered.
assign act_paddr[0] = {i_dut.genblk1[0].i_serpent_dcache_ctrl.address_tag_d,
i_dut.genblk1[0].i_serpent_dcache_ctrl.address_idx_q,
i_dut.genblk1[0].i_serpent_dcache_ctrl.address_off_q};
assign act_paddr[1] = {i_dut.genblk1[1].i_serpent_dcache_ctrl.address_tag_d,
i_dut.genblk1[1].i_serpent_dcache_ctrl.address_idx_q,
i_dut.genblk1[1].i_serpent_dcache_ctrl.address_off_q};
// generate fifo queues for expected responses
// One FIFO per read port tracks size and physical address of each granted
// request; its head entry selects the expected read data from mem_array.
generate
  for (genvar k = 0; k < 2; k++) begin
    // FIFO control: push on a granted request, pop on a returned response,
    // flush when the request is killed
    assign fifo_push[k]    = req_ports_i[k].data_req & req_ports_o[k].data_gnt;
    assign fifo_pop[k]     = req_ports_o[k].data_rvalid;
    assign fifo_flush[k]   = req_ports_i[k].kill_req;

    // FIFO payload and expected data lookup (64bit word address = paddr>>3)
    assign fifo_data_in[k] = {req_ports_i[k].data_size, exp_paddr[k]};
    assign exp_rdata[k]    = mem_array[fifo_data[k].paddr>>3];

    fifo_v2 #(
      .dtype       ( resp_fifo_t     )
    ) i_resp_fifo (
      .clk_i       ( clk_i           ),
      .rst_ni      ( rst_ni          ),
      .testmode_i  ( '0              ),
      .flush_i     ( fifo_flush[k]   ),
      // status outputs are not used
      .full_o      (                 ),
      .empty_o     (                 ),
      .alm_full_o  (                 ),
      .alm_empty_o (                 ),
      // write side
      .push_i      ( fifo_push[k]    ),
      .data_i      ( fifo_data_in[k] ),
      // read side
      .pop_i       ( fifo_pop[k]     ),
      .data_o      ( fifo_data[k]    )
    );
  end
endgenerate
// read port agent connected to request port 0 of the DUT
tb_readport #(
  .PortName        ( "RD0"              ),
  .RndSeed         ( 5555555            ),
  .Verbose         ( Verbose            ),
  .MemWords        ( MemWords           ),
  .CachedAddrBeg   ( CachedAddrBeg      ),
  .CachedAddrEnd   ( CachedAddrEnd      ),
  .FlushRate       ( FlushRate          ),
  .KillRate        ( KillRate           ),
  .TlbHitRate      ( TlbHitRate         )
) i_tb_readport0 (
  .clk_i           ( clk_i              ),
  .rst_ni          ( rst_ni             ),
  .test_name_i     ( test_name          ),
  // sequence configuration and control
  .req_rate_i      ( req_rate[0]        ),
  .seq_type_i      ( seq_type[0]        ),
  .tlb_rand_en_i   ( tlb_rand_en        ),
  .flush_rand_en_i ( flush_rand_en      ),
  .seq_run_i       ( seq_run            ),
  .seq_num_resp_i  ( seq_num_resp       ),
  .seq_last_i      ( seq_last           ),
  .seq_done_o      ( seq_done[0]        ),
  // expected and actual responses
  .exp_paddr_o     ( exp_paddr[0]       ),
  .exp_size_i      ( fifo_data[0].size  ),
  .exp_paddr_i     ( fifo_data[0].paddr ),
  .exp_rdata_i     ( exp_rdata[0]       ),
  .act_paddr_i     ( act_paddr[0]       ),
  // flush handshake
  .flush_o         ( flush[0]           ),
  .flush_ack_i     ( flush_ack_o        ),
  // DUT request port
  .dut_req_port_o  ( req_ports_i[0]     ),
  .dut_req_port_i  ( req_ports_o[0]     )
);
// read port agent connected to request port 1 of the DUT
tb_readport #(
  .PortName        ( "RD1"              ),
  .RndSeed         ( 3333333            ),
  .Verbose         ( Verbose            ),
  .MemWords        ( MemWords           ),
  .CachedAddrBeg   ( CachedAddrBeg      ),
  .CachedAddrEnd   ( CachedAddrEnd      ),
  .FlushRate       ( FlushRate          ),
  .KillRate        ( KillRate           ),
  .TlbHitRate      ( TlbHitRate         )
) i_tb_readport1 (
  .clk_i           ( clk_i              ),
  .rst_ni          ( rst_ni             ),
  .test_name_i     ( test_name          ),
  // sequence configuration and control
  .req_rate_i      ( req_rate[1]        ),
  .seq_type_i      ( seq_type[1]        ),
  .tlb_rand_en_i   ( tlb_rand_en        ),
  .flush_rand_en_i ( flush_rand_en      ),
  .seq_run_i       ( seq_run            ),
  .seq_num_resp_i  ( seq_num_resp       ),
  .seq_last_i      ( seq_last           ),
  .seq_done_o      ( seq_done[1]        ),
  // expected and actual responses
  .exp_paddr_o     ( exp_paddr[1]       ),
  .exp_size_i      ( fifo_data[1].size  ),
  .exp_paddr_i     ( fifo_data[1].paddr ),
  .exp_rdata_i     ( exp_rdata[1]       ),
  .act_paddr_i     ( act_paddr[1]       ),
  // flush handshake
  .flush_o         ( flush[1]           ),
  .flush_ack_i     ( flush_ack_o        ),
  // DUT request port
  .dut_req_port_o  ( req_ports_i[1]     ),
  .dut_req_port_i  ( req_ports_o[1]     )
);
// write port agent connected to request port 2 of the DUT
tb_writeport #(
  .PortName       ( "WR0"          ),
  .RndSeed        ( 7777777        ),
  .Verbose        ( Verbose        ),
  .MemWords       ( MemWords       ),
  .CachedAddrBeg  ( CachedAddrBeg  ),
  .CachedAddrEnd  ( CachedAddrEnd  )
) i_tb_writeport (
  .clk_i          ( clk_i          ),
  .rst_ni         ( rst_ni         ),
  .test_name_i    ( test_name      ),
  // sequence configuration and control
  .req_rate_i     ( req_rate[2]    ),
  .seq_type_i     ( seq_type[2]    ),
  .seq_run_i      ( seq_run        ),
  .seq_num_vect_i ( seq_num_write  ),
  .seq_last_i     ( seq_last       ),
  .seq_done_o     ( seq_done[2]    ),
  // DUT request port
  .dut_req_port_o ( req_ports_i[2] ),
  .dut_req_port_i ( req_ports_o[2] )
);
// writes accepted on port 2 are forwarded to the memory model
assign write_en     = req_ports_i[2].data_req & req_ports_o[2].data_gnt & req_ports_i[2].data_we;
assign write_paddr  = {req_ports_i[2].address_tag, req_ports_i[2].address_index};
assign write_data   = req_ports_i[2].data_wdata;
assign write_be     = req_ports_i[2].data_be;

// write buffer commit information is taken from internal signals of the DUT
assign commit_en    = i_dut.i_serpent_dcache_wbuffer.evict;
assign commit_paddr = i_dut.i_serpent_dcache_wbuffer.wr_paddr;
assign commit_be    = i_dut.i_serpent_dcache_wbuffer.wr_data_be_o;

// TODO: implement AMO agent
// until then the AMO request is tied off and amo_resp_o is left unobserved
assign amo_req_i.req       = '0;
assign amo_req_i.amo_op    = AMO_NONE;
assign amo_req_i.size      = '0;
assign amo_req_i.operand_a = '0;
assign amo_req_i.operand_b = '0;

// the cache is flushed if any agent or the coordinator requests it
assign flush_i = |flush;
///////////////////////////////////////////////////////////////////////////////
// simulation coordinator process
///////////////////////////////////////////////////////////////////////////////
// TODO: implement CSR / controller
// flush_i, flush_ack_o, enable_i, miss_o, wbuffer_empty_o
// Simulation coordinator: initializes all TB control signals, applies reset,
// then runs the test sequences one after the other. Each test configures the
// cache enable, the randomization switches and the per-agent sequence type and
// request rate, runs the sequence (runSeq), flushes the cache (flushCache) and
// triggers a memory check (memCheck). Raises end_of_sim when done.
// Note on positional assignment patterns for seq_type / req_rate: both are
// declared [2:0], so the first listed element is index 2 (write port) and the
// last one is index 0 (read port 0).
initial begin : p_stim
test_name = "";
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd75};
seq_run = 1'b0;
seq_last = 1'b0;
seq_num_resp = '0;
seq_num_write = '0;
check_en = '0;
// seq_done
end_of_sim = 0;
rst_ni = 0;
// randomization settings
mem_rand_en = 0;
tlb_rand_en = 0;
inv_rand_en = 0;
amo_rand_en = 0;
flush_rand_en = 0;
// cache ctrl
flush[2] = 0;
// flush_ack_o
// wbuffer_empty_o
enable_i = 0;
// miss_o
// print some info
$display("TB> current configuration:");
$display("TB> MemWords %d", MemWords);
$display("TB> CachedAddrBeg %16X", CachedAddrBeg);
$display("TB> CachedAddrEnd %16X", CachedAddrEnd);
$display("TB> MemRandHitRate %d", MemRandHitRate);
$display("TB> MemRandInvRate %d", MemRandInvRate);
// reset cycles
`APPL_WAIT_CYC(clk_i,100)
rst_ni = 1'b1;
`APPL_WAIT_CYC(clk_i,100)
$display("TB> start with test sequences");
// apply each test until seq_num_resp memory
// requests have successfully completed
///////////////////////////////////////////////
test_name = "TEST 0 -- random read -- disabled cache";
// config
enable_i = 0;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 1 -- sequential read -- disabled cache";
// config
enable_i = 0;
seq_type = '{default: LINEAR_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 2 -- random read -- enabled cache";
// config
enable_i = 1;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 3 -- linear read -- enabled cache";
// config
enable_i = 1;
seq_type = '{default: LINEAR_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 4 -- random read -- enabled cache + tlb, mem contentions";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 5 -- linear read -- enabled cache + tlb, mem contentions";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
seq_type = '{default: LINEAR_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 6 -- random read -- enabled cache + tlb, mem contentions + invalidations";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
inv_rand_en = 1;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd50};
runSeq(10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 7 -- random read/write -- disabled cache";
// config
enable_i = 0;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd25};
runSeq(10000,10000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 8 -- random read/write -- enabled cache";
// config
enable_i = 1;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd25};
runSeq(10000,20000);// 10000 read responses, 20000 writes; the last-sequence flag is NOT set here
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 9 -- random read/write -- enabled cache + tlb, mem contentions + invalidations";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
inv_rand_en = 1;
seq_type = '{default: RANDOM_SEQ};
req_rate = '{default: 7'd25};
runSeq(10000,20000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 10 -- linear burst write -- enabled cache";
// config
enable_i = 1;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
seq_type = '{LINEAR_SEQ, IDLE_SEQ, IDLE_SEQ};
req_rate = '{100, 0, 0};
runSeq(0,5000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 11 -- linear burst write with hot cache";
// config
enable_i = 1;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
// first pass: linear reads on read port 0 only (write port and read port 1 idle)
seq_type = '{IDLE_SEQ, IDLE_SEQ, LINEAR_SEQ};
req_rate = '{default:100};
runSeq((CachedAddrBeg>>3)+(2**(DCACHE_INDEX_WIDTH-3))*DCACHE_SET_ASSOC,0);
// second pass: linear writes on the write port only
seq_type = '{LINEAR_SEQ, IDLE_SEQ, IDLE_SEQ};
// NOTE(review): this call sets the last-sequence flag (third argument = 1) although
// further tests follow; TEST 16 describes that flag as terminating the agents -- confirm this is intended
runSeq(0,(CachedAddrBeg>>3)+(2**(DCACHE_INDEX_WIDTH-3))*DCACHE_SET_ASSOC,1);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 12 -- random write bursts -- enabled cache";
// config
enable_i = 1;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
seq_type = '{BURST_SEQ, RANDOM_SEQ, RANDOM_SEQ};
req_rate = '{75, 0, 0};
runSeq(0,5000,0);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 13 -- random write bursts -- enabled cache + tlb, mem contentions + invalidations";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
inv_rand_en = 1;
seq_type = '{BURST_SEQ, IDLE_SEQ, IDLE_SEQ};
req_rate = '{75, 0, 0};
runSeq(0,5000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 14 -- random write/read-- enabled cache + tlb, mem contentions + invalidations";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
inv_rand_en = 1;
seq_type = '{RANDOM_SEQ, RANDOM_SEQ, RANDOM_SEQ};
req_rate = '{default:25};
runSeq(5000,5000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 15 -- short wrapping sequences to provoke writebuffer hits";
// config
enable_i = 1;
tlb_rand_en = 0;
mem_rand_en = 0;
inv_rand_en = 0;
seq_type = '{WRAP_SEQ, IDLE_SEQ, WRAP_SEQ};
req_rate = '{100,0,20};
runSeq(5000,5000);
flushCache();
memCheck();
///////////////////////////////////////////////
test_name = "TEST 16 -- random write/read-- enabled cache + tlb, mem contentions + invalidations + random flushes";
// config
enable_i = 1;
tlb_rand_en = 1;
mem_rand_en = 1;
inv_rand_en = 1;
flush_rand_en = 1;
seq_type = '{RANDOM_SEQ, RANDOM_SEQ, RANDOM_SEQ};
req_rate = '{default:25};
runSeq(5000,5000,1);// last sequence flag, terminates agents
flushCache();
memCheck();
///////////////////////////////////////////////
end_of_sim = 1;
$display("TB> end test sequences");
end
endmodule

View file

@ -0,0 +1,66 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description:
//
//////////////////////////////////////////////////////////////////////////////
// use to ensure proper ATI timing
///////////////////////////////////////////////////////////////////////////////
// Timing helper macros for testbench processes.
// - The SIG argument is parenthesized inside every comparison, so that
//   compound expressions (e.g. `a | b`) are evaluated as a whole before being
//   compared against 1'b0.
// - All delay constants are referenced as tb_pkg::APPL_DEL / tb_pkg::ACQ_DEL,
//   so the macros do not depend on a wildcard import of tb_pkg at the
//   expansion site.
// - The macros expand to one or more complete statements (including the
//   trailing semicolon); do not use them as the single statement of an
//   unbraced if/else or loop.

// waits for the time between application and acquisition delay
`define APPL_ACQ_WAIT #(tb_pkg::ACQ_DEL-tb_pkg::APPL_DEL);

// waits for N rising edges of CLK
`define WAIT_CYC(CLK, N) \
  repeat(N) @(posedge(CLK));

// waits for at least one rising edge of CLK, until SIG is nonzero at an edge
`define WAIT(CLK, SIG) \
  do begin \
    @(posedge(CLK)); \
  end while((SIG) == 1'b0);

// same as WAIT
`define WAIT_SIG(CLK,SIG) \
  do begin \
    @(posedge(CLK)); \
  end while((SIG) == 1'b0);

// waits (ACQ_DEL-APPL_DEL), then checks SIG; while it is zero, re-checks it
// ACQ_DEL after each further rising edge of CLK
`define APPL_WAIT_COMB_SIG(CLK,SIG) \
  `APPL_ACQ_WAIT \
  while((SIG) == 1'b0) begin \
    @(posedge(CLK)); \
    #(tb_pkg::ACQ_DEL); \
  end

// waits for at least one rising edge of CLK plus APPL_DEL, until SIG is nonzero
`define APPL_WAIT_SIG(CLK,SIG) \
  do begin \
    @(posedge(CLK)); \
    #(tb_pkg::APPL_DEL); \
  end while((SIG) == 1'b0);

// waits for at least one rising edge of CLK plus ACQ_DEL, until SIG is nonzero
`define ACQ_WAIT_SIG(CLK,SIG) \
  do begin \
    @(posedge(CLK)); \
    #(tb_pkg::ACQ_DEL); \
  end while((SIG) == 1'b0);

// waits for N rising edges of CLK, followed by APPL_DEL
`define APPL_WAIT_CYC(CLK, N) \
  repeat(N) @(posedge(CLK)); \
  #(tb_pkg::APPL_DEL);

// waits for N rising edges of CLK, followed by ACQ_DEL
`define ACQ_WAIT_CYC(CLK, N) \
  repeat(N) @(posedge(CLK)); \
  #(tb_pkg::ACQ_DEL);

View file

@ -0,0 +1,368 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: simple emulation layer for the memory subsystem.
//
`include "tb.svh"
import ariane_pkg::*;
import serpent_cache_pkg::*;
import tb_pkg::*;
module tb_mem #(
parameter string MemName = "TB_MEM",
parameter MemRandHitRate = 10, //in percent
parameter MemRandInvRate = 5, //in percent
parameter MemWords = 1024*1024,// in 64bit words
parameter logic [63:0] CachedAddrBeg = MemWords/2,
parameter logic [63:0] CachedAddrEnd = 64'hFFFF_FFFF_FFFF_FFFF
) (
input logic clk_i,
input logic rst_ni,
// randomization settings
input logic mem_rand_en_i,
input logic inv_rand_en_i,
input logic amo_rand_en_i,
// dcache interface
output logic mem_rtrn_vld_o,
output dcache_rtrn_t mem_rtrn_o,
input logic mem_data_req_i,
output logic mem_data_ack_o,
input dcache_req_t mem_data_i,
// expected response interface
input logic seq_last_i,
input logic check_en_i,
input logic commit_en_i,
input logic [7:0] commit_be_i,
input logic [63:0] commit_paddr_i,
input logic write_en_i,
input logic [7:0] write_be_i,
input logic [63:0] write_data_i,
input logic [63:0] write_paddr_i,
output logic [63:0] mem_array_o[MemWords-1:0]
);
// leave this
timeunit 1ps;
timeprecision 1ps;
logic mem_ready_q, mem_inv_q;
logic [63:0] rand_addr_q;
dcache_req_t outfifo_data;
logic outfifo_pop, outfifo_push, outfifo_full, outfifo_empty;
dcache_rtrn_t infifo_data;
logic infifo_pop, infifo_push, infifo_full, infifo_empty;
logic initialized_q;
logic write_en;
logic [63:0] mem_array_q[MemWords-1:0];
// this shadow memory provides a view that is consistent with the one from the core
// i.e., pending writes are present in this view, and invalidations will not overwrite
// the corresponding bytes until they have been committed to the normal memory.
logic [63:0] mem_array_shadow_q[MemWords-1:0];
logic [7:0] mem_array_dirty_q[MemWords-1:0];
assign mem_array_o = mem_array_shadow_q;
// sequential process holding the state of the memory readout process
always_ff @(posedge clk_i or negedge rst_ni) begin : p_tlb_rand
automatic int rnd = 0;
automatic logic [63:0] val;
automatic logic [63:0] lval;
if(~rst_ni) begin
mem_ready_q <= '0;
mem_inv_q <= '0;
rand_addr_q <= '0;
initialized_q <= '0;
end else begin
// fill the memory once with random data
if (initialized_q) begin
// commit "virtual" writes (i.e., clear the dirty flags)
if(commit_en_i) begin
for(int k=0; k<8; k++) begin
if(commit_be_i[k]) begin
mem_array_dirty_q[commit_paddr_i>>3][k] <= 1'b0;
end
end
end
// "virtual" writes coming from TB agent, used to generate expected responses
if(write_en_i) begin
for(int k=0; k<8; k++) begin
if(write_be_i[k]) begin
mem_array_shadow_q[write_paddr_i>>3][k*8 +: 8] <= write_data_i[k*8 +: 8];
mem_array_dirty_q[write_paddr_i>>3][k] <= 1'b1;
end
end
end
// "real" writes coming via the miss controller
if(write_en) begin
unique case(outfifo_data.size)
3'b000: mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2:0]*8 +: 8] = outfifo_data.data[outfifo_data.paddr[2:0]*8 +: 8];
3'b001: mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2:1]*16 +: 16] = outfifo_data.data[outfifo_data.paddr[2:1]*16 +: 16];
3'b010: mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2:2]*32 +: 32] = outfifo_data.data[outfifo_data.paddr[2:2]*32 +: 32];
3'b011: mem_array_q[outfifo_data.paddr>>3] = outfifo_data.data[0 +: 64];
default: begin
$fatal(1,"unsupported transfer size for write");
end
endcase // infifo_data.size
end
// initialization with random data
end else begin
mem_array_dirty_q <= '{default:'0};
for (int k=0; k<MemWords; k++) begin
void'(randomize(val));
mem_array_q[k] <= val;
mem_array_shadow_q[k] <= val;
end
initialized_q <= 1;
end
// generate random contentions
if (mem_rand_en_i) begin
void'(randomize(rnd) with {rnd > 0; rnd <= 100;});
if(rnd < MemRandHitRate) begin
mem_ready_q <= '1;
end else begin
mem_ready_q <= '0;
end
end else begin
mem_ready_q <= '1;
end
// generate random invalidations
if (inv_rand_en_i) begin
void'(randomize(rnd) with {rnd > 0; rnd <= 100;});
if(rnd < MemRandInvRate) begin
mem_inv_q <= '1;
void'(randomize(lval) with {lval>=0; lval<(MemWords>>3);});
void'(randomize(val));
rand_addr_q <= lval<<3;
// with the current TB setup, we cannot invalidate a memory location if a write response to the same address is
// in flight, since this could lead to an inconsistent state between the real memory and the shadow memory view.
// the workaround is not to overwrite shadow memory regions that are still pending in the write buffer
// this can be improved.
for(int k=0; k<8; k++) begin
if(~mem_array_dirty_q[lval][k]) begin
mem_array_q [lval][k*8 +: 8] <= val[k*8 +: 8];
mem_array_shadow_q[lval][k*8 +: 8] <= val[k*8 +: 8];
end
end
end else begin
mem_inv_q <= '0;
end
end else begin
mem_inv_q <= '0;
end
end
end
// readout process
always_comb begin : proc_mem
infifo_push = 0;
infifo_data = '0;
outfifo_pop = 0;
infifo_data.rtype = DCACHE_LOAD_ACK;
infifo_data.data = 'x;
write_en = '0;
// TODO: atomic request
// DCACHE_ATOMIC_REQ
// DCACHE_ATOMIC_ACK
// TODO: stores
// DCACHE_STORE_REQ
// DCACHE_STORE_ACK
// TODO: interrupts
// DCACHE_INT_REQ
// DCACHE_INT_ACK
// generate random invalidation
if (mem_inv_q) begin
infifo_data.rtype = DCACHE_INV_REQ;
// since we do not keep a mirror tag table here,
// we always invalidate all ways of the aliased index.
// this is not entirely correct and will produce
// too many invalidations
infifo_data.inv.idx = rand_addr_q[DCACHE_INDEX_WIDTH-1:0];
infifo_data.inv.all = '1;
infifo_push = 1'b1;
end else if ((~outfifo_empty) && (~infifo_full) && mem_ready_q) begin
outfifo_pop = 1'b1;
infifo_push = 1'b1;
unique case (outfifo_data.rtype)
DCACHE_LOAD_REQ: begin
infifo_data.tid = outfifo_data.tid;
infifo_data.nc = outfifo_data.nc;
infifo_data.data = 'x;
unique case(outfifo_data.size)
3'b000: for(int k=0;k<64;k+=8) infifo_data.data[outfifo_data.paddr[2:0]*8 +: 8] = mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2:0]*8 +: 8];
3'b001: for(int k=0;k<64;k+=16) infifo_data.data[outfifo_data.paddr[2:1]*16+:16] = mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2:1]*16+:16];
3'b010: for(int k=0;k<64;k+=32) infifo_data.data[outfifo_data.paddr[2] *32+:32] = mem_array_q[outfifo_data.paddr>>3][outfifo_data.paddr[2] *32+:32];
3'b011: infifo_data.data[0+:64] = mem_array_q[outfifo_data.paddr>>3];
3'b111: for(int k=0; k<DCACHE_LINE_WIDTH/64; k++) infifo_data.data[k*64 +:64] = mem_array_q[(outfifo_data.paddr>>3) + k];
default: $fatal(1,"unsupported transfer size for read");
endcase // infifo_data.size
end
DCACHE_STORE_REQ: begin
infifo_data.tid = outfifo_data.tid;
infifo_data.rtype = DCACHE_STORE_ACK;
infifo_data.nc = outfifo_data.nc;
write_en = 1'b1;
end
// DCACHE_ATOMIC_REQ: $fatal(1, "DCACHE_ATOMIC_REQ not implemented yet");
// DCACHE_INT_REQ: $fatal(1, "DCACHE_INT_REQ not implemented yet");
default: begin
// $fatal(1, "unsupported request type");
end
endcase // outfifo_data.rtype
end
end
fifo_v2 #(
.dtype(dcache_req_t),
.DEPTH(2)
) i_outfifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( outfifo_full ),
.empty_o ( outfifo_empty ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( mem_data_i ),
.push_i ( outfifo_push ),
.data_o ( outfifo_data ),
.pop_i ( outfifo_pop )
);
assign outfifo_push = mem_data_req_i & (~outfifo_full);
assign mem_data_ack_o = outfifo_push;
fifo_v2 #(
.dtype(dcache_rtrn_t),
.DEPTH(2)
) i_infifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( 1'b0 ),
.testmode_i ( 1'b0 ),
.full_o ( infifo_full ),
.empty_o ( infifo_empty ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( infifo_data ),
.push_i ( infifo_push ),
.data_o ( mem_rtrn_o ),
.pop_i ( infifo_pop )
);
assign infifo_pop = ~infifo_empty;
assign mem_rtrn_vld_o = infifo_pop;
///////////////////////////////////////////////////////
// checker process
///////////////////////////////////////////////////////
initial begin
bit ok;
progress status;
status = new(MemName);
`ACQ_WAIT_CYC(clk_i,10)
`ACQ_WAIT_SIG(clk_i,~rst_ni)
while(~seq_last_i) begin
`ACQ_WAIT_SIG(clk_i,check_en_i)
status.reset(MemWords);
// crosscheck whether shadow and real memory arrays still match
for(int k=0; k<MemWords; k++) begin
ok = (mem_array_q[k] == mem_array_shadow_q[k]) && !(|mem_array_dirty_q[k]);
if(!ok) begin
$display("%s> dirty bytes at k=%016X: real[k>>3]=%016X, shadow[k>>3]=%016X, dirty[k>>3]=%02X",
MemName, k<<3, mem_array_q[k], mem_array_shadow_q[k], mem_array_dirty_q[k]);
end
status.addRes(!ok);
status.print();
end
end
status.printToFile({MemName, "_summary.rep"}, 1);
if(status.totErrCnt == 0) begin
$display("%s> ----------------------------------------------------------------------", MemName);
$display("%s> PASSED %0d VECTORS", MemName, status.totAcqCnt);
$display("%s> ----------------------------------------------------------------------\n", MemName);
end else begin
$display("%s> ----------------------------------------------------------------------\n", MemName);
$display("%s> FAILED %0d OF %0d VECTORS\n", MemName , status.totErrCnt, status.totAcqCnt);
$display("%s> ----------------------------------------------------------------------\n", MemName);
end
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
`ifndef verilator
nc_region: assert property (
@(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.paddr >= CachedAddrEnd || mem_data_i.paddr < CachedAddrBeg |-> mem_data_i.nc)
else $fatal(1, "cached access into noncached region");
cached_reads: assert property (
@(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.rtype==DCACHE_LOAD_REQ |-> ~mem_data_i.nc |-> mem_data_i.size == 3'b111)
else $fatal(1, "cached read accesses always have to be one CL wide");
nc_reads: assert property (
@(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.rtype==DCACHE_LOAD_REQ |-> mem_data_i.nc |-> mem_data_i.size inside {3'b000, 3'b001, 3'b010, 3'b011})
else $fatal(1, "nc read size can only be one of the following: byte, halfword, word, dword");
write_size: assert property (
@(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.rtype==DCACHE_STORE_REQ |-> mem_data_i.size inside {3'b000, 3'b001, 3'b010, 3'b011})
else $fatal(1, "write size can only be one of the following: byte, halfword, word, dword");
addr_range: assert property (
@(posedge clk_i) disable iff (~rst_ni) mem_data_req_i |-> mem_data_i.rtype inside {DCACHE_STORE_REQ, DCACHE_STORE_REQ} |-> mem_data_i.paddr < (MemWords<<3))
else $fatal(1, "address is out of bounds");
`endif
//pragma translate_on
endmodule // mem_emul

View file

@ -0,0 +1,150 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: testbench package with some helper functions.
// Testbench helper package. Provides the DPI timestamp imports, the clock
// timing settings, the stimulus sequence enum and the `progress` class used
// by the testbench agents to count acquired/failed vectors and report them.
package tb_pkg;

  // // for abs(double) function
  // import mti_cstdlib::*;

  // for timestamps
  import "DPI-C" \time = function int _time (inout int tloc[4]);
  import "DPI-C" function string ctime(inout int tloc[4]);

  ///////////////////////////////////////////////////////////////////////////////
  // parameters
  ///////////////////////////////////////////////////////////////////////////////

  // creates a 10ns ATI timing cycle
  time CLK_HI     = 5ns; // set clock high time
  time CLK_LO     = 5ns; // set clock low time
  time CLK_PERIOD = CLK_HI+CLK_LO;
  time APPL_DEL   = 2ns; // set stimuli application delay
  time ACQ_DEL    = 8ns; // set response acquisition delay

  parameter ERROR_CNT_STOP_LEVEL = 1; // use 1 for debugging. 0 runs the complete simulation...

  // tb_readport sequences
  typedef enum logic [2:0] { RANDOM_SEQ, LINEAR_SEQ, BURST_SEQ, IDLE_SEQ, WRAP_SEQ } seq_t;

  ///////////////////////////////////////////////////////////////////////////////
  // progress
  ///////////////////////////////////////////////////////////////////////////////

  // Keeps per-test counters (acqCnt, errCnt, numResp) and running totals
  // (totAcqCnt, totErrCnt), prints progress to the console and writes
  // report files.
  class progress;
    real    newState, oldState;
    longint numResp, acqCnt, errCnt, totAcqCnt, totErrCnt;
    string  name;

    // name: prefix used in all console and file messages
    function new(string name);
      this.name      = name;
      this.acqCnt    = 0;
      this.errCnt    = 0;
      this.newState  = 0.0;
      this.oldState  = 0.0;
      this.numResp   = 1;
      this.totAcqCnt = 0;
      this.totErrCnt = 0;
    endfunction : new

    // Starts a new test: clears the per-test counters (totals are kept) and
    // sets the number of responses expected for this test.
    function void reset(longint numResp_);
      this.acqCnt   = 0;
      this.errCnt   = 0;
      this.newState = 0.0;
      this.oldState = 0.0;
      this.numResp  = numResp_;
    endfunction : reset

    // Returns num/den in percent; returns 0.0 if den is not positive so that
    // reports written before any vector was acquired do not divide by zero.
    function real percent(longint num, longint den);
      if (den <= 0) begin
        return 0.0;
      end
      return $itor(num) / $itor(den) * 100.0;
    endfunction : percent

    // Records one acquired vector; isError is added to the error counters.
    // Stops the simulation once errCnt reaches ERROR_CNT_STOP_LEVEL
    // (a level of 0 disables the stop).
    function void addRes(int isError);
      this.acqCnt++;
      this.totAcqCnt++;
      this.errCnt    += isError;
      this.totErrCnt += isError;

      if(ERROR_CNT_STOP_LEVEL <= this.errCnt && ERROR_CNT_STOP_LEVEL > 0) begin
        $error("%s> simulation stopped (ERROR_CNT_STOP_LEVEL = %d reached).", this.name, ERROR_CNT_STOP_LEVEL);
        $stop();
      end
    endfunction : addRes

    // Prints a progress line each time at least one more percent of the
    // expected responses has been acquired since the last printed line.
    function void print();
      // a non-positive numResp is treated as "complete" instead of dividing by zero
      if (this.numResp > 0) begin
        this.newState = $itor(this.acqCnt) / $itor(this.numResp);
      end else begin
        this.newState = 1.0;
      end
      if(this.newState - this.oldState >= 0.01) begin
        $display("%s> validated %03d%% -- %01d failed (%03.3f%%) ",
                 this.name,
                 $rtoi(this.newState*100.0),
                 this.errCnt,
                 percent(this.errCnt, this.acqCnt));
        // $fflush();
        this.oldState = this.newState;
      end
    endfunction : print

    // Writes a report to `file` (blanks, slashes and backslashes in the name
    // are replaced by underscores). With summary=1 only the totals and a
    // "CI: PASSED"/"CI: FAILED" line are written, otherwise the per-test and
    // running-total statistics.
    function void printToFile(string file, bit summary = 0);
      int fd;

      // sanitize string
      for(int i=0; i<file.len(); i++) begin
        if(file[i] == " " || file[i] == "/" || file[i] == "\\") begin
          file[i] = "_";
        end
      end

      fd = $fopen(file,"w");
      if(fd == 0) begin
        // $fopen returns 0 on failure; do not write to or close an invalid descriptor
        $error("%s> could not open report file %s for writing.", this.name, file);
        return;
      end

      if(summary) begin
        $fdisplay(fd, "Simulation Summary of %s", this.name);
        // failure rate in percent (the ratio has to be scaled by 100, not divided by it)
        $fdisplay(fd, "total: %01d of %01d vectors failed (%03.3f%%) ",
                  this.totErrCnt,
                  this.totAcqCnt,
                  percent(this.totErrCnt, this.totAcqCnt));
        if(this.totErrCnt == 0) begin
          $fdisplay(fd, "CI: PASSED");
        end else begin
          $fdisplay(fd, "CI: FAILED");
        end
      end else begin
        $fdisplay(fd, "test name: %s", file);
        $fdisplay(fd, "this test: %01d of %01d vectors failed (%03.3f%%) ",
                  this.errCnt,
                  this.acqCnt,
                  percent(this.errCnt, this.acqCnt));
        $fdisplay(fd, "total so far: %01d of %01d vectors failed (%03.3f%%) ",
                  this.totErrCnt,
                  this.totAcqCnt,
                  percent(this.totErrCnt, this.totAcqCnt));
      end
      $fclose(fd);
    endfunction : printToFile

  endclass : progress

endpackage : tb_pkg

View file

@ -0,0 +1,404 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: program that emulates a cache readport. the program can generate
// randomized or linear read sequences, and it checks the returned responses against
// the expected responses coming directly from the emulated memory (tb_mem).
//
`include "tb.svh"
import ariane_pkg::*;
import serpent_cache_pkg::*;
import tb_pkg::*;
program tb_readport #(
parameter string PortName = "read port 0",
parameter FlushRate = 1,
parameter KillRate = 5,
parameter TlbHitRate = 95,
parameter MemWords = 1024*1024,// in 64bit words
parameter logic [63:0] CachedAddrBeg = 0,
parameter logic [63:0] CachedAddrEnd = 0,
parameter RndSeed = 1110,
parameter Verbose = 0
) (
input logic clk_i,
input logic rst_ni,
// to testbench master
ref string test_name_i,
input logic [6:0] req_rate_i, //a rate between 0 and 100 percent
input seq_t seq_type_i,
input logic tlb_rand_en_i,
input logic flush_rand_en_i,
input logic seq_run_i,
input logic [31:0] seq_num_resp_i,
input logic seq_last_i,
output logic seq_done_o,
// expresp interface
output logic [63:0] exp_paddr_o,
input logic [1:0] exp_size_i,
input logic [63:0] exp_rdata_i,
input logic [63:0] exp_paddr_i,
input logic [63:0] act_paddr_i,
// interface to DUT
output logic flush_o,
input logic flush_ack_i,
output dcache_req_i_t dut_req_port_o,
input dcache_req_o_t dut_req_port_i
);
// leave this
timeunit 1ps;
timeprecision 1ps;
// physical address of the request currently applied by the stimulus tasks
logic [63:0] paddr;
// seq_end_req is raised by p_acq and acknowledged by p_stim with seq_end_ack
// to terminate a sequence; prog_end is set by p_acq once all sequences ran
logic seq_end_req, seq_end_ack, prog_end;
// delayed address tag and its valid flag, both driven by p_tag_delay
logic [DCACHE_TAG_WIDTH-1:0] tag_q;
// single-bit flag: it is only ever assigned 1'b0/1'b1 and drives tag_valid
logic tag_vld_q;
///////////////////////////////////////////////////////////////////////////////
// Randomly delay the tag by at least one cycle
///////////////////////////////////////////////////////////////////////////////
// // TODO: add randomization
// Tag delay process: captures the address of a request seen on
// dut_req_port_o.data_req and drives its tag bits on tag_q (with tag_vld_q
// high) in a later iteration, optionally after a random number of extra
// iterations. A kill_req drops the captured request.
// NOTE(review): the APPL_* macros come from tb.svh, which is not visible here;
// presumably they are clock-synchronous wait helpers - confirm there.
initial begin : p_tag_delay
logic [63:0] tmp_paddr, val;
int unsigned cnt;
logic tmp_vld;
tag_q <= '0;
tag_vld_q <= 1'b0;
`APPL_WAIT_CYC(clk_i, 10)
`APPL_WAIT_SIG(clk_i,~rst_ni)
`APPL_WAIT_CYC(clk_i,1)
// tmp_vld: a captured request is waiting for its tag
// cnt:     remaining extra delay iterations before the tag is driven
tmp_vld = 0;
cnt = 0;
forever begin
`APPL_WAIT_CYC(clk_i,1)
if(cnt==0) begin
// no delay pending: drive the tag of the captured request, if there is one
if(tmp_vld) begin
tmp_vld = 0;
tag_q <= tmp_paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
tag_vld_q <= 1'b1;
end else begin
tag_vld_q <= 1'b0;
end
`APPL_ACQ_WAIT;
// capture the address of a request that is being applied
if(dut_req_port_o.data_req) begin
tmp_paddr = paddr;
tmp_vld = 1;
if(tlb_rand_en_i) begin
// val is drawn from 1..100; values >= TlbHitRate delay the tag by 1..50 iterations
void'(randomize(val) with {val>0; val<=100;});
if(val>=TlbHitRate) begin
void'(randomize(cnt) with {cnt>0; cnt<=50;});
end
end
end
end else begin
// delay still running: keep the tag invalid and count down
tag_vld_q <= 1'b0;
cnt -= 1;
`APPL_ACQ_WAIT;
end
// a kill request discards the captured request and any pending delay
if(dut_req_port_o.kill_req) begin
tmp_vld = 0;
cnt = 0;
end
end
end
assign dut_req_port_o.address_tag = tag_q;
assign dut_req_port_o.tag_valid = tag_vld_q;
assign dut_req_port_o.address_index = paddr[DCACHE_INDEX_WIDTH-1:0];
assign exp_paddr_o = paddr;
///////////////////////////////////////////////////////////////////////////////
// Helper tasks
///////////////////////////////////////////////////////////////////////////////
// Requests a cache flush: raises flush_o, waits on flush_ack_i through
// APPL_WAIT_SIG, lowers flush_o again and then waits one APPL_WAIT_CYC cycle.
task automatic flushCache();
  flush_o = 1'b1;
  `APPL_WAIT_SIG(clk_i, flush_ack_i);
  // sized literals must have a non-zero width, hence 1'b0
  flush_o = 1'b0;
  `APPL_WAIT_CYC(clk_i,1)
endtask : flushCache
// Random read sequence: until p_acq raises seq_end_req, each iteration either
// applies a one-cycle kill request, flushes the cache, applies a read request
// with random address and size, or idles. Every decision is taken by drawing
// val from 1..100 and comparing it against KillRate, FlushRate (only if
// flush_rand_en_i is set) and req_rate_i, in this order.
task automatic genRandReq();
automatic logic [63:0] val;
automatic logic [1:0] size;
// seed the random number generator with the port specific seed
void'($urandom(RndSeed));
paddr = '0;
dut_req_port_o.data_req = '0;
dut_req_port_o.data_size = '0;
dut_req_port_o.kill_req = '0;
while(~seq_end_req) begin
// randomize request
dut_req_port_o.data_req = '0;
// generate random control events
void'(randomize(val) with {val > 0; val <= 100;});
if(val < KillRate) begin
// kill request, asserted for one APPL_WAIT_CYC cycle
dut_req_port_o.kill_req = 1'b1;
`APPL_WAIT_CYC(clk_i,1)
dut_req_port_o.kill_req = 1'b0;
end else begin
void'(randomize(val) with {val > 0; val <= 100;});
if(val < FlushRate && flush_rand_en_i) begin
flushCache();
end else begin
void'(randomize(val) with {val > 0; val <= 100;});
if(val < req_rate_i) begin
dut_req_port_o.data_req = 1'b1;
// generate random address
void'(randomize(val) with {val >= 0; val < (MemWords<<3);});
void'(randomize(size));
dut_req_port_o.data_size = size;
paddr = val;
// align to size
unique case(size)
2'b01: paddr[0] = 1'b0;
2'b10: paddr[1:0] = 2'b00;
2'b11: paddr[2:0] = 3'b000;
default: ;
endcase
// hold the request until the DUT signals data_gnt
`APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
end
`APPL_WAIT_CYC(clk_i,1)
end
end
end
// leave the port idle when the sequence has ended
dut_req_port_o.data_req = '0;
dut_req_port_o.data_size = '0;
dut_req_port_o.kill_req = '0;
endtask : genRandReq
// Linear read sequence: applies 64bit reads (data_size 2'b11) at addresses
// that increase by 8 bytes per request and wrap at MemWords<<3, until p_acq
// raises seq_end_req. The port is left idle afterwards.
task automatic genSeqRead();
  automatic logic [63:0] next_addr = '0;

  // start from an idle port
  paddr                    = '0;
  dut_req_port_o.kill_req  = '0;
  dut_req_port_o.data_size = '0;
  dut_req_port_o.data_req  = '0;

  while (!seq_end_req) begin
    // present the read at the current address
    paddr                    = next_addr;
    dut_req_port_o.data_size = 2'b11;
    dut_req_port_o.data_req  = 1'b1;
    // address of the following request, wrapping at the end of the memory
    next_addr = (next_addr + 8) % (MemWords<<3);
    // hold the request until the DUT signals data_gnt
    `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
    `APPL_WAIT_CYC(clk_i,1)
  end

  // back to idle
  dut_req_port_o.kill_req  = '0;
  dut_req_port_o.data_size = '0;
  dut_req_port_o.data_req  = '0;
endtask : genSeqRead
// Wrapping read sequence: applies 64bit reads (data_size 2'b11) at
// CachedAddrBeg + offset, where the offset advances by 8 bytes per request
// and wraps after (DCACHE_LINE_WIDTH/64)*8 bytes. Runs until p_acq raises
// seq_end_req and leaves the port idle afterwards.
task automatic genWrapSeq();
  automatic logic [63:0] val;
  paddr = CachedAddrBeg;
  dut_req_port_o.data_req = '0;
  dut_req_port_o.data_size = '0;
  dut_req_port_o.kill_req = '0;
  val = '0;
  while(~seq_end_req) begin
    dut_req_port_o.data_req = 1'b1;
    dut_req_port_o.data_size = 2'b11;
    // generate wrapping read of 1 cacheline
    // (the address is always relative to CachedAddrBeg; no separate store of
    // the bare offset into paddr is needed)
    paddr = CachedAddrBeg + val;
    val = (val + 8) % (1*(DCACHE_LINE_WIDTH/64)*8);
    // hold the request until the DUT signals data_gnt
    `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
    `APPL_WAIT_CYC(clk_i,1)
  end
  dut_req_port_o.data_req = '0;
  dut_req_port_o.data_size = '0;
  dut_req_port_o.kill_req = '0;
endtask : genWrapSeq
///////////////////////////////////////////////////////////////////////////////
// Sequence application
///////////////////////////////////////////////////////////////////////////////
// Stimulus process of the read port: initializes the DUT request port, prints
// the configuration, and then runs one stimulus task per sequence selected by
// seq_type_i until seq_last_i is set. The end of each sequence is acknowledged
// to p_acq by pulsing seq_end_ack.
initial begin : p_stim
paddr = '0;
dut_req_port_o.data_wdata = '0;
dut_req_port_o.data_req = '0;
dut_req_port_o.data_we = '0;
dut_req_port_o.data_be = '0;
dut_req_port_o.data_size = '0;
dut_req_port_o.kill_req = '0;
seq_end_ack = '0;
flush_o = '0;
// print some info
$display("%s> current configuration:", PortName);
$display("%s> KillRate %d", PortName, KillRate);
$display("%s> FlushRate %d", PortName, FlushRate);
$display("%s> TlbHitRate %d", PortName, TlbHitRate);
$display("%s> RndSeed %d", PortName, RndSeed);
`APPL_WAIT_CYC(clk_i,1)
`APPL_WAIT_SIG(clk_i,~rst_ni)
$display("%s> starting application", PortName);
while(~seq_last_i) begin
`APPL_WAIT_SIG(clk_i,seq_run_i)
// the generator tasks return once p_acq has raised seq_end_req
unique case(seq_type_i)
RANDOM_SEQ: begin
$display("%s> start random sequence with %04d responses and req_rate %03d", PortName, seq_num_resp_i, req_rate_i);
genRandReq();
end
LINEAR_SEQ: begin
$display("%s> start linear sequence with %04d responses and req_rate %03d", PortName, seq_num_resp_i, req_rate_i);
genSeqRead();
end
WRAP_SEQ: begin
$display("%s> start wrapping sequence with %04d responses and req_rate %03d", PortName, seq_num_resp_i, req_rate_i);
genWrapSeq();
end
IDLE_SEQ: begin
// no requests are applied; only wait for the end-of-sequence request
`APPL_WAIT_SIG(clk_i,seq_end_req)
end
BURST_SEQ: begin
$fatal(1, "Burst sequence not implemented for read port agent");
end
endcase // seq_type_i
// acknowledge the end of the sequence towards p_acq
seq_end_ack = 1'b1;
$display("%s> stop sequence", PortName);
`APPL_WAIT_CYC(clk_i,1)
seq_end_ack = 1'b0;
end
$display("%s> ending application", PortName);
end
///////////////////////////////////////////////////////////////////////////////
// Response acquisition
///////////////////////////////////////////////////////////////////////////////
// Response acquisition process of the read port: for every sequence it waits
// for seq_num_resp_i valid responses (none for IDLE_SEQ), compares the
// returned data and address against the expected values, records the result
// in a progress object, and then requests the end of the sequence from p_stim.
// After the last sequence a summary file and a PASSED/FAILED banner are written.
initial begin : p_acq
bit ok;
progress status;
string failingTests, tmpstr1, tmpstr2;
int n;
logic [63:0] exp_rdata, exp_paddr;
logic [1:0] exp_size;
status = new(PortName);
failingTests = "";
seq_done_o = 1'b0;
seq_end_req = 1'b0;
prog_end = 1'b0;
`ACQ_WAIT_CYC(clk_i,1)
`ACQ_WAIT_SIG(clk_i,~rst_ni)
///////////////////////////////////////////////
// loop over tests
n=0;
while(~seq_last_i) begin
`ACQ_WAIT_SIG(clk_i,seq_run_i)
seq_done_o = 1'b0;
$display("%s> %s", PortName, test_name_i);
status.reset(seq_num_resp_i);
for (int k=0;k<seq_num_resp_i && seq_type_i != IDLE_SEQ;k++) begin
// only responses that are valid and not killed in the same cycle are checked
`ACQ_WAIT_SIG(clk_i, (dut_req_port_i.data_rvalid & ~dut_req_port_o.kill_req))
// build the expected data word: only the bytes covered by the access
// (selected by exp_size_i and the low bits of exp_paddr_i) are set,
// all other bits stay x
exp_rdata = 'x;
unique case(exp_size_i)
2'b00: exp_rdata[exp_paddr_i[2:0]*8 +: 8] = exp_rdata_i[exp_paddr_i[2:0]*8 +: 8];
2'b01: exp_rdata[exp_paddr_i[2:1]*16 +: 16] = exp_rdata_i[exp_paddr_i[2:1]*16 +: 16];
2'b10: exp_rdata[exp_paddr_i[2] *32 +: 32] = exp_rdata_i[exp_paddr_i[2] *32 +: 32];
2'b11: exp_rdata = exp_rdata_i;
endcase // exp_size
// note: wildcard as defined in right operand!
// (the x bits of exp_rdata are therefore not compared)
ok=(dut_req_port_i.data_rdata ==? exp_rdata) && (exp_paddr_i == act_paddr_i);
if(Verbose | !ok) begin
tmpstr1 = $psprintf("vector: %02d - %06d -- exp_paddr: %16X -- exp_data: %16X -- access size: %01d Byte",
n, k, exp_paddr_i, exp_rdata, 2**exp_size_i);
tmpstr2 = $psprintf("vector: %02d - %06d -- act_paddr: %16X -- act_data: %16X -- access size: %01d Byte",
n, k, act_paddr_i, dut_req_port_i.data_rdata, 2**exp_size_i);
$display("%s> %s", PortName, tmpstr1);
$display("%s> %s", PortName, tmpstr2);
end
if(!ok) begin
// collect all mismatches for the final report
failingTests = $psprintf("%s%s> %s\n%s> %s\n", failingTests, PortName, tmpstr1, PortName, tmpstr2);
end
status.addRes(!ok);
status.print();
end
// ask p_stim to stop the running sequence and wait for its acknowledge
seq_end_req = 1'b1;
`ACQ_WAIT_SIG(clk_i, seq_end_ack)
seq_end_req = 1'b0;
`ACQ_WAIT_CYC(clk_i,1)
seq_done_o = 1'b1;
n++;
end
///////////////////////////////////////////////
status.printToFile({PortName, "_summary.rep"}, 1);
if(status.totErrCnt == 0) begin
$display("%s> ----------------------------------------------------------------------", PortName);
$display("%s> PASSED %0d VECTORS", PortName, status.totAcqCnt);
$display("%s> ----------------------------------------------------------------------\n", PortName);
end else begin
$display("%s> ----------------------------------------------------------------------\n", PortName);
$display("%s> FAILED %0d OF %0d VECTORS\n", PortName , status.totErrCnt, status.totAcqCnt);
$display("%s> failing tests:", PortName);
$display("%s", failingTests);
$display("%s> ----------------------------------------------------------------------\n", PortName);
end
prog_end = 1'b1;
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
// `ifndef VERILATOR
// `endif
//pragma translate_on
endprogram // tb_readport

View file

@ -0,0 +1,300 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: program that emulates a cache write port. the program can generate
// randomized, linear, wrapping or burst write sequences.
//
`include "tb.svh"
import ariane_pkg::*;
import serpent_cache_pkg::*;
import tb_pkg::*;
program tb_writeport #(
parameter string PortName = "write port 0",
parameter MemWords = 1024*1024,// in 64bit words
parameter logic [63:0] CachedAddrBeg = 0,
parameter logic [63:0] CachedAddrEnd = 0,
parameter RndSeed = 1110,
parameter Verbose = 0
) (
input logic clk_i,
input logic rst_ni,
// to testbench master
ref string test_name_i,
input logic [6:0] req_rate_i,
input seq_t seq_type_i,
input logic seq_run_i,
input logic [31:0] seq_num_vect_i,
input logic seq_last_i,
output logic seq_done_o,
// interface to DUT
output dcache_req_i_t dut_req_port_o,
input dcache_req_o_t dut_req_port_i
);
// leave this
timeunit 1ps;
timeprecision 1ps;
logic [63:0] paddr;
assign dut_req_port_o.address_tag = paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
assign dut_req_port_o.address_index = paddr[DCACHE_INDEX_WIDTH-1:0];
assign dut_req_port_o.data_we = dut_req_port_o.data_req;
///////////////////////////////////////////////////////////////////////////////
// Helper tasks
///////////////////////////////////////////////////////////////////////////////
// Applies a random write of random size to the request port. The byte
// enables are derived from the size and from the low address bits paddr[2:0];
// afterwards paddr[2:0] is cleared, i.e. the address handed to the DUT is
// 64bit aligned. Only the enabled bytes of data_wdata are overwritten with
// random data, the other bytes keep their previous value.
task automatic applyRandData();
  automatic logic [63:0] val;
  automatic logic [7:0]  be;
  automatic logic [1:0]  size;

  void'(randomize(size));
  // align to size, set correct byte enables
  be = '0;
  unique case(size)
    2'b00: be[paddr[2:0] +: 1] = '1;
    // the byte offsets are built by concatenation: a shift of the 2bit/1bit
    // slice is evaluated at the width of the slice and would drop the upper
    // offset bit, placing the enables of upper halfwords/words on the wrong bytes
    2'b01: be[{paddr[2:1], 1'b0} +: 2] = '1;
    2'b10: be[{paddr[2], 2'b00} +: 4] = '1;
    2'b11: be = '1;
    default: ;
  endcase
  paddr[2:0] = '0;

  void'(randomize(val));
  for(int k=0; k<8; k++) begin
    if( be[k] ) begin
      dut_req_port_o.data_wdata[k*8 +: 8] = val[k*8 +: 8];
    end
  end
  dut_req_port_o.data_be = be;
  dut_req_port_o.data_size = size;
endtask : applyRandData
// Random write sequence: runs seq_num_vect_i iterations. In each iteration a
// value is drawn from 1..100; if it is below req_rate_i, a write with random
// address and random data (see applyRandData) is applied and held until the
// DUT grants it, otherwise the port idles for that iteration.
task automatic genRandReq();
  automatic logic [63:0] dice;

  // seed the random number generator with the port specific seed
  void'($urandom(RndSeed));

  // start from an idle port
  paddr                     = '0;
  dut_req_port_o.data_wdata = 'x;
  dut_req_port_o.data_be    = '0;
  dut_req_port_o.data_size  = '0;
  dut_req_port_o.data_req   = '0;

  repeat (seq_num_vect_i) begin
    // default for this iteration: no request
    dut_req_port_o.data_wdata = 'x;
    dut_req_port_o.data_be    = '0;
    dut_req_port_o.data_req   = '0;

    void'(randomize(dice) with {dice > 0; dice <= 100;});
    if (dice < req_rate_i) begin
      dut_req_port_o.data_req = 1'b1;
      // random address within the memory, then random size/byte enables/data
      void'(randomize(paddr) with {paddr >= 0; paddr < (MemWords<<3);});
      applyRandData();
      // hold the request until the DUT signals data_gnt
      `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
    end
    `APPL_WAIT_CYC(clk_i,1)
  end

  // back to idle
  paddr                     = '0;
  dut_req_port_o.data_wdata = 'x;
  dut_req_port_o.data_be    = '0;
  dut_req_port_o.data_size  = '0;
  dut_req_port_o.data_req   = '0;
endtask : genRandReq
// Linear write sequence: applies seq_num_vect_i full 64bit writes (all byte
// enables set) at addresses that increase by 8 bytes per request and wrap at
// MemWords<<3. The data written to each location is its own address.
task automatic genSeqWrite();
  automatic logic [63:0] cur_addr = '0;

  // start from an idle port
  paddr                     = '0;
  dut_req_port_o.data_wdata = 'x;
  dut_req_port_o.data_be    = '0;
  dut_req_port_o.data_size  = '0;
  dut_req_port_o.data_req   = '0;

  repeat (seq_num_vect_i) begin
    // present a full-width write carrying the address as data
    paddr                     = cur_addr;
    dut_req_port_o.data_wdata = cur_addr;
    dut_req_port_o.data_be    = '1;
    dut_req_port_o.data_size  = 2'b11;
    dut_req_port_o.data_req   = 1'b1;
    // generate linear write: next address, wrapping at the end of the memory
    cur_addr = (cur_addr + 8) % (MemWords<<3);
    // hold the request until the DUT signals data_gnt
    `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
    `APPL_WAIT_CYC(clk_i,1)
  end

  // back to idle
  paddr                     = '0;
  dut_req_port_o.data_wdata = 'x;
  dut_req_port_o.data_be    = '0;
  dut_req_port_o.data_size  = '0;
  dut_req_port_o.data_req   = '0;
endtask : genSeqWrite
// Wrapping write sequence: applies seq_num_vect_i random-data writes at
// CachedAddrBeg + offset, where the offset advances by 8 bytes per request
// and wraps after (DCACHE_LINE_WIDTH/64)*8 bytes.
task automatic genWrapSeq();
automatic logic [63:0] val;
// seed the random number generator with the port specific seed
void'($urandom(RndSeed));
paddr = CachedAddrBeg;
dut_req_port_o.data_req = '0;
dut_req_port_o.data_size = '0;
dut_req_port_o.data_be = '0;
dut_req_port_o.data_wdata = 'x;
val = '0;
repeat(seq_num_vect_i) begin
dut_req_port_o.data_req = 1'b1;
// NOTE(review): applyRandData derives the byte enables from paddr[2:0] as
// left by the previous iteration (it clears these bits itself); the address
// of this request is only assigned afterwards.
applyRandData();
// generate wrapping write of 1 cacheline
paddr = CachedAddrBeg + val;
val = (val + 8) % (1*(DCACHE_LINE_WIDTH/64)*8);
// hold the request until the DUT signals data_gnt
`APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
`APPL_WAIT_CYC(clk_i,1)
end
// leave the port idle when the sequence has ended
paddr = '0;
dut_req_port_o.data_req = '0;
dut_req_port_o.data_size = '0;
dut_req_port_o.data_be = '0;
dut_req_port_o.data_wdata = 'x;
endtask : genWrapSeq
// Burst write sequence: until seq_num_vect_i writes have been counted, a value
// is drawn from 1..100 per iteration; if it is below req_rate_i, a burst of up
// to burst_len (0..99) random-data writes to consecutive 64bit words is
// applied, starting at a random address. A burst also ends when the vector
// count is reached or the address reaches the last memory word.
task automatic genSeqBurst();
  automatic logic [63:0] val;
  automatic int cnt, burst_len;

  // seed the random number generator with the port specific seed
  void'($urandom(RndSeed));

  paddr = '0;
  dut_req_port_o.data_req = '0;
  dut_req_port_o.data_size = '0;
  dut_req_port_o.data_be = '0;
  dut_req_port_o.data_wdata = 'x;
  cnt = 0;
  while(cnt < seq_num_vect_i) begin
    // randomize request
    dut_req_port_o.data_req = '0;
    dut_req_port_o.data_be = '0;
    dut_req_port_o.data_wdata = 'x;
    void'(randomize(val) with {val > 0; val <= 100;});
    if(val < req_rate_i) begin
      dut_req_port_o.data_req = 1'b1;
      // generate random address base
      void'(randomize(paddr) with {paddr >= 0; paddr < (MemWords<<3);});
      // do a random burst
      void'(randomize(burst_len) with {burst_len >= 0; burst_len < 100;});
      for(int k=0; k<burst_len && cnt < seq_num_vect_i && paddr < ((MemWords-1)<<3); k++) begin
        applyRandData();
        // hold the request until the DUT signals data_gnt
        `APPL_WAIT_COMB_SIG(clk_i, dut_req_port_i.data_gnt)
        `APPL_WAIT_CYC(clk_i,1)
        // continue with the next 64bit word
        paddr += 8;
        cnt ++;
      end
    end
    // NOTE(review): data_req is still asserted during this cycle if the branch
    // above was taken (also when the burst loop did not execute at all, e.g.
    // for burst_len == 0), so one more request that is not counted in cnt may
    // be presented to the DUT - confirm that this is intended.
    `APPL_WAIT_CYC(clk_i,1)
  end
  paddr = '0;
  dut_req_port_o.data_req = '0;
  dut_req_port_o.data_size = '0;
  dut_req_port_o.data_be = '0;
  dut_req_port_o.data_wdata = 'x;
endtask : genSeqBurst
///////////////////////////////////////////////////////////////////////////////
// Sequence application
///////////////////////////////////////////////////////////////////////////////
// Stimulus process of the write port: initializes the DUT request port, prints
// the configuration, and then runs one stimulus task per sequence selected by
// seq_type_i until seq_last_i is set. seq_done_o is lowered when a sequence
// starts and raised once its stimulus task has returned.
initial begin : p_stim
paddr = '0;
dut_req_port_o.data_req = '0;
dut_req_port_o.data_size = '0;
dut_req_port_o.data_be = '0;
dut_req_port_o.data_wdata = '0;
dut_req_port_o.tag_valid = '0;
dut_req_port_o.kill_req = '0;
seq_done_o = 1'b0;
// print some info
$display("%s> current configuration:", PortName);
$display("%s> RndSeed %d", PortName, RndSeed);
`APPL_WAIT_CYC(clk_i,1)
`APPL_WAIT_SIG(clk_i,~rst_ni)
$display("%s> starting application", PortName);
while(~seq_last_i) begin
`APPL_WAIT_SIG(clk_i,seq_run_i)
seq_done_o = 1'b0;
// each generator task applies seq_num_vect_i vectors and then returns
unique case(seq_type_i)
RANDOM_SEQ: begin
$display("%s> start random sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i);
genRandReq();
end
LINEAR_SEQ: begin
$display("%s> start linear sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i);
genSeqWrite();
end
WRAP_SEQ: begin
$display("%s> start wrapping sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i);
genWrapSeq();
end
IDLE_SEQ: ;// do nothing
BURST_SEQ: begin
$display("%s> start burst sequence with %04d vectors and req_rate %03d", PortName, seq_num_vect_i, req_rate_i);
genSeqBurst();
end
endcase // seq_type_i
// report completion of this sequence to the testbench master
seq_done_o = 1'b1;
$display("%s> stop sequence", PortName);
`APPL_WAIT_CYC(clk_i,1)
end
$display("%s> ending application", PortName);
end
///////////////////////////////////////////////////////
// assertions
///////////////////////////////////////////////////////
//pragma translate_off
// `ifndef verilator
// exp_resp_vld: assert property (
// @(posedge clk_i) disable iff (~rst_ni) dut_req_port_i.data_rvalid |-> exp_rdata_queue.size()>0 && exp_size_queue.size()>0 && exp_paddr_queue.size()>0)
// else $fatal(1, "expected response must be in the queue when DUT response returns");
// `endif
//pragma translate_on
endprogram // tb_writeport

View file

@ -0,0 +1,21 @@
../../include/riscv_pkg.sv
../../src/debug/dm_pkg.sv
../../include/ariane_pkg.sv
../../include/serpent_cache_pkg.sv
../../src/fpga-support/rtl/SyncSpRamBeNx64.sv
../../src/cache_subsystem/serpent_dcache_ctrl.sv
../../src/cache_subsystem/serpent_dcache_mem.sv
../../src/cache_subsystem/serpent_dcache_missunit.sv
../../src/cache_subsystem/serpent_dcache_wbuffer.sv
../../src/cache_subsystem/serpent_dcache.sv
../../src/common_cells/src/lfsr_8bit.sv
../../src/common_cells/src/fifo_v2.sv
../../src/common_cells/src/fifo_v3.sv
../../src/common_cells/src/lzc.sv
../../src/common_cells/src/rrarbiter.sv
../../src/util/sram.sv
hdl/tb_pkg.sv
hdl/tb_mem.sv
hdl/tb_readport.sv
hdl/tb_writeport.sv
hdl/tb.sv

View file

@ -0,0 +1,474 @@
# Wave-window setup script for the /tb testbench.
# onerror {resume}: when a command in this script fails, continue with the
# next command instead of aborting the script.
onerror {resume}
quietly WaveActivateNextPane {} 0
# Objects located directly under /tb, added without redrawing the window
# after every command (-noupdate).
add wave -noupdate /tb/KILL_RATE
add wave -noupdate /tb/MEM_BYTES
add wave -noupdate /tb/MEM_RAND_HIT_RATE
add wave -noupdate /tb/MEM_RAND_INV_RATE
add wave -noupdate /tb/MEM_WORDS
add wave -noupdate /tb/NC_ADDR_BEGIN
add wave -noupdate /tb/amo_ack_o
add wave -noupdate /tb/amo_rand_en
add wave -noupdate /tb/amo_req_i
add wave -noupdate /tb/clk_i
add wave -noupdate /tb/enable_i
add wave -noupdate /tb/end_of_sim
add wave -noupdate /tb/flush_ack_o
add wave -noupdate /tb/flush_i
add wave -noupdate /tb/inv_rand_en
add wave -noupdate /tb/mem_array
add wave -noupdate /tb/mem_data_ack_i
add wave -noupdate /tb/mem_data_o
add wave -noupdate /tb/mem_data_req_o
add wave -noupdate /tb/mem_rand_en
# -expand shows the members of this compound object unfolded.
add wave -noupdate -expand /tb/mem_rtrn_i
add wave -noupdate /tb/mem_rtrn_vld_i
add wave -noupdate /tb/miss_o
add wave -noupdate /tb/req_ports_i
add wave -noupdate /tb/req_ports_o
add wave -noupdate /tb/rst_ni
add wave -noupdate /tb/seq_done
add wave -noupdate /tb/seq_last
add wave -noupdate /tb/seq_num_resp
add wave -noupdate /tb/seq_run
add wave -noupdate /tb/seq_type
add wave -noupdate /tb/test_name
add wave -noupdate /tb/wbuffer_empty_o
# Divider labelled "Programs", followed by the wave group "Writeport" holding
# the objects of instance /tb/i_tb_writeport.
add wave -noupdate -divider Programs
add wave -noupdate -group Writeport /tb/i_tb_writeport/clk_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/rst_ni
add wave -noupdate -group Writeport /tb/i_tb_writeport/req_rate_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/seq_type_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/seq_run_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/seq_num_vect_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/seq_last_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/dut_req_port_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/MEM_WORDS
add wave -noupdate -group Writeport /tb/i_tb_writeport/RND_SEED
add wave -noupdate -group Writeport /tb/i_tb_writeport/VERBOSE
add wave -noupdate -group Writeport /tb/i_tb_writeport/test_name_i
add wave -noupdate -group Writeport /tb/i_tb_writeport/paddr
add wave -noupdate -group Writeport /tb/i_tb_writeport/seq_done_o
add wave -noupdate -group Writeport /tb/i_tb_writeport/dut_req_port_o
# Wave group "Readport 0": objects of instance /tb/i_tb_readport0.
# The group name is braced because it contains a space.
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/clk_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/rst_ni
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_type_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_run_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_num_resp_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_last_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_done_o
# The two request-port objects are shown with their members unfolded (-expand).
add wave -noupdate -group {Readport 0} -expand /tb/i_tb_readport0/dut_req_port_o
add wave -noupdate -group {Readport 0} -expand /tb/i_tb_readport0/dut_req_port_i
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/paddr
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_end_req
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/seq_end_ack
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/tag_q
add wave -noupdate -group {Readport 0} /tb/i_tb_readport0/tag_vld_q
# Wave group "Readport 1": objects of instance /tb/i_tb_readport1, listed in
# the same order as the "Readport 0" group.
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/clk_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/rst_ni
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_type_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_run_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_num_resp_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_last_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_done_o
# The two request-port objects are shown with their members unfolded (-expand).
add wave -noupdate -group {Readport 1} -expand /tb/i_tb_readport1/dut_req_port_o
add wave -noupdate -group {Readport 1} -expand /tb/i_tb_readport1/dut_req_port_i
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/paddr
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_end_req
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/seq_end_ack
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/tag_q
add wave -noupdate -group {Readport 1} /tb/i_tb_readport1/tag_vld_q
# Wave group "i_tb_mem": objects of instance /tb/i_tb_mem.
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/clk_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/rst_ni
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_rand_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/inv_rand_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/amo_rand_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_data_req_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_data_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/seq_last_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/check_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/commit_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/commit_be_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/commit_paddr_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/write_en_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/write_be_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/write_data_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/write_paddr_i
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/MEM_RAND_HIT_RATE
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/MEM_RAND_INV_RATE
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/MEM_WORDS
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/NC_ADDR_BEGIN
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/NC_ADDR_GE_LT
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_ready_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_inv_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/rand_addr_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/outfifo_data
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/outfifo_pop
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/outfifo_push
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/outfifo_full
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/outfifo_empty
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/infifo_data
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/infifo_pop
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/infifo_push
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/infifo_full
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/infifo_empty
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/initialized_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/write_en
# The three memory arrays are drawn in Magenta to set them apart from the
# other entries of the group.
add wave -noupdate -group i_tb_mem -color Magenta /tb/i_tb_mem/mem_array_q
add wave -noupdate -group i_tb_mem -color Magenta /tb/i_tb_mem/mem_array_shadow_q
add wave -noupdate -group i_tb_mem -color Magenta /tb/i_tb_mem/mem_array_dirty_q
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_rtrn_vld_o
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_rtrn_o
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_data_ack_o
add wave -noupdate -group i_tb_mem /tb/i_tb_mem/mem_array_o
# Divider labelled "Modules", followed by the wave group "i_dut" holding the
# objects found directly under instance /tb/i_dut.
add wave -noupdate -divider Modules
add wave -noupdate -group i_dut /tb/i_dut/clk_i
add wave -noupdate -group i_dut /tb/i_dut/rst_ni
add wave -noupdate -group i_dut /tb/i_dut/enable_i
add wave -noupdate -group i_dut /tb/i_dut/flush_i
add wave -noupdate -group i_dut /tb/i_dut/amo_req_i
add wave -noupdate -group i_dut /tb/i_dut/req_ports_i
add wave -noupdate -group i_dut /tb/i_dut/mem_rtrn_vld_i
add wave -noupdate -group i_dut /tb/i_dut/mem_rtrn_i
add wave -noupdate -group i_dut /tb/i_dut/mem_data_ack_i
add wave -noupdate -group i_dut /tb/i_dut/NC_ADDR_BEGIN
add wave -noupdate -group i_dut /tb/i_dut/NC_ADDR_GE_LT
add wave -noupdate -group i_dut /tb/i_dut/NUM_PORTS
add wave -noupdate -group i_dut /tb/i_dut/cache_en
add wave -noupdate -group i_dut /tb/i_dut/flush_en
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_vld
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_tag
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_idx
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_off
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_data
add wave -noupdate -group i_dut /tb/i_dut/wr_cl_data_be
add wave -noupdate -group i_dut /tb/i_dut/wr_vld_bits
add wave -noupdate -group i_dut /tb/i_dut/wr_req
add wave -noupdate -group i_dut /tb/i_dut/wr_ack
add wave -noupdate -group i_dut /tb/i_dut/wr_idx
add wave -noupdate -group i_dut /tb/i_dut/wr_off
add wave -noupdate -group i_dut /tb/i_dut/wr_data
add wave -noupdate -group i_dut /tb/i_dut/wr_data_be
add wave -noupdate -group i_dut /tb/i_dut/miss_req
add wave -noupdate -group i_dut /tb/i_dut/miss_ack
add wave -noupdate -group i_dut /tb/i_dut/miss_nc
add wave -noupdate -group i_dut /tb/i_dut/miss_we
add wave -noupdate -group i_dut /tb/i_dut/miss_wdata
add wave -noupdate -group i_dut /tb/i_dut/miss_paddr
add wave -noupdate -group i_dut /tb/i_dut/miss_vld_bits
add wave -noupdate -group i_dut /tb/i_dut/miss_size
add wave -noupdate -group i_dut /tb/i_dut/miss_wr_id
add wave -noupdate -group i_dut /tb/i_dut/miss_rtrn_vld
add wave -noupdate -group i_dut /tb/i_dut/miss_rtrn_id
add wave -noupdate -group i_dut /tb/i_dut/rd_req
add wave -noupdate -group i_dut /tb/i_dut/rd_ack
add wave -noupdate -group i_dut /tb/i_dut/rd_tag
add wave -noupdate -group i_dut /tb/i_dut/rd_idx
add wave -noupdate -group i_dut /tb/i_dut/rd_off
add wave -noupdate -group i_dut /tb/i_dut/rd_data
add wave -noupdate -group i_dut /tb/i_dut/rd_vld_bits
add wave -noupdate -group i_dut /tb/i_dut/rd_hit_oh
add wave -noupdate -group i_dut /tb/i_dut/wbuffer_data
add wave -noupdate -group i_dut /tb/i_dut/flush_ack_o
add wave -noupdate -group i_dut /tb/i_dut/miss_o
add wave -noupdate -group i_dut /tb/i_dut/wbuffer_empty_o
add wave -noupdate -group i_dut /tb/i_dut/amo_ack_o
add wave -noupdate -group i_dut /tb/i_dut/req_ports_o
add wave -noupdate -group i_dut /tb/i_dut/mem_data_req_o
add wave -noupdate -group i_dut /tb/i_dut/mem_data_o
# Wave group "i_wbuffer": objects of instance
# /tb/i_dut/i_serpent_dcache_wbuffer.
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/clk_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rst_ni
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/cache_en_i
# The request port pair is drawn in Magenta.
add wave -noupdate -group i_wbuffer -color Magenta /tb/i_dut/i_serpent_dcache_wbuffer/req_port_i
add wave -noupdate -group i_wbuffer -color Magenta /tb/i_dut/i_serpent_dcache_wbuffer/req_port_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_ack_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_rtrn_vld_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_rtrn_id_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_ack_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_data_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_vld_bits_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_hit_oh_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_ack_i
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/empty_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_paddr_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_req_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_we_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_wdata_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_vld_bits_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_nc_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_size_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/miss_wr_id_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_tag_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_idx_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_off_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_req_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_req_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_idx_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_off_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_data_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_data_be_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wbuffer_data_o
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/NC_ADDR_BEGIN
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/NC_ADDR_GE_LT
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_stat_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_stat_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wbuffer_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wbuffer_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/valid
add wave -noupdate -group i_wbuffer -color Magenta /tb/i_dut/i_serpent_dcache_wbuffer/debug_paddr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/dirty
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tocheck
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wbuffer_hit_oh
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/inval_hit
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/bdirty
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/next_ptr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/dirty_ptr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/hit_ptr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_ptr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/check_ptr_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/check_ptr_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rtrn_ptr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_cnt_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_cnt_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_id_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_id_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rtrn_id
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/bdirty_off
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/tx_be
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/wr_paddr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rd_paddr
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/check_en_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/check_en_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/full
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/dirty_rd_en
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rdy
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/rtrn_empty
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/evict
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/nc_pending_d
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/nc_pending_q
add wave -noupdate -group i_wbuffer /tb/i_dut/i_serpent_dcache_wbuffer/addr_is_nc
# Wave group "i_missunit": objects of instance
# /tb/i_dut/i_serpent_dcache_missunit.
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/clk_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/rst_ni
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/enable_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wbuffer_empty_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/amo_req_i
# miss_req_i is shown with its elements unfolded (-expand).
add wave -noupdate -group i_missunit -expand /tb/i_dut/i_serpent_dcache_missunit/miss_req_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_nc_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_we_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_wdata_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_paddr_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_vld_bits_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_size_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_wr_id_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mem_rtrn_vld_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mem_rtrn_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mem_data_ack_i
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_ack_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/cache_en_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_en_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/amo_ack_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_ack_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_replay_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_rtrn_vld_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_rtrn_id_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_vld_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_nc_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_we_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_tag_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_idx_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_off_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_data_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_cl_data_be_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/wr_vld_bits_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mem_data_req_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mem_data_o
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/NUM_PORTS
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/state_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/state_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mshr_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mshr_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/repl_way
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/inv_way
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/rnd_way
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mshr_vld_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mshr_vld_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mshr_allocate
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/update_lfsr
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/all_ways_valid
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/enable_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/enable_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_ack_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_ack_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/amo_sel
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/flush_done
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/mask_reads
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_is_write
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/amo_data
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_port_idx
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/cnt_d
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/cnt_q
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/miss_req
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/inv_vld
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/cl_write_en
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/load_ack
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/store_ack
add wave -noupdate -group i_missunit /tb/i_dut/i_serpent_dcache_missunit/amo_ack
# Wave group "i_mem": objects of instance /tb/i_dut/i_serpent_dcache_mem.
# Unlike the other groups in this script, every entry carries -expand in front
# of -group, so the group itself is restored in its unfolded state.
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/clk_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rst_ni
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_tag_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_idx_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_off_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_req_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_vld_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_tag_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_idx_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_off_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_data_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_cl_data_be_i
# The second -expand (after the group name) applies to wr_vld_bits_i itself.
add wave -noupdate -expand -group i_mem -expand /tb/i_dut/i_serpent_dcache_mem/wr_vld_bits_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_req_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_idx_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_off_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_data_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_data_be_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_data_i
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_ack_o
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_vld_bits_o
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_hit_oh_o
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rd_data_o
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wr_ack_o
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/NUM_PORTS
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_req
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_we
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_be
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_idx
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_idx_d
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_idx_q
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_off_d
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_off_q
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_wdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/bank_rdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rdata_cl
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_req
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_we
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_wdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/tag_rdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_addr
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_sel_d
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_sel_q
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_hit_oh
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_be
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_rdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/rdata
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_cmp_addr
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_bvalid
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/wbuffer_data
add wave -noupdate -expand -group i_mem /tb/i_dut/i_serpent_dcache_mem/vld_tag_rdata
# Wave group "i_ctrl0": objects of /tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl.
# Each path is braced so that Tcl does not treat "[0]" as command substitution.
# NOTE(review): "genblk1" looks like a tool-assigned name for an unlabelled
# generate block; these paths break if that block is ever labelled - confirm.
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/clk_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rst_ni}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/flush_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/cache_en_i}
# req_port_i is shown with its members unfolded (-expand).
add wave -noupdate -group i_ctrl0 -expand {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/req_port_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_ack_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_replay_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_rtrn_vld_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_ack_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_data_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_vld_bits_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_hit_oh_i}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/req_port_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_req_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_we_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_wdata_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_vld_bits_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_paddr_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_nc_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_size_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/miss_wr_id_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_tag_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_idx_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_off_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_req_o}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/NC_ADDR_BEGIN}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/NC_ADDR_GE_LT}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/state_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/state_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_tag_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_tag_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_idx_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_idx_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_off_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/address_off_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/vld_data_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/vld_data_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/save_tag}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_req_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/rd_req_q}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/data_size_d}
add wave -noupdate -group i_ctrl0 {/tb/i_dut/genblk1[0]/i_serpent_dcache_ctrl/data_size_q}
# Wave group "i_ctrl1": objects of /tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl.
# Each path is braced so that Tcl does not treat "[1]" as command substitution.
# No entry in this group uses -expand.
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/clk_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rst_ni}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/flush_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/cache_en_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/req_port_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_ack_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_replay_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_rtrn_vld_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rd_ack_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rd_data_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rd_vld_bits_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rd_hit_oh_i}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/req_port_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_req_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_we_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_wdata_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_vld_bits_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_paddr_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_nc_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_size_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/miss_wr_id_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rd_tag_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rd_idx_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rd_off_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rd_req_o}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/NC_ADDR_BEGIN}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/NC_ADDR_GE_LT}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/state_d}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/state_q}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/address_tag_d}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/address_tag_q}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/address_idx_d}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/address_idx_q}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/address_off_d}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/address_off_q}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/vld_data_d}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/vld_data_q}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/save_tag}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rd_req_d}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/rd_req_q}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/data_size_d}
add wave -noupdate -group i_ctrl1 {/tb/i_dut/genblk1[1]/i_serpent_dcache_ctrl/data_size_q}
# Refresh the wave tree, then add element 6741 of the three /tb/i_tb_mem
# arrays (value, shadow and dirty) as individual entries.
# NOTE(review): WaveActivateNextPane without arguments presumably moves to a
# further wave pane, so these three entries land below the main list - confirm.
TreeUpdate [SetDefaultTree]
quietly WaveActivateNextPane
# Paths are braced so that Tcl does not treat "[6741]" as command substitution.
add wave -noupdate {/tb/i_tb_mem/mem_array_q[6741]}
add wave -noupdate {/tb/i_tb_mem/mem_array_shadow_q[6741]}
add wave -noupdate {/tb/i_tb_mem/mem_array_dirty_q[6741]}
TreeUpdate [SetDefaultTree]
# Restore the three saved cursors (positions in ps) and make cursor 2 active.
WaveRestoreCursors {{Cursor 1} {31432807547 ps} 0} {{Cursor 2} {29040000 ps} 0} {{Cursor 3} {1027790000 ps} 0}
quietly wave cursor active 2
# Wave-window display settings: column widths, value justification,
# name display, snapping, margins, grid and timeline.
configure wave -namecolwidth 375
configure wave -valuecolwidth 224
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Redraw once now that everything was added with -noupdate, then restore the
# saved zoom range.
update
WaveRestoreZoom {0 ps} {103267500 ps}

3
tb/tb_serpent_icache/.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
work
modelsim.ini
*.rep

25
tb/tb_serpent_icache/Makefile Executable file
View file

@ -0,0 +1,25 @@
# Questa/ModelSim flow for the serpent icache testbench.
#
# Targets:
#   build  wipe the simulation library and recompile all sources (default)
#   sim    build, then start the GUI simulator and load wave.do
#   simc   build, then run the simulation to completion in batch mode
#   clean  remove the simulation library
#
# Overridable from the command line / environment:
#   library, toplevel, compile_flag, sim_opts, QUESTASIM_VERSION

# name of the simulation library and of the top-level module to simulate
library        ?= work
toplevel       ?= tb

# tb.list holds the sources in compile order, one path per line; the shell
# pipeline flattens it into a whitespace separated list (evaluated once)
src-list       := tb.list
src            := $(shell xargs printf '\n%s' < $(src-list) | cut -b 1-)

compile_flag   += +cover+/dut -incr -64 -nologo
sim_opts       += -64 -coverage -classdebug -voptargs="+acc"

# optional tool suffix appended to vlib/vlog/vsim (taken from the environment)
questa_version ?= ${QUESTASIM_VERSION}

# always rebuilds from scratch since 'clean' is a prerequisite
build: clean
	vlib${questa_version} $(library)
	vlog${questa_version} -work $(library) -pedanticerrors $(src) $(compile_flag)
	touch $(library)/.build

# interactive simulation with the waveform setup
sim: build
	vsim${questa_version} -lib $(library) $(toplevel) -do "do wave.do" $(sim_opts)

# batch simulation
simc: build
	vsim${questa_version} -lib $(library) $(toplevel) -c -do "run -all; exit" $(sim_opts)

clean:
	$(RM) -r $(library)

.PHONY: clean simc sim build

View file

@ -0,0 +1,267 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: simple emulation layer for the memory subsystem.
//
import ariane_pkg::*;
import serpent_cache_pkg::*;
// Behavioural model of the memory side seen by the icache under test.
//
// - requests arriving on mem_data_i are buffered in a small FIFO (i_outfifo),
//   answered from mem_array and returned through a second FIFO (i_infifo).
// - optional random stalls (mem_rand_en_i) and random invalidations
//   (inv_rand_en_i) can be injected.
// - a third FIFO (i_stimuli_fifo) records the virtual addresses pushed by the
//   testbench; exp_data_o / exp_vaddr_o expose the expected response for the
//   oldest recorded address.
module mem_emul #(
  parameter MemRandHitRate = 10, //in percent
  parameter MemRandInvRate = 5, //in percent
  parameter MemWords = 1024*1024,// in 32bit words
  parameter logic [63:0] CachedAddrBeg = MemWords/2
) (
  input  logic            clk_i,
  input  logic            rst_ni,

  // some tb signals to enable randomization, etc
  input  logic            mem_rand_en_i,
  input  logic            io_rand_en_i,
  input  logic            inv_rand_en_i,
  input  logic [63:0]     tlb_offset_i,

  // stimuli interface to get expected responses
  input  logic [63:0]     stim_vaddr_i,
  input  logic            stim_push_i,
  input  logic            stim_flush_i,
  output logic            stim_full_o,

  output logic [63:0]     exp_data_o,
  output logic [63:0]     exp_vaddr_o,
  output logic            exp_empty_o,
  input  logic            exp_pop_i,

  // icache interface
  output logic            mem_rtrn_vld_o,
  output icache_rtrn_t    mem_rtrn_o,
  input  logic            mem_data_req_i,
  output logic            mem_data_ack_o,
  input  icache_req_t     mem_data_i
);

  logic mem_ready_q, mem_inv_q;
  logic [63:0] rand_addr_q;

  icache_req_t outfifo_data;
  logic outfifo_pop, outfifo_push, outfifo_full, outfifo_empty;
  icache_rtrn_t infifo_data;
  logic infifo_pop, infifo_push, infifo_full, infifo_empty;

  logic [63:0] stim_addr;
  logic exp_empty;

  // mem_array is what the cache reads, mem_array_shadow is the (possibly
  // lagging) view that is used for the expected responses
  logic [31:0] mem_array [MemWords-1:0];
  logic [31:0] mem_array_shadow [MemWords-1:0];
  logic initialized_q;

  // pending invalidations that have not been copied to the shadow memory yet
  logic [31:0] inval_addr_queue[$];
  logic [31:0] inval_data_queue[$];

  // sequential process holding the state of the memory readout process
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_tlb_rand
    automatic bit ok  = 0;
    automatic int rnd = 0;
    automatic logic [31:0] val;
    automatic logic [63:0] lval;

    if(~rst_ni) begin
      mem_ready_q   <= '0;
      mem_inv_q     <= '0;
      rand_addr_q   <= '0;
      initialized_q <= '0;
    end else begin

      // fill the memory once with random data
      if (~initialized_q) begin
        for (int k=0; k<MemWords; k++) begin
          ok=randomize(val);
          mem_array[k]        <= val;
          mem_array_shadow[k] <= val;
        end
        initialized_q <= 1;
      end

      // re-randomize noncacheable I/O space if requested
      // NOTE(review): k indexes 32bit words, whereas the tb passes
      // CachedAddrBeg as MemBytes/4, which looks like a byte address -
      // confirm whether the bound should be CachedAddrBeg>>2.
      if (io_rand_en_i) begin
        for (int k=0; k<CachedAddrBeg; k++) begin
          ok = randomize(val);
          mem_array[k] <= val;
        end
      end

      // generate random contentions
      // (rnd is drawn from [1,100] and compared with '<')
      if (mem_rand_en_i) begin
        ok = randomize(rnd) with {rnd > 0; rnd <= 100;};
        if(rnd < MemRandHitRate) begin
          mem_ready_q <= '1;
        end else
          mem_ready_q <= '0;
      end else begin
        mem_ready_q <= '1;
      end

      // generate random invalidations
      if (inv_rand_en_i) begin
        if (infifo_push) begin
          // update coherent memory view for expected responses
          // (entries are pushed at the front and popped from the back,
          // i.e. they are applied oldest first)
          while(inval_addr_queue.size()>0)begin
            lval = inval_addr_queue.pop_back();
            val  = inval_data_queue.pop_back();
            mem_array_shadow[lval] <= val;
          end
        end

        ok = randomize(rnd) with {rnd > 0; rnd <= 100;};
        if(rnd < MemRandInvRate) begin
          // non-blocking, like every other register in this process: a
          // blocking assignment here races with proc_mem and the FIFOs, and
          // lets the invalidation be generated with a stale rand_addr_q
          mem_inv_q <= '1;
          ok = randomize(lval) with {lval>=0; lval<MemWords;};
          ok = randomize(val);
          // save for coherent view above
          inval_addr_queue.push_front(lval);
          inval_data_queue.push_front(val);
          // overwrite the memory with new random data value
          rand_addr_q     <= lval<<2;
          mem_array[lval] <= val;
        end else begin
          mem_inv_q <= '0;
        end
      end else begin
        // do not leave an invalidation request pending when the
        // randomization is switched off, it would block all refills
        mem_inv_q <= '0;
      end
    end
  end

  // readout process
  always_comb begin : proc_mem
    infifo_push  = 0;
    infifo_data  = '0;
    outfifo_pop  = 0;
    infifo_data.rtype = ICACHE_IFILL_ACK;

    // generate random invalidation
    if (mem_inv_q) begin
      infifo_data.rtype = ICACHE_INV_REQ;
      // since we do not keep a mirror tag table here,
      // we always invalidate all ways of the aliased index.
      // this is not entirely correct and will produce
      // too many invalidations
      infifo_data.inv.idx = rand_addr_q[ICACHE_INDEX_WIDTH-1:0];
      infifo_data.inv.all = '1;
      infifo_push         = 1'b1;
    end else if ((~outfifo_empty) && (~infifo_full) && mem_ready_q) begin
      outfifo_pop = 1'b1;
      infifo_push = 1'b1;
      // address goes to I/O space
      if (outfifo_data.nc) begin
        infifo_data.nc  = 1'b1;
        infifo_data.f4b = 1'b1;
        // replicate the addressed word over the whole line
        // (this is done in openpiton, too)
        for (int k=0; k<ICACHE_LINE_WIDTH/32; k++) begin
          infifo_data.data[k*32 +:32] = mem_array[outfifo_data.paddr>>2];
        end
      end else begin
        infifo_data.nc = outfifo_data.nc;
        // return consecutive words starting at the requested address
        for (int k=0; k<ICACHE_LINE_WIDTH/32; k++) begin
          infifo_data.data[k*32 +:32] = mem_array[(outfifo_data.paddr>>2) + k];
        end
      end
    end
  end

  // buffers the requests coming from the cache
  fifo_v2 #(
    .dtype(icache_req_t),
    .DEPTH(2)
  ) i_outfifo (
    .clk_i       ( clk_i         ),
    .rst_ni      ( rst_ni        ),
    .flush_i     ( 1'b0          ),
    .testmode_i  ( 1'b0          ),
    .full_o      ( outfifo_full  ),
    .empty_o     ( outfifo_empty ),
    .alm_full_o  (               ),
    .alm_empty_o (               ),
    .data_i      ( mem_data_i    ),
    .push_i      ( outfifo_push  ),
    .data_o      ( outfifo_data  ),
    .pop_i       ( outfifo_pop   )
  );

  // a request is acknowledged as soon as it fits into the request FIFO
  assign outfifo_push   = mem_data_req_i & (~outfifo_full);
  assign mem_data_ack_o = outfifo_push;

  // buffers the responses (and invalidations) going to the cache
  fifo_v2 #(
    .dtype(icache_rtrn_t),
    .DEPTH(2)
  ) i_infifo (
    .clk_i       ( clk_i         ),
    .rst_ni      ( rst_ni        ),
    .flush_i     ( 1'b0          ),
    .testmode_i  ( 1'b0          ),
    .full_o      ( infifo_full   ),
    .empty_o     ( infifo_empty  ),
    .alm_full_o  (               ),
    .alm_empty_o (               ),
    .data_i      ( infifo_data   ),
    .push_i      ( infifo_push   ),
    .data_o      ( mem_rtrn_o    ),
    .pop_i       ( infifo_pop    )
  );

  // responses are handed to the cache as soon as they are available
  assign infifo_pop     = ~infifo_empty;
  assign mem_rtrn_vld_o = infifo_pop;

  // this is to readout the expected responses
  fifo_v2 #(
    .DATA_WIDTH(64),
    .DEPTH(3)
  ) i_stimuli_fifo (
    .clk_i       ( clk_i         ),
    .rst_ni      ( rst_ni        ),
    .flush_i     ( stim_flush_i  ),
    .testmode_i  ( 1'b0          ),
    .full_o      ( stim_full_o   ),
    .empty_o     ( exp_empty     ),
    .alm_full_o  (               ),
    .alm_empty_o (               ),
    .data_i      ( stim_vaddr_i  ),
    .push_i      ( stim_push_i   ),
    .data_o      ( stim_addr     ),
    .pop_i       ( exp_pop_i     )
  );

  assign exp_empty_o = exp_empty | stim_flush_i;

  // use last seen memory state in case random invalidations are present
  assign exp_data_o  = (inv_rand_en_i) ? mem_array_shadow[(stim_addr>>2) + (tlb_offset_i>>2)] :
                                         mem_array       [(stim_addr>>2) + (tlb_offset_i>>2)];
  assign exp_vaddr_o = stim_addr;

  align0: assert property (
    @(posedge clk_i) disable iff (~rst_ni) ~exp_empty |-> stim_addr[1:0] == 0)
      else $fatal(1,"stim_addr is not 32bit word aligned");

  align1: assert property (
    @(posedge clk_i) disable iff (~rst_ni) ~outfifo_empty |-> outfifo_data.paddr[1:0] == 0)
      else $fatal(1,"paddr is not 32bit word aligned");

endmodule // mem_emul

View file

@ -0,0 +1,440 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: testbench for piton_icache. includes the following tests:
//
// 0) random accesses with disabled cache
// 1) random accesses with enabled cache to cacheable and noncacheable memory
// 2) linear, wrapping sweep with enabled cache
// 3) 1) with random stalls on the memory side and TLB side
// 4) nr 3) with random invalidations
//
// note that we use a simplified address translation scheme to emulate the TLB.
// (random offsets).
import ariane_pkg::*;
import serpent_cache_pkg::*;
import tb_pkg::*;
// Top-level testbench: instantiates the icache (dut) together with the TLB and
// memory emulation layers, applies the stimuli of TEST0..TEST4 (p_stim) and
// compares every response with the expected one (p_acq).
module tb;

  // leave this
  timeunit 1ps;
  timeprecision 1ps;

  // number of 32bit words
  parameter MemBytes                   = 2**ICACHE_INDEX_WIDTH * 4 * 32;
  parameter MemWords                   = MemBytes>>2;
  parameter logic [63:0] CachedAddrBeg = MemBytes/4;
  parameter logic [63:0] CachedAddrEnd = 64'hFFFF_FFFF_FFFF_FFFF;

  // rates are in percent
  parameter TlbRandHitRate   = 50;
  parameter MemRandHitRate   = 50;
  parameter MemRandInvRate   = 10;

  parameter SeqRate          = 90;
  parameter S1KillRate       = 5;
  parameter S2KillRate       = 5;
  parameter FlushRate        = 1;

  parameter logic [63:0] TlbOffset = 4*1024;//use multiples of 4kB pages!

///////////////////////////////////////////////////////////////////////////////
// MUT signal declarations
///////////////////////////////////////////////////////////////////////////////

  logic           clk_i;
  logic           rst_ni;
  logic           flush_i;
  logic           en_i;
  logic           miss_o;
  icache_areq_i_t areq_i;
  icache_areq_o_t areq_o;
  icache_dreq_i_t dreq_i;
  icache_dreq_o_t dreq_o;
  logic           mem_rtrn_vld_i;
  icache_rtrn_t   mem_rtrn_i;
  logic           mem_data_req_o;
  logic           mem_data_ack_i;
  icache_req_t    mem_data_o;

///////////////////////////////////////////////////////////////////////////////
// TB signal declarations
///////////////////////////////////////////////////////////////////////////////

  logic stim_start, stim_end, end_of_sim, acq_done;
  logic [63:0] num_vectors;
  string test_name;

  logic mem_rand_en;
  logic inv_rand_en;
  logic io_rand_en;
  logic tlb_rand_en;
  logic exception_en;

  logic [63:0] stim_vaddr;
  logic [63:0] exp_data;
  logic [63:0] exp_vaddr;
  logic stim_push, stim_flush, stim_full;
  logic exp_empty, exp_pop;

  logic dut_out_vld, dut_in_rdy;

///////////////////////////////////////////////////////////////////////////////
// Clock Process
///////////////////////////////////////////////////////////////////////////////

  // an initial block starts unconditionally at time 0. (an 'always @*' only
  // starts toggling if it happens to observe an event on end_of_sim.)
  initial
  begin : p_clk
    do begin
      clk_i = 1;#(CLK_HI);
      clk_i = 0;#(CLK_LO);
    end while (end_of_sim == 1'b0);
    repeat (100) begin
      // generate a few extra cycle to allow response acquisition to complete
      clk_i = 1;#(CLK_HI);
      clk_i = 0;#(CLK_LO);
    end
  end

///////////////////////////////////////////////////////////////////////////////
// Helper tasks
///////////////////////////////////////////////////////////////////////////////

  // applies random requests (with random kill and flush events) until the
  // acquisition process signals acq_done.
  // stim_start is pulsed for 10 cycles at the beginning, stim_end is raised
  // at the end and stays high until the next task starts.
  task automatic genRandReq();
    automatic bit ok;
    automatic logic [63:0] val;
    dreq_i.req     = 0;
    dreq_i.kill_s1 = 0;
    dreq_i.kill_s2 = 0;
    num_vectors    = 100000;

    stim_end       = 0;

    stim_start     = 1;
    applWaitCyc(clk_i,10);
    stim_start     = 0;

    // start with clean cache
    flush_i        = 1;
    applWaitCyc(clk_i,1);
    flush_i        = 0;

    while(~acq_done) begin
      // randomize request
      dreq_i.req = 0;
      ok = randomize(val) with {val > 0; val <= 100;};
      if (val < SeqRate) begin
        dreq_i.req = 1;
        // generate random address
        ok = randomize(val) with {val >= 0; val < (MemBytes-TlbOffset)>>2;};
        dreq_i.vaddr = val<<2;// align to 4Byte
        // generate random control events
        ok = randomize(val) with {val > 0; val <= 100;};
        dreq_i.kill_s1 = (val < S1KillRate);
        ok = randomize(val) with {val > 0; val <= 100;};
        dreq_i.kill_s2 = (val < S2KillRate);
        ok = randomize(val) with {val > 0; val <= 100;};
        flush_i = (val < FlushRate);
        applWait(clk_i, dut_in_rdy);
      end else begin
        applWaitCyc(clk_i,1);
      end
    end
    stim_end       = 1;
  endtask : genRandReq


  // applies a linear read sweep that wraps at (MemBytes - TlbOffset) until
  // the acquisition process signals acq_done. uses the same
  // stim_start / stim_end protocol as genRandReq.
  task automatic genSeqRead();
    automatic bit ok;
    automatic logic [63:0] val;
    automatic logic [63:0] addr;
    dreq_i.req     = 0;
    dreq_i.kill_s1 = 0;
    dreq_i.kill_s2 = 0;
    num_vectors    = 32*4*1024;
    addr           = 0;

    stim_end       = 0;

    stim_start     = 1;
    applWaitCyc(clk_i,10);
    stim_start     = 0;

    // start with clean cache
    flush_i        = 1;
    applWaitCyc(clk_i,1);
    flush_i        = 0;

    while(~acq_done) begin
      dreq_i.req   = 1;
      dreq_i.vaddr = addr;
      // generate linear read
      addr = (addr + 4) % (MemBytes - TlbOffset);
      applWait(clk_i, dut_in_rdy);
    end
    stim_end       = 1;
  endtask : genSeqRead

///////////////////////////////////////////////////////////////////////////////
// TLB and memory emulation
///////////////////////////////////////////////////////////////////////////////

  tlb_emul #(
    .TlbRandHitRate(TlbRandHitRate)
  ) i_tlb_emul (
    .clk_i          ( clk_i        ),
    .rst_ni         ( rst_ni       ),
    .tlb_rand_en_i  ( tlb_rand_en  ),
    .exception_en_i ( exception_en ),
    .tlb_offset_i   ( TlbOffset    ),
    // icache interface
    .req_i          ( areq_o       ),
    .req_o          ( areq_i       )
  );

  mem_emul #(
    .MemRandHitRate ( MemRandHitRate ),
    .MemRandInvRate ( MemRandInvRate ),
    .MemWords       ( MemWords       ),
    .CachedAddrBeg  ( CachedAddrBeg  )
  ) i_mem_emul (
    .clk_i          ( clk_i          ),
    .rst_ni         ( rst_ni         ),
    .mem_rand_en_i  ( mem_rand_en    ),
    .io_rand_en_i   ( io_rand_en     ),
    .inv_rand_en_i  ( inv_rand_en    ),
    .tlb_offset_i   ( TlbOffset      ),
    .stim_vaddr_i   ( stim_vaddr     ),
    .stim_push_i    ( stim_push      ),
    .stim_flush_i   ( stim_flush     ),
    .stim_full_o    ( stim_full      ),
    .exp_data_o     ( exp_data       ),
    .exp_vaddr_o    ( exp_vaddr      ),
    .exp_empty_o    ( exp_empty      ),
    .exp_pop_i      ( exp_pop        ),
    .mem_data_req_i ( mem_data_req_o ),
    .mem_data_ack_o ( mem_data_ack_i ),
    .mem_data_i     ( mem_data_o     ),
    .mem_rtrn_vld_o ( mem_rtrn_vld_i ),
    .mem_rtrn_o     ( mem_rtrn_i     )
  );

///////////////////////////////////////////////////////////////////////////////
// MUT
///////////////////////////////////////////////////////////////////////////////

  serpent_icache #(
    .CachedAddrBeg(CachedAddrBeg),
    .CachedAddrEnd(CachedAddrEnd)
  ) dut (
    .clk_i          ( clk_i          ),
    .rst_ni         ( rst_ni         ),
    .flush_i        ( flush_i        ),
    .en_i           ( en_i           ),
    .miss_o         ( miss_o         ),
    .areq_i         ( areq_i         ),
    .areq_o         ( areq_o         ),
    .dreq_i         ( dreq_i         ),
    .dreq_o         ( dreq_o         ),
    .mem_rtrn_vld_i ( mem_rtrn_vld_i ),
    .mem_rtrn_i     ( mem_rtrn_i     ),
    .mem_data_req_o ( mem_data_req_o ),
    .mem_data_ack_i ( mem_data_ack_i ),
    .mem_data_o     ( mem_data_o     )
  );

  // connect interface to expected response channel of memory emulation
  // (an address is recorded when a request is accepted and neither killed in
  // stage 1 nor flushed; it is removed again on a valid response or on kill_s2)
  assign stim_vaddr = dreq_i.vaddr;
  assign stim_push  = dreq_i.req & dreq_o.ready & (~dreq_i.kill_s1) & (~flush_i);
  assign stim_flush = 0;
  assign exp_pop    = (dreq_o.valid | dreq_i.kill_s2) & (~exp_empty);

///////////////////////////////////////////////////////////////////////////////
// stimuli application process
///////////////////////////////////////////////////////////////////////////////

  assign dut_in_rdy = dreq_o.ready;

  initial   // process runs just once
  begin : p_stim
    end_of_sim       = 0;
    num_vectors      = 0;
    stim_start       = 0;
    stim_end         = 0;

    rst_ni           = 0;

    mem_rand_en      = 0;
    tlb_rand_en      = 0;
    inv_rand_en      = 0;
    exception_en     = 0;
    io_rand_en       = 0;

    dreq_i.req       = 0;
    dreq_i.kill_s1   = 0;
    dreq_i.kill_s2   = 0;
    dreq_i.vaddr     = 0;
    flush_i          = 0;
    en_i             = 0;

    // print some info
    $display("TB> current configuration:");
    $display("TB> MemWords        %d",   MemWords);
    $display("TB> CachedAddrBeg   %16X", CachedAddrBeg);
    $display("TB> TlbRandHitRate  %d",   TlbRandHitRate);
    $display("TB> MemRandHitRate  %d",   MemRandHitRate);
    $display("TB> MemRandInvRate  %d",   MemRandInvRate);
    $display("TB> S1KillRate      %d",   S1KillRate);
    $display("TB> S2KillRate      %d",   S2KillRate);
    $display("TB> FlushRate       %d",   FlushRate);

    applWaitCyc(clk_i,100);
    $display("TB> choose TLB offset  %16X", TlbOffset);

    // reset cycles
    applWaitCyc(clk_i,100);
    rst_ni        = 1'b1;
    applWaitCyc(clk_i,100);

    $display("TB> stimuli application started");
    // each test applies stimuli until the acquisition process
    // has checked num_vectors responses
    ///////////////////////////////////////////////
    // TEST 0
    test_name = "TEST0, disabled cache";
    en_i = 0;
    genRandReq();
    applWaitCyc(clk_i,40);
    ///////////////////////////////////////////////
    // TEST 1
    test_name = "TEST1, enabled cache";
    en_i = 1;
    genRandReq();
    applWaitCyc(clk_i,40);
    ///////////////////////////////////////////////
    // TEST 2
    test_name = "TEST2, enabled cache, sequential reads";
    en_i = 1;
    genSeqRead();
    applWaitCyc(clk_i,40);
    ///////////////////////////////////////////////
    // TEST 3
    test_name = "TEST3, enabled cache, random stalls in mem and TLB side";
    en_i = 1;
    mem_rand_en = 1;
    tlb_rand_en = 1;
    genRandReq();
    applWaitCyc(clk_i,40);
    ///////////////////////////////////////////////
    // TEST 4
    test_name = "TEST4, +random invalidations";
    en_i = 1;
    mem_rand_en = 1;
    tlb_rand_en = 1;
    inv_rand_en = 1;
    genRandReq();
    applWaitCyc(clk_i,40);
    ///////////////////////////////////////////////
    end_of_sim = 1;
    $display("TB> stimuli application ended");
  end

///////////////////////////////////////////////////////////////////////////////
// stimuli acquisition process
///////////////////////////////////////////////////////////////////////////////

  assign dut_out_vld = dreq_o.valid;

  initial   // process runs just once
  begin : p_acq

    bit ok;
    progress status;
    string failingTests, tmpstr;
    int    n;

    status       = new();
    failingTests = "";
    acq_done     = 0;

    ///////////////////////////////////////////////
    // loop over tests
    n=0;
    while (1) begin
      // wait for the next test to start or for the end of the simulation.
      // this process must already be waiting here when stim_start is pulsed
      // (it is only high for 10 cycles), since acq_done is what keeps the
      // stimuli loop of the next test running. waiting a fixed number of
      // cycles that exceeds the pause between two tests would miss the pulse
      // and the next test would return without applying any stimuli.
      do begin
        @(posedge clk_i);
      end while ((stim_start == 1'b0) && (end_of_sim == 1'b0));
      if (end_of_sim) break;

      $display("TB: ----------------------------------------------------------------------\n");
      $display("TB> %s", test_name);

      status.reset(num_vectors);
      acq_done = 0;
      for (int k=0;k<num_vectors;k++) begin

        // wait for response
        acqWait(clk_i, dut_out_vld);

        ok=(dreq_o.data == exp_data[FETCH_WIDTH-1:0]) && (dreq_o.vaddr == exp_vaddr);

        if(!ok) begin
          tmpstr =
          $sformatf("vector: %02d - %06d -- exp_vaddr: %16X -- act_vaddr : %16X -- exp_data: %08X -- act_data: %08X",
            n, k, exp_vaddr, dreq_o.vaddr, exp_data[FETCH_WIDTH-1:0], dreq_o.data);
          failingTests = $sformatf("%sTB: %s\n", failingTests, tmpstr);
          $display("TB> %s", tmpstr);
        end
        status.addRes(!ok);
        status.print();
      end
      acq_done = 1;
      n++;
      // wait for stimuli application end
      acqWait(clk_i, stim_end);
    end
    ///////////////////////////////////////////////

    status.printToFile("summary.rep", 1);

    if(status.totErrCnt == 0) begin
      $display("TB: ----------------------------------------------------------------------\n");
      $display("TB: PASSED %0d VECTORS", status.totAcqCnt);
      $display("TB: ----------------------------------------------------------------------\n");
    end else begin
      $display("TB: ----------------------------------------------------------------------\n");
      $display("TB: FAILED %0d OF %0d VECTORS\n", status.totErrCnt, status.totAcqCnt);
      $display("TB: failing tests:");
      $display("%s", failingTests);
      $display("TB: ----------------------------------------------------------------------\n");
    end
  end

endmodule

View file

@ -0,0 +1,177 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: testbench package with some helper functions.
// Testbench helper package: clock/timing constants, wait tasks for stimuli
// application and response acquisition, and a small class (progress) that
// keeps track of the number of checked and failed vectors.
package tb_pkg;

  // // for abs(double) function
  // import mti_cstdlib::*;

  // for timestamps
  import "DPI-C" \time = function int _time (inout int tloc[4]);
  import "DPI-C" function string ctime(inout int tloc[4]);

///////////////////////////////////////////////////////////////////////////////
// parameters
///////////////////////////////////////////////////////////////////////////////

  // creates a 10ns ATI timing cycle
  time CLK_HI               = 5ns;     // set clock high time
  time CLK_LO               = 5ns;     // set clock low time
  time APPL_DEL             = 2ns;     // set stimuli application delay
  time ACQ_DEL              = 8ns;     // set response acquisition delay

  parameter ERROR_CNT_STOP_LEVEL = 1; // use 1 for debugging. 0 runs the complete simulation...

///////////////////////////////////////////////////////////////////////////////
// use to ensure proper ATI timing
///////////////////////////////////////////////////////////////////////////////

  // waits for n rising clock edges plus the application delay.
  // returns immediately (without any delay) if n == 0.
  task automatic applWaitCyc(ref logic Clk_C, input int unsigned n);
    if (n > 0) begin
      repeat (n) @(posedge(Clk_C));
      #(APPL_DEL);
    end
  endtask

  // waits for n rising clock edges plus the acquisition delay.
  // returns immediately (without any delay) if n == 0.
  task automatic acqWaitCyc(ref logic Clk_C, input int unsigned n);
    if (n > 0) begin
      repeat (n) @(posedge(Clk_C));
      #(ACQ_DEL);
    end
  endtask

  // waits for the first rising clock edge at which SigToWaitFor_S is not 0,
  // then adds the application delay.
  task automatic applWait(ref logic Clk_C, ref logic SigToWaitFor_S);
    do begin
      @(posedge(Clk_C));
    end while(SigToWaitFor_S == 1'b0);
    #(APPL_DEL);
  endtask

  // waits for the first rising clock edge at which SigToWaitFor_S is not 0.
  // sample right on active clock edge (no acquisition delay is added).
  task automatic acqWait(ref logic Clk_C, ref logic SigToWaitFor_S);
    do begin
      @(posedge(Clk_C));
    end while(SigToWaitFor_S == 1'b0);
    //#(ACQ_DEL);
  endtask

///////////////////////////////////////////////////////////////////////////////
// progress
///////////////////////////////////////////////////////////////////////////////

  // bookkeeping of checked (acq) and failed (err) vectors, both for the
  // current test and accumulated over all tests (tot*).
  class progress;
    real newState, oldState;
    longint numResp, acqCnt, errCnt, totAcqCnt, totErrCnt;

    function new();
      begin
        this.acqCnt    = 0;
        this.errCnt    = 0;
        this.newState  = 0.0;
        this.oldState  = 0.0;
        this.numResp   = 1;
        this.totAcqCnt = 0;
        this.totErrCnt = 0;
      end
    endfunction : new

    // starts a new test with numResp_ expected responses.
    // the accumulated totals are kept.
    function void reset(longint numResp_);
      begin
        this.acqCnt    = 0;
        this.errCnt    = 0;
        this.newState  = 0.0;
        this.oldState  = 0.0;
        this.numResp   = numResp_;
      end
    endfunction : reset

    // registers one checked vector, isError is added to the error counters.
    // stops the simulation once ERROR_CNT_STOP_LEVEL (if > 0) errors have
    // been seen in the current test.
    function void addRes(int isError);
      begin
        this.acqCnt++;
        this.totAcqCnt++;
        this.errCnt    += isError;
        this.totErrCnt += isError;

        if(ERROR_CNT_STOP_LEVEL <= this.errCnt && ERROR_CNT_STOP_LEVEL > 0) begin
          $error("TB> simulation stopped (ERROR_CNT_STOP_LEVEL = %d reached).", ERROR_CNT_STOP_LEVEL);
          $stop();
        end
      end
    endfunction : addRes

    // prints a progress line each time another percent of the expected
    // responses has been checked.
    function void print();
      begin
        this.newState = $itor(this.acqCnt) / $itor(this.numResp);
        if(this.newState - this.oldState >= 0.01) begin
          $display("TB> validated %03d%% -- %01d failed (%03.3f%%) ",
            $rtoi(this.newState*100.0),
            this.errCnt,
            $itor(this.errCnt) / $itor(this.acqCnt) * 100.0);
          // $fflush();
          this.oldState = this.newState;
        end
      end
    endfunction : print

    // writes a report to 'file' (blanks, slashes and backslashes in the name
    // are replaced by underscores). with summary set, only the totals and a
    // CI pass/fail line are written.
    function void printToFile(string file, bit summary = 0);
      int fd;

      // sanitize string (len() is the standard way to get a string length)
      for(int i=0; i<file.len(); i++) begin
        if(file[i] == " " || file[i] == "/" || file[i] == "\\") begin
          file[i] = "_";
        end
      end

      fd = $fopen(file,"w");
      // $fopen returns 0 on failure, do not write to an invalid descriptor
      if(fd == 0) begin
        $error("TB> could not open %s for writing.", file);
        return;
      end

      if(summary) begin
        $fdisplay(fd, "Simulation Summary");
        $fdisplay(fd, "total: %01d of %01d vectors failed (%03.3f%%) ",
          this.totErrCnt,
          this.totAcqCnt,
          $itor(this.totErrCnt) / $itor(this.totAcqCnt) * 100.0);
        if(this.totErrCnt == 0) begin
          $fdisplay(fd, "CI: PASSED");
        end else begin
          $fdisplay(fd, "CI: FAILED");
        end
      end else begin
        $fdisplay(fd, "test name: %s", file);
        $fdisplay(fd, "this test: %01d of %01d vectors failed (%03.3f%%) ",
          this.errCnt,
          this.acqCnt,
          $itor(this.errCnt) / $itor(this.acqCnt) * 100.0);
        $fdisplay(fd, "total so far: %01d of %01d vectors failed (%03.3f%%) ",
          this.totErrCnt,
          this.totAcqCnt,
          $itor(this.totErrCnt) / $itor(this.totAcqCnt) * 100.0);
      end
      $fclose(fd);
    endfunction : printToFile

  endclass : progress

endpackage : tb_pkg

View file

@ -0,0 +1,75 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: simple emulation layer for the tlb.
//
import ariane_pkg::*;
import serpent_cache_pkg::*;
// Minimal TLB model for the icache testbench: a fetch request is translated
// by adding tlb_offset_i to the virtual address. with tlb_rand_en_i set, the
// translation is randomly stalled. exceptions are never raised
// (exception_en_i is currently unused).
module tlb_emul #(
  parameter TlbRandHitRate = 10 //in percent
)(
  input logic            clk_i,
  input logic            rst_ni,

  input logic            tlb_rand_en_i,
  input logic            exception_en_i,
  input logic [63:0]     tlb_offset_i,

  // icache interface
  input  icache_areq_o_t req_i,
  output icache_areq_i_t req_o
);

  logic tlb_ready_q;

  // the rate is a constant, so it only needs to be checked once
  // (and $fatal expects the finish number as first argument)
  initial begin : p_check_params
    assert(TlbRandHitRate<=100 && TlbRandHitRate>=0) else
      $fatal(1, "TlbRandHitRate must be a percentage");
  end

  // decides in every cycle whether the TLB responds in the next one
  always_ff @(posedge clk_i or negedge rst_ni) begin : p_tlb_rand
    automatic bit ok  = 0;
    automatic int rnd = 0;

    if(~rst_ni) begin
      tlb_ready_q <= '0;
    end else begin
      // non-blocking assignments only: tlb_ready_q feeds proc_tlb and, via
      // req_o, logic that is sampled on this very clock edge. a blocking
      // assignment makes the sampled value depend on process ordering.
      if (tlb_rand_en_i) begin
        // rnd is drawn from [1,100] and compared with '<'
        ok = randomize(rnd) with {rnd > 0; rnd <= 100;};
        if(rnd < TlbRandHitRate) begin
          tlb_ready_q <= '1;
        end else
          tlb_ready_q <= '0;
      end else begin
        tlb_ready_q <= '1;
      end
    end
  end

  // TODO: add random exceptions
  always_comb begin : proc_tlb
    req_o.fetch_valid     = '0;
    req_o.fetch_paddr     = '0;
    req_o.fetch_exception = '0;

    if (req_i.fetch_req && tlb_ready_q) begin
      req_o.fetch_valid = 1'b1;
      req_o.fetch_paddr = req_i.fetch_vaddr + tlb_offset_i;
    end
  end

endmodule // tlb_emul

View file

@ -0,0 +1,15 @@
../../include/riscv_pkg.sv
../../src/debug/dm_pkg.sv
../../include/ariane_pkg.sv
../../include/serpent_cache_pkg.sv
../../src/fpga-support/rtl/SyncSpRamBeNx64.sv
../../src/cache_subsystem/serpent_icache.sv
../../src/common_cells/src/lfsr_8bit.sv
../../src/common_cells/src/fifo_v2.sv
../../src/common_cells/src/fifo_v3.sv
../../src/common_cells/src/lzc.sv
../../src/util/sram.sv
hdl/mem_emul.sv
hdl/tlb_emul.sv
hdl/tb_pkg.sv
hdl/tb.sv

View file

@ -0,0 +1,160 @@
# Wave window setup script (simulator "do" file) for the testbench /tb.
# Keep executing the remaining commands of this script if one of them fails.
onerror {resume}
quietly WaveActivateNextPane {} 0
# Group "TB": signals located directly in the top-level testbench /tb.
add wave -noupdate -group TB /tb/clk_i
add wave -noupdate -group TB /tb/rst_ni
add wave -noupdate -group TB /tb/flush_i
add wave -noupdate -group TB /tb/en_i
add wave -noupdate -group TB /tb/miss_o
add wave -noupdate -group TB /tb/areq_i
add wave -noupdate -group TB /tb/areq_o
add wave -noupdate -group TB /tb/dreq_i
add wave -noupdate -group TB /tb/dreq_o
add wave -noupdate -group TB /tb/mem_rtrn_vld_i
add wave -noupdate -group TB /tb/mem_rtrn_i
add wave -noupdate -group TB /tb/mem_data_req_o
add wave -noupdate -group TB /tb/mem_data_ack_i
add wave -noupdate -group TB /tb/mem_data_o
add wave -noupdate -group TB /tb/stim_start
add wave -noupdate -group TB /tb/stim_end
add wave -noupdate -group TB /tb/end_of_sim
add wave -noupdate -group TB /tb/acq_done
add wave -noupdate -group TB /tb/mem_rand_en
add wave -noupdate -group TB /tb/inv_rand_en
add wave -noupdate -group TB /tb/io_rand_en
add wave -noupdate -group TB /tb/tlb_rand_en
add wave -noupdate -group TB /tb/exception_en
add wave -noupdate -group TB /tb/tlb_offset
add wave -noupdate -group TB /tb/stim_vaddr
add wave -noupdate -group TB /tb/exp_data
add wave -noupdate -group TB /tb/exp_vaddr
add wave -noupdate -group TB /tb/stim_push
add wave -noupdate -group TB /tb/stim_flush
add wave -noupdate -group TB /tb/stim_full
add wave -noupdate -group TB /tb/exp_empty
add wave -noupdate -group TB /tb/exp_pop
add wave -noupdate -group TB /tb/dut_out_vld
add wave -noupdate -group TB /tb/dut_in_rdy
# Group "icache": signals of the instance /tb/dut. This is the only group
# that is added with -expand, i.e. it is shown unfolded in the wave window.
add wave -noupdate -expand -group icache /tb/dut/clk_i
add wave -noupdate -expand -group icache /tb/dut/rst_ni
add wave -noupdate -expand -group icache /tb/dut/flush_i
add wave -noupdate -expand -group icache /tb/dut/en_i
add wave -noupdate -expand -group icache /tb/dut/areq_i
add wave -noupdate -expand -group icache /tb/dut/dreq_i
add wave -noupdate -expand -group icache /tb/dut/mem_rtrn_vld_i
add wave -noupdate -expand -group icache /tb/dut/mem_rtrn_i
add wave -noupdate -expand -group icache /tb/dut/mem_data_ack_i
add wave -noupdate -expand -group icache /tb/dut/NC_ADDR_BEGIN
add wave -noupdate -expand -group icache /tb/dut/NC_ADDR_GE_LE
add wave -noupdate -expand -group icache /tb/dut/cache_en_d
add wave -noupdate -expand -group icache /tb/dut/cache_en_q
add wave -noupdate -expand -group icache /tb/dut/exception_d
add wave -noupdate -expand -group icache /tb/dut/exception_q
add wave -noupdate -expand -group icache /tb/dut/paddr_vld_d
add wave -noupdate -expand -group icache /tb/dut/paddr_vld_q
add wave -noupdate -expand -group icache /tb/dut/vaddr_d
add wave -noupdate -expand -group icache /tb/dut/vaddr_q
add wave -noupdate -expand -group icache /tb/dut/paddr_is_io
add wave -noupdate -expand -group icache /tb/dut/paddr_is_nc
add wave -noupdate -expand -group icache /tb/dut/cl_hit
add wave -noupdate -expand -group icache /tb/dut/cache_rden
add wave -noupdate -expand -group icache /tb/dut/cache_wren
add wave -noupdate -expand -group icache /tb/dut/cmp_en_d
add wave -noupdate -expand -group icache /tb/dut/cmp_en_q
add wave -noupdate -expand -group icache /tb/dut/flush_d
add wave -noupdate -expand -group icache /tb/dut/flush_q
add wave -noupdate -expand -group icache /tb/dut/update_lfsr
add wave -noupdate -expand -group icache /tb/dut/inv_way
add wave -noupdate -expand -group icache /tb/dut/rnd_way
add wave -noupdate -expand -group icache /tb/dut/repl_way
add wave -noupdate -expand -group icache /tb/dut/all_ways_valid
add wave -noupdate -expand -group icache /tb/dut/inv_en
add wave -noupdate -expand -group icache /tb/dut/flush_en
add wave -noupdate -expand -group icache /tb/dut/flush_done
add wave -noupdate -expand -group icache /tb/dut/flush_cnt_d
add wave -noupdate -expand -group icache /tb/dut/flush_cnt_q
add wave -noupdate -expand -group icache /tb/dut/cl_we
add wave -noupdate -expand -group icache /tb/dut/cl_req
add wave -noupdate -expand -group icache /tb/dut/cl_index
add wave -noupdate -expand -group icache /tb/dut/cl_offset_d
add wave -noupdate -expand -group icache /tb/dut/cl_offset_q
add wave -noupdate -expand -group icache /tb/dut/cl_tag_d
add wave -noupdate -expand -group icache /tb/dut/cl_tag_q
add wave -noupdate -expand -group icache /tb/dut/cl_tag_rdata
add wave -noupdate -expand -group icache /tb/dut/cl_rdata
add wave -noupdate -expand -group icache /tb/dut/cl_sel
add wave -noupdate -expand -group icache /tb/dut/vld_biten
add wave -noupdate -expand -group icache /tb/dut/vld_we
add wave -noupdate -expand -group icache /tb/dut/vld_req
add wave -noupdate -expand -group icache /tb/dut/vld_wdata
add wave -noupdate -expand -group icache /tb/dut/vld_rdata
add wave -noupdate -expand -group icache /tb/dut/vld_addr
add wave -noupdate -expand -group icache /tb/dut/state_d
add wave -noupdate -expand -group icache /tb/dut/state_q
add wave -noupdate -expand -group icache /tb/dut/miss_o
add wave -noupdate -expand -group icache /tb/dut/areq_o
add wave -noupdate -expand -group icache /tb/dut/dreq_o
add wave -noupdate -expand -group icache /tb/dut/mem_data_req_o
add wave -noupdate -expand -group icache /tb/dut/mem_data_o
# Group "mem_emul": signals of the instance /tb/i_mem_emul.
# Only mem_rtrn_o is added with -expand within this group.
add wave -noupdate -group mem_emul /tb/i_mem_emul/clk_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/rst_ni
add wave -noupdate -group mem_emul /tb/i_mem_emul/mem_rand_en_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/io_rand_en_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/inv_rand_en_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/tlb_offset_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/stim_vaddr_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/stim_push_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/stim_flush_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/stim_full_o
add wave -noupdate -group mem_emul /tb/i_mem_emul/exp_data_o
add wave -noupdate -group mem_emul /tb/i_mem_emul/exp_vaddr_o
add wave -noupdate -group mem_emul /tb/i_mem_emul/exp_empty_o
add wave -noupdate -group mem_emul /tb/i_mem_emul/exp_pop_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/mem_rtrn_vld_o
add wave -noupdate -group mem_emul -expand /tb/i_mem_emul/mem_rtrn_o
add wave -noupdate -group mem_emul /tb/i_mem_emul/mem_data_req_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/mem_data_ack_o
add wave -noupdate -group mem_emul /tb/i_mem_emul/mem_data_i
add wave -noupdate -group mem_emul /tb/i_mem_emul/mem_ready_q
add wave -noupdate -group mem_emul /tb/i_mem_emul/mem_inv_q
add wave -noupdate -group mem_emul /tb/i_mem_emul/rand_addr_q
add wave -noupdate -group mem_emul /tb/i_mem_emul/rand_data_q
add wave -noupdate -group mem_emul /tb/i_mem_emul/outfifo_data
add wave -noupdate -group mem_emul /tb/i_mem_emul/outfifo_pop
add wave -noupdate -group mem_emul /tb/i_mem_emul/outfifo_push
add wave -noupdate -group mem_emul /tb/i_mem_emul/outfifo_full
add wave -noupdate -group mem_emul /tb/i_mem_emul/outfifo_empty
add wave -noupdate -group mem_emul /tb/i_mem_emul/infifo_data
add wave -noupdate -group mem_emul /tb/i_mem_emul/infifo_pop
add wave -noupdate -group mem_emul /tb/i_mem_emul/infifo_push
add wave -noupdate -group mem_emul /tb/i_mem_emul/infifo_full
add wave -noupdate -group mem_emul /tb/i_mem_emul/infifo_empty
add wave -noupdate -group mem_emul /tb/i_mem_emul/stim_addr
add wave -noupdate -group mem_emul /tb/i_mem_emul/exp_empty
add wave -noupdate -group mem_emul /tb/i_mem_emul/initialized_q
# Group "tlb_emul": signals of the instance /tb/i_tlb_emul.
add wave -noupdate -group tlb_emul /tb/i_tlb_emul/clk_i
add wave -noupdate -group tlb_emul /tb/i_tlb_emul/rst_ni
add wave -noupdate -group tlb_emul /tb/i_tlb_emul/tlb_rand_en_i
add wave -noupdate -group tlb_emul /tb/i_tlb_emul/exception_en_i
add wave -noupdate -group tlb_emul /tb/i_tlb_emul/tlb_offset_i
add wave -noupdate -group tlb_emul /tb/i_tlb_emul/req_i
add wave -noupdate -group tlb_emul /tb/i_tlb_emul/req_o
add wave -noupdate -group tlb_emul /tb/i_tlb_emul/tlb_ready_d
add wave -noupdate -group tlb_emul /tb/i_tlb_emul/tlb_ready_q
# Restore the saved view state of the wave window.
TreeUpdate [SetDefaultTree]
# One cursor named "Cursor 1", placed at 3047 ps; make it the active cursor.
WaveRestoreCursors {{Cursor 1} {3047 ps} 0}
quietly wave cursor active 1
# Layout of the wave window: column widths, value justification, row
# margins, grid and timeline settings (timeline units are ps).
configure wave -namecolwidth 208
configure wave -valuecolwidth 420
configure wave -justifyvalue left
configure wave -signalnamewidth 1
configure wave -snapdistance 10
configure wave -datasetprefix 0
configure wave -rowmargin 4
configure wave -childrowmargin 2
configure wave -gridoffset 0
configure wave -gridperiod 1
configure wave -griddelta 40
configure wave -timeline 0
configure wave -timelineunits ps
# Redraw the window and restore the saved zoom range (3049926 ps .. 3050004 ps).
update
WaveRestoreZoom {3049926 ps} {3050004 ps}