Merge remote-tracking branch 'origin/ariane_next' into fpnew

This commit is contained in:
Florian Zaruba 2018-09-11 12:00:48 +02:00
commit db846a6c75
No known key found for this signature in database
GPG key ID: E742FFE8EC38A792
82 changed files with 3660 additions and 2932 deletions

View file

@ -1,102 +1,77 @@
before_script:
- export CXX=g++-4.8.3 CC=gcc-4.8.3
# paths to local or network installation (the riscv toolchain and
# verilator are not built in a ci job in this case)
- export QUESTASIM_HOME=/scratch/$USER/questasim
- export QUESTASIM_VERSION=
- export RISCV=/scratch/$USER/riscv_install
- export VERILATOR_ROOT=/scratch/$USER/verilator-3.924
# setup dependent paths
- export PATH=${RISCV}/bin:$VERILATOR_ROOT/bin:${PATH}
- export LIBRARY_PATH=$CI_PROJECT_DIR/tmp/lib
- export LD_LIBRARY_PATH=$CI_PROJECT_DIR/tmp/lib
- export C_INCLUDE_PATH=$CI_PROJECT_DIR/tmp/include:$VERILATOR_ROOT/include
- export CPLUS_INCLUDE_PATH=$CI_PROJECT_DIR/tmp/include:$VERILATOR_ROOT/include
# number of parallel jobs to use for make commands and simulation
- export NUM_JOBS=4
- ci/make-tmp.sh
- git submodule update --init --recursive
- export LIBRARY_PATH=$LIBRARY_PATH:$CI_PROJECT_DIR/tmp/lib
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CI_PROJECT_DIR/tmp/lib
- export C_INCLUDE_PATH=$C_INCLUDE_PATH:$CI_PROJECT_DIR/tmp/include
- export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:$CI_PROJECT_DIR/tmp/include
- export VERILATOR_ROOT=$CI_PROJECT_DIR/tmp/verilator-3.918/
variables:
GIT_SUBMODULE_STRATEGY: recursive
stages:
- build
- test
- deploy
build-ci:
build:
stage: build
script:
- make build
- echo $VERILATOR_ROOT
- ci/make-tmp.sh
- ci/install-fesvr.sh
- ci/install-verilator.sh
- ci/build-riscv-tests.sh
- make clean
- make build questa_version=$QUESTASIM_VERSION
- make verilate verilator=$VERILATOR_ROOT/bin/verilator
artifacts:
paths:
- tmp/
paths:
- tmp
test_alu:
# rv64ui-p-* and rv64ui-v-* tests
run-asm-tests-questa:
stage: test
script:
- make build library=alu_lib
- make alu library=alu_lib
- vcover-10.6 report alu.ucdb
test_fifo:
stage: test
script:
- make build library=fifo_lib
- make fifo library=fifo_lib
- vcover-10.6 report fifo.ucdb
.test_scoreboard:
stage: test
script:
- make build library=scoreboard_lib
- make scoreboard library=scoreboard_lib
- vcover-10.6 report scoreboard.ucdb
test_store_queue:
stage: test
script:
- make build library=store_queue_lib
- make store_queue library=store_queue_lib
- vcover-10.6 report store_queue.ucdb
test_core_asm:
stage: test
script:
- make build library=core_lib
- make run-asm-tests library=core_lib
- vcover-10.6 report run-asm-tests.ucdb
- make -j${NUM_JOBS} run-asm-tests questa_version=$QUESTASIM_VERSION
dependencies:
- build-ci
- build
test_core_asm_verilator:
run-benchmarks-questa:
stage: test
script:
- make run-asm-tests-verilator verilator=$CI_PROJECT_DIR/tmp/bin/verilator
- make -j${NUM_JOBS} run-benchmarks questa_version=$QUESTASIM_VERSION
dependencies:
- build-ci
- build
# test with the randomized memory interfaces
.test_core_asm_rand:
# rv64ui-p-* tests
run-asm-tests1-verilator:
stage: test
script:
- make build library=core_rand_lib
# same as above but pass the rand_mem_if flag
- make run-asm-tests library=core_rand_lib uvm-flags=+rand_mem_if
- vcover-10.6 report run-asm-rand-tests.ucdb
- make -j${NUM_JOBS} run-asm-tests1-verilator verilator=$VERILATOR_ROOT/bin/verilator
dependencies:
- build-ci
- build
.test_failed_tests:
# rv64ui-v-* tests
run-asm-tests2-verilator:
stage: test
script:
- make build library=failed_tests_lib
- make run-failed-tests library=failed_tests_lib
- vcover-10.6 report run-failed-tests.ucdb
- make -j${NUM_JOBS} run-asm-tests2-verilator verilator=$VERILATOR_ROOT/bin/verilator
dependencies:
- build
.test_lsu:
run-benchmarks-verilator:
stage: test
script:
- make build library=lsu_lib
- make lsu library=lsu_lib
- vcover-10.6 report lsu.ucdb
- vcover-10.6 report -html lsu.ucdb
artifacts:
paths:
- covhtmlreport
- make -j${NUM_JOBS} run-benchmarks-verilator verilator=$VERILATOR_ROOT/bin/verilator
dependencies:
- build

16
.gitmodules vendored
View file

@ -10,9 +10,15 @@
[submodule "src/axi_node"]
path = src/axi_node
url = https://github.com/pulp-platform/axi_node.git
[submodule "fpnew"]
path = src/fpnew
url = git@iis-git.ee.ethz.ch:pulp-restricted/fpnew.git
[submodule "fpu-legacy"]
[submodule "src/fpu"]
path = src/fpu
url = git@iis-git.ee.ethz.ch:sasa/fpnew.git
[submodule "src/fpga-support"]
path = src/fpga-support
url = https://github.com/pulp-platform/fpga-support.git
[submodule "src/common_cells"]
path = src/common_cells
url = https://github.com/pulp-platform/common_cells.git
[submodule "src/fpu_legacy"]
path = src/fpu_legacy
url = git@github.com:pulp-platform/fpu.git
url = git@iis-git.ee.ethz.ch:sasa/fpu.git

View file

@ -5,6 +5,7 @@ cache:
apt: true
directories:
$RISCV
$VERILATOR_ROOT
# required packages to install
addons:
@ -32,7 +33,7 @@ addons:
env:
global:
- RISCV="/home/travis/riscv_install"
- PATH="/home/travis/riscv_install/bin:$PATH"
- VERILATOR_ROOT="/home/travis/verilator-3.924/"
branches:
only:
@ -41,12 +42,16 @@ branches:
before_install:
- export CXX=g++-4.8 CC=gcc-4.8
- ci/make-tmp.sh
# setup dependent paths
- export PATH=$RISCV/bin:$VERILATOR_ROOT/bin:$PATH
- export LIBRARY_PATH=$TRAVIS_BUILD_DIR/tmp/lib
- export LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/tmp/lib
- export C_INCLUDE_PATH=$TRAVIS_BUILD_DIR/tmp/include
- export CPLUS_INCLUDE_PATH=$TRAVIS_BUILD_DIR/tmp/include
- export VERILATOR_ROOT=$TRAVIS_BUILD_DIR/tmp/verilator-3.924/
- export C_INCLUDE_PATH=$TRAVIS_BUILD_DIR/tmp/include:$VERILATOR_ROOT/include
- export CPLUS_INCLUDE_PATH=$TRAVIS_BUILD_DIR/tmp/include:$VERILATOR_ROOT/include
# number of parallel jobs to use for make commands and simulation
- export NUM_JOBS=4
- ci/make-tmp.sh
- git submodule update --init --recursive
stages:
- compile
@ -55,15 +60,30 @@ stages:
jobs:
include:
- stage: compile
name: prepare cache
script:
- ci/build-riscv-gcc.sh
- stage: test
script:
- ci/install-verilator.sh
- stage: test
name: run riscv benchmarks
script:
- ci/install-fesvr.sh
- ci/build-riscv-tests.sh
- make verilate verilator=$TRAVIS_BUILD_DIR/tmp/bin/verilator
- ci/run-tests.sh
- make -j${NUM_JOBS} run-benchmarks-verilator verilator=$VERILATOR_ROOT/bin/verilator
# rv64ui-p-* tests
- stage: test
name: run rv64ui-p-* asm tests
script:
- ci/install-fesvr.sh
- ci/build-riscv-tests.sh
- make -j${NUM_JOBS} run-asm-tests1-verilator verilator=$VERILATOR_ROOT/bin/verilator
# rv64ui-v-* tests
- stage: test
name: run rv64ui-v-* asm tests
script:
- ci/install-fesvr.sh
- ci/build-riscv-tests.sh
- make -j${NUM_JOBS} run-asm-tests2-verilator verilator=$VERILATOR_ROOT/bin/verilator
# extra time during long builds
install: travis_wait

View file

@ -3,14 +3,14 @@ package:
authors: [ "Florian Zaruba <zarubaf@iis.ee.ethz.ch>" ]
dependencies:
axi: { git: "git@iis-git.ee.ethz.ch:sasa/axi.git", rev: master }
axi2per: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi2per.git", rev: master }
axi_mem_if: { git: "git@github.com:pulp-platform/axi_mem_if.git", rev: master }
axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: 1.0.3 }
axi_slice: { git: "git@iis-git.ee.ethz.ch:sasa/axi_slice.git", version: 1.1.2 }
tech_cells_generic: { git: "git@iis-git.ee.ethz.ch:pulp-open/tech_cells_generic.git", rev: master }
common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: 1.1.0 }
axi: { git: "git@iis-git.ee.ethz.ch:sasa/axi.git", rev: master }
axi2per: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi2per.git", rev: master }
axi_mem_if: { git: "git@github.com:pulp-platform/axi_mem_if.git", rev: master }
axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: 1.0.3 }
axi_slice: { git: "git@iis-git.ee.ethz.ch:sasa/axi_slice.git", version: 1.1.2 }
tech_cells_generic: { git: "git@iis-git.ee.ethz.ch:pulp-open/tech_cells_generic.git", rev: master }
common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: v1.7.0 }
fpga-support: { git: "https://github.com/pulp-platform/fpga-support.git", version: v0.3.2 }
sources:
- src/fpu_legacy/hdl/fpu_utils/fpu_ff.sv
- src/fpu_legacy/hdl/fpu_div_sqrt_mvp/defs_div_sqrt_mvp.sv
@ -21,35 +21,35 @@ sources:
- src/fpu_legacy/hdl/fpu_div_sqrt_mvp/norm_div_sqrt_mvp.sv
- src/fpu_legacy/hdl/fpu_div_sqrt_mvp/nrbd_nrsc_mvp.sv
- src/fpu_legacy/hdl/fpu_div_sqrt_mvp/preprocess_mvp.sv
- src/fpnew/src/pkg/fpnew_pkg.vhd
- src/fpnew/src/pkg/fpnew_fmts_pkg.vhd
- src/fpnew/src/pkg/fpnew_comps_pkg.vhd
- src/fpnew/src/pkg/fpnew_pkg_constants.vhd
- src/fpnew/src/utils/fp_pipe.vhd
- src/fpnew/src/utils/fp_rounding.vhd
- src/fpnew/src/utils/fp_arbiter.vhd
- src/fpnew/src/ops/fma_core.vhd
- src/fpnew/src/ops/fp_fma.vhd
- src/fpnew/src/ops/fp_divsqrt_multi.vhd
- src/fpnew/src/ops/fp_noncomp.vhd
- src/fpnew/src/ops/fp_f2fcasts.vhd
- src/fpnew/src/ops/fp_f2icasts.vhd
- src/fpnew/src/ops/fp_i2fcasts.vhd
- src/fpnew/src/ops/fp_conv_multi.vhd
- src/fpnew/src/subunits/addmul_fmt_slice.vhd
- src/fpnew/src/subunits/addmul_block.vhd
- src/fpnew/src/subunits/divsqrt_multifmt_slice.vhd
- src/fpnew/src/subunits/divsqrt_block.vhd
- src/fpnew/src/subunits/noncomp_fmt_slice.vhd
- src/fpnew/src/subunits/noncomp_block.vhd
- src/fpnew/src/subunits/conv_multifmt_slice.vhd
- src/fpnew/src/subunits/conv_block.vhd
- src/fpnew/src/fpnew.vhd
- src/fpnew/src/fpnew_top.vhd
- src/fpu/src/pkg/fpnew_pkg.vhd
- src/fpu/src/pkg/fpnew_fmts_pkg.vhd
- src/fpu/src/pkg/fpnew_comps_pkg.vhd
- src/fpu/src/pkg/fpnew_pkg_constants.vhd
- src/fpu/src/utils/fp_pipe.vhd
- src/fpu/src/utils/fp_rounding.vhd
- src/fpu/src/utils/fp_arbiter.vhd
- src/fpu/src/ops/fma_core.vhd
- src/fpu/src/ops/fp_fma.vhd
- src/fpu/src/ops/fp_divsqrt_multi.vhd
- src/fpu/src/ops/fp_noncomp.vhd
- src/fpu/src/ops/fp_f2fcasts.vhd
- src/fpu/src/ops/fp_f2icasts.vhd
- src/fpu/src/ops/fp_i2fcasts.vhd
- src/fpu/src/ops/fp_conv_multi.vhd
- src/fpu/src/subunits/addmul_fmt_slice.vhd
- src/fpu/src/subunits/addmul_block.vhd
- src/fpu/src/subunits/divsqrt_multifmt_slice.vhd
- src/fpu/src/subunits/divsqrt_block.vhd
- src/fpu/src/subunits/noncomp_fmt_slice.vhd
- src/fpu/src/subunits/noncomp_block.vhd
- src/fpu/src/subunits/conv_multifmt_slice.vhd
- src/fpu/src/subunits/conv_block.vhd
- src/fpu/src/fpnew.vhd
- src/fpu/src/fpnew_top.vhd
- include/riscv_pkg.sv
- src/debug/dm_pkg.sv
- include/ariane_pkg.sv
- include/nbdcache_pkg.sv
- include/std_cache_pkg.sv
- target: not(synthesis)
files:
- src/util/instruction_tracer_pkg.sv
@ -67,7 +67,6 @@ sources:
- src/decoder.sv
- src/ex_stage.sv
- src/fetch_fifo.sv
- src/ff1.sv
- src/frontend.sv
- src/icache.sv
- src/id_stage.sv
@ -82,8 +81,12 @@ sources:
- src/mmu.sv
- src/mult.sv
- src/nbdcache.sv
- src/vdregs.sv
- src/perf_counters.sv
- src/ptw.sv
- src/std_cache_subsystem.sv
- src/sram_wrapper.sv
# - src/ariane_regfile_ff.sv
- src/ariane_regfile.sv
- src/re_name.sv
- src/scoreboard.sv

227
Makefile
View file

@ -3,69 +3,119 @@
# Description: Makefile for linting and testing Ariane.
# compile everything in the following library
library ?= work
library ?= work
# Top level module to compile
top_level ?= ariane_tb
top_level ?= ariane_tb
test_top_level ?= ariane_tb
# Maximum amount of cycles for a successful simulation run
max_cycles ?= 10000000
max_cycles ?= 10000000
# Test case to run
test_case ?= core_test
test_case ?= core_test
# QuestaSim Version
questa_version ?= -10.6b
questa_version ?= ${QUESTASIM_VERSION}
# verilator version
verilator ?= verilator
verilator ?= verilator
# target option
target-options ?=
# Sources
# Ariane PKG
ariane_pkg := include/riscv_pkg.sv src/debug/dm_pkg.sv include/ariane_pkg.sv include/nbdcache_pkg.sv include/axi_if.sv
# FPnew PKG
fpnew_pkg := src/fpnew/src/pkg/fpnew_pkg.vhd src/fpnew/src/pkg/fpnew_fmts_pkg.vhd src/fpnew/src/pkg/fpnew_comps_pkg.vhd src/fpnew/src/pkg/fpnew_pkg_constants.vhd
# Package files -> compile first
ariane_pkg := include/riscv_pkg.sv \
src/debug/dm_pkg.sv \
include/ariane_pkg.sv \
include/std_cache_pkg.sv \
include/axi_if.sv \
src/fpu/src/pkg/fpnew_pkg.vhd \
src/fpu/src/pkg/fpnew_fmts_pkg.vhd \
src/fpu/src/pkg/fpnew_comps_pkg.vhd \
src/fpu/src/pkg/fpnew_pkg_constants.vhd
# utility modules
util := $(wildcard src/util/*.svh) src/util/instruction_tracer_pkg.sv src/util/instruction_tracer_if.sv \
src/util/generic_fifo.sv src/util/cluster_clock_gating.sv src/util/behav_sram.sv
util := $(wildcard src/util/*.svh) \
src/util/instruction_tracer_pkg.sv \
src/util/instruction_tracer_if.sv \
src/util/cluster_clock_gating.sv \
src/util/sram.sv
# Test packages
test_pkg := $(wildcard tb/test/*/*sequence_pkg.sv*) $(wildcard tb/test/*/*_pkg.sv*)
test_pkg := $(wildcard tb/test/*/*sequence_pkg.sv*) \
$(wildcard tb/test/*/*_pkg.sv*)
# DPI
dpi := $(patsubst tb/dpi/%.cc,work/%.o,$(wildcard tb/dpi/*.cc))
dpi_hdr := $(wildcard tb/dpi/*.h)
# this list contains the standalone components
src := $(wildcard src/*.sv) $(wildcard tb/common/*.sv) $(wildcard src/axi_slice/*.sv) \
$(wildcard src/axi_node/*.sv) $(wildcard src/axi_mem_if/src/*.sv) src/fpu_legacy/hdl/fpu_utils/fpu_ff.sv \
src/fpu_legacy/hdl/fpu_div_sqrt_mvp/defs_div_sqrt_mvp.sv $(wildcard src/fpu_legacy/hdl/fpu_div_sqrt_mvp/*.sv) \
$(fpnew_pkg) $(wildcard src/fpnew/src/utils/*.vhd) $(wildcard src/fpnew/src/ops/*.vhd) \
$(wildcard src/fpnew/src/subunits/*.vhd) src/fpnew/src/fpnew.vhd src/fpnew/src/fpnew_top.vhd \
$(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) $(wildcard bootrom/*.sv) \
$(wildcard src/debug/debug_rom/*.sv)
# <<<<<<< HEAD
# src := $(wildcard src/*.sv) $(wildcard tb/common/*.sv) $(wildcard src/axi_slice/*.sv) \
# $(wildcard src/axi_node/*.sv) $(wildcard src/axi_mem_if/src/*.sv) src/fpu_legacy/hdl/fpu_utils/fpu_ff.sv \
# src/fpu_legacy/hdl/fpu_div_sqrt_mvp/defs_div_sqrt_mvp.sv $(wildcard src/fpu_legacy/hdl/fpu_div_sqrt_mvp/*.sv) \
# $(fpnew_pkg) $(wildcard src/fpnew/src/utils/*.vhd) $(wildcard src/fpnew/src/ops/*.vhd) \
# $(wildcard src/fpnew/src/subunits/*.vhd) src/fpnew/src/fpnew.vhd src/fpnew/src/fpnew_top.vhd \
# $(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) $(wildcard bootrom/*.sv) \
# $(wildcard src/debug/debug_rom/*.sv)
# =======
src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \
$(wildcard src/fpu/src/utils/*.vhd) \
$(wildcard src/fpu/src/ops/*.vhd) \
$(wildcard src/fpu/src/subunits/*.vhd) \
src/fpu_legacy/hdl/fpu_div_sqrt_mvp/defs_div_sqrt_mvp.sv \
$(wildcard src/fpu_legacy/hdl/fpu_div_sqrt_mvp/*.sv) \
$(wildcard src/cache_subsystem/*.sv) \
$(wildcard bootrom/*.sv) \
$(wildcard src/axi_slice/*.sv) \
$(wildcard src/clint/*.sv) \
$(wildcard src/axi_node/*.sv) \
$(wildcard src/axi_mem_if/src/*.sv) \
$(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) \
$(wildcard src/debug/debug_rom/*.sv) \
src/fpu/src/fpnew.vhd \
src/fpu/src/fpnew_top.vhd \
src/fpu_legacy/hdl/fpu_utils/fpu_ff.sv \
src/fpga-support/rtl/SyncSpRamBeNx64.sv \
src/common_cells/src/deprecated/generic_fifo.sv \
src/common_cells/src/deprecated/pulp_sync.sv \
src/common_cells/src/deprecated/find_first_one.sv \
src/common_cells/src/fifo_v2.sv \
src/common_cells/src/lzc.sv \
src/common_cells/src/rrarbiter.sv \
src/common_cells/src/lfsr_8bit.sv \
tb/ariane_testharness.sv \
tb/common/SimDTM.sv \
tb/common/SimJTAG.sv
# look for testbenches
tbs := tb/ariane_tb.sv tb/ariane_testharness.sv
# RISCV-tests path
riscv-test-dir := tmp/riscv-tests/build/isa
# there is a defined test-list of CI tests
riscv-ci-tests := $$(xargs printf '\n%s' < ci/test.list | cut -b 1-)
# RISCV asm tests and benchmark setup (used for CI)
# there is a defined test-list with selected CI tests
riscv-test-dir := tmp/riscv-tests/build/isa/
riscv-benchmarks-dir := tmp/riscv-tests/build/benchmarks/
riscv-asm-tests-list := ci/riscv-asm-tests.list
riscv-benchmarks-list := ci/riscv-benchmarks.list
riscv-asm-tests := $(shell xargs printf '\n%s' < $(riscv-asm-tests-list) | cut -b 1-)
riscv-benchmarks := $(shell xargs printf '\n%s' < $(riscv-benchmarks-list) | cut -b 1-)
# preset which runs a single test
riscv-test ?= $(riscv-test-dir)/rv64ui-p-add
riscv-test ?= rv64ui-p-add
# failed test directory
failed-tests := $(wildcard failedtests/*.S)
# Search here for include files (e.g.: non-standalone components)
incdir := ./includes
# Compile and sim flags
compile_flag += +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive
compile_flag += +cover=bcfst+/dut -quiet -incr -64 -nologo -suppress 13262 -permissive
compile_flag_vhd += -64 -nologo -quiet -2008
uvm-flags += +UVM_NO_RELNOTES
# Iterate over all include directories and write them with +incdir+ prefixed
# +incdir+ works for Verilator and QuestaSim
list_incdir := $(foreach dir, ${incdir}, +incdir+$(dir))
# Build the TB and module using QuestaSim
build: $(library) $(library)/.build-srcs $(library)/.build-tb $(library)/ariane_dpi.so
# Optimize top level
# Optimize top level
vopt$(questa_version) $(compile_flag) -work $(library) $(test_top_level) -o $(test_top_level)_optimized +acc -check_synthesis
# src files
$(library)/.build-srcs: $(ariane_pkg) $(util) $(src)
vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(ariane_pkg)) $(list_incdir) -suppress 2583
vcom$(questa_version) $(compile_flag_vhd) -work $(library) -pedanticerrors $(filter %.vhd,$(ariane_pkg))
vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(util)) $(list_incdir) -suppress 2583
# Suppress message that always_latch may not be checked thoroughly by QuestaSim.
vcom$(questa_version) $(compile_flag_vhd) -work $(library) -pedanticerrors $(filter %.vhd,$(src))
@ -89,60 +139,93 @@ $(library)/ariane_dpi.so: $(dpi)
$(library):
# Create the library
vlib${questa_version} ${library}
# +jtag_rbb_enable=1
sim: build $(library)/ariane_dpi.so
vsim${questa_version} +permissive -64 -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=HIGH" -coverage -classdebug +jtag_rbb_enable=1 \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi -do " do tb/wave/wave_core.do; run -all; exit" ${top_level}_optimized +permissive-off ++$(riscv-test)
vsim${questa_version} +permissive -noautoldlibpath -64 -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi -do " do tb/wave/wave_core.do; run -all; exit" \
${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options)
simc: build $(library)/ariane_dpi.so
vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=HIGH" -coverage -classdebug +jtag_rbb_enable=1 \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi -do " do tb/wave/wave_core.do; run -all; exit" ${top_level}_optimized +permissive-off ++$(riscv-test)
vsim${questa_version} +permissive -noautoldlibpath -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi -do "set StdArithNoWarnings 1; set NumericStdNoWarnings 1; do tb/wave/wave_core.do; run -all; exit" \
${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test)
run-asm-tests: build
$(foreach test, $(riscv-ci-tests), vsim$(questa_version) +permissive -64 +BASEDIR=$(riscv-test-dir) +max-cycles=$(max_cycles) \
+UVM_TESTNAME=$(test_case) $(uvm-flags) +ASMTEST=$(test) +uvm_set_action="*,_ALL_,UVM_ERROR,UVM_DISPLAY|UVM_STOP" -c \
-coverage -classdebug -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi \
-do "coverage save -onexit $@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
$(library).$(test_top_level)_optimized +permissive-off ++$(test);)
$(riscv-asm-tests): build $(library)/ariane_dpi.so
vsim${questa_version} +permissive -noautoldlibpath -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi \
-do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$@ ++$(target-options) | tee tmp/riscv-asm-tests-$@.log
verilate_command := $(verilator) \
$(ariane_pkg) \
tb/ariane_testharness.sv \
$(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \
$(wildcard src/axi_slice/*.sv) \
$(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) \
src/debug/debug_rom/debug_rom.sv \
src/util/generic_fifo.sv \
tb/common/SimDTM.sv \
tb/common/SimJTAG.sv \
tb/common/pulp_sync.sv \
bootrom/bootrom.sv \
src/util/cluster_clock_gating.sv \
src/util/behav_sram.sv \
src/axi_mem_if/src/axi2mem.sv \
+incdir+src/axi_node \
--unroll-count 256 \
-Werror-PINMISSING \
-Werror-IMPLICIT \
-Wno-fatal \
-Wno-PINCONNECTEMPTY \
-Wno-ASSIGNDLY \
-Wno-DECLFILENAME \
-Wno-UNOPTFLAT \
-Wno-UNUSED \
-Wno-ASSIGNDLY \
$(if $(DEBUG),--trace-structs --trace,) \
$(riscv-benchmarks): build $(library)/ariane_dpi.so
vsim${questa_version} +permissive -noautoldlibpath -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-benchmarks-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi \
-do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
${top_level}_optimized +permissive-off ++$(riscv-benchmarks-dir)/$@ ++$(target-options) | tee tmp/riscv-benchmarks-$@.log
# can use -jX to run ci tests in parallel using X processes
run-asm-tests: $(riscv-asm-tests)
make check-asm-tests
check-asm-tests:
ci/check-tests.sh tmp/riscv-asm-tests- $(riscv-asm-tests-list)
# can use -jX to run ci tests in parallel using X processes
run-benchmarks: $(riscv-benchmarks)
make check-benchmarks
check-benchmarks:
ci/check-tests.sh tmp/riscv-benchmarks- $(riscv-benchmarks-list)
verilate_command := $(verilator) \
$(ariane_pkg) \
$(filter-out tb/ariane_bt.sv,$(src)) \
src/util/sram.sv \
+incdir+src/axi_node \
--unroll-count 256 \
-Werror-PINMISSING \
-Werror-IMPLICIT \
-Wno-fatal \
-Wno-PINCONNECTEMPTY \
-Wno-ASSIGNDLY \
-Wno-DECLFILENAME \
-Wno-UNOPTFLAT \
-Wno-UNUSED \
-Wno-style \
-Wno-lint \
$(if $(DEBUG),--trace-structs --trace,) \
-LDFLAGS "-lfesvr" -CFLAGS "-std=c++11 -I../tb/dpi" -Wall --cc --vpi \
$(list_incdir) --top-module ariane_testharness \
--Mdir build -O3 \
$(list_incdir) --top-module ariane_testharness \
--Mdir build -O3 \
--exe tb/ariane_tb.cpp tb/dpi/SimDTM.cc tb/dpi/SimJTAG.cc tb/dpi/remote_bitbang.cc
# Use Verilator; at some point in the future this will be auto-generated
verilate:
$(verilate_command)
cd build && make -j8 -f Variane_testharness.mk
cd build && make -j${NUM_JOBS} -f Variane_testharness.mk
$(addsuffix -verilator,$(riscv-asm-tests)): verilate
build/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@)
run-asm-tests-verilator: $(addsuffix -verilator, $(riscv-asm-tests))
# split into two halves for travis jobs (otherwise they will time out)
run-asm-tests1-verilator: $(addsuffix -verilator, $(filter rv64ui-p-% ,$(riscv-asm-tests)))
run-asm-tests2-verilator: $(addsuffix -verilator, $(filter rv64ui-v-% ,$(riscv-asm-tests)))
$(addsuffix -verilator,$(riscv-benchmarks)): verilate
build/Variane_testharness $(riscv-benchmarks-dir)/$(subst -verilator,,$@)
run-benchmarks-verilator: $(addsuffix -verilator,$(riscv-benchmarks))
verify:
qverify vlog -sv src/csr_regfile.sv
@ -150,6 +233,8 @@ verify:
clean:
rm -rf work/ *.ucdb
rm -rf build
rm -f tmp/*.ucdb
rm -f tmp/*.log
.PHONY:
build lint build-moore
build lint build-moore $(riscv-asm-tests) $(addsuffix _verilator,$(riscv-asm-tests)) $(riscv-benchmarks) $(addsuffix _verilator,$(riscv-benchmarks)) check simc sim verilate clean verilate

View file

@ -15,45 +15,62 @@ Go and get the [RISC-V tools](https://github.com/riscv/riscv-tools). Make sure t
Checkout the repository and initialize all submodules
```
git clone https://github.com/pulp-platform/ariane.git
git submodule update --init --recursive
$ git clone https://github.com/pulp-platform/ariane.git
$ git submodule update --init --recursive
```
The Verilator testbench relies on our forked version of `riscv-fesvr` which can be found [here](https://github.com/riscv/riscv-fesvr). Follow the README there and make sure that your compiler and linker is aware of the library (e.g.: add it to your path if it is in a non-default directory).
The testbench relies on `riscv-fesvr` which can be found [here](https://github.com/riscv/riscv-fesvr). Follow the README there and make sure that your compiler and linker are aware of the library (e.g.: add it to your path if it is in a non-default directory).
Build the Verilator model of Ariane by using the Makefile:
```
make verilate
$ make verilate
```
This will create a C++ model of the core including a SystemVerilog wrapper and link it against a C++ testbench (in the `tb` subfolder). The binary can be found in the `build` directory and accepts a RISC-V ELF binary as an argument, e.g.:
```
build/Variane_testharness rv64um-v-divuw
$ build/Variane_testharness rv64um-v-divuw
```
The Verilator testbench makes use of the `riscv-fesvr`. That means that bare `riscv-tests` can be run on the simulator.
The Verilator testbench makes use of the `riscv-fesvr`. This means that you can use the `riscv-tests` repository as well as `riscv-pk` out-of-the-box. As a general rule of thumb, the Verilator model will behave like Spike (except for being orders of magnitude slower).
### Running custom C-code
It is possible to cross compile and run your own C-code or benchmarks on Ariane. The following steps need to be followed to compile and run:
Compile the file using the following command (you need to have the [riscv-tests](https://github.com/riscv/riscv-tests) repo checked-out):
Both the Verilator model and the Questa simulation will produce trace logs. The Verilator trace is more basic, but you can feed the log to `spike-dasm` to resolve instructions to mnemonics. Unfortunately, value inspection is currently not possible for the Verilator trace file.
```
riscv64-unknown-elf-gcc -I./riscv-tests/benchmarks/../env -I./riscv-tests/benchmarks/common \
-DPREALLOCATE=1 -mcmodel=medany -static -std=gnu99 -O2 -ffast-math -fno-common \
-fno-builtin-printf ./riscv-tests/benchmarks/common/syscalls.c -static -nostdlib \
./riscv-tests/benchmarks/common/crt.S -nostartfiles -lm -lgcc \
-T ./riscv-tests/benchmarks/common/test.ld -o hello.riscv hello.c
$ spike-dasm < trace_core_00_0.dasm > logfile.txt
```
Use the generated ELF file as an input to the Verilator model:
### Running Applications
It is possible to run user-space binaries on Ariane with `riscv-pk` ([link](https://github.com/riscv/riscv-pk)). As Ariane currently does not support atomics and floating point extensions, make sure that you configure `riscv-pk` with:
`--with-arch=rv64imc`. In particular, inside the `riscv-pk` directory do:
```
build/Variane_testharness hello.riscv
$ mkdir build
$ cd build
$ ../configure --prefix=$RISCV --host=riscv64-unknown-elf --with-arch=rv64imc
$ make
$ make install
```
Then to run a RISC-V ELF using the Verilator model do:
```
$ make verilate
$ build/Variane_testharness /path/to/pk path/to/riscv.elf
```
If you want to use QuestaSim to run it you can use the following command:
```
$ make simc riscv-test=/path/to/pk target-options=path/to/riscv.elf
```
> Be patient! RTL simulation is way slower than Spike. If you think that you ran into problems you can inspect the trace files.
## FPGA Emulation
Coming.
## Planned Improvements
While developing Ariane it has become evident that, in order to support Linux, the atomic extension is going to be mandatory. While the core is currently booting Linux by emulating Atomics in BBL (in a single core environment this is trivially met by disabling interrupts) this is not the behavior which is intended. For that reason we are going to fully support all atomic extensions in the very near future.
@ -64,7 +81,7 @@ The core has been developed with a full licensed version of QuestaSim. If you ha
To specify the test to run, use the `riscv-test` variable (e.g. to run `rv64ui-p-sraw` from the `tmp/riscv-tests/build/isa` folder):
```
make sim riscv-test=tmp/risc-tests/build/isa/rv64ui-p-sraw
$ make sim riscv-test=tmp/risc-tests/build/isa/rv64ui-p-sraw
```
If you call `simc` instead of `sim` it will run without the GUI. QuestaSim uses `riscv-fesvr` for communication as well.

View file

@ -2,17 +2,21 @@ bootrom_img = bootrom.img
GCC=riscv64-unknown-elf-gcc
OBJCOPY=riscv64-unknown-elf-objcopy
DTB=ariane.dtb
all: $(bootrom_img)
%.img: %.bin
dd if=$< of=$@ bs=128 count=1
dd if=$< of=$@ bs=128
%.bin: %.elf
$(OBJCOPY) -O binary $< $@
%.elf: %.S linker.ld
%.elf: %.S linker.ld $(DTB)
$(GCC) -Tlinker.ld $< -nostdlib -static -Wl,--no-gc-sections -o $@
%.dtb: %.dts
dtc -I dts $< -O dtb -o $@
clean:
rm $(bootrom_img)
rm $(bootrom_img) $(DTB)

45
bootrom/ariane.dts Normal file
View file

@ -0,0 +1,45 @@
/*
 * Device tree for the bare Ariane platform.
 *
 * Describes a single RV64IMC CPU with its local interrupt controller,
 * one memory region, a CLINT inside a simple-bus "soc" node, and an
 * HTIF node. The compiled blob (ariane.dtb) is included into the
 * bootrom via `.incbin`.
 */
/dts-v1/;
/ {
/* Addresses and sizes in children's `reg` properties use two 32-bit cells each. */
#address-cells = <2>;
#size-cells = <2>;
compatible = "eth,ariane-bare-dev";
model = "eth,ariane-bare";
cpus {
/* CPU nodes are addressed by a single cell (the `reg` id) and have no size. */
#address-cells = <1>;
#size-cells = <0>;
/* Timebase tick rate: 10,000,000 (Hz by devicetree convention). */
timebase-frequency = <10000000>;
CPU0: cpu@0 {
device_type = "cpu";
reg = <0>;
status = "okay";
compatible = "riscv";
/* No A/F/D extensions are advertised here. */
riscv,isa = "rv64imc";
mmu-type = "riscv,sv39";
clock-frequency = <1000000000>;
/* Per-CPU interrupt controller; referenced by the CLINT below through the CPU0_intc label. */
CPU0_intc: interrupt-controller {
#interrupt-cells = <1>;
interrupt-controller;
compatible = "riscv,cpu-intc";
};
};
};
memory@80000000 {
device_type = "memory";
/* <addr-hi addr-lo size-hi size-lo>: base 0x80000000, size 0x1000000 (16 MiB). */
reg = <0x0 0x80000000 0x0 0x1000000>;
};
soc {
#address-cells = <2>;
#size-cells = <2>;
compatible = "eth,ariane-bare-soc", "simple-bus";
/* Empty `ranges`: child addresses map 1:1 onto the parent address space. */
ranges;
clint@2000000 {
compatible = "riscv,clint0";
/*
 * Both interrupts target CPU0's interrupt controller: numbers 3 and 7
 * (machine software and machine timer interrupts in the RISC-V
 * privileged specification).
 */
interrupts-extended = <&CPU0_intc 3 &CPU0_intc 7 >;
/* Base 0x2000000, size 0xc0000. */
reg = <0x0 0x2000000 0x0 0xc0000>;
};
};
/* Host-target interface node; it carries no `reg` property. */
htif {
compatible = "ucb,htif0";
};
};

View file

@ -21,4 +21,4 @@ _hang:
.globl _dtb
.align 5, 0
_dtb:
.ascii "DTB goes here"
.incbin "ariane.dtb"

Binary file not shown.

View file

@ -13,15 +13,141 @@
* Description: Auto-generated bootrom
*/
// Auto-generated code
module bootrom (
input logic clk_i,
input logic req_i,
input logic [63:0] addr_i,
output logic [63:0] rdata_o
);
localparam int RomSize = 16;
localparam int RomSize = 141;
const logic [RomSize-1:0][63:0] mem = {
64'h0064,
64'h65646e65_7478652d,
64'h73747075_72726574,
64'h6e690073_65676e61,
64'h7200656c_646e6168,
64'h70007265_6c6c6f72,
64'h746e6f63_2d747075,
64'h72726574_6e690073,
64'h6c6c6563_2d747075,
64'h72726574_6e692300,
64'h79636e65_75716572,
64'h662d6b63_6f6c6300,
64'h65707974_2d756d6d,
64'h00617369_2c766373,
64'h69720073_75746174,
64'h73006765_72006570,
64'h79745f65_63697665,
64'h64007963_6e657571,
64'h6572662d_65736162,
64'h656d6974_006c6564,
64'h6f6d0065_6c626974,
64'h61706d6f_6300736c,
64'h6c65632d_657a6973,
64'h2300736c_6c65632d,
64'h73736572_64646123,
64'h09000000_02000000,
64'h02000000_00000030,
64'h66697468_2c626375,
64'h1b000000_0a000000,
64'h03000000_00000000,
64'h66697468_01000000,
64'h02000000_02000000,
64'h00000c00_00000000,
64'h00000002_00000000,
64'h4b000000_10000000,
64'h03000000_07000000,
64'h01000000_03000000,
64'h01000000_ae000000,
64'h10000000_03000000,
64'h00000000_30746e69,
64'h6c632c76_63736972,
64'h1b000000_0d000000,
64'h03000000_00000030,
64'h30303030_30324074,
64'h6e696c63_01000000,
64'ha7000000_00000000,
64'h03000000_00007375,
64'h622d656c_706d6973,
64'h00636f73_2d657261,
64'h622d656e_61697261,
64'h2c687465_1b000000,
64'h1f000000_03000000,
64'h02000000_0f000000,
64'h04000000_03000000,
64'h02000000_00000000,
64'h04000000_03000000,
64'h00636f73_01000000,
64'h02000000_00000001,
64'h00000000_00000080,
64'h00000000_4b000000,
64'h10000000_03000000,
64'h00007972_6f6d656d,
64'h3f000000_07000000,
64'h03000000_00303030,
64'h30303030_38407972,
64'h6f6d656d_01000000,
64'h02000000_02000000,
64'h02000000_01000000,
64'h9f000000_04000000,
64'h03000000_00006374,
64'h6e692d75_70632c76,
64'h63736972_1b000000,
64'h0f000000_03000000,
64'h8a000000_00000000,
64'h03000000_01000000,
64'h79000000_04000000,
64'h03000000_00000000,
64'h72656c6c_6f72746e,
64'h6f632d74_70757272,
64'h65746e69_01000000,
64'h00ca9a3b_69000000,
64'h04000000_03000000,
64'h00003933_76732c76,
64'h63736972_60000000,
64'h0b000000_03000000,
64'h00636d69_34367672,
64'h56000000_08000000,
64'h03000000_00000076,
64'h63736972_1b000000,
64'h06000000_03000000,
64'h00000000_79616b6f,
64'h4f000000_05000000,
64'h03000000_00000000,
64'h4b000000_04000000,
64'h03000000_00757063,
64'h3f000000_04000000,
64'h03000000_00000030,
64'h40757063_01000000,
64'h80969800_2c000000,
64'h04000000_03000000,
64'h00000000_0f000000,
64'h04000000_03000000,
64'h01000000_00000000,
64'h04000000_03000000,
64'h00000000_73757063,
64'h01000000_00657261,
64'h622d656e_61697261,
64'h2c687465_26000000,
64'h10000000_03000000,
64'h00766564_2d657261,
64'h622d656e_61697261,
64'h2c687465_1b000000,
64'h14000000_03000000,
64'h02000000_0f000000,
64'h04000000_03000000,
64'h02000000_00000000,
64'h04000000_03000000,
64'h00000000_01000000,
64'h00000000_00000000,
64'h00000000_00000000,
64'he8020000_c2000000,
64'h00000000_10000000,
64'h11000000_28000000,
64'h20030000_38000000,
64'he2030000_edfe0dd0,
64'h00000000_00000000,
64'h00000000_00000000,
64'h00000000_00000000,

View file

@ -3,6 +3,10 @@ set -e
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
cd $ROOT/tmp
if [ -z ${NUM_JOBS} ]; then
NUM_JOBS=1
fi
if ! [ -e $RISCV/bin ]; then
[ -d $ROOT/tmp/riscv-gnu-toolchain ] || git clone https://github.com/riscv/riscv-gnu-toolchain.git
cd riscv-gnu-toolchain
@ -13,6 +17,6 @@ if ! [ -e $RISCV/bin ]; then
echo "Compiling RISC-V Toolchain"
./configure --prefix=$RISCV > /dev/null
make -j2 > /dev/null
make -j${NUM_JOBS} > /dev/null
echo "Compilation Finished"
fi

View file

@ -3,6 +3,10 @@ set -e
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
cd $ROOT/tmp
if [ -z ${NUM_JOBS} ]; then
NUM_JOBS=1
fi
[ -d $ROOT/tmp/riscv-tests ] || git clone https://github.com/riscv/riscv-tests.git
cd riscv-tests
git checkout 294bfce8a1ca2fc501b8939292146e44f813a2b8
@ -11,5 +15,6 @@ autoconf
mkdir -p build
cd build
../configure --prefix=$ROOT/tmp/riscv-tests/build
make isa -j2 > /dev/null
make isa -j${NUM_JOBS} > /dev/null
make benchmarks -j${NUM_JOBS} > /dev/null
make install

67
ci/check-tests.sh Executable file
View file

@ -0,0 +1,67 @@
#!/bin/bash
# check simulation output (only for questasim flow)
#
# $1 simulation output file basename (every regular file "$1"*.log is checked)
# $2 list file containing the test names (one per line)
#
# Exits 0 only if the logs contain no "FAILED", "Fatal:" or "Error:" lines and
# the number of "SUCCESS" lines equals the number of lines in the list file;
# exits 1 otherwise. Relative paths are resolved from the repository root.

ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
cd "$ROOT" || exit 1

# only use colors in interactive mode, i.e. when stdout is a terminal
# (testing "$-" for emptiness does not work: in practice it is never empty)
if [ -t 1 ]; then
  GREEN='\033[0;32m'
  RED='\033[0;31m'
  NC='\033[0m' # No Color
else
  GREEN=''
  RED=''
  NC=''
fi

# collect the matching log files once; an unmatched glob stays literal and is
# dropped by the -f test, so LOGS is empty when nothing matches
LOGS=()
for log in "${1}"*.log; do
  if [ -f "$log" ]; then
    LOGS+=("$log")
  fi
done

if [ "${#LOGS[@]}" -eq 0 ]; then
  echo -e "${RED}FAILED file $1 does not exist ${NC}"
  exit 1
fi

if [ ! -f "$2" ]; then
  echo -e "${RED}FAILED file $2 does not exist ${NC}"
  exit 1
fi

# print the number of lines in the collected logs that match pattern $1
count_matches() {
  grep -s "$1" "${LOGS[@]}" | wc -l
}

# get NUM_TOTAL number of tests
NUM_TOTAL=$(wc -l < "$2" | tr -d '[:space:]')

echo "list containint tests: $2"
echo "checking files:"
ls "${LOGS[@]}"

# check for patterns
NUM_PASSED=$(count_matches "SUCCESS")
NUM_FAILED=$(count_matches "FAILED")
NUM_FATAL=$(count_matches "Fatal:")
NUM_ERROR=$(count_matches "Error:")

echo "NUM_TOTAL: $NUM_TOTAL"
echo "NUM_PASSED: $NUM_PASSED"
echo "NUM_FAILED: $NUM_FAILED"
echo "NUM_FATAL: $NUM_FATAL"
echo "NUM_ERROR: $NUM_ERROR"

# failures take precedence over fatals, then errors, then missing passes
if [[ $((NUM_FAILED)) -gt 0 ]]; then
  echo -e "${RED}FAILED $NUM_FAILED of $NUM_TOTAL tests ${NC}"
  exit 1
elif [[ $((NUM_FATAL)) -ne 0 ]]; then
  echo -e "${RED}FAILED at least one test due to $NUM_FATAL FATAL assertions ${NC}"
  exit 1
elif [[ $((NUM_ERROR)) -ne 0 ]]; then
  echo -e "${RED}FAILED at least one test due to $NUM_ERROR ERROR assertions ${NC}"
  exit 1
elif [[ $((NUM_PASSED)) -ne $((NUM_TOTAL)) ]]; then
  echo -e "${RED}FAILED since not all tests have been executed ${NC}"
  exit 1
else
  echo -e "${GREEN}PASSED all $NUM_TOTAL tests ${NC}"
  exit 0
fi

43
ci/gitlab-ci-emul.sh Executable file
View file

@ -0,0 +1,43 @@
#!/bin/bash
# This script emulates what the gitlab ci config does (not on public server)
# source this with a bash shell in the project root
#
# Steps: install host packages, set up tool paths, build the tools and the
# RISC-V tests into tmp/, then run the asm tests and benchmarks on both
# Verilator and QuestaSim.

# comment out next command if you don't want to use sudo
sudo apt install \
gcc-4.8 \
g++-4.8 \
gperf \
autoconf \
automake \
autotools-dev \
libmpc-dev \
libmpfr-dev \
libgmp-dev \
gawk \
build-essential \
bison \
flex \
texinfo \
python-pexpect \
libusb-1.0-0-dev \
device-tree-compiler

# customize your paths here
source ci/path-setup.sh

# fetch submodules, then build the toolchain, fesvr, verilator and the tests
git submodule update --init --recursive
ci/make-tmp.sh
ci/build-riscv-gcc.sh
ci/install-fesvr.sh
ci/install-verilator.sh
ci/build-riscv-tests.sh

make clean

# run asm tests on verilator
make -j${NUM_JOBS} verilate verilator=$VERILATOR_ROOT/bin/verilator
make -j${NUM_JOBS} run-asm-tests-verilator verilator=$VERILATOR_ROOT/bin/verilator
make -j${NUM_JOBS} run-benchmarks-verilator verilator=$VERILATOR_ROOT/bin/verilator

# run asm tests on questa
make -j${NUM_JOBS} build questa_version=$QUESTASIM_VERSION
make -j${NUM_JOBS} run-asm-tests questa_version=$QUESTASIM_VERSION
make -j${NUM_JOBS} run-benchmarks questa_version=$QUESTASIM_VERSION

View file

@ -4,6 +4,10 @@ ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
cd $ROOT/tmp
RELEASE=0.1.0
if [ -z ${NUM_JOBS} ]; then
NUM_JOBS=1
fi
if ! [ -e $ROOT/tmp/riscv-fesvr ]; then
git clone https://github.com/riscv/riscv-fesvr.git
fi
@ -11,5 +15,5 @@ cd $ROOT/tmp/riscv-fesvr
mkdir -p build
cd build
../configure --prefix="$ROOT/tmp"
make -j2
make -j${NUM_JOBS}
make install

View file

@ -3,11 +3,19 @@ set -e
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
cd $ROOT/tmp
if [ ! -e "$ROOT/tmp/bin/verilator" ]; then
if [ -z ${NUM_JOBS} ]; then
NUM_JOBS=1
fi
if [ ! -e "$VERILATOR_ROOT/bin/verilator" ]; then
echo "Installing Verilator"
wget https://www.veripool.org/ftp/verilator-3.924.tgz
tar xzf verilator*.t*gz && cd verilator-*
autoconf && ./configure --prefix="$ROOT/tmp" && make -j2 && make test && make install
mkdir -p $VERILATOR_ROOT
# copy scripts
autoconf && ./configure --prefix="$VERILATOR_ROOT" && make -j${NUM_JOBS}
cp -r * $VERILATOR_ROOT/
make test
else
echo "Using Verilator from cached directory."
fi

21
ci/path-setup.sh Normal file
View file

@ -0,0 +1,21 @@
# Environment setup shared by the CI emulation scripts; meant to be sourced
# so that the exports end up in the calling shell.
# Customise this to a fast local disk
export TOP=/scratch/$USER/projects
# presumably the location of this repository's checkout; the library and
# include paths below point at its tmp/ directory - confirm for your setup
export CI_BUILD_DIR=$TOP/ariane-repo
# customize this to your setup (both QuestaSim settings are left empty here)
export QUESTASIM_HOME=
export QUESTASIM_VERSION=
# host compilers used for building the tools
export CXX=g++-4.8 CC=gcc-4.8
# where to install the tools
export RISCV=$TOP/riscv_install
export VERILATOR_ROOT=$TOP/verilator-3.924/
# make the RISC-V toolchain and verilator binaries take precedence on PATH
export PATH=$RISCV/bin:$VERILATOR_ROOT/bin:$PATH
# libraries and headers installed under tmp/ (e.g. by ci/install-fesvr.sh),
# plus the verilator headers
export LIBRARY_PATH=$CI_BUILD_DIR/tmp/lib
export LD_LIBRARY_PATH=$CI_BUILD_DIR/tmp/lib
export C_INCLUDE_PATH=$CI_BUILD_DIR/tmp/include:$VERILATOR_ROOT/include
export CPLUS_INCLUDE_PATH=$CI_BUILD_DIR/tmp/include:$VERILATOR_ROOT/include
# number of parallel jobs to use for make commands and simulation
export NUM_JOBS=8

View file

@ -80,7 +80,6 @@ rv64ui-v-sub
rv64ui-v-subw
rv64ui-v-xor
rv64ui-v-xori
rv64ui-v-slliw
rv64ui-v-sll
rv64ui-v-slli
rv64ui-v-slliw

8
ci/riscv-benchmarks.list Normal file
View file

@ -0,0 +1,8 @@
dhrystone.riscv
median.riscv
multiply.riscv
pmp.riscv
qsort.riscv
rsort.riscv
towers.riscv
vvadd.riscv

View file

@ -1,5 +0,0 @@
#!/bin/bash
# Runs every test named in ci/test.list on the Verilator test harness.
# abort on the first failing command
set -e
# repository root (parent of the directory containing this script)
ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
# run the tests in parallel, 4 at a time
# The inner xargs/printf emits the entries of test.list one per line; the
# outer xargs substitutes each entry for {} in the harness command line.
# NOTE(review): the test binary path is relative to the current directory,
# not to $ROOT - presumably the script is started from the project root.
printf "$(xargs printf '\n%s' < ${ROOT}/ci/test.list | cut -b 1-)" | xargs -n1 -P4 -I{} ${ROOT}/build/Variane_testharness tmp/riscv-tests/build/isa/{}

38
ci/travis-ci-emul.sh Normal file
View file

@ -0,0 +1,38 @@
#!/bin/bash
# This script emulates what travis check in test does on the public server
# source this with a bash shell in the project root
#
# Steps: install host packages, set up tool paths, build the tools and the
# RISC-V tests, then run the asm tests and benchmarks on Verilator only.
# comment out next command if you don't want to use sudo
sudo apt install \
gcc-4.8 \
g++-4.8 \
gperf \
autoconf \
automake \
autotools-dev \
libmpc-dev \
libmpfr-dev \
libgmp-dev \
gawk \
build-essential \
bison \
flex \
texinfo \
python-pexpect \
libusb-1.0-0-dev \
device-tree-compiler
# customize your paths here
source ci/path-setup.sh
# fetch submodules, then build the toolchain, fesvr, verilator and the tests
git submodule update --init --recursive
ci/make-tmp.sh
ci/build-riscv-gcc.sh
ci/install-fesvr.sh
ci/install-verilator.sh
ci/build-riscv-tests.sh
# start from a clean build directory
make clean
# run asm tests on verilator
make -j${NUM_JOBS} verilate verilator=$VERILATOR_ROOT/bin/verilator
make -j${NUM_JOBS} run-asm-tests-verilator verilator=$VERILATOR_ROOT/bin/verilator
make -j${NUM_JOBS} run-benchmarks-verilator verilator=$VERILATOR_ROOT/bin/verilator

View file

@ -16,7 +16,6 @@
* in one package.
*/
package ariane_pkg;
// ---------------
@ -94,6 +93,11 @@ package ariane_pkg;
// ---------------
// Fetch Stage
// ---------------
// leave as is (fails with >8 entries and wider fetch width)
localparam int unsigned FETCH_FIFO_DEPTH = 8;
localparam int unsigned FETCH_WIDTH = 32;
// Only use struct when signals have same direction
// exception
typedef struct packed {
@ -170,6 +174,22 @@ package ariane_pkg;
localparam EXC_OFF_RST = 8'h80;
// ---------------
// Cache config
// ---------------
// I$
parameter int unsigned ICACHE_INDEX_WIDTH = 12; // in bit
parameter int unsigned ICACHE_TAG_WIDTH = 44; // in bit
parameter int unsigned ICACHE_SET_ASSOC = 4;
parameter int unsigned ICACHE_LINE_WIDTH = 128; // in bit
// D$
localparam int unsigned DCACHE_INDEX_WIDTH = 12;
localparam int unsigned DCACHE_TAG_WIDTH = 44;
localparam int unsigned DCACHE_LINE_WIDTH = 128;
localparam int unsigned DCACHE_SET_ASSOC = 8;
// ---------------
// EX Stage
// ---------------
@ -356,6 +376,58 @@ package ariane_pkg;
// (e.g. 27*4K == 39bit address space).
localparam PPN4K_WIDTH = 38;
// ----------------------
// cache request ports
// ----------------------
// I$ address translation requests
typedef struct packed {
logic fetch_valid; // address translation valid
logic [63:0] fetch_paddr; // physical address in
exception_t fetch_exception; // exception occurred during fetch
} icache_areq_i_t;
typedef struct packed {
logic fetch_req; // address translation request
logic [63:0] fetch_vaddr; // virtual address out
} icache_areq_o_t;
// I$ data requests
typedef struct packed {
logic req; // we request a new word
logic kill_s1; // kill the current request
logic kill_s2; // kill the last request
logic [63:0] vaddr; // 1st cycle: 12 bit index is taken for lookup
} icache_dreq_i_t;
typedef struct packed {
logic ready; // icache is ready
logic valid; // signals a valid read
logic [FETCH_WIDTH-1:0] data; // 2+ cycle out: tag
logic [63:0] vaddr; // virtual address out
exception_t ex; // we've encountered an exception
} icache_dreq_o_t;
// D$ data requests
typedef struct packed {
logic [DCACHE_INDEX_WIDTH-1:0] address_index;
logic [DCACHE_TAG_WIDTH-1:0] address_tag;
logic [63:0] data_wdata;
logic data_req;
logic data_we;
logic [7:0] data_be;
logic [1:0] data_size;
logic kill_req;
logic tag_valid;
amo_t amo_op;
} dcache_req_i_t;
typedef struct packed {
logic data_gnt;
logic data_rvalid;
logic [63:0] data_rdata;
} dcache_req_o_t;
// ----------------------
// Arithmetic Functions
// ----------------------

View file

@ -1,87 +0,0 @@
/* Copyright 2018 ETH Zurich and University of Bologna.
* Copyright and related rights are licensed under the Solderpad Hardware
* License, Version 0.51 (the License); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
* or agreed to in writing, software, hardware and materials distributed under
* this License is distributed on an AS IS BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* File: nbdcache_pkg.sv
* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
* Date: 13.10.2017
*
* Description: Contains all the necessary defines for the non-block DCache
* of Ariane in one package.
*/
// Parameters, request/line types and helper functions for the data cache.
package nbdcache_pkg;
// Basic cache geometry (widths in bits)
localparam int unsigned INDEX_WIDTH = 12;
localparam int unsigned TAG_WIDTH = 44;
localparam int unsigned CACHE_LINE_WIDTH = 128;
localparam int unsigned SET_ASSOCIATIVITY = 8;
localparam int unsigned NR_MSHR = 1;
// Calculated parameter
// number of address bits selecting a byte within a cache line
localparam BYTE_OFFSET = $clog2(CACHE_LINE_WIDTH/8);
// 2 to the power of the index bits left after removing the byte offset
localparam NUM_WORDS = 2**(INDEX_WIDTH-BYTE_OFFSET);
// two state bits (valid, dirty) per way
localparam DIRTY_WIDTH = SET_ASSOCIATIVITY*2;
// localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not
// request kind: a single access or a whole cache line
typedef enum logic { SINGLE_REQ, CACHE_LINE_REQ } req_t;
// MSHR entry (presumably "miss status holding register" - confirm)
typedef struct packed {
logic [1:0] id; // id for which we handle the miss
logic valid;
logic we;
logic [55:0] addr;
logic [7:0][7:0] wdata;
logic [7:0] be;
} mshr_t;
// miss request: address, byte enables, size, write flag/data and a bypass flag
typedef struct packed {
logic valid;
logic [63:0] addr;
logic [7:0] be;
logic [1:0] size;
logic we;
logic [63:0] wdata;
logic bypass;
} miss_req_t;
// contents of one cache line: tag, data and the two state bits
typedef struct packed {
logic [TAG_WIDTH-1:0] tag; // tag array
logic [CACHE_LINE_WIDTH-1:0] data; // data array
logic valid; // state array
logic dirty; // state array
} cache_line_t;
// cache line byte enable
// NOTE(review): tag and data are as wide as the arrays themselves (one
// enable bit per stored bit) although the comments say "byte enable"
typedef struct packed {
logic [TAG_WIDTH-1:0] tag; // byte enable into tag array
logic [CACHE_LINE_WIDTH-1:0] data; // byte enable into data array
logic [DIRTY_WIDTH/2-1:0] dirty; // byte enable into state array
logic [DIRTY_WIDTH/2-1:0] valid; // byte enable into state array
} cl_be_t;
// convert one hot to bin for -> needed for cache replacement
// Returns the index of the lowest set bit of `in`.
// NOTE(review): no return statement is reached when `in` is all zeros;
// callers presumably guarantee at least one set bit - confirm.
function automatic logic [$clog2(SET_ASSOCIATIVITY)-1:0] one_hot_to_bin (input logic [SET_ASSOCIATIVITY-1:0] in);
for (int unsigned i = 0; i < SET_ASSOCIATIVITY; i++) begin
if (in[i])
return i;
end
endfunction
// get the first bit set, returns one hot value
// The scan starts at bit 0, so the lowest set bit of `valid_dirty` wins.
// NOTE(review): no return statement is reached when `valid_dirty` is all zeros.
function automatic logic [SET_ASSOCIATIVITY-1:0] get_victim_cl (input logic [SET_ASSOCIATIVITY-1:0] valid_dirty);
// one-hot return vector
logic [SET_ASSOCIATIVITY-1:0] oh = '0;
for (int unsigned i = 0; i < SET_ASSOCIATIVITY; i++) begin
if (valid_dirty[i]) begin
oh[i] = 1'b1;
return oh;
end
end
endfunction
endpackage

87
include/std_cache_pkg.sv Normal file
View file

@ -0,0 +1,87 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: package for the standard Ariane cache subsystem.
// Derived parameters, request/line types and helper functions for the
// standard cache subsystem. The basic D$ geometry (index/tag/line width,
// associativity) is taken from ariane_pkg.
package std_cache_pkg;
// Calculated parameter
// number of address bits selecting a byte within a cache line
localparam DCACHE_BYTE_OFFSET = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8);
// 2 to the power of the index bits left after removing the byte offset
localparam DCACHE_NUM_WORDS = 2**(ariane_pkg::DCACHE_INDEX_WIDTH-DCACHE_BYTE_OFFSET);
// two state bits (valid, dirty) per way
localparam DCACHE_DIRTY_WIDTH = ariane_pkg::DCACHE_SET_ASSOC*2;
// localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not
// request kind: a single access or a whole cache line
typedef enum logic { SINGLE_REQ, CACHE_LINE_REQ } req_t;
// MSHR entry (presumably "miss status holding register" - confirm)
typedef struct packed {
logic [1:0] id; // id for which we handle the miss
logic valid;
logic we;
logic [55:0] addr;
logic [7:0][7:0] wdata;
logic [7:0] be;
} mshr_t;
// miss request: address, byte enables, size, write flag/data and a bypass flag
typedef struct packed {
logic valid;
logic [63:0] addr;
logic [7:0] be;
logic [1:0] size;
logic we;
logic [63:0] wdata;
logic bypass;
} miss_req_t;
// contents of one cache line: tag, data and the two state bits
typedef struct packed {
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array
logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array
logic valid; // state array
logic dirty; // state array
} cache_line_t;
// cache line byte enable
// tag/data widths are rounded up to whole bytes: (WIDTH+7)/8 enable bits
typedef struct packed {
logic [(ariane_pkg::DCACHE_TAG_WIDTH+7)/8-1:0] tag; // byte enable into tag array
logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] vldrty; // bit enable into state array (valid for a pair of dirty/valid bits)
} cl_be_t;
// convert one hot to bin for -> needed for cache replacement
// Returns the index of the lowest set bit of `in`.
// NOTE(review): no return statement is reached when `in` is all zeros;
// callers presumably guarantee at least one set bit - confirm.
function automatic logic [$clog2(ariane_pkg::DCACHE_SET_ASSOC)-1:0] one_hot_to_bin (
input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] in
);
for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin
if (in[i])
return i;
end
endfunction
// get the first bit set, returns one hot value
// The scan starts at bit 0, so the lowest set bit of `valid_dirty` wins.
// NOTE(review): no return statement is reached when `valid_dirty` is all zeros.
function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] get_victim_cl (
input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] valid_dirty
);
// one-hot return vector
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] oh = '0;
for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin
if (valid_dirty[i]) begin
oh[i] = 1'b1;
return oh;
end
end
endfunction
endpackage : std_cache_pkg

View file

@ -55,13 +55,6 @@ module ariane #(
logic eret;
logic [NR_COMMIT_PORTS-1:0] commit_ack;
// --------------
// PCGEN <-> IF
// --------------
logic [63:0] fetch_address_pcgen_if;
branchpredict_sbe_t branch_predict_pcgen_if;
logic if_ready_if_pcgen;
logic fetch_valid_pcgen_if;
// --------------
// PCGEN <-> CSR
// --------------
@ -71,10 +64,8 @@ module ariane #(
// IF <-> ID
// --------------
fetch_entry_t fetch_entry_if_id;
logic ready_id_if;
logic fetch_valid_if_id;
logic decode_ack_id_if;
exception_t exception_if_id;
// --------------
// ID <-> ISSUE
@ -101,8 +92,6 @@ module ariane #(
logic [TRANS_ID_BITS-1:0] alu_trans_id_ex_id;
logic alu_valid_ex_id;
logic [63:0] alu_result_ex_id;
logic alu_branch_res_ex_id;
exception_t alu_exception_ex_id;
// Branches and Jumps
logic branch_ready_ex_id;
logic [TRANS_ID_BITS-1:0] branch_trans_id_ex_id;
@ -162,14 +151,6 @@ module ariane #(
logic [NR_COMMIT_PORTS-1:0] we_gpr_commit_id;
logic [NR_COMMIT_PORTS-1:0] we_fpr_commit_id;
// --------------
// IF <-> EX
// --------------
logic fetch_req_if_ex;
logic [63:0] fetch_vaddr_if_ex;
logic fetch_valid_ex_if;
logic [63:0] fetch_paddr_ex_if;
exception_t fetch_ex_ex_if;
// --------------
// CSR <-> *
// --------------
logic [4:0] fflags_csr_commit;
@ -191,7 +172,7 @@ module ariane #(
logic tsr_csr_id;
logic dcache_en_csr_nbdcache;
logic csr_write_fflags_commit_cs;
logic icache_en_csr_frontend;
logic icache_en_csr;
logic debug_mode_csr_id;
logic single_step_csr_commit;
// ----------------------------
@ -201,13 +182,14 @@ module ariane #(
logic [63:0] data_csr_perf, data_perf_csr;
logic we_csr_perf;
logic icache_flush_ctrl_cache;
logic itlb_miss_ex_perf;
logic dtlb_miss_ex_perf;
logic dcache_miss_ex_perf;
logic dcache_miss_cache_perf;
logic icache_miss_cache_perf;
// --------------
// CTRL <-> *
// --------------
logic flush_bp_ctrl_pcgen;
logic set_pc_ctrl_pcgen;
logic flush_csr_ctrl;
logic flush_unissued_instr_ctrl_id;
@ -220,25 +202,29 @@ module ariane #(
logic sfence_vma_commit_controller;
logic halt_ctrl;
logic halt_csr_ctrl;
logic flush_dcache_ctrl_ex;
logic flush_dcache_ack_ex_ctrl;
logic flush_icache_ctrl_icache;
logic dcache_flush_ctrl_cache;
logic dcache_flush_ack_cache_ctrl;
logic set_debug_pc;
icache_areq_i_t icache_areq_ex_cache;
icache_areq_o_t icache_areq_cache_ex;
icache_dreq_i_t icache_dreq_if_cache;
icache_dreq_o_t icache_dreq_cache_if;
// ----------------
// DCache <-> *
// ----------------
dcache_req_i_t [2:0] dcache_req_ports_ex_cache;
dcache_req_o_t [2:0] dcache_req_ports_cache_ex;
// --------------
// Frontend
// --------------
frontend i_frontend (
.en_cache_i ( icache_en_csr_frontend ),
.flush_i ( flush_ctrl_if ), // not entirely correct
.flush_bp_i ( 1'b0 ),
.flush_icache_i ( flush_icache_ctrl_icache ),
.boot_addr_i ( boot_addr_i ),
.fetch_req_o ( fetch_req_if_ex ),
.fetch_vaddr_o ( fetch_vaddr_if_ex ),
.fetch_valid_i ( fetch_valid_ex_if ),
.fetch_paddr_i ( fetch_paddr_ex_if ),
.fetch_exception_i ( fetch_ex_ex_if ),
.icache_dreq_i ( icache_dreq_cache_if ),
.icache_dreq_o ( icache_dreq_if_cache ),
.resolved_branch_i ( resolved_branch ),
.pc_commit_i ( pc_commit ),
.set_pc_commit_i ( set_pc_ctrl_pcgen ),
@ -247,8 +233,6 @@ module ariane #(
.eret_i ( eret ),
.trap_vector_base_i ( trap_vector_base_commit_pcgen ),
.ex_valid_i ( ex_commit.valid ),
.axi ( instr_if ),
.l1_icache_miss_o ( ), // performance counters
.fetch_entry_o ( fetch_entry_if_id ),
.fetch_entry_valid_o ( fetch_valid_if_id ),
.fetch_ack_i ( decode_ack_id_if ),
@ -344,11 +328,9 @@ module ariane #(
// ---------
// EX
// ---------
ex_stage #(
.CACHE_START_ADDR ( CACHE_START_ADDR ),
.AXI_ID_WIDTH ( AXI_ID_WIDTH ),
.AXI_USER_WIDTH ( AXI_USER_WIDTH )
) ex_stage_i (
ex_stage ex_stage_i (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_ctrl_ex ),
.fu_i ( fu_id_ex ),
.operator_i ( operator_id_ex ),
@ -364,8 +346,6 @@ module ariane #(
.alu_result_o ( alu_result_ex_id ),
.alu_trans_id_o ( alu_trans_id_ex_id ),
.alu_valid_o ( alu_valid_ex_id ),
.alu_branch_res_o ( alu_branch_res_ex_id ),
.alu_exception_o ( ),
// Branches and Jumps
.branch_ready_o ( branch_ready_ex_id ),
.branch_valid_o ( branch_valid_ex_id ),
@ -387,11 +367,11 @@ module ariane #(
.lsu_exception_o ( lsu_exception_ex_id ),
.no_st_pending_o ( no_st_pending_ex_commit ),
// MULT
.mult_ready_o ( mult_ready_ex_id ),
.mult_valid_i ( mult_valid_id_ex ),
.mult_trans_id_o ( mult_trans_id_ex_id ),
.mult_result_o ( mult_result_ex_id ),
.mult_valid_o ( mult_valid_ex_id ),
// .mult_ready_o ( mult_ready_ex_id ),
// .mult_valid_i ( mult_valid_id_ex ),
// .mult_trans_id_o ( mult_trans_id_ex_id ),
// .mult_result_o ( mult_result_ex_id ),
// .mult_valid_o ( mult_valid_ex_id ),
// FPU
.fpu_ready_o ( fpu_ready_ex_id ),
.fpu_valid_i ( fpu_valid_id_ex ),
@ -413,29 +393,27 @@ module ariane #(
// Performance counters
.itlb_miss_o ( itlb_miss_ex_perf ),
.dtlb_miss_o ( dtlb_miss_ex_perf ),
.dcache_miss_o ( dcache_miss_ex_perf ),
// Memory Management
.enable_translation_i ( enable_translation_csr_ex ), // from CSR
.en_ld_st_translation_i ( en_ld_st_translation_csr_ex ),
.flush_tlb_i ( flush_tlb_ctrl_ex ),
.fetch_req_i ( fetch_req_if_ex ),
.fetch_valid_o ( fetch_valid_ex_if ),
.fetch_vaddr_i ( fetch_vaddr_if_ex ),
.fetch_paddr_o ( fetch_paddr_ex_if ),
.fetch_exception_o ( fetch_ex_ex_if ), // fetch exception to IF
.priv_lvl_i ( priv_lvl ), // from CSR
.ld_st_priv_lvl_i ( ld_st_priv_lvl_csr_ex ), // from CSR
.sum_i ( sum_csr_ex ), // from CSR
.mxr_i ( mxr_csr_ex ), // from CSR
.satp_ppn_i ( satp_ppn_csr_ex ), // from CSR
.asid_i ( asid_csr_ex ), // from CSR
.data_if ( data_if ),
.dcache_en_i ( dcache_en_csr_nbdcache ),
.flush_dcache_i ( flush_dcache_ctrl_ex ),
.flush_dcache_ack_o ( flush_dcache_ack_ex_ctrl ),
.icache_areq_i ( icache_areq_cache_ex ),
.icache_areq_o ( icache_areq_ex_cache ),
.*
.mult_ready_o ( mult_ready_ex_id ),
.mult_valid_i ( mult_valid_id_ex ),
.mult_trans_id_o ( mult_trans_id_ex_id ),
.mult_result_o ( mult_result_ex_id ),
.mult_valid_o ( mult_valid_ex_id ),
// DCACHE interfaces
.dcache_req_ports_i ( dcache_req_ports_cache_ex ),
.dcache_req_ports_o ( dcache_req_ports_ex_cache )
);
// ---------
@ -443,7 +421,7 @@ module ariane #(
// ---------
commit_stage commit_stage_i (
.halt_i ( halt_ctrl ),
.flush_dcache_i ( flush_dcache_ctrl_ex ),
.flush_dcache_i ( dcache_flush_ctrl_cache ),
.exception_o ( ex_commit ),
.debug_mode_i ( debug_mode_csr_id ),
.debug_req_i ( debug_req_i ),
@ -508,7 +486,7 @@ module ariane #(
.debug_mode_o ( debug_mode_csr_id ),
.single_step_o ( single_step_csr_commit ),
.dcache_en_o ( dcache_en_csr_nbdcache ),
.icache_en_o ( icache_en_csr_frontend ),
.icache_en_o ( icache_en_csr ),
.perf_addr_o ( addr_csr_perf ),
.perf_data_o ( data_csr_perf ),
.perf_data_i ( data_perf_csr ),
@ -516,7 +494,6 @@ module ariane #(
.*
);
// ------------------------
// Performance Counters
// ------------------------
@ -528,8 +505,8 @@ module ariane #(
.commit_instr_i ( commit_instr_id_commit ),
.commit_ack_i ( commit_ack ),
.l1_icache_miss_i ( 1'b0 ),
.l1_dcache_miss_i ( dcache_miss_ex_perf ),
.l1_icache_miss_i ( icache_miss_cache_perf ),
.l1_dcache_miss_i ( dcache_miss_cache_perf ),
.itlb_miss_i ( itlb_miss_ex_perf ),
.dtlb_miss_i ( dtlb_miss_ex_perf ),
@ -538,20 +515,20 @@ module ariane #(
.resolved_branch_i ( resolved_branch ),
.*
);
// ------------
// Controller
// ------------
controller controller_i (
// flush ports
.flush_bp_o ( flush_bp_ctrl_pcgen ),
.set_pc_commit_o ( set_pc_ctrl_pcgen ),
.flush_unissued_instr_o ( flush_unissued_instr_ctrl_id ),
.flush_if_o ( flush_ctrl_if ),
.flush_id_o ( flush_ctrl_id ),
.flush_ex_o ( flush_ctrl_ex ),
.flush_tlb_o ( flush_tlb_ctrl_ex ),
.flush_dcache_o ( flush_dcache_ctrl_ex ),
.flush_dcache_ack_i ( flush_dcache_ack_ex_ctrl ),
.flush_dcache_o ( dcache_flush_ctrl_cache ),
.flush_dcache_ack_i ( dcache_flush_ack_cache_ctrl ),
.halt_csr_i ( halt_csr_ctrl ),
.halt_o ( halt_ctrl ),
@ -565,10 +542,45 @@ module ariane #(
.fence_i ( fence_commit_controller ),
.sfence_vma_i ( sfence_vma_commit_controller ),
.flush_icache_o ( flush_icache_ctrl_icache ),
.flush_icache_o ( icache_flush_ctrl_cache ),
.*
);
// -------------------
// Cache Subsystem
// -------------------
std_cache_subsystem #(
.CACHE_START_ADDR ( CACHE_START_ADDR )
) i_std_cache_subsystem (
// to D$
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
// I$
.icache_en_i ( icache_en_csr ),
.icache_flush_i ( icache_flush_ctrl_cache ),
.icache_miss_o ( icache_miss_cache_perf ),
.icache_areq_i ( icache_areq_ex_cache ),
.icache_areq_o ( icache_areq_cache_ex ),
.icache_dreq_i ( icache_dreq_if_cache ),
.icache_dreq_o ( icache_dreq_cache_if ),
// D$
.dcache_enable_i ( dcache_en_csr_nbdcache ),
.dcache_flush_i ( dcache_flush_ctrl_cache ),
.dcache_flush_ack_o ( dcache_flush_ack_cache_ctrl ),
// from PTW, Load Unit and Store Unit
.dcache_amo_commit_i ( 1'b0 ),
.dcache_amo_valid_o ( ),
.dcache_amo_result_o ( ),
.dcache_amo_flush_i ( 1'b0 ),
.dcache_miss_o ( dcache_miss_cache_perf ),
.dcache_req_ports_i ( dcache_req_ports_ex_cache ),
.dcache_req_ports_o ( dcache_req_ports_cache_ex ),
// memory side
.icache_data_if ( instr_if ),
.dcache_data_if ( data_if ),
.dcache_bypass_if ( bypass_if )
);
// -------------------
// Instruction Tracer
// -------------------
@ -602,8 +614,8 @@ module ariane #(
assign tracer_if.st_valid = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.valid_i;
assign tracer_if.st_paddr = ex_stage_i.lsu_i.i_store_unit.store_buffer_i.paddr_i;
// loads
assign tracer_if.ld_valid = ex_stage_i.lsu_i.i_load_unit.tag_valid_o;
assign tracer_if.ld_kill = ex_stage_i.lsu_i.i_load_unit.kill_req_o;
assign tracer_if.ld_valid = ex_stage_i.lsu_i.i_load_unit.req_port_o.tag_valid;
assign tracer_if.ld_kill = ex_stage_i.lsu_i.i_load_unit.req_port_o.kill_req;
assign tracer_if.ld_paddr = ex_stage_i.lsu_i.i_load_unit.paddr_i;
// exceptions
assign tracer_if.exception = commit_stage_i.exception_o;
@ -616,8 +628,7 @@ module ariane #(
`ifndef SYNTHESIS
`ifndef verilator
program instr_tracer
(
program instr_tracer (
instruction_tracer_if tracer_if,
input logic [5:0] cluster_id_i,
input logic [3:0] core_id_i
@ -638,7 +649,6 @@ module ariane #(
// mock tracer for Verilator, to be used with spike-dasm
`else
string s;
int f;
logic [63:0] cycles;
@ -651,7 +661,7 @@ module ariane #(
cycles <= 0;
end else begin
for (int i = 0; i < NR_COMMIT_PORTS; i++) begin
if (commit_ack[i] && !commit_stage_i.exception_o) begin
if (commit_ack[i] && !commit_instr_id_commit[i].ex.valid) begin
$fwrite(f, "%d 0x%0h (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].ex.tval[31:0]);
end else if (commit_ack[i] && commit_instr_id_commit[i].ex.valid) begin
if (commit_instr_id_commit[i].ex.cause == 2) begin

352
src/axi_adapter.sv Normal file
View file

@ -0,0 +1,352 @@
/* Copyright 2018 ETH Zurich and University of Bologna.
* Copyright and related rights are licensed under the Solderpad Hardware
* License, Version 0.51 (the License); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
* or agreed to in writing, software, hardware and materials distributed under
* this License is distributed on an AS IS BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* File: axi_adapter.sv
* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
* Date: 1.8.2018
*
* Description: Manages communication with the AXI Bus
*/
import std_cache_pkg::*;
// AXI adapter: turns a simple request/grant/valid handshake into transactions
// on an AXI master port, transferring one 64-bit word per beat.
//
// Parameters:
//   DATA_WIDTH          - width of the line moved by a non-single request; a
//                         burst consists of DATA_WIDTH/64 beats.
//   CRITICAL_WORD_FIRST - when set, line reads are issued as wrapping bursts
//                         starting at addr_i; otherwise as incremental bursts
//                         starting at the line-aligned base address.
//   AXI_ID_WIDTH        - width of the transaction ID signals.
//
// Request side:
//   req_i / we_i / type_i select read or write and single (type_i ==
//   SINGLE_REQ, one beat, len 0) or full-line transfer. The request inputs
//   (addr_i, id_i, type_i, size_i, wdata_i, be_i) are sampled combinationally
//   in every state that drives an AXI channel, so they must be held until the
//   request is granted.
//   gnt_o / gnt_id_o are raised once the address (and, for writes, all data
//   beats) have been accepted by the AXI slave.
//
// Response side:
//   valid_o is raised for one cycle when the write response arrives or when a
//   read has completed; rdata_o then carries the collected words and id_o the
//   matching ID. critical_word_o / critical_word_valid_o flag the beat that
//   corresponds to the requested word while a line read is still in flight.
module axi_adapter #(
    parameter int unsigned DATA_WIDTH          = 256,
    parameter logic        CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature
    parameter int unsigned AXI_ID_WIDTH        = 10
)(
    input  logic                             clk_i,  // Clock
    input  logic                             rst_ni, // Asynchronous reset active low
    input  logic                             req_i,
    input  req_t                             type_i,
    output logic                             gnt_o,
    output logic [AXI_ID_WIDTH-1:0]          gnt_id_o,
    input  logic [63:0]                      addr_i,
    input  logic                             we_i,
    input  logic [(DATA_WIDTH/64)-1:0][63:0] wdata_i,
    input  logic [(DATA_WIDTH/64)-1:0][7:0]  be_i,
    input  logic [1:0]                       size_i,
    input  logic [AXI_ID_WIDTH-1:0]          id_i,
    // read port
    output logic                             valid_o,
    output logic [(DATA_WIDTH/64)-1:0][63:0] rdata_o,
    output logic [AXI_ID_WIDTH-1:0]          id_o,
    // critical word - read port
    output logic [63:0]                      critical_word_o,
    output logic                             critical_word_valid_o,
    // AXI port
    AXI_BUS.Master                           axi
);
    // value driven on aw_len/ar_len for a line transfer (number of beats minus one)
    localparam BURST_SIZE = DATA_WIDTH/64-1;
    // number of bits needed to index a 64-bit word within the line (at least one)
    localparam ADDR_INDEX = ($clog2(DATA_WIDTH/64) > 0) ? $clog2(DATA_WIDTH/64) : 1;

    enum logic [3:0] {
        IDLE, WAIT_B_VALID, WAIT_AW_READY, WAIT_LAST_W_READY, WAIT_LAST_W_READY_AW_READY, WAIT_AW_READY_BURST,
        WAIT_R_VALID, WAIT_R_VALID_MULTIPLE, COMPLETE_READ
    } state_q, state_d;

    // counter for AXI transfers: number of beats still outstanding after the current one
    logic [ADDR_INDEX-1:0]            cnt_d, cnt_q;
    // words collected from the read channel
    logic [(DATA_WIDTH/64)-1:0][63:0] cache_line_d, cache_line_q;
    // save the address for a read, as we allow for non-cacheline aligned accesses
    // (only the lower ADDR_INDEX bits are ever written, the remaining bits stay zero)
    logic [(DATA_WIDTH/64)-1:0]       addr_offset_d, addr_offset_q;
    // ID of the transaction currently in flight
    logic [AXI_ID_WIDTH-1:0]          id_d, id_q;
    // word position inside the line for the read beat currently on the bus
    logic [ADDR_INDEX-1:0]            index;

    always_comb begin : axi_fsm
        // Default assignments
        axi.aw_valid  = 1'b0;
        axi.aw_addr   = addr_i;
        axi.aw_prot   = 3'b0;
        axi.aw_region = 4'b0;
        axi.aw_len    = 8'b0;
        axi.aw_size   = {1'b0, size_i};
        axi.aw_burst  = (type_i == SINGLE_REQ) ? 2'b00 : 2'b01; // fixed size for single request and incremental transfer for everything else
        axi.aw_lock   = 1'b0;
        axi.aw_cache  = 4'b0;
        axi.aw_qos    = 4'b0;
        axi.aw_id     = id_i;
        axi.aw_user   = '0;

        axi.ar_valid  = 1'b0;
        // in case of a single request or wrapping transfer we can simply begin at the address, if we want to request a cache-line
        // with an incremental transfer we need to output the corresponding base address of the cache line
        axi.ar_addr   = (CRITICAL_WORD_FIRST || type_i == SINGLE_REQ) ? addr_i : { addr_i[63:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET}{1'b0}}};
        axi.ar_prot   = 3'b0;
        axi.ar_region = 4'b0;
        axi.ar_len    = 8'b0;
        axi.ar_size   = {1'b0, size_i}; // beat size as requested through size_i
        axi.ar_burst  = (type_i == SINGLE_REQ) ? 2'b00 : (CRITICAL_WORD_FIRST ? 2'b10 : 2'b01); // wrapping transfer in case of a critical word first strategy
        axi.ar_lock   = 1'b0;
        axi.ar_cache  = 4'b0;
        axi.ar_qos    = 4'b0;
        axi.ar_id     = id_i;
        axi.ar_user   = '0;

        axi.w_valid   = 1'b0;
        axi.w_data    = wdata_i[0];
        axi.w_strb    = be_i[0];
        axi.w_user    = '0;
        axi.w_last    = 1'b0;

        axi.b_ready   = 1'b0;
        axi.r_ready   = 1'b0;

        gnt_o    = 1'b0;
        gnt_id_o = '0;
        valid_o  = 1'b0;
        id_o     = axi.r_id;

        // rdata_o = axi.r_data;
        critical_word_o       = axi.r_data;
        critical_word_valid_o = 1'b0;
        rdata_o               = cache_line_q;

        state_d       = state_q;
        cnt_d         = cnt_q;
        cache_line_d  = cache_line_q;
        addr_offset_d = addr_offset_q;
        id_d          = id_q;
        index         = '0;

        case (state_q)

            IDLE: begin
                cnt_d = '0;
                // we have an incoming request
                if (req_i) begin
                    // is this a read or write?
                    // write
                    if (we_i) begin
                        // the data is valid
                        axi.aw_valid = 1'b1;
                        axi.w_valid  = 1'b1;
                        // its a single write
                        if (type_i == SINGLE_REQ) begin
                            // a single write has exactly one beat (aw_len == 0), so the
                            // beat offered here is already the last one of the burst
                            axi.w_last = 1'b1;
                            // single req can be granted here
                            gnt_o    = axi.aw_ready & axi.w_ready;
                            gnt_id_o = id_i;
                            case ({axi.aw_ready, axi.w_ready})
                                2'b11: state_d = WAIT_B_VALID;      // both accepted
                                2'b01: state_d = WAIT_AW_READY;     // data accepted, address still pending
                                2'b10: state_d = WAIT_LAST_W_READY; // address accepted, data still pending
                                default: state_d = IDLE;
                            endcase
                            id_d = axi.aw_id;
                        // its a request for the whole cache line
                        end else begin
                            axi.aw_len = BURST_SIZE; // number of beats minus one
                            axi.w_last = 1'b0;
                            axi.w_data = wdata_i[0];
                            axi.w_strb = be_i[0];

                            // the first beat only counts as sent if the slave took it
                            if (axi.w_ready)
                                cnt_d = BURST_SIZE - 1;
                            else
                                cnt_d = BURST_SIZE;

                            case ({axi.aw_ready, axi.w_ready})
                                2'b11: state_d = WAIT_LAST_W_READY;
                                2'b01: state_d = WAIT_LAST_W_READY_AW_READY;
                                2'b10: state_d = WAIT_LAST_W_READY;
                                default:;
                            endcase
                            // save id
                            id_d = axi.aw_id;
                        end
                    // read
                    end else begin
                        axi.ar_valid = 1'b1;
                        gnt_o        = axi.ar_ready;
                        gnt_id_o     = id_i;

                        if (type_i != SINGLE_REQ) begin
                            axi.ar_len = BURST_SIZE;
                            cnt_d      = BURST_SIZE;
                        end

                        if (axi.ar_ready) begin
                            state_d       = (type_i == SINGLE_REQ) ? WAIT_R_VALID : WAIT_R_VALID_MULTIPLE;
                            addr_offset_d = addr_i[ADDR_INDEX-1+3:3];
                            // save id
                            id_d          = axi.ar_id;
                        end
                    end
                end
            end

            // ~> from single write: the data beat has been accepted, the address has not
            WAIT_AW_READY: begin
                axi.aw_valid = 1'b1;
                axi.aw_len   = 8'b0;

                if (axi.aw_ready) begin
                    state_d  = WAIT_B_VALID;
                    // IDLE only grants when both channels are accepted in the same
                    // cycle, so this request has not been granted yet - do it now
                    gnt_o    = 1'b1;
                    gnt_id_o = id_q;
                end
            end

            // ~> we need to wait for an aw_ready and there is at least one outstanding write
            WAIT_LAST_W_READY_AW_READY: begin
                axi.w_valid  = 1'b1;
                axi.w_last   = (cnt_q == '0) ? 1'b1 : 1'b0;
                axi.w_data   = wdata_i[BURST_SIZE-cnt_q];
                axi.w_strb   = be_i[BURST_SIZE-cnt_q];
                axi.aw_valid = 1'b1;
                // we are here because we want to write a cache line
                axi.aw_len   = BURST_SIZE;

                case ({axi.aw_ready, axi.w_ready})
                    // only the data beat was accepted, the address is still pending
                    2'b01: begin
                        // was this the last beat?
                        if (cnt_q == 0)
                            state_d = WAIT_AW_READY_BURST;
                        else // no, so reduce the count and stay here
                            cnt_d = cnt_q - 1;
                    end
                    // only the address was accepted, keep sending data
                    2'b10: state_d = WAIT_LAST_W_READY;
                    // address and data beat were accepted
                    2'b11: begin
                        // we are finished
                        if (cnt_q == 0) begin
                            state_d  = WAIT_B_VALID;
                            gnt_o    = 1'b1;
                            gnt_id_o = id_q;
                        // there are outstanding transactions
                        end else begin
                            state_d = WAIT_LAST_W_READY;
                            cnt_d   = cnt_q - 1;
                        end
                    end
                    default:;
                endcase
            end

            // ~> all data has already been sent, we are only waiting for the aw_ready
            WAIT_AW_READY_BURST: begin
                axi.aw_valid = 1'b1;
                axi.aw_len   = BURST_SIZE;

                if (axi.aw_ready) begin
                    state_d  = WAIT_B_VALID;
                    gnt_o    = 1'b1;
                    gnt_id_o = id_q;
                end
            end

            // ~> from write, there is an outstanding write
            WAIT_LAST_W_READY: begin
                axi.w_valid = 1'b1;
                // NOTE(review): a single write arriving here has cnt_q == 0 and therefore
                // sends wdata_i[BURST_SIZE]; this equals wdata_i[0] only when
                // DATA_WIDTH == 64 - confirm single writes are only used in that configuration
                axi.w_data  = wdata_i[BURST_SIZE-cnt_q];
                axi.w_strb  = be_i[BURST_SIZE-cnt_q];
                // this is the last write
                axi.w_last  = (cnt_q == '0) ? 1'b1 : 1'b0;

                if (axi.w_ready) begin
                    // last write -> go to WAIT_B_VALID
                    if (cnt_q == '0) begin
                        state_d  = WAIT_B_VALID;
                        gnt_o    = 1'b1;
                        gnt_id_o = id_q;
                    end else begin
                        cnt_d = cnt_q - 1;
                    end
                end
            end

            // ~> finish write transaction
            WAIT_B_VALID: begin
                axi.b_ready = 1'b1;
                id_o        = axi.b_id;

                // Write is valid
                if (axi.b_valid) begin
                    state_d = IDLE;
                    valid_o = 1'b1;
                end
            end

            // ~> cacheline read, single read
            WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin
                // position of the incoming word inside the line: a wrapping burst starts
                // at the requested word, an incremental burst at word zero
                if (CRITICAL_WORD_FIRST)
                    index = addr_offset_q + (BURST_SIZE-cnt_q);
                else
                    index = BURST_SIZE-cnt_q;

                axi.r_ready = 1'b1;

                // a read beat is on the bus
                if (axi.r_valid) begin
                    if (CRITICAL_WORD_FIRST) begin
                        // this is the first word of a cacheline read, e.g.: the word which was causing the miss
                        if (state_q == WAIT_R_VALID_MULTIPLE && cnt_q == BURST_SIZE) begin
                            critical_word_valid_o = 1'b1;
                            critical_word_o       = axi.r_data;
                        end
                    end else begin
                        // check if the address offset matches - then we are getting the critical word
                        if (index == addr_offset_q) begin
                            critical_word_valid_o = 1'b1;
                            critical_word_o       = axi.r_data;
                        end
                    end

                    // this is the last read
                    if (axi.r_last) begin
                        state_d = COMPLETE_READ;
                    end

                    // save the word
                    if (state_q == WAIT_R_VALID_MULTIPLE) begin
                        cache_line_d[index] = axi.r_data;
                    end else
                        cache_line_d[0] = axi.r_data;

                    // Decrease the counter
                    cnt_d = cnt_q - 1;
                end
            end

            // ~> read is complete
            COMPLETE_READ: begin
                valid_o = 1'b1;
                state_d = IDLE;
                id_o    = id_q;
            end
        endcase
    end

    // ----------------
    // Registers
    // ----------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            // come out of reset idle with all bookkeeping registers cleared
            state_q       <= IDLE;
            cnt_q         <= '0;
            cache_line_q  <= '0;
            addr_offset_q <= '0;
            id_q          <= '0;
        end else begin
            state_q       <= state_d;
            cnt_q         <= cnt_d;
            cache_line_q  <= cache_line_d;
            addr_offset_q <= addr_offset_d;
            id_q          <= id_d;
        end
    end
endmodule

View file

@ -18,13 +18,9 @@
// Description: Cache controller
import ariane_pkg::*;
import nbdcache_pkg::*;
import std_cache_pkg::*;
module cache_ctrl #(
parameter int unsigned SET_ASSOCIATIVITY = 8,
parameter int unsigned INDEX_WIDTH = 12,
parameter int unsigned TAG_WIDTH = 44,
parameter int unsigned CACHE_LINE_WIDTH = 100,
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000
)(
input logic clk_i, // Clock
@ -32,30 +28,21 @@ module cache_ctrl #(
input logic flush_i,
input logic bypass_i, // enable cache
output logic busy_o,
// Core request ports
input logic [INDEX_WIDTH-1:0] address_index_i,
input logic [TAG_WIDTH-1:0] address_tag_i,
input logic [63:0] data_wdata_i,
input logic data_req_i,
input logic data_we_i,
input logic [7:0] data_be_i,
input logic [1:0] data_size_i,
input logic kill_req_i,
input logic tag_valid_i,
output logic data_gnt_o,
output logic data_rvalid_o,
output logic [63:0] data_rdata_o,
input amo_t amo_op_i,
input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o,
// SRAM interface
output logic [SET_ASSOCIATIVITY-1:0] req_o, // req is valid
output logic [INDEX_WIDTH-1:0] addr_o, // address into cache array
output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
input logic gnt_i,
output cache_line_t data_o,
output cl_be_t be_o,
output logic [TAG_WIDTH-1:0] tag_o, //valid one cycle later
input cache_line_t [SET_ASSOCIATIVITY-1:0] data_i,
output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
output logic we_o,
input logic [SET_ASSOCIATIVITY-1:0] hit_way_i,
input logic [DCACHE_SET_ASSOC-1:0] hit_way_i,
// Miss handling
output miss_req_t miss_req_o,
// return
@ -88,8 +75,8 @@ module cache_ctrl #(
} state_d, state_q;
typedef struct packed {
logic [INDEX_WIDTH-1:0] index;
logic [TAG_WIDTH-1:0] tag;
logic [DCACHE_INDEX_WIDTH-1:0] index;
logic [DCACHE_TAG_WIDTH-1:0] tag;
logic [7:0] be;
logic [1:0] size;
logic we;
@ -97,17 +84,17 @@ module cache_ctrl #(
logic bypass;
} mem_req_t;
logic [SET_ASSOCIATIVITY-1:0] hit_way_d, hit_way_q;
logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q;
assign busy_o = (state_q != IDLE);
mem_req_t mem_req_d, mem_req_q;
logic [CACHE_LINE_WIDTH-1:0] cl_i;
logic [DCACHE_LINE_WIDTH-1:0] cl_i;
always_comb begin : way_select
cl_i = '0;
for (int unsigned i = 0; i < SET_ASSOCIATIVITY; i++)
for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++)
if (hit_way_i[i])
cl_i = data_i[i].data;
@ -118,10 +105,10 @@ module cache_ctrl #(
// Cache FSM
// --------------
always_comb begin : cache_ctrl_fsm
automatic logic [$clog2(CACHE_LINE_WIDTH)-1:0] cl_offset;
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
// incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
// cache-line offset -> multiple of 64
cl_offset = mem_req_q.index[BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
// default assignments
state_d = state_q;
@ -129,14 +116,14 @@ module cache_ctrl #(
hit_way_d = hit_way_q;
// output assignments
data_gnt_o = 1'b0;
data_rvalid_o = 1'b0;
data_rdata_o = '0;
req_port_o.data_gnt = 1'b0;
req_port_o.data_rvalid = 1'b0;
req_port_o.data_rdata = '0;
miss_req_o = '0;
mshr_addr_o = '0;
// Memory array communication
req_o = '0;
addr_o = address_index_i;
addr_o = req_port_i.address_index;
data_o = '0;
be_o = '0;
tag_o = '0;
@ -147,23 +134,23 @@ module cache_ctrl #(
IDLE: begin
// a new request arrived
if (data_req_i && !flush_i) begin
if (req_port_i.data_req && !flush_i) begin
// request the cache line - we can do this speculatively
req_o = '1;
// save index, be and we
mem_req_d.index = address_index_i;
mem_req_d.tag = address_tag_i;
mem_req_d.be = data_be_i;
mem_req_d.size = data_size_i;
mem_req_d.we = data_we_i;
mem_req_d.wdata = data_wdata_i;
mem_req_d.index = req_port_i.address_index;
mem_req_d.tag = req_port_i.address_tag;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
// Bypass mode, check for uncacheable address here as well
if (bypass_i) begin
state_d = WAIT_TAG_BYPASSED;
// grant this access
data_gnt_o = 1'b1;
req_port_o.data_gnt = 1'b1;
mem_req_d.bypass = 1'b1;
// ------------------
// Cache is enabled
@ -174,8 +161,8 @@ module cache_ctrl #(
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
// only for a read
if (!data_we_i)
data_gnt_o = 1'b1;
if (!req_port_i.data_we)
req_port_o.data_gnt = 1'b1;
end
end
end
@ -185,31 +172,30 @@ module cache_ctrl #(
WAIT_TAG, WAIT_TAG_SAVED: begin
// depending on where we come from
// For the store case the tag comes in the same cycle
tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag : address_tag_i;
tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? mem_req_q.tag : req_port_i.address_tag;
// we speculatively request another transfer
if (data_req_i && !flush_i) begin
if (req_port_i.data_req && !flush_i) begin
req_o = '1;
end
// check that the client really wants to do the request
if (!kill_req_i) begin
if (!req_port_i.kill_req) begin
// ------------
// HIT CASE
// ------------
if (|hit_way_i) begin
// we can request another cache-line if this was a load
// make another request
if (data_req_i && !mem_req_q.we && !flush_i) begin
if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
state_d = WAIT_TAG; // switch back to WAIT_TAG
mem_req_d.index = address_index_i;
mem_req_d.be = data_be_i;
mem_req_d.size = data_size_i;
mem_req_d.we = data_we_i;
mem_req_d.wdata = data_wdata_i;
mem_req_d.tag = address_tag_i;
mem_req_d.index = req_port_i.address_index;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.tag = req_port_i.address_tag;
mem_req_d.bypass = 1'b0;
data_gnt_o = gnt_i;
req_port_o.data_gnt = gnt_i;
if (!gnt_i) begin
state_d = IDLE;
@ -220,15 +206,15 @@ module cache_ctrl #(
end
// this is timing critical
// data_rdata_o = cl_i[cl_offset +: 64];
// req_port_o.data_rdata = cl_i[cl_offset +: 64];
case (mem_req_q.index[3])
1'b0: data_rdata_o = cl_i[63:0];
1'b1: data_rdata_o = cl_i[127:64];
1'b0: req_port_o.data_rdata = cl_i[63:0];
1'b1: req_port_o.data_rdata = cl_i[127:64];
endcase
// report data for a read
if (!mem_req_q.we) begin
data_rvalid_o = 1'b1;
req_port_o.data_rvalid = 1'b1;
// else this was a store so we need an extra step to handle it
end else begin
@ -239,31 +225,49 @@ module cache_ctrl #(
// MISS CASE
// ------------
end else begin
// also save tag
mem_req_d.tag = address_tag_i;
// also save the tag
mem_req_d.tag = req_port_i.address_tag;
// make a miss request
state_d = WAIT_REFILL_GNT;
end
// ---------------
// Check MSHR
// ---------------
mshr_addr_o = {address_tag_i, mem_req_q.index};
// we've got a match on MSHR
if (mshr_addr_matches_i) begin
// ----------------------------------------------
// Check MSHR - Miss Status Handling Register
// ----------------------------------------------
mshr_addr_o = {tag_o, mem_req_q.index};
// 1. We've got a match on the MSHR while we are going down the
// store path. This means that the miss controller is
// currently evicting our cache-line. As the store is
// non-atomic we need to constantly check whether we are
// matching the address the miss handler is serving.
// Furthermore we need to check for the whole index
// because a completely different memory line could alias
// with the cache-line we are evicting.
// 2. The second case is where we are currently loading and
// the address matches the exact CL the miss controller
// is currently serving. That means we need to wait for
// the miss controller to finish its request before we
// can continue to serve this CL. Otherwise we will fetch
// the cache-line again and potentially lose any
// content we've written so far. As a consequence we
// can't have a hit on the CL, which means
// req_port_o.data_rvalid will be de-asserted.
if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin
state_d = WAIT_MSHR;
// save tag if we didn't already save it, i.e. we are not in the Tag saved state
if (state_q != WAIT_TAG_SAVED)
mem_req_d.tag = address_tag_i;
if (state_q != WAIT_TAG_SAVED) begin
mem_req_d.tag = req_port_i.address_tag;
end
end
// -------------------------
// Check for cache-ability
// -------------------------
if (tag_o < CACHE_START_ADDR[TAG_WIDTH+INDEX_WIDTH-1:INDEX_WIDTH]) begin
mem_req_d.tag = address_tag_i;
if (tag_o < CACHE_START_ADDR[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH]) begin
mem_req_d.tag = req_port_i.address_tag;
mem_req_d.bypass = 1'b1;
state_d = WAIT_REFILL_GNT;
end
end
end // !kill_req_i
end
// ~> we are here as we need a second round of memory access for a store
@ -279,23 +283,18 @@ module cache_ctrl #(
addr_o = mem_req_q.index;
we_o = 1'b1;
be_o.dirty = hit_way_q;
be_o.valid = hit_way_q;
be_o.vldrty = hit_way_q;
// set the correct byte enable
for (int unsigned i = 0; i < 8; i++) begin
if (mem_req_q.be[i])
be_o.data[cl_offset + i*8 +: 8] = '1;
end
data_o.data[cl_offset +: 64] = mem_req_q.wdata;
be_o.data[cl_offset>>3 +: 8] = mem_req_q.be;
data_o.data[cl_offset +: 64] = mem_req_q.wdata;
// ~> change the state
data_o.dirty = 1'b1;
data_o.valid = 1'b1;
// got a grant ~> this is finished now
if (gnt_i) begin
data_gnt_o = 1'b1;
req_port_o.data_gnt = 1'b1;
state_d = IDLE;
end
end else begin
@ -303,7 +302,7 @@ module cache_ctrl #(
end
end // case: STORE_REQ
// we've got a match on MSHR ~> miss unit is scurrently serving a request
// we've got a match on MSHR ~> miss unit is currently serving a request
WAIT_MSHR: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// we can start a new request
@ -320,9 +319,9 @@ module cache_ctrl #(
// its for sure a miss
WAIT_TAG_BYPASSED: begin
// the request was not killed
if (!kill_req_i) begin
if (!req_port_i.kill_req) begin
// save tag
mem_req_d.tag = address_tag_i;
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_REFILL_GNT;
end
end
@ -345,19 +344,20 @@ module cache_ctrl #(
state_d = WAIT_REFILL_VALID;
// if this was a write we still need to give a grant to the store unit
if (mem_req_q.we)
data_gnt_o = 1'b1;
req_port_o.data_gnt = 1'b1;
end
if (miss_gnt_i && !mem_req_q.we)
state_d = WAIT_CRITICAL_WORD;
else if (miss_gnt_i) begin
state_d = IDLE;
data_gnt_o = 1'b1;
req_port_o.data_gnt = 1'b1;
end
// it can be the case that the miss unit is currently serving a request which matches ours
// so we need to check the mshr for matching continously
// if the mshr matches we need to go to a different state -> we should never get a matching mshr and a high miss_gnt_i
// it can be the case that the miss unit is currently serving a
// request which matches ours
// so we need to check the MSHR for matching continuously
// if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
if (mshr_addr_matches_i && !active_serving_i) begin
state_d = WAIT_MSHR;
end
@ -366,23 +366,23 @@ module cache_ctrl #(
// ~> wait for critical word to arrive
WAIT_CRITICAL_WORD: begin
// speculatively request another word
if (data_req_i) begin
if (req_port_i.data_req) begin
// request the cache line
req_o = '1;
end
if (critical_word_valid_i) begin
data_rvalid_o = 1'b1;
data_rdata_o = critical_word_i;
req_port_o.data_rvalid = 1'b1;
req_port_o.data_rdata = critical_word_i;
// we can make another request
if (data_req_i) begin
if (req_port_i.data_req) begin
// save index, be and we
mem_req_d.index = address_index_i;
mem_req_d.be = data_be_i;
mem_req_d.size = data_size_i;
mem_req_d.we = data_we_i;
mem_req_d.wdata = data_wdata_i;
mem_req_d.tag = address_tag_i;
mem_req_d.index = req_port_i.address_index;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.tag = req_port_i.address_tag;
state_d = IDLE;
@ -391,7 +391,7 @@ module cache_ctrl #(
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
data_gnt_o = 1'b1;
req_port_o.data_gnt = 1'b1;
end
end else begin
@ -403,16 +403,16 @@ module cache_ctrl #(
WAIT_REFILL_VALID: begin
// got a valid answer
if (bypass_valid_i) begin
data_rdata_o = bypass_data_i;
data_rvalid_o = 1'b1;
req_port_o.data_rdata = bypass_data_i;
req_port_o.data_rvalid = 1'b1;
state_d = IDLE;
end
end
endcase
if (kill_req_i) begin
if (req_port_i.kill_req) begin
state_d = IDLE;
data_rvalid_o = 1'b1;
req_port_o.data_rvalid = 1'b1;
end
end
@ -432,12 +432,21 @@ module cache_ctrl #(
end
`ifndef SYNTHESIS
`ifndef verilator
initial begin
assert (CACHE_LINE_WIDTH == 128) else $error ("Cacheline width has to be 128 for the moment. But only small changes required in data select logic");
assert (DCACHE_LINE_WIDTH == 128) else $error ("Cacheline width has to be 128 for the moment. But only small changes required in data select logic");
end
// if the full MSHR address matches so should also match the partial one
partial_full_mshr_match: assert property(@(posedge clk_i) disable iff (rst_ni !== 1'b0) mshr_addr_matches_i -> mshr_index_matches_i) else $fatal ("partial mshr index doesn't match");
// there should never be a valid answer when the MSHR matches
no_valid_on_mshr_match: assert property(@(posedge clk_i) disable iff (rst_ni !== 1'b0) mshr_addr_matches_i -> !req_port_o.data_rvalid) else $fatal ("rvalid_o should not be set on MSHR match");
`endif
`endif
endmodule
module AMO_alu (
input logic clk_i,
input logic rst_ni,

View file

@ -15,7 +15,8 @@
// --------------
// MISS Handler
// --------------
import nbdcache_pkg::*;
import ariane_pkg::*;
import std_cache_pkg::*;
module miss_handler #(
parameter int unsigned NR_PORTS = 3,
@ -47,12 +48,11 @@ module miss_handler #(
output logic [NR_PORTS-1:0] mshr_addr_matches_o,
output logic [NR_PORTS-1:0] mshr_index_matches_o,
// Port to SRAMs, for refill and eviction
output logic [SET_ASSOCIATIVITY-1:0] req_o,
output logic [INDEX_WIDTH-1:0] addr_o, // address into cache array
input logic gnt_i,
output logic [DCACHE_SET_ASSOC-1:0] req_o,
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
output cache_line_t data_o,
output cl_be_t be_o,
input cache_line_t [SET_ASSOCIATIVITY-1:0] data_i,
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
output logic we_o
);
@ -74,8 +74,8 @@ module miss_handler #(
REQ_CACHELINE, MISS_REPL, SAVE_CACHELINE, INIT } state_d, state_q;
// Registers
mshr_t mshr_d, mshr_q;
logic [INDEX_WIDTH-1:0] cnt_d, cnt_q;
logic [SET_ASSOCIATIVITY-1:0] evict_way_d, evict_way_q;
logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q;
logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
// cache line to evict
cache_line_t evict_cl_d, evict_cl_q;
@ -89,28 +89,28 @@ module miss_handler #(
logic [NR_PORTS-1:0][1:0] miss_req_size;
// Cache Line Refill <-> AXI
logic req_fsm_miss_valid;
logic req_fsm_miss_bypass;
logic [63:0] req_fsm_miss_addr;
logic [CACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata;
logic req_fsm_miss_we;
logic [(CACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be;
logic gnt_miss_fsm;
logic valid_miss_fsm;
logic [(CACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm;
logic req_fsm_miss_valid;
logic req_fsm_miss_bypass;
logic [63:0] req_fsm_miss_addr;
logic [DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata;
logic req_fsm_miss_we;
logic [(DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be;
logic gnt_miss_fsm;
logic valid_miss_fsm;
logic [(DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm;
// Cache Management <-> LFSR
logic lfsr_enable;
logic [SET_ASSOCIATIVITY-1:0] lfsr_oh;
logic [$clog2(SET_ASSOCIATIVITY-1)-1:0] lfsr_bin;
logic lfsr_enable;
logic [DCACHE_SET_ASSOC-1:0] lfsr_oh;
logic [$clog2(DCACHE_SET_ASSOC-1)-1:0] lfsr_bin;
// ------------------------------
// Cache Management
// ------------------------------
always_comb begin : cache_management
automatic logic [SET_ASSOCIATIVITY-1:0] evict_way, valid_way;
automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way;
for (int unsigned i = 0; i < SET_ASSOCIATIVITY; i++) begin
for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin
evict_way[i] = data_i[i].valid & data_i[i].dirty;
valid_way[i] = data_i[i].valid;
end
@ -169,7 +169,7 @@ module miss_handler #(
mshr_d.valid = 1'b1;
mshr_d.we = miss_req_we[i];
mshr_d.id = i;
mshr_d.addr = miss_req_addr[i][TAG_WIDTH+INDEX_WIDTH-1:0];
mshr_d.addr = miss_req_addr[i][DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:0];
mshr_d.wdata = miss_req_wdata[i];
mshr_d.be = miss_req_be[i];
break;
@ -182,7 +182,7 @@ module miss_handler #(
// 1. Check if there is an empty cache-line
// 2. If not -> evict one
req_o = '1;
addr_o = mshr_q.addr[INDEX_WIDTH-1:0];
addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
state_d = MISS_REPL;
miss_o = 1'b1;
end
@ -198,7 +198,7 @@ module miss_handler #(
state_d = WB_CACHELINE_MISS;
evict_cl_d.tag = data_i[lfsr_bin].tag;
evict_cl_d.data = data_i[lfsr_bin].data;
cnt_d = mshr_q.addr[INDEX_WIDTH-1:0];
cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
// no - we can request a cache line now
end else
state_d = REQ_CACHELINE;
@ -224,18 +224,17 @@ module miss_handler #(
// ~> replace the cacheline
SAVE_CACHELINE: begin
// calculate cacheline offset
automatic logic [$clog2(CACHE_LINE_WIDTH)-1:0] cl_offset;
cl_offset = mshr_q.addr[BYTE_OFFSET-1:3] << 6;
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
cl_offset = mshr_q.addr[DCACHE_BYTE_OFFSET-1:3] << 6;
// we've got a valid response from refill unit
if (valid_miss_fsm) begin
addr_o = mshr_q.addr[INDEX_WIDTH-1:0];
addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0];
req_o = evict_way_q;
we_o = 1'b1;
be_o = '1;
be_o.valid = evict_way_q;
be_o.dirty = evict_way_q;
data_o.tag = mshr_q.addr[TAG_WIDTH+INDEX_WIDTH-1:INDEX_WIDTH];
be_o.vldrty = evict_way_q;
data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
data_o.data = data_miss_fsm;
data_o.valid = 1'b1;
data_o.dirty = 1'b0;
@ -265,7 +264,7 @@ module miss_handler #(
WB_CACHELINE_FLUSH, WB_CACHELINE_MISS: begin
req_fsm_miss_valid = 1'b1;
req_fsm_miss_addr = {evict_cl_q.tag, cnt_q[INDEX_WIDTH-1:BYTE_OFFSET], {{BYTE_OFFSET}{1'b0}}};
req_fsm_miss_addr = {evict_cl_q.tag, cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET}{1'b0}}};
req_fsm_miss_be = '1;
req_fsm_miss_we = 1'b1;
req_fsm_miss_wdata = evict_cl_q.data;
@ -277,8 +276,7 @@ module miss_handler #(
req_o = 1'b1;
we_o = 1'b1;
// invalidate
be_o.valid = evict_way_q;
be_o.dirty = evict_way_q;
be_o.vldrty = evict_way_q;
// go back to handling the miss or flushing, depending on where we came from
state_d = (state_q == WB_CACHELINE_MISS) ? MISS : FLUSH_REQ_STATUS;
end
@ -305,14 +303,14 @@ module miss_handler #(
// not dirty ~> increment and continue
end else begin
// increment and re-request
cnt_d = cnt_q + (1'b1 << BYTE_OFFSET);
state_d = FLUSH_REQ_STATUS;
addr_o = cnt_q;
req_o = 1'b1;
be_o.valid = '1;
we_o = 1'b1;
cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
state_d = FLUSH_REQ_STATUS;
addr_o = cnt_q;
req_o = 1'b1;
be_o.vldrty = '1;
we_o = 1'b1;
// finished with flushing operation, go back to idle
if (cnt_q[INDEX_WIDTH-1:BYTE_OFFSET] == NUM_WORDS-1) begin
if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1) begin
flush_ack_o = 1'b1;
state_d = IDLE;
end
@ -326,11 +324,10 @@ module miss_handler #(
req_o = 1'b1;
we_o = 1'b1;
// only write the dirty array
be_o.dirty = '1;
be_o.valid = '1;
cnt_d = cnt_q + (1'b1 << BYTE_OFFSET);
be_o.vldrty = '1;
cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET);
// finished initialization
if (cnt_q[INDEX_WIDTH-1:BYTE_OFFSET] == NUM_WORDS-1)
if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS-1)
state_d = IDLE;
end
endcase
@ -344,12 +341,12 @@ module miss_handler #(
for (int i = 0; i < NR_PORTS; i++) begin
// check mshr for potential matching of other units, exclude the unit currently being served
if (mshr_q.valid && mshr_addr_i[i][55:BYTE_OFFSET] == mshr_q.addr[55:BYTE_OFFSET]) begin
if (mshr_q.valid && mshr_addr_i[i][55:DCACHE_BYTE_OFFSET] == mshr_q.addr[55:DCACHE_BYTE_OFFSET]) begin
mshr_addr_matches_o[i] = 1'b1;
end
// same as previous, but checking only the index
if (mshr_q.valid && mshr_addr_i[i][INDEX_WIDTH-1:BYTE_OFFSET] == mshr_q.addr[INDEX_WIDTH-1:BYTE_OFFSET]) begin
if (mshr_q.valid && mshr_addr_i[i][DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == mshr_q.addr[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]) begin
mshr_index_matches_o[i] = 1'b1;
end
end
@ -442,7 +439,7 @@ module miss_handler #(
.id_i ( {{{AXI_ID_WIDTH-$clog2(NR_PORTS)}{1'b0}}, id_fsm_bypass} ),
.valid_o ( valid_bypass_fsm ),
.rdata_o ( data_bypass_fsm ),
.gnt_id_o ( gnt_id_bypass_fsm ),
.gnt_id_o ( gnt_id_bypass_fsm ),
.id_o ( id_bypass_fsm ),
.critical_word_o ( ), // not used for single requests
.critical_word_valid_o ( ), // not used for single requests
@ -454,7 +451,7 @@ module miss_handler #(
// Cache Line Arbiter
// ----------------------
axi_adapter #(
.DATA_WIDTH ( CACHE_LINE_WIDTH ),
.DATA_WIDTH ( DCACHE_LINE_WIDTH ),
.AXI_ID_WIDTH ( AXI_ID_WIDTH )
) i_miss_axi_adapter (
.req_i ( req_fsm_miss_valid ),
@ -477,7 +474,7 @@ module miss_handler #(
// -----------------
// Replacement LFSR
// -----------------
lfsr #(.WIDTH (SET_ASSOCIATIVITY)) i_lfsr (
lfsr_8bit #(.WIDTH (DCACHE_SET_ASSOC)) i_lfsr (
.en_i ( lfsr_enable ),
.refill_way_oh ( lfsr_oh ),
.refill_way_bin ( lfsr_bin ),
@ -587,10 +584,10 @@ module arbiter #(
if (data_req_i[i] == 1'b1) begin
data_req_o = data_req_i[i];
data_gnt_o[i] = data_req_i[i];
request_index = i;
request_index = i[$bits(request_index)-1:0];
// save the request
req_d.address = address_i[i];
req_d.id = i;
req_d.id = i[$bits(req_q.id)-1:0];
req_d.data = data_wdata_i[i];
req_d.size = data_size_i[i];
req_d.be = data_be_i[i];
@ -648,342 +645,3 @@ module arbiter #(
`endif
`endif
endmodule
// --------------
// AXI Adapter
// --------------
//
// Description: Manages communication with the AXI Bus
//
module axi_adapter #(
parameter int unsigned DATA_WIDTH = 256,
parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature
parameter int unsigned AXI_ID_WIDTH = 10
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic req_i,
input req_t type_i,
output logic gnt_o,
output logic [AXI_ID_WIDTH-1:0] gnt_id_o,
input logic [63:0] addr_i,
input logic we_i,
input logic [(DATA_WIDTH/64)-1:0][63:0] wdata_i,
input logic [(DATA_WIDTH/64)-1:0][7:0] be_i,
input logic [1:0] size_i,
input logic [AXI_ID_WIDTH-1:0] id_i,
// read port
output logic valid_o,
output logic [(DATA_WIDTH/64)-1:0][63:0] rdata_o,
output logic [AXI_ID_WIDTH-1:0] id_o,
// critical word - read port
output logic [63:0] critical_word_o,
output logic critical_word_valid_o,
// AXI port
AXI_BUS.Master axi
);
localparam BURST_SIZE = DATA_WIDTH/64-1;
localparam ADDR_INDEX = ($clog2(DATA_WIDTH/64) > 0) ? $clog2(DATA_WIDTH/64) : 1;
enum logic [3:0] {
IDLE, WAIT_B_VALID, WAIT_AW_READY, WAIT_LAST_W_READY, WAIT_LAST_W_READY_AW_READY, WAIT_AW_READY_BURST,
WAIT_R_VALID, WAIT_R_VALID_MULTIPLE, COMPLETE_READ
} state_q, state_d;
// counter for AXI transfers
logic [ADDR_INDEX-1:0] cnt_d, cnt_q;
logic [(DATA_WIDTH/64)-1:0][63:0] cache_line_d, cache_line_q;
// save the address for a read, as we allow for non-cacheline aligned accesses
logic [(DATA_WIDTH/64)-1:0] addr_offset_d, addr_offset_q;
logic [AXI_ID_WIDTH-1:0] id_d, id_q;
logic [ADDR_INDEX-1:0] index;
always_comb begin : axi_fsm
// Default assignments
axi.aw_valid = 1'b0;
axi.aw_addr = addr_i;
axi.aw_prot = 3'b0;
axi.aw_region = 4'b0;
axi.aw_len = 8'b0;
axi.aw_size = {1'b0, size_i};
axi.aw_burst = (type_i == SINGLE_REQ) ? 2'b00 : 2'b01; // fixed size for single request and incremental transfer for everything else
axi.aw_lock = 1'b0;
axi.aw_cache = 4'b0;
axi.aw_qos = 4'b0;
axi.aw_id = id_i;
axi.aw_user = '0;
axi.ar_valid = 1'b0;
// in case of a single request or wrapping transfer we can simply begin at the address, if we want to request a cache-line
// with an incremental transfer we need to output the corresponding base address of the cache line
axi.ar_addr = (CRITICAL_WORD_FIRST || type_i == SINGLE_REQ) ? addr_i : { addr_i[63:BYTE_OFFSET], {{BYTE_OFFSET}{1'b0}}};
axi.ar_prot = 3'b0;
axi.ar_region = 4'b0;
axi.ar_len = 8'b0;
axi.ar_size = {1'b0, size_i}; // 8 bytes
axi.ar_burst = (type_i == SINGLE_REQ) ? 2'b00 : (CRITICAL_WORD_FIRST ? 2'b10 : 2'b01); // wrapping transfer in case of a critical word first strategy
axi.ar_lock = 1'b0;
axi.ar_cache = 4'b0;
axi.ar_qos = 4'b0;
axi.ar_id = id_i;
axi.ar_user = '0;
axi.w_valid = 1'b0;
axi.w_data = wdata_i[0];
axi.w_strb = be_i[0];
axi.w_user = '0;
axi.w_last = 1'b0;
axi.b_ready = 1'b0;
axi.r_ready = 1'b0;
gnt_o = 1'b0;
gnt_id_o = '0;
valid_o = 1'b0;
id_o = axi.r_id;
// rdata_o = axi.r_data;
critical_word_o = axi.r_data;
critical_word_valid_o = 1'b0;
rdata_o = cache_line_q;
state_d = state_q;
cnt_d = cnt_q;
cache_line_d = cache_line_q;
addr_offset_d = addr_offset_q;
id_d = id_q;
index = '0;
case (state_q)
IDLE: begin
cnt_d = '0;
// we have an incoming request
if (req_i) begin
// is this a read or write?
// write
if (we_i) begin
// the data is valid
axi.aw_valid = 1'b1;
axi.w_valid = 1'b1;
// its a single write
if (type_i == SINGLE_REQ) begin
// single req can be granted here
gnt_o = axi.aw_ready & axi.w_ready;
gnt_id_o = id_i;
case ({axi.aw_ready, axi.w_ready})
2'b11: state_d = WAIT_B_VALID;
2'b01: state_d = WAIT_AW_READY;
2'b10: state_d = WAIT_LAST_W_READY;
default: state_d = IDLE;
endcase
id_d = axi.aw_id;
// its a request for the whole cache line
end else begin
axi.aw_len = BURST_SIZE; // number of bursts to do
axi.w_last = 1'b0;
axi.w_data = wdata_i[0];
axi.w_strb = be_i[0];
if (axi.w_ready)
cnt_d = BURST_SIZE - 1;
else
cnt_d = BURST_SIZE;
case ({axi.aw_ready, axi.w_ready})
2'b11: state_d = WAIT_LAST_W_READY;
2'b01: state_d = WAIT_LAST_W_READY_AW_READY;
2'b10: state_d = WAIT_LAST_W_READY;
default:;
endcase
// save id
id_d = axi.aw_id;
end
// read
end else begin
axi.ar_valid = 1'b1;
gnt_o = axi.ar_ready;
gnt_id_o = id_i;
if (type_i != SINGLE_REQ) begin
axi.ar_len = BURST_SIZE;
cnt_d = BURST_SIZE;
end
if (axi.ar_ready) begin
state_d = (type_i == SINGLE_REQ) ? WAIT_R_VALID : WAIT_R_VALID_MULTIPLE;
addr_offset_d = addr_i[ADDR_INDEX-1+3:3];
// save id
id_d = axi.ar_id;
end
end
end
end
// ~> from single write, write request has already been granted
WAIT_AW_READY: begin
axi.aw_valid = 1'b1;
axi.aw_len = 8'b0;
if (axi.aw_ready)
state_d = WAIT_B_VALID;
end
// ~> we need to wait for an aw_ready and there is at least one outstanding write
WAIT_LAST_W_READY_AW_READY: begin
axi.w_valid = 1'b1;
axi.w_last = (cnt_q == '0) ? 1'b1 : 1'b0;
axi.w_data = wdata_i[BURST_SIZE-cnt_q];
axi.w_strb = be_i[BURST_SIZE-cnt_q];
axi.aw_valid = 1'b1;
// we are here because we want to write a cache line
axi.aw_len = BURST_SIZE;
// we got an aw_ready
case ({axi.aw_ready, axi.w_ready})
// we got an aw ready
2'b01: begin
// are there any outstanding transactions?
if (cnt_q == 0)
state_d = WAIT_AW_READY_BURST;
else // yes, so reduce the count and stay here
cnt_d = cnt_q - 1;
end
2'b10: state_d = WAIT_LAST_W_READY;
2'b11: begin
// we are finished
if (cnt_q == 0) begin
state_d = WAIT_B_VALID;
gnt_o = 1'b1;
gnt_id_o = id_q;
// there are outstanding transactions
end else begin
state_d = WAIT_LAST_W_READY;
cnt_d = cnt_q - 1;
end
end
default:;
endcase
end
// ~> all data has already been sent, we are only waiting for the aw_ready
WAIT_AW_READY_BURST: begin
axi.aw_valid = 1'b1;
axi.aw_len = BURST_SIZE;
if (axi.aw_ready) begin
state_d = WAIT_B_VALID;
gnt_o = 1'b1;
gnt_id_o = id_q;
end
end
// ~> from write, there is an outstanding write
WAIT_LAST_W_READY: begin
axi.w_valid = 1'b1;
axi.w_data = wdata_i[BURST_SIZE-cnt_q];
axi.w_strb = be_i[BURST_SIZE-cnt_q];
// this is the last write
axi.w_last = (cnt_q == '0) ? 1'b1 : 1'b0;
if (axi.w_ready) begin
// last write -> go to WAIT_B_VALID
if (cnt_q == '0) begin
state_d = WAIT_B_VALID;
gnt_o = (cnt_q == '0);
gnt_id_o = id_q;
end else begin
cnt_d = cnt_q - 1;
end
end
end
// ~> finish write transaction
WAIT_B_VALID: begin
axi.b_ready = 1'b1;
id_o = axi.b_id;
// Write is valid
if (axi.b_valid) begin
state_d = IDLE;
valid_o = 1'b1;
end
end
// ~> cacheline read, single read
WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin
if (CRITICAL_WORD_FIRST)
index = addr_offset_q + (BURST_SIZE-cnt_q);
else
index = BURST_SIZE-cnt_q;
// reads are always wrapping here
axi.r_ready = 1'b1;
// this is the first read a.k.a the critical word
if (axi.r_valid) begin
if (CRITICAL_WORD_FIRST) begin
// this is the first word of a cacheline read, e.g.: the word which was causing the miss
if (state_q == WAIT_R_VALID_MULTIPLE && cnt_q == BURST_SIZE) begin
critical_word_valid_o = 1'b1;
critical_word_o = axi.r_data;
end
end else begin
// check if the address offset matches - then we are getting the critical word
if (index == addr_offset_q) begin
critical_word_valid_o = 1'b1;
critical_word_o = axi.r_data;
end
end
// this is the last read
if (axi.r_last) begin
state_d = COMPLETE_READ;
end
// save the word
if (state_q == WAIT_R_VALID_MULTIPLE) begin
cache_line_d[index] = axi.r_data;
end else
cache_line_d[0] = axi.r_data;
// Decrease the counter
cnt_d = cnt_q - 1;
end
end
// ~> read is complete
COMPLETE_READ: begin
valid_o = 1'b1;
state_d = IDLE;
id_o = id_q;
end
endcase
end
// ----------------
// Registers
// ----------------
always_ff @(posedge clk_i or negedge rst_ni) begin
    if (!rst_ni) begin
        // asynchronous, active-low reset: the state machine comes up in IDLE
        // and every other register is cleared
        id_q          <= '0;
        addr_offset_q <= '0;
        cache_line_q  <= '0;
        cnt_q         <= '0;
        state_q       <= IDLE;
    end else begin
        // on every rising clock edge take over the next-state (_d) values
        // computed by the combinational process
        id_q          <= id_d;
        addr_offset_q <= addr_offset_d;
        cache_line_q  <= cache_line_d;
        cnt_q         <= cnt_d;
        state_q       <= state_d;
    end
end
endmodule

View file

@ -0,0 +1,103 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Standard Ariane cache subsystem with instruction cache and
// write-back data cache.
import ariane_pkg::*;
import std_cache_pkg::*;
module std_cache_subsystem #(
    // handed through unchanged to the std_nbdcache instance below
    parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000
) (
    input  logic                 clk_i,
    input  logic                 rst_ni,
    // ------------------------------
    // Instruction cache (I$)
    // ------------------------------
    input  logic                 icache_en_i,          // enable icache (or bypass, e.g. in debug mode)
    input  logic                 icache_flush_i,       // flush the icache; flush and kill have to be asserted together
    output logic                 icache_miss_o,        // to performance counter
    // address translation requests (to/from frontend)
    input  icache_areq_i_t       icache_areq_i,
    output icache_areq_o_t       icache_areq_o,
    // data requests (to/from frontend)
    input  icache_dreq_i_t       icache_dreq_i,
    output icache_dreq_o_t       icache_dreq_o,
    // ------------------------------
    // Data cache (D$)
    // ------------------------------
    // cache management
    input  logic                 dcache_enable_i,      // from CSR
    input  logic                 dcache_flush_i,       // high until acknowledged
    output logic                 dcache_flush_ack_o,   // single-cycle acknowledge once the cache is flushed
    output logic                 dcache_miss_o,        // we missed on a ld/st
    // AMO interface (not functional yet)
    input  logic                 dcache_amo_commit_i,  // commit atomic memory operation
    output logic                 dcache_amo_valid_o,   // we have a valid AMO result
    output logic [63:0]          dcache_amo_result_o,  // result of atomic memory operation
    input  logic                 dcache_amo_flush_i,   // forget about AMO
    // request ports (to/from LSU)
    input  dcache_req_i_t [2:0]  dcache_req_ports_i,
    output dcache_req_o_t [2:0]  dcache_req_ports_o,
    // ------------------------------
    // Memory side
    // ------------------------------
    AXI_BUS.Master               icache_data_if,       // I$ refill port
    AXI_BUS.Master               dcache_data_if,       // D$ refill port
    AXI_BUS.Master               dcache_bypass_if      // bypass axi port (disabled D$ or uncacheable access)
);

    // Instruction cache: every icache_* port of this wrapper is wired
    // one-to-one to the std_icache instance.
    std_icache i_icache (
        .clk_i   ( clk_i          ),
        .rst_ni  ( rst_ni         ),
        // control / status
        .en_i    ( icache_en_i    ),
        .flush_i ( icache_flush_i ),
        .miss_o  ( icache_miss_o  ),
        // address translation channel
        .areq_i  ( icache_areq_i  ),
        .areq_o  ( icache_areq_o  ),
        // data channel
        .dreq_i  ( icache_dreq_i  ),
        .dreq_o  ( icache_dreq_o  ),
        // refill port
        .axi     ( icache_data_if )
    );

    // Data cache request ports, in decreasing priority:
    //   Port 0: PTW
    //   Port 1: Load Unit
    //   Port 2: Store Unit
    std_nbdcache #(
        .CACHE_START_ADDR ( CACHE_START_ADDR )
    ) i_nbdcache (
        .clk_i        ( clk_i               ),
        .rst_ni       ( rst_ni              ),
        // cache management
        .enable_i     ( dcache_enable_i     ),
        .flush_i      ( dcache_flush_i      ),
        .flush_ack_o  ( dcache_flush_ack_o  ),
        .miss_o       ( dcache_miss_o       ),
        // AMO interface
        .amo_commit_i ( dcache_amo_commit_i ),
        .amo_valid_o  ( dcache_amo_valid_o  ),
        .amo_result_o ( dcache_amo_result_o ),
        .amo_flush_i  ( dcache_amo_flush_i  ),
        // request ports
        .req_ports_i  ( dcache_req_ports_i  ),
        .req_ports_o  ( dcache_req_ports_o  ),
        // memory side
        .data_if      ( dcache_data_if      ),
        .bypass_if    ( dcache_bypass_if    )
    );

endmodule // std_cache_subsystem

View file

@ -14,40 +14,29 @@
// Instruction Cache
// ------------------------------
import ariane_pkg::*;
import std_cache_pkg::*;
module icache #(
parameter int unsigned SET_ASSOCIATIVITY = 4,
parameter int unsigned INDEX_WIDTH = 12, // in bit
parameter int unsigned TAG_WIDTH = 44, // in bit
parameter int unsigned CACHE_LINE_WIDTH = 64, // in bit
parameter int unsigned FETCH_WIDTH = 32 // in bit
module std_icache #(
)(
input logic clk_i,
input logic rst_ni,
input logic flush_i, // flush the icache, flush and kill have to be asserted together
input logic en_cache_i, // cache accesses
input logic req_i, // we request a new word
input logic kill_s1_i, // kill the current request
input logic kill_s2_i, // kill the last request
output logic ready_o, // icache is ready
input logic [63:0] vaddr_i, // 1st cycle: 12 bit index is taken for lookup
output logic [FETCH_WIDTH-1:0] data_o, // 2+ cycle out: tag
output logic [63:0] vaddr_o, // virtual address out
output logic valid_o, // signals a valid read
output exception_t ex_o, // we've encountered an exception
output logic miss_o, // we missed on the cache
AXI_BUS.Master axi,
// Address translation
output logic fetch_req_o,
output logic [63:0] fetch_vaddr_o,
input logic fetch_valid_i,
input logic [63:0] fetch_paddr_i,
input exception_t fetch_exception_i
input logic en_i, // enable icache
output logic miss_o, // to performance counter
// address translation requests
input icache_areq_i_t areq_i,
output icache_areq_o_t areq_o,
// data requests
input icache_dreq_i_t dreq_i,
output icache_dreq_o_t dreq_o,
// refill port
AXI_BUS.Master axi
);
localparam int unsigned BYTE_OFFSET = $clog2(CACHE_LINE_WIDTH/8); // 3
localparam int unsigned ICACHE_NUM_WORD = 2**(INDEX_WIDTH - BYTE_OFFSET);
localparam int unsigned NR_AXI_REFILLS = ($clog2(CACHE_LINE_WIDTH/64) == 0) ? 1 : $clog2(CACHE_LINE_WIDTH/64);
localparam int unsigned ICACHE_BYTE_OFFSET = $clog2(ICACHE_LINE_WIDTH/8); // 3
localparam int unsigned ICACHE_NUM_WORD = 2**(ICACHE_INDEX_WIDTH - ICACHE_BYTE_OFFSET);
localparam int unsigned NR_AXI_REFILLS = ($clog2(ICACHE_LINE_WIDTH/64) == 0) ? 1 : $clog2(ICACHE_LINE_WIDTH/64);
// registers
enum logic [3:0] { FLUSH, IDLE, TAG_CMP, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP,
REDO_REQ, TAG_CMP_SAVED, REFILL,
@ -56,59 +45,61 @@ module icache #(
logic [$clog2(ICACHE_NUM_WORD)-1:0] cnt_d, cnt_q;
logic [NR_AXI_REFILLS-1:0] burst_cnt_d, burst_cnt_q; // counter for AXI transfers
logic [63:0] vaddr_d, vaddr_q;
logic [TAG_WIDTH-1:0] tag_d, tag_q;
logic [SET_ASSOCIATIVITY-1:0] evict_way_d, evict_way_q;
logic [ICACHE_TAG_WIDTH-1:0] tag_d, tag_q;
logic [ICACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
logic flushing_d, flushing_q;
// signals
logic [SET_ASSOCIATIVITY-1:0] req; // request to memory array
logic [CACHE_LINE_WIDTH-1:0] data_be; // byte enable for data array
logic [(2**NR_AXI_REFILLS-1):0][63:0] be; // flat byte enable
logic [ICACHE_SET_ASSOC-1:0] req; // request to memory array
logic [(ICACHE_LINE_WIDTH+7)/8-1:0] data_be; // byte enable for data array
logic [(2**NR_AXI_REFILLS-1):0][7:0] be; // byte enable
logic [$clog2(ICACHE_NUM_WORD)-1:0] addr; // this is a cache-line address, to memory array
logic we; // write enable to memory array
logic [SET_ASSOCIATIVITY-1:0] hit; // hit from tag compare
logic [BYTE_OFFSET-1:2] idx; // index in cache line
logic [ICACHE_SET_ASSOC-1:0] hit; // hit from tag compare
logic [ICACHE_BYTE_OFFSET-1:2] idx; // index in cache line
logic update_lfsr; // shift the LFSR
logic [SET_ASSOCIATIVITY-1:0] random_way; // random way select from LFSR
logic [SET_ASSOCIATIVITY-1:0] way_valid; // bit string which contains the zapped valid bits
logic [$clog2(SET_ASSOCIATIVITY)-1:0] repl_invalid; // first non-valid encountered
logic [ICACHE_SET_ASSOC-1:0] random_way; // random way select from LFSR
logic [ICACHE_SET_ASSOC-1:0] way_valid; // bit string which contains the zapped valid bits
logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_invalid; // first non-valid encountered
logic repl_w_random; // we need to switch repl strategy since all are valid
logic [TAG_WIDTH-1:0] tag; // tag to do comparison with
logic [ICACHE_TAG_WIDTH-1:0] tag; // tag to do comparison with
// tag + valid bit read/write data
struct packed {
logic valid;
logic [TAG_WIDTH-1:0] tag;
} tag_rdata [SET_ASSOCIATIVITY-1:0], tag_wdata;
logic [ICACHE_TAG_WIDTH-1:0] tag;
} tag_rdata [ICACHE_SET_ASSOC-1:0], tag_wdata;
logic [CACHE_LINE_WIDTH-1:0] data_rdata [SET_ASSOCIATIVITY-1:0], data_wdata;
logic [ICACHE_LINE_WIDTH-1:0] data_rdata [ICACHE_SET_ASSOC-1:0], data_wdata;
logic [(2**NR_AXI_REFILLS-1):0][63:0] wdata;
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin : sram_block
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : sram_block
// ------------
// Tag RAM
// ------------
sram #(
// tag + valid bit
.DATA_WIDTH ( TAG_WIDTH + 1 ),
.DATA_WIDTH ( ICACHE_TAG_WIDTH + 1 ),
.NUM_WORDS ( ICACHE_NUM_WORD )
) tag_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( req[i] ),
.we_i ( we ),
.addr_i ( addr ),
.wdata_i ( tag_wdata ),
.be_i ( '1 ),
.be_i ( '1 ),
.rdata_o ( tag_rdata[i] )
);
// ------------
// Data RAM
// ------------
sram #(
.DATA_WIDTH ( CACHE_LINE_WIDTH ),
.DATA_WIDTH ( ICACHE_LINE_WIDTH ),
.NUM_WORDS ( ICACHE_NUM_WORD )
) data_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( req[i] ),
.we_i ( we ),
.addr_i ( addr ),
@ -120,7 +111,7 @@ module icache #(
// --------------------
// Tag Comparison
// --------------------
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin
assign hit[i] = (tag_rdata[i].tag == tag) ? tag_rdata[i].valid : 1'b0;
end
@ -135,15 +126,15 @@ module icache #(
// ------------------
// Way Select
// ------------------
assign idx = vaddr_q[BYTE_OFFSET-1:2];
assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2];
// cacheline selected by hit
logic [CACHE_LINE_WIDTH/FETCH_WIDTH-1:0][FETCH_WIDTH-1:0] selected_cl;
logic [CACHE_LINE_WIDTH-1:0] selected_cl_flat;
logic [ICACHE_LINE_WIDTH/FETCH_WIDTH-1:0][FETCH_WIDTH-1:0] selected_cl;
logic [ICACHE_LINE_WIDTH-1:0] selected_cl_flat;
for (genvar i = 0; i < CACHE_LINE_WIDTH; i++) begin
logic [SET_ASSOCIATIVITY-1:0] hit_masked_cl;
for (genvar i = 0; i < ICACHE_LINE_WIDTH; i++) begin
logic [ICACHE_SET_ASSOC-1:0] hit_masked_cl;
for (genvar j = 0; j < SET_ASSOCIATIVITY; j++)
for (genvar j = 0; j < ICACHE_SET_ASSOC; j++)
assign hit_masked_cl[j] = data_rdata[j][i] & hit[j];
assign selected_cl_flat[i] = |hit_masked_cl;
@ -151,9 +142,9 @@ module icache #(
assign selected_cl = selected_cl_flat;
// maybe re-work if critical
assign data_o = selected_cl[idx];
assign dreq_o.data = selected_cl[idx];
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin
assign way_valid[i] = tag_rdata[i].valid;
end
@ -196,7 +187,7 @@ module icache #(
assign data_be = be;
assign data_wdata = wdata;
assign ex_o = fetch_exception_i;
assign dreq_o.ex = areq_i.fetch_exception;
// ------------------
// Cache Ctrl
// ------------------
@ -212,36 +203,36 @@ module icache #(
flushing_d = flushing_q;
burst_cnt_d = burst_cnt_q;
vaddr_o = vaddr_q;
dreq_o.vaddr = vaddr_q;
req = '0;
addr = vaddr_i[INDEX_WIDTH-1:BYTE_OFFSET];
addr = dreq_i.vaddr[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET];
we = 1'b0;
be = '0;
wdata = '0;
tag_wdata = '0;
ready_o = 1'b0;
tag = fetch_paddr_i[TAG_WIDTH+INDEX_WIDTH-1:INDEX_WIDTH];
valid_o = 1'b0;
dreq_o.ready = 1'b0;
tag = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
dreq_o.valid = 1'b0;
update_lfsr = 1'b0;
miss_o = 1'b0;
axi.ar_valid = 1'b0;
axi.ar_addr = '0;
fetch_req_o = 1'b0;
fetch_vaddr_o = vaddr_q;
areq_o.fetch_req = 1'b0;
areq_o.fetch_vaddr = vaddr_q;
case (state_q)
// ~> we are ready to receive a new request
IDLE: begin
ready_o = 1'b1 & fetch_enable_i;
dreq_o.ready = 1'b1;
// we are getting a new request
if (req_i && fetch_enable_i) begin
if (dreq_i.req) begin
// request the content of all arrays
req = '1;
// save the virtual address
vaddr_d = vaddr_i;
vaddr_d = dreq_i.vaddr;
state_d = TAG_CMP;
end
@ -249,34 +240,36 @@ module icache #(
if (flush_i || flushing_q)
state_d = FLUSH;
if (kill_s1_i)
if (dreq_i.kill_s1)
state_d = IDLE;
end
// ~> compare the tag
TAG_CMP, TAG_CMP_SAVED: begin
fetch_req_o = 1'b1; // request address translation
areq_o.fetch_req = 1'b1; // request address translation
// use the saved tag
if (state_q == TAG_CMP_SAVED)
tag = tag_q;
// -------
// Hit
// -------
if (|hit && fetch_valid_i && (en_cache_i || (state_q != TAG_CMP))) begin
ready_o = 1'b1;
valid_o = 1'b1;
// disabling the icache just makes it fetch on every request
if (|hit && areq_i.fetch_valid && (en_i || (state_q != TAG_CMP))) begin
dreq_o.ready = 1'b1;
dreq_o.valid = 1'b1;
// we've got another request
if (req_i) begin
if (dreq_i.req) begin
// request the content of all arrays
req = '1;
// save the index and stay in compare mode
vaddr_d = vaddr_i;
vaddr_d = dreq_i.vaddr;
state_d = TAG_CMP;
// no new request -> go back to idle
end else begin
state_d = IDLE;
end
if (kill_s1_i)
if (dreq_i.kill_s1)
state_d = IDLE;
// -------
// Miss
@ -286,58 +279,66 @@ module icache #(
// hit gonna be zero in most cases except for when the cache is disabled
evict_way_d = hit;
// save tag
tag_d = fetch_paddr_i[TAG_WIDTH+INDEX_WIDTH-1:INDEX_WIDTH];
miss_o = 1'b1;
tag_d = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
miss_o = en_i;
// get way which to replace
if (repl_w_random) begin
evict_way_d = random_way;
// shift the lfsr
update_lfsr = 1'b1;
end else if (!(|hit)) begin
evict_way_d[repl_invalid] = 1'b1;
// only if there is no hit we should fall back to real replacement. If there was a hit then
// it means we are in bypass mode (!en_i) and should update the cache-line with the most recent
// value fetched from memory.
if (!(|hit)) begin
// all ways are currently full, randomly replace one of them
if (repl_w_random) begin
evict_way_d = random_way;
// shift the lfsr
update_lfsr = 1'b1;
// there is still one cache-line which is not valid ~> replace that one
end else begin
evict_way_d[repl_invalid] = 1'b1;
end
end
end
// if we didn't hit on the TLB we need to wait until the request has been completed
if (!fetch_valid_i) begin
if (!areq_i.fetch_valid) begin
state_d = WAIT_ADDRESS_TRANSLATION;
end
end
// ~> wait here for a valid address translation, or on a translation even if the request has been killed
WAIT_ADDRESS_TRANSLATION, WAIT_ADDRESS_TRANSLATION_KILLED: begin
fetch_req_o = 1'b1;
areq_o.fetch_req = 1'b1;
// retry the request if no exception occurred
if (fetch_valid_i && (state_q == WAIT_ADDRESS_TRANSLATION)) begin
if (fetch_exception_i.valid)
valid_o = 1'b1;
else begin
if (areq_i.fetch_valid && (state_q == WAIT_ADDRESS_TRANSLATION)) begin
if (areq_i.fetch_exception.valid) begin
dreq_o.valid = 1'b1;
state_d = IDLE;
end else begin
state_d = REDO_REQ;
tag_d = fetch_paddr_i[TAG_WIDTH+INDEX_WIDTH-1:INDEX_WIDTH];
tag_d = areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH];
end
end else if (fetch_valid_i) begin
end else if (areq_i.fetch_valid) begin
state_d = IDLE;
end
if (kill_s2_i)
if (dreq_i.kill_s2)
state_d = WAIT_ADDRESS_TRANSLATION_KILLED;
end
// ~> request a cache-line refill
REFILL, WAIT_KILLED_REFILL: begin
axi.ar_valid = 1'b1;
axi.ar_addr[INDEX_WIDTH+TAG_WIDTH-1:0] = {tag_q, vaddr_q[INDEX_WIDTH-1:BYTE_OFFSET], {BYTE_OFFSET{1'b0}}};
axi.ar_addr[ICACHE_INDEX_WIDTH+ICACHE_TAG_WIDTH-1:0] = {tag_q, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET], {ICACHE_BYTE_OFFSET{1'b0}}};
burst_cnt_d = '0;
if (kill_s2_i)
if (dreq_i.kill_s2)
state_d = WAIT_KILLED_REFILL;
// we need to finish this AXI transfer
if (axi.ar_ready)
state_d = (kill_s2_i || (state_q == WAIT_KILLED_REFILL)) ? WAIT_KILLED_AXI_R_RESP : WAIT_AXI_R_RESP;
state_d = (dreq_i.kill_s2 || (state_q == WAIT_KILLED_REFILL)) ? WAIT_KILLED_AXI_R_RESP : WAIT_AXI_R_RESP;
end
// ~> wait for the read response
WAIT_AXI_R_RESP, WAIT_KILLED_AXI_R_RESP: begin
req = evict_way_q;
addr = vaddr_q[INDEX_WIDTH-1:BYTE_OFFSET];
addr = vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET];
if (axi.r_valid) begin
we = 1'b1;
@ -350,11 +351,11 @@ module icache #(
burst_cnt_d = burst_cnt_q + 1;
end
if (kill_s2_i)
if (dreq_i.kill_s2)
state_d = WAIT_KILLED_AXI_R_RESP;
if (axi.r_valid && axi.r_last) begin
state_d = (kill_s2_i) ? IDLE : REDO_REQ;
state_d = (dreq_i.kill_s2) ? IDLE : REDO_REQ;
end
if ((state_q == WAIT_KILLED_AXI_R_RESP) && axi.r_last && axi.r_valid)
@ -363,16 +364,10 @@ module icache #(
// ~> redo the request,
REDO_REQ: begin
req = '1;
addr = vaddr_q[INDEX_WIDTH-1:BYTE_OFFSET];
addr = vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET];
tag = tag_q;
state_d = TAG_CMP_SAVED; // do tag comparison on the saved tag
end
// we need to wait for some AXI responses to come back
// here for the AW valid
WAIT_KILLED_REFILL: begin
if (axi.aw_valid)
state_d = IDLE;
end
// ~> we are coming here after reset or when a flush was requested
FLUSH: begin
addr = cnt_q;
@ -390,35 +385,35 @@ module icache #(
endcase
// those are the states where we need to wait a little longer until we can safely exit
if (kill_s2_i && !(state_q inside {REFILL, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP}) && !ready_o) begin
if (dreq_i.kill_s2 && !(state_q inside {REFILL, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP}) && !dreq_o.ready) begin
state_d = IDLE;
end
// if we are killing we can never give a valid response
if (kill_s2_i)
valid_o = 1'b0;
if (dreq_i.kill_s2)
dreq_o.valid = 1'b0;
if (flush_i) begin
flushing_d = 1'b1;
ready_o = 1'b0; // we are not ready to accept a further request here
dreq_o.ready = 1'b0; // we are not ready to accept a further request here
end
// if we are going to flush -> do not accept any new requests
if (flushing_q)
ready_o = 1'b0;
dreq_o.ready = 1'b0;
end
find_first_one #(
.WIDTH ( SET_ASSOCIATIVITY )
) i_ff1 (
.in_i ( ~way_valid ),
.first_one_o ( repl_invalid ),
.no_ones_o ( repl_w_random )
lzc #(
.WIDTH ( ICACHE_SET_ASSOC )
) i_lzc (
.in_i ( ~way_valid ),
.cnt_o ( repl_invalid ),
.empty_o ( repl_w_random )
);
// -----------------
// Replacement LFSR
// -----------------
lfsr #(.WIDTH (SET_ASSOCIATIVITY)) i_lfsr (
lfsr_8bit #(.WIDTH (ICACHE_SET_ASSOC)) i_lfsr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( update_lfsr ),
@ -426,7 +421,6 @@ module icache #(
.refill_way_bin ( ) // left open
);
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= FLUSH;

View file

@ -13,13 +13,11 @@
// Description: Nonblocking private L1 dcache
import ariane_pkg::*;
import nbdcache_pkg::*;
import std_cache_pkg::*;
module nbdcache #(
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000,
parameter int unsigned AXI_ID_WIDTH = 10,
parameter int unsigned AXI_USER_WIDTH = 1
)(
module std_nbdcache #(
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// Cache management
@ -27,28 +25,18 @@ module nbdcache #(
input logic flush_i, // high until acknowledged
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st
// Cache AXI refill port
AXI_BUS.Master data_if,
AXI_BUS.Master bypass_if,
// AMO interface
input logic amo_commit_i, // commit atomic memory operation
output logic amo_valid_o, // we have a valid AMO result
output logic [63:0] amo_result_o, // result of atomic memory operation
input logic amo_flush_i, // forget about AMO
// Request ports
input logic [2:0][INDEX_WIDTH-1:0] address_index_i,
input logic [2:0][TAG_WIDTH-1:0] address_tag_i,
input logic [2:0][63:0] data_wdata_i,
input logic [2:0] data_req_i,
input logic [2:0] data_we_i,
input logic [2:0][7:0] data_be_i,
input logic [2:0][1:0] data_size_i,
input logic [2:0] kill_req_i,
input logic [2:0] tag_valid_i,
output logic [2:0] data_gnt_o,
output logic [2:0] data_rvalid_o,
output logic [2:0][63:0] data_rdata_o,
input amo_t [2:0] amo_op_i
input dcache_req_i_t [2:0] req_ports_i, // request ports
output dcache_req_o_t [2:0] req_ports_o, // request ports
// Cache AXI refill port
AXI_BUS.Master data_if,
AXI_BUS.Master bypass_if
);
// -------------------------------
@ -58,16 +46,16 @@ module nbdcache #(
// 2. PTW
// 3. Load Unit
// 4. Store unit
logic [3:0][SET_ASSOCIATIVITY-1:0] req;
logic [3:0][INDEX_WIDTH-1:0] addr;
logic [3:0][DCACHE_SET_ASSOC-1:0] req;
logic [3:0][DCACHE_INDEX_WIDTH-1:0]addr;
logic [3:0] gnt;
cache_line_t [SET_ASSOCIATIVITY-1:0] rdata;
logic [3:0][TAG_WIDTH-1:0] tag;
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata;
logic [3:0][DCACHE_TAG_WIDTH-1:0] tag;
cache_line_t [3:0] wdata;
logic [3:0] we;
cl_be_t [3:0] be;
logic [SET_ASSOCIATIVITY-1:0] hit_way;
logic [DCACHE_SET_ASSOC-1:0] hit_way;
// -------------------------------
// Controller <-> Miss unit
// -------------------------------
@ -88,11 +76,11 @@ module nbdcache #(
// -------------------------------
// Arbiter <-> Datram,
// -------------------------------
logic [SET_ASSOCIATIVITY-1:0] req_ram;
logic [INDEX_WIDTH-1:0] addr_ram;
logic [DCACHE_SET_ASSOC-1:0] req_ram;
logic [DCACHE_INDEX_WIDTH-1:0] addr_ram;
logic we_ram;
cache_line_t wdata_ram;
cache_line_t [SET_ASSOCIATIVITY-1:0] rdata_ram;
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata_ram;
cl_be_t be_ram;
// ------------------
@ -101,27 +89,15 @@ module nbdcache #(
generate
for (genvar i = 0; i < 3; i++) begin : master_ports
cache_ctrl #(
.SET_ASSOCIATIVITY ( SET_ASSOCIATIVITY ),
.INDEX_WIDTH ( INDEX_WIDTH ),
.TAG_WIDTH ( TAG_WIDTH ),
.CACHE_LINE_WIDTH ( CACHE_LINE_WIDTH ),
.CACHE_START_ADDR ( CACHE_START_ADDR )
) i_cache_ctrl (
.bypass_i ( ~enable_i ),
.busy_o ( busy [i] ),
.address_index_i ( address_index_i [i] ),
.address_tag_i ( address_tag_i [i] ),
.data_wdata_i ( data_wdata_i [i] ),
.data_req_i ( data_req_i [i] ),
.data_we_i ( data_we_i [i] ),
.data_be_i ( data_be_i [i] ),
.data_size_i ( data_size_i [i] ),
.kill_req_i ( kill_req_i [i] ),
.tag_valid_i ( tag_valid_i [i] ),
.data_gnt_o ( data_gnt_o [i] ),
.data_rvalid_o ( data_rvalid_o [i] ),
.data_rdata_o ( data_rdata_o [i] ),
.amo_op_i ( amo_op_i [i] ),
.req_port_i ( req_ports_i [i] ),
.req_port_o ( req_ports_o [i] ),
.req_o ( req [i+1] ),
.addr_o ( addr [i+1] ),
@ -170,7 +146,6 @@ module nbdcache #(
.active_serving_o ( active_serving ),
.req_o ( req [0] ),
.addr_o ( addr [0] ),
.gnt_i ( gnt [0] ),
.data_i ( rdata ),
.be_o ( be [0] ),
.data_o ( wdata [0] ),
@ -183,14 +158,15 @@ module nbdcache #(
// --------------
// Memory Arrays
// --------------
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin : sram_block
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
sram #(
.DATA_WIDTH ( CACHE_LINE_WIDTH ),
.NUM_WORDS ( NUM_WORDS )
.DATA_WIDTH ( DCACHE_LINE_WIDTH ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) data_sram (
.req_i ( req_ram [i] ),
.rst_ni ( rst_ni ),
.we_i ( we_ram ),
.addr_i ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wdata_i ( wdata_ram.data ),
.be_i ( be_ram.data ),
.rdata_o ( rdata_ram[i].data ),
@ -198,12 +174,13 @@ module nbdcache #(
);
sram #(
.DATA_WIDTH ( TAG_WIDTH ),
.NUM_WORDS ( NUM_WORDS )
.DATA_WIDTH ( DCACHE_TAG_WIDTH ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) tag_sram (
.req_i ( req_ram [i] ),
.rst_ni ( rst_ni ),
.we_i ( we_ram ),
.addr_i ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wdata_i ( wdata_ram.tag ),
.be_i ( be_ram.tag ),
.rdata_o ( rdata_ram[i].tag ),
@ -213,27 +190,32 @@ module nbdcache #(
end
// ----------------
// Dirty SRAM
// Valid/Dirty Regs
// ----------------
logic [DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
for (genvar i = 0; i < SET_ASSOCIATIVITY; i++) begin
assign dirty_wdata[i] = wdata_ram.dirty;
assign dirty_wdata[SET_ASSOCIATIVITY + i] = wdata_ram.valid;
assign rdata_ram[i].valid = dirty_rdata[SET_ASSOCIATIVITY + i];
assign rdata_ram[i].dirty = dirty_rdata[i];
// align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
// note: if you have an SRAM that supports flat bit enables for your target technology,
// you can use it here to save the extra 4x overhead introduced by this workaround.
logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign dirty_wdata[8*i] = wdata_ram.dirty;
assign dirty_wdata[8*i+1] = wdata_ram.valid;
assign rdata_ram[i].dirty = dirty_rdata[8*i];
assign rdata_ram[i].valid = dirty_rdata[8*i+1];
end
sram #(
.DATA_WIDTH ( DIRTY_WIDTH ),
.NUM_WORDS ( NUM_WORDS )
) dirty_sram (
.DATA_WIDTH ( 4*DCACHE_DIRTY_WIDTH ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) valid_dirty_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( |req_ram ),
.we_i ( we_ram ),
.addr_i ( addr_ram[INDEX_WIDTH-1:BYTE_OFFSET] ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wdata_i ( dirty_wdata ),
.be_i ( {be_ram.valid, be_ram.dirty} ),
.be_i ( be_ram.vldrty ),
.rdata_o ( dirty_rdata )
);
@ -242,8 +224,8 @@ module nbdcache #(
// ------------------------------------------------
tag_cmp #(
.NR_PORTS ( 4 ),
.ADDR_WIDTH ( INDEX_WIDTH ),
.SET_ASSOCIATIVITY ( SET_ASSOCIATIVITY )
.ADDR_WIDTH ( DCACHE_INDEX_WIDTH ),
.DCACHE_SET_ASSOC ( DCACHE_SET_ASSOC )
) i_tag_cmp (
.req_i ( req ),
.gnt_o ( gnt ),
@ -268,7 +250,7 @@ module nbdcache #(
`ifndef SYNTHESIS
initial begin
assert ($bits(data_if.aw_addr) == 64) else $fatal(1, "Ariane needs a 64-bit bus");
assert (CACHE_LINE_WIDTH/64 inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of 64");
assert (DCACHE_LINE_WIDTH/64 inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of 64");
end
`endif
endmodule
@ -285,34 +267,34 @@ module tag_cmp #(
parameter int unsigned ADDR_WIDTH = 64,
parameter type data_t = cache_line_t,
parameter type be_t = cl_be_t,
parameter int unsigned SET_ASSOCIATIVITY = 8
parameter int unsigned DCACHE_SET_ASSOC = 8
)(
input logic clk_i,
input logic rst_ni,
input logic [NR_PORTS-1:0][SET_ASSOCIATIVITY-1:0] req_i,
input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i,
output logic [NR_PORTS-1:0] gnt_o,
input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i,
input data_t [NR_PORTS-1:0] wdata_i,
input logic [NR_PORTS-1:0] we_i,
input be_t [NR_PORTS-1:0] be_i,
output data_t [SET_ASSOCIATIVITY-1:0] rdata_o,
input logic [NR_PORTS-1:0][TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later
output logic [SET_ASSOCIATIVITY-1:0] hit_way_o, // we've got a hit on the corresponding way
output data_t [DCACHE_SET_ASSOC-1:0] rdata_o,
input logic [NR_PORTS-1:0][DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later
output logic [DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way
output logic [SET_ASSOCIATIVITY-1:0] req_o,
output logic [DCACHE_SET_ASSOC-1:0] req_o,
output logic [ADDR_WIDTH-1:0] addr_o,
output data_t wdata_o,
output logic we_o,
output be_t be_o,
input data_t [SET_ASSOCIATIVITY-1:0] rdata_i
input data_t [DCACHE_SET_ASSOC-1:0] rdata_i
);
assign rdata_o = rdata_i;
// one hot encoded
logic [NR_PORTS-1:0] id_d, id_q;
logic [TAG_WIDTH-1:0] sel_tag;
logic [DCACHE_TAG_WIDTH-1:0] sel_tag;
always_comb begin : tag_sel
sel_tag = '0;
@ -321,7 +303,7 @@ module tag_cmp #(
sel_tag = tag_i[i];
end
for (genvar j = 0; j < SET_ASSOCIATIVITY; j++) begin : tag_cmp
for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp
assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0;
end

11
src/clint/README.md Normal file
View file

@ -0,0 +1,11 @@
# CLINT (Core-local Interrupt Controller)
This repository contains a CLINT for the Ariane core that is compatible with the RISC-V privileged specification 1.11 (WIP).
The CLINT plugs into an existing AXI Bus with an AXI 4 Lite interface. The IP mirrors transaction IDs and is fully pin-compatible with the full AXI 4 interface. It does not support burst transfers (as specified in the AMBA 4 Bus specification).
| Address | Description | Note |
|-------------------|-------------|------------------------------------------------|
| `BASE` + `0x0` | msip | Machine mode software interrupt (IPI) |
| `BASE` + `0x4000` | mtimecmp | Machine mode timer compare register for Hart 0 |
| `BASE` + `0xBFF8` | mtime | Timer register |

View file

@ -0,0 +1,169 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 17/07/2017
// Description: AXI Lite compatible interface
//
// AXI4-Lite compatible slave front-end.
//
// Translates single-beat AXI transactions on `slave` into a simple memory-like
// interface (`address_o`, `en_o`, `we_o`, `data_i`, `data_o`). Exactly one
// transaction is handled at a time. The ID of the accepted request is latched
// and reflected on `r_id`/`b_id` so the module can sit on a full AXI4 bus.
//
// Parameters:
//   AXI_ADDR_WIDTH - width of the address channel and of `address_o`
//   AXI_DATA_WIDTH - width of the data channels and of `data_i`/`data_o`
//   AXI_ID_WIDTH   - width of the reflected transaction ID
module axi_lite_interface #(
    parameter int unsigned AXI_ADDR_WIDTH = 64,
    parameter int unsigned AXI_DATA_WIDTH = 64,
    parameter int unsigned AXI_ID_WIDTH   = 10
)(
    input  logic                      clk_i,     // Clock
    input  logic                      rst_ni,    // Asynchronous reset active low
    AXI_BUS.Slave                     slave,
    output logic [AXI_ADDR_WIDTH-1:0] address_o,
    output logic                      en_o,      // transaction is valid
    output logic                      we_o,      // write
    input  logic [AXI_DATA_WIDTH-1:0] data_i,    // data
    output logic [AXI_DATA_WIDTH-1:0] data_o
);

    // The RLAST signal is not required, and is considered asserted for every transfer on the read data channel.
    enum logic [1:0] { IDLE, READ, WRITE, WRITE_B } CS, NS;
    // save the trans id, we will need it for reflection otherwise we are not plug compatible to the AXI standard
    logic [AXI_ID_WIDTH-1:0]   trans_id_n, trans_id_q;
    // address register
    logic [AXI_ADDR_WIDTH-1:0] address_n,  address_q;

    // pass through read data on the read data channel
    assign slave.r_data = data_i;
    // send back the transaction id we've latched
    assign slave.r_id   = trans_id_q;
    assign slave.b_id   = trans_id_q;
    // set r_last to one as defined by the AXI4 - Lite standard
    assign slave.r_last = 1'b1;
    // we do not support any errors so set response flag to all zeros
    assign slave.b_resp = 2'b0;
    assign slave.r_resp = 2'b0;
    // output data which we want to write to the slave
    assign data_o       = slave.w_data;

    // ------------------------
    // AXI4-Lite State Machine
    // ------------------------
    always_comb begin
        // default signal assignment
        NS         = CS;
        address_n  = address_q;
        trans_id_n = trans_id_q;

        // we'll answer a write request only if we got address and data
        slave.aw_ready = 1'b0;
        slave.w_ready  = 1'b0;
        slave.b_valid  = 1'b0;

        // A read address may only be accepted when the FSM actually latches it,
        // i.e. in IDLE and only if no write address is taken in the same cycle.
        // Asserting ar_ready anywhere else would complete the AR handshake and
        // silently drop the read request.
        slave.ar_ready = 1'b0;
        slave.r_valid  = 1'b0;

        address_o      = '0;
        we_o           = 1'b0;
        en_o           = 1'b0;

        case (CS)
            // we are ready to accept a new request
            IDLE: begin
                // we've got a valid write request, writes take priority over reads
                if (slave.aw_valid) begin
                    slave.aw_ready = 1'b1;
                    // this costs performance but the interconnect does not obey the AXI standard
                    NS = WRITE;
                    // save address
                    address_n = slave.aw_addr;
                    // save the transaction id for reflection
                    trans_id_n = slave.aw_id;
                end else begin
                    // no write pending: we are able to take a read address
                    slave.ar_ready = 1'b1;
                    // we've got a valid read request, we also know that we have asserted the ar_ready
                    if (slave.ar_valid) begin
                        NS = READ;
                        address_n = slave.ar_addr;
                        // also request the word from the memory-like interface
                        address_o = slave.ar_addr;
                        // save the transaction id for reflection
                        trans_id_n = slave.ar_id;
                    end
                end
            end

            // We've got a read request at least one cycle earlier
            // so data_i will already contain the data we'd like to read
            READ: begin
                // enable the ram-like
                en_o = 1'b1;
                // further assert the correct address
                address_o = address_q;
                // the read is valid
                slave.r_valid = 1'b1;
                // check if we got a valid r_ready and go back to IDLE
                if (slave.r_ready)
                    NS = IDLE;
            end

            // We've got a write request at least one cycle earlier
            // wait here for the data
            WRITE: begin
                if (slave.w_valid) begin
                    slave.w_ready = 1'b1;
                    // use the latched address
                    address_o = address_q;
                    en_o = 1'b1;
                    we_o = 1'b1;
                    // close this request
                    NS = WRITE_B;
                end
            end

            WRITE_B: begin
                slave.b_valid = 1'b1;
                // we've already performed the write here so wait for the ready signal
                if (slave.b_ready)
                    NS = IDLE;
            end

            default:;
        endcase
    end

    // ------------------------
    // Registers
    // ------------------------
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            CS         <= IDLE;
            address_q  <= '0;
            trans_id_q <= '0;
        end else begin
            CS         <= NS;
            address_q  <= address_n;
            trans_id_q <= trans_id_n;
        end
    end

    // ------------------------
    // Assertions
    // ------------------------
    // Listen for illegal transactions
    `ifndef SYNTHESIS
    `ifndef VERILATOR
    // check that burst length is just one and that the transfer size matches
    // the native width of the data bus (AxSIZE encodes log2 of the bytes per beat)
    assert property (@(posedge clk_i) slave.ar_valid |-> ((slave.ar_len == 8'b0) && (slave.ar_size == $clog2(AXI_DATA_WIDTH/8))))
    else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end
    // do the same for the write channel
    assert property (@(posedge clk_i) slave.aw_valid |-> ((slave.aw_len == 8'b0) && (slave.aw_size == $clog2(AXI_DATA_WIDTH/8))))
    else begin $error("AXI Lite does not support bursts larger than 1 or byte length unequal to the native bus size"); $stop(); end
    `endif
    `endif
endmodule

185
src/clint/clint.sv Normal file
View file

@ -0,0 +1,185 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 15/07/2017
// Description: A RISC-V privilege spec 1.11 (WIP) compatible CLINT (core local interrupt controller)
//
// Platforms provide a real-time counter, exposed as a memory-mapped machine-mode register, mtime. mtime must run at
// constant frequency, and the platform must provide a mechanism for determining the timebase of mtime (device tree).
// Core-local interrupt controller (CLINT).
//
// Holds the memory-mapped `mtime`, per-core `mtimecmp` and per-core `msip`
// registers behind an AXI4-Lite front-end and derives the timer and software
// interrupts from them. `mtime` is incremented on every rising edge of the
// synchronized `rtc_i` input unless `halted_i` is set.
//
// Register map (offset = address[15:0], 8-byte stride per core):
//   MSIP_BASE     + 8*i : msip of core i (bit 0 of the written data)
//   MTIMECMP_BASE + 8*i : mtimecmp of core i
//   MTIME_BASE          : mtime
module clint #(
    parameter int unsigned AXI_ADDR_WIDTH = 64,
    parameter int unsigned AXI_DATA_WIDTH = 64,
    parameter int unsigned AXI_ID_WIDTH   = 10,
    parameter int unsigned NR_CORES       = 1 // Number of cores therefore also the number of timecmp registers and timer interrupts
)(
    input  logic                clk_i,       // Clock
    input  logic                rst_ni,      // Asynchronous reset active low
    AXI_BUS.Slave               slave,
    input  logic                halted_i,    // cores are halted, also halt timer
    input  logic                rtc_i,       // Real-time clock in (usually 32.768 kHz)
    output logic [63:0]         time_o,      // Global Time out, this is the time-base of the whole SoC
    output logic [NR_CORES-1:0] timer_irq_o, // Timer interrupts
    output logic [NR_CORES-1:0] ipi_o        // software interrupt (a.k.a inter-process-interrupt)
);
    // register offset
    localparam logic [15:0] MSIP_BASE     = 16'h0;
    localparam logic [15:0] MTIMECMP_BASE = 16'h4000;
    localparam logic [15:0] MTIME_BASE    = 16'hbff8;

    // signals from AXI 4 Lite
    logic [AXI_ADDR_WIDTH-1:0] address;
    logic                      en;
    logic                      we;
    logic [63:0]               wdata;
    logic [63:0]               rdata;

    // the lower 16 address bits select the register
    logic [15:0] register_address;
    assign register_address = address[15:0];

    // actual registers
    logic [63:0]               mtime_n, mtime_q;
    logic [NR_CORES-1:0][63:0] mtimecmp_n, mtimecmp_q;
    logic [NR_CORES-1:0]       msip_n, msip_q;

    // increase the timer
    logic increase_timer;

    // directly output the mtime_q register - this needs synchronization (but in the core).
    assign time_o = mtime_q;

    // -----------------------------
    // AXI Interface Logic
    // -----------------------------
    axi_lite_interface #(
        .AXI_ADDR_WIDTH ( AXI_ADDR_WIDTH ),
        .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
        .AXI_ID_WIDTH   ( AXI_ID_WIDTH   )
    ) axi_lite_interface_i (
        .address_o ( address ),
        .en_o      ( en      ),
        .we_o      ( we      ),
        .data_i    ( rdata   ),
        .data_o    ( wdata   ),
        .*
    );

    // -----------------------------
    // Register Update Logic
    // -----------------------------
    // register write logic (driven by the AXI4-Lite front-end)
    always_comb begin
        mtime_n    = mtime_q;
        mtimecmp_n = mtimecmp_q;
        msip_n     = msip_q;
        // RTC says we should increase the timer
        if (increase_timer && !halted_i)
            mtime_n = mtime_q + 1;

        // written from the bus - gets priority over the timer increment
        if (en && we) begin
            // `inside` ranges are inclusive on both ends, so the upper bound is
            // the last byte of the last core's register. Without the "- 1" the
            // offset 8*NR_CORES would decode to a core index past the end of
            // the register arrays.
            case (register_address) inside
                [MSIP_BASE:MSIP_BASE+8*NR_CORES-1]: begin
                    // address bits above the 8-byte offset select the core
                    msip_n[$unsigned(address[NR_CORES-1+3:3])] = wdata[0];
                end

                [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES-1]: begin
                    mtimecmp_n[$unsigned(address[NR_CORES-1+3:3])] = wdata;
                end

                MTIME_BASE: begin
                    mtime_n = wdata;
                end
                default:;
            endcase
        end
    end

    // register read logic (driven by the AXI4-Lite front-end)
    always_comb begin
        // unmapped offsets read as zero
        rdata = 'b0;

        if (en && !we) begin
            // same inclusive-range decoding as the write path
            case (register_address) inside
                [MSIP_BASE:MSIP_BASE+8*NR_CORES-1]: begin
                    rdata = msip_q[$unsigned(address[NR_CORES-1+3:3])];
                end

                [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES-1]: begin
                    rdata = mtimecmp_q[$unsigned(address[NR_CORES-1+3:3])];
                end

                MTIME_BASE: begin
                    rdata = mtime_q;
                end
                default:;
            endcase
        end
    end

    // -----------------------------
    // IRQ Generation
    // -----------------------------
    // The mtime register has a 64-bit precision on all RV32, RV64, and RV128 systems. Platforms provide a 64-bit
    // memory-mapped machine-mode timer compare register (mtimecmp), which causes a timer interrupt to be posted when the
    // mtime register contains a value greater than or equal (mtime >= mtimecmp) to the value in the mtimecmp register.
    // The interrupt remains posted until it is cleared by writing the mtimecmp register. The interrupt will only be taken
    // if interrupts are enabled and the MTIE bit is set in the mie register.
    always_comb begin : irq_gen
        // check that the mtime cmp register is set to a meaningful value
        // (a compare value of zero, which is also the reset value, never raises an interrupt)
        for (int unsigned i = 0; i < NR_CORES; i++) begin
            if (mtimecmp_q[i] != 0 && mtime_q >= mtimecmp_q[i])
                timer_irq_o[i] = 1'b1;
            else
                timer_irq_o[i] = 1'b0;
        end
    end

    // -----------------------------
    // RTC time tracking facilities
    // -----------------------------
    // 1. Put the RTC input through a classic two stage edge-triggered synchronizer to filter out any
    //    metastability effects (or at least make them unlikely :-))
    sync_wedge i_sync_edge (
        .en_i      ( 1'b1           ),
        .serial_i  ( rtc_i          ),
        .r_edge_o  ( increase_timer ),
        .f_edge_o  (                ), // left open
        .serial_o  (                ),
        .*
    );

    // Registers
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            mtime_q    <= 64'b0;
            mtimecmp_q <= 'b0;
            msip_q     <= '0;
        end else begin
            mtime_q    <= mtime_n;
            mtimecmp_q <= mtimecmp_n;
            msip_q     <= msip_n;
        end
    end

    // -------------
    // Assertions
    // --------------
    `ifndef SYNTHESIS
    `ifndef VERILATOR
    // Static assertion check for appropriate bus width
    initial begin
        // the first argument of $fatal is the finish number, the message follows it
        assert (AXI_DATA_WIDTH == 64) else $fatal(1, "Timer needs to interface with a 64 bit bus, everything else is not supported");
    end
    `endif
    `endif

endmodule

1
src/common_cells Submodule

@ -0,0 +1 @@
Subproject commit 4277217c9ae8b1228f801e5a67de9ecdce8d887f

View file

@ -17,7 +17,6 @@ import ariane_pkg::*;
module controller (
input logic clk_i,
input logic rst_ni,
output logic flush_bp_o, // Flush branch prediction data structures
output logic set_pc_commit_o, // Set PC om PC Gen
output logic flush_if_o, // Flush the IF stage
output logic flush_unissued_instr_o, // Flush un-issued instructions of the scoreboard
@ -56,7 +55,6 @@ module controller (
flush_ex_o = 1'b0;
flush_tlb_o = 1'b0;
flush_dcache = 1'b0;
flush_bp_o = 1'b0; // flush branch prediction
flush_icache_o = 1'b0;
// ------------
// Mis-predict
@ -142,12 +140,6 @@ module controller (
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
// flush branch-prediction - it is difficult to say whether this actually looses performance or increases performance
// because of reduced mis-predicts. There is one case where flushing branch-prediction is absolutely necessary
// that is when trapping back to machine mode. As the core is making speculative accesses it can happen that it tries
// to load from an non-idempotent register where a read can have a side-effect. This can happen as the core can try to load
// from a user-mode address which is then not translated in machine-mode.
flush_bp_o = 1'b1;
end
end

View file

@ -48,7 +48,7 @@ module csr_regfile #(
output logic [63:0] epc_o, // Output the exception PC to PC Gen, the correct CSR (mepc, sepc) is set accordingly
output logic eret_o, // Return from exception, set the PC of epc_o
output logic [63:0] trap_vector_base_o, // Output base of exception vector, correct CSR is output (mtvec, stvec)
output priv_lvl_t priv_lvl_o, // Current privilege level the CPU is in
output riscv::priv_lvl_t priv_lvl_o, // Current privilege level the CPU is in
// FPU
output logic [4:0] fflags_o, // Floating-Point Accured Exceptions
output logic [2:0] frm_o, // Floating-Point Dynamic Rounding Mode
@ -110,6 +110,8 @@ module csr_regfile #(
riscv::satp_t satp_q, satp_d;
riscv::dcsr_t dcsr_q, dcsr_d;
logic mtvec_rst_load_q;// used to determine whether we came out of reset
logic [63:0] dpc_q, dpc_d;
logic [63:0] dscratch0_q, dscratch0_d;
logic [63:0] mtvec_q, mtvec_d;
@ -155,7 +157,7 @@ module csr_regfile #(
riscv::CSR_FRM: csr_rdata = {61'b0, fcsr_q.frm};
riscv::CSR_FCSR: csr_rdata = {32'b0, fcsr_q};
// debug registers
riscv::CSR_DCSR: csr_rdata = {31'b0, dcsr_q};
riscv::CSR_DCSR: csr_rdata = {32'b0, dcsr_q};
riscv::CSR_DPC: csr_rdata = dpc_q;
riscv::CSR_DSCRATCH0: csr_rdata = dscratch0_q;
// trigger module registers
@ -254,7 +256,19 @@ module csr_regfile #(
dpc_d = dpc_q;
dscratch0_d = dscratch0_q;
mstatus_d = mstatus_q;
mtvec_d = mtvec_q;
// check whether we come out of reset
// this is a workaround. some tools have issues
// having boot_addr_i in the asynchronous
// reset assignment to mtvec_d, even though
// boot_addr_i will be assigned a constant
// on the top-level.
if (mtvec_rst_load_q) begin
mtvec_d = boot_addr_i + 'h40;
end else begin
mtvec_d = mtvec_q;
end
medeleg_d = medeleg_q;
mideleg_d = mideleg_q;
mip_d = mip_q;
@ -870,7 +884,8 @@ module csr_regfile #(
// machine mode registers
mstatus_q <= 64'b0;
// set to boot address + direct mode + 4 byte offset which is the initial trap
mtvec_q <= boot_addr_i + 'h40;
mtvec_rst_load_q <= 1'b1;
mtvec_q <= '0;
medeleg_q <= 64'b0;
mideleg_q <= 64'b0;
mip_q <= 64'b0;
@ -906,6 +921,7 @@ module csr_regfile #(
dscratch0_q <= dscratch0_d;
// machine mode registers
mstatus_q <= mstatus_d;
mtvec_rst_load_q <= 1'b0;
mtvec_q <= mtvec_d;
medeleg_q <= medeleg_d;
mideleg_q <= mideleg_d;

View file

@ -8,7 +8,7 @@
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* File: axi_riscv_debug_module.sv
* File: dm_csrs.sv
* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
* Date: 30.6.2018
*
@ -18,43 +18,64 @@
module dm_csrs #(
parameter int NrHarts = -1
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic dmi_rst_ni, // Debug Module Interface reset, active-low
input logic dmi_req_valid_i,
output logic dmi_req_ready_o,
input logic [ 6:0] dmi_req_bits_addr_i,
input logic [ 1:0] dmi_req_bits_op_i, // 0 = nop, 1 = read, 2 = write
input logic [31:0] dmi_req_bits_data_i,
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic testmode_i,
input logic dmi_rst_ni, // Debug Module Interface reset, active-low
input logic dmi_req_valid_i,
output logic dmi_req_ready_o,
input logic [ 6:0] dmi_req_bits_addr_i,
input logic [ 1:0] dmi_req_bits_op_i, // 0 = nop, 1 = read, 2 = write
input logic [31:0] dmi_req_bits_data_i,
// every request needs a response one cycle later
output logic dmi_resp_valid_o,
input logic dmi_resp_ready_i,
output logic [ 1:0] dmi_resp_bits_resp_o,
output logic [31:0] dmi_resp_bits_data_o,
output logic dmi_resp_valid_o,
input logic dmi_resp_ready_i,
output logic [ 1:0] dmi_resp_bits_resp_o,
output logic [31:0] dmi_resp_bits_data_o,
// global ctrl
output logic ndmreset_o, // non-debug module reset, active-high
output logic dmactive_o, // 1 -> debug-module is active, 0 -> synchronous re-set
output logic ndmreset_o, // non-debug module reset, active-high
output logic dmactive_o, // 1 -> debug-module is active, 0 -> synchronous re-set
// hart status
input dm::hartinfo_t [NrHarts-1:0] hartinfo_i, // static hartinfo
input logic [NrHarts-1:0] halted_i, // hart is halted
input logic [NrHarts-1:0] unavailable_i, // e.g.: powered down
input logic [NrHarts-1:0] resumeack_i, // hart acknowledged resume request
input dm::hartinfo_t [NrHarts-1:0] hartinfo_i, // static hartinfo
input logic [NrHarts-1:0] halted_i, // hart is halted
input logic [NrHarts-1:0] unavailable_i, // e.g.: powered down
input logic [NrHarts-1:0] resumeack_i, // hart acknowledged resume request
// hart control
output logic [19:0] hartsel_o, // hartselect to ctrl module
output logic [NrHarts-1:0] haltreq_o, // request to halt a hart
output logic [NrHarts-1:0] resumereq_o, // request hart to resume
output logic [19:0] hartsel_o, // hartselect to ctrl module
output logic [NrHarts-1:0] haltreq_o, // request to halt a hart
output logic [NrHarts-1:0] resumereq_o, // request hart to resume
output logic cmd_valid_o, // debugger is writing to the command field
output dm::command_t cmd_o, // abstract command
input logic [NrHarts-1:0] cmderror_valid_i, // an error occured
input dm::cmderr_t [NrHarts-1:0] cmderror_i, // this error occured
input logic [NrHarts-1:0] cmdbusy_i, // cmd is currently busy executing
output logic cmd_valid_o, // debugger is writing to the command field
output dm::command_t cmd_o, // abstract command
input logic [NrHarts-1:0] cmderror_valid_i, // an error occured
input dm::cmderr_t [NrHarts-1:0] cmderror_i, // this error occured
input logic [NrHarts-1:0] cmdbusy_i, // cmd is currently busy executing
output logic [dm::ProgBufSize-1:0][31:0] progbuf_o, // to system bus
output logic [dm::DataCount-1:0][31:0] data_o,
input logic [dm::DataCount-1:0][31:0] data_i,
input logic data_valid_i
input logic data_valid_i,
// system bus access module (SBA)
output logic [63:0] sbaddress_o,
input logic [63:0] sbaddress_i,
output logic sbaddress_write_valid_o,
// control signals in
output logic sbreadonaddr_o,
output logic sbautoincrement_o,
output logic [2:0] sbaccess_o,
// data out
output logic sbreadondata_o,
output logic [63:0] sbdata_o,
output logic sbdata_read_valid_o,
output logic sbdata_write_valid_o,
// read data in
input logic [63:0] sbdata_i,
input logic sbdata_valid_i,
// control signals
input logic sbbusy_i,
input logic sberror_valid_i, // bus error occurred
input logic [2:0] sberror_i // bus error occurred
);
// the amount of bits we need to represent all harts
localparam HartSelLen = (NrHarts == 1) ? 1 : $clog2(NrHarts);
@ -64,10 +85,11 @@ module dm_csrs #(
logic resp_queue_full;
logic resp_queue_empty;
logic resp_queue_push;
logic resp_queue_pop;
logic [31:0] resp_queue_data;
localparam dm::dm_csr_t DataEnd = dm::dm_csr_t'((dm::Data0 + dm::DataCount));
localparam dm::dm_csr_t ProgBufEnd = dm::dm_csr_t'((dm::ProgBuf0 + dm::ProgBufSize));
localparam dm::dm_csr_t DataEnd = dm::dm_csr_t'((dm::Data0 + {4'b0, dm::DataCount}));
localparam dm::dm_csr_t ProgBufEnd = dm::dm_csr_t'((dm::ProgBuf0 + {4'b0, dm::ProgBufSize}));
logic [31:0] haltsum0, haltsum1, haltsum2, haltsum3;
// TODO(zarubaf) Need an elegant way to calculate haltsums
@ -86,11 +108,15 @@ module dm_csrs #(
dm::cmderr_t cmderr_d, cmderr_q;
dm::command_t command_d, command_q;
dm::abstractauto_t abstractauto_d, abstractauto_q;
dm::sbcs_t sbcs_d, sbcs_q;
logic [63:0] sbaddr_d, sbaddr_q;
logic [63:0] sbdata_d, sbdata_q;
logic [NrHarts-1:0] havereset_d, havereset_q;
// program buffer
logic [dm::ProgBufSize-1:0][31:0] progbuf_d, progbuf_q;
// because first data address starts at 0x04
logic [(dm::DataCount + dm::Data0 - 1):(dm::Data0)][31:0] data_d, data_q;
logic [({3'b0, dm::DataCount} + dm::Data0 - 1):(dm::Data0)][31:0] data_d, data_q;
logic [NrHarts-1:0] selected_hart;
@ -99,8 +125,15 @@ module dm_csrs #(
assign dmi_resp_valid_o = ~resp_queue_empty;
assign dmi_req_ready_o = ~resp_queue_full;
assign resp_queue_push = dmi_req_valid_i & dmi_req_ready_o;
// SBA
assign sbautoincrement_o = sbcs_q.sbautoincrement;
assign sbreadonaddr_o = sbcs_q.sbreadonaddr;
assign sbreadondata_o = sbcs_q.sbreadondata;
assign sbaccess_o = sbcs_q.sbaccess;
assign sbdata_o = sbdata_q;
assign sbaddress_o = sbaddr_q;
assign hartsel_o = {dmcontrol_q.hartselhi, dmcontrol_q.hartsello};
assign hartsel_o = {dmcontrol_q.hartselhi, dmcontrol_q.hartsello};
always_comb begin : csr_read_write
// --------------------
@ -125,8 +158,8 @@ module dm_csrs #(
// as soon as we are out of the legal Hart region tell the debugger
// that there are only non-existent harts
dmstatus.allnonexistent = (hartsel_o > NrHarts - 1) ? 1'b1 : 1'b0;
dmstatus.anynonexistent = (hartsel_o > NrHarts - 1) ? 1'b1 : 1'b0;
dmstatus.allnonexistent = (hartsel_o > NrHarts[19:0] - 1) ? 1'b1 : 1'b0;
dmstatus.anynonexistent = (hartsel_o > NrHarts[19:0] - 1) ? 1'b1 : 1'b0;
dmstatus.allhalted = halted_i[hartsel_o[HartSelLen-1:0]];
dmstatus.anyhalted = halted_i[hartsel_o[HartSelLen-1:0]];
@ -152,11 +185,17 @@ module dm_csrs #(
command_d = command_q;
progbuf_d = progbuf_q;
data_d = data_q;
sbcs_d = sbcs_q;
sbaddr_d = sbaddress_i;
sbdata_d = sbdata_q;
resp_queue_data = 32'b0;
cmd_valid_o = 1'b0;
resp_queue_data = 32'b0;
cmd_valid_o = 1'b0;
sbaddress_write_valid_o = 1'b0;
sbdata_read_valid_o = 1'b0;
sbdata_write_valid_o = 1'b0;
// read
// reads
if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_READ) begin
unique case ({1'b0, dmi_req_bits_addr_i}) inside
[(dm::Data0):DataEnd]: begin
@ -186,6 +225,44 @@ module dm_csrs #(
dm::HaltSum1: resp_queue_data = haltsum1;
dm::HaltSum2: resp_queue_data = haltsum2;
dm::HaltSum3: resp_queue_data = haltsum3;
dm::SBCS: begin
    // a read while the system bus access is busy flags a busy error
    // NOTE(review): no read data is returned for SBCS here (resp_queue_data
    // keeps its default) - confirm whether sbcs_q should be driven out
    if (sbbusy_i) begin
        sbcs_d.sbbusyerror = 1'b1;
    end
end
dm::SBAddress0: begin
    // access while the SBA was busy: only flag the error,
    // otherwise return the lower address word
    if (sbbusy_i) begin
        sbcs_d.sbbusyerror = 1'b1;
    end else begin
        resp_queue_data = sbaddr_q[31:0];
    end
end
dm::SBAddress1: begin
    // access while the SBA was busy: only flag the error,
    // otherwise return the upper address word
    if (sbbusy_i) begin
        sbcs_d.sbbusyerror = 1'b1;
    end else begin
        resp_queue_data = sbaddr_q[63:32];
    end
end
dm::SBData0: begin
    // access while the SBA was busy: only flag the error,
    // otherwise return the lower data word
    if (sbbusy_i) begin
        sbcs_d.sbbusyerror = 1'b1;
    end else begin
        // signal the data read unless a bus error is pending
        sbdata_read_valid_o = (sbcs_q.sberror == '0);
        resp_queue_data = sbdata_q[31:0];
    end
end
dm::SBData1: begin
    // access while the SBA was busy: only flag the error,
    // otherwise return the upper data word
    if (sbbusy_i) begin
        sbcs_d.sbbusyerror = 1'b1;
    end else begin
        resp_queue_data = sbdata_q[63:32];
    end
end
default:;
endcase
end
@ -218,11 +295,11 @@ module dm_csrs #(
// field remain set until they are cleared by writing 1 to
// them. No abstract command is started until the value is
// reset to 0.
automatic dm::abstractcs_t abstractcs;
abstractcs = dm::abstractcs_t'(dmi_req_bits_data_i);
automatic dm::abstractcs_t a_abstractcs;
a_abstractcs = dm::abstractcs_t'(dmi_req_bits_data_i);
// reads during abstract command execution are not allowed
if (!cmdbusy_i) begin
cmderr_d = dm::cmderr_t'(~abstractcs.cmderr & cmderr_q);
cmderr_d = dm::cmderr_t'(~a_abstractcs.cmderr & cmderr_q);
end else if (cmderr_q == dm::CmdErrNone) begin
cmderr_d = dm::CmdErrBusy;
end
@ -256,6 +333,52 @@ module dm_csrs #(
cmd_valid_o = abstractauto_q.autoexecprogbuf[dmi_req_bits_addr_i[3:0]];
end
end
dm::SBCS: begin
    // access while the SBA was busy: only flag the error and leave the
    // register untouched, otherwise take over the written value
    if (sbbusy_i) begin
        sbcs_d.sbbusyerror = 1'b1;
    end else begin
        automatic dm::sbcs_t sbcs = dm::sbcs_t'(dmi_req_bits_data_i);
        sbcs_d = sbcs;
        // R/W1C
        sbcs_d.sbbusyerror = sbcs_q.sbbusyerror & (~sbcs.sbbusyerror);
        sbcs_d.sberror     = sbcs_q.sberror     & (~sbcs.sberror);
    end
end
dm::SBAddress0: begin
    // access while the SBA was busy: only flag the error,
    // otherwise update the lower address word
    if (sbbusy_i) begin
        sbcs_d.sbbusyerror = 1'b1;
    end else begin
        sbaddr_d[31:0] = dmi_req_bits_data_i;
        // signal the address write unless a bus error is pending
        sbaddress_write_valid_o = (sbcs_q.sberror == '0);
    end
end
dm::SBAddress1: begin
    // access while the SBA was busy: only flag the error,
    // otherwise update the upper address word
    if (sbbusy_i) begin
        sbcs_d.sbbusyerror = 1'b1;
    end else begin
        sbaddr_d[63:32] = dmi_req_bits_data_i;
    end
end
dm::SBData0: begin
    // access while the SBA was busy: only flag the error,
    // otherwise update the lower data word
    if (sbbusy_i) begin
        sbcs_d.sbbusyerror = 1'b1;
    end else begin
        sbdata_d[31:0] = dmi_req_bits_data_i;
        // signal the data write unless a bus error is pending
        sbdata_write_valid_o = (sbcs_q.sberror == '0);
    end
end
dm::SBData1: begin
    // access while the SBA was busy: only flag the error,
    // otherwise update the upper data word
    if (sbbusy_i) begin
        sbcs_d.sbbusyerror = 1'b1;
    end else begin
        sbdata_d[63:32] = dmi_req_bits_data_i;
    end
end
default:;
endcase
end
@ -272,6 +395,17 @@ module dm_csrs #(
if (ndmreset_o) begin
havereset_d = '1;
end
// -------------
// System Bus
// -------------
// set bus error
if (sberror_valid_i) begin
sbcs_d.sberror = sberror_i;
end
// update read data
if (sbdata_valid_i) begin
sbdata_d = sbdata_i;
end
// dmcontrol
// TODO(zarubaf) we currently do not implement the hartarry mask
@ -284,6 +418,16 @@ module dm_csrs #(
dmcontrol_d.zero0 = '0;
// Non-writeable, clear only
dmcontrol_d.ackhavereset = 1'b0;
// static values for dcsr
sbcs_d.sbversion = 3'b1;
sbcs_d.sbbusy = sbbusy_i;
sbcs_d.sbasize = 7'd64; // bus is 64 bit wide
sbcs_d.sbaccess128 = 1'b0;
sbcs_d.sbaccess64 = 1'b0;
sbcs_d.sbaccess32 = 1'b0;
sbcs_d.sbaccess16 = 1'b0;
sbcs_d.sbaccess8 = 1'b0;
sbcs_d.sbaccess = 1'b0;
end
// output multiplexer
@ -302,21 +446,26 @@ module dm_csrs #(
assign cmd_o = command_q;
assign progbuf_o = progbuf_q;
assign data_o = data_q;
assign resp_queue_pop = dmi_resp_ready_i & ~resp_queue_empty;
// response FIFO
fifo #(
fifo_v2 #(
.dtype ( logic [31:0] ),
.DEPTH ( 2 )
) i_fifo (
.clk_i ( clk_i ),
.rst_ni ( dmi_rst_ni ), // reset only when system is re-set
.flush_i ( 1'b0 ), // we do not need to flush this queue
.testmode_i ( testmode_i ),
.full_o ( resp_queue_full ),
.empty_o ( resp_queue_empty ),
.single_element_o ( ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( resp_queue_data ),
.push_i ( resp_queue_push ),
.data_o ( dmi_resp_bits_data_o ),
.pop_i ( dmi_resp_ready_i )
.pop_i ( resp_queue_pop )
);
always_ff @(posedge clk_i or negedge rst_ni) begin
@ -345,6 +494,9 @@ module dm_csrs #(
abstractauto_q <= '0;
progbuf_q <= '0;
data_q <= '0;
sbcs_q <= '0;
sbaddr_q <= '0;
sbdata_q <= '0;
end else begin
havereset_q <= havereset_d;
dmcontrol_q <= dmcontrol_d;
@ -353,6 +505,9 @@ module dm_csrs #(
abstractauto_q <= abstractauto_d;
progbuf_q <= progbuf_d;
data_q <= data_d;
sbcs_q <= sbcs_d;
sbaddr_q <= sbaddr_d;
sbdata_q <= sbdata_d;
end
end
end

View file

@ -218,7 +218,7 @@ module dm_mem #(
WhereTo: begin
// variable jump to abstract cmd, program_buffer or resume
if (resumereq_i) begin
rdata_d = {32'b0, riscv::jalr(0, 0, dm::ResumeAddress)};
rdata_d = {32'b0, riscv::jalr(0, 0, dm::ResumeAddress[11:0])};
end
// there is a command active so jump there
@ -240,7 +240,7 @@ module dm_mem #(
// TODO(zarubaf) change hard-coded values
[ProgBufBase:ProgBufEnd]: begin
case (addr_i)
case (addr_i[DbgAddressBits-1:0])
ProgBufBase + 16: rdata_d = {progbuf_i[5], progbuf_i[4]};
ProgBufBase + 8: rdata_d = {progbuf_i[3], progbuf_i[2]};
ProgBufBase: rdata_d = {progbuf_i[1], progbuf_i[0]};
@ -250,14 +250,17 @@ module dm_mem #(
// two slots for abstract command
[AbstractCmdBase:AbstractCmdEnd]: begin
// return the correct address index
rdata_d = abstract_cmd[(addr_i[DbgAddressBits-1:3] - (AbstractCmdBase >> 3))];
rdata_d = abstract_cmd[(addr_i[DbgAddressBits-1:3] - AbstractCmdBase[DbgAddressBits-1:3])];
end
// harts are polling for flags here
[FlagsBase:FlagsEnd]: begin
automatic logic [7:0][7:0] rdata;
rdata = '0;
// release the corresponding hart
if (({addr_i[DbgAddressBits-1:3], 3'b0} - FlagsBase) == {hartsel_i[19:3], 3'b0}) begin
rdata_d[hartsel_i[2:0]+:8] = {6'b0, resume, go};
if (({addr_i[DbgAddressBits-1:3], 3'b0} - FlagsBase[DbgAddressBits-1:0]) == {hartsel_i[DbgAddressBits-1:3], 3'b0}) begin
rdata[hartsel_i[2:0]] = {6'b0, resume, go};
end
rdata_d = rdata;
end
default: ;
endcase
@ -358,7 +361,7 @@ module dm_mem #(
// ROM starts at the HaltAddress of the core e.g.: it immediately jumps to
// the ROM base address
assign fwd_rom_d = (addr_i[DbgAddressBits-1:0] >= dm::HaltAddress) ? 1'b1 : 1'b0;
assign fwd_rom_d = (addr_i[DbgAddressBits-1:0] >= dm::HaltAddress[DbgAddressBits-1:0]) ? 1'b1 : 1'b0;
always_ff @(posedge clk_i) begin
if (~dmactive_i) begin

View file

@ -23,7 +23,7 @@ package dm;
// TODO(zarubaf) This is hard-coded to two at the moment
// amount of data count registers implemented
localparam logic [3:0] DataCount = 5'h2;
localparam logic [3:0] DataCount = 4'h2;
// address to which a hart should jump when it was requested to halt
localparam logic [63:0] HaltAddress = 64'h800;
@ -183,7 +183,24 @@ package dm;
DTM_WRITE = 2'h2
} dtm_op_t;
typedef struct packed {
logic [31:29] sbversion;
logic [28:23] zero0;
logic sbbusyerror;
logic sbbusy;
logic sbreadonaddr;
logic [19:17] sbaccess;
logic sbautoincrement;
logic sbreadondata;
logic [14:12] sberror;
logic [11:5] sbasize;
logic sbaccess128;
logic sbaccess64;
logic sbaccess32;
logic sbaccess16;
logic sbaccess8;
} sbcs_t;
localparam logic[1:0] DTM_SUCCESS = 2'h0;
endpackage

162
src/debug/dm_sba.sv Normal file
View file

@ -0,0 +1,162 @@
/* Copyright 2018 ETH Zurich and University of Bologna.
* Copyright and related rights are licensed under the Solderpad Hardware
* License, Version 0.51 (the License); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
* or agreed to in writing, software, hardware and materials distributed under
* this License is distributed on an AS IS BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
* File: dm_sba.sv
* Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
* Date: 1.8.2018
*
* Description: System Bus Access Module
*
*/
// System Bus Access (SBA) engine of the debug module.
//
// Turns read/write triggers from the debug CSRs into single requests on the
// AXI master port through `axi_adapter`. The FSM issues a request (Read /
// Write), waits for the adapter to report completion (WaitRead / WaitWrite)
// and optionally hands back an auto-incremented address on `sbaddress_o`.
// Access sizes above 8 bytes (sbaccess_i > 3) are rejected with sberror_o = 3.
module dm_sba (
    input  logic        clk_i,       // Clock
    input  logic        dmactive_i,  // synchronous reset active low

    AXI_BUS.Master      axi_master,

    input  logic [63:0] sbaddress_i,
    input  logic        sbaddress_write_valid_i,
    // control signals in
    input  logic        sbreadonaddr_i,
    output logic [63:0] sbaddress_o,
    input  logic        sbautoincrement_i,
    input  logic [2:0]  sbaccess_i,
    // data in
    input  logic        sbreadondata_i,
    input  logic [63:0] sbdata_i,
    input  logic        sbdata_read_valid_i,
    input  logic        sbdata_write_valid_i,
    // read data out
    output logic [63:0] sbdata_o,
    output logic        sbdata_valid_o,
    // control signals
    output logic        sbbusy_o,
    output logic        sberror_valid_o, // bus error occurred
    output logic [2:0]  sberror_o        // bus error occurred
);

    enum logic [2:0] { Idle, Read, Write, WaitRead, WaitWrite } state_d, state_q;

    // request interface towards the AXI adapter
    logic [63:0] address;
    logic        req;
    logic        gnt;
    logic        we;
    logic [7:0]  be;

    // busy as long as a transaction is in flight
    assign sbbusy_o = (state_q != Idle) ? 1'b1 : 1'b0;

    always_comb begin
        // defaults: no request, address passed through unchanged
        req     = 1'b0;
        address = sbaddress_i;
        we      = 1'b0;
        be      = '0;

        sberror_o       = '0;
        sberror_valid_o = 1'b0;
        sbaddress_o     = sbaddress_i;

        state_d = state_q;

        case (state_q)
            Idle: begin
                // the conditions are evaluated in order, a later one overrides an earlier one
                // debugger requested a read
                if (sbaddress_write_valid_i && sbreadonaddr_i)  state_d = Read;
                // debugger requested a write
                if (sbdata_write_valid_i) state_d = Write;
                // perform another read
                if (sbdata_read_valid_i && sbreadondata_i) state_d = Read;
            end

            Read: begin
                req = 1'b1;
                if (gnt) state_d = WaitRead;
            end

            Write: begin
                req = 1'b1;
                we  = 1'b1;
                // generate byte enable mask
                case (sbaccess_i)
                    3'b000: be[ sbaddress_i[2:0]] = '1;
                    3'b001: be[{sbaddress_i[2:1], 1'b0} +: 2] = '1;
                    3'b010: be[{sbaddress_i[2:2], 2'b0} +: 4] = '1;
                    3'b011: be = '1;
                    default:;
                endcase
                if (gnt) state_d = WaitWrite;
            end

            WaitRead: begin
                if (sbdata_valid_o) begin
                    state_d = Idle;
                    // auto-increment address by the access size in bytes
                    if (sbautoincrement_i) sbaddress_o = sbaddress_i + (1 << sbaccess_i);
                end
            end

            WaitWrite: begin
                if (sbdata_valid_o) begin
                    state_d = Idle;
                    // auto-increment address by the access size in bytes
                    if (sbautoincrement_i) sbaddress_o = sbaddress_i + (1 << sbaccess_i);
                end
            end

            // unused encodings: keep the defaults assigned above
            default:;
        endcase

        // handle error case: abort any transaction with an unsupported size
        if (sbaccess_i > 3 && state_d != Idle) begin
            req             = 1'b0;
            state_d         = Idle;
            sberror_valid_o = 1'b1;
            sberror_o       = 'd3;
        end
        // further error handling should go here ...
    end

    // state register, synchronously cleared while the debug module is inactive
    always_ff @(posedge clk_i) begin
        if (~dmactive_i) begin
            state_q <= Idle;
        end else begin
            state_q <= state_d;
        end
    end

    axi_adapter #(
        .DATA_WIDTH            ( 64                       )
    ) i_axi_master (
        .clk_i                 ( clk_i                    ),
        .rst_ni                ( dmactive_i               ),
        .req_i                 ( req                      ),
        .type_i                ( std_cache_pkg::SINGLE_REQ),
        .gnt_o                 ( gnt                      ),
        .gnt_id_o              (                          ),
        .addr_i                ( address                  ),
        .we_i                  ( we                       ),
        .wdata_i               ( sbdata_i                 ),
        .be_i                  ( be                       ),
        .size_i                ( sbaccess_i[1:0]          ),
        .id_i                  ( '0                       ),
        .valid_o               ( sbdata_valid_o           ),
        .rdata_o               ( sbdata_o                 ),
        .id_o                  (                          ),
        .critical_word_o       (                          ), // not needed here
        .critical_word_valid_o (                          ), // not needed here
        .axi                   ( axi_master               )
    );

    `ifndef SYNTHESIS
    `ifndef verilator
        // maybe bump severity to $error if not handled at runtime
        // the check is only meaningful while the debug module is active, so it
        // is disabled whenever dmactive_i is anything but 1
        dm_sba_access_size: assert property(@(posedge clk_i) disable iff (dmactive_i !== 1'b1) (state_d != Idle) |-> (sbaccess_i < 4)) else $warning ("accesses > 8 byte not supported at the moment");
    `endif
    `endif

endmodule

View file

@ -26,11 +26,14 @@ module dm_top #(
)(
input logic clk_i, // clock
input logic rst_ni, // asynchronous reset active low, connect PoR here, not the system reset
input logic testmode_i,
output logic ndmreset_o, // non-debug module reset
output logic dmactive_o, // debug module is active
output logic [NrHarts-1:0] debug_req_o, // async debug request
input logic [NrHarts-1:0] unavailable_i, // communicate whether the hart is unavailable (e.g.: power down)
AXI_BUS.Slave axi_slave, // bus slave, for an execution based technique
AXI_BUS.Master axi_master, // bus master, for system bus accesses
// Connection to DTM - compatible to RocketChip Debug Module
input logic dmi_rst_ni,
input logic dmi_req_valid_i,
@ -49,13 +52,19 @@ module dm_top #(
dm::hartinfo_t [NrHarts-1:0] hartinfo;
logic [NrHarts-1:0] halted;
logic [NrHarts-1:0] running;
logic [NrHarts-1:0] unavailable;
logic [NrHarts-1:0] resumeack;
logic [NrHarts-1:0] haltreq;
logic [NrHarts-1:0] resumereq;
logic cmd_valid;
dm::command_t cmd;
logic req;
logic we;
logic [63:0] addr;
logic [7:0] be;
logic [63:0] wdata;
logic [63:0] rdata;
logic [NrHarts-1:0] cmderror_valid;
dm::cmderr_t [NrHarts-1:0] cmderror;
logic [NrHarts-1:0] cmdbusy;
@ -64,85 +73,127 @@ module dm_top #(
logic [dm::DataCount-1:0][31:0] data_mem_csrs;
logic data_valid;
logic [19:0] hartsel;
// System Bus Access Module
logic [63:0] sbaddress_csrs_sba;
logic [63:0] sbaddress_sba_csrs;
logic sbaddress_write_valid;
logic sbreadonaddr;
logic sbautoincrement;
logic [2:0] sbaccess;
logic sbreadondata;
logic [63:0] sbdata_write;
logic sbdata_read_valid;
logic sbdata_write_valid;
logic [63:0] sbdata_read;
logic sbdata_valid;
logic sbbusy;
logic sberror_valid;
logic [2:0] sberror;
dm_csrs #(
.NrHarts(NrHarts)
) i_dm_csrs (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.dmi_rst_ni ( dmi_rst_ni ),
.dmi_req_valid_i ( dmi_req_valid_i ),
.dmi_req_ready_o ( dmi_req_ready_o ),
.dmi_req_bits_addr_i ( dmi_req_bits_addr_i ),
.dmi_req_bits_op_i ( dmi_req_bits_op_i ),
.dmi_req_bits_data_i ( dmi_req_bits_data_i ),
.dmi_resp_valid_o ( dmi_resp_valid_o ),
.dmi_resp_ready_i ( dmi_resp_ready_i ),
.dmi_resp_bits_resp_o ( dmi_resp_bits_resp_o ),
.dmi_resp_bits_data_o ( dmi_resp_bits_data_o ),
.ndmreset_o ( ndmreset_o ),
.dmactive_o ( dmactive_o ),
.hartsel_o ( hartsel ),
.hartinfo_i ( hartinfo ),
.halted_i ( halted ),
.unavailable_i ( unavailable ),
.resumeack_i ( resumeack ),
.haltreq_o ( haltreq ),
.resumereq_o ( resumereq ),
.cmd_valid_o ( cmd_valid ),
.cmd_o ( cmd ),
.cmderror_valid_i ( cmderror_valid ),
.cmderror_i ( cmderror ),
.cmdbusy_i ( cmdbusy ),
.progbuf_o ( progbuf ),
.data_i ( data_mem_csrs ),
.data_valid_i ( data_valid ),
.data_o ( data_csrs_mem )
);
assign unavailable = '0;
// Debug Ctrl for each hart
// Debug Ctrl for each hart -> I haven't found a better way to
// parameterize this
for (genvar i = 0; i < NrHarts; i++) begin : dm_hart_ctrl
assign hartinfo[i] = ariane_pkg::DebugHartInfo;
end
logic req;
logic we;
logic [63:0] addr;
logic [7:0] be;
logic [63:0] wdata;
logic [63:0] rdata;
dm_csrs #(
.NrHarts(NrHarts)
) i_dm_csrs (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.testmode_i ( testmode_i ),
.dmi_rst_ni ( dmi_rst_ni ),
.dmi_req_valid_i ( dmi_req_valid_i ),
.dmi_req_ready_o ( dmi_req_ready_o ),
.dmi_req_bits_addr_i ( dmi_req_bits_addr_i ),
.dmi_req_bits_op_i ( dmi_req_bits_op_i ),
.dmi_req_bits_data_i ( dmi_req_bits_data_i ),
.dmi_resp_valid_o ( dmi_resp_valid_o ),
.dmi_resp_ready_i ( dmi_resp_ready_i ),
.dmi_resp_bits_resp_o ( dmi_resp_bits_resp_o ),
.dmi_resp_bits_data_o ( dmi_resp_bits_data_o ),
.ndmreset_o ( ndmreset_o ),
.dmactive_o ( dmactive_o ),
.hartsel_o ( hartsel ),
.hartinfo_i ( hartinfo ),
.halted_i ( halted ),
.unavailable_i,
.resumeack_i ( resumeack ),
.haltreq_o ( haltreq ),
.resumereq_o ( resumereq ),
.cmd_valid_o ( cmd_valid ),
.cmd_o ( cmd ),
.cmderror_valid_i ( cmderror_valid ),
.cmderror_i ( cmderror ),
.cmdbusy_i ( cmdbusy ),
.progbuf_o ( progbuf ),
.data_i ( data_mem_csrs ),
.data_valid_i ( data_valid ),
.data_o ( data_csrs_mem ),
.sbaddress_o ( sbaddress_csrs_sba ),
.sbaddress_i ( sbaddress_sba_csrs ),
.sbaddress_write_valid_o ( sbaddress_write_valid ),
.sbreadonaddr_o ( sbreadonaddr ),
.sbautoincrement_o ( sbautoincrement ),
.sbaccess_o ( sbaccess ),
.sbreadondata_o ( sbreadondata ),
.sbdata_o ( sbdata_write ),
.sbdata_read_valid_o ( sbdata_read_valid ),
.sbdata_write_valid_o ( sbdata_write_valid ),
.sbdata_i ( sbdata_read ),
.sbdata_valid_i ( sbdata_valid ),
.sbbusy_i ( sbbusy ),
.sberror_valid_i ( sberror_valid ),
.sberror_i ( sberror )
);
dm_sba i_dm_sba (
.clk_i ( clk_i ),
.dmactive_i ( dmactive_o ),
.axi_master,
.sbaddress_i ( sbaddress_csrs_sba ),
.sbaddress_o ( sbaddress_sba_csrs ),
.sbaddress_write_valid_i ( sbaddress_write_valid ),
.sbreadonaddr_i ( sbreadonaddr ),
.sbautoincrement_i ( sbautoincrement ),
.sbaccess_i ( sbaccess ),
.sbreadondata_i ( sbreadondata ),
.sbdata_i ( sbdata_write ),
.sbdata_read_valid_i ( sbdata_read_valid ),
.sbdata_write_valid_i ( sbdata_write_valid ),
.sbdata_o ( sbdata_read ),
.sbdata_valid_o ( sbdata_valid ),
.sbbusy_o ( sbbusy ),
.sberror_valid_o ( sberror_valid ),
.sberror_o ( sberror )
);
dm_mem #(
.NrHarts (NrHarts)
) i_dm_mem (
.clk_i ( clk_i ),
.dmactive_i ( dmactive_o ),
.debug_req_o ( debug_req_o ),
.hartsel_i ( hartsel ),
.haltreq_i ( haltreq ),
.resumereq_i ( resumereq ),
.halted_o ( halted ),
.resuming_o ( resumeack ),
.cmd_valid_i ( cmd_valid ),
.cmd_i ( cmd ),
.cmderror_valid_o ( cmderror_valid ),
.cmderror_o ( cmderror ),
.cmdbusy_o ( cmdbusy ),
.progbuf_i ( progbuf ),
.data_i ( data_csrs_mem ),
.data_o ( data_mem_csrs ),
.data_valid_o ( data_valid ),
.req_i ( req ),
.we_i ( we ),
.addr_i ( addr ),
.wdata_i ( wdata ),
.be_i ( be ),
.rdata_o ( rdata )
.clk_i ( clk_i ),
.dmactive_i ( dmactive_o ),
.debug_req_o ( debug_req_o ),
.hartsel_i ( hartsel ),
.haltreq_i ( haltreq ),
.resumereq_i ( resumereq ),
.halted_o ( halted ),
.resuming_o ( resumeack ),
.cmd_valid_i ( cmd_valid ),
.cmd_i ( cmd ),
.cmderror_valid_o ( cmderror_valid ),
.cmderror_o ( cmderror ),
.cmdbusy_o ( cmdbusy ),
.progbuf_i ( progbuf ),
.data_i ( data_csrs_mem ),
.data_o ( data_mem_csrs ),
.data_valid_o ( data_valid ),
.req_i ( req ),
.we_i ( we ),
.addr_i ( addr ),
.wdata_i ( wdata ),
.be_i ( be ),
.rdata_o ( rdata )
);
axi2mem #(

View file

@ -187,7 +187,7 @@ module dmi_cdc_jtag #(
cdc_req_ao = 1'b0;
unique case (req_state_p)
RIDLE: begin
IDLE: begin
if (mem_req_i) begin
req_state_n = WAIT_ACK_HIGH;

View file

@ -39,11 +39,10 @@ module dmi_jtag (
output logic td_o, // JTAG test data output pad
output logic tdo_oe_o // Data out output enable
);
assign dmi_rst_no = 1'b1;
logic test_logic_reset;
logic run_test_idle;
logic shift_dr;
logic pause_dr;
logic update_dr;
logic capture_dr;
logic dmi_access;
@ -67,8 +66,8 @@ module dmi_jtag (
} dmi_t;
typedef enum logic [1:0] {
DMINoError = 0, DMIReservedError = 1,
DMIOPFailed = 2, DMIBusy = 3
DMINoError = 2'h0, DMIReservedError = 2'h1,
DMIOPFailed = 2'h2, DMIBusy = 2'h3
} dmi_error_t;
enum logic [2:0] { Idle, Read, WaitReadValid, Write, WaitWriteValid } state_d, state_q;
@ -77,7 +76,7 @@ module dmi_jtag (
logic [6:0] address_d, address_q;
logic [31:0] data_d, data_q;
dmi_t dmi, read_dmi;
dmi_t dmi;
assign dmi = dmi_t'(dr_q);
assign mem_addr = address_q;
assign mem_wdata = data_q;
@ -86,9 +85,6 @@ module dmi_jtag (
logic error_dmi_busy;
dmi_error_t error_d, error_q;
// DMI which we return
assign read_dmi = {7'b0, data_q, error_q};
always_comb begin
error_dmi_busy = 1'b0;
// default assignments
@ -209,6 +205,7 @@ module dmi_jtag (
error_q <= error_d;
end
end
// ---------
// TAP
// ---------
@ -222,9 +219,7 @@ module dmi_jtag (
.td_o,
.tdo_oe_o,
.test_logic_reset_o ( test_logic_reset ),
.run_test_idle_o ( run_test_idle ),
.shift_dr_o ( shift_dr ),
.pause_dr_o ( pause_dr ),
.update_dr_o ( update_dr ),
.capture_dr_o ( capture_dr ),
.dmi_access_o ( dmi_access ),

View file

@ -26,9 +26,7 @@ module dmi_jtag_tap #(
output logic td_o, // JTAG test data output pad
output logic tdo_oe_o, // Data out output enable
output logic test_logic_reset_o,
output logic run_test_idle_o,
output logic shift_dr_o,
output logic pause_dr_o,
output logic update_dr_o,
output logic capture_dr_o,
@ -225,11 +223,9 @@ module dmi_jtag_tap #(
// Determination of next state; purely combinatorial
always_comb begin
test_logic_reset_o = 1'b0;
run_test_idle_o = 1'b0;
capture_dr_o = 1'b0;
shift_dr_o = 1'b0;
pause_dr_o = 1'b0;
update_dr_o = 1'b0;
capture_ir = 1'b0;
@ -239,11 +235,9 @@ module dmi_jtag_tap #(
case (tap_state_q)
TestLogicReset: begin
test_logic_reset_o = 1'b1;
tap_state_d = (tms_i) ? TestLogicReset : RunTestIdle;
end
RunTestIdle: begin
run_test_idle_o = 1'b1;
tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
end
// DR Path
@ -262,7 +256,6 @@ module dmi_jtag_tap #(
tap_state_d = (tms_i) ? UpdateDr : PauseDr;
end
PauseDr: begin
pause_dr_o = 1'b1;
tap_state_d = (tms_i) ? Exit2Dr : PauseDr;
end
Exit2Dr: begin

View file

@ -50,11 +50,10 @@ module decoder (
// Immediate select
// --------------------
enum logic[3:0] {
NOIMM, PCIMM, IIMM, SIMM, SBIMM, BIMM, UIMM, JIMM, RS3
NOIMM, IIMM, SIMM, SBIMM, UIMM, JIMM, RS3
} imm_select;
logic [63:0] imm_i_type;
logic [11:0] imm_iz_type;
logic [63:0] imm_s_type;
logic [63:0] imm_sb_type;
logic [63:0] imm_u_type;
@ -70,10 +69,11 @@ module decoder (
instruction_o.trans_id = 5'b0;
instruction_o.fu = NONE;
instruction_o.op = ADD;
instruction_o.rs1 = 5'b0;
instruction_o.rs2 = 5'b0;
instruction_o.rd = 5'b0;
instruction_o.rs1 = '0;
instruction_o.rs2 = '0;
instruction_o.rd = '0;
instruction_o.use_pc = 1'b0;
instruction_o.trans_id = '0;
instruction_o.is_compressed = is_compressed_i;
instruction_o.use_zimm = 1'b0;
instruction_o.bp = branch_predict_i;
@ -84,9 +84,9 @@ module decoder (
if (~ex_i.valid) begin
case (instr.rtype.opcode)
riscv::OpcodeSystem: begin
instruction_o.fu = CSR;
instruction_o.rs1 = instr.itype.rs1;
instruction_o.rd = instr.itype.rd;
instruction_o.fu = CSR;
instruction_o.rs1[4:0] = instr.itype.rs1;
instruction_o.rd[4:0] = instr.itype.rd;
unique case (instr.itype.funct3)
3'b000: begin
@ -184,13 +184,13 @@ module decoder (
end
// use zimm and iimm
3'b101: begin// CSRRWI
instruction_o.rs1 = instr.itype.rs1;
instruction_o.rs1[4:0] = instr.itype.rs1;
imm_select = IIMM;
instruction_o.use_zimm = 1'b1;
instruction_o.op = CSR_WRITE;
end
3'b110: begin// CSRRSI
instruction_o.rs1 = instr.itype.rs1;
instruction_o.rs1[4:0] = instr.itype.rs1;
imm_select = IIMM;
instruction_o.use_zimm = 1'b1;
// this is just a read
@ -200,7 +200,7 @@ module decoder (
instruction_o.op = CSR_SET;
end
3'b111: begin// CSRRCI
instruction_o.rs1 = instr.itype.rs1;
instruction_o.rs1[4:0] = instr.itype.rs1;
imm_select = IIMM;
instruction_o.use_zimm = 1'b1;
// this is just a read
@ -247,12 +247,12 @@ module decoder (
if (FP_PRESENT & XFVEC) begin // only generate decoder if FP extensions are enabled (static)
automatic logic allow_replication; // control honoring of replication flag
instruction_o.fu = FPU_VEC; // Same unit, but sets 'vectorial' signal
instruction_o.rs1 = instr.rvftype.rs1;
instruction_o.rs2 = instr.rvftype.rs2;
instruction_o.rd = instr.rvftype.rd;
check_fprm = 1'b1;
allow_replication = 1'b1;
instruction_o.fu = FPU_VEC; // Same unit, but sets 'vectorial' signal
instruction_o.rs1[4:0] = instr.rvftype.rs1;
instruction_o.rs2[4:0] = instr.rvftype.rs2;
instruction_o.rd[4:0] = instr.rvftype.rd;
check_fprm = 1'b1;
allow_replication = 1'b1;
// decode vectorial FP instruction
unique case (instr.rvftype.vecfltop)
5'b00001 : instruction_o.op = FADD; // vfadd.vfmt - Vectorial FP Addition
@ -495,9 +495,9 @@ module decoder (
// --------------------------
riscv::OpcodeOp32: begin
instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? MULT : ALU;
instruction_o.rs1 = instr.rtype.rs1;
instruction_o.rs2 = instr.rtype.rs2;
instruction_o.rd = instr.rtype.rd;
instruction_o.rs1[4:0] = instr.rtype.rs1;
instruction_o.rs2[4:0] = instr.rtype.rs2;
instruction_o.rd[4:0] = instr.rtype.rd;
unique case ({instr.rtype.funct7, instr.rtype.funct3})
{7'b000_0000, 3'b000}: instruction_o.op = ADDW; // addw
@ -520,8 +520,8 @@ module decoder (
riscv::OpcodeOpImm: begin
instruction_o.fu = ALU;
imm_select = IIMM;
instruction_o.rs1 = instr.itype.rs1;
instruction_o.rd = instr.itype.rd;
instruction_o.rs1[4:0] = instr.itype.rs1;
instruction_o.rd[4:0] = instr.itype.rd;
unique case (instr.itype.funct3)
3'b000: instruction_o.op = ADD; // Add Immediate
@ -554,8 +554,8 @@ module decoder (
riscv::OpcodeOpImm32: begin
instruction_o.fu = ALU;
imm_select = IIMM;
instruction_o.rs1 = instr.itype.rs1;
instruction_o.rd = instr.itype.rd;
instruction_o.rs1[4:0] = instr.itype.rs1;
instruction_o.rd[4:0] = instr.itype.rd;
unique case (instr.itype.funct3)
3'b000: instruction_o.op = ADDW; // Add Immediate
@ -584,8 +584,8 @@ module decoder (
riscv::OpcodeStore: begin
instruction_o.fu = STORE;
imm_select = SIMM;
instruction_o.rs1 = instr.stype.rs1;
instruction_o.rs2 = instr.stype.rs2;
instruction_o.rs1[4:0] = instr.stype.rs1;
instruction_o.rs2[4:0] = instr.stype.rs2;
// determine store size
unique case (instr.stype.funct3)
3'b000: instruction_o.op = SB;
@ -599,8 +599,8 @@ module decoder (
riscv::OpcodeLoad: begin
instruction_o.fu = LOAD;
imm_select = IIMM;
instruction_o.rs1 = instr.itype.rs1;
instruction_o.rd = instr.itype.rd;
instruction_o.rs1[4:0] = instr.itype.rs1;
instruction_o.rd[4:0] = instr.itype.rd;
// determine load size and signed type
unique case (instr.itype.funct3)
3'b000: instruction_o.op = LB;
@ -680,9 +680,9 @@ module decoder (
// select the correct fused operation
unique case (instr.r4type.opcode)
default: instruction_o.op = FMADD; // fmadd.fmt - FP Fused multiply-add
OPCODE_MSUB: instruction_o.op = FMSUB; // fmsub.fmt - FP Fused multiply-subtract
OPCODE_NMSUB: instruction_o.op = FNMSUB; // fnmsub.fmt - FP Negated fused multiply-subtract
OPCODE_NMADD: instruction_o.op = FNMADD; // fnmadd.fmt - FP Negated fused multiply-add
riscv::OpcodeMsub: instruction_o.op = FMSUB; // fmsub.fmt - FP Fused multiply-subtract
riscv::OpcodeNmsub: instruction_o.op = FNMSUB; // fnmsub.fmt - FP Negated fused multiply-subtract
riscv::OpcodeNmadd: instruction_o.op = FNMADD; // fnmadd.fmt - FP Negated fused multiply-add
endcase
// determine fp format
@ -865,8 +865,8 @@ module decoder (
riscv::OpcodeAmo: begin
// we are going to use the load unit for AMOs
instruction_o.fu = LOAD;
instruction_o.rd = instr.stype.imm0;
instruction_o.rs1 = instr.itype.rs1;
instruction_o.rd[4:0] = instr.stype.imm0;
instruction_o.rs1[4:0] = instr.itype.rs1;
// words
if (instr.stype.funct3 == 3'h2) begin
unique case (instr.instr[31:27])
@ -911,8 +911,8 @@ module decoder (
riscv::OpcodeBranch: begin
imm_select = SBIMM;
instruction_o.fu = CTRL_FLOW;
instruction_o.rs1 = instr.stype.rs1;
instruction_o.rs2 = instr.stype.rs2;
instruction_o.rs1[4:0] = instr.stype.rs1;
instruction_o.rs2[4:0] = instr.stype.rs2;
is_control_flow_instr_o = 1'b1;
@ -933,33 +933,32 @@ module decoder (
riscv::OpcodeJalr: begin
instruction_o.fu = CTRL_FLOW;
instruction_o.op = JALR;
instruction_o.rs1 = instr.itype.rs1;
instruction_o.rs1[4:0] = instr.itype.rs1;
imm_select = IIMM;
instruction_o.rd = instr.itype.rd;
instruction_o.rd[4:0] = instr.itype.rd;
is_control_flow_instr_o = 1'b1;
// invalid jump and link register -> reserved for vector encoding
if (instr.itype.funct3 != 3'b0)
illegal_instr = 1'b1;
if (instr.itype.funct3 != 3'b0) illegal_instr = 1'b1;
end
// Jump and link
riscv::OpcodeJal: begin
instruction_o.fu = CTRL_FLOW;
imm_select = JIMM;
instruction_o.rd = instr.utype.rd;
instruction_o.rd[4:0] = instr.utype.rd;
is_control_flow_instr_o = 1'b1;
end
riscv::OpcodeAuipc: begin
instruction_o.fu = ALU;
imm_select = UIMM;
instruction_o.use_pc = 1'b1;
instruction_o.rd = instr.utype.rd;
instruction_o.fu = ALU;
imm_select = UIMM;
instruction_o.use_pc = 1'b1;
instruction_o.rd[4:0] = instr.utype.rd;
end
riscv::OpcodeLui: begin
imm_select = UIMM;
instruction_o.fu = ALU;
instruction_o.rd = instr.utype.rd;
imm_select = UIMM;
instruction_o.fu = ALU;
instruction_o.rd[4:0] = instr.utype.rd;
end
default: illegal_instr = 1'b1;
@ -972,20 +971,15 @@ module decoder (
// --------------------------------
always_comb begin : sign_extend
imm_i_type = i_imm(instruction_i);
imm_iz_type = { 52'b0, instruction_i[31:20] };
imm_s_type = { {52 {instruction_i[31]}}, instruction_i[31:25], instruction_i[11:7] };
imm_sb_type = sb_imm(instruction_i);
imm_u_type = { {32 {instruction_i[31]}}, instruction_i[31:12], 12'b0 }; // JAL, AUIPC, sign extended to 64 bit
imm_uj_type = uj_imm(instruction_i);
imm_bi_type = { {59{instruction_i[24]}}, instruction_i[24:20] };
// NOIMM, PCIMM, IIMM, SIMM, BIMM, BIMM, UIMM, JIMM, RS3
// NOIMM, IIMM, SIMM, BIMM, UIMM, JIMM, RS3
// select immediate
case (imm_select)
PCIMM: begin
instruction_o.result = pc_i;
instruction_o.use_imm = 1'b1;
end
IIMM: begin
instruction_o.result = imm_i_type;
instruction_o.use_imm = 1'b1;
@ -998,10 +992,6 @@ module decoder (
instruction_o.result = imm_sb_type;
instruction_o.use_imm = 1'b1;
end
BIMM: begin
instruction_o.result = imm_bi_type;
instruction_o.use_imm = 1'b1;
end
UIMM: begin
instruction_o.result = imm_u_type;
instruction_o.use_imm = 1'b1;
@ -1033,7 +1023,7 @@ module decoder (
if (~ex_i.valid) begin
// if we didn't already get an exception save the instruction here as we may need it
// in the commit stage if we got a access exception to one of the CSR registers
instruction_o.ex.tval = instruction_i;
instruction_o.ex.tval = {32'b0, instruction_i};
// instructions which will throw an exception are marked as valid
// e.g.: they can be committed anytime and do not need to wait for any functional unit
// check here if we decoded an invalid instruction or if the compressed decoder already decoded

View file

@ -16,11 +16,8 @@
import ariane_pkg::*;
module ex_stage #(
parameter int ASID_WIDTH = 1,
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000,
parameter int unsigned AXI_ID_WIDTH = 10,
parameter int unsigned AXI_USER_WIDTH = 1
)(
parameter int ASID_WIDTH = 1
) (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
@ -38,10 +35,8 @@ module ex_stage #(
output logic alu_ready_o, // FU is ready
input logic alu_valid_i, // Output is valid
output logic alu_valid_o, // ALU result is valid
output logic alu_branch_res_o, // Branch comparison result
output logic [63:0] alu_result_o,
output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, // ID of scoreboard entry at which to write back
output exception_t alu_exception_o,
// Branches and Jumps
output logic branch_ready_o,
input logic branch_valid_i, // we are using the branch unit
@ -92,36 +87,34 @@ module ex_stage #(
input logic enable_translation_i,
input logic en_ld_st_translation_i,
input logic flush_tlb_i,
input logic fetch_req_i,
input logic [63:0] fetch_vaddr_i,
output logic fetch_valid_o,
output logic [63:0] fetch_paddr_o,
output exception_t fetch_exception_o,
input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input logic mxr_i,
input logic [43:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i,
// icache translation requests
input icache_areq_o_t icache_areq_i,
output icache_areq_i_t icache_areq_o,
// interface to dcache
input dcache_req_o_t [2:0] dcache_req_ports_i,
output dcache_req_i_t [2:0] dcache_req_ports_o,
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic dcache_miss_o,
// DCache interface
input logic dcache_en_i,
input logic flush_dcache_i,
output logic flush_dcache_ack_o,
AXI_BUS.Master data_if,
AXI_BUS.Master bypass_if
output logic dtlb_miss_o
);
logic alu_branch_res; // branch comparison result
// -----
// ALU
// -----
alu alu_i (
.result_o ( alu_result_o ),
.alu_branch_res_o ( alu_branch_res ),
.*
);
@ -130,7 +123,7 @@ module ex_stage #(
// --------------------
branch_unit branch_unit_i (
.fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i), // any functional unit is valid, check that there is no accidental mis-predict
.branch_comp_res_i ( alu_branch_res_o),
.branch_comp_res_i ( alu_branch_res ),
.*
);
@ -164,14 +157,11 @@ module ex_stage #(
// ----------------
// Load-Store Unit
// ----------------
lsu #(
.CACHE_START_ADDR ( CACHE_START_ADDR ),
.AXI_ID_WIDTH ( AXI_ID_WIDTH ),
.AXI_USER_WIDTH ( AXI_USER_WIDTH )
) lsu_i (
.commit_i ( lsu_commit_i ),
.commit_ready_o ( lsu_commit_ready_o ),
.data_if ( data_if ),
lsu lsu_i (
.commit_i ( lsu_commit_i ),
.commit_ready_o ( lsu_commit_ready_o ),
.dcache_req_ports_i ( dcache_req_ports_i ),
.dcache_req_ports_o ( dcache_req_ports_o ),
.*
);

View file

@ -1,136 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 14.05.2017
// Description: Dual Port fetch FIFO with instruction aligner and support for compressed instructions
import ariane_pkg::*;
// Fetch FIFO: buffers fetched instruction words together with their address,
// branch prediction and fetch exception, and hands them out one entry at a
// time. A second read port is sketched in comments but not implemented.
//
// Ports:
//   flush_i              - synchronously clears pointers and fill level
//   branch_predict_i,
//   ex_i, addr_i,
//   rdata_i, valid_i     - input entry; stored whenever valid_i is high
//   ready_o              - high while fewer than DEPTH-3 entries are stored
//   fetch_entry_o        - entry at the read pointer
//   fetch_entry_valid_o  - high while at least one entry is stored
//   fetch_ack_i          - consumes the entry at the read pointer
module fetch_fifo (
    input  logic                   clk_i,
    input  logic                   rst_ni,
    // control signals
    input  logic                   flush_i,    // clears the contents of the FIFO -> quasi reset
    // branch prediction at addr_i address, as this is an address and not PC it can be the case
    // that we have two compressed instruction (or one compressed instruction and one unaligned instruction) so we
    // only predict on one entry and discard (or keep) the other depending on its position and prediction.
    // input port
    input  branchpredict_sbe_t     branch_predict_i,
    input  exception_t             ex_i,       // fetch exception in
    input  logic [63:0]            addr_i,
    input  logic [31:0]            rdata_i,
    input  logic                   valid_i,
    output logic                   ready_o,
    // Dual Port Fetch FIFO
    // output port 0
    output fetch_entry_t           fetch_entry_o,
    output logic                   fetch_entry_valid_o,
    input  logic                   fetch_ack_i
    // // output port 1
    // output fetch_entry_t           fetch_entry_1_o,
    // output logic                   fetch_entry_valid_1_o,
    // input  logic                   fetch_ack_1_i
);

    localparam int unsigned DEPTH = 8; // must be a power of two

    // status signals
    logic full, empty;

    fetch_entry_t mem_n[DEPTH-1:0], mem_q[DEPTH-1:0];
    logic [$clog2(DEPTH)-1:0] read_pointer_n,  read_pointer_q;
    logic [$clog2(DEPTH)-1:0] write_pointer_n, write_pointer_q;
    // The fill level ranges over 0..DEPTH inclusive and therefore needs one bit
    // more than the pointers. With only $clog2(DEPTH) bits the value DEPTH is
    // not representable: `full` could never be asserted and the counter would
    // wrap to "empty" on the DEPTH-th entry.
    logic [$clog2(DEPTH):0]   status_cnt_n,    status_cnt_q;

    assign ready_o = (status_cnt_q < DEPTH-3);
    assign full    = (status_cnt_q == DEPTH);
    assign empty   = (status_cnt_q == '0);

    always_comb begin : fetch_fifo_logic
        // working copies so that a push and a pop in the same cycle cancel out
        automatic logic [$clog2(DEPTH):0]   status_cnt;
        automatic logic [$clog2(DEPTH)-1:0] write_pointer;
        automatic logic [$clog2(DEPTH)-1:0] read_pointer;

        status_cnt    = status_cnt_q;
        write_pointer = write_pointer_q;
        read_pointer  = read_pointer_q;
        mem_n         = mem_q;

        // -------------
        // Input Port
        // -------------
        if (valid_i) begin
            status_cnt++;
            // new input data
            mem_n[write_pointer_q] = {addr_i, rdata_i, branch_predict_i, ex_i};
            write_pointer++;
        end

        // -------------
        // Fetch Port 0
        // -------------
        fetch_entry_valid_o = (status_cnt_q >= 1);
        fetch_entry_o       = mem_q[read_pointer_q];

        if (fetch_ack_i) begin
            read_pointer++;
            status_cnt--;
        end

        // -------------
        // Fetch Port 1
        // -------------
        // fetch_entry_valid_1_o = (status_cnt_q >= 2);
        // fetch_entry_1_o = mem_q[read_pointer_q + 1'b1];
        // if (fetch_ack_1_i) begin
        //     read_pointer++;
        //     status_cnt--;
        // end

        write_pointer_n = write_pointer;
        status_cnt_n    = status_cnt;
        read_pointer_n  = read_pointer;

        // a flush overrides everything computed above; the memory contents are
        // kept but become unreachable because the fill level is zero
        if (flush_i) begin
            status_cnt_n    = '0;
            write_pointer_n = '0;
            read_pointer_n  = '0;
        end
    end

    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            status_cnt_q    <= '0;
            mem_q           <= '{default: 0};
            read_pointer_q  <= '0;
            write_pointer_q <= '0;
        end else begin
            status_cnt_q    <= status_cnt_n;
            mem_q           <= mem_n;
            read_pointer_q  <= read_pointer_n;
            write_pointer_q <= write_pointer_n;
        end
    end

    //-------------
    // Assertions
    //-------------
    `ifndef SYNTHESIS
    `ifndef VERILATOR
    // Make sure we don't overflow the queue
    assert property (@(posedge clk_i) ((full && !flush_i) |-> ##1 !empty)) else $error("Fetch FIFO Overflowed");
    // The fill level may move by at most two entries per cycle unless flushed.
    // The magnitude is computed explicitly: the operands are unsigned, so the
    // former `diff <= 2 || diff >= -2` comparison was always true.
    assert property (@(posedge clk_i) (flush_i ||
        ((status_cnt_q > status_cnt_n) ? (status_cnt_q - status_cnt_n) : (status_cnt_n - status_cnt_q)) <= 2))
        else $error("Fetch FIFO over- or underflowed");
    assert property (@(posedge clk_i) (valid_i |-> !full)) else $error("Got a valid signal, although the queue is not ready to accept a new request");
    `endif
    `endif
endmodule

View file

@ -1,85 +0,0 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
/// A leading-one finder / leading zero counter.
/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB)
/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB)
// Combinational binary-tree search for the first set bit of `in_i`.
//
// Parameters:
//   WIDTH - number of input bits; must be overridden with a value >= 1
//           (the default of -1 is deliberately invalid).
//   FLIP  - 0: search from bit 0 upwards; 1: the input is bit-reversed first,
//           so the result counts from the MSB downwards.
// Ports:
//   in_i        - vector to search
//   first_one_o - index of the first set bit in search order
//   no_ones_o   - high when no bit of in_i is set
module find_first_one #(
    /// The width of the input vector.
    parameter int WIDTH = -1,
    parameter int FLIP = 0
)(
    input  logic [WIDTH-1:0]         in_i,
    output logic [$clog2(WIDTH)-1:0] first_one_o,
    output logic                     no_ones_o
);

    localparam int NUM_LEVELS = $clog2(WIDTH);

    // pragma translate_off
    initial begin
        // A zero-width vector is as meaningless as a negative one, so reject
        // both (the previous check `WIDTH >= 0` let WIDTH == 0 through).
        assert (WIDTH > 0) else $fatal(1, "find_first_one: WIDTH must be at least 1 (got %0d)", WIDTH);
    end
    // pragma translate_on

    logic [WIDTH-1:0][NUM_LEVELS-1:0]          index_lut;   // index_lut[j] == j
    logic [2**NUM_LEVELS-1:0]                  sel_nodes;   // "a one exists below this node"
    logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0]  index_nodes; // index of the first one below this node
    logic [WIDTH-1:0]                          in_tmp;      // input in search order

    // optionally reverse the bit order
    for (genvar i = 0; i < WIDTH; i++) begin : gen_flip
        assign in_tmp[i] = FLIP ? in_i[WIDTH-1-i] : in_i[i];
    end

    // constant table holding each position's own index
    for (genvar j = 0; j < WIDTH; j++) begin : gen_index_lut
        assign index_lut[j] = j;
    end

    // The tree is stored heap-style: level `level` occupies the nodes
    // 2**level-1 .. 2**(level+1)-2, node 0 is the root.
    for (genvar level = 0; level < NUM_LEVELS; level++) begin : gen_tree
        // inner levels: merge two child nodes, preferring the lower-numbered child
        if (level < NUM_LEVELS-1) begin : gen_inner_level
            for (genvar l = 0; l < 2**level; l++) begin : gen_node
                assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
                assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
                    index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
            end
        end
        // leaf level: each node looks at up to two input bits
        if (level == NUM_LEVELS-1) begin : gen_leaf_level
            for (genvar k = 0; k < 2**level; k++) begin : gen_node
                // if two successive indices are still in the vector...
                if (k * 2 < WIDTH-1) begin : gen_pair
                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2] | in_tmp[k*2+1];
                    assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
                end
                // if only the first index is still in the vector...
                if (k * 2 == WIDTH-1) begin : gen_single
                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2];
                    assign index_nodes[2**level-1+k] = index_lut[k*2];
                end
                // if index is out of range
                if (k * 2 > WIDTH-1) begin : gen_unused
                    assign sel_nodes[2**level-1+k]   = 1'b0;
                    assign index_nodes[2**level-1+k] = '0;
                end
            end
        end
    end

    // with a single-bit input there is no tree, so fall back to constants
    assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0;
    assign no_ones_o   = NUM_LEVELS > 0 ? ~sel_nodes[0]  : '1;

endmodule

View file

@ -1,109 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 24.4.2017
// Description: Generic FIFO implementation
// Generic synchronous FIFO.
//
// Parameters:
//   dtype - type of one queue entry
//   DEPTH - number of entries; must be a power of two because the read and
//           write pointers rely on natural wrap-around
// Behaviour:
//   - a push is accepted only while the queue is not full, a pop only while
//     it is not empty; rejected operations are silently ignored
//   - data_o always shows the entry at the read pointer
//   - flush_i synchronously clears pointers, fill level and memory
module fifo #(
    parameter type dtype = logic[63:0],
    parameter int unsigned DEPTH = 4
)(
    input  logic clk_i,            // Clock
    input  logic rst_ni,           // Asynchronous reset active low
    input  logic flush_i,          // flush the queue
    // status flags
    output logic full_o,           // queue is full
    output logic empty_o,          // queue is empty
    output logic single_element_o, // there is just a single element in the queue
    // as long as the queue is not full we can push new data
    input  dtype data_i,           // data to push into the queue
    input  logic push_i,           // data is valid and can be pushed to the queue
    // as long as the queue is not empty we can pop new elements
    output dtype data_o,           // output data
    input  logic pop_i             // pop head from queue
);
    // pointer to the read and write section of the queue
    logic [$clog2(DEPTH) - 1:0] read_pointer_n, read_pointer_q, write_pointer_n, write_pointer_q;
    // keep a counter to keep track of the current queue status
    int unsigned status_cnt_n, status_cnt_q; // this integer will be truncated by the synthesis tool
    // actual memory
    dtype [DEPTH-1:0] mem_n, mem_q;

    assign full_o           = (status_cnt_q == DEPTH);
    assign empty_o          = (status_cnt_q == 0);
    assign single_element_o = (status_cnt_q == 1);

    // read and write queue logic
    always_comb begin : read_write_comb
        // default assignment
        read_pointer_n  = read_pointer_q;
        write_pointer_n = write_pointer_q;
        status_cnt_n    = status_cnt_q;
        data_o          = mem_q[read_pointer_q];
        mem_n           = mem_q;

        // push a new element to the queue
        if (push_i && ~full_o) begin
            // push the data onto the queue
            mem_n[write_pointer_q] = data_i;
            // increment the write counter, this counter can overflow
            write_pointer_n = write_pointer_q + 1;
            // increment the overall counter
            status_cnt_n    = status_cnt_q + 1;
        end

        if (pop_i && ~empty_o) begin
            // read from the queue is a default assignment
            // but increment the read pointer...
            read_pointer_n = read_pointer_q + 1;
            // ... clear the slot that was just read ...
            mem_n[read_pointer_q] = '0;
            // ... and decrement the overall count
            status_cnt_n   = status_cnt_q - 1;
        end

        // keep the count pointer stable if we push and pop at the same time
        if (push_i && ~full_o && pop_i && ~empty_o)
            status_cnt_n = status_cnt_q;
    end

    // sequential process
    always_ff @(posedge clk_i or negedge rst_ni) begin
        if (~rst_ni) begin
            read_pointer_q  <= '0;
            write_pointer_q <= '0;
            status_cnt_q    <= '0;
            mem_q           <= '0;
        end else if (flush_i) begin
            // synchronous flush takes priority over normal operation
            read_pointer_q  <= '0;
            write_pointer_q <= '0;
            status_cnt_q    <= '0;
            mem_q           <= '0;
        end else begin
            read_pointer_q  <= read_pointer_n;
            write_pointer_q <= write_pointer_n;
            status_cnt_q    <= status_cnt_n;
            mem_q           <= mem_n;
        end
    end

    `ifndef SYNTHESIS
    `ifndef verilator
    // one-time parameter check; $fatal takes the finish number as its first argument
    initial begin
        assert (DEPTH == 2**$clog2(DEPTH)) else $fatal(1, "FIFO size needs to be a power of two.");
    end

    // The protocol checks live at module level so that they are evaluated on
    // every clock edge. Inside an `initial` block a concurrent assertion is
    // only attempted once, at the first clock tick.
    full_write : assert property(
        @(posedge clk_i) (rst_ni && full_o |-> ~push_i))
        else $error ("Trying to push new data although the FIFO is full.");

    empty_read : assert property(
        @(posedge clk_i) (rst_ni && empty_o |-> ~pop_i))
        else $error ("Trying to pop data although the FIFO is empty.");
    `endif
    `endif
endmodule

1
src/fpga-support Submodule

@ -0,0 +1 @@
Subproject commit 3e925e169bd02ebf26e3d4ab65cd1832319cf580

@ -1 +0,0 @@
Subproject commit f693140ea22f40f0b8989bf8fe03bf4726cde3e3

1
src/fpu Submodule

@ -0,0 +1 @@
Subproject commit 17976c9adf26c02c26d35df1899864abe6c23da2

@ -1 +1 @@
Subproject commit afbada8165819fa0174e5418fde67c7bde2216a6
Subproject commit 5fe4bf51be8c5ceb00f3cf03e3d893ca579dba14

View file

@ -12,27 +12,16 @@
// Date: 08.02.2018
// Description: Ariane Instruction Fetch Frontend
import ariane_pkg::*;
module frontend #(
parameter int unsigned SET_ASSOCIATIVITY = 4,
parameter int unsigned CACHE_LINE_WIDTH = 64, // in bit
parameter int unsigned FETCH_WIDTH = 32
)(
module frontend (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush request for PCGEN
input logic en_cache_i, // enable icache
input logic flush_bp_i, // flush branch prediction
input logic flush_icache_i, // instruction fence in
// global input
input logic [63:0] boot_addr_i,
// Address translation interface
output logic fetch_req_o, // address translation request
output logic [63:0] fetch_vaddr_o, // virtual address out
input logic fetch_valid_i, // address translation valid
input logic [63:0] fetch_paddr_i, // physical address in
input exception_t fetch_exception_i, // exception occurred during fetch
// Set a new PC
// mispredict
input branchpredict_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB
@ -46,9 +35,8 @@ module frontend #(
input logic ex_valid_i, // exception is valid - from commit
input logic set_debug_pc_i, // jump to debug address
// Instruction Fetch
AXI_BUS.Master axi,
output logic l1_icache_miss_o, // instruction cache missed
//
input icache_dreq_o_t icache_dreq_i,
output icache_dreq_i_t icache_dreq_o,
// instruction output port -> to processor back-end
output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
output logic fetch_entry_valid_o, // instruction in IF is valid
@ -58,13 +46,13 @@ module frontend #(
localparam int unsigned INSTR_PER_FETCH = FETCH_WIDTH/16;
// Registers
logic [31:0] icache_data_d, icache_data_q;
logic icache_valid_d, icache_valid_q;
exception_t icache_ex_d, icache_ex_q;
logic [31:0] icache_data_q;
logic icache_valid_q;
exception_t icache_ex_q;
logic instruction_valid;
logic [63:0] icache_vaddr_d, icache_vaddr_q;
logic [63:0] icache_vaddr_q;
// BHT, BTB and RAS prediction
bht_prediction_t bht_prediction;
@ -75,12 +63,10 @@ module frontend #(
logic ras_push, ras_pop;
logic [63:0] ras_update;
// icache control signals
logic icache_req, kill_s1, kill_s2, icache_ready;
// instruction fetch is ready
logic if_ready;
logic [63:0] npc_d, npc_q; // next PC
logic npc_rst_load_q; //indicates whether we come out of reset (then we need to load boot_addr_i)
// -----------------------
// Ctrl Flow Speculation
// -----------------------
@ -97,14 +83,17 @@ module frontend #(
logic [INSTR_PER_FETCH-1:0][31:0] instr;
logic [INSTR_PER_FETCH-1:0][63:0] addr;
// virtual address of current fetch
logic [63:0] fetch_vaddr;
logic [63:0] bp_vaddr;
logic bp_valid; // we have a valid branch-prediction
logic is_mispredict;
// branch-prediction which we inject into the pipeline
branchpredict_sbe_t bp_sbe;
logic fifo_valid, fifo_ready; // fetch FIFO
// fetch fifo credit system
logic fifo_valid, fifo_ready, fifo_empty, fifo_pop;
logic s2_eff_kill, issue_req, s2_in_flight_d, s2_in_flight_q;
logic [$clog2(FETCH_FIFO_DEPTH):0] fifo_credits_d;
logic [$clog2(FETCH_FIFO_DEPTH):0] fifo_credits_q;
// save the unaligned part of the instruction to this ff
logic [15:0] unaligned_instr_d, unaligned_instr_q;
@ -161,31 +150,30 @@ module frontend #(
unaligned_instr_d = icache_data_q[31:16];
end
if (kill_s2) begin
if (icache_dreq_o.kill_s2) begin
unaligned_d = 1'b0;
end
end
logic [INSTR_PER_FETCH:0] taken;
// control front-end + branch-prediction
always_comb begin : frontend_ctrl
automatic logic take_rvi_cf; // take the control flow change (non-compressed)
automatic logic take_rvc_cf; // take the control flow change (compressed)
take_rvi_cf = 1'b0;
take_rvc_cf = 1'b0;
ras_pop = 1'b0;
ras_push = 1'b0;
ras_update = '0;
taken = '0;
take_rvi_cf = 1'b0;
if_ready = icache_ready & fifo_ready;
icache_req = fifo_ready;
take_rvi_cf = 1'b0;
take_rvc_cf = 1'b0;
ras_pop = 1'b0;
ras_push = 1'b0;
ras_update = '0;
taken = '0;
take_rvi_cf = 1'b0;
bp_vaddr = '0; // predicted address
bp_valid = 1'b0; // prediction is valid
bp_vaddr = '0; // predicted address
bp_valid = 1'b0; // prediction is valid
bp_sbe.cf_type = RAS;
bp_sbe.cf_type = RAS;
// only predict if the response is valid
if (instruction_valid) begin
@ -262,22 +250,21 @@ module frontend #(
end
logic is_mispredict;
assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
always_comb begin : id_if
kill_s1 = 1'b0;
kill_s2 = 1'b0;
icache_dreq_o.kill_s1 = 1'b0;
icache_dreq_o.kill_s2 = 1'b0;
// we mis-predicted so kill the icache request and the fetch queue
if (is_mispredict || flush_i) begin
kill_s1 = 1'b1;
kill_s2 = 1'b1;
icache_dreq_o.kill_s1 = 1'b1;
icache_dreq_o.kill_s2 = 1'b1;
end
// if we have a valid branch-prediction we need to kill the last cache request
if (bp_valid) begin
kill_s2 = 1'b1;
icache_dreq_o.kill_s2 = 1'b1;
end
fifo_valid = icache_valid_q;
@ -313,9 +300,21 @@ module frontend #(
always_comb begin : npc_select
automatic logic [63:0] fetch_address;
fetch_address = npc_q;
// keep stable by default
npc_d = npc_q;
// check whether we come out of reset
// this is a workaround. some tools have issues
// having boot_addr_i in the asynchronous
// reset assignment to npc_q, even though
// boot_addr_i will be assigned a constant
// on the top-level.
if (npc_rst_load_q) begin
npc_d = boot_addr_i;
fetch_address = boot_addr_i;
end else begin
fetch_address = npc_q;
// keep stable by default
npc_d = npc_q;
end
// -------------------------------
// 1. Branch Prediction
// -------------------------------
@ -366,12 +365,50 @@ module frontend #(
if (set_debug_pc_i) begin
npc_d = dm::HaltAddress;
end
fetch_vaddr = fetch_address;
icache_dreq_o.vaddr = fetch_address;
end
// -------------------
// Credit-based fetch FIFO flow ctrl
// -------------------
// The credit counter is reloaded with FETCH_FIFO_DEPTH on a flush. Otherwise
// it gains one credit for every entry popped from the fetch FIFO, gains one
// credit for every in-flight request that is killed, and loses one credit
// for every request issued in this cycle.
assign fifo_credits_d = (flush_i) ? FETCH_FIFO_DEPTH :
fifo_credits_q + fifo_pop + s2_eff_kill - issue_req;
// check whether there is a request in flight that is being killed now
// if this is the case, we need to increment the credit by 1
assign s2_eff_kill = s2_in_flight_q & icache_dreq_o.kill_s2;
// in-flight tracking, in decreasing priority: a flush clears the flag,
// a newly issued request sets it, a valid response from the icache clears
// it, otherwise the flag holds its value
assign s2_in_flight_d = (flush_i) ? 1'b0 :
(issue_req) ? 1'b1 :
(icache_dreq_i.valid) ? 1'b0 :
s2_in_flight_q;
// only enable counter if current request is not being killed
assign issue_req = if_ready & (~icache_dreq_o.kill_s1);
// an entry leaves the FIFO when it is acknowledged while being valid
assign fifo_pop = fetch_ack_i & fetch_entry_valid_o;
// ready as long as at least one credit is left
assign fifo_ready = (|fifo_credits_q);
// a fetch can be issued when the icache is ready and a credit is available
assign if_ready = icache_dreq_i.ready & fifo_ready;
// the request towards the icache is raised whenever a credit is available
assign icache_dreq_o.req = fifo_ready;
// the output entry is valid whenever the fetch FIFO holds data
assign fetch_entry_valid_o = ~fifo_empty;
//pragma translate_off
`ifndef VERILATOR
    // Simulation-only checks of the fetch FIFO credit system and of the
    // parameter values this front-end relies on.
    // $fatal takes the finish number as its first argument.

    // the credit counter must never exceed the number of FIFO entries
    fetch_fifo_credits0 : assert property (
        @(posedge clk_i) disable iff (~rst_ni) (fifo_credits_q <= FETCH_FIFO_DEPTH))
        else $fatal(1, "[frontend] fetch fifo credits must be <= FETCH_FIFO_DEPTH!");

    // one-time parameter checks at the start of simulation
    initial begin
        assert (FETCH_FIFO_DEPTH<=8) else $fatal(1, "[frontend] fetch fifo deeper than 8 not supported");
        assert (FETCH_WIDTH==32) else $fatal(1, "[frontend] fetch width != 32 not supported");
    end
`endif
//pragma translate_on
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
npc_q <= boot_addr_i;
npc_q <= '0;
npc_rst_load_q <= 1'b1;
icache_data_q <= '0;
icache_valid_q <= 1'b0;
icache_vaddr_q <= 'b0;
@ -379,15 +416,20 @@ module frontend #(
unaligned_q <= 1'b0;
unaligned_address_q <= '0;
unaligned_instr_q <= '0;
fifo_credits_q <= FETCH_FIFO_DEPTH;
s2_in_flight_q <= 1'b0;
end else begin
npc_rst_load_q <= 1'b0;
npc_q <= npc_d;
icache_data_q <= icache_data_d;
icache_valid_q <= icache_valid_d;
icache_vaddr_q <= icache_vaddr_d;
icache_ex_q <= icache_ex_d;
icache_data_q <= icache_dreq_i.data;
icache_valid_q <= icache_dreq_i.valid;
icache_vaddr_q <= icache_dreq_i.vaddr;
icache_ex_q <= icache_dreq_i.ex;
unaligned_q <= unaligned_d;
unaligned_address_q <= unaligned_address_d;
unaligned_instr_q <= unaligned_instr_d;
fifo_credits_q <= fifo_credits_d;
s2_in_flight_q <= s2_in_flight_d;
end
end
@ -421,33 +463,6 @@ module frontend #(
.*
);
icache #(
.SET_ASSOCIATIVITY ( 4 ),
.CACHE_LINE_WIDTH ( 128 ),
.FETCH_WIDTH ( FETCH_WIDTH )
) i_icache (
.clk_i,
.rst_ni,
.flush_i ( flush_icache_i ),
.en_cache_i,
.vaddr_i ( fetch_vaddr ), // 1st cycle
.data_o ( icache_data_d ),
.req_i ( icache_req ),
.kill_s1_i ( kill_s1 ),
.kill_s2_i ( kill_s2 ),
.ready_o ( icache_ready ),
.valid_o ( icache_valid_d ),
.ex_o ( icache_ex_d ),
.vaddr_o ( icache_vaddr_d ),
.axi,
.fetch_req_o,
.fetch_vaddr_o,
.fetch_valid_i,
.fetch_paddr_i,
.fetch_exception_i,
.miss_o ( l1_icache_miss_o )
);
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
instr_scan i_instr_scan (
.instr_i ( instr[i] ),
@ -468,19 +483,25 @@ module frontend #(
);
end
fetch_fifo i_fetch_fifo (
.flush_i ( flush_i ),
.branch_predict_i ( bp_sbe ),
.ex_i ( icache_ex_q ),
.addr_i ( icache_vaddr_q ),
.rdata_i ( icache_data_q ),
.valid_i ( fifo_valid ),
.ready_o ( fifo_ready ),
.fetch_entry_o ( fetch_entry_o ),
.fetch_entry_valid_o( fetch_entry_valid_o ),
.fetch_ack_i ( fetch_ack_i ),
.*
);
fifo_v2 #(
.DEPTH ( 8 ),
.dtype ( fetch_entry_t ))
i_fetch_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
.testmode_i ( 1'b0 ),
.full_o ( ),
.empty_o ( fifo_empty ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( {icache_vaddr_q, icache_data_q, bp_sbe, icache_ex_q} ),
.push_i ( fifo_valid ),
.data_o ( fetch_entry_o ),
.pop_i ( fifo_pop )
);
endmodule

View file

@ -219,6 +219,11 @@ module instr_realigner (
unaligned_n = 1'b0;
compressed_n = 1'b0;
end
// assign the correct address for a potentially faulting unaligned instruction
// we've already done the re-alignment for the instruction word so we
// can just assign it here to tval
fetch_entry_o.ex.tval = fetch_entry_o.address;
end
// ---------

View file

@ -107,8 +107,8 @@ module issue_read_operands #(
logic forward_rs1, forward_rs2, forward_rs3;
// original instruction stored in tval
instruction_t orig_instr;
assign orig_instr = instruction_t'(issue_instr_i.ex.tval[31:0]);
riscv::instruction_t orig_instr;
assign orig_instr = riscv::instruction_t'(issue_instr_i.ex.tval[31:0]);
// ID <-> EX registers
assign operand_a_o = operand_a_q;
@ -344,9 +344,9 @@ module issue_read_operands #(
logic [NR_COMMIT_PORTS-1:0][63:0] wdata_pack;
logic [NR_COMMIT_PORTS-1:0] we_pack;
assign raddr_pack = {issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]};
assign waddr_pack = {waddr_i[1], waddr_i[0]};
assign wdata_pack = {wdata_i[1], wdata_i[0]};
assign we_pack = {we_gpr_i[1], we_i[0]};
assign waddr_pack = {waddr_i[1], waddr_i[0]};
assign wdata_pack = {wdata_i[1], wdata_i[0]};
assign we_pack = {we_gpr_i[1], we_gpr_i[0]};
ariane_regfile #(
.DATA_WIDTH ( 64 ),

View file

@ -1,68 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Igor Loi - University of Bologna
// Author: Florian Zaruba, ETH Zurich
// Date: 12.11.2017
// Description: 8-bit LFSR
// --------------
// 8-bit LFSR
// --------------
//
// Description: Shift register for way selection
//
// 8-bit linear feedback shift register used for pseudo-random way selection.
//
// Parameters:
//   SEED  - reset value of the shift register. The feedback is an XNOR of
//           bits 7, 3, 2 and 1, so the all-ones value 8'hFF maps onto itself
//           and must not be used as a seed; the default all-zeros is fine.
//   WIDTH - number of ways to choose from (width of the one-hot output).
//
// Ports:
//   clk_i / rst_ni  - clock and asynchronous active-low reset
//   en_i            - advance the shift register by one step
//   refill_way_oh   - selected way, one-hot encoded
//   refill_way_bin  - selected way, binary encoded
module lfsr #(
    parameter logic [7:0]  SEED  = 8'b0,
    parameter int unsigned WIDTH = 8
)(
    input  logic                     clk_i,
    input  logic                     rst_ni,
    input  logic                     en_i,
    output logic [WIDTH-1:0]         refill_way_oh,
    output logic [$clog2(WIDTH)-1:0] refill_way_bin
);

    // number of shift register bits used to select a way
    localparam int unsigned LOG_WIDTH = $clog2(WIDTH);

    logic [7:0] shift_d, shift_q;

    always_comb begin
        automatic logic shift_in;
        // XNOR feedback of taps 7, 3, 2 and 1
        shift_in = !(shift_q[7] ^ shift_q[3] ^ shift_q[2] ^ shift_q[1]);

        // hold the state unless a new value is requested
        shift_d = shift_q;

        if (en_i)
            shift_d = {shift_q[6:0], shift_in};

        // output assignment: the lowest LOG_WIDTH bits of the current state
        // select the way, once one-hot and once binary encoded
        refill_way_oh = 'b0;
        refill_way_oh[shift_q[LOG_WIDTH-1:0]] = 1'b1;
        refill_way_bin = shift_q[LOG_WIDTH-1:0];
    end

    always_ff @(posedge clk_i or negedge rst_ni) begin : p_shift_regs
        if(~rst_ni) begin
            shift_q <= SEED;
        end else begin
            shift_q <= shift_d;
        end
    end

    `ifndef SYNTHESIS
        initial begin
            // the checked condition allows WIDTH == 8, the message says so too
            assert (WIDTH <= 8) else $fatal(1, "WIDTH needs to be less than or equal to 8 because of the 8-bit LFSR");
            // for other values the selected index can fall outside of
            // refill_way_oh (or the index range collapses for WIDTH == 1);
            // only warn so that existing setups keep running
            assert (WIDTH >= 2 && WIDTH == 2**LOG_WIDTH)
                else $warning("WIDTH should be a power of two >= 2, otherwise the selected way can be out of range");
        end
    `endif

endmodule

View file

@ -8,8 +8,9 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 22.05.2017
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Load Unit, takes care of all load requests
import ariane_pkg::*;
@ -37,19 +38,8 @@ module load_unit (
output logic [11:0] page_offset_o,
input logic page_offset_matches_i,
// D$ interface
output logic [11:0] address_index_o,
output logic [43:0] address_tag_o,
output amo_t amo_op_o,
output logic [63:0] data_wdata_o,
output logic data_req_o,
output logic data_we_o,
output logic [7:0] data_be_o,
output logic [1:0] data_size_o,
output logic kill_req_o,
output logic tag_valid_o,
input logic data_gnt_i,
input logic data_rvalid_i,
input logic [63:0] data_rdata_i
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o
);
enum logic [2:0] {IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, ABORT_TRANSACTION, WAIT_TRANSLATION, WAIT_FLUSH} NS, CS;
// in order to decouple the response interface from the request interface we need a
@ -58,21 +48,22 @@ module load_unit (
logic [TRANS_ID_BITS-1:0] trans_id;
logic [2:0] address_offset;
fu_op operator;
} load_data_n, load_data_q, in_data;
} load_data_d, load_data_q, in_data;
// page offset is defined as the lower 12 bits, feed through for address checker
assign page_offset_o = lsu_ctrl_i.vaddr[11:0];
// feed-through the virtual address for VA translation
assign vaddr_o = lsu_ctrl_i.vaddr;
// this is a read-only interface so set the write enable to 0
assign data_we_o = 1'b0;
assign req_port_o.data_we = 1'b0;
assign req_port_o.data_wdata = '0;
// compose the queue data, control is handled in the FSM
assign in_data = {lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.operator};
// output address
// we can now output the lower 12 bit as the index to the cache
assign address_index_o = lsu_ctrl_i.vaddr[11:0];
assign req_port_o.address_index = lsu_ctrl_i.vaddr[11:0];
// translation from last cycle, again: control is handled in the FSM
assign address_tag_o = paddr_i[55:12];
assign req_port_o.address_tag = paddr_i[55:12];
// directly output an exception
assign ex_o = ex_i;
@ -81,16 +72,16 @@ module load_unit (
// ---------------
always_comb begin : load_control
// default assignments
NS = CS;
load_data_n = load_data_q;
translation_req_o = 1'b0;
data_req_o = 1'b0;
NS = CS;
load_data_d = load_data_q;
translation_req_o = 1'b0;
req_port_o.data_req = 1'b0;
// tag control
kill_req_o = 1'b0;
tag_valid_o = 1'b0;
data_be_o = lsu_ctrl_i.be;
data_size_o = extract_transfer_size(lsu_ctrl_i.operator);
pop_ld_o = 1'b0;
req_port_o.kill_req = 1'b0;
req_port_o.tag_valid = 1'b0;
req_port_o.data_be = lsu_ctrl_i.be;
req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operator);
pop_ld_o = 1'b0;
case (CS)
IDLE: begin
@ -102,9 +93,9 @@ module load_unit (
// check if the page offset matches with a store, if it does then stall and wait
if (!page_offset_matches_i) begin
// make a load request to memory
data_req_o = 1'b1;
req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!data_gnt_i) begin
if (!req_port_i.data_gnt) begin
NS = WAIT_GNT;
end else begin
if (dtlb_hit_i) begin
@ -133,8 +124,8 @@ module load_unit (
// we are here because of a TLB miss, we need to abort the current request and give way for the
// PTW walker to satisfy the TLB miss
ABORT_TRANSACTION: begin
kill_req_o = 1'b1;
tag_valid_o = 1'b1;
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// redo the request by going back to the wait gnt state
NS = WAIT_TRANSLATION;
end
@ -150,9 +141,9 @@ module load_unit (
// keep the translation request up
translation_req_o = 1'b1;
// keep the request up
data_req_o = 1'b1;
req_port_o.data_req = 1'b1;
// we finally got a data grant
if (data_gnt_i) begin
if (req_port_i.data_gnt) begin
// so we send the tag in the next cycle
if (dtlb_hit_i) begin
NS = SEND_TAG;
@ -164,7 +155,7 @@ module load_unit (
end
// we know for sure that the tag we want to send is valid
SEND_TAG: begin
tag_valid_o = 1'b1;
req_port_o.tag_valid = 1'b1;
NS = IDLE;
// we can make a new request here if we got one
if (valid_i) begin
@ -174,9 +165,9 @@ module load_unit (
// check if the page offset matches with a store, if it does stall and wait
if (!page_offset_matches_i) begin
// make a load request to memory
data_req_o = 1'b1;
req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!data_gnt_i) begin
if (!req_port_i.data_gnt) begin
NS = WAIT_GNT;
end else begin
// we got a grant so we can send the tag in the next cycle
@ -197,15 +188,15 @@ module load_unit (
// ----------
// if we got an exception we need to kill the request immediately
if (ex_i.valid) begin
kill_req_o = 1'b1;
req_port_o.kill_req = 1'b1;
end
end
WAIT_FLUSH: begin
// the D$ arbiter will take care of presenting this to the memory only in case we
// have an outstanding request
kill_req_o = 1'b1;
tag_valid_o = 1'b1;
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// we've killed the current request so we can go back to idle
NS = IDLE;
end
@ -217,13 +208,13 @@ module load_unit (
// the next state will be the idle state
NS = IDLE;
// pop load - but only if we are not getting an rvalid in here - otherwise we will overwrite an incoming transaction
if (!data_rvalid_i)
if (!req_port_i.data_rvalid)
pop_ld_o = 1'b1;
end
// save the load data for later usage -> we should not clutter the load_data register
if (pop_ld_o && !ex_i.valid) begin
load_data_n = in_data;
load_data_d = in_data;
end
// if we just flushed and the queue is not empty or we are getting an rvalid this cycle wait in a extra stage
@ -241,9 +232,9 @@ module load_unit (
// output the queue data directly, the valid signal is set corresponding to the process above
trans_id_o = load_data_q.trans_id;
// we got an rvalid and are currently not flushing and not aborting the request
if (data_rvalid_i && CS != WAIT_FLUSH) begin
if (req_port_i.data_rvalid && CS != WAIT_FLUSH) begin
// only signal a valid result if we did not kill the request
if(!kill_req_o)
if(!req_port_o.kill_req)
valid_o = 1'b1;
// the output is also valid if we got an exception
if (ex_i.valid)
@ -254,7 +245,7 @@ module load_unit (
// exceptions can retire out-of-order -> but we need to give priority to non-excepting load and stores
// so we simply check if we got an rvalid if so we prioritize it by not retiring the exception - we simply go for another
// round in the load FSM
if (valid_i && ex_i.valid && !data_rvalid_i) begin
if (valid_i && ex_i.valid && !req_port_i.data_rvalid) begin
valid_o = 1'b1;
trans_id_o = lsu_ctrl_i.trans_id;
// if we are waiting for the translation to finish do not give a valid signal yet
@ -272,7 +263,7 @@ module load_unit (
load_data_q <= '0;
end else begin
CS <= NS;
load_data_q <= load_data_n;
load_data_q <= load_data_d;
end
end
@ -280,33 +271,33 @@ module load_unit (
// AMO Operation
// ---------------
always_comb begin : amo_op_select
amo_op_o = AMO_NONE;
req_port_o.amo_op = AMO_NONE;
if (lsu_ctrl_i.valid) begin
case (lsu_ctrl_i.operator)
AMO_LRW: amo_op_o = AMO_LR;
AMO_LRD: amo_op_o = AMO_LR;
AMO_SCW: amo_op_o = AMO_SC;
AMO_SCD: amo_op_o = AMO_SC;
AMO_SWAPW: amo_op_o = AMO_SWAP;
AMO_ADDW: amo_op_o = AMO_ADD;
AMO_ANDW: amo_op_o = AMO_AND;
AMO_ORW: amo_op_o = AMO_OR;
AMO_XORW: amo_op_o = AMO_XOR;
AMO_MAXW: amo_op_o = AMO_MAX;
AMO_MAXWU: amo_op_o = AMO_MAXU;
AMO_MINW: amo_op_o = AMO_MIN;
AMO_MINWU: amo_op_o = AMO_MINU;
AMO_SWAPD: amo_op_o = AMO_SWAP;
AMO_ADDD: amo_op_o = AMO_ADD;
AMO_ANDD: amo_op_o = AMO_AND;
AMO_ORD: amo_op_o = AMO_OR;
AMO_XORD: amo_op_o = AMO_XOR;
AMO_MAXD: amo_op_o = AMO_MAX;
AMO_MAXDU: amo_op_o = AMO_MAXU;
AMO_MIND: amo_op_o = AMO_MIN;
AMO_MINDU: amo_op_o = AMO_MINU;
default: amo_op_o = AMO_NONE;
AMO_LRW: req_port_o.amo_op = AMO_LR;
AMO_LRD: req_port_o.amo_op = AMO_LR;
AMO_SCW: req_port_o.amo_op = AMO_SC;
AMO_SCD: req_port_o.amo_op = AMO_SC;
AMO_SWAPW: req_port_o.amo_op = AMO_SWAP;
AMO_ADDW: req_port_o.amo_op = AMO_ADD;
AMO_ANDW: req_port_o.amo_op = AMO_AND;
AMO_ORW: req_port_o.amo_op = AMO_OR;
AMO_XORW: req_port_o.amo_op = AMO_XOR;
AMO_MAXW: req_port_o.amo_op = AMO_MAX;
AMO_MAXWU: req_port_o.amo_op = AMO_MAXU;
AMO_MINW: req_port_o.amo_op = AMO_MIN;
AMO_MINWU: req_port_o.amo_op = AMO_MINU;
AMO_SWAPD: req_port_o.amo_op = AMO_SWAP;
AMO_ADDD: req_port_o.amo_op = AMO_ADD;
AMO_ANDD: req_port_o.amo_op = AMO_AND;
AMO_ORD: req_port_o.amo_op = AMO_OR;
AMO_XORD: req_port_o.amo_op = AMO_XOR;
AMO_MAXD: req_port_o.amo_op = AMO_MAX;
AMO_MAXDU: req_port_o.amo_op = AMO_MAXU;
AMO_MIND: req_port_o.amo_op = AMO_MIN;
AMO_MINDU: req_port_o.amo_op = AMO_MINU;
default: req_port_o.amo_op = AMO_NONE;
endcase
end
end
@ -314,108 +305,86 @@ module load_unit (
// ---------------
// Sign Extend
// ---------------
logic [63:0] rdata_d_ext; // sign extension for double words, actually only misaligned assembly
logic [63:0] rdata_w_ext; // sign extension for words
logic [63:0] rdata_h_ext; // sign extension for half words
logic [63:0] rdata_b_ext; // sign extension for bytes
logic [63:0] shifted_data;
logic [63:0] rdata_fw_box; // nan-boxing for single floats
logic [63:0] rdata_fh_box; // nan-boxing for half floats
logic [63:0] rdata_fb_box; // nan-boxing for quarter floats
// realign as needed
assign shifted_data = req_port_i.data_rdata >> {load_data_q.address_offset, 3'b000};
// double words or double floats
always_comb begin : sign_extend_double_word
rdata_d_ext = data_rdata_i[63:0];
end
// sign extension for words
always_comb begin : sign_extend_word
case (load_data_q.address_offset)
default: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[31]}}, data_rdata_i[31:0]} : {32'h0, data_rdata_i[31:0]};
3'b001: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[39]}}, data_rdata_i[39:8]} : {32'h0, data_rdata_i[39:8]};
3'b010: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[47]}}, data_rdata_i[47:16]} : {32'h0, data_rdata_i[47:16]};
3'b011: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[55]}}, data_rdata_i[55:24]} : {32'h0, data_rdata_i[55:24]};
3'b100: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[63]}}, data_rdata_i[63:32]} : {32'h0, data_rdata_i[63:32]};
endcase
end
// nan-boxing single floats
always_comb begin : nan_box_single_float
case (load_data_q.address_offset)
default: rdata_fw_box = {{32{1'b1}}, data_rdata_i[31:0]};
3'b001: rdata_fw_box = {{32{1'b1}}, data_rdata_i[39:8]};
3'b010: rdata_fw_box = {{32{1'b1}}, data_rdata_i[47:16]};
3'b011: rdata_fw_box = {{32{1'b1}}, data_rdata_i[55:24]};
3'b100: rdata_fw_box = {{32{1'b1}}, data_rdata_i[63:32]};
endcase
end
// sign extension for half words
always_comb begin : sign_extend_half_word
case (load_data_q.address_offset)
default: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[15]}}, data_rdata_i[15:0]} : {48'h0, data_rdata_i[15:0]};
3'b001: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[23]}}, data_rdata_i[23:8]} : {48'h0, data_rdata_i[23:8]};
3'b010: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[31]}}, data_rdata_i[31:16]} : {48'h0, data_rdata_i[31:16]};
3'b011: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[39]}}, data_rdata_i[39:24]} : {48'h0, data_rdata_i[39:24]};
3'b100: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[47]}}, data_rdata_i[47:32]} : {48'h0, data_rdata_i[47:32]};
3'b101: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[55]}}, data_rdata_i[55:40]} : {48'h0, data_rdata_i[55:40]};
3'b110: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[63]}}, data_rdata_i[63:48]} : {48'h0, data_rdata_i[63:48]};
endcase
end
// nan-boxing half floats
always_comb begin : nan_box_half_float
case (load_data_q.address_offset)
default: rdata_fh_box = {{48{1'b1}}, data_rdata_i[15:0]};
3'b001: rdata_fh_box = {{48{1'b1}}, data_rdata_i[23:8]};
3'b010: rdata_fh_box = {{48{1'b1}}, data_rdata_i[31:16]};
3'b011: rdata_fh_box = {{48{1'b1}}, data_rdata_i[39:24]};
3'b100: rdata_fh_box = {{48{1'b1}}, data_rdata_i[47:32]};
3'b101: rdata_fh_box = {{48{1'b1}}, data_rdata_i[55:40]};
3'b110: rdata_fh_box = {{48{1'b1}}, data_rdata_i[63:48]};
endcase
end
// sign extend byte
always_comb begin : sign_extend_byte
case (load_data_q.address_offset)
default: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[7]}}, data_rdata_i[7:0]} : {56'h0, data_rdata_i[7:0]};
3'b001: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[15]}}, data_rdata_i[15:8]} : {56'h0, data_rdata_i[15:8]};
3'b010: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[23]}}, data_rdata_i[23:16]} : {56'h0, data_rdata_i[23:16]};
3'b011: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[31]}}, data_rdata_i[31:24]} : {56'h0, data_rdata_i[31:24]};
3'b100: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[39]}}, data_rdata_i[39:32]} : {56'h0, data_rdata_i[39:32]};
3'b101: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[47]}}, data_rdata_i[47:40]} : {56'h0, data_rdata_i[47:40]};
3'b110: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[55]}}, data_rdata_i[55:48]} : {56'h0, data_rdata_i[55:48]};
3'b111: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[63]}}, data_rdata_i[63:56]} : {56'h0, data_rdata_i[63:56]};
endcase
end
// nan-boxing quarter floats
always_comb begin : nan_box_quarter_float
case (load_data_q.address_offset)
default: rdata_fb_box = {{56{1'b1}}, data_rdata_i[7:0]};
3'b001: rdata_fb_box = {{56{1'b1}}, data_rdata_i[15:8]};
3'b010: rdata_fb_box = {{56{1'b1}}, data_rdata_i[23:16]};
3'b011: rdata_fb_box = {{56{1'b1}}, data_rdata_i[31:24]};
3'b100: rdata_fb_box = {{56{1'b1}}, data_rdata_i[39:32]};
3'b101: rdata_fb_box = {{56{1'b1}}, data_rdata_i[47:40]};
3'b110: rdata_fb_box = {{56{1'b1}}, data_rdata_i[55:48]};
3'b111: rdata_fb_box = {{56{1'b1}}, data_rdata_i[63:56]};
endcase
end
// Result Mux
/* // result mux (leaner code, but more logic stages.
// can be used instead of the code below (in between //result mux fast) if timing is not so critical)
always_comb begin
case (load_data_q.operator)
LW, LWU: result_o = rdata_w_ext;
FLW: result_o = rdata_fw_box;
LH, LHU: result_o = rdata_h_ext;
FLH: result_o = rdata_fh_box;
LB, LBU: result_o = rdata_b_ext;
FLB: result_o = rdata_fb_box;
default: result_o = rdata_d_ext;
unique case (load_data_q.operator)
LWU: result_o = shifted_data[31:0];
LHU: result_o = shifted_data[15:0];
LBU: result_o = shifted_data[7:0];
LW: result_o = 64'(signed'(shifted_data[31:0]));
LH: result_o = 64'(signed'(shifted_data[15:0]));
LB: result_o = 64'(signed'(shifted_data[ 7:0]));
default: result_o = shifted_data;
endcase
end */
// result mux fast
// The sign (or NaN-boxing) information is pre-computed one cycle ahead from
// the *next* value of the load data register and then registered. That way
// idx_q, signed_q and fp_sign_q all describe the very same load as
// load_data_q does in the cycle in which the read data is muxed.
logic [7:0] sign_bits;
logic [2:0] idx_d, idx_q;
logic       sign_bit, signed_d, signed_q, fp_sign_d, fp_sign_q;

// prepare these signals for faster selection in the next cycle
// all three are derived from load_data_d so that they stay aligned with
// each other and with load_data_q after the register stage below
assign signed_d  = load_data_d.operator inside { LW, LH, LB };
assign fp_sign_d = load_data_d.operator inside { FLW, FLH, FLB };
// byte index of the most significant byte of the accessed element
assign idx_d     = (load_data_d.operator inside {LW, FLW}) ? load_data_d.address_offset + 3 :
                   (load_data_d.operator inside {LH, FLH}) ? load_data_d.address_offset + 1 :
                                                             load_data_d.address_offset;

// most significant bit of every byte of the returned data word
assign sign_bits = { req_port_i.data_rdata[63],
                     req_port_i.data_rdata[55],
                     req_port_i.data_rdata[47],
                     req_port_i.data_rdata[39],
                     req_port_i.data_rdata[31],
                     req_port_i.data_rdata[23],
                     req_port_i.data_rdata[15],
                     req_port_i.data_rdata[7] };

// select correct sign bit in parallel to result shifter above
// pull to 0 if unsigned, force to 1 for floating-point loads (NaN-boxing)
assign sign_bit = (signed_q & sign_bits[idx_q]) | fp_sign_q;

// result mux
// narrow floating-point loads share the path of the integer loads of the
// same size, their upper bits are filled with ones through fp_sign_q
always_comb begin
    unique case (load_data_q.operator)
        LW, LWU, FLW: result_o = {{32{sign_bit}}, shifted_data[31:0]};
        LH, LHU, FLH: result_o = {{48{sign_bit}}, shifted_data[15:0]};
        LB, LBU, FLB: result_o = {{56{sign_bit}}, shifted_data[7:0]};
        default:      result_o = shifted_data;
    endcase
end

always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
    if(~rst_ni) begin
        idx_q     <= 0;
        signed_q  <= 0;
        fp_sign_q <= 0;
    end else begin
        idx_q     <= idx_d;
        signed_q  <= signed_d;
        fp_sign_q <= fp_sign_d;
    end
end
// end result mux fast
`ifndef SYNTHESIS
`ifndef VERILATOR
    // Simulation-only checks: the address offset stored with a load must
    // leave room for the accessed element inside the 64-bit data word.
    // $fatal takes the finish number as its first argument.

    // check invalid offsets
    load_offset_word : assert property (@(posedge clk_i) disable iff (~rst_ni)
        (load_data_q.operator inside {LW, LWU}) |-> load_data_q.address_offset < 5) else $fatal (1, "invalid address offset used with {LW, LWU}");
    load_offset_half : assert property (@(posedge clk_i) disable iff (~rst_ni)
        (load_data_q.operator inside {LH, LHU}) |-> load_data_q.address_offset < 7) else $fatal (1, "invalid address offset used with {LH, LHU}");
    load_offset_byte : assert property (@(posedge clk_i) disable iff (~rst_ni)
        (load_data_q.operator inside {LB, LBU}) |-> load_data_q.address_offset < 8) else $fatal (1, "invalid address offset used with {LB, LBU}");
`endif
`endif
endmodule

View file

@ -15,10 +15,7 @@
import ariane_pkg::*;
module lsu #(
parameter int unsigned ASID_WIDTH = 1,
parameter logic [63:0] CACHE_START_ADDR = 64'h4000_0000,
parameter int unsigned AXI_ID_WIDTH = 10,
parameter int unsigned AXI_USER_WIDTH = 1
parameter int unsigned ASID_WIDTH = 1
)(
input logic clk_i,
input logic rst_ni,
@ -42,11 +39,9 @@ module lsu #(
input logic enable_translation_i, // enable virtual memory translation
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic fetch_req_i, // Instruction fetch interface
input logic [63:0] fetch_vaddr_i, // Instruction fetch interface
output logic fetch_valid_o, // Instruction fetch interface
output logic [63:0] fetch_paddr_o, // Instruction fetch interface
output exception_t fetch_exception_o, // Instruction fetch interface
// icache translation requests
input icache_areq_o_t icache_areq_i,
output icache_areq_i_t icache_areq_o,
input riscv::priv_lvl_t priv_lvl_i, // From CSR register file
input riscv::priv_lvl_t ld_st_priv_lvl_i, // From CSR register file
@ -58,14 +53,10 @@ module lsu #(
// Performance counters
output logic itlb_miss_o,
output logic dtlb_miss_o,
output logic dcache_miss_o,
input logic dcache_en_i,
input logic flush_dcache_i,
output logic flush_dcache_ack_o,
// Data cache refill port
AXI_BUS.Master data_if,
AXI_BUS.Master bypass_if,
// interface to dcache
input dcache_req_o_t [2:0] dcache_req_ports_i,
output dcache_req_i_t [2:0] dcache_req_ports_o,
output exception_t lsu_exception_o // to WB, signal exception status LD/ST exception
@ -139,74 +130,29 @@ module lsu #(
assign amo_op_i[0] = AMO_NONE;
assign amo_op_i[2] = AMO_NONE;
// decreasing priority
// Port 0: PTW
// Port 1: Load Unit
// Port 2: Store Unit
nbdcache #(
.CACHE_START_ADDR ( CACHE_START_ADDR ),
.AXI_ID_WIDTH ( AXI_ID_WIDTH ),
.AXI_USER_WIDTH ( AXI_USER_WIDTH )
) i_nbdcache (
// to D$
.data_if ( data_if ),
.bypass_if ( bypass_if ),
.enable_i ( dcache_en_i ),
.flush_i ( flush_dcache_i ),
.flush_ack_o ( flush_dcache_ack_o ),
// from PTW, Load Unit and Store Unit
.address_index_i ( address_index_i ),
.address_tag_i ( address_tag_i ),
.data_wdata_i ( data_wdata_i ),
.data_req_i ( data_req_i ),
.data_we_i ( data_we_i ),
.data_be_i ( data_be_i ),
.data_size_i ( data_size_i ),
.kill_req_i ( kill_req_i ),
.tag_valid_i ( tag_valid_i ),
.data_gnt_o ( data_gnt_o ),
.data_rvalid_o ( data_rvalid_o ),
.data_rdata_o ( data_rdata_o ),
.amo_op_i ( amo_op_i ),
.amo_commit_i ( ),
.amo_valid_o ( ),
.amo_result_o ( ),
.amo_flush_i ( 1'b0 ),
.miss_o ( dcache_miss_o ),
.*
);
// -------------------
// MMU e.g.: TLBs/PTW
// -------------------
mmu #(
.INSTR_TLB_ENTRIES ( 16 ),
.DATA_TLB_ENTRIES ( 16 ),
.ASID_WIDTH ( ASID_WIDTH )
.INSTR_TLB_ENTRIES ( 16 ),
.DATA_TLB_ENTRIES ( 16 ),
.ASID_WIDTH ( ASID_WIDTH )
) i_mmu (
// misaligned bypass
.misaligned_ex_i ( misaligned_exception ),
.lsu_is_store_i ( st_translation_req ),
.lsu_req_i ( translation_req ),
.lsu_vaddr_i ( mmu_vaddr ),
.lsu_valid_o ( translation_valid ),
.lsu_paddr_o ( mmu_paddr ),
.lsu_exception_o ( mmu_exception ),
.lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request
.misaligned_ex_i ( misaligned_exception ),
.lsu_is_store_i ( st_translation_req ),
.lsu_req_i ( translation_req ),
.lsu_vaddr_i ( mmu_vaddr ),
.lsu_valid_o ( translation_valid ),
.lsu_paddr_o ( mmu_paddr ),
.lsu_exception_o ( mmu_exception ),
.lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request
// connecting PTW to D$ IF (aka mem arbiter)
.address_index_o ( address_index_i [0] ),
.address_tag_o ( address_tag_i [0] ),
.data_wdata_o ( data_wdata_i [0] ),
.data_req_o ( data_req_i [0] ),
.data_we_o ( data_we_i [0] ),
.data_be_o ( data_be_i [0] ),
.data_size_o ( data_size_i [0] ),
.kill_req_o ( kill_req_i [0] ),
.tag_valid_o ( tag_valid_i [0] ),
.data_gnt_i ( data_gnt_o [0] ),
.data_rvalid_i ( data_rvalid_o [0] ),
.data_rdata_i ( data_rdata_o [0] ),
.req_port_i ( dcache_req_ports_i [0] ),
.req_port_o ( dcache_req_ports_o [0] ),
// icache address translation requests
.icache_areq_i ( icache_areq_i ),
.icache_areq_o ( icache_areq_o ),
.*
);
// ------------------
@ -231,17 +177,8 @@ module lsu #(
.page_offset_i ( page_offset ),
.page_offset_matches_o ( page_offset_matches ),
// to memory arbiter
.address_index_o ( address_index_i [2] ),
.address_tag_o ( address_tag_i [2] ),
.data_wdata_o ( data_wdata_i [2] ),
.data_req_o ( data_req_i [2] ),
.data_we_o ( data_we_i [2] ),
.data_be_o ( data_be_i [2] ),
.data_size_o ( data_size_i [2] ),
.kill_req_o ( kill_req_i [2] ),
.tag_valid_o ( tag_valid_i [2] ),
.data_gnt_i ( data_gnt_o [2] ),
.data_rvalid_i ( data_rvalid_o [2] ),
.req_port_i ( dcache_req_ports_i [2] ),
.req_port_o ( dcache_req_ports_o [2] ),
.*
);
@ -267,19 +204,8 @@ module lsu #(
.page_offset_o ( page_offset ),
.page_offset_matches_i ( page_offset_matches ),
// to memory arbiter
.address_index_o ( address_index_i [1] ),
.address_tag_o ( address_tag_i [1] ),
.data_wdata_o ( data_wdata_i [1] ),
.amo_op_o ( amo_op_i [1] ),
.data_req_o ( data_req_i [1] ),
.data_we_o ( data_we_i [1] ),
.data_be_o ( data_be_i [1] ),
.data_size_o ( data_size_i [1] ),
.kill_req_o ( kill_req_i [1] ),
.tag_valid_o ( tag_valid_i [1] ),
.data_gnt_i ( data_gnt_o [1] ),
.data_rvalid_i ( data_rvalid_o [1] ),
.data_rdata_i ( data_rdata_o [1] ),
.req_port_i ( dcache_req_ports_i [1] ),
.req_port_o ( dcache_req_ports_o [1] ),
.*
);

View file

@ -8,8 +8,9 @@
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 22.05.2017
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: Arbitrates the LSU result port
import ariane_pkg::*;
@ -34,95 +35,99 @@ module lsu_arbiter (
output logic [63:0] result_o,
output exception_t ex_o
);
// this is a dual input FIFO which takes results from the load and store
// paths of the LSU and sequentializes through the FIFO construct. If there is a valid output
// it unconditionally posts the result on its output ports and expects it to be consumed.
// 4 entries is enough to unconditionally post loads and stores since we can only have two outstanding loads
localparam int WIDTH = 4;
// the two fifos are used to buffer results from ld and st paths, and the unit arbitrates between these results in
// RR fashion. FIFOs need to be 2 deep in order to unconditionally accept loads and stores since we can
// have a maximum of 2 outstanding loads.
// if there are valid elements in the fifos, the unit posts the result on its output ports and expects it
// to be consumed unconditionally
// queue pointer
logic [$clog2(WIDTH)-1:0] read_pointer_n, read_pointer_q;
logic [$clog2(WIDTH)-1:0] write_pointer_n, write_pointer_q;
logic [$clog2(WIDTH)-1:0] status_cnt_n, status_cnt_q;
localparam int DEPTH = 2;
struct packed {
typedef struct packed {
logic [TRANS_ID_BITS-1:0] trans_id;
logic [63:0] result;
exception_t ex;
} mem_n[WIDTH-1:0], mem_q[WIDTH-1:0];
} fifo_t;
// output last element of queue
assign trans_id_o = mem_q[read_pointer_q].trans_id;
assign result_o = mem_q[read_pointer_q].result;
assign ex_o = mem_q[read_pointer_q].ex;
fifo_t st_in, st_out, ld_in, ld_out;
// if we are not empty we have a valid output
assign valid_o = (status_cnt_q != '0);
// -------------------
// Read-Write Process
// -------------------
always_comb begin : read_write_fifo
automatic logic [$clog2(WIDTH)-1:0] status_cnt;
automatic logic [$clog2(WIDTH)-1:0] write_pointer;
logic ld_full, ld_empty, ld_ren;
logic st_full, st_empty, st_ren;
logic idx;
status_cnt = status_cnt_q;
write_pointer = write_pointer_q;
assign st_in.trans_id = st_trans_id_i;
assign st_in.result = st_result_i;
assign st_in.ex = st_ex_i;
// default assignments
mem_n = mem_q;
read_pointer_n = read_pointer_q;
// ------------
// Write Port
// ------------
// write port 1 - load unit
if (ld_valid_i) begin
mem_n[write_pointer] = {ld_trans_id_i, ld_result_i, ld_ex_i};
write_pointer++;
status_cnt++;
end
// write port 2 - store unit
if (st_valid_i) begin
mem_n[write_pointer] = {st_trans_id_i, st_result_i, st_ex_i};
write_pointer++;
status_cnt++;
end
// ------------
// Read Port
// ------------
// if the last element in the queue was valid we can push it out and make space for a new element
if (valid_o) begin
read_pointer_n = read_pointer_q + 1;
status_cnt--;
end
assign ld_in.trans_id = ld_trans_id_i;
assign ld_in.result = ld_result_i;
assign ld_in.ex = ld_ex_i;
// update status count
status_cnt_n = status_cnt;
// update write pointer
write_pointer_n = write_pointer;
assign trans_id_o = (idx) ? st_out.trans_id : ld_out.trans_id;
assign result_o = (idx) ? st_out.result : ld_out.result;
assign ex_o = (idx) ? st_out.ex : ld_out.ex;
// round robin with "lookahead" for 2 requesters
rrarbiter #(
.NUM_REQ ( 2 )
) i_rrarbiter (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
.en_i ( 1'b1 ),
.req_i ( {~st_empty, ~ld_empty} ),
.ack_o ( { st_ren, ld_ren } ),
.vld_o ( valid_o ),
.idx_o ( idx )
);
fifo_v2 #(
.dtype ( fifo_t ),
.DEPTH ( DEPTH )
) i_ld_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
.testmode_i ( 1'b0 ),
.full_o ( ld_full ),
.empty_o ( ld_empty ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( ld_in ),
.push_i ( ld_valid_i ),
.data_o ( ld_out ),
.pop_i ( ld_ren )
);
fifo_v2 #(
.dtype ( fifo_t ),
.DEPTH ( DEPTH )
) i_st_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
.testmode_i ( 1'b0 ),
.full_o ( st_full ),
.empty_o ( st_empty ),
.alm_full_o ( ),
.alm_empty_o ( ),
.data_i ( st_in ),
.push_i ( st_valid_i ),
.data_o ( st_out ),
.pop_i ( st_ren )
);
`ifndef SYNTHESIS
`ifndef VERILATOR
// check fifo control signals
assert property (@(posedge clk_i) disable iff (~rst_ni) ld_full |-> !ld_valid_i) else $fatal ("cannot write full ld_fifo");
assert property (@(posedge clk_i) disable iff (~rst_ni) st_full |-> !st_valid_i) else $fatal ("cannot write full st_fifo");
assert property (@(posedge clk_i) disable iff (~rst_ni) ld_empty |-> !ld_ren) else $fatal ("cannot read empty ld_fifo");
assert property (@(posedge clk_i) disable iff (~rst_ni) st_empty |-> !st_ren) else $fatal ("cannot read empty st_fifo");
`endif
`endif
// ------------
// Flush
// ------------
if (flush_i) begin
status_cnt_n = '0;
write_pointer_n = '0;
read_pointer_n = '0;
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mem_q <= '{default: 0};
read_pointer_q <= '0;
write_pointer_q <= '0;
status_cnt_q <= '0;
end else begin
mem_q <= mem_n;
read_pointer_q <= read_pointer_n;
write_pointer_q <= write_pointer_n;
status_cnt_q <= status_cnt_n;
end
end
endmodule

View file

@ -26,14 +26,9 @@ module mmu #(
input logic flush_i,
input logic enable_translation_i,
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
// IF interface
input logic fetch_req_i,
input logic [63:0] fetch_vaddr_i,
output logic fetch_valid_o, // translation is valid
output logic [63:0] fetch_paddr_o,
output exception_t fetch_exception_o, // write-back fetch exceptions (e.g.: bus faults, page faults, etc.)
input icache_areq_o_t icache_areq_i,
output icache_areq_i_t icache_areq_o,
// LSU interface
// this is a more minimalistic interface because the actual addressing logic is handled
// in the LSU as we distinguish load and stores, what we do here is simple address translation
@ -61,18 +56,8 @@ module mmu #(
output logic itlb_miss_o,
output logic dtlb_miss_o,
// PTW memory interface
output logic [11:0] address_index_o,
output logic [43:0] address_tag_o,
output logic [63:0] data_wdata_o,
output logic data_req_o,
output logic data_we_o,
output logic [7:0] data_be_o,
output logic [1:0] data_size_o,
output logic kill_req_o,
output logic tag_valid_o,
input logic data_gnt_i,
input logic data_rvalid_i,
input logic [63:0] data_rdata_i
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o
);
logic iaccess_err; // insufficient privilege to access this instruction page
@ -80,29 +65,28 @@ module mmu #(
logic ptw_active; // PTW is currently walking a page table
logic walking_instr; // PTW is walking because of an ITLB miss
logic ptw_error; // PTW threw an exception
logic [63:0] faulting_address;
logic [38:0] update_vaddr;
tlb_update_t update_ptw_itlb, update_ptw_dtlb;
logic itlb_update;
logic itlb_lu_access;
riscv::pte_t itlb_content;
logic itlb_is_2M;
logic itlb_is_1G;
logic itlb_lu_hit;
logic dtlb_update;
logic dtlb_lu_access;
riscv::pte_t dtlb_content;
logic dtlb_is_2M;
logic dtlb_is_1G;
logic dtlb_lu_hit;
// Assignments
assign itlb_lu_access = fetch_req_i;
assign itlb_lu_access = icache_areq_i.fetch_req;
assign dtlb_lu_access = lsu_req_i;
tlb #(
.TLB_ENTRIES ( INSTR_TLB_ENTRIES ),
.ASID_WIDTH ( ASID_WIDTH )
@ -115,7 +99,7 @@ module mmu #(
.lu_access_i ( itlb_lu_access ),
.lu_asid_i ( asid_i ),
.lu_vaddr_i ( fetch_vaddr_i ),
.lu_vaddr_i ( icache_areq_i.fetch_vaddr ),
.lu_content_o ( itlb_content ),
.lu_is_2M_o ( itlb_is_2M ),
@ -152,7 +136,6 @@ module mmu #(
.ptw_active_o ( ptw_active ),
.walking_instr_o ( walking_instr ),
.ptw_error_o ( ptw_error ),
.faulting_address_o ( faulting_address ),
.enable_translation_i ( enable_translation_i ),
.update_vaddr_o ( update_vaddr ),
@ -161,11 +144,15 @@ module mmu #(
.itlb_access_i ( itlb_lu_access ),
.itlb_hit_i ( itlb_lu_hit ),
.itlb_vaddr_i ( fetch_vaddr_i ),
.itlb_vaddr_i ( icache_areq_i.fetch_vaddr ),
.dtlb_access_i ( dtlb_lu_access ),
.dtlb_hit_i ( dtlb_lu_hit ),
.dtlb_vaddr_i ( lsu_vaddr_i ),
.req_port_i ( req_port_i ),
.req_port_o ( req_port_o ),
.*
);
@ -175,36 +162,36 @@ module mmu #(
// The instruction interface is a simple request response interface
always_comb begin : instr_interface
// MMU disabled: just pass through
fetch_valid_o = fetch_req_i;
fetch_paddr_o = fetch_vaddr_i; // play through in case we disabled address translation
icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr; // play through in case we disabled address translation
// two potential exception sources:
// 1. HPTW threw an exception -> signal with a page fault exception
// 2. We got an access error because of insufficient permissions -> throw an access exception
fetch_exception_o = '0;
icache_areq_o.fetch_exception = '0;
// Check whether we are allowed to access this memory region from a fetch perspective
iaccess_err = fetch_req_i && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
|| ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
iaccess_err = icache_areq_i.fetch_req && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
|| ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
// check that the upper-most bits (63-39) are the same, otherwise throw a page fault exception...
if (fetch_req_i && !((&fetch_vaddr_i[63:39]) == 1'b1 || (|fetch_vaddr_i[63:39]) == 1'b0)) begin
fetch_exception_o = {riscv::INSTR_PAGE_FAULT, fetch_vaddr_i, 1'b1};
if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[63:39]) == 1'b1 || (|icache_areq_i.fetch_vaddr[63:39]) == 1'b0)) begin
icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, icache_areq_i.fetch_vaddr, 1'b1};
end
// MMU enabled: address from TLB, request delayed until hit. Error when TLB
// hit and no access right or TLB hit and translated address not valid (e.g.
// AXI decode error), or when PTW performs walk due to ITLB miss and raises
// an error.
if (enable_translation_i) begin
fetch_valid_o = 1'b0;
icache_areq_o.fetch_valid = 1'b0;
// 4K page
fetch_paddr_o = {itlb_content.ppn, fetch_vaddr_i[11:0]};
icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
// Mega page
if (itlb_is_2M) begin
fetch_paddr_o[20:12] = fetch_vaddr_i[20:12];
icache_areq_o.fetch_paddr[20:12] = icache_areq_i.fetch_vaddr[20:12];
end
// Giga page
if (itlb_is_1G) begin
fetch_paddr_o[29:12] = fetch_vaddr_i[29:12];
icache_areq_o.fetch_paddr[29:12] = icache_areq_i.fetch_vaddr[29:12];
end
// ---------
@ -212,11 +199,11 @@ module mmu #(
// --------
// if we hit the ITLB output the request signal immediately
if (itlb_lu_hit) begin
fetch_valid_o = fetch_req_i;
icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
// we got an access error
if (iaccess_err) begin
// throw a page fault
fetch_exception_o = {riscv::INSTR_PAGE_FAULT, fetch_vaddr_i, 1'b1};
icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, icache_areq_i.fetch_vaddr, 1'b1};
end
end else
// ---------
@ -224,8 +211,8 @@ module mmu #(
// ---------
// watch out for exceptions happening during walking the page table
if (ptw_active && walking_instr) begin
fetch_valid_o = ptw_error;
fetch_exception_o = {riscv::INSTR_PAGE_FAULT, {25'b0, update_vaddr}, 1'b1};
icache_areq_o.fetch_valid = ptw_error;
icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {25'b0, update_vaddr}, 1'b1};
end
end
end

View file

@ -69,9 +69,9 @@ module mult (
// ---------------------
// Division
// ---------------------
logic [5:0] ff1_result; // holds the index of the last '1' (as the input operand is reversed)
logic ff1_no_one; // no one was found by find first one
logic [63:0] ff1_input; // input to find first one
logic [5:0] lzc_result; // holds the index of the last '1' (as the input operand is reversed)
logic lzc_no_one; // no one was found by find first one
logic [63:0] lzc_input; // input to find first one
logic [63:0] operand_b_rev, operand_b_rev_neg, operand_b_shift; // couple of different representations for the dividend
logic [6:0] div_shift; // amount of which to shift to left
logic div_signed; // should this operation be performed as a signed or unsigned division
@ -95,7 +95,7 @@ module mult (
endgenerate
// negated reverse input operand, used for signed divisions
assign operand_b_rev_neg = ~operand_b_rev;
assign ff1_input = (div_op_signed) ? operand_b_rev_neg : operand_b_rev;
assign lzc_input = (div_op_signed) ? operand_b_rev_neg : operand_b_rev;
// prepare the input operands and control divider
always_comb begin
@ -139,19 +139,19 @@ module mult (
end
// ---------------------
// Find First one
// Leading Zero Counter
// ---------------------
// this unit is used to speed up the sequential division by shifting the dividend first
find_first_one #(
.WIDTH ( 64 )
) i_ff1 (
.in_i ( ff1_input ), // signed = operand_b_rev_neg, unsigned operand_b_rev
.first_one_o ( ff1_result ),
.no_ones_o ( ff1_no_one )
lzc #(
.WIDTH ( 64 )
) i_lzc (
.in_i ( lzc_input ), // signed = operand_b_rev_neg, unsigned operand_b_rev
.cnt_o ( lzc_result ),
.empty_o ( lzc_no_one )
);
// if the dividend is all zero go for the full length
assign div_shift = ff1_no_one ? 7'd64 : ff1_result;
assign div_shift = lzc_no_one ? 7'd64 : lzc_result;
// prepare dividend by shifting
assign operand_b_shift = operand_b <<< div_shift;

View file

@ -27,24 +27,15 @@ module ptw #(
output logic ptw_active_o,
output logic walking_instr_o, // set when walking for TLB
output logic ptw_error_o, // set when an error occurred
output logic [63:0] faulting_address_o, // the address which threw the page-fault exception
input logic enable_translation_i, // CSRs indicate to enable SV39
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic lsu_is_store_i, // this translation was triggered by a store
// PTW Memory Port
output logic [11:0] address_index_o,
output logic [43:0] address_tag_o,
output logic [63:0] data_wdata_o,
output logic data_req_o,
output logic data_we_o,
output logic [7:0] data_be_o,
output logic [1:0] data_size_o,
output logic kill_req_o,
output logic tag_valid_o,
input logic data_gnt_i,
input logic data_rvalid_i,
input logic [63:0] data_rdata_i,
// PTW memory interface
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o,
// to TLBs, update logic
output tlb_update_t itlb_update_o,
output tlb_update_t dtlb_update_o,
@ -69,6 +60,9 @@ module ptw #(
output logic dtlb_miss_o
);
assign req_port_o.amo_op = AMO_NONE;
// input registers
logic data_rvalid_q;
logic [63:0] data_rdata_q;
@ -82,7 +76,7 @@ module ptw #(
PTE_LOOKUP,
WAIT_RVALID,
PROPAGATE_ERROR
} CS, NS;
} state_q, state_d;
// SV39 defines three levels of page tables
enum logic [1:0] {
@ -104,15 +98,15 @@ module ptw #(
// Assignments
assign update_vaddr_o = vaddr_q;
assign ptw_active_o = (CS != IDLE);
assign ptw_active_o = (state_q != IDLE);
assign walking_instr_o = is_instr_ptw_q;
// directly output the correct physical address
assign address_index_o = ptw_pptr_q[11:0];
assign address_tag_o = ptw_pptr_q[55:12];
assign req_port_o.address_index = ptw_pptr_q[11:0];
assign req_port_o.address_tag = ptw_pptr_q[55:12];
// we are never going to kill this request
assign kill_req_o = '0;
assign req_port_o.kill_req = '0;
// we are never going to write with the HPTW
assign data_wdata_o = 64'b0;
assign req_port_o.data_wdata = 64'b0;
// -----------
// TLB Update
// -----------
@ -130,7 +124,7 @@ module ptw #(
assign itlb_update_o.content = pte | (global_mapping_q << 5);
assign dtlb_update_o.content = pte | (global_mapping_q << 5);
assign tag_valid_o = tag_valid_q;
assign req_port_o.tag_valid = tag_valid_q;
//-------------------
// Page table walker
@ -158,28 +152,27 @@ module ptw #(
always_comb begin : ptw
// default assignments
// PTW memory interface
tag_valid_n = 1'b0;
data_req_o = 1'b0;
data_be_o = 8'hFF;
data_size_o = 2'b11;
data_we_o = 1'b0;
ptw_error_o = 1'b0;
itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
NS = CS;
global_mapping_n = global_mapping_q;
// input registers
tlb_update_asid_n = tlb_update_asid_q;
vaddr_n = vaddr_q;
faulting_address_o = '0;
tag_valid_n = 1'b0;
req_port_o.data_req = 1'b0;
req_port_o.data_be = 8'hFF;
req_port_o.data_size = 2'b11;
req_port_o.data_we = 1'b0;
ptw_error_o = 1'b0;
itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
state_d = state_q;
global_mapping_n = global_mapping_q;
// input registers
tlb_update_asid_n = tlb_update_asid_q;
vaddr_n = vaddr_q;
itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0;
itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0;
case (CS)
case (state_q)
IDLE: begin
// by default we start with the top-most page table
@ -192,26 +185,26 @@ module ptw #(
is_instr_ptw_n = 1'b1;
tlb_update_asid_n = asid_i;
vaddr_n = itlb_vaddr_i;
NS = WAIT_GRANT;
state_d = WAIT_GRANT;
itlb_miss_o = 1'b1;
// we got a DTLB miss
end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[38:30], 3'b0};
tlb_update_asid_n = asid_i;
vaddr_n = dtlb_vaddr_i;
NS = WAIT_GRANT;
state_d = WAIT_GRANT;
dtlb_miss_o = 1'b1;
end
end
WAIT_GRANT: begin
// send a request out
data_req_o = 1'b1;
req_port_o.data_req = 1'b1;
// stay in WAIT_GRANT until the request is granted
if (data_gnt_i) begin
if (req_port_i.data_gnt) begin
// send the tag valid signal one cycle later
tag_valid_n = 1'b1;
NS = PTE_LOOKUP;
state_d = PTE_LOOKUP;
end
end
@ -228,12 +221,12 @@ module ptw #(
// -------------
// If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
if (!pte.v || (!pte.r && pte.w))
NS = PROPAGATE_ERROR;
state_d = PROPAGATE_ERROR;
// -----------
// Valid PTE
// -----------
else begin
NS = IDLE;
state_d = IDLE;
// it is a valid PTE
// if pte.r = 1 or pte.x = 1 it is a valid PTE
if (pte.r || pte.x) begin
@ -246,7 +239,7 @@ module ptw #(
// doesn't put a useless entry into the TLB. The same idea applies
// to the access flag since we let the access flag be managed by SW.
if (!pte.x || !pte.a)
NS = PROPAGATE_ERROR;
state_d = PROPAGATE_ERROR;
else
itlb_update_o.valid = 1'b1;
@ -262,25 +255,25 @@ module ptw #(
if (pte.a && (pte.r || (pte.x && mxr_i))) begin
dtlb_update_o.valid = 1'b1;
end else begin
NS = PROPAGATE_ERROR;
state_d = PROPAGATE_ERROR;
end
// Request is a store: perform some additional checks
// If the request was a store and the page is not write-able, raise an error
// the same applies if the dirty flag is not set
if (lsu_is_store_i && (!pte.w || !pte.d)) begin
dtlb_update_o.valid = 1'b0;
NS = PROPAGATE_ERROR;
state_d = PROPAGATE_ERROR;
end
end
// check if the ppn is correctly aligned:
// 6. If i > 0 and pa.ppn[i-1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
// exception.
if (ptw_lvl_q == LVL1 && pte.ppn[17:0] != '0) begin
NS = PROPAGATE_ERROR;
state_d = PROPAGATE_ERROR;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end else if (ptw_lvl_q == LVL2 && pte.ppn[8:0] != '0) begin
NS = PROPAGATE_ERROR;
state_d = PROPAGATE_ERROR;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end
@ -299,12 +292,12 @@ module ptw #(
ptw_pptr_n = {pte.ppn, vaddr_q[20:12], 3'b0};
end
NS = WAIT_GRANT;
state_d = WAIT_GRANT;
if (ptw_lvl_q == LVL3) begin
// Should already be the last level page table => Error
ptw_lvl_n = LVL3;
NS = PROPAGATE_ERROR;
state_d = PROPAGATE_ERROR;
end
end
end
@ -313,14 +306,16 @@ module ptw #(
end
// Propagate error to MMU/LSU
PROPAGATE_ERROR: begin
NS = IDLE;
ptw_error_o = 1'b1;
faulting_address_o = vaddr_q;
state_d = IDLE;
ptw_error_o = 1'b1;
end
// wait for the rvalid before going back to IDLE
WAIT_RVALID: begin
if (data_rvalid_q)
NS = IDLE;
state_d = IDLE;
end
default: begin
state_d = IDLE;
end
endcase
@ -333,28 +328,28 @@ module ptw #(
// 1. in the PTE Lookup check whether we still need to wait for an rvalid
// 2. waiting for a grant, if so: wait for it
// if not, go back to idle
if ((CS == PTE_LOOKUP && !data_rvalid_q) || ((CS == WAIT_GRANT) && data_gnt_i))
NS = WAIT_RVALID;
if ((state_q == PTE_LOOKUP && !data_rvalid_q) || ((state_q == WAIT_GRANT) && req_port_i.data_gnt))
state_d = WAIT_RVALID;
else
NS = IDLE;
state_d = IDLE;
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
CS <= IDLE;
state_q <= IDLE;
is_instr_ptw_q <= 1'b0;
ptw_lvl_q <= LVL1;
tag_valid_q <= 1'b0;
tlb_update_asid_q <= '{default: 0};
tlb_update_asid_q <= '0;
vaddr_q <= '0;
ptw_pptr_q <= '{default: 0};
ptw_pptr_q <= '0;
global_mapping_q <= 1'b0;
data_rdata_q <= '0;
data_rvalid_q <= 1'b0;
end else begin
CS <= NS;
state_q <= state_d;
ptw_pptr_q <= ptw_pptr_n;
is_instr_ptw_q <= is_instr_ptw_n;
ptw_lvl_q <= ptw_lvl_n;
@ -362,8 +357,8 @@ module ptw #(
tlb_update_asid_q <= tlb_update_asid_n;
vaddr_q <= vaddr_n;
global_mapping_q <= global_mapping_n;
data_rdata_q <= data_rdata_i;
data_rvalid_q <= data_rvalid_i;
data_rdata_q <= req_port_i.data_rdata;
data_rvalid_q <= req_port_i.data_rvalid;
end
end

View file

@ -13,6 +13,8 @@
// Description: Store queue persists store requests and pushes them to memory
// if they are no longer speculative
import ariane_pkg::*;
module store_buffer (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
@ -37,23 +39,15 @@ module store_buffer (
input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write)
// D$ interface
output logic [11:0] address_index_o,
output logic [43:0] address_tag_o,
output logic [63:0] data_wdata_o,
output logic data_req_o,
output logic data_we_o,
output logic [7:0] data_be_o,
output logic [1:0] data_size_o,
output logic kill_req_o,
output logic tag_valid_o,
input logic data_gnt_i,
input logic data_rvalid_i // not used
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o
);
// depth of store-buffers
localparam int unsigned DEPTH_SPEC = 4;
// allocate more space for the commit buffer to be on the safe side
localparam int unsigned DEPTH_COMMIT = 4;
// the store queue has two parts:
// 1. Speculative queue
// 2. Commit queue which is non-speculative, e.g.: the store will definitely happen.
@ -77,6 +71,10 @@ module store_buffer (
logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q;
logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q;
assign req_port_o.amo_op = AMO_NONE;
// ----------------------------------------
// Speculative Queue - Core Interface
// ----------------------------------------
@ -132,17 +130,17 @@ module store_buffer (
// Commit Queue - Memory Interface
// ----------------------------------------
// those signals can directly be output to the memory
assign address_index_o = commit_queue_q[commit_read_pointer_q].address[11:0];
assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[11:0];
// if we got a new request we already saved the tag from the previous cycle
assign address_tag_o = commit_queue_q[commit_read_pointer_q].address[55:12];
assign tag_valid_o = 1'b0;
assign data_wdata_o = commit_queue_q[commit_read_pointer_q].data;
assign data_be_o = commit_queue_q[commit_read_pointer_q].be;
assign data_size_o = commit_queue_q[commit_read_pointer_q].data_size;
assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[55:12];
assign req_port_o.tag_valid = 1'b0;
assign req_port_o.data_wdata = commit_queue_q[commit_read_pointer_q].data;
assign req_port_o.data_be = commit_queue_q[commit_read_pointer_q].be;
assign req_port_o.data_size = commit_queue_q[commit_read_pointer_q].data_size;
// we will never kill a request in the store buffer since we already know that the translation is valid
// e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
assign kill_req_o = 1'b0;
assign data_we_o = 1'b1; // we will always write in the store queue
assign req_port_o.kill_req = 1'b0;
assign req_port_o.data_we = 1'b1; // we will always write in the store queue
always_comb begin : store_if
automatic logic [DEPTH_COMMIT:0] commit_status_cnt;
@ -157,13 +155,13 @@ module store_buffer (
commit_queue_n = commit_queue_q;
data_req_o = 1'b0;
req_port_o.data_req = 1'b0;
// there should be no commit when we are flushing
// if the entry in the commit queue is valid and not speculative anymore we can issue this instruction
if (commit_queue_q[commit_read_pointer_q].valid) begin
data_req_o = 1'b1;
if (data_gnt_i) begin
req_port_o.data_req = 1'b1;
if (req_port_i.data_gnt) begin
// we can evict it from the commit buffer
commit_queue_n[commit_read_pointer_q].valid = 1'b0;
// advance the read_pointer

View file

@ -41,17 +41,8 @@ module store_unit (
input logic [11:0] page_offset_i,
output logic page_offset_matches_o,
// D$ interface
output logic [11:0] address_index_o,
output logic [43:0] address_tag_o,
output logic [63:0] data_wdata_o,
output logic data_req_o,
output logic data_we_o,
output logic [7:0] data_be_o,
output logic [1:0] data_size_o,
output logic kill_req_o,
output logic tag_valid_o,
input logic data_gnt_i,
input logic data_rvalid_i
input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o
);
assign result_o = 64'b0;
@ -206,6 +197,10 @@ module store_unit (
.data_size_i ( st_data_size_q ),
// store buffer out
.ready_o ( st_ready ),
.req_port_i ( req_port_i ),
.req_port_o ( req_port_o ),
.*
);
// ---------------

View file

@ -1,46 +0,0 @@
// Copyright 2017, 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Date: 13.10.2017
// Description: SRAM Behavioral Model
// Behavioral single-port SRAM model.
//
// Parameters:
//   DATA_WIDTH - width of one memory word in bits
//   NUM_WORDS  - number of words in the array (address width is $clog2(NUM_WORDS))
//
// Ports:
//   clk_i   - clock; all state updates happen on the rising edge
//   req_i   - request strobe; nothing is sampled while it is low
//   we_i    - selects the write path when a request is active, otherwise the
//             read address is captured
//   addr_i  - word address for the read or write
//   wdata_i - write data
//   be_i    - per-bit write enable (one enable bit per data bit)
//   rdata_o - word addressed by the most recently captured read address
//
// Note: the model neither randomizes the array contents nor randomizes the
// output while no request is active; the original notes list both as items
// ("randomize array", "randomize output when no request is active").
module sram #(
    int unsigned DATA_WIDTH = 64,
    int unsigned NUM_WORDS  = 1024
)(
    input  logic                         clk_i,
    input  logic                         req_i,
    input  logic                         we_i,
    input  logic [$clog2(NUM_WORDS)-1:0] addr_i,
    input  logic [DATA_WIDTH-1:0]        wdata_i,
    input  logic [DATA_WIDTH-1:0]        be_i,
    output logic [DATA_WIDTH-1:0]        rdata_o
);
    localparam ADDR_WIDTH = $clog2(NUM_WORDS);

    // storage array and the registered read address
    logic [DATA_WIDTH-1:0] ram [NUM_WORDS-1:0];
    logic [ADDR_WIDTH-1:0] raddr_q;

    // the read data follows the registered address, so a read request
    // becomes visible on rdata_o after the clock edge that captured it
    assign rdata_o = ram[raddr_q];

    always_ff @(posedge clk_i) begin
        if (req_i) begin
            if (!we_i) begin
                // read request: remember which word to present on rdata_o
                raddr_q <= addr_i;
            end else begin
                // write request: update only the bits whose enable is set
                for (int bit_idx = 0; bit_idx < DATA_WIDTH; bit_idx++) begin
                    if (be_i[bit_idx]) begin
                        ram[addr_i][bit_idx] <= wdata_i[bit_idx];
                    end
                end
            end
        end
    end
endmodule

View file

@ -23,7 +23,7 @@ module cluster_clock_gating (
`ifdef PULP_FPGA_EMUL
// no clock gates in FPGA flow
assign clk_o = clk_i;
`elseif verilator
`elsif verilator
assign clk_o = clk_i;
`else
logic clk_en;

View file

@ -1,254 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Igor Loi <igor.loi@unibo.it>
// Generic single-clock FIFO with a valid/grant handshake on both sides.
//
// Push side: a word on data_i is stored in every cycle in which valid_i and
//            grant_o are both high. grant_o is low only in the FULL state.
// Pop side : data_o always shows the word addressed by the pop pointer.
//            valid_o is high in the MIDDLE and FULL states, and the pop
//            pointer advances in every such cycle in which grant_i is high.
//
// The FSM and the two pointers run on the free-running clk. The storage
// registers run on clk_gated, which is switched off (gate_clock = 1) in every
// cycle in which no word is written. With PULP_FPGA_EMUL defined the gating
// cell is bypassed and clk_gated is simply clk.
//
// Parameters:
//   DATA_WIDTH - width of one FIFO word in bits (at least 1)
//   DATA_DEPTH - number of words that can be stored (at least 2, see the
//                parameter check below)
module generic_fifo
#(
    parameter int unsigned DATA_WIDTH = 32,
    parameter int unsigned DATA_DEPTH = 8
)
(
    input  logic                  clk,
    input  logic                  rst_n,
    //PUSH SIDE
    input  logic [DATA_WIDTH-1:0] data_i,
    input  logic                  valid_i,
    output logic                  grant_o,
    //POP SIDE
    output logic [DATA_WIDTH-1:0] data_o,
    output logic                  valid_o,
    input  logic                  grant_i,
    input  logic                  test_mode_i
);

    // Local Parameter
    localparam int unsigned ADDR_DEPTH = $clog2(DATA_DEPTH);

    enum logic [1:0] { EMPTY, FULL, MIDDLE } CS, NS;

    // Internal Signals
    logic                  gate_clock;
    logic                  clk_gated;
    logic [ADDR_DEPTH-1:0] Pop_Pointer_CS,  Pop_Pointer_NS;
    logic [ADDR_DEPTH-1:0] Push_Pointer_CS, Push_Pointer_NS;
    logic [DATA_WIDTH-1:0] FIFO_REGISTERS[DATA_DEPTH-1:0];

    // Parameter Check (simulation only, reports but does not stop)
    // synopsys translate_off
    initial
    begin : parameter_check
        if (DATA_WIDTH < 1)
        begin
            $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_WIDTH (legal range: 1 or greater)", DATA_WIDTH );
        end
        // A depth of 1 is rejected as well: the FSM always moves EMPTY -> MIDDLE
        // on the first push and keeps grant_o high there, so a single-entry
        // FIFO would accept a second word on top of the unread first one.
        if (DATA_DEPTH < 2)
        begin
            $display("ERROR: %m :\n Invalid value (%d) for parameter DATA_DEPTH (legal range: greater than 1)", DATA_DEPTH );
        end
    end
    // synopsys translate_on

`ifndef PULP_FPGA_EMUL
    cluster_clock_gating cg_cell
    (
        .clk_i     ( clk         ),
        .en_i      (~gate_clock  ),
        .test_en_i ( test_mode_i ),
        .clk_o     ( clk_gated   )
    );
`else
    assign clk_gated = clk;
`endif

    // UPDATE THE STATE
    always_ff @(posedge clk, negedge rst_n)
    begin
        if(rst_n == 1'b0)
        begin
            CS              <= EMPTY;
            Pop_Pointer_CS  <= {ADDR_DEPTH {1'b0}};
            Push_Pointer_CS <= {ADDR_DEPTH {1'b0}};
        end
        else
        begin
            CS              <= NS;
            Pop_Pointer_CS  <= Pop_Pointer_NS;
            Push_Pointer_CS <= Push_Pointer_NS;
        end
    end

    // Compute Next State
    always_comb
    begin
        gate_clock = 1'b0;

        case(CS)

            EMPTY:
            begin
                grant_o = 1'b1;
                valid_o = 1'b0;

                case(valid_i)
                    1'b0 :
                    begin
                        NS              = EMPTY;
                        Push_Pointer_NS = Push_Pointer_CS;
                        Pop_Pointer_NS  = Pop_Pointer_CS;
                        gate_clock      = 1'b1;
                    end

                    1'b1:
                    begin
                        NS = MIDDLE;
                        // Wrap explicitly, exactly like every other push branch.
                        // A plain "+ 1" only wraps correctly when DATA_DEPTH is a
                        // power of two; otherwise the pointer would step to
                        // DATA_DEPTH and index past the end of FIFO_REGISTERS.
                        if(Push_Pointer_CS == DATA_DEPTH-1)
                            Push_Pointer_NS = 0;
                        else
                            Push_Pointer_NS = Push_Pointer_CS + 1'b1;
                        Pop_Pointer_NS = Pop_Pointer_CS;
                    end
                endcase
            end//~EMPTY

            MIDDLE:
            begin
                grant_o = 1'b1;
                valid_o = 1'b1;

                case({valid_i,grant_i})

                    // pop only: nothing is written, so the storage clock is gated
                    2'b01:
                    begin
                        gate_clock = 1'b1;

                        // the word being popped is the last one stored
                        if((Pop_Pointer_CS == Push_Pointer_CS -1 ) || ((Pop_Pointer_CS == DATA_DEPTH-1) && (Push_Pointer_CS == 0) ))
                            NS = EMPTY;
                        else
                            NS = MIDDLE;

                        Push_Pointer_NS = Push_Pointer_CS;

                        if(Pop_Pointer_CS == DATA_DEPTH-1)
                            Pop_Pointer_NS = 0;
                        else
                            Pop_Pointer_NS = Pop_Pointer_CS + 1'b1;
                    end

                    // idle
                    2'b00 :
                    begin
                        gate_clock      = 1'b1;
                        NS              = MIDDLE;
                        Push_Pointer_NS = Push_Pointer_CS;
                        Pop_Pointer_NS  = Pop_Pointer_CS;
                    end

                    // push and pop in the same cycle: fill level is unchanged
                    2'b11:
                    begin
                        NS = MIDDLE;

                        if(Push_Pointer_CS == DATA_DEPTH-1)
                            Push_Pointer_NS = 0;
                        else
                            Push_Pointer_NS = Push_Pointer_CS + 1'b1;

                        if(Pop_Pointer_CS == DATA_DEPTH-1)
                            Pop_Pointer_NS = 0;
                        else
                            Pop_Pointer_NS = Pop_Pointer_CS + 1'b1;
                    end

                    // push only
                    2'b10:
                    begin
                        // the push pointer is about to catch up with the pop pointer
                        if(( Push_Pointer_CS == Pop_Pointer_CS - 1) || ( (Push_Pointer_CS == DATA_DEPTH-1) && (Pop_Pointer_CS == 0) ))
                            NS = FULL;
                        else
                            NS = MIDDLE;

                        if(Push_Pointer_CS == DATA_DEPTH - 1)
                            Push_Pointer_NS = 0;
                        else
                            Push_Pointer_NS = Push_Pointer_CS + 1'b1;

                        Pop_Pointer_NS = Pop_Pointer_CS;
                    end

                endcase
            end

            FULL:
            begin
                grant_o    = 1'b0;
                valid_o    = 1'b1;
                // no push is granted while FULL, so the storage never changes
                gate_clock = 1'b1;

                case(grant_i)
                    1'b1:
                    begin
                        NS              = MIDDLE;
                        Push_Pointer_NS = Push_Pointer_CS;

                        if(Pop_Pointer_CS == DATA_DEPTH-1)
                            Pop_Pointer_NS = 0;
                        else
                            Pop_Pointer_NS = Pop_Pointer_CS + 1'b1;
                    end

                    1'b0:
                    begin
                        NS              = FULL;
                        Push_Pointer_NS = Push_Pointer_CS;
                        Pop_Pointer_NS  = Pop_Pointer_CS;
                    end
                endcase
            end // end of FULL

            // unused state encoding: fall back to an empty FIFO
            default :
            begin
                gate_clock      = 1'b1;
                grant_o         = 1'b0;
                valid_o         = 1'b0;
                NS              = EMPTY;
                Pop_Pointer_NS  = 0;
                Push_Pointer_NS = 0;
            end

        endcase
    end

    // Storage: written at the current push pointer whenever a push is accepted
    always_ff @(posedge clk_gated, negedge rst_n)
    begin
        if(rst_n == 1'b0)
        begin
            for (int unsigned i = 0; i < DATA_DEPTH; i++)
                FIFO_REGISTERS[i] <= {DATA_WIDTH {1'b0}};
        end
        else
        begin
            if((grant_o == 1'b1) && (valid_i == 1'b1))
                FIFO_REGISTERS[Push_Pointer_CS] <= data_i;
        end
    end

    assign data_o = FIFO_REGISTERS[Pop_Pointer_CS];

endmodule // generic_fifo

View file

@ -35,7 +35,7 @@ class instruction_trace_item;
// constructor creating a new instruction trace item, e.g.: a single instruction with all relevant information
function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] gp_reg_file [32],
logic [63:0] fp_reg_file [32], logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl, logic debug_mode, branchpredict_t bp);
logic [63:0] fp_reg_file [32], logic [63:0] result, logic [63:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, branchpredict_t bp);
this.simtime = simtime;
this.cycle = cycle;
this.pc = sbe.pc;

View file

@ -132,5 +132,5 @@ parameter INSTR_FCVT_I2F = { 5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv:
// Load/Stores
parameter INSTR_LOAD = {25'b?, riscv::OpcodeLoad};
parameter INSTR_LOAD_FP = {25'b?, riscv::OpcodeLoadFp};
parameter INSTR_LOAD = {25'b?, riscv::OpcodeLoad };
parameter INSTR_STORE = {25'b?, riscv::OpcodeStore };
parameter INSTR_STORE = {25'b?, riscv::OpcodeStore};
parameter INSTR_STORE_FP = {25'b?, riscv::OpcodeStoreFp};

View file

@ -58,7 +58,7 @@ interface instruction_tracer_if (
clocking pck @(posedge clk);
input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr,
st_valid, st_paddr, ld_valid, ld_kill, ld_paddr, resolve_branch,
wdata, we_gpr, we_fpr,, commit_instr, commit_ack, exception, priv_lvl, debug_mode;
wdata, we_gpr, we_fpr, commit_instr, commit_ack, exception, priv_lvl, debug_mode;
endclocking
`endif

74
src/util/sram.sv Normal file
View file

@ -0,0 +1,74 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>, ETH Zurich
// Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
// Date: 15.08.2018
// Description: SRAM wrapper for FPGA (requires the fpga-support submodule)
//
// Note: the wrapped module contains two different implementations for
// ALTERA and XILINX tools, since these follow different coding styles for
// inferrable RAMS with byte enable. define `FPGA_TARGET_XILINX or
// `FPGA_TARGET_ALTERA in your build environment (default is ALTERA)
// SRAM wrapper that builds a DATA_WIDTH-bit wide, NUM_WORDS deep memory out of
// 64-bit wide SyncSpRamBeNx64 macros.
//
// The write data and the byte enables are zero-padded up to the next multiple
// of 64 data bits / 8 enable bits, one macro is instantiated per 64-bit slice,
// and the read data is cut back to DATA_WIDTH bits.
//
// Parameters:
//   DATA_WIDTH - width of one memory word in bits
//   NUM_WORDS  - number of words; the address is $clog2(NUM_WORDS) bits wide
//   OUT_REGS   - forwarded to the OUT_REGS parameter of every macro
//
// Ports:
//   req_i / we_i - chip select and write enable, passed to every slice
//   be_i         - one enable bit per byte of wdata_i
//   rdata_o      - read data (low DATA_WIDTH bits of the macro outputs)
module sram #(
    parameter DATA_WIDTH = 64,
    parameter NUM_WORDS  = 1024,
    parameter OUT_REGS   = 0    // enables output registers in FPGA macro (read lat = 2)
)(
    input  logic                          clk_i,
    input  logic                          rst_ni,
    input  logic                          req_i,
    input  logic                          we_i,
    input  logic [$clog2(NUM_WORDS)-1:0]  addr_i,
    input  logic [DATA_WIDTH-1:0]         wdata_i,
    input  logic [(DATA_WIDTH+7)/8-1:0]   be_i,
    output logic [DATA_WIDTH-1:0]         rdata_o
);

    // widths rounded up to whole 64-bit data slices / whole 8-bit enable slices
    localparam DATA_WIDTH_ALIGNED = ((DATA_WIDTH+63)/64)*64;
    localparam BE_WIDTH_ALIGNED   = (((DATA_WIDTH+7)/8+7)/8)*8;

    logic [DATA_WIDTH_ALIGNED-1:0] wdata_aligned;
    logic [BE_WIDTH_ALIGNED-1:0]   be_aligned;
    logic [DATA_WIDTH_ALIGNED-1:0] rdata_aligned;

    // align to 64 bits for inferrable macro below
    always_comb begin : p_align
        // padding bits stay zero; the padding byte enables are therefore never set
        wdata_aligned                    = '0;
        be_aligned                       = '0;
        wdata_aligned[DATA_WIDTH-1:0]    = wdata_i;
        be_aligned[(DATA_WIDTH+7)/8-1:0] = be_i;

        rdata_o = rdata_aligned[DATA_WIDTH-1:0];
    end

    genvar k;
    generate
        for (k = 0; k<(DATA_WIDTH+63)/64; k++) begin
            // unused byte-enable segments (8bits) are culled by the tool
            SyncSpRamBeNx64 #(
                .ADDR_WIDTH ( $clog2(NUM_WORDS) ),
                .DATA_DEPTH ( NUM_WORDS         ),
                // forward the wrapper parameter instead of a hard-coded 0, so
                // that setting OUT_REGS on this module actually has an effect
                .OUT_REGS   ( OUT_REGS          )
            ) i_ram (
                .Clk_CI    ( clk_i                     ),
                .Rst_RBI   ( rst_ni                    ),
                .CSel_SI   ( req_i                     ),
                .WrEn_SI   ( we_i                      ),
                .BEn_SI    ( be_aligned[k*8 +: 8]      ),
                .WrData_DI ( wdata_aligned[k*64 +: 64] ),
                .Addr_DI   ( addr_i                    ),
                .RdData_DO ( rdata_aligned[k*64 +: 64] )
            );
        end
    endgenerate

endmodule : sram

View file

@ -1,83 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Florian Zaruba, ETH Zurich
// Date: 13.11.2017
// Description: SRAM Model for Xilinx FPGA
// SRAM model that maps three fixed geometries onto Xilinx block RAM.
// Only NUM_WORDS == 256 combined with DATA_WIDTH == 16, 44 or 128 generates
// any logic; for every other parameter combination the generate block is
// empty and rdata_o is left undriven.
//
// Ports:
//   req_i   - request / RAM enable
//   we_i    - write enable (qualifies be_i)
//   be_i    - write enable vector, declared DATA_WIDTH bits wide
//   rdata_o - read data
// NOTE(review): be_i is indexed per bit in the 16-bit RAM, at every 8th bit in
// the 44-bit RAM, but at bits [15:0] in the 128-bit RAM -- confirm against the
// callers whether be_i is meant as a bit enable or a byte enable there.
module sram #(
int unsigned DATA_WIDTH = 64,
int unsigned NUM_WORDS = 1024
)(
input logic clk_i,
input logic req_i,
input logic we_i,
input logic [$clog2(NUM_WORDS)-1:0] addr_i,
input logic [DATA_WIDTH-1:0] wdata_i,
input logic [DATA_WIDTH-1:0] be_i,
output logic [DATA_WIDTH-1:0] rdata_o
);
generate
if (NUM_WORDS == 256) begin
// Dirty RAM: 256 x 16 bit array with per-bit write enable
if (DATA_WIDTH == 16) begin
// shadows the module parameter inside this generate branch (same value, 256)
localparam NUM_WORDS = 2**8;
logic [NUM_WORDS-1:0][15:0] mem;
always_ff @(posedge clk_i) begin
// write: only the bits whose be_i bit is set are updated
if (req_i && we_i) begin
for (int unsigned i = 0; i < 16; i++) begin
if (be_i[i])
mem[addr_i][i] <= wdata_i[i];
end
// read: rdata_o is only updated on a request without write enable
end else if (req_i) begin
rdata_o <= mem[addr_i];
end
end
end
// Tag RAM (instance TAG_RAM): 44 payload bits stored in a wider macro
if (DATA_WIDTH == 44) begin
logic [47:0] data_o;
// drop the padding bits again on the read side
assign rdata_o = data_o[43:0];
// this is actually 48 bits wide
xilinx_dcache_bank_tag_256x46 TAG_RAM (
.clka ( clk_i ),
.ena ( req_i ),
// six write enables, taken from be_i bits 0, 8, 16, 24, 32 and 40
.wea ( {{be_i[40] & we_i}, {be_i[32] & we_i}, {be_i[24] & we_i}, {be_i[16] & we_i}, {be_i[8] & we_i}, {be_i[0] & we_i}} ),
.addra ( addr_i ),
// write data is padded with four zero bits at the top
.dina ( {4'b0, wdata_i} ),
.douta ( data_o )
);
end
// Data RAM (instance DATA_RAM): 128 bit wide
if (DATA_WIDTH == 128) begin
xilinx_dcache_bank_data_256x128 DATA_RAM (
.clka ( clk_i ),
.ena ( req_i ),
// sixteen write enables, taken from be_i bits 15 down to 0
.wea ( {{be_i[15] & we_i}, {be_i[14] & we_i}, {be_i[13] & we_i}, {be_i[12] & we_i}, {be_i[11] & we_i}, {be_i[10] & we_i}, {be_i[9] & we_i}, {be_i[8] & we_i}, {be_i[7] & we_i}, {be_i[6] & we_i}, {be_i[5] & we_i}, {be_i[4] & we_i}, {be_i[3] & we_i}, {be_i[2] & we_i}, {be_i[1] & we_i}, {be_i[0] & we_i}}),
.addra ( addr_i ),
.dina ( wdata_i ),
.douta ( rdata_o )
);
end
end
endgenerate
endmodule

View file

@ -6,7 +6,7 @@ ariane:
include/riscv_pkg.sv,
src/debug/dm_pkg.sv,
include/ariane_pkg.sv,
include/nbdcache_pkg.sv,
include/std_cache_pkg.sv,
src/util/instruction_tracer_if.sv,
src/util/instruction_tracer_pkg.sv,
src/alu.sv,
@ -21,7 +21,6 @@ ariane:
src/decoder.sv,
src/ex_stage.sv,
src/fetch_fifo.sv,
src/ff1.sv,
src/frontend.sv,
src/icache.sv,
src/id_stage.sv,
@ -36,6 +35,10 @@ ariane:
src/mmu.sv,
src/mult.sv,
src/nbdcache.sv,
src/vdregs.sv,
src/std_cache_subsystem.sv,
src/sram_wrapper.sv,
src/pcgen_stage.sv,
src/perf_counters.sv,
src/ptw.sv,
src/re_name.sv,
@ -59,7 +62,6 @@ riscv_regfile_rtl:
]
files: [
src/ariane_regfile.sv,
src/util/behav_sram.sv,
]
riscv_regfile_fpga:
@ -71,5 +73,4 @@ riscv_regfile_fpga:
]
files: [
src/ariane_regfile_ff.sv,
src/util/xilinx_sram.sv,
]

View file

@ -15,97 +15,317 @@
// specific language governing permissions and limitations
// under the License.
#include "svdpi.h"
#include "Variane_wrapped__Dpi.h"
#include "Variane_testharness.h"
#include "verilator.h"
#include "verilated.h"
#include "verilated_vcd_c.h"
#include "Variane_testharness__Dpi.h"
#include <stdio.h>
#include "simmem.h"
#include <fesvr/htif.h>
#include <fesvr/memif.h>
#include <fesvr/htif_hexwriter.h>
#include <time.h>
#include <iostream>
#include <iomanip>
#include <string>
#include <fesvr/option_parser.h>
#include <getopt.h>
#include <chrono>
#include <ctime>
#include <signal.h>
#include <unistd.h>
std::unique_ptr<simmem_t> htif;
bool stop_sim = false;
#include <fesvr/dtm.h>
#include "remote_bitbang.h"
// This software is heavily based on Rocket Chip
// Checkout this awesome project:
// https://github.com/freechipsproject/rocket-chip/
// Read one 64-bit word of target memory through the HTIF memory interface.
// Addresses below 0x80000000 are never forwarded: there is no physical memory
// protection at the moment (in the SoC the AXI bus does this check), so such
// accesses return a fixed marker value instead.
extern unsigned long long read_uint64 (unsigned long long address) {
    if (address >= 0x80000000) {
        return htif->memif().read_uint64(address);
    }
    // invalid access
    return 0xdeadbeafdeadbeef;
}
// Write one 64-bit word to target memory through the HTIF memory interface.
// Unlike read_uint64 there is no address range check on this path.
extern void write_uint64 (unsigned long long address, unsigned long long data) {
    auto&& target_mem = htif->memif();
    target_mem.write_uint64(address, data);
}
// Return the tohost address as reported by the global htif object.
extern unsigned long long get_tohost_address() {
    const unsigned long long tohost = htif->get_tohost_address();
    return tohost;
}
// Return the fromhost address as reported by the global htif object.
extern unsigned long long get_fromhost_address() {
    const unsigned long long fromhost = htif->get_fromhost_address();
    return fromhost;
}
// Print the command line summary to stderr and terminate with exit status 1.
static void help()
{
    // one entry per output line, written in order
    static const char *const usage_lines[] = {
        "usage: ariane C verilator simulator [host options] <target program> [target options]\n",
        "Host Options:\n",
        " --vcd=<file> Dump VCD trace to file\n",
        " --label=<label> Pass a label to the program\n",
        " -p Show simulation performance counters\n",
        " -v Verbose\n",
    };
    for (const char *line : usage_lines) {
        fputs(line, stderr);
    }
    exit(1);
}
// This is a 64-bit integer to reduce wrap over issues and
// allow modulus. You can also use a double, if you wish.
static vluint64_t main_time = 0;
double sc_time_stamp () { // Called by $time in Verilog
return htif->main_time; // converts to double, to match
// what SystemC does
static const char *verilog_plusargs[] = {"jtag_rbb_enable"};
extern dtm_t* dtm;
extern remote_bitbang_t * jtag;
// SIGTERM handler: asks the global dtm object to stop.
// NOTE(review): dtm->stop() runs in signal context -- confirm that it only
// does async-signal-safe work.
void handle_sigterm(int sig) {
    (void) sig; // the signal number itself is not needed
    dtm->stop();
}
// Time stamp callback used for $time in Verilog: hands out the global
// main_time counter converted to double, to match what SystemC does.
double sc_time_stamp () {
    const double now = static_cast<double>(main_time);
    return now;
}
// Print the command line help text to stdout.
//
// program_name is substituted into the synopsis line and into each example
// invocation. The text is assembled from several pieces: the synopsis, the
// emulator options, a heading that is only printed when VM_TRACE == 0, the
// -v / -p options, the host options supplied by HTIF_USAGE_OPTIONS, and the
// examples. The function only prints; it does not exit.
//
// NOTE(review): the getopt tables in main() also accept -c/--cycle-count,
// -m/--max-cycles, -s/--seed, -V/--verbose and (with VM_TRACE) -x/--dump-start,
// none of which are described here.
// NOTE(review): the "-v, --vcd=FILE" line is printed unconditionally, while
// main() only parses -v when VM_TRACE is set.
static void usage(const char * program_name) {
// synopsis
printf("Usage: %s [EMULATOR OPTION]... [VERILOG PLUSARG]... [HOST OPTION]... BINARY [TARGET OPTION]...\n",
program_name);
// general description and the remote bit bang option
fputs("\
Run a BINARY on the Ariane emulator.\n\
\n\
Mandatory arguments to long options are mandatory for short options too.\n\
\n\
EMULATOR OPTIONS\n\
-r, --rbb-port=PORT Use PORT for remote bit bang (with OpenOCD and GDB) \n\
If not specified, a random port will be chosen\n\
automatically.\n\
", stdout);
// heading for the options below, only shown when tracing is compiled out
#if VM_TRACE == 0
fputs("\
\n\
EMULATOR DEBUG OPTIONS (only supported in debug build -- try `make debug`)\n",
stdout);
#endif
fputs("\
-v, --vcd=FILE, Write vcd trace to FILE (or '-' for stdout)\n\
-p, Print performance statistic at end of test\n\
", stdout);
// fputs("\n" PLUSARG_USAGE_OPTIONS, stdout);
fputs("\n" HTIF_USAGE_OPTIONS, stdout);
// Examples: the format string holds three %s, plus a fourth one when VM_TRACE
// is set; the argument list below is extended under the same condition.
// NOTE(review): the second example prints the emulator name in front of
// "spike-dasm < ..." -- confirm that this is the intended command line.
printf("\n"
"EXAMPLES\n"
" - run a bare metal test:\n"
" %s $RISCV/riscv64-unknown-elf/share/riscv-tests/isa/rv64ui-p-add\n"
" - run a bare metal test showing cycle-by-cycle information:\n"
" %s spike-dasm < trace_core_00_0.dasm > trace.out\n"
#if VM_TRACE
" - run a bare metal test to generate a VCD waveform:\n"
" %s -v rv64ui-p-add.vcd $RISCV/riscv64-unknown-elf/share/riscv-tests/isa/rv64ui-p-add\n"
#endif
" - run an ELF (you wrote, called 'hello') using the proxy kernel:\n"
" %s pk hello\n",
program_name, program_name, program_name
#if VM_TRACE
, program_name
#endif
);
}
// Entry point of the Verilator test bench.
//
// Steps:
//   1. parse the EMULATOR options with getopt_long; legacy '+' arguments are
//      mapped to their getopt equivalents, known Verilog plus-args are
//      accepted, and parsing stops at the first HTIF (host) argument or at
//      the binary name
//   2. hand the remaining arguments (prefixed with argv[0]) to dtm_t and
//      create the remote bit bang server on rbb_port
//   3. hold rst_ni low for 10 clock cycles, then toggle clk_i until either
//      dtm or jtag reports done
//   4. report the result on stderr and, with -p, the CPU / wall clock time
//
// Returns the dtm exit code if it is non-zero, otherwise the jtag exit code
// if it is non-zero, otherwise 0. Option errors return 1, -h returns 0.
//
// NOTE(review): max_cycles, random_seed (apart from one failure message),
// print_cycles, start and verbose are parsed but not used by the simulation
// loop in this function.
int main(int argc, char **argv) {
std::clock_t c_start = std::clock();
auto t_start = std::chrono::high_resolution_clock::now();
// Both flags are only ever assigned by their command line switch, so they
// have to start out false: perf is tested at the end of main even when -p was
// never given.
bool verbose = false;
bool perf = false;
unsigned random_seed = (unsigned)time(NULL) ^ (unsigned)getpid();
uint64_t max_cycles = -1;
int ret = 0;
bool print_cycles = false;
// Port numbers are 16 bit unsigned integers.
uint16_t rbb_port = 0;
#if VM_TRACE
FILE * vcdfile = NULL;
uint64_t start = 0;
#endif
char ** htif_argv = NULL;
int verilog_plusargs_legal = 1;
// NOTE(review): the statements from here down to the dump_perf report use
// option_parser_t and the htif object and declare verbose a second time; they
// look like an older option handling path -- confirm whether they are stale.
const char *vcd_file = NULL, *label = NULL;
bool dump_perf = false, verbose= false;
option_parser_t parser;
parser.help(&help);
parser.option('h', 0, 0, [&](const char* s){help();});
parser.option('p', 0, 0, [&](const char* s){dump_perf = true;});
parser.option('v', 0, 0, [&](const char* s){verbose = true;});
parser.option(0, "vcd", 1, [&](const char* s){vcd_file = s;});
parser.option(0, "label", 1, [&](const char* s){label = s;});
auto argv1 = parser.parse(argv);
std::vector<std::string> htif_args(argv1, (const char*const*)argv + argc);
htif.reset(new simmem_t(htif_args, 0x80000000, 8, 2097152));
htif->set_vcd(vcd_file);
htif->set_label(label);
htif->start();
clock_t t;
t = clock();
htif->run();
t = clock() - t;
if (dump_perf) {
fprintf(stderr, "Elapsed Time: %f seconds\n", t*1.0/CLOCKS_PER_SEC);
fprintf(stderr, "Cycles: %2.f \n", htif->main_time/10.0);
fprintf(stderr, "Cycles/s: %2.f\n", (htif->main_time*1.0)/(t*10.0/CLOCKS_PER_SEC));
// Option parsing loop: one getopt_long result per iteration.
while (1) {
static struct option long_options[] = {
{"cycle-count", no_argument, 0, 'c' },
{"help", no_argument, 0, 'h' },
{"max-cycles", required_argument, 0, 'm' },
{"seed", required_argument, 0, 's' },
{"rbb-port", required_argument, 0, 'r' },
{"verbose", no_argument, 0, 'V' },
#if VM_TRACE
{"vcd", required_argument, 0, 'v' },
{"dump-start", required_argument, 0, 'x' },
#endif
HTIF_LONG_OPTIONS
};
int option_index = 0;
// The leading '-' in the option string makes getopt_long return every
// non-option argument as option code 1 instead of reordering argv.
#if VM_TRACE
int c = getopt_long(argc, argv, "-chpm:s:r:v:Vx:", long_options, &option_index);
#else
int c = getopt_long(argc, argv, "-chpm:s:r:V", long_options, &option_index);
#endif
if (c == -1) break;
// Re-entered whenever a legacy '+' argument has been translated into c.
retry:
switch (c) {
// Process long and short EMULATOR options
case '?': usage(argv[0]); return 1;
case 'c': print_cycles = true; break;
case 'h': usage(argv[0]); return 0;
case 'm': max_cycles = atoll(optarg); break;
case 's': random_seed = atoi(optarg); break;
case 'r': rbb_port = atoi(optarg); break;
case 'V': verbose = true; break;
case 'p': perf = true; break;
#if VM_TRACE
case 'v': {
vcdfile = strcmp(optarg, "-") == 0 ? stdout : fopen(optarg, "w");
if (!vcdfile) {
std::cerr << "Unable to open " << optarg << " for VCD write\n";
return 1;
}
break;
}
case 'x': start = atoll(optarg); break;
#endif
// Process legacy '+' EMULATOR arguments by replacing them with
// their getopt equivalents
case 1: {
std::string arg = optarg;
// the first argument that does not start with '+' is the binary
if (arg.substr(0, 1) != "+") {
optind--;
goto done_processing;
}
if (arg == "+verbose")
c = 'V';
else if (arg.substr(0, 12) == "+max-cycles=") {
c = 'm';
optarg = optarg+12;
}
#if VM_TRACE
else if (arg.substr(0, 12) == "+dump-start=") {
c = 'x';
optarg = optarg+12;
}
#endif
else if (arg.substr(0, 12) == "+cycle-count")
c = 'c';
// If we don't find a legacy '+' EMULATOR argument, it still could be
// a VERILOG_PLUSARG and not an error.
else if (verilog_plusargs_legal) {
const char ** plusarg = &verilog_plusargs[0];
// verilog_plusargs carries no NULL sentinel, so the scan is bounded by the
// array length as well; a NULL entry still ends it early.
const char ** plusarg_end = plusarg + sizeof(verilog_plusargs) / sizeof(verilog_plusargs[0]);
int legal_verilog_plusarg = 0;
while ((plusarg != plusarg_end) && *plusarg && (legal_verilog_plusarg == 0)){
if (arg.substr(1, strlen(*plusarg)) == *plusarg) {
legal_verilog_plusarg = 1;
}
plusarg ++;
}
// On a miss c is still 1: the retry below re-enters this case with
// verilog_plusargs_legal cleared and falls through to the HTIF check.
if (!legal_verilog_plusarg) {
verilog_plusargs_legal = 0;
} else {
c = 'P';
}
goto retry;
}
// If we STILL don't find a legacy '+' argument, it still could be
// an HTIF (HOST) argument and not an error. If this is the case, then
// we're done processing EMULATOR and VERILOG arguments.
else {
static struct option htif_long_options [] = { HTIF_LONG_OPTIONS };
struct option * htif_option = &htif_long_options[0];
while (htif_option->name) {
if (arg.substr(1, strlen(htif_option->name)) == htif_option->name) {
optind--;
goto done_processing;
}
htif_option++;
}
std::cerr << argv[0] << ": invalid plus-arg (Verilog or HTIF) \""
<< arg << "\"\n";
c = '?';
}
goto retry;
}
case 'P': break; // Nothing to do here, Verilog PlusArg
// Realize that we've hit HTIF (HOST) arguments or error out
default:
if (c >= HTIF_LONG_OPTIONS_OPTIND) {
optind--;
goto done_processing;
}
c = '?';
goto retry;
}
}
exit(0);
done_processing:
if (optind == argc) {
std::cerr << "No binary specified for emulator\n";
usage(argv[0]);
return 1;
}
// Build the argument vector for dtm_t: argv[0] followed by everything from
// optind onwards.
// NOTE(review): the malloc result is neither checked nor freed.
int htif_argc = 1 + argc - optind;
htif_argv = (char **) malloc((htif_argc) * sizeof (char *));
htif_argv[0] = argv[0];
for (int i = 1; optind < argc;) htif_argv[i++] = argv[optind++];
const char *vcd_file = NULL;
Verilated::commandArgs(argc, argv);
jtag = new remote_bitbang_t(rbb_port);
dtm = new dtm_t(htif_argc, htif_argv);
signal(SIGTERM, handle_sigterm);
std::unique_ptr<Variane_testharness> top(new Variane_testharness);
#if VM_TRACE
Verilated::traceEverOn(true); // Verilator must compute traced signals
std::unique_ptr<VerilatedVcdFILE> vcdfd(new VerilatedVcdFILE(vcdfile));
std::unique_ptr<VerilatedVcdC> tfp(new VerilatedVcdC(vcdfd.get()));
if (vcdfile) {
top->trace(tfp.get(), 99); // Trace 99 levels of hierarchy
tfp->open("");
}
#endif
// Reset phase: 10 full clock cycles with rst_ni held low.
// NOTE(review): tfp->dump() is called here and in the main loop even when no
// VCD file was requested (vcdfile == NULL) -- confirm that dumping to a trace
// that was never opened is harmless.
for (int i = 0; i < 10; i++) {
top->rst_ni = 0;
top->clk_i = 0;
top->eval();
#if VM_TRACE
tfp->dump(static_cast<vluint64_t>(main_time * 2));
#endif
top->clk_i = 1;
top->eval();
#if VM_TRACE
tfp->dump(static_cast<vluint64_t>(main_time * 2 + 1));
#endif
main_time ++;
}
top->rst_ni = 1;
// Main simulation loop: one clock cycle per iteration, two trace time steps
// per cycle (falling edge at 2*main_time, rising edge at 2*main_time + 1).
while (!dtm->done() && !jtag->done()) {
top->clk_i = 0;
top->eval();
#if VM_TRACE
// dump = tfp && trace_count >= start;
// if (dump)
tfp->dump(static_cast<vluint64_t>(main_time * 2));
#endif
top->clk_i = 1;
top->eval();
#if VM_TRACE
// if (dump)
tfp->dump(static_cast<vluint64_t>(main_time * 2 + 1));
#endif
main_time++;
}
#if VM_TRACE
if (tfp)
tfp->close();
if (vcdfile)
fclose(vcdfile);
#endif
// Result report; htif_argv[1] is the first argument that was handed to dtm_t.
// NOTE(review): main_time is a vluint64_t but is printed with %ld -- confirm
// the format matches on all supported platforms.
if (dtm->exit_code()) {
fprintf(stderr, "%s *** FAILED *** (code = %d) after %ld cycles\n", htif_argv[1], dtm->exit_code(), main_time);
ret = dtm->exit_code();
} else if (jtag->exit_code()) {
fprintf(stderr, "%s *** FAILED *** (code = %d, seed %d) after %ld cycles\n", htif_argv[1], jtag->exit_code(), random_seed, main_time);
ret = jtag->exit_code();
} else {
fprintf(stderr, "%s completed after %ld cycles\n", htif_argv[1], main_time);
}
if (dtm) delete dtm;
if (jtag) delete jtag;
std::clock_t c_end = std::clock();
auto t_end = std::chrono::high_resolution_clock::now();
// Optional performance report, enabled with -p.
if (perf) {
std::cout << std::fixed << std::setprecision(2) << "CPU time used: "
<< 1000.0 * (c_end-c_start) / CLOCKS_PER_SEC << " ms\n"
<< "Wall clock time passed: "
<< std::chrono::duration<double, std::milli>(t_end-t_start).count()
<< " ms\n";
}
return ret;
}

View file

@ -70,7 +70,7 @@ module ariane_testharness #(
assign test_en = 1'b0;
assign ndmreset_n = ~ndmreset ;
localparam NB_SLAVE = 3;
localparam NB_SLAVE = 4;
localparam NB_MASTER = 3;
localparam AXI_ID_WIDTH_SLAVES = AXI_ID_WIDTH + $clog2(NB_SLAVE);
@ -174,9 +174,12 @@ module ariane_testharness #(
) i_dm_top (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ), // PoR
.testmode_i ( test_en ),
.ndmreset_o ( ndmreset ),
.dmactive_o ( ), // active debug session
.debug_req_o ( debug_req ),
.unavailable_i ( '0 ),
.axi_master ( slave[3] ),
.axi_slave ( master[2] ),
.dmi_rst_ni ( rst_ni ),
.dmi_req_valid_i ( debug_req_valid ),
@ -230,13 +233,8 @@ module ariane_testharness #(
logic [AXI_DATA_WIDTH/8-1:0] be;
logic [AXI_DATA_WIDTH-1:0] wdata;
logic [AXI_DATA_WIDTH-1:0] rdata;
logic [AXI_DATA_WIDTH-1:0] bit_en;
// convert byte enable to bit enable
for (genvar i = 0; i < AXI_DATA_WIDTH/8; i++) begin
assign bit_en[i*8 +: 8] = {8{be[i]}};
end
axi2mem #(
.AXI_ID_WIDTH ( AXI_ID_WIDTH_SLAVES ),
.AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ),
@ -259,11 +257,12 @@ module ariane_testharness #(
.NUM_WORDS ( NUM_WORDS )
) i_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( req ),
.we_i ( we ),
.addr_i ( addr[$clog2(NUM_WORDS)-1+$clog2(AXI_DATA_WIDTH/8):$clog2(AXI_DATA_WIDTH/8)] ),
.wdata_i ( wdata ),
.be_i ( bit_en ),
.be_i ( be ),
.rdata_o ( rdata )
);

View file

@ -1,33 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
// Antonio Pullini <pullinia@iis.ee.ethz.ch>
// STAGES-deep single-bit shift register: serial_i enters at bit 0 and leaves
// on serial_o from bit STAGES-1, i.e. STAGES clock cycles later. All stages
// are cleared by the asynchronous, active-low reset rstn_i.
// (Presumably used as a synchronizer, going by the module name.)
module pulp_sync #(
    parameter STAGES = 2
)(
    input  logic clk_i,
    input  logic rstn_i,
    input  logic serial_i,
    output logic serial_o
);

    // shift chain, bit 0 is the first stage
    logic [STAGES-1:0] r_reg;

    always_ff @(posedge clk_i or negedge rstn_i) begin
        if (rstn_i == 1'b0) begin
            r_reg <= {STAGES{1'b0}};
        end else begin
            // move every stage up by one and take in the new sample
            r_reg <= {r_reg[STAGES-2:0], serial_i};
        end
    end

    // the last stage drives the output
    assign serial_o = r_reg[STAGES-1];

endmodule : pulp_sync

View file

@ -32,7 +32,7 @@ extern "C" int debug_tick
info.argv[i][j] = info.argv[i][j + 2];
}
}
// printf("Argument %d: %s\n", i, info.argv[i]);
printf("Argument %d: %s\n", i, info.argv[i]);
}
dtm = new dtm_t(info.argc, info.argv);

View file

@ -1,83 +1,83 @@
add wave -noupdate -group core /ariane_tb/dut/i_ariane/*
# add wave -noupdate -group core /ariane_tb/dut/i_ariane/*
add wave -noupdate -group frontend /ariane_tb/dut/i_ariane/i_frontend/*
add wave -noupdate -group frontend -group icache /ariane_tb/dut/i_ariane/i_frontend/i_icache/*
add wave -noupdate -group frontend -group ras /ariane_tb/dut/i_ariane/i_frontend/i_ras/*
add wave -noupdate -group frontend -group btb /ariane_tb/dut/i_ariane/i_frontend/i_btb/*
add wave -noupdate -group frontend -group bht /ariane_tb/dut/i_ariane/i_frontend/i_bht/*
# add wave -noupdate -group frontend -group instr_scan /ariane_tb/dut/i_ariane/i_frontend/*i_instr_scan/*
add wave -noupdate -group frontend -group fetch_fifo /ariane_tb/dut/i_ariane/i_frontend/i_fetch_fifo/*
# add wave -noupdate -group frontend /ariane_tb/dut/i_ariane/i_frontend/*
# add wave -noupdate -group frontend -group icache /ariane_tb/dut/i_ariane/i_frontend/i_icache/*
# add wave -noupdate -group frontend -group ras /ariane_tb/dut/i_ariane/i_frontend/i_ras/*
# add wave -noupdate -group frontend -group btb /ariane_tb/dut/i_ariane/i_frontend/i_btb/*
# add wave -noupdate -group frontend -group bht /ariane_tb/dut/i_ariane/i_frontend/i_bht/*
# # add wave -noupdate -group frontend -group instr_scan /ariane_tb/dut/i_ariane/i_frontend/*i_instr_scan/*
# add wave -noupdate -group frontend -group fetch_fifo /ariane_tb/dut/i_ariane/i_frontend/i_fetch_fifo/*
add wave -noupdate -group id_stage -group decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/*
add wave -noupdate -group id_stage -group compressed_decoder /ariane_tb/dut/i_ariane/id_stage_i/compressed_decoder_i/*
add wave -noupdate -group id_stage -group instr_realigner /ariane_tb/dut/i_ariane/id_stage_i/instr_realigner_i/*
add wave -noupdate -group id_stage /ariane_tb/dut/i_ariane/id_stage_i/*
# add wave -noupdate -group id_stage -group decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/*
# add wave -noupdate -group id_stage -group compressed_decoder /ariane_tb/dut/i_ariane/id_stage_i/compressed_decoder_i/*
# add wave -noupdate -group id_stage -group instr_realigner /ariane_tb/dut/i_ariane/id_stage_i/instr_realigner_i/*
# add wave -noupdate -group id_stage /ariane_tb/dut/i_ariane/id_stage_i/*
add wave -noupdate -group issue_stage -group scoreboard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/*
add wave -noupdate -group issue_stage -group issue_read_operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/*
add wave -noupdate -group issue_stage -group rename /ariane_tb/dut/i_ariane/issue_stage_i/i_re_name/*
add wave -noupdate -group issue_stage /ariane_tb/dut/i_ariane/issue_stage_i/*
# add wave -noupdate -group issue_stage -group scoreboard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/*
# add wave -noupdate -group issue_stage -group issue_read_operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/*
# add wave -noupdate -group issue_stage -group rename /ariane_tb/dut/i_ariane/issue_stage_i/i_re_name/*
# add wave -noupdate -group issue_stage /ariane_tb/dut/i_ariane/issue_stage_i/*
add wave -noupdate -group ex_stage -group alu /ariane_tb/dut/i_ariane/ex_stage_i/alu_i/*
add wave -noupdate -group ex_stage -group mult /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/*
add wave -noupdate -group ex_stage -group mult -group mul /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_mul/*
add wave -noupdate -group ex_stage -group mult -group div /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_div/*
add wave -noupdate -group ex_stage -group mult -group ff1 /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_ff1/*
add wave -noupdate -group ex_stage -group fpu /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/*
add wave -noupdate -group ex_stage -group fpu -group fpnew /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/*
# add wave -noupdate -group ex_stage -group alu /ariane_tb/dut/i_ariane/ex_stage_i/alu_i/*
# add wave -noupdate -group ex_stage -group mult /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/*
# add wave -noupdate -group ex_stage -group mult -group mul /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_mul/*
# add wave -noupdate -group ex_stage -group mult -group div /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_div/*
# add wave -noupdate -group ex_stage -group mult -group ff1 /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_ff1/*
# add wave -noupdate -group ex_stage -group fpu /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/*
# add wave -noupdate -group ex_stage -group fpu -group fpnew /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/*
add wave -noupdate -group ex_stage -group lsu /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/*
add wave -noupdate -group ex_stage -group lsu -group lsu_bypass /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/lsu_bypass_i/*
add wave -noupdate -group ex_stage -group lsu -group mmu /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_mmu/*
add wave -noupdate -group ex_stage -group lsu -group mmu -group itlb /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_mmu/i_itlb/*
add wave -noupdate -group ex_stage -group lsu -group mmu -group dtlb /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_mmu/i_dtlb/*
add wave -noupdate -group ex_stage -group lsu -group mmu -group ptw /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_mmu/i_ptw/*
# add wave -noupdate -group ex_stage -group lsu /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/*
# add wave -noupdate -group ex_stage -group lsu -group lsu_bypass /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/lsu_bypass_i/*
# add wave -noupdate -group ex_stage -group lsu -group mmu /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_mmu/*
# add wave -noupdate -group ex_stage -group lsu -group mmu -group itlb /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_mmu/i_itlb/*
# add wave -noupdate -group ex_stage -group lsu -group mmu -group dtlb /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_mmu/i_dtlb/*
# add wave -noupdate -group ex_stage -group lsu -group mmu -group ptw /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_mmu/i_ptw/*
add wave -noupdate -group ex_stage -group lsu -group store_unit /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_store_unit/*
add wave -noupdate -group ex_stage -group lsu -group store_unit -group store_buffer /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_store_unit/store_buffer_i/*
# add wave -noupdate -group ex_stage -group lsu -group store_unit /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_store_unit/*
# add wave -noupdate -group ex_stage -group lsu -group store_unit -group store_buffer /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_store_unit/store_buffer_i/*
add wave -noupdate -group ex_stage -group lsu -group load_unit /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_load_unit/*
add wave -noupdate -group ex_stage -group lsu -group lsu_arbiter /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_lsu_arbiter/*
# add wave -noupdate -group ex_stage -group lsu -group load_unit /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_load_unit/*
# add wave -noupdate -group ex_stage -group lsu -group lsu_arbiter /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_lsu_arbiter/*
add wave -noupdate -group ex_stage -group branch_unit /ariane_tb/dut/i_ariane/ex_stage_i/branch_unit_i/*
# add wave -noupdate -group ex_stage -group branch_unit /ariane_tb/dut/i_ariane/ex_stage_i/branch_unit_i/*
add wave -noupdate -group ex_stage -group csr_buffer /ariane_tb/dut/i_ariane/ex_stage_i/csr_buffer_i/*
add wave -noupdate -group ex_stage /ariane_tb/dut/i_ariane/ex_stage_i/*
# add wave -noupdate -group ex_stage -group csr_buffer /ariane_tb/dut/i_ariane/ex_stage_i/csr_buffer_i/*
# add wave -noupdate -group ex_stage /ariane_tb/dut/i_ariane/ex_stage_i/*
add wave -noupdate -group commit_stage /ariane_tb/dut/i_ariane/commit_stage_i/*
# add wave -noupdate -group commit_stage /ariane_tb/dut/i_ariane/commit_stage_i/*
add wave -noupdate -group csr_file /ariane_tb/dut/i_ariane/csr_regfile_i/*
# add wave -noupdate -group csr_file /ariane_tb/dut/i_ariane/csr_regfile_i/*
add wave -noupdate -group controller /ariane_tb/dut/i_ariane/controller_i/*
# add wave -noupdate -group controller /ariane_tb/dut/i_ariane/controller_i/*
add wave -noupdate -group nbdcache /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/*
add wave -noupdate -group nbdcache -group miss_handler /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_miss_handler/*
# add wave -noupdate -group nbdcache /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/*
# add wave -noupdate -group nbdcache -group miss_handler /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_miss_handler/*
add wave -noupdate -group nbdcache -group bypass_arbiter /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_miss_handler/i_bypass_arbiter/*
add wave -noupdate -group nbdcache -group bypass_axi /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_miss_handler/i_bypass_axi_adapter/*
# add wave -noupdate -group nbdcache -group bypass_arbiter /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_miss_handler/i_bypass_arbiter/*
# add wave -noupdate -group nbdcache -group bypass_axi /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_miss_handler/i_bypass_axi_adapter/*
add wave -noupdate -group nbdcache -group miss_axi /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_miss_handler/i_miss_axi_adapter/*
add wave -noupdate -group nbdcache -group lfsr /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_miss_handler/i_lfsr/*
# add wave -noupdate -group nbdcache -group miss_axi /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_miss_handler/i_miss_axi_adapter/*
# add wave -noupdate -group nbdcache -group lfsr /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_miss_handler/i_lfsr/*
add wave -noupdate -group nbdcache -group dirty_ram /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/dirty_sram/*
add wave -noupdate -group nbdcache -group tag_cmp /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_tag_cmp/*
# add wave -noupdate -group nbdcache -group dirty_ram /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/dirty_sram/*
# add wave -noupdate -group nbdcache -group tag_cmp /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/i_tag_cmp/*
add wave -noupdate -group nbdcache -group ptw {/ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/master_ports[0]/i_cache_ctrl/*}
add wave -noupdate -group nbdcache -group load {/ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/master_ports[1]/i_cache_ctrl/*}
add wave -noupdate -group nbdcache -group store {/ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/master_ports[2]/i_cache_ctrl/*}
# add wave -noupdate -group nbdcache -group ptw {/ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/master_ports[0]/i_cache_ctrl/*}
# add wave -noupdate -group nbdcache -group load {/ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/master_ports[1]/i_cache_ctrl/*}
# add wave -noupdate -group nbdcache -group store {/ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/i_nbdcache/master_ports[2]/i_cache_ctrl/*}
add wave -noupdate -group perf_counters {/ariane_tb/dut/i_ariane/i_perf_counters/*}
# add wave -noupdate -group perf_counters {/ariane_tb/dut/i_ariane/i_perf_counters/*}
add wave -noupdate -group dm_top /ariane_tb/dut/i_dm_top/*
add wave -noupdate -group dm_top -group dm_csrs /ariane_tb/dut/i_dm_top/i_dm_csrs/*
add wave -noupdate -group dm_top -group dm_mem /ariane_tb/dut/i_dm_top/i_dm_mem/*
# add wave -noupdate -group dm_top /ariane_tb/dut/i_dm_top/*
# add wave -noupdate -group dm_top -group dm_csrs /ariane_tb/dut/i_dm_top/i_dm_csrs/*
# add wave -noupdate -group dm_top -group dm_mem /ariane_tb/dut/i_dm_top/i_dm_mem/*
add wave -noupdate -group bootrom /ariane_tb/dut/i_bootrom/*
# add wave -noupdate -group bootrom /ariane_tb/dut/i_bootrom/*
add wave -noupdate -group tracer_if /ariane_tb/dut/i_ariane/instr_tracer_i/tracer_if/*
# add wave -noupdate -group tracer_if /ariane_tb/dut/i_ariane/instr_tracer_i/tracer_if/*
add wave -group SimJTAG /ariane_tb/dut/i_SimJTAG/*
# add wave -group SimJTAG /ariane_tb/dut/i_SimJTAG/*
add wave -group dmi_jtag /ariane_tb/dut/i_dmi_jtag/*
add wave -group dmi_jtag -group dmi_jtag_tap /ariane_tb/dut/i_dmi_jtag/i_dmi_jtag_tap/*
add wave -group dmi_jtag -group dmi_cdc /ariane_tb/dut/i_dmi_jtag/i_dmi_cdc/*
# add wave -group dmi_jtag /ariane_tb/dut/i_dmi_jtag/*
# add wave -group dmi_jtag -group dmi_jtag_tap /ariane_tb/dut/i_dmi_jtag/i_dmi_jtag_tap/*
# add wave -group dmi_jtag -group dmi_cdc /ariane_tb/dut/i_dmi_jtag/i_dmi_cdc/*

View file

@ -1,37 +0,0 @@
# This script emulates locally what the travis check does on the public server:
# it installs the build dependencies, sets up the tool environment, builds the
# tools through the ci/ helper scripts and finally runs the assembly tests with
# verilator.
# The relative ci/ paths mean it has to be started from the repository root.
# Comment out the next command if you don't want to use sudo.
sudo apt install \
gcc-4.8 \
g++-4.8 \
gperf \
autoconf \
automake \
autotools-dev \
libmpc-dev \
libmpfr-dev \
libgmp-dev \
gawk \
build-essential \
bison \
flex \
texinfo \
python-pexpect \
libusb-1.0-0-dev \
device-tree-compiler
# Customise this to a fast local disk
export TOP=/local/scratch/$USER
# stands in for the checkout directory that travis provides
export TRAVIS_BUILD_DIR=$TOP/ariane-isatest
# RISC-V tool install prefix; its bin directory is put on PATH below
export RISCV=$TOP/riscv_install
export PATH=$TOP/riscv_install/bin:$TRAVIS_BUILD_DIR/tmp/bin:$PATH
# use the gcc 4.8 compilers installed above
export CXX=g++-4.8 CC=gcc-4.8
ci/make-tmp.sh
# Point the compiler and the loader at the tmp/ tree of the build directory.
# Note that these assignments replace any value already set in the environment.
export LIBRARY_PATH=$TRAVIS_BUILD_DIR/tmp/lib
export LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/tmp/lib
export C_INCLUDE_PATH=$TRAVIS_BUILD_DIR/tmp/include
export CPLUS_INCLUDE_PATH=$TRAVIS_BUILD_DIR/tmp/include
# presumably the location ci/install-verilator.sh installs to -- TODO confirm
export VERILATOR_ROOT=$TRAVIS_BUILD_DIR/tmp/verilator-3.918/
# Build steps; they rely on the environment exported above, so keep the order.
ci/build-riscv-gcc.sh
ci/install-verilator.sh
ci/install-fesvr.sh
ci/build-riscv-tests.sh
make run-asm-tests-verilator