Merge branch 'master' into ariane_next

This commit is contained in:
Florian Zaruba 2018-10-15 16:01:57 +02:00
commit aa47e4a8be
No known key found for this signature in database
GPG key ID: E742FFE8EC38A792
52 changed files with 3253 additions and 1112 deletions

View file

@ -1,23 +1,23 @@
before_script:
- export CXX=g++-7 CC=gcc-7
# paths to local or network installations (the riscv toolchain and
# verilator are not built in the ci job as in travis)
- export QUESTASIM_HOME=
- export QUESTASIM_VERSION=
- export QUESTASIM_FLAGS=
- export RISCV=/scratch/$USER/projects/riscv_install
- export VERILATOR_ROOT=/scratch/$USER/projects/verilator-3.924
- export QUESTASIM_HOME=/usr/pack/modelsim-10.6b-kgf/questasim/
- export QUESTASIM_VERSION=-10.6b
- export QUESTASIM_FLAGS=-noautoldlibpath
- export CXX=g++-7.2.0 CC=gcc-7.2.0
- export RISCV=/usr/scratch2/larain1/gitlabci/riscv_install
- export VERILATOR_ROOT=/usr/scratch2/larain1/gitlabci/verilator-3.924
# setup dependent paths
- export PATH=${RISCV}/bin:$VERILATOR_ROOT/bin:${PATH}
- export LIBRARY_PATH=$RISCV/lib
- export LD_LIBRARY_PATH=$RISCV/lib
- export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include
- export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include
- export LD_LIBRARY_PATH=$RISCV/lib:/usr/pack/gcc-7.2.0-af/linux-x64/lib64/
- export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include:/usr/pack/gcc-7.2.0-af/linux-x64/include
- export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include:/usr/pack/gcc-7.2.0-af/linux-x64/include
# number of parallel jobs to use for make commands and simulation
- export NUM_JOBS=4
- ci/make-tmp.sh
- git submodule update --init --recursive
variables:
GIT_SUBMODULE_STRATEGY: recursive
@ -52,33 +52,9 @@ run-benchmarks-questa:
dependencies:
- build
# rv64ui-p-* tests
run-asm-tests1-verilator:
stage: test_std
script:
- make -j${NUM_JOBS} run-asm-tests1-verilator
dependencies:
- build
# rv64ui-v-* tests
run-asm-tests2-verilator:
stage: test_std
script:
- make -j${NUM_JOBS} run-asm-tests2-verilator
dependencies:
- build
run-benchmarks-verilator:
stage: test_std
script:
- make -j${NUM_JOBS} run-benchmarks-verilator
dependencies:
- build
torture:
stage: test_std
script:
- make torture-rtest
- make torture-rtest-verilator
dependencies:
- build

11
.gitmodules vendored
View file

@ -4,12 +4,21 @@
[submodule "src/axi_node"]
path = src/axi_node
url = https://github.com/pulp-platform/axi_node.git
[submodule "src/fpu"]
path = src/fpu
url = https://github.com/pulp-platform/fpnew.git
[submodule "src/fpga-support"]
path = src/fpga-support
url = https://github.com/pulp-platform/fpga-support.git
[submodule "src/common_cells"]
path = src/common_cells
url = https://github.com/pulp-platform/common_cells.git
url = https://github.com/pulp-platform/common_cells.git
[submodule "src/axi"]
path = src/axi
url = https://github.com/pulp-platform/axi.git
[submodule "src/fpu_div_sqrt_mvp"]
path = src/fpu_div_sqrt_mvp
url = https://github.com/pulp-platform/fpu_div_sqrt_mvp.git
[submodule "src/tech_cells_generic"]
path = src/tech_cells_generic
url = https://github.com/pulp-platform/tech_cells_generic.git

View file

@ -3,15 +3,48 @@ package:
authors: [ "Florian Zaruba <zarubaf@iis.ee.ethz.ch>" ]
dependencies:
axi: { git: "git@iis-git.ee.ethz.ch:sasa/axi.git", rev: master }
axi2per: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi2per.git", rev: master }
axi_mem_if: { git: "git@github.com:pulp-platform/axi_mem_if.git", rev: master }
axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: v1.1.0 }
axi_slice: { git: "git@iis-git.ee.ethz.ch:sasa/axi_slice.git", version: 1.1.2 }
tech_cells_generic: { git: "git@iis-git.ee.ethz.ch:pulp-open/tech_cells_generic.git", rev: master }
common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: v1.7.4 }
fpga-support: { git: "https://github.com/pulp-platform/fpga-support.git", version: v0.3.2 }
axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.4.5 }
axi_mem_if: { git: "https://github.com/pulp-platform/axi_mem_if.git", version: 0.2.0 }
axi_node: { git: "https://github.com/pulp-platform/axi_node.git", version: 1.1.1 }
tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.1.1 }
common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.7.5 }
fpga-support: { git: "https://github.com/pulp-platform/fpga-support.git", version: 0.3.2 }
sources:
- src/fpu_div_sqrt_mvp/hdl/fpu_ff.sv
- src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
- src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
- src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
- src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
- src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
- src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
- src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
- src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
- src/fpu/src/pkg/fpnew_pkg.vhd
- src/fpu/src/pkg/fpnew_fmts_pkg.vhd
- src/fpu/src/pkg/fpnew_comps_pkg.vhd
- src/fpu/src/pkg/fpnew_pkg_constants.vhd
- src/fpu/src/utils/fp_pipe.vhd
- src/fpu/src/utils/fp_rounding.vhd
- src/fpu/src/utils/fp_arbiter.vhd
- src/fpu/src/ops/fma_core.vhd
- src/fpu/src/ops/fp_fma.vhd
- src/fpu/src/ops/fp_divsqrt_multi.vhd
- src/fpu/src/ops/fp_noncomp.vhd
- src/fpu/src/ops/fp_f2fcasts_fmt.vhd
- src/fpu/src/ops/fp_f2icasts_fmt.vhd
- src/fpu/src/ops/fp_i2fcasts_fmt.vhd
- src/fpu/src/subunits/addmul_fmt_slice.vhd
- src/fpu/src/subunits/addmul_block.vhd
- src/fpu/src/subunits/divsqrt_multifmt_slice.vhd
- src/fpu/src/subunits/divsqrt_block.vhd
- src/fpu/src/subunits/noncomp_fmt_slice.vhd
- src/fpu/src/subunits/noncomp_block.vhd
- src/fpu/src/subunits/conv_fmt_slice.vhd
- src/fpu/src/subunits/conv_ifmt_slice.vhd
- src/fpu/src/subunits/conv_block.vhd
- src/fpu/src/fpnew.vhd
- src/fpu/src/fpnew_top.vhd
- include/riscv_pkg.sv
- src/debug/dm_pkg.sv
- include/ariane_pkg.sv
@ -21,49 +54,43 @@ sources:
- src/util/instruction_tracer_pkg.sv
- src/util/instruction_tracer_if.sv
- src/alu.sv
- src/fpu_wrap.sv
- src/ariane.sv
- src/branch_unit.sv
- src/cache_ctrl.sv
- src/commit_stage.sv
- src/compressed_decoder.sv
- src/controller.sv
- src/csr_buffer.sv
- src/csr_regfile.sv
- src/decoder.sv
- src/ex_stage.sv
- src/frontend/btb.sv,
- src/frontend/bht.sv,
- src/frontend/ras.sv,
- src/frontend/instr_scan.sv,
- src/frontend/btb.sv
- src/frontend/bht.sv
- src/frontend/ras.sv
- src/frontend/instr_scan.sv
- src/frontend/frontend.sv
- src/icache.sv
- src/id_stage.sv
- src/instr_realigner.sv
- src/issue_read_operands.sv
- src/issue_stage.sv
- src/lfsr.sv
- src/load_unit.sv
- src/lsu_arbiter.sv
- src/lsu.sv
- src/miss_handler.sv
- src/mmu.sv
- src/mult.sv
- src/nbdcache.sv
- src/vdregs.sv
- src/perf_counters.sv
- src/ptw.sv
- src/std_cache_subsystem.sv
- src/sram_wrapper.sv
# - src/ariane_regfile_ff.sv
- src/ariane_regfile.sv
- src/ariane_regfile_ff.sv
# - src/ariane_regfile.sv
- src/re_name.sv
- src/scoreboard.sv
- src/store_buffer.sv
- src/amo_buffer.sv
- src/store_unit.sv
- src/tlb.sv
- src/commit_stage.sv
- src/axi_adapter.sv
- src/cache_subsystem/cache_ctrl.sv
- src/cache_subsystem/amo_alu.sv
- src/cache_subsystem/miss_handler.sv
- src/cache_subsystem/std_cache_subsystem.sv
- src/cache_subsystem/std_icache.sv
@ -76,4 +103,5 @@ sources:
- src/debug/dm_top.sv
- src/debug/dmi_cdc.sv
- src/debug/dmi_jtag.sv
- src/debug/dm_sba.sv
- src/debug/dmi_jtag_tap.sv

1
CODEOWNERS Normal file
View file

@ -0,0 +1 @@
* @zarubaf @msfschaffner

View file

@ -20,6 +20,7 @@ See [style-guidlines](https://github.com/pulp-platform/style-guidelines)
* :fire: `:fire:` Removing code or files.
* :memo: `:memo:` When writing docs
* :bug: `:bug:` When fixing a bug
* :fire: `:fire:` When removing code or files
* :wastebasket: `:wastebasket:` When removing code or files
* :green_heart: `:green_heart:` When fixing the CI build
* :construction_worker: `:construction_worker:` Adding CI build system
@ -28,17 +29,16 @@ See [style-guidlines](https://github.com/pulp-platform/style-guidelines)
* :arrow_up: `:arrow_up:` When upgrading dependencies
* :arrow_down: `:arrow_down:` When downgrading dependencies
* :rotating_light: `:rotating_light:` When removing linter warnings
* :pencil2: `:pencil2:` Fixing typos
* :recycle: `:recycle:` Refactoring code.
* :pencil2: `:pencil2:` Fixing typos
* :recycle: `:recycle:` Refactoring code.
* :boom: `:boom:` Introducing breaking changes
* :truck: `:truck:` Moving or renaming files.
* :truck: `:truck:` Moving or renaming files.
* :space_invader: `:space_invader:` When fixing something synthesis related
* :beers: `:beer:` Writing code drunkenly.
* :ok_hand: `:ok_hand:` Updating code due to code review changes
* :ok_hand: `:ok_hand:` Updating code due to code review changes
* :building_construction: `:building_construction:` Making architectural changes.
* :wrench: `:wrench:` Tooling
* :construction: `:construction:` Work In Progress WIP
* :bookmark: `:bookmark:` version tag
For a detailed explanation of the why and how, please refer to one of the many [resources](https://chris.beams.io/posts/git-commit/) regarding git commit messages.
If you use `vi` for your commit message, consider putting the following snippet inside your `~/.vimrc`:

View file

@ -29,18 +29,23 @@ torture-logs := -log
# Sources
# Package files -> compile first
ariane_pkg := include/riscv_pkg.sv \
src/debug/dm_pkg.sv \
include/ariane_pkg.sv \
include/std_cache_pkg.sv \
src/axi/src/axi_pkg.sv \
include/axi_intf.sv
ariane_pkg := include/riscv_pkg.sv \
src/debug/dm_pkg.sv \
include/ariane_pkg.sv \
include/std_cache_pkg.sv \
src/axi/src/axi_pkg.sv \
include/axi_intf.sv \
src/fpu/src/pkg/fpnew_pkg.vhd \
src/fpu/src/pkg/fpnew_fmts_pkg.vhd \
src/fpu/src/pkg/fpnew_comps_pkg.vhd \
src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \
src/fpu/src/pkg/fpnew_pkg_constants.vhd
# utility modules
util := $(wildcard src/util/*.svh) \
src/util/instruction_tracer_pkg.sv \
src/util/instruction_tracer_if.sv \
src/util/cluster_clock_gating.sv \
util := $(wildcard src/util/*.svh) \
src/util/instruction_tracer_pkg.sv \
src/util/instruction_tracer_if.sv \
src/tech_cells_generic/src/cluster_clock_gating.sv \
src/util/sram.sv
# Test packages
@ -51,6 +56,11 @@ dpi := $(patsubst tb/dpi/%.cc,${dpi-library}/%.o,$(wildcard tb/dpi/*.cc))
dpi_hdr := $(wildcard tb/dpi/*.h)
# this list contains the standalone components
src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \
$(wildcard src/fpu/src/utils/*.vhd) \
$(wildcard src/fpu/src/ops/*.vhd) \
$(wildcard src/fpu/src/subunits/*.vhd) \
$(filter-out src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv, \
$(wildcard src/fpu_div_sqrt_mvp/hdl/*.sv)) \
$(wildcard src/frontend/*.sv) \
$(wildcard src/cache_subsystem/*.sv) \
$(wildcard bootrom/*.sv) \
@ -59,6 +69,12 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \
$(wildcard src/axi_mem_if/src/*.sv) \
$(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) \
$(wildcard src/debug/debug_rom/*.sv) \
src/fpu/src/fpnew.vhd \
src/fpu/src/fpnew_top.vhd \
src/common_cells/src/deprecated/generic_fifo.sv \
src/common_cells/src/deprecated/pulp_sync.sv \
src/common_cells/src/deprecated/find_first_one.sv \
src/common_cells/src/rstgen_bypass.sv \
src/axi/src/axi_cut.sv \
src/axi/src/axi_join.sv \
src/fpga-support/rtl/SyncSpRamBeNx64.sv \
@ -71,12 +87,12 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \
src/common_cells/src/lzc.sv \
src/common_cells/src/rrarbiter.sv \
src/common_cells/src/lfsr_8bit.sv \
src/tech_cells_generic/src/cluster_clock_inverter.sv \
src/tech_cells_generic/src/pulp_clock_mux2.sv \
tb/ariane_testharness.sv \
tb/common/SimDTM.sv \
tb/common/SimJTAG.sv
# root path
root-dir := $(shell pwd)
# look for testbenches
@ -96,6 +112,7 @@ riscv-test ?= rv64ui-p-add
incdir :=
# Compile and sim flags
compile_flag += +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive +define+$(defines)
compile_flag_vhd += -64 -nologo -quiet -2008
uvm-flags += +UVM_NO_RELNOTES
# Iterate over all include directories and write them with +incdir+ prefixed
# +incdir+ works for Verilator and QuestaSim
@ -113,9 +130,11 @@ build: $(library) $(library)/.build-srcs $(library)/.build-tb $(dpi-library)/ari
# src files
$(library)/.build-srcs: $(ariane_pkg) $(util) $(src) $(library)
vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(ariane_pkg)) $(list_incdir) -suppress 2583
vcom$(questa_version) $(compile_flag_vhd) -work $(library) -pedanticerrors $(filter %.vhd,$(ariane_pkg))
vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(util)) $(list_incdir) -suppress 2583
# Suppress message that always_latch may not be checked thoroughly by QuestaSim.
vlog$(questa_version) $(compile_flag) -work $(library) -pedanticerrors $(src) $(list_incdir) -suppress 2583
vcom$(questa_version) $(compile_flag_vhd) -work $(library) -pedanticerrors $(filter %.vhd,$(src))
vlog$(questa_version) $(compile_flag) -work $(library) -pedanticerrors $(filter %.sv,$(src)) $(list_incdir) -suppress 2583
touch $(library)/.build-srcs
# build TBs
@ -137,19 +156,25 @@ $(dpi-library)/ariane_dpi.so: $(dpi)
# Compile C-code and generate .so file
$(CXX) -shared -m64 -o $(dpi-library)/ariane_dpi.so $? -lfesvr
sim: build
vsim${questa_version} +permissive -64 -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
$(QUESTASIM_FLAGS) \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " log -r /*; run -all; exit" \
vsim${questa_version} +permissive -64 -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
$(QUESTASIM_FLAGS) \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; log -r /*; run -all; exit" \
${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options)
simc: build
vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
$(QUESTASIM_FLAGS) \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " run -all; exit" \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; run -all; exit" \
${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options)
simc-log: build
vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
$(QUESTASIM_FLAGS) \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; log -r /*; run -all; exit" \
${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options)
$(riscv-asm-tests): build
@ -157,7 +182,7 @@ $(riscv-asm-tests): build
+BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
$(QUESTASIM_FLAGS) \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \
-do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
-do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$@ ++$(target-options) | tee tmp/riscv-asm-tests-$@.log
$(riscv-benchmarks): build
@ -165,27 +190,27 @@ $(riscv-benchmarks): build
+BASEDIR=$(riscv-benchmarks-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
$(QUESTASIM_FLAGS) \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \
-do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
-do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
${top_level}_optimized +permissive-off ++$(riscv-benchmarks-dir)/$@ ++$(target-options) | tee tmp/riscv-benchmarks-$@.log
# can use -jX to run ci tests in parallel using X processes
run-asm-tests: $(riscv-asm-tests)
make check-asm-tests
$(MAKE) check-asm-tests
# Runs ci/check-tests.sh on the tmp/riscv-asm-tests-* logs and passes it the
# number of lines of the asm test list.
# `$$1` is escaped so that awk (and not make) receives the field reference;
# an unescaped `$1` is expanded by make to nothing and awk would print the whole
# `wc` output line (count and file name) instead of the count alone.
check-asm-tests:
ci/check-tests.sh tmp/riscv-asm-tests- $(shell wc -l $(riscv-asm-tests-list) | awk -F " " '{ print $$1 }')
# can use -jX to run ci tests in parallel using X processes
run-benchmarks: $(riscv-benchmarks)
make check-benchmarks
$(MAKE) check-benchmarks
# Runs ci/check-tests.sh on the tmp/riscv-benchmarks-* logs and passes it the
# number of lines of the benchmark list.
# `$$1` is escaped so that awk (and not make) receives the field reference;
# an unescaped `$1` is expanded by make to nothing and awk would print the whole
# `wc` output line (count and file name) instead of the count alone.
check-benchmarks:
ci/check-tests.sh tmp/riscv-benchmarks- $(shell wc -l $(riscv-benchmarks-list) | awk -F " " '{ print $$1 }')
# verilator-specific
verilate_command := $(verilator) \
$(ariane_pkg) \
$(filter-out tb/ariane_bt.sv,$(src)) \
$(filter-out %.vhd, $(ariane_pkg)) \
$(filter-out src/fpu_wrap.sv, $(filter-out %.vhd, $(src))) \
+define+$(defines) \
src/util/sram.sv \
+incdir+src/axi_node \
@ -209,7 +234,7 @@ verilate_command := $(verilator)
# User Verilator, at some point in the future this will be auto-generated
verilate:
$(verilate_command)
cd $(ver-library) && make -j${NUM_JOBS} -f Variane_testharness.mk
cd $(ver-library) && $(MAKE) -j${NUM_JOBS} -f Variane_testharness.mk
$(addsuffix -verilator,$(riscv-asm-tests)): verilate
$(ver-library)/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@)
@ -235,29 +260,29 @@ torture-itest:
cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -a output/test.S'
# Generates call.sh in the torture directory, lets riscv-torture run one test
# through it and finally checks the result.
# The final check uses $(MAKE) like the other recursive invocations in this
# file, so that command-line flags and the -j job server reach the sub-make.
torture-rtest: build
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture$(torture-logs) defines=$(defines) test-location=$(test-location)" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture$(torture-logs) defines=$(defines) test-location=$(test-location)" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a $(test-location).S' | tee $(test-location).log
$(MAKE) check-torture test-location=$(test-location)
torture-dummy: build
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture defines=$(defines) test-location=\$${@: -1}" > call.sh
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture defines=$(defines) test-location=\$${@: -1}" > call.sh
torture-rnight: build
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture$(torture-logs) defines=$(defines) test-location=\$${@: -1}" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture$(torture-logs) defines=$(defines) test-location=\$${@: -1}" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && $(riscv-torture-bin) 'overnight/run -r ./call.sh -g none' | tee output/overnight.log
make check-torture
$(MAKE) check-torture
torture-rtest-verilator: verilate
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture-verilator defines=$(defines)" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture-verilator defines=$(defines)" > call.sh && chmod +x call.sh
cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a output/test.S' | tee output/test.log
make check-torture
$(MAKE) check-torture
# Command-line (-c) QuestaSim run of a single torture test. Coverage is saved
# to tmp/$@.ucdb on exit and the simulator quits with the TESTSTATUS coverage
# attribute as exit code.
# Every plusarg has to be a word of its own: the blank between +max-cycles and
# +UVM_TESTNAME is required, otherwise both are fused into a single argument.
run-torture: build
vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \
+BASEDIR=$(riscv-torture-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \
$(QUESTASIM_FLAGS) \
-gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \
-do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
-do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \
${top_level}_optimized +permissive-off \
+signature=$(riscv-torture-dir)/$(test-location).rtlsim.sig ++$(riscv-torture-dir)/$(test-location) ++$(target-options)
@ -271,6 +296,7 @@ run-torture-log: build
+signature=$(riscv-torture-dir)/$(test-location).rtlsim.sig ++$(riscv-torture-dir)/$(test-location) ++$(target-options)
cp vsim.wlf $(riscv-torture-dir)/$(test-location).wlf
cp trace_core_00_0.log $(riscv-torture-dir)/$(test-location).trace
cp trace_core_00_0_commit.log $(riscv-torture-dir)/$(test-location).commit
cp transcript $(riscv-torture-dir)/$(test-location).transcript
run-torture-verilator: verilate

View file

@ -81,6 +81,10 @@ $ make simc riscv-test-dir=$RISCV/riscv64-unknown-elf/bin riscv-test=pk target-o
> Be patient! RTL simulation is way slower than Spike. If you think that you ran into problems you can inspect the trace files.
### FPU Support
> There is preliminary support for the floating-point extensions F and D. At the moment, floating-point support is only available in QuestaSim because the FPU is written in VHDL; this is likely to change. The floating-point extensions can be enabled by setting `RVF` and `RVD` to `1'b1` in the `include/ariane_pkg.sv` file.
## FPGA Emulation
Coming.

View file

@ -164,4 +164,4 @@ rv64ua-v-amomin_d
rv64ua-v-amomin_w
rv64ua-v-amominu_d
rv64ua-v-amominu_w
rv64ua-v-lrsc
rv64ua-v-lrsc

View file

@ -24,7 +24,7 @@ package ariane_pkg;
localparam NR_SB_ENTRIES = 8; // number of scoreboard entries
localparam TRANS_ID_BITS = $clog2(NR_SB_ENTRIES); // depending on the number of scoreboard entries we need that many bits
// to uniquely identify the entry in the scoreboard
localparam NR_WB_PORTS = 5;
localparam NR_WB_PORTS = 4;
localparam ASID_WIDTH = 1;
localparam BTB_ENTRIES = 8;
localparam BHT_ENTRIES = 32;
@ -32,18 +32,64 @@ package ariane_pkg;
localparam BITS_SATURATION_COUNTER = 2;
localparam NR_COMMIT_PORTS = 2;
localparam logic [63:0] ISA_CODE =
| (1 << 0) // A - Atomic extension
| (1 << 2) // C - Compressed extension
| (1 << 8) // I - RV32I/64I/128I base ISA
| (1 << 12) // M - Integer Multiply/Divide extension
| (0 << 13) // N - User level interrupts supported
| (1 << 18) // S - Supervisor mode implemented
| (1 << 20) // U - User mode implemented
| (0 << 23) // X - Non-standard extensions present
| (1 << 63); // RV64
localparam ENABLE_RENAME = 1'b1;
// Floating-point extensions configuration
localparam bit RVF = 1'b0; // Is F extension enabled
localparam bit RVD = 1'b0; // Is D extension enabled
localparam bit RVA = 1'b1; // Is A extension enabled
// Transprecision floating-point extensions configuration
localparam bit XF16 = 1'b0; // Is half-precision float extension (Xf16) enabled
localparam bit XF16ALT = 1'b0; // Is alternative half-precision float extension (Xf16alt) enabled
localparam bit XF8 = 1'b0; // Is quarter-precision float extension (Xf8) enabled
localparam bit XFVEC = 1'b0; // Is vectorial float extension (Xfvec) enabled
// Transprecision float unit
localparam logic [30:0] LAT_COMP_FP32 = 'd3;
localparam logic [30:0] LAT_COMP_FP64 = 'd4;
localparam logic [30:0] LAT_COMP_FP16 = 'd3;
localparam logic [30:0] LAT_COMP_FP16ALT = 'd3;
localparam logic [30:0] LAT_COMP_FP8 = 'd2;
localparam logic [30:0] LAT_DIVSQRT = 'd2;
localparam logic [30:0] LAT_NONCOMP = 'd1;
localparam logic [30:0] LAT_CONV = 'd2;
// --------------------------------------
// vvvv Don't change these by hand! vvvv
localparam bit FP_PRESENT = RVF | RVD | XF16 | XF16ALT | XF8;
// Length of widest floating-point format
localparam FLEN = RVD ? 64 : // D ext.
RVF ? 32 : // F ext.
XF16 ? 16 : // Xf16 ext.
XF16ALT ? 16 : // Xf16alt ext.
XF8 ? 8 : // Xf8 ext.
0; // Unused in case of no FP
localparam bit NSX = XF16 | XF16ALT | XF8 | XFVEC; // Are non-standard extensions present?
localparam bit RVFVEC = RVF & XFVEC & FLEN>32; // FP32 vectors available if vectors and larger fmt enabled
localparam bit XF16VEC = XF16 & XFVEC & FLEN>16; // FP16 vectors available if vectors and larger fmt enabled
localparam bit XF16ALTVEC = XF16ALT & XFVEC & FLEN>16; // FP16ALT vectors available if vectors and larger fmt enabled
localparam bit XF8VEC = XF8 & XFVEC & FLEN>8; // FP8 vectors available if vectors and larger fmt enabled
// ^^^^ until here ^^^^
// ---------------------
localparam logic [63:0] ARIANE_MARCHID = 64'd3;
// ISA description word: every OR-ed term sets the bit of one extension letter
// (bit index = position of the letter in the alphabet, A = 0), bit 63 flags RV64.
// The A, D, F and X bits follow the RVA, RVD, RVF and NSX configuration
// parameters, all other bits are fixed.
localparam logic [63:0] ISA_CODE = (RVA << 0) // A - Atomic Instructions extension
| (1 << 2) // C - Compressed extension
| (RVD << 3) // D - Double precision floating-point extension
| (RVF << 5) // F - Single precision floating-point extension
| (1 << 8) // I - RV32I/64I/128I base ISA
| (1 << 12) // M - Integer Multiply/Divide extension
| (0 << 13) // N - User level interrupts supported
| (1 << 18) // S - Supervisor mode implemented
| (1 << 20) // U - User mode implemented
| (NSX << 23) // X - Non-standard extensions present
| (1 << 63); // RV64
// 32 registers + 1 bit for re-naming = 6
localparam REG_ADDR_SIZE = 6;
@ -57,9 +103,8 @@ package ariane_pkg;
dataaddr: dm::DataAddr
};
// enables a commit log which matches Spike's commit log format for easier trace comparison
localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b0;
localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1;
// ------------- Dangerous -------------
// if set to zero a flush will not invalidate the cache-lines, in a single core environment
@ -152,7 +197,9 @@ package ariane_pkg;
ALU, // 3
CTRL_FLOW, // 4
MULT, // 5
CSR // 6
CSR, // 6
FPU, // 7
FPU_VEC // 8
} fu_t;
localparam EXC_OFF_RST = 8'h80;
@ -199,9 +246,94 @@ package ariane_pkg;
// Multiplications
MUL, MULH, MULHU, MULHSU, MULW,
// Divisions
DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW
DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW,
// Floating-Point Load and Store Instructions
FLD, FLW, FLH, FLB, FSD, FSW, FSH, FSB,
// Floating-Point Computational Instructions
FADD, FSUB, FMUL, FDIV, FMIN_MAX, FSQRT, FMADD, FMSUB, FNMSUB, FNMADD,
// Floating-Point Conversion and Move Instructions
FCVT_F2I, FCVT_I2F, FCVT_F2F, FSGNJ, FMV_F2X, FMV_X2F,
// Floating-Point Compare Instructions
FCMP,
// Floating-Point Classify Instruction
FCLASS,
// Vectorial Floating-Point Instructions that don't directly map onto the scalar ones
VFMIN, VFMAX, VFSGNJ, VFSGNJN, VFSGNJX, VFEQ, VFNE, VFLT, VFGE, VFLE, VFGT, VFCPKAB_S, VFCPKCD_S, VFCPKAB_D, VFCPKCD_D
} fu_op;
// Packed bundle of one operation (fu_op) together with two 64-bit operands and
// a 64-bit immediate.
// NOTE(review): presumably the payload handed to a functional unit - confirm
// against the users of this type.
typedef struct packed {
fu_op operator; // operation to perform
logic [63:0] operand_a; // first operand
logic [63:0] operand_b; // second operand
logic [63:0] imm; // immediate value
} fu_data_t;
// -------------------------------
// Extract Src/Dst FP Reg from Op
// -------------------------------
// Returns 1'b1 if the rs1 operand of `op` is read from the floating-point
// register file, 1'b0 otherwise.
function automatic logic is_rs1_fpr (input fu_op op);
    // no FP extension configured: the answer is a constant zero
    if (!FP_PRESENT)
        return 1'b0;

    unique case (op) inside
        [FMUL:FNMADD],                   // computational operations, ADD/SUB excluded
        FCVT_F2I,                        // float-to-int casts
        FCVT_F2F,                        // float-to-float casts
        FSGNJ,                           // sign injections
        FMV_F2X,                         // FPR-to-GPR moves
        FCMP,                            // comparisons
        FCLASS,                          // classifications
        [VFMIN:VFCPKCD_D] : return 1'b1; // additional vectorial FP operations
        default           : return 1'b0; // every other operation
    endcase
endfunction
// Returns 1'b1 if the rs2 operand of `op` is read from the floating-point
// register file, 1'b0 otherwise.
function automatic logic is_rs2_fpr (input fu_op op);
    // no FP extension configured: the answer is a constant zero
    if (!FP_PRESENT)
        return 1'b0;

    unique case (op) inside
        [FSD:FSB],                       // FP stores
        [FADD:FMIN_MAX],                 // computational operations, sqrt excluded
        [FMADD:FNMADD],                  // fused computational operations
        FCVT_F2F,                        // vectorial F2F conversions require the target
        [FSGNJ:FMV_F2X],                 // sign injections and moves mapped to SGNJ
        FCMP,                            // comparisons
        [VFMIN:VFCPKCD_D] : return 1'b1; // additional vectorial FP operations
        default           : return 1'b0; // every other operation
    endcase
endfunction
// Returns 1'b1 if the immediate field of `op` refers to a floating-point
// register, 1'b0 otherwise. Ternary operations encode the rs3 address in the
// imm field; add/sub are covered as well.
function automatic logic is_imm_fpr (input fu_op op);
    // no FP extension configured: the answer is a constant zero
    if (!FP_PRESENT)
        return 1'b0;

    unique case (op) inside
        [FADD:FSUB],                         // ADD/SUB need their inputs as operand B/C
        [FMADD:FNMADD],                      // fused computational operations
        [VFCPKAB_S:VFCPKCD_D] : return 1'b1; // vectorial FP cast-and-pack operations
        default               : return 1'b0; // every other operation
    endcase
endfunction
// Returns 1'b1 if the destination register of `op` lives in the floating-point
// register file, 1'b0 otherwise.
function automatic logic is_rd_fpr (input fu_op op);
    // no FP extension configured: the answer is a constant zero
    if (!FP_PRESENT)
        return 1'b0;

    unique case (op) inside
        [FLD:FLB],                           // FP loads
        [FADD:FNMADD],                       // computational operations
        FCVT_I2F,                            // int-to-float casts
        FCVT_F2F,                            // float-to-float casts
        FSGNJ,                               // sign injections
        FMV_X2F,                             // GPR-to-FPR moves
        [VFMIN:VFSGNJX],                     // vectorial MIN/MAX and SGNJ
        [VFCPKAB_S:VFCPKCD_D] : return 1'b1; // vectorial FP cast-and-pack operations
        default               : return 1'b0; // every other operation
    endcase
endfunction
function automatic logic is_amo (fu_op op);
case (op) inside
[AMO_LRW:AMO_MINDU]: begin
@ -244,7 +376,10 @@ package ariane_pkg;
logic [REG_ADDR_SIZE-1:0] rs1; // register source address 1
logic [REG_ADDR_SIZE-1:0] rs2; // register source address 2
logic [REG_ADDR_SIZE-1:0] rd; // register destination address
logic [63:0] result; // for unfinished instructions this field also holds the immediate
logic [63:0] result; // for unfinished instructions this field also holds the immediate,
// for unfinished floating-point instructions that are partly encoded in rs2, this field also holds rs2;
// for unfinished floating-point fused operations (FMADD, FMSUB, FNMADD, FNMSUB)
// this field holds the address of the third operand in the floating-point register file
logic valid; // is the result valid
logic use_imm; // should we use the immediate as operand b?
logic use_zimm; // use zimm as operand a
@ -433,7 +568,7 @@ package ariane_pkg;
// ----------------------
function automatic logic [1:0] extract_transfer_size(fu_op op);
case (op)
LD, SD,
LD, SD, FLD, FSD,
AMO_LRD, AMO_SCD,
AMO_SWAPD, AMO_ADDD,
AMO_ANDD, AMO_ORD,
@ -442,7 +577,7 @@ package ariane_pkg;
AMO_MINDU: begin
return 2'b11;
end
LW, LWU, SW,
LW, LWU, SW, FLW, FSW,
AMO_LRW, AMO_SCW,
AMO_SWAPW, AMO_ADDW,
AMO_ANDW, AMO_ORW,
@ -451,8 +586,8 @@ package ariane_pkg;
AMO_MINWU: begin
return 2'b10;
end
LH, LHU, SH: return 2'b01;
LB, SB, LBU: return 2'b00;
LH, LHU, SH, FLH, FSH: return 2'b01;
LB, LBU, SB, FLB, FSB: return 2'b00;
default: return 2'b11;
endcase
endfunction

View file

@ -32,6 +32,13 @@ package riscv;
XLEN_128 = 2'b11
} xlen_t;
// Two-bit state encoding; it is the type of the `xs` and `fs` fields of the
// status register struct in this package.
// NOTE(review): the four names look like the extension-context states of the
// RISC-V privileged specification - confirm.
typedef enum logic [1:0] {
Off = 2'b00,
Initial = 2'b01,
Clean = 2'b10,
Dirty = 2'b11
} xs_t;
typedef struct packed {
logic sd; // signal dirty - read-only - hardwired zero
logic [62:36] wpri4; // writes preserved reads ignored
@ -44,8 +51,8 @@ package riscv;
logic mxr; // make executable readable
logic sum; // permit supervisor user memory access
logic mprv; // modify privilege - privilege level for ld/st
logic [1:0] xs; // extension register - hardwired to zero
logic [1:0] fs; // extension register - hardwired to zero
xs_t xs; // extension register - hardwired to zero
xs_t fs; // floating point extension register
priv_lvl_t mpp; // holds the previous privilege mode up to machine
logic [1:0] wpri2; // writes preserved reads ignored
logic spp; // holds the previous privilege mode up to supervisor
@ -104,6 +111,37 @@ package riscv;
logic [6:0] opcode;
} rtype_t;
// 32-bit instruction layout with three source registers (rs1, rs2, rs3) and one
// destination register; the packed ranges give the bit positions of each field
// inside the instruction word.
// NOTE(review): presumably used for the fused multiply-add opcodes - confirm in the decoder.
typedef struct packed {
logic [31:27] rs3; // third source register
logic [26:25] funct2;
logic [24:20] rs2; // second source register
logic [19:15] rs1; // first source register
logic [14:12] funct3;
logic [11:7] rd; // destination register
logic [6:0] opcode;
} r4type_t;
// 32-bit layout of a floating-point register-register instruction; the packed
// ranges give the bit positions of each field inside the instruction word.
typedef struct packed {
logic [31:27] funct5;
logic [26:25] fmt; // NOTE(review): presumably the FP format selector - confirm
logic [24:20] rs2; // second source register
logic [19:15] rs1; // first source register
logic [14:12] rm; // NOTE(review): presumably the rounding mode - confirm
logic [11:7] rd; // destination register
logic [6:0] opcode;
} rftype_t; // floating-point
// 32-bit layout of a vectorial floating-point instruction; the packed ranges
// give the bit positions of each field inside the instruction word.
typedef struct packed {
logic [31:30] funct2;
logic [29:25] vecfltop; // NOTE(review): presumably selects the vectorial FP operation - confirm
logic [24:20] rs2; // second source register
logic [19:15] rs1; // first source register
logic [14:14] repl; // NOTE(review): presumably a replicate flag for an operand - confirm
logic [13:12] vfmt; // NOTE(review): presumably the vector element format - confirm
logic [11:7] rd; // destination register
logic [6:0] opcode;
} rvftype_t; // vectorial floating-point
typedef struct packed {
logic [31:20] imm;
logic [19:15] rs1;
@ -142,6 +180,9 @@ package riscv;
typedef union packed {
logic [31:0] instr;
rtype_t rtype;
r4type_t r4type;
rftype_t rftype;
rvftype_t rvftype;
itype_t itype;
stype_t stype;
utype_t utype;
@ -151,27 +192,72 @@ package riscv;
// --------------------
// Opcodes
// --------------------
localparam OpcodeSystem = 7'h73;
localparam OpcodeFence = 7'h0f;
localparam OpcodeOp = 7'h33;
localparam OpcodeOp32 = 7'h3B;
localparam OpcodeOpimm = 7'h13;
localparam OpcodeOpimm32 = 7'h1B;
localparam OpcodeStore = 7'h23;
localparam OpcodeStoreFP = 7'b01_001_11;
localparam OpcodeLoad = 7'h03;
localparam OpcodeLoadFP = 7'b00_001_11;
localparam OpcodeBranch = 7'h63;
localparam OpcodeJalr = 7'h67;
localparam OpcodeJal = 7'h6f;
localparam OpcodeAuipc = 7'h17;
localparam OpcodeLui = 7'h37;
localparam OpcodeAmo = 7'h2F;
// RV32/64G listings:
// All values are 7-bit opcodes written as 7'b<2 bits>_<3 bits>_11. Each
// "Quadrant" group below shares the same upper two bits; the middle three bits
// enumerate the entries within the group.
// Quadrant 0
localparam OpcodeLoad = 7'b00_000_11;
localparam OpcodeLoadFp = 7'b00_001_11;
localparam OpcodeCustom0 = 7'b00_010_11;
localparam OpcodeMiscMem = 7'b00_011_11;
localparam OpcodeOpImm = 7'b00_100_11;
localparam OpcodeAuipc = 7'b00_101_11;
localparam OpcodeOpImm32 = 7'b00_110_11;
// Quadrant 1
localparam OpcodeStore = 7'b01_000_11;
localparam OpcodeStoreFp = 7'b01_001_11;
localparam OpcodeCustom1 = 7'b01_010_11;
localparam OpcodeAmo = 7'b01_011_11;
localparam OpcodeOp = 7'b01_100_11;
localparam OpcodeLui = 7'b01_101_11;
localparam OpcodeOp32 = 7'b01_110_11;
// Quadrant 2
localparam OpcodeMadd = 7'b10_000_11;
localparam OpcodeMsub = 7'b10_001_11;
localparam OpcodeNmsub = 7'b10_010_11;
localparam OpcodeNmadd = 7'b10_011_11;
localparam OpcodeOpFp = 7'b10_100_11;
localparam OpcodeRsrvd1 = 7'b10_101_11;
localparam OpcodeCustom2 = 7'b10_110_11;
// Quadrant 3
localparam OpcodeBranch = 7'b11_000_11;
localparam OpcodeJalr = 7'b11_001_11;
localparam OpcodeRsrvd2 = 7'b11_010_11;
localparam OpcodeJal = 7'b11_011_11;
localparam OpcodeSystem = 7'b11_100_11;
localparam OpcodeRsrvd3 = 7'b11_101_11;
localparam OpcodeCustom3 = 7'b11_110_11;
localparam OpcodeCJ = 3'b101;
localparam OpcodeCBeqz = 3'b110;
localparam OpcodeCBnez = 3'b111;
// RV64C listings:
// For each quadrant there is one 2-bit constant (OpcodeC0/C1/C2) identifying
// the quadrant, followed by eight 3-bit constants, one per entry of that
// quadrant. The 3-bit values repeat across quadrants, so they are only
// meaningful together with the quadrant constant.
// NOTE(review): presumably the 2-bit value is instr[1:0] and the 3-bit value
// is instr[15:13] of the compressed instruction - confirm against the
// compressed decoder.
// Quadrant 0
localparam OpcodeC0 = 2'b00;
localparam OpcodeC0Addi4spn = 3'b000;
localparam OpcodeC0Fld = 3'b001;
localparam OpcodeC0Lw = 3'b010;
localparam OpcodeC0Ld = 3'b011;
localparam OpcodeC0Rsrvd = 3'b100;
localparam OpcodeC0Fsd = 3'b101;
localparam OpcodeC0Sw = 3'b110;
localparam OpcodeC0Sd = 3'b111;
// Quadrant 1
localparam OpcodeC1 = 2'b01;
localparam OpcodeC1Addi = 3'b000;
localparam OpcodeC1Addiw = 3'b001;
localparam OpcodeC1Li = 3'b010;
localparam OpcodeC1LuiAddi16sp = 3'b011;
localparam OpcodeC1MiscAlu = 3'b100;
localparam OpcodeC1J = 3'b101;
localparam OpcodeC1Beqz = 3'b110;
localparam OpcodeC1Bnez = 3'b111;
// Quadrant 2
localparam OpcodeC2 = 2'b10;
localparam OpcodeC2Slli = 3'b000;
localparam OpcodeC2Fldsp = 3'b001;
localparam OpcodeC2Lwsp = 3'b010;
localparam OpcodeC2Ldsp = 3'b011;
localparam OpcodeC2JalrMvAdd = 3'b100;
localparam OpcodeC2Fsdsp = 3'b101;
localparam OpcodeC2Swsp = 3'b110;
localparam OpcodeC2Sdsp = 3'b111;
// ----------------------
// Performance Counters
// ----------------------
@ -235,6 +321,11 @@ package riscv;
// CSRs
// -----
typedef enum logic [11:0] {
// Floating-Point CSRs
CSR_FFLAGS = 12'h001,
CSR_FRM = 12'h002,
CSR_FCSR = 12'h003,
CSR_FTRAN = 12'h800,
// Supervisor Mode CSRs
CSR_SSTATUS = 12'h100,
CSR_SIE = 12'h104,
@ -321,6 +412,14 @@ package riscv;
csr_addr_t csr_decode;
} csr_t;
// Floating-Point control and status register (32-bit!)
// Packed layout, MSB first: 17 reserved bits, 7 bits of div/sqrt precision
// control, 3 bits of rounding mode and 5 exception flag bits (17+7+3+5 = 32).
// Note that only `reserved` is declared with absolute bit positions; the other
// fields are declared zero-based and get their position from the packing order.
typedef struct packed {
logic [31:15] reserved; // reserved for L extension, return 0 otherwise
logic [6:0] fprec; // div/sqrt precision control
logic [2:0] frm; // float rounding mode
logic [4:0] fflags; // float exception flags
} fcsr_t;
// -----
// Debug
// -----
@ -398,13 +497,14 @@ package riscv;
// trace log compatible to spikes commit log feature
// pragma translate_off
function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr, logic [4:0] rd, logic [63:0] result);
function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr, logic [4:0] rd, logic [63:0] result, logic rd_fpr);
string rd_s;
automatic string rf_s = rd_fpr ? "f" : "x";
if (rd < 10) rd_s = $sformatf("x %0d", rd);
else rd_s = $sformatf("x%0d", rd);
if (rd < 10) rd_s = $sformatf("%s %0d", rf_s, rd);
else rd_s = $sformatf("%s%0d", rf_s, rd);
if (rd != 0) begin
if (rd_fpr || rd != 0) begin
// 0 0x0000000080000118 (0xeecf8f93) x31 0x0000000080004000
return $sformatf("%d 0x%h (0x%h) %s 0x%h\n", priv_lvl, pc, instr, rd_s, result);
end else begin

View file

@ -19,23 +19,40 @@
import ariane_pkg::*;
module alu
(
module alu (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
input logic [63:0] pc_i,
input logic [TRANS_ID_BITS-1:0] trans_id_i,
input logic alu_valid_i,
input logic branch_valid_i,
input logic csr_valid_i,
input fu_op operator_i,
input logic [63:0] operand_a_i,
input logic [63:0] operand_b_i,
input logic [63:0] imm_i,
output logic [63:0] result_o,
output logic alu_branch_res_o,
output logic alu_valid_o,
output logic alu_ready_o,
output logic [TRANS_ID_BITS-1:0] alu_trans_id_o
output logic [TRANS_ID_BITS-1:0] alu_trans_id_o,
output exception_t alu_exception_o,
input logic fu_valid_i,
input logic is_compressed_instr_i,
input branchpredict_sbe_t branch_predict_i,
output branchpredict_t resolved_branch_o,
output logic resolve_branch_o,
input logic commit_i,
// to CSR file
output logic [11:0] csr_addr_o // CSR address to commit stage
);
// ALU is a single cycle instructions, hence it is always ready
assign alu_ready_o = 1'b1;
assign alu_valid_o = alu_valid_i;
logic csr_ready;
assign alu_ready_o = csr_ready;
assign alu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i;
assign alu_trans_id_o = trans_id_i;
logic [63:0] operand_a_rev;
@ -43,6 +60,8 @@ module alu
logic [64:0] operand_b_neg;
logic [65:0] adder_result_ext_o;
logic less; // handles both signed and unsigned forms
logic alu_branch_res;
logic [63:0] branch_result, csr_result;
// bit reverse operand_a for left shifts and bit counting
generate
@ -89,13 +108,13 @@ module alu
// get the right branch comparison result
always_comb begin : branch_resolve
// set comparison by default
alu_branch_res_o = 1'b1;
alu_branch_res = 1'b1;
case (operator_i)
EQ: alu_branch_res_o = adder_z_flag;
NE: alu_branch_res_o = ~adder_z_flag;
LTS, LTU: alu_branch_res_o = less;
GES, GEU: alu_branch_res_o = ~less;
default: alu_branch_res_o = 1'b1;
EQ: alu_branch_res = adder_z_flag;
NE: alu_branch_res = ~adder_z_flag;
LTS, LTU: alu_branch_res = less;
GES, GEU: alu_branch_res = ~less;
default: alu_branch_res = 1'b1;
endcase
end
@ -198,6 +217,48 @@ module alu
default: ; // default case to suppress unique warning
endcase
if (branch_valid_i) begin
result_o = branch_result;
end else if (csr_valid_i) begin
result_o = csr_result;
end
end
// ----------------------
// Branch Unit
// ----------------------
branch_unit branch_unit_i (
.operator_i,
.operand_a_i,
.operand_b_i,
.imm_i,
.pc_i,
.is_compressed_instr_i,
// any functional unit is valid, check that there is no accidental mis-predict
.fu_valid_i,
.branch_valid_i,
.branch_comp_res_i ( alu_branch_res ),
.branch_result_o ( branch_result ),
.branch_predict_i,
.resolved_branch_o,
.resolve_branch_o,
.branch_exception_o ( alu_exception_o )
);
// ----------------------
// CSR Buffer
// ----------------------
// Shares operator and operands with the ALU. Its ready signal drives
// alu_ready_o and its result is selected onto result_o when csr_valid_i is set.
// The buffered CSR address is forwarded on csr_addr_o; commit_i comes from the
// commit stage.
csr_buffer csr_buffer_i (
.clk_i,
.rst_ni,
.flush_i,
.csr_valid_i,
.operator_i,
.operand_a_i,
.operand_b_i,
.csr_ready_o ( csr_ready ),
.csr_result_o ( csr_result ),
.commit_i,
.csr_addr_o
);
endmodule

View file

@ -91,12 +91,8 @@ module ariane #(
logic [TRANS_ID_BITS-1:0] alu_trans_id_ex_id;
logic alu_valid_ex_id;
logic [63:0] alu_result_ex_id;
exception_t alu_exception_ex_id;
// Branches and Jumps
logic branch_ready_ex_id;
logic [TRANS_ID_BITS-1:0] branch_trans_id_ex_id;
logic [63:0] branch_result_ex_id;
exception_t branch_exception_ex_id;
logic branch_valid_ex_id;
logic branch_valid_id_ex;
branchpredict_sbe_t branch_predict_id_ex;
@ -114,17 +110,23 @@ module ariane #(
logic [TRANS_ID_BITS-1:0] mult_trans_id_ex_id;
logic [63:0] mult_result_ex_id;
logic mult_valid_ex_id;
// FPU
logic fpu_ready_ex_id;
logic fpu_valid_id_ex;
logic [1:0] fpu_fmt_id_ex;
logic [2:0] fpu_rm_id_ex;
logic [TRANS_ID_BITS-1:0] fpu_trans_id_ex_id;
logic [63:0] fpu_result_ex_id;
logic fpu_valid_ex_id;
exception_t fpu_exception_ex_id;
// CSR
logic csr_ready_ex_id;
logic csr_valid_id_ex;
logic [TRANS_ID_BITS-1:0] csr_trans_id_ex_id;
logic [63:0] csr_result_ex_id;
logic csr_valid_ex_id;
// --------------
// EX <-> COMMIT
// --------------
// CSR Commit
logic csr_commit_commit_ex;
logic dirty_fp_state;
// LSU Commit
logic lsu_commit_commit_ex;
logic lsu_commit_ready_ex_commit;
@ -139,10 +141,15 @@ module ariane #(
// --------------
logic [NR_COMMIT_PORTS-1:0][4:0] waddr_commit_id;
logic [NR_COMMIT_PORTS-1:0][63:0] wdata_commit_id;
logic [NR_COMMIT_PORTS-1:0] we_commit_id;
logic [NR_COMMIT_PORTS-1:0] we_gpr_commit_id;
logic [NR_COMMIT_PORTS-1:0] we_fpr_commit_id;
// --------------
// CSR <-> *
// --------------
logic [4:0] fflags_csr_commit;
riscv::xs_t fs;
logic [2:0] frm_csr_id_issue_ex;
logic [6:0] fprec_csr_ex;
logic enable_translation_csr_ex;
logic en_ld_st_translation_csr_ex;
riscv::priv_lvl_t ld_st_priv_lvl_csr_ex;
@ -159,6 +166,7 @@ module ariane #(
logic tw_csr_id;
logic tsr_csr_id;
logic dcache_en_csr_nbdcache;
logic csr_write_fflags_commit_cs;
logic icache_en_csr;
logic debug_mode;
logic single_step_csr_commit;
@ -252,6 +260,8 @@ module ariane #(
.issue_instr_ack_i ( issue_instr_issue_id ),
.priv_lvl_i ( priv_lvl ),
.fs_i ( fs ),
.frm_i ( frm_csr_id_issue_ex ),
.debug_mode_i ( debug_mode ),
.tvm_i ( tvm_csr_id ),
.tw_i ( tw_csr_id ),
@ -288,7 +298,6 @@ module ariane #(
.alu_ready_i ( alu_ready_ex_id ),
.alu_valid_o ( alu_valid_id_ex ),
// Branches and Jumps
.branch_ready_i ( branch_ready_ex_id ),
.branch_valid_o ( branch_valid_id_ex ), // branch is valid
.branch_predict_o ( branch_predict_id_ex ), // branch predict to ex
.resolve_branch_i ( resolve_branch_ex_id ), // in order to resolve the branch
@ -298,20 +307,25 @@ module ariane #(
// Multiplier
.mult_ready_i ( mult_ready_ex_id ),
.mult_valid_o ( mult_valid_id_ex ),
// FPU
.fpu_ready_i ( fpu_ready_ex_id ),
.fpu_valid_o ( fpu_valid_id_ex ),
.fpu_fmt_o ( fpu_fmt_id_ex ),
.fpu_rm_o ( fpu_rm_id_ex ),
// CSR
.csr_ready_i ( csr_ready_ex_id ),
.csr_valid_o ( csr_valid_id_ex ),
// Commit
.resolved_branch_i ( resolved_branch ),
.trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, branch_trans_id_ex_id, csr_trans_id_ex_id, mult_trans_id_ex_id }),
.wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, branch_result_ex_id, csr_result_ex_id, mult_result_ex_id }),
.ex_ex_i ( {{$bits(exception_t){1'b0}}, lsu_exception_ex_id, branch_exception_ex_id, {$bits(exception_t){1'b0}}, {$bits(exception_t){1'b0}} }),
.wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, branch_valid_ex_id, csr_valid_ex_id, mult_valid_ex_id }),
.trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, mult_trans_id_ex_id, fpu_trans_id_ex_id }),
.wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, mult_result_ex_id, fpu_result_ex_id }),
.ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception_t){1'b0}}, fpu_exception_ex_id }),
.wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, mult_valid_ex_id, fpu_valid_ex_id }),
.waddr_i ( waddr_commit_id ),
.wdata_i ( wdata_commit_id ),
.we_i ( we_commit_id ),
.we_gpr_i ( we_gpr_commit_id ),
.we_fpr_i ( we_fpr_commit_id ),
.commit_instr_o ( commit_instr_id_commit ),
.commit_ack_i ( commit_ack ),
.*
@ -321,6 +335,8 @@ module ariane #(
// EX
// ---------
ex_stage ex_stage_i (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_ctrl_ex ),
.fu_i ( fu_id_ex ),
.operator_i ( operator_id_ex ),
@ -336,16 +352,16 @@ module ariane #(
.alu_result_o ( alu_result_ex_id ),
.alu_trans_id_o ( alu_trans_id_ex_id ),
.alu_valid_o ( alu_valid_ex_id ),
.alu_exception_o ( alu_exception_ex_id ),
// Branches and Jumps
.branch_ready_o ( branch_ready_ex_id ),
.branch_valid_o ( branch_valid_ex_id ),
.branch_valid_i ( branch_valid_id_ex ),
.branch_trans_id_o ( branch_trans_id_ex_id ),
.branch_result_o ( branch_result_ex_id ),
.branch_exception_o ( branch_exception_ex_id ),
.branch_predict_i ( branch_predict_id_ex ), // branch predict to ex
.resolved_branch_o ( resolved_branch ),
.resolve_branch_o ( resolve_branch_ex_id ),
// CSR
.csr_valid_i ( csr_valid_id_ex ),
.csr_addr_o ( csr_addr_ex_csr ),
.csr_commit_i ( csr_commit_commit_ex ), // from commit
// LSU
.lsu_ready_o ( lsu_ready_ex_id ),
.lsu_valid_i ( lsu_valid_id_ex ),
@ -356,17 +372,26 @@ module ariane #(
.lsu_commit_ready_o ( lsu_commit_ready_ex_commit ), // to commit
.lsu_exception_o ( lsu_exception_ex_id ),
.no_st_pending_o ( no_st_pending_ex_commit ),
// MULT
.mult_ready_o ( mult_ready_ex_id ),
.mult_valid_i ( mult_valid_id_ex ),
.mult_trans_id_o ( mult_trans_id_ex_id ),
.mult_result_o ( mult_result_ex_id ),
.mult_valid_o ( mult_valid_ex_id ),
// FPU
.fpu_ready_o ( fpu_ready_ex_id ),
.fpu_valid_i ( fpu_valid_id_ex ),
.fpu_fmt_i ( fpu_fmt_id_ex ),
.fpu_rm_i ( fpu_rm_id_ex ),
.fpu_frm_i ( frm_csr_id_issue_ex ),
.fpu_prec_i ( fprec_csr_ex ),
.fpu_trans_id_o ( fpu_trans_id_ex_id ),
.fpu_result_o ( fpu_result_ex_id ),
.fpu_valid_o ( fpu_valid_ex_id ),
.fpu_exception_o ( fpu_exception_ex_id ),
.amo_valid_commit_i ( amo_valid_commit ),
.amo_req_o ( amo_req ),
.amo_resp_i ( amo_resp ),
// CSR
.csr_ready_o ( csr_ready_ex_id ),
.csr_valid_i ( csr_valid_id_ex ),
.csr_trans_id_o ( csr_trans_id_ex_id ),
.csr_result_o ( csr_result_ex_id ),
.csr_valid_o ( csr_valid_ex_id ),
.csr_addr_o ( csr_addr_ex_csr ),
.csr_commit_i ( csr_commit_commit_ex ), // from commit
// Performance counters
.itlb_miss_o ( itlb_miss_ex_perf ),
.dtlb_miss_o ( dtlb_miss_ex_perf ),
@ -382,16 +407,9 @@ module ariane #(
.asid_i ( asid_csr_ex ), // from CSR
.icache_areq_i ( icache_areq_cache_ex ),
.icache_areq_o ( icache_areq_ex_cache ),
.mult_ready_o ( mult_ready_ex_id ),
.mult_valid_i ( mult_valid_id_ex ),
.mult_trans_id_o ( mult_trans_id_ex_id ),
.mult_result_o ( mult_result_ex_id ),
.mult_valid_o ( mult_valid_ex_id ),
// DCACHE interfaces
.dcache_req_ports_i ( dcache_req_ports_cache_ex ),
.dcache_req_ports_o ( dcache_req_ports_ex_cache ),
.*
.dcache_req_ports_o ( dcache_req_ports_ex_cache )
);
// ---------
@ -403,6 +421,7 @@ module ariane #(
.halt_i ( halt_ctrl ),
.flush_dcache_i ( dcache_flush_ctrl_cache ),
.exception_o ( ex_commit ),
.dirty_fp_state_o ( dirty_fp_state ),
.debug_mode_i ( debug_mode ),
.debug_req_i ( debug_req ),
.single_step_i ( single_step_csr_commit ),
@ -411,7 +430,8 @@ module ariane #(
.no_st_pending_i ( no_st_pending_ex_commit ),
.waddr_o ( waddr_commit_id ),
.wdata_o ( wdata_commit_id ),
.we_o ( we_commit_id ),
.we_gpr_o ( we_gpr_commit_id ),
.we_fpr_o ( we_fpr_commit_id ),
.commit_lsu_o ( lsu_commit_commit_ex ),
.commit_lsu_ready_i ( lsu_commit_ready_ex_commit ),
.amo_valid_commit_o ( amo_valid_commit ),
@ -421,6 +441,7 @@ module ariane #(
.csr_op_o ( csr_op_commit_csr ),
.csr_wdata_o ( csr_wdata_commit_csr ),
.csr_rdata_i ( csr_rdata_csr_commit ),
.csr_write_fflags_o ( csr_write_fflags_commit_cs ),
.csr_exception_i ( csr_exception_csr_commit ),
.fence_i_o ( fence_i_commit_controller ),
.fence_o ( fence_commit_controller ),
@ -441,6 +462,8 @@ module ariane #(
.commit_ack_i ( commit_ack ),
.ex_i ( ex_commit ),
.csr_op_i ( csr_op_commit_csr ),
.csr_write_fflags_i ( csr_write_fflags_commit_cs ),
.dirty_fp_state_i ( dirty_fp_state ),
.csr_addr_i ( csr_addr_ex_csr ),
.csr_wdata_i ( csr_wdata_commit_csr ),
.csr_rdata_o ( csr_rdata_csr_commit ),
@ -451,6 +474,10 @@ module ariane #(
.set_debug_pc_o ( set_debug_pc ),
.trap_vector_base_o ( trap_vector_base_commit_pcgen ),
.priv_lvl_o ( priv_lvl ),
.fs_o ( fs ),
.fflags_o ( fflags_csr_commit ),
.frm_o ( frm_csr_id_issue_ex ),
.fprec_o ( fprec_csr_ex ),
.ld_st_priv_lvl_o ( ld_st_priv_lvl_csr_ex ),
.en_translation_o ( enable_translation_csr_ex ),
.en_ld_st_translation_o ( en_ld_st_translation_csr_ex ),
@ -584,7 +611,8 @@ module ariane #(
// write-back
assign tracer_if.waddr = waddr_commit_id;
assign tracer_if.wdata = wdata_commit_id;
assign tracer_if.we = we_commit_id;
assign tracer_if.we_gpr = we_gpr_commit_id;
assign tracer_if.we_fpr = we_fpr_commit_id;
// commit
assign tracer_if.commit_instr = commit_instr_id_commit;
assign tracer_if.commit_ack = commit_ack;

View file

@ -23,151 +23,98 @@
// latches and is thus smaller than the flip-flop based RF.
//
module ariane_regfile #(
parameter DATA_WIDTH = 32
module ariane_regfile_lol #(
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2,
parameter int unsigned NR_WRITE_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0
)(
// Clock and Reset
input logic clk,
input logic rst_n,
input logic test_en_i,
//Read port R1
input logic [4:0] raddr_a_i,
output logic [DATA_WIDTH-1:0] rdata_a_o,
//Read port R2
input logic [4:0] raddr_b_i,
output logic [DATA_WIDTH-1:0] rdata_b_o,
// Write port W1
input logic [4:0] waddr_a_i,
input logic [DATA_WIDTH-1:0] wdata_a_i,
input logic we_a_i,
// Write port W2
input logic [4:0] waddr_b_i,
input logic [DATA_WIDTH-1:0] wdata_b_i,
input logic we_b_i
// clock and reset
input logic clk_i,
input logic rst_ni,
// disable clock gates for testing
input logic test_en_i,
// read port
input logic [NR_READ_PORTS-1:0][4:0] raddr_i,
output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port
input logic [NR_WRITE_PORTS-1:0][4:0] waddr_i,
input logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [NR_WRITE_PORTS-1:0] we_i
);
localparam ADDR_WIDTH = 5;;
localparam NUM_WORDS = 2**ADDR_WIDTH;
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH;
logic [DATA_WIDTH-1:0] mem[NUM_WORDS];
logic [NUM_WORDS-1:ZERO_REG_ZERO] mem_clocks;
logic [NUM_WORDS-1:1] waddr_onehot_a;
logic [NUM_WORDS-1:1] waddr_onehot_b, waddr_onehot_b_q;
logic [DATA_WIDTH-1:0] mem[NUM_WORDS];
logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:1] waddr_onehot,waddr_onehot_q;
logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_q;
logic [NUM_WORDS-1:1] mem_clocks;
logic [DATA_WIDTH-1:0] wdata_a_q;
logic [DATA_WIDTH-1:0] wdata_b_q;
// Write port W1
logic [ADDR_WIDTH-1:0] raddr_a_int, raddr_b_int, waddr_a_int;
// decode addresses
for (genvar i = 0; i < NR_READ_PORTS; i++)
assign rdata_o[i] = mem[raddr_i[i][ADDR_WIDTH-1:0]];
assign raddr_a_int = raddr_a_i[ADDR_WIDTH-1:0];
assign raddr_b_int = raddr_b_i[ADDR_WIDTH-1:0];
assign waddr_a_int = waddr_a_i[ADDR_WIDTH-1:0];
int unsigned i;
int unsigned j;
int unsigned k;
int unsigned l;
genvar x;
logic clk_int;
//-----------------------------------------------------------------------------
//-- READ : Read address decoder RAD
//-----------------------------------------------------------------------------
assign rdata_a_o = mem[raddr_a_int];
assign rdata_b_o = mem[raddr_b_int];
//-----------------------------------------------------------------------------
// WRITE : SAMPLE INPUT DATA
//---------------------------------------------------------------------------
cluster_clock_gating CG_WE_GLOBAL
(
.clk_i ( clk ),
.en_i ( we_a_i ),
.test_en_i ( test_en_i ),
.clk_o ( clk_int )
);
// use clk_int here, since otherwise we don't want to write anything anyway
always_ff @(posedge clk_int, negedge rst_n) begin : sample_waddr
if (~rst_n) begin
wdata_a_q <= '0;
wdata_b_q <= '0;
waddr_onehot_b_q <= '0;
always_ff @(posedge clk_i, negedge rst_ni) begin : sample_waddr
if (~rst_ni) begin
wdata_q <= '0;
end else begin
if (we_a_i)
wdata_a_q <= wdata_a_i;
if (we_b_i)
wdata_b_q <= wdata_b_i;
waddr_onehot_b_q <= waddr_onehot_b;
for (int unsigned i = 0; i < NR_WRITE_PORTS; i++)
// enable flipflop will most probably infer clock gating
if (we_i[i]) begin
wdata_q[i] <= wdata_i[i];
end
waddr_onehot_q <= waddr_onehot;
end
end
//-----------------------------------------------------------------------------
//-- WRITE : Write Address Decoder (WAD), combinatorial process
//-----------------------------------------------------------------------------
always_comb begin : p_WADa
for (i = 1; i < NUM_WORDS; i++) begin : p_WordItera
if ((we_a_i == 1'b1) && (waddr_a_i == i))
waddr_onehot_a[i] = 1'b1;
else
waddr_onehot_a[i] = 1'b0;
// WRITE : Write Address Decoder (WAD), combinatorial process
// For every write port i, produce a one-hot vector over the words: bit j is
// set when the port's write enable is asserted and its address equals j.
// The inner loop starts at j = 1, so no select bit is generated for word 0.
always_comb begin : decode_write_addess
for (int unsigned i = 0; i < NR_WRITE_PORTS; i++) begin
for (int unsigned j = 1; j < NUM_WORDS; j++) begin
if (we_i[i] && (waddr_i[i] == j))
waddr_onehot[i][j] = 1'b1;
else
waddr_onehot[i][j] = 1'b0;
end
end
end
always_comb begin : p_WADb
for (j = 1; j < NUM_WORDS; j++) begin : p_WordIterb
if ((we_b_i == 1'b1) && (waddr_b_i == j))
waddr_onehot_b[j] = 1'b1;
else
waddr_onehot_b[j] = 1'b0;
end
// WRITE : Clock gating (if integrated clock-gating cells are available)
// One clock gate per word x: the gate is enabled when any write port's one-hot
// decode selects word x; test_en_i is passed to every gate.
// NOTE(review): x starts at ZERO_REG_ZERO (default 0), but waddr_onehot is
// declared [NUM_WORDS-1:1] and the address decoder starts at word 1, so for
// ZERO_REG_ZERO = 0 the access waddr_onehot[i][0] looks out of range - confirm
// the intended behaviour for a writable word 0.
for (genvar x = ZERO_REG_ZERO; x < NUM_WORDS; x++) begin
// collect the select bit for word x from every write port
logic [NR_WRITE_PORTS-1:0] waddr_ored;
for (genvar i = 0; i < NR_WRITE_PORTS; i++)
assign waddr_ored[i] = waddr_onehot[i][x];
cluster_clock_gating i_cg (
.clk_i ( clk_i ),
.en_i ( |waddr_ored ),
.test_en_i ( test_en_i ),
.clk_o ( mem_clocks[x] )
);
end
//-----------------------------------------------------------------------------
//-- WRITE : Clock gating (if integrated clock-gating cells are available)
//-----------------------------------------------------------------------------
generate
for (x = 1; x < NUM_WORDS; x++)
begin : CG_CELL_WORD_ITER
cluster_clock_gating CG_Inst
(
.clk_i ( clk_int ),
.en_i ( waddr_onehot_a[x] | waddr_onehot_b[x] ),
.test_en_i ( test_en_i ),
.clk_o ( mem_clocks[x] )
);
end
endgenerate
//-----------------------------------------------------------------------------
//-- WRITE : Write operation
//-----------------------------------------------------------------------------
//-- Generate M = WORDS sequential processes, each of which describes one
//-- word of the memory. The processes are synchronized with the clocks
//-- ClocksxC(i), i = 0, 1, ..., M-1
//-- Use active low, i.e. transparent on low latches as storage elements
//-- Data is sampled on rising clock edge
// Generate M = WORDS sequential processes, each of which describes one
// word of the memory. The processes are synchronized with the clocks
// ClocksxC(i), i = 0, 1, ..., M-1
// Use active low, i.e. transparent on low latches as storage elements
// Data is sampled on rising clock edge
// Integer registers
always_latch begin : latch_wdata
// Note: The assignment has to be done inside this process or Modelsim complains about it
mem[0] = '0;
if (ZERO_REG_ZERO)
mem[0] = '0;
for(k = 1; k < NUM_WORDS; k++)
begin : w_WordIter
if (mem_clocks[k] == 1'b1)
mem[k] = waddr_onehot_b_q[k] ? wdata_b_q : wdata_a_q;
end
for (int unsigned i = 0; i < NR_WRITE_PORTS; i++) begin
for (int unsigned k = ZERO_REG_ZERO; k < NUM_WORDS; k++) begin
if (mem_clocks[k] && waddr_onehot_q[i][k])
mem[k] = wdata_q[i];
end
end
end
endmodule

View file

@ -23,87 +23,63 @@
//
module ariane_regfile #(
parameter DATA_WIDTH = 32
parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2,
parameter int unsigned NR_WRITE_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0
)(
// Clock and Reset
input logic clk,
input logic rst_n,
input logic test_en_i,
//Read port R1
input logic [4:0] raddr_a_i,
output logic [DATA_WIDTH-1:0] rdata_a_o,
//Read port R2
input logic [4:0] raddr_b_i,
output logic [DATA_WIDTH-1:0] rdata_b_o,
// Write port W1
input logic [4:0] waddr_a_i,
input logic [DATA_WIDTH-1:0] wdata_a_i,
input logic we_a_i,
// Write port W2
input logic [4:0] waddr_b_i,
input logic [DATA_WIDTH-1:0] wdata_b_i,
input logic we_b_i
// clock and reset
input logic clk_i,
input logic rst_ni,
// disable clock gates for testing
input logic test_en_i,
// read port
input logic [NR_READ_PORTS-1:0][4:0] raddr_i,
output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port
input logic [NR_WRITE_PORTS-1:0][4:0] waddr_i,
input logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [NR_WRITE_PORTS-1:0] we_i
);
localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH;
logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] rf_reg;
logic [NUM_WORDS-1:0] we_a_dec, we_b_dec;
logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem;
logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:0] we_dec;
always_comb begin : we_a_decoder
for (int i = 0; i < NUM_WORDS; i++) begin
if (waddr_a_i == i)
we_a_dec[i] = we_a_i;
else
we_a_dec[i] = 1'b0;
end
end
always_comb begin : we_b_decoder
for (int i = 0; i < NUM_WORDS; i++) begin
if (waddr_b_i == i)
we_b_dec[i] = we_b_i;
else
we_b_dec[i] = 1'b0;
end
end
generate
// loop from 1 to NUM_WORDS-1 as R0 is nil
for (genvar i = 1; i < NUM_WORDS; i++) begin : rf_gen
always_ff @(posedge clk, negedge rst_n) begin : register_write_behavioral
if (rst_n==1'b0) begin
rf_reg[i] <= 'b0;
end else begin
if (we_a_dec[i])
rf_reg[i] <= wdata_a_i;
if (we_b_dec[i])
rf_reg[i] <= wdata_b_i;
// Write-enable decoder: for every write port j, we_dec[j] carries the port's
// write enable on the bit selected by its write address and zero on all other
// bits. All NUM_WORDS words, including word 0, are decoded.
always_comb begin : we_decoder
for (int unsigned j = 0; j < NR_WRITE_PORTS; j++) begin
for (int unsigned i = 0; i < NUM_WORDS; i++) begin
if (waddr_i[j] == i)
we_dec[j][i] = we_i[j];
else
we_dec[j][i] = 1'b0;
end
end
end
end
// R0 is nil
`ifdef verilator
always_ff @(posedge clk, negedge rst_n) begin
rf_reg[0] <= '0;
// Sequential write process. All NUM_WORDS words (including word 0) are written
// from the decoded write enables; word 0 is only forced to zero when
// ZERO_REG_ZERO is set.
always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral
if (~rst_ni) begin
// asynchronous, active-low reset clears the whole register file
mem <= '{default: '0};
end else begin
for (int unsigned j = 0; j < NR_WRITE_PORTS; j++) begin
for (int unsigned i = 0; i < NUM_WORDS; i++) begin
// if several ports write the same word, the port with the highest
// index wins because the last non-blocking assignment takes effect
if (we_dec[j][i]) begin
mem[i] <= wdata_i[j];
end
end
// placed after the writes so it overrides any write to word 0
if (ZERO_REG_ZERO) begin
mem[0] <= '0;
end
end
end
end
`else
assign rf_reg[0] = '0;
`endif
endgenerate
assign rdata_a_o = rf_reg[raddr_a_i];
assign rdata_b_o = rf_reg[raddr_b_i];
// Combinational read: each read port returns the word selected by its address.
for (genvar i = 0; i < NR_READ_PORTS; i++) begin
assign rdata_o[i] = mem[raddr_i[i]];
end
endmodule

View file

@ -15,7 +15,6 @@
import ariane_pkg::*;
module branch_unit (
input logic [TRANS_ID_BITS-1:0] trans_id_i,
input fu_op operator_i, // comparison operation to perform
input logic [63:0] operand_a_i, // contains content of RS 1
input logic [63:0] operand_b_i, // contains content of RS 2
@ -25,10 +24,7 @@ module branch_unit (
input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict
input logic branch_valid_i,
input logic branch_comp_res_i, // branch comparison result from ALU
output logic branch_ready_o,
output logic branch_valid_o,
output logic [63:0] branch_result_o,
output logic [TRANS_ID_BITS-1:0] branch_trans_id_o,
input branchpredict_sbe_t branch_predict_i, // this is the address we predicted
output branchpredict_t resolved_branch_o, // this is the actual address we are targeting
@ -38,10 +34,6 @@ module branch_unit (
);
logic [63:0] target_address;
logic [63:0] next_pc;
// branches are single cycle at the moment, feed-through the control signals
assign branch_trans_id_o = trans_id_i;
assign branch_valid_o = branch_valid_i;
assign branch_ready_o = 1'b1; // we are always ready
// here we handle the various possibilities of mis-predicts
always_comb begin : mispredict_handler

View file

@ -180,7 +180,7 @@ module miss_handler #(
IDLE: begin
// lowest priority are AMOs, wait until everything else is served before going for the AMOs
if (amo_req_i.req) begin
if (amo_req_i.req && !busy_i) begin
// 1. Flush the cache
if (!serve_amo_q) begin
state_d = FLUSH_REQ_STATUS;
@ -203,6 +203,8 @@ module miss_handler #(
// here comes the refill portion of code
if (miss_req_valid[i] && !miss_req_bypass[i]) begin
state_d = MISS;
// we are taking another request so don't take the AMO
serve_amo_d = 1'b0;
// save to MSHR
mshr_d.valid = 1'b1;
mshr_d.we = miss_req_we[i];

View file

@ -50,8 +50,8 @@ module std_icache #(
logic flushing_d, flushing_q;
// signals
logic [ICACHE_SET_ASSOC-1:0] req; // request to data memory
logic [ICACHE_SET_ASSOC-1:0] vld_req; // request to valid/tag memory
logic [ICACHE_SET_ASSOC-1:0] req; // request to data memory
logic [ICACHE_SET_ASSOC-1:0] vld_req; // request to valid/tag memory
logic [(ICACHE_LINE_WIDTH+7)/8-1:0] data_be; // byte enable for data memory
logic [(2**NR_AXI_REFILLS-1):0][7:0] be; // byte enable
logic [$clog2(ICACHE_NUM_WORD)-1:0] addr; // this is a cache-line address, to memory array
@ -109,24 +109,24 @@ module std_icache #(
.rdata_o ( data_rdata[i] )
);
end
// --------------------
// Tag Comparison and way select
// --------------------
// cacheline selected by hit
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel;
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel;
assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2];
generate
generate
for (genvar i=0;i<ICACHE_SET_ASSOC;i++) begin : g_tag_cmpsel
assign hit[i] = (tag_rdata[i].tag == tag) ? tag_rdata[i].valid : 1'b0;
assign cl_sel[i] = (hit[i]) ? data_rdata[i][{idx,5'b0} +: FETCH_WIDTH] : '0;
assign way_valid[i] = tag_rdata[i].valid;
end
endgenerate
// OR reduction of selected cachelines
always_comb begin : p_reduction
dreq_o.data = cl_sel[0];
@ -177,7 +177,7 @@ module std_icache #(
assign dreq_o.ex = areq_i.fetch_exception;
assign addr = (state_q==FLUSH) ? cnt_q : vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_BYTE_OFFSET];
// ------------------
// Cache Ctrl
@ -219,7 +219,7 @@ module std_icache #(
IDLE: begin
dreq_o.ready = 1'b1;
vaddr_d = dreq_i.vaddr;
// we are getting a new request
if (dreq_i.req) begin
// request the content of all arrays
@ -239,7 +239,7 @@ module std_icache #(
// ~> compare the tag
TAG_CMP, TAG_CMP_SAVED: begin
areq_o.fetch_req = 1'b1; // request address translation
// (speculatively) request the content of all arrays
req = '1;
vld_req = '1;
@ -255,7 +255,7 @@ module std_icache #(
dreq_o.ready = 1'b1;
dreq_o.valid = 1'b1;
vaddr_d = dreq_i.vaddr;
// we've got another request
if (dreq_i.req) begin
// save the index and stay in compare mode
@ -335,7 +335,7 @@ module std_icache #(
req = evict_way_q;
vld_req = evict_way_q;
if (axi.r_valid) begin
we = 1'b1;
tag_wdata.tag = tag_q;
@ -380,7 +380,14 @@ module std_icache #(
endcase
// those are the states where we need to wait a little longer until we can safely exit
if (dreq_i.kill_s2 && !(state_q inside {REFILL, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP}) && !dreq_o.ready) begin
if (dreq_i.kill_s2 && !(state_q inside {
REFILL,
WAIT_AXI_R_RESP,
WAIT_KILLED_AXI_R_RESP,
WAIT_KILLED_REFILL,
WAIT_ADDRESS_TRANSLATION,
WAIT_ADDRESS_TRANSLATION_KILLED})
&& !dreq_o.ready) begin
state_d = IDLE;
end
@ -443,14 +450,14 @@ module std_icache #(
//pragma translate_off
`ifndef VERILATOR
initial begin
assert ($bits(axi.aw_addr) == 64)
assert ($bits(axi.aw_addr) == 64)
else $fatal(1, "[icache] Ariane needs a 64-bit bus");
end
// assert that cache only hits on one way
onehot: assert property (
@(posedge clk_i) disable iff (~rst_ni) $onehot0(hit))
@(posedge clk_i) disable iff (~rst_ni) $onehot0(hit))
else $fatal(1, "[icache] Hit should be one-hot encoded");
`endif
//pragma translate_on
//pragma translate_on
endmodule

View file

@ -24,7 +24,7 @@ module clint #(
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic testmode_i,
AXI_BUS.Slave slave,
input logic rtc_i, // Real-time clock in (usually 32.768 kHz)
@ -146,7 +146,7 @@ module clint #(
// 1. Put the RTC input through a classic two stage edge-triggered synchronizer to filter out any
// metastability effects (or at least make them unlikely :-))
sync_wedge i_sync_edge (
.en_i ( 1'b1 ),
.en_i ( ~testmode_i ),
.serial_i ( rtc_i ),
.r_edge_o ( increase_timer ),
.f_edge_o ( ), // left open

View file

@ -22,6 +22,7 @@ module commit_stage #(
input logic halt_i, // request to halt the core
input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline
output exception_t exception_o, // take exception to controller
output logic dirty_fp_state_o, // mark the F state as dirty
input logic debug_mode_i, // we are in debug mode
input logic debug_req_i, // debug unit is requesting to enter debug mode
input logic single_step_i, // we are in single step debug mode
@ -31,7 +32,8 @@ module commit_stage #(
// to register file
output logic [NR_COMMIT_PORTS-1:0][4:0] waddr_o, // register file write address
output logic [NR_COMMIT_PORTS-1:0][63:0] wdata_o, // register file write data
output logic [NR_COMMIT_PORTS-1:0] we_o, // register file write enable
output logic [NR_COMMIT_PORTS-1:0] we_gpr_o, // register file write enable
output logic [NR_COMMIT_PORTS-1:0] we_fpr_o, // floating point register enable
// Atomic memory operations
input amo_resp_t amo_resp_i, // result of AMO operation
// to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline)
@ -41,6 +43,7 @@ module commit_stage #(
output logic [63:0] csr_wdata_o, // data to write to CSR
input logic [63:0] csr_rdata_i, // data to read from CSR
input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit)
output logic csr_write_fflags_o, // write the fflags CSR
// commit signals to ex
output logic commit_lsu_o, // commit the pending store
input logic commit_lsu_ready_i, // commit buffer of LSU is ready
@ -53,10 +56,12 @@ module commit_stage #(
output logic sfence_vma_o // flush TLBs and pipeline
);
// TODO make these parametric with NR_COMMIT_PORTS
assign waddr_o[0] = commit_instr_i[0].rd[4:0];
assign waddr_o[1] = commit_instr_i[1].rd[4:0];
assign pc_o = commit_instr_i[0].pc;
assign pc_o = commit_instr_i[0].pc;
assign dirty_fp_state_o = |we_fpr_o;
logic instr_0_is_amo;
assign instr_0_is_amo = is_amo(commit_instr_i[0].op);
@ -65,25 +70,27 @@ module commit_stage #(
// -------------------
// write register file or commit instruction in LSU or CSR Buffer
always_comb begin : commit
// default assignments
commit_ack_o[0] = 1'b0;
commit_ack_o[1] = 1'b0;
commit_ack_o[0] = 1'b0;
commit_ack_o[1] = 1'b0;
amo_valid_commit_o = 1'b0;
we_o[0] = 1'b0;
we_o[1] = 1'b0;
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
we_gpr_o[0] = 1'b0;
we_gpr_o[1] = 1'b0;
we_fpr_o = '{default: 1'b0};
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
// amos will commit on port 0
wdata_o[0] = (amo_resp_i.ack) ? amo_resp_i.result : commit_instr_i[0].result;
wdata_o[1] = commit_instr_i[1].result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = 64'b0;
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
csr_wdata_o = 64'b0;
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
csr_write_fflags_o = 1'b0;
flush_commit_o = 1'b0;
// we will not commit the instruction if we took an exception
@ -92,6 +99,8 @@ module commit_stage #(
// also check that there is no atomic memory operation committing, right now this is the only operation
// which will take longer than one cycle to commit
if (commit_instr_i[0].valid && !halt_i) begin
// we have to exclude the AMOs from debug mode as we are not jumping to debug
// while committing an AMO
if (!debug_req_i || debug_mode_i) begin
commit_ack_o[0] = 1'b1;
// register will be the all zero register.
@ -101,7 +110,10 @@ module commit_stage #(
if (!exception_o.valid) begin
// we can definitely write the register file
// if the instruction is not committing anything the destination
we_o[0] = 1'b1;
if (is_rd_fpr(commit_instr_i[0].op))
we_fpr_o[0] = 1'b1;
else
we_gpr_o[0] = 1'b1;
// check whether the instruction we retire was a store
// do not commit the instruction if we got an exception since the store buffer will be cleared
@ -113,6 +125,14 @@ module commit_stage #(
else // if the LSU buffer is not ready - do not commit, wait
commit_ack_o[0] = 1'b0;
end
// ---------
// FPU Flags
// ---------
if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
// write the CSR with potential exception flags from retiring floating point instruction
csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]};
csr_write_fflags_o = 1'b1;
end
end
// ---------
@ -158,13 +178,13 @@ module commit_stage #(
// ------------------
// AMO
// ------------------
if (instr_0_is_amo && !commit_instr_i[0].ex.valid) begin
if (RVA && instr_0_is_amo && !commit_instr_i[0].ex.valid) begin
// AMO finished
commit_ack_o[0] = amo_resp_i.ack;
// flush the pipeline
flush_commit_o = amo_resp_i.ack;
amo_valid_commit_o = 1'b1;
we_o[0] = amo_resp_i.ack;
we_gpr_o[0] = amo_resp_i.ack;
end
end
@ -180,11 +200,27 @@ module commit_stage #(
&& !instr_0_is_amo
&& !single_step_i) begin
// only if the first instruction didn't throw an exception and this instruction won't throw an exception
// and the operator is of type ALU, LOAD, CTRL_FLOW, MULT
// and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC
if (!exception_o.valid && !commit_instr_i[1].ex.valid
&& (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin
we_o[1] = 1'b1;
&& (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin
if (is_rd_fpr(commit_instr_i[1].op))
we_fpr_o[1] = 1'b1;
else
we_gpr_o[1] = 1'b1;
commit_ack_o[1] = 1'b1;
// additionally check if we are retiring an FPU instruction because we need to make sure that we write all
// exception flags
if (commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin
if (csr_write_fflags_o)
csr_wdata_o = {59'b0, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])};
else
csr_wdata_o = {59'b0, commit_instr_i[1].ex.cause[4:0]};
csr_write_fflags_o = 1'b1;
end
end
end
end

@ -1 +1 @@
Subproject commit 9278bc769f3efd006864a7ef7721f2796ed968e6
Subproject commit 21a060d2c2c75173312b82cc72db96a2c62e66c5

View file

@ -21,10 +21,10 @@ import ariane_pkg::*;
module compressed_decoder
(
input logic [31:0] instr_i,
output logic [31:0] instr_o,
output logic illegal_instr_o,
output logic is_compressed_o
input logic [31:0] instr_i,
output logic [31:0] instr_o,
output logic illegal_instr_o,
output logic is_compressed_o
);
// -------------------
@ -36,33 +36,46 @@ module compressed_decoder
is_compressed_o = 1'b1;
instr_o = instr_i;
// I: | imm[11:0] | rs1 | funct3 | rd | opcode |
// S: | imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode |
unique case (instr_i[1:0])
// C0
2'b00: begin
riscv::OpcodeC0: begin
unique case (instr_i[15:13])
3'b000: begin
riscv::OpcodeC0Addi4spn: begin
// c.addi4spn -> addi rd', x2, imm
instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], riscv::OpcodeOpimm};
instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], riscv::OpcodeOpImm};
if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1;
end
3'b010: begin
riscv::OpcodeC0Fld: begin
// c.fld -> fld rd', imm(rs1')
// CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 |
instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], riscv::OpcodeLoadFp};
end
riscv::OpcodeC0Lw: begin
// c.lw -> lw rd', imm(rs1')
instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6], 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], riscv::OpcodeLoad};
end
3'b011: begin
riscv::OpcodeC0Ld: begin
// c.ld -> ld rd', imm(rs1')
// | imm[11:0] | rs1 | funct3 | rd | opcode|
// CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 |
instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], riscv::OpcodeLoad};
end
3'b110: begin
riscv::OpcodeC0Fsd: begin
// c.fsd -> fsd rs2', imm(rs1')
instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStoreFp};
end
riscv::OpcodeC0Sw: begin
// c.sw -> sw rs2', imm(rs1')
instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6], 2'b00, riscv::OpcodeStore};
end
3'b111: begin
riscv::OpcodeC0Sd: begin
// c.sd -> sd rs2', imm(rs1')
instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStore};
end
@ -74,137 +87,143 @@ module compressed_decoder
end
// C1
2'b01: begin
unique case (instr_i[15:13])
3'b000: begin
// c.addi -> addi rd, rd, nzimm
// c.nop -> addi 0, 0, 0
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOpimm};
end
riscv::OpcodeC1: begin
unique case (instr_i[15:13])
riscv::OpcodeC1Addi: begin
// c.addi -> addi rd, rd, nzimm
// c.nop -> addi 0, 0, 0
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOpImm};
end
// c.addiw -> addiw rd, rd, nzimm for RV64
3'b001: begin
if (instr_i[11:7] != 5'h0) // only valid if the destination is not r0
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOpimm32};
else
illegal_instr_o = 1'b1;
end
// c.addiw -> addiw rd, rd, nzimm for RV64
riscv::OpcodeC1Addiw: begin
if (instr_i[11:7] != 5'h0) // only valid if the destination is not r0
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOpImm32};
else
illegal_instr_o = 1'b1;
end
riscv::OpcodeCJ: begin
// 101: c.j -> jal x0, imm
instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], riscv::OpcodeJal};
end
riscv::OpcodeC1Li: begin
// c.li -> addi rd, x0, nzimm
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOpImm};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
3'b010: begin
// c.li -> addi rd, x0, nzimm
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOpimm};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
riscv::OpcodeC1LuiAddi16sp: begin
// c.lui -> lui rd, imm
instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], riscv::OpcodeLui};
3'b011: begin
// c.lui -> lui rd, imm
instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], riscv::OpcodeLui};
if (instr_i[11:7] == 5'h02) begin
// c.addi16sp -> addi x2, x2, nzimm
instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, riscv::OpcodeOpImm};
end else if (instr_i[11:7] == 5'b0) begin
illegal_instr_o = 1'b1;
end
if (instr_i[11:7] == 5'h02) begin
// c.addi16sp -> addi x2, x2, nzimm
instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, riscv::OpcodeOpimm};
end else if (instr_i[11:7] == 5'b0) begin
illegal_instr_o = 1'b1;
end
if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
end
if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
end
riscv::OpcodeC1MiscAlu: begin
unique case (instr_i[11:10])
2'b00,
2'b01: begin
// 00: c.srli -> srli rd, rd, shamt
// 01: c.srai -> srai rd, rd, shamt
instr_o = {1'b0, instr_i[10], 4'b0, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], riscv::OpcodeOpImm};
// shamt field must be non-zero
if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
end
3'b100: begin
unique case (instr_i[11:10])
2'b00,
2'b01: begin
// 00: c.srli -> srli rd, rd, shamt
// 01: c.srai -> srai rd, rd, shamt
instr_o = {1'b0, instr_i[10], 4'b0, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], riscv::OpcodeOpimm};
// shamt field must be non-zero
if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
end
2'b10: begin
// c.andi -> andi rd, rd, imm
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOpImm};
end
2'b10: begin
// c.andi -> andi rd, rd, imm
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOpimm};
end
2'b11: begin
unique case ({instr_i[12], instr_i[6:5]})
3'b000: begin
// c.sub -> sub rd', rd', rs2'
instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp};
end
2'b11: begin
unique case ({instr_i[12], instr_i[6:5]})
3'b000: begin
// c.sub -> sub rd', rd', rs2'
instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp};
end
3'b001: begin
// c.xor -> xor rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], riscv::OpcodeOp};
end
3'b001: begin
// c.xor -> xor rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], riscv::OpcodeOp};
end
3'b010: begin
// c.or -> or rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], riscv::OpcodeOp};
end
3'b010: begin
// c.or -> or rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], riscv::OpcodeOp};
end
3'b011: begin
// c.and -> and rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOp};
end
3'b011: begin
// c.and -> and rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOp};
end
3'b100: begin
// c.subw -> subw rd', rd', rs2'
instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32};
end
3'b101: begin
// c.addw -> addw rd', rd', rs2'
instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32};
end
3'b100: begin
// c.subw -> subw rd', rd', rs2'
instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32};
end
3'b101: begin
// c.addw -> addw rd', rd', rs2'
instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32};
end
3'b110,
3'b111: begin
// 100: c.subw
// 101: c.addw
illegal_instr_o = 1'b1;
instr_o = {16'b0, instr_i};
end
endcase
end
endcase
end
3'b110,
3'b111: begin
// 110, 111: not decoded to any instruction -> flagged as illegal
// (c.subw / c.addw are handled by the 100 / 101 arms of this case)
illegal_instr_o = 1'b1;
instr_o = {16'b0, instr_i[15:0]};
end
endcase
end
endcase
end
riscv::OpcodeC1J: begin
// 101: c.j -> jal x0, imm
instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], riscv::OpcodeJal};
end
riscv::OpcodeCBeqz, riscv::OpcodeCBnez: begin
// 0: c.beqz -> beq rs1', x0, imm
// 1: c.bnez -> bne rs1', x0, imm
instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], riscv::OpcodeBranch};
end
endcase
riscv::OpcodeC1Beqz, riscv::OpcodeC1Bnez: begin
// 0: c.beqz -> beq rs1', x0, imm
// 1: c.bnez -> bne rs1', x0, imm
instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], riscv::OpcodeBranch};
end
endcase
end
// C2
2'b10: begin
riscv::OpcodeC2: begin
unique case (instr_i[15:13])
3'b000: begin
riscv::OpcodeC2Slli: begin
// c.slli -> slli rd, rd, shamt
instr_o = {6'b0, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], riscv::OpcodeOpimm};
instr_o = {6'b0, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], riscv::OpcodeOpImm};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; // register not x0
if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; // shift amount must be non zero
end
3'b010: begin
riscv::OpcodeC2Fldsp: begin
// c.fldsp -> fld rd, imm(x2)
instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], riscv::OpcodeLoadFp};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
riscv::OpcodeC2Lwsp: begin
// c.lwsp -> lw rd, imm(x2)
instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02, 3'b010, instr_i[11:7], riscv::OpcodeLoad};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
3'b011: begin
riscv::OpcodeC2Ldsp: begin
// c.ldsp -> ld rd, imm(x2)
instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], riscv::OpcodeLoad};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
3'b100: begin
riscv::OpcodeC2JalrMvAdd: begin
if (instr_i[12] == 1'b0) begin
// c.mv -> add rd/rs1, x0, rs2
instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOp};
@ -231,12 +250,17 @@ module compressed_decoder
end
end
3'b110: begin
riscv::OpcodeC2Fsdsp: begin
// c.fsdsp -> fsd rs2, imm(x2)
instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStoreFp};
end
riscv::OpcodeC2Swsp: begin
// c.swsp -> sw rs2, imm(x2)
instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010, instr_i[11:9], 2'b00, riscv::OpcodeStore};
end
3'b111: begin
riscv::OpcodeC2Sdsp: begin
// c.sdsp -> sd rs2, imm(x2)
instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStore};
end

View file

@ -23,13 +23,10 @@ module csr_buffer (
input fu_op operator_i,
input logic [63:0] operand_a_i,
input logic [63:0] operand_b_i,
input logic [TRANS_ID_BITS-1:0] trans_id_i, // transaction id, needed for WB
output logic csr_ready_o, // FU is ready e.g. not busy
input logic csr_valid_i, // Input is valid
output logic [TRANS_ID_BITS-1:0] csr_trans_id_o, // ID of scoreboard entry at which to write back
output logic [63:0] csr_result_o,
output logic csr_valid_o, // transaction id for which the output is the requested one
input logic commit_i, // commit the pending CSR OP
// to CSR file
@ -43,9 +40,6 @@ module csr_buffer (
} csr_reg_n, csr_reg_q;
// control logic, scoreboard signals
assign csr_trans_id_o = trans_id_i;
// CSR instructions for this post buffer are single cycle
assign csr_valid_o = csr_valid_i;
assign csr_result_o = operand_a_i;
assign csr_addr_o = csr_reg_q.csr_address;

View file

@ -21,7 +21,6 @@ module csr_regfile #(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic time_irq_i, // Timer threw an interrupt
// send a flush request out if a CSR with a side effect has changed (e.g. written)
output logic flush_o,
output logic halt_csr_o, // halt requested
@ -39,6 +38,8 @@ module csr_regfile #(
input logic [11:0] csr_addr_i, // Address of the register to read/write
input logic [63:0] csr_wdata_i, // Write data in
output logic [63:0] csr_rdata_o, // Read data out
input logic dirty_fp_state_i, // Mark the FP state as dirty
input logic csr_write_fflags_i, // Write fflags register e.g.: we are retiring a floating point instruction
input logic [63:0] pc_i, // PC of instruction accessing the CSR
output exception_t csr_exception_o, // attempts to access a CSR without appropriate privilege
// level or to write a read-only register also
@ -48,6 +49,11 @@ module csr_regfile #(
output logic eret_o, // Return from exception, set the PC of epc_o
output logic [63:0] trap_vector_base_o, // Output base of exception vector, correct CSR is output (mtvec, stvec)
output riscv::priv_lvl_t priv_lvl_o, // Current privilege level the CPU is in
// FPU
output riscv::xs_t fs_o, // Floating point extension status
output logic [4:0] fflags_o, // Floating-Point Accrued Exceptions
output logic [2:0] frm_o, // Floating-Point Dynamic Rounding Mode
output logic [6:0] fprec_o, // Floating-Point Precision Control
// MMU
output logic en_translation_o, // enable VA translation
output logic en_ld_st_translation_o, // enable VA translation for load and stores
@ -87,12 +93,14 @@ module csr_regfile #(
logic mret; // return from M-mode exception
logic sret; // return from S-mode exception
logic dret; // return from debug mode
// CSR write causes us to mark the FPU state as dirty
logic dirty_fp_state_csr;
riscv::csr_t csr_addr;
// ----------------
// Assignments
// ----------------
assign csr_addr = riscv::csr_t'(csr_addr_i);
assign fs_o = mstatus_q.fs;
// ----------------
// CSR Registers
// ----------------
@ -134,6 +142,8 @@ module csr_regfile #(
logic [63:0] cycle_q, cycle_d;
logic [63:0] instret_q, instret_d;
riscv::fcsr_t fcsr_q, fcsr_d;
// ----------------
// CSR Read logic
// ----------------
@ -146,6 +156,35 @@ module csr_regfile #(
if (csr_read) begin
case (csr_addr.address)
riscv::CSR_FFLAGS: begin
if (mstatus_q.fs == riscv::Off) begin
read_access_exception = 1'b1;
end else begin
csr_rdata = {59'b0, fcsr_q.fflags};
end
end
riscv::CSR_FRM: begin
if (mstatus_q.fs == riscv::Off) begin
read_access_exception = 1'b1;
end else begin
csr_rdata = {61'b0, fcsr_q.frm};
end
end
riscv::CSR_FCSR: begin
if (mstatus_q.fs == riscv::Off) begin
read_access_exception = 1'b1;
end else begin
csr_rdata = {56'b0, fcsr_q.frm, fcsr_q.fflags};
end
end
// non-standard extension
riscv::CSR_FTRAN: begin
if (mstatus_q.fs == riscv::Off) begin
read_access_exception = 1'b1;
end else begin
csr_rdata = {57'b0, fcsr_q.fprec};
end
end
// debug registers
riscv::CSR_DCSR: csr_rdata = {32'b0, dcsr_q};
riscv::CSR_DPC: csr_rdata = dpc_q;
@ -189,7 +228,7 @@ module csr_regfile #(
riscv::CSR_PMPCFG0: csr_rdata = pmpcfg0_q;
riscv::CSR_PMPADDR0: csr_rdata = pmpaddr0_q;
riscv::CSR_MVENDORID: csr_rdata = 64'b0; // not implemented
riscv::CSR_MARCHID: csr_rdata = 64'b0; // PULP, anonymous source (no allocated ID yet)
riscv::CSR_MARCHID: csr_rdata = ARIANE_MARCHID;
riscv::CSR_MIMPID: csr_rdata = 64'b0; // not implemented
riscv::CSR_MHARTID: csr_rdata = {53'b0, cluster_id_i[5:0], 1'b0, core_id_i[3:0]};
riscv::CSR_MCYCLE: csr_rdata = cycle_q;
@ -227,7 +266,7 @@ module csr_regfile #(
sapt = satp_q;
mip = csr_wdata & 64'h33;
instret = instret_q;
// only USIP, SSIP, UTIP, STIP are write-able
// only FCSR, USIP, SSIP, UTIP, STIP are write-able
eret_o = 1'b0;
flush_o = 1'b0;
@ -238,6 +277,8 @@ module csr_regfile #(
perf_we_o = 1'b0;
perf_data_o = 'b0;
fcsr_d = fcsr_q;
priv_lvl_d = priv_lvl_q;
debug_mode_d = debug_mode_q;
dcsr_d = dcsr_q;
@ -279,10 +320,51 @@ module csr_regfile #(
instret_d = instret_q;
en_ld_st_translation_d = en_ld_st_translation_q;
dirty_fp_state_csr = 1'b0;
// check for correct access rights and that we are writing
if (csr_we) begin
case (csr_addr.address)
// Floating-Point
riscv::CSR_FFLAGS: begin
if (mstatus_q.fs == riscv::Off) begin
update_access_exception = 1'b1;
end else begin
dirty_fp_state_csr = 1'b1;
fcsr_d.fflags = csr_wdata[4:0];
// this instruction has side-effects
flush_o = 1'b1;
end
end
riscv::CSR_FRM: begin
if (mstatus_q.fs == riscv::Off) begin
update_access_exception = 1'b1;
end else begin
dirty_fp_state_csr = 1'b1;
fcsr_d.frm = csr_wdata[2:0];
// this instruction has side-effects
flush_o = 1'b1;
end
end
riscv::CSR_FCSR: begin
if (mstatus_q.fs == riscv::Off) begin
update_access_exception = 1'b1;
end else begin
dirty_fp_state_csr = 1'b1;
fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space
// this instruction has side-effects
flush_o = 1'b1;
end
end
riscv::CSR_FTRAN: begin
if (mstatus_q.fs == riscv::Off) begin
update_access_exception = 1'b1;
end else begin
dirty_fp_state_csr = 1'b1;
fcsr_d.fprec = csr_wdata[6:0]; // ignore writes to reserved space
// this instruction has side-effects
flush_o = 1'b1;
end
end
// debug CSR
riscv::CSR_DCSR: begin
dcsr_d = csr_wdata[31:0];
@ -306,10 +388,13 @@ module csr_regfile #(
// also hardwire the registers for sstatus
mstatus_d.sxl = riscv::XLEN_64;
mstatus_d.uxl = riscv::XLEN_64;
// hardwired zero registers
mstatus_d.sd = 1'b0;
mstatus_d.xs = 2'b0;
mstatus_d.fs = 2'b0;
// hardwired extension registers
mstatus_d.sd = (&mstatus_q.xs) | (&mstatus_q.fs);
mstatus_d.xs = riscv::Off;
// hardwire to zero if floating point extension is not present
if (!FP_PRESENT) begin
mstatus_d.fs = riscv::Off;
end
mstatus_d.upie = 1'b0;
mstatus_d.uie = 1'b0;
// not all fields of mstatus can be written
@ -366,9 +451,11 @@ module csr_regfile #(
mstatus_d.sxl = riscv::XLEN_64;
mstatus_d.uxl = riscv::XLEN_64;
// hardwired extension registers (sd is derived from xs/fs; fs is only tied off without FP support)
mstatus_d.sd = 1'b0;
mstatus_d.xs = 2'b0;
mstatus_d.fs = 2'b0;
mstatus_d.sd = (&mstatus_q.xs) | (&mstatus_q.fs);
mstatus_d.xs = riscv::Off;
if (!FP_PRESENT) begin
mstatus_d.fs = riscv::Off;
end
mstatus_d.upie = 1'b0;
mstatus_d.uie = 1'b0;
// this register has side-effects on other registers, flush the pipeline
@ -426,6 +513,16 @@ module csr_regfile #(
default: update_access_exception = 1'b1;
endcase
end
// mark the floating point extension register as dirty
if (FP_PRESENT && (dirty_fp_state_csr || dirty_fp_state_i)) begin
mstatus_d.fs = riscv::Dirty;
end
// write the floating point status register
if (csr_write_fflags_i)
fcsr_d.fflags = csr_wdata_i[4:0] | fcsr_q.fflags;
// ---------------------
// External Interrupts
// ---------------------
@ -541,9 +638,21 @@ module csr_regfile #(
end
// single step enable and we just retired an instruction
if (dcsr_q.step && (|commit_ack_i)) begin
// we saved the correct target address during execute
dpc_d = commit_instr_i[0].bp.predict_address;
if (dcsr_q.step && commit_ack_i[0]) begin
// valid CTRL flow change
if (commit_instr_i[0].fu == CTRL_FLOW) begin
// we saved the correct target address during execute
dpc_d = commit_instr_i[0].bp.predict_address;
// exception valid
end else if (ex_i.valid) begin
dpc_d = trap_vector_base_o;
// return from environment
end else if (eret_o) begin
dpc_d = epc_o;
// consecutive PC
end else begin
dpc_d = commit_instr_i[0].pc + (commit_instr_i[0].is_compressed ? 'h2 : 'h4);
end
debug_mode_d = 1'b1;
set_debug_pc_o = 1'b1;
dcsr_d.cause = dm::CauseSingleStep;
@ -807,6 +916,10 @@ module csr_regfile #(
assign csr_rdata_o = csr_rdata;
// in debug mode we execute with privilege level M
assign priv_lvl_o = (debug_mode_q) ? riscv::PRIV_LVL_M : priv_lvl_q;
// FPU outputs
assign fflags_o = fcsr_q.fflags;
assign frm_o = fcsr_q.frm;
assign fprec_o = fcsr_q.fprec;
// MMU outputs
assign satp_ppn_o = satp_q.ppn;
assign asid_o = satp_q.asid[ASID_WIDTH-1:0];
@ -829,6 +942,8 @@ module csr_regfile #(
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
priv_lvl_q <= riscv::PRIV_LVL_M;
// floating-point registers
fcsr_q <= 64'b0;
// debug signals
debug_mode_q <= 1'b0;
dcsr_q <= '0;
@ -866,6 +981,8 @@ module csr_regfile #(
wfi_q <= 1'b0;
end else begin
priv_lvl_q <= priv_lvl_d;
// floating-point registers
fcsr_q <= fcsr_d;
// debug signals
debug_mode_q <= debug_mode_d;
dcsr_q <= dcsr_d;

View file

@ -438,14 +438,26 @@ module dm_csrs #(
end
assign dmactive_o = dmcontrol_q.dmactive;
// if the PoR is set we want to re-set the other system as well
assign ndmreset_o = dmcontrol_q.ndmreset | (~rst_ni);
assign cmd_o = command_q;
assign progbuf_o = progbuf_q;
assign data_o = data_q;
assign resp_queue_pop = dmi_resp_ready_i & ~resp_queue_empty;
logic ndmreset_n;
// if the PoR is set we want to re-set the other system as well
rstgen_bypass i_rstgen_bypass (
.clk_i ( clk_i ),
.rst_ni ( ~(dmcontrol_q.ndmreset | ~rst_ni) ),
.rst_test_mode_ni ( rst_ni ),
.test_mode_i ( testmode_i ),
.rst_no ( ndmreset_n ),
.init_no () // keep open
);
assign ndmreset_o = ~ndmreset_n;
// response FIFO
fifo_v2 #(
.dtype ( logic [31:0] ),
@ -468,9 +480,19 @@ module dm_csrs #(
always_ff @(posedge clk_i or negedge rst_ni) begin
// PoR
if (~rst_ni) begin
dmcontrol_q <= '0;
havereset_q <= '1;
dmcontrol_q <= '0;
havereset_q <= '1;
// this is the only write-able bit during reset
cmderr_q <= dm::CmdErrNone;
command_q <= '0;
abstractauto_q <= '0;
progbuf_q <= '0;
data_q <= '0;
sbcs_q <= '0;
sbaddr_q <= '0;
sbdata_q <= '0;
end else begin
havereset_q <= havereset_d;
// synchronous re-set of debug module, active-low, except for dmactive
if (!dmcontrol_q.dmactive) begin
dmcontrol_q.haltreq <= '0;
@ -495,7 +517,6 @@ module dm_csrs #(
sbaddr_q <= '0;
sbdata_q <= '0;
end else begin
havereset_q <= havereset_d;
dmcontrol_q <= dmcontrol_d;
cmderr_q <= cmderr_d;
command_q <= command_d;
@ -508,4 +529,4 @@ module dm_csrs #(
end
end
end
endmodule
endmodule

View file

@ -20,7 +20,7 @@ module dm_mem #(
parameter int NrHarts = -1
)(
input logic clk_i, // Clock
input logic dmactive_i, // debug module reset
input logic rst_ni, // Asynchronous reset active low
output logic [NrHarts-1:0] debug_req_o,
input logic [19:0] hartsel_i,
@ -363,8 +363,8 @@ module dm_mem #(
// the ROM base address
assign fwd_rom_d = (addr_i[DbgAddressBits-1:0] >= dm::HaltAddress[DbgAddressBits-1:0]) ? 1'b1 : 1'b0;
always_ff @(posedge clk_i) begin
if (~dmactive_i) begin
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
fwd_rom_q <= 1'b0;
rdata_q <= '0;
halted_q <= 1'b0;

View file

@ -18,6 +18,7 @@
module dm_sba (
input logic clk_i, // Clock
input logic rst_ni,
input logic dmactive_i, // synchronous reset active low
AXI_BUS.Master axi_master,
@ -111,7 +112,7 @@ module dm_sba (
end
endcase
// handle error case
if (sbaccess_i > 3 && state_d != Idle) begin
if (sbaccess_i > 3 && state_q != Idle) begin
req = 1'b0;
state_d = Idle;
sberror_valid_o = 1'b1;
@ -120,35 +121,36 @@ module dm_sba (
// further error handling should go here ...
end
always_ff @(posedge clk_i) begin
if (~dmactive_i) begin
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= Idle;
end else begin
state_q <= state_d;
end
end
axi_adapter #(
.DATA_WIDTH ( 64 )
.DATA_WIDTH ( 64 )
) i_axi_master (
.clk_i ( clk_i ),
.rst_ni ( dmactive_i ),
.req_i ( req ),
.type_i ( std_cache_pkg::SINGLE_REQ),
.gnt_o ( gnt ),
.gnt_id_o ( ),
.addr_i ( address ),
.we_i ( we ),
.wdata_i ( sbdata_i ),
.be_i ( be ),
.size_i ( sbaccess_i[1:0] ),
.id_i ( '0 ),
.valid_o ( sbdata_valid_o ),
.rdata_o ( sbdata_o ),
.id_o ( ),
.critical_word_o ( ), // not needed here
.critical_word_valid_o ( ), // not needed here
.axi ( axi_master )
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( req ),
.type_i ( std_cache_pkg::SINGLE_REQ ),
.gnt_o ( gnt ),
.gnt_id_o ( ),
.addr_i ( address ),
.we_i ( we ),
.wdata_i ( sbdata_i ),
.be_i ( be ),
.size_i ( sbaccess_i[1:0] ),
.id_i ( '0 ),
.valid_o ( sbdata_valid_o ),
.rdata_o ( sbdata_o ),
.id_o ( ),
.critical_word_o ( ), // not needed here
.critical_word_valid_o ( ), // not needed here
.axi ( axi_master )
);

View file

@ -143,6 +143,7 @@ module dm_top #(
dm_sba i_dm_sba (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.dmactive_i ( dmactive_o ),
.axi_master,
.sbaddress_i ( sbaddress_csrs_sba ),
@ -166,7 +167,7 @@ module dm_top #(
.NrHarts (NrHarts)
) i_dm_mem (
.clk_i ( clk_i ),
.dmactive_i ( dmactive_o ),
.rst_ni ( rst_ni ),
.debug_req_o ( debug_req_o ),
.hartsel_i ( hartsel ),
.haltreq_i ( haltreq ),
@ -197,7 +198,7 @@ module dm_top #(
.AXI_USER_WIDTH ( AxiUserWidth )
) i_axi2mem (
.clk_i ( clk_i ),
.rst_ni ( dmactive_o ),
.rst_ni ( rst_ni ),
.slave ( axi_slave ),
.req_o ( req ),
.we_o ( we ),

View file

@ -19,9 +19,9 @@
module dmi_jtag (
input logic clk_i, // DMI Clock
input logic rst_ni, // Asynchronous reset active low
input logic testmode_i,
output logic dmi_rst_no, // hard reset
output dm::dmi_req_t dmi_req_o,
output logic dmi_req_valid_o,
input logic dmi_req_ready_i,
@ -37,7 +37,7 @@ module dmi_jtag (
output logic td_o, // JTAG test data output pad
output logic tdo_oe_o // Data out output enable
);
assign dmi_rst_no = 1'b1;
assign dmi_rst_no = rst_ni;
logic test_logic_reset;
logic shift_dr;
@ -218,6 +218,7 @@ module dmi_jtag (
.td_i,
.td_o,
.tdo_oe_o,
.testmode_i ( testmode_i ),
.test_logic_reset_o ( test_logic_reset ),
.shift_dr_o ( shift_dr ),
.update_dr_o ( update_dr ),

View file

@ -25,6 +25,7 @@ module dmi_jtag_tap #(
input logic td_i, // JTAG test data input pad
output logic td_o, // JTAG test data output pad
output logic tdo_oe_o, // Data out output enable
input logic testmode_i,
output logic test_logic_reset_o,
output logic shift_dr_o,
output logic update_dr_o,
@ -207,8 +208,23 @@ module dmi_jtag_tap #(
end
// TDO changes state at negative edge of TCK
always_ff @(negedge tck_i, negedge trst_ni) begin
// DFT
logic tck_n, tck_ni;
cluster_clock_inverter i_tck_inv (
.clk_i ( tck_i ),
.clk_o ( tck_ni )
);
pulp_clock_mux2 i_dft_tck_mux (
.clk0_i ( tck_ni ),
.clk1_i ( tck_i ), // bypass the inverted clock for testing
.clk_sel_i ( testmode_i ),
.clk_o ( tck_n )
);
// TDO changes state at negative edge of TCK
always_ff @(posedge tck_n, negedge trst_ni) begin
if (~trst_ni) begin
td_o <= 1'b0;
tdo_oe_o <= 1'b0;

View file

@ -30,6 +30,8 @@ module decoder (
// From CSR
input riscv::priv_lvl_t priv_lvl_i, // current privilege level
input logic debug_mode_i, // we are in debug mode
input riscv::xs_t fs_i, // floating point extension status
input logic [2:0] frm_i, // floating-point dynamic rounding mode
input logic tvm_i, // trap virtual memory
input logic tw_i, // timeout wait
input logic tsr_i, // trap sret
@ -41,13 +43,15 @@ module decoder (
logic ecall;
// this instruction is a software break-point
logic ebreak;
// this instruction needs floating-point rounding-mode verification
logic check_fprm;
riscv::instruction_t instr;
assign instr = riscv::instruction_t'(instruction_i);
// --------------------
// Immediate select
// --------------------
enum logic[3:0] {
NOIMM, IIMM, SIMM, SBIMM, UIMM, JIMM
NOIMM, IIMM, SIMM, SBIMM, UIMM, JIMM, RS3
} imm_select;
logic [63:0] imm_i_type;
@ -63,6 +67,7 @@ module decoder (
is_control_flow_instr_o = 1'b0;
illegal_instr = 1'b0;
instruction_o.pc = pc_i;
instruction_o.trans_id = 5'b0;
instruction_o.fu = NONE;
instruction_o.op = ADD;
instruction_o.rs1 = '0;
@ -75,6 +80,7 @@ module decoder (
instruction_o.bp = branch_predict_i;
ecall = 1'b0;
ebreak = 1'b0;
check_fprm = 1'b0;
if (~ex_i.valid) begin
case (instr.rtype.opcode)
@ -208,7 +214,7 @@ module decoder (
endcase
end
// Memory ordering instructions
riscv::OpcodeFence: begin
riscv::OpcodeMiscMem: begin
instruction_o.fu = CSR;
instruction_o.rs1 = '0;
instruction_o.rs2 = '0;
@ -235,35 +241,268 @@ module decoder (
// Reg-Reg Operations
// --------------------------
riscv::OpcodeOp: begin
instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? MULT : ALU;
instruction_o.rs1[4:0] = instr.rtype.rs1;
instruction_o.rs2[4:0] = instr.rtype.rs2;
instruction_o.rd[4:0] = instr.rtype.rd;
// --------------------------------------------
// Vectorial Floating-Point Reg-Reg Operations
// --------------------------------------------
if (instr.rvftype.funct2 == 2'b10) begin // Prefix 10 for all Xfvec ops
// only generate decoder if FP extensions are enabled (static)
if (FP_PRESENT && XFVEC && fs_i != riscv::Off) begin
automatic logic allow_replication; // control honoring of replication flag
unique case ({instr.rtype.funct7, instr.rtype.funct3})
{7'b000_0000, 3'b000}: instruction_o.op = ADD; // Add
{7'b010_0000, 3'b000}: instruction_o.op = SUB; // Sub
{7'b000_0000, 3'b010}: instruction_o.op = SLTS; // Set Lower Than
{7'b000_0000, 3'b011}: instruction_o.op = SLTU; // Set Lower Than Unsigned
{7'b000_0000, 3'b100}: instruction_o.op = XORL; // Xor
{7'b000_0000, 3'b110}: instruction_o.op = ORL; // Or
{7'b000_0000, 3'b111}: instruction_o.op = ANDL; // And
{7'b000_0000, 3'b001}: instruction_o.op = SLL; // Shift Left Logical
{7'b000_0000, 3'b101}: instruction_o.op = SRL; // Shift Right Logical
{7'b010_0000, 3'b101}: instruction_o.op = SRA; // Shift Right Arithmetic
// Multiplications
{7'b000_0001, 3'b000}: instruction_o.op = MUL;
{7'b000_0001, 3'b001}: instruction_o.op = MULH;
{7'b000_0001, 3'b010}: instruction_o.op = MULHSU;
{7'b000_0001, 3'b011}: instruction_o.op = MULHU;
{7'b000_0001, 3'b100}: instruction_o.op = DIV;
{7'b000_0001, 3'b101}: instruction_o.op = DIVU;
{7'b000_0001, 3'b110}: instruction_o.op = REM;
{7'b000_0001, 3'b111}: instruction_o.op = REMU;
default: begin
instruction_o.fu = FPU_VEC; // Same unit, but sets 'vectorial' signal
instruction_o.rs1[4:0] = instr.rvftype.rs1;
instruction_o.rs2[4:0] = instr.rvftype.rs2;
instruction_o.rd[4:0] = instr.rvftype.rd;
check_fprm = 1'b1;
allow_replication = 1'b1;
// decode vectorial FP instruction
unique case (instr.rvftype.vecfltop)
5'b00001 : begin
instruction_o.op = FADD; // vfadd.vfmt - Vectorial FP Addition
instruction_o.rs1 = '0; // Operand A is set to 0
instruction_o.rs2 = instr.rvftype.rs1; // Operand B is set to rs1
imm_select = IIMM; // Operand C is set to rs2
end
5'b00010 : begin
instruction_o.op = FSUB; // vfsub.vfmt - Vectorial FP Subtraction
instruction_o.rs1 = '0; // Operand A is set to 0
instruction_o.rs2 = instr.rvftype.rs1; // Operand B is set to rs1
imm_select = IIMM; // Operand C is set to rs2
end
5'b00011 : instruction_o.op = FMUL; // vfmul.vfmt - Vectorial FP Multiplication
5'b00100 : instruction_o.op = FDIV; // vfdiv.vfmt - Vectorial FP Division
5'b00101 : begin
instruction_o.op = VFMIN; // vfmin.vfmt - Vectorial FP Minimum
check_fprm = 1'b0; // rounding mode irrelevant
end
5'b00110 : begin
instruction_o.op = VFMAX; // vfmax.vfmt - Vectorial FP Maximum
check_fprm = 1'b0; // rounding mode irrelevant
end
5'b00111 : begin
instruction_o.op = FSQRT; // vfsqrt.vfmt - Vectorial FP Square Root
allow_replication = 1'b0; // only one operand
if (instr.rvftype.rs2 != 5'b00000) illegal_instr = 1'b1; // rs2 must be 0
end
5'b01000 : begin
instruction_o.op = FMADD; // vfmac.vfmt - Vectorial FP Multiply-Accumulate
imm_select = SIMM; // rd into result field (upper bits don't matter)
end
5'b01001 : begin
instruction_o.op = FMSUB; // vfmre.vfmt - Vectorial FP Multiply-Reduce
imm_select = SIMM; // rd into result field (upper bits don't matter)
end
5'b01100 : begin
unique case (instr.rvftype.rs2) inside // operation encoded in rs2, `inside` for matching ?
5'b00000 : begin
instruction_o.rs2 = instr.rvftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit
if (instr.rvftype.repl)
instruction_o.op = FMV_F2X; // vfmv.x.vfmt - FPR to GPR Move
else
instruction_o.op = FMV_X2F; // vfmv.vfmt.x - GPR to FPR Move
check_fprm = 1'b0; // no rounding for moves
end
5'b00001 : begin
instruction_o.op = FCLASS; // vfclass.vfmt - Vectorial FP Classify
check_fprm = 1'b0; // no rounding for classification
allow_replication = 1'b0; // R must not be set
end
5'b00010 : instruction_o.op = FCVT_F2I; // vfcvt.x.vfmt - Vectorial FP to Int Conversion
5'b00011 : instruction_o.op = FCVT_I2F; // vfcvt.vfmt.x - Vectorial Int to FP Conversion
5'b001?? : begin
instruction_o.op = FCVT_F2F; // vfcvt.vfmt.vfmt - Vectorial FP to FP Conversion
instruction_o.rs2 = instr.rvftype.rd; // set rs2 = rd as target vector for conversion
imm_select = IIMM; // rs2 holds part of the instruction
// TODO CHECK R bit for valid fmt combinations
// determine source format (encoded in the two low bits of the rs2 field)
unique case (instr.rvftype.rs2[21:20])
// Only process instruction if corresponding extension is active (static)
2'b00: if (~RVFVEC) illegal_instr = 1'b1;
2'b01: if (~XF16ALTVEC) illegal_instr = 1'b1;
2'b10: if (~XF16VEC) illegal_instr = 1'b1;
2'b11: if (~XF8VEC) illegal_instr = 1'b1;
default : illegal_instr = 1'b1;
endcase
end
default : illegal_instr = 1'b1;
endcase
end
5'b01101 : begin
check_fprm = 1'b0; // no rounding for sign-injection
instruction_o.op = VFSGNJ; // vfsgnj.vfmt - Vectorial FP Sign Injection
end
5'b01110 : begin
check_fprm = 1'b0; // no rounding for sign-injection
instruction_o.op = VFSGNJN; // vfsgnjn.vfmt - Vectorial FP Negated Sign Injection
end
5'b01111 : begin
check_fprm = 1'b0; // no rounding for sign-injection
instruction_o.op = VFSGNJX; // vfsgnjx.vfmt - Vectorial FP XORed Sign Injection
end
5'b10000 : begin
check_fprm = 1'b0; // no rounding for comparisons
instruction_o.op = VFEQ; // vfeq.vfmt - Vectorial FP Equality
end
5'b10001 : begin
check_fprm = 1'b0; // no rounding for comparisons
instruction_o.op = VFNE; // vfne.vfmt - Vectorial FP Non-Equality
end
5'b10010 : begin
check_fprm = 1'b0; // no rounding for comparisons
instruction_o.op = VFLT; // vflt.vfmt - Vectorial FP Less Than
end
5'b10011 : begin
check_fprm = 1'b0; // no rounding for comparisons
instruction_o.op = VFGE; // vfge.vfmt - Vectorial FP Greater or Equal
end
5'b10100 : begin
check_fprm = 1'b0; // no rounding for comparisons
instruction_o.op = VFLE; // vfle.vfmt - Vectorial FP Less or Equal
end
5'b10101 : begin
check_fprm = 1'b0; // no rounding for comparisons
instruction_o.op = VFGT; // vfgt.vfmt - Vectorial FP Greater Than
end
5'b11000 : begin
instruction_o.op = VFCPKAB_S; // vfcpka/b.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, lowest 4 entries
imm_select = SIMM; // rd into result field (upper bits don't matter)
if (~RVF) illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32
// check destination format
unique case (instr.rvftype.vfmt)
// Only process instruction if corresponding extension is active and FLEN suffices (static)
2'b00: begin
if (~RVFVEC) illegal_instr = 1'b1; // destination vector not supported
if (instr.rvftype.repl) illegal_instr = 1'b1; // no entries 2/3 in vector of 2 fp32
end
2'b01: begin
if (~XF16ALTVEC) illegal_instr = 1'b1; // destination vector not supported
end
2'b10: begin
if (~XF16VEC) illegal_instr = 1'b1; // destination vector not supported
end
2'b11: begin
if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported
end
default : illegal_instr = 1'b1;
endcase
end
5'b11001 : begin
instruction_o.op = VFCPKCD_S; // vfcpkc/d.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, second 4 entries
imm_select = SIMM; // rd into result field (upper bits don't matter)
if (~RVF) illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32
// check destination format
unique case (instr.rvftype.vfmt)
// Only process instruction if corresponding extension is active and FLEN suffices (static)
2'b00: illegal_instr = 1'b1; // no entries 4-7 in vector of 2 FP32
2'b01: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16ALT
2'b10: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16
2'b11: begin
if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported
end
default : illegal_instr = 1'b1;
endcase
end
5'b11010 : begin
instruction_o.op = VFCPKAB_D; // vfcpka/b.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, lowest 4 entries
imm_select = SIMM; // rd into result field (upper bits don't matter)
if (~RVD) illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64
// check destination format
unique case (instr.rvftype.vfmt)
// Only process instruction if corresponding extension is active and FLEN suffices (static)
2'b00: begin
if (~RVFVEC) illegal_instr = 1'b1; // destination vector not supported
if (instr.rvftype.repl) illegal_instr = 1'b1; // no entries 2/3 in vector of 2 fp32
end
2'b01: begin
if (~XF16ALTVEC) illegal_instr = 1'b1; // destination vector not supported
end
2'b10: begin
if (~XF16VEC) illegal_instr = 1'b1; // destination vector not supported
end
2'b11: begin
if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported
end
default : illegal_instr = 1'b1;
endcase
end
5'b11011 : begin
instruction_o.op = VFCPKCD_D; // vfcpkc/d.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, second 4 entries
imm_select = SIMM; // rd into result field (upper bits don't matter)
if (~RVD) illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64
// check destination format
unique case (instr.rvftype.vfmt)
// Only process instruction if corresponding extension is active and FLEN suffices (static)
2'b00: illegal_instr = 1'b1; // no entries 4-7 in vector of 2 FP32
2'b01: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16ALT
2'b10: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16
2'b11: begin
if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported
end
default : illegal_instr = 1'b1;
endcase
end
default : illegal_instr = 1'b1;
endcase
// check format
unique case (instr.rvftype.vfmt)
// Only process instruction if corresponding extension is active (static)
2'b00: if (~RVFVEC) illegal_instr = 1'b1;
2'b01: if (~XF16ALTVEC) illegal_instr = 1'b1;
2'b10: if (~XF16VEC) illegal_instr = 1'b1;
2'b11: if (~XF8VEC) illegal_instr = 1'b1;
default: illegal_instr = 1'b1;
endcase
// check disallowed replication
if (~allow_replication & instr.rvftype.repl) illegal_instr = 1'b1;
// check rounding mode
if (check_fprm) begin
unique case (frm_i) inside // actual rounding mode from frm csr
[3'b000:3'b100]: ; //legal rounding modes
default : illegal_instr = 1'b1;
endcase
end
end else begin // No vectorial FP enabled (static)
illegal_instr = 1'b1;
end
endcase
// ---------------------------
// Integer Reg-Reg Operations
// ---------------------------
end else begin
instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? MULT : ALU;
instruction_o.rs1 = instr.rtype.rs1;
instruction_o.rs2 = instr.rtype.rs2;
instruction_o.rd = instr.rtype.rd;
unique case ({instr.rtype.funct7, instr.rtype.funct3})
{7'b000_0000, 3'b000}: instruction_o.op = ADD; // Add
{7'b010_0000, 3'b000}: instruction_o.op = SUB; // Sub
{7'b000_0000, 3'b010}: instruction_o.op = SLTS; // Set Lower Than
{7'b000_0000, 3'b011}: instruction_o.op = SLTU; // Set Lower Than Unsigned
{7'b000_0000, 3'b100}: instruction_o.op = XORL; // Xor
{7'b000_0000, 3'b110}: instruction_o.op = ORL; // Or
{7'b000_0000, 3'b111}: instruction_o.op = ANDL; // And
{7'b000_0000, 3'b001}: instruction_o.op = SLL; // Shift Left Logical
{7'b000_0000, 3'b101}: instruction_o.op = SRL; // Shift Right Logical
{7'b010_0000, 3'b101}: instruction_o.op = SRA; // Shift Right Arithmetic
// Multiplications
{7'b000_0001, 3'b000}: instruction_o.op = MUL;
{7'b000_0001, 3'b001}: instruction_o.op = MULH;
{7'b000_0001, 3'b010}: instruction_o.op = MULHSU;
{7'b000_0001, 3'b011}: instruction_o.op = MULHU;
{7'b000_0001, 3'b100}: instruction_o.op = DIV;
{7'b000_0001, 3'b101}: instruction_o.op = DIVU;
{7'b000_0001, 3'b110}: instruction_o.op = REM;
{7'b000_0001, 3'b111}: instruction_o.op = REMU;
default: begin
illegal_instr = 1'b1;
end
endcase
end
end
// --------------------------
@ -293,7 +532,7 @@ module decoder (
// --------------------------------
// Reg-Immediate Operations
// --------------------------------
riscv::OpcodeOpimm: begin
riscv::OpcodeOpImm: begin
instruction_o.fu = ALU;
imm_select = IIMM;
instruction_o.rs1[4:0] = instr.itype.rs1;
@ -327,7 +566,7 @@ module decoder (
// --------------------------------
// 32 bit Reg-Immediate Operations
// --------------------------------
riscv::OpcodeOpimm32: begin
riscv::OpcodeOpImm32: begin
instruction_o.fu = ALU;
imm_select = IIMM;
instruction_o.rs1[4:0] = instr.itype.rs1;
@ -390,6 +629,264 @@ module decoder (
endcase
end
// --------------------------------
// Floating-Point Load/store
// --------------------------------
// Floating-point stores: issued to the STORE unit with an S-type immediate.
// The instruction is flagged illegal when no FP extension is configured, when
// the FP status field fs_i is Off, or when the access width selected by funct3
// belongs to a format that is not enabled.
riscv::OpcodeStoreFp: begin
if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static)
instruction_o.fu = STORE;
imm_select = SIMM;
instruction_o.rs1 = instr.stype.rs1;
instruction_o.rs2 = instr.stype.rs2;
// determine store size
unique case (instr.stype.funct3)
// Only process instruction if corresponding extension is active (static)
3'b000: if (XF8) instruction_o.op = FSB;
else illegal_instr = 1'b1;
3'b001: if (XF16 | XF16ALT) instruction_o.op = FSH;
else illegal_instr = 1'b1;
3'b010: if (RVF) instruction_o.op = FSW;
else illegal_instr = 1'b1;
3'b011: if (RVD) instruction_o.op = FSD;
else illegal_instr = 1'b1;
default: illegal_instr = 1'b1; // all other funct3 encodings are rejected
endcase
end else
illegal_instr = 1'b1; // FP not present or fs_i == Off
end
// Floating-point loads: issued to the LOAD unit with an I-type immediate.
// The instruction is flagged illegal when no FP extension is configured, when
// the FP status field fs_i is Off, or when the access width selected by funct3
// belongs to a format that is not enabled.
riscv::OpcodeLoadFp: begin
if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static)
instruction_o.fu = LOAD;
imm_select = IIMM;
instruction_o.rs1 = instr.itype.rs1;
instruction_o.rd = instr.itype.rd;
// determine load size
unique case (instr.itype.funct3)
// Only process instruction if corresponding extension is active (static)
3'b000: if (XF8) instruction_o.op = FLB;
else illegal_instr = 1'b1;
3'b001: if (XF16 | XF16ALT) instruction_o.op = FLH;
else illegal_instr = 1'b1;
3'b010: if (RVF) instruction_o.op = FLW;
else illegal_instr = 1'b1;
3'b011: if (RVD) instruction_o.op = FLD;
else illegal_instr = 1'b1;
default: illegal_instr = 1'b1; // all other funct3 encodings are rejected
endcase
end else
illegal_instr = 1'b1; // FP not present or fs_i == Off
end
// ----------------------------------
// Floating-Point Reg-Reg Operations
// ----------------------------------
// Fused multiply-add family (R4-type). All four opcodes share one decode path:
// rs1/rs2/rd come from the R4 fields, the rs3 register index is forwarded via
// the RS3 immediate selection, and the rounding mode is always validated.
riscv::OpcodeMadd,
riscv::OpcodeMsub,
riscv::OpcodeNmsub,
riscv::OpcodeNmadd: begin
if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static)
instruction_o.fu = FPU;
instruction_o.rs1 = instr.r4type.rs1;
instruction_o.rs2 = instr.r4type.rs2;
instruction_o.rd = instr.r4type.rd;
imm_select = RS3; // rs3 into result field
check_fprm = 1'b1;
// select the correct fused operation
// (OpcodeMadd has no explicit label and is handled by the default arm)
unique case (instr.r4type.opcode)
default: instruction_o.op = FMADD; // fmadd.fmt - FP Fused multiply-add
riscv::OpcodeMsub: instruction_o.op = FMSUB; // fmsub.fmt - FP Fused multiply-subtract
riscv::OpcodeNmsub: instruction_o.op = FNMSUB; // fnmsub.fmt - FP Negated fused multiply-subtract
riscv::OpcodeNmadd: instruction_o.op = FNMADD; // fnmadd.fmt - FP Negated fused multiply-add
endcase
// determine fp format
unique case (instr.r4type.funct2)
// Only process instruction if corresponding extension is active (static)
2'b00: if (~RVF) illegal_instr = 1'b1;
2'b01: if (~RVD) illegal_instr = 1'b1;
2'b10: if (~XF16 & ~XF16ALT) illegal_instr = 1'b1;
2'b11: if (~XF8) illegal_instr = 1'b1;
default: illegal_instr = 1'b1;
endcase
// check rounding mode
if (check_fprm) begin
unique case (instr.rftype.rm) inside
[3'b000:3'b100]: ; //legal rounding modes
3'b101: begin // Alternative Half-Precision encoded as fmt=10 and rm=101
if (~XF16ALT || instr.rftype.fmt != 2'b10)
illegal_instr = 1'b1;
// this encoding carries no rounding mode of its own, so the frm csr must hold a legal one
unique case (frm_i) inside // actual rounding mode from frm csr
[3'b000:3'b100]: ; //legal rounding modes
default : illegal_instr = 1'b1;
endcase
end
3'b111: begin
// rounding mode from frm csr
unique case (frm_i) inside
[3'b000:3'b100]: ; //legal rounding modes
default : illegal_instr = 1'b1;
endcase
end
default : illegal_instr = 1'b1; // rm = 110 is rejected
endcase
end
end else begin
illegal_instr = 1'b1; // FP not present or fs_i == Off
end
end
// Scalar floating-point register-register operations.
// Decode proceeds in three steps: (1) funct5 selects the operation and may
// clear check_fprm when the rm field encodes the operation instead of a
// rounding mode, (2) the fmt field is checked against the enabled formats,
// (3) the rounding mode is validated if check_fprm is still set.
riscv::OpcodeOpFp: begin
if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static)
instruction_o.fu = FPU;
instruction_o.rs1 = instr.rftype.rs1;
instruction_o.rs2 = instr.rftype.rs2;
instruction_o.rd = instr.rftype.rd;
check_fprm = 1'b1;
// decode FP instruction
unique case (instr.rftype.funct5)
5'b00000: begin
instruction_o.op = FADD; // fadd.fmt - FP Addition
instruction_o.rs1 = '0; // Operand A is set to 0
instruction_o.rs2 = instr.rftype.rs1; // Operand B is set to rs1
imm_select = IIMM; // Operand C is set to rs2
end
5'b00001: begin
instruction_o.op = FSUB; // fsub.fmt - FP Subtraction
instruction_o.rs1 = '0; // Operand A is set to 0
instruction_o.rs2 = instr.rftype.rs1; // Operand B is set to rs1
imm_select = IIMM; // Operand C is set to rs2
end
5'b00010: instruction_o.op = FMUL; // fmul.fmt - FP Multiplication
5'b00011: instruction_o.op = FDIV; // fdiv.fmt - FP Division
5'b01011: begin
instruction_o.op = FSQRT; // fsqrt.fmt - FP Square Root
// rs2 must be zero
if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1;
end
5'b00100: begin
instruction_o.op = FSGNJ; // fsgn{j[n]/jx}.fmt - FP Sign Injection
check_fprm = 1'b0; // instruction encoded in rm, do the check here
if (XF16ALT) begin // FP16ALT instructions encoded in rm separately (static)
if (!(instr.rftype.rm inside {[3'b000:3'b010], [3'b100:3'b110]}))
illegal_instr = 1'b1;
end else begin
if (!(instr.rftype.rm inside {[3'b000:3'b010]}))
illegal_instr = 1'b1;
end
end
5'b00101: begin
instruction_o.op = FMIN_MAX; // fmin/fmax.fmt - FP Minimum / Maximum
check_fprm = 1'b0; // instruction encoded in rm, do the check here
if (XF16ALT) begin // FP16ALT instructions encoded in rm separately (static)
if (!(instr.rftype.rm inside {[3'b000:3'b001], [3'b100:3'b101]}))
illegal_instr = 1'b1;
end else begin
if (!(instr.rftype.rm inside {[3'b000:3'b001]}))
illegal_instr = 1'b1;
end
end
5'b01000: begin
instruction_o.op = FCVT_F2F; // fcvt.fmt.fmt - FP to FP Conversion
instruction_o.rs2 = instr.rvftype.rs1; // tie rs2 to rs1 to be safe (vectors use rs2)
imm_select = IIMM; // rs2 holds part of the instruction
if (instr.rftype.rs2[24:23]) illegal_instr = 1'b1; // bits [22:20] used, other bits must be 0
// check source format
unique case (instr.rftype.rs2[22:20])
// Only process instruction if corresponding extension is active (static)
3'b000: if (~RVF) illegal_instr = 1'b1;
3'b001: if (~RVD) illegal_instr = 1'b1;
3'b010: if (~XF16) illegal_instr = 1'b1;
3'b110: if (~XF16ALT) illegal_instr = 1'b1;
3'b011: if (~XF8) illegal_instr = 1'b1;
default: illegal_instr = 1'b1;
endcase
end
5'b10100: begin
instruction_o.op = FCMP; // feq/flt/fle.fmt - FP Comparisons
check_fprm = 1'b0; // instruction encoded in rm, do the check here
if (XF16ALT) begin // FP16ALT instructions encoded in rm separately (static)
if (!(instr.rftype.rm inside {[3'b000:3'b010], [3'b100:3'b110]}))
illegal_instr = 1'b1;
end else begin
if (!(instr.rftype.rm inside {[3'b000:3'b010]}))
illegal_instr = 1'b1;
end
end
5'b11000: begin
instruction_o.op = FCVT_F2I; // fcvt.ifmt.fmt - FP to Int Conversion
imm_select = IIMM; // rs2 holds part of the instruction
if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0
end
5'b11010: begin
instruction_o.op = FCVT_I2F; // fcvt.fmt.ifmt - Int to FP Conversion
imm_select = IIMM; // rs2 holds part of the instruction
if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0
end
5'b11100: begin
instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit
check_fprm = 1'b0; // instruction encoded in rm, do the check here
if (instr.rftype.rm == 3'b000 || (XF16ALT && instr.rftype.rm == 3'b100)) // FP16ALT has separate encoding
instruction_o.op = FMV_F2X; // fmv.ifmt.fmt - FPR to GPR Move
else if (instr.rftype.rm == 3'b001 || (XF16ALT && instr.rftype.rm == 3'b101)) // FP16ALT has separate encoding
instruction_o.op = FCLASS; // fclass.fmt - FP Classify
else illegal_instr = 1'b1;
// rs2 must be zero
if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1;
end
5'b11110: begin
instruction_o.op = FMV_X2F; // fmv.fmt.ifmt - GPR to FPR Move
instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit
check_fprm = 1'b0; // instruction encoded in rm, do the check here
if (!(instr.rftype.rm == 3'b000 || (XF16ALT && instr.rftype.rm == 3'b100)))
illegal_instr = 1'b1;
// rs2 must be zero
if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1;
end
default : illegal_instr = 1'b1;
endcase
// check format
unique case (instr.rftype.fmt)
// Only process instruction if corresponding extension is active (static)
2'b00: if (~RVF) illegal_instr = 1'b1;
2'b01: if (~RVD) illegal_instr = 1'b1;
2'b10: if (~XF16 & ~XF16ALT) illegal_instr = 1'b1;
2'b11: if (~XF8) illegal_instr = 1'b1;
default: illegal_instr = 1'b1;
endcase
// check rounding mode (skipped for operations that cleared check_fprm above)
if (check_fprm) begin
unique case (instr.rftype.rm) inside
[3'b000:3'b100]: ; //legal rounding modes
3'b101: begin // Alternative Half-Precision encoded as fmt=10 and rm=101
if (~XF16ALT || instr.rftype.fmt != 2'b10)
illegal_instr = 1'b1;
// this encoding carries no rounding mode of its own, so the frm csr must hold a legal one
unique case (frm_i) inside // actual rounding mode from frm csr
[3'b000:3'b100]: ; //legal rounding modes
default : illegal_instr = 1'b1;
endcase
end
3'b111: begin
// rounding mode from frm csr
unique case (frm_i) inside
[3'b000:3'b100]: ; //legal rounding modes
default : illegal_instr = 1'b1;
endcase
end
default : illegal_instr = 1'b1; // rm = 110 is rejected
endcase
end
end else begin
illegal_instr = 1'b1; // FP not present or fs_i == Off
end
end
// ----------------------------------
// Atomic Operations
// ----------------------------------
riscv::OpcodeAmo: begin
// we are going to use the load unit for AMOs
instruction_o.fu = STORE;
@ -398,7 +895,7 @@ module decoder (
instruction_o.rd[4:0] = instr.atype.rd;
// TODO(zarubaf): Ordering
// words
if (instr.stype.funct3 == 3'h2) begin
if (RVA && instr.stype.funct3 == 3'h2) begin
unique case (instr.instr[31:27])
5'h0: instruction_o.op = AMO_ADDW;
5'h1: instruction_o.op = AMO_SWAPW;
@ -417,7 +914,7 @@ module decoder (
default: illegal_instr = 1'b1;
endcase
// double words
end else if (instr.stype.funct3 == 3'h3) begin
end else if (RVA && instr.stype.funct3 == 3'h3) begin
unique case (instr.instr[31:27])
5'h0: instruction_o.op = AMO_ADDD;
5'h1: instruction_o.op = AMO_SWAPD;
@ -500,6 +997,7 @@ module decoder (
endcase
end
end
// --------------------------------
// Sign extend immediate
// --------------------------------
@ -511,7 +1009,7 @@ module decoder (
imm_uj_type = uj_imm(instruction_i);
imm_bi_type = { {59{instruction_i[24]}}, instruction_i[24:20] };
// NOIMM, IIMM, SIMM, BIMM, UIMM, JIMM
// NOIMM, IIMM, SIMM, BIMM, UIMM, JIMM, RS3
// select immediate
case (imm_select)
IIMM: begin
@ -534,6 +1032,11 @@ module decoder (
instruction_o.result = imm_uj_type;
instruction_o.use_imm = 1'b1;
end
RS3: begin
// result holds address of fp operand rs3
instruction_o.result = {59'b0, instr.r4type.rs3};
instruction_o.use_imm = 1'b0;
end
default: begin
instruction_o.result = 64'b0;
instruction_o.use_imm = 1'b0;

View file

@ -16,8 +16,8 @@
import ariane_pkg::*;
module ex_stage #(
parameter int ASID_WIDTH = 1
) (
parameter int ASID_WIDTH = 1
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
@ -37,17 +37,16 @@ module ex_stage #(
output logic alu_valid_o, // ALU result is valid
output logic [63:0] alu_result_o,
output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, // ID of scoreboard entry at which to write back
output exception_t alu_exception_o,
// Branches and Jumps
output logic branch_ready_o,
input logic branch_valid_i, // we are using the branch unit
output logic branch_valid_o, // the calculated branch target is valid
output logic [63:0] branch_result_o, // branch target address out
input branchpredict_sbe_t branch_predict_i, // branch prediction in
output logic [TRANS_ID_BITS-1:0] branch_trans_id_o,
output exception_t branch_exception_o, // branch unit detected an exception
input branchpredict_sbe_t branch_predict_i,
output branchpredict_t resolved_branch_o, // the branch engine uses the write back from the ALU
output logic resolve_branch_o, // to ID signaling that we resolved the branch
// CSR
input logic csr_valid_i,
output logic [11:0] csr_addr_o,
input logic csr_commit_i,
// LSU
output logic lsu_ready_o, // FU is ready
input logic lsu_valid_i, // Input is valid
@ -59,20 +58,23 @@ module ex_stage #(
output exception_t lsu_exception_o,
output logic no_st_pending_o,
input logic amo_valid_commit_i,
// CSR
output logic csr_ready_o,
input logic csr_valid_i,
output logic [TRANS_ID_BITS-1:0] csr_trans_id_o,
output logic [63:0] csr_result_o,
output logic csr_valid_o,
output logic [11:0] csr_addr_o,
input logic csr_commit_i,
// MULT
output logic mult_ready_o, // FU is ready
input logic mult_valid_i, // Output is valid
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o,
output logic [63:0] mult_result_o,
output logic mult_valid_o,
// FPU
output logic fpu_ready_o, // FU is ready
input logic fpu_valid_i, // Output is valid
input logic [1:0] fpu_fmt_i, // FP format
input logic [2:0] fpu_rm_i, // FP rm
input logic [2:0] fpu_frm_i, // FP frm csr
input logic [6:0] fpu_prec_i, // FP precision control
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
output logic [63:0] fpu_result_o,
output logic fpu_valid_o,
output exception_t fpu_exception_o,
// Memory Management
input logic enable_translation_i,
@ -104,50 +106,153 @@ module ex_stage #(
// -----
// ALU
// -----
alu alu_i (
.result_o ( alu_result_o ),
.alu_branch_res_o ( alu_branch_res ),
.*
);
fu_data_t alu_data;
assign alu_data.operator = (alu_valid_i | branch_valid_i | csr_valid_i) ? operator_i : ADD;
assign alu_data.operand_a = (alu_valid_i | branch_valid_i | csr_valid_i) ? operand_a_i : '0;
assign alu_data.operand_b = (alu_valid_i | branch_valid_i | csr_valid_i) ? operand_b_i : '0;
assign alu_data.imm = (alu_valid_i | branch_valid_i | csr_valid_i) ? imm_i : '0;
// --------------------
// Branch Engine
// --------------------
branch_unit branch_unit_i (
.fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i), // any functional unit is valid, check that there is no accidental mis-predict
.branch_comp_res_i ( alu_branch_res ),
.*
// fixed latency FUs
// TODO(zarubaf) Re-name this module and re-factor ALU
alu alu_i (
.clk_i,
.rst_ni,
.flush_i,
.pc_i,
.trans_id_i,
.alu_valid_i,
.branch_valid_i,
.csr_valid_i ( csr_valid_i ),
.operator_i ( alu_data.operator ),
.operand_a_i ( alu_data.operand_a ),
.operand_b_i ( alu_data.operand_b ),
.imm_i ( alu_data.imm ),
.result_o ( alu_result_o ),
.alu_valid_o,
.alu_ready_o,
.alu_trans_id_o,
.alu_exception_o,
.fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i ),
.is_compressed_instr_i,
.branch_predict_i,
.resolved_branch_o,
.resolve_branch_o,
.commit_i ( csr_commit_i ),
.csr_addr_o ( csr_addr_o )
);
// ----------------
// Multiplication
// ----------------
fu_data_t mult_data;
assign mult_data.operator = mult_valid_i ? operator_i : MUL;
assign mult_data.operand_a = mult_valid_i ? operand_a_i : '0;
assign mult_data.operand_b = mult_valid_i ? operand_b_i : '0;
mult i_mult (
.result_o ( mult_result_o ),
.*
.clk_i,
.rst_ni,
.flush_i,
.trans_id_i,
.mult_valid_i,
.operator_i ( mult_data.operator ),
.operand_a_i ( mult_data.operand_a ),
.operand_b_i ( mult_data.operand_b ),
.result_o ( mult_result_o ),
.mult_valid_o,
.mult_ready_o,
.mult_trans_id_o
);
// ----------------
// FPU
// ----------------
// The FPU wrapper is only instantiated when FP_PRESENT is set; otherwise all
// FPU outputs of this stage are tied to zero.
generate
if (FP_PRESENT) begin : fpu_gen
// Operand gating: while fpu_valid_i is low the wrapper sees a fixed
// operator (FSGNJ) and all-zero operands instead of the shared issue buses.
fu_data_t fpu_data;
assign fpu_data.operator = fpu_valid_i ? operator_i : FSGNJ;
assign fpu_data.operand_a = fpu_valid_i ? operand_a_i : '0;
assign fpu_data.operand_b = fpu_valid_i ? operand_b_i : '0;
assign fpu_data.imm = fpu_valid_i ? imm_i : '0;
// Only the low FLEN bits of each operand are passed on; the immediate
// field is used as the third operand (operand C).
fpu_wrap fpu_i (
.clk_i,
.rst_ni,
.flush_i,
.trans_id_i,
.fu_i,
.fpu_valid_i,
.fpu_ready_o,
.operator_i ( fpu_data.operator ),
.operand_a_i ( fpu_data.operand_a[FLEN-1:0] ),
.operand_b_i ( fpu_data.operand_b[FLEN-1:0] ),
.operand_c_i ( fpu_data.imm[FLEN-1:0] ),
.fpu_fmt_i,
.fpu_rm_i,
.fpu_frm_i,
.fpu_prec_i,
.fpu_trans_id_o,
.result_o ( fpu_result_o ),
.fpu_valid_o,
.fpu_exception_o
);
end else begin : no_fpu_gen
// No FPU configured: never ready, never valid, no result and no exception.
assign fpu_ready_o = '0;
assign fpu_trans_id_o = '0;
assign fpu_result_o = '0;
assign fpu_valid_o = '0;
assign fpu_exception_o = '0;
end
endgenerate
// ----------------
// Load-Store Unit
// ----------------
fu_data_t lsu_data;
assign lsu_data.operator = lsu_valid_i ? operator_i : LD;
assign lsu_data.operand_a = lsu_valid_i ? operand_a_i : '0;
assign lsu_data.operand_b = lsu_valid_i ? operand_b_i : '0;
assign lsu_data.imm = lsu_valid_i ? imm_i : '0;
lsu lsu_i (
.commit_i ( lsu_commit_i ),
.commit_ready_o ( lsu_commit_ready_o ),
.dcache_req_ports_i,
.dcache_req_ports_o,
.amo_req_o,
.amo_resp_i,
.*
.clk_i ,
.rst_ni ,
.flush_i ,
.no_st_pending_o ,
.fu_i ,
.operator_i (lsu_data.operator ),
.operand_a_i (lsu_data.operand_a ),
.operand_b_i (lsu_data.operand_b ),
.imm_i (lsu_data.imm ),
.lsu_ready_o ,
.lsu_valid_i ,
.trans_id_i ,
.lsu_trans_id_o ,
.lsu_result_o ,
.lsu_valid_o ,
.commit_i (lsu_commit_i ),
.commit_ready_o (lsu_commit_ready_o ),
.enable_translation_i ,
.en_ld_st_translation_i ,
.icache_areq_i ,
.icache_areq_o ,
.priv_lvl_i ,
.ld_st_priv_lvl_i ,
.sum_i ,
.mxr_i ,
.satp_ppn_i ,
.asid_i ,
.flush_tlb_i ,
.itlb_miss_o ,
.dtlb_miss_o ,
.dcache_req_ports_i ,
.dcache_req_ports_o ,
.lsu_exception_o ,
.amo_valid_commit_i ,
.amo_req_o ,
.amo_resp_i
);
// -----
// CSR
// -----
// CSR address buffer
csr_buffer csr_buffer_i (
.commit_i ( csr_commit_i ),
.*
);
endmodule

1
src/fpu Submodule

@ -0,0 +1 @@
Subproject commit 00e2579173f1412f06d4eb95d6b98d0eb1cd2e94

1
src/fpu_div_sqrt_mvp Submodule

@ -0,0 +1 @@
Subproject commit 3736c4c844074bd64c3c505c017181db71b738b4

603
src/fpu_wrap.sv Normal file
View file

@ -0,0 +1,603 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Author: Stefan Mach, ETH Zurich
// Date: 12.04.2018
// Description: Wrapper for the floating-point unit
import ariane_pkg::*;
module fpu_wrap (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
input logic [TRANS_ID_BITS-1:0] trans_id_i,
input fu_t fu_i,
input logic fpu_valid_i,
output logic fpu_ready_o,
input fu_op operator_i,
input logic [FLEN-1:0] operand_a_i,
input logic [FLEN-1:0] operand_b_i, // imm will be here unless used as operand
input logic [FLEN-1:0] operand_c_i, // imm will be here unless used as operand
input logic [1:0] fpu_fmt_i,
input logic [2:0] fpu_rm_i,
input logic [2:0] fpu_frm_i,
input logic [6:0] fpu_prec_i,
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
output logic [FLEN-1:0] result_o,
output logic fpu_valid_o,
output exception_t fpu_exception_o
);
//-----------------------------------
// FPnew encoding from FPnew package
//-----------------------------------
localparam OPBITS = 4;
localparam FMTBITS = 3;
localparam IFMTBITS = 2;
integer OP_NUMBITS, FMT_NUMBITS, IFMT_NUMBITS;
logic [OPBITS-1:0] OP_FMADD;
logic [OPBITS-1:0] OP_FNMSUB;
logic [OPBITS-1:0] OP_ADD;
logic [OPBITS-1:0] OP_MUL;
logic [OPBITS-1:0] OP_DIV;
logic [OPBITS-1:0] OP_SQRT;
logic [OPBITS-1:0] OP_SGNJ;
logic [OPBITS-1:0] OP_MINMAX;
logic [OPBITS-1:0] OP_CMP;
logic [OPBITS-1:0] OP_CLASS;
logic [OPBITS-1:0] OP_F2I;
logic [OPBITS-1:0] OP_I2F;
logic [OPBITS-1:0] OP_F2F;
logic [OPBITS-1:0] OP_CPKAB;
logic [OPBITS-1:0] OP_CPKCD;
logic [FMTBITS-1:0] FMT_FP32;
logic [FMTBITS-1:0] FMT_FP64;
logic [FMTBITS-1:0] FMT_FP16;
logic [FMTBITS-1:0] FMT_FP8;
logic [FMTBITS-1:0] FMT_FP16ALT;
logic [FMTBITS-1:0] FMT_CUST1;
logic [FMTBITS-1:0] FMT_CUST2;
logic [FMTBITS-1:0] FMT_CUST3;
logic [IFMTBITS-1:0] IFMT_INT8;
logic [IFMTBITS-1:0] IFMT_INT16;
logic [IFMTBITS-1:0] IFMT_INT32;
logic [IFMTBITS-1:0] IFMT_INT64;
// bind the constants from the fpnew entity
// Every OP_*, FMT_* and IFMT_* name used in this module is a signal declared
// above and connected to the like-named port of this instance, i.e. the
// encodings are nets of this module rather than parameters.
// NOTE(review): presumably all of these ports are outputs of
// fpnew_pkg_constants - confirm against the FPnew sources.
fpnew_pkg_constants i_fpnew_constants (
.OP_NUMBITS ( OP_NUMBITS ),
.OP_FMADD ( OP_FMADD ),
.OP_FNMSUB ( OP_FNMSUB ),
.OP_ADD ( OP_ADD ),
.OP_MUL ( OP_MUL ),
.OP_DIV ( OP_DIV ),
.OP_SQRT ( OP_SQRT ),
.OP_SGNJ ( OP_SGNJ ),
.OP_MINMAX ( OP_MINMAX ),
.OP_CMP ( OP_CMP ),
.OP_CLASS ( OP_CLASS ),
.OP_F2I ( OP_F2I ),
.OP_I2F ( OP_I2F ),
.OP_F2F ( OP_F2F ),
.OP_CPKAB ( OP_CPKAB ),
.OP_CPKCD ( OP_CPKCD ),
.FMT_NUMBITS ( FMT_NUMBITS ),
.FMT_FP32 ( FMT_FP32 ),
.FMT_FP64 ( FMT_FP64 ),
.FMT_FP16 ( FMT_FP16 ),
.FMT_FP8 ( FMT_FP8 ),
.FMT_FP16ALT ( FMT_FP16ALT ),
.FMT_CUST1 ( FMT_CUST1 ),
.FMT_CUST2 ( FMT_CUST2 ),
.FMT_CUST3 ( FMT_CUST3 ),
.IFMT_NUMBITS ( IFMT_NUMBITS ),
.IFMT_INT8 ( IFMT_INT8 ),
.IFMT_INT16 ( IFMT_INT16 ),
.IFMT_INT32 ( IFMT_INT32 ),
.IFMT_INT64 ( IFMT_INT64 )
);
// always_comb begin
// assert (OPBITS >= OP_NUMBITS) else $error("OPBITS is smaller than %0d", OP_NUMBITS);
// assert (FMTBITS >= FMT_NUMBITS) else $error("FMTBITS is smaller than %0d", FMT_NUMBITS);
// assert (IFMTBITS >= IFMT_NUMBITS) else $error("IFMTBITS is smaller than %0d", IFMT_NUMBITS);
// end
//-------------------------------------------------
// Inputs to the FPU and protocol inversion buffer
//-------------------------------------------------
logic [FLEN-1:0] operand_a_d, operand_a_q, operand_a;
logic [FLEN-1:0] operand_b_d, operand_b_q, operand_b;
logic [FLEN-1:0] operand_c_d, operand_c_q, operand_c;
logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op;
logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod;
logic [FMTBITS-1:0] fpu_fmt_d, fpu_fmt_q, fpu_fmt;
logic [FMTBITS-1:0] fpu_fmt2_d, fpu_fmt2_q, fpu_fmt2;
logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt;
logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm;
logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op;
logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag;
logic fpu_in_ready, fpu_in_valid;
logic fpu_out_ready, fpu_out_valid;
logic [4:0] fpu_status;
// FSM to handle protocol inversion
enum logic {READY, STALL} state_q, state_d;
logic hold_inputs;
logic use_hold;
//-----------------------------
// Translate inputs
//-----------------------------
always_comb begin : input_translation
    // Translates the issue-stage request (operator_i, fpu_fmt_i, fpu_rm_i,
    // fu_i and the operands) into the op / op_mod / format / rounding-mode
    // encoding consumed by the FPU instance. All results are the *_d signals,
    // which either go straight to the FPU or are captured in the hold register.
    //
    // Statement order matters: defaults first, then the rounding-mode and
    // format selection, then the per-operation overrides, and finally the
    // alternate-half fix-up and the operand replication, which both depend on
    // flags set by the operation decode.
    automatic logic vec_replication; // control honoring of replication flag
    automatic logic replicate_c;     // replicate operand C instead of B (for ADD/SUB)
    automatic logic check_ah;        // Decide for AH from RM field encoding

    // Default Values
    operand_a_d     = operand_a_i;
    operand_b_d     = operand_b_i; // immediates come through this port unless used as operand
    operand_c_d     = operand_c_i; // immediates come through this port unless used as operand
    fpu_op_d        = OP_SGNJ;     // sign injection by default
    fpu_op_mod_d    = 1'b0;
    fpu_fmt_d       = FMT_FP32;
    fpu_fmt2_d      = FMT_FP32;
    fpu_ifmt_d      = IFMT_INT32;
    fpu_rm_d        = fpu_rm_i;
    fpu_vec_op_d    = fu_i == FPU_VEC;
    fpu_tag_d       = trans_id_i;
    vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field
    replicate_c     = 1'b0;
    check_ah        = 1'b0;        // whether set scalar AH encoding from MSB of rm_i

    // Scalar Rounding Modes - some ops encode inside RM but use smaller range
    if (!(fpu_rm_i inside {[3'b000:3'b100]}))
        fpu_rm_d = fpu_frm_i;

    // Vectorial ops always consult FRM
    if (fpu_vec_op_d)
        fpu_rm_d = fpu_frm_i;

    // Formats
    unique case (fpu_fmt_i)
        // FP32
        2'b00 : fpu_fmt_d = FMT_FP32;
        // FP64 or FP16ALT (vectorial)
        2'b01 : fpu_fmt_d = fpu_vec_op_d ? FMT_FP16ALT : FMT_FP64;
        // FP16 or FP16ALT (scalar)
        2'b10 : begin
            if (!fpu_vec_op_d && fpu_rm_i==3'b101)
                fpu_fmt_d = FMT_FP16ALT;
            else
                fpu_fmt_d = FMT_FP16;
        end
        // FP8
        default : fpu_fmt_d = FMT_FP8;
    endcase

    // Operations (this can modify the rounding mode field and format!)
    unique case (operator_i)
        // Addition
        FADD : begin
            fpu_op_d    = OP_ADD;
            replicate_c = 1'b1; // second operand is in C
        end
        // Subtraction is modified ADD
        FSUB : begin
            fpu_op_d     = OP_ADD;
            fpu_op_mod_d = 1'b1;
            replicate_c  = 1'b1; // second operand is in C
        end
        // Multiplication
        FMUL : fpu_op_d = OP_MUL;
        // Division
        FDIV : fpu_op_d = OP_DIV;
        // Min/Max - OP is encoded in rm (000-001)
        FMIN_MAX : begin
            fpu_op_d = OP_MINMAX;
            fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
            check_ah = 1'b1; // AH has RM MSB encoding
        end
        // Square Root
        FSQRT : fpu_op_d = OP_SQRT;
        // Fused Multiply Add
        FMADD : fpu_op_d = OP_FMADD;
        // Fused Multiply Subtract is modified FMADD
        FMSUB : begin
            fpu_op_d     = OP_FMADD;
            fpu_op_mod_d = 1'b1;
        end
        // Fused Negated Multiply Subtract
        FNMSUB : fpu_op_d = OP_FNMSUB;
        // Fused Negated Multiply Add is modified FNMSUB
        FNMADD : begin
            fpu_op_d     = OP_FNMSUB;
            fpu_op_mod_d = 1'b1;
        end
        // Float to Int Cast - Op encoded in lowest two imm bits or rm
        FCVT_F2I : begin
            fpu_op_d = OP_F2I;
            // Vectorial Ops encoded in R bit
            if (fpu_vec_op_d) begin
                fpu_op_mod_d    = fpu_rm_i[0];
                vec_replication = 1'b0; // no replication, R bit used for op
                unique case (fpu_fmt_i)
                    2'b00 : fpu_ifmt_d = IFMT_INT32;
                    2'b01,
                    2'b10 : fpu_ifmt_d = IFMT_INT16;
                    2'b11 : fpu_ifmt_d = IFMT_INT8;
                endcase
            // Scalar casts encoded in imm
            end else begin
                fpu_op_mod_d = operand_c_i[0];
                if (operand_c_i[1])
                    fpu_ifmt_d = IFMT_INT64;
                else
                    fpu_ifmt_d = IFMT_INT32;
            end
        end
        // Int to Float Cast - Op encoded in lowest two imm bits or rm
        FCVT_I2F : begin
            fpu_op_d = OP_I2F;
            // Vectorial Ops encoded in R bit
            if (fpu_vec_op_d) begin
                fpu_op_mod_d    = fpu_rm_i[0];
                vec_replication = 1'b0; // no replication, R bit used for op
                unique case (fpu_fmt_i)
                    2'b00 : fpu_ifmt_d = IFMT_INT32;
                    2'b01,
                    2'b10 : fpu_ifmt_d = IFMT_INT16;
                    2'b11 : fpu_ifmt_d = IFMT_INT8;
                endcase
            // Scalar casts encoded in imm
            end else begin
                fpu_op_mod_d = operand_c_i[0];
                if (operand_c_i[1])
                    fpu_ifmt_d = IFMT_INT64;
                else
                    fpu_ifmt_d = IFMT_INT32;
            end
        end
        // Float to Float Cast - Source format encoded in lowest two/three imm bits
        FCVT_F2F : begin
            fpu_op_d = OP_F2F;
            // Vectorial ops encoded in lowest two imm bits
            if (fpu_vec_op_d) begin
                vec_replication = 1'b0; // no replication for casts (not needed)
                unique case (operand_c_i[1:0])
                    2'b00: fpu_fmt2_d = FMT_FP32;
                    2'b01: fpu_fmt2_d = FMT_FP16ALT;
                    2'b10: fpu_fmt2_d = FMT_FP16;
                    2'b11: fpu_fmt2_d = FMT_FP8;
                endcase
            // Scalar ops encoded in lowest three imm bits
            end else begin
                unique case (operand_c_i[2:0])
                    3'b000: fpu_fmt2_d = FMT_FP32;
                    3'b001: fpu_fmt2_d = FMT_FP64;
                    3'b010: fpu_fmt2_d = FMT_FP16;
                    3'b110: fpu_fmt2_d = FMT_FP16ALT;
                    3'b011: fpu_fmt2_d = FMT_FP8;
                endcase
            end
        end
        // Scalar Sign Injection - op encoded in rm (000-010)
        FSGNJ : begin
            fpu_op_d = OP_SGNJ;
            fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
            check_ah = 1'b1; // AH has RM MSB encoding
        end
        // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding
        FMV_F2X : begin
            fpu_op_d        = OP_SGNJ;
            fpu_rm_d        = 3'b011; // passthrough without checking nan-box
            fpu_op_mod_d    = 1'b1;   // no NaN-Boxing
            check_ah        = 1'b1;   // AH has RM MSB encoding
            vec_replication = 1'b0;   // no replication, we set second operand
        end
        // Move from GPR to FPR - mapped to NOP since no recoding
        FMV_X2F : begin
            fpu_op_d        = OP_SGNJ;
            fpu_rm_d        = 3'b011; // passthrough without checking nan-box
            check_ah        = 1'b1;   // AH has RM MSB encoding
            vec_replication = 1'b0;   // no replication, we set second operand
        end
        // Scalar Comparisons - op encoded in rm (000-010)
        FCMP : begin
            fpu_op_d = OP_CMP;
            fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
            check_ah = 1'b1; // AH has RM MSB encoding
        end
        // Classification
        FCLASS : begin
            fpu_op_d = OP_CLASS;
            fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit - CLASS doesn't care anyways
            check_ah = 1'b1; // AH has RM MSB encoding
        end
        // Vectorial Minimum - set up scalar encoding in rm
        VFMIN : begin
            fpu_op_d = OP_MINMAX;
            fpu_rm_d = 3'b000; // min
        end
        // Vectorial Maximum - set up scalar encoding in rm
        VFMAX : begin
            fpu_op_d = OP_MINMAX;
            fpu_rm_d = 3'b001; // max
        end
        // Vectorial Sign Injection - set up scalar encoding in rm
        VFSGNJ : begin
            fpu_op_d = OP_SGNJ;
            fpu_rm_d = 3'b000; // sgnj
        end
        // Vectorial Negated Sign Injection - set up scalar encoding in rm
        VFSGNJN : begin
            fpu_op_d = OP_SGNJ;
            fpu_rm_d = 3'b001; // sgnjn
        end
        // Vectorial Xored Sign Injection - set up scalar encoding in rm
        VFSGNJX : begin
            fpu_op_d = OP_SGNJ;
            fpu_rm_d = 3'b010; // sgnjx
        end
        // Vectorial Equals - set up scalar encoding in rm
        VFEQ : begin
            fpu_op_d = OP_CMP;
            fpu_rm_d = 3'b010; // eq
        end
        // Vectorial Not Equals - set up scalar encoding in rm
        VFNE : begin
            fpu_op_d     = OP_CMP;
            fpu_op_mod_d = 1'b1;   // invert output
            fpu_rm_d     = 3'b010; // eq
        end
        // Vectorial Less Than - set up scalar encoding in rm
        VFLT : begin
            fpu_op_d = OP_CMP;
            fpu_rm_d = 3'b001; // lt
        end
        // Vectorial Greater or Equal - set up scalar encoding in rm
        VFGE : begin
            fpu_op_d     = OP_CMP;
            fpu_op_mod_d = 1'b1;   // invert output
            fpu_rm_d     = 3'b001; // lt
        end
        // Vectorial Less or Equal - set up scalar encoding in rm
        VFLE : begin
            fpu_op_d = OP_CMP;
            fpu_rm_d = 3'b000; // le
        end
        // Vectorial Greater Than - set up scalar encoding in rm
        VFGT : begin
            fpu_op_d     = OP_CMP;
            fpu_op_mod_d = 1'b1;   // invert output
            fpu_rm_d     = 3'b000; // le
        end
        // Vectorial Convert-and-Pack from FP32, lower 4 entries
        VFCPKAB_S : begin
            fpu_op_d        = OP_CPKAB;
            fpu_op_mod_d    = fpu_rm_i[0]; // A/B selection from R bit
            vec_replication = 1'b0;        // no replication, R bit used for op
            fpu_fmt2_d      = FMT_FP32;    // Cast from FP32
        end
        // Vectorial Convert-and-Pack from FP32, upper 4 entries
        VFCPKCD_S : begin
            fpu_op_d        = OP_CPKCD;
            fpu_op_mod_d    = fpu_rm_i[0]; // C/D selection from R bit
            vec_replication = 1'b0;        // no replication, R bit used for op
            fpu_fmt2_d      = FMT_FP32;    // Cast from FP32 (single-precision source)
        end
        // Vectorial Convert-and-Pack from FP64, lower 4 entries
        // NOTE(review): the FP64 variants previously reused the *_S labels, which
        // duplicated case items inside this unique case and left the FP64 forms
        // undecoded. VFCPKAB_D / VFCPKCD_D are assumed to be declared in
        // ariane_pkg::fu_op next to the *_S variants - confirm.
        VFCPKAB_D : begin
            fpu_op_d        = OP_CPKAB;
            fpu_op_mod_d    = fpu_rm_i[0]; // A/B selection from R bit
            vec_replication = 1'b0;        // no replication, R bit used for op
            fpu_fmt2_d      = FMT_FP64;    // Cast from FP64
        end
        // Vectorial Convert-and-Pack from FP64, upper 4 entries
        VFCPKCD_D : begin
            fpu_op_d        = OP_CPKCD;
            fpu_op_mod_d    = fpu_rm_i[0]; // C/D selection from R bit
            vec_replication = 1'b0;        // no replication, R bit used for op
            fpu_fmt2_d      = FMT_FP64;    // Cast from FP64
        end
        // No changes per default
        default : ; //nothing
    endcase

    // Scalar AH encoding fixing
    if (!fpu_vec_op_d && check_ah)
        if (fpu_rm_i[2])
            fpu_fmt_d = FMT_FP16ALT;

    // Replication: broadcast the lowest element of operand C (ADD/SUB) or
    // operand B (everything else) across the whole operand width
    if (fpu_vec_op_d && vec_replication) begin
        if (replicate_c) begin
            unique case (fpu_fmt_d)
                FMT_FP32    : operand_c_d = RVD ? {2{operand_c_i[31:0]}} : operand_c_i;
                FMT_FP16,
                FMT_FP16ALT : operand_c_d = RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}};
                FMT_FP8     : operand_c_d = RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}};
            endcase // fpu_fmt_d
        end else begin
            unique case (fpu_fmt_d)
                FMT_FP32    : operand_b_d = RVD ? {2{operand_b_i[31:0]}} : operand_b_i;
                FMT_FP16,
                FMT_FP16ALT : operand_b_d = RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}};
                FMT_FP8     : operand_b_d = RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}};
            endcase // fpu_fmt_d
        end
    end
end
//---------------------------------------------------------
// Upstream protocol inversion: InValid depends on InReady
//---------------------------------------------------------
// Two-state input FSM (READY / STALL).
// READY: the request is forwarded combinationally to the FPU. If the FPU does
//        not accept it in this cycle, the translated inputs are captured in
//        the hold register and the FSM moves to STALL.
// STALL: the held request is presented to the FPU until fpu_in_ready is seen.
// Assignment order matters: the defaults are overridden by the state case,
// and the flush override of state_d comes last.
always_comb begin : p_inputFSM
// Default Values
fpu_ready_o = 1'b0;
fpu_in_valid = 1'b0;
hold_inputs = 1'b0; // hold register disabled
use_hold = 1'b0; // inputs go directly to unit
state_d = state_q; // stay in the same state
// FSM
unique case (state_q)
// Default state, ready for instructions
READY : begin
fpu_ready_o = 1'b1; // Act as if FPU ready
fpu_in_valid = fpu_valid_i; // Forward input valid to FPU
// There is a transaction but the FPU can't handle it
if (fpu_valid_i & ~fpu_in_ready) begin
fpu_ready_o = 1'b0; // No token given to Issue
hold_inputs = 1'b1; // save inputs to the holding register
state_d = STALL; // stall future incoming requests
end
end
// We're stalling the upstream (ready=0)
STALL : begin
fpu_in_valid = 1'b1; // we have data for the FPU
use_hold = 1'b1; // the data comes from the hold reg
// Wait until it's consumed
if (fpu_in_ready) begin
fpu_ready_o = 1'b1; // Give a token to issue
state_d = READY; // accept future requests
end
end
// Default: emit default values
default : ;
endcase
// Flushing will override issue and go back to idle
// (only the next state is overridden; the outputs computed above are unchanged)
if (flush_i) begin
state_d = READY;
end
end
// Buffer register and FSM state holding
// Sequential part of the protocol-inversion buffer: the FSM state register
// plus the hold register for one fully translated FPU request.
always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg
// asynchronous, active-low reset: FSM starts in READY, hold register cleared
if(~rst_ni) begin
state_q <= READY;
operand_a_q <= '0;
operand_b_q <= '0;
operand_c_q <= '0;
fpu_op_q <= '0;
fpu_op_mod_q <= '0;
fpu_fmt_q <= '0;
fpu_fmt2_q <= '0;
fpu_ifmt_q <= '0;
fpu_rm_q <= '0;
fpu_vec_op_q <= '0;
fpu_tag_q <= '0;
end else begin
state_q <= state_d;
// Hold register is only loaded when the FSM asserts hold_inputs
if (hold_inputs) begin
operand_a_q <= operand_a_d;
operand_b_q <= operand_b_d;
operand_c_q <= operand_c_d;
fpu_op_q <= fpu_op_d;
fpu_op_mod_q <= fpu_op_mod_d;
fpu_fmt_q <= fpu_fmt_d;
fpu_fmt2_q <= fpu_fmt2_d;
fpu_ifmt_q <= fpu_ifmt_d;
fpu_rm_q <= fpu_rm_d;
fpu_vec_op_q <= fpu_vec_op_d;
fpu_tag_q <= fpu_tag_d;
end
end
end
// Select FPU input data: from register if valid data in register, else directly from input
assign operand_a = use_hold ? operand_a_q : operand_a_d;
assign operand_b = use_hold ? operand_b_q : operand_b_d;
assign operand_c = use_hold ? operand_c_q : operand_c_d;
assign fpu_op = use_hold ? fpu_op_q : fpu_op_d;
assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d;
assign fpu_fmt = use_hold ? fpu_fmt_q : fpu_fmt_d;
assign fpu_fmt2 = use_hold ? fpu_fmt2_q : fpu_fmt2_d;
assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d;
assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d;
assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d;
assign fpu_tag = use_hold ? fpu_tag_q : fpu_tag_d;
//---------------
// FPU instance
//---------------
// The FPU proper. Its input side is driven by the mux between the direct
// (*_d) and held (*_q) request; InValid/InReady connect to the input FSM.
// Result, tag and status go to the module outputs / exception cause, and
// OutReady is driven by fpu_out_ready, which this module ties to 1.
fpnew_top #(
.WIDTH ( FLEN ),
.TAG_WIDTH ( TRANS_ID_BITS ),
.RV64 ( 1'b1 ),
.RVF ( RVF ),
.RVD ( RVD ),
.Xf16 ( XF16 ),
.Xf16alt ( XF16ALT ),
.Xf8 ( XF8 ),
.Xfvec ( XFVEC ),
// TODO MOVE THESE VALUES TO PACKAGE
.LATENCY_COMP_F ( LAT_COMP_FP32 ),
.LATENCY_COMP_D ( LAT_COMP_FP64 ),
.LATENCY_COMP_Xf16 ( LAT_COMP_FP16 ),
.LATENCY_COMP_Xf16alt ( LAT_COMP_FP16ALT ),
.LATENCY_COMP_Xf8 ( LAT_COMP_FP8 ),
.LATENCY_DIVSQRT ( LAT_DIVSQRT ),
.LATENCY_NONCOMP ( LAT_NONCOMP ),
.LATENCY_CONV ( LAT_CONV )
) fpnew_top_i (
.Clk_CI ( clk_i ),
.Reset_RBI ( rst_ni ),
.A_DI ( operand_a ),
.B_DI ( operand_b ),
.C_DI ( operand_c ),
.RoundMode_SI ( fpu_rm ),
.Op_SI ( fpu_op ),
.OpMod_SI ( fpu_op_mod ),
.VectorialOp_SI ( fpu_vec_op ),
.FpFmt_SI ( fpu_fmt ),
.FpFmt2_SI ( fpu_fmt2 ),
.IntFmt_SI ( fpu_ifmt ),
.Tag_DI ( fpu_tag ),
.PrecCtl_SI ( fpu_prec_i ),
.InValid_SI ( fpu_in_valid ),
.InReady_SO ( fpu_in_ready ),
.Flush_SI ( flush_i ),
.Z_DO ( result_o ),
.Status_DO ( fpu_status ),
.Tag_DO ( fpu_trans_id_o ),
.OutValid_SO ( fpu_out_valid ),
.OutReady_SI ( fpu_out_ready )
);
// Pack status flag into exception cause, tval ignored in wb, exception is always invalid
assign fpu_exception_o.cause = {59'h0, fpu_status};
assign fpu_exception_o.valid = 1'b0;
// Downstream write port is dedicated to FPU and always ready
assign fpu_out_ready = 1'b1;
// Downstream valid from unit
assign fpu_valid_o = fpu_out_valid;
endmodule

View file

@ -42,16 +42,16 @@ module instr_scan (
assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr) ? 1'b1 : 1'b0;
assign rvi_jump_o = (instr_i[6:0] == riscv::OpcodeJal) ? 1'b1 : 1'b0;
// opcode JAL
assign rvc_jump_o = (instr_i[15:13] == riscv::OpcodeCJ) & is_rvc_o & (instr_i[1:0] == 2'b01);
assign rvc_jump_o = (instr_i[15:13] == riscv::OpcodeC1J) & is_rvc_o & (instr_i[1:0] == riscv::OpcodeC1);
// always links to register 0
assign rvc_jr_o = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd)
& ~instr_i[12]
& (instr_i[6:2] == 5'b00000)
& (instr_i[1:0] == 2'b10)
& (instr_i[1:0] == riscv::OpcodeC2)
& is_rvc_o;
assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeC1Beqz) | (instr_i[15:13] == riscv::OpcodeC1Bnez))
& (instr_i[1:0] == riscv::OpcodeC1)
& is_rvc_o;
assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeCBeqz) | (instr_i[15:13] == riscv::OpcodeCBnez))
& (instr_i[1:0] == 2'b01)
& is_rvc_o ;
// check that rs1 is x1 or x5
assign rvc_return_o = ~instr_i[11] & ~instr_i[10] & ~instr_i[8] & instr_i[7] & rvc_jr_o ;
// always links to register 1 e.g.: it is a jump

View file

@ -32,6 +32,9 @@ module id_stage (
input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions
// from CSR file
input riscv::priv_lvl_t priv_lvl_i, // current privilege level
input riscv::xs_t fs_i, // floating point extension status
input logic [2:0] frm_i, // floating-point dynamic rounding mode
input logic debug_mode_i, // we are in debug mode
input logic tvm_i,
input logic tw_i,
@ -39,9 +42,9 @@ module id_stage (
);
// register stage
struct packed {
logic valid;
logic valid;
scoreboard_entry_t sbe;
logic is_ctrl_flow;
logic is_ctrl_flow;
} issue_n, issue_q;
@ -90,6 +93,8 @@ module id_stage (
.ex_i ( fetch_entry.ex ),
.instruction_o ( decoded_instruction ),
.is_control_flow_instr_o ( is_control_flow_instr ),
.fs_i,
.frm_i,
.*
);

View file

@ -17,7 +17,7 @@ import ariane_pkg::*;
module issue_read_operands #(
parameter int unsigned NR_COMMIT_PORTS = 2
)(
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
// flush
@ -33,8 +33,12 @@ module issue_read_operands #(
output logic [REG_ADDR_SIZE-1:0] rs2_o,
input logic [63:0] rs2_i,
input logic rs2_valid_i,
output logic [REG_ADDR_SIZE-1:0] rs3_o,
input logic [FLEN-1:0] rs3_i,
input logic rs3_valid_i,
// get clobber input
input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_i,
input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_gpr_i,
input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_fpr_i,
// To FU, just single issue for now
output fu_t fu_o,
output fu_op operator_o,
@ -48,22 +52,26 @@ module issue_read_operands #(
input logic alu_ready_i, // FU is ready
output logic alu_valid_o, // Output is valid
// Branches and Jumps
input logic branch_ready_i,
output logic branch_valid_o, // this is a valid branch instruction
output branchpredict_sbe_t branch_predict_o,
// LSU
input logic lsu_ready_i, // FU is ready
output logic lsu_valid_o, // Output is valid
// MULT
input logic mult_ready_i, // FU is ready
output logic mult_valid_o, // Output is valid
input logic mult_ready_i, // FU is ready
output logic mult_valid_o, // Output is valid
// FPU
input logic fpu_ready_i, // FU is ready
output logic fpu_valid_o, // Output is valid
output logic [1:0] fpu_fmt_o, // FP fmt field from instr.
output logic [2:0] fpu_rm_o, // FP rm field from instr.
// CSR
input logic csr_ready_i, // FU is ready
output logic csr_valid_o, // Output is valid
// commit port
input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i,
input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i,
input logic [NR_COMMIT_PORTS-1:0] we_i
input logic [NR_COMMIT_PORTS-1:0] we_gpr_i,
input logic [NR_COMMIT_PORTS-1:0] we_fpr_i
// committing instruction instruction
// from scoreboard
// input scoreboard_entry commit_instr_i,
@ -72,24 +80,33 @@ module issue_read_operands #(
logic stall; // stall signal, we do not want to fetch any more entries
logic fu_busy; // functional unit is busy
logic [63:0] operand_a_regfile, operand_b_regfile; // operands coming from regfile
logic [FLEN-1:0] operand_c_regfile; // third operand only from fp regfile
// output flipflop (ID <-> EX)
logic [63:0] operand_a_n, operand_a_q,
operand_b_n, operand_b_q,
imm_n, imm_q;
logic alu_valid_n, alu_valid_q;
logic mult_valid_n, mult_valid_q;
logic lsu_valid_n, lsu_valid_q;
logic csr_valid_n, csr_valid_q;
logic branch_valid_n, branch_valid_q;
logic alu_valid_n, alu_valid_q;
logic mult_valid_n, mult_valid_q;
logic fpu_valid_n, fpu_valid_q;
logic [1:0] fpu_fmt_n, fpu_fmt_q;
logic [2:0] fpu_rm_n, fpu_rm_q;
logic lsu_valid_n, lsu_valid_q;
logic csr_valid_n, csr_valid_q;
logic branch_valid_n, branch_valid_q;
logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
fu_op operator_n, operator_q; // operation to perform
fu_t fu_n, fu_q; // functional unit to use
// forwarding signals
logic forward_rs1, forward_rs2;
logic forward_rs1, forward_rs2, forward_rs3;
// original instruction stored in tval
riscv::instruction_t orig_instr;
assign orig_instr = riscv::instruction_t'(issue_instr_i.ex.tval[31:0]);
// ID <-> EX registers
assign operand_a_o = operand_a_q;
assign operand_b_o = operand_b_q;
@ -100,11 +117,174 @@ module issue_read_operands #(
assign lsu_valid_o = lsu_valid_q;
assign csr_valid_o = csr_valid_q;
assign mult_valid_o = mult_valid_q;
assign fpu_valid_o = fpu_valid_q;
assign fpu_fmt_o = fpu_fmt_q;
assign fpu_rm_o = fpu_rm_q;
assign trans_id_o = trans_id_q;
assign imm_o = imm_q;
// ---------------
// Issue Stage
// ---------------
// select the right busy signal
// this obviously depends on the functional unit we need
always_comb begin : unit_busy
    // Busy flag of the functional unit targeted by the instruction to issue.
    // Units without a ready input here (NONE and anything not listed) are
    // never reported busy.
    fu_busy = 1'b0;
    unique case (issue_instr_i.fu)
        LOAD, STORE:
            fu_busy = ~lsu_ready_i;
        FPU, FPU_VEC:
            fu_busy = ~fpu_ready_i;
        MULT:
            fu_busy = ~mult_ready_i;
        // ALU, branches and CSR accesses share the ALU ready signal
        ALU, CTRL_FLOW, CSR:
            fu_busy = ~alu_ready_i;
        default: ; // keep the not-busy default
    endcase
end
// ---------------
// Register stage
// ---------------
// check that all operands are available, otherwise stall
// forward corresponding register
// Decides, per source operand, whether the instruction has to stall or can
// take the value forwarded from the scoreboard (rs*_i / rs*_valid_i).
// An operand is only considered when its register is marked in the matching
// clobber list (FPR or GPR, selected via is_rs1_fpr / is_rs2_fpr / is_imm_fpr).
always_comb begin : operands_available
stall = 1'b0;
// operand forwarding signals
forward_rs1 = 1'b0;
forward_rs2 = 1'b0;
forward_rs3 = 1'b0; // FPR only
// poll the scoreboard for those values
rs1_o = issue_instr_i.rs1;
rs2_o = issue_instr_i.rs2;
rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field
// 0. check that we are not using the zimm type in RS1
// as this is an immediate we do not have to wait on anything here
// 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr)
// 2. poll the scoreboard
if (~issue_instr_i.use_zimm && (is_rs1_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs1] != NONE
: rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin
// check if the clobbering instruction is not a CSR instruction, CSR instructions can only
// be fetched through the register file since they can't be forwarded
// if the operand is available, forward it. CSRs don't write to/from FPR
if (rs1_valid_i && (is_rs1_fpr(issue_instr_i.op) ? 1'b1 : rd_clobber_gpr_i[issue_instr_i.rs1] != CSR))
forward_rs1 = 1'b1;
else // the operand is not available -> stall
stall = 1'b1;
end
// same check for rs2; note there is no use_zimm exemption on this operand
if (is_rs2_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE
: rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin
// if the operand is available, forward it. CSRs don't write to/from FPR
if (rs2_valid_i && (is_rs2_fpr(issue_instr_i.op) ? 1'b1 : rd_clobber_gpr_i[issue_instr_i.rs2] != CSR))
forward_rs2 = 1'b1;
else // the operand is not available -> stall
stall = 1'b1;
end
// third operand: only checked when the imm field holds an FPR index
if (is_imm_fpr(issue_instr_i.op) && rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE) begin
// if the operand is available, forward it. CSRs don't write to/from FPR so no need to check
if (rs3_valid_i)
forward_rs3 = 1'b1;
else // the operand is not available -> stall
stall = 1'b1;
end
end
// Forwarding/Output MUX
always_comb begin : forwarding_operand_select
    // Selects the values registered into the ID <-> EX stage: operands A/B,
    // the immediate / third operand, transaction ID, functional unit and
    // operator. Later assignments take precedence over earlier ones, i.e.
    // regfile < forwarding < PC < zimm for operand A and
    // regfile < forwarding < immediate for operand B.

    // default is regfiles (gpr or fpr)
    operand_a_n = operand_a_regfile;
    operand_b_n = operand_b_regfile;
    // immediates are the third operands in the store case
    // for FP operations, the imm field can also be the third operand from the regfile
    imm_n       = is_imm_fpr(issue_instr_i.op) ? operand_c_regfile : issue_instr_i.result;
    trans_id_n  = issue_instr_i.trans_id;
    fu_n        = issue_instr_i.fu;
    operator_n  = issue_instr_i.op;

    // or should we forward
    if (forward_rs1) begin
        operand_a_n = rs1_i;
    end
    if (forward_rs2) begin
        operand_b_n = rs2_i;
    end
    if (forward_rs3) begin
        imm_n = rs3_i;
    end

    // use the PC as operand a
    if (issue_instr_i.use_pc) begin
        operand_a_n = issue_instr_i.pc;
    end

    // use the zimm as operand a
    if (issue_instr_i.use_zimm) begin
        // zero extend the 5-bit zimm to the full 64-bit operand width
        // (59 + 5 bits; the former 52'b0 only built 57 bits and relied on
        // implicit extension)
        operand_a_n = {59'b0, issue_instr_i.rs1[4:0]};
    end

    // or is it an immediate (including PC), this is not the case for a store and control flow instructions
    // also make sure operand B is not already used as an FP operand
    if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && !is_rs2_fpr(issue_instr_i.op)) begin
        operand_b_n = issue_instr_i.result;
    end
end
// FU select, assert the correct valid out signal (in the next cycle)
// Computes the next-cycle valid flag for each functional unit, plus the
// fmt / rm fields handed to the FPU. At most one valid is set, selected by
// issue_instr_i.fu, and only when the instruction is valid, acknowledged
// for issue (issue_ack_o) and carries no exception.
always_comb begin : unit_valid
alu_valid_n = 1'b0;
lsu_valid_n = 1'b0;
mult_valid_n = 1'b0;
fpu_valid_n = 1'b0;
fpu_fmt_n = 2'b0;
fpu_rm_n = 3'b0;
csr_valid_n = 1'b0;
branch_valid_n = 1'b0;
// Exception pass through:
// If an exception has occurred simply pass it through
// we do not want to issue this instruction
if (~issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin
case (issue_instr_i.fu)
ALU:
alu_valid_n = 1'b1;
CTRL_FLOW:
branch_valid_n = 1'b1;
MULT:
mult_valid_n = 1'b1;
// scalar FP: fmt and rm are taken from the original instruction word (orig_instr)
FPU : begin
fpu_valid_n = 1'b1;
fpu_fmt_n = orig_instr.rftype.fmt; // fmt bits from instruction
fpu_rm_n = orig_instr.rftype.rm; // rm bits from instruction
end
// vectorial FP: the rm field only transports the replication bit in bit 0
FPU_VEC : begin
fpu_valid_n = 1'b1;
fpu_fmt_n = orig_instr.rvftype.vfmt; // vfmt bits from instruction
fpu_rm_n = {2'b0, orig_instr.rvftype.repl}; // repl bit from instruction
end
LOAD, STORE:
lsu_valid_n = 1'b1;
CSR:
csr_valid_n = 1'b1;
default:;
endcase
end
// if we got a flush request, de-assert the valid flag, otherwise we will start this
// functional unit with the wrong inputs
// (fpu_fmt_n / fpu_rm_n are not cleared here; only the valid flags are)
if (flush_i) begin
alu_valid_n = 1'b0;
lsu_valid_n = 1'b0;
mult_valid_n = 1'b0;
fpu_valid_n = 1'b0;
csr_valid_n = 1'b0;
branch_valid_n = 1'b0;
end
end
// We can issue an instruction if we do not detect that any other instruction is writing the same
// destination register.
// We also need to check if there is an unresolved branch in the scoreboard.
@ -120,13 +300,15 @@ module issue_read_operands #(
// WAW - Write After Write Dependency Check
// -----------------------------------------
// no other instruction has the same destination register -> issue the instruction
if (rd_clobber_i[issue_instr_i.rd] == NONE) begin
if (is_rd_fpr(issue_instr_i.op) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE)
: (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin
issue_ack_o = 1'b1;
end
// or check that the target destination register will be written in this cycle by the
// commit stage
for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++)
if (we_i[i] && waddr_i[i] == issue_instr_i.rd) begin
if (is_rd_fpr(issue_instr_i.op) ? (we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd)
: (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd)) begin
issue_ack_o = 1'b1;
end
end
@ -145,159 +327,73 @@ module issue_read_operands #(
end
end
// select the right busy signal
// this obviously depends on the functional unit we need
always_comb begin : unit_busy
unique case (issue_instr_i.fu)
NONE:
fu_busy = 1'b0;
ALU:
fu_busy = ~alu_ready_i;
CTRL_FLOW:
fu_busy = ~branch_ready_i;
MULT:
fu_busy = ~mult_ready_i;
LOAD, STORE:
fu_busy = ~lsu_ready_i;
CSR:
fu_busy = ~csr_ready_i;
default:
fu_busy = 1'b0;
endcase
end
// ---------------
// Register stage
// ---------------
// check that all operands are available, otherwise stall
// forward corresponding register
always_comb begin : operands_available
stall = 1'b0;
// operand forwarding signals
forward_rs1 = 1'b0;
forward_rs2 = 1'b0;
// poll the scoreboard for those values
rs1_o = issue_instr_i.rs1;
rs2_o = issue_instr_i.rs2;
// 0. check that we are not using the zimm type in RS1
// as this is an immediate we do not have to wait on anything here
// 1. check if the source registers are clobberd
// 2. poll the scoreboard
if (~issue_instr_i.use_zimm && rd_clobber_i[issue_instr_i.rs1] != NONE) begin
// check if the clobbering instruction is not a CSR instruction, CSR instructions can only
// be fetched through the register file since they can't be forwarded
// the operand is available, forward it
if (rs1_valid_i && rd_clobber_i[issue_instr_i.rs1] != CSR)
forward_rs1 = 1'b1;
else // the operand is not available -> stall
stall = 1'b1;
end
if (rd_clobber_i[issue_instr_i.rs2] != NONE) begin
// the operand is available, forward it
if (rs2_valid_i && rd_clobber_i[issue_instr_i.rs2] != CSR)
forward_rs2 = 1'b1;
else // the operand is not available -> stall
stall = 1'b1;
end
end
// Forwarding/Output MUX
always_comb begin : forwarding_operand_select
// default is regfile
operand_a_n = operand_a_regfile;
operand_b_n = operand_b_regfile;
// immediates are the third operands in the store case
imm_n = issue_instr_i.result;
trans_id_n = issue_instr_i.trans_id;
fu_n = issue_instr_i.fu;
operator_n = issue_instr_i.op;
// or should we forward
if (forward_rs1) begin
operand_a_n = rs1_i;
end
if (forward_rs2) begin
operand_b_n = rs2_i;
end
// use the PC as operand a
if (issue_instr_i.use_pc) begin
operand_a_n = issue_instr_i.pc;
end
// use the zimm as operand a
if (issue_instr_i.use_zimm) begin
// zero extend operand a
operand_a_n = {52'b0, issue_instr_i.rs1[4:0]};
end
// or is it an immediate (including PC), this is not the case for a store and control flow instructions
if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW)) begin
operand_b_n = issue_instr_i.result;
end
end
// FU select, assert the correct valid out signal (in the next cycle)
always_comb begin : unit_valid
alu_valid_n = 1'b0;
lsu_valid_n = 1'b0;
mult_valid_n = 1'b0;
csr_valid_n = 1'b0;
branch_valid_n = 1'b0;
// Exception pass through:
// If an exception has occurred simply pass it through
// we do not want to issue this instruction
if (~issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin
case (issue_instr_i.fu)
ALU:
alu_valid_n = 1'b1;
CTRL_FLOW:
branch_valid_n = 1'b1;
MULT:
mult_valid_n = 1'b1;
LOAD, STORE:
lsu_valid_n = 1'b1;
CSR:
csr_valid_n = 1'b1;
default:;
endcase
end
// if we got a flush request, de-assert the valid flag, otherwise we will start this
// functional unit with the wrong inputs
if (flush_i) begin
alu_valid_n = 1'b0;
lsu_valid_n = 1'b0;
mult_valid_n = 1'b0;
csr_valid_n = 1'b0;
branch_valid_n = 1'b0;
end
end
// ----------------------
// Integer Register File
// ----------------------
logic [1:0][63:0] rdata;
logic [1:0][4:0] raddr_pack;
// pack signals
logic [NR_COMMIT_PORTS-1:0][4:0] waddr_pack;
logic [NR_COMMIT_PORTS-1:0][63:0] wdata_pack;
logic [NR_COMMIT_PORTS-1:0] we_pack;
assign raddr_pack = {issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]};
assign waddr_pack = {waddr_i[1], waddr_i[0]};
assign wdata_pack = {wdata_i[1], wdata_i[0]};
assign we_pack = {we_gpr_i[1], we_gpr_i[0]};
ariane_regfile #(
.DATA_WIDTH ( 64 )
) regfile_i (
// Clock and Reset
.clk ( clk_i ),
.rst_n ( rst_ni ),
.test_en_i ( 1'b0 ),
.raddr_a_i ( issue_instr_i.rs1[4:0] ),
.rdata_a_o ( operand_a_regfile ),
.raddr_b_i ( issue_instr_i.rs2[4:0] ),
.rdata_b_o ( operand_b_regfile ),
.waddr_a_i ( waddr_i[0] ),
.wdata_a_i ( wdata_i[0] ),
.we_a_i ( we_i[0] ),
.waddr_b_i ( waddr_i[1] ),
.wdata_b_i ( wdata_i[1] ),
.we_b_i ( we_i[1] )
.DATA_WIDTH ( 64 ),
.NR_READ_PORTS ( 2 ),
.NR_WRITE_PORTS ( NR_COMMIT_PORTS ),
.ZERO_REG_ZERO ( 1 )
) i_ariane_regfile (
.test_en_i ( 1'b0 ),
.raddr_i ( raddr_pack ),
.rdata_o ( rdata ),
.waddr_i ( waddr_pack ),
.wdata_i ( wdata_pack ),
.we_i ( we_pack ),
.*
);
// -----------------------------
// Floating-Point Register File
// -----------------------------
// read data of the three FP read ports (index 0: rs1, 1: rs2, 2: rs3)
logic [2:0][FLEN-1:0] fprdata;

// pack signals
logic [2:0][4:0] fp_raddr_pack;
// commit data sliced to FLEN bits per write port, so that every entry lines
// up with the FLEN-wide register file (the entries used to be 64 bits wide
// and the signal was left unconnected)
logic [NR_COMMIT_PORTS-1:0][FLEN-1:0] fp_wdata_pack;

generate
    if (FP_PRESENT) begin : float_regfile_gen
        // rs3 is taken from the lower bits of the imm/result field
        assign fp_raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]};
        assign fp_wdata_pack = {wdata_i[1][FLEN-1:0], wdata_i[0][FLEN-1:0]};

        ariane_regfile #(
            .DATA_WIDTH     ( FLEN            ),
            .NR_READ_PORTS  ( 3               ),
            .NR_WRITE_PORTS ( NR_COMMIT_PORTS ),
            .ZERO_REG_ZERO  ( 0               )
        ) i_ariane_fp_regfile (
            .test_en_i ( 1'b0          ),
            .raddr_i   ( fp_raddr_pack ),
            .rdata_o   ( fprdata       ),
            .waddr_i   ( waddr_pack    ),
            // NOTE(review): assumes the regfile write-data port is
            // [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] - confirm against ariane_regfile
            .wdata_i   ( fp_wdata_pack ),
            .we_i      ( we_fpr_i      ),
            .*
        );
    end else begin : no_fpr_gen
        // no FP register file: FP read data is constant zero
        assign fprdata = '{default: '0};
    end
endgenerate

// operands A/B come from the FP or the integer register file depending on the operation
assign operand_a_regfile = is_rs1_fpr(issue_instr_i.op) ? fprdata[0] : rdata[0];
assign operand_b_regfile = is_rs2_fpr(issue_instr_i.op) ? fprdata[1] : rdata[1];
assign operand_c_regfile = fprdata[2];
// ----------------------
// Registers (ID <-> EX)
// ----------------------
@ -309,6 +405,9 @@ module issue_read_operands #(
alu_valid_q <= 1'b0;
branch_valid_q <= 1'b0;
mult_valid_q <= 1'b0;
fpu_valid_q <= 1'b0;
fpu_fmt_q <= 2'b0;
fpu_rm_q <= 3'b0;
lsu_valid_q <= 1'b0;
csr_valid_q <= 1'b0;
fu_q <= NONE;
@ -324,6 +423,9 @@ module issue_read_operands #(
alu_valid_q <= alu_valid_n;
branch_valid_q <= branch_valid_n;
mult_valid_q <= mult_valid_n;
fpu_valid_q <= fpu_valid_n;
fpu_fmt_q <= fpu_fmt_n;
fpu_rm_q <= fpu_rm_n;
lsu_valid_q <= lsu_valid_n;
csr_valid_q <= csr_valid_n;
fu_q <= fu_n;

View file

@ -16,9 +16,9 @@
import ariane_pkg::*;
module issue_stage #(
parameter int unsigned NR_ENTRIES = 8,
parameter int unsigned NR_WB_PORTS = 4,
parameter int unsigned NR_COMMIT_PORTS = 2
parameter int unsigned NR_ENTRIES = 8,
parameter int unsigned NR_WB_PORTS = 4,
parameter int unsigned NR_COMMIT_PORTS = 2
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
@ -48,14 +48,17 @@ module issue_stage #(
input logic lsu_ready_i,
output logic lsu_valid_o,
// branch prediction
input logic branch_ready_i,
output logic branch_valid_o, // use branch prediction unit
output logic branch_valid_o, // use branch prediction unit
output branchpredict_sbe_t branch_predict_o,
input logic mult_ready_i,
output logic mult_valid_o, // Branch predict Out
input logic csr_ready_i,
input logic fpu_ready_i,
output logic fpu_valid_o,
output logic [1:0] fpu_fmt_o, // FP fmt field from instr.
output logic [2:0] fpu_rm_o, // FP rm field from instr.
output logic csr_valid_o,
// write back port
@ -68,7 +71,8 @@ module issue_stage #(
// commit port
input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i,
input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i,
input logic [NR_COMMIT_PORTS-1:0] we_i,
input logic [NR_COMMIT_PORTS-1:0] we_gpr_i,
input logic [NR_COMMIT_PORTS-1:0] we_fpr_i,
output scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_o,
input logic [NR_COMMIT_PORTS-1:0] commit_ack_i
@ -76,7 +80,8 @@ module issue_stage #(
// ---------------------------------------------------
// Scoreboard (SB) <-> Issue and Read Operands (IRO)
// ---------------------------------------------------
fu_t [2**REG_ADDR_SIZE:0] rd_clobber_sb_iro;
fu_t [2**REG_ADDR_SIZE:0] rd_clobber_gpr_sb_iro;
fu_t [2**REG_ADDR_SIZE:0] rd_clobber_fpr_sb_iro;
logic [REG_ADDR_SIZE-1:0] rs1_iro_sb;
logic [63:0] rs1_sb_iro;
@ -86,6 +91,10 @@ module issue_stage #(
logic [63:0] rs2_sb_iro;
logic rs2_valid_iro_sb;
logic [REG_ADDR_SIZE-1:0] rs3_iro_sb;
logic [FLEN-1:0] rs3_sb_iro;
logic rs3_valid_iro_sb;
scoreboard_entry_t issue_instr_rename_sb;
logic issue_instr_valid_rename_sb;
logic issue_ack_sb_rename;
@ -117,35 +126,31 @@ module issue_stage #(
.NR_ENTRIES (NR_ENTRIES ),
.NR_WB_PORTS(NR_WB_PORTS)
) i_scoreboard (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_unissued_instr_i ( flush_unissued_instr_i ),
.flush_i ( flush_i ),
.unresolved_branch_i ( 1'b0 ),
.unresolved_branch_i ( 1'b0 ),
.rd_clobber_gpr_o ( rd_clobber_gpr_sb_iro ),
.rd_clobber_fpr_o ( rd_clobber_fpr_sb_iro ),
.rs1_i ( rs1_iro_sb ),
.rs1_o ( rs1_sb_iro ),
.rs1_valid_o ( rs1_valid_sb_iro ),
.rs2_i ( rs2_iro_sb ),
.rs2_o ( rs2_sb_iro ),
.rs2_valid_o ( rs2_valid_iro_sb ),
.rs3_i ( rs3_iro_sb ),
.rs3_o ( rs3_sb_iro ),
.rs3_valid_o ( rs3_valid_iro_sb ),
.rd_clobber_o ( rd_clobber_sb_iro ),
.rs1_i ( rs1_iro_sb ),
.rs1_o ( rs1_sb_iro ),
.rs1_valid_o ( rs1_valid_sb_iro ),
.rs2_i ( rs2_iro_sb ),
.rs2_o ( rs2_sb_iro ),
.rs2_valid_o ( rs2_valid_iro_sb ),
.decoded_instr_i ( issue_instr_rename_sb ),
.decoded_instr_valid_i ( issue_instr_valid_rename_sb ),
.decoded_instr_ack_o ( issue_ack_sb_rename ),
.issue_instr_o ( issue_instr_sb_iro ),
.issue_instr_valid_o ( issue_instr_valid_sb_iro ),
.issue_ack_i ( issue_ack_iro_sb ),
.commit_instr_o ( commit_instr_o ),
.commit_ack_i ( commit_ack_i ),
.decoded_instr_i ( issue_instr_rename_sb ),
.decoded_instr_valid_i ( issue_instr_valid_rename_sb ),
.decoded_instr_ack_o ( issue_ack_sb_rename ),
.issue_instr_o ( issue_instr_sb_iro ),
.issue_instr_valid_o ( issue_instr_valid_sb_iro ),
.issue_ack_i ( issue_ack_iro_sb ),
.resolved_branch_i ( resolved_branch_i ),
.trans_id_i ( trans_id_i ),
.wbdata_i ( wbdata_i ),
.ex_i ( ex_ex_i ),
.wb_valid_i ( wb_valid_i )
.resolved_branch_i ( resolved_branch_i ),
.trans_id_i ( trans_id_i ),
.wbdata_i ( wbdata_i ),
.ex_i ( ex_ex_i ),
.*
);
// ---------------------------------------------------------
@ -162,7 +167,11 @@ module issue_stage #(
.rs2_o ( rs2_iro_sb ),
.rs2_i ( rs2_sb_iro ),
.rs2_valid_i ( rs2_valid_iro_sb ),
.rd_clobber_i ( rd_clobber_sb_iro ),
.rs3_o ( rs3_iro_sb ),
.rs3_i ( rs3_sb_iro ),
.rs3_valid_i ( rs3_valid_iro_sb ),
.rd_clobber_gpr_i ( rd_clobber_gpr_sb_iro ),
.rd_clobber_fpr_i ( rd_clobber_fpr_sb_iro ),
.*
);

View file

@ -299,17 +299,10 @@ module load_unit (
// prepare these signals for faster selection in the next cycle
assign signed_d = load_data_d.operator inside {LW, LH, LB};
assign fp_sign_d = 1'b0;
assign idx_d = (load_data_d.operator inside {LW}) ? load_data_d.address_offset + 3 :
(load_data_d.operator inside {LH}) ? load_data_d.address_offset + 1 :
load_data_d.address_offset;
// use this with FP support:
// assign signed_d = load_data_d.operator inside {LW, LH, LB};
// assign fp_sign_d = load_data_d.operator inside {FLW, FLH, FLB};
// assign idx_d = (load_data_d.operator inside {LW, FLW}) ? load_data_d.address_offset + 3 :
// (load_data_d.operator inside {LH, FLH}) ? load_data_d.address_offset + 1 :
// load_data_d.address_offset;
assign fp_sign_d = load_data_d.operator inside {FLW, FLH, FLB};
assign idx_d = (load_data_d.operator inside {LW, FLW}) ? load_data_d.address_offset + 3 :
(load_data_d.operator inside {LH, FLH}) ? load_data_d.address_offset + 1 :
load_data_d.address_offset;
assign sign_bits = { req_port_i.data_rdata[63],
@ -328,25 +321,13 @@ module load_unit (
// result mux
always_comb begin
unique case (load_data_q.operator)
LW, LWU: begin
result_o = {{32{sign_bit}}, shifted_data[31:0]};
end
LH, LHU: result_o = {{48{sign_bit}}, shifted_data[15:0]};
LB, LBU: result_o = {{56{sign_bit}}, shifted_data[7:0]};
LW, LWU, FLW: result_o = {{32{sign_bit}}, shifted_data[31:0]};
LH, LHU, FLH: result_o = {{48{sign_bit}}, shifted_data[15:0]};
LB, LBU, FLB: result_o = {{56{sign_bit}}, shifted_data[7:0]};
default: result_o = shifted_data;
endcase
end
// use this with FP support:
// always_comb begin
// unique case (load_data_q.operator)
// LW, LWU, FLW: result_o = {{32{sign_bit}}, shifted_data[31:0]};
// LH, LHU, FLH: result_o = {{48{sign_bit}}, shifted_data[15:0]};
// LB, LBU, FLB: result_o = {{56{sign_bit}}, shifted_data[7:0]};
// default: result_o = shifted_data;
// endcase
// end
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (~rst_ni) begin
idx_q <= 0;

View file

@ -280,7 +280,7 @@ module lsu #(
if (lsu_ctrl.valid) begin
case (lsu_ctrl.operator)
// double word
LD, SD,
LD, SD, FLD, FSD,
AMO_LRD, AMO_SCD,
AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD,
AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND,
@ -290,7 +290,7 @@ module lsu #(
end
end
// word
LW, LWU, SW,
LW, LWU, SW, FLW, FSW,
AMO_LRW, AMO_SCW,
AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW,
AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW,
@ -300,7 +300,7 @@ module lsu #(
end
end
// half word
LH, LHU, SH: begin
LH, LHU, SH, FLH, FSH: begin
if (lsu_ctrl.vaddr[0] != 1'b0) begin
data_misaligned = 1'b1;
end
@ -366,6 +366,7 @@ module lsu #(
.ready_o ( lsu_ready_o ),
.*
);
endmodule
// ------------------

View file

@ -40,9 +40,10 @@ module lsu_arbiter (
// RR fashion. FIFOs need to be 2 deep in order to unconditionally accept loads and stores since we can
// have a maximum of 2 outstanding loads.
// if there are valid elements in the fifos, the unit posts the result on its output ports and expects it
// to be consumed unconditionally
// to be consumed unconditionally
localparam int DEPTH = 2;
// Important: this needs to be greater than 2 to unconditionally accept incoming requests
localparam int DEPTH = 4;
typedef struct packed {
logic [TRANS_ID_BITS-1:0] trans_id;
@ -64,9 +65,9 @@ module lsu_arbiter (
assign ld_in.result = ld_result_i;
assign ld_in.ex = ld_ex_i;
assign trans_id_o = (idx) ? st_out.trans_id : ld_out.trans_id;
assign result_o = (idx) ? st_out.result : ld_out.result;
assign ex_o = (idx) ? st_out.ex : ld_out.ex;
assign trans_id_o = (idx) ? st_out.trans_id : ld_out.trans_id;
assign result_o = (idx) ? st_out.result : ld_out.result;
assign ex_o = (idx) ? st_out.ex : ld_out.ex;
// round robin with "lookahead" for 2 requesters
rrarbiter #(
@ -85,7 +86,7 @@ module lsu_arbiter (
fifo_v2 #(
.dtype ( fifo_t ),
.DEPTH ( DEPTH )
) i_ld_fifo (
) i_ld_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
@ -98,12 +99,12 @@ module lsu_arbiter (
.push_i ( ld_valid_i ),
.data_o ( ld_out ),
.pop_i ( ld_ren )
);
);
fifo_v2 #(
.dtype ( fifo_t ),
.DEPTH ( DEPTH )
) i_st_fifo (
) i_st_fifo (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
@ -116,7 +117,7 @@ module lsu_arbiter (
.push_i ( st_valid_i ),
.data_o ( st_out ),
.pop_i ( st_ren )
);
);
`ifndef SYNTHESIS

View file

@ -41,34 +41,48 @@ module re_name (
// keep track of re-naming data structures
logic [31:0] re_name_table_gpr_n, re_name_table_gpr_q;
logic [31:0] re_name_table_fpr_n, re_name_table_fpr_q;
// -------------------
// Re-naming
// -------------------
always_comb begin
// MSB of the renamed source register addresses
logic name_bit_rs1, name_bit_rs2, name_bit_rd;
logic name_bit_rs1, name_bit_rs2, name_bit_rs3, name_bit_rd;
// default assignments
re_name_table_gpr_n = re_name_table_gpr_q;
re_name_table_fpr_n = re_name_table_fpr_q;
issue_instr_o = issue_instr_i;
if (issue_ack_i && !flush_unissied_instr_i) begin
// if we acknowledge the instruction tic the corresponding destination register
re_name_table_gpr_n[issue_instr_i.rd] = re_name_table_gpr_q[issue_instr_i.rd] ^ 1'b1;
if (is_rd_fpr(issue_instr_i.op))
re_name_table_fpr_n[issue_instr_i.rd] = re_name_table_fpr_q[issue_instr_i.rd] ^ 1'b1;
else
re_name_table_gpr_n[issue_instr_i.rd] = re_name_table_gpr_q[issue_instr_i.rd] ^ 1'b1;
end
// select name bit according to the register file used for source operands
name_bit_rs1 = re_name_table_gpr_q[issue_instr_i.rs1];
name_bit_rs2 = re_name_table_gpr_q[issue_instr_i.rs2];
name_bit_rs1 = is_rs1_fpr(issue_instr_i.op) ? re_name_table_fpr_q[issue_instr_i.rs1]
: re_name_table_gpr_q[issue_instr_i.rs1];
name_bit_rs2 = is_rs2_fpr(issue_instr_i.op) ? re_name_table_fpr_q[issue_instr_i.rs2]
: re_name_table_gpr_q[issue_instr_i.rs2];
// rs3 is only used in certain FP operations and held like an immediate
name_bit_rs3 = re_name_table_fpr_q[issue_instr_i.result[4:0]]; // make sure only the addr bits are read
// select name bit according to the state it will have after renaming
name_bit_rd = re_name_table_gpr_q[issue_instr_i.rd] ^ (issue_instr_i.rd != '0); // don't rename x0
name_bit_rd = is_rd_fpr(issue_instr_i.op) ? re_name_table_fpr_q[issue_instr_i.rd] ^ 1'b1
: re_name_table_gpr_q[issue_instr_i.rd] ^ (issue_instr_i.rd != '0); // don't rename x0
// re-name the source registers
issue_instr_o.rs1 = { ENABLE_RENAME & name_bit_rs1, issue_instr_i.rs1[4:0] };
issue_instr_o.rs2 = { ENABLE_RENAME & name_bit_rs2, issue_instr_i.rs2[4:0] };
// re-name the third operand in imm if it's actually an operand
if (is_imm_fpr(issue_instr_i.op))
issue_instr_o.result = { ENABLE_RENAME & name_bit_rs3, issue_instr_i.result[4:0]};
// re-name the destination register
issue_instr_o.rd = { ENABLE_RENAME & name_bit_rd, issue_instr_i.rd[4:0] };
@ -78,6 +92,7 @@ module re_name (
// Handle flushes
if (flush_i) begin
re_name_table_gpr_n = '0;
re_name_table_fpr_n = '0;
end
end
@ -88,8 +103,10 @@ module re_name (
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
re_name_table_gpr_q <= '0;
re_name_table_fpr_q <= '0;
end else begin
re_name_table_gpr_q <= re_name_table_gpr_n;
re_name_table_fpr_q <= re_name_table_fpr_n;
end
end
endmodule

View file

@ -25,7 +25,8 @@ module scoreboard #(
input logic flush_i, // flush whole scoreboard
input logic unresolved_branch_i, // we have an unresolved branch
// list of clobbered registers to issue stage
output fu_t [2**REG_ADDR_SIZE:0] rd_clobber_o,
output fu_t [2**REG_ADDR_SIZE:0] rd_clobber_gpr_o,
output fu_t [2**REG_ADDR_SIZE:0] rd_clobber_fpr_o,
// regfile like interface to operand read stage
input logic [REG_ADDR_SIZE-1:0] rs1_i,
@ -36,12 +37,16 @@ module scoreboard #(
output logic [63:0] rs2_o,
output logic rs2_valid_o,
input logic [REG_ADDR_SIZE-1:0] rs3_i,
output logic [FLEN-1:0] rs3_o,
output logic rs3_valid_o,
// advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer
output scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_o,
input logic [NR_COMMIT_PORTS-1:0] commit_ack_i,
// instruction to put on top of scoreboard e.g. : top pointer
// we can always put this instruction to the to p unless we signal with asserted full_o
// instruction to put on top of scoreboard e.g.: top pointer
// we can always put this instruction to the top unless we signal with asserted full_o
input scoreboard_entry_t decoded_instr_i,
input logic decoded_instr_valid_i,
output logic decoded_instr_ack_o,
@ -66,9 +71,9 @@ module scoreboard #(
scoreboard_entry_t sbe; // this is the score board entry we will send to ex
} mem_q [NR_ENTRIES-1:0], mem_n [NR_ENTRIES-1:0];
logic [$clog2(NR_ENTRIES)-1:0] issue_cnt_n, issue_cnt_q;
logic [$clog2(NR_ENTRIES)-1:0] issue_pointer_n, issue_pointer_q;
logic [$clog2(NR_ENTRIES)-1:0] commit_pointer_n, commit_pointer_q;
logic [BITS_ENTRIES-1:0] issue_cnt_n, issue_cnt_q;
logic [BITS_ENTRIES-1:0] issue_pointer_n, issue_pointer_q;
logic [BITS_ENTRIES-1:0] commit_pointer_n, commit_pointer_q;
logic issue_full;
// the issue queue is full don't issue any new instructions
@ -76,7 +81,7 @@ module scoreboard #(
// output commit instruction directly
always_comb begin : commit_ports
for (logic [$clog2(NR_ENTRIES)-1:0] i = 0; i < NR_COMMIT_PORTS; i++)
for (logic [BITS_ENTRIES-1:0] i = 0; i < NR_COMMIT_PORTS; i++)
commit_instr_o[i] = mem_q[commit_pointer_q + i].sbe;
end
@ -94,8 +99,8 @@ module scoreboard #(
// maintain a FIFO with issued instructions
// keep track of all issued instructions
always_comb begin : issue_fifo
automatic logic [$clog2(NR_ENTRIES)-1:0] issue_cnt;
automatic logic [$clog2(NR_ENTRIES)-1:0] commit_pointer;
automatic logic [BITS_ENTRIES-1:0] issue_cnt;
automatic logic [BITS_ENTRIES-1:0] commit_pointer;
commit_pointer = commit_pointer_q;
issue_cnt = issue_cnt_q;
@ -124,13 +129,13 @@ module scoreboard #(
mem_n[trans_id_i[i]].sbe.valid = 1'b1;
mem_n[trans_id_i[i]].sbe.result = wbdata_i[i];
// save the target address of a branch (needed for debug in commit stage)
if (resolved_branch_i.valid) begin
mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address;
end
mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address;
// write the exception back if it is valid
if (ex_i[i].valid) begin
if (ex_i[i].valid)
mem_n[trans_id_i[i]].sbe.ex = ex_i[i];
end
// write the fflags back from the FPU (exception valid is never set), leave tval intact
else if (mem_n[trans_id_i[i]].sbe.fu inside {FPU, FPU_VEC})
mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause;
end
end
@ -138,7 +143,7 @@ module scoreboard #(
// Commit Port
// ------------
// we've got an acknowledge from commit
for (logic [$clog2(NR_ENTRIES)-1:0] i = 0; i < NR_COMMIT_PORTS; i++) begin
for (logic [BITS_ENTRIES-1:0] i = 0; i < NR_COMMIT_PORTS; i++) begin
if (commit_ack_i[i]) begin
// decrease the issue counter
issue_cnt--;
@ -149,6 +154,7 @@ module scoreboard #(
commit_pointer++;
end
end
// ------
// Flush
// ------
@ -164,6 +170,7 @@ module scoreboard #(
commit_pointer = '0;
end
end
// update issue counter
issue_cnt_n = issue_cnt;
// update commit pointer
@ -175,16 +182,20 @@ module scoreboard #(
// -------------------
// rd_clobber output: output currently clobbered destination registers
always_comb begin : clobber_output
rd_clobber_o = '{default: NONE};
rd_clobber_gpr_o = '{default: NONE};
rd_clobber_fpr_o = '{default: NONE};
// check for all valid entries and set the clobber register accordingly
for (int unsigned i = 0; i < NR_ENTRIES; i++) begin
if (mem_q[i].issued) begin
// output the functional unit which is going to clobber this register
rd_clobber_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu;
if (is_rd_fpr(mem_q[i].sbe.op))
rd_clobber_fpr_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu;
else
rd_clobber_gpr_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu;
end
end
// the zero register is always free
rd_clobber_o[0] = NONE;
// the gpr zero register is always free
rd_clobber_gpr_o[0] = NONE;
end
// ----------------------------------
@ -194,20 +205,26 @@ module scoreboard #(
always_comb begin : read_operands
rs1_o = 64'b0;
rs2_o = 64'b0;
rs3_o = '0;
rs1_valid_o = 1'b0;
rs2_valid_o = 1'b0;
rs3_valid_o = 1'b0;
for (int unsigned i = 0; i < NR_ENTRIES; i++) begin
// only consider this entry if it is valid
if (mem_q[i].issued) begin
// look at the appropriate fields and look whether there was an
// instruction that wrote the rd field before, first for RS1 and then for RS2
if (mem_q[i].sbe.rd == rs1_i) begin
// instruction that wrote the rd field before, first for RS1 and then for RS2, then for RS3
// we check the type of the stored result register file against issued register file
if ((mem_q[i].sbe.rd == rs1_i) && (is_rd_fpr(mem_q[i].sbe.op) == is_rs1_fpr(issue_instr_o.op))) begin
rs1_o = mem_q[i].sbe.result;
rs1_valid_o = mem_q[i].sbe.valid;
end else if (mem_q[i].sbe.rd == rs2_i) begin
end else if ((mem_q[i].sbe.rd == rs2_i) && (is_rd_fpr(mem_q[i].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin
rs2_o = mem_q[i].sbe.result;
rs2_valid_o = mem_q[i].sbe.valid;
end else if ((mem_q[i].sbe.rd == rs3_i) && (is_rd_fpr(mem_q[i].sbe.op) == is_imm_fpr(issue_instr_o.op))) begin
rs3_o = mem_q[i].sbe.result;
rs3_valid_o = mem_q[i].sbe.valid;
end
end
end
@ -218,22 +235,30 @@ module scoreboard #(
// provide a direct combinational path from WB a.k.a forwarding
// make sure that we are not forwarding a result that got an exception
for (int unsigned j = 0; j < NR_WB_PORTS; j++) begin
if (mem_q[trans_id_i[j]].sbe.rd == rs1_i && wb_valid_i[j] && ~ex_i[j].valid) begin
if (mem_q[trans_id_i[j]].sbe.rd == rs1_i && wb_valid_i[j] && ~ex_i[j].valid
&& (is_rd_fpr(mem_q[trans_id_i[j]].sbe.op) == is_rs1_fpr(issue_instr_o.op))) begin
rs1_o = wbdata_i[j];
rs1_valid_o = wb_valid_i[j];
break;
end
if (mem_q[trans_id_i[j]].sbe.rd == rs2_i && wb_valid_i[j] && ~ex_i[j].valid) begin
if (mem_q[trans_id_i[j]].sbe.rd == rs2_i && wb_valid_i[j] && ~ex_i[j].valid
&& (is_rd_fpr(mem_q[trans_id_i[j]].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin
rs2_o = wbdata_i[j];
rs2_valid_o = wb_valid_i[j];
break;
end
if (mem_q[trans_id_i[j]].sbe.rd == rs3_i && wb_valid_i[j] && ~ex_i[j].valid
&& (is_rd_fpr(mem_q[trans_id_i[j]].sbe.op) == is_imm_fpr(issue_instr_o.op))) begin
rs3_o = wbdata_i[j];
rs3_valid_o = wb_valid_i[j];
break;
end
end
// make sure we didn't read the zero register
if (rs1_i == '0)
if (rs1_i == '0 && ~is_rs1_fpr(issue_instr_o.op)) // only GPR reg0 is 0
rs1_valid_o = 1'b0;
if (rs2_i == '0)
if (rs2_i == '0 && ~is_rs2_fpr(issue_instr_o.op)) // only GPR reg0 is 0
rs2_valid_o = 1'b0;
end
@ -254,12 +279,12 @@ module scoreboard #(
`ifndef SYNTHESIS
`ifndef verilator
initial begin
assert (NR_ENTRIES == 2**$clog2(NR_ENTRIES)) else $fatal("Scoreboard size needs to be a power of two.");
assert (NR_ENTRIES == 2**BITS_ENTRIES) else $fatal("Scoreboard size needs to be a power of two.");
end
// assert that zero is never set
assert property (
@(posedge clk_i) rst_ni |-> (rd_clobber_o[0] == NONE))
@(posedge clk_i) rst_ni |-> (rd_clobber_gpr_o[0] == NONE))
else $error ("RD 0 should not bet set");
// assert that we never acknowledge a commit if the instruction is not valid
assert property (

@ -0,0 +1 @@
Subproject commit ffe7818dc24eba29cf3634d404d1b3b85034272b

View file

@ -1,40 +0,0 @@
// Copyright 2018 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
//
// Behavioural Clock Gating
// File: cluster_clock_gating.sv
// Author: ?
// Date: ?
// Behavioural clock gate: clk_o follows clk_i only while the latched enable is set.
//   clk_i     - input clock
//   en_i      - gate enable
//   test_en_i - ORed with en_i, so asserting it enables the clock as well
//   clk_o     - gated clock; a plain copy of clk_i when PULP_FPGA_EMUL or verilator is defined
module cluster_clock_gating (
input logic clk_i,
input logic en_i,
input logic test_en_i,
output logic clk_o
);
`ifdef PULP_FPGA_EMUL
// no clock gates in FPGA flow
assign clk_o = clk_i;
`elsif verilator
// no gating under verilator either: the clock is passed straight through
assign clk_o = clk_i;
`else
// enable as captured by the latch below
logic clk_en;
// the latch is transparent only while clk_i is low, so clk_en cannot change
// while clk_i is high
always_latch
begin
if (clk_i == 1'b0)
clk_en <= en_i | test_en_i;
end
// AND the clock with the latched enable to produce the gated clock
assign clk_o = clk_i & clk_en;
`endif
endmodule

View file

@ -0,0 +1,85 @@
// Copyright (c) 2018 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
/// A leading-one finder / leading zero counter.
/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB)
/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB)
module find_first_one #(
    /// The width of the input vector (must be at least 1).
    parameter int WIDTH = -1,
    /// 0: search starts at the LSB, 1: input is bit-reversed so the search starts at the MSB.
    parameter int FLIP = 0
)(
    /// Vector to search.
    input  logic [WIDTH-1:0]         in_i,
    /// Index of the first set bit of the (optionally flipped) input.
    output logic [$clog2(WIDTH)-1:0] first_one_o,
    /// High when no bit of in_i is set.
    output logic                     no_ones_o
);

    // depth of the binary selection tree
    localparam int NUM_LEVELS = $clog2(WIDTH);

    // pragma translate_off
    initial begin
        // WIDTH = 0 (and the unset default of -1) would silently produce reversed-range vectors
        assert (WIDTH > 0) else $fatal(1, "find_first_one: WIDTH must be at least 1.");
    end
    // pragma translate_on

    // index_lut[j] holds the constant j: the index reported when leaf j is selected
    logic [WIDTH-1:0][NUM_LEVELS-1:0]         index_lut;
    // tree nodes stored heap-style: node 0 is the root, level l starts at 2**l-1
    logic [2**NUM_LEVELS-1:0]                 sel_nodes;   // subtree contains at least one set bit
    logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes; // index of the first set bit in the subtree

    // input after the optional bit reversal
    logic [WIDTH-1:0] in_tmp;

    for (genvar i = 0; i < WIDTH; i++) begin
        assign in_tmp[i] = FLIP ? in_i[WIDTH-1-i] : in_i[i];
    end

    for (genvar j = 0; j < WIDTH; j++) begin
        assign index_lut[j] = j;
    end

    for (genvar level = 0; level < NUM_LEVELS; level++) begin

        // inner levels: combine the two children, the lower-indexed child has priority
        if (level < NUM_LEVELS-1) begin
            for (genvar l = 0; l < 2**level; l++) begin
                assign sel_nodes[2**level-1+l]   = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1];
                assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ?
                    index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1];
            end
        end

        // leaf level: each node looks at a pair of input bits
        if (level == NUM_LEVELS-1) begin
            for (genvar k = 0; k < 2**level; k++) begin
                // if two successive indices are still in the vector...
                if (k * 2 < WIDTH-1) begin
                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2] | in_tmp[k*2+1];
                    assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1];
                end
                // if only the first index is still in the vector...
                if (k * 2 == WIDTH-1) begin
                    assign sel_nodes[2**level-1+k]   = in_tmp[k*2];
                    assign index_nodes[2**level-1+k] = index_lut[k*2];
                end
                // if index is out of range
                if (k * 2 > WIDTH-1) begin
                    assign sel_nodes[2**level-1+k]   = 1'b0;
                    assign index_nodes[2**level-1+k] = '0;
                end
            end
        end
    end

    assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0;
    // without a tree (WIDTH == 1) the single input bit decides directly;
    // previously this case reported "no ones" unconditionally
    assign no_ones_o   = NUM_LEVELS > 0 ? ~sel_nodes[0] : ~(|in_i);

endmodule

View file

@ -19,9 +19,12 @@ class instruction_trace_item;
scoreboard_entry_t sbe;
logic [31:0] pc;
logic [31:0] instr;
logic [63:0] reg_file [32];
logic [63:0] gp_reg_file [32];
logic [63:0] fp_reg_file [32];
logic [4:0] read_regs [$];
logic read_fpr [$];
logic [4:0] result_regs [$];
logic result_fpr [$];
logic [63:0] imm;
logic [63:0] result;
logic [63:0] paddr;
@ -31,14 +34,15 @@ class instruction_trace_item;
logic [4:0] rs1, rs2, rs3, rd;
// constructor creating a new instruction trace item, e.g.: a single instruction with all relevant information
function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] reg_file [32],
logic [63:0] result, logic [63:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, branchpredict_t bp);
function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] gp_reg_file [32],
logic [63:0] fp_reg_file [32], logic [63:0] result, logic [63:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, branchpredict_t bp);
this.simtime = simtime;
this.cycle = cycle;
this.pc = sbe.pc;
this.sbe = sbe;
this.instr = instr;
this.reg_file = reg_file;
this.gp_reg_file = gp_reg_file;
this.fp_reg_file = fp_reg_file;
this.result = result;
this.paddr = paddr;
this.bp = bp;
@ -48,7 +52,8 @@ class instruction_trace_item;
this.rs3 = instr[31:27];
this.rd = sbe.rd[4:0];
endfunction
// convert register address to ABI compatible form
// convert gp register address to ABI compatible form
function string regAddrToStr(logic [5:0] addr);
case (addr[4:0])
0: return "x0";
@ -63,9 +68,64 @@ class instruction_trace_item;
default: return $sformatf("s%0d", (addr[4:0] - 16));
endcase
endfunction
// convert fp register address to ABI compatible form (ft0-ft11, fs0-fs11, fa0-fa7)
// Only addr[4:0] is decoded, exactly like regAddrToStr does for the integer registers, so a
// 6-bit address whose MSB is set still maps to a name instead of matching no case item and
// returning an empty string.
function string fpRegAddrToStr(logic [5:0] addr);
    case (addr[4:0]) inside
        [0:7]   : return $sformatf("ft%0d", addr[4:0]);
        [8:9]   : return $sformatf("fs%0d", (addr[4:0] - 8));
        [10:17] : return $sformatf("fa%0d", (addr[4:0] - 10));
        [18:27] : return $sformatf("fs%0d", (addr[4:0] - 16));
        [28:31] : return $sformatf("ft%0d", (addr[4:0] - 20));
    endcase
endfunction
// Translate the 2-bit FP format field into the suffix appended to FP mnemonics
// (printed as "<mnemonic>.<suffix>"). Any value containing X/Z bits yields "XX".
function string fpFmtToStr(logic [1:0] fmt);
    // 4-state equality keeps the exact matching behaviour of a case statement
    if (fmt === 2'b00) return "s";
    if (fmt === 2'b01) return "d";
    if (fmt === 2'b10) return "h";
    if (fmt === 2'b11) return "b";
    return "XX";
endfunction
// Translate a 2-bit format code into a one-letter suffix. Differs from fpFmtToStr only in
// that 2'b00 maps to "w" instead of "s". Any value containing X/Z bits yields "XX".
// NOTE(review): judging by the name this is meant for fmv mnemonics - callers are not visible here.
function string fmvFpFmtToStr(logic [1:0] fmt);
    // 4-state equality keeps the exact matching behaviour of a case statement
    if (fmt === 2'b00) return "w";
    if (fmt === 2'b01) return "d";
    if (fmt === 2'b10) return "h";
    if (fmt === 2'b11) return "b";
    return "XX";
endfunction
// Translate a 2-bit integer format code into its suffix: "w", "wu", "l" or "lu".
// Any value containing X/Z bits yields "XX".
function string intFmtToStr(logic [1:0] ifmt);
    // 4-state equality keeps the exact matching behaviour of a case statement
    if (ifmt === 2'b00) return "w";
    if (ifmt === 2'b01) return "wu";
    if (ifmt === 2'b10) return "l";
    if (ifmt === 2'b11) return "lu";
    return "XX";
endfunction
// Translate the 3-bit FP rounding-mode field into its mnemonic.
// The encodings 3'b101 and 3'b110, as well as values containing X/Z bits, yield "INVALID".
function string fpRmToStr(logic [2:0] rm);
    // 4-state equality keeps the exact matching behaviour of a case statement
    if (rm === 3'b000) return "rne";
    if (rm === 3'b001) return "rtz";
    if (rm === 3'b010) return "rdn";
    if (rm === 3'b011) return "rup";
    if (rm === 3'b100) return "rmm";
    // 3'b111 selects the dynamic rounding mode in the RISC-V F extension
    if (rm === 3'b111) return "dyn";
    return "INVALID";
endfunction
function string csrAddrToStr(logic [11:0] addr);
case (addr)
riscv::CSR_FFLAGS: return "fflags";
riscv::CSR_FRM: return "frm";
riscv::CSR_FCSR: return "fcsr";
riscv::CSR_SSTATUS: return "sstatus";
riscv::CSR_SIE: return "sie";
riscv::CSR_STVEC: return "stvec";
@ -120,7 +180,7 @@ class instruction_trace_item;
function string printInstr();
string s;
casex (instr)
case (instr) inside
// Aliases
32'h00_00_00_13: s = this.printMnemonic("nop");
// Regular opcodes
@ -174,6 +234,33 @@ class instruction_trace_item;
INSTR_SRLW: s = this.printRInstr("srlw");
INSTR_SRAW: s = this.printRInstr("sraw");
INSTR_MULW: s = this.printMulInstr(1'b1);
// FP
INSTR_FMADD: s = this.printR4Instr("fmadd");
INSTR_FMSUB: s = this.printR4Instr("fmsub");
INSTR_FNSMSUB: s = this.printR4Instr("fnmsub");
INSTR_FNMADD: s = this.printR4Instr("fnmadd");
INSTR_FADD: s = this.printRFBCInstr("fadd", 1'b1);
INSTR_FSUB: s = this.printRFBCInstr("fsub", 1'b1);
INSTR_FMUL: s = this.printRFInstr("fmul", 1'b1);
INSTR_FDIV: s = this.printRFInstr("fdiv", 1'b1);
INSTR_FSQRT: s = this.printRFInstr1Op("fsqrt", 1'b1);
INSTR_FSGNJ: s = this.printRFInstr("fsgnj", 1'b0);
INSTR_FSGNJN: s = this.printRFInstr("fsgnjn", 1'b0);
INSTR_FSGNJX: s = this.printRFInstr("fsgnjx", 1'b0);
INSTR_FMIN: s = this.printRFInstr("fmin", 1'b0);
INSTR_FMAX: s = this.printRFInstr("fmax", 1'b0);
INSTR_FLE: s = this.printRFInstr("fle", 1'b0);
INSTR_FLT: s = this.printRFInstr("flt", 1'b0);
INSTR_FEQ: s = this.printRFInstr("feq", 1'b0);
INSTR_FCLASS: s = this.printRFInstr1Op("fclass", 1'b0);
INSTR_FCVT_F2F,
INSTR_FMV_F2X,
INSTR_FMV_X2F,
INSTR_FCVT_F2I,
INSTR_FCVT_I2F: s = this.printFpSpecialInstr(); // these are a mess to do nicely
// FENCE
INSTR_FENCE: s = this.printMnemonic("fence");
INSTR_FENCEI: s = this.printMnemonic("fence.i");
@ -201,14 +288,16 @@ class instruction_trace_item;
INSTR_WFI: s = this.printMnemonic("wfi");
INSTR_SFENCE: s = this.printMnemonic("sfence.vma");
// loads and stores
INSTR_LOAD: s = this.printLoadInstr();
INSTR_STORE: s = this.printStoreInstr();
INSTR_LOAD,
INSTR_LOAD_FP: s = this.printLoadInstr();
INSTR_STORE,
INSTR_STORE_FP: s = this.printStoreInstr();
INSTR_AMO: s = this.printAMOInstr();
default: s = this.printMnemonic("INVALID");
endcase
s = $sformatf("%10t %10d %s %h %h %h %-36s", simtime,
s = $sformatf("%8dns %8d %s %h %h %h %-36s", simtime,
cycle,
priv_lvl,
sbe.pc,
@ -223,23 +312,29 @@ class instruction_trace_item;
// s);
foreach (result_regs[i]) begin
if (result_regs[i] != 0)
if (result_fpr[i])
s = $sformatf("%s %-4s:%16x", s, fpRegAddrToStr(result_regs[i]), this.result);
else if (result_regs[i] != 0)
s = $sformatf("%s %-4s:%16x", s, regAddrToStr(result_regs[i]), this.result);
end
foreach (read_regs[i]) begin
if (read_regs[i] != 0)
s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), reg_file[read_regs[i]]);
if (read_fpr[i])
s = $sformatf("%s %-4s:%16x", s, fpRegAddrToStr(read_regs[i]), fp_reg_file[read_regs[i]]);
else if (read_regs[i] != 0)
s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), gp_reg_file[read_regs[i]]);
end
casex (instr)
case (instr) inside
// check if the instruction was a load or store
INSTR_STORE: begin
logic [63:0] vaddress = reg_file[read_regs[1]] + this.imm;
INSTR_STORE,
INSTR_STORE_FP: begin
logic [63:0] vaddress = gp_reg_file[read_regs[1]] + this.imm;
s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr);
end
INSTR_LOAD: begin
logic [63:0] vaddress = reg_file[read_regs[0]] + this.imm;
INSTR_LOAD,
INSTR_LOAD_FP: begin
logic [63:0] vaddress = gp_reg_file[read_regs[0]] + this.imm;
s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr);
end
endcase
@ -261,48 +356,131 @@ class instruction_trace_item;
function string printRInstr(input string mnemonic);
result_regs.push_back(sbe.rd);
read_regs.push_back(sbe.rs1);
read_regs.push_back(sbe.rs2);
result_regs.push_back(rd);
result_fpr.push_back(1'b0);
read_regs.push_back(rs1);
read_fpr.push_back(1'b0);
read_regs.push_back(rs2);
read_fpr.push_back(1'b0);
return $sformatf("%-16s %s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2));
return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), regAddrToStr(rs2));
endfunction // printRInstr
// Formats a floating-point instruction whose two source operands are taken
// from the rs2 slot and from the low five bits of the scoreboard result
// (sbe.result[4:0]) rather than from rs1/rs2.
//   mnemonic - base name; the format suffix from instr[26:25] is appended
//   use_rnd  - when set, the rounding mode (instr[14:12]) is printed as an
//              extra operand unless it equals 3'b111
// Side effects: records rd in result_regs/result_fpr and both sources in
// read_regs/read_fpr, each flagged FP or integer per the is_*_fpr helpers.
function string printRFBCInstr(input string mnemonic, input bit use_rnd);
    logic  dst_is_fp, src_b_is_fp, src_c_is_fp;
    string opname, dst_name, src_b_name, src_c_name;

    // classify each operand once; the flags drive both bookkeeping and naming
    dst_is_fp   = is_rd_fpr(sbe.op);
    src_b_is_fp = is_rs2_fpr(sbe.op);
    src_c_is_fp = is_imm_fpr(sbe.op);

    result_regs.push_back(rd);
    result_fpr.push_back(dst_is_fp);
    read_regs.push_back(rs2);
    read_fpr.push_back(src_b_is_fp);
    read_regs.push_back(sbe.result[4:0]);
    read_fpr.push_back(src_c_is_fp);

    opname     = $sformatf("%s.%s", mnemonic, fpFmtToStr(instr[26:25]));
    dst_name   = dst_is_fp   ? fpRegAddrToStr(rd)              : regAddrToStr(rd);
    src_b_name = src_b_is_fp ? fpRegAddrToStr(rs2)             : regAddrToStr(rs2);
    src_c_name = src_c_is_fp ? fpRegAddrToStr(sbe.result[4:0]) : regAddrToStr(sbe.result[4:0]);

    // explicit rounding mode requested and present: print it as a trailing operand
    if (use_rnd && instr[14:12]!=3'b111)
        return $sformatf("%-12s %4s, %s, %s, %s", opname, dst_name, src_b_name, src_c_name, fpRmToStr(instr[14:12]));

    return $sformatf("%-12s %4s, %s, %s", opname, dst_name, src_b_name, src_c_name);
endfunction // printRFBCInstr
// Formats a two-source floating-point instruction as
// "<mnemonic>.<fmt> rd, rs1, rs2[, rm]".
//   mnemonic - base name; the format suffix from instr[26:25] is appended
//   use_rnd  - when set, the rounding mode (instr[14:12]) is printed as an
//              extra operand unless it equals 3'b111
// Side effects: records rd in result_regs/result_fpr and rs1/rs2 in
// read_regs/read_fpr, each flagged FP or integer per the is_*_fpr helpers.
function string printRFInstr(input string mnemonic, input bit use_rnd);
    logic  dst_is_fp, src1_is_fp, src2_is_fp;
    string opname, dst_name, src1_name, src2_name;

    // classify each operand once; the flags drive both bookkeeping and naming
    dst_is_fp  = is_rd_fpr(sbe.op);
    src1_is_fp = is_rs1_fpr(sbe.op);
    src2_is_fp = is_rs2_fpr(sbe.op);

    result_regs.push_back(rd);
    result_fpr.push_back(dst_is_fp);
    read_regs.push_back(rs1);
    read_fpr.push_back(src1_is_fp);
    read_regs.push_back(rs2);
    read_fpr.push_back(src2_is_fp);

    opname    = $sformatf("%s.%s", mnemonic, fpFmtToStr(instr[26:25]));
    dst_name  = dst_is_fp  ? fpRegAddrToStr(rd)  : regAddrToStr(rd);
    src1_name = src1_is_fp ? fpRegAddrToStr(rs1) : regAddrToStr(rs1);
    src2_name = src2_is_fp ? fpRegAddrToStr(rs2) : regAddrToStr(rs2);

    // explicit rounding mode requested and present: print it as a trailing operand
    if (use_rnd && instr[14:12]!=3'b111)
        return $sformatf("%-12s %4s, %s, %s, %s", opname, dst_name, src1_name, src2_name, fpRmToStr(instr[14:12]));

    return $sformatf("%-12s %4s, %s, %s", opname, dst_name, src1_name, src2_name);
endfunction // printRFInstr
// Formats a single-source floating-point instruction as
// "<mnemonic>.<fmt> rd, rs1[, rm]".
//   mnemonic - base name; the format suffix from instr[26:25] is appended
//   use_rnd  - when set, the rounding mode (instr[14:12]) is printed as an
//              extra operand unless it equals 3'b111
// Side effects: records rd in result_regs/result_fpr and rs1 in
// read_regs/read_fpr, each flagged FP or integer per the is_*_fpr helpers.
function string printRFInstr1Op(input string mnemonic, input bit use_rnd);
    logic  dst_is_fp, src_is_fp;
    string opname, dst_name, src_name;

    // classify each operand once; the flags drive both bookkeeping and naming
    dst_is_fp = is_rd_fpr(sbe.op);
    src_is_fp = is_rs1_fpr(sbe.op);

    result_regs.push_back(rd);
    result_fpr.push_back(dst_is_fp);
    read_regs.push_back(rs1);
    read_fpr.push_back(src_is_fp);

    opname   = $sformatf("%s.%s", mnemonic, fpFmtToStr(instr[26:25]));
    dst_name = dst_is_fp ? fpRegAddrToStr(rd)  : regAddrToStr(rd);
    src_name = src_is_fp ? fpRegAddrToStr(rs1) : regAddrToStr(rs1);

    // explicit rounding mode requested and present: print it as a trailing operand
    if (use_rnd && instr[14:12]!=3'b111)
        return $sformatf("%-12s %4s, %s, %s", opname, dst_name, src_name, fpRmToStr(instr[14:12]));

    return $sformatf("%-12s %4s, %s", opname, dst_name, src_name);
endfunction // printRFInstr1Op
// Formats a three-source (R4-type) floating-point instruction as
// "<mnemonic>.<fmt> rd, rs1, rs2, rs3, rm".
//   mnemonic - base name; the format suffix from instr[26:25] is appended
// Side effects: records rd as result and rs1/rs2/rs3 as sources, all of them
// flagged as floating-point registers (1'b1).
function string printR4Instr(input string mnemonic);
    string opname;

    // destination: always an FP register
    result_regs.push_back(rd);
    result_fpr.push_back(1'b1);

    // the three sources, in operand order
    read_regs.push_back(rs1);
    read_regs.push_back(rs2);
    read_regs.push_back(rs3);

    // one FP flag per source pushed above
    read_fpr.push_back(1'b1);
    read_fpr.push_back(1'b1);
    read_fpr.push_back(1'b1);

    opname = $sformatf("%s.%s", mnemonic, fpFmtToStr(instr[26:25]));

    // the third source is printed straight from instr[31:27]
    // (presumably the same bits as rs3 - TODO confirm)
    return $sformatf("%-12s %4s, %s, %s, %s, %s",
                     opname,
                     fpRegAddrToStr(rd),
                     fpRegAddrToStr(rs1),
                     fpRegAddrToStr(rs2),
                     fpRegAddrToStr(instr[31:27]),
                     fpRmToStr(instr[14:12]));
endfunction // printR4Instr
// Formats the floating-point conversion and move instructions, whose mnemonic
// and operand register files depend on the decoded operation (sbe.op):
//   FCVT_F2F - fcvt.<fmt>.<fmt>  fd, fs1, rm
//   FCVT_F2I - fcvt.<int>.<fmt>  rd, fs1, rm
//   FCVT_I2F - fcvt.<fmt>.<int>  fd, rs1, rm
//   FMV_F2X  - fmv.x.<fmt>       rd, fs1
//   FMV_X2F  - fmv.<fmt>.x       fd, rs1
// Side effects: records rd in result_regs/result_fpr and rs1 in
// read_regs/read_fpr, flagged FP or integer per is_rd_fpr/is_rs1_fpr.
// Returns the formatted instruction, or the INVALID mnemonic for any other
// operation (previously the function fell off the end without a return value
// and silently produced an empty trace entry).
function string printFpSpecialInstr();
    result_regs.push_back(rd);
    result_fpr.push_back(is_rd_fpr(sbe.op));
    read_regs.push_back(rs1);
    read_fpr.push_back(is_rs1_fpr(sbe.op));

    case (sbe.op)
        FCVT_F2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20])), fpRegAddrToStr(rd), fpRegAddrToStr(rs1), fpRmToStr(instr[14:12]));
        FCVT_F2I : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1), fpRmToStr(instr[14:12]));
        FCVT_I2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20])), fpRegAddrToStr(rd), regAddrToStr(rs1), fpRmToStr(instr[14:12]));
        FMV_F2X  : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1));
        FMV_X2F  : return $sformatf("%-12s %4s, %s", $sformatf("fmv.%s.x", fmvFpFmtToStr(instr[26:25])), fpRegAddrToStr(rd), regAddrToStr(rs1));
        // any other operation is not a conversion/move: make it visible in the
        // trace instead of returning an unassigned (empty) string
        default  : return printMnemonic("INVALID");
    endcase
endfunction // printFpSpecialInstr
// Formats an immediate (I-type) instruction with a signed decimal immediate
// taken from sbe.result: "<mnemonic> rd, rs1, imm", or "<mnemonic> rd, imm"
// when the source register index is 0.
// Side effects: records rd in result_regs and rs1 in read_regs; the matching
// result_fpr/read_fpr entries are 1'b0 (not floating-point registers).
// NOTE(review): this listing carries both the `sbe.rd/sbe.rs1` statements and
// the `rd/rs1` ones, so the later returns cannot execute - looks like merged
// diff residue; confirm which set is intended.
function string printIInstr(input string mnemonic);
result_regs.push_back(sbe.rd);
read_regs.push_back(sbe.rs1);
result_regs.push_back(rd);
result_fpr.push_back(1'b0);
read_regs.push_back(rs1);
read_fpr.push_back(1'b0);
if (sbe.rs1 == 0)
return $sformatf("%-16s %s, %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result));
if (rs1 == 0)
return $sformatf("%-12s %4s, %0d", mnemonic, regAddrToStr(rd), $signed(sbe.result));
return $sformatf("%-16s %s, %s, %0d", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), $signed(sbe.result));
return $sformatf("%-12s %4s, %s, %0d", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), $signed(sbe.result));
endfunction // printIInstr
// Formats an immediate (I-type) instruction whose immediate (sbe.result) is
// printed as unsigned hexadecimal: "<mnemonic> rd, rs1, 0x<imm>".
// Side effects: records rd in result_regs and rs1 in read_regs; the matching
// result_fpr/read_fpr entries are 1'b0 (not floating-point registers).
// NOTE(review): this listing carries both the `sbe.rd/sbe.rs1` statements and
// the `rd/rs1` ones (two returns, only the first can execute) - looks like
// merged diff residue; confirm which set is intended.
function string printIuInstr(input string mnemonic);
result_regs.push_back(sbe.rd);
read_regs.push_back(sbe.rs1);
result_regs.push_back(rd);
result_fpr.push_back(1'b0);
read_regs.push_back(rs1);
read_fpr.push_back(1'b0);
return $sformatf("%-16s %s, %s, 0x%0x", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), sbe.result);
return $sformatf("%-12s %4s, %s, 0x%0x", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), sbe.result);
endfunction // printIuInstr
// Formats a two-source instruction with a pc-relative signed offset taken from
// sbe.result: "<mnemonic> rs1, rs2, pc + <offset>", or
// "<mnemonic> rs1, pc + <offset>" when the second source index is 0.
// Side effects: records rs1 and rs2 in read_regs with read_fpr entries of 1'b0
// (not floating-point registers); nothing is added to result_regs.
// NOTE(review): this listing carries both the `sbe.rs1/sbe.rs2` statements and
// the `rs1/rs2` ones - looks like merged diff residue; confirm which set is
// intended.
function string printSBInstr(input string mnemonic);
read_regs.push_back(sbe.rs1);
read_regs.push_back(sbe.rs2);
read_regs.push_back(rs1);
read_fpr.push_back(1'b0);
read_regs.push_back(rs2);
read_fpr.push_back(1'b0);
if (sbe.rs2 == 0)
return $sformatf("%-16s %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), $signed(sbe.result));
if (rs2 == 0)
return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(rs1), $signed(sbe.result));
else
return $sformatf("%-16s %s, %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2), $signed(sbe.result));
return $sformatf("%-12s %4s, %s, pc + %0d", mnemonic, regAddrToStr(rs1), regAddrToStr(rs2), $signed(sbe.result));
endfunction // printSBInstr
// Formats an upper-immediate (U-type) instruction as "<mnemonic> rd, 0x<imm>",
// where the immediate printed is bits [31:12] of sbe.result.
// Side effects: records rd in result_regs with a result_fpr entry of 1'b0
// (not a floating-point register); no source registers are recorded.
// NOTE(review): this listing carries both the `sbe.rd` statements and the `rd`
// ones (two returns, only the first can execute) - looks like merged diff
// residue; confirm which set is intended.
function string printUInstr(input string mnemonic);
result_regs.push_back(sbe.rd);
result_regs.push_back(rd);
result_fpr.push_back(1'b0);
return $sformatf("%-16s %s, 0x%0h", mnemonic, regAddrToStr(sbe.rd), sbe.result[31:12]);
return $sformatf("%-12s %4s, 0x%0h", mnemonic, regAddrToStr(rd), sbe.result[31:12]);
endfunction // printUInstr
function string printJump();
@ -329,58 +507,70 @@ class instruction_trace_item;
// Formats a jump (UJ-type) instruction with a pc-relative signed offset taken
// from sbe.result: "<mnemonic> rd, pc + <offset>", or "<mnemonic> pc + <offset>"
// when the destination index is 0 (no link register to show).
// Side effects: records rd in result_regs with a result_fpr entry of 1'b0
// (not a floating-point register).
// NOTE(review): this listing carries both the `sbe.rd` statements and the `rd`
// ones - looks like merged diff residue; confirm which set is intended.
function string printUJInstr(input string mnemonic);
result_regs.push_back(sbe.rd);
result_regs.push_back(rd);
result_fpr.push_back(1'b0);
// jump instruction
if (sbe.rd == 0)
return $sformatf("%-16s pc + %0d", mnemonic, $signed(sbe.result));
if (rd == 0)
return $sformatf("%-12s pc + %0d", mnemonic, $signed(sbe.result));
else
return $sformatf("%-16s %s, pc + %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result));
return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(rd), $signed(sbe.result));
endfunction // printUJInstr
// Formats a CSR access instruction. The CSR name is looked up from
// sbe.result[11:0] via csrAddrToStr.
//   instr[14] == 0 : register form - rs1 is a source register and is recorded
//                    in read_regs (read_fpr entry 1'b0)
//   instr[14] == 1 : immediate form - the rs1 field is printed as an unsigned
//                    number and is not recorded as a register read
// In both forms the operand whose index is 0 (rd or rs1) is left out of the
// printed text.
// Side effects: always records rd in result_regs with a result_fpr entry of 1'b0.
// NOTE(review): this listing carries both the `sbe.rd/sbe.rs1` statements and
// the `rd/rs1` ones (including doubled else-if arms) - looks like merged diff
// residue; confirm which set is intended.
function string printCSRInstr(input string mnemonic);
result_regs.push_back(sbe.rd);
result_regs.push_back(rd);
result_fpr.push_back(1'b0);
if (instr[14] == 0) begin
read_regs.push_back(sbe.rs1);
if (sbe.rd != 0 && sbe.rs1 != 0) begin
return $sformatf("%-16s %s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0]));
read_regs.push_back(rs1);
read_fpr.push_back(1'b0);
if (rd != 0 && rs1 != 0) begin
return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), csrAddrToStr(sbe.result[11:0]));
// don't display instructions which write to zero
end else if (sbe.rd == 0) begin
return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0]));
end else if (sbe.rs1 == 0) begin
return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0]));
end else if (rd == 0) begin
return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rs1), csrAddrToStr(sbe.result[11:0]));
end else if (rs1 == 0) begin
return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rd), csrAddrToStr(sbe.result[11:0]));
end
end else begin
if (sbe.rd != 0 && sbe.rs1 != 0) begin
return $sformatf("%-16s %s, %d, %s", mnemonic, regAddrToStr(sbe.rd), $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0]));
if (rd != 0 && rs1 != 0) begin
return $sformatf("%-12s %4s, %d, %s", mnemonic, regAddrToStr(rd), $unsigned(rs1), csrAddrToStr(sbe.result[11:0]));
// don't display instructions which write to zero
end else if (sbe.rd == 0) begin
return $sformatf("%-16s %d, %s", mnemonic, $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0]));
end else if (sbe.rs1 == 0) begin
return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0]));
end else if (rd == 0) begin
return $sformatf("%-14s %2d, %s", mnemonic, $unsigned(rs1), csrAddrToStr(sbe.result[11:0]));
end else if (rs1 == 0) begin
return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rd), csrAddrToStr(sbe.result[11:0]));
end
end
endfunction // printCSRInstr
// Formats a load instruction as "<mnemonic> rd, <offset>(rs1)".
// The mnemonic is selected from instr[14:12] (lb/lh/lw/lbu/lhu/lwu/ld); an
// unlisted value returns the INVALID mnemonic. When the opcode (instr[6:0]) is
// riscv::OpcodeLoadFp the mnemonic gets an "f" prefix and rd is printed as a
// floating-point register.
// Side effects: records rd in result_regs (FP flag from is_rd_fpr) and the
// base register rs1 in read_regs (read_fpr 1'b0), and stores the offset
// (sbe.result) in this.imm so the virtual address can be computed later.
// NOTE(review): this listing carries both the `sbe.rd/sbe.rs1` statements and
// the `rd/rs1` ones, plus two copies of the mnemonic case - looks like merged
// diff residue; confirm which set is intended.
function string printLoadInstr();
string mnemonic;
case (instr[14:12])
3'b000: mnemonic = "lb";
3'b001: mnemonic = "lh";
3'b010: mnemonic = "lw";
3'b100: mnemonic = "lbu";
3'b101: mnemonic = "lhu";
3'b110: mnemonic = "lwu";
3'b011: mnemonic = "ld";
default: return printMnemonic("INVALID");
endcase
result_regs.push_back(sbe.rd);
read_regs.push_back(sbe.rs1);
// save the immediate for calculating the virtual address
this.imm = sbe.result;
case (instr[14:12])
3'b000: mnemonic = "lb";
3'b001: mnemonic = "lh";
3'b010: mnemonic = "lw";
3'b100: mnemonic = "lbu";
3'b101: mnemonic = "lhu";
3'b110: mnemonic = "lwu";
3'b011: mnemonic = "ld";
default: return printMnemonic("INVALID");
endcase
return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1));
// FP loads reuse the integer width mnemonics with an "f" prefix
if (instr[6:0] == riscv::OpcodeLoadFp)
mnemonic = $sformatf("f%s",mnemonic);
result_regs.push_back(rd);
result_fpr.push_back(is_rd_fpr(sbe.op));
read_regs.push_back(rs1);
read_fpr.push_back(1'b0);
// save the immediate for calculating the virtual address
this.imm = sbe.result;
if (instr[6:0] == riscv::OpcodeLoadFp)
return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(rd), $signed(sbe.result), regAddrToStr(rs1));
else
return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(rd), $signed(sbe.result), regAddrToStr(rs1));
endfunction
function string printStoreInstr();
@ -393,12 +583,20 @@ class instruction_trace_item;
default: return printMnemonic("INVALID");
endcase
read_regs.push_back(sbe.rs2);
read_regs.push_back(sbe.rs1);
if (instr[6:0] == riscv::OpcodeStoreFp)
mnemonic = $sformatf("f%s",mnemonic);
read_regs.push_back(rs2);
read_fpr.push_back(is_rs2_fpr(sbe.op));
read_regs.push_back(rs1);
read_fpr.push_back(1'b0);
// save the immediate for calculating the virtual address
this.imm = sbe.result;
return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1));
if (instr[6:0] == riscv::OpcodeStoreFp)
return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(rs2), $signed(sbe.result), regAddrToStr(rs1));
else
return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(rs2), $signed(sbe.result), regAddrToStr(rs1));
endfunction // printSInstr
function string printAMOInstr();

View file

@ -25,8 +25,9 @@ class instruction_tracer;
scoreboard_entry_t issue_sbe;
// store resolved branches, get (mis-)predictions
branchpredict_t bp [$];
// shadow copy of the register file
logic [63:0] reg_file [32];
// shadow copy of the register files
logic [63:0] gp_reg_file [32];
logic [63:0] fp_reg_file [32];
// 64 bit clock tick count
longint unsigned clk_ticks;
int f, commit_log;
@ -60,7 +61,7 @@ class instruction_tracer;
logic [31:0] decode_instruction, issue_instruction, issue_commit_instruction;
scoreboard_entry_t commit_instruction;
// initialize register 0
reg_file [0] = 0;
gp_reg_file [0] = 0;
forever begin
automatic branchpredict_t bp_instruction = '0;
@ -125,10 +126,12 @@ class instruction_tracer;
// the scoreboards issue entry still contains the immediate value as a result
// check if the write back is valid, if not we need to source the result from the register file
// as the most recent version of this register will be there.
if (tracer_if.pck.we[i]) begin
if (tracer_if.pck.we_gpr[i] || tracer_if.pck.we_fpr[i]) begin
printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata[i], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
end else if (is_rd_fpr(commit_instruction.op)) begin
printInstr(issue_sbe, issue_commit_instruction, fp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
end else begin
printInstr(issue_sbe, issue_commit_instruction, reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
printInstr(issue_sbe, issue_commit_instruction, gp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction);
end
end
end
@ -142,13 +145,14 @@ class instruction_tracer;
// ----------------------
// Commit Registers
// ----------------------
// update shadow reg file here
// update shadow reg files here
for (int i = 0; i < 2; i++) begin
if (tracer_if.pck.we[i] && tracer_if.pck.waddr[i] != 5'b0) begin
reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i];
if (tracer_if.pck.we_gpr[i] && tracer_if.pck.waddr[i] != 5'b0) begin
gp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i];
end else if (tracer_if.pck.we_fpr[i]) begin
fp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i];
end
end
// --------------
// Flush Signals
// --------------
@ -182,11 +186,11 @@ class instruction_tracer;
endfunction
function void printInstr(scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [63:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, branchpredict_t bp);
instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr, priv_lvl, debug_mode, bp);
instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.gp_reg_file, this.fp_reg_file, result, paddr, priv_lvl, debug_mode, bp);
// print instruction to console
string print_instr = iti.printInstr();
if (ENABLE_SPIKE_COMMIT_LOG && !debug_mode) begin
$fwrite(this.commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result));
$fwrite(this.commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result, is_rd_fpr(sbe.op)));
end
uvm_report_info( "Tracer", print_instr, UVM_HIGH);
$fwrite(this.f, {print_instr, "\n"});

View file

@ -28,23 +28,23 @@ parameter INSTR_BGE = { 7'b?, 5'b?, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranc
parameter INSTR_BLTU = { 7'b?, 5'b?, 5'b?, 3'b110, 5'b?, riscv::OpcodeBranch };
parameter INSTR_BGEU = { 7'b?, 5'b?, 5'b?, 3'b111, 5'b?, riscv::OpcodeBranch };
// OPIMM
parameter INSTR_LI = { 12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpimm };
parameter INSTR_ADDI = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpimm };
parameter INSTR_SLTI = { 17'b?, 3'b010, 5'b?, riscv::OpcodeOpimm };
parameter INSTR_SLTIU = { 17'b?, 3'b011, 5'b?, riscv::OpcodeOpimm };
parameter INSTR_XORI = { 17'b?, 3'b100, 5'b?, riscv::OpcodeOpimm };
parameter INSTR_ORI = { 17'b?, 3'b110, 5'b?, riscv::OpcodeOpimm };
parameter INSTR_ANDI = { 17'b?, 3'b111, 5'b?, riscv::OpcodeOpimm };
parameter INSTR_SLLI = { 6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpimm };
parameter INSTR_SRLI = { 6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpimm };
parameter INSTR_SRAI = { 6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpimm };
// OP-IMM
parameter INSTR_LI = { 12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_ADDI = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_SLTI = { 17'b?, 3'b010, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_SLTIU = { 17'b?, 3'b011, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_XORI = { 17'b?, 3'b100, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_ORI = { 17'b?, 3'b110, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_ANDI = { 17'b?, 3'b111, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_SLLI = { 6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_SRLI = { 6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm };
parameter INSTR_SRAI = { 6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm };
// OPIMM32
parameter INSTR_ADDIW = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpimm32 };
parameter INSTR_SLLIW = { 7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpimm32 };
parameter INSTR_SRLIW = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpimm32 };
parameter INSTR_SRAIW = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpimm32 };
// OP-IMM-32
parameter INSTR_ADDIW = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm32 };
parameter INSTR_SLLIW = { 7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpImm32 };
parameter INSTR_SRLIW = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32 };
parameter INSTR_SRAIW = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32 };
// OP
parameter INSTR_ADD = { 7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp };
@ -67,9 +67,10 @@ parameter INSTR_SRLW = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32
parameter INSTR_SRAW = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32 };
parameter INSTR_MULW = { 7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp32 };
// FENCE
parameter INSTR_FENCE = { 4'b0, 8'b?, 13'b0, riscv::OpcodeFence };
parameter INSTR_FENCEI = { 17'b0, 3'b001, 5'b0, riscv::OpcodeFence };
// MISC-MEM
parameter INSTR_FENCE = { 4'b0, 8'b?, 13'b0, riscv::OpcodeMiscMem };
parameter INSTR_FENCEI = { 17'b0, 3'b001, 5'b0, riscv::OpcodeMiscMem };
// SYSTEM
parameter INSTR_CSRW = { 12'b?, 5'b?, 3'b001, 5'b0, riscv::OpcodeSystem };
parameter INSTR_CSRRW = { 12'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeSystem };
@ -101,9 +102,38 @@ parameter INSTR_DIVU = { 7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp }
parameter INSTR_REM = { 7'b0000001, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp };
parameter INSTR_REMU = { 7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp };
// RVFD
// Match patterns for the floating-point instructions; `?` bits are wildcards.
// Every pattern is a 32-bit concatenation of, from MSB to LSB:
//   5 bits  [31:27]  operation selector (all wildcards for the fused
//                    multiply-add group, where the tracer prints these bits
//                    as the third source register)
//   2 bits  [26:25]  format field (printed via fpFmtToStr by the tracer)
//   5 bits  [24:20]  second source / sub-operation field
//   5 bits  [19:15]  first source field
//   3 bits  [14:12]  rounding mode, or a fixed sub-operation code
//   5 bits  [11:7]   destination field
//   7 bits  [6:0]    opcode
// NOTE(review): INSTR_FNSMSUB pairs with riscv::OpcodeNmsub - the name looks
// like a typo for FNMSUB; check all users before renaming.
parameter INSTR_FMADD = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMadd};
parameter INSTR_FMSUB = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMsub};
parameter INSTR_FNSMSUB = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmsub};
parameter INSTR_FNMADD = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmadd};
parameter INSTR_FADD = { 5'b00000, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSUB = { 5'b00001, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMUL = { 5'b00010, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FDIV = { 5'b00011, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSQRT = { 5'b01011, 2'b?, 5'b0, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJ = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJN = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJX = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMIN = { 5'b00101, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMAX = { 5'b00101, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FLE = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FLT = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FEQ = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_F2F = { 5'b01000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMV_F2X = { 5'b11100, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCLASS = { 5'b11100, 2'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMV_X2F = { 5'b11110, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_F2I = { 5'b11000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_I2F = { 5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
// A
parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo };
// Load/Stores
parameter INSTR_LOAD = {25'b?, riscv::OpcodeLoad };
parameter INSTR_STORE = {25'b?, riscv::OpcodeStore };
parameter INSTR_LOAD = {25'b?, riscv::OpcodeLoad};
parameter INSTR_LOAD_FP = {25'b?, riscv::OpcodeLoadFp};
parameter INSTR_STORE = {25'b?, riscv::OpcodeStore};
parameter INSTR_STORE_FP = {25'b?, riscv::OpcodeStoreFp};

View file

@ -32,7 +32,8 @@ interface instruction_tracer_if (
// WB stage
logic [1:0][4:0] waddr;
logic [1:0][63:0] wdata;
logic [1:0] we;
logic [1:0] we_gpr;
logic [1:0] we_fpr;
// commit stage
scoreboard_entry_t [1:0] commit_instr; // commit instruction
logic [1:0] commit_ack;
@ -56,7 +57,7 @@ interface instruction_tracer_if (
clocking pck @(posedge clk);
input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr,
st_valid, st_paddr, ld_valid, ld_kill, ld_paddr, resolve_branch,
wdata, we, commit_instr, commit_ack, exception, priv_lvl, debug_mode;
wdata, we_gpr, we_fpr, commit_instr, commit_ack, exception, priv_lvl, debug_mode;
endclocking
`endif

View file

@ -131,6 +131,7 @@ module ariane_testharness #(
dmi_jtag i_dmi_jtag (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.testmode_i ( test_en ),
.dmi_req_o ( jtag_dmi_req ),
.dmi_req_valid_o ( jtag_req_valid ),
.dmi_req_ready_i ( debug_req_ready ),
@ -300,6 +301,7 @@ module ariane_testharness #(
) i_clint (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.testmode_i ( test_en ),
.slave ( master[1] ),
.rtc_i ( rtc_i ),
.timer_irq_o ( timer_irq ),

View file

@ -5,8 +5,8 @@ add wave -noupdate -group frontend -group icache /ariane_tb/dut/i_ariane/i_std_c
add wave -noupdate -group frontend -group ras /ariane_tb/dut/i_ariane/i_frontend/i_ras/*
add wave -noupdate -group frontend -group btb /ariane_tb/dut/i_ariane/i_frontend/i_btb/*
add wave -noupdate -group frontend -group bht /ariane_tb/dut/i_ariane/i_frontend/i_bht/*
add wave -noupdate -group frontend -group instr_scan /ariane_tb/dut/i_ariane/i_frontend/*/i_instr_scan/*
add wave -noupdate -group frontend -group fetch_fifo /ariane_tb/dut/i_ariane/i_frontend/i_fetch_fifo/*
# add wave -noupdate -group frontend -group instr_scan /ariane_tb/dut/i_ariane/i_frontend/*/i_instr_scan/*
# add wave -noupdate -group frontend -group fetch_fifo /ariane_tb/dut/i_ariane/i_frontend/i_fetch_fifo/*
add wave -noupdate -group id_stage -group decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/*
add wave -noupdate -group id_stage -group compressed_decoder /ariane_tb/dut/i_ariane/id_stage_i/compressed_decoder_i/*
@ -22,6 +22,8 @@ add wave -noupdate -group ex_stage -group alu /ariane_tb/dut/i_ariane/ex_stage_i
add wave -noupdate -group ex_stage -group mult /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/*
add wave -noupdate -group ex_stage -group mult -group mul /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_mul/*
add wave -noupdate -group ex_stage -group mult -group div /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_div/*
add wave -noupdate -group ex_stage -group fpu /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/*
add wave -noupdate -group ex_stage -group fpu -group fpnew /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/*
add wave -noupdate -group ex_stage -group lsu /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/*
add wave -noupdate -group ex_stage -group lsu -group lsu_bypass /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/lsu_bypass_i/*