diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 97f8d30ad..daa6a1154 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,23 +1,23 @@ before_script: - - export CXX=g++-7 CC=gcc-7 # paths to local or network installations (the riscv toolchain and # verilator are not built in the ci job as in travis) - - export QUESTASIM_HOME= - - export QUESTASIM_VERSION= - - export QUESTASIM_FLAGS= - - export RISCV=/scratch/$USER/projects/riscv_install - - export VERILATOR_ROOT=/scratch/$USER/projects/verilator-3.924 + - export QUESTASIM_HOME=/usr/pack/modelsim-10.6b-kgf/questasim/ + - export QUESTASIM_VERSION=-10.6b + - export QUESTASIM_FLAGS=-noautoldlibpath + - export CXX=g++-7.2.0 CC=gcc-7.2.0 + - export RISCV=/usr/scratch2/larain1/gitlabci/riscv_install + - export VERILATOR_ROOT=/usr/scratch2/larain1/gitlabci/verilator-3.924 # setup dependent paths - export PATH=${RISCV}/bin:$VERILATOR_ROOT/bin:${PATH} - export LIBRARY_PATH=$RISCV/lib - - export LD_LIBRARY_PATH=$RISCV/lib - - export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include - - export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include + - export LD_LIBRARY_PATH=$RISCV/lib:/usr/pack/gcc-7.2.0-af/linux-x64/lib64/ + - export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include:/usr/pack/gcc-7.2.0-af/linux-x64/include + - export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include:/usr/pack/gcc-7.2.0-af/linux-x64/include # number of parallel jobs to use for make commands and simulation - export NUM_JOBS=4 - ci/make-tmp.sh - git submodule update --init --recursive - + variables: GIT_SUBMODULE_STRATEGY: recursive @@ -52,33 +52,9 @@ run-benchmarks-questa: dependencies: - build -# rv64ui-p-* tests -run-asm-tests1-verilator: - stage: test_std - script: - - make -j${NUM_JOBS} run-asm-tests1-verilator - dependencies: - - build - -# rv64ui-v-* tests -run-asm-tests2-verilator: - stage: test_std - script: - - make -j${NUM_JOBS} run-asm-tests2-verilator - dependencies: - - build - -run-benchmarks-verilator: - 
stage: test_std - script: - - make -j${NUM_JOBS} run-benchmarks-verilator - dependencies: - - build - torture: stage: test_std script: - make torture-rtest - - make torture-rtest-verilator dependencies: - build diff --git a/.gitmodules b/.gitmodules index 38c356f40..d25c969d3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,12 +4,21 @@ [submodule "src/axi_node"] path = src/axi_node url = https://github.com/pulp-platform/axi_node.git +[submodule "src/fpu"] + path = src/fpu + url = https://github.com/pulp-platform/fpnew.git [submodule "src/fpga-support"] path = src/fpga-support url = https://github.com/pulp-platform/fpga-support.git [submodule "src/common_cells"] path = src/common_cells - url = https://github.com/pulp-platform/common_cells.git + url = https://github.com/pulp-platform/common_cells.git [submodule "src/axi"] path = src/axi url = https://github.com/pulp-platform/axi.git +[submodule "src/fpu_div_sqrt_mvp"] + path = src/fpu_div_sqrt_mvp + url = https://github.com/pulp-platform/fpu_div_sqrt_mvp.git +[submodule "src/tech_cells_generic"] + path = src/tech_cells_generic + url = https://github.com/pulp-platform/tech_cells_generic.git diff --git a/Bender.yml b/Bender.yml index 1b12de061..5b169a76d 100644 --- a/Bender.yml +++ b/Bender.yml @@ -3,15 +3,48 @@ package: authors: [ "Florian Zaruba " ] dependencies: - axi: { git: "git@iis-git.ee.ethz.ch:sasa/axi.git", rev: master } - axi2per: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi2per.git", rev: master } - axi_mem_if: { git: "git@github.com:pulp-platform/axi_mem_if.git", rev: master } - axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: v1.1.0 } - axi_slice: { git: "git@iis-git.ee.ethz.ch:sasa/axi_slice.git", version: 1.1.2 } - tech_cells_generic: { git: "git@iis-git.ee.ethz.ch:pulp-open/tech_cells_generic.git", rev: master } - common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: v1.7.4 } - fpga-support: { git: "https://github.com/pulp-platform/fpga-support.git", 
version: v0.3.2 } + axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.4.5 } + axi_mem_if: { git: "https://github.com/pulp-platform/axi_mem_if.git", version: 0.2.0 } + axi_node: { git: "https://github.com/pulp-platform/axi_node.git", version: 1.1.1 } + tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.1.1 } + common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.7.5 } + fpga-support: { git: "https://github.com/pulp-platform/fpga-support.git", version: 0.3.2 } + sources: + - src/fpu_div_sqrt_mvp/hdl/fpu_ff.sv + - src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv + - src/fpu_div_sqrt_mvp/hdl/control_mvp.sv + - src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv + - src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv + - src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv + - src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv + - src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv + - src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv + - src/fpu/src/pkg/fpnew_pkg.vhd + - src/fpu/src/pkg/fpnew_fmts_pkg.vhd + - src/fpu/src/pkg/fpnew_comps_pkg.vhd + - src/fpu/src/pkg/fpnew_pkg_constants.vhd + - src/fpu/src/utils/fp_pipe.vhd + - src/fpu/src/utils/fp_rounding.vhd + - src/fpu/src/utils/fp_arbiter.vhd + - src/fpu/src/ops/fma_core.vhd + - src/fpu/src/ops/fp_fma.vhd + - src/fpu/src/ops/fp_divsqrt_multi.vhd + - src/fpu/src/ops/fp_noncomp.vhd + - src/fpu/src/ops/fp_f2fcasts_fmt.vhd + - src/fpu/src/ops/fp_f2icasts_fmt.vhd + - src/fpu/src/ops/fp_i2fcasts_fmt.vhd + - src/fpu/src/subunits/addmul_fmt_slice.vhd + - src/fpu/src/subunits/addmul_block.vhd + - src/fpu/src/subunits/divsqrt_multifmt_slice.vhd + - src/fpu/src/subunits/divsqrt_block.vhd + - src/fpu/src/subunits/noncomp_fmt_slice.vhd + - src/fpu/src/subunits/noncomp_block.vhd + - src/fpu/src/subunits/conv_fmt_slice.vhd + - src/fpu/src/subunits/conv_ifmt_slice.vhd + - src/fpu/src/subunits/conv_block.vhd + - src/fpu/src/fpnew.vhd + - src/fpu/src/fpnew_top.vhd - 
include/riscv_pkg.sv - src/debug/dm_pkg.sv - include/ariane_pkg.sv @@ -21,49 +54,43 @@ sources: - src/util/instruction_tracer_pkg.sv - src/util/instruction_tracer_if.sv - src/alu.sv + - src/fpu_wrap.sv - src/ariane.sv - src/branch_unit.sv - - src/cache_ctrl.sv - - src/commit_stage.sv - src/compressed_decoder.sv - src/controller.sv - src/csr_buffer.sv - src/csr_regfile.sv - src/decoder.sv - src/ex_stage.sv - - src/frontend/btb.sv, - - src/frontend/bht.sv, - - src/frontend/ras.sv, - - src/frontend/instr_scan.sv, + - src/frontend/btb.sv + - src/frontend/bht.sv + - src/frontend/ras.sv + - src/frontend/instr_scan.sv - src/frontend/frontend.sv - - src/icache.sv - src/id_stage.sv - src/instr_realigner.sv - src/issue_read_operands.sv - src/issue_stage.sv - - src/lfsr.sv - src/load_unit.sv - src/lsu_arbiter.sv - src/lsu.sv - - src/miss_handler.sv - src/mmu.sv - src/mult.sv - - src/nbdcache.sv - - src/vdregs.sv - src/perf_counters.sv - src/ptw.sv - - src/std_cache_subsystem.sv - - src/sram_wrapper.sv - # - src/ariane_regfile_ff.sv - - src/ariane_regfile.sv + - src/ariane_regfile_ff.sv + # - src/ariane_regfile.sv - src/re_name.sv - src/scoreboard.sv - src/store_buffer.sv + - src/amo_buffer.sv - src/store_unit.sv - src/tlb.sv - src/commit_stage.sv - src/axi_adapter.sv - src/cache_subsystem/cache_ctrl.sv + - src/cache_subsystem/amo_alu.sv - src/cache_subsystem/miss_handler.sv - src/cache_subsystem/std_cache_subsystem.sv - src/cache_subsystem/std_icache.sv @@ -76,4 +103,5 @@ sources: - src/debug/dm_top.sv - src/debug/dmi_cdc.sv - src/debug/dmi_jtag.sv + - src/debug/dm_sba.sv - src/debug/dmi_jtag_tap.sv diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 000000000..e3285d0ad --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @zarubaf @msfschaffner diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 44337a124..084df0aa1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,6 +20,7 @@ See [style-guidlines](https://github.com/pulp-platform/style-guidelines) * :fire: 
`:fire` Removing code or files. * :memo: `:memo:` When writing docs * :bug: `:bug:` When fixing a bug + * :fire: `:fire:` When removing code or files * :wastebasket: `:wastebasket:` When removing code or files * :green_heart: `:green_heart:` When fixing the CI build * :construction_worker: `:construction_worker:` Adding CI build system @@ -28,17 +29,16 @@ See [style-guidlines](https://github.com/pulp-platform/style-guidelines) * :arrow_up: `:arrow_up:` When upgrading dependencies * :arrow_down: `:arrow_down:` When downgrading dependencies * :rotating_light: `:rotating_light:` When removing linter warnings - * :pencil2: `:pencil2:` Fixing typos - * :recycle: `:recycle:` Refactoring code. + * :pencil2: `pencil2:` Fixing typos + * :recycle: `:scisccor:` Refactoring code. * :boom: `:boom:` Introducing breaking changes - * :truck: `:truck:` Moving or renaming files. + * :truck: `truck` Moving or renaming files. * :space_invader: `:space_invader:` When fixing something synthesis related * :beers: `:beer:` Writing code drunkenly. - * :ok_hand: `:ok_hand:` Updating code due to code review changes + * :ok_hand: `:ok_hand` Updating code due to code review changes * :building_construction: `:building_construction:` Making architectural changes. - * :wrench: `:wrench:` Tooling - * :construction: `:construction:` Work In Progress WIP - * :bookmark: `:bookmark:` version tag + +For a detailed why and how please refer to one of the multiple [resources](https://chris.beams.io/posts/git-commit/) regarding git commit messages. 
If you use `vi` for your commit message, consider to put the following snippet inside your `~/.vimrc`: diff --git a/Makefile b/Makefile index a76620921..1445cb108 100755 --- a/Makefile +++ b/Makefile @@ -29,18 +29,23 @@ torture-logs := -log # Sources # Package files -> compile first -ariane_pkg := include/riscv_pkg.sv \ - src/debug/dm_pkg.sv \ - include/ariane_pkg.sv \ - include/std_cache_pkg.sv \ - src/axi/src/axi_pkg.sv \ - include/axi_intf.sv +ariane_pkg := include/riscv_pkg.sv \ + src/debug/dm_pkg.sv \ + include/ariane_pkg.sv \ + include/std_cache_pkg.sv \ + src/axi/src/axi_pkg.sv \ + include/axi_intf.sv \ + src/fpu/src/pkg/fpnew_pkg.vhd \ + src/fpu/src/pkg/fpnew_fmts_pkg.vhd \ + src/fpu/src/pkg/fpnew_comps_pkg.vhd \ + src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \ + src/fpu/src/pkg/fpnew_pkg_constants.vhd # utility modules -util := $(wildcard src/util/*.svh) \ - src/util/instruction_tracer_pkg.sv \ - src/util/instruction_tracer_if.sv \ - src/util/cluster_clock_gating.sv \ +util := $(wildcard src/util/*.svh) \ + src/util/instruction_tracer_pkg.sv \ + src/util/instruction_tracer_if.sv \ + src/tech_cells_generic/src/cluster_clock_gating.sv \ src/util/sram.sv # Test packages @@ -51,6 +56,11 @@ dpi := $(patsubst tb/dpi/%.cc,${dpi-library}/%.o,$(wildcard tb/dpi/*.cc)) dpi_hdr := $(wildcard tb/dpi/*.h) # this list contains the standalone components src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ + $(wildcard src/fpu/src/utils/*.vhd) \ + $(wildcard src/fpu/src/ops/*.vhd) \ + $(wildcard src/fpu/src/subunits/*.vhd) \ + $(filter-out src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv, \ + $(wildcard src/fpu_div_sqrt_mvp/hdl/*.sv)) \ $(wildcard src/frontend/*.sv) \ $(wildcard src/cache_subsystem/*.sv) \ $(wildcard bootrom/*.sv) \ @@ -59,6 +69,12 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ $(wildcard src/axi_mem_if/src/*.sv) \ $(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) \ $(wildcard src/debug/debug_rom/*.sv) \ + 
src/fpu/src/fpnew.vhd \ + src/fpu/src/fpnew_top.vhd \ + src/common_cells/src/deprecated/generic_fifo.sv \ + src/common_cells/src/deprecated/pulp_sync.sv \ + src/common_cells/src/deprecated/find_first_one.sv \ + src/common_cells/src/rstgen_bypass.sv \ src/axi/src/axi_cut.sv \ src/axi/src/axi_join.sv \ src/fpga-support/rtl/SyncSpRamBeNx64.sv \ @@ -71,12 +87,12 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ src/common_cells/src/lzc.sv \ src/common_cells/src/rrarbiter.sv \ src/common_cells/src/lfsr_8bit.sv \ + src/tech_cells_generic/src/cluster_clock_inverter.sv \ + src/tech_cells_generic/src/pulp_clock_mux2.sv \ tb/ariane_testharness.sv \ tb/common/SimDTM.sv \ tb/common/SimJTAG.sv - - # root path root-dir := $(shell pwd) # look for testbenches @@ -96,6 +112,7 @@ riscv-test ?= rv64ui-p-add incdir := # Compile and sim flags compile_flag += +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive +define+$(defines) +compile_flag_vhd += -64 -nologo -quiet -2008 uvm-flags += +UVM_NO_RELNOTES # Iterate over all include directories and write them with +incdir+ prefixed # +incdir+ works for Verilator and QuestaSim @@ -113,9 +130,11 @@ build: $(library) $(library)/.build-srcs $(library)/.build-tb $(dpi-library)/ari # src files $(library)/.build-srcs: $(ariane_pkg) $(util) $(src) $(library) vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(ariane_pkg)) $(list_incdir) -suppress 2583 + vcom$(questa_version) $(compile_flag_vhd) -work $(library) -pedanticerrors $(filter %.vhd,$(ariane_pkg)) vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(util)) $(list_incdir) -suppress 2583 # Suppress message that always_latch may not be checked thoroughly by QuestaSim. 
- vlog$(questa_version) $(compile_flag) -work $(library) -pedanticerrors $(src) $(list_incdir) -suppress 2583 + vcom$(questa_version) $(compile_flag_vhd) -work $(library) -pedanticerrors $(filter %.vhd,$(src)) + vlog$(questa_version) $(compile_flag) -work $(library) -pedanticerrors $(filter %.sv,$(src)) $(list_incdir) -suppress 2583 touch $(library)/.build-srcs # build TBs @@ -137,19 +156,25 @@ $(dpi-library)/ariane_dpi.so: $(dpi) # Compile C-code and generate .so file $(CXX) -shared -m64 -o $(dpi-library)/ariane_dpi.so $? -lfesvr - sim: build - vsim${questa_version} +permissive -64 -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ - +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ - $(QUESTASIM_FLAGS) \ - -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " log -r /*; run -all; exit" \ + vsim${questa_version} +permissive -64 -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ + +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ + $(QUESTASIM_FLAGS) \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; log -r /*; run -all; exit" \ ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options) simc: build vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ $(QUESTASIM_FLAGS) \ - -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " run -all; exit" \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; run -all; exit" \ + ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options) + +simc-log: build + 
vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ + +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ + $(QUESTASIM_FLAGS) \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; log -r /*; run -all; exit" \ ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options) $(riscv-asm-tests): build @@ -157,7 +182,7 @@ $(riscv-asm-tests): build +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ $(QUESTASIM_FLAGS) \ -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \ - -do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ + -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$@ ++$(target-options) | tee tmp/riscv-asm-tests-$@.log $(riscv-benchmarks): build @@ -165,27 +190,27 @@ $(riscv-benchmarks): build +BASEDIR=$(riscv-benchmarks-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ $(QUESTASIM_FLAGS) \ -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \ - -do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ + -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ ${top_level}_optimized +permissive-off ++$(riscv-benchmarks-dir)/$@ ++$(target-options) | tee tmp/riscv-benchmarks-$@.log # can use -jX to run ci tests in parallel using X processes run-asm-tests: $(riscv-asm-tests) - make check-asm-tests + $(MAKE) check-asm-tests check-asm-tests: ci/check-tests.sh 
tmp/riscv-asm-tests- $(shell wc -l $(riscv-asm-tests-list) | awk -F " " '{ print $1 }') # can use -jX to run ci tests in parallel using X processes run-benchmarks: $(riscv-benchmarks) - make check-benchmarks + $(MAKE) check-benchmarks check-benchmarks: ci/check-tests.sh tmp/riscv-benchmarks- $(shell wc -l $(riscv-benchmarks-list) | awk -F " " '{ print $1 }') # verilator-specific verilate_command := $(verilator) \ - $(ariane_pkg) \ - $(filter-out tb/ariane_bt.sv,$(src)) \ + $(filter-out %.vhd, $(ariane_pkg)) \ + $(filter-out src/fpu_wrap.sv, $(filter-out %.vhd, $(src))) \ +define+$(defines) \ src/util/sram.sv \ +incdir+src/axi_node \ @@ -209,7 +234,7 @@ verilate_command := $(verilator) # User Verilator, at some point in the future this will be auto-generated verilate: $(verilate_command) - cd $(ver-library) && make -j${NUM_JOBS} -f Variane_testharness.mk + cd $(ver-library) && $(MAKE) -j${NUM_JOBS} -f Variane_testharness.mk $(addsuffix -verilator,$(riscv-asm-tests)): verilate $(ver-library)/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@) @@ -235,29 +260,29 @@ torture-itest: cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -a output/test.S' torture-rtest: build - cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture$(torture-logs) defines=$(defines) test-location=$(test-location)" > call.sh && chmod +x call.sh + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture$(torture-logs) defines=$(defines) test-location=$(test-location)" > call.sh && chmod +x call.sh cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a $(test-location).S' | tee $(test-location).log make check-torture test-location=$(test-location) torture-dummy: build - cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture defines=$(defines) test-location=\$${@: -1}" > call.sh + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture 
defines=$(defines) test-location=\$${@: -1}" > call.sh torture-rnight: build - cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture$(torture-logs) defines=$(defines) test-location=\$${@: -1}" > call.sh && chmod +x call.sh + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture$(torture-logs) defines=$(defines) test-location=\$${@: -1}" > call.sh && chmod +x call.sh cd $(riscv-torture-dir) && $(riscv-torture-bin) 'overnight/run -r ./call.sh -g none' | tee output/overnight.log - make check-torture + $(MAKE) check-torture torture-rtest-verilator: verilate - cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture-verilator defines=$(defines)" > call.sh && chmod +x call.sh + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture-verilator defines=$(defines)" > call.sh && chmod +x call.sh cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a output/test.S' | tee output/test.log - make check-torture + $(MAKE) check-torture run-torture: build vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles)+UVM_TESTNAME=${test_case} \ +BASEDIR=$(riscv-torture-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ $(QUESTASIM_FLAGS) \ -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \ - -do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ + -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ ${top_level}_optimized +permissive-off \ +signature=$(riscv-torture-dir)/$(test-location).rtlsim.sig ++$(riscv-torture-dir)/$(test-location) ++$(target-options) @@ -271,6 +296,7 @@ run-torture-log: build +signature=$(riscv-torture-dir)/$(test-location).rtlsim.sig ++$(riscv-torture-dir)/$(test-location) ++$(target-options) cp 
vsim.wlf $(riscv-torture-dir)/$(test-location).wlf cp trace_core_00_0.log $(riscv-torture-dir)/$(test-location).trace + cp trace_core_00_0_commit.log $(riscv-torture-dir)/$(test-location).commit cp transcript $(riscv-torture-dir)/$(test-location).transcript run-torture-verilator: verilate diff --git a/README.md b/README.md index 72ec74dc7..766e0dcfd 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,10 @@ $ make simc riscv-test-dir=$RISCV/riscv64-unknown-elf/bin riscv-test=pk target-o > Be patient! RTL simulation is way slower than Spike. If you think that you ran into problems you can inspect the trace files. +### FPU Support + +> There is preliminary support for floating point extensions F and D. At the moment floating point support will only be available in QuestaSim as the FPU is written in VHDL. This is likely to change. The floating point extensions can be enabled by setting `RVF` and `RVD` to `1'b1` in the `include/ariane_pkg.sv` file. + ## FPGA Emulation Coming. diff --git a/ci/riscv-asm-tests.list b/ci/riscv-asm-tests.list index 01e12844b..e03f7c392 100644 --- a/ci/riscv-asm-tests.list +++ b/ci/riscv-asm-tests.list @@ -164,4 +164,4 @@ rv64ua-v-amomin_d rv64ua-v-amomin_w rv64ua-v-amominu_d rv64ua-v-amominu_w -rv64ua-v-lrsc \ No newline at end of file +rv64ua-v-lrsc diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index b920db551..7db67cb16 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -24,7 +24,7 @@ package ariane_pkg; localparam NR_SB_ENTRIES = 8; // number of scoreboard entries localparam TRANS_ID_BITS = $clog2(NR_SB_ENTRIES); // depending on the number of scoreboard entries we need that many bits // to uniquely identify the entry in the scoreboard - localparam NR_WB_PORTS = 5; + localparam NR_WB_PORTS = 4; localparam ASID_WIDTH = 1; localparam BTB_ENTRIES = 8; localparam BHT_ENTRIES = 32; @@ -32,18 +32,64 @@ package ariane_pkg; localparam BITS_SATURATION_COUNTER = 2; localparam NR_COMMIT_PORTS = 2; - localparam logic [63:0] 
ISA_CODE = - | (1 << 0) // A - Atomic extension - | (1 << 2) // C - Compressed extension - | (1 << 8) // I - RV32I/64I/128I base ISA - | (1 << 12) // M - Integer Multiply/Divide extension - | (0 << 13) // N - User level interrupts supported - | (1 << 18) // S - Supervisor mode implemented - | (1 << 20) // U - User mode implemented - | (0 << 23) // X - Non-standard extensions present - | (1 << 63); // RV64 localparam ENABLE_RENAME = 1'b1; + // Floating-point extensions configuration + localparam bit RVF = 1'b0; // Is F extension enabled + localparam bit RVD = 1'b0; // Is D extension enabled + localparam bit RVA = 1'b1; // Is A extension enabled + + // Transprecision floating-point extensions configuration + localparam bit XF16 = 1'b0; // Is half-precision float extension (Xf16) enabled + localparam bit XF16ALT = 1'b0; // Is alternative half-precision float extension (Xf16alt) enabled + localparam bit XF8 = 1'b0; // Is quarter-precision float extension (Xf8) enabled + localparam bit XFVEC = 1'b0; // Is vectorial float extension (Xfvec) enabled + + // Transprecision float unit + localparam logic [30:0] LAT_COMP_FP32 = 'd3; + localparam logic [30:0] LAT_COMP_FP64 = 'd4; + localparam logic [30:0] LAT_COMP_FP16 = 'd3; + localparam logic [30:0] LAT_COMP_FP16ALT = 'd3; + localparam logic [30:0] LAT_COMP_FP8 = 'd2; + localparam logic [30:0] LAT_DIVSQRT = 'd2; + localparam logic [30:0] LAT_NONCOMP = 'd1; + localparam logic [30:0] LAT_CONV = 'd2; + + // -------------------------------------- + // vvvv Don't change these by hand! vvvv + localparam bit FP_PRESENT = RVF | RVD | XF16 | XF16ALT | XF8; + + // Length of widest floating-point format + localparam FLEN = RVD ? 64 : // D ext. + RVF ? 32 : // F ext. + XF16 ? 16 : // Xf16 ext. + XF16ALT ? 16 : // Xf16alt ext. + XF8 ? 8 : // Xf8 ext. + 0; // Unused in case of no FP + + localparam bit NSX = XF16 | XF16ALT | XF8 | XFVEC; // Are non-standard extensions present? 
+ + localparam bit RVFVEC = RVF & XFVEC & FLEN>32; // FP32 vectors available if vectors and larger fmt enabled + localparam bit XF16VEC = XF16 & XFVEC & FLEN>16; // FP16 vectors available if vectors and larger fmt enabled + localparam bit XF16ALTVEC = XF16ALT & XFVEC & FLEN>16; // FP16ALT vectors available if vectors and larger fmt enabled + localparam bit XF8VEC = XF8 & XFVEC & FLEN>8; // FP8 vectors available if vectors and larger fmt enabled + // ^^^^ until here ^^^^ + // --------------------- + + localparam logic [63:0] ARIANE_MARCHID = 64'd3; + + localparam logic [63:0] ISA_CODE = (RVA << 0) // A - Atomic Instructions extension + | (1 << 2) // C - Compressed extension + | (RVD << 3) // D - Double precision floating-point extension + | (RVF << 5) // F - Single precision floating-point extension + | (1 << 8) // I - RV32I/64I/128I base ISA + | (1 << 12) // M - Integer Multiply/Divide extension + | (0 << 13) // N - User level interrupts supported + | (1 << 18) // S - Supervisor mode implemented + | (1 << 20) // U - User mode implemented + | (NSX << 23) // X - Non-standard extensions present + | (1 << 63); // RV64 + // 32 registers + 1 bit for re-naming = 6 localparam REG_ADDR_SIZE = 6; @@ -57,9 +103,8 @@ package ariane_pkg; dataaddr: dm::DataAddr }; - // enables a commit log which matches spikes commit log format for easier trace comparison - localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b0; + localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1; // ------------- Dangerouse ------------- // if set to zero a flush will not invalidate the cache-lines, in a single core environment @@ -152,7 +197,9 @@ package ariane_pkg; ALU, // 3 CTRL_FLOW, // 4 MULT, // 5 - CSR // 6 + CSR, // 6 + FPU, // 7 + FPU_VEC // 8 } fu_t; localparam EXC_OFF_RST = 8'h80; @@ -199,9 +246,94 @@ package ariane_pkg; // Multiplications MUL, MULH, MULHU, MULHSU, MULW, // Divisions - DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW + DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW, + // Floating-Point Load and 
Store Instructions + FLD, FLW, FLH, FLB, FSD, FSW, FSH, FSB, + // Floating-Point Computational Instructions + FADD, FSUB, FMUL, FDIV, FMIN_MAX, FSQRT, FMADD, FMSUB, FNMSUB, FNMADD, + // Floating-Point Conversion and Move Instructions + FCVT_F2I, FCVT_I2F, FCVT_F2F, FSGNJ, FMV_F2X, FMV_X2F, + // Floating-Point Compare Instructions + FCMP, + // Floating-Point Classify Instruction + FCLASS, + // Vectorial Floating-Point Instructions that don't directly map onto the scalar ones + VFMIN, VFMAX, VFSGNJ, VFSGNJN, VFSGNJX, VFEQ, VFNE, VFLT, VFGE, VFLE, VFGT, VFCPKAB_S, VFCPKCD_S, VFCPKAB_D, VFCPKCD_D } fu_op; + typedef struct packed { + fu_op operator; + logic [63:0] operand_a; + logic [63:0] operand_b; + logic [63:0] imm; + } fu_data_t; + + // ------------------------------- + // Extract Src/Dst FP Reg from Op + // ------------------------------- + function automatic logic is_rs1_fpr (input fu_op op); + if (FP_PRESENT) begin // makes function static for non-fp case + unique case (op) inside + [FMUL:FNMADD], // Computational Operations (except ADD/SUB) + FCVT_F2I, // Float-Int Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_F2X, // FPR-GPR Moves + FCMP, // Comparisons + FCLASS, // Classifications + [VFMIN:VFCPKCD_D] : return 1'b1; // Additional Vectorial FP ops + default : return 1'b0; // all other ops + endcase + end else + return 1'b0; + endfunction; + + function automatic logic is_rs2_fpr (input fu_op op); + if (FP_PRESENT) begin // makes function static for non-fp case + unique case (op) inside + [FSD:FSB], // FP Stores + [FADD:FMIN_MAX], // Computational Operations (no sqrt) + [FMADD:FNMADD], // Fused Computational Operations + FCVT_F2F, // Vectorial F2F Conversions require target + [FSGNJ:FMV_F2X], // Sign Injections and moves mapped to SGNJ + FCMP, // Comparisons + [VFMIN:VFCPKCD_D] : return 1'b1; // Additional Vectorial FP ops + default : return 1'b0; // all other ops + endcase + end else + return 1'b0; + endfunction; + + // ternary 
operations encode the rs3 address in the imm field, also add/sub + function automatic logic is_imm_fpr (input fu_op op); + if (FP_PRESENT) begin // makes function static for non-fp case + unique case (op) inside + [FADD:FSUB], // ADD/SUB need inputs as Operand B/C + [FMADD:FNMADD], // Fused Computational Operations + [VFCPKAB_S:VFCPKCD_D] : return 1'b1; // Vectorial FP cast and pack ops + default : return 1'b0; // all other ops + endcase + end else + return 1'b0; + endfunction; + + function automatic logic is_rd_fpr (input fu_op op); + if (FP_PRESENT) begin // makes function static for non-fp case + unique case (op) inside + [FLD:FLB], // FP Loads + [FADD:FNMADD], // Computational Operations + FCVT_I2F, // Int-Float Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_X2F, // GPR-FPR Moves + [VFMIN:VFSGNJX], // Vectorial MIN/MAX and SGNJ + [VFCPKAB_S:VFCPKCD_D] : return 1'b1; // Vectorial FP cast and pack ops + default : return 1'b0; // all other ops + endcase + end else + return 1'b0; + endfunction; + function automatic logic is_amo (fu_op op); case (op) inside [AMO_LRW:AMO_MINDU]: begin @@ -244,7 +376,10 @@ package ariane_pkg; logic [REG_ADDR_SIZE-1:0] rs1; // register source address 1 logic [REG_ADDR_SIZE-1:0] rs2; // register source address 2 logic [REG_ADDR_SIZE-1:0] rd; // register destination address - logic [63:0] result; // for unfinished instructions this field also holds the immediate + logic [63:0] result; // for unfinished instructions this field also holds the immediate, + // for unfinished floating-point that are partly encoded in rs2, this field also holds rs2 + // for unfinished floating-point fused operations (FMADD, FMSUB, FNMADD, FNMSUB) + // this field holds the address of the third operand from the floating-point register file logic valid; // is the result valid logic use_imm; // should we use the immediate as operand b? 
logic use_zimm; // use zimm as operand a @@ -433,7 +568,7 @@ package ariane_pkg; // ---------------------- function automatic logic [1:0] extract_transfer_size(fu_op op); case (op) - LD, SD, + LD, SD, FLD, FSD, AMO_LRD, AMO_SCD, AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD, @@ -442,7 +577,7 @@ package ariane_pkg; AMO_MINDU: begin return 2'b11; end - LW, LWU, SW, + LW, LWU, SW, FLW, FSW, AMO_LRW, AMO_SCW, AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW, @@ -451,8 +586,8 @@ package ariane_pkg; AMO_MINWU: begin return 2'b10; end - LH, LHU, SH: return 2'b01; - LB, SB, LBU: return 2'b00; + LH, LHU, SH, FLH, FSH: return 2'b01; + LB, LBU, SB, FLB, FSB: return 2'b00; default: return 2'b11; endcase endfunction diff --git a/include/riscv_pkg.sv b/include/riscv_pkg.sv index 7a223e444..ba5fdf34b 100644 --- a/include/riscv_pkg.sv +++ b/include/riscv_pkg.sv @@ -32,6 +32,13 @@ package riscv; XLEN_128 = 2'b11 } xlen_t; + typedef enum logic [1:0] { + Off = 2'b00, + Initial = 2'b01, + Clean = 2'b10, + Dirty = 2'b11 + } xs_t; + typedef struct packed { logic sd; // signal dirty - read-only - hardwired zero logic [62:36] wpri4; // writes preserved reads ignored @@ -44,8 +51,8 @@ package riscv; logic mxr; // make executable readable logic sum; // permit supervisor user memory access logic mprv; // modify privilege - privilege level for ld/st - logic [1:0] xs; // extension register - hardwired to zero - logic [1:0] fs; // extension register - hardwired to zero + xs_t xs; // extension register - hardwired to zero + xs_t fs; // floating point extension register priv_lvl_t mpp; // holds the previous privilege mode up to machine logic [1:0] wpri2; // writes preserved reads ignored logic spp; // holds the previous privilege mode up to supervisor @@ -104,6 +111,37 @@ package riscv; logic [6:0] opcode; } rtype_t; + typedef struct packed { + logic [31:27] rs3; + logic [26:25] funct2; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] funct3; + logic [11:7] rd; + logic [6:0] opcode; + } r4type_t; + + 
typedef struct packed { + logic [31:27] funct5; + logic [26:25] fmt; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] rm; + logic [11:7] rd; + logic [6:0] opcode; + } rftype_t; // floating-point + + typedef struct packed { + logic [31:30] funct2; + logic [29:25] vecfltop; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:14] repl; + logic [13:12] vfmt; + logic [11:7] rd; + logic [6:0] opcode; + } rvftype_t; // vectorial floating-point + typedef struct packed { logic [31:20] imm; logic [19:15] rs1; @@ -142,6 +180,9 @@ package riscv; typedef union packed { logic [31:0] instr; rtype_t rtype; + r4type_t r4type; + rftype_t rftype; + rvftype_t rvftype; itype_t itype; stype_t stype; utype_t utype; @@ -151,27 +192,72 @@ package riscv; // -------------------- // Opcodes // -------------------- - localparam OpcodeSystem = 7'h73; - localparam OpcodeFence = 7'h0f; - localparam OpcodeOp = 7'h33; - localparam OpcodeOp32 = 7'h3B; - localparam OpcodeOpimm = 7'h13; - localparam OpcodeOpimm32 = 7'h1B; - localparam OpcodeStore = 7'h23; - localparam OpcodeStoreFP = 7'b01_001_11; - localparam OpcodeLoad = 7'h03; - localparam OpcodeLoadFP = 7'b00_001_11; - localparam OpcodeBranch = 7'h63; - localparam OpcodeJalr = 7'h67; - localparam OpcodeJal = 7'h6f; - localparam OpcodeAuipc = 7'h17; - localparam OpcodeLui = 7'h37; - localparam OpcodeAmo = 7'h2F; + // RV32/64G listings: + // Quadrant 0 + localparam OpcodeLoad = 7'b00_000_11; + localparam OpcodeLoadFp = 7'b00_001_11; + localparam OpcodeCustom0 = 7'b00_010_11; + localparam OpcodeMiscMem = 7'b00_011_11; + localparam OpcodeOpImm = 7'b00_100_11; + localparam OpcodeAuipc = 7'b00_101_11; + localparam OpcodeOpImm32 = 7'b00_110_11; + // Quadrant 1 + localparam OpcodeStore = 7'b01_000_11; + localparam OpcodeStoreFp = 7'b01_001_11; + localparam OpcodeCustom1 = 7'b01_010_11; + localparam OpcodeAmo = 7'b01_011_11; + localparam OpcodeOp = 7'b01_100_11; + localparam OpcodeLui = 7'b01_101_11; + localparam OpcodeOp32 = 7'b01_110_11; + // 
Quadrant 2 + localparam OpcodeMadd = 7'b10_000_11; + localparam OpcodeMsub = 7'b10_001_11; + localparam OpcodeNmsub = 7'b10_010_11; + localparam OpcodeNmadd = 7'b10_011_11; + localparam OpcodeOpFp = 7'b10_100_11; + localparam OpcodeRsrvd1 = 7'b10_101_11; + localparam OpcodeCustom2 = 7'b10_110_11; + // Quadrant 3 + localparam OpcodeBranch = 7'b11_000_11; + localparam OpcodeJalr = 7'b11_001_11; + localparam OpcodeRsrvd2 = 7'b11_010_11; + localparam OpcodeJal = 7'b11_011_11; + localparam OpcodeSystem = 7'b11_100_11; + localparam OpcodeRsrvd3 = 7'b11_101_11; + localparam OpcodeCustom3 = 7'b11_110_11; - localparam OpcodeCJ = 3'b101; - localparam OpcodeCBeqz = 3'b110; - localparam OpcodeCBnez = 3'b111; + // RV64C listings: + // Quadrant 0 + localparam OpcodeC0 = 2'b00; + localparam OpcodeC0Addi4spn = 3'b000; + localparam OpcodeC0Fld = 3'b001; + localparam OpcodeC0Lw = 3'b010; + localparam OpcodeC0Ld = 3'b011; + localparam OpcodeC0Rsrvd = 3'b100; + localparam OpcodeC0Fsd = 3'b101; + localparam OpcodeC0Sw = 3'b110; + localparam OpcodeC0Sd = 3'b111; + // Quadrant 1 + localparam OpcodeC1 = 2'b01; + localparam OpcodeC1Addi = 3'b000; + localparam OpcodeC1Addiw = 3'b001; + localparam OpcodeC1Li = 3'b010; + localparam OpcodeC1LuiAddi16sp = 3'b011; + localparam OpcodeC1MiscAlu = 3'b100; + localparam OpcodeC1J = 3'b101; + localparam OpcodeC1Beqz = 3'b110; + localparam OpcodeC1Bnez = 3'b111; + // Quadrant 2 + localparam OpcodeC2 = 2'b10; + localparam OpcodeC2Slli = 3'b000; + localparam OpcodeC2Fldsp = 3'b001; + localparam OpcodeC2Lwsp = 3'b010; + localparam OpcodeC2Ldsp = 3'b011; localparam OpcodeC2JalrMvAdd = 3'b100; + localparam OpcodeC2Fsdsp = 3'b101; + localparam OpcodeC2Swsp = 3'b110; + localparam OpcodeC2Sdsp = 3'b111; + // ---------------------- // Performance Counters // ---------------------- @@ -235,6 +321,11 @@ package riscv; // CSRs // ----- typedef enum logic [11:0] { + // Floating-Point CSRs + CSR_FFLAGS = 12'h001, + CSR_FRM = 12'h002, + CSR_FCSR = 12'h003, + 
CSR_FTRAN = 12'h800, // Supervisor Mode CSRs CSR_SSTATUS = 12'h100, CSR_SIE = 12'h104, @@ -321,6 +412,14 @@ package riscv; csr_addr_t csr_decode; } csr_t; + // Floating-Point control and status register (32-bit!) + typedef struct packed { + logic [31:15] reserved; // reserved for L extension, return 0 otherwise + logic [6:0] fprec; // div/sqrt precision control + logic [2:0] frm; // float rounding mode + logic [4:0] fflags; // float exception flags + } fcsr_t; + // ----- // Debug // ----- @@ -398,13 +497,14 @@ package riscv; // trace log compatible to spikes commit log feature // pragma translate_off - function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr, logic [4:0] rd, logic [63:0] result); + function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr, logic [4:0] rd, logic [63:0] result, logic rd_fpr); string rd_s; + automatic string rf_s = rd_fpr ? "f" : "x"; - if (rd < 10) rd_s = $sformatf("x %0d", rd); - else rd_s = $sformatf("x%0d", rd); + if (rd < 10) rd_s = $sformatf("%s %0d", rf_s, rd); + else rd_s = $sformatf("%s%0d", rf_s, rd); - if (rd != 0) begin + if (rd_fpr || rd != 0) begin // 0 0x0000000080000118 (0xeecf8f93) x31 0x0000000080004000 return $sformatf("%d 0x%h (0x%h) %s 0x%h\n", priv_lvl, pc, instr, rd_s, result); end else begin diff --git a/src/alu.sv b/src/alu.sv index fadd053a2..b9fe8d235 100644 --- a/src/alu.sv +++ b/src/alu.sv @@ -19,23 +19,40 @@ import ariane_pkg::*; -module alu -( +module alu ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, + input logic [63:0] pc_i, input logic [TRANS_ID_BITS-1:0] trans_id_i, input logic alu_valid_i, + input logic branch_valid_i, + input logic csr_valid_i, input fu_op operator_i, input logic [63:0] operand_a_i, input logic [63:0] operand_b_i, + input logic [63:0] imm_i, output logic [63:0] result_o, - output logic alu_branch_res_o, output logic alu_valid_o, output logic alu_ready_o, - 
output logic [TRANS_ID_BITS-1:0] alu_trans_id_o + output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, + output exception_t alu_exception_o, + + input logic fu_valid_i, + input logic is_compressed_instr_i, + input branchpredict_sbe_t branch_predict_i, + output branchpredict_t resolved_branch_o, + output logic resolve_branch_o, + + input logic commit_i, + // to CSR file + output logic [11:0] csr_addr_o // CSR address to commit stage ); - // ALU is a single cycle instructions, hence it is always ready - assign alu_ready_o = 1'b1; - assign alu_valid_o = alu_valid_i; + logic csr_ready; + + assign alu_ready_o = csr_ready; + assign alu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i; assign alu_trans_id_o = trans_id_i; logic [63:0] operand_a_rev; @@ -43,6 +60,8 @@ module alu logic [64:0] operand_b_neg; logic [65:0] adder_result_ext_o; logic less; // handles both signed and unsigned forms + logic alu_branch_res; + logic [63:0] branch_result, csr_result; // bit reverse operand_a for left shifts and bit counting generate @@ -89,13 +108,13 @@ module alu // get the right branch comparison result always_comb begin : branch_resolve // set comparison by default - alu_branch_res_o = 1'b1; + alu_branch_res = 1'b1; case (operator_i) - EQ: alu_branch_res_o = adder_z_flag; - NE: alu_branch_res_o = ~adder_z_flag; - LTS, LTU: alu_branch_res_o = less; - GES, GEU: alu_branch_res_o = ~less; - default: alu_branch_res_o = 1'b1; + EQ: alu_branch_res = adder_z_flag; + NE: alu_branch_res = ~adder_z_flag; + LTS, LTU: alu_branch_res = less; + GES, GEU: alu_branch_res = ~less; + default: alu_branch_res = 1'b1; endcase end @@ -198,6 +217,48 @@ module alu default: ; // default case to suppress unique warning endcase + + if (branch_valid_i) begin + result_o = branch_result; + end else if (csr_valid_i) begin + result_o = csr_result; + end + end + // ---------------------- + // Branch Unit + // ---------------------- + branch_unit branch_unit_i ( + .operator_i, + .operand_a_i, + .operand_b_i, + 
.imm_i, + .pc_i, + .is_compressed_instr_i, + // any functional unit is valid, check that there is no accidental mis-predict + .fu_valid_i, + .branch_valid_i, + .branch_comp_res_i ( alu_branch_res ), + .branch_result_o ( branch_result ), + .branch_predict_i, + .resolved_branch_o, + .resolve_branch_o, + .branch_exception_o ( alu_exception_o ) + ); + + csr_buffer csr_buffer_i ( + .clk_i, + .rst_ni, + .flush_i, + .csr_valid_i, + .operator_i, + .operand_a_i, + .operand_b_i, + .csr_ready_o ( csr_ready ), + .csr_result_o ( csr_result ), + .commit_i, + .csr_addr_o + ); + endmodule diff --git a/src/ariane.sv b/src/ariane.sv index 0f8829049..bfdf5f4b5 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -91,12 +91,8 @@ module ariane #( logic [TRANS_ID_BITS-1:0] alu_trans_id_ex_id; logic alu_valid_ex_id; logic [63:0] alu_result_ex_id; + exception_t alu_exception_ex_id; // Branches and Jumps - logic branch_ready_ex_id; - logic [TRANS_ID_BITS-1:0] branch_trans_id_ex_id; - logic [63:0] branch_result_ex_id; - exception_t branch_exception_ex_id; - logic branch_valid_ex_id; logic branch_valid_id_ex; branchpredict_sbe_t branch_predict_id_ex; @@ -114,17 +110,23 @@ module ariane #( logic [TRANS_ID_BITS-1:0] mult_trans_id_ex_id; logic [63:0] mult_result_ex_id; logic mult_valid_ex_id; + // FPU + logic fpu_ready_ex_id; + logic fpu_valid_id_ex; + logic [1:0] fpu_fmt_id_ex; + logic [2:0] fpu_rm_id_ex; + logic [TRANS_ID_BITS-1:0] fpu_trans_id_ex_id; + logic [63:0] fpu_result_ex_id; + logic fpu_valid_ex_id; + exception_t fpu_exception_ex_id; // CSR - logic csr_ready_ex_id; logic csr_valid_id_ex; - logic [TRANS_ID_BITS-1:0] csr_trans_id_ex_id; - logic [63:0] csr_result_ex_id; - logic csr_valid_ex_id; // -------------- // EX <-> COMMIT // -------------- // CSR Commit logic csr_commit_commit_ex; + logic dirty_fp_state; // LSU Commit logic lsu_commit_commit_ex; logic lsu_commit_ready_ex_commit; @@ -139,10 +141,15 @@ module ariane #( // -------------- logic [NR_COMMIT_PORTS-1:0][4:0] 
waddr_commit_id; logic [NR_COMMIT_PORTS-1:0][63:0] wdata_commit_id; - logic [NR_COMMIT_PORTS-1:0] we_commit_id; + logic [NR_COMMIT_PORTS-1:0] we_gpr_commit_id; + logic [NR_COMMIT_PORTS-1:0] we_fpr_commit_id; // -------------- // CSR <-> * // -------------- + logic [4:0] fflags_csr_commit; + riscv::xs_t fs; + logic [2:0] frm_csr_id_issue_ex; + logic [6:0] fprec_csr_ex; logic enable_translation_csr_ex; logic en_ld_st_translation_csr_ex; riscv::priv_lvl_t ld_st_priv_lvl_csr_ex; @@ -159,6 +166,7 @@ module ariane #( logic tw_csr_id; logic tsr_csr_id; logic dcache_en_csr_nbdcache; + logic csr_write_fflags_commit_cs; logic icache_en_csr; logic debug_mode; logic single_step_csr_commit; @@ -252,6 +260,8 @@ module ariane #( .issue_instr_ack_i ( issue_instr_issue_id ), .priv_lvl_i ( priv_lvl ), + .fs_i ( fs ), + .frm_i ( frm_csr_id_issue_ex ), .debug_mode_i ( debug_mode ), .tvm_i ( tvm_csr_id ), .tw_i ( tw_csr_id ), @@ -288,7 +298,6 @@ module ariane #( .alu_ready_i ( alu_ready_ex_id ), .alu_valid_o ( alu_valid_id_ex ), // Branches and Jumps - .branch_ready_i ( branch_ready_ex_id ), .branch_valid_o ( branch_valid_id_ex ), // branch is valid .branch_predict_o ( branch_predict_id_ex ), // branch predict to ex .resolve_branch_i ( resolve_branch_ex_id ), // in order to resolve the branch @@ -298,20 +307,25 @@ module ariane #( // Multiplier .mult_ready_i ( mult_ready_ex_id ), .mult_valid_o ( mult_valid_id_ex ), + // FPU + .fpu_ready_i ( fpu_ready_ex_id ), + .fpu_valid_o ( fpu_valid_id_ex ), + .fpu_fmt_o ( fpu_fmt_id_ex ), + .fpu_rm_o ( fpu_rm_id_ex ), // CSR - .csr_ready_i ( csr_ready_ex_id ), .csr_valid_o ( csr_valid_id_ex ), + // Commit .resolved_branch_i ( resolved_branch ), - .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, branch_trans_id_ex_id, csr_trans_id_ex_id, mult_trans_id_ex_id }), - .wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, branch_result_ex_id, csr_result_ex_id, mult_result_ex_id }), - .ex_ex_i ( {{$bits(exception_t){1'b0}}, lsu_exception_ex_id, 
branch_exception_ex_id, {$bits(exception_t){1'b0}}, {$bits(exception_t){1'b0}} }), - .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, branch_valid_ex_id, csr_valid_ex_id, mult_valid_ex_id }), + .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, mult_trans_id_ex_id, fpu_trans_id_ex_id }), + .wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, mult_result_ex_id, fpu_result_ex_id }), + .ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception_t){1'b0}}, fpu_exception_ex_id }), + .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, mult_valid_ex_id, fpu_valid_ex_id }), .waddr_i ( waddr_commit_id ), .wdata_i ( wdata_commit_id ), - .we_i ( we_commit_id ), - + .we_gpr_i ( we_gpr_commit_id ), + .we_fpr_i ( we_fpr_commit_id ), .commit_instr_o ( commit_instr_id_commit ), .commit_ack_i ( commit_ack ), .* @@ -321,6 +335,8 @@ module ariane #( // EX // --------- ex_stage ex_stage_i ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), .flush_i ( flush_ctrl_ex ), .fu_i ( fu_id_ex ), .operator_i ( operator_id_ex ), @@ -336,16 +352,16 @@ module ariane #( .alu_result_o ( alu_result_ex_id ), .alu_trans_id_o ( alu_trans_id_ex_id ), .alu_valid_o ( alu_valid_ex_id ), + .alu_exception_o ( alu_exception_ex_id ), // Branches and Jumps - .branch_ready_o ( branch_ready_ex_id ), - .branch_valid_o ( branch_valid_ex_id ), .branch_valid_i ( branch_valid_id_ex ), - .branch_trans_id_o ( branch_trans_id_ex_id ), - .branch_result_o ( branch_result_ex_id ), - .branch_exception_o ( branch_exception_ex_id ), .branch_predict_i ( branch_predict_id_ex ), // branch predict to ex .resolved_branch_o ( resolved_branch ), .resolve_branch_o ( resolve_branch_ex_id ), + // CSR + .csr_valid_i ( csr_valid_id_ex ), + .csr_addr_o ( csr_addr_ex_csr ), + .csr_commit_i ( csr_commit_commit_ex ), // from commit // LSU .lsu_ready_o ( lsu_ready_ex_id ), .lsu_valid_i ( lsu_valid_id_ex ), @@ -356,17 +372,26 @@ module ariane #( .lsu_commit_ready_o ( lsu_commit_ready_ex_commit ), // to commit .lsu_exception_o ( 
lsu_exception_ex_id ), .no_st_pending_o ( no_st_pending_ex_commit ), + // MULT + .mult_ready_o ( mult_ready_ex_id ), + .mult_valid_i ( mult_valid_id_ex ), + .mult_trans_id_o ( mult_trans_id_ex_id ), + .mult_result_o ( mult_result_ex_id ), + .mult_valid_o ( mult_valid_ex_id ), + // FPU + .fpu_ready_o ( fpu_ready_ex_id ), + .fpu_valid_i ( fpu_valid_id_ex ), + .fpu_fmt_i ( fpu_fmt_id_ex ), + .fpu_rm_i ( fpu_rm_id_ex ), + .fpu_frm_i ( frm_csr_id_issue_ex ), + .fpu_prec_i ( fprec_csr_ex ), + .fpu_trans_id_o ( fpu_trans_id_ex_id ), + .fpu_result_o ( fpu_result_ex_id ), + .fpu_valid_o ( fpu_valid_ex_id ), + .fpu_exception_o ( fpu_exception_ex_id ), .amo_valid_commit_i ( amo_valid_commit ), .amo_req_o ( amo_req ), .amo_resp_i ( amo_resp ), - // CSR - .csr_ready_o ( csr_ready_ex_id ), - .csr_valid_i ( csr_valid_id_ex ), - .csr_trans_id_o ( csr_trans_id_ex_id ), - .csr_result_o ( csr_result_ex_id ), - .csr_valid_o ( csr_valid_ex_id ), - .csr_addr_o ( csr_addr_ex_csr ), - .csr_commit_i ( csr_commit_commit_ex ), // from commit // Performance counters .itlb_miss_o ( itlb_miss_ex_perf ), .dtlb_miss_o ( dtlb_miss_ex_perf ), @@ -382,16 +407,9 @@ module ariane #( .asid_i ( asid_csr_ex ), // from CSR .icache_areq_i ( icache_areq_cache_ex ), .icache_areq_o ( icache_areq_ex_cache ), - - .mult_ready_o ( mult_ready_ex_id ), - .mult_valid_i ( mult_valid_id_ex ), - .mult_trans_id_o ( mult_trans_id_ex_id ), - .mult_result_o ( mult_result_ex_id ), - .mult_valid_o ( mult_valid_ex_id ), // DCACHE interfaces .dcache_req_ports_i ( dcache_req_ports_cache_ex ), - .dcache_req_ports_o ( dcache_req_ports_ex_cache ), - .* + .dcache_req_ports_o ( dcache_req_ports_ex_cache ) ); // --------- @@ -403,6 +421,7 @@ module ariane #( .halt_i ( halt_ctrl ), .flush_dcache_i ( dcache_flush_ctrl_cache ), .exception_o ( ex_commit ), + .dirty_fp_state_o ( dirty_fp_state ), .debug_mode_i ( debug_mode ), .debug_req_i ( debug_req ), .single_step_i ( single_step_csr_commit ), @@ -411,7 +430,8 @@ module ariane #( 
.no_st_pending_i ( no_st_pending_ex_commit ), .waddr_o ( waddr_commit_id ), .wdata_o ( wdata_commit_id ), - .we_o ( we_commit_id ), + .we_gpr_o ( we_gpr_commit_id ), + .we_fpr_o ( we_fpr_commit_id ), .commit_lsu_o ( lsu_commit_commit_ex ), .commit_lsu_ready_i ( lsu_commit_ready_ex_commit ), .amo_valid_commit_o ( amo_valid_commit ), @@ -421,6 +441,7 @@ module ariane #( .csr_op_o ( csr_op_commit_csr ), .csr_wdata_o ( csr_wdata_commit_csr ), .csr_rdata_i ( csr_rdata_csr_commit ), + .csr_write_fflags_o ( csr_write_fflags_commit_cs ), .csr_exception_i ( csr_exception_csr_commit ), .fence_i_o ( fence_i_commit_controller ), .fence_o ( fence_commit_controller ), @@ -441,6 +462,8 @@ module ariane #( .commit_ack_i ( commit_ack ), .ex_i ( ex_commit ), .csr_op_i ( csr_op_commit_csr ), + .csr_write_fflags_i ( csr_write_fflags_commit_cs ), + .dirty_fp_state_i ( dirty_fp_state ), .csr_addr_i ( csr_addr_ex_csr ), .csr_wdata_i ( csr_wdata_commit_csr ), .csr_rdata_o ( csr_rdata_csr_commit ), @@ -451,6 +474,10 @@ module ariane #( .set_debug_pc_o ( set_debug_pc ), .trap_vector_base_o ( trap_vector_base_commit_pcgen ), .priv_lvl_o ( priv_lvl ), + .fs_o ( fs ), + .fflags_o ( fflags_csr_commit ), + .frm_o ( frm_csr_id_issue_ex ), + .fprec_o ( fprec_csr_ex ), .ld_st_priv_lvl_o ( ld_st_priv_lvl_csr_ex ), .en_translation_o ( enable_translation_csr_ex ), .en_ld_st_translation_o ( en_ld_st_translation_csr_ex ), @@ -584,7 +611,8 @@ module ariane #( // write-back assign tracer_if.waddr = waddr_commit_id; assign tracer_if.wdata = wdata_commit_id; - assign tracer_if.we = we_commit_id; + assign tracer_if.we_gpr = we_gpr_commit_id; + assign tracer_if.we_fpr = we_fpr_commit_id; // commit assign tracer_if.commit_instr = commit_instr_id_commit; assign tracer_if.commit_ack = commit_ack; diff --git a/src/ariane_regfile.sv b/src/ariane_regfile.sv index 0203202e8..8e54a8278 100644 --- a/src/ariane_regfile.sv +++ b/src/ariane_regfile.sv @@ -23,151 +23,98 @@ // latches and is thus smaller than the flip-flop 
based RF. // -module ariane_regfile #( - parameter DATA_WIDTH = 32 +module ariane_regfile_lol #( + parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned NR_READ_PORTS = 2, + parameter int unsigned NR_WRITE_PORTS = 2, + parameter bit ZERO_REG_ZERO = 0 )( - // Clock and Reset - input logic clk, - input logic rst_n, - - input logic test_en_i, - - //Read port R1 - input logic [4:0] raddr_a_i, - output logic [DATA_WIDTH-1:0] rdata_a_o, - - //Read port R2 - input logic [4:0] raddr_b_i, - output logic [DATA_WIDTH-1:0] rdata_b_o, - - - // Write port W1 - input logic [4:0] waddr_a_i, - input logic [DATA_WIDTH-1:0] wdata_a_i, - input logic we_a_i, - - // Write port W2 - input logic [4:0] waddr_b_i, - input logic [DATA_WIDTH-1:0] wdata_b_i, - input logic we_b_i + // clock and reset + input logic clk_i, + input logic rst_ni, + // disable clock gates for testing + input logic test_en_i, + // read port + input logic [NR_READ_PORTS-1:0][4:0] raddr_i, + output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o, + // write port + input logic [NR_WRITE_PORTS-1:0][4:0] waddr_i, + input logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_i, + input logic [NR_WRITE_PORTS-1:0] we_i ); - localparam ADDR_WIDTH = 5;; - localparam NUM_WORDS = 2**ADDR_WIDTH; + localparam ADDR_WIDTH = 5; + localparam NUM_WORDS = 2**ADDR_WIDTH; - logic [DATA_WIDTH-1:0] mem[NUM_WORDS]; + logic [NUM_WORDS-1:ZERO_REG_ZERO] mem_clocks; - logic [NUM_WORDS-1:1] waddr_onehot_a; - logic [NUM_WORDS-1:1] waddr_onehot_b, waddr_onehot_b_q; + logic [DATA_WIDTH-1:0] mem[NUM_WORDS]; + logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:1] waddr_onehot,waddr_onehot_q; + logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_q; - logic [NUM_WORDS-1:1] mem_clocks; - logic [DATA_WIDTH-1:0] wdata_a_q; - logic [DATA_WIDTH-1:0] wdata_b_q; - // Write port W1 - logic [ADDR_WIDTH-1:0] raddr_a_int, raddr_b_int, waddr_a_int; + // decode addresses + for (genvar i = 0; i < NR_READ_PORTS; i++) + assign rdata_o[i] = mem[raddr_i[i][ADDR_WIDTH-1:0]]; - 
assign raddr_a_int = raddr_a_i[ADDR_WIDTH-1:0]; - assign raddr_b_int = raddr_b_i[ADDR_WIDTH-1:0]; - assign waddr_a_int = waddr_a_i[ADDR_WIDTH-1:0]; - - int unsigned i; - int unsigned j; - int unsigned k; - int unsigned l; - genvar x; - - logic clk_int; - - //----------------------------------------------------------------------------- - //-- READ : Read address decoder RAD - //----------------------------------------------------------------------------- - assign rdata_a_o = mem[raddr_a_int]; - assign rdata_b_o = mem[raddr_b_int]; - - //----------------------------------------------------------------------------- - // WRITE : SAMPLE INPUT DATA - //--------------------------------------------------------------------------- - - cluster_clock_gating CG_WE_GLOBAL - ( - .clk_i ( clk ), - .en_i ( we_a_i ), - .test_en_i ( test_en_i ), - .clk_o ( clk_int ) - ); - - // use clk_int here, since otherwise we don't want to write anything anyway - always_ff @(posedge clk_int, negedge rst_n) begin : sample_waddr - if (~rst_n) begin - wdata_a_q <= '0; - wdata_b_q <= '0; - waddr_onehot_b_q <= '0; + always_ff @(posedge clk_i, negedge rst_ni) begin : sample_waddr + if (~rst_ni) begin + wdata_q <= '0; end else begin - if (we_a_i) - wdata_a_q <= wdata_a_i; - if (we_b_i) - wdata_b_q <= wdata_b_i; - - waddr_onehot_b_q <= waddr_onehot_b; + for (int unsigned i = 0; i < NR_WRITE_PORTS; i++) + // enable flipflop will most probably infer clock gating + if (we_i[i]) begin + wdata_q[i] <= wdata_i[i]; + end + waddr_onehot_q <= waddr_onehot; end end - //----------------------------------------------------------------------------- - //-- WRITE : Write Address Decoder (WAD), combinatorial process - //----------------------------------------------------------------------------- - always_comb begin : p_WADa - for (i = 1; i < NUM_WORDS; i++) begin : p_WordItera - if ((we_a_i == 1'b1) && (waddr_a_i == i)) - waddr_onehot_a[i] = 1'b1; - else - waddr_onehot_a[i] = 1'b0; + // WRITE : Write Address Decoder 
(WAD), combinatorial process + always_comb begin : decode_write_addess + for (int unsigned i = 0; i < NR_WRITE_PORTS; i++) begin + for (int unsigned j = 1; j < NUM_WORDS; j++) begin + if (we_i[i] && (waddr_i[i] == j)) + waddr_onehot[i][j] = 1'b1; + else + waddr_onehot[i][j] = 1'b0; + end end end - always_comb begin : p_WADb - for (j = 1; j < NUM_WORDS; j++) begin : p_WordIterb - if ((we_b_i == 1'b1) && (waddr_b_i == j)) - waddr_onehot_b[j] = 1'b1; - else - waddr_onehot_b[j] = 1'b0; - end + // WRITE : Clock gating (if integrated clock-gating cells are available) + for (genvar x = ZERO_REG_ZERO; x < NUM_WORDS; x++) begin + + logic [NR_WRITE_PORTS-1:0] waddr_ored; + + for (genvar i = 0; i < NR_WRITE_PORTS; i++) + assign waddr_ored[i] = waddr_onehot[i][x]; + + cluster_clock_gating i_cg ( + .clk_i ( clk_i ), + .en_i ( |waddr_ored ), + .test_en_i ( test_en_i ), + .clk_o ( mem_clocks[x] ) + ); end - //----------------------------------------------------------------------------- - //-- WRITE : Clock gating (if integrated clock-gating cells are available) - //----------------------------------------------------------------------------- - generate - for (x = 1; x < NUM_WORDS; x++) - begin : CG_CELL_WORD_ITER - cluster_clock_gating CG_Inst - ( - .clk_i ( clk_int ), - .en_i ( waddr_onehot_a[x] | waddr_onehot_b[x] ), - .test_en_i ( test_en_i ), - .clk_o ( mem_clocks[x] ) - ); - end - endgenerate - - //----------------------------------------------------------------------------- - //-- WRITE : Write operation - //----------------------------------------------------------------------------- - //-- Generate M = WORDS sequential processes, each of which describes one - //-- word of the memory. The processes are synchronized with the clocks - //-- ClocksxC(i), i = 0, 1, ..., M-1 - //-- Use active low, i.e. 
transparent on low latches as storage elements - //-- Data is sampled on rising clock edge + // Generate M = WORDS sequential processes, each of which describes one + // word of the memory. The processes are synchronized with the clocks + // ClocksxC(i), i = 0, 1, ..., M-1 + // Use active low, i.e. transparent on low latches as storage elements + // Data is sampled on rising clock edge // Integer registers always_latch begin : latch_wdata // Note: The assignment has to be done inside this process or Modelsim complains about it - mem[0] = '0; + if (ZERO_REG_ZERO) + mem[0] = '0; - for(k = 1; k < NUM_WORDS; k++) - begin : w_WordIter - if (mem_clocks[k] == 1'b1) - mem[k] = waddr_onehot_b_q[k] ? wdata_b_q : wdata_a_q; - end + for (int unsigned i = 0; i < NR_WRITE_PORTS; i++) begin + for (int unsigned k = ZERO_REG_ZERO; k < NUM_WORDS; k++) begin + if (mem_clocks[k] && waddr_onehot_q[i][k]) + mem[k] = wdata_q[i]; + end + end end endmodule diff --git a/src/ariane_regfile_ff.sv b/src/ariane_regfile_ff.sv index 6514ecb12..a5b9c6954 100644 --- a/src/ariane_regfile_ff.sv +++ b/src/ariane_regfile_ff.sv @@ -23,87 +23,63 @@ // module ariane_regfile #( - parameter DATA_WIDTH = 32 + parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned NR_READ_PORTS = 2, + parameter int unsigned NR_WRITE_PORTS = 2, + parameter bit ZERO_REG_ZERO = 0 )( - // Clock and Reset - input logic clk, - input logic rst_n, - - input logic test_en_i, - - //Read port R1 - input logic [4:0] raddr_a_i, - output logic [DATA_WIDTH-1:0] rdata_a_o, - - //Read port R2 - input logic [4:0] raddr_b_i, - output logic [DATA_WIDTH-1:0] rdata_b_o, - - - // Write port W1 - input logic [4:0] waddr_a_i, - input logic [DATA_WIDTH-1:0] wdata_a_i, - input logic we_a_i, - - // Write port W2 - input logic [4:0] waddr_b_i, - input logic [DATA_WIDTH-1:0] wdata_b_i, - input logic we_b_i + // clock and reset + input logic clk_i, + input logic rst_ni, + // disable clock gates for testing + input logic test_en_i, + // read port 
+ input logic [NR_READ_PORTS-1:0][4:0] raddr_i, + output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o, + // write port + input logic [NR_WRITE_PORTS-1:0][4:0] waddr_i, + input logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_i, + input logic [NR_WRITE_PORTS-1:0] we_i ); localparam ADDR_WIDTH = 5; localparam NUM_WORDS = 2**ADDR_WIDTH; - logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] rf_reg; - logic [NUM_WORDS-1:0] we_a_dec, we_b_dec; + logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem; + logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:0] we_dec; - always_comb begin : we_a_decoder - for (int i = 0; i < NUM_WORDS; i++) begin - if (waddr_a_i == i) - we_a_dec[i] = we_a_i; - else - we_a_dec[i] = 1'b0; - end - end - always_comb begin : we_b_decoder - for (int i = 0; i < NUM_WORDS; i++) begin - if (waddr_b_i == i) - we_b_dec[i] = we_b_i; - else - we_b_dec[i] = 1'b0; - end - end - - generate - // loop from 1 to NUM_WORDS-1 as R0 is nil - for (genvar i = 1; i < NUM_WORDS; i++) begin : rf_gen - - always_ff @(posedge clk, negedge rst_n) begin : register_write_behavioral - if (rst_n==1'b0) begin - rf_reg[i] <= 'b0; - end else begin - if (we_a_dec[i]) - rf_reg[i] <= wdata_a_i; - - if (we_b_dec[i]) - rf_reg[i] <= wdata_b_i; + always_comb begin : we_decoder + for (int unsigned j = 0; j < NR_WRITE_PORTS; j++) begin + for (int unsigned i = 0; i < NUM_WORDS; i++) begin + if (waddr_i[j] == i) + we_dec[j][i] = we_i[j]; + else + we_dec[j][i] = 1'b0; + end end - end end -// R0 is nil -`ifdef verilator - always_ff @(posedge clk, negedge rst_n) begin - rf_reg[0] <= '0; + // loop from 1 to NUM_WORDS-1 as R0 is nil + always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral + if (~rst_ni) begin + mem <= '{default: '0}; + end else begin + for (int unsigned j = 0; j < NR_WRITE_PORTS; j++) begin + for (int unsigned i = 0; i < NUM_WORDS; i++) begin + if (we_dec[j][i]) begin + mem[i] <= wdata_i[j]; + end + end + if (ZERO_REG_ZERO) begin + mem[0] <= '0; + end + end + end end -`else - assign 
rf_reg[0] = '0; -`endif - endgenerate - - assign rdata_a_o = rf_reg[raddr_a_i]; - assign rdata_b_o = rf_reg[raddr_b_i]; + for (genvar i = 0; i < NR_READ_PORTS; i++) begin + assign rdata_o[i] = mem[raddr_i[i]]; + end endmodule diff --git a/src/branch_unit.sv b/src/branch_unit.sv index 7600ce35d..ff646d36a 100644 --- a/src/branch_unit.sv +++ b/src/branch_unit.sv @@ -15,7 +15,6 @@ import ariane_pkg::*; module branch_unit ( - input logic [TRANS_ID_BITS-1:0] trans_id_i, input fu_op operator_i, // comparison operation to perform input logic [63:0] operand_a_i, // contains content of RS 1 input logic [63:0] operand_b_i, // contains content of RS 2 @@ -25,10 +24,7 @@ module branch_unit ( input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict input logic branch_valid_i, input logic branch_comp_res_i, // branch comparison result from ALU - output logic branch_ready_o, - output logic branch_valid_o, output logic [63:0] branch_result_o, - output logic [TRANS_ID_BITS-1:0] branch_trans_id_o, input branchpredict_sbe_t branch_predict_i, // this is the address we predicted output branchpredict_t resolved_branch_o, // this is the actual address we are targeting @@ -38,10 +34,6 @@ module branch_unit ( ); logic [63:0] target_address; logic [63:0] next_pc; - // branches are single cycle at the moment, feed-through the control signals - assign branch_trans_id_o = trans_id_i; - assign branch_valid_o = branch_valid_i; - assign branch_ready_o = 1'b1; // we are always ready // here we handle the various possibilities of mis-predicts always_comb begin : mispredict_handler diff --git a/src/cache_subsystem/miss_handler.sv b/src/cache_subsystem/miss_handler.sv index b119f68a6..31eed3a14 100644 --- a/src/cache_subsystem/miss_handler.sv +++ b/src/cache_subsystem/miss_handler.sv @@ -180,7 +180,7 @@ module miss_handler #( IDLE: begin // lowest priority are AMOs, wait until everything else is served before going for the AMOs - if (amo_req_i.req) begin 
+ if (amo_req_i.req && !busy_i) begin // 1. Flush the cache if (!serve_amo_q) begin state_d = FLUSH_REQ_STATUS; @@ -203,6 +203,8 @@ module miss_handler #( // here comes the refill portion of code if (miss_req_valid[i] && !miss_req_bypass[i]) begin state_d = MISS; + // we are taking another request so don't take the AMO + serve_amo_d = 1'b0; // save to MSHR mshr_d.valid = 1'b1; mshr_d.we = miss_req_we[i]; diff --git a/src/cache_subsystem/std_icache.sv b/src/cache_subsystem/std_icache.sv index a6dcc2def..5fbb295b5 100644 --- a/src/cache_subsystem/std_icache.sv +++ b/src/cache_subsystem/std_icache.sv @@ -50,8 +50,8 @@ module std_icache #( logic flushing_d, flushing_q; // signals - logic [ICACHE_SET_ASSOC-1:0] req; // request to data memory - logic [ICACHE_SET_ASSOC-1:0] vld_req; // request to valid/tag memory + logic [ICACHE_SET_ASSOC-1:0] req; // request to data memory + logic [ICACHE_SET_ASSOC-1:0] vld_req; // request to valid/tag memory logic [(ICACHE_LINE_WIDTH+7)/8-1:0] data_be; // byte enable for data memory logic [(2**NR_AXI_REFILLS-1):0][7:0] be; // byte enable logic [$clog2(ICACHE_NUM_WORD)-1:0] addr; // this is a cache-line address, to memory array @@ -109,24 +109,24 @@ module std_icache #( .rdata_o ( data_rdata[i] ) ); end - + // -------------------- // Tag Comparison and way select // -------------------- // cacheline selected by hit - logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; - + logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; + assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2]; - generate + generate for (genvar i=0;i compare the tag TAG_CMP, TAG_CMP_SAVED: begin areq_o.fetch_req = 1'b1; // request address translation - + // (speculatively) request the content of all arrays req = '1; vld_req = '1; @@ -255,7 +255,7 @@ module std_icache #( dreq_o.ready = 1'b1; dreq_o.valid = 1'b1; vaddr_d = dreq_i.vaddr; - + // we've got another request if (dreq_i.req) begin // save the index and stay in compare mode @@ -335,7 +335,7 @@ module std_icache #( req 
= evict_way_q; vld_req = evict_way_q; - + if (axi.r_valid) begin we = 1'b1; tag_wdata.tag = tag_q; @@ -380,7 +380,14 @@ module std_icache #( endcase // those are the states where we need to wait a little longer until we can safely exit - if (dreq_i.kill_s2 && !(state_q inside {REFILL, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP}) && !dreq_o.ready) begin + if (dreq_i.kill_s2 && !(state_q inside { + REFILL, + WAIT_AXI_R_RESP, + WAIT_KILLED_AXI_R_RESP, + WAIT_KILLED_REFILL, + WAIT_ADDRESS_TRANSLATION, + WAIT_ADDRESS_TRANSLATION_KILLED}) + && !dreq_o.ready) begin state_d = IDLE; end @@ -443,14 +450,14 @@ module std_icache #( //pragma translate_off `ifndef VERILATOR initial begin - assert ($bits(axi.aw_addr) == 64) + assert ($bits(axi.aw_addr) == 64) else $fatal(1, "[icache] Ariane needs a 64-bit bus"); end // assert that cache only hits on one way onehot: assert property ( - @(posedge clk_i) disable iff (~rst_ni) $onehot0(hit)) + @(posedge clk_i) disable iff (~rst_ni) $onehot0(hit)) else $fatal(1, "[icache] Hit should be one-hot encoded"); `endif -//pragma translate_on +//pragma translate_on endmodule diff --git a/src/clint/clint.sv b/src/clint/clint.sv index a53766497..6b58686ed 100644 --- a/src/clint/clint.sv +++ b/src/clint/clint.sv @@ -24,7 +24,7 @@ module clint #( )( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low - + input logic testmode_i, AXI_BUS.Slave slave, input logic rtc_i, // Real-time clock in (usually 32.768 kHz) @@ -146,7 +146,7 @@ module clint #( // 1. 
Put the RTC input through a classic two stage edge-triggered synchronizer to filter out any // metastability effects (or at least make them unlikely :-)) sync_wedge i_sync_edge ( - .en_i ( 1'b1 ), + .en_i ( ~testmode_i ), .serial_i ( rtc_i ), .r_edge_o ( increase_timer ), .f_edge_o ( ), // left open diff --git a/src/commit_stage.sv b/src/commit_stage.sv index e91be4e68..957d13a5a 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -22,6 +22,7 @@ module commit_stage #( input logic halt_i, // request to halt the core input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline output exception_t exception_o, // take exception to controller + output logic dirty_fp_state_o, // mark the F state as dirty input logic debug_mode_i, // we are in debug mode input logic debug_req_i, // debug unit is requesting to enter debug mode input logic single_step_i, // we are in single step debug mode @@ -31,7 +32,8 @@ module commit_stage #( // to register file output logic [NR_COMMIT_PORTS-1:0][4:0] waddr_o, // register file write address output logic [NR_COMMIT_PORTS-1:0][63:0] wdata_o, // register file write data - output logic [NR_COMMIT_PORTS-1:0] we_o, // register file write enable + output logic [NR_COMMIT_PORTS-1:0] we_gpr_o, // register file write enable + output logic [NR_COMMIT_PORTS-1:0] we_fpr_o, // floating point register enable // Atomic memory operations input amo_resp_t amo_resp_i, // result of AMO operation // to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline) @@ -41,6 +43,7 @@ module commit_stage #( output logic [63:0] csr_wdata_o, // data to write to CSR input logic [63:0] csr_rdata_i, // data to read from CSR input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit) + output logic csr_write_fflags_o, // write the fflags CSR // commit signals to ex output logic commit_lsu_o, // commit the pending store input logic commit_lsu_ready_i, // 
commit buffer of LSU is ready @@ -53,10 +56,12 @@ module commit_stage #( output logic sfence_vma_o // flush TLBs and pipeline ); + // TODO make these parametric with NR_COMMIT_PORTS assign waddr_o[0] = commit_instr_i[0].rd[4:0]; assign waddr_o[1] = commit_instr_i[1].rd[4:0]; - assign pc_o = commit_instr_i[0].pc; + assign pc_o = commit_instr_i[0].pc; + assign dirty_fp_state_o = |we_fpr_o; logic instr_0_is_amo; assign instr_0_is_amo = is_amo(commit_instr_i[0].op); @@ -65,25 +70,27 @@ module commit_stage #( // ------------------- // write register file or commit instruction in LSU or CSR Buffer always_comb begin : commit + // default assignments - commit_ack_o[0] = 1'b0; - commit_ack_o[1] = 1'b0; + commit_ack_o[0] = 1'b0; + commit_ack_o[1] = 1'b0; amo_valid_commit_o = 1'b0; - we_o[0] = 1'b0; - we_o[1] = 1'b0; - - commit_lsu_o = 1'b0; - commit_csr_o = 1'b0; + we_gpr_o[0] = 1'b0; + we_gpr_o[1] = 1'b0; + we_fpr_o = '{default: 1'b0}; + commit_lsu_o = 1'b0; + commit_csr_o = 1'b0; // amos will commit on port 0 wdata_o[0] = (amo_resp_i.ack) ? amo_resp_i.result : commit_instr_i[0].result; wdata_o[1] = commit_instr_i[1].result; csr_op_o = ADD; // this corresponds to a CSR NOP - csr_wdata_o = 64'b0; - fence_i_o = 1'b0; - fence_o = 1'b0; - sfence_vma_o = 1'b0; + csr_wdata_o = 64'b0; + fence_i_o = 1'b0; + fence_o = 1'b0; + sfence_vma_o = 1'b0; + csr_write_fflags_o = 1'b0; flush_commit_o = 1'b0; // we will not commit the instruction if we took an exception @@ -92,6 +99,8 @@ module commit_stage #( // also check that there is no atomic memory operation committing, right now this is the only operation // which will take longer than one cycle to commit if (commit_instr_i[0].valid && !halt_i) begin + // we have to exclude the AMOs from debug mode as we are not jumping to debug + // while committing an AMO if (!debug_req_i || debug_mode_i) begin commit_ack_o[0] = 1'b1; // register will be the all zero register. 
@@ -101,7 +110,10 @@ module commit_stage #( if (!exception_o.valid) begin // we can definitely write the register file // if the instruction is not committing anything the destination - we_o[0] = 1'b1; + if (is_rd_fpr(commit_instr_i[0].op)) + we_fpr_o[0] = 1'b1; + else + we_gpr_o[0] = 1'b1; // check whether the instruction we retire was a store // do not commit the instruction if we got an exception since the store buffer will be cleared @@ -113,6 +125,14 @@ module commit_stage #( else // if the LSU buffer is not ready - do not commit, wait commit_ack_o[0] = 1'b0; end + // --------- + // FPU Flags + // --------- + if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin + // write the CSR with potential exception flags from retiring floating point instruction + csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]}; + csr_write_fflags_o = 1'b1; + end end // --------- @@ -158,13 +178,13 @@ module commit_stage #( // ------------------ // AMO // ------------------ - if (instr_0_is_amo && !commit_instr_i[0].ex.valid) begin + if (RVA && instr_0_is_amo && !commit_instr_i[0].ex.valid) begin // AMO finished commit_ack_o[0] = amo_resp_i.ack; // flush the pipeline flush_commit_o = amo_resp_i.ack; amo_valid_commit_o = 1'b1; - we_o[0] = amo_resp_i.ack; + we_gpr_o[0] = amo_resp_i.ack; end end @@ -180,11 +200,27 @@ module commit_stage #( && !instr_0_is_amo && !single_step_i) begin // only if the first instruction didn't throw an exception and this instruction won't throw an exception - // and the operator is of type ALU, LOAD, CTRL_FLOW, MULT + // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC if (!exception_o.valid && !commit_instr_i[1].ex.valid - && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin - we_o[1] = 1'b1; + && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin + + if (is_rd_fpr(commit_instr_i[1].op)) + we_fpr_o[1] = 1'b1; + else + we_gpr_o[1] = 1'b1; + commit_ack_o[1] = 1'b1; + + // 
additionally check if we are retiring an FPU instruction because we need to make sure that we write all + // exception flags + if (commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin + if (csr_write_fflags_o) + csr_wdata_o = {59'b0, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])}; + else + csr_wdata_o = {59'b0, commit_instr_i[1].ex.cause[4:0]}; + + csr_write_fflags_o = 1'b1; + end end end end diff --git a/src/common_cells b/src/common_cells index 9278bc769..21a060d2c 160000 --- a/src/common_cells +++ b/src/common_cells @@ -1 +1 @@ -Subproject commit 9278bc769f3efd006864a7ef7721f2796ed968e6 +Subproject commit 21a060d2c2c75173312b82cc72db96a2c62e66c5 diff --git a/src/compressed_decoder.sv b/src/compressed_decoder.sv index f45a21ed0..7298db691 100644 --- a/src/compressed_decoder.sv +++ b/src/compressed_decoder.sv @@ -21,10 +21,10 @@ import ariane_pkg::*; module compressed_decoder ( - input logic [31:0] instr_i, - output logic [31:0] instr_o, - output logic illegal_instr_o, - output logic is_compressed_o + input logic [31:0] instr_i, + output logic [31:0] instr_o, + output logic illegal_instr_o, + output logic is_compressed_o ); // ------------------- @@ -36,33 +36,46 @@ module compressed_decoder is_compressed_o = 1'b1; instr_o = instr_i; + // I: | imm[11:0] | rs1 | funct3 | rd | opcode | + // S: | imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode | unique case (instr_i[1:0]) // C0 - 2'b00: begin + riscv::OpcodeC0: begin unique case (instr_i[15:13]) - 3'b000: begin + riscv::OpcodeC0Addi4spn: begin // c.addi4spn -> addi rd', x2, imm - instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], riscv::OpcodeOpimm}; + instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], riscv::OpcodeOpImm}; if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1; end - 3'b010: begin + riscv::OpcodeC0Fld: begin + // c.fld -> fld rd', imm(rs1') + // CLD: | 
funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 | + instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], riscv::OpcodeLoadFp}; + end + + riscv::OpcodeC0Lw: begin // c.lw -> lw rd', imm(rs1') instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6], 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], riscv::OpcodeLoad}; end - 3'b011: begin + riscv::OpcodeC0Ld: begin // c.ld -> ld rd', imm(rs1') - // | imm[11:0] | rs1 | funct3 | rd | opcode| + // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 | instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], riscv::OpcodeLoad}; end - 3'b110: begin + riscv::OpcodeC0Fsd: begin + // c.fsd -> fsd rs2', imm(rs1') + instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStoreFp}; + end + + riscv::OpcodeC0Sw: begin // c.sw -> sw rs2', imm(rs1') instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6], 2'b00, riscv::OpcodeStore}; end - 3'b111: begin + riscv::OpcodeC0Sd: begin // c.sd -> sd rs2', imm(rs1') instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStore}; end @@ -74,137 +87,143 @@ module compressed_decoder end // C1 - 2'b01: begin - unique case (instr_i[15:13]) - 3'b000: begin - // c.addi -> addi rd, rd, nzimm - // c.nop -> addi 0, 0, 0 - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOpimm}; - end + riscv::OpcodeC1: begin + unique case (instr_i[15:13]) + riscv::OpcodeC1Addi: begin + // c.addi -> addi rd, rd, nzimm + // c.nop -> addi 0, 0, 0 + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOpImm}; + end - // c.addiw -> addiw rd, rd, nzimm for RV64 - 3'b001: begin - if (instr_i[11:7] != 
5'h0) // only valid if the destination is not r0 - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOpimm32}; - else - illegal_instr_o = 1'b1; - end + // c.addiw -> addiw rd, rd, nzimm for RV64 + riscv::OpcodeC1Addiw: begin + if (instr_i[11:7] != 5'h0) // only valid if the destination is not r0 + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], riscv::OpcodeOpImm32}; + else + illegal_instr_o = 1'b1; + end - riscv::OpcodeCJ: begin - // 101: c.j -> jal x0, imm - instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], riscv::OpcodeJal}; - end + riscv::OpcodeC1Li: begin + // c.li -> addi rd, x0, nzimm + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOpImm}; + if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; + end - 3'b010: begin - // c.li -> addi rd, x0, nzimm - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOpimm}; - if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; - end + riscv::OpcodeC1LuiAddi16sp: begin + // c.lui -> lui rd, imm + instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], riscv::OpcodeLui}; - 3'b011: begin - // c.lui -> lui rd, imm - instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], riscv::OpcodeLui}; + if (instr_i[11:7] == 5'h02) begin + // c.addi16sp -> addi x2, x2, nzimm + instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, riscv::OpcodeOpImm}; + end else if (instr_i[11:7] == 5'b0) begin + illegal_instr_o = 1'b1; + end - if (instr_i[11:7] == 5'h02) begin - // c.addi16sp -> addi x2, x2, nzimm - instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, riscv::OpcodeOpimm}; - end else if (instr_i[11:7] == 5'b0) begin - illegal_instr_o = 
1'b1; - end + if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; + end - if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; - end + riscv::OpcodeC1MiscAlu: begin + unique case (instr_i[11:10]) + 2'b00, + 2'b01: begin + // 00: c.srli -> srli rd, rd, shamt + // 01: c.srai -> srai rd, rd, shamt + instr_o = {1'b0, instr_i[10], 4'b0, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], riscv::OpcodeOpImm}; + // shamt field must be non-zero + if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; + end - 3'b100: begin - unique case (instr_i[11:10]) - 2'b00, - 2'b01: begin - // 00: c.srli -> srli rd, rd, shamt - // 01: c.srai -> srai rd, rd, shamt - instr_o = {1'b0, instr_i[10], 4'b0, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], riscv::OpcodeOpimm}; - // shamt field must be non-zero - if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; - end + 2'b10: begin + // c.andi -> andi rd, rd, imm + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOpImm}; + end - 2'b10: begin - // c.andi -> andi rd, rd, imm - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOpimm}; - end + 2'b11: begin + unique case ({instr_i[12], instr_i[6:5]}) + 3'b000: begin + // c.sub -> sub rd', rd', rs2' + instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp}; + end - 2'b11: begin - unique case ({instr_i[12], instr_i[6:5]}) - 3'b000: begin - // c.sub -> sub rd', rd', rs2' - instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp}; - end + 3'b001: begin + // c.xor -> xor rd', rd', rs2' + instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], riscv::OpcodeOp}; + end - 3'b001: begin - // c.xor -> xor rd', rd', rs2' - instr_o 
= {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], riscv::OpcodeOp}; - end + 3'b010: begin + // c.or -> or rd', rd', rs2' + instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], riscv::OpcodeOp}; + end - 3'b010: begin - // c.or -> or rd', rd', rs2' - instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], riscv::OpcodeOp}; - end + 3'b011: begin + // c.and -> and rd', rd', rs2' + instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOp}; + end - 3'b011: begin - // c.and -> and rd', rd', rs2' - instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], riscv::OpcodeOp}; - end + 3'b100: begin + // c.subw -> subw rd', rd', rs2' + instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32}; + end + 3'b101: begin + // c.addw -> addw rd', rd', rs2' + instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32}; + end - 3'b100: begin - // c.subw -> subw rd', rd', rs2' - instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32}; - end - 3'b101: begin - // c.addw -> addw rd', rd', rs2' - instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], riscv::OpcodeOp32}; - end + 3'b110, + 3'b111: begin + // 100: c.subw + // 101: c.addw + illegal_instr_o = 1'b1; + instr_o = {16'b0, instr_i}; + end + endcase + end + endcase + end - 3'b110, - 3'b111: begin - // 100: c.subw - // 101: c.addw - illegal_instr_o = 1'b1; - instr_o = {16'b0, instr_i[15:0]}; - end - endcase - end - endcase - end + riscv::OpcodeC1J: begin + // 101: c.j -> jal x0, imm + instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], riscv::OpcodeJal}; + end - 
riscv::OpcodeCBeqz, riscv::OpcodeCBnez: begin - // 0: c.beqz -> beq rs1', x0, imm - // 1: c.bnez -> bne rs1', x0, imm - instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], riscv::OpcodeBranch}; - end - endcase + riscv::OpcodeC1Beqz, riscv::OpcodeC1Bnez: begin + // 0: c.beqz -> beq rs1', x0, imm + // 1: c.bnez -> bne rs1', x0, imm + instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], riscv::OpcodeBranch}; + end + endcase end // C2 - 2'b10: begin + riscv::OpcodeC2: begin unique case (instr_i[15:13]) - 3'b000: begin + riscv::OpcodeC2Slli: begin // c.slli -> slli rd, rd, shamt - instr_o = {6'b0, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], riscv::OpcodeOpimm}; + instr_o = {6'b0, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], riscv::OpcodeOpImm}; if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; // register not x0 if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; // shift amount must be non zero end - 3'b010: begin + riscv::OpcodeC2Fldsp: begin + // c.fldsp -> fld rd, imm(x2) + instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], riscv::OpcodeLoadFp}; + if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; + end + + riscv::OpcodeC2Lwsp: begin // c.lwsp -> lw rd, imm(x2) instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02, 3'b010, instr_i[11:7], riscv::OpcodeLoad}; if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; end - 3'b011: begin + riscv::OpcodeC2Ldsp: begin // c.ldsp -> ld rd, imm(x2) instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], riscv::OpcodeLoad}; if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; end - 3'b100: begin + riscv::OpcodeC2JalrMvAdd: begin if (instr_i[12] == 1'b0) begin // c.mv -> add rd/rs1, x0, rs2 instr_o 
= {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], riscv::OpcodeOp}; @@ -231,12 +250,17 @@ module compressed_decoder end end - 3'b110: begin + riscv::OpcodeC2Fsdsp: begin + // c.fsdsp -> fsd rs2, imm(x2) + instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStoreFp}; + end + + riscv::OpcodeC2Swsp: begin // c.swsp -> sw rs2, imm(x2) instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010, instr_i[11:9], 2'b00, riscv::OpcodeStore}; end - 3'b111: begin + riscv::OpcodeC2Sdsp: begin // c.sdsp -> sd rs2, imm(x2) instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, riscv::OpcodeStore}; end diff --git a/src/csr_buffer.sv b/src/csr_buffer.sv index 3afdc1def..9ddac2b65 100644 --- a/src/csr_buffer.sv +++ b/src/csr_buffer.sv @@ -23,13 +23,10 @@ module csr_buffer ( input fu_op operator_i, input logic [63:0] operand_a_i, input logic [63:0] operand_b_i, - input logic [TRANS_ID_BITS-1:0] trans_id_i, // transaction id, needed for WB output logic csr_ready_o, // FU is ready e.g. 
not busy input logic csr_valid_i, // Input is valid - output logic [TRANS_ID_BITS-1:0] csr_trans_id_o, // ID of scoreboard entry at which to write back output logic [63:0] csr_result_o, - output logic csr_valid_o, // transaction id for which the output is the requested one input logic commit_i, // commit the pending CSR OP // to CSR file @@ -43,9 +40,6 @@ module csr_buffer ( } csr_reg_n, csr_reg_q; // control logic, scoreboard signals - assign csr_trans_id_o = trans_id_i; - // CSR instructions for this post buffer are single cycle - assign csr_valid_o = csr_valid_i; assign csr_result_o = operand_a_i; assign csr_addr_o = csr_reg_q.csr_address; diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 98e5abfd4..541d7c2eb 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -21,7 +21,6 @@ module csr_regfile #( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input logic time_irq_i, // Timer threw a interrupt - // send a flush request out if a CSR with a side effect has changed (e.g. 
written) output logic flush_o, output logic halt_csr_o, // halt requested @@ -39,6 +38,8 @@ module csr_regfile #( input logic [11:0] csr_addr_i, // Address of the register to read/write input logic [63:0] csr_wdata_i, // Write data in output logic [63:0] csr_rdata_o, // Read data out + input logic dirty_fp_state_i, // Mark the FP state as dirty + input logic csr_write_fflags_i, // Write fflags register e.g.: we are retiring a floating point instruction input logic [63:0] pc_i, // PC of instruction accessing the CSR output exception_t csr_exception_o, // attempts to access a CSR without appropriate privilege // level or to write a read-only register also @@ -48,6 +49,11 @@ module csr_regfile #( output logic eret_o, // Return from exception, set the PC of epc_o output logic [63:0] trap_vector_base_o, // Output base of exception vector, correct CSR is output (mtvec, stvec) output riscv::priv_lvl_t priv_lvl_o, // Current privilege level the CPU is in + // FPU + output riscv::xs_t fs_o, // Floating point extension status + output logic [4:0] fflags_o, // Floating-Point Accrued Exceptions + output logic [2:0] frm_o, // Floating-Point Dynamic Rounding Mode + output logic [6:0] fprec_o, // Floating-Point Precision Control // MMU output logic en_translation_o, // enable VA translation output logic en_ld_st_translation_o, // enable VA translation for load and stores @@ -87,12 +93,14 @@ module csr_regfile #( logic mret; // return from M-mode exception logic sret; // return from S-mode exception logic dret; // return from debug mode - + // CSR write causes us to mark the FPU state as dirty + logic dirty_fp_state_csr; riscv::csr_t csr_addr; // ---------------- // Assignments // ---------------- assign csr_addr = riscv::csr_t'(csr_addr_i); + assign fs_o = mstatus_q.fs; // ---------------- // CSR Registers // ---------------- @@ -134,6 +142,8 @@ module csr_regfile #( logic [63:0] cycle_q, cycle_d; logic [63:0] instret_q, instret_d; + riscv::fcsr_t fcsr_q, fcsr_d; + // 
---------------- // CSR Read logic // ---------------- @@ -146,6 +156,35 @@ module csr_regfile #( if (csr_read) begin case (csr_addr.address) + riscv::CSR_FFLAGS: begin + if (mstatus_q.fs == riscv::Off) begin + read_access_exception = 1'b1; + end else begin + csr_rdata = {59'b0, fcsr_q.fflags}; + end + end + riscv::CSR_FRM: begin + if (mstatus_q.fs == riscv::Off) begin + read_access_exception = 1'b1; + end else begin + csr_rdata = {61'b0, fcsr_q.frm}; + end + end + riscv::CSR_FCSR: begin + if (mstatus_q.fs == riscv::Off) begin + read_access_exception = 1'b1; + end else begin + csr_rdata = {56'b0, fcsr_q.frm, fcsr_q.fflags}; + end + end + // non-standard extension + riscv::CSR_FTRAN: begin + if (mstatus_q.fs == riscv::Off) begin + read_access_exception = 1'b1; + end else begin + csr_rdata = {57'b0, fcsr_q.fprec}; + end + end // debug registers riscv::CSR_DCSR: csr_rdata = {32'b0, dcsr_q}; riscv::CSR_DPC: csr_rdata = dpc_q; @@ -189,7 +228,7 @@ module csr_regfile #( riscv::CSR_PMPCFG0: csr_rdata = pmpcfg0_q; riscv::CSR_PMPADDR0: csr_rdata = pmpaddr0_q; riscv::CSR_MVENDORID: csr_rdata = 64'b0; // not implemented - riscv::CSR_MARCHID: csr_rdata = 64'b0; // PULP, anonymous source (no allocated ID yet) + riscv::CSR_MARCHID: csr_rdata = ARIANE_MARCHID; riscv::CSR_MIMPID: csr_rdata = 64'b0; // not implemented riscv::CSR_MHARTID: csr_rdata = {53'b0, cluster_id_i[5:0], 1'b0, core_id_i[3:0]}; riscv::CSR_MCYCLE: csr_rdata = cycle_q; @@ -227,7 +266,7 @@ module csr_regfile #( sapt = satp_q; mip = csr_wdata & 64'h33; instret = instret_q; - // only USIP, SSIP, UTIP, STIP are write-able + // only FCSR, USIP, SSIP, UTIP, STIP are write-able eret_o = 1'b0; flush_o = 1'b0; @@ -238,6 +277,8 @@ module csr_regfile #( perf_we_o = 1'b0; perf_data_o = 'b0; + fcsr_d = fcsr_q; + priv_lvl_d = priv_lvl_q; debug_mode_d = debug_mode_q; dcsr_d = dcsr_q; @@ -279,10 +320,51 @@ module csr_regfile #( instret_d = instret_q; en_ld_st_translation_d = en_ld_st_translation_q; - + dirty_fp_state_csr = 1'b0; 
// check for correct access rights and that we are writing if (csr_we) begin case (csr_addr.address) + // Floating-Point + riscv::CSR_FFLAGS: begin + if (mstatus_q.fs == riscv::Off) begin + update_access_exception = 1'b1; + end else begin + dirty_fp_state_csr = 1'b1; + fcsr_d.fflags = csr_wdata[4:0]; + // this instruction has side-effects + flush_o = 1'b1; + end + end + riscv::CSR_FRM: begin + if (mstatus_q.fs == riscv::Off) begin + update_access_exception = 1'b1; + end else begin + dirty_fp_state_csr = 1'b1; + fcsr_d.frm = csr_wdata[2:0]; + // this instruction has side-effects + flush_o = 1'b1; + end + end + riscv::CSR_FCSR: begin + if (mstatus_q.fs == riscv::Off) begin + update_access_exception = 1'b1; + end else begin + dirty_fp_state_csr = 1'b1; + fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space + // this instruction has side-effects + flush_o = 1'b1; + end + end + riscv::CSR_FTRAN: begin + if (mstatus_q.fs == riscv::Off) begin + update_access_exception = 1'b1; + end else begin + dirty_fp_state_csr = 1'b1; + fcsr_d.fprec = csr_wdata[6:0]; // ignore writes to reserved space + // this instruction has side-effects + flush_o = 1'b1; + end + end // debug CSR riscv::CSR_DCSR: begin dcsr_d = csr_wdata[31:0]; @@ -306,10 +388,13 @@ module csr_regfile #( // also hardwire the registers for sstatus mstatus_d.sxl = riscv::XLEN_64; mstatus_d.uxl = riscv::XLEN_64; - // hardwired zero registers - mstatus_d.sd = 1'b0; - mstatus_d.xs = 2'b0; - mstatus_d.fs = 2'b0; + // hardwired extension registers + mstatus_d.sd = (&mstatus_q.xs) | (&mstatus_q.fs); + mstatus_d.xs = riscv::Off; + // hardwire to zero if floating point extension is not present + if (!FP_PRESENT) begin + mstatus_d.fs = riscv::Off; + end mstatus_d.upie = 1'b0; mstatus_d.uie = 1'b0; // not all fields of mstatus can be written @@ -366,9 +451,11 @@ module csr_regfile #( mstatus_d.sxl = riscv::XLEN_64; mstatus_d.uxl = riscv::XLEN_64; // hardwired zero registers - mstatus_d.sd = 1'b0; - mstatus_d.xs = 
2'b0; - mstatus_d.fs = 2'b0; + mstatus_d.sd = (&mstatus_q.xs) | (&mstatus_q.fs); + mstatus_d.xs = riscv::Off; + if (!FP_PRESENT) begin + mstatus_d.fs = riscv::Off; + end mstatus_d.upie = 1'b0; mstatus_d.uie = 1'b0; // this register has side-effects on other registers, flush the pipeline @@ -426,6 +513,16 @@ module csr_regfile #( default: update_access_exception = 1'b1; endcase end + + // mark the floating point extension register as dirty + if (FP_PRESENT && (dirty_fp_state_csr || dirty_fp_state_i)) begin + mstatus_d.fs = riscv::Dirty; + end + + // write the floating point status register + if (csr_write_fflags_i) + fcsr_d.fflags = csr_wdata_i[4:0] | fcsr_q.fflags; + // --------------------- // External Interrupts // --------------------- @@ -541,9 +638,21 @@ module csr_regfile #( end // single step enable and we just retired an instruction - if (dcsr_q.step && (|commit_ack_i)) begin - // we saved the correct target address during execute - dpc_d = commit_instr_i[0].bp.predict_address; + if (dcsr_q.step && commit_ack_i[0]) begin + // valid CTRL flow change + if (commit_instr_i[0].fu == CTRL_FLOW) begin + // we saved the correct target address during execute + dpc_d = commit_instr_i[0].bp.predict_address; + // exception valid + end else if (ex_i.valid) begin + dpc_d = trap_vector_base_o; + // return from environment + end else if (eret_o) begin + dpc_d = epc_o; + // consecutive PC + end else begin + dpc_d = commit_instr_i[0].pc + (commit_instr_i[0].is_compressed ? 'h2 : 'h4); + end debug_mode_d = 1'b1; set_debug_pc_o = 1'b1; dcsr_d.cause = dm::CauseSingleStep; @@ -807,6 +916,10 @@ module csr_regfile #( assign csr_rdata_o = csr_rdata; // in debug mode we execute with privilege level M assign priv_lvl_o = (debug_mode_q) ? 
riscv::PRIV_LVL_M : priv_lvl_q; + // FPU outputs + assign fflags_o = fcsr_q.fflags; + assign frm_o = fcsr_q.frm; + assign fprec_o = fcsr_q.fprec; // MMU outputs assign satp_ppn_o = satp_q.ppn; assign asid_o = satp_q.asid[ASID_WIDTH-1:0]; @@ -829,6 +942,8 @@ module csr_regfile #( always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin priv_lvl_q <= riscv::PRIV_LVL_M; + // floating-point registers + fcsr_q <= 64'b0; // debug signals debug_mode_q <= 1'b0; dcsr_q <= '0; @@ -866,6 +981,8 @@ module csr_regfile #( wfi_q <= 1'b0; end else begin priv_lvl_q <= priv_lvl_d; + // floating-point registers + fcsr_q <= fcsr_d; // debug signals debug_mode_q <= debug_mode_d; dcsr_q <= dcsr_d; diff --git a/src/debug/dm_csrs.sv b/src/debug/dm_csrs.sv index aee8177ab..f3a0aaca3 100644 --- a/src/debug/dm_csrs.sv +++ b/src/debug/dm_csrs.sv @@ -438,14 +438,26 @@ module dm_csrs #( end assign dmactive_o = dmcontrol_q.dmactive; - // if the PoR is set we want to re-set the other system as well - assign ndmreset_o = dmcontrol_q.ndmreset | (~rst_ni); assign cmd_o = command_q; assign progbuf_o = progbuf_q; assign data_o = data_q; assign resp_queue_pop = dmi_resp_ready_i & ~resp_queue_empty; + logic ndmreset_n; + + // if the PoR is set we want to re-set the other system as well + rstgen_bypass i_rstgen_bypass ( + .clk_i ( clk_i ), + .rst_ni ( ~(dmcontrol_q.ndmreset | ~rst_ni) ), + .rst_test_mode_ni ( rst_ni ), + .test_mode_i ( testmode_i ), + .rst_no ( ndmreset_n ), + .init_no () // keep open + ); + + assign ndmreset_o = ~ndmreset_n; + // response FIFO fifo_v2 #( .dtype ( logic [31:0] ), @@ -468,9 +480,19 @@ module dm_csrs #( always_ff @(posedge clk_i or negedge rst_ni) begin // PoR if (~rst_ni) begin - dmcontrol_q <= '0; - havereset_q <= '1; + dmcontrol_q <= '0; + havereset_q <= '1; + // this is the only write-able bit during reset + cmderr_q <= dm::CmdErrNone; + command_q <= '0; + abstractauto_q <= '0; + progbuf_q <= '0; + data_q <= '0; + sbcs_q <= '0; + sbaddr_q <= '0; + sbdata_q 
<= '0; end else begin + havereset_q <= havereset_d; // synchronous re-set of debug module, active-low, except for dmactive if (!dmcontrol_q.dmactive) begin dmcontrol_q.haltreq <= '0; @@ -495,7 +517,6 @@ module dm_csrs #( sbaddr_q <= '0; sbdata_q <= '0; end else begin - havereset_q <= havereset_d; dmcontrol_q <= dmcontrol_d; cmderr_q <= cmderr_d; command_q <= command_d; @@ -508,4 +529,4 @@ module dm_csrs #( end end end -endmodule +endmodule \ No newline at end of file diff --git a/src/debug/dm_mem.sv b/src/debug/dm_mem.sv index 36d1f63be..3cfb09c2d 100644 --- a/src/debug/dm_mem.sv +++ b/src/debug/dm_mem.sv @@ -20,7 +20,7 @@ module dm_mem #( parameter int NrHarts = -1 )( input logic clk_i, // Clock - input logic dmactive_i, // debug module reset + input logic rst_ni, // debug module reset output logic [NrHarts-1:0] debug_req_o, input logic [19:0] hartsel_i, @@ -363,8 +363,8 @@ module dm_mem #( // the ROM base address assign fwd_rom_d = (addr_i[DbgAddressBits-1:0] >= dm::HaltAddress[DbgAddressBits-1:0]) ? 1'b1 : 1'b0; - always_ff @(posedge clk_i) begin - if (~dmactive_i) begin + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin fwd_rom_q <= 1'b0; rdata_q <= '0; halted_q <= 1'b0; diff --git a/src/debug/dm_sba.sv b/src/debug/dm_sba.sv index d316982a4..7b46d92cb 100644 --- a/src/debug/dm_sba.sv +++ b/src/debug/dm_sba.sv @@ -18,6 +18,7 @@ module dm_sba ( input logic clk_i, // Clock + input logic rst_ni, input logic dmactive_i, // synchronous reset active low AXI_BUS.Master axi_master, @@ -111,7 +112,7 @@ module dm_sba ( end endcase // handle error case - if (sbaccess_i > 3 && state_d != Idle) begin + if (sbaccess_i > 3 && state_q != Idle) begin req = 1'b0; state_d = Idle; sberror_valid_o = 1'b1; @@ -120,35 +121,36 @@ module dm_sba ( // further error handling should go here ... 
end - always_ff @(posedge clk_i) begin - if (~dmactive_i) begin + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin state_q <= Idle; end else begin state_q <= state_d; end end + axi_adapter #( - .DATA_WIDTH ( 64 ) + .DATA_WIDTH ( 64 ) ) i_axi_master ( - .clk_i ( clk_i ), - .rst_ni ( dmactive_i ), - .req_i ( req ), - .type_i ( std_cache_pkg::SINGLE_REQ), - .gnt_o ( gnt ), - .gnt_id_o ( ), - .addr_i ( address ), - .we_i ( we ), - .wdata_i ( sbdata_i ), - .be_i ( be ), - .size_i ( sbaccess_i[1:0] ), - .id_i ( '0 ), - .valid_o ( sbdata_valid_o ), - .rdata_o ( sbdata_o ), - .id_o ( ), - .critical_word_o ( ), // not needed here - .critical_word_valid_o ( ), // not needed here - .axi ( axi_master ) + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .req_i ( req ), + .type_i ( std_cache_pkg::SINGLE_REQ ), + .gnt_o ( gnt ), + .gnt_id_o ( ), + .addr_i ( address ), + .we_i ( we ), + .wdata_i ( sbdata_i ), + .be_i ( be ), + .size_i ( sbaccess_i[1:0] ), + .id_i ( '0 ), + .valid_o ( sbdata_valid_o ), + .rdata_o ( sbdata_o ), + .id_o ( ), + .critical_word_o ( ), // not needed here + .critical_word_valid_o ( ), // not needed here + .axi ( axi_master ) ); diff --git a/src/debug/dm_top.sv b/src/debug/dm_top.sv index 5e3431677..303c414af 100644 --- a/src/debug/dm_top.sv +++ b/src/debug/dm_top.sv @@ -143,6 +143,7 @@ module dm_top #( dm_sba i_dm_sba ( .clk_i ( clk_i ), + .rst_ni ( rst_ni ), .dmactive_i ( dmactive_o ), .axi_master, .sbaddress_i ( sbaddress_csrs_sba ), @@ -166,7 +167,7 @@ module dm_top #( .NrHarts (NrHarts) ) i_dm_mem ( .clk_i ( clk_i ), - .dmactive_i ( dmactive_o ), + .rst_ni ( rst_ni ), .debug_req_o ( debug_req_o ), .hartsel_i ( hartsel ), .haltreq_i ( haltreq ), @@ -197,7 +198,7 @@ module dm_top #( .AXI_USER_WIDTH ( AxiUserWidth ) ) i_axi2mem ( .clk_i ( clk_i ), - .rst_ni ( dmactive_o ), + .rst_ni ( rst_ni ), .slave ( axi_slave ), .req_o ( req ), .we_o ( we ), diff --git a/src/debug/dmi_jtag.sv b/src/debug/dmi_jtag.sv index 430ccae51..49df7ce5b 100644 
--- a/src/debug/dmi_jtag.sv +++ b/src/debug/dmi_jtag.sv @@ -19,9 +19,9 @@ module dmi_jtag ( input logic clk_i, // DMI Clock input logic rst_ni, // Asynchronous reset active low + input logic testmode_i, output logic dmi_rst_no, // hard reset - output dm::dmi_req_t dmi_req_o, output logic dmi_req_valid_o, input logic dmi_req_ready_i, @@ -37,7 +37,7 @@ module dmi_jtag ( output logic td_o, // JTAG test data output pad output logic tdo_oe_o // Data out output enable ); - assign dmi_rst_no = 1'b1; + assign dmi_rst_no = rst_ni; logic test_logic_reset; logic shift_dr; @@ -218,6 +218,7 @@ module dmi_jtag ( .td_i, .td_o, .tdo_oe_o, + .testmode_i ( testmode_i ), .test_logic_reset_o ( test_logic_reset ), .shift_dr_o ( shift_dr ), .update_dr_o ( update_dr ), diff --git a/src/debug/dmi_jtag_tap.sv b/src/debug/dmi_jtag_tap.sv index 5d55bacb1..ae4b2fcfb 100644 --- a/src/debug/dmi_jtag_tap.sv +++ b/src/debug/dmi_jtag_tap.sv @@ -25,6 +25,7 @@ module dmi_jtag_tap #( input logic td_i, // JTAG test data input pad output logic td_o, // JTAG test data output pad output logic tdo_oe_o, // Data out output enable + input logic testmode_i, output logic test_logic_reset_o, output logic shift_dr_o, output logic update_dr_o, @@ -207,8 +208,23 @@ module dmi_jtag_tap #( end - // TDO changes state at negative edge of TCK - always_ff @(negedge tck_i, negedge trst_ni) begin + // DFT + logic tck_n, tck_ni; + + cluster_clock_inverter i_tck_inv ( + .clk_i ( tck_i ), + .clk_o ( tck_ni ) + ); + + pulp_clock_mux2 i_dft_tck_mux ( + .clk0_i ( tck_ni ), + .clk1_i ( tck_i ), // bypass the inverted clock for testing + .clk_sel_i ( testmode_i ), + .clk_o ( tck_n ) + ); + + // TDO changes state at negative edge of TCK + always_ff @(posedge tck_n, negedge trst_ni) begin if (~trst_ni) begin td_o <= 1'b0; tdo_oe_o <= 1'b0; diff --git a/src/decoder.sv b/src/decoder.sv index efb31c5ad..4362d551e 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -30,6 +30,8 @@ module decoder ( // From CSR input riscv::priv_lvl_t 
priv_lvl_i, // current privilege level input logic debug_mode_i, // we are in debug mode + input riscv::xs_t fs_i, // floating point extension status + input logic [2:0] frm_i, // floating-point dynamic rounding mode input logic tvm_i, // trap virtual memory input logic tw_i, // timeout wait input logic tsr_i, // trap sret @@ -41,13 +43,15 @@ module decoder ( logic ecall; // this instruction is a software break-point logic ebreak; + // this instruction needs floating-point rounding-mode verification + logic check_fprm; riscv::instruction_t instr; assign instr = riscv::instruction_t'(instruction_i); // -------------------- // Immediate select // -------------------- enum logic[3:0] { - NOIMM, IIMM, SIMM, SBIMM, UIMM, JIMM + NOIMM, IIMM, SIMM, SBIMM, UIMM, JIMM, RS3 } imm_select; logic [63:0] imm_i_type; @@ -63,6 +67,7 @@ module decoder ( is_control_flow_instr_o = 1'b0; illegal_instr = 1'b0; instruction_o.pc = pc_i; + instruction_o.trans_id = 5'b0; instruction_o.fu = NONE; instruction_o.op = ADD; instruction_o.rs1 = '0; @@ -75,6 +80,7 @@ module decoder ( instruction_o.bp = branch_predict_i; ecall = 1'b0; ebreak = 1'b0; + check_fprm = 1'b0; if (~ex_i.valid) begin case (instr.rtype.opcode) @@ -208,7 +214,7 @@ module decoder ( endcase end // Memory ordering instructions - riscv::OpcodeFence: begin + riscv::OpcodeMiscMem: begin instruction_o.fu = CSR; instruction_o.rs1 = '0; instruction_o.rs2 = '0; @@ -235,35 +241,268 @@ module decoder ( // Reg-Reg Operations // -------------------------- riscv::OpcodeOp: begin - instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? 
MULT : ALU; - instruction_o.rs1[4:0] = instr.rtype.rs1; - instruction_o.rs2[4:0] = instr.rtype.rs2; - instruction_o.rd[4:0] = instr.rtype.rd; + // -------------------------------------------- + // Vectorial Floating-Point Reg-Reg Operations + // -------------------------------------------- + if (instr.rvftype.funct2 == 2'b10) begin // Prefix 10 for all Xfvec ops + // only generate decoder if FP extensions are enabled (static) + if (FP_PRESENT && XFVEC && fs_i != riscv::Off) begin + automatic logic allow_replication; // control honoring of replication flag - unique case ({instr.rtype.funct7, instr.rtype.funct3}) - {7'b000_0000, 3'b000}: instruction_o.op = ADD; // Add - {7'b010_0000, 3'b000}: instruction_o.op = SUB; // Sub - {7'b000_0000, 3'b010}: instruction_o.op = SLTS; // Set Lower Than - {7'b000_0000, 3'b011}: instruction_o.op = SLTU; // Set Lower Than Unsigned - {7'b000_0000, 3'b100}: instruction_o.op = XORL; // Xor - {7'b000_0000, 3'b110}: instruction_o.op = ORL; // Or - {7'b000_0000, 3'b111}: instruction_o.op = ANDL; // And - {7'b000_0000, 3'b001}: instruction_o.op = SLL; // Shift Left Logical - {7'b000_0000, 3'b101}: instruction_o.op = SRL; // Shift Right Logical - {7'b010_0000, 3'b101}: instruction_o.op = SRA; // Shift Right Arithmetic - // Multiplications - {7'b000_0001, 3'b000}: instruction_o.op = MUL; - {7'b000_0001, 3'b001}: instruction_o.op = MULH; - {7'b000_0001, 3'b010}: instruction_o.op = MULHSU; - {7'b000_0001, 3'b011}: instruction_o.op = MULHU; - {7'b000_0001, 3'b100}: instruction_o.op = DIV; - {7'b000_0001, 3'b101}: instruction_o.op = DIVU; - {7'b000_0001, 3'b110}: instruction_o.op = REM; - {7'b000_0001, 3'b111}: instruction_o.op = REMU; - default: begin + instruction_o.fu = FPU_VEC; // Same unit, but sets 'vectorial' signal + instruction_o.rs1[4:0] = instr.rvftype.rs1; + instruction_o.rs2[4:0] = instr.rvftype.rs2; + instruction_o.rd[4:0] = instr.rvftype.rd; + check_fprm = 1'b1; + allow_replication = 1'b1; + // decode vectorial FP instruction + 
unique case (instr.rvftype.vecfltop) + 5'b00001 : begin + instruction_o.op = FADD; // vfadd.vfmt - Vectorial FP Addition + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2 = instr.rvftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00010 : begin + instruction_o.op = FSUB; // vfsub.vfmt - Vectorial FP Subtraction + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2 = instr.rvftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00011 : instruction_o.op = FMUL; // vfmul.vfmt - Vectorial FP Multiplication + 5'b00100 : instruction_o.op = FDIV; // vfdiv.vfmt - Vectorial FP Division + 5'b00101 : begin + instruction_o.op = VFMIN; // vfmin.vfmt - Vectorial FP Minimum + check_fprm = 1'b0; // rounding mode irrelevant + end + 5'b00110 : begin + instruction_o.op = VFMAX; // vfmax.vfmt - Vectorial FP Maximum + check_fprm = 1'b0; // rounding mode irrelevant + end + 5'b00111 : begin + instruction_o.op = FSQRT; // vfsqrt.vfmt - Vectorial FP Square Root + allow_replication = 1'b0; // only one operand + if (instr.rvftype.rs2 != 5'b00000) illegal_instr = 1'b1; // rs2 must be 0 + end + 5'b01000 : begin + instruction_o.op = FMADD; // vfmac.vfmt - Vectorial FP Multiply-Accumulate + imm_select = SIMM; // rd into result field (upper bits don't matter) + end + 5'b01001 : begin + instruction_o.op = FMSUB; // vfmre.vfmt - Vectorial FP Multiply-Reduce + imm_select = SIMM; // rd into result field (upper bits don't matter) + end + 5'b01100 : begin + unique case (instr.rvftype.rs2) inside // operation encoded in rs2, `inside` for matching ? 
+ 5'b00000 : begin + instruction_o.rs2 = instr.rvftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + if (instr.rvftype.repl) + instruction_o.op = FMV_F2X; // vfmv.x.vfmt - FPR to GPR Move + else + instruction_o.op = FMV_X2F; // vfmv.vfmt.x - GPR to FPR Move + check_fprm = 1'b0; // no rounding for moves + end + 5'b00001 : begin + instruction_o.op = FCLASS; // vfclass.vfmt - Vectorial FP Classify + check_fprm = 1'b0; // no rounding for classification + allow_replication = 1'b0; // R must not be set + end + 5'b00010 : instruction_o.op = FCVT_F2I; // vfcvt.x.vfmt - Vectorial FP to Int Conversion + 5'b00011 : instruction_o.op = FCVT_I2F; // vfcvt.vfmt.x - Vectorial Int to FP Conversion + 5'b001?? : begin + instruction_o.op = FCVT_F2F; // vfcvt.vfmt.vfmt - Vectorial FP to FP Conversion + instruction_o.rs2 = instr.rvftype.rd; // set rs2 = rd as target vector for conversion + imm_select = IIMM; // rs2 holds part of the instruction + // TODO CHECK R bit for valid fmt combinations + // determine source format + unique case (instr.rvftype.rs2[21:20]) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~RVFVEC) illegal_instr = 1'b1; + 2'b01: if (~XF16ALTVEC) illegal_instr = 1'b1; + 2'b10: if (~XF16VEC) illegal_instr = 1'b1; + 2'b11: if (~XF8VEC) illegal_instr = 1'b1; + default : illegal_instr = 1'b1; + endcase + end + default : illegal_instr = 1'b1; + endcase + end + 5'b01101 : begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = VFSGNJ; // vfsgnj.vfmt - Vectorial FP Sign Injection + end + 5'b01110 : begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = VFSGNJN; // vfsgnjn.vfmt - Vectorial FP Negated Sign Injection + end + 5'b01111 : begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = VFSGNJX; // vfsgnjx.vfmt - Vectorial FP XORed Sign Injection + end + 5'b10000 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op =
VFEQ; // vfeq.vfmt - Vectorial FP Equality + end + 5'b10001 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFNE; // vfne.vfmt - Vectorial FP Non-Equality + end + 5'b10010 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFLT; // vflt.vfmt - Vectorial FP Less Than + end + 5'b10011 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFGE; // vfge.vfmt - Vectorial FP Greater or Equal + end + 5'b10100 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFLE; // vfle.vfmt - Vectorial FP Less or Equal + end + 5'b10101 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFGT; // vfgt.vfmt - Vectorial FP Greater Than + end + 5'b11000 : begin + instruction_o.op = VFCPKAB_S; // vfcpka/b.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, lowest 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~RVF) illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: begin + if (~RVFVEC) illegal_instr = 1'b1; // destination vector not supported + if (instr.rvftype.repl) illegal_instr = 1'b1; // no entries 2/3 in vector of 2 fp32 + end + 2'b01: begin + if (~XF16ALTVEC) illegal_instr = 1'b1; // destination vector not supported + end + 2'b10: begin + if (~XF16VEC) illegal_instr = 1'b1; // destination vector not supported + end + 2'b11: begin + if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported + end + default : illegal_instr = 1'b1; + endcase + end + 5'b11001 : begin + instruction_o.op = VFCPKCD_S; // vfcpkc/d.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, second 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~RVF) illegal_instr = 1'b1; // if we don't support
RVF, we can't cast from FP32 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: illegal_instr = 1'b1; // no entries 4-7 in vector of 2 FP32 + 2'b01: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16ALT + 2'b10: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16 + 2'b11: begin + if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported + end + default : illegal_instr = 1'b1; + endcase + end + 5'b11010 : begin + instruction_o.op = VFCPKAB_D; // vfcpka/b.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, lowest 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~RVD) illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: begin + if (~RVFVEC) illegal_instr = 1'b1; // destination vector not supported + if (instr.rvftype.repl) illegal_instr = 1'b1; // no entries 2/3 in vector of 2 fp32 + end + 2'b01: begin + if (~XF16ALTVEC) illegal_instr = 1'b1; // destination vector not supported + end + 2'b10: begin + if (~XF16VEC) illegal_instr = 1'b1; // destination vector not supported + end + 2'b11: begin + if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported + end + default : illegal_instr = 1'b1; + endcase + end + 5'b11011 : begin + instruction_o.op = VFCPKCD_D; // vfcpkc/d.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, second 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) + if (~RVD) illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: illegal_instr = 1'b1; // no entries 4-7
in vector of 2 FP32 + 2'b01: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16ALT + 2'b10: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16 + 2'b11: begin + if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported + end + default : illegal_instr = 1'b1; + endcase + end + default : illegal_instr = 1'b1; + endcase + + // check format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~RVFVEC) illegal_instr = 1'b1; + 2'b01: if (~XF16ALTVEC) illegal_instr = 1'b1; + 2'b10: if (~XF16VEC) illegal_instr = 1'b1; + 2'b11: if (~XF8VEC) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check disallowed replication + if (~allow_replication & instr.rvftype.repl) illegal_instr = 1'b1; + + // check rounding mode + if (check_fprm) begin + unique case (frm_i) inside // actual rounding mode from frm csr + [3'b000:3'b100]: ; //legal rounding modes + default : illegal_instr = 1'b1; + endcase + end + + end else begin // No vectorial FP enabled (static) illegal_instr = 1'b1; end - endcase + + // --------------------------- + // Integer Reg-Reg Operations + // --------------------------- + end else begin + instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? 
MULT : ALU; + instruction_o.rs1 = instr.rtype.rs1; + instruction_o.rs2 = instr.rtype.rs2; + instruction_o.rd = instr.rtype.rd; + + unique case ({instr.rtype.funct7, instr.rtype.funct3}) + {7'b000_0000, 3'b000}: instruction_o.op = ADD; // Add + {7'b010_0000, 3'b000}: instruction_o.op = SUB; // Sub + {7'b000_0000, 3'b010}: instruction_o.op = SLTS; // Set Lower Than + {7'b000_0000, 3'b011}: instruction_o.op = SLTU; // Set Lower Than Unsigned + {7'b000_0000, 3'b100}: instruction_o.op = XORL; // Xor + {7'b000_0000, 3'b110}: instruction_o.op = ORL; // Or + {7'b000_0000, 3'b111}: instruction_o.op = ANDL; // And + {7'b000_0000, 3'b001}: instruction_o.op = SLL; // Shift Left Logical + {7'b000_0000, 3'b101}: instruction_o.op = SRL; // Shift Right Logical + {7'b010_0000, 3'b101}: instruction_o.op = SRA; // Shift Right Arithmetic + // Multiplications + {7'b000_0001, 3'b000}: instruction_o.op = MUL; + {7'b000_0001, 3'b001}: instruction_o.op = MULH; + {7'b000_0001, 3'b010}: instruction_o.op = MULHSU; + {7'b000_0001, 3'b011}: instruction_o.op = MULHU; + {7'b000_0001, 3'b100}: instruction_o.op = DIV; + {7'b000_0001, 3'b101}: instruction_o.op = DIVU; + {7'b000_0001, 3'b110}: instruction_o.op = REM; + {7'b000_0001, 3'b111}: instruction_o.op = REMU; + default: begin + illegal_instr = 1'b1; + end + endcase + end end // -------------------------- @@ -293,7 +532,7 @@ module decoder ( // -------------------------------- // Reg-Immediate Operations // -------------------------------- - riscv::OpcodeOpimm: begin + riscv::OpcodeOpImm: begin instruction_o.fu = ALU; imm_select = IIMM; instruction_o.rs1[4:0] = instr.itype.rs1; @@ -327,7 +566,7 @@ module decoder ( // -------------------------------- // 32 bit Reg-Immediate Operations // -------------------------------- - riscv::OpcodeOpimm32: begin + riscv::OpcodeOpImm32: begin instruction_o.fu = ALU; imm_select = IIMM; instruction_o.rs1[4:0] = instr.itype.rs1; @@ -390,6 +629,264 @@ module decoder ( endcase end + // 
-------------------------------- + // Floating-Point Load/store + // -------------------------------- + riscv::OpcodeStoreFp: begin + if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = STORE; + imm_select = SIMM; + instruction_o.rs1 = instr.stype.rs1; + instruction_o.rs2 = instr.stype.rs2; + // determine store size + unique case (instr.stype.funct3) + // Only process instruction if corresponding extension is active (static) + 3'b000: if (XF8) instruction_o.op = FSB; + else illegal_instr = 1'b1; + 3'b001: if (XF16 | XF16ALT) instruction_o.op = FSH; + else illegal_instr = 1'b1; + 3'b010: if (RVF) instruction_o.op = FSW; + else illegal_instr = 1'b1; + 3'b011: if (RVD) instruction_o.op = FSD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end else + illegal_instr = 1'b1; + end + + riscv::OpcodeLoadFp: begin + if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = LOAD; + imm_select = IIMM; + instruction_o.rs1 = instr.itype.rs1; + instruction_o.rd = instr.itype.rd; + // determine load size + unique case (instr.itype.funct3) + // Only process instruction if corresponding extension is active (static) + 3'b000: if (XF8) instruction_o.op = FLB; + else illegal_instr = 1'b1; + 3'b001: if (XF16 | XF16ALT) instruction_o.op = FLH; + else illegal_instr = 1'b1; + 3'b010: if (RVF) instruction_o.op = FLW; + else illegal_instr = 1'b1; + 3'b011: if (RVD) instruction_o.op = FLD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end else + illegal_instr = 1'b1; + end + + // ---------------------------------- + // Floating-Point Reg-Reg Operations + // ---------------------------------- + riscv::OpcodeMadd, + riscv::OpcodeMsub, + riscv::OpcodeNmsub, + riscv::OpcodeNmadd: begin + if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + 
instruction_o.fu = FPU; + instruction_o.rs1 = instr.r4type.rs1; + instruction_o.rs2 = instr.r4type.rs2; + instruction_o.rd = instr.r4type.rd; + imm_select = RS3; // rs3 into result field + check_fprm = 1'b1; + // select the correct fused operation + unique case (instr.r4type.opcode) + default: instruction_o.op = FMADD; // fmadd.fmt - FP Fused multiply-add + riscv::OpcodeMsub: instruction_o.op = FMSUB; // fmsub.fmt - FP Fused multiply-subtract + riscv::OpcodeNmsub: instruction_o.op = FNMSUB; // fnmsub.fmt - FP Negated fused multiply-subtract + riscv::OpcodeNmadd: instruction_o.op = FNMADD; // fnmadd.fmt - FP Negated fused multiply-add + endcase + + // determine fp format + unique case (instr.r4type.funct2) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~RVF) illegal_instr = 1'b1; + 2'b01: if (~RVD) illegal_instr = 1'b1; + 2'b10: if (~XF16 & ~XF16ALT) illegal_instr = 1'b1; + 2'b11: if (~XF8) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check rounding mode + if (check_fprm) begin + unique case (instr.rftype.rm) inside + [3'b000:3'b100]: ; //legal rounding modes + 3'b101: begin // Alternative Half-Precision encoded as fmt=10 and rm=101 + if (~XF16ALT || instr.rftype.fmt != 2'b10) + illegal_instr = 1'b1; + unique case (frm_i) inside // actual rounding mode from frm csr + [3'b000:3'b100]: ; //legal rounding modes + default : illegal_instr = 1'b1; + endcase + end + 3'b111: begin + // rounding mode from frm csr + unique case (frm_i) inside + [3'b000:3'b100]: ; //legal rounding modes + default : illegal_instr = 1'b1; + endcase + end + default : illegal_instr = 1'b1; + endcase + end + end else begin + illegal_instr = 1'b1; + end + end + + riscv::OpcodeOpFp: begin + if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) + instruction_o.fu = FPU; + instruction_o.rs1 = instr.rftype.rs1; + instruction_o.rs2 = instr.rftype.rs2; + instruction_o.rd =
instr.rftype.rd; + check_fprm = 1'b1; + // decode FP instruction + unique case (instr.rftype.funct5) + 5'b00000: begin + instruction_o.op = FADD; // fadd.fmt - FP Addition + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2 = instr.rftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00001: begin + instruction_o.op = FSUB; // fsub.fmt - FP Subtraction + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2 = instr.rftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00010: instruction_o.op = FMUL; // fmul.fmt - FP Multiplication + 5'b00011: instruction_o.op = FDIV; // fdiv.fmt - FP Division + 5'b01011: begin + instruction_o.op = FSQRT; // fsqrt.fmt - FP Square Root + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + 5'b00100: begin + instruction_o.op = FSGNJ; // fsgn{j[n]/jx}.fmt - FP Sign Injection + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr.rftype.rm inside {[3'b000:3'b010], [3'b100:3'b110]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000:3'b010]})) + illegal_instr = 1'b1; + end + end + 5'b00101: begin + instruction_o.op = FMIN_MAX; // fmin/fmax.fmt - FP Minimum / Maximum + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr.rftype.rm inside {[3'b000:3'b001], [3'b100:3'b101]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000:3'b001]})) + illegal_instr = 1'b1; + end + end + 5'b01000: begin + instruction_o.op = FCVT_F2F; // fcvt.fmt.fmt - FP to FP Conversion + instruction_o.rs2 = instr.rvftype.rs1; // tie rs2 to rs1 to be safe (vectors use rs2) + imm_select = IIMM; // rs2 holds part of the instruction + if
(instr.rftype.rs2[24:23]) illegal_instr = 1'b1; // bits [22:20] used, other bits must be 0 + // check source format + unique case (instr.rftype.rs2[22:20]) + // Only process instruction if corresponding extension is active (static) + 3'b000: if (~RVF) illegal_instr = 1'b1; + 3'b001: if (~RVD) illegal_instr = 1'b1; + 3'b010: if (~XF16) illegal_instr = 1'b1; + 3'b110: if (~XF16ALT) illegal_instr = 1'b1; + 3'b011: if (~XF8) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end + 5'b10100: begin + instruction_o.op = FCMP; // feq/flt/fle.fmt - FP Comparisons + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr.rftype.rm inside {[3'b000:3'b010], [3'b100:3'b110]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000:3'b010]})) + illegal_instr = 1'b1; + end + end + 5'b11000: begin + instruction_o.op = FCVT_F2I; // fcvt.ifmt.fmt - FP to Int Conversion + imm_select = IIMM; // rs2 holds part of the instruction + if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + end + 5'b11010: begin + instruction_o.op = FCVT_I2F; // fcvt.fmt.ifmt - Int to FP Conversion + imm_select = IIMM; // rs2 holds part of the instruction + if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + end + 5'b11100: begin + instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (instr.rftype.rm == 3'b000 || (XF16ALT && instr.rftype.rm == 3'b100)) // FP16ALT has separate encoding + instruction_o.op = FMV_F2X; // fmv.ifmt.fmt - FPR to GPR Move + else if (instr.rftype.rm == 3'b001 || (XF16ALT && instr.rftype.rm == 3'b101)) // FP16ALT has separate encoding + instruction_o.op = FCLASS; // fclass.fmt - FP Classify + else illegal_instr = 1'b1; + // rs2 must be zero + if 
(instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + 5'b11110: begin + instruction_o.op = FMV_X2F; // fmv.fmt.ifmt - GPR to FPR Move + instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (!(instr.rftype.rm == 3'b000 || (XF16ALT && instr.rftype.rm == 3'b100))) + illegal_instr = 1'b1; + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + default : illegal_instr = 1'b1; + endcase + + // check format + unique case (instr.rftype.fmt) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~RVF) illegal_instr = 1'b1; + 2'b01: if (~RVD) illegal_instr = 1'b1; + 2'b10: if (~XF16 & ~XF16ALT) illegal_instr = 1'b1; + 2'b11: if (~XF8) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check rounding mode + if (check_fprm) begin + unique case (instr.rftype.rm) inside + [3'b000:3'b100]: ; //legal rounding modes + 3'b101: begin // Alternative Half-Precision encoded as fmt=10 and rm=101 + if (~XF16ALT || instr.rftype.fmt != 2'b10) + illegal_instr = 1'b1; + unique case (frm_i) inside // actual rounding mode from frm csr + [3'b000:3'b100]: ; //legal rounding modes + default : illegal_instr = 1'b1; + endcase + end + 3'b111: begin + // rounding mode from frm csr + unique case (frm_i) inside + [3'b000:3'b100]: ; //legal rounding modes + default : illegal_instr = 1'b1; + endcase + end + default : illegal_instr = 1'b1; + endcase + end + end else begin + illegal_instr = 1'b1; + end + end + + // ---------------------------------- + // Atomic Operations + // ---------------------------------- riscv::OpcodeAmo: begin // we are going to use the load unit for AMOs instruction_o.fu = STORE; @@ -398,7 +895,7 @@ module decoder ( instruction_o.rd[4:0] = instr.atype.rd; // TODO(zarubaf): Ordering // words - if (instr.stype.funct3 == 3'h2) begin + if (RVA && instr.stype.funct3 == 3'h2) begin
unique case (instr.instr[31:27]) 5'h0: instruction_o.op = AMO_ADDW; 5'h1: instruction_o.op = AMO_SWAPW; @@ -417,7 +914,7 @@ module decoder ( default: illegal_instr = 1'b1; endcase // double words - end else if (instr.stype.funct3 == 3'h3) begin + end else if (RVA && instr.stype.funct3 == 3'h3) begin unique case (instr.instr[31:27]) 5'h0: instruction_o.op = AMO_ADDD; 5'h1: instruction_o.op = AMO_SWAPD; @@ -500,6 +997,7 @@ module decoder ( endcase end end + // -------------------------------- // Sign extend immediate // -------------------------------- @@ -511,7 +1009,7 @@ module decoder ( imm_uj_type = uj_imm(instruction_i); imm_bi_type = { {59{instruction_i[24]}}, instruction_i[24:20] }; - // NOIMM, IIMM, SIMM, BIMM, UIMM, JIMM + // NOIMM, IIMM, SIMM, BIMM, UIMM, JIMM, RS3 // select immediate case (imm_select) IIMM: begin @@ -534,6 +1032,11 @@ module decoder ( instruction_o.result = imm_uj_type; instruction_o.use_imm = 1'b1; end + RS3: begin + // result holds address of fp operand rs3 + instruction_o.result = {59'b0, instr.r4type.rs3}; + instruction_o.use_imm = 1'b0; + end default: begin instruction_o.result = 64'b0; instruction_o.use_imm = 1'b0; diff --git a/src/ex_stage.sv b/src/ex_stage.sv index ac14f98ca..acb41cc46 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -16,8 +16,8 @@ import ariane_pkg::*; module ex_stage #( - parameter int ASID_WIDTH = 1 - ) ( + parameter int ASID_WIDTH = 1 +)( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input logic flush_i, @@ -37,17 +37,16 @@ module ex_stage #( output logic alu_valid_o, // ALU result is valid output logic [63:0] alu_result_o, output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, // ID of scoreboard entry at which to write back + output exception_t alu_exception_o, // Branches and Jumps - output logic branch_ready_o, input logic branch_valid_i, // we are using the branch unit - output logic branch_valid_o, // the calculated branch target is valid - output logic [63:0] 
branch_result_o, // branch target address out - input branchpredict_sbe_t branch_predict_i, // branch prediction in - output logic [TRANS_ID_BITS-1:0] branch_trans_id_o, - output exception_t branch_exception_o, // branch unit detected an exception - + input branchpredict_sbe_t branch_predict_i, output branchpredict_t resolved_branch_o, // the branch engine uses the write back from the ALU output logic resolve_branch_o, // to ID signaling that we resolved the branch + // CSR + input logic csr_valid_i, + output logic [11:0] csr_addr_o, + input logic csr_commit_i, // LSU output logic lsu_ready_o, // FU is ready input logic lsu_valid_i, // Input is valid @@ -59,20 +58,23 @@ module ex_stage #( output exception_t lsu_exception_o, output logic no_st_pending_o, input logic amo_valid_commit_i, - // CSR - output logic csr_ready_o, - input logic csr_valid_i, - output logic [TRANS_ID_BITS-1:0] csr_trans_id_o, - output logic [63:0] csr_result_o, - output logic csr_valid_o, - output logic [11:0] csr_addr_o, - input logic csr_commit_i, // MULT output logic mult_ready_o, // FU is ready input logic mult_valid_i, // Output is valid output logic [TRANS_ID_BITS-1:0] mult_trans_id_o, output logic [63:0] mult_result_o, output logic mult_valid_o, + // FPU + output logic fpu_ready_o, // FU is ready + input logic fpu_valid_i, // Output is valid + input logic [1:0] fpu_fmt_i, // FP format + input logic [2:0] fpu_rm_i, // FP rm + input logic [2:0] fpu_frm_i, // FP frm csr + input logic [6:0] fpu_prec_i, // FP precision control + output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, + output logic [63:0] fpu_result_o, + output logic fpu_valid_o, + output exception_t fpu_exception_o, // Memory Management input logic enable_translation_i, @@ -104,50 +106,153 @@ module ex_stage #( // ----- // ALU // ----- - alu alu_i ( - .result_o ( alu_result_o ), - .alu_branch_res_o ( alu_branch_res ), - .* - ); + fu_data_t alu_data; + assign alu_data.operator = (alu_valid_i | branch_valid_i | csr_valid_i) ? 
operator_i : ADD; + assign alu_data.operand_a = (alu_valid_i | branch_valid_i | csr_valid_i) ? operand_a_i : '0; + assign alu_data.operand_b = (alu_valid_i | branch_valid_i | csr_valid_i) ? operand_b_i : '0; + assign alu_data.imm = (alu_valid_i | branch_valid_i | csr_valid_i) ? imm_i : '0; - // -------------------- - // Branch Engine - // -------------------- - branch_unit branch_unit_i ( - .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i), // any functional unit is valid, check that there is no accidental mis-predict - .branch_comp_res_i ( alu_branch_res ), - .* + // fixed latency FUs + // TODO(zarubaf) Re-name this module and re-factor ALU + alu alu_i ( + .clk_i, + .rst_ni, + .flush_i, + .pc_i, + .trans_id_i, + .alu_valid_i, + .branch_valid_i, + .csr_valid_i ( csr_valid_i ), + .operator_i ( alu_data.operator ), + .operand_a_i ( alu_data.operand_a ), + .operand_b_i ( alu_data.operand_b ), + .imm_i ( alu_data.imm ), + .result_o ( alu_result_o ), + .alu_valid_o, + .alu_ready_o, + .alu_trans_id_o, + .alu_exception_o, + + .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i ), + .is_compressed_instr_i, + .branch_predict_i, + .resolved_branch_o, + .resolve_branch_o, + + .commit_i ( csr_commit_i ), + .csr_addr_o ( csr_addr_o ) ); // ---------------- // Multiplication // ---------------- + fu_data_t mult_data; + assign mult_data.operator = mult_valid_i ? operator_i : MUL; + assign mult_data.operand_a = mult_valid_i ? operand_a_i : '0; + assign mult_data.operand_b = mult_valid_i ?
operand_b_i : '0; + mult i_mult ( - .result_o ( mult_result_o ), - .* + .clk_i, + .rst_ni, + .flush_i, + .trans_id_i, + .mult_valid_i, + .operator_i ( mult_data.operator ), + .operand_a_i ( mult_data.operand_a ), + .operand_b_i ( mult_data.operand_b ), + .result_o ( mult_result_o ), + .mult_valid_o, + .mult_ready_o, + .mult_trans_id_o ); + // ---------------- + // FPU + // ---------------- + generate + if (FP_PRESENT) begin : fpu_gen + fu_data_t fpu_data; + assign fpu_data.operator = fpu_valid_i ? operator_i : FSGNJ; + assign fpu_data.operand_a = fpu_valid_i ? operand_a_i : '0; + assign fpu_data.operand_b = fpu_valid_i ? operand_b_i : '0; + assign fpu_data.imm = fpu_valid_i ? imm_i : '0; + + fpu_wrap fpu_i ( + .clk_i, + .rst_ni, + .flush_i, + .trans_id_i, + .fu_i, + .fpu_valid_i, + .fpu_ready_o, + .operator_i ( fpu_data.operator ), + .operand_a_i ( fpu_data.operand_a[FLEN-1:0] ), + .operand_b_i ( fpu_data.operand_b[FLEN-1:0] ), + .operand_c_i ( fpu_data.imm[FLEN-1:0] ), + .fpu_fmt_i, + .fpu_rm_i, + .fpu_frm_i, + .fpu_prec_i, + .fpu_trans_id_o, + .result_o ( fpu_result_o ), + .fpu_valid_o, + .fpu_exception_o + ); + end else begin : no_fpu_gen + assign fpu_ready_o = '0; + assign fpu_trans_id_o = '0; + assign fpu_result_o = '0; + assign fpu_valid_o = '0; + assign fpu_exception_o = '0; + end + endgenerate + // ---------------- // Load-Store Unit // ---------------- + fu_data_t lsu_data; + assign lsu_data.operator = lsu_valid_i ? operator_i : LD; + assign lsu_data.operand_a = lsu_valid_i ? operand_a_i : '0; + assign lsu_data.operand_b = lsu_valid_i ? operand_b_i : '0; + assign lsu_data.imm = lsu_valid_i ? 
imm_i : '0; + lsu lsu_i ( - .commit_i ( lsu_commit_i ), - .commit_ready_o ( lsu_commit_ready_o ), - .dcache_req_ports_i, - .dcache_req_ports_o, - .amo_req_o, - .amo_resp_i, - .* + .clk_i , + .rst_ni , + .flush_i , + .no_st_pending_o , + .fu_i , + .operator_i (lsu_data.operator ), + .operand_a_i (lsu_data.operand_a ), + .operand_b_i (lsu_data.operand_b ), + .imm_i (lsu_data.imm ), + .lsu_ready_o , + .lsu_valid_i , + .trans_id_i , + .lsu_trans_id_o , + .lsu_result_o , + .lsu_valid_o , + .commit_i (lsu_commit_i ), + .commit_ready_o (lsu_commit_ready_o ), + .enable_translation_i , + .en_ld_st_translation_i , + .icache_areq_i , + .icache_areq_o , + .priv_lvl_i , + .ld_st_priv_lvl_i , + .sum_i , + .mxr_i , + .satp_ppn_i , + .asid_i , + .flush_tlb_i , + .itlb_miss_o , + .dtlb_miss_o , + .dcache_req_ports_i , + .dcache_req_ports_o , + .lsu_exception_o , + .amo_valid_commit_i , + .amo_req_o , + .amo_resp_i ); - // ----- - // CSR - // ----- - // CSR address buffer - csr_buffer csr_buffer_i ( - .commit_i ( csr_commit_i ), - .* - ); - - endmodule diff --git a/src/fpu b/src/fpu new file mode 160000 index 000000000..00e257917 --- /dev/null +++ b/src/fpu @@ -0,0 +1 @@ +Subproject commit 00e2579173f1412f06d4eb95d6b98d0eb1cd2e94 diff --git a/src/fpu_div_sqrt_mvp b/src/fpu_div_sqrt_mvp new file mode 160000 index 000000000..3736c4c84 --- /dev/null +++ b/src/fpu_div_sqrt_mvp @@ -0,0 +1 @@ +Subproject commit 3736c4c844074bd64c3c505c017181db71b738b4 diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv new file mode 100644 index 000000000..eb2775b2e --- /dev/null +++ b/src/fpu_wrap.sv @@ -0,0 +1,603 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// Author: Stefan Mach, ETH Zurich +// Date: 12.04.2018 +// Description: Wrapper for the floating-point unit + + +import ariane_pkg::*; + +module fpu_wrap ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic [TRANS_ID_BITS-1:0] trans_id_i, + input fu_t fu_i, + input logic fpu_valid_i, + output logic fpu_ready_o, + input fu_op operator_i, + input logic [FLEN-1:0] operand_a_i, + input logic [FLEN-1:0] operand_b_i, // imm will be here unless used as operand + input logic [FLEN-1:0] operand_c_i, // imm will be here unless used as operand + input logic [1:0] fpu_fmt_i, + input logic [2:0] fpu_rm_i, + input logic [2:0] fpu_frm_i, + input logic [6:0] fpu_prec_i, + output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, + output logic [FLEN-1:0] result_o, + output logic fpu_valid_o, + output exception_t fpu_exception_o +); + + //----------------------------------- + // FPnew encoding from FPnew package + //----------------------------------- + localparam OPBITS = 4; + localparam FMTBITS = 3; + localparam IFMTBITS = 2; + + integer OP_NUMBITS, FMT_NUMBITS, IFMT_NUMBITS; + + logic [OPBITS-1:0] OP_FMADD; + logic [OPBITS-1:0] OP_FNMSUB; + logic [OPBITS-1:0] OP_ADD; + logic [OPBITS-1:0] OP_MUL; + logic [OPBITS-1:0] OP_DIV; + logic [OPBITS-1:0] OP_SQRT; + logic [OPBITS-1:0] OP_SGNJ; + logic [OPBITS-1:0] OP_MINMAX; + logic [OPBITS-1:0] OP_CMP; + logic [OPBITS-1:0] OP_CLASS; + logic [OPBITS-1:0] OP_F2I; + logic [OPBITS-1:0] OP_I2F; + logic [OPBITS-1:0] OP_F2F; + logic [OPBITS-1:0] OP_CPKAB; + logic [OPBITS-1:0] OP_CPKCD; + + logic [FMTBITS-1:0] FMT_FP32; + logic [FMTBITS-1:0] FMT_FP64; + logic [FMTBITS-1:0] 
FMT_FP16; + logic [FMTBITS-1:0] FMT_FP8; + logic [FMTBITS-1:0] FMT_FP16ALT; + logic [FMTBITS-1:0] FMT_CUST1; + logic [FMTBITS-1:0] FMT_CUST2; + logic [FMTBITS-1:0] FMT_CUST3; + + logic [IFMTBITS-1:0] IFMT_INT8; + logic [IFMTBITS-1:0] IFMT_INT16; + logic [IFMTBITS-1:0] IFMT_INT32; + logic [IFMTBITS-1:0] IFMT_INT64; + + // bind the constants from the fpnew entity + fpnew_pkg_constants i_fpnew_constants ( + .OP_NUMBITS ( OP_NUMBITS ), + .OP_FMADD ( OP_FMADD ), + .OP_FNMSUB ( OP_FNMSUB ), + .OP_ADD ( OP_ADD ), + .OP_MUL ( OP_MUL ), + .OP_DIV ( OP_DIV ), + .OP_SQRT ( OP_SQRT ), + .OP_SGNJ ( OP_SGNJ ), + .OP_MINMAX ( OP_MINMAX ), + .OP_CMP ( OP_CMP ), + .OP_CLASS ( OP_CLASS ), + .OP_F2I ( OP_F2I ), + .OP_I2F ( OP_I2F ), + .OP_F2F ( OP_F2F ), + .OP_CPKAB ( OP_CPKAB ), + .OP_CPKCD ( OP_CPKCD ), + .FMT_NUMBITS ( FMT_NUMBITS ), + .FMT_FP32 ( FMT_FP32 ), + .FMT_FP64 ( FMT_FP64 ), + .FMT_FP16 ( FMT_FP16 ), + .FMT_FP8 ( FMT_FP8 ), + .FMT_FP16ALT ( FMT_FP16ALT ), + .FMT_CUST1 ( FMT_CUST1 ), + .FMT_CUST2 ( FMT_CUST2 ), + .FMT_CUST3 ( FMT_CUST3 ), + .IFMT_NUMBITS ( IFMT_NUMBITS ), + .IFMT_INT8 ( IFMT_INT8 ), + .IFMT_INT16 ( IFMT_INT16 ), + .IFMT_INT32 ( IFMT_INT32 ), + .IFMT_INT64 ( IFMT_INT64 ) + ); + + // always_comb begin + // assert (OPBITS >= OP_NUMBITS) else $error("OPBITS is smaller than %0d", OP_NUMBITS); + // assert (FMTBITS >= FMT_NUMBITS) else $error("FMTBITS is smaller than %0d", FMT_NUMBITS); + // assert (IFMTBITS >= IFMT_NUMBITS) else $error("IFMTBITS is smaller than %0d", IFMT_NUMBITS); + // end + + //------------------------------------------------- + // Inputs to the FPU and protocol inversion buffer + //------------------------------------------------- + logic [FLEN-1:0] operand_a_d, operand_a_q, operand_a; + logic [FLEN-1:0] operand_b_d, operand_b_q, operand_b; + logic [FLEN-1:0] operand_c_d, operand_c_q, operand_c; + logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op; + logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod; + logic [FMTBITS-1:0] fpu_fmt_d, fpu_fmt_q, 
fpu_fmt; + logic [FMTBITS-1:0] fpu_fmt2_d, fpu_fmt2_q, fpu_fmt2; + logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt; + logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm; + logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op; + + logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag; + + logic fpu_in_ready, fpu_in_valid; + logic fpu_out_ready, fpu_out_valid; + + logic [4:0] fpu_status; + + // FSM to handle protocol inversion + enum logic {READY, STALL} state_q, state_d; + logic hold_inputs; + logic use_hold; + + //----------------------------- + // Translate inputs + //----------------------------- + + always_comb begin : input_translation + + automatic logic vec_replication; // control honoring of replication flag + automatic logic replicate_c; // replicate operand C instead of B (for ADD/SUB) + automatic logic check_ah; // Decide for AH from RM field encoding + + // Default Values + operand_a_d = operand_a_i; + operand_b_d = operand_b_i; // immediates come through this port unless used as operand + operand_c_d = operand_c_i; // immediates come through this port unless used as operand + fpu_op_d = OP_SGNJ; // sign injection by default + fpu_op_mod_d = 1'b0; + fpu_fmt_d = FMT_FP32; + fpu_fmt2_d = FMT_FP32; + fpu_ifmt_d = IFMT_INT32; + fpu_rm_d = fpu_rm_i; + fpu_vec_op_d = fu_i == FPU_VEC; + fpu_tag_d = trans_id_i; + vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field + replicate_c = 1'b0; + check_ah = 1'b0; // whether set scalar AH encoding from MSB of rm_i + + // Scalar Rounding Modes - some ops encode inside RM but use smaller range + if (!(fpu_rm_i inside {[3'b000:3'b100]})) + fpu_rm_d = fpu_frm_i; + + // Vectorial ops always consult FRM + if (fpu_vec_op_d) + fpu_rm_d = fpu_frm_i; + + // Formats + unique case (fpu_fmt_i) + // FP32 + 2'b00 : fpu_fmt_d = FMT_FP32; + // FP64 or FP16ALT (vectorial) + 2'b01 : fpu_fmt_d = fpu_vec_op_d ? 
FMT_FP16ALT : FMT_FP64; + // FP16 or FP16ALT (scalar) + 2'b10 : begin + if (!fpu_vec_op_d && fpu_rm_i==3'b101) + fpu_fmt_d = FMT_FP16ALT; + else + fpu_fmt_d = FMT_FP16; + end + // FP8 + default : fpu_fmt_d = FMT_FP8; + endcase + + + // Operations (this can modify the rounding mode field and format!) + unique case (operator_i) + // Addition + FADD : begin + fpu_op_d = OP_ADD; + replicate_c = 1'b1; // second operand is in C + end + // Subtraction is modified ADD + FSUB : begin + fpu_op_d = OP_ADD; + fpu_op_mod_d = 1'b1; + replicate_c = 1'b1; // second operand is in C + end + // Multiplication + FMUL : fpu_op_d = OP_MUL; + // Division + FDIV : fpu_op_d = OP_DIV; + // Min/Max - OP is encoded in rm (000-001) + FMIN_MAX : begin + fpu_op_d = OP_MINMAX; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end + // Square Root + FSQRT : fpu_op_d = OP_SQRT; + // Fused Multiply Add + FMADD : fpu_op_d = OP_FMADD; + // Fused Multiply Subtract is modified FMADD + FMSUB : begin + fpu_op_d = OP_FMADD; + fpu_op_mod_d = 1'b1; + end + // Fused Negated Multiply Subtract + FNMSUB : fpu_op_d = OP_FNMSUB; + // Fused Negated Multiply Add is modified FNMSUB + FNMADD : begin + fpu_op_d = OP_FNMSUB; + fpu_op_mod_d = 1'b1; + end + // Float to Int Cast - Op encoded in lowest two imm bits or rm + FCVT_F2I : begin + fpu_op_d = OP_F2I; + // Vectorial Ops encoded in R bit + if (fpu_vec_op_d) begin + fpu_op_mod_d = fpu_rm_i[0]; + vec_replication = 1'b0; // no replication, R bit used for op + unique case (fpu_fmt_i) + 2'b00 : fpu_ifmt_d = IFMT_INT32; + 2'b01, + 2'b10 : fpu_ifmt_d = IFMT_INT16; + 2'b11 : fpu_ifmt_d = IFMT_INT8; + endcase + // Scalar casts encoded in imm + end else begin + fpu_op_mod_d = operand_c_i[0]; + if (operand_c_i[1]) + fpu_ifmt_d = IFMT_INT64; + else + fpu_ifmt_d = IFMT_INT32; + end + end + // Int to Float Cast - Op encoded in lowest two imm bits or rm + FCVT_I2F : begin + fpu_op_d = OP_I2F; + // Vectorial Ops encoded 
in R bit + if (fpu_vec_op_d) begin + fpu_op_mod_d = fpu_rm_i[0]; + vec_replication = 1'b0; // no replication, R bit used for op + unique case (fpu_fmt_i) + 2'b00 : fpu_ifmt_d = IFMT_INT32; + 2'b01, + 2'b10 : fpu_ifmt_d = IFMT_INT16; + 2'b11 : fpu_ifmt_d = IFMT_INT8; + endcase + // Scalar casts encoded in imm + end else begin + fpu_op_mod_d = operand_c_i[0]; + if (operand_c_i[1]) + fpu_ifmt_d = IFMT_INT64; + else + fpu_ifmt_d = IFMT_INT32; + end + end + // Float to Float Cast - Source format encoded in lowest two/three imm bits + FCVT_F2F : begin + fpu_op_d = OP_F2F; + // Vectorial ops encoded in lowest two imm bits + if (fpu_vec_op_d) begin + vec_replication = 1'b0; // no replication for casts (not needed) + unique case (operand_c_i[1:0]) + 2'b00: fpu_fmt2_d = FMT_FP32; + 2'b01: fpu_fmt2_d = FMT_FP16ALT; + 2'b10: fpu_fmt2_d = FMT_FP16; + 2'b11: fpu_fmt2_d = FMT_FP8; + endcase + // Scalar ops encoded in lowest three imm bits + end else begin + unique case (operand_c_i[2:0]) + 3'b000: fpu_fmt2_d = FMT_FP32; + 3'b001: fpu_fmt2_d = FMT_FP64; + 3'b010: fpu_fmt2_d = FMT_FP16; + 3'b110: fpu_fmt2_d = FMT_FP16ALT; + 3'b011: fpu_fmt2_d = FMT_FP8; + endcase + end + end + // Scalar Sign Injection - op encoded in rm (000-010) + FSGNJ : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end + // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding + FMV_F2X : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = 3'b011; // passthrough without checking nan-box + fpu_op_mod_d = 1'b1; // no NaN-Boxing + check_ah = 1'b1; // AH has RM MSB encoding + vec_replication = 1'b0; // no replication, we set second operand + end + // Move from GPR to FPR - mapped to NOP since no recoding + FMV_X2F : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = 3'b011; // passthrough without checking nan-box + check_ah = 1'b1; // AH has RM MSB encoding + vec_replication = 1'b0; // no replication, we set second operand + end + 
// Scalar Comparisons - op encoded in rm (000-010) + FCMP : begin + fpu_op_d = OP_CMP; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end + // Classification + FCLASS : begin + fpu_op_d = OP_CLASS; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit - CLASS doesn't care anyways + check_ah = 1'b1; // AH has RM MSB encoding + end + // Vectorial Minimum - set up scalar encoding in rm + VFMIN : begin + fpu_op_d = OP_MINMAX; + fpu_rm_d = 3'b000; // min + end + // Vectorial Maximum - set up scalar encoding in rm + VFMAX : begin + fpu_op_d = OP_MINMAX; + fpu_rm_d = 3'b001; // max + end + // Vectorial Sign Injection - set up scalar encoding in rm + VFSGNJ : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = 3'b000; // sgnj + end + // Vectorial Negated Sign Injection - set up scalar encoding in rm + VFSGNJN : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = 3'b001; // sgnjn + end + // Vectorial Xored Sign Injection - set up scalar encoding in rm + VFSGNJX : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = 3'b010; // sgnjx + end + // Vectorial Equals - set up scalar encoding in rm + VFEQ : begin + fpu_op_d = OP_CMP; + fpu_rm_d = 3'b010; // eq + end + // Vectorial Not Equals - set up scalar encoding in rm + VFNE : begin + fpu_op_d = OP_CMP; + fpu_op_mod_d = 1'b1; // invert output + fpu_rm_d = 3'b010; // eq + end + // Vectorial Less Than - set up scalar encoding in rm + VFLT : begin + fpu_op_d = OP_CMP; + fpu_rm_d = 3'b001; // lt + end + // Vectorial Greater or Equal - set up scalar encoding in rm + VFGE : begin + fpu_op_d = OP_CMP; + fpu_op_mod_d = 1'b1; // invert output + fpu_rm_d = 3'b001; // lt + end + // Vectorial Less or Equal - set up scalar encoding in rm + VFLE : begin + fpu_op_d = OP_CMP; + fpu_rm_d = 3'b000; // le + end + // Vectorial Greater Than - set up scalar encoding in rm + VFGT : begin + fpu_op_d = OP_CMP; + fpu_op_mod_d = 1'b1; // invert output + fpu_rm_d = 3'b000; // le + end + // Vectorial Convert-and-Pack 
from FP32, lower 4 entries + VFCPKAB_S : begin + fpu_op_d = OP_CPKAB; + fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_fmt2_d = FMT_FP32; // Cast from FP32 + end + // Vectorial Convert-and-Pack from FP32, upper 4 entries + VFCPKCD_S : begin + fpu_op_d = OP_CPKCD; + fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_fmt2_d = FMT_FP32; // Cast from FP32 + end + // Vectorial Convert-and-Pack from FP64, lower 4 entries + VFCPKAB_D : begin + fpu_op_d = OP_CPKAB; + fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_fmt2_d = FMT_FP64; // Cast from FP64 + end + // Vectorial Convert-and-Pack from FP64, upper 4 entries + VFCPKCD_D : begin + fpu_op_d = OP_CPKCD; + fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_fmt2_d = FMT_FP64; // Cast from FP64 + end + + // No changes per default + default : ; //nothing + endcase + + // Scalar AH encoding fixing + if (!fpu_vec_op_d && check_ah) + if (fpu_rm_i[2]) + fpu_fmt_d = FMT_FP16ALT; + + // Replication + if (fpu_vec_op_d && vec_replication) begin + if (replicate_c) begin + unique case (fpu_fmt_d) + FMT_FP32 : operand_c_d = RVD ? {2{operand_c_i[31:0]}} : operand_c_i; + FMT_FP16, + FMT_FP16ALT : operand_c_d = RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}}; + FMT_FP8 : operand_c_d = RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}}; + endcase // fpu_fmt_d + end else begin + unique case (fpu_fmt_d) + FMT_FP32 : operand_b_d = RVD ? {2{operand_b_i[31:0]}} : operand_b_i; + FMT_FP16, + FMT_FP16ALT : operand_b_d = RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}}; + FMT_FP8 : operand_b_d = RVD ?
{8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}}; + endcase // fpu_fmt_d + end + end + end + + + //--------------------------------------------------------- + // Upstream protocol inversion: InValid depends on InReady + //--------------------------------------------------------- + + always_comb begin : p_inputFSM + // Default Values + fpu_ready_o = 1'b0; + fpu_in_valid = 1'b0; + hold_inputs = 1'b0; // hold register disabled + use_hold = 1'b0; // inputs go directly to unit + state_d = state_q; // stay in the same state + + // FSM + unique case (state_q) + // Default state, ready for instructions + READY : begin + fpu_ready_o = 1'b1; // Act as if FPU ready + fpu_in_valid = fpu_valid_i; // Forward input valid to FPU + // There is a transaction but the FPU can't handle it + if (fpu_valid_i & ~fpu_in_ready) begin + fpu_ready_o = 1'b0; // No token given to Issue + hold_inputs = 1'b1; // save inputs to the holding register + state_d = STALL; // stall future incoming requests + end + end + // We're stalling the upstream (ready=0) + STALL : begin + fpu_in_valid = 1'b1; // we have data for the FPU + use_hold = 1'b1; // the data comes from the hold reg + // Wait until it's consumed + if (fpu_in_ready) begin + fpu_ready_o = 1'b1; // Give a token to issue + state_d = READY; // accept future requests + end + end + // Default: emit default values + default : ; + endcase + + // Flushing will override issue and go back to idle + if (flush_i) begin + state_d = READY; + end + + end + + // Buffer register and FSM state holding + always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg + if(~rst_ni) begin + state_q <= READY; + operand_a_q <= '0; + operand_b_q <= '0; + operand_c_q <= '0; + fpu_op_q <= '0; + fpu_op_mod_q <= '0; + fpu_fmt_q <= '0; + fpu_fmt2_q <= '0; + fpu_ifmt_q <= '0; + fpu_rm_q <= '0; + fpu_vec_op_q <= '0; + fpu_tag_q <= '0; + end else begin + state_q <= state_d; + // Hold register is enabled by FSM + if (hold_inputs) begin + operand_a_q <= operand_a_d; +
operand_b_q <= operand_b_d; + operand_c_q <= operand_c_d; + fpu_op_q <= fpu_op_d; + fpu_op_mod_q <= fpu_op_mod_d; + fpu_fmt_q <= fpu_fmt_d; + fpu_fmt2_q <= fpu_fmt2_d; + fpu_ifmt_q <= fpu_ifmt_d; + fpu_rm_q <= fpu_rm_d; + fpu_vec_op_q <= fpu_vec_op_d; + fpu_tag_q <= fpu_tag_d; + end + end + end + + // Select FPU input data: from register if valid data in register, else directly from input + assign operand_a = use_hold ? operand_a_q : operand_a_d; + assign operand_b = use_hold ? operand_b_q : operand_b_d; + assign operand_c = use_hold ? operand_c_q : operand_c_d; + assign fpu_op = use_hold ? fpu_op_q : fpu_op_d; + assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d; + assign fpu_fmt = use_hold ? fpu_fmt_q : fpu_fmt_d; + assign fpu_fmt2 = use_hold ? fpu_fmt2_q : fpu_fmt2_d; + assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d; + assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d; + assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d; + assign fpu_tag = use_hold ? fpu_tag_q : fpu_tag_d; + + //--------------- + // FPU instance + //--------------- + fpnew_top #( + .WIDTH ( FLEN ), + .TAG_WIDTH ( TRANS_ID_BITS ), + .RV64 ( 1'b1 ), + .RVF ( RVF ), + .RVD ( RVD ), + .Xf16 ( XF16 ), + .Xf16alt ( XF16ALT ), + .Xf8 ( XF8 ), + .Xfvec ( XFVEC ), + // TODO MOVE THESE VALUES TO PACKAGE + .LATENCY_COMP_F ( LAT_COMP_FP32 ), + .LATENCY_COMP_D ( LAT_COMP_FP64 ), + .LATENCY_COMP_Xf16 ( LAT_COMP_FP16 ), + .LATENCY_COMP_Xf16alt ( LAT_COMP_FP16ALT ), + .LATENCY_COMP_Xf8 ( LAT_COMP_FP8 ), + .LATENCY_DIVSQRT ( LAT_DIVSQRT ), + .LATENCY_NONCOMP ( LAT_NONCOMP ), + .LATENCY_CONV ( LAT_CONV ) + ) fpnew_top_i ( + .Clk_CI ( clk_i ), + .Reset_RBI ( rst_ni ), + .A_DI ( operand_a ), + .B_DI ( operand_b ), + .C_DI ( operand_c ), + .RoundMode_SI ( fpu_rm ), + .Op_SI ( fpu_op ), + .OpMod_SI ( fpu_op_mod ), + .VectorialOp_SI ( fpu_vec_op ), + .FpFmt_SI ( fpu_fmt ), + .FpFmt2_SI ( fpu_fmt2 ), + .IntFmt_SI ( fpu_ifmt ), + .Tag_DI ( fpu_tag ), + .PrecCtl_SI ( fpu_prec_i ), + .InValid_SI ( 
fpu_in_valid ), + .InReady_SO ( fpu_in_ready ), + .Flush_SI ( flush_i ), + .Z_DO ( result_o ), + .Status_DO ( fpu_status ), + .Tag_DO ( fpu_trans_id_o ), + .OutValid_SO ( fpu_out_valid ), + .OutReady_SI ( fpu_out_ready ) + ); + + // Pack status flag into exception cause, tval ignored in wb, exception is always invalid + assign fpu_exception_o.cause = {59'h0, fpu_status}; + assign fpu_exception_o.valid = 1'b0; + + // Downstream write port is dedicated to FPU and always ready + assign fpu_out_ready = 1'b1; + + // Downstream valid from unit + assign fpu_valid_o = fpu_out_valid; + +endmodule diff --git a/src/frontend/instr_scan.sv b/src/frontend/instr_scan.sv index 766f911cd..06fa3b928 100644 --- a/src/frontend/instr_scan.sv +++ b/src/frontend/instr_scan.sv @@ -42,16 +42,16 @@ module instr_scan ( assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr) ? 1'b1 : 1'b0; assign rvi_jump_o = (instr_i[6:0] == riscv::OpcodeJal) ? 1'b1 : 1'b0; // opcode JAL - assign rvc_jump_o = (instr_i[15:13] == riscv::OpcodeCJ) & is_rvc_o & (instr_i[1:0] == 2'b01); + assign rvc_jump_o = (instr_i[15:13] == riscv::OpcodeC1J) & is_rvc_o & (instr_i[1:0] == riscv::OpcodeC1); // always links to register 0 assign rvc_jr_o = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd) & ~instr_i[12] & (instr_i[6:2] == 5'b00000) - & (instr_i[1:0] == 2'b10) + & (instr_i[1:0] == riscv::OpcodeC2) + & is_rvc_o; + assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeC1Beqz) | (instr_i[15:13] == riscv::OpcodeC1Bnez)) + & (instr_i[1:0] == riscv::OpcodeC1) & is_rvc_o; - assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeCBeqz) | (instr_i[15:13] == riscv::OpcodeCBnez)) - & (instr_i[1:0] == 2'b01) - & is_rvc_o ; // check that rs1 is x1 or x5 assign rvc_return_o = ~instr_i[11] & ~instr_i[10] & ~instr_i[8] & instr_i[7] & rvc_jr_o ; // always links to register 1 e.g.: it is a jump diff --git a/src/id_stage.sv b/src/id_stage.sv index 7c5d525b9..46b248c60 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -32,6 +32,9 @@
module id_stage ( input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions // from CSR file input riscv::priv_lvl_t priv_lvl_i, // current privilege level + input riscv::xs_t fs_i, // floating point extension status + input logic [2:0] frm_i, // floating-point dynamic rounding mode + input logic debug_mode_i, // we are in debug mode input logic tvm_i, input logic tw_i, @@ -39,9 +42,9 @@ module id_stage ( ); // register stage struct packed { - logic valid; + logic valid; scoreboard_entry_t sbe; - logic is_ctrl_flow; + logic is_ctrl_flow; } issue_n, issue_q; @@ -90,6 +93,8 @@ module id_stage ( .ex_i ( fetch_entry.ex ), .instruction_o ( decoded_instruction ), .is_control_flow_instr_o ( is_control_flow_instr ), + .fs_i, + .frm_i, .* ); diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index e88de864c..492e2b305 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -17,7 +17,7 @@ import ariane_pkg::*; module issue_read_operands #( parameter int unsigned NR_COMMIT_PORTS = 2 - )( +)( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low // flush @@ -33,8 +33,12 @@ module issue_read_operands #( output logic [REG_ADDR_SIZE-1:0] rs2_o, input logic [63:0] rs2_i, input logic rs2_valid_i, + output logic [REG_ADDR_SIZE-1:0] rs3_o, + input logic [FLEN-1:0] rs3_i, + input logic rs3_valid_i, // get clobber input - input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_i, + input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_gpr_i, + input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_fpr_i, // To FU, just single issue for now output fu_t fu_o, output fu_op operator_o, @@ -48,22 +52,26 @@ module issue_read_operands #( input logic alu_ready_i, // FU is ready output logic alu_valid_o, // Output is valid // Branches and Jumps - input logic branch_ready_i, output logic branch_valid_o, // this is a valid branch instruction output branchpredict_sbe_t branch_predict_o, // LSU input logic lsu_ready_i, // FU is ready output 
logic lsu_valid_o, // Output is valid // MULT - input logic mult_ready_i, // FU is ready - output logic mult_valid_o, // Output is valid + input logic mult_ready_i, // FU is ready + output logic mult_valid_o, // Output is valid + // FPU + input logic fpu_ready_i, // FU is ready + output logic fpu_valid_o, // Output is valid + output logic [1:0] fpu_fmt_o, // FP fmt field from instr. + output logic [2:0] fpu_rm_o, // FP rm field from instr. // CSR - input logic csr_ready_i, // FU is ready output logic csr_valid_o, // Output is valid // commit port input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i, input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i, - input logic [NR_COMMIT_PORTS-1:0] we_i + input logic [NR_COMMIT_PORTS-1:0] we_gpr_i, + input logic [NR_COMMIT_PORTS-1:0] we_fpr_i // committing instruction instruction // from scoreboard // input scoreboard_entry commit_instr_i, @@ -72,24 +80,33 @@ module issue_read_operands #( logic stall; // stall signal, we do not want to fetch any more entries logic fu_busy; // functional unit is busy logic [63:0] operand_a_regfile, operand_b_regfile; // operands coming from regfile + logic [FLEN-1:0] operand_c_regfile; // third operand only from fp regfile // output flipflop (ID <-> EX) logic [63:0] operand_a_n, operand_a_q, operand_b_n, operand_b_q, imm_n, imm_q; - logic alu_valid_n, alu_valid_q; - logic mult_valid_n, mult_valid_q; - logic lsu_valid_n, lsu_valid_q; - logic csr_valid_n, csr_valid_q; - logic branch_valid_n, branch_valid_q; + logic alu_valid_n, alu_valid_q; + logic mult_valid_n, mult_valid_q; + logic fpu_valid_n, fpu_valid_q; + logic [1:0] fpu_fmt_n, fpu_fmt_q; + logic [2:0] fpu_rm_n, fpu_rm_q; + logic lsu_valid_n, lsu_valid_q; + logic csr_valid_n, csr_valid_q; + logic branch_valid_n, branch_valid_q; logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; fu_op operator_n, operator_q; // operation to perform fu_t fu_n, fu_q; // functional unit to use // forwarding signals - logic forward_rs1, forward_rs2; + logic forward_rs1, 
forward_rs2, forward_rs3; + + // original instruction stored in tval + riscv::instruction_t orig_instr; + assign orig_instr = riscv::instruction_t'(issue_instr_i.ex.tval[31:0]); + // ID <-> EX registers assign operand_a_o = operand_a_q; assign operand_b_o = operand_b_q; @@ -100,11 +117,174 @@ module issue_read_operands #( assign lsu_valid_o = lsu_valid_q; assign csr_valid_o = csr_valid_q; assign mult_valid_o = mult_valid_q; + assign fpu_valid_o = fpu_valid_q; + assign fpu_fmt_o = fpu_fmt_q; + assign fpu_rm_o = fpu_rm_q; assign trans_id_o = trans_id_q; assign imm_o = imm_q; // --------------- // Issue Stage // --------------- + + // select the right busy signal + // this obviously depends on the functional unit we need + always_comb begin : unit_busy + unique case (issue_instr_i.fu) + NONE: + fu_busy = 1'b0; + ALU, CTRL_FLOW, CSR: + fu_busy = ~alu_ready_i; + MULT: + fu_busy = ~mult_ready_i; + FPU, FPU_VEC: + fu_busy = ~fpu_ready_i; + LOAD, STORE: + fu_busy = ~lsu_ready_i; + default: + fu_busy = 1'b0; + endcase + end + + // --------------- + // Register stage + // --------------- + // check that all operands are available, otherwise stall + // forward corresponding register + always_comb begin : operands_available + stall = 1'b0; + // operand forwarding signals + forward_rs1 = 1'b0; + forward_rs2 = 1'b0; + forward_rs3 = 1'b0; // FPR only + // poll the scoreboard for those values + rs1_o = issue_instr_i.rs1; + rs2_o = issue_instr_i.rs2; + rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field + + // 0. check that we are not using the zimm type in RS1 + // as this is an immediate we do not have to wait on anything here + // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr) + // 2. poll the scoreboard + if (~issue_instr_i.use_zimm && (is_rs1_fpr(issue_instr_i.op) ? 
rd_clobber_fpr_i[issue_instr_i.rs1] != NONE + : rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin + // check if the clobbering instruction is not a CSR instruction, CSR instructions can only + // be fetched through the register file since they can't be forwarded + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs1_valid_i && (is_rs1_fpr(issue_instr_i.op) ? 1'b1 : rd_clobber_gpr_i[issue_instr_i.rs1] != CSR)) + forward_rs1 = 1'b1; + else // the operand is not available -> stall + stall = 1'b1; + end + + if (is_rs2_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE + : rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs2_valid_i && (is_rs2_fpr(issue_instr_i.op) ? 1'b1 : rd_clobber_gpr_i[issue_instr_i.rs2] != CSR)) + forward_rs2 = 1'b1; + else // the operand is not available -> stall + stall = 1'b1; + end + + if (is_imm_fpr(issue_instr_i.op) && rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE) begin + // if the operand is available, forward it. CSRs don't write to/from FPR so no need to check + if (rs3_valid_i) + forward_rs3 = 1'b1; + else // the operand is not available -> stall + stall = 1'b1; + end + end + + // Forwarding/Output MUX + always_comb begin : forwarding_operand_select + // default is regfiles (gpr or fpr) + operand_a_n = operand_a_regfile; + operand_b_n = operand_b_regfile; + // immediates are the third operands in the store case + // for FP operations, the imm field can also be the third operand from the regfile + imm_n = is_imm_fpr(issue_instr_i.op) ? 
operand_c_regfile : issue_instr_i.result; + trans_id_n = issue_instr_i.trans_id; + fu_n = issue_instr_i.fu; + operator_n = issue_instr_i.op; + // or should we forward + if (forward_rs1) begin + operand_a_n = rs1_i; + end + + if (forward_rs2) begin + operand_b_n = rs2_i; + end + + if (forward_rs3) begin + imm_n = rs3_i; + end + + // use the PC as operand a + if (issue_instr_i.use_pc) begin + operand_a_n = issue_instr_i.pc; + end + + // use the zimm as operand a + if (issue_instr_i.use_zimm) begin + // zero extend operand a + operand_a_n = {52'b0, issue_instr_i.rs1[4:0]}; + end + // or is it an immediate (including PC), this is not the case for a store and control flow instructions + // also make sure operand B is not already used as an FP operand + if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && !is_rs2_fpr(issue_instr_i.op)) begin + operand_b_n = issue_instr_i.result; + end + end + + // FU select, assert the correct valid out signal (in the next cycle) + always_comb begin : unit_valid + alu_valid_n = 1'b0; + lsu_valid_n = 1'b0; + mult_valid_n = 1'b0; + fpu_valid_n = 1'b0; + fpu_fmt_n = 2'b0; + fpu_rm_n = 3'b0; + csr_valid_n = 1'b0; + branch_valid_n = 1'b0; + // Exception pass through: + // If an exception has occurred simply pass it through + // we do not want to issue this instruction + if (~issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin + case (issue_instr_i.fu) + ALU: + alu_valid_n = 1'b1; + CTRL_FLOW: + branch_valid_n = 1'b1; + MULT: + mult_valid_n = 1'b1; + FPU : begin + fpu_valid_n = 1'b1; + fpu_fmt_n = orig_instr.rftype.fmt; // fmt bits from instruction + fpu_rm_n = orig_instr.rftype.rm; // rm bits from instruction + end + FPU_VEC : begin + fpu_valid_n = 1'b1; + fpu_fmt_n = orig_instr.rvftype.vfmt; // vfmt bits from instruction + fpu_rm_n = {2'b0, orig_instr.rvftype.repl}; // repl bit from instruction + end + LOAD, STORE: + lsu_valid_n = 1'b1; + CSR: + csr_valid_n = 1'b1; + default:; + 
endcase + end + // if we got a flush request, de-assert the valid flag, otherwise we will start this + // functional unit with the wrong inputs + if (flush_i) begin + alu_valid_n = 1'b0; + lsu_valid_n = 1'b0; + mult_valid_n = 1'b0; + fpu_valid_n = 1'b0; + csr_valid_n = 1'b0; + branch_valid_n = 1'b0; + end + end + // We can issue an instruction if we do not detect that any other instruction is writing the same // destination register. // We also need to check if there is an unresolved branch in the scoreboard. @@ -120,13 +300,15 @@ module issue_read_operands #( // WAW - Write After Write Dependency Check // ----------------------------------------- // no other instruction has the same destination register -> issue the instruction - if (rd_clobber_i[issue_instr_i.rd] == NONE) begin + if (is_rd_fpr(issue_instr_i.op) ? (rd_clobber_fpr_i[issue_instr_i.rd] == NONE) + : (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin issue_ack_o = 1'b1; end // or check that the target destination register will be written in this cycle by the // commit stage for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++) - if (we_i[i] && waddr_i[i] == issue_instr_i.rd) begin + if (is_rd_fpr(issue_instr_i.op) ? 
(we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd) + : (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd)) begin issue_ack_o = 1'b1; end end @@ -145,159 +327,73 @@ module issue_read_operands #( end end - // select the right busy signal - // this obviously depends on the functional unit we need - always_comb begin : unit_busy - unique case (issue_instr_i.fu) - NONE: - fu_busy = 1'b0; - ALU: - fu_busy = ~alu_ready_i; - CTRL_FLOW: - fu_busy = ~branch_ready_i; - MULT: - fu_busy = ~mult_ready_i; - LOAD, STORE: - fu_busy = ~lsu_ready_i; - CSR: - fu_busy = ~csr_ready_i; - default: - fu_busy = 1'b0; - endcase - end - - // --------------- - // Register stage - // --------------- - // check that all operands are available, otherwise stall - // forward corresponding register - always_comb begin : operands_available - stall = 1'b0; - // operand forwarding signals - forward_rs1 = 1'b0; - forward_rs2 = 1'b0; - // poll the scoreboard for those values - rs1_o = issue_instr_i.rs1; - rs2_o = issue_instr_i.rs2; - // 0. check that we are not using the zimm type in RS1 - // as this is an immediate we do not have to wait on anything here - // 1. check if the source registers are clobberd - // 2. 
poll the scoreboard - if (~issue_instr_i.use_zimm && rd_clobber_i[issue_instr_i.rs1] != NONE) begin - // check if the clobbering instruction is not a CSR instruction, CSR instructions can only - // be fetched through the register file since they can't be forwarded - // the operand is available, forward it - if (rs1_valid_i && rd_clobber_i[issue_instr_i.rs1] != CSR) - forward_rs1 = 1'b1; - else // the operand is not available -> stall - stall = 1'b1; - - end - - if (rd_clobber_i[issue_instr_i.rs2] != NONE) begin - // the operand is available, forward it - if (rs2_valid_i && rd_clobber_i[issue_instr_i.rs2] != CSR) - forward_rs2 = 1'b1; - else // the operand is not available -> stall - stall = 1'b1; - end - end - // Forwarding/Output MUX - always_comb begin : forwarding_operand_select - // default is regfile - operand_a_n = operand_a_regfile; - operand_b_n = operand_b_regfile; - // immediates are the third operands in the store case - imm_n = issue_instr_i.result; - trans_id_n = issue_instr_i.trans_id; - fu_n = issue_instr_i.fu; - operator_n = issue_instr_i.op; - // or should we forward - if (forward_rs1) begin - operand_a_n = rs1_i; - end - - if (forward_rs2) begin - operand_b_n = rs2_i; - end - - // use the PC as operand a - if (issue_instr_i.use_pc) begin - operand_a_n = issue_instr_i.pc; - end - - // use the zimm as operand a - if (issue_instr_i.use_zimm) begin - // zero extend operand a - operand_a_n = {52'b0, issue_instr_i.rs1[4:0]}; - end - // or is it an immediate (including PC), this is not the case for a store and control flow instructions - if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW)) begin - operand_b_n = issue_instr_i.result; - end - end - // FU select, assert the correct valid out signal (in the next cycle) - always_comb begin : unit_valid - alu_valid_n = 1'b0; - lsu_valid_n = 1'b0; - mult_valid_n = 1'b0; - csr_valid_n = 1'b0; - branch_valid_n = 1'b0; - // Exception pass through: - // If an exception has 
occurred simply pass it through - // we do not want to issue this instruction - if (~issue_instr_i.ex.valid && issue_instr_valid_i && issue_ack_o) begin - case (issue_instr_i.fu) - ALU: - alu_valid_n = 1'b1; - CTRL_FLOW: - branch_valid_n = 1'b1; - MULT: - mult_valid_n = 1'b1; - LOAD, STORE: - lsu_valid_n = 1'b1; - CSR: - csr_valid_n = 1'b1; - default:; - endcase - end - // if we got a flush request, de-assert the valid flag, otherwise we will start this - // functional unit with the wrong inputs - if (flush_i) begin - alu_valid_n = 1'b0; - lsu_valid_n = 1'b0; - mult_valid_n = 1'b0; - csr_valid_n = 1'b0; - branch_valid_n = 1'b0; - end - end - // ---------------------- // Integer Register File // ---------------------- + logic [1:0][63:0] rdata; + logic [1:0][4:0] raddr_pack; + + // pack signals + logic [NR_COMMIT_PORTS-1:0][4:0] waddr_pack; + logic [NR_COMMIT_PORTS-1:0][63:0] wdata_pack; + logic [NR_COMMIT_PORTS-1:0] we_pack; + assign raddr_pack = {issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; + assign waddr_pack = {waddr_i[1], waddr_i[0]}; + assign wdata_pack = {wdata_i[1], wdata_i[0]}; + assign we_pack = {we_gpr_i[1], we_gpr_i[0]}; + ariane_regfile #( - .DATA_WIDTH ( 64 ) - ) regfile_i ( - // Clock and Reset - .clk ( clk_i ), - .rst_n ( rst_ni ), - .test_en_i ( 1'b0 ), - - .raddr_a_i ( issue_instr_i.rs1[4:0] ), - .rdata_a_o ( operand_a_regfile ), - - .raddr_b_i ( issue_instr_i.rs2[4:0] ), - .rdata_b_o ( operand_b_regfile ), - - .waddr_a_i ( waddr_i[0] ), - .wdata_a_i ( wdata_i[0] ), - .we_a_i ( we_i[0] ), - - .waddr_b_i ( waddr_i[1] ), - .wdata_b_i ( wdata_i[1] ), - .we_b_i ( we_i[1] ) + .DATA_WIDTH ( 64 ), + .NR_READ_PORTS ( 2 ), + .NR_WRITE_PORTS ( NR_COMMIT_PORTS ), + .ZERO_REG_ZERO ( 1 ) + ) i_ariane_regfile ( + .test_en_i ( 1'b0 ), + .raddr_i ( raddr_pack ), + .rdata_o ( rdata ), + .waddr_i ( waddr_pack ), + .wdata_i ( wdata_pack ), + .we_i ( we_pack ), + .* ); + // ----------------------------- + // Floating-Point Register File + // 
----------------------------- + logic [2:0][FLEN-1:0] fprdata; + + // pack signals + logic [2:0][4:0] fp_raddr_pack; + logic [NR_COMMIT_PORTS-1:0][63:0] fp_wdata_pack; + + generate + if (FP_PRESENT) begin : float_regfile_gen + assign fp_raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; + assign fp_wdata_pack = {wdata_i[1][FLEN-1:0], wdata_i[0][FLEN-1:0]}; + + ariane_regfile #( + .DATA_WIDTH ( FLEN ), + .NR_READ_PORTS ( 3 ), + .NR_WRITE_PORTS ( NR_COMMIT_PORTS ), + .ZERO_REG_ZERO ( 0 ) + ) i_ariane_fp_regfile ( + .test_en_i ( 1'b0 ), + .raddr_i ( fp_raddr_pack ), + .rdata_o ( fprdata ), + .waddr_i ( waddr_pack ), + .wdata_i ( wdata_pack ), + .we_i ( we_fpr_i ), + .* + ); + end else begin : no_fpr_gen + assign fprdata = '{default: '0}; + end + endgenerate + + assign operand_a_regfile = is_rs1_fpr(issue_instr_i.op) ? fprdata[0] : rdata[0]; + assign operand_b_regfile = is_rs2_fpr(issue_instr_i.op) ? fprdata[1] : rdata[1]; + assign operand_c_regfile = fprdata[2]; + // ---------------------- // Registers (ID <-> EX) // ---------------------- @@ -309,6 +405,9 @@ module issue_read_operands #( alu_valid_q <= 1'b0; branch_valid_q <= 1'b0; mult_valid_q <= 1'b0; + fpu_valid_q <= 1'b0; + fpu_fmt_q <= 2'b0; + fpu_rm_q <= 3'b0; lsu_valid_q <= 1'b0; csr_valid_q <= 1'b0; fu_q <= NONE; @@ -324,6 +423,9 @@ module issue_read_operands #( alu_valid_q <= alu_valid_n; branch_valid_q <= branch_valid_n; mult_valid_q <= mult_valid_n; + fpu_valid_q <= fpu_valid_n; + fpu_fmt_q <= fpu_fmt_n; + fpu_rm_q <= fpu_rm_n; lsu_valid_q <= lsu_valid_n; csr_valid_q <= csr_valid_n; fu_q <= fu_n; diff --git a/src/issue_stage.sv b/src/issue_stage.sv index 793aedaa5..1cc08afc4 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -16,9 +16,9 @@ import ariane_pkg::*; module issue_stage #( - parameter int unsigned NR_ENTRIES = 8, - parameter int unsigned NR_WB_PORTS = 4, - parameter int unsigned NR_COMMIT_PORTS = 2 + parameter int unsigned NR_ENTRIES = 8, + 
parameter int unsigned NR_WB_PORTS = 4, + parameter int unsigned NR_COMMIT_PORTS = 2 )( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -48,14 +48,17 @@ module issue_stage #( input logic lsu_ready_i, output logic lsu_valid_o, // branch prediction - input logic branch_ready_i, - output logic branch_valid_o, // use branch prediction unit + output logic branch_valid_o, // use branch prediction unit output branchpredict_sbe_t branch_predict_o, input logic mult_ready_i, output logic mult_valid_o, // Branch predict Out - input logic csr_ready_i, + input logic fpu_ready_i, + output logic fpu_valid_o, + output logic [1:0] fpu_fmt_o, // FP fmt field from instr. + output logic [2:0] fpu_rm_o, // FP rm field from instr. + output logic csr_valid_o, // write back port @@ -68,7 +71,8 @@ module issue_stage #( // commit port input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i, input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i, - input logic [NR_COMMIT_PORTS-1:0] we_i, + input logic [NR_COMMIT_PORTS-1:0] we_gpr_i, + input logic [NR_COMMIT_PORTS-1:0] we_fpr_i, output scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_o, input logic [NR_COMMIT_PORTS-1:0] commit_ack_i @@ -76,7 +80,8 @@ module issue_stage #( // --------------------------------------------------- // Scoreboard (SB) <-> Issue and Read Operands (IRO) // --------------------------------------------------- - fu_t [2**REG_ADDR_SIZE:0] rd_clobber_sb_iro; + fu_t [2**REG_ADDR_SIZE:0] rd_clobber_gpr_sb_iro; + fu_t [2**REG_ADDR_SIZE:0] rd_clobber_fpr_sb_iro; logic [REG_ADDR_SIZE-1:0] rs1_iro_sb; logic [63:0] rs1_sb_iro; @@ -86,6 +91,10 @@ module issue_stage #( logic [63:0] rs2_sb_iro; logic rs2_valid_iro_sb; + logic [REG_ADDR_SIZE-1:0] rs3_iro_sb; + logic [FLEN-1:0] rs3_sb_iro; + logic rs3_valid_iro_sb; + scoreboard_entry_t issue_instr_rename_sb; logic issue_instr_valid_rename_sb; logic issue_ack_sb_rename; @@ -117,35 +126,31 @@ module issue_stage #( .NR_ENTRIES (NR_ENTRIES ), 
.NR_WB_PORTS(NR_WB_PORTS) ) i_scoreboard ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .flush_unissued_instr_i ( flush_unissued_instr_i ), - .flush_i ( flush_i ), - .unresolved_branch_i ( 1'b0 ), + .unresolved_branch_i ( 1'b0 ), + .rd_clobber_gpr_o ( rd_clobber_gpr_sb_iro ), + .rd_clobber_fpr_o ( rd_clobber_fpr_sb_iro ), + .rs1_i ( rs1_iro_sb ), + .rs1_o ( rs1_sb_iro ), + .rs1_valid_o ( rs1_valid_sb_iro ), + .rs2_i ( rs2_iro_sb ), + .rs2_o ( rs2_sb_iro ), + .rs2_valid_o ( rs2_valid_iro_sb ), + .rs3_i ( rs3_iro_sb ), + .rs3_o ( rs3_sb_iro ), + .rs3_valid_o ( rs3_valid_iro_sb ), - .rd_clobber_o ( rd_clobber_sb_iro ), - .rs1_i ( rs1_iro_sb ), - .rs1_o ( rs1_sb_iro ), - .rs1_valid_o ( rs1_valid_sb_iro ), - .rs2_i ( rs2_iro_sb ), - .rs2_o ( rs2_sb_iro ), - .rs2_valid_o ( rs2_valid_iro_sb ), + .decoded_instr_i ( issue_instr_rename_sb ), + .decoded_instr_valid_i ( issue_instr_valid_rename_sb ), + .decoded_instr_ack_o ( issue_ack_sb_rename ), + .issue_instr_o ( issue_instr_sb_iro ), + .issue_instr_valid_o ( issue_instr_valid_sb_iro ), + .issue_ack_i ( issue_ack_iro_sb ), - .commit_instr_o ( commit_instr_o ), - .commit_ack_i ( commit_ack_i ), - - .decoded_instr_i ( issue_instr_rename_sb ), - .decoded_instr_valid_i ( issue_instr_valid_rename_sb ), - .decoded_instr_ack_o ( issue_ack_sb_rename ), - - .issue_instr_o ( issue_instr_sb_iro ), - .issue_instr_valid_o ( issue_instr_valid_sb_iro ), - .issue_ack_i ( issue_ack_iro_sb ), - .resolved_branch_i ( resolved_branch_i ), - .trans_id_i ( trans_id_i ), - .wbdata_i ( wbdata_i ), - .ex_i ( ex_ex_i ), - .wb_valid_i ( wb_valid_i ) + .resolved_branch_i ( resolved_branch_i ), + .trans_id_i ( trans_id_i ), + .wbdata_i ( wbdata_i ), + .ex_i ( ex_ex_i ), + .* ); // --------------------------------------------------------- @@ -162,7 +167,11 @@ module issue_stage #( .rs2_o ( rs2_iro_sb ), .rs2_i ( rs2_sb_iro ), .rs2_valid_i ( rs2_valid_iro_sb ), - .rd_clobber_i ( rd_clobber_sb_iro ), + .rs3_o ( rs3_iro_sb ), + .rs3_i ( rs3_sb_iro ), + 
.rs3_valid_i ( rs3_valid_iro_sb ), + .rd_clobber_gpr_i ( rd_clobber_gpr_sb_iro ), + .rd_clobber_fpr_i ( rd_clobber_fpr_sb_iro ), .* ); diff --git a/src/load_unit.sv b/src/load_unit.sv index 5d299d279..39437fe68 100644 --- a/src/load_unit.sv +++ b/src/load_unit.sv @@ -299,17 +299,10 @@ module load_unit ( // prepare these signals for faster selection in the next cycle assign signed_d = load_data_d.operator inside {LW, LH, LB}; - assign fp_sign_d = 1'b0; - assign idx_d = (load_data_d.operator inside {LW}) ? load_data_d.address_offset + 3 : - (load_data_d.operator inside {LH}) ? load_data_d.address_offset + 1 : - load_data_d.address_offset; - - // use this with FP support: - // assign signed_d = load_data_d.operator inside {LW, LH, LB}; - // assign fp_sign_d = load_data_d.operator inside {FLW, FLH, FLB}; - // assign idx_d = (load_data_d.operator inside {LW, FLW}) ? load_data_d.address_offset + 3 : - // (load_data_d.operator inside {LH, FLH}) ? load_data_d.address_offset + 1 : - // load_data_d.address_offset; + assign fp_sign_d = load_data_d.operator inside {FLW, FLH, FLB}; + assign idx_d = (load_data_d.operator inside {LW, FLW}) ? load_data_d.address_offset + 3 : + (load_data_d.operator inside {LH, FLH}) ? 
load_data_d.address_offset + 1 : + load_data_d.address_offset; assign sign_bits = { req_port_i.data_rdata[63], @@ -328,25 +321,13 @@ module load_unit ( // result mux always_comb begin unique case (load_data_q.operator) - LW, LWU: begin - result_o = {{32{sign_bit}}, shifted_data[31:0]}; - end - LH, LHU: result_o = {{48{sign_bit}}, shifted_data[15:0]}; - LB, LBU: result_o = {{56{sign_bit}}, shifted_data[7:0]}; + LW, LWU, FLW: result_o = {{32{sign_bit}}, shifted_data[31:0]}; + LH, LHU, FLH: result_o = {{48{sign_bit}}, shifted_data[15:0]}; + LB, LBU, FLB: result_o = {{56{sign_bit}}, shifted_data[7:0]}; default: result_o = shifted_data; endcase end - // use this with FP support: - // always_comb begin - // unique case (load_data_q.operator) - // LW, LWU, FLW: result_o = {{32{sign_bit}}, shifted_data[31:0]}; - // LH, LHU, FLH: result_o = {{48{sign_bit}}, shifted_data[15:0]}; - // LB, LBU, FLB: result_o = {{56{sign_bit}}, shifted_data[7:0]}; - // default: result_o = shifted_data; - // endcase - // end - always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs if (~rst_ni) begin idx_q <= 0; diff --git a/src/lsu.sv b/src/lsu.sv index ef2d8fbfe..1f668ff87 100644 --- a/src/lsu.sv +++ b/src/lsu.sv @@ -280,7 +280,7 @@ module lsu #( if (lsu_ctrl.valid) begin case (lsu_ctrl.operator) // double word - LD, SD, + LD, SD, FLD, FSD, AMO_LRD, AMO_SCD, AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD, AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND, @@ -290,7 +290,7 @@ module lsu #( end end // word - LW, LWU, SW, + LW, LWU, SW, FLW, FSW, AMO_LRW, AMO_SCW, AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW, AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW, @@ -300,7 +300,7 @@ module lsu #( end end // half word - LH, LHU, SH: begin + LH, LHU, SH, FLH, FSH: begin if (lsu_ctrl.vaddr[0] != 1'b0) begin data_misaligned = 1'b1; end @@ -366,6 +366,7 @@ module lsu #( .ready_o ( lsu_ready_o ), .* ); + endmodule // ------------------ diff --git a/src/lsu_arbiter.sv b/src/lsu_arbiter.sv index 0f4c73df5..7dafa1466 100644 --- 
a/src/lsu_arbiter.sv +++ b/src/lsu_arbiter.sv @@ -40,9 +40,10 @@ module lsu_arbiter ( // RR fashion. FIFOs need to be 2 deep in order to unconditionally accept loads and stores since we can // have a maximum of 2 outstanding loads. // if there are valid elements in the fifos, the unit posts the result on its output ports and expects it - // to be consumed unconditionally + // to be consumed unconditionally - localparam int DEPTH = 2; + // Important: this needs to be greater than 2 to unconditionally acept incoming requests + localparam int DEPTH = 4; typedef struct packed { logic [TRANS_ID_BITS-1:0] trans_id; @@ -64,9 +65,9 @@ module lsu_arbiter ( assign ld_in.result = ld_result_i; assign ld_in.ex = ld_ex_i; - assign trans_id_o = (idx) ? st_out.trans_id : ld_out.trans_id; - assign result_o = (idx) ? st_out.result : ld_out.result; - assign ex_o = (idx) ? st_out.ex : ld_out.ex; + assign trans_id_o = (idx) ? st_out.trans_id : ld_out.trans_id; + assign result_o = (idx) ? st_out.result : ld_out.result; + assign ex_o = (idx) ? 
st_out.ex : ld_out.ex; // round robin with "lookahead" for 2 requesters rrarbiter #( @@ -85,7 +86,7 @@ module lsu_arbiter ( fifo_v2 #( .dtype ( fifo_t ), .DEPTH ( DEPTH ) - ) i_ld_fifo ( + ) i_ld_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i ( flush_i ), @@ -98,12 +99,12 @@ module lsu_arbiter ( .push_i ( ld_valid_i ), .data_o ( ld_out ), .pop_i ( ld_ren ) - ); + ); fifo_v2 #( .dtype ( fifo_t ), .DEPTH ( DEPTH ) - ) i_st_fifo ( + ) i_st_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i ( flush_i ), @@ -116,7 +117,7 @@ module lsu_arbiter ( .push_i ( st_valid_i ), .data_o ( st_out ), .pop_i ( st_ren ) - ); + ); `ifndef SYNTHESIS diff --git a/src/re_name.sv b/src/re_name.sv index 93def90f9..4b2e22509 100644 --- a/src/re_name.sv +++ b/src/re_name.sv @@ -41,34 +41,48 @@ module re_name ( // keep track of re-naming data structures logic [31:0] re_name_table_gpr_n, re_name_table_gpr_q; + logic [31:0] re_name_table_fpr_n, re_name_table_fpr_q; // ------------------- // Re-naming // ------------------- always_comb begin // MSB of the renamed source register addresses - logic name_bit_rs1, name_bit_rs2, name_bit_rd; + logic name_bit_rs1, name_bit_rs2, name_bit_rs3, name_bit_rd; // default assignments re_name_table_gpr_n = re_name_table_gpr_q; + re_name_table_fpr_n = re_name_table_fpr_q; issue_instr_o = issue_instr_i; if (issue_ack_i && !flush_unissied_instr_i) begin // if we acknowledge the instruction tic the corresponding destination register - re_name_table_gpr_n[issue_instr_i.rd] = re_name_table_gpr_q[issue_instr_i.rd] ^ 1'b1; + if (is_rd_fpr(issue_instr_i.op)) + re_name_table_fpr_n[issue_instr_i.rd] = re_name_table_fpr_q[issue_instr_i.rd] ^ 1'b1; + else + re_name_table_gpr_n[issue_instr_i.rd] = re_name_table_gpr_q[issue_instr_i.rd] ^ 1'b1; end // select name bit according to the register file used for source operands - name_bit_rs1 = re_name_table_gpr_q[issue_instr_i.rs1]; - name_bit_rs2 = re_name_table_gpr_q[issue_instr_i.rs2]; + name_bit_rs1 = 
is_rs1_fpr(issue_instr_i.op) ? re_name_table_fpr_q[issue_instr_i.rs1] + : re_name_table_gpr_q[issue_instr_i.rs1]; + name_bit_rs2 = is_rs2_fpr(issue_instr_i.op) ? re_name_table_fpr_q[issue_instr_i.rs2] + : re_name_table_gpr_q[issue_instr_i.rs2]; + // rs3 is only used in certain FP operations and held like an immediate + name_bit_rs3 = re_name_table_fpr_q[issue_instr_i.result[4:0]]; // make sure only the addr bits are read // select name bit according to the state it will have after renaming - name_bit_rd = re_name_table_gpr_q[issue_instr_i.rd] ^ (issue_instr_i.rd != '0); // don't rename x0 + name_bit_rd = is_rd_fpr(issue_instr_i.op) ? re_name_table_fpr_q[issue_instr_i.rd] ^ 1'b1 + : re_name_table_gpr_q[issue_instr_i.rd] ^ (issue_instr_i.rd != '0); // don't rename x0 // re-name the source registers issue_instr_o.rs1 = { ENABLE_RENAME & name_bit_rs1, issue_instr_i.rs1[4:0] }; issue_instr_o.rs2 = { ENABLE_RENAME & name_bit_rs2, issue_instr_i.rs2[4:0] }; + // re-name the third operand in imm if it's actually an operand + if (is_imm_fpr(issue_instr_i.op)) + issue_instr_o.result = { ENABLE_RENAME & name_bit_rs3, issue_instr_i.result[4:0]}; + // re-name the destination register issue_instr_o.rd = { ENABLE_RENAME & name_bit_rd, issue_instr_i.rd[4:0] }; @@ -78,6 +92,7 @@ module re_name ( // Handle flushes if (flush_i) begin re_name_table_gpr_n = '0; + re_name_table_fpr_n = '0; end end @@ -88,8 +103,10 @@ module re_name ( always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin re_name_table_gpr_q <= '0; + re_name_table_fpr_q <= '0; end else begin re_name_table_gpr_q <= re_name_table_gpr_n; + re_name_table_fpr_q <= re_name_table_fpr_n; end end endmodule diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 5ed3e587e..990c59858 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -25,7 +25,8 @@ module scoreboard #( input logic flush_i, // flush whole scoreboard input logic unresolved_branch_i, // we have an unresolved branch // list of clobbered registers 
to issue stage - output fu_t [2**REG_ADDR_SIZE:0] rd_clobber_o, + output fu_t [2**REG_ADDR_SIZE:0] rd_clobber_gpr_o, + output fu_t [2**REG_ADDR_SIZE:0] rd_clobber_fpr_o, // regfile like interface to operand read stage input logic [REG_ADDR_SIZE-1:0] rs1_i, @@ -36,12 +37,16 @@ module scoreboard #( output logic [63:0] rs2_o, output logic rs2_valid_o, + input logic [REG_ADDR_SIZE-1:0] rs3_i, + output logic [FLEN-1:0] rs3_o, + output logic rs3_valid_o, + // advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer output scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_o, input logic [NR_COMMIT_PORTS-1:0] commit_ack_i, - // instruction to put on top of scoreboard e.g. : top pointer - // we can always put this instruction to the to p unless we signal with asserted full_o + // instruction to put on top of scoreboard e.g.: top pointer + // we can always put this instruction to the top unless we signal with asserted full_o input scoreboard_entry_t decoded_instr_i, input logic decoded_instr_valid_i, output logic decoded_instr_ack_o, @@ -66,9 +71,9 @@ module scoreboard #( scoreboard_entry_t sbe; // this is the score board entry we will send to ex } mem_q [NR_ENTRIES-1:0], mem_n [NR_ENTRIES-1:0]; - logic [$clog2(NR_ENTRIES)-1:0] issue_cnt_n, issue_cnt_q; - logic [$clog2(NR_ENTRIES)-1:0] issue_pointer_n, issue_pointer_q; - logic [$clog2(NR_ENTRIES)-1:0] commit_pointer_n, commit_pointer_q; + logic [BITS_ENTRIES-1:0] issue_cnt_n, issue_cnt_q; + logic [BITS_ENTRIES-1:0] issue_pointer_n, issue_pointer_q; + logic [BITS_ENTRIES-1:0] commit_pointer_n, commit_pointer_q; logic issue_full; // the issue queue is full don't issue any new instructions @@ -76,7 +81,7 @@ module scoreboard #( // output commit instruction directly always_comb begin : commit_ports - for (logic [$clog2(NR_ENTRIES)-1:0] i = 0; i < NR_COMMIT_PORTS; i++) + for (logic [BITS_ENTRIES-1:0] i = 0; i < NR_COMMIT_PORTS; i++) commit_instr_o[i] = mem_q[commit_pointer_q + i].sbe; end 
@@ -94,8 +99,8 @@ module scoreboard #( // maintain a FIFO with issued instructions // keep track of all issued instructions always_comb begin : issue_fifo - automatic logic [$clog2(NR_ENTRIES)-1:0] issue_cnt; - automatic logic [$clog2(NR_ENTRIES)-1:0] commit_pointer; + automatic logic [BITS_ENTRIES-1:0] issue_cnt; + automatic logic [BITS_ENTRIES-1:0] commit_pointer; commit_pointer = commit_pointer_q; issue_cnt = issue_cnt_q; @@ -124,13 +129,13 @@ module scoreboard #( mem_n[trans_id_i[i]].sbe.valid = 1'b1; mem_n[trans_id_i[i]].sbe.result = wbdata_i[i]; // save the target address of a branch (needed for debug in commit stage) - if (resolved_branch_i.valid) begin - mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address; - end + mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address; // write the exception back if it is valid - if (ex_i[i].valid) begin + if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i]; - end + // write the fflags back from the FPU (exception valid is never set), leave tval intact + else if (mem_n[trans_id_i[i]].sbe.fu inside {FPU, FPU_VEC}) + mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause; end end @@ -138,7 +143,7 @@ module scoreboard #( // Commit Port // ------------ // we've got an acknowledge from commit - for (logic [$clog2(NR_ENTRIES)-1:0] i = 0; i < NR_COMMIT_PORTS; i++) begin + for (logic [BITS_ENTRIES-1:0] i = 0; i < NR_COMMIT_PORTS; i++) begin if (commit_ack_i[i]) begin // decrease the issue counter issue_cnt--; @@ -149,6 +154,7 @@ module scoreboard #( commit_pointer++; end end + // ------ // Flush // ------ @@ -164,6 +170,7 @@ module scoreboard #( commit_pointer = '0; end end + // update issue counter issue_cnt_n = issue_cnt; // update commit potiner @@ -175,16 +182,20 @@ module scoreboard #( // ------------------- // rd_clobber output: output currently clobbered destination registers always_comb begin : clobber_output - rd_clobber_o = '{default: NONE}; + rd_clobber_gpr_o = 
'{default: NONE}; + rd_clobber_fpr_o = '{default: NONE}; // check for all valid entries and set the clobber register accordingly for (int unsigned i = 0; i < NR_ENTRIES; i++) begin if (mem_q[i].issued) begin // output the functional unit which is going to clobber this register - rd_clobber_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu; + if (is_rd_fpr(mem_q[i].sbe.op)) + rd_clobber_fpr_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu; + else + rd_clobber_gpr_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu; end end - // the zero register is always free - rd_clobber_o[0] = NONE; + // the gpr zero register is always free + rd_clobber_gpr_o[0] = NONE; end // ---------------------------------- @@ -194,20 +205,26 @@ module scoreboard #( always_comb begin : read_operands rs1_o = 64'b0; rs2_o = 64'b0; + rs3_o = '0; rs1_valid_o = 1'b0; rs2_valid_o = 1'b0; + rs3_valid_o = 1'b0; for (int unsigned i = 0; i < NR_ENTRIES; i++) begin // only consider this entry if it is valid if (mem_q[i].issued) begin // look at the appropriate fields and look whether there was an - // instruction that wrote the rd field before, first for RS1 and then for RS2 - if (mem_q[i].sbe.rd == rs1_i) begin + // instruction that wrote the rd field before, first for RS1 and then for RS2, then for RS3 + // we check the type of the stored result register file against issued register file + if ((mem_q[i].sbe.rd == rs1_i) && (is_rd_fpr(mem_q[i].sbe.op) == is_rs1_fpr(issue_instr_o.op))) begin rs1_o = mem_q[i].sbe.result; rs1_valid_o = mem_q[i].sbe.valid; - end else if (mem_q[i].sbe.rd == rs2_i) begin + end else if ((mem_q[i].sbe.rd == rs2_i) && (is_rd_fpr(mem_q[i].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin rs2_o = mem_q[i].sbe.result; rs2_valid_o = mem_q[i].sbe.valid; + end else if ((mem_q[i].sbe.rd == rs3_i) && (is_rd_fpr(mem_q[i].sbe.op) == is_imm_fpr(issue_instr_o.op))) begin + rs3_o = mem_q[i].sbe.result; + rs3_valid_o = mem_q[i].sbe.valid; end end end @@ -218,22 +235,30 @@ module scoreboard #( // provide a direct combinational path 
from WB a.k.a forwarding // make sure that we are not forwarding a result that got an exception for (int unsigned j = 0; j < NR_WB_PORTS; j++) begin - if (mem_q[trans_id_i[j]].sbe.rd == rs1_i && wb_valid_i[j] && ~ex_i[j].valid) begin + if (mem_q[trans_id_i[j]].sbe.rd == rs1_i && wb_valid_i[j] && ~ex_i[j].valid + && (is_rd_fpr(mem_q[trans_id_i[j]].sbe.op) == is_rs1_fpr(issue_instr_o.op))) begin rs1_o = wbdata_i[j]; rs1_valid_o = wb_valid_i[j]; break; end - if (mem_q[trans_id_i[j]].sbe.rd == rs2_i && wb_valid_i[j] && ~ex_i[j].valid) begin + if (mem_q[trans_id_i[j]].sbe.rd == rs2_i && wb_valid_i[j] && ~ex_i[j].valid + && (is_rd_fpr(mem_q[trans_id_i[j]].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin rs2_o = wbdata_i[j]; rs2_valid_o = wb_valid_i[j]; break; end + if (mem_q[trans_id_i[j]].sbe.rd == rs3_i && wb_valid_i[j] && ~ex_i[j].valid + && (is_rd_fpr(mem_q[trans_id_i[j]].sbe.op) == is_imm_fpr(issue_instr_o.op))) begin + rs3_o = wbdata_i[j]; + rs3_valid_o = wb_valid_i[j]; + break; + end end // make sure we didn't read the zero register - if (rs1_i == '0) + if (rs1_i == '0 && ~is_rs1_fpr(issue_instr_o.op)) // only GPR reg0 is 0 rs1_valid_o = 1'b0; - if (rs2_i == '0) + if (rs2_i == '0 && ~is_rs2_fpr(issue_instr_o.op)) // only GPR reg0 is 0 rs2_valid_o = 1'b0; end @@ -254,12 +279,12 @@ module scoreboard #( `ifndef SYNTHESIS `ifndef verilator initial begin - assert (NR_ENTRIES == 2**$clog2(NR_ENTRIES)) else $fatal("Scoreboard size needs to be a power of two."); + assert (NR_ENTRIES == 2**BITS_ENTRIES) else $fatal("Scoreboard size needs to be a power of two."); end // assert that zero is never set assert property ( - @(posedge clk_i) rst_ni |-> (rd_clobber_o[0] == NONE)) + @(posedge clk_i) rst_ni |-> (rd_clobber_gpr_o[0] == NONE)) else $error ("RD 0 should not bet set"); // assert that we never acknowledge a commit if the instruction is not valid assert property ( diff --git a/src/tech_cells_generic b/src/tech_cells_generic new file mode 160000 index 000000000..ffe7818dc 
--- /dev/null +++ b/src/tech_cells_generic @@ -0,0 +1 @@ +Subproject commit ffe7818dc24eba29cf3634d404d1b3b85034272b diff --git a/src/util/cluster_clock_gating.sv b/src/util/cluster_clock_gating.sv deleted file mode 100644 index f2b10b29f..000000000 --- a/src/util/cluster_clock_gating.sv +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2018 ETH Zurich and University of Bologna. -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// -// Behavioural GLock Gating -// File: cluster_clock_gating.sv -// Author: ? -// Date: ? - -module cluster_clock_gating ( - input logic clk_i, - input logic en_i, - input logic test_en_i, - output logic clk_o - ); - -`ifdef PULP_FPGA_EMUL - // no clock gates in FPGA flow - assign clk_o = clk_i; -`elsif verilator - assign clk_o = clk_i; -`else - logic clk_en; - - always_latch - begin - if (clk_i == 1'b0) - clk_en <= en_i | test_en_i; - end - - assign clk_o = clk_i & clk_en; -`endif - -endmodule diff --git a/src/util/find_first_one.sv b/src/util/find_first_one.sv new file mode 100644 index 000000000..53653f20d --- /dev/null +++ b/src/util/find_first_one.sv @@ -0,0 +1,85 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. 
+// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. + + +/// A leading-one finder / leading zero counter. +/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB) +/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB) +module find_first_one #( + /// The width of the input vector. + parameter int WIDTH = -1, + parameter int FLIP = 0 +)( + input logic [WIDTH-1:0] in_i, + output logic [$clog2(WIDTH)-1:0] first_one_o, + output logic no_ones_o +); + + localparam int NUM_LEVELS = $clog2(WIDTH); + + // pragma translate_off + initial begin + assert(WIDTH >= 0); + end + // pragma translate_on + + logic [WIDTH-1:0][NUM_LEVELS-1:0] index_lut; + logic [2**NUM_LEVELS-1:0] sel_nodes; + logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes; + + logic [WIDTH-1:0] in_tmp; + + for (genvar i = 0; i < WIDTH; i++) begin + assign in_tmp[i] = FLIP ? in_i[WIDTH-1-i] : in_i[i]; + end + + for (genvar j = 0; j < WIDTH; j++) begin + assign index_lut[j] = j; + end + + for (genvar level = 0; level < NUM_LEVELS; level++) begin + + if (level < NUM_LEVELS-1) begin + for (genvar l = 0; l < 2**level; l++) begin + assign sel_nodes[2**level-1+l] = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1]; + assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? 
+ index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1]; + end + end + + if (level == NUM_LEVELS-1) begin + for (genvar k = 0; k < 2**level; k++) begin + // if two successive indices are still in the vector... + if (k * 2 < WIDTH-1) begin + assign sel_nodes[2**level-1+k] = in_tmp[k*2] | in_tmp[k*2+1]; + assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1]; + end + // if only the first index is still in the vector... + if (k * 2 == WIDTH-1) begin + assign sel_nodes[2**level-1+k] = in_tmp[k*2]; + assign index_nodes[2**level-1+k] = index_lut[k*2]; + end + // if index is out of range + if (k * 2 > WIDTH-1) begin + assign sel_nodes[2**level-1+k] = 1'b0; + assign index_nodes[2**level-1+k] = '0; + end + end + end + end + + assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0; + assign no_ones_o = NUM_LEVELS > 0 ? ~sel_nodes[0] : '1; + +endmodule diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index 144f275dd..d976179c6 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -19,9 +19,12 @@ class instruction_trace_item; scoreboard_entry_t sbe; logic [31:0] pc; logic [31:0] instr; - logic [63:0] reg_file [32]; + logic [63:0] gp_reg_file [32]; + logic [63:0] fp_reg_file [32]; logic [4:0] read_regs [$]; + logic read_fpr [$]; logic [4:0] result_regs [$]; + logic result_fpr [$]; logic [63:0] imm; logic [63:0] result; logic [63:0] paddr; @@ -31,14 +34,15 @@ class instruction_trace_item; logic [4:0] rs1, rs2, rs3, rd; // constructor creating a new instruction trace item, e.g.: a single instruction with all relevant information - function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] reg_file [32], - logic [63:0] result, logic [63:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, branchpredict_t bp); + function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic 
[31:0] instr, logic [63:0] gp_reg_file [32], + logic [63:0] fp_reg_file [32], logic [63:0] result, logic [63:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, branchpredict_t bp); this.simtime = simtime; this.cycle = cycle; this.pc = sbe.pc; this.sbe = sbe; this.instr = instr; - this.reg_file = reg_file; + this.gp_reg_file = gp_reg_file; + this.fp_reg_file = fp_reg_file; this.result = result; this.paddr = paddr; this.bp = bp; @@ -48,7 +52,8 @@ class instruction_trace_item; this.rs3 = instr[31:27]; this.rd = sbe.rd[4:0]; endfunction - // convert register address to ABI compatible form + + // convert gp register address to ABI compatible form function string regAddrToStr(logic [5:0] addr); case (addr[4:0]) 0: return "x0"; @@ -63,9 +68,64 @@ class instruction_trace_item; default: return $sformatf("s%0d", (addr[4:0] - 16)); endcase endfunction + // convert fp register address to ABI compatible form + function string fpRegAddrToStr(logic [5:0] addr); + case (addr) inside + [0:7] : return $sformatf("ft%0d", addr); + [8:9] : return $sformatf("fs%0d", (addr - 8)); + [10:17] : return $sformatf("fa%0d", (addr - 10)); + [18:27] : return $sformatf("fs%0d", (addr - 16)); + [28:31] : return $sformatf("ft%0d", (addr - 20)); + endcase + endfunction + + function string fpFmtToStr(logic [1:0] fmt); + case (fmt) + 2'b00 : return "s"; + 2'b01 : return "d"; + 2'b10 : return "h"; + 2'b11 : return "b"; + default : return "XX"; + endcase + endfunction + + function string fmvFpFmtToStr(logic [1:0] fmt); + case (fmt) + 2'b00 : return "w"; + 2'b01 : return "d"; + 2'b10 : return "h"; + 2'b11 : return "b"; + default : return "XX"; + endcase + endfunction + + function string intFmtToStr(logic [1:0] ifmt); + case (ifmt) + 2'b00 : return "w"; + 2'b01 : return "wu"; + 2'b10 : return "l"; + 2'b11 : return "lu"; + default : return "XX"; + endcase + endfunction + + function string fpRmToStr(logic [2:0] rm); + case (rm) + 3'b000 : return "rne"; + 3'b001 : return "rtz"; + 3'b010 : return "rdn"; + 
3'b011 : return "rup"; + 3'b100 : return "rmm"; + 3'b111 : return "dyn"; // what is this called in rv binutils? + default: return "INVALID"; + endcase + endfunction function string csrAddrToStr(logic [11:0] addr); case (addr) + riscv::CSR_FFLAGS: return "fflags"; + riscv::CSR_FRM: return "frm"; + riscv::CSR_FCSR: return "fcsr"; riscv::CSR_SSTATUS: return "sstatus"; riscv::CSR_SIE: return "sie"; riscv::CSR_STVEC: return "stvec"; @@ -120,7 +180,7 @@ class instruction_trace_item; function string printInstr(); string s; - casex (instr) + case (instr) inside // Aliases 32'h00_00_00_13: s = this.printMnemonic("nop"); // Regular opcodes @@ -174,6 +234,33 @@ class instruction_trace_item; INSTR_SRLW: s = this.printRInstr("srlw"); INSTR_SRAW: s = this.printRInstr("sraw"); INSTR_MULW: s = this.printMulInstr(1'b1); + // FP + INSTR_FMADD: s = this.printR4Instr("fmadd"); + INSTR_FMSUB: s = this.printR4Instr("fmsub"); + INSTR_FNSMSUB: s = this.printR4Instr("fnmsub"); + INSTR_FNMADD: s = this.printR4Instr("fnmadd"); + + INSTR_FADD: s = this.printRFBCInstr("fadd", 1'b1); + INSTR_FSUB: s = this.printRFBCInstr("fsub", 1'b1); + INSTR_FMUL: s = this.printRFInstr("fmul", 1'b1); + INSTR_FDIV: s = this.printRFInstr("fdiv", 1'b1); + INSTR_FSQRT: s = this.printRFInstr1Op("fsqrt", 1'b1); + INSTR_FSGNJ: s = this.printRFInstr("fsgnj", 1'b0); + INSTR_FSGNJN: s = this.printRFInstr("fsgnjn", 1'b0); + INSTR_FSGNJX: s = this.printRFInstr("fsgnjx", 1'b0); + INSTR_FMIN: s = this.printRFInstr("fmin", 1'b0); + INSTR_FMAX: s = this.printRFInstr("fmax", 1'b0); + INSTR_FLE: s = this.printRFInstr("fle", 1'b0); + INSTR_FLT: s = this.printRFInstr("flt", 1'b0); + INSTR_FEQ: s = this.printRFInstr("feq", 1'b0); + + INSTR_FCLASS: s = this.printRFInstr1Op("fclass", 1'b0); + + INSTR_FCVT_F2F, + INSTR_FMV_F2X, + INSTR_FMV_X2F, + INSTR_FCVT_F2I, + INSTR_FCVT_I2F: s = this.printFpSpecialInstr(); // these are a mess to do nicely // FENCE INSTR_FENCE: s = this.printMnemonic("fence"); INSTR_FENCEI: s = 
this.printMnemonic("fence.i"); @@ -201,14 +288,16 @@ class instruction_trace_item; INSTR_WFI: s = this.printMnemonic("wfi"); INSTR_SFENCE: s = this.printMnemonic("sfence.vma"); // loads and stores - INSTR_LOAD: s = this.printLoadInstr(); - INSTR_STORE: s = this.printStoreInstr(); + INSTR_LOAD, + INSTR_LOAD_FP: s = this.printLoadInstr(); + INSTR_STORE, + INSTR_STORE_FP: s = this.printStoreInstr(); INSTR_AMO: s = this.printAMOInstr(); default: s = this.printMnemonic("INVALID"); endcase - s = $sformatf("%10t %10d %s %h %h %h %-36s", simtime, + s = $sformatf("%8dns %8d %s %h %h %h %-36s", simtime, cycle, priv_lvl, sbe.pc, @@ -223,23 +312,29 @@ class instruction_trace_item; // s); foreach (result_regs[i]) begin - if (result_regs[i] != 0) + if (result_fpr[i]) + s = $sformatf("%s %-4s:%16x", s, fpRegAddrToStr(result_regs[i]), this.result); + else if (result_regs[i] != 0) s = $sformatf("%s %-4s:%16x", s, regAddrToStr(result_regs[i]), this.result); end foreach (read_regs[i]) begin - if (read_regs[i] != 0) - s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), reg_file[read_regs[i]]); + if (read_fpr[i]) + s = $sformatf("%s %-4s:%16x", s, fpRegAddrToStr(read_regs[i]), fp_reg_file[read_regs[i]]); + else if (read_regs[i] != 0) + s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), gp_reg_file[read_regs[i]]); end - casex (instr) + case (instr) inside // check of the instrction was a load or store - INSTR_STORE: begin - logic [63:0] vaddress = reg_file[read_regs[1]] + this.imm; + INSTR_STORE, + INSTR_STORE_FP: begin + logic [63:0] vaddress = gp_reg_file[read_regs[1]] + this.imm; s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr); end - INSTR_LOAD: begin - logic [63:0] vaddress = reg_file[read_regs[0]] + this.imm; + INSTR_LOAD, + INSTR_LOAD_FP: begin + logic [63:0] vaddress = gp_reg_file[read_regs[0]] + this.imm; s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr); end endcase @@ -261,48 +356,131 @@ class instruction_trace_item; function string 
printRInstr(input string mnemonic); - result_regs.push_back(sbe.rd); - read_regs.push_back(sbe.rs1); - read_regs.push_back(sbe.rs2); + result_regs.push_back(rd); + result_fpr.push_back(1'b0); + read_regs.push_back(rs1); + read_fpr.push_back(1'b0); + read_regs.push_back(rs2); + read_fpr.push_back(1'b0); - return $sformatf("%-16s %s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2)); + return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), regAddrToStr(rs2)); endfunction // printRInstr + function string printRFBCInstr(input string mnemonic, input bit use_rnd); + + result_regs.push_back(rd); + result_fpr.push_back(is_rd_fpr(sbe.op)); + read_regs.push_back(rs2); + read_fpr.push_back(is_rs2_fpr(sbe.op)); + read_regs.push_back(sbe.result[4:0]); + read_fpr.push_back(is_imm_fpr(sbe.op)); + + if (use_rnd && instr[14:12]!=3'b111) + return $sformatf("%-12s %4s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs2_fpr(sbe.op)?fpRegAddrToStr(rs2):regAddrToStr(rs2), is_imm_fpr(sbe.op)?fpRegAddrToStr(sbe.result[4:0]):regAddrToStr(sbe.result[4:0]), fpRmToStr(instr[14:12])); + else + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs2_fpr(sbe.op)?fpRegAddrToStr(rs2):regAddrToStr(rs2), is_imm_fpr(sbe.op)?fpRegAddrToStr(sbe.result[4:0]):regAddrToStr(sbe.result[4:0])); + endfunction // printRFInstr + + function string printRFInstr(input string mnemonic, input bit use_rnd); + + result_regs.push_back(rd); + result_fpr.push_back(is_rd_fpr(sbe.op)); + read_regs.push_back(rs1); + read_fpr.push_back(is_rs1_fpr(sbe.op)); + read_regs.push_back(rs2); + read_fpr.push_back(is_rs2_fpr(sbe.op)); + + if (use_rnd && instr[14:12]!=3'b111) + return $sformatf("%-12s %4s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), 
is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(rs2):regAddrToStr(rs2), fpRmToStr(instr[14:12])); + else + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(rs2):regAddrToStr(rs2)); + endfunction // printRFInstr + + function string printRFInstr1Op(input string mnemonic, input bit use_rnd); + + result_regs.push_back(rd); + result_fpr.push_back(is_rd_fpr(sbe.op)); + read_regs.push_back(rs1); + read_fpr.push_back(is_rs1_fpr(sbe.op)); + + if (use_rnd && instr[14:12]!=3'b111) + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1), fpRmToStr(instr[14:12])); + else + return $sformatf("%-12s %4s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1)); + endfunction // printRFInstr1Op + + function string printR4Instr(input string mnemonic); + + result_regs.push_back(rd); + result_fpr.push_back(1'b1); + read_regs.push_back(rs1); + read_fpr.push_back(1'b1); + read_regs.push_back(rs2); + read_fpr.push_back(1'b1); + read_regs.push_back(rs3); + read_fpr.push_back(1'b1); + + return $sformatf("%-12s %4s, %s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), fpRegAddrToStr(rd), fpRegAddrToStr(rs1), fpRegAddrToStr(rs2), fpRegAddrToStr(instr[31:27]), fpRmToStr(instr[14:12])); + endfunction // printR4Instr + + function string printFpSpecialInstr(); + + result_regs.push_back(rd); + result_fpr.push_back(is_rd_fpr(sbe.op)); + read_regs.push_back(rs1); + read_fpr.push_back(is_rs1_fpr(sbe.op)); + + case (sbe.op) + FCVT_F2F 
: return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20])), fpRegAddrToStr(rd), fpRegAddrToStr(rs1), fpRmToStr(instr[14:12])); + FCVT_F2I : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1), fpRmToStr(instr[14:12])); + FCVT_I2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20])), fpRegAddrToStr(rd), regAddrToStr(rs1), fpRmToStr(instr[14:12])); + FMV_F2X : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1)); + FMV_X2F : return $sformatf("%-12s %4s, %s", $sformatf("fmv.%s.x", fmvFpFmtToStr(instr[26:25])), fpRegAddrToStr(rd), regAddrToStr(rs1)); + endcase + endfunction + function string printIInstr(input string mnemonic); - result_regs.push_back(sbe.rd); - read_regs.push_back(sbe.rs1); + result_regs.push_back(rd); + result_fpr.push_back(1'b0); + read_regs.push_back(rs1); + read_fpr.push_back(1'b0); - if (sbe.rs1 == 0) - return $sformatf("%-16s %s, %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); + if (rs1 == 0) + return $sformatf("%-12s %4s, %0d", mnemonic, regAddrToStr(rd), $signed(sbe.result)); - return $sformatf("%-16s %s, %s, %0d", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), $signed(sbe.result)); + return $sformatf("%-12s %4s, %s, %0d", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), $signed(sbe.result)); endfunction // printIInstr function string printIuInstr(input string mnemonic); - result_regs.push_back(sbe.rd); - read_regs.push_back(sbe.rs1); + result_regs.push_back(rd); + result_fpr.push_back(1'b0); + read_regs.push_back(rs1); + read_fpr.push_back(1'b0); - return $sformatf("%-16s %s, %s, 0x%0x", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), sbe.result); + return $sformatf("%-12s %4s, %s, 0x%0x", mnemonic, regAddrToStr(rd), 
regAddrToStr(rs1), sbe.result); endfunction // printIuInstr function string printSBInstr(input string mnemonic); - read_regs.push_back(sbe.rs1); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs1); + read_fpr.push_back(1'b0); + read_regs.push_back(rs2); + read_fpr.push_back(1'b0); - if (sbe.rs2 == 0) - return $sformatf("%-16s %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), $signed(sbe.result)); + if (rs2 == 0) + return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(rs1), $signed(sbe.result)); else - return $sformatf("%-16s %s, %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2), $signed(sbe.result)); + return $sformatf("%-12s %4s, %s, pc + %0d", mnemonic, regAddrToStr(rs1), regAddrToStr(rs2), $signed(sbe.result)); endfunction // printIuInstr function string printUInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); + result_fpr.push_back(1'b0); - return $sformatf("%-16s %s, 0x%0h", mnemonic, regAddrToStr(sbe.rd), sbe.result[31:12]); + return $sformatf("%-12s %4s, 0x%0h", mnemonic, regAddrToStr(rd), sbe.result[31:12]); endfunction // printUInstr function string printJump(); @@ -329,58 +507,70 @@ class instruction_trace_item; function string printUJInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); + result_fpr.push_back(1'b0); // jump instruction - if (sbe.rd == 0) - return $sformatf("%-16s pc + %0d", mnemonic, $signed(sbe.result)); + if (rd == 0) + return $sformatf("%-12s pc + %0d", mnemonic, $signed(sbe.result)); else - return $sformatf("%-16s %s, pc + %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); + return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(rd), $signed(sbe.result)); endfunction // printUJInstr function string printCSRInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); + result_fpr.push_back(1'b0); if (instr[14] == 0) begin - read_regs.push_back(sbe.rs1); - if (sbe.rd != 0 && 
sbe.rs1 != 0) begin - return $sformatf("%-16s %s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + read_regs.push_back(rs1); + read_fpr.push_back(1'b0); + if (rd != 0 && rs1 != 0) begin + return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero - end else if (sbe.rd == 0) begin - return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); - end else if (sbe.rs1 == 0) begin - return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); + end else if (rd == 0) begin + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rs1), csrAddrToStr(sbe.result[11:0])); + end else if (rs1 == 0) begin + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rd), csrAddrToStr(sbe.result[11:0])); end end else begin - if (sbe.rd != 0 && sbe.rs1 != 0) begin - return $sformatf("%-16s %s, %d, %s", mnemonic, regAddrToStr(sbe.rd), $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + if (rd != 0 && rs1 != 0) begin + return $sformatf("%-12s %4s, %d, %s", mnemonic, regAddrToStr(rd), $unsigned(rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero - end else if (sbe.rd == 0) begin - return $sformatf("%-16s %d, %s", mnemonic, $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); - end else if (sbe.rs1 == 0) begin - return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); + end else if (rd == 0) begin + return $sformatf("%-14s %2d, %s", mnemonic, $unsigned(rs1), csrAddrToStr(sbe.result[11:0])); + end else if (rs1 == 0) begin + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rd), csrAddrToStr(sbe.result[11:0])); end end endfunction // printCSRInstr function string printLoadInstr(); string mnemonic; - case (instr[14:12]) - 3'b000: mnemonic = "lb"; - 3'b001: mnemonic = "lh"; 
- 3'b010: mnemonic = "lw"; - 3'b100: mnemonic = "lbu"; - 3'b101: mnemonic = "lhu"; - 3'b110: mnemonic = "lwu"; - 3'b011: mnemonic = "ld"; - default: return printMnemonic("INVALID"); - endcase - result_regs.push_back(sbe.rd); - read_regs.push_back(sbe.rs1); - // save the immediate for calculating the virtual address - this.imm = sbe.result; + case (instr[14:12]) + 3'b000: mnemonic = "lb"; + 3'b001: mnemonic = "lh"; + 3'b010: mnemonic = "lw"; + 3'b100: mnemonic = "lbu"; + 3'b101: mnemonic = "lhu"; + 3'b110: mnemonic = "lwu"; + 3'b011: mnemonic = "ld"; + default: return printMnemonic("INVALID"); + endcase - return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + if (instr[6:0] == riscv::OpcodeLoadFp) + mnemonic = $sformatf("f%s",mnemonic); + + result_regs.push_back(rd); + result_fpr.push_back(is_rd_fpr(sbe.op)); + read_regs.push_back(rs1); + read_fpr.push_back(1'b0); + // save the immediate for calculating the virtual address + this.imm = sbe.result; + + if (instr[6:0] == riscv::OpcodeLoadFp) + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(rd), $signed(sbe.result), regAddrToStr(rs1)); + else + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(rd), $signed(sbe.result), regAddrToStr(rs1)); endfunction function string printStoreInstr(); @@ -393,12 +583,20 @@ class instruction_trace_item; default: return printMnemonic("INVALID"); endcase - read_regs.push_back(sbe.rs2); - read_regs.push_back(sbe.rs1); + if (instr[6:0] == riscv::OpcodeStoreFp) + mnemonic = $sformatf("f%s",mnemonic); + + read_regs.push_back(rs2); + read_fpr.push_back(is_rs2_fpr(sbe.op)); + read_regs.push_back(rs1); + read_fpr.push_back(1'b0); // save the immediate for calculating the virtual address this.imm = sbe.result; - return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + if (instr[6:0] == riscv::OpcodeStoreFp) + return $sformatf("%-12s %4s, %0d(%s)", 
mnemonic, fpRegAddrToStr(rs2), $signed(sbe.result), regAddrToStr(rs1)); + else + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(rs2), $signed(sbe.result), regAddrToStr(rs1)); endfunction // printSInstr function string printAMOInstr(); diff --git a/src/util/instruction_tracer.svh b/src/util/instruction_tracer.svh index 2b96043c4..87f5d730a 100644 --- a/src/util/instruction_tracer.svh +++ b/src/util/instruction_tracer.svh @@ -25,8 +25,9 @@ class instruction_tracer; scoreboard_entry_t issue_sbe; // store resolved branches, get (mis-)predictions branchpredict_t bp [$]; - // shadow copy of the register file - logic [63:0] reg_file [32]; + // shadow copy of the register files + logic [63:0] gp_reg_file [32]; + logic [63:0] fp_reg_file [32]; // 64 bit clock tick count longint unsigned clk_ticks; int f, commit_log; @@ -60,7 +61,7 @@ class instruction_tracer; logic [31:0] decode_instruction, issue_instruction, issue_commit_instruction; scoreboard_entry_t commit_instruction; // initialize register 0 - reg_file [0] = 0; + gp_reg_file [0] = 0; forever begin automatic branchpredict_t bp_instruction = '0; @@ -125,10 +126,12 @@ class instruction_tracer; // the scoreboards issue entry still contains the immediate value as a result // check if the write back is valid, if not we need to source the result from the register file // as the most recent version of this register will be there. 
- if (tracer_if.pck.we[i]) begin + if (tracer_if.pck.we_gpr[i] || tracer_if.pck.we_fpr[i]) begin printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata[i], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); + end else if (is_rd_fpr(commit_instruction.op)) begin + printInstr(issue_sbe, issue_commit_instruction, fp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); end else begin - printInstr(issue_sbe, issue_commit_instruction, reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); + printInstr(issue_sbe, issue_commit_instruction, gp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, tracer_if.pck.debug_mode, bp_instruction); end end end @@ -142,13 +145,14 @@ class instruction_tracer; // ---------------------- // Commit Registers // ---------------------- - // update shadow reg file here + // update shadow reg files here for (int i = 0; i < 2; i++) begin - if (tracer_if.pck.we[i] && tracer_if.pck.waddr[i] != 5'b0) begin - reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i]; + if (tracer_if.pck.we_gpr[i] && tracer_if.pck.waddr[i] != 5'b0) begin + gp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i]; + end else if (tracer_if.pck.we_fpr[i]) begin + fp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i]; end end - // -------------- // Flush Signals // -------------- @@ -182,11 +186,11 @@ class instruction_tracer; endfunction function void printInstr(scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [63:0] paddr, riscv::priv_lvl_t priv_lvl, logic debug_mode, branchpredict_t bp); - instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr, priv_lvl, debug_mode, bp); + instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.gp_reg_file, this.fp_reg_file, result, paddr, priv_lvl, debug_mode, 
bp); // print instruction to console string print_instr = iti.printInstr(); if (ENABLE_SPIKE_COMMIT_LOG && !debug_mode) begin - $fwrite(this.commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result)); + $fwrite(this.commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result, is_rd_fpr(sbe.op))); end uvm_report_info( "Tracer", print_instr, UVM_HIGH); $fwrite(this.f, {print_instr, "\n"}); diff --git a/src/util/instruction_tracer_defines.svh b/src/util/instruction_tracer_defines.svh index 4b9756e45..e0f961819 100644 --- a/src/util/instruction_tracer_defines.svh +++ b/src/util/instruction_tracer_defines.svh @@ -28,23 +28,23 @@ parameter INSTR_BGE = { 7'b?, 5'b?, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranc parameter INSTR_BLTU = { 7'b?, 5'b?, 5'b?, 3'b110, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BGEU = { 7'b?, 5'b?, 5'b?, 3'b111, 5'b?, riscv::OpcodeBranch }; -// OPIMM -parameter INSTR_LI = { 12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpimm }; -parameter INSTR_ADDI = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpimm }; -parameter INSTR_SLTI = { 17'b?, 3'b010, 5'b?, riscv::OpcodeOpimm }; -parameter INSTR_SLTIU = { 17'b?, 3'b011, 5'b?, riscv::OpcodeOpimm }; -parameter INSTR_XORI = { 17'b?, 3'b100, 5'b?, riscv::OpcodeOpimm }; -parameter INSTR_ORI = { 17'b?, 3'b110, 5'b?, riscv::OpcodeOpimm }; -parameter INSTR_ANDI = { 17'b?, 3'b111, 5'b?, riscv::OpcodeOpimm }; -parameter INSTR_SLLI = { 6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpimm }; -parameter INSTR_SRLI = { 6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpimm }; -parameter INSTR_SRAI = { 6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpimm }; +// OP-IMM +parameter INSTR_LI = { 12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpImm }; +parameter INSTR_ADDI = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm }; +parameter INSTR_SLTI = { 17'b?, 3'b010, 5'b?, riscv::OpcodeOpImm }; +parameter INSTR_SLTIU = { 17'b?, 3'b011, 5'b?, riscv::OpcodeOpImm }; +parameter INSTR_XORI = { 17'b?, 3'b100, 5'b?, riscv::OpcodeOpImm }; 
+parameter INSTR_ORI = { 17'b?, 3'b110, 5'b?, riscv::OpcodeOpImm }; +parameter INSTR_ANDI = { 17'b?, 3'b111, 5'b?, riscv::OpcodeOpImm }; +parameter INSTR_SLLI = { 6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpImm }; +parameter INSTR_SRLI = { 6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm }; +parameter INSTR_SRAI = { 6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm }; -// OPIMM32 -parameter INSTR_ADDIW = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpimm32 }; -parameter INSTR_SLLIW = { 7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpimm32 }; -parameter INSTR_SRLIW = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpimm32 }; -parameter INSTR_SRAIW = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpimm32 }; +// OP-IMM-32 +parameter INSTR_ADDIW = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm32 }; +parameter INSTR_SLLIW = { 7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpImm32 }; +parameter INSTR_SRLIW = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32 }; +parameter INSTR_SRAIW = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32 }; // OP parameter INSTR_ADD = { 7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp }; @@ -67,9 +67,10 @@ parameter INSTR_SRLW = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32 parameter INSTR_SRAW = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32 }; parameter INSTR_MULW = { 7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp32 }; -// FENCE -parameter INSTR_FENCE = { 4'b0, 8'b?, 13'b0, riscv::OpcodeFence }; -parameter INSTR_FENCEI = { 17'b0, 3'b001, 5'b0, riscv::OpcodeFence }; +// MISC-MEM +parameter INSTR_FENCE = { 4'b0, 8'b?, 13'b0, riscv::OpcodeMiscMem }; +parameter INSTR_FENCEI = { 17'b0, 3'b001, 5'b0, riscv::OpcodeMiscMem }; + // SYSTEM parameter INSTR_CSRW = { 12'b?, 5'b?, 3'b001, 5'b0, riscv::OpcodeSystem }; parameter INSTR_CSRRW = { 12'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeSystem }; @@ -101,9 +102,38 @@ parameter INSTR_DIVU = { 7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp } parameter INSTR_REM = { 7'b0000001, 10'b?, 
3'b110, 5'b?, riscv::OpcodeOp }; parameter INSTR_REMU = { 7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp }; +// RVFD +parameter INSTR_FMADD = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMadd}; +parameter INSTR_FMSUB = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMsub}; +parameter INSTR_FNSMSUB = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmsub}; +parameter INSTR_FNMADD = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmadd}; + +parameter INSTR_FADD = { 5'b00000, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FSUB = { 5'b00001, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FMUL = { 5'b00010, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FDIV = { 5'b00011, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FSQRT = { 5'b01011, 2'b?, 5'b0, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FSGNJ = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FSGNJN = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FSGNJX = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FMIN = { 5'b00101, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FMAX = { 5'b00101, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FLE = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FLT = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FEQ = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp}; + +parameter INSTR_FCVT_F2F = { 5'b01000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FMV_F2X = { 5'b11100, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FCLASS = { 5'b11100, 2'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FMV_X2F = { 5'b11110, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FCVT_F2I = { 
5'b11000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; +parameter INSTR_FCVT_I2F = { 5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; + // A parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo }; // Load/Stores -parameter INSTR_LOAD = {25'b?, riscv::OpcodeLoad }; -parameter INSTR_STORE = {25'b?, riscv::OpcodeStore }; +parameter INSTR_LOAD = {25'b?, riscv::OpcodeLoad}; +parameter INSTR_LOAD_FP = {25'b?, riscv::OpcodeLoadFp}; +parameter INSTR_STORE = {25'b?, riscv::OpcodeStore}; +parameter INSTR_STORE_FP = {25'b?, riscv::OpcodeStoreFp}; diff --git a/src/util/instruction_tracer_if.sv b/src/util/instruction_tracer_if.sv index 86e8007e2..fe19684ea 100644 --- a/src/util/instruction_tracer_if.sv +++ b/src/util/instruction_tracer_if.sv @@ -32,7 +32,8 @@ interface instruction_tracer_if ( // WB stage logic [1:0][4:0] waddr; logic [1:0][63:0] wdata; - logic [1:0] we; + logic [1:0] we_gpr; + logic [1:0] we_fpr; // commit stage scoreboard_entry_t [1:0] commit_instr; // commit instruction logic [1:0] commit_ack; @@ -56,7 +57,7 @@ interface instruction_tracer_if ( clocking pck @(posedge clk); input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr, st_valid, st_paddr, ld_valid, ld_kill, ld_paddr, resolve_branch, - wdata, we, commit_instr, commit_ack, exception, priv_lvl, debug_mode; + wdata, we_gpr, we_fpr, commit_instr, commit_ack, exception, priv_lvl, debug_mode; endclocking `endif diff --git a/tb/ariane_testharness.sv b/tb/ariane_testharness.sv index 3b451ca10..66103be8b 100644 --- a/tb/ariane_testharness.sv +++ b/tb/ariane_testharness.sv @@ -131,6 +131,7 @@ module ariane_testharness #( dmi_jtag i_dmi_jtag ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), + .testmode_i ( test_en ), .dmi_req_o ( jtag_dmi_req ), .dmi_req_valid_o ( jtag_req_valid ), .dmi_req_ready_i ( debug_req_ready ), @@ -300,6 +301,7 @@ module ariane_testharness #( ) i_clint ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), + .testmode_i ( test_en ), .slave ( 
master[1] ), .rtc_i ( rtc_i ), .timer_irq_o ( timer_irq ), diff --git a/tb/wave/wave_core.do b/tb/wave/wave_core.do index 8e74fc121..53caa9d0c 100644 --- a/tb/wave/wave_core.do +++ b/tb/wave/wave_core.do @@ -5,8 +5,8 @@ add wave -noupdate -group frontend -group icache /ariane_tb/dut/i_ariane/i_std_c add wave -noupdate -group frontend -group ras /ariane_tb/dut/i_ariane/i_frontend/i_ras/* add wave -noupdate -group frontend -group btb /ariane_tb/dut/i_ariane/i_frontend/i_btb/* add wave -noupdate -group frontend -group bht /ariane_tb/dut/i_ariane/i_frontend/i_bht/* -add wave -noupdate -group frontend -group instr_scan /ariane_tb/dut/i_ariane/i_frontend/*/i_instr_scan/* -add wave -noupdate -group frontend -group fetch_fifo /ariane_tb/dut/i_ariane/i_frontend/i_fetch_fifo/* +# add wave -noupdate -group frontend -group instr_scan /ariane_tb/dut/i_ariane/i_frontend/*/i_instr_scan/* +# add wave -noupdate -group frontend -group fetch_fifo /ariane_tb/dut/i_ariane/i_frontend/i_fetch_fifo/* add wave -noupdate -group id_stage -group decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/* add wave -noupdate -group id_stage -group compressed_decoder /ariane_tb/dut/i_ariane/id_stage_i/compressed_decoder_i/* @@ -22,6 +22,8 @@ add wave -noupdate -group ex_stage -group alu /ariane_tb/dut/i_ariane/ex_stage_i add wave -noupdate -group ex_stage -group mult /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/* add wave -noupdate -group ex_stage -group mult -group mul /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_mul/* add wave -noupdate -group ex_stage -group mult -group div /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_div/* +add wave -noupdate -group ex_stage -group fpu /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/* +add wave -noupdate -group ex_stage -group fpu -group fpnew /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/* add wave -noupdate -group ex_stage -group lsu /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/* add wave -noupdate -group ex_stage -group lsu -group lsu_bypass 
/ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/lsu_bypass_i/*